在以前公司的时候用MFC container写过一个处理CSV文件的简单的Parser。换到新公司之后公司有条要求,就是能用STL Container的就不用MFC container。于是刚刚用STL重写了一遍,如果大家觉得有用不妨用用。欢迎帮我查查错。
CSV格式指的是用逗号分隔的字符串(Comma-Separated Values)。在读取简单文本数据时应用最为广泛。(数据复杂时一般存储为XML格式。)任何只有一层结构的数据,或者Class、Object,都可以很方便地用CSV读取。
在读写CSV时要注意逗号和引号:如果一列数据中已经含有逗号,就不能再简单地用逗号作分隔符,这一列数据会被自动加上一对引号("")。同样,如果一列数据中含有引号也会造成混淆,因此CSV会在每个引号后再附加一个引号。
我写的Class CCSVLineParser主要用以下几个函数:GetAt(), SetAt(), GetFullString(), SetFullString(), size()。分别是读、写某一列,读、写整行和得到总列数。我没有用operator overloading重载符号"[]"而用了GetAt()、SetAt(),是因为我扩展了一点SetAt()的功能。GetAt(int intIndex)返回所指定的列的字符串,如果intIndex越界会throw exception。而SetAt()只有在index小于零时throw exception,如果所指定的列数index过大,CSV会自动扩展至这一列。同时表示整行字符串的变量也会更新。
把下面测试程序中的字符串存成.csv文件,然后用Excel打开,你会发现它是5列,和程序的结果一样。
下面是测试程序:
 程序代码:
程序代码:
#include \"stdafx.h\"
#include \"csvlineparser.h\"
// basic_string_erase.cpp
// compile with: /EHsc
#include <string>
#include <iostream>
int _tmain(int argc, _TCHAR* argv[])
{
   using namespace std;
   string str1 ( \"a,\\"b\\"\\"sassaa\\",\\"aw\\"\\",\\"\\"w\\"\\"jj\\",asd,\\"axsxs\\"\" );
   CCSVLineParser csv(str1);
try
{
  for (int i = 0; i < (int)csv.size()+1; i++)
  {
   cout << csv.GetAt(i) << endl;
  }
}
catch(CCSVLineParser::Range ex)
{
  cout << ex.sText << endl;
}
catch(...)
{
}
cout << str1 << endl;
cout << csv.GetFullString(false) << endl;
cout << csv.GetFullString() << endl;
csv.SetAt(6, string(\"haha\\",\\"haha\"));
cout << csv.GetFullString(false) << endl;
cout << csv.GetFullString() << endl;
}
下面是.h和.cpp
CSVLineParser.h
#pragma once
#include <vector>
#include <string>
using namespace std;
// One cell of a CSV line, kept in two forms:
//   m_sDisplay - the plain value of the cell;
//   m_sActual  - the text of the cell as it appears inside the CSV line
//                (with any enclosing / doubled quotes).
// There are no public accessors; CCSVLineParser reads both members as a friend.
class CCSVColumn
{
    friend class CCSVLineParser;

public:
    // Builds the CSV text from a plain value (defined in the .cpp file).
    CCSVColumn(string sDisplay);

    // Stores both forms as given, without any conversion.
    CCSVColumn(string sActual, string sDisplay)
        : m_sDisplay(sDisplay)
        , m_sActual(sActual)
    {
    }

private:
    string m_sDisplay;
    string m_sActual;
};
// Forward declaration so this class does not depend on declaration order.
class CCSVColumn;

// Splits a single CSV line into columns and lets callers read, replace and
// swap columns or rebuild the whole line.
class CCSVLineParser
{
public:
    // Parses sFullString immediately; an empty string yields zero columns.
    CCSVLineParser(string sFullString = "");
    ~CCSVLineParser(void);

    // Number of columns currently held.
    size_t size(void);

    // Returns column intIndex: the plain value by default, or the CSV text of
    // the column when bActual is true. Throws Range if intIndex is out of bounds.
    string GetAt(int intIndex, bool bActual = false);

    // Discards the current columns and parses sLine instead.
    void SetFullString(string sLine);

    // Replaces column intIndex with the plain value sColumn. Throws Range for a
    // negative index; an index past the end pads the line with empty columns.
    // The parameter is a non-const reference, so callers must pass an lvalue.
    void SetAt(int intIndex, string & sColumn);

    // Joins all columns with ','. Uses the CSV text of each column by default,
    // or the plain values when bActual is false.
    string GetFullString(bool bActual = true);

    // Exchanges two existing columns; returns false if either index is invalid.
    bool swapColumn(int intCol1, int intCol2);

    // Exception thrown for an invalid column index.
    class Range
    {
    public:
        Range() : sText("Out of range") {}
        string sText;
    };

private:
    // Replaces m_vecElements with the columns parsed from sFullString.
    void Read(string sFullString);

    vector<CCSVColumn> m_vecElements;

    // Consumes escaped quotes ("") at the front of sFullString that belong to
    // the current column; see the definition for details.
    static void PreReadForQuote(string &sColumn, string &sFullString, string sDelimiter, int &intQuoteNumber);

    friend class CCSVColumn;
};
CSVLineParser.cpp 程序代码:
程序代码:
#include \"StdAfx.h\"
#include \".\csvlineparser.h\"
//===============================================================================
#ifdef _DEBUG
#define new DEBUG_NEW
#undef THIS_FILE
static char THIS_FILE[] = __FILE__;
#endif
//===============================================================================
// Builds the CSV text (m_sActual) for a plain cell value (m_sDisplay).
//
// Every double quote in the value is doubled, and the result is enclosed in
// double quotes when the value contains a comma, a double quote or a line
// break. Values without any of these characters are stored unchanged.
CCSVColumn::CCSVColumn(string sDisplay) : m_sDisplay(sDisplay)
{
    const bool bNeedsQuoting = sDisplay.find_first_of(",\"\r\n") != string::npos;

    if (bNeedsQuoting)
        m_sActual += '"';

    for (string::size_type nPos = 0; nPos < sDisplay.size(); ++nPos)
    {
        const char ch = sDisplay[nPos];
        m_sActual += ch;
        if (ch == '"')
            m_sActual += '"';   // escape a quote by writing it twice
    }

    if (bNeedsQuoting)
        m_sActual += '"';
}
// Constructs the parser and splits sFullString into columns right away.
CCSVLineParser::CCSVLineParser(string sFullString)
    : m_vecElements()
{
    Read(sFullString);
}
// Nothing to release explicitly; the column vector cleans up after itself.
CCSVLineParser::~CCSVLineParser()
{
}
void CCSVLineParser::SetFullString(string strLine)
{
Read(strLine);
}
// Decides how the field at the front of sRemaining is terminated.
// A field starting with a double quote ends at `",`; the opening quote is
// removed from sRemaining. Any other field (including an empty remainder)
// ends at a plain comma.
static string BeginNextField(string &sRemaining)
{
    if (sRemaining.empty() || sRemaining[0] != '"')
        return ",";
    sRemaining.erase(0, 1);
    return "\",";
}

// Splits sFullString into columns and stores them in m_vecElements,
// replacing whatever was stored before. An empty line yields no columns.
//
// For each column two strings are kept: the plain value (enclosing quotes
// removed, doubled quotes collapsed) and the matching slice of the original
// line.
void CCSVLineParser::Read(string sFullString)
{
    m_vecElements.clear();
    if (sFullString.empty())
        return;

    string sRemaining = sFullString;
    string sColumn, sDisplay, sActual;
    string::size_type nStartLocation = 0;   // offset of the current column in sFullString
    string::size_type nFindLocation;
    int intQuoteNumber = 0;                 // quote characters present in the line but not in the value

    string sDelimiter = BeginNextField(sRemaining);
    PreReadForQuote(sColumn, sRemaining, sDelimiter, intQuoteNumber);

    while ((nFindLocation = sRemaining.find(sDelimiter)) != string::npos)
    {
        sDisplay = sColumn + sRemaining.substr(0, nFindLocation);
        sRemaining.erase(0, nFindLocation + sDelimiter.size());

        // A quoted column also carries its opening and closing quote.
        if (sDelimiter.size() > 1)
            intQuoteNumber += 2;

        const string::size_type nRawLength = sDisplay.size() + intQuoteNumber;
        sActual = sFullString.substr(nStartLocation, nRawLength);
        m_vecElements.push_back(CCSVColumn(sActual, sDisplay));
        nStartLocation += nRawLength + 1;   // +1 skips the separating comma

        // Prepare the next column. sRemaining may be empty here when the line
        // ends with a delimiter; BeginNextField handles that case.
        sDelimiter = BeginNextField(sRemaining);
        PreReadForQuote(sColumn, sRemaining, sDelimiter, intQuoteNumber);
    }

    // Last column: there is no trailing delimiter to search for.
    if (sDelimiter.size() > 1)
    {
        ++intQuoteNumber;                   // opening quote
        // Drop the closing quote only if it is really there, so that an
        // unterminated or empty quoted column neither loses data nor throws.
        if (!sRemaining.empty() && sRemaining[sRemaining.size() - 1] == '"')
        {
            sRemaining.erase(sRemaining.size() - 1, 1);
            ++intQuoteNumber;
        }
    }
    sDisplay = sColumn + sRemaining;
    sActual = sFullString.substr(nStartLocation, sDisplay.size() + intQuoteNumber);
    m_vecElements.push_back(CCSVColumn(sActual, sDisplay));
}
// Number of columns parsed from (or added to) the current line.
size_t CCSVLineParser::size()
{
    const size_t nColumns = m_vecElements.size();
    return nColumns;
}
string CCSVLineParser::GetAt(int intIndex, bool bActual)
{
if (intIndex < 0 || intIndex >= (int)m_vecElements.size())
  throw Range();
return bActual ? m_vecElements[intIndex].m_sActual : m_vecElements[intIndex].m_sDisplay;
}
// Consumes the escaped quotes ("") that occur before the next delimiter.
//
//   sColumn        - out: receives the text consumed so far, with every ""
//                    collapsed to a single quote (cleared on entry).
//   sFullString    - in/out: the unparsed remainder of the line; everything
//                    up to and including the last consumed "" is removed.
//   sDelimiter     - the terminator of the current column.
//   intQuoteNumber - out: number of "" pairs consumed (reset to 0 on entry).
//
// An escaped quote is consumed while it starts before the first delimiter,
// or while no delimiter is left at all.
void CCSVLineParser::PreReadForQuote(string &sColumn, string &sFullString, string sDelimiter, int &intQuoteNumber)
{
    static const char szEscapedQuote[] = "\"\"";
    const string::size_type nEscapedLength = sizeof(szEscapedQuote) / sizeof(char) - 1;

    sColumn.clear();
    intQuoteNumber = 0;
    if (sFullString.empty() || sDelimiter.empty())
        return;

    string::size_type nQuoteLocation = sFullString.find(szEscapedQuote);
    string::size_type nDelimiterLocation = sFullString.find(sDelimiter);

    // string::npos is the largest size_type value, so the comparison below is
    // also true when no delimiter is left.
    while (nQuoteLocation != string::npos && nQuoteLocation < nDelimiterLocation)
    {
        // Keep the text up to and including the first quote of the pair ...
        sColumn.append(sFullString, 0, nQuoteLocation + 1);
        // ... and drop that text together with both quotes from the remainder.
        sFullString.erase(0, nQuoteLocation + nEscapedLength);
        ++intQuoteNumber;

        nQuoteLocation = sFullString.find(szEscapedQuote);
        nDelimiterLocation = sFullString.find(sDelimiter);
    }
}
void CCSVLineParser::SetAt(int intIndex, string & sColumn)
{
if (intIndex < 0)
  throw Range();
// automatically expand to the column
while ((int)m_vecElements.size() < intIndex+1)
  m_vecElements.push_back(CCSVColumn(\"\"));
m_vecElements[intIndex] = CCSVColumn(sColumn);
}
string CCSVLineParser::GetFullString(bool bActual)
{
string sFullString;
for (int intColumnCount = 0; intColumnCount < (int)size(); intColumnCount++)
{
  if (intColumnCount > 0)
   sFullString += ',';
  if (bActual)
   sFullString += m_vecElements[intColumnCount].m_sActual;
  else
   sFullString += m_vecElements[intColumnCount].m_sDisplay;
}
return sFullString;
}
// Exchanges two columns. Both columns have to exist.
// Returns false (and changes nothing) when either index is negative or not
// less than the column count; returns true otherwise, including when both
// indices are equal.
bool CCSVLineParser::swapColumn(int intCol1, int intCol2)
{
    const int intColumns = (int)size();
    if (intCol1 < 0 || intCol2 < 0 || intCol1 >= intColumns || intCol2 >= intColumns)
        return false;

    if (intCol1 != intCol2)
        swap(m_vecElements[intCol1], m_vecElements[intCol2]);
    return true;
}
[此贴子已经被作者于2006-7-15 3:04:38编辑过]



 
											





 
	    

 
	
