[原创]CSV Parser
在以前公司的时候用MFC container写过一个处理CSV文件的简单的Parser。换到新公司之后公司有条要求,就是能用STL Container的就不用MFC container。于是刚刚用STL重写了一遍,如果大家觉得有用不妨用用。欢迎帮我查查错。
CSV格式指的是用逗号分隔的字符串(Comma-Separated Values)。在读取简单文本数据时应用最为广泛。(数据复杂时一般存储为XML格式。)任何只有一层结构的数据,或者Class、Object,都可以很方便地用CSV读取。
在读写CSV时要注意逗号和引号:如果一列数据中已经有一个逗号,就不能再简单地用逗号作分隔符,这一列数据会被自动加上""。同样,如果一列数据中使用了引号也会造成混淆,CSV会在每个引号后附加一个引号。
我写的Class CCSVLineParser主要有以下几个函数:GetAt(), SetAt(), GetFullString(), SetFullString(), size()。分别是读、写某一列,读写整行和得到总列数。我没有用operator overloading重载符号“[]”而用了GetAt()、SetAt(),是因为我扩展了一点SetAt()的功能。GetAt(int intIndex)返回所指定的列的字符,如果intIndex越界会throw exception。而SetAt()只有在index小于零时throw exception,如果所指定的列数index过大,CSV会自动扩展至这一列。同时表示整行字符的变量也会更新。
把下面的一个字符串存在.csv文件然后用Excel打开,你会发现它是5列,和程序的结果一样。
下面是测试程序:
程序代码:<br>#include "stdafx.h"<br>#include "csvlineparser.h"</FONT></P>
<P><FONT size=2>// basic_string_erase.cpp<br>// compile with: /EHsc<br>#include <string><br>#include <iostream></FONT></P>
<P><FONT size=2>int _tmain(int argc, _TCHAR* argv[])<br>{<br> using namespace std;<br> string str1 ( "a,\"b\"\"sassaa\",\"aw\"\",\"\"w\"\"jj\",asd,\"axsxs\"" );<br> CCSVLineParser csv(str1);</FONT></P>
<P><FONT size=2>try<br>{<br> for (int i = 0; i < (int)csv.size()+1; i++)<br> {<br> cout << csv.GetAt(i) << endl;<br> }<br>}<br>catch(CCSVLineParser::Range ex)<br>{<br> cout << ex.sText << endl;<br>}<br>catch(...)<br>{<br>}</FONT></P>
<P><FONT size=2>cout << str1 << endl;<br>cout << csv.GetFullString(false) << endl;<br>cout << csv.GetFullString() << endl;</FONT></P>
<P><FONT size=2>csv.SetAt(6, string("haha\",\"haha"));<br>cout << csv.GetFullString(false) << endl;<br>cout << csv.GetFullString() << endl;<br>}<br>
下面是.h和.cpp
CSVLineParser.h
程序代码:<br>#pragma once</FONT></P>
<P><FONT size=2>#include <vector><br>#include <string><br>using namespace std;</FONT></P>
// One CSV column, kept in two forms:
//   m_sDisplay - the plain value of the column
//   m_sActual  - the text of the column as it appears inside the CSV line
// Both members are private; CCSVLineParser reads them through friendship.
class CCSVColumn
{
public:
    // Builds a column from its display value only (defined in the .cpp file).
    CCSVColumn(std::string sDisplay);

    // Builds a column when both representations are already known.
    CCSVColumn(std::string sActual, std::string sDisplay)
        : m_sDisplay(sDisplay),
          m_sActual(sActual)
    {
    }

private:
    std::string m_sDisplay;
    std::string m_sActual;

    friend class CCSVLineParser;
};
<P><FONT size=2>class CCSVLineParser<br>{<br>public:<br>CCSVLineParser(string sFullString = "");<br>~CCSVLineParser(void);<br>size_t size(void);<br>string GetAt(int intIndex, bool bActual = false);<br>void SetFullString(string sLine);<br>void SetAt(int intIndex, string & sColumn);<br>string GetFullString(bool bActual = true);<br>bool swapColumn(int intCol1, int intCol2);<br>class Range<br>{<br>public:<br> Range() : sText("Out of range") {};<br> string sText;<br>};<br>private:<br>void Read(string sFullString);<br>vector<CCSVColumn> m_vecElements;<br>static void PreReadForQuote(string &sColumn, string &sFullString, string sDelimiter, int &intQuoteNumber);<br>friend class CCSVColumn;<br>};<br>
CSVLineParser.cpp
程序代码:<br>#include "StdAfx.h"<br>#include ".\csvlineparser.h"</FONT></P>
<P><FONT size=2>//===============================================================================<br>#ifdef _DEBUG<br>#define new DEBUG_NEW<br>#undef THIS_FILE<br>static char THIS_FILE[] = __FILE__;<br>#endif<br>//===============================================================================</FONT></P>
<P><FONT size=2>CCSVColumn::CCSVColumn(string sDisplay) : m_sDisplay(sDisplay)<br>{<br>int intStartPosition=0, intQuoteLocation;</FONT></P>
<P><FONT size=2>if ((int)sDisplay.find(',') >= 0)<br> m_sActual += "\"";</FONT></P>
<P><FONT size=2>while ((intQuoteLocation = (int)sDisplay.find('"', intStartPosition)) >= 0)<br>{<br> m_sActual += string(sDisplay, intStartPosition, intQuoteLocation);<br> m_sActual += '"';<br> intStartPosition = intQuoteLocation+1;<br>}<br>m_sActual += string(sDisplay, intStartPosition, (int)sDisplay.size());</FONT></P>
<P><FONT size=2>if ((int)sDisplay.find(',') >= 0)<br> m_sActual += "\"";<br>}</FONT></P>
<P><FONT size=2>CCSVLineParser::CCSVLineParser(string sFullString)<br>{<br>Read(sFullString);<br>}</FONT></P>
<P><FONT size=2>CCSVLineParser::~CCSVLineParser(void)<br>{<br>}</FONT></P>
<P><FONT size=2>void CCSVLineParser::SetFullString(string strLine)<br>{<br>Read(strLine);<br>}</FONT></P>
<P><FONT size=2>void CCSVLineParser::Read(string sFullString)<br>{<br>int intFindLocation, intStartLocation, intQuoteNumber;<br>string sRemaining = sFullString, sDelimiter, sActual, sDisplay, sColumn;<br>char tmp[200];</FONT></P>
<P><FONT size=2>m_vecElements.clear();</FONT></P>
<P><FONT size=2>if (sRemaining.empty())<br> return;</FONT></P>
<P><FONT size=2>if (sRemaining.at(0) != '"')<br> sDelimiter = ",";<br>else<br>{<br> sDelimiter = "\",";<br> sRemaining.erase(0, 1);<br>}</FONT></P>
<P><FONT size=2>intStartLocation = 0;<br>PreReadForQuote(sColumn, sRemaining, sDelimiter, intQuoteNumber);<br>while ((intFindLocation = (int)sRemaining.find(sDelimiter)) >= 0)<br>{<br> sDisplay= string(sRemaining, 0, intFindLocation);<br> sDisplay = sColumn + sDisplay;<br> sRemaining.erase(0, intFindLocation+sDelimiter.size());<br> if (sDelimiter.size() > 1)<br> intQuoteNumber += 2;<br> sActual =string(sFullString, intStartLocation, (int)sDisplay.size()+intQuoteNumber);<br> strcpy(tmp, sActual.c_str());</FONT></P>
<P><FONT size=2> m_vecElements.push_back(CCSVColumn(sActual, sDisplay));<br> intStartLocation += (int)sDisplay.size()+intQuoteNumber+1;<br> // looking for next delimiter<br> if (*sRemaining.begin() != '"')<br> sDelimiter = ",";<br> else<br> {<br> sDelimiter = "\",";<br> sRemaining.erase(0, 1);<br> }<br> PreReadForQuote(sColumn, sRemaining, sDelimiter, intQuoteNumber);<br>}<br>// grab the last column<br>if (sDelimiter.size() > 1)<br>{<br> sRemaining.erase(sRemaining.size()-1 , sRemaining.size()-1);<br> intQuoteNumber += 2;<br>}<br>sDisplay = sColumn + sRemaining;<br>sActual =string(sFullString, intStartLocation, (int)sDisplay.size()+intQuoteNumber);<br>m_vecElements.push_back(CCSVColumn(sActual, sDisplay));</FONT></P>
<P><FONT size=2>return;<br>}</FONT></P>
<P><FONT size=2>size_t CCSVLineParser::size(void)<br>{<br>return m_vecElements.size();<br>}</FONT></P>
<P><FONT size=2><br>string CCSVLineParser::GetAt(int intIndex, bool bActual)<br>{<br>if (intIndex < 0 || intIndex >= (int)m_vecElements.size())<br> throw Range();</FONT></P>
<P><FONT size=2>return bActual ? m_vecElements[intIndex].m_sActual : m_vecElements[intIndex].m_sDisplay;<br>}</FONT></P>
<P><FONT size=2>void CCSVLineParser::PreReadForQuote(string &sColumn, string &sFullString, string sDelimiter, int &intQuoteNumber)<br>{<br>int intQuoteLocation, intDelimiterLocation;<br>char chararQuote[] = "\"\"";<br>string sPrefix;</FONT></P>
<P><FONT size=2>sColumn.clear();<br>intQuoteNumber = 0;</FONT></P>
<P><FONT size=2>if (sFullString.empty() || sDelimiter.empty())<br> return;</FONT></P>
<P><FONT size=2>intQuoteLocation = (int)sFullString.find(chararQuote);<br>intDelimiterLocation = (int)sFullString.find(sDelimiter);</FONT></P>
<P><FONT size=2>while (intQuoteLocation >= 0 && (intQuoteLocation < intDelimiterLocation || intDelimiterLocation < 0))<br>{<br> sPrefix = sFullString;<br> sPrefix.erase(intQuoteLocation+1 , sPrefix.size()-1 );<br> sFullString.erase(0, intQuoteLocation+sizeof(chararQuote)/sizeof(char)-1);<br> sColumn += sPrefix;<br> intQuoteLocation = (int)sFullString.find(chararQuote);<br> intDelimiterLocation = (int)sFullString.find(sDelimiter);<br> intQuoteNumber++;<br>}<br>return;<br>}</FONT></P>
<P><FONT size=2>void CCSVLineParser::SetAt(int intIndex, string & sColumn)<br>{<br>if (intIndex < 0)<br> throw Range();</FONT></P>
<P><FONT size=2>// automatically expand to the column<br>while ((int)m_vecElements.size() < intIndex+1)<br> m_vecElements.push_back(CCSVColumn(""));<br>m_vecElements[intIndex] = CCSVColumn(sColumn);<br>}</FONT></P>
<P><FONT size=2>string CCSVLineParser::GetFullString(bool bActual)<br>{<br>string sFullString;</FONT></P>
<P><FONT size=2>for (int intColumnCount = 0; intColumnCount < (int)size(); intColumnCount++)<br>{<br> if (intColumnCount > 0)<br> sFullString += ',';</FONT></P>
<P><FONT size=2> if (bActual)<br> sFullString += m_vecElements[intColumnCount].m_sActual;<br> else<br> sFullString += m_vecElements[intColumnCount].m_sDisplay;<br>}<br>return sFullString;<br>}</FONT></P>
<P><FONT size=2>// column has to exist.<br>bool CCSVLineParser::swapColumn(int intCol1, int intCol2)<br>{<br>if (intCol1 < 0 || intCol2 < 0 || intCol1 >= (int)size() || intCol2 >= (int)size())<br> return false;</FONT></P>
<P><FONT size=2>if (intCol1 == intCol2)<br> return true;</FONT></P>
<P><FONT size=2>swap(m_vecElements[intCol1], m_vecElements[intCol2]);<br>string a=m_vecElements[intCol1].m_sDisplay, b=m_vecElements[intCol2].m_sDisplay;<br>return true;<br>}<br>
[此贴子已经被作者于2006-7-15 3:04:38编辑过]