现在编译运行几乎没有问题。但有一个问题,我猜和 getline 函数有关:如果要查询的文件内容很长(特别是每一行都很长时),程序会非正常退出,不知道怎么修正。
也希望作业做完能加精,呵呵

/* 作业:
文本查询系统ver1.01 查询和生成文件同一文件夹下的一个txt文件的单词。
author laigaoat2005 date:2011-5-8
vector <string,pair<string:value_type,int>>* read_text(); 读入单词
vector <string,pair<string:value_type,int>>* _in_text; 单词内部表示
//*/
#include <conio.h>
#include <ctype.h> // for tolower()
#include <locale.h>
#include <stdio.h>
#include <stdlib.h> // for exit()
#include <fstream>
#include <iostream>
#include <map>
#include <set>
#include <string>
#include <utility> // for pair
#include <vector>
using namespace std;
/**
 * Word indexer for a text file.
 *
 * The text is read line by line, split into words, cleaned up, and finally
 * indexed as word -> list of (line index, word index within that line).
 * main() drives the steps in this order: in_text(), text_to_words(),
 * filter_word(), to_lower(), build_words_loc(), build_words_map(),
 * display_map().
 *
 * The object owns every container it points to, so it is not copyable.
 */
class textquery
{
public:
    // All owning pointers start out null so that the destructor is safe even
    // when some (or all) of the build steps were never executed.
    textquery()
        : _in_text(0), _in_words(0), _in_words_loc(0), _words_locs(0), _words_map(0) {}

    ~textquery()
    {
        if (_words_map)
        {
            // The map owns the per-word position vectors that
            // build_words_map() allocates, so release them first.
            for (std::map<std::string, std::vector<std::pair<int, int> >*>::iterator it = _words_map->begin();
                 it != _words_map->end(); ++it)
            {
                delete it->second;
            }
        }
        delete _words_map;
        delete _words_locs;   // only the pair itself: its members alias the two vectors below
        delete _in_words_loc;
        delete _in_words;
        delete _in_text;
    }

    void in_text();         // read the text file into lines
    void text_to_words();   // split the lines into words and record their positions
    void filter_word();     // strip digits and punctuation from the words
    void to_lower();        // convert the words to lower case
    void build_words_loc(); // bundle the word vector and the position vector into one pair
    void build_words_map(); // build the word -> positions map
    void display();         // print the words followed by the positions
    void display_map();     // print the word -> positions map

private:
    // Declared but intentionally not defined: copying would make two objects
    // delete the same containers.
    textquery(const textquery&);
    textquery& operator=(const textquery&);

    std::vector<std::string>* _in_text;                   // text lines, filled by in_text()
    std::vector<std::string>* _in_words;                  // words, filled by text_to_words()
    std::vector<std::pair<int, int> >* _in_words_loc;     // word positions, filled by text_to_words()
    std::pair<std::vector<std::string>*,
              std::vector<std::pair<int, int> >*>* _words_locs; // (words, positions), set by build_words_loc()
    std::map<std::string, std::vector<std::pair<int, int> >*>* _words_map; // word -> positions, set by build_words_map()
};
// Prints the word index to standard output: for every key a "map键[word] {"
// header, then that word's (line, word-index) pairs with a line break after
// every tenth pair, then a closing brace followed by blank lines.
inline void textquery:: display_map()
{
    typedef vector< pair<int, int> > loc_list;
    typedef map<string, loc_list*> word_index;

    for (word_index::iterator entry = _words_map->begin(); entry != _words_map->end(); ++entry)
    {
        cout << "map键[" << entry->first << "] {\n";

        const loc_list& locs = *entry->second;
        loc_list::size_type shown = 0;
        while (shown < locs.size())
        {
            cout << "(" << locs[shown].first << "," << locs[shown].second << ") ";
            ++shown;
            if (shown % 10 == 0)
            {
                cout << "\n"; // wrap the row after ten pairs
            }
        }

        cout << "\n}\n\n\n";
    }
}
// Reads a base file name from standard input, appends ".txt", and loads that
// file line by line into _in_text. If the file cannot be opened, an error is
// written to cerr and the program terminates with exit(-1).
// Calling this again replaces (and frees) the previously loaded lines.
inline void textquery:: in_text()
{
    string file_name;
    cin >> file_name;
    file_name.append(".txt");

    ifstream infile(file_name.c_str(), ios::in);
    if (!infile)
    {
        cerr << "error! file not oend!";
        exit (-1);
    }

    // Read into a fresh vector first so the old one is only released once
    // the new content is complete.
    vector<string>* lines = new vector<string>;
    string read_line;
    // getline grows read_line as needed, so the line length is not limited here.
    while (getline(infile, read_line, '\n'))
    {
        lines->push_back(read_line);
    }

    delete _in_text; // no-op on the first call (the constructor sets it to null)
    _in_text = lines;
}
// Splits every line of _in_text at single spaces and stores the pieces in
// _in_words. For each piece a (line index, word index within the line) pair
// is stored at the same index in _in_words_loc. Consecutive spaces yield
// empty words, and every line contributes at least one (possibly empty) word.
inline void textquery:: text_to_words()
{
    _in_words = new vector<string>;
    _in_words_loc = new vector < pair <int, int> >;
    if (!_in_text)
    {
        return; // in_text() has not loaded anything: leave both lists empty
    }

    int line_num = 0;
    for (vector<string>::iterator iter = _in_text->begin(); iter != _in_text->end(); ++iter, ++line_num)
    {
        // Positions must be string::size_type. The previous string::value_type
        // (a char) could not hold offsets above 127, which made long lines
        // fail with an out-of-range substr() call.
        string::size_type prev = 0;
        string::size_type pos = 0;
        int lie_num = 0; // index of the current word within this line

        while ((pos = iter->find(' ', prev)) != string::npos)
        {
            _in_words->push_back(iter->substr(prev, pos - prev));
            _in_words_loc->push_back(make_pair(line_num, lie_num));
            prev = pos + 1;
            ++lie_num;
        }

        // Remainder of the line after the last space (the whole line if it has none).
        _in_words->push_back(iter->substr(prev));
        _in_words_loc->push_back(make_pair(line_num, lie_num));
    }
}
// Prints all words separated by single spaces, a blank line, then all
// positions as "(line,word)" pairs without separators, and another blank line.
inline void textquery::display()
{
    for (vector<string>::size_type w = 0; w != _in_words->size(); ++w)
    {
        cout << (*_in_words)[w] << " ";
    }
    cout << "\n\n";

    for (vector< pair<int, int> >::size_type p = 0; p != _in_words_loc->size(); ++p)
    {
        const pair<int, int>& loc = (*_in_words_loc)[p];
        cout << "(" << loc.first << "," << loc.second << ")";
    }
    cout << "\n\n";
}
// Removes from every word in _in_words each char that occurs in the
// punctuation/digit set below. Words may become empty as a result; they are
// kept so that indices still line up with _in_words_loc.
// NOTE(review): matching is done per char, so the multi-byte punctuation in
// the set is matched byte by byte - confirm this is acceptable for the input
// encoding.
inline void textquery:: filter_word()
{
    if (!_in_words)
    {
        return; // no word list to filter
    }

    const string fuhao("\"1234567890 ,,【】./;'\\。、!@#¥…_&::*()—+||+_)(*&^%$#@!~-={}[]<>《》\??");
    for (vector<string>::iterator iter = _in_words->begin(); iter != _in_words->end(); ++iter)
    {
        // string::size_type, not string::value_type (char): a char cannot
        // represent offsets above 127 and breaks on long words.
        string::size_type pos = 0;
        while ((pos = iter->find_first_of(fuhao, pos)) != string::npos)
        {
            // After the erase the next candidate sits at the same offset,
            // so pos is deliberately not advanced.
            iter->erase(pos, 1);
        }
    }
}
// Converts the letters A-Z in every word of _in_words to lower case, in place.
inline void textquery::to_lower()
{
    if (!_in_words)
    {
        return; // no word list to convert
    }

    const string bigletter("ABCDEFGHIJKLMNOPQRSTUVWXYZ");
    for (vector<string>::iterator iter = _in_words->begin(); iter != _in_words->end(); ++iter)
    {
        // The search must start at offset 0 for every word. Previously pos was
        // an uninitialised char, so the start offset was indeterminate and
        // offsets above 127 could not be represented.
        string::size_type pos = 0;
        while ((pos = iter->find_first_of(bigletter, pos)) != string::npos)
        {
            // tolower() requires a value representable as unsigned char.
            (*iter)[pos] = static_cast<char>(tolower(static_cast<unsigned char>((*iter)[pos])));
            ++pos;
        }
    }
}
inline void textquery::build_words_loc()
{
_words_locs = new pair<vector<string>*,vector<pair<int,int> >*>(_in_words,_in_words_loc) ;
}
inline void textquery:: build_words_map()
{
vector< string >* p_words = (*_words_locs).first ;
vector< pair<int,int> >* p_locs = (*_words_locs).second;
_words_map = new map < string,vector<pair<int,int> >* >;
vector<string>::iterator iter = p_words->begin(), iter_end = p_words->end(); //单词vector iter 对
int line_num=0; //单词元素和位置元素 的编号 每个单词和每个位置的编号相同
while( iter != iter_end ) //对单词进行迭代
{
//there is or there isn't
if( _words_map->count(*iter) > 0) //如果单词 键存在 则修改 map的值:位置 vector
// there is. modify the location value
{
//cout << "map的值: " << (*_words_map)[*iter] << "\n" ;//是位置vector指针
(*_words_map)[*iter] //是位置vector指针
->insert( //插入成员函数
(*_words_map)[*iter]->end(), // vectot插入位置(末尾)
(*p_locs)[line_num] //当前单词位置pair
);
//上面五行应该也可以用push_back();成员函数,空了试下
}
else
{
vector< pair <int ,int> > * word_loc = new vector<pair<int,int> >;
word_loc->push_back( (*p_locs)[line_num] ) ;
(*_words_map).insert( map<string, vector< pair<int,int> >* >:: value_type( (*iter) , word_loc ) );
}
iter++;
line_num++;
}
}
// Entry point: runs the indexing steps in order on one textquery object and
// prints the resulting word map.
int main()
{
    textquery engine;

    engine.in_text();         // load the lines of the requested file
    engine.text_to_words();   // split lines into words + positions
    engine.filter_word();     // strip digits and punctuation
    engine.to_lower();        // normalise case
    engine.build_words_loc(); // pair up words and positions
    //engine.display();
    engine.build_words_map(); // word -> positions
    engine.display_map();

    getch(); // conio.h: blocks until a key is pressed
    return 0;
}
/* 作业:
文本查询系统ver1.01 查询和生成文件同一文件夹下的一个txt文件的单词。
author laigaoat2005 date:2011-5-8
vector <string,pair<string:value_type,int>>* read_text(); 读入单词
vector <string,pair<string:value_type,int>>* _in_text; 单词内部表示
//*/
#include <stdio.h>
#include <conio.h>
#include <locale.h> //for tolower()
#include <utility> // for pair
#include <vector>
#include <map>
#include <set>
#include <iostream>
#include <fstream>
#include <string>
using namespace std;
/**
 * Word indexer for a text file.
 *
 * The text is read line by line, split into words, cleaned up, and finally
 * indexed as word -> list of (line index, word index within that line).
 * main() drives the steps in this order: in_text(), text_to_words(),
 * filter_word(), to_lower(), build_words_loc(), build_words_map(),
 * display_map().
 *
 * The object owns every container it points to, so it is not copyable.
 */
class textquery
{
public:
    // All owning pointers start out null so that the destructor is safe even
    // when some (or all) of the build steps were never executed.
    textquery()
        : _in_text(0), _in_words(0), _in_words_loc(0), _words_locs(0), _words_map(0) {}

    ~textquery()
    {
        if (_words_map)
        {
            // The map owns the per-word position vectors that
            // build_words_map() allocates, so release them first.
            for (std::map<std::string, std::vector<std::pair<int, int> >*>::iterator it = _words_map->begin();
                 it != _words_map->end(); ++it)
            {
                delete it->second;
            }
        }
        delete _words_map;
        delete _words_locs;   // only the pair itself: its members alias the two vectors below
        delete _in_words_loc;
        delete _in_words;
        delete _in_text;
    }

    void in_text();         // read the text file into lines
    void text_to_words();   // split the lines into words and record their positions
    void filter_word();     // strip digits and punctuation from the words
    void to_lower();        // convert the words to lower case
    void build_words_loc(); // bundle the word vector and the position vector into one pair
    void build_words_map(); // build the word -> positions map
    void display();         // print the words followed by the positions
    void display_map();     // print the word -> positions map

private:
    // Declared but intentionally not defined: copying would make two objects
    // delete the same containers.
    textquery(const textquery&);
    textquery& operator=(const textquery&);

    std::vector<std::string>* _in_text;                   // text lines, filled by in_text()
    std::vector<std::string>* _in_words;                  // words, filled by text_to_words()
    std::vector<std::pair<int, int> >* _in_words_loc;     // word positions, filled by text_to_words()
    std::pair<std::vector<std::string>*,
              std::vector<std::pair<int, int> >*>* _words_locs; // (words, positions), set by build_words_loc()
    std::map<std::string, std::vector<std::pair<int, int> >*>* _words_map; // word -> positions, set by build_words_map()
};
// Prints the word index to standard output: for every key a "map键[word] {"
// header, then that word's (line, word-index) pairs with a line break after
// every tenth pair, then a closing brace followed by blank lines.
inline void textquery:: display_map()
{
    typedef vector< pair<int, int> > loc_list;
    typedef map<string, loc_list*> word_index;

    for (word_index::iterator entry = _words_map->begin(); entry != _words_map->end(); ++entry)
    {
        cout << "map键[" << entry->first << "] {\n";

        const loc_list& locs = *entry->second;
        loc_list::size_type shown = 0;
        while (shown < locs.size())
        {
            cout << "(" << locs[shown].first << "," << locs[shown].second << ") ";
            ++shown;
            if (shown % 10 == 0)
            {
                cout << "\n"; // wrap the row after ten pairs
            }
        }

        cout << "\n}\n\n\n";
    }
}
// Reads a base file name from standard input, appends ".txt", and loads that
// file line by line into _in_text. If the file cannot be opened, an error is
// written to cerr and the program terminates with exit(-1).
// Calling this again replaces (and frees) the previously loaded lines.
inline void textquery:: in_text()
{
    string file_name;
    cin >> file_name;
    file_name.append(".txt");

    ifstream infile(file_name.c_str(), ios::in);
    if (!infile)
    {
        cerr << "error! file not oend!";
        exit (-1);
    }

    // Read into a fresh vector first so the old one is only released once
    // the new content is complete.
    vector<string>* lines = new vector<string>;
    string read_line;
    // getline grows read_line as needed, so the line length is not limited here.
    while (getline(infile, read_line, '\n'))
    {
        lines->push_back(read_line);
    }

    delete _in_text; // no-op on the first call (the constructor sets it to null)
    _in_text = lines;
}
// Splits every line of _in_text at single spaces and stores the pieces in
// _in_words. For each piece a (line index, word index within the line) pair
// is stored at the same index in _in_words_loc. Consecutive spaces yield
// empty words, and every line contributes at least one (possibly empty) word.
inline void textquery:: text_to_words()
{
    _in_words = new vector<string>;
    _in_words_loc = new vector < pair <int, int> >;
    if (!_in_text)
    {
        return; // in_text() has not loaded anything: leave both lists empty
    }

    int line_num = 0;
    for (vector<string>::iterator iter = _in_text->begin(); iter != _in_text->end(); ++iter, ++line_num)
    {
        // Positions must be string::size_type. The previous string::value_type
        // (a char) could not hold offsets above 127, which made long lines
        // fail with an out-of-range substr() call.
        string::size_type prev = 0;
        string::size_type pos = 0;
        int lie_num = 0; // index of the current word within this line

        while ((pos = iter->find(' ', prev)) != string::npos)
        {
            _in_words->push_back(iter->substr(prev, pos - prev));
            _in_words_loc->push_back(make_pair(line_num, lie_num));
            prev = pos + 1;
            ++lie_num;
        }

        // Remainder of the line after the last space (the whole line if it has none).
        _in_words->push_back(iter->substr(prev));
        _in_words_loc->push_back(make_pair(line_num, lie_num));
    }
}
// Prints all words separated by single spaces, a blank line, then all
// positions as "(line,word)" pairs without separators, and another blank line.
inline void textquery::display()
{
    for (vector<string>::size_type w = 0; w != _in_words->size(); ++w)
    {
        cout << (*_in_words)[w] << " ";
    }
    cout << "\n\n";

    for (vector< pair<int, int> >::size_type p = 0; p != _in_words_loc->size(); ++p)
    {
        const pair<int, int>& loc = (*_in_words_loc)[p];
        cout << "(" << loc.first << "," << loc.second << ")";
    }
    cout << "\n\n";
}
// Removes from every word in _in_words each char that occurs in the
// punctuation/digit set below. Words may become empty as a result; they are
// kept so that indices still line up with _in_words_loc.
// NOTE(review): matching is done per char, so the multi-byte punctuation in
// the set is matched byte by byte - confirm this is acceptable for the input
// encoding.
inline void textquery:: filter_word()
{
    if (!_in_words)
    {
        return; // no word list to filter
    }

    const string fuhao("\"1234567890 ,,【】./;'\\。、!@#¥…_&::*()—+||+_)(*&^%$#@!~-={}[]<>《》\??");
    for (vector<string>::iterator iter = _in_words->begin(); iter != _in_words->end(); ++iter)
    {
        // string::size_type, not string::value_type (char): a char cannot
        // represent offsets above 127 and breaks on long words.
        string::size_type pos = 0;
        while ((pos = iter->find_first_of(fuhao, pos)) != string::npos)
        {
            // After the erase the next candidate sits at the same offset,
            // so pos is deliberately not advanced.
            iter->erase(pos, 1);
        }
    }
}
// Converts the letters A-Z in every word of _in_words to lower case, in place.
inline void textquery::to_lower()
{
    if (!_in_words)
    {
        return; // no word list to convert
    }

    const string bigletter("ABCDEFGHIJKLMNOPQRSTUVWXYZ");
    for (vector<string>::iterator iter = _in_words->begin(); iter != _in_words->end(); ++iter)
    {
        // The search must start at offset 0 for every word. Previously pos was
        // an uninitialised char, so the start offset was indeterminate and
        // offsets above 127 could not be represented.
        string::size_type pos = 0;
        while ((pos = iter->find_first_of(bigletter, pos)) != string::npos)
        {
            // tolower() requires a value representable as unsigned char.
            (*iter)[pos] = static_cast<char>(tolower(static_cast<unsigned char>((*iter)[pos])));
            ++pos;
        }
    }
}
inline void textquery::build_words_loc()
{
_words_locs = new pair<vector<string>*,vector<pair<int,int> >*>(_in_words,_in_words_loc) ;
}
inline void textquery:: build_words_map()
{
vector< string >* p_words = (*_words_locs).first ;
vector< pair<int,int> >* p_locs = (*_words_locs).second;
_words_map = new map < string,vector<pair<int,int> >* >;
vector<string>::iterator iter = p_words->begin(), iter_end = p_words->end(); //单词vector iter 对
int line_num=0; //单词元素和位置元素 的编号 每个单词和每个位置的编号相同
while( iter != iter_end ) //对单词进行迭代
{
//there is or there isn't
if( _words_map->count(*iter) > 0) //如果单词 键存在 则修改 map的值:位置 vector
// there is. modify the location value
{
//cout << "map的值: " << (*_words_map)[*iter] << "\n" ;//是位置vector指针
(*_words_map)[*iter] //是位置vector指针
->insert( //插入成员函数
(*_words_map)[*iter]->end(), // vectot插入位置(末尾)
(*p_locs)[line_num] //当前单词位置pair
);
//上面五行应该也可以用push_back();成员函数,空了试下
}
else
{
vector< pair <int ,int> > * word_loc = new vector<pair<int,int> >;
word_loc->push_back( (*p_locs)[line_num] ) ;
(*_words_map).insert( map<string, vector< pair<int,int> >* >:: value_type( (*iter) , word_loc ) );
}
iter++;
line_num++;
}
}
// Entry point: runs the indexing steps in order on one textquery object and
// prints the resulting word map.
int main()
{
    textquery engine;

    engine.in_text();         // load the lines of the requested file
    engine.text_to_words();   // split lines into words + positions
    engine.filter_word();     // strip digits and punctuation
    engine.to_lower();        // normalise case
    engine.build_words_loc(); // pair up words and positions
    //engine.display();
    engine.build_words_map(); // word -> positions
    engine.display_map();

    getch(); // conio.h: blocks until a key is pressed
    return 0;
}