关于文本文件分割问题
程序代码:/********************************************************************
File Name : fdiv.c
Author : g Version : 1.0 Date : 2010-7-31
Description : 该程序用于查找当前文件夹下文本文件(*.txt),将其显示出来,
选择要分割的文件及大小,创建新文件夹,将分割的文件放入其中。
Other :
Function List :
1. fsearch(),
2. select(),
3. cdir(),
4. syns(),
5. dividetxt().
History :
********************************************************************/
#include<stdio.h>
#include<stdlib.h>
#include<dir.h>
char org_file[10][13]; /* 原始文件名存放 */
int choose_num; /* 需处理文件编号 */
long org_file_size; /* 原始文件大小 */
long cur_file_size; /* 当前剩余文件大小 */
long fns_file_size; /* 已分割处理文件大小 */
long tsize; /* 需分割文件的大小 */
char f_folder_name[13]; /* 文件夹名 */
char path[20]; /* 相对路径 */
int new_file_num; /* 分割后文件数量 */
char table[100][4] = { /* 分割后文件基本名 */
"000","001","002","003","004",
"005","006","007","008","009",
"010","011","012","013","014",
"015","016","017","018","019",
"020","021","022","023","024",
"025","026","027","028","029",
"030","031","032","033","034",
"035","036","037","038","039",
"040","041","042","043","044",
"045","046","047","048","049",
"050","051","052","053","054",
"055","056","057","058","059",
"060","061","062","063","064",
"065","066","067","068","069",
"070","071","072","073","074",
"075","076","077","078","079",
"080","081","082","083","084",
"085","086","087","088","089",
"090","091","092","093","094",
"095","096","097","098","099"
};
/* search and display local text files */
void fsearch(void)
{
int done;
int i=0, j;
struct ffblk ffblk;
printf("the list of files :\n");
done = findfirst("*.txt",&ffblk,0);
while (!done)
{
printf("%d\t%s\t%ld\n",i,ffblk.ff_name,ffblk.ff_fsize);
org_file_size = ffblk.ff_fsize;
for (j=0; j<13; j++)
{
org_file[i][j] = ffblk.ff_name[j];
}
i++;
done = findnext(&ffblk);
}
}
/* choose the file which need to divide */
void select(void)
{
printf("please input the file number:\t");
scanf("%d",&choose_num);
printf("please input the size you need(KB):\t");
scanf("%ld",&tsize);
tsize = tsize * 1024;
/* 计算分割后文件数量 */
(org_file_size%tsize == 0) ? (new_file_num = org_file_size/tsize) :
(new_file_num = org_file_size/tsize+1);
sprintf(f_folder_name,"%d",choose_num); /* 将选择的int型-->转为字符串型 */
}
/* create the dir */
void cdir(void)
{
int check;
char *pdir;
pdir = f_folder_name;
check = mkdir(pdir);
(!check) ? (printf("directory created\n")) :
(printf("directory failed\n"));
}
/* 为文件路径添加"\" */
void syns(void)
{
int n;
char rega[3] = "\\";
strcat(f_folder_name,rega);
for (n=0;n<20;n++)
{
path[n] = f_folder_name[n];
}
}
/* divide the txt */
void dividetxt(void)
{
FILE *fsource, *fdestination;
char subname[20]; /* 新建文件的路径及文件名称,可变 */
char *pstore; /* 临时存储需要分割数据块,缓冲区 */
char s[5] = ".txt";/* 新建文件的扩展名 */
int m=0, ci;
long *psub;
if ((fsource = fopen(org_file[choose_num],"rt")) == NULL) /* 打开源文件 */
{
printf("error!can't open the source file!\n");
getch();
exit(1);
}
/* printf("%s\n",org_file[choose_num]); */
/* ------------------------------------ */
psub = table;
while (m < new_file_num) /* 新建文件路径及完整名称,如"1\001.txt" */
{
m++;
for (ci=0;ci<20;ci++)
{
subname[ci] = path[ci];
}
strcat(subname,psub);
strcat(subname,s);
psub++;
/* printf("%s\n",subname); */
if ((fdestination = fopen(subname,"wt")) == NULL) /* 新建并打开新文件 */
{
printf("error!can't create *.txt file");
fclose(fsource);
getch();
exit(1);
}
/* 获取当前剩余文件大小,如果小于输入需求大小,则按当前剩余文件大小 */
/* 申请内存,否则按需求大小 */
fns_file_size = ftell(fsource);
cur_file_size = org_file_size - fns_file_size;
if (cur_file_size < tsize)
{
tsize = cur_file_size;
}
if ((pstore = (char *)malloc(tsize+1)) == NULL) /* 申请内存 */
{
printf("can't get memory!\n");
fclose(fsource); /* 若失败,则关闭已打开的文件 */
fclose(fdestination);
getch();
exit(1);
}
fread(pstore,tsize,1,fsource); /* 从原始文件读取数据,写入缓冲区 */
fwrite(pstore,tsize,1,fdestination); /* 将缓冲区中数据写入新文件 */
fclose(fdestination);
free(pstore);
pstore = NULL;
}
fclose(fsource);
printf("ok!");
getch();
}
int main(void)
{
fsearch();
select();
cdir();
syns();
dividetxt();
}
现在学习C,练习下。该程序用于分割文本文件,现有几个问题请帮助解决下:
1.当大于50K时,分割失败;
2.小于50K分割后,最后一个文件末尾多出一些内容(这些内容是被分割原文件中的);
3.若分割处为一段的中间,该段落的后部分出现在下一新的文件中,可能开头一段是乱码。
请各位前辈指点!谢谢!










。你试试拿编码是ASCII的分割一下,看会不会出错吧。