一个使用rle和huffman算法的压缩程序
某日看到一个jpeg图片的原理,突发兴趣写了个压缩程序,vc2005编译人比较懒,头文件啥的都堆一个文件里面了
huffman.exe [-r|-h] filename//用rle还是huffman压缩
huffman.exe srcfile dstfile//解压缩
程序代码:#define _CRT_SECURE_NO_WARNINGS
#include <windows.h>
#include <Winbase.h>
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#include<conio.h>
#ifndef UINT16
#define UINT16 unsigned short
#endif
#ifndef UINT8
#define UINT8 unsigned char
#endif
#ifndef UINT32
#define UINT32 unsigned long
#endif
#define CANONICAL_HUFFMAN
#define ASC_NUM 256
#define ASC_HUFFMAN_NODE (2*256-1)
#define BITS_PER_BYTE 8
#define HUFFMAN_CODE 0x01
#define RLE_CODE 0x10
#define BITMAP_SET(bitMap, gBitLoc, bitVal) \
(((bitVal) == 0) ? \
(((char*)bitMap)[((gBitLoc)-1)/8]=(((char*)bitMap)[((gBitLoc)-1)/8]&(~((unsigned char)(1<<(7-((gBitLoc)-1)%8)))))) : \
(((char*)bitMap)[((gBitLoc)-1)/8]=(((char*)bitMap)[((gBitLoc)-1)/8]|((unsigned char)(1<<(7-((gBitLoc)-1)%8))))) \
)
#define BITMAP_BITVAL(bitMap, gBitLoc) \
((((char*)bitMap)[((gBitLoc)-1)/8]&(1<<(7-((gBitLoc)-1)%8))) == 0 ? 0 : 1)
typedef UINT32 HUFFMAN_NODE_HEADER;
typedef struct {
UINT8 c;
UINT32 weight;
int p, l, r;/* parent, left child, right child */
int flag;/* 0x01:selected 0x02:NULL node 0x04:NULL node padding*/
UINT8 code[4];
UINT32 bits;
}HUFFMAN_NODE, *PHUFFMAN_NODE;
HUFFMAN_NODE huffmanNode[ASC_HUFFMAN_NODE] = {0};
HUFFMAN_NODE_HEADER nodeHeader;
UINT32 fileSize = 0;
UINT32 worked = 0;
char* avg;
#define FGET_C(fd) (++worked)?fgetc((fd)):0
/* by haoxu 2010-07-05
offset:dst offset, unit is bits
len:src len, unit is bits
*/
#define MEMCPY_BITS(dst, src, offset, len) {\
int i;\
for(i=0;i<(len);i++)\
BITMAP_SET((dst), ((offset)+i+1), BITMAP_BITVAL((src), (i+1)));\
}
/* by haoxu 2010-07-07
dst : UINT8 string
src : int
bits : bit length
*/
#define CONVERT_INT2BITS(dst, src, bits) {\
int x;\
for (x = 0; x < (bits); x++)\
(void)BITMAP_SET((dst), (bits-x), (src)&(1<<x));\
}
#define BITS_PRINT(src, offset, bits) {\
int x;\
for (x = 0; x < (bits); x++)\
printf("%d", BITMAP_BITVAL(src, (x+offset+1)));\
}
void disStat(int c)
{
int i;
if(c == 1)
{
for(i = 0; i< ASC_HUFFMAN_NODE; i++)
{
printf("%04x : %d %s%s%s\n",
i,
huffmanNode[i].bits,
(huffmanNode[i].flag & 0x01)?" s":"",
(huffmanNode[i].flag & 0x02)?" n":"",
(huffmanNode[i].flag & 0x04)?" p":"");
}
}
if(c == 2)
for(i = 0; i< ASC_HUFFMAN_NODE; i++)
{
printf("%04x (%02x): %02x %02x\n",
i, huffmanNode[i].c, huffmanNode[i].l, huffmanNode[i].r);
}
}
void printError(int no)
{
if(no == 1)
printf("huffman tree is too deep\n");
else if(no == 2)
printf("file is incomplete\n");
else if(no == 3)
printf("buffer overflow\n");
else if(no == 4)
printf("open file failed\n");
#if defined(_DEBUG)
disStat(2);
#endif
exit(0);
}
void disHuffmanTree(PHUFFMAN_NODE node,
int current,
char* code,
int path)
{
#define NODE node[current]
int i;
if(NODE.l == -1 && NODE.r == -1 && !(NODE.flag&0x02))
{
printf("deep %d weight %d %02x : ",path, NODE.weight, NODE.c);
for(i=0;i<path;i++)
printf("%d", code[i]);
printf("\n");
return;
}
if((i = NODE.l) != -1)
{
code[path] = 0;
disHuffmanTree(node, i, code, path+1);
}
if((i = NODE.r) != -1)
{
code[path] = 1;
disHuffmanTree(node, i, code, path+1);
}
}
#ifdef CANONICAL_HUFFMAN
#define LIST_DEEP 32
int node_isSingleChar(PHUFFMAN_NODE node)
{
int i;
for(i=ASC_NUM;i<ASC_HUFFMAN_NODE;++i)
{
if(node[i].flag&0x04)
return 0;
}
return 1;
}
void node_calcDeep(PHUFFMAN_NODE node,
int current,
int path,
UINT8* buffer,
int* list)
{
#undef NODE
#define NODE node[current]
int i = 0;
if(path > LIST_DEEP)
{
printError(1);
}
if(NODE.l == -1 && NODE.r == -1 && !(NODE.flag&0x02))
{
NODE.bits = path;
buffer[path-1]++;
if(!buffer[path-1])
buffer[-1] = (1<<7)|(path-1);
NODE.p = list[path-1];
list[path-1] = current;
return;
}
if((i = NODE.l) != -1)
{
node_calcDeep(node, i, path+1, buffer, list);
}
if((i = NODE.r) != -1)
{
node_calcDeep(node, i, path+1, buffer, list);
}
}
int node_buildHuffmanTree(PHUFFMAN_NODE node, HUFFMAN_NODE_HEADER header, UINT8* buffer, int bufSize)
{
int list[LIST_DEEP];
int i, j, k;
int last = -1;
int lastBits = 0;
int tmp;
int lastNonNullList = 0;
UINT8 buf[256];
#if defined(_DEBUG)
int c;
#endif
if(bufSize < (512))
return 0;
memset(buffer, 0, 512);
memset(list, 0xff, LIST_DEEP*sizeof(int));/* code length list header */
if(node_isSingleChar(node))
{
*buffer = 1;
*(buffer+1) = 1;
*(buffer+2) = (UINT8)header;
node[header].bits = 1;
memset(node[header].code, 0, 4);
return 3;
}
node_calcDeep(node, header, 0, buffer+1, list);
j = 0;
if(*buffer)
{
k = *buffer&(~(1<<7));
while(k != -1)
{
tmp = (last + 1)<<(node[k].bits - lastBits);
CONVERT_INT2BITS(node[k].code, tmp, node[k].bits);
#if defined(_DEBUG)
printf("deep %d weight %d %02x : ",node[k].bits, node[k].weight, node[k].c);
for(c=1;c<=node[k].bits;c++)
printf("%d", BITMAP_BITVAL(node[k].code, c));
printf("\n");
#endif
last = tmp;
lastBits = node[k].bits;
buf[j++] = node[k].c;
k = node[k].p;
}
memcpy(buffer+1, buf, j);
return (1+j);
}
else
{
for(i=0;i<LIST_DEEP;i++)
{
#if defined(_DEBUG)
//printf("code length: %d\n", i+1);
#endif
k = list[i];
while(k != -1)
{
lastNonNullList = i+1;
tmp = (last + 1)<<(node[k].bits - lastBits);
CONVERT_INT2BITS(node[k].code, tmp, node[k].bits);
#if defined(_DEBUG)
printf("deep %d weight %d %02x : ",node[k].bits, node[k].weight, node[k].c);
for(c=1;c<=node[k].bits;c++)
printf("%d", BITMAP_BITVAL(node[k].code, c));
printf("\n");
#endif
last = tmp;
lastBits = node[k].bits;
buf[j++] = node[k].c;
k = node[k].p;
}
}
*buffer = lastNonNullList;
memcpy(buffer+1+lastNonNullList, buf, j);
return (1+lastNonNullList+j);
}
}
#else
void node_buildHuffmanTree(PHUFFMAN_NODE node,
int current,
char* code,
int path)
{
#define NODE node[current]
int i;
if(NODE.l == -1 && NODE.r == -1 && !(NODE.flag&0x02))
{
for(i=0;i<path;i++)
BITMAP_SET(NODE.code, i+1, code[i]);
NODE.bits = path;
return;
}
if((i = NODE.l) != -1)
{
code[path] = 0;
node_buildHuffmanTree(node, i, code, path+1);
}
if((i = NODE.r) != -1)
{
code[path] = 1;
node_buildHuffmanTree(node, i, code, path+1);
}
}
#endif
int node_selectMinsWeight(PHUFFMAN_NODE node, UINT32* n1, UINT32* n2)
{
int i, ret;
UINT32 w1, w2;
ret = -1;
w1 = 0xFFFFFFFF;
w2 = 0xFFFFFFFF;
for(i = 0; i < ASC_HUFFMAN_NODE; i++)
{
/* the node is selected or the node is NULL but not selected or weight is zero*/
if(node[i].flag&0x01
|| (!(node[i].flag&0x04) && node[i].flag&0x02)
|| (!node[i].weight && !(node[i].flag&0x02)))
continue;
if(node[i].weight < w1 && node[i].weight < w2 )/* weight < w1, weight < w2 */
{
if(w1 < w2)
{
w2 = w1;
*n2 = *n1;
}
w1 = node[i].weight;
*n1 = i;
nodeHeader = i;
}
else if(node[i].weight >= w1 && node[i].weight < w2 )/* w1 <= weight < w2 */
{
w2 = node[i].weight;
*n2 = i;
}
ret = 0;
}
/* w1 is node header */
if(w2 == 0xFFFFFFFF)
return 1;
return ret;
}
int node_getMinWeight(PHUFFMAN_NODE node, UINT32* n)
{
int i, ret;
UINT32 w;
ret = -1;
w = 0xFFFFFFFF;
for(i = 0; i < ASC_NUM; i++)
{
if(node[i].weight < w)
{
w = node[i].weight;
*n = i;
}
ret = 0;
}
return ret;
}
int node_getEmptyNode(PHUFFMAN_NODE node)
{
int i;
for(i = ASC_NUM; i < ASC_HUFFMAN_NODE; i++)
{
if((!(node[i].flag&0x04))&&(node[i].flag&0x02))
return i;
}
return -1;
}
int node_makeParentNode(PHUFFMAN_NODE node)
{
int n;
int ret;
UINT32 n1, n2;
ret = node_selectMinsWeight(node, &n1, &n2);
if(ret)
return ret;
else
{
if((n = node_getEmptyNode(node)) != -1)
{
node[n].flag |= 0x04;
node[n].l = n1;
node[n].r = n2;
node[n].weight = node[n1].weight + node[n2].weight;
node[n1].p = n;
node[n1].flag |= 0x01;
node[n2].p = n;
node[n2].flag |= 0x01;
}
else
return -1;
}
return 0;
}
void node_init(PHUFFMAN_NODE node)
{
int i;
memset(node, 0, sizeof(HUFFMAN_NODE)*ASC_HUFFMAN_NODE);
/* init char node */
for(i=0;i<ASC_NUM;i++)
{
node[i].c = i;
node[i].weight = 0;
node[i].l = -1;
node[i].r = -1;
node[i].p = -1;
}
/* init NULL node */
for(i=ASC_NUM;i<ASC_HUFFMAN_NODE;i++)
{
node[i].flag = 0x02;
node[i].l = -1;
node[i].r = -1;
node[i].p = -1;
}
}
int node_getWeightFromFile(char* file, PHUFFMAN_NODE node)
{
FILE * sfp;
int c;
printf("parse file...\n");
if(!(sfp = fopen(file, "rb")))
{
printError(4);
return -1;
}
while(!feof(sfp))
{
c = fgetc(sfp);
if(c == -1)
break;
node[c].weight++;
fileSize++;
}
fclose(sfp);
return 0;
}
/* by haoxu 2010-07-07
if success return write number
if error return -1
*/
int make_huffman_code_from_file(char* file, PHUFFMAN_NODE node, UINT8* buffer, int bufSize)
{
int ret;
node_init(node);
if(node_getWeightFromFile(file, node))
return -1;
while(!(ret = node_makeParentNode(node))){}
#if defined(_DEBUG)
printf("header %04x\n", nodeHeader);
#endif
if(ret == -1)
return -1;
else if(ret == 1)
{
#ifdef CANONICAL_HUFFMAN
return node_buildHuffmanTree(node, nodeHeader, buffer, bufSize);
#else
node_buildHuffmanTree(node, nodeHeader, buffer, 0);
disHuffmanTree(node, nodeHeader, buffer, 0);
#endif
}
return 0;
}
/* by haoxu 2010-07-08
canonical huffman struct
| encrpyt flag(1 byte) | file size(4 bytes) |
|list length=x(1 byte) | list (x bytes) | code (sum list bytes) |
*/
int huffman_encrpyt_file(char* src)
{
FILE * sfp,* dfp;
int c;
UINT8 buf[512];
int offset;
int i;
char dst[256];
if((i = make_huffman_code_from_file(src, huffmanNode, buf, sizeof(buf))) == -1)
return -1;
#if defined(_DEBUG)
printf("huffman code number %d\n", i);
#endif
if(!(sfp = fopen(src, "rb")))
{
printError(4);
}
sprintf(dst, "%s.hfm", src);
if(!(dfp=fopen(dst, "wb")))
{
fclose(sfp);
printError(4);
}
fputc(HUFFMAN_CODE, dfp);
fwrite(&fileSize, sizeof(UINT32), 1, dfp);
fwrite(buf, i, 1, dfp);
offset = 0;
while(!feof(sfp))
{
c = FGET_C(sfp);
if(c == -1)
break;
#if defined(_DEBUG)
BITS_PRINT(huffmanNode[c].code,0,huffmanNode[c].bits);
printf("\n");
#endif
MEMCPY_BITS(buf, huffmanNode[c].code, offset, huffmanNode[c].bits);
offset += huffmanNode[c].bits;
if(offset > BITS_PER_BYTE*100)
{
fwrite(buf, 100, 1, dfp);
memcpy(buf, buf+100, 4);
offset -= BITS_PER_BYTE*100;
}
}
if(offset)
{
fwrite(buf, offset/BITS_PER_BYTE+1, 1, dfp);
}
return 0;
}
void huffman_unencrpyt_file(FILE* sfp, FILE *dfp)
{
#undef NODE
#define NODE huffmanNode
int i, k, listLength;
int last = -1;
int lastBits = 0;
int tmp;
int offset = 0;
UINT8 buf[512];
UINT8 list[32];
int ret, c;
node_init(huffmanNode);
/* get old file size */
ret = fread(&fileSize, sizeof(UINT32), 1, sfp);
if(ret != 1) printError(2);
if(fileSize == 0) return;
/* get list length */
listLength = fgetc(sfp);
if(listLength == -1) printError(2);
if(listLength > LIST_DEEP)
{
lastBits = listLength&(~(1<<7));
for(i = 0;i<ASC_NUM;i++)
{
c = fgetc(sfp);
if(c == -1) printError(2);
tmp = last + 1;
CONVERT_INT2BITS(NODE[c].code, tmp, lastBits);
NODE[c].bits = lastBits;
last = tmp;
}
}
else
{
ret = fread(list, sizeof(UINT8), listLength, sfp);
if(ret != listLength) printError(2);
for(i=0;i<listLength;i++)
{
ret = list[i];
while(ret--)
{
c = fgetc(sfp);
if(c == -1) printError(2);
tmp = (last + 1)<<(i - lastBits);
NODE[c].bits = i+1;
CONVERT_INT2BITS(NODE[c].code, tmp, NODE[c].bits);
last = tmp;
lastBits = i;
#if 0
printf("deep %d %02x : ",NODE[c].bits, NODE[c].c);
for(k=1;k<=NODE[c].bits;k++)
printf("%d", BITMAP_BITVAL(NODE[c].code, k));
printf("\n");
#endif
}
}
}
c = node_getEmptyNode(NODE);
nodeHeader = c;
NODE[nodeHeader].flag |= 0x04;
for(i=0;i<ASC_NUM;i++)
{
if(NODE[i].bits)
{
tmp = nodeHeader;
for(k=1;k<NODE[i].bits;k++)
{
if(BITMAP_BITVAL(NODE[i].code, k)?(NODE[tmp].r != -1):(NODE[tmp].l != -1))
{
tmp = BITMAP_BITVAL(NODE[i].code, k)?(NODE[tmp].r):(NODE[tmp].l);
continue;
}
c = node_getEmptyNode(NODE);
if(c < 0) printError(3);
NODE[c].flag |= 0x04;
/* 0:l, 1:r */
if(BITMAP_BITVAL(NODE[i].code, k))/* r */
NODE[tmp].r = c;
else
NODE[tmp].l = c;
tmp = c;
}
if(BITMAP_BITVAL(NODE[i].code, k))/* r */
NODE[tmp].r = i;
else
NODE[tmp].l = i;
}
}
tmp = nodeHeader;
listLength = 0;
worked = 0;
while(!feof(sfp))
{
ret = fread(buf, sizeof(UINT8), 512, sfp);
k = ret*BITS_PER_BYTE;
for(i=0;i<k;i++)
{
if(BITMAP_BITVAL(buf, i+1))
tmp = NODE[tmp].r;
else
tmp = NODE[tmp].l;
#if defined(_DEBUG)
printf("%d", BITMAP_BITVAL(buf, i+1));
#endif
if(NODE[tmp].bits)
{
#if defined(_DEBUG)
printf("\n");
#endif
worked++;
listLength += NODE[tmp].bits;
fputc(NODE[tmp].c, dfp);
tmp = nodeHeader;
if(worked == fileSize)
break;
}
}
}
#if defined(_DEBUG)
printf("listLength %d worked %d filesize %d\n", listLength,worked ,fileSize);
#endif
if(worked < fileSize)
printError(2);
#if 0
listLength = strlen(avg);
tmp = nodeHeader;
for(i = 0;i<listLength;i++)
{
if(avg[i] == '1')
tmp = NODE[tmp].r;
else
tmp = NODE[tmp].l;
if(NODE[tmp].c)
{
printf("%02x ", NODE[tmp].c);
tmp = nodeHeader;
}
}
printf("\n");
#endif
}
int rle_encrpyt_file(char* src)
{
FILE * sfp,* dfp;
int keyPadding;
unsigned char keyNum;
int count = 0;
int c;
int code;
int ret;
UINT8 key;
char dst[256];
/* get key from file */
node_init(huffmanNode);
if(node_getWeightFromFile(src, huffmanNode))
return -1;
if(node_getMinWeight(huffmanNode, &ret))
return -1;
key = huffmanNode[ret].c;
keyNum = 0;
keyPadding = 0;
code = -1;
if(!(sfp = fopen(src, "rb")))
{
printError(4);
}
sprintf(dst, "%s.rle", src);
if(!(dfp=fopen(dst,"wb")))
{
fclose(sfp);
printError(4);
}
fputc(RLE_CODE, dfp);
fputc(key, dfp);
worked = 0;
while(!feof(sfp))
{
c = FGET_C(sfp);
if(c == -1)
{
if(keyPadding)/* if EOF, check action */
{
fputc(key, dfp);
fputc(code, dfp);
fputc(keyNum, dfp);
}
else
fputc(code, dfp);
break;
}
if(c != key)/* c is not key */
{
if(keyPadding)/* if action is calc repeat char */
{
if(code == c)/* if the char is repeated,incr it */
{
keyNum++;
if(keyNum == 0xff)
{
fputc(key, dfp);
fputc(code, dfp);
fputc(keyNum, dfp);
keyNum = 0;
}
}
else/* if the char is not repeated, record it and puts repeat data */
{
fputc(key, dfp);
fputc(code, dfp);
fputc(keyNum, dfp);
code = c;
keyPadding = 0;
keyNum = 0;
}
}
else/* if first get the char */
{
if(code == c)/* if the char equal last one,padding start */
{
keyPadding = 1;
keyNum = 2;
}
else/* if the char unequal last one,record it and puts last one */
{
if(code != -1)
fputc(code, dfp);
code = c;
}
}
}
else/* if c is key */
{
if(keyPadding)/* if action is padding,ignore key */
{
fputc(key, dfp);
fputc(code, dfp);
fputc(keyNum, dfp);
code = -1;
keyPadding = 0;
keyNum = 0;
}
else/* if no action, puts last code */
{
if(code != -1)
fputc(code, dfp);
code = -1;
}
/* puts double key */
fputc(key, dfp);
fputc(key, dfp);
}
}
fclose(sfp);
fclose(dfp);
return 0;
}
void rle_unencrpyt_file(FILE* sfp, FILE *dfp)
{
UINT8 key;
int c;
int count;
int i;
if(!feof(sfp))
key = (UINT8)fgetc(sfp);
while(!feof(sfp))
{
c = FGET_C(sfp);
if(c == -1)
break;
if(c == key)
{
c = FGET_C(sfp);
if(key == c)
fputc(key, dfp);
else
{
count = FGET_C(sfp);
for(i = 0; i < count; i++)
fputc(c, dfp);
}
}
else
fputc(c, dfp);
}
}
UINT32 getfilesize(char* file)
{
FILE* fp;
UINT32 length;
fp = fopen(file,"rb");
if(fp==NULL)
printError(4);
fseek(fp, 0L, SEEK_END);
length = ftell(fp);
fclose(fp);
return length;
}
int unencrpyt_file(char* src, char* dst)
{
FILE * sfp,* dfp;
int c;
if(!(sfp = fopen(src, "rb")))
{
printError(4);
}
if(!(dfp=fopen(dst,"wb")))
{
fclose(sfp);
printError(4);
}
while(!feof(sfp))
{
c = FGET_C(sfp);
if(c == -1)
break;
if(c == HUFFMAN_CODE )
{
huffman_unencrpyt_file(sfp, dfp);
break;
}
else if(c == RLE_CODE)
{
fileSize = getfilesize(src);
rle_unencrpyt_file(sfp, dfp);
break;
}
}
fclose(sfp);
fclose(dfp);
return 0;
}
DWORD WINAPI printfThread ( LPVOID pvParam )
{
while(1)
{
if(worked && fileSize)
{
printf("build file...%.2f%%\r",100*((float)worked)/fileSize);
}
#ifdef WIN32
Sleep(500);
#else
delay(1);
#endif
}
return 0;
}
int main(int argc, char **argv)
{
char tmp[] = "tmp";
if(argc < 3)
return 0;
#ifdef WIN32
CreateThread ( NULL, 0, printfThread, NULL, 0, ( LPDWORD ) NULL );
#endif
if(!strcmp("-r", argv[1]))
rle_encrpyt_file(argv[2]);
else if(!strcmp("-h", argv[1]))
huffman_encrpyt_file(argv[2]);
else
{
unencrpyt_file(argv[1], argv[2]);
}
printf("build file...100%% \n");
exit(0);
}







顶
