还是汉字编码问题,已经差不多了,但是还有些小问题。
有几个问题:①用UE(UltraEdit)以十六进制方式打开一个汉字,比如“我”显示为 D2 CE,这个算什么编码?是unicode吗?
②用什么方式验证一个汉字的utf8是否正确?没有字库。
③看看这个程序吧,自己感觉已经对了,但是有人说有问题,没说什么问题,请看看。
谢谢了!我是这两天才刚刚接触这些编码,如有贻笑大方之处请见谅。
程序代码:
#include<stdio.h>
#include<string.h>
#include<malloc.h>
#include<stdlib.h>
/*
 * Fixed-width unsigned 8-bit and 16-bit integer types used for raw bytes and
 * 16-bit code units throughout this file.
 *
 * They are real typedefs rather than object-like macros so that they behave
 * like ordinary type names and do not clash with a <stdint.h> that has already
 * been included. MSVC releases before Visual Studio 2010 ship no <stdint.h>,
 * so only there do we fall back to the Microsoft-specific sized keywords.
 */
#if defined(_MSC_VER) && (_MSC_VER < 1600)
typedef unsigned __int8 uint8_t;
typedef unsigned __int16 uint16_t;
#else
#include <stdint.h>
#endif
/*
 * Print every byte of a NUL-terminated byte string as two uppercase hex
 * digits (no separators), followed by a newline. The terminating zero byte
 * itself is not printed.
 */
void printutf8(uint8_t *utf8)
{
    uint8_t *cursor;

    for (cursor = utf8; *cursor != 0; ++cursor)
        printf("%02X", *cursor);

    printf("\n");
}
/*
 * Dump the first `size` bytes of a 16-bit code-unit buffer as uppercase hex
 * digits (two per byte, no separators), followed by a newline.
 *
 * `size` is a byte count, not an element count. Bytes are printed in memory
 * order, so the digits shown for each code unit depend on host endianness.
 * Nothing but the newline is printed when `size` is zero or negative.
 */
void printuni(uint16_t *utf16, int size)
{
    uint8_t *cursor = (uint8_t *)utf16;
    int remaining = size;

    while (remaining > 0)
    {
        printf("%02X", *cursor);
        ++cursor;
        --remaining;
    }

    printf("\n");
}
/*
 * Convert a buffer of 16-bit Unicode code units into a NUL-terminated UTF-8
 * byte string.
 *
 * in      Code units to convert, in host byte order. May be NULL, in which
 *         case the input is treated as empty.
 * insize  Size of `in` in BYTES (not elements). A trailing odd byte is
 *         ignored; zero or negative values are treated as empty input.
 * out     Receives a freshly malloc'ed, NUL-terminated buffer that the caller
 *         must free(). Set to NULL if the allocation fails. If `out` itself
 *         is NULL the function does nothing.
 *
 * A high surrogate immediately followed by a low surrogate is combined into
 * one supplementary code point and written as a 4-byte sequence. An unpaired
 * surrogate is replaced by U+FFFD, because encoded surrogates are not valid
 * UTF-8. A U+0000 in the input is written as a single zero byte, so readers
 * that rely on the NUL terminator will stop there.
 */
void unicode_to_utf8(uint16_t *in, int insize, uint8_t **out)
{
    int i;
    int charscount = 0;
    uint8_t *result;
    uint8_t *tmp;

    if (out == NULL)
        return;
    *out = NULL;

    if (in != NULL && insize > 0)
        charscount = (int)((size_t)insize / sizeof(uint16_t));

    /*
     * Worst case is 3 output bytes per input code unit: a BMP character uses
     * at most 3 bytes, and a surrogate pair (2 units) uses only 4.
     */
    result = (uint8_t *)malloc((size_t)charscount * 3 + 1);
    if (result == NULL)
        return;

    tmp = result;
    for (i = 0; i < charscount; i++)
    {
        unsigned long cp = in[i];

        if (cp >= 0xD800UL && cp <= 0xDBFFUL
            && i + 1 < charscount
            && in[i + 1] >= 0xDC00 && in[i + 1] <= 0xDFFF)
        {
            /* Valid surrogate pair: rebuild the supplementary code point. */
            cp = 0x10000UL
                 + ((cp - 0xD800UL) << 10)
                 + ((unsigned long)in[i + 1] - 0xDC00UL);
            i++;
        }
        else if (cp >= 0xD800UL && cp <= 0xDFFFUL)
        {
            /* Unpaired surrogate: substitute the replacement character. */
            cp = 0xFFFDUL;
        }

        if (cp <= 0x7FUL)
        {
            /* 0xxxxxxx */
            *tmp++ = (uint8_t)cp;
        }
        else if (cp <= 0x7FFUL)
        {
            /* 110xxxxx 10xxxxxx */
            *tmp++ = (uint8_t)(0xC0UL | (cp >> 6));
            *tmp++ = (uint8_t)(0x80UL | (cp & 0x3FUL));
        }
        else if (cp <= 0xFFFFUL)
        {
            /* 1110xxxx 10xxxxxx 10xxxxxx */
            *tmp++ = (uint8_t)(0xE0UL | (cp >> 12));
            *tmp++ = (uint8_t)(0x80UL | ((cp >> 6) & 0x3FUL));
            *tmp++ = (uint8_t)(0x80UL | (cp & 0x3FUL));
        }
        else
        {
            /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
            *tmp++ = (uint8_t)(0xF0UL | (cp >> 18));
            *tmp++ = (uint8_t)(0x80UL | ((cp >> 12) & 0x3FUL));
            *tmp++ = (uint8_t)(0x80UL | ((cp >> 6) & 0x3FUL));
            *tmp++ = (uint8_t)(0x80UL | (cp & 0x3FUL));
        }
    }

    *tmp = 0;
    *out = result;
}
/*
 * Demo driver: prints the raw bytes of one 16-bit code unit, converts it to
 * UTF-8 with unicode_to_utf8(), and prints the resulting bytes.
 *
 * Returns 0 on success, 1 if the conversion buffer could not be allocated.
 */
int main(void)
{
    /*
     * The code point is spelled numerically (U+4E2D). A character constant
     * such as '中' is a multi-character constant whose value is
     * implementation-defined and comes from the source file's byte encoding,
     * so it does not yield the Unicode code point.
     */
    uint16_t unicode[] = {0x4E2D};
    uint8_t *utf8 = NULL;

    /* The message names the character that is actually being converted. */
    printf("字符“中”转换为 unicode: \n");
    printuni(unicode, sizeof(unicode));
    printf("再将unicode转换为 utf8: \n");
    unicode_to_utf8(unicode, sizeof(unicode), &utf8);
    if (utf8 == NULL)
        return 1;
    printutf8(utf8);

    /* unicode_to_utf8 hands ownership of the buffer to the caller. */
    free(utf8);
    return 0;
}






