用下面的 IsUTF8Chinese() 直接按 UTF-8 字节判断 0x4E00 ~ 0x9FFF 范围内的字符,测试中未见异常。
程序代码:
生成的对照表
测试代码:
程序代码:
程序代码:
** Returns .T. when the first 3 bytes of the character expression are the
** UTF-8 encoding of a code point in U+4E00..U+9FFF, otherwise .F.
**   cStr - character expression holding UTF-8 encoded bytes
** U+4E00 encodes as E4 B8 80 and U+9FFF as E9 BF BF. Under lead byte E4 the
** second byte must therefore be at least B8. A plain 80..BF test on the
** second byte would also accept E4 80 80..E4 B7 BF, i.e. U+4000..U+4DFF,
** which lies below the intended range.
FUNCTION IsUTF8Chinese(cStr)
    LOCAL bytes, b1, b2, b3
    bytes = LEFT(cStr,3)
    ** Fewer than 3 bytes cannot hold a 3-byte UTF-8 sequence
    IF LEN(bytes) < 3
        RETURN .F.
    ENDIF
    b1 = SUBSTR(bytes,1,1)
    b2 = SUBSTR(bytes,2,1)
    b3 = SUBSTR(bytes,3,1)
    RETURN ((BETWEEN(b1,0hE4,0hE4) AND BETWEEN(b2,0hB8,0hBF)) OR;
        (BETWEEN(b1,0hE5,0hE9) AND BETWEEN(b2,0h80,0hBF))) AND;
        BETWEEN(b3,0h80,0hBF)
ENDFUNC
生成的对照表
测试代码:
程序代码:
** Test driver: walks every code point from 0x4E00 to 0x9FFF, checks its
** UTF-8 form with IsUTF8Chinese(), and writes two comparison tables
** (utf16.txt and utf8.txt), 32 characters per row, each row prefixed with
** the hexadecimal value of its first code point.
SET DEFAULT TO (ADDBS(JUSTPATH(SYS(16))))

** Create both output files empty. Per the STRTOFILE() documentation,
** flag 2 prepends a UTF-16 byte order mark and flag 4 a UTF-8 one.
STRTOFILE("", "utf16.txt", 2)
STRTOFILE("", "utf8.txt", 4)

failed = .F.
rowCount = 0
** Row buffers start with the "0x4E00" label converted to each encoding
row16 = STRCONV("0x4E00",5)
row8 = STRCONV("0x4E00",9)

FOR i = 0x4E00 TO 0x9FFF
    ** Values from 0x8000 up are mapped to their negative 16-bit equivalent
    ** before being packed into 2 bytes by BINTOC()
    signed16 = IIF(i >= 0x8000, i-2^16, i)
    char16 = BINTOC(signed16,"2rs")
    char8 = STRCONV(char16,10)

    ** Stop at the first code point the detector rejects
    IF !IsUTF8Chinese(char8)
        failed = .T.
        EXIT
    ENDIF

    ** Append a space and the character to each row buffer
    row16 = row16 + 0h2000 + char16
    row8 = row8 + 0h20 + char8
    rowCount = rowCount + 1

    ** After 32 characters, write the row with a line break and start the
    ** next row with the label of the following code point
    IF MOD(rowCount,32) = 0
        STRTOFILE(row16+0h0D000A00, "utf16.txt", 1)
        STRTOFILE(row8+0h0D0A, "utf8.txt", 1)
        row16 = STRCONV("0x"+RIGHT(TRANSFORM(i+1,"@0"),4),5)
        row8 = STRCONV(row16,10)
        rowCount = 0
    ENDIF
ENDFOR

IF failed
    ** Report the failing code point and the hex dump of its UTF-8 bytes
    ? "error UTF-16: "+TRANSFORM(i,"@0"), " UTF-8: "+STRCONV(char8,15)
ELSE
    ** Every code point passed: open both tables for inspection
    RUN notepad utf16.txt
    RUN notepad utf8.txt
ENDIF
** Returns .T. when the first 3 bytes of the character expression are the
** UTF-8 encoding of a code point in U+4E00..U+9FFF, otherwise .F.
**   cStr - character expression holding UTF-8 encoded bytes
** U+4E00 encodes as E4 B8 80 and U+9FFF as E9 BF BF. Under lead byte E4 the
** second byte must therefore be at least B8. A plain 80..BF test on the
** second byte would also accept E4 80 80..E4 B7 BF, i.e. U+4000..U+4DFF,
** which lies below the intended range.
FUNCTION IsUTF8Chinese(cStr)
    LOCAL bytes, b1, b2, b3
    bytes = LEFT(cStr,3)
    ** Fewer than 3 bytes cannot hold a 3-byte UTF-8 sequence
    IF LEN(bytes) < 3
        RETURN .F.
    ENDIF
    b1 = SUBSTR(bytes,1,1)
    b2 = SUBSTR(bytes,2,1)
    b3 = SUBSTR(bytes,3,1)
    RETURN ((BETWEEN(b1,0hE4,0hE4) AND BETWEEN(b2,0hB8,0hBF)) OR;
        (BETWEEN(b1,0hE5,0hE9) AND BETWEEN(b2,0h80,0hBF))) AND;
        BETWEEN(b3,0h80,0hBF)
ENDFUNC







