【VB6】不借助API将UTF-8编码的字符串解码为VB6能识别的字符串
' Decodes a UTF-8 byte array into a VB6 (UTF-16) String without calling any API.
'
' Parameters:
'   utf8byte - the UTF-8 encoded bytes. Any lower bound is accepted; an
'              unallocated or empty array yields an empty string.
'
' Returns:
'   The decoded text. Decoding stops at the first malformed position (a stray
'   continuation byte, an &HFE/&HFF byte, a truncated sequence, or a
'   continuation byte that is not of the form 10xxxxxx) and whatever was
'   decoded up to that point is returned.
'
' Notes:
'   - Code points U+10000..U+10FFFF are returned as a UTF-16 surrogate pair,
'     because ChrW only accepts values up to 65535.
'   - Values above U+10FFFF (reachable only through the obsolete 5/6-byte forms
'     or lead bytes &HF5..&HF7) are replaced by U+FFFD.
'   - Overlong encodings are not rejected.
Private Function utf8toString(utf8byte() As Byte) As String
    Dim I As Long           ' index of the lead byte of the current sequence
    Dim K As Long           ' offset of the continuation byte being read
    Dim endPos As Long      ' index one past the last byte of the array
    Dim seqLen As Long      ' total number of bytes in the current sequence
    Dim lead As Long        ' value of the lead byte
    Dim charcode As Long    ' code point being assembled

    ' LBound/UBound raise error 9 on an unallocated dynamic array. In that case
    ' both assignments are skipped, I and endPos stay 0 and the loop never runs.
    On Error Resume Next
    I = LBound(utf8byte)
    endPos = UBound(utf8byte) + 1
    On Error GoTo 0

    Do While I < endPos
        lead = utf8byte(I)

        ' Classify the lead byte: sequence length and payload bits it carries.
        If (lead And &H80&) = &H0& Then             ' 0xxxxxxx
            seqLen = 1
            charcode = lead
        ElseIf (lead And &HE0&) = &HC0& Then        ' 110xxxxx
            seqLen = 2
            charcode = lead And &H1F&
        ElseIf (lead And &HF0&) = &HE0& Then        ' 1110xxxx
            seqLen = 3
            charcode = lead And &HF&
        ElseIf (lead And &HF8&) = &HF0& Then        ' 11110xxx
            seqLen = 4
            charcode = lead And &H7&
        ElseIf (lead And &HFC&) = &HF8& Then        ' 111110xx
            seqLen = 5
            charcode = lead And &H3&
        ElseIf (lead And &HFE&) = &HFC& Then        ' 1111110x
            seqLen = 6
            charcode = lead And &H1&
        Else
            ' A byte of the form 10xxxxxx where a lead byte is expected, or
            ' &HFE/&HFF: neither may start a sequence.
            Exit Do
        End If

        ' Truncated sequence at the end of the input.
        If I + seqLen > endPos Then Exit Do

        ' Fold in 6 payload bits per continuation byte. The longest form
        ' carries 31 bits in total, so the Long cannot overflow.
        For K = 1 To seqLen - 1
            If (utf8byte(I + K) And &HC0&) <> &H80& Then Exit Do
            charcode = charcode * &H40& + (utf8byte(I + K) And &H3F&)
        Next K
        I = I + seqLen

        If charcode < &H10000 Then
            utf8toString = utf8toString & ChrW$(charcode)
        ElseIf charcode <= &H10FFFF Then
            ' Supplementary plane: split into a high/low surrogate pair.
            charcode = charcode - &H10000
            utf8toString = utf8toString & _
                ChrW$(&HD800& + (charcode \ &H400&)) & _
                ChrW$(&HDC00& + (charcode And &H3FF&))
        Else
            ' Not a Unicode code point: emit the replacement character.
            utf8toString = utf8toString & ChrW$(&HFFFD&)
        End If
    Loop
End Function
拜模已经不足以对你的表达了
楼主大能,感谢感谢 谢谢A5的分享,HAHA... 本帖最后由 tlwh163 于 2023-10-9 18:17 编辑
''1. 不会出现的字节: 0xC0, 0xC1, 0xF5-0xFF
''2. 字符的第1个字节值域:0-0x7F(ASCII 单字节), 0xC2-0xF4(UTF-8 多字节序列的首字节)
''3. 字符的第2+个字节值域:0x80-0xBF
页:
[1]