- UID
- 7437
- 精华
- 积分
- 1172
- 威望
- 点
- 宅币
- 个
- 贡献
- 次
- 宅之契约
- 份
- 最后登录
- 1970-1-1
- 在线时间
- 小时
|
发表于 2024-6-9 22:03:50
|
显示全部楼层
手头没有 VB6，这段是用 VFB 写的，也没法调试，逻辑纯靠脑补……
Function VB6_UTF8_TO_STRING(UTF8() As Byte, Optional ByVal nByte As Long = -1) As String
    ''Decode a UTF-8 byte array into a String (stored as UTF-16LE).
    ''
    ''  UTF8  - input bytes. An array whose UBound is -1 is treated as empty.
    ''  nByte - maximum number of input bytes to consume. With a negative value
    ''          (default -1) the whole array is read and a 0 byte acts as a
    ''          terminator; with nByte > 0 a 0 byte inside the data is an error.
    ''
    ''Returns the text decoded up to the first invalid or incomplete sequence
    ''(best effort); returns an empty string when nothing could be decoded.
    ''Overlong encodings are not rejected.
    If UBound(UTF8) = -1 Or nByte = 0 Then Exit Function

    Dim Buf() As Byte, k As Long    ''output buffer (UTF-16LE) and number of bytes written
    Dim i As Long, j As Long        ''j: 0 = idle, 1..3 = continuation bytes still expected, <0 = error
    Dim c As Long                   ''code point being assembled
    Dim w As Long                   ''UTF-16 code unit about to be stored

    ''Worst case is pure ASCII: every input byte becomes one 2-byte UTF-16 unit.
    ''(2- and 3-byte sequences yield 2 bytes, 4-byte sequences yield 4 bytes.)
    ReDim Buf(0 To (UBound(UTF8) - LBound(UTF8) + 1) * 2 - 1)

    For i = LBound(UTF8) To UBound(UTF8)
        Select Case UTF8(i)
        Case &H1 To &H7F                ''ASCII: 0xxxxxxx
            If j <> 0 Then Exit For     ''a sequence was left unfinished
            Buf(k) = UTF8(i)
            Buf(k + 1) = 0              ''high byte of the UTF-16 unit
            k = k + 2

        Case &H80 To &HBF               ''continuation byte: 10xxxxxx
            j = j - 1
            If j < 0 Then Exit For      ''continuation without a lead byte
            ''shift the code point left by 6 bits and append the low 6 bits
            c = (c * &H40&) Or (UTF8(i) And &H3F)
            If j = 0 Then
                If c > &H10FFFF Then    ''beyond the Unicode range: not encodable
                    j = -1
                    Exit For
                End If
                If c >= &H10000 Then
                    ''supplementary plane: emit a UTF-16 surrogate pair
                    c = c - &H10000
                    w = &HD800& Or (c \ &H400&)
                    Buf(k) = CByte(w And &HFF&)
                    Buf(k + 1) = CByte(w \ &H100&)
                    k = k + 2
                    w = &HDC00& Or (c And &H3FF&)
                Else
                    w = c
                End If
                Buf(k) = CByte(w And &HFF&)
                Buf(k + 1) = CByte(w \ &H100&)
                k = k + 2
                c = 0
            End If

        Case &HC0 To &HF7               ''lead byte of a 2-, 3- or 4-byte sequence
            If j <> 0 Then Exit For     ''previous sequence was left unfinished
            Select Case UTF8(i)
            Case &HC0 To &HDF           ''2 bytes: 110xxxxx
                j = 1
                c = UTF8(i) And &H1F
            Case &HE0 To &HEF           ''3 bytes: 1110xxxx
                j = 2
                c = UTF8(i) And &HF
            Case Else                   ''4 bytes: 11110xxx
                j = 3
                c = UTF8(i) And &H7
            End Select

        Case 0                          ''terminator \0
            If nByte > 0 Then j = -1    ''a 0 inside length-limited data is an error
            Exit For                    ''otherwise the data is considered fully read

        Case Else                       ''&HF8..&HFF: never valid in UTF-8
            j = -1
            Exit For
        End Select

        nByte = nByte - 1
        If nByte = 0 Then Exit For
    Next

    ''At this point j <> 0 means the input was invalid or truncated. The text
    ''decoded so far is still returned so the caller gets as much as possible.

    If k > 0 Then
        ReDim Preserve Buf(0 To k - 1)
        ''Assigning a Byte array to a String copies the bytes unchanged, so no
        ''StrConv is needed: Buf already holds UTF-16LE.
        VB6_UTF8_TO_STRING = Buf
    End If
End Function
|