在VB中,如果检测到某文本文件为非UTF8格式则自动转换为UTF8格式?

2025-04-02 07:33:55
推荐回答(3个)
回答1:

检测文本文件格式(以下代码仅根据文件头的 BOM 标记判断编码,不包含转换;没有 BOM 的文件一律按 GB2312 处理):

Private Sub Command1_Click()
    ' Detects the encoding of a text file from its byte-order mark (BOM)
    ' and shows the result in a message box.
    '
    ' Only the first (up to) three bytes are inspected:
    '   EF BB BF -> UTF-8
    '   FE FF    -> Unicode big endian
    '   FF FE    -> Unicode (little endian)
    ' Anything else, including files without a BOM (a BOM-less UTF-8 file
    ' too), is reported as GB2312.
    Dim filepath As String
    Dim TempFile As Integer
    Dim HeadBytes() As Byte
    Dim HeadLen As Long
    Dim i As Long
    Dim FileCharset As String, strFileHead As String

    filepath = "c:\example.txt"

    ' Opening in Binary mode can create a missing file, so check first.
    If Len(Dir$(filepath)) = 0 Then
        MsgBox filepath & " was not found"
        Exit Sub
    End If

    ' Read at most the first three bytes; the rest of the file is not
    ' needed for BOM detection.
    TempFile = FreeFile
    Open filepath For Binary Access Read As #TempFile
    HeadLen = LOF(TempFile)
    If HeadLen > 3 Then HeadLen = 3
    If HeadLen > 0 Then
        ' Guarded so that an empty file never reaches ReDim (1 To 0).
        ReDim HeadBytes(1 To HeadLen)
        Get #TempFile, , HeadBytes
    End If
    Close #TempFile

    ' Build the hex signature. Each byte is zero-padded to two digits so
    ' that e.g. FE 0F 0F becomes "FE0F0F" rather than "FEFF".
    For i = 1 To HeadLen
        strFileHead = strFileHead & Right$("0" & Hex$(HeadBytes(i)), 2)
    Next

    ' Decide the encoding from the signature.
    If strFileHead = "EFBBBF" Then
        FileCharset = "UTF-8"
    ElseIf Left$(strFileHead, 4) = "FEFF" Then
        FileCharset = "Unicode big endian"
    ElseIf Left$(strFileHead, 4) = "FFFE" Then
        FileCharset = "UNICODE"
    Else
        ' No recognised BOM: fall back to simplified Chinese GB2312.
        ' Further signatures can be added above if other encodings
        ' need to be recognised.
        FileCharset = "GB2312"
    End If

    MsgBox filepath & " Text Format is " & FileCharset
End Sub

回答2:

把下面代码放到模块中,然后调用FileToUTF8即可。声明:此非原创,但记不清作者了,抱歉。
' Text encodings distinguished by GetEncoding.
' ANSI is 0, so it is also the value of an unassigned Encoding variable.
Public Enum Encoding
    ANSI = 0                ' no recognised byte-order mark
    Unicode = 1             ' BOM FF FE
    UnicodeBigEndian = 2    ' BOM FE FF
    UTF8 = 3                ' BOM starting EF BB
End Enum

Public Function GetEncoding(FileName As String) As Encoding
    ' Returns the encoding of FileName as indicated by its byte-order mark.
    '
    '   FF FE    -> Unicode
    '   FE FF    -> UnicodeBigEndian
    '   EF BB BF -> UTF8
    '   otherwise (no BOM, or any error while reading) -> ANSI
    '
    ' Errors are swallowed on purpose: callers get ANSI as the fallback.
    On Error GoTo ErrHandler

    Dim fBytes(2) As Byte, freeNum As Integer
    Dim isOpen As Boolean

    GetEncoding = ANSI

    freeNum = FreeFile
    Open FileName For Binary Access Read As #freeNum
    isOpen = True
    ' Read the first three bytes one at a time; bytes that are not read
    ' keep their initial value of 0.
    Get #freeNum, , fBytes(0)
    Get #freeNum, , fBytes(1)
    Get #freeNum, , fBytes(2)
    Close #freeNum
    isOpen = False

    If fBytes(0) = &HFF And fBytes(1) = &HFE Then
        GetEncoding = Unicode
    ElseIf fBytes(0) = &HFE And fBytes(1) = &HFF Then
        GetEncoding = UnicodeBigEndian
    ElseIf fBytes(0) = &HEF And fBytes(1) = &HBB And fBytes(2) = &HBF Then
        ' The UTF-8 BOM is three bytes; checking only EF BB would also
        ' match ANSI text that happens to start with those two bytes.
        GetEncoding = UTF8
    End If
    Exit Function

ErrHandler:
    ' Release the file handle if the failure happened after Open.
    On Error Resume Next
    If isOpen Then Close #freeNum
End Function

Public Sub FileToUTF8(FileName As String)
    ' Rewrites FileName in place as UTF-8 unless GetEncoding already
    ' reports it as UTF8.
    '
    ' The source text is decoded according to the detected encoding:
    '   Unicode          - bytes are taken as UTF-16 little endian
    '   UnicodeBigEndian - byte pairs are swapped, then treated as above
    '   ANSI             - converted with StrConv(..., vbUnicode)
    ' The result is written through an ADODB.Stream with Charset "UTF-8".
    ' An empty file is left untouched.
    Dim srcEncoding As Encoding
    Dim fBytes() As Byte, uniString As String, freeNum As Integer
    Dim byteCount As Long, i As Long, swapByte As Byte
    Dim ADO_Stream As Object

    srcEncoding = GetEncoding(FileName)
    If srcEncoding = UTF8 Then Exit Sub

    byteCount = FileLen(FileName)
    If byteCount = 0 Then Exit Sub      ' nothing to convert

    ' Size the buffer to exactly the file length (0 To n - 1).
    ' ReDim fBytes(n) would allocate n + 1 bytes and append a stray NUL
    ' character to the converted text.
    ReDim fBytes(0 To byteCount - 1)
    freeNum = FreeFile
    Open FileName For Binary Access Read As #freeNum
    Get #freeNum, , fBytes
    Close #freeNum

    Select Case srcEncoding
    Case Unicode, UnicodeBigEndian
        If srcEncoding = UnicodeBigEndian Then
            ' Swap each byte pair to get little-endian order.
            For i = 0 To byteCount - 2 Step 2
                swapByte = fBytes(i)
                fBytes(i) = fBytes(i + 1)
                fBytes(i + 1) = swapByte
            Next
        End If
        ' Assigning a Byte array to a String copies the bytes verbatim,
        ' which is what is wanted for UTF-16 data.
        uniString = fBytes
        ' Drop the leading BOM character.
        uniString = Mid$(uniString, 2)
    Case Else
        ' ANSI: decode using the system code page.
        uniString = StrConv(fBytes, vbUnicode)
    End Select

    Set ADO_Stream = CreateObject("ADODB.Stream")
    With ADO_Stream
        .Type = 2                   ' adTypeText
        .Mode = 3                   ' adModeReadWrite
        .Charset = "UTF-8"
        .open
        .WriteText uniString
        .SaveToFile FileName, 2     ' adSaveCreateOverWrite
        .Close
    End With
    Set ADO_Stream = Nothing
End Sub

回答3:

进来学习一下