将GB2312的文件转为Unicode格式

mac2022-06-30  79

将GB2312的文件转为Unicode格式

using System;
using System.Collections.Generic;
using System.Text;

namespace CrmTools
{
    /// <summary>
    /// File helpers for reading text in the system default encoding and for
    /// converting GB2312 files in place to UTF-16 little-endian (what .NET calls
    /// <see cref="Encoding.Unicode"/>).
    /// </summary>
    /// <remarks>
    /// Several member names say "UTF8" for historical reasons; the bytes actually
    /// detected and produced are UTF-16 LE with an FF FE byte-order mark.
    /// </remarks>
    class FileUtil
    {
        /// <summary>Size in bytes of the chunks read from disk.</summary>
        private const int BufferSize = 1024;

        /// <summary>
        /// Reads a whole file and decodes it with <see cref="Encoding.Default"/>.
        /// </summary>
        /// <param name="fileName">Path of the file to read.</param>
        /// <returns>The decoded text; an empty string for an empty file.</returns>
        /// <remarks>
        /// I/O exceptions from opening or reading the file propagate to the caller.
        /// No byte-order-mark detection is performed.
        /// </remarks>
        public static string readFile(string fileName)
        {
            Encoding encoding = Encoding.Default;
            // A stateful decoder carries a multi-byte character that straddles two
            // chunks over to the next call instead of corrupting it.
            Decoder decoder = encoding.GetDecoder();
            byte[] buffer = new byte[BufferSize];
            char[] chars = new char[encoding.GetMaxCharCount(buffer.Length)];
            StringBuilder sb = new StringBuilder();

            using (System.IO.FileStream fs = System.IO.File.Open(
                fileName,
                System.IO.FileMode.Open,
                System.IO.FileAccess.Read,
                System.IO.FileShare.Read))
            {
                int read;
                while ((read = fs.Read(buffer, 0, buffer.Length)) > 0)
                {
                    // Decode only the bytes actually read; the tail of the buffer
                    // still holds data from the previous pass.
                    int charCount = decoder.GetChars(buffer, 0, read, chars, 0, false);
                    sb.Append(chars, 0, charCount);
                }

                // Flush so a dangling lead byte at end-of-file is not silently lost.
                int tailCount = decoder.GetChars(buffer, 0, 0, chars, 0, true);
                sb.Append(chars, 0, tailCount);
            }

            return sb.ToString();
        }

        /// <summary>
        /// Converts a GB2312 file in place to UTF-16 LE with an FF FE byte-order mark.
        /// </summary>
        /// <param name="srcFileName">Path of the file to convert; it is overwritten on success.</param>
        /// <param name="destEncode">
        /// Currently ignored: the output is always UTF-16 LE. Kept so existing callers compile.
        /// </param>
        /// <returns>
        /// <c>true</c> if the file already starts with FF FE or was converted;
        /// <c>false</c> if any step of the conversion failed.
        /// </returns>
        /// <remarks>
        /// The initial byte-order-mark check runs outside the error handling, so
        /// exceptions from opening <paramref name="srcFileName"/> for that check propagate.
        /// An empty source file stays empty (no byte-order mark is written).
        /// </remarks>
        public static bool convertFileEncode(string srcFileName, string destEncode)
        {
            if (isUTF8File(srcFileName))
            {
                return true;
            }

            string destFileName = null;
            try
            {
                destFileName = System.IO.Path.GetTempFileName();

                // Stateful decoder/encoder keep characters intact across chunk boundaries.
                Encoding gb2312 = Encoding.GetEncoding("gb2312");
                Decoder decoder = gb2312.GetDecoder();
                Encoder encoder = Encoding.Unicode.GetEncoder();

                byte[] inBytes = new byte[BufferSize];
                char[] chars = new char[gb2312.GetMaxCharCount(inBytes.Length)];
                byte[] outBytes = new byte[Encoding.Unicode.GetMaxByteCount(chars.Length)];

                using (System.IO.FileStream fs = System.IO.File.Open(
                    srcFileName,
                    System.IO.FileMode.Open,
                    System.IO.FileAccess.Read,
                    System.IO.FileShare.Read))
                using (System.IO.FileStream sw = System.IO.File.Create(destFileName))
                {
                    if (fs.Length > 0)
                    {
                        // UTF-16 LE byte-order mark; isUTF8File looks for exactly these bytes.
                        sw.WriteByte(0xFF);
                        sw.WriteByte(0xFE);
                    }

                    int read;
                    while ((read = fs.Read(inBytes, 0, inBytes.Length)) > 0)
                    {
                        Transcode(decoder, encoder, inBytes, read, false, chars, outBytes, sw);
                    }

                    // Flush any state left in the decoder/encoder at end-of-file.
                    Transcode(decoder, encoder, inBytes, 0, true, chars, outBytes, sw);
                }

                // Both streams are closed here, so the source can be overwritten.
                System.IO.File.Copy(destFileName, srcFileName, true);
            }
            catch (Exception)
            {
                // Deliberate best-effort contract: callers only get a success flag.
                return false;
            }
            finally
            {
                DeleteQuietly(destFileName);
            }

            return true;
        }

        /// <summary>
        /// Reports whether a file starts with the bytes FF FE (the UTF-16 LE byte-order mark).
        /// </summary>
        /// <param name="srcFileName">Path of the file to inspect.</param>
        /// <returns>
        /// <c>true</c> if the first two bytes are FF FE; <c>false</c> otherwise,
        /// including for files shorter than two bytes.
        /// </returns>
        /// <remarks>
        /// Despite the name, this does not detect UTF-8. I/O exceptions propagate.
        /// </remarks>
        public static bool isUTF8File(string srcFileName)
        {
            byte[] bom = new byte[2];

            using (System.IO.FileStream fs = System.IO.File.Open(
                srcFileName,
                System.IO.FileMode.Open,
                System.IO.FileAccess.Read,
                System.IO.FileShare.Read))
            {
                // Stream.Read may return fewer bytes than requested, so loop until
                // both bytes are in or the file ends.
                int total = 0;
                while (total < bom.Length)
                {
                    int read = fs.Read(bom, total, bom.Length - total);
                    if (read <= 0)
                    {
                        break;
                    }
                    total += read;
                }

                return total == bom.Length && bom[0] == 0xFF && bom[1] == 0xFE;
            }
        }

        /// <summary>
        /// Converts a complete GB2312 byte sequence to UTF-16 LE bytes (no byte-order mark).
        /// </summary>
        /// <param name="gb2312Bytes">GB2312-encoded bytes.</param>
        /// <returns>The same text encoded as UTF-16 LE.</returns>
        /// <remarks>
        /// The conversion is stateless: the input must not end in the middle of a
        /// two-byte character, so do not feed it arbitrary slices of a larger stream.
        /// </remarks>
        public static byte[] GB2312toUnicodeBytes(byte[] gb2312Bytes)
        {
            Encoding gb2312 = Encoding.GetEncoding("gb2312");
            Encoding unicode = Encoding.Unicode;
            return Encoding.Convert(gb2312, unicode, gb2312Bytes);
        }

        /// <summary>Decodes one input chunk, re-encodes it and writes the result to <paramref name="output"/>.</summary>
        private static void Transcode(
            Decoder decoder,
            Encoder encoder,
            byte[] input,
            int inputCount,
            bool flush,
            char[] chars,
            byte[] outBytes,
            System.IO.Stream output)
        {
            int charCount = decoder.GetChars(input, 0, inputCount, chars, 0, flush);
            int byteCount = encoder.GetBytes(chars, 0, charCount, outBytes, 0, flush);
            output.Write(outBytes, 0, byteCount);
        }

        /// <summary>Deletes a temporary file, ignoring failures so cleanup never masks the real outcome.</summary>
        private static void DeleteQuietly(string path)
        {
            if (string.IsNullOrEmpty(path))
            {
                return;
            }

            try
            {
                System.IO.File.Delete(path);
            }
            catch (System.IO.IOException)
            {
            }
            catch (UnauthorizedAccessException)
            {
            }
        }
    }
}

转载于:https://www.cnblogs.com/antony1029/archive/2008/09/21/1295421.html

最新回复(0)