自动辨别文本文件是否为 UTF-8 编码的 C# 程序。

/// <summary>
/// Recursively walks <paramref name="Path"/> and, for every file whose
/// lower-cased extension is accepted by <c>CheckFileType</c>, tests whether the
/// file content is multi-byte UTF-8 via <see cref="IsUTF8"/>. Files that fail
/// the test are read twice: once decoded as UTF-8 and once decoded with the
/// system default encoding (with byte-order-mark detection enabled).
/// </summary>
/// <param name="Path">Directory to scan; its sub-directories are scanned first.</param>
private void FindNoUTFFile(string Path)
{
    DirectoryInfo Folder = new System.IO.DirectoryInfo(Path);

    // Depth-first: handle every sub-directory before the files of this one.
    DirectoryInfo[] subFolders = Folder.GetDirectories();
    for (int i = 0; i < subFolders.Length; i++)
    {
        FindNoUTFFile(subFolders[i].FullName);
    }

    FileInfo[] subFiles = Folder.GetFiles();
    for (int j = 0; j < subFiles.Length; j++)
    {
        if (!CheckFileType(subFiles[j].Extension.ToLower()))
        {
            continue;
        }

        // 'using' guarantees the handle is released even if IsUTF8 throws.
        bool bUtf8;
        using (FileStream fs = new FileStream(subFiles[j].FullName, FileMode.Open, FileAccess.Read))
        {
            bUtf8 = IsUTF8(fs);
        }

        if (!bUtf8)
        {
            // Content as it looks when decoded as UTF-8.
            StringBuilder sb = new StringBuilder();
            using (System.IO.StreamReader reader = new System.IO.StreamReader(subFiles[j].FullName, System.Text.Encoding.UTF8))
            {
                sb.Append(reader.ReadToEnd());
            }

            // Content as it looks when decoded with the system default encoding.
            StringBuilder sb2 = new StringBuilder();
            using (System.IO.StreamReader reader = new System.IO.StreamReader(subFiles[j].FullName, System.Text.Encoding.Default, true))
            {
                sb2.Append(reader.ReadToEnd());
            }

            // NOTE(review): neither sb nor sb2 is consumed in the visible listing;
            // presumably the caller's real code reports or re-saves the file here — confirm.
        }
    }
}

// UTF-8 byte layout checked by IsUTF8:
// 0000 0000-0000 007F - 0xxxxxxx                   (ASCII converts to 1 octet!)
// 0000 0080-0000 07FF - 110xxxxx 10xxxxxx          (2 octet format)
// 0000 0800-0000 FFFF - 1110xxxx 10xxxxxx 10xxxxxx (3 octet format)

/// <summary>
/// Scans <paramref name="sbInputStream"/> byte by byte and reports whether it
/// consists of well-formed UTF-8 sequences and contains at least one
/// non-ASCII byte.
/// </summary>
/// <param name="sbInputStream">Open, readable file stream; <c>Length</c> bytes are read from it.</param>
/// <returns>
/// <c>true</c> when every lead byte is followed by the expected number of
/// <c>10xxxxxx</c> continuation bytes and at least one byte has its high bit
/// set; <c>false</c> for malformed or truncated sequences and also for
/// pure-ASCII (or empty) content.
/// </returns>
private static bool IsUTF8(FileStream sbInputStream)
{
    byte cOctets = 0;       // continuation octets still expected for the current character
    bool bAllAscii = true;
    long iLen = sbInputStream.Length;

    for (long i = 0; i < iLen; i++)
    {
        // NOTE(review): the read statement did not survive in the source listing;
        // restored as a single-byte read — confirm against the original program.
        byte chr = (byte)sbInputStream.ReadByte();

        if ((chr & 0x80) != 0)
        {
            bAllAscii = false;
        }

        if (cOctets == 0)
        {
            if (chr >= 0x80)
            {
                // Count the leading 1-bits of the lead byte: 110xxxxx -> 2, 1110xxxx -> 3, ...
                do
                {
                    chr <<= 1;
                    cOctets++;
                }
                while ((chr & 0x80) != 0);

                // The lead byte itself is one of the counted octets.
                cOctets--;

                // A single leading 1-bit means a stray 10xxxxxx continuation byte.
                if (cOctets == 0)
                {
                    return false;
                }
            }
        }
        else
        {
            // Inside a multi-byte character: the byte must look like 10xxxxxx.
            if ((chr & 0xC0) != 0x80)
            {
                return false;
            }

            cOctets--;
        }
    }

    // Input ended in the middle of a multi-byte character.
    if (cOctets > 0)
    {
        return false;
    }

    // Pure ASCII is deliberately reported as "not UTF-8".
    if (bAllAscii)
    {
        return false;
    }

    return true;
}
Published At
Categories with Web编程
Tagged with
comments powered by Disqus