讀取 UTF-8 檔並轉寫成其他編碼(此處為 GBK)。isTrans 用來判別該檔是否含有字碼大於 255 的字元(例如中文)。
其實這個是為了繁轉簡掃檔案用的~不過老實說~因為os的關係, wordpad跟word跟ultraedit上面看到的字編碼似乎不是那麼正確(我跟同事討論個老半天的結論, 卡住超久~囧~)
結果就在那邊一直改編碼改字排列組合測過來測過去|||orz~
/**
 * Copies a text file line by line, decoding it as UTF-8 and re-encoding it as GBK.
 *
 * <p>Each line read from {@code sourceFile} is written to {@code outFile} followed by
 * {@code LINE_SEP} (defined elsewhere in this file; presumably the line separator).
 * The first time a character with a code above 255 is seen, a single
 * {@code "---word file:"} message naming the output file is printed to standard output.
 *
 * <p>If {@code sourceFile} is not readable the method returns without creating the
 * output file. Any exception raised while copying is printed to standard error and
 * not rethrown.
 *
 * @param sourceFile the UTF-8 encoded file to read
 * @param outFile    path of the GBK encoded file to write
 */
void readAndWriteWord(File sourceFile, String outFile) {
    if (!sourceFile.canRead()) {
        return;
    }
    BufferedReader br = null;
    FileOutputStream fos = null;
    try {
        boolean isTrans = false;
        // Wrap the input stream immediately so the finally block can always close it,
        // even when opening the output file fails.
        br = new BufferedReader(new InputStreamReader(new FileInputStream(sourceFile), "utf-8"));
        fos = new FileOutputStream(outFile);
        String aRow;
        // readLine() returns null at end of stream; ready() is not an EOF test.
        while ((aRow = br.readLine()) != null) {
            if (!isTrans) {
                for (int i = 0; i < aRow.length(); i++) {
                    // A char above 255 means the file contains more than single-byte text.
                    if (aRow.charAt(i) > 255) {
                        isTrans = true;
                        System.out.println("---word file:" + outFile);
                        break;
                    }
                }
            }
            fos.write((aRow + LINE_SEP).getBytes("GBK"));
        }
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        if (br != null) {
            try {
                br.close();
            } catch (Exception ignored) {
                // Nothing useful can be done if closing the reader fails.
            }
        }
        if (fos != null) {
            try {
                fos.close();
            } catch (Exception ee) {
                // A failed close may mean the output is incomplete, so report it.
                ee.printStackTrace();
            }
        }
    }
}
沒有留言:
張貼留言