今天收到一份代码,拖到IDE中发现乱码,看了下编码是GBK的。只能手写个小工具批量转换一下。代码在这里分享一下,需要的可自取。
核心文件就两个:
ConverterUtil.java
public class ConverterUtil { private static String lineSepator; static { String osName = System.getProperty("os.name"); if (osName.contains("Windows")) { lineSepator = "\r\n"; } else { lineSepator = "\n"; } } public static Map<String, String> parseArgs(String[] args) { Map<String, String> argsMap = new HashMap<String, String>(); for (int i = 0; i < args.length; i++) { String key = args[i]; if (++i > args.length) { argsMap.put(key, ""); } else { argsMap.put(key, args[i]); } } return argsMap; } public static void writeString(File file, String content, String encoding) throws Exception { OutputStreamWriter osw = new OutputStreamWriter(new FileOutputStream(file), encoding); osw.write(content); osw.close(); } public static String readString(File file, String encoding) throws Exception { BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(file), encoding)); StringBuilder buff = new StringBuilder(); String line; while ((line = br.readLine()) != null) { if (buff.length() > 0) { buff.append(lineSepator); } buff.append(line); } br.close(); return buff.toString(); } public static List<File> listFiles(File dir, String ext, int maxDepth) { List<File> files = new ArrayList<File>(); if (maxDepth-- <= 0) { return files; } files.addAll(Arrays.asList(dir.listFiles(f -> { return f.isFile() && "".equals(ext) ? true : f.getName().endsWith("." + ext); }))); for (File _dir : dir.listFiles(f -> f.isDirectory())) { files.addAll(listFiles(_dir, ext, maxDepth)); } return files; } }
Converter.java
/**
 * Command-line tool that re-encodes GBK files as UTF-8, overwriting each file in place.
 *
 * @author lichmama
 */
public class Converter {
    private static String sourceDir;
    private static String fileExtension;
    private static int maxDepth;

    /**
     * Entry point: {@code java -jar gbk2utf.jar --src xxx --ext xxx --max-depth xxx}.
     *
     * <p>Prints the usage text and exits when the arguments are missing or invalid.
     *
     * @param args the command-line arguments
     */
    public static void main(String[] args) {
        if (usage(args)) {
            logger("Usage: java -jar gbk2utf.jar [OPTIONS]");
            logger("Change file's encoding(gbk) to utf-8 (@lichmama)");
            logger("OPTIONS:");
            logger(" --src <source path>, essential");
            logger(" --ext <file extension>, optional");
            logger(" --max-depth <max depth>, optional");
            return;
        }
        logger("source: " + sourceDir);
        logger(" file: *." + fileExtension);
        logger("====================================");
        Converter converter = new Converter();
        converter.process(sourceDir, fileExtension, maxDepth);
        logger("====================================**DONE");
    }

    /**
     * Converts a single file, or every matching file under a directory.
     *
     * @param src path of the file or directory to convert
     * @param ext file extension filter used when {@code src} is a directory; empty matches all
     * @param maxDepth maximum directory depth to descend when {@code src} is a directory
     */
    public void process(String src, String ext, int maxDepth) {
        File source = new File(src);
        if (!source.exists()) {
            logger("the path [\"" + src + "\"] not exist or!");
            return;
        }
        if (!source.isDirectory()) {
            gbk2utf(source);
            return;
        }
        List<File> files = ConverterUtil.listFiles(source, ext, maxDepth);
        if (files.isEmpty()) {
            logger("NO FILES TO BE PROCESSED");
            return;
        }
        for (File file : files) {
            gbk2utf(file);
        }
    }

    /**
     * Reads {@code file} as GBK and writes the same text back to it as UTF-8.
     *
     * <p>Logs exactly one outcome line per file: {@code - SUCCESS} or {@code - FAILED}.
     *
     * @param file the file to convert in place
     * @return {@code true} if the file was rewritten, {@code false} if reading or writing failed
     */
    public boolean gbk2utf(File file) {
        logger("process " + file.getName());
        try {
            String content = ConverterUtil.readString(file, "GBK");
            ConverterUtil.writeString(file, content, "UTF-8");
        } catch (Exception e) {
            logger("- FAILED, reason: " + e.getMessage());
            return false;
        }
        // Report success only after both steps completed; a finally block would also run on failure.
        logger("- SUCCESS");
        return true;
    }

    /**
     * Parses the arguments into the static settings and reports whether usage help is needed.
     *
     * @param args the command-line arguments
     * @return {@code true} if the arguments are unusable and the usage text should be shown
     */
    private static boolean usage(String[] args) {
        Map<String, String> argsMap = ConverterUtil.parseArgs(args);

        String src = argsMap.get("--src");
        if (src == null || src.isEmpty()) {
            return true; // --src is mandatory and needs a value
        }
        sourceDir = src;

        fileExtension = argsMap.containsKey("--ext") ? argsMap.get("--ext") : "";

        if (!argsMap.containsKey("--max-depth")) {
            maxDepth = Integer.MAX_VALUE;
            return false;
        }
        try {
            maxDepth = Integer.parseInt(argsMap.get("--max-depth"));
        } catch (NumberFormatException e) {
            // A non-numeric depth is a usage error, not a crash with a stack trace.
            return true;
        }
        return false;
    }

    /** Prints one line to standard output. */
    private static void logger(String s) {
        System.out.println(s);
    }
}
编译后打包成Runnable Jar File,不带参数运行会输出用法说明:
$> java -jar gbk2utf.jar Usage: java -jar gbk2utf.jar [OPTIONS] Change file's encoding(gbk) to utf-8 (@lichmama) OPTIONS: --src <source path>, essential --ext <file extension>, optional --max-depth <max depth>, optional
使用示例:
1.转换code目录下java文件的编码 java -jar gbk2utf.jar --src code/ --ext java 2.转换code目录下xml文件的编码,最大遍历深度为3 java -jar gbk2utf.jar --src code/ --ext xml --max-depth 3 3.转换test.html文件的编码 java -jar gbk2utf.jar --src test.html
温馨提示:使用前请先备份原始文件以免造成不必要的损失!!!
另,本程序运行环境为JDK1.8(及以上)