java验证各种文件内容合法性

时间:2025-03-10 08:19:40

平时我们通常只根据文件后缀名进行校验,这样做无法发现后缀名被手工修改、或文件实际内容与后缀名不符的情况。推荐读取文件头部的若干字节(即"魔数"/文件签名),转换为16进制字符串后与已知的文件签名进行比对来验证。

/**
 * Checks whether the leading bytes of a file (its "magic number") match the signature
 * registered for a claimed file extension.
 *
 * <p>Checking the extension alone accepts any file that has simply been renamed; comparing
 * the file header against a known signature catches that case.
 *
 * <p>NOTE(review): several of the 10-byte signatures below appear to include bytes that vary
 * from file to file (for example the {@code .wav}/{@code .avi} entries include the four bytes
 * that follow {@code "RIFF"}, and the ZIP-based entries include version/flag bytes). They are
 * kept exactly as provided; confirm against a signature reference and shorten where needed.
 */
public class FileFormatVerifyUtil {

    /** Maximum number of leading bytes read from a file; longer than every signature below. */
    private static final int HEADER_BYTES = 1024;

    /**
     * Extension (lower case, with leading dot) mapped to its expected header as lower-case hex.
     *
     * <p>Keyed by extension rather than by signature: several formats share one signature
     * (OLE2: doc/xls/vsd/wps, OOXML: docx/xlsx), and keying by signature would let later
     * entries silently overwrite earlier ones.
     */
    private static final Map<String, String> SIGNATURES = new HashMap<>();

    static {
        register(".js", "696b2e71623d696b2e71"); // JavaScript
        register(".ps", "252150532D41646F6265"); // PostScript
        register(".gz", "1f8b0800000000000000"); // gzip archive
        register(".mf", "4d616e69666573742d56"); // manifest file
        register(".jpg", "ffd8ffe000104a464946"); // JPEG
        register(".png", "89504e470d0a1a0a0000"); // PNG
        register(".gif", "47494638"); // GIF (sample header: 47494638396126026f01)
        register(".tif", "49492a00227105008037"); // TIFF
        register(".bmp", "424d"); // Windows bitmap
        register(".dwg", "41433130313500000000"); // CAD drawing
        register(".htm", "3c21646f637479706520"); // HTML ("<!doctype ")
        register(".css", "48544d4c207b0d0a0942"); // CSS
        register(".rtf", "7b5c727466315c616e73"); // Rich Text Format
        register(".psd", "38425053000100000000"); // Photoshop
        register(".eml", "46726f6d3a203d3f6762"); // Email (Outlook Express 6)
        register(".doc", "d0cf11e0a1b11ae10000"); // MS Word; same header as Excel and MSI
        register(".xls", "d0cf11e0a1b11ae10000"); // MS Excel; same header as Word and MSI
        register(".vsd", "d0cf11e0a1b11ae10000"); // Visio drawing
        register(".mdb", "5374616E64617264204A"); // MS Access
        register(".pdf", "255044462d312e"); // PDF ("%PDF-1.")
        register(".flv", "464c5601050000000900"); // Flash video; f4v is the same
        register(".mp4", "00000020667479706d70");
        register(".mp3", "49443303000000002176");
        register(".mpg", "000001ba210001000180");
        register(".wmv", "3026b2758e66cf11a6d9"); // wmv; asf is the same
        register(".wav", "52494646e27807005741"); // Wave audio
        register(".avi", "52494646d07d60074156");
        register(".mid", "4d546864000000060001"); // MIDI
        register(".zip", "504b0304140000000800");
        register(".rar", "526172211a0700cf9073");
        register(".ini", "235468697320636f6e66");
        register(".jar", "504b03040a0000000000");
        register(".exe", "4d5a9000030000000400"); // Windows executable
        register(".jsp", "3c25402070616765206c"); // JSP page
        register(".xml", "3c3f786d6c2076657273"); // XML ("<?xml vers")
        register(".sql", "494e5345525420494e54"); // SQL script ("INSERT INT")
        register(".bat", "406563686f206f66660d"); // batch file ("@echo off")
        register(".chm", "49545346030000006000"); // compiled HTML help
        register(".mxp", "04000000010000001300"); // mxp file
        register(".wps", "d0cf11e0a1b11ae10000"); // WPS Writer; et and dps share this header
        register(".html", "3c21444f435459504520"); // HTML ("<!DOCTYPE ")
        register(".rmvb", "2e524d46000000120001"); // rmvb; rm is the same
        register(".java", "7061636b616765207765"); // Java source ("package we")
        register(".docx", "504b0304140006000800"); // MS Word (OOXML)
        register(".xlsx", "504b0304140006000800"); // MS Excel (OOXML)
        register(".class", "cafebabe0000002e0041"); // Java class file
        register(".torrent", "6431303a637265617465");
        register(".properties", "6c6f67346a2e726f6f74"); // properties file ("log4j.root")
        register(".dbx", "CFAD12FEC5FD746F"); // Outlook Express
        register(".mov", "6D6F6F76"); // QuickTime
        register(".wpd", "FF575043"); // WordPerfect
        register(".pst", "2142444E"); // Outlook
        register(".qdf", "AC9EBD8F"); // Quicken
        register(".pwl", "E3828596"); // Windows password list
        register(".ram", "2E7261FD"); // Real Audio
    }

    /**
     * Registers a signature, normalising the hex text to lower case so that it can be compared
     * directly with the lower-case output of {@link #bytesToHexString(byte[])}.
     */
    private static void register(String extension, String signatureHex) {
        SIGNATURES.put(extension, signatureHex.toLowerCase(java.util.Locale.ROOT));
    }

    /**
     * Verifies that the header of {@code file} matches the signature registered for
     * {@code fileType}.
     *
     * @param file     the file to inspect
     * @param fileType the claimed extension including the leading dot (for example
     *                 {@code ".pdf"}); matched case-insensitively
     * @return {@code true} if the file starts with the registered signature; {@code false} if
     *         it does not, if the extension is not registered, if either argument is
     *         {@code null}, or if the file is empty or cannot be read
     */
    public static boolean suffixVerify(File file, String fileType) {
        if (file == null || fileType == null) {
            return false;
        }
        String signature = SIGNATURES.get(fileType.toLowerCase(java.util.Locale.ROOT));
        if (signature == null) {
            return false;
        }
        // bytesToHexString returns null for a null or empty header, which covers both an
        // unreadable file and an empty one.
        String headerHex = bytesToHexString(inputStream2ByteArray(file));
        return headerHex != null && headerHex.startsWith(signature);
    }

    /**
     * Reads the leading bytes of a file.
     *
     * <p>Only the first {@value #HEADER_BYTES} bytes are read. The returned array contains
     * exactly the bytes that were read, so a shorter file yields a shorter array rather than
     * a zero-padded one.
     *
     * @param file the file to read
     * @return the leading bytes (possibly empty), or {@code null} if {@code file} is
     *         {@code null} or cannot be opened or read
     */
    public static byte[] inputStream2ByteArray(File file) {
        if (file == null) {
            return null;
        }
        byte[] header = new byte[HEADER_BYTES];
        int filled = 0;
        try (FileInputStream in = new FileInputStream(file)) {
            // A single read() may return fewer bytes than requested, so keep reading until
            // the buffer is full or the end of the file is reached.
            while (filled < header.length) {
                int count = in.read(header, filled, header.length - filled);
                if (count < 0) {
                    break;
                }
                filled += count;
            }
        } catch (IOException e) {
            // Also covers FileNotFoundException. Report and signal failure with null.
            System.err.println("Failed to read header of " + file + ": " + e);
            return null;
        }
        byte[] result = new byte[filled];
        System.arraycopy(header, 0, result, 0, filled);
        return result;
    }

    /**
     * Converts bytes to a lower-case hexadecimal string, two digits per byte.
     *
     * @param src the bytes to convert
     * @return the hex string, or {@code null} if {@code src} is {@code null} or empty
     */
    public static String bytesToHexString(byte[] src) {
        if (src == null || src.length <= 0) {
            return null;
        }
        StringBuilder hex = new StringBuilder(src.length * 2);
        for (int i = 0; i < src.length; i++) {
            int v = src[i] & 0xFF; // treat the byte as unsigned
            if (v < 0x10) {
                hex.append('0'); // keep two digits per byte
            }
            hex.append(Integer.toHexString(v));
        }
        return hex.toString();
    }

    /**
     * Demo entry point: prints the verification result to standard output.
     *
     * @param args optional {@code <file> <extension>}; when fewer than two arguments are
     *             given, the built-in sample path and {@code ".pdf"} are used
     */
    public static void main(String[] args) throws IOException {
        File f;
        String extension;
        if (args != null && args.length >= 2) {
            f = new File(args[0]);
            extension = args[1];
        } else {
            f = new File("C:\\Users\\Administrator\\Desktop\\filetest\\115 - 副本.pdf");
            extension = ".pdf";
        }
        System.out.println(suffixVerify(f, extension));
    }
}