java大文件读取拆分(30G以上)

时间:2025-02-21 19:40:52

Java 大文件读取与拆分。本次示例解析并拆分一个 40G 大小的 SQL 文件,拆分时每读取 50 万行数据输出为一个文件,具体逻辑直接看代码:test1 是单文件读取过滤,test2 是文件拆分。

代码可以直接运行,只需修改以下三项:fileName(需要操作的目标文件路径)、outDir(拆分后文件输出目录)、outFile(单文件过滤后的输出路径)。

代码如下:

package ;

import org.junit.Test;

import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.nio.charset.StandardCharsets;

/**
 * @description: java读取拆分大文件
 * @author: w*****
 * @date: 2021/4/8 18:51
 */
public class FileSplit {
    private static String fileName = "D://备份//历史待恢复数据//sql//";
    private static String outFile = "D://备份//历史待恢复数据//sql//sql//";
    private static String outDir = "D://备份//历史待恢复数据//sql//2sql//";


    /**
     * 2G以上大文件读取
     *
     * @param inputFile, outputFile
     * @return void
     * @author wang****
     * @date: 2021/4/7 16:16
     */
    void largeFileIO(String inputFile, String outputFile) {
        try {
            BufferedInputStream bis = new BufferedInputStream(new FileInputStream(new File(inputFile)));
            // 10M缓存
            BufferedReader in = new BufferedReader(new InputStreamReader(bis, "utf-8"), 10 * 1024 * 1024);
            FileWriter fw = new FileWriter(outputFile);
            while (()) {
                String line = ();
                if (("INSERT")) {
                    (line + "\r");
                }
            }
            ();
            ();
            ();
        } catch (IOException ex) {
            ();
        }
    }


    /**
     * 大文件拆分
     *
     * @param inputFile, outputFile
     * @return void
     * @author wang****
     * @date: 2021/4/8 16:19
     */
    void largeFileSplit(String inputFile, String outputDir) {
        int j = 0;
        try {
            File out = new File(outputDir);
            if (!()) {
                ();
            }
            BufferedInputStream bis = new BufferedInputStream(new FileInputStream(new File(inputFile)));
            // 20M缓存
            BufferedReader in = new BufferedReader(new InputStreamReader(bis, "utf-8"), 20 * 1024 * 1024);
            int i = 0;
            while (()) {
                long startTime = ();
                ++i;
                FileWriter fw = new FileWriter(outputDir + i + ".sql");
                String line = null;
                //50万行一个文件拆分
                for (long lineCounter = 0; lineCounter < 500000 && (line = ()) != null; ++lineCounter) {
                    if (("INSERT INTO `cur_rec` VALUES")) {
                        (line + "\r");
                        ++j;
                    }
                }
                ();
                ();
                long endTime = ();
                ("第" + i + "个文件拆分成功,耗时:" + (float) (endTime - startTime) / 1000 + "秒");
            }
            ();
        } catch (IOException ex) {
            ("第" + j + "行数据拆分异常\r" + ex);
            ();
        }
    }

    /**
     * 读取测试
     */
    @Test
    public void test1() {
        largeFileIO(fileName, outFile);
    }

    /**
     * 大文件拆分
     */
    @Test
    public void test2() {
        long startTime = ();
        largeFileSplit(fileName, outDir);
        long endTime = ();
        ("拆文件成功,共耗时:" + (float) (endTime - startTime) / 1000 + "秒");
    }
}