笔试题分析（1）

最近看到一个笔试题目：

10W个文本文件存放在/opt/test/目录及其子目录下，每个文件的大小为1M。
统计文件中的字母A出现的个数。
4核CPU，8G内存。

看到这样的题目，首先想到的方法是遍历所有文件，然后把每个文件中的A的次数给统计出来。

那我们一步步分开来。

1、遍历10W个文本文件

方法一：通过迭代，得到所有的文本数量，但是这样是单线程，量级越大，效率就越来越慢。

    //方法一 通过迭代，得到所有的文本数量
public long traverseFolder(String path) {
long count = 0;
        File file = new File(path);
if (file.exists()) {
            File[] files = file.listFiles();
if (null != files && files.length >= 0) {
for (File file1 : files) {
if (file1.isDirectory()) {
// System.out.println("文件夹:" + file1.getAbsolutePath());
long i = traverseFolder(file1.getAbsolutePath());
count = count + i;
                    } else {
count ++;
// System.out.println("文件:" + file1.getAbsolutePath());
                    }
                }
            }
        } else {
// System.out.println("文件不存在!");
        }
return count;
    }

方法二：通过 ForkJoinPool 来完成遍历所有文件。其实题目中 4核CPU，8G内存。 这个提示，就是希望我们用并发来完成，这样可以增加效率。关于 ForkJoinPool 这个并发工具，如果不了解，这里就不解释了，请自行查阅。

public class Demo extends RecursiveTask<Integer> {

private String path;

private Demo(String path) {
this.path = path;
    }

    @Override
protected Integer compute() {
int count = 0;
        List<Demo> subTasks = new ArrayList<>();

// 读取目录 dir 的子路径。
try {
            File file = new File(path);
            File[] files = file.listFiles();
if (null != files && files.length > 0) {
for (File file1 : files) {
if (file1.isDirectory()) {
                        subTasks.add(new Demo(file1.getPath()));
                    } else {
count ++;
                    }
                }
            }
if (!subTasks.isEmpty()) {
// 在当前的 ForkJoinPool 上调度所有的子任务。
for (Demo subTask : invokeAll(subTasks)) {
count += subTask.join();
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
return count;
    }
}

2、对每个Txt进行统计A出现的次数

这个属于流操作，基本大家都没什么问题。

    private static int getcount(String path, char c1) {
int count = 0;
        BufferedReader bfr = null;   //定义字符读取(缓冲)流
try {
            bfr = new BufferedReader(new FileReader(path));//给该流赋值
            String value; //定义一个临时接收文件中的字符串变量
            String newValue = "";    //接收文件中所有字符串的变量
while ((value = bfr.readLine()) != null) {    //开始读取文件中的字符
                newValue = newValue + value;    //存入newValue变量中
            }
char[] ch = newValue.toCharArray();//把newValue变成字符数组
for (char c : ch) {   //遍历ch 将ch中所有的字符存入一个Map集合中(TreeSet)，键对应字符，值对应字符出现的次数
if (c1 == c) {  //如果TreeMap(tm)中有该键，则取出该键中的值，也就是出现的次数
                    count++;
                }
            }
        } catch (Exception e) {
            System.out.println("文件读取错误");
        } finally {
try {
if (bfr != null)
                    bfr.close();
            } catch (IOException e) {
                System.out.println("文件关闭错误");
            }
        }
return count;
    }

3、完整代码

到这里，应该是把整个题目给完成了，下面我贴一下完整的代码。我对代码进行了修改，因为我测试的是自己桌面中所有的文件，也不都是 txt 。

package com.blog.common.util;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ForkJoinPool;
import java.util.concurrent.RecursiveTask;

/**
 * 测试便利所有一个目录下的所有的文件夹。用迭代法 和 ForkJoinPool
 * Created by dyw on 2017/7/25.
 */
public class Demo extends RecursiveTask<Integer> {

public long traverseFolder(String path) {
long count = 0;
        File file = new File(path);
if (file.exists()) {
            File[] files = file.listFiles();
if (null != files && files.length >= 0) {
for (File file1 : files) {
if (file1.isDirectory()) {
// System.out.println("文件夹:" + file2.getAbsolutePath());
long i = traverseFolder(file1.getAbsolutePath());
                        count = count + i;
                    } else {
if (file1.getAbsolutePath().endsWith(".txt")) {
int a = getcount(file1.getAbsolutePath(), 'a');
                            System.out.println(file1.getAbsolutePath() + "==========================" + a);
                            count += a;
                        }
// System.out.println("文件:" + file2.getAbsolutePath());
                    }
                }
            }
        } else {
// System.out.println("文件不存在!");
        }
return count;
    }

public static void main(String[] args) {
//传统迭代法
long oldDate1 = System.currentTimeMillis();
long count1 = new Demo().traverseFolder("C:\\Users\\dyw\\Desktop");
        System.out.println(System.currentTimeMillis() - oldDate1 + "==============" + count1);
//ForkJoinPool
long oldDate2 = System.currentTimeMillis();
        Integer count2 = new ForkJoinPool().invoke(new Demo("C:\\Users\\dyw\\Desktop"));
        System.out.println(System.currentTimeMillis() - oldDate2 + "==============" + count2);
    }

private String path;

private Demo() {

    }

private Demo(String path) {
this.path = path;
    }

    @Override
protected Integer compute() {
int count = 0;
        List<Demo> subTasks = new ArrayList<>();

// 读取目录 dir 的子路径。
try {
            File file = new File(path);
            File[] files = file.listFiles();
if (null != files && files.length > 0) {
for (File file1 : files) {
if (file1.isDirectory()) {
                        subTasks.add(new Demo(file1.getPath()));
                    } else {
if (file1.getAbsolutePath().endsWith(".txt")) {
int a = getcount(file1.getAbsolutePath(), 'a');
                            System.out.println(file1.getAbsolutePath() + "==========================" + a);
                            count += a;
                        }
                    }
                }
            }
if (!subTasks.isEmpty()) {
// 在当前的 ForkJoinPool 上调度所有的子任务。
for (Demo subTask : invokeAll(subTasks)) {
                    count += subTask.join();
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
return count;
    }

/**
 * 获取字符出现的次数
 *
 * @param path .
 * @return count
 */
private static int getcount(String path, char c1) {
int count = 0;
        BufferedReader bfr = null;   //定义字符读取(缓冲)流
try {
            bfr = new BufferedReader(new FileReader(path));//给该流赋值
            String value; //定义一个临时接收文件中的字符串变量
            String newValue = "";    //接收文件中所有字符串的变量
while ((value = bfr.readLine()) != null) {    //开始读取文件中的字符
                newValue = newValue + value;    //存入newValue变量中
            }
char[] ch = newValue.toCharArray();//把newValue变成字符数组
for (char c : ch) {   //遍历ch 将ch中所有的字符存入一个Map集合中(TreeSet)，键对应字符，值对应字符出现的次数
if (c1 == c) {  //如果TreeMap(tm)中有该键，则取出该键中的值，也就是出现的次数
                    count++;
                }
            }
        } catch (Exception e) {
            System.out.println("文件读取错误");
        } finally {
try {
if (bfr != null)
                    bfr.close();
            } catch (IOException e) {
                System.out.println("文件关闭错误");
            }
        }
return count;
    }
}

4、运行结果

因为我电脑是4核的，基本上，用ForkJoinPool是迭代法的 4分之1 时间。

//迭代法
4223==============32390
//ForkJoinPool方法
1220==============32390

上面是我对这题目的见解，如果我的理解有什么问题，或者代码可以优化的地方，请给我留言，大家一起互相进步。

秒客网

笔试题分析（1）

相关文章