1 package zuoye1;
2
3 import java.io.BufferedReader;
4 import java.io.FileNotFoundException;
5 import java.io.FileReader;
6 import java.io.IOException;
7 import java.util.ArrayList;
8 import java.util.Collections;
9 import java.util.Comparator;
10 import java.util.HashMap;
11 import java.util.List;
12 import java.util.Map;
13 import java.util.StringTokenizer;
14 import java.util.Map.Entry;
15
16 public class FileWord {
17
18 /**
19 * 读入文件,实现词频统计
20 */
21 public static void main(String[] args) {
22 HashMap<String,Integer> map=new HashMap<String,Integer>();//用于统计各个单词的个数,排序
23 //过滤字符串中的所有标点符号
24 String regex=" ?.!:,\"\"'';\n";
25 BufferedReader br;
26 try {
27 //FileReader类创建了一个可以读取文件内容的Reader类、调用构造方法FileReader()
28 br = new BufferedReader(new FileReader("c:\\english.txt"));//文件完整路径
29 String sentence;
30 int wordCount = 0;
31 try {
32 while((sentence = br.readLine()) !=null){ //用readLine读取文件,判断读取文件是否为空
33 sentence = sentence.replaceAll(regex, "");
34 StringTokenizer token=new StringTokenizer(sentence);
35 while(token.hasMoreTokens()){ //循环遍历
36 wordCount++;
37 String word = token.nextToken();
38 if(map.containsKey(word)){ //HashMap不允许重复的key,所以利用这个特性,去统计单词的个数
39 int count=map.get(word);
40 map.put(word, count+1); //如果HashMap已有这个单词,则设置它的数量加1
41 }
42 else{
43 map.put(word, 1); //如果没有这个单词,则新填入,数量为1
44 }
45 }
46 }
47 System.out.println("总共单词数:"+wordCount);
48 sort(map);
49 } catch (IOException e) {
50 e.printStackTrace();
51 }
52 }catch(FileNotFoundException e) {
53 e.printStackTrace();
54 }
55 }
56 //排序
57 public static void sort(HashMap<String,Integer> map){
58 //声明集合folder,存放单词和单词个数
59 List<Map.Entry<String, Integer>> folder = new ArrayList<Map.Entry<String, Integer>>(map.entrySet());
60 Collections.sort(folder, new Comparator<Map.Entry<String, Integer>>() {
61 public int compare(Map.Entry<String, Integer> obj1, Map.Entry<String, Integer> obj2) {
62 return (obj2.getValue() - obj1.getValue());
63 }
64 });
65 //输出
66 for (int i = 0; i < folder.size(); i++) {
67 Entry<String, Integer> en = folder.get(i);
68 System.out.println(en.getKey()+":"+en.getValue());
69 }
70 }
71 }
实现结果
总共单词数:181
as:7
the:7
not:6
it:6
to:5
are:4
a:4
your:4
in:4
they:3
live:3
and:3
of:2
do:2
may:2
by:2
be:2
clothes:2
that:2
often:2
have:2
from:2
above:2
is:2
you:2
door:1
its:1
suppose.It:1
palace.The:1
contentedly:1
snow:1
friends,Turn:1
yourself:1
means.which:1
or:1
windows:1
life,poor:1
bad:1
quiet:1
like:1
without:1
thoughts.:1
simply:1
abode;the:1
change.Sell:1
will:1
some:1
fault-finder:1
herb,like:1
before:1
most:1
I:1
old,return:1
trouble:1
life:1
change;we:1
supported:1
is.You:1
spring.:1
me:1
mind:1
town;but:1
there,and:1
paradise.Love:1
hardnames.It:1
is,meet:1
should:1
seem:1
independent:1
new:1
alms-house:1
poor-house.The:1
pleasant,thrilling,glorious:1
;do:1
garden:1
happens:1
keep:1
but:1
However:1
reflected:1
being:1
brightly:1
enough:1
Cultivate:1
any.May:1
looks:1
more:1
sage.Do:1
town's:1
when:1
faults:1
richest.The:1
disreputable.:1
think:1
get:1
so:1
much:1
lives:1
perhaps:1
early:1
things,whether:1
call:1
dishonest:1
sun:1
shun:1
melts:1
setting:1
them.Things:1
poverty:1
poorest:1
mean:1
receive:1
find:1
hourss,even:1
thoughts,as:1
rich:1
poor:1
man's:1
cheering:1
great:1
see:1
supporting:1
themselves:1
misgiving.Most:1
ssh://git@git.coding.net:linliaimeli/FileWord.git
https://git.coding.net/linliaimeli/FileWord.git