Java单词计数器

花了一个中午搞定了一个单词计数器，可以按照字典和频率两种排序，功能还比较强大。
package treeroot.util;
//author treeroot
//since 2004-12-3
/**
 * A word paired with the number of times it has been seen.
 *
 * <p>The text is folded to lower case once, in the constructor. Equality and
 * the hash code depend on that folded text only, never on the counter, so an
 * instance can be used as a hash key while its count keeps growing.</p>
 */
public class Word {

    /** Lower-cased text of the word; fixed at construction time. */
    private final String value;

    /** Occurrence counter; a newly created word counts as seen once. */
    private int count = 1;

    /**
     * Creates a word with a count of 1.
     *
     * @param value the text of the word in any letter case; must not be null
     * @throws NullPointerException if {@code value} is null
     */
    public Word(String value) {
        // Fold with a fixed locale. The no-argument toLowerCase() follows the
        // JVM default locale, and under Turkish/Azeri rules 'I' becomes a
        // dotless i, so "TITLE" and "title" would end up as different words.
        this.value = value.toLowerCase(java.util.Locale.ENGLISH);
    }

    /** Records one more occurrence of this word. */
    protected void increase() {
        count++;
    }

    /**
     * @return the lower-cased text of this word
     */
    public String getWord() {
        return value;
    }

    /**
     * @return how many occurrences have been recorded (at least 1)
     */
    public int getCount() {
        return count;
    }

    /**
     * Two words are equal when their lower-cased texts are equal; the
     * counters are ignored.
     */
    public boolean equals(Object o) {
        if (o == this) {
            return true;
        }
        if (!(o instanceof Word)) {
            return false;
        }
        return value.equals(((Word) o).value);
    }

    /** Hash of the lower-cased text, consistent with {@link #equals(Object)}. */
    public int hashCode() {
        return value.hashCode();
    }

    /**
     * @return the text and the count in the form {@code text=count}
     */
    public String toString() {
        return value + "=" + count;
    }
}

package treeroot.util;
//author treeroot
//since 2004-12-3
import java.util.*;

/**
 * Counts the words of a text, ignoring letter case, and returns them sorted
 * either alphabetically or by descending frequency.
 *
 * <p>A word is a run of ASCII letters that may contain hyphens, underscores
 * and apostrophes between letters (for example {@code don't} or
 * {@code well-known}).</p>
 */
public class WordCount
{
    // Characters that may join the letters of a single word. The hyphen has to
    // be escaped: written bare as "/-_" inside a character class it forms a
    // range from '/' to '_', which covers the digits and every upper-case
    // letter, so capitals at word boundaries were consumed as separators
    // ("The" was counted as "he").
    private static final String JOINER = "[\\-_']";

    // Non-letters followed by a joiner: strips a joiner glued to the front of a word.
    private static final String BEFORE_WORD = "[^a-zA-Z]+" + JOINER + "[^a-zA-Z]*";

    // A joiner followed by non-letters: strips a joiner glued to the end of a word.
    private static final String AFTER_WORD = "[^a-zA-Z]*" + JOINER + "[^a-zA-Z]+";

    // Any run of characters that are neither letters nor joiners.
    private static final String PLAIN_GAP = "[^a-zA-Z\\-_']+";

    // Compiled once; the alternatives are tried in the order listed here.
    private static final java.util.regex.Pattern SEPARATOR = java.util.regex.Pattern.compile(
            "(" + BEFORE_WORD + ")|(" + AFTER_WORD + ")|(" + PLAIN_GAP + ")");

    /**
     * Splits {@code s} into words and counts how often each one occurs,
     * ignoring letter case.
     *
     * @param s     the text to analyse; must not be null
     * @param order comparator over {@link Word} elements that fixes the
     *              iteration order of the result, e.g. {@link #DICTIONARY_ORDER}
     *              or {@link #FREQUENCY_ORDER}
     * @return a synchronized sorted set holding one {@link Word} per distinct
     *         word, each carrying its occurrence count; empty when the text
     *         contains no words
     * @throws NullPointerException if {@code s} is null
     */
    public static Set getWordCount(String s, Comparator order) {
        // Maps each distinct word to the single instance that accumulates its count.
        Map counts = new HashMap();

        String[] tokens = SEPARATOR.split(s);
        for (int i = 0; i < tokens.length; i++) {
            String token = tokens[i];
            if (token.length() == 0) {
                // split() yields an empty first token when the text starts with
                // a separator (and for empty input); that is not a word.
                continue;
            }
            Word candidate = new Word(token);
            Word known = (Word) counts.get(candidate);
            if (known == null) {
                counts.put(candidate, candidate);
            } else {
                known.increase();
            }
        }

        // Sort only after counting is finished: the frequency comparator reads
        // the counts, which must not change while the words sit in the TreeSet.
        Set sorted = new TreeSet(order);
        sorted.addAll(counts.values());
        return Collections.synchronizedSet(sorted);
    }

    /** Orders words alphabetically by their lower-cased text. */
    public static Comparator DICTIONARY_ORDER = new Comparator() {
        public int compare(Object o1, Object o2) {
            String first = ((Word) o1).getWord();
            String second = ((Word) o2).getWord();
            return first.compareTo(second);
        }
    };

    /**
     * Orders words by descending count; words with equal counts are ordered
     * alphabetically so that distinct words never compare as equal.
     */
    public static Comparator FREQUENCY_ORDER = new Comparator() {
        public int compare(Object o1, Object o2) {
            Word first = (Word) o1;
            Word second = (Word) o2;
            int firstCount = first.getCount();
            int secondCount = second.getCount();
            if (firstCount != secondCount) {
                // Higher count sorts first.
                return firstCount > secondCount ? -1 : 1;
            }
            return first.getWord().compareTo(second.getWord());
        }
    };

    /**
     * Demo: prints every word of a sample sentence with its count, most
     * frequent first.
     *
     * @param args ignored
     */
    public static void main(String[] args)
    {
        String s="A regular expression, specified as a string, must first be compiled into an instance of this class. The resulting pattern can then be used to create a Matcher object that can match arbitrary character sequences against the regular expression. All of the state involved in performing a match resides in the matcher, so many matchers can share the same pattern. ";
        Set set = WordCount.getWordCount(s, WordCount.FREQUENCY_ORDER);
        for (Iterator it = set.iterator(); it.hasNext();) {
            Word w = (Word) it.next();
            // Pad with tabs so the counts line up four tab stops to the right;
            // always emit at least one so long words stay separated from the count.
            int tabs = Math.max(1, 4 - w.getWord().length() / 8);
            StringBuffer line = new StringBuffer(w.getWord());
            for (int j = 0; j < tabs; j++) {
                line.append('\t');
            }
            line.append(w.getCount());
            System.out.println(line);
        }
    }
}

秒客网

Java单词计数器

相关文章