import nltk def freq_sorted(text,ranklimit):
fd=nltk.FreqDist(text)
cumulative = 0.0
for rank, (word,freq) in enumerate(sorted(fd.items(), key=lambda x: (-1*x[1], x[0]))[:ranklimit]):
cumulative += fd[word] * 100 / fd.N()
print "%3d %6.2f%% %s" % (rank+1, cumulative, word) def test_freq_sorted():
freq_sorted(nltk.corpus.brown.words(),15)
结果为:
1 5.00% the
2 10.00% ,
3 14.00% .
4 17.00% of
5 19.00% and
6 21.00% to
7 22.00% a
8 23.00% in
9 23.00% that
10 23.00% is
11 23.00% was
12 23.00% for
13 23.00% ``
14 23.00% ''
15 23.00% The