Bar Chart of Frequency of modals in different sections of the Brown Corpus

时间:2021-12-05 00:16:45

Natural Language Processing with Python

Chapter 4.8

 colors = 'rgbcmyk' # red, green, blue, cyan, magenta, yellow, black

 def bar_chart(categories, words, counts):
"Plot a bar chart showing counts for each word by category"
import pylab ind = pylab.arange(len(words))
width = 0.1*1 / (len(categories) + 1)*10
bar_groups = []
for c in range(len(categories)):
bars = pylab.bar(ind+c*width, counts[categories[c]], width,
color=colors[c % len(colors)])
bar_groups.append(bars)
pylab.xticks(ind+width, words)
pylab.legend([b[0] for b in bar_groups], categories, loc='upper left')
pylab.ylabel('Frequency')
pylab.title('Frequency of Six Modal Verbs by Genre')
pylab.show() def test_bar_char():
genres = ['news', 'religion', 'hobbies', 'government', 'adventure']
modals = ['can', 'could', 'may', 'might', 'must', 'will']
cfdist = nltk.ConditionalFreqDist(
(genre, word)
for genre in genres
for word in nltk.corpus.brown.words(categories=genre)
if word in modals)
counts = {}
for genre in genres:
counts[genre] = [cfdist[genre][word] for word in modals]
bar_chart(genres, modals, counts)

修改了width,结果为:

Bar Chart of Frequency of modals in different sections of the Brown Corpus