Word Cloud Analysis: Code and Commentary for a 西游记 (Journey to the West) Word Cloud Function

Posted: 2025-03-10 11:14:07
```python
# Encapsulate the word cloud analysis in a single function
# Install the third-party libraries (uncomment on first run)
# !pip install jieba
# !pip install wordcloud

# Import the third-party libraries
import jieba
from wordcloud import WordCloud
import pandas as pd
import matplotlib.pyplot as plt

# Render plots inline in the notebook
%matplotlib inline

def Ciyunfenxi():
    # Read the novel text; the with-block closes the file automatically
    with open('西游记.txt', 'r', encoding='utf-8') as f:
        txt = f.read()
    # print(txt)  # inspect the raw text

    # Load the Chinese stop-word list, one word per line
    st_words = []
    with open('中文停用词表.txt', 'r', encoding='utf-8') as f1:
        for line in f1:
            st_words.append(line.strip())
    # print(st_words)  # inspect the stop-word list

    # Segment the text with jieba, dropping single-character tokens
    segments = [seg for seg in jieba.cut(txt) if len(seg.strip()) > 1]

    # Put the segmentation result into a DataFrame and count word frequencies
    segmentDF = pd.DataFrame({'segment': segments})
    word_FR = segmentDF['segment'].value_counts()
    # print(word_FR.index)  # inspect the frequency index
    # word_FR["我们"]        # look up a single word's count

    # Remove stop words; filter once instead of calling drop() inside a
    # loop, which would mutate the Series while iterating over its index
    word_FR = word_FR[~word_FR.index.isin(st_words)]
    # print(word_FR.head(200))  # inspect the 200 most frequent words

    # Write the top-200 word frequencies to a file
    with open('西游记_词频.txt', 'w', encoding='utf-8') as f:
        for word, count in word_FR.head(200).items():
            f.write(word + " " + str(count) + "\n")

    # Build the word cloud from the frequency table and save it;
    # font_path must point to a font containing CJK glyphs (here
    # 'simhei.ttf', a common Windows font), or the Chinese words
    # will render as empty boxes
    wc = WordCloud(background_color="white", font_path="simhei.ttf",
                   height=700, width=1000)
    wordcloud = wc.fit_words(word_FR.to_dict())
    plt.imshow(wordcloud)
    plt.axis('off')
    wordcloud.to_file('词云图.png')

Ciyunfenxi()
```
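
For readers new to jieba, the snippet below is a minimal, self-contained illustration of what the segmentation step produces and why the function keeps only tokens longer than one character. The sample sentence is chosen purely for demonstration, and the exact segmentation may vary with jieba's dictionary version.

```python
import jieba

# Precise mode (the default used by jieba.cut) splits a sentence
# into its most natural word sequence.
sample = "悟空保护唐僧西天取经"
tokens = list(jieba.cut(sample))
print(tokens)
# Typical output: ['悟空', '保护', '唐僧', '西天', '取经']

# Single characters (particles, pronouns, stray punctuation) carry
# little meaning for a word cloud, so the function above keeps only
# tokens longer than one character, exactly as this filter does.
kept = [t for t in tokens if len(t.strip()) > 1]
print(kept)
```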
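
One detail worth calling out: WordCloud.fit_words consumes a word-to-frequency mapping directly, so the cloud is rendered from the already-filtered counts rather than from raw text, and no further stop-word handling is needed at render time. Below is a minimal sketch of that flow with a toy frequency dict standing in for word_FR.to_dict(); the font path 'simhei.ttf' is an assumption, so substitute any font file on your system that contains CJK glyphs.

```python
from wordcloud import WordCloud
import matplotlib.pyplot as plt

# Toy frequency mapping standing in for word_FR.to_dict()
freqs = {"悟空": 120, "八戒": 80, "师父": 60, "妖精": 40}

# font_path is an assumption: point it at any font with Chinese
# glyphs (e.g. SimHei on Windows, Noto Sans CJK on Linux),
# otherwise the words render as empty boxes.
wc = WordCloud(background_color="white", font_path="simhei.ttf",
               height=700, width=1000)
cloud = wc.fit_words(freqs)

plt.imshow(cloud)
plt.axis('off')
plt.show()
```

Because the frequencies are supplied explicitly, the same pattern also makes it easy to reuse the saved 西游记_词频.txt file later without re-running the segmentation.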