【python数据挖掘学习笔记】十三.WordCloud词云配置过程及词频分析

时间:2021-01-22 06:19:32
#2018-03-28 09:59:40 March Wednesday the 13 week, the 087 day SZ SSMR
11,12因为涉及到数据库被我暂时放弃了
【python数据挖掘学习笔记】十三.WordCloud词云配置过程及词频分析
四. 照片背景的词云代码


# Build a word cloud from a Chinese text file, shaped and coloured by a photo.
#
# Steps: read the text, segment it with jieba, lay the words out inside the
# mask image, recolour them from that image, display the result, then save it
# as a PNG next to this script.
from os import path
import sys

import jieba
import matplotlib.pyplot as plt
from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator

try:
    from scipy.misc import imread
except ImportError:
    # scipy.misc.imread was deprecated in SciPy 1.0 and removed in 1.2.
    # matplotlib's reader also returns the image as a NumPy array, so it is
    # used as a drop-in replacement on newer SciPy installations.
    imread = plt.imread

filename = 'xingqiudazhan.txt'

# Read the whole source text as UTF-8; the `with` block closes the file
# handle as soon as the content has been read.
with open(filename, encoding="utf8") as text_file:
    text = text_file.read()

# Segment the text with jieba. cut_all is not passed, so jieba's default
# mode is used; pass cut_all=True to switch to full mode.
wordlist = jieba.cut(text)

# Join the tokens with spaces so the word cloud sees them as separate words.
wl_space_split = " ".join(wordlist)

# Load the picture used both as the layout mask and as the colour source.
# It is looked up in the directory that contains this script.
d = path.dirname(__file__)
liu_coloring = imread(path.join(d, "liu.jpg"))

# Configure the word cloud.
my_wordcloud = WordCloud(
    background_color='white',   # canvas background colour
    mask=liu_coloring,          # image array that shapes the layout
    font_path="simhei.ttf",     # CJK-capable font; Chinese text does not render without it
    max_words=2000,             # upper bound on the number of words drawn
    stopwords=STOPWORDS,        # wordcloud's built-in stop-word set
    max_font_size=50,           # largest font size used for any word
    random_state=30,            # seed for the random generator, for reproducible output
)

# Compute word frequencies and lay the words out.
my_wordcloud.generate(wl_space_split)

# Derive a colour function from the mask image and recolour the words with it.
image_colors = ImageColorGenerator(liu_coloring)
my_wordcloud.recolor(color_func=image_colors)

# Display the word cloud without axis ticks or labels.
plt.imshow(my_wordcloud)
plt.axis("off")
plt.show()

# Save the rendered image next to this script.
my_wordcloud.to_file(path.join(d, "cloudimg.png"))