在Python中利用wordcloud生成词云

时间:2024-03-08 15:45:29

一、生成汉字词云图的代码如下:

from wordcloud import WordCloud
import matplotlib.pyplot as plt #绘制图像的模块
import jieba     #jieba分词
 
# Path of the text file the word cloud is built from.
path_txt = 'E://python/all.txt'

# Read the whole text; the context manager closes the file handle right away
# instead of leaving it open for the rest of the script.
with open(path_txt, 'r', encoding='UTF-8') as src:
    f = src.read()

# Segment the text with jieba and join the tokens with spaces: wordcloud
# cannot produce a correct Chinese word cloud from the unsegmented text.
cut_text = " ".join(jieba.cut(f))

wordcloud = WordCloud(
    # Set a font explicitly, otherwise the characters are rendered as empty
    # boxes. This is the usual Windows font location; another font file can
    # be substituted.
    font_path="C:/Windows/Fonts/simfang.ttf",
    # Background color and canvas width/height.
    background_color="white",
    width=1000,
    height=880,
).generate(cut_text)

plt.imshow(wordcloud, interpolation="bilinear")
plt.axis("off")  # hide the axis lines and labels
plt.show()

 

说明:

1、其中path_txt的值为用于生成词云的文本文件所在路径。

2、字符串界定符前面加字母r或R表示原始字符串,其中的特殊字符不需要进行转义,但字符串的最后一个字符不能是\符号。

3、通过pip install安装所需要的模块,例如 pip install wordcloud、pip install jieba、pip install matplotlib。在电脑的命令提示符中输入这些命令即可。

4、plt.axis("off")的意思是关闭坐标轴线和标签,与传入False的效果相同。

二、生成英文词云

1、简单例子

from wordcloud import WordCloud
import matplotlib.pyplot as plt

# Read the source text; the context manager closes the file handle right away
# instead of leaving it open for the rest of the script.
with open('E:/Python/练习/sports.txt', 'r') as src:
    f = src.read()

# The text is passed to WordCloud as-is (no segmentation step is applied here).
wordcloud = WordCloud(
    background_color="white",
    width=1000,
    height=860,
    margin=2,
).generate(f)

plt.imshow(wordcloud)
plt.axis("off")  # hide the axis lines and labels
plt.show()

  

2、设置字体的颜色的例子

from wordcloud import (WordCloud , get_single_color_func)
import matplotlib.pyplot as plt

class SimpleGroupedColorFunc(object):
    """Callable that returns a fixed color for each known word.

    Args:
        color_to_words: Mapping from a color to an iterable of the words
            that should receive that color.
        default_color: Color returned for any word that is not listed in
            ``color_to_words``.
    """

    def __init__(self, color_to_words, default_color):
        # Invert color -> words into word -> color for direct lookup. When a
        # word is listed under several colors, the last one visited wins.
        lookup = {}
        for shade, members in color_to_words.items():
            for term in members:
                lookup[term] = shade
        self.word_to_color = lookup
        self.default_color = default_color

    def __call__(self, word, **kwargs):
        """Return the color mapped to ``word``, or the default color.

        Extra keyword arguments are accepted and ignored.
        """
        if word in self.word_to_color:
            return self.word_to_color[word]
        return self.default_color
        
class GroupedColorFunc(object):
    """Callable that delegates each word to a per-group color function.

    Every color in ``color_to_words`` is converted into a color function with
    ``get_single_color_func``; a word is colored by the function of the first
    group that contains it, or by the default color function otherwise.

    Args:
        color_to_words: Mapping from a color to an iterable of the words
            belonging to that color's group.
        default_color: Color whose color function handles every word that is
            not in any group.
    """

    def __init__(self, color_to_words, default_color):
        # Pair each group's color function with a set of its words.
        groups = []
        for shade, members in color_to_words.items():
            groups.append((get_single_color_func(shade), set(members)))
        self.color_func_to_words = groups

        self.default_color_func = get_single_color_func(default_color)

    def get_color_func(self, word):
        """Return the color function of the first group containing ``word``.

        Falls back to the default color function when no group matches.
        """
        for candidate, members in self.color_func_to_words:
            if word in members:
                return candidate
        return self.default_color_func

    def __call__(self, word, **kwargs):
        """Color ``word`` by forwarding it and ``kwargs`` to its color function."""
        chosen = self.get_color_func(word)
        return chosen(word, **kwargs)

# Read the source text; the context manager closes the file handle right away
# instead of leaving it open for the rest of the script.
with open('E:/Python/练习/sports.txt', 'r') as src:
    f = src.read()

wc = WordCloud(width=1000, height=860, collocations=False).generate(f)

# Color -> words that should be drawn in that color.
# NOTE(review): 'frist' looks like a typo for 'first'; it is left unchanged
# because it decides which word gets recolored - confirm against the text.
color_to_words = {
    '#00ff00': ['important', 'beat', 'minute', 'proud', 'frist', 'coach'],
    'red': ['Chinese', 'win', 'team', 'said', 'goal', 'header'],
    'yellow': ['Japan', 'Korea', 'South', 'China'],
}
# Color used for every word that is not listed in color_to_words.
default_color = 'grey'

# Recolor the generated cloud with the grouped color function, then display it.
grouped_color_func = GroupedColorFunc(color_to_words, default_color)
wc.recolor(color_func=grouped_color_func)
plt.imshow(wc, interpolation="bilinear")
plt.axis("off")  # hide the axis lines and labels
plt.show()

python os.path模块常用方法详解:https://www.jianshu.com/p/d77ef16a38c3

参考于:https://blog.csdn.net/cskywit/article/details/79285988