Python飞猪旅行爬取丽江旅游售价情况数据可视化分析

时间:2024-03-01 15:53:34

一、选题背景

如今人们出游大多通过网上预订平台来选择游玩地点和项目,并查看相应的价格。这类平台有很多,例如途牛、携程、去哪儿、飞猪。本文选择飞猪平台,爬取丽江的旅游项目及其价格,然后进行可视化分析。

二、网络爬虫设计方案

爬虫名称:丽江旅游项目数据爬取

内容:通过爬虫程序爬取游玩项目,然后进行数据可视化分析。

方案描述:

1、使用requests库发送请求访问网页

2、解析网页,爬取数据。这里采用lxml的etree配合xpath

3、数据保存,将结果追加写入csv文件

三、结构特征分析

1、结构特征:内容导航型

 

 

 2、HTML页面解析:

游玩主题:

 

 

 

价格:

 

 

 

出售情况:

 

 

 

评论条数:

 

 

 

3、节点查找方法:

 # XPath of the coun-th (1-based) result card; the four fields hang off this node.
 card = "//*[@id='content']/div[6]/div[1]/div[1]/div/div[{}]".format(coun)
 # Text nodes of the card's title, price, sales figure and comment count.
 title = html.xpath(card + "/div[2]/div[1]/a/h3/div/text()")
 price = html.xpath(card + "/div[3]/div/div/span/text()")
 sell = html.xpath(card + "/div[2]/p[2]/span[1]/text()")
 coumm = html.xpath(card + "/div[2]/p[2]/span[2]/text()")

4、遍历方法:

            # Visit each of the (up to) 48 result cards on the page. `coun` is the
            # 1-based card index used in the XPath; the original never advanced it,
            # so every iteration re-read the same card.
            for coun in range(1, 49):
                card = "//*[@id='content']/div[6]/div[1]/div[1]/div/div[{}]".format(coun)

                # xpath() returns a list of text nodes; keep the last one, or an
                # empty string when the card has no such node.
                title_texts = html.xpath(card + "/div[2]/div[1]/a/h3/div/text()")
                title = title_texts[-1] if title_texts else ''

                price_texts = html.xpath(card + "/div[3]/div/div/span/text()")
                price = price_texts[-1] if price_texts else ''

                # Strip the "月售" (monthly sales) characters; a card without a
                # sales figure counts as 0.
                sell_texts = html.xpath(card + "/div[2]/p[2]/span[1]/text()")
                sell = sell_texts[-1].strip('月售').strip() if sell_texts else '0'

                # The comment span mixes a label with the number; keep the last
                # purely numeric text and fall back to 0 so `coumm` is always a str.
                coumm = '0'
                for text in html.xpath(card + "/div[2]/p[2]/span[2]/text()"):
                    text = text.strip()
                    if text.isdigit():
                        coumm = text

 

四、网络爬虫设计

1、数据爬取与采集

代码分析:

  1 import time
  2 import random
  3 import requests
  4 from lxml import etree
  5 import sys
  6 import re
  7 
  8 USER_AGENTS = [
  9                 \'Mozilla/5.0 (Windows NT 6.2; rv:22.0) Gecko/20130405 Firefox/22.0\',
 10                 \'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:22.0) Gecko/20130328 Firefox/22.0\',
 11                 \'Mozilla/5.0 (Windows NT 6.1; rv:22.0) Gecko/20130405 Firefox/22.0\',
 12                 \'Mozilla/5.0 (Microsoft Windows NT 6.2.9200.0); rv:22.0) Gecko/20130405 Firefox/22.0\',
 13                 \'Mozilla/5.0 (Windows NT 6.2; Win64; x64; rv:16.0.1) Gecko/20121011 Firefox/21.0.1\',
 14                 \'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:16.0.1) Gecko/20121011 Firefox/21.0.1\',
 15                 \'Mozilla/5.0 (Windows NT 6.2; Win64; x64; rv:21.0.0) Gecko/20121011 Firefox/21.0.0\',
 16                 \'Mozilla/5.0 (Windows NT 6.2; WOW64; rv:21.0) Gecko/20130514 Firefox/21.0\',
 17                 \'Mozilla/5.0 (Windows NT 6.2; rv:21.0) Gecko/20130326 Firefox/21.0\',
 18                 \'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:21.0) Gecko/20130401 Firefox/21.0\',
 19                 \'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:21.0) Gecko/20130331 Firefox/21.0\',
 20                 \'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:21.0) Gecko/20130330 Firefox/21.0\',
 21                 \'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:21.0) Gecko/20100101 Firefox/21.0\',
 22                 \'Mozilla/5.0 (Windows NT 6.1; rv:21.0) Gecko/20130401 Firefox/21.0\',
 23                 \'Mozilla/5.0 (Windows NT 6.1; rv:21.0) Gecko/20130328 Firefox/21.0\',
 24                 \'Mozilla/5.0 (Windows NT 6.1; rv:21.0) Gecko/20100101 Firefox/21.0\',
 25                 \'Mozilla/5.0 (Windows NT 5.1; rv:21.0) Gecko/20130401 Firefox/21.0\',
 26                 \'Mozilla/5.0 (Windows NT 5.1; rv:21.0) Gecko/20130331 Firefox/21.0\',
 27                 \'Mozilla/5.0 (Windows NT 5.1; rv:21.0) Gecko/20100101 Firefox/21.0\',
 28                 \'Mozilla/5.0 (Windows NT 5.0; rv:21.0) Gecko/20100101 Firefox/21.0\',
 29                 \'Mozilla/5.0 (Windows NT 6.2; Win64; x64;) Gecko/20100101 Firefox/20.0\',
 30                 \'Mozilla/5.0 (Windows NT 6.1; rv:6.0) Gecko/20100101 Firefox/19.0\',
 31                 \'Mozilla/5.0 (Windows NT 6.1; rv:14.0) Gecko/20100101 Firefox/18.0.1\',
 32                 \'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:18.0)  Gecko/20100101 Firefox/18.0\',
 33                 \'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36\'
 34                 ]
 35 headers = {
 36     \'User-Agent\':random.choice(USER_AGENTS),
 37     \'Connection\':\'keep-alive\',
 38     \'Accept-Language\':\'zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2\'
 39     }
 40 
 41 #销量排序
 42 def sales_volume(page):
 43     # 创建Feizhu_sales_volume.csv
 44     file = open("Feizhu_sales_volume.csv", "a")
 45     file.write("title" + "," + "price" + "," + "sell" + "," + "coumm" + \'\n\')
 46     file = file.close()
 47 
 48     for i in range(page):
 49         # 请求访问
 50         try:
 51             url = \'https://travelsearch.fliggy.com/index.htm?spm=181.15077045.1398723350.1.48f3620d7UbQ9z&searchType=product&keyword=%E4%B8%BD%E6%B1%9F&category=MULTI_SEARCH&pagenum=\'+str(page)+\'&-1=sales_des&conditions=-1%3Asales_des\'
 52             res = requests.get(url,headers=headers)
 53             res.encoding = \'utf-8\'
 54             html = etree.HTML(res.text)
 55             coun = 1
 56             # 主题title、价格price、已售sell、评论数coumm
 57             for i in range(48):
 58                 title = html.xpath("//*[@id=\'content\']/div[6]/div[1]/div[1]/div/div[{}]/div[2]/div[1]/a/h3/div/text()".format(coun))
 59                 for a in title:
 60                     title = a
 61                 price = html.xpath("//*[@id=\'content\']/div[6]/div[1]/div[1]/div/div[{}]/div[3]/div/div/span/text()".format(coun))
 62                 for a in price:
 63                     price = a
 64                 sell = html.xpath("//*[@id=\'content\']/div[6]/div[1]/div[1]/div/div[{}]/div[2]/p[2]/span[1]/text()".format(coun))
 65                 for a in sell:
 66                     sell = a.strip(\'月售\')
 67                     sell = sell.strip(\'\')
 68                 coumm = html.xpath("//*[@id=\'content\']/div[6]/div[1]/div[1]/div/div[{}]/div[2]/p[2]/span[2]/text()".format(coun))
 69                 for i in coumm:
 70                     if i in \'评价\':
 71                         pass
 72                     elif i in \'\':
 73                         pass
 74                     elif int(i) > 1:
 75                         coumm = str(i)
 76                 coun +=1
 77                 # 保存数据
 78                 with open("Feizhu_sales_volume.csv", "a", encoding=\'utf-8\') as f2:
 79                     f2.writelines(title + "," + price + "," + sell + "," + coumm + "," + \'\n\')
 80                 print(\'主题:\',title,\'\n\',
 81                       \'价格:\',price,\'元\n\',
 82                       \'已售出:\',sell,\'笔\n\',
 83                       \'评论:\',coumm,\'条\n\')
 84         except:
 85             pass
 86 
 87         synthesize(page)
 88         page+=1
 89         time.sleep(1)
 90 
 91 # 综合排序
 92 def synthesize(page):
 93     # 创建Feizhu_synthesize.csv
 94     file = open("Feizhu_synthesize.csv", "a")
 95     file.write("title" + "," + "price" + "," + "sell" + "," + "coumm" + \'\n\')
 96     file = file.close()
 97     try:
 98         for i in range(page):
 99             # 请求访问
100             url = \'https://travelsearch.fliggy.com/index.htm?spm=181.15077045.1398723350.1.48f3620d7UbQ9z&searchType=product&keyword=%E4%B8%BD%E6%B1%9F&category=MULTI_SEARCH&pagenum=\'+str(page)+\'&-1=popular&conditions=-1%3Apopular\'
101             res = requests.get(url, headers=headers)
102             res.encoding = \'utf-8\'
103             html = etree.HTML(res.text)
104             coun = 1
105             #主题title、价格price、已售sell、评论数coumm
106             for i in range(48):
107                 title = html.xpath("//*[@id=\'content\']/div[6]/div[1]/div[1]/div/div[{}]/div[2]/div[1]/a/h3/div/text()".format(coun))
108                 for i in title:
109                     title = i
110                 price = html.xpath("//*[@id=\'content\']/div[6]/div[1]/div[1]/div/div[{}]/div[3]/div/div/span/text()".format(coun))
111                 for i in price:
112                     price = i
113                 sell = html.xpath("//*[@id=\'content\']/div[6]/div[1]/div[1]/div/div[{}]/div[2]/p[2]/span[1]/text()".format(coun))
114                 sell1 = []
115                 for i in sell:
116                     sell = i.strip(\'月售\')
117                     sell = sell.strip(\'\')
118                 if sell == sell1:
119                     sell = \'0\'
120                     # print(sell)
121                 coumm = html.xpath("//*[@id=\'content\']/div[6]/div[1]/div[1]/div/div[{}]/div[2]/p[2]/span[2]/text()".format(coun))
122                 coumm1 = []
123                 for i in coumm:
124                     if i in \'评价\':
125                         pass
126                     elif i in \'\':
127                         pass
128                     elif int(i) > 1:
129                         coumm = i
130                 if coumm == coumm1:
131                     coumm = \'0\'
132                 coun += 1
133                 # 保存数据
134                 with open("Feizhu_synthesize.csv", "a", encoding=\'utf-8\') as f2:
135                     f2.writelines(title + "," + price + "," + sell + "," + coumm + "," + \'\n\')
136                 print(\'主题:\', title, \'\n\',
137                       \'价格:\', price, \'元\n\',
138                       \'已售出:\', sell, \'笔\n\',
139                       \'评论:\', coumm, \'条\n\')
140             page +=1
141             time.sleep(1)
142     except:
143         pass
144 
145 if __name__ == \'__main__\':
146     page = 2
147     sales_volume(page)
148     # synthesize(page)

2、数据的清洗与处理

import pandas as pd
import numpy as np

# xs: listings crawled in sales order; zh: listings crawled in comprehensive order.
# on_bad_lines='skip' drops malformed rows. It replaces error_bad_lines=False,
# which was deprecated in pandas 1.3 and removed in pandas 2.0.
xs = pd.read_csv(r'D:\wtx\Feizhu_sales_volume.csv', on_bad_lines='skip')
zh = pd.read_csv(r'D:\wtx\Feizhu_synthesize.csv', on_bad_lines='skip')

# Bare expression: shows the table when run in a notebook cell.
xs

# Remove listings that appear more than once (same title).
xs = xs.drop_duplicates('title')
zh = zh.drop_duplicates('title')

# Remove rows with any missing value.
xs = xs.dropna(axis=0)
zh = zh.dropna(axis=0)

# Order both tables by sales, highest first.
xs = xs.sort_values(by='sell', ascending=False)
zh = zh.sort_values(by='sell', ascending=False)

 

按销售量排序可视化分析:

# Visual analysis of the sales-ordered table `xs` (top 20 rows per chart).
import matplotlib.pyplot as plt

plt.rcParams['font.sans-serif'] = ['SimHei']  # font that can render Chinese labels
plt.rcParams['axes.unicode_minus'] = False    # keep the minus sign renderable

# --- Top 20 by sales: sales as a line, price as bars -----------------------
x = xs['title'].head(20)
y = xs['price'].head(20)
z = xs['sell'].head(20)
plt.plot(x, z, '-', color='r', label="sell")
plt.bar(x, y, alpha=0.2, width=0.4, color='b', lw=3, label="price")
plt.xticks(rotation=90)
plt.legend(loc="best")
plt.title("飞猪丽江旅游销售量趋势图")
plt.xlabel("主题")
plt.ylabel("价格")
plt.show()

# --- Top 20 by comment count: comments as a line, price as bars ------------
# From here on `z` holds comment counts, so titles and labels say "评论数"
# (they previously still said "销售量").
xs = xs.sort_values(by='coumm', ascending=False)
x = xs['title'].head(20)
y = xs['price'].head(20)
z = xs['coumm'].head(20)
plt.plot(x, z, '-', color='r', label="评论数")
plt.bar(x, y, alpha=0.2, width=0.4, color='b', lw=3, label="价格")
plt.xticks(rotation=90)
plt.legend(loc="best")
plt.title("飞猪丽江旅游评论数趋势图")
plt.xlabel("主题")
plt.ylabel("价格")
plt.show()

# Horizontal bars: price of the 20 most-commented listings.
plt.barh(x, y, alpha=0.2, height=0.4, color='g', label="价格", lw=3)
plt.title("飞猪丽江旅游价格水平图")
plt.legend(loc="best")
plt.xlabel("价格")
plt.ylabel("主题")
plt.show()

# Horizontal bars: comment count of the same listings.
plt.barh(x, z, alpha=0.2, height=0.4, color='r', label="评论数", lw=3)
plt.title("飞猪丽江旅游评论数水平图")
plt.legend(loc="best")
plt.xlabel("评论数")
plt.ylabel("主题")
plt.show()

# Scatter plot of comment counts.
plt.scatter(x, z, color='gray', marker='o', s=40, alpha=0.5)
plt.xticks(rotation=90)
plt.title("飞猪丽江旅游评论数散点图")
plt.xlabel("主题")
plt.ylabel("评论数")
plt.show()

# Box plot of comment counts.
plt.boxplot(z)
plt.title("飞猪丽江旅游评论数盒图")
plt.show()

 

综合排序可视化分析:

# Visual analysis of the comprehensive-ordered table `zh` (top 20 rows per chart).
import matplotlib.pyplot as plt

# Order by price, highest first.
zh = zh.sort_values(by='price', ascending=False)

x = zh['title'].head(20)
y = zh['price'].head(20)
d = zh['coumm'].head(20)  # comment counts; every chart below plots these

plt.rcParams['font.sans-serif'] = ['SimHei']  # font that can render Chinese labels
plt.rcParams['axes.unicode_minus'] = False    # keep the minus sign renderable

# Comment count as a line, price as bars.
plt.plot(x, d, '-', color='r', label="coumm")
plt.bar(x, y, alpha=0.2, width=0.4, color='b', lw=3, label="price")
plt.xticks(rotation=90)
plt.legend(loc="best")
plt.title("飞猪丽江旅游综合排序趋势图")
plt.xlabel("主题")
plt.ylabel("价格")
plt.show()

# Horizontal bars of comment counts (the axis label previously said "出售数").
plt.barh(x, d, alpha=0.2, height=0.4, color='pink', label="评论数", lw=3)
plt.title("飞猪丽江旅游综合排序水平图")
plt.legend(loc="best")
plt.xlabel("评论数")
plt.ylabel("主题")
plt.show()

# Scatter plot of comment counts (the axis label previously said "销售量").
plt.scatter(x, d, color='b', marker='o', s=40, alpha=0.5)
plt.xticks(rotation=90)
plt.title("飞猪丽江旅游综合排序散点图")
plt.xlabel("主题")
plt.ylabel("评论数")
plt.show()

# Box plot of comment counts.
plt.boxplot(d)
plt.title("飞猪丽江旅游综合排序盒图")
plt.show()

 

 词云:

import pandas as pd
import numpy as np
import wordcloud as wc
from PIL import Image
import matplotlib.pyplot as plt
import random

# Load the mask image as an array; the context manager closes the file handle
# once the pixels have been copied.
with Image.open("111.jpg") as img:
    mask = np.array(img)

word_cloud = wc.WordCloud(
    width=1000,                # canvas width
    height=1000,               # canvas height
    mask=mask,
    background_color='black',  # canvas background colour
    font_path='msyhbd.ttc',    # a font with Chinese glyphs must be installed locally
    max_font_size=400,         # upper bound on the font size
    random_state=50,           # fixed seed value passed to WordCloud
)

# Sample 100 words (with replacement) from a fixed list of place names and
# join them with spaces to form the input text.
text = random.choices(['云南', '大理', '丽江', '洱海', '昆明', '石林', '古镇', '花之城'], k=100)
text = " ".join(text)
word_cloud.generate(text)

plt.imshow(word_cloud)
plt.axis('off')  # hide the axes drawn around the image
plt.show()

 总代码:

  1 import pandas as pd
  2 import numpy as np
  3 # xs为销量排行的表格、zh为综合表排序
  4 xs =  pd.read_csv(r\'D:\wtx\Feizhu_sales_volume.csv\',error_bad_lines=False)
  5 zh =  pd.read_csv(r\'D:\wtx\Feizhu_synthesize.csv\',error_bad_lines=False)
  6 
  7 xs
  8 
  9 # 重复值处理
 10 xs = xs.drop_duplicates(\'title\')
 11 zh = zh.drop_duplicates(\'title\')
 12 
 13 # Nan处理
 14 xs = xs.dropna(axis = 0)
 15 zh = zh.dropna(axis = 0)
 16 
 17 # 根据销售数降序排序
 18 xs.sort_values(by=["sell"],inplace=True,ascending=[False])
 19 zh.sort_values(by=["sell"],inplace=True,ascending=[False])
 20 
 21 # 销售排行可视化分析
 22 import matplotlib.pyplot as plt
 23 x = xs[\'title\'].head(20)
 24 y = xs[\'price\'].head(20)
 25 z = xs[\'sell\'].head(20)
 26 plt.rcParams[\'font.sans-serif\']=[\'SimHei\'] #用来正常显示中文标签
 27 plt.rcParams[\'axes.unicode_minus\']=False
 28 plt.plot(x,z,\'-\',color = \'r\',label="sell")
 29 plt.bar(x,y,alpha=0.2, width=0.4, color=\'b\', lw=3,label="price")
 30 plt.xticks(rotation=90)
 31 plt.legend(loc = "best")#图例
 32 plt.title("飞猪丽江旅游销售量趋势图")
 33 plt.xlabel("主题",)#横坐标名字
 34 plt.ylabel("价格")#纵坐标名字
 35 plt.show()
 36 
 37 xs.sort_values(by=["coumm"],inplace=True,ascending=[False])
 38 
 39 x = xs[\'title\'].head(20)
 40 y = xs[\'price\'].head(20)
 41 z = xs[\'coumm\'].head(20)
 42 plt.rcParams[\'font.sans-serif\']=[\'SimHei\'] #用来正常显示中文标签
 43 plt.rcParams[\'axes.unicode_minus\']=False
 44 plt.plot(x,z,\'-\',color = \'r\',label="评论数")
 45 plt.bar(x,y,alpha=0.2, width=0.4, color=\'b\', lw=3,label="价格")
 46 plt.xticks(rotation=90)
 47 plt.legend(loc = "best")#图例
 48 plt.title("飞猪丽江旅游销售量趋势图")
 49 plt.xlabel("主题",)#横坐标名字
 50 plt.ylabel("价格")#纵坐标名字
 51 plt.show()
 52 
 53 # 水平图
 54 plt.barh(x,y, alpha=0.2, height=0.4, color=\'r\',label="价格", lw=3)
 55 plt.title("飞猪丽江旅游销售量水平图")
 56 plt.legend(loc = "best")#图例
 57 plt.xlabel("评论数",)#横坐标名字
 58 plt.ylabel("主题")#纵坐标名字
 59 plt.show()
 60 
 61 # 水平图
 62 plt.barh(x,z, alpha=0.2, height=0.4, color=\'r\',label="评论数", lw=3)
 63 plt.title("飞猪丽江旅游销售量水平图")
 64 plt.legend(loc = "best")#图例
 65 plt.xlabel("评论数",)#横坐标名字
 66 plt.ylabel("主题")#纵坐标名字
 67 plt.show()
 68 
 69 
 70 # 散点图
 71 plt.scatter(x,z,color=\'gray\',marker=\'o\',s=40,alpha=0.5)
 72 plt.xticks(rotation=90)
 73 plt.title("飞猪丽江旅游销售量散点图")
 74 plt.xlabel("主题",)#横坐标名字
 75 plt.ylabel("销售量")#纵坐标名字
 76 plt.show()
 77 
 78 
 79 # 盒图
 80 plt.boxplot(z)
 81 plt.title("飞猪丽江旅游销售量量盒图")
 82 plt.show()
 83 
 84 zh.sort_values(by=["price"],inplace=True,ascending=[False])
 85 
 86 zh.sort_values(by=["coumm"],inplace=True,ascending=[False])
 87 zh.head(20)
 88 
 89 # 综合排序可视化分析
 90 import matplotlib.pyplot as plt
 91 x = zh[\'title\'].head(20)
 92 y = zh[\'price\'].head(20)
 93 z = zh[\'sell\'].head(20)
 94 d = zh[\'coumm\'].head(20)
 95 plt.rcParams[\'font.sans-serif\']=[\'SimHei\'] #用来正常显示中文标签
 96 plt.rcParams[\'axes.unicode_minus\']=False
 97 plt.plot(x,d,\'-\',color = \'r\',label="coumm")
 98 plt.bar(x,y,alpha=0.2, width=0.4, color=\'b\', lw=3,label="price")
 99 plt.xticks(rotation=90)
100 plt.legend(loc = "best")#图例
101 plt.title("飞猪丽江旅游销综合排序趋势图")
102 plt.xlabel("主题",)#横坐标名字
103 plt.ylabel("价格")#纵坐标名字
104 plt.show()
105 
106 # 水平图
107 plt.barh(x,d, alpha=0.2, height=0.4, color=\'pink\',label="评论数", lw=3)
108 plt.title("飞猪丽江旅游综合排序水平图")
109 plt.legend(loc = "best")#图例
110 plt.xlabel("出售数",)#横坐标名字
111 plt.ylabel("主题")#纵坐标名字
112 plt.show()
113 
114 # 散点图
115 plt.scatter(x,d,color=\'b\',marker=\'o\',s=40,alpha=0.5)
116 plt.xticks(rotation=90)
117 plt.title("飞猪丽江旅游综合排序散点图")
118 plt.xlabel("主题",)#横坐标名字
119 plt.ylabel("销售量")#纵坐标名字
120 plt.show()
121 
122 # 盒图
123 plt.boxplot(d)
124 plt.title("飞猪丽江旅游综合排序盒图")
125 plt.show()
126 
127 import pandas as pd
128 import numpy as np
129 import wordcloud as wc
130 from PIL import Image
131 import matplotlib.pyplot as plt
132 import random
133 
134 bk = np.array(Image.open("111.jpg"))
135 mask = bk
136 # 定义尺寸
137 word_cloud = wc.WordCloud(
138                        width=1000,  # 词云图宽
139                        height=1000,  # 词云图高
140                        mask = mask,
141                        background_color=\'black\',  # 词云图背景颜色,默认为白色
142                        font_path=\'msyhbd.ttc\',  # 词云图 字体(中文需要设定为本机有的中文字体)
143                        max_font_size=400,  # 最大字体,默认为200
144                        random_state=50,  # 为每个单词返回一个PIL颜色
145                        )
146 text = random.choices([\'云南\',\'大理\',\'丽江\',\'洱海\',\'昆明\',\'石林\',\'古镇\',\'花之城\'],k=100)
147 text = " ".join(text)
148 word_cloud.generate(text)
149 plt.imshow(word_cloud)
150 plt.show()

 

 

 五、总结

1.经过对主题数据的分析与可视化,可以得到哪些结论?是否达到预期的目标?
从可视化分析可以得出
销售排序:跟团游、双飞游、玉龙雪山旅游项目比较热门。
综合排序:"云向旅游丽江到泸沽湖二日游纯玩商务车小泊泸沽湖旅游旅拍跟团游"和"云向旅游昆明大理丽江玉龙雪山6天5晚洱海旅拍纯玩跟团游双飞游"这两个项目比较受人们欢迎。
分析结果达到预期。
2.在完成此设计过程中,得到哪些收获?以及要改进的建议?

 在此次设计过程中,我在数据处理中的数据筛选方面有了很大的收获,说白了就是学会了怎样进行类型转换,从而达到自己想要的效果,受益匪浅!需要改进的地方是自己编写程序的速度还比较慢,编程经验也比较欠缺。