class TourismSpider:
    """Crawl paginated scenic-spot listings and store them as ``ScenicSpot`` rows.

    Listing pages are requested from ``<base_url>/page/<n>`` starting at
    ``n = 1``; crawling stops at the first page that contains no
    ``div.scenic-spot`` elements.
    """

    # Seconds passed to ``requests.get`` as ``timeout`` so that a stalled
    # server cannot block the crawl forever.
    REQUEST_TIMEOUT = 10

    def __init__(self, base_url):
        """Remember the site root that page URLs are built from."""
        self.base_url = base_url

    def _page_url(self, page):
        """Return the listing URL for page number *page*.

        Trailing slashes on ``base_url`` are dropped so the result never
        contains ``//page``.
        """
        return f"{self.base_url.rstrip('/')}/page/{page}"

    def fetch_page(self, url):
        """Download *url* and return its body parsed with ``html.parser``.

        Raises whatever ``requests.get`` raises, including a timeout after
        ``REQUEST_TIMEOUT`` seconds.
        """
        # NOTE(review): the HTTP status is not checked. An error page simply
        # parses to a document without spot elements, which ends the crawl.
        response = requests.get(url, timeout=self.REQUEST_TIMEOUT)
        return BeautifulSoup(response.content, 'html.parser')

    @staticmethod
    def _field_text(spot_element, tag, css_class):
        """Return the stripped text of the first ``tag.css_class`` child."""
        return spot_element.find(tag, class_=css_class).text.strip()

    def parse_scenic_spot(self, spot_element):
        """Extract one scenic spot's fields from its listing element.

        Returns a dict with the keys ``name``, ``location``, ``description``
        (stripped strings) and ``rating`` (float).

        Raises:
            AttributeError: if ``find`` returns ``None`` for one of the
                expected child elements.
            ValueError: if the rating text is not a valid float.
        """
        return {
            'name': self._field_text(spot_element, 'h3', 'spot-name'),
            'location': self._field_text(spot_element, 'span', 'location'),
            'description': self._field_text(spot_element, 'p', 'description'),
            'rating': float(self._field_text(spot_element, 'span', 'rating')),
        }

    def _collect_spots(self, max_pages=None):
        """Fetch listing pages in order and return every parsed spot dict."""
        spots = []
        page = 1
        while max_pages is None or page <= max_pages:
            soup = self.fetch_page(self._page_url(page))
            spot_elements = soup.find_all('div', class_='scenic-spot')
            if not spot_elements:
                break
            spots.extend(
                self.parse_scenic_spot(element) for element in spot_elements
            )
            page += 1
        return spots

    def crawl_and_save(self, max_pages=None):
        """Crawl every listing page and save the spots in one transaction.

        Args:
            max_pages: optional upper bound on the number of pages fetched.
                ``None`` (the default) keeps crawling until an empty page is
                reached.

        All downloading and parsing happens before the transaction is opened,
        so the database transaction is not held across network requests. The
        outcome is still all-or-nothing: a failure while fetching or parsing
        writes no rows, and a failure while saving rolls the batch back.
        """
        spots = self._collect_spots(max_pages)
        with transaction.atomic():
            for spot_data in spots:
                ScenicSpot.objects.create(**spot_data)
def _bar_chart_as_base64(labels, values, title, xlabel, ylabel, rotate_labels=False):
    """Render a bar chart and return it as a base64-encoded PNG string.

    The figure is always closed before returning. pyplot keeps every figure
    it creates alive until it is closed, so leaving figures open would
    accumulate memory on every request.
    """
    fig, ax = plt.subplots(figsize=(10, 6))
    try:
        ax.bar(labels, values)
        ax.set_title(title)
        ax.set_xlabel(xlabel)
        ax.set_ylabel(ylabel)
        if rotate_labels:
            # Slant the tick labels so long category names do not overlap.
            plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
        buffer = io.BytesIO()
        fig.savefig(buffer, format='png', bbox_inches='tight')
    finally:
        plt.close(fig)
    return base64.b64encode(buffer.getvalue()).decode()


def data_visualization(request):
    """Render ``data_visualization.html`` with site statistics and charts.

    The template context contains:

    * ``location_distribution`` - number of scenic spots per location
    * ``view_count_distribution`` - name and view count of the ten spots
      with the highest ``view_count``
    * ``age_distribution`` - number of users per age
    * ``gender_distribution`` - number of users per gender
    * ``location_img`` / ``age_img`` - base64-encoded PNG bar charts of the
      location and age distributions
    """
    # Scenic spots per location.
    location_distribution = ScenicSpot.objects.values('location').annotate(count=Count('id'))
    # Ten most-viewed scenic spots.
    view_count_distribution = ScenicSpot.objects.values('name', 'view_count').order_by('-view_count')[:10]
    # Users per age.
    age_distribution = User.objects.values('age').annotate(count=Count('id'))
    # Users per gender.
    gender_distribution = User.objects.values('gender').annotate(count=Count('id'))

    # Chart: scenic spots per location.
    location_img = _bar_chart_as_base64(
        [item['location'] for item in location_distribution],
        [item['count'] for item in location_distribution],
        title='景点地点分布',
        xlabel='地点',
        ylabel='景点数量',
        rotate_labels=True,
    )

    # Chart: users per age.
    age_img = _bar_chart_as_base64(
        [item['age'] for item in age_distribution],
        [item['count'] for item in age_distribution],
        title='用户年龄分布',
        xlabel='年龄',
        ylabel='用户数量',
    )

    context = {
        'location_distribution': location_distribution,
        'view_count_distribution': view_count_distribution,
        'age_distribution': age_distribution,
        'gender_distribution': gender_distribution,
        'location_img': location_img,
        'age_img': age_img,
    }
    return render(request, 'data_visualization.html', context)