大数据毕业设计选题推荐-内蒙古旅游景点数据分析系统-Hive-Hadoop-Spark-四、代码参考

时间:2024-09-30 13:50:55
  • 项目实战代码参考:
class TourismSpider:
    """Crawl paginated scenic-spot listings and save each spot as a ``ScenicSpot`` row.

    Pages are requested from ``{base_url}/page/{n}`` starting at ``n = 1`` and the
    crawl stops at the first page that contains no ``div.scenic-spot`` element.
    """

    # Default number of seconds to wait for each HTTP response.
    DEFAULT_TIMEOUT = 10

    def __init__(self, base_url, timeout=DEFAULT_TIMEOUT):
        """Store the crawl target and the per-request timeout.

        Args:
            base_url: URL prefix; page URLs are built as ``{base_url}/page/{n}``.
            timeout: Value forwarded to ``requests.get(..., timeout=...)``.
                Without a timeout a stalled server would block the crawl
                indefinitely.
        """
        self.base_url = base_url
        self.timeout = timeout

    def fetch_page(self, url):
        """Download ``url`` and return its body parsed with ``'html.parser'``.

        The HTTP status code is not inspected; whatever body comes back is
        parsed. Exceptions raised by ``requests.get`` (including the one raised
        when ``self.timeout`` elapses) propagate to the caller.
        """
        response = requests.get(url, timeout=self.timeout)
        return BeautifulSoup(response.content, 'html.parser')

    def parse_scenic_spot(self, spot_element):
        """Extract one scenic spot's fields from its HTML element.

        Args:
            spot_element: Element containing ``h3.spot-name``, ``span.location``,
                ``p.description`` and ``span.rating`` children.

        Returns:
            dict with keys ``name``, ``location``, ``description`` (stripped
            strings) and ``rating`` (float).

        Raises:
            AttributeError: If one of the expected child elements is missing
                (``find`` then returns ``None``).
            ValueError: If the rating text cannot be converted to ``float``.
        """
        name = spot_element.find('h3', class_='spot-name').text.strip()
        location = spot_element.find('span', class_='location').text.strip()
        description = spot_element.find('p', class_='description').text.strip()
        rating = float(spot_element.find('span', class_='rating').text.strip())

        return {
            'name': name,
            'location': location,
            'description': description,
            'rating': rating,
        }

    @transaction.atomic
    def crawl_and_save(self):
        """Walk the listing pages in order and create a ``ScenicSpot`` per entry.

        The whole crawl runs inside ``transaction.atomic`` (presumably Django's
        ``django.db.transaction`` -- the import is not visible here), so an
        exception on any page aborts the writes made for earlier pages too.

        NOTE(review): the transaction stays open while pages are downloaded;
        the request timeout bounds each wait, but consider fetching first and
        saving afterwards if long-lived transactions are a problem.
        """
        page = 1
        while True:
            url = f"{self.base_url}/page/{page}"
            soup = self.fetch_page(url)
            spot_elements = soup.find_all('div', class_='scenic-spot')

            # A page without any spot entries marks the end of the listing.
            if not spot_elements:
                break

            for element in spot_elements:
                spot_data = self.parse_scenic_spot(element)
                ScenicSpot.objects.create(**spot_data)

            page += 1
def _bar_chart_base64(categories, values, title, xlabel, ylabel, rotate_labels=False):
    """Draw one bar chart and return it as a base64-encoded PNG string.

    Args:
        categories: x positions / category labels handed to ``plt.bar``.
        values: Bar heights, parallel to ``categories``.
        title: Chart title.
        xlabel: Label of the x axis.
        ylabel: Label of the y axis.
        rotate_labels: When true, x tick labels are rotated 45 degrees and
            right-aligned.

    Returns:
        str: The PNG image bytes, base64-encoded and decoded to text.
    """
    figure = plt.figure(figsize=(10, 6))
    try:
        plt.bar(categories, values)
        plt.title(title)
        plt.xlabel(xlabel)
        plt.ylabel(ylabel)
        if rotate_labels:
            plt.xticks(rotation=45, ha='right')

        buffer = io.BytesIO()
        plt.savefig(buffer, format='png', bbox_inches='tight')
    finally:
        # pyplot keeps every figure alive until it is closed explicitly;
        # without this each request would leave a figure behind in memory.
        plt.close(figure)

    return base64.b64encode(buffer.getvalue()).decode()


def data_visualization(request):
    """Render ``data_visualization.html`` with scenic-spot and user statistics.

    The template context holds four querysets (``location_distribution``,
    ``view_count_distribution``, ``age_distribution``, ``gender_distribution``)
    and two base64-encoded PNG bar charts (``location_img``, ``age_img``).

    Args:
        request: The incoming request, passed through to ``render``.

    Returns:
        Whatever ``render(request, 'data_visualization.html', context)`` returns.
    """
    # Number of scenic spots per location.
    location_distribution = ScenicSpot.objects.values('location').annotate(count=Count('id'))

    # The ten scenic spots with the highest view counts.
    view_count_distribution = ScenicSpot.objects.values('name', 'view_count').order_by('-view_count')[:10]

    # Number of users per age.
    age_distribution = User.objects.values('age').annotate(count=Count('id'))

    # Number of users per gender.
    gender_distribution = User.objects.values('gender').annotate(count=Count('id'))

    # Bar chart: scenic spots per location.
    # NOTE(review): the chart texts are Chinese; this presumably relies on a
    # CJK-capable matplotlib font being configured elsewhere -- confirm.
    location_img = _bar_chart_base64(
        [item['location'] for item in location_distribution],
        [item['count'] for item in location_distribution],
        title='景点地点分布',
        xlabel='地点',
        ylabel='景点数量',
        rotate_labels=True,
    )

    # Bar chart: users per age.
    # NOTE(review): assumes no row has a null age -- verify against the model.
    age_img = _bar_chart_base64(
        [item['age'] for item in age_distribution],
        [item['count'] for item in age_distribution],
        title='用户年龄分布',
        xlabel='年龄',
        ylabel='用户数量',
    )

    context = {
        'location_distribution': location_distribution,
        'view_count_distribution': view_count_distribution,
        'age_distribution': age_distribution,
        'gender_distribution': gender_distribution,
        'location_img': location_img,
        'age_img': age_img,
    }

    return render(request, 'data_visualization.html', context)