# requests bs4 爬取资讯图片

#!/usr/bin/env python

# Version = 3.5.2

# __auth__ = '无名小妖'

import os
import uuid
from urllib.parse import urljoin, urlsplit

import requests
from bs4 import BeautifulSoup

# Listing page whose article thumbnails are downloaded.
NEWS_URL = 'http://www.autohome.com.cn/news/'

# Seconds to wait on each HTTP request; without a timeout a stalled server
# would hang the script indefinitely.
REQUEST_TIMEOUT = 10

# Extension used when the image URL path carries none.
DEFAULT_IMAGE_EXT = '.jpg'


def normalize_img_url(src, base_url=NEWS_URL):
    """Return an absolute http(s) URL for an ``<img src>`` value, or None.

    Scheme-relative (``//host/a.jpg``) and path-relative (``/a.jpg``) values
    are resolved against ``base_url``; URLs that already carry a scheme are
    kept unchanged, so ``https://`` links are no longer mangled by blindly
    prefixing ``http:``.

    :param src: raw ``src`` attribute value; may be None or empty.
    :param base_url: page URL used to resolve relative references.
    :return: the absolute URL, or None when ``src`` is missing/blank or does
        not resolve to an http/https URL (e.g. inline ``data:`` images).
    """
    if not src or not src.strip():
        return None
    absolute = urljoin(base_url, src.strip())
    if urlsplit(absolute).scheme not in ('http', 'https'):
        return None
    return absolute


def build_image_filename(img_url):
    """Return a unique local file name for the image at ``img_url``.

    The name is a random UUID followed by the extension found in the URL
    *path* only, so query strings and host-name dots never leak into the
    file name. ``DEFAULT_IMAGE_EXT`` is used when the path has no extension.
    """
    extension = os.path.splitext(urlsplit(img_url).path)[1]
    return '{}{}'.format(uuid.uuid4(), extension or DEFAULT_IMAGE_EXT)


def main():
    """Download every article thumbnail listed on ``NEWS_URL``.

    Images are written to the current working directory. A thumbnail that
    cannot be fetched is reported and skipped so one bad link does not abort
    the whole run.

    :raises requests.RequestException: if the listing page cannot be fetched.
    :raises SystemExit: if the article container is absent from the page.
    """
    response = requests.get(url=NEWS_URL, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    # Decode using the encoding detected from the page content.
    response.encoding = response.apparent_encoding

    # 'html.parser' is the built-in parser; the original author notes that
    # lxml is what is used in production because it performs better.
    soup = BeautifulSoup(response.text, features='html.parser')

    # Container tag that holds the article list.
    target = soup.find(id='auto-channel-lazyload-article')
    if target is None:
        raise SystemExit(
            "element with id 'auto-channel-lazyload-article' not found"
        )

    for item in target.find_all('li'):
        link = item.find('a')
        if link is None:
            continue
        img = link.find('img')
        if img is None:
            continue
        img_url = normalize_img_url(img.attrs.get('src'))
        if img_url is None:
            continue

        try:
            img_response = requests.get(url=img_url, timeout=REQUEST_TIMEOUT)
            # Do not save an HTML error page under an image file name.
            img_response.raise_for_status()
        except requests.RequestException as exc:
            print('skipping {}: {}'.format(img_url, exc))
            continue

        with open(build_image_filename(img_url), 'wb') as f:
            f.write(img_response.content)


if __name__ == '__main__':
    main()

# requests bs4 爬取资讯图片

# 秒客网

# requests bs4 爬取资讯图片

# 相关文章

# requests bs4 爬取 资讯 图片

# 相关文章

# requests bs4 爬取资讯图片