python 抓取网页一部分

时间:2022-03-28 17:12:21
import re
import requests
from bs4 import BeautifulSoup response = requests.get("https://jecvay.com")
soup = BeautifulSoup(response.text,"html.parser")
soup = str(soup)
#re_row = re.match(r'div(.+?)div',soup)
#soup = '<div> class="col-md-7"><div class="panel panel-default"></div><div class="panel-body" style="padding-left: 30px; min-height: 250px;"></div>'; #re_row=re.findall(r"<a.*?href=.*?<\/a>",soup,re.I)
# <h4 style="margin-top: 0px;"><span class="label label-default">最新文章</span><h3>
#re_row=re.findall(r'<li>(.*?)<\/li>',soup,re.S|re.M) #抓取 li 任何内容
#print(re_row)
#print(type(str(soup))) # 抓取网页一部分
start = soup.find(r'<span class="label label-default">')
end = soup.find(r'<div class="col-md-5">');
infobox = ''
infobox = soup[start:end];
print(infobox)