1.打开要爬取的网页https://tophub.today/n/L4MdA5ldxD
2.按F12打开浏览器开发者工具，在Network（网络）面板中查看请求头（headers），复制其中的user-agent
3.右键查看网页源代码，找到标题所在的标签（class为t的span标签）
4.代码实现
import requests
import pandas as pd
from bs4 import BeautifulSoup
from pandas import DataFrame

url = 'https://tophub.today/n/L4MdA5ldxD'


def getHTMLText(url):
    """Download a page and return its text decoded as UTF-8.

    Args:
        url: Address of the page to fetch.

    Returns:
        The response body as a string, or the sentinel string '异常' when
        the request fails (connection error, timeout, or HTTP error status).
    """
    # requests expects a mapping of header name -> value; a single
    # 'name: value' string inside braces is a set and cannot be sent.
    headers = {
        'user-agent': (
            'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 '
            '(KHTML, like Gecko) Chrome/65.0.3314.0 Safari/537.36 '
            'SE 2.X MetaSr 1.0'
        ),
    }
    try:
        r = requests.get(url, timeout=30, headers=headers)
        r.raise_for_status()
    except requests.RequestException:
        # Only network/HTTP failures map to the sentinel; programming
        # errors and KeyboardInterrupt are allowed to propagate.
        return '异常'
    r.encoding = 'utf-8'
    return r.text


def saveHTMLText(title, html, c):
    """Extract the first ``c`` ranked titles from ``html`` into ``title``.

    Every ``<span class="t">`` element in the document is treated as one
    entry, in document order. Each extracted entry is printed and appended
    to ``title`` as a ``[rank, text]`` pair, with ranks starting at 1.

    Args:
        title: List that is extended in place with ``[rank, text]`` rows.
        html: HTML source to parse.
        c: Maximum number of entries to extract; values <= 0 extract none.
    """
    soup = BeautifulSoup(html, 'html.parser')
    spans = soup.find_all('span', class_='t')
    print('排名', '标题')
    # Clamp so a negative count does not slice from the end of the list.
    for rank, span in enumerate(spans[:max(c, 0)], start=1):
        text = span.get_text(strip=True)
        print(rank, text)
        title.append([rank, text])


def main():
    """Fetch the ranking page, collect the top 10 titles and print them."""
    # The page must be downloaded before it can be parsed.
    html = getHTMLText(url)
    title = []
    saveHTMLText(title, html, c=10)
    df = pd.DataFrame(title, columns=['排名', '标题'])
    print(df.T)


if __name__ == '__main__':
    main()