# Scrape Taobao product data
import re
import sqlite3

import requests


class Getdata:
    @staticmethod
    def getHTMLText(url, header):
        """Fetch a page and return its text, or an empty string on failure."""
        try:
            r = requests.get(url, headers=header)
            r.raise_for_status()
            r.encoding = r.apparent_encoding
            return r.text
        except Exception:
            return ""

    @staticmethod
    def parsePage(ilt, html):
        """Extract (price, title) pairs from the JSON embedded in the search page."""
        try:
            plt = re.findall(r'"view_price":"[\d.]*"', html)
            tlt = re.findall(r'"raw_title":".*?"', html)
            for i in range(len(plt)):
                price = plt[i].split(":")[1].strip('"')
                title = tlt[i].split(":", 1)[1].strip('"')
                ilt.append([price, title])
        except Exception:
            print("Failed to parse page")

    @staticmethod
    def GetCount(html):
        """Return the total number of result pages reported by the search page."""
        totalPage = 0
        for match in re.findall(r'"totalPage":\d+', html):
            totalPage = int(match.split(":")[1])
        return totalPage

    @staticmethod
    def printGoodsList(ilt):
        """Print the collected items as a simple numbered table."""
        tplt = "{:4}\t{:8}\t{:16}"
        print(tplt.format("No.", "Price", "Title"))
        count = 0
        for g in ilt:
            count += 1
            print(tplt.format(count, g[0], g[1]))


class DatabaseMannege:
    # Create the database and data table
    @staticmethod
    def CreateDataBase():
        try:
            db = sqlite3.connect("taobao.db")
        except Exception:
            print("Failed to create database")
            return
        try:
            db.execute('create table GoodMsg(id varchar(10), price varchar(10), name varchar(40))')
        except Exception:
            print("Failed to create table, or it already exists")
        finally:
            db.close()

    # Write scraped data into the database
    @staticmethod
    def InsertDatabase(data):
        db = sqlite3.connect("taobao.db")
        cur = db.cursor()
        for item in data:
            try:
                print("Inserting...")
                # Parameterized query avoids quoting problems and SQL injection.
                cur.execute("insert into GoodMsg(price, name) values (?, ?)", (item[0], item[1]))
                db.commit()
                print("Insert succeeded")
            except Exception:
                print("Insert failed")
        db.close()


class Main:
    @staticmethod
    def main():
        goods = input("Enter the product to search for: ")
        infoList = []
        start_url = "https://s.taobao.com/search?q=" + goods
        header = {
            "cookie": "thw=cn; cna=ktJ/FI8k0gQCAbaLv4XUGVvh; tg=0; enc=%2FDi9xgv2fnznKtXV88N9fUTdV6UcRLyw3G6h3pjdwcpbHwkSTh%2FO1B1zsb29cDTL5N8TU0t4TdkRNxzvKIn4Ig%3D%3D; hng=CN%7Czh-CN%7CCNY%7C156; tracknick=1052071694www; t=0a525deca2dff81647d91643519e7e37; UM_distinctid=16b9bd49a2a5ef-031997ebe67ce2-37c143e-144000-16b9bd49a2b92e; miid=1364685100501550517; _cc_=W5iHLLyFfA%3D%3D; x=e%3D1%26p%3D*%26s%3D0%26c%3D0%26f%3D0%26g%3D0%26t%3D0%26__ll%3D-1%26_ato%3D0; _m_h5_tk=98af7fdaf32be92fe72127eda6e0044e_1571041861081; _m_h5_tk_enc=ca1bdc50118e6ce4e5fd587ccc946e6c; mt=ci%3D-1_0; v=0; cookie2=1aac9317cb43d8f5dfab37bd0222fcf9; _tb_token_=578e3e4e7eedb; JSESSIONID=021AC0B7547DE41EE0944D2ECB89C106; alitrackid=www.taobao.com; lastalitrackid=www.taobao.com; l=dBjS2MZrqT2zAZFsBOCgSZ1_aY79jIRAguWbYNq9i_5BK6L_qNbOkg25WFp6cjWfthYB4NSLztv9-etkiKy06Pt-g3fPNxDc.; isg=BHR0oiuylQB4VAH5skFM2Q9IRTLsTpjNHWdJ-w7VA_-CeRTDNlnkx4w7_fEEgdCP",
            "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.120 Safari/537.36",
        }
        html = Getdata.getHTMLText(start_url, header)
        depth = Getdata.GetCount(html)
        DatabaseMannege.CreateDataBase()
        for i in range(depth):
            try:
                # Each result page is offset by 44 items.
                url = start_url + "&s=" + str(44 * i)
                html = Getdata.getHTMLText(url, header)
                pageList = []
                Getdata.parsePage(pageList, html)
                infoList.extend(pageList)
                # Insert only this page's items so rows are not duplicated in the database.
                DatabaseMannege.InsertDatabase(pageList)
            except Exception:
                continue
        Getdata.printGoodsList(infoList)


Main.main()
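
# --- Verification sketch (an addition for illustration, not part of the scraping flow above) ---
# A minimal, assumption-based example of reading back what the script stored: it assumes
# taobao.db and the GoodMsg table created above exist and have been populated by a previous
# run, and it reuses the module-level sqlite3 import. `dump_goods` is a hypothetical helper
# name introduced here only for this sketch.
def dump_goods(db_path="taobao.db", limit=10):
    """Print up to `limit` stored (price, name) rows for a quick sanity check."""
    db = sqlite3.connect(db_path)
    cur = db.cursor()
    cur.execute("select price, name from GoodMsg limit ?", (limit,))
    for price, name in cur.fetchall():
        print(price, name)
    db.close()

# Example usage (after the scraper has run at least once):
# dump_goods()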