A while back, work asked me to build a program that automatically replies to seller messages on Yahoo! Auctions (Japan). Here is a write-up of how it works (implemented in Python 2).
1. Log in to the Yahoo! Auctions backend and grab the cookie file manually
#coding=utf-8
import sqlite3
from cStringIO import StringIO

def sqlite2cookieChrome(filename):
    # Dump Chrome's sqlite cookie store as a Netscape-format cookie file string.
    con = sqlite3.connect(filename)
    con.text_factory = str
    cur = con.cursor()
    cur.execute("select host_key, path, secure, expires_utc, name, value from cookies")
    ftstr = ["FALSE", "TRUE"]
    s = StringIO()
    s.write("""# Netscape HTTP Cookie File
# http://www.netscape.com/newsref/std/cookie_spec.html
# This is a generated file!  Do not edit.
""")
    for item in cur.fetchall():
        try:
            s.write("%s\t%s\t%s\t%s\t%s\t%s\t%s\n" % (
                item[0], ftstr[item[0].startswith('.')], item[1],
                ftstr[item[2]], item[3], item[4], item[5]))
        except UnicodeError:
            continue
    con.close()
    s.seek(0)
    return s.read()

yahoo_id = raw_input("please input yahoo id: ").strip()
# Chrome's cookie database path on Windows XP (raw string for the backslashes)
cookie_text = sqlite2cookieChrome(r'C:\Documents and Settings\Administrator\Local Settings\Application Data\Google\Chrome\User Data\Default\cookies')
open('./%s.txt' % yahoo_id, 'w').write(cookie_text)
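Steps 2 and 3 below consume these cookies as a cookielib jar. Since the file above is written in Netscape format, cookielib.MozillaCookieJar can read it back; a minimal sketch, assuming the %s.txt naming from step 1:

import cookielib

def load_cookie_jar(yahoo_id):
    # read the Netscape-format cookie file saved in step 1 back into a jar
    jar = cookielib.MozillaCookieJar()
    jar.load('./%s.txt' % yahoo_id, ignore_discard=True, ignore_expires=True)
    return jar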
2. Replying to seller messages
Core implementation:
# auction_url: e.g. http://page10.auctions.yahoo.co.jp/jp/auction/m114547886
# subject: message type
# comment_content: message body
# cookie_jar: cookielib jar from step 1
import urllib
import urllib2
from bs4 import BeautifulSoup

def write_file(filename, content, mode):
    # debug helper (reconstructed from its call sites): dump intermediate pages
    f = open(filename, mode)
    f.write(content)
    f.close()

def post_yahoo_auction_comment(auction_url, subject, comment_content, cookie_jar):
    try:
        opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookie_jar))
        opener.addheaders = [("User-Agent", "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.95 Safari/537.11")]
        # ---- grab the 取引ナビ (transaction navi) link ----
        html = opener.open(auction_url).read()
        write_file('thefile1.html', html, 'w')
        soup = BeautifulSoup(html)
        url = soup.find("a", text="取引ナビ")['href']
        if url.startswith('https://contact'):  # new-style Yahoo page
            # ---- scrape the hidden fields of the form to submit ----
            html = opener.open(url).read()
            write_file('thefile2.html', html, 'w')
            soup = BeautifulSoup(html)
            aid = soup.find("input", attrs={"name": "aid"})['value']
            bid = soup.find("input", attrs={"name": "bid"})['value']
            syid = soup.find("input", attrs={"name": "syid"})['value']
            oid = soup.find("input", attrs={"name": "oid"})['value']
            crumb = soup.find("input", attrs={"name": ".crumb"})['value']
            # ---- first submission (preview) ----
            comment_content = comment_content.encode("utf-8", "ignore")
            post_param = urllib.urlencode({
                "aid": aid, "syid": syid, "bid": bid, "oid": oid,
                "body": comment_content, ".crumb": crumb, "subject": subject})
            html = opener.open("https://contact.auctions.yahoo.co.jp/preview", post_param).read()
            write_file('thefile3.html', html, 'w')
            soup = BeautifulSoup(html)
            crumb = soup.find("input", attrs={"name": ".crumb"})['value']
            # ---- confirmation submission ----
            confirm_param = urllib.urlencode({
                ".crumb": crumb, "aid": aid, "syid": syid, "bid": bid,
                "oid": oid, "subject": subject, "body": comment_content})
            html = opener.open("https://contact.auctions.yahoo.co.jp/send", confirm_param).read()
            write_file('thefile4.html', html, 'w')
            return True
        else:  # old-style Yahoo page
            comment_content = comment_content.encode("EUC_JP", "ignore")
            page_number = auction_url.split('.')[0].replace("http://", "").replace('page', '')
            item_id = auction_url.split('/')[-1]
            post_param = urllib.urlencode({
                "aID": item_id, "subject": subject, "comment": comment_content})
            html = opener.open("http://pageinfo%s.auctions.yahoo.co.jp/jp/show/contact_preview#message" % page_number, post_param).read()
            write_file('thefile2.html', html, 'w')
            soup = BeautifulSoup(html)
            crumb = soup.find("input", attrs={"name": ".crumb"})['value']
            target = soup.find("input", attrs={"name": "target"})['value']
            confirm_param = urllib.urlencode({
                ".crumb": crumb, "aID": item_id, "subject": subject,
                "comment": comment_content, "target": target})
            html = opener.open("http://edit%s.auctions.yahoo.co.jp/jp/config/contact_submit" % page_number, confirm_param).read()
            return True
    except Exception as e:
        print e
        # the original distinguished -1/-2 by searching for a marker string
        # that was lost in extraction; treat any failure as -1 here
        return -1
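A minimal usage sketch, assuming the load_cookie_jar helper from step 1; the Yahoo id, subject, and message body are made-up placeholder values:

cookie_jar = load_cookie_jar('my_yahoo_id')    # 'my_yahoo_id' is made up
subject = u'その他'.encode('utf-8')             # assumed placeholder subject
body = u'商品はいつ発送されますか?'              # assumed placeholder message
result = post_yahoo_auction_comment(
    'http://page10.auctions.yahoo.co.jp/jp/auction/m114547886',  # sample URL
    subject, body, cookie_jar)
print result   # True on success, -1 on failure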
3. Auto-posting feedback in the Yahoo! Auctions backend
Core code:
# renamed from the duplicated post_yahoo_auction_comment above; this one leaves feedback
def post_yahoo_auction_feedback(auction_url, item_id, seller, cookie_jar):
    try:
        opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookie_jar))
        opener.addheaders = [('User-Agent', 'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.95 Safari/537.11')]
        page_number = auction_url.split('.')[0].replace('http://', '').replace('page', '')
        url = 'http://edit' + page_number + '.auctions.yahoo.co.jp/jp/config/leavefeedback?t=' + seller + '&nr=1&aID=' + item_id
        # fetch the feedback form page
        html = opener.open(url).read()
        write_file('thefile1.html', html, 'w')
        # the page is EUC-JP, so the marker must be EUC-JP bytes too
        findstr = u'前回、評価した内容'.encode("euc_jp", "ignore")
        if html.find(findstr) > -1:
            return 2  # feedback was already left
        rating = 5
        comment = u'今回ありがとうございます!早速の発送ので取引ができます! 今後もよろしくお願いします!!こちらの評価もお願いします。^_^ '
        comment = comment.encode("EUC_JP", "ignore")
        # the encoding must be given explicitly or BeautifulSoup mis-decodes the page
        soup = BeautifulSoup(html, from_encoding="EUC_JP")

        def hidden_fields(soup, names):
            # collect the hidden form inputs by name
            return dict((n, soup.find('input', attrs={'name': n})['value']) for n in names)

        fields = hidden_fields(soup, [
            'aID', 'author', 'c_comment', 'c_date', 'cacheid', 'catid',
            'curprice', 'endtime', 'isBidByDocomo', 'isIntlAuction',
            'isIntlTarget', 'isIntlUser', 'isResponse', 'isRevision',
            'logininwinnerslist', 'preview', 'r_comment', 'r_date', 'r_id',
            'r_rating', 'role', 'starttime', 't', 'target',
            'targetinwinnerslist', 'u', 'UEtitle', 'wc', 'write'])
        fields['comment'] = comment
        fields['rating'] = rating
        # first submission (preview)
        html = opener.open('http://redit.auctions.yahoo.co.jp/jp/rconfig/leavefeedback',
                           urllib.urlencode(fields)).read()
        write_file('thefile2.html', html, 'w')
        soup = BeautifulSoup(html, from_encoding="EUC_JP")
        # the preview page adds or updates a few hidden fields; re-read those
        fields.update(hidden_fields(soup, ['isBuyer', 'isSeller', 'own',
                                           'preview', 'r_rating', 'write']))
        # confirmation submission
        html = opener.open('http://redit.auctions.yahoo.co.jp/jp/rconfig/leavefeedback',
                           urllib.urlencode(fields)).read()
        write_file('thefile3.html', html, 'w')
        return True
    except Exception as e:
        print e
        # as above, the -1/-2 marker check was lost in extraction
        return -1
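A minimal usage sketch; the Yahoo id and seller id are made-up values:

cookie_jar = load_cookie_jar('my_yahoo_id')
result = post_yahoo_auction_feedback(
    'http://page10.auctions.yahoo.co.jp/jp/auction/m114547886',
    'm114547886',        # item_id (tail of the auction URL)
    'some_seller_id',    # seller (made up)
    cookie_jar)
print result   # True = posted, 2 = already rated, -1 = error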
Notes on the code: auto-replying to sellers is not hard. The core is scraping a page's form data (especially the hidden fields and the .crumb token) and posting it to another page. Parameters such as auction_url, comment_content, and seller are all read from the database; I used the ConfigParser library to read the config file.
The Python implementation:
import ConfigParser
import MySQLdb
import MySQLdb.cursors

def get_db_con():
    config = ConfigParser.ConfigParser()
    config.read('conf/db.cfg')
    host = config.get("mysqllink1", "host")
    port = config.getint("mysqllink1", "port")
    user = config.get("mysqllink1", "user")
    password = config.get("mysqllink1", "password")
    dbname = config.get("mysqllink1", "dbname")
    charset = config.get("mysqllink1", "charset")
    return [host, port, user, password, dbname, charset]

def get_bid_item_from_db():
    link = get_db_con()
    conn = MySQLdb.connect(host=link[0], port=link[1], user=link[2],
                           passwd=link[3], db=link[4], charset=link[5],
                           cursorclass=MySQLdb.cursors.DictCursor)
    cursor = conn.cursor()
    config = ConfigParser.ConfigParser()
    config.read('conf/db.cfg')
    limit = config.get("selpar", "limit")
    limit = '' if limit == 'All' else ' limit ' + limit
    query = ("SELECT a.*, b.seller, b.is_send_mail "
             "FROM bid_item a "
             "JOIN bid_win b ON a.bid = b.bid "
             "JOIN biogg_auction_data bd ON bd.AuctionId = a.item_id "
             "WHERE (to_days(now()) - to_days(b.win_date) = 1 "
             "OR to_days(now()) = to_days(b.win_date)) "
             "AND a.is_store <> 1 AND b.is_send_mail <= 0" + limit)
    cursor.execute(query)
    bid_items = cursor.fetchall()
    cursor.close()
    conn.close()
    return bid_items
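To tie the pieces together, a hedged sketch of a driver loop; the auction_url column is an assumption (it would come in via a.* in the query above), and updating is_send_mail afterwards is left out:

def run():
    # cookie file produced in step 1; 'my_yahoo_id' is a placeholder
    cookie_jar = load_cookie_jar('my_yahoo_id')
    for row in get_bid_item_from_db():
        # 'auction_url' is assumed to be one of the bid_item columns (a.*)
        result = post_yahoo_auction_feedback(row['auction_url'], row['item_id'],
                                             row['seller'], cookie_jar)
        print row['item_id'], result

if __name__ == '__main__':
    run()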
db.cfg:
[mysqllink1]
host=localhost
user=root
password=pwd
port=3306
dbname=test
charset=utf8

[selpar]
limit=All
The code also relies on the BeautifulSoup library: http://www.crummy.com/software/BeautifulSoup/bs4/doc/
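All the scraping above boils down to BeautifulSoup's find on hidden inputs; a tiny self-contained example:

from bs4 import BeautifulSoup

html = '<form><input type="hidden" name=".crumb" value="abc123"></form>'
soup = BeautifulSoup(html)
print soup.find("input", attrs={"name": ".crumb"})['value']   # prints abc123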