# -*- coding: utf-8 -*- import re import urllib2 import sqlite3 import os import xlrd import sqlite3 # 打开数据库文件 rrdai_db = sqlite3.connect(r'C:\Users\SX1489\Desktop\rrdai.db') cursor = rrdai_db.cursor() # 建表 # cursor.execute('DROP TABLE IF EXISTS loanlog') # cursor.execute('CREATE TABLE loanlog (user_id varchar(8),user_name varchar(50), address varchar(50), job_type varchar(100), credit_id varchar(8), borrow_id varchar(8), amount varchar(10), interest varchar(6), months varchar(3), risk_level varchar(3), finish_ratio varchar(6))') # user_id, user_name, address, job_type, credit_id, borrow_id, amount, interest, months, risk_level, finish_ratio pattern = re.compile(r'"creditPassedTimeId":(?P<credit_id>\d+),"user":(?P<user_id>\d+).+"amount":(?P<amount>[^,]+),"interest":(?P<interest>[0-9.]+),"months":(?P<months>\d+).*"finishedRatio":(?P<finish_ratio>[^,]+).*"borrowerId":(?P<borrow_id>\d+).+"nickName":"(?P<user_name>[^"]+).+"borrowerLevel":"(?P<risk_level>[^"]+)","address":"(?P<address>[^"]*)","jobType":"(?P<job_type>[^"]*)"',re.S|re.M) def parse(url): req = urllib2.Request(url, None, {'User-Agent': 'Mozilla/5.0'}) #pretend to be a browser try: html = urllib2.urlopen(req).read() return [ m.groupdict() for m in pattern.finditer(html)] except: return None page_start = 114789 page_end = 180804 index = 1 try: for page_index in range (page_start,page_end): sName = '%d' %page_index surl = r'http://www.renrendai.com/lend/detailPage.action?loanId='+sName print sName x = parse(surl) if x != None and len(x) != 0: index = index + 1 cursor.execute('INSERT INTO loanlog (user_id, user_name, address, job_type, credit_id, borrow_id, amount, interest, months, risk_level, finish_ratio ) VALUES (?,?,?,?,?,?,?,?,?,?,?)', (x[0]['user_id'], x[0]['user_name'].decode("utf8"), x[0]['address'].decode("utf8"), x[0]['job_type'].decode("utf8"), x[0]['credit_id'], x[0]['borrow_id'], x[0]['amount'], x[0]['interest'], x[0]['months'], x[0]['risk_level'], x[0]['finish_ratio'])) if index == 50: index = 1 rrdai_db.commit() print '50 records has been submitted!!!!!!!' print 'jobes done!' except: print 'there is an error at'+sName # print x['address'].decode("utf8") os.system("pause")