在这个脚本中我使用的是Python的ORM框架SQLAlchemy来连接远程的MySQL数据库的,在写这个脚本的时候为了方便参数的输入,我就给之前和这个脚本添加了一个新的功能,就是在调用这个脚本的时候后面可以传入参数,这样只需要在执行脚本的时候就可以方便的输入所要使用的路径。
那么现在开始对导出的数据进行解析咯~由于之前已经将列名和数据都写入到了文本文件中, 现在只需要一行一行的读取出来,然后和类中的属性一一对应就可以了,那么现在开始读取资料吧~老样子,先用切片将之前写在文本文件中的第一行列名取出来咯~然后我定义了一个字典,其中key值存入列名,value中存数组的下标,这样在后面赋值的时候就可以一个属性名对应一个列名了,不仅逻辑清晰,代码也好看多了,这样再使用Python中的split函数将字符串转换成list来读取到类中,然后每读取1000次再将session提交到数据库一次,这样可以大幅度的减小对数据库的操作,而提高性能。下面就附上代码了:
from sqlalchemy.orm import mapper, sessionmaker
from sqlalchemy import create_engine, Table, Column, Integer, String, MetaData,Double,TIMESTAMP
from sqlalchemy.sql.expression import Cast
from sqlalchemy.ext.compiler import compiles
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.dialects.mysql import \
BIGINT, BINARY, BIT, BLOB, BOOLEAN, CHAR, DATE, \
DATETIME, DECIMAL, DECIMAL, DOUBLE, ENUM, FLOAT, INTEGER, \
LONGBLOB, LONGTEXT, MEDIUMBLOB, MEDIUMINT, MEDIUMTEXT, NCHAR, \
NUMERIC, NVARCHAR, REAL, SET, SMALLINT, TEXT, TIME, TIMESTAMP, \
TINYBLOB, TINYINT, TINYTEXT, VARBINARY, VARCHAR, YEAR
import os
import re,pdb,sys,getopt
opts, args = getopt.getopt(sys.argv[1:], "hf:u:p:i:d",["help","file=","user=","pass=","ip=","db="])
input_file=""
username=""
password=""
host=""
databasename=""
def usage():
print """
-h --help print the help
-g --gbk The path of the backup file
-b --gdb Have to backup the database path
"""
for op, value in opts:
if op in ("-f","--file"):
input_file = value
elif op in ("-u","--user"):
username = value
elif op in ("-p","--pass"):
password = value
elif op in ("-i","--ip"):
host = value
elif op in ("-d","--db"):
databasename = value
elif op in ("-h","--help"):
usage()
sys.exit()
Base = declarative_base()
mysql_db = create_engine('mysql://'+username+':'+password+'@'+host+':3306/'+databasename+'')
metadata.create_all(mysql_db)
class User(Base):
__tablename__ = 'users'
DAY_BATCH_ID = Column(Integer, primary_key=True)
ITEMID = Column(Integer, primary_key=True)
NETSALES = Column(DOUBLE, nullable=True)
NETQTY = Column(DOUBLE, nullable=True)
VOIDSALES = Column(DOUBLE, nullable=True)
VOIDQTY = Column(DOUBLE, nullable=True)
RETURNSALES = Column(DOUBLE, nullable=True)
RETURNQTY = Column(DOUBLE, nullable=True)
DISCOUNTSALES = Column(DOUBLE, nullable=True)
DISCOUNTQTY = Column(DOUBLE, nullable=True)
OTHQTY = Column(DOUBLE, nullable=True)
STAFFQTY = Column(DOUBLE, nullable=True)
WASTEQTY = Column(DOUBLE, nullable=True)
MODIFQTY = Column(DOUBLE, nullable=True)
COST = Column(DOUBLE, nullable=True)
UNITCOST = Column(DOUBLE, nullable=True)
SALEQTY = Column(DOUBLE, nullable=True)
SALEAMT = Column(DOUBLE, nullable=True)
ADJ_AMT = Column(DOUBLE, nullable=True)
OVERRIDE_AMT = Column(DOUBLE, nullable=True)
OVERRIDE_QTY = Column(Integer, nullable=True)
OTHAMT = Column(DOUBLE, nullable=True)
WASTEAMT = Column(DOUBLE, nullable=True)
TAXAMT = Column(DOUBLE, nullable=True)
SELFCOMMISSION = Column(DOUBLE, nullable=True)
FULLCOMMISSION = Column(DOUBLE, nullable=True)
NET_ITEMS = Column(Integer, nullable=True)
VOID_ITEMS = Column(Integer, nullable=True)
RETURN_ITEMS = Column(Integer, nullable=True)
DISCOUNT_ITEMS = Column(Integer, nullable=True)
OTH_ITEMS = Column(Integer, nullable=True)
SALE_ITEMS = Column(Integer, nullable=True)
MODIF_ITEMS = Column(Integer, nullable=True)
WASTE_ITEMS = Column(Integer, nullable=True)
TICKETCOUNT = Column(Integer, nullable=True)
AVG_COST = Column(DOUBLE, nullable=True)
VOIDSALES_TAX = Column(DOUBLE, nullable=True)
RETURNSALES_TAX = Column(DOUBLE, nullable=True)
DISCOUNTSALES_TAX = Column(DOUBLE, nullable=True)
OVERRIDE_AMT_TAX = Column(DOUBLE, nullable=True)
OTHAMT_TAX = Column(DOUBLE, nullable=True)
WASTEAMT_TAX = Column(DOUBLE, nullable=True)
SALESAMT_TAX = Column(DOUBLE, nullable=True)
LAST_TRANS_TIME = Column(TIMESTAMP, nullable=True)
mapper(User, userTable)
Session = sessionmaker()
Session.configure(bind=mysql_db)
session = Session()
def main():
file1 = open(input_file).readlines()
#通过切片取得第一行中的值并将第一行的字符串组装成数组保持在字典中
file2 = file1[:1]
for line in lines2:
result = line.split(",")
mydic={}
for v,k in enumerate(result):
mydic[k]=v
#取得文件中从第二行开始的所有数据,通过切片去除第一行的数据
file3 = file1[1:-1]
for line1 in file3:
commint = 0
u = User()
line1 = line1.split(',')
u.DAY_BATCH_ID = line1[mydic['DAY_BATCH_ID']]
u.ITEMID = line1[mydic['ITEMID']]
u.NETSALES = line1[mydic['NETSALES']]
u.NETQTY = line1[mydic['NETQTY']]
u.VOIDSALES = line1[mydic['VOIDSALES']]
u.VOIDQTY = line1[mydic['VOIDQTY']]
u.RETURNSALES = line1[mydic['RETURNSALES']]
u.RETURNQTY = line1[mydic['RETURNQTY']]
u.DISCOUNTSALES = line1[mydic['DISCOUNTSALES']]
u.DISCOUNTQTY = line1[mydic['DISCOUNTQTY']]
u.OTHQTY = line1[mydic['OTHQTY']]
u.STAFFQTY = line1[mydic['STAFFQTY']]
u.WASTEQTY = line1[mydic['WASTEQTY']]
u.MODIFQTY = line1[mydic['MODIFQTY']]
u.COST = line1[mydic['COST']]
u.UNITCOST = line1[mydic['UNITCOST']]
u.SALEQTY = line1[mydic['SALEQTY']]
u.SALEAMT = line1[mydic['SALEAMT']]
u.ADJ_AMT = line1[mydic['ADJ_AMT']]
u.OVERRIDE_AMT = line1[mydic['OVERRIDE_AMT']]
u.OVERRIDE_QTY = line1[mydic['OVERRIDE_QTY']]
u.OTHAMT = line1[mydic['THAMT']]
u.WASTEAMT = line1[mydic['WASTEAMT']]
u.TAXAMT = line1[mydic['TAXAMT']]
u.SELFCOMMISSION = line1[mydic['SELFCOMMISSION']]
u.FULLCOMMISSION = line1[mydic['FULLCOMMISSION']]
u.NET_ITEMS = line1[mydic['NET_ITEMS']]
u.VOID_ITEMS = line1[mydic['VOID_ITEMS']]
u.RETURN_ITEMS = line1[mydic['RETURN_ITEMS']]
u.DISCOUNT_ITEMS = line1[mydic['DISCOUNT_ITEMS']]
u.OTH_ITEMS = line1[mydic['OTH_ITEMS']]
u.SALE_ITEMS = line1[mydic['SALE_ITEMS']]
u.MODIF_ITEMS = line1[mydic['MODIF_ITEMS']]
u.WASTE_ITEMS = line1[mydic['WASTE_ITEMS']]
u.TICKETCOUNT = line1[mydic['TICKETCOUNT']]
u.AVG_COST = line1[mydic['AVG_COST']]
u.VOIDSALES_TAX = line1[mydic['VOIDSALES_TAX']]
u.RETURNSALES_TAX = line1[mydic['RETURNSALES_TAX']]
u.DISCOUNTSALES_TAX = line1[mydic['DISCOUNTSALES_TAX']]
u.OVERRIDE_AMT_TAX = line1[mydic['OVERRIDE_AMT_TAX']]
u.OTHAMT_TAX = line1[mydic['OTHAMT_TAX']]
u.WASTEAMT_TAX = line1[mydic['WASTEAMT_TAX']]
u.SALESAMT_TAX = line1[mydic['SALESAMT_TAX']]
u.LAST_TRANS_TIME = line1[mydic['LAST_TRANS_TIME']]
session.add(u)
commint = commint+1
if commint>1000:
commint = commint %1000
session.commit()
session.close()
if __name__ == '__main__':
main()
至此,这一周的工作终于是结束了!从无到有,从没学过Python到现在写出这样的代码,仅仅过去了2周,期间查阅了InterBase和Firebird数据库,试验了无数次的导出导入,为了文件格式化也是想了许久,以前学Java时最讨厌的正则也无数次的试来试去……也算是掌握的差不多了,Python也对于基本语法掌握了个大概,也算是没白忙活额~~加油!!!