import json
import psycopg2
import sys conn = psycopg2.connect(dbname='dev', host='127.0.0.1',
port='', user='master', password='')
cur = conn.cursor() column_names = None def execute(sql):
cur.execute(sql)
conn.commit() def build_value(item):
global column_names
column_names = [] fields = []
if tablename.startswith('sold_'):
keylist = ['soldUrl', 'country', 'category0', 'itemNumber', 'price', 'quantity', 'Date_of_Purchase', 'ts',
'ts_string']
else:
keylist = None
for key in sorted(item.keys()) if not keylist else keylist:
field = item.get(key, None)
if isinstance(field, str):
field = field.replace("'", "''").replace("\\", "") column_names.append(key) if key in ['page', 'index']:
fields.append(str(field))
else:
fields.append("'{}'".format(field)) return '({})'.format(', '.join(fields)) if __name__ == '__main__': data_file = sys.argv[1]
tablename = sys.argv[2] values = []
for line in open(data_file):
item = json.loads(line) values.append(build_value(item)) if len(values) == 10000:
sql = 'insert into {}({}) values {};'.format(tablename, ', '.join(column_names), ', '.join(values))
execute(sql) values.clear() execute('insert into {}({}) values {};'.format(tablename, ', '.join(column_names), ', '.join(values)))