I am trying to import CSVs to mysql database automatically using python script. I used the script from https://bitbucket.org/richardpenman/csv2mysql to achieve this task. Below is the code:
我正在尝试使用python脚本将csv自动导入mysql数据库。我使用了https://bitbucket.org/richardpenman/csv2mysql中的脚本来完成这个任务。下面是代码:
import os
import re
import sys
import csv
import time
import argparse
import collections
import MySQLdb
import warnings
# suppress annoying mysql warnings
warnings.filterwarnings(action='ignore', category=MySQLdb.Warning)
def get_type(s):
"""Find type for this string
"""
# try integer type
try:
v = int(s)
except ValueError:
pass
else:
if abs(v) > 2147483647:
return 'bigint'
else:
return 'int'
# try float type
try:
float(s)
except ValueError:
pass
else:
return 'double'
# check for timestamp
dt_formats = (
('%Y-%m-%d %H:%M:%S', 'datetime'),
('%Y-%m-%d %H:%M:%S.%f', 'datetime'),
('%Y-%m-%d', 'date'),
('%H:%M:%S', 'time'),
)
for dt_format, dt_type in dt_formats:
try:
time.strptime(s, dt_format)
except ValueError:
pass
else:
return dt_type
# doesn't match any other types so assume text
if len(s) > 255:
return 'text'
else:
return 'varchar(255)'
def most_common(l, default='varchar(255)'):
"""Return most common value from list
"""
# some formats trump others
if l:
for dt_type in ('text', 'bigint'):
if dt_type in l:
return dt_type
return max(l, key=l.count)
return default
def get_col_types(input_file, max_rows=1000):
"""Find the type for each CSV column
"""
csv_types = collections.defaultdict(list)
print (os.getcwd())
# os.chdir("scripts/CSV")
reader = csv.reader(open(input_file))
# test the first few rows for their data types
for row_i, row in enumerate(reader):
if row_i == 0:
header = row
else:
for col_i, s in enumerate(row):
data_type = get_type(s)
csv_types[header[col_i]].append(data_type)
if row_i == max_rows:
break
# take the most common data type for each row
return [most_common(csv_types[col]) for col in header]
def get_insert(table, header):
"""Generate the SQL for inserting rows
"""
field_names = ', '.join(header)
field_markers = ', '.join('%s' for col in header)
return 'INSERT INTO %s (%s) VALUES (%s);' % \
(table, field_names, field_markers)
def format_header(row):
"""Format column names to remove illegal characters and duplicates
"""
safe_col = lambda s: re.sub('\W+', '_', s.lower()).strip('_')
header = []
counts = collections.defaultdict(int)
for col in row:
col = safe_col(col)
counts[col] += 1
if counts[col] > 1:
col = '{}{}'.format(col, counts[col])
header.append(col)
return header
def main(input_file, user, password, host, table, database, max_inserts=10000):
print ("Importing `%s' into MySQL database `%s.%s'" % (input_file, database, table))
db = MySQLdb.connect(host=host, user=user, passwd=password, charset='utf8')
cursor = db.cursor()
# create database and if doesn't exist
cursor.execute('CREATE DATABASE IF NOT EXISTS %s;' % database)
db.select_db(database)
# define table
print ("Analyzing column types ...")
col_types = get_col_types(input_file)
print (col_types)
header = None
for i, row in enumerate(csv.reader(open(input_file))):
if header:
while len(row) < len(header):
row.append('') # this row is missing columns so pad blank values
cursor.execute(insert_sql, row)
if i % max_inserts == 0:
db.commit()
print ("commit")
else:
header = format_header(row)
print ("Inserting rows ...")
# SQL string for inserting data
insert_sql = get_insert(table, header)
# commit rows to database
print ("Committing rows to database ...")
db.commit()
print ("Done!")
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='Automatically insert CSV contents into MySQL')
parser.add_argument('--table', dest='table', help='Set the name of the table. If not set the CSV filename will be used')
parser.add_argument('--database', dest='database', default=os.environ['MYSQL_DATABASE'], help='Set the name of the database. If not set the test database will be used')
parser.add_argument('--user', dest='user', default=os.environ['MYSQL_USER'], help='The MySQL login username')
parser.add_argument('--password', dest='password', default=os.environ['MYSQL_PASSWORD'], help='The MySQL login password')
parser.add_argument('--host', dest='host', default=os.environ['MYSQL_CONTAINER_NAME'], help='The MySQL host')
parser.add_argument('input_file', help='The input CSV file')
args = parser.parse_args(sys.argv[1:])
if not args.table:
# use input file name for table
args.table = os.path.splitext(os.path.basename(args.input_file))[0]
main(args.input_file, args.user, args.password, args.host, args.table, args.database)
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "WebApp.settings.local")
from django.core.management import execute_from_command_line
execute_from_command_line(sys.argv)
Even though I am able to read my csv file and import , it is throwing error for one particular table i.e
即使我能够读取csv文件并导入,它仍然会为一个特定的表抛出错误
DROP TABLE IF EXISTS `param_system`;
CREATE TABLE `param_system` (
`ID` int(11) NOT NULL,
`EXTERNAL_EDIT` int(11) DEFAULT '0',
`INTERNAL_EDIT` int(11) DEFAULT '0',
`FORTRAN_TYPE` varchar(50) DEFAULT NULL,
`LABEL` varchar(255) DEFAULT NULL,
`DESCRIPTION` varchar(255) DEFAULT NULL,
`HELP_ID` int(11) DEFAULT '0',
`HELP_TEXT` text DEFAULT NULL,
`GROUPNAME` varchar(255) DEFAULT NULL,
`ROWNUM` int(11) DEFAULT '0',
`WIDGET` varchar(50) DEFAULT NULL,
`OPTIONS` varchar(255) DEFAULT NULL,
`DISABLED` int(11) DEFAULT '0',
`READONLY` int(11) DEFAULT '0',
`REQUIRED` int(11) DEFAULT '0',
`UI` text DEFAULT NULL,
`MIN_VALUE` varchar(50) DEFAULT NULL,
`MAX_VALUE` varchar(50) DEFAULT NULL,
`FORM_VAR_NAME` varchar(255) DEFAULT NULL,
`PARAM` varchar(255) DEFAULT NULL,
`VAL` varchar(255) DEFAULT NULL,
`DEFAULT` varchar(255) DEFAULT NULL
) ENGINE=InnoDB AUTO_INCREMENT=50 DEFAULT CHARSET=utf8;
The error i.e being thrown is:
我的错误。e抛出是:
_mysql_exceptions.ProgrammingError: (1064, "You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near 'default) VALUES ('5', '0', '0', 'integer', '', '', '1', '', 'Base Parameters', '' at line 1")
_mysql_exceptions。编程错误:(1064,“您的SQL语法有错误;检查与MySQL服务器版本对应的手册,找到正确的语法以使用接近‘default’值(‘5’、‘0’、‘0’、‘integer’、‘1’、‘Base Parameters’、‘at line 1’)
Below is the screen shot of the csv that I am trying to import:
下面是我要导入的csv的屏幕截图:
As you can see that it is not able to read the number "1" after "Base Parameters" and throwing the error. Could someone help me with what is going wrong?
如您所见,它不能读取“基本参数”后的数字“1”并抛出错误。有人能帮我解决问题吗?
2 个解决方案
#1
2
_mysql_exceptions.ProgrammingError: (1064, "You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near 'default) VALUES ('5', '0', '0', 'integer', '', '', '1', '', 'Base Parameters', '' at line 1")
_mysql_exceptions。编程错误:(1064,“您的SQL语法有错误;检查与MySQL服务器版本对应的手册,找到正确的语法以使用接近‘default’值(‘5’、‘0’、‘0’、‘integer’、‘1’、‘Base Parameters’、‘at line 1’)
What you are seeing is a fragment of an INSERT statement. It isn't showing you the whole INSERT statement, it cuts it off. You said you think it is not reading the '1' in the ROWNUM field of your input data, but you are misinterpreting the error message.
您看到的是插入语句的片段。它没有显示整个INSERT语句,而是将它截断。您说过,您认为它没有在输入数据的ROWNUM字段中读取“1”,但您误解了错误消息。
It's just a coincidence that you see two single-quotes next to each other in the error message. The error message is formatted like this:
在错误消息中,您看到两个单引号相邻,这只是一个巧合。错误消息的格式如下:
_mysql_exceptions.ProgrammingError: (1064, "You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near '...' at line 1")
_mysql_exceptions。编程错误:(1064,“您的SQL语法有错误;检查与MySQL服务器版本相对应的手册,找到正确的语法可以使用near '…在第1行”)
Where ...
will be a fragment of the long SQL statement, starting with the first token that confused the parser, and continuing in this case for 80 characters. This 80-character fragment is:
在那里……将是长SQL语句的一个片段,从迷惑解析器的第一个令牌开始,在本例中继续使用80个字符。这80字的片段:
default) VALUES ('5', '0', '0', 'integer', '', '', '1', '', 'Base Parameters', '
It's purely by accident that the 80th character is a single-quote, and then the next character in the error message is also a single-quote. It is not an empty string in place of the value '1'
you expected to be read from the input. In fact, I assume it is reading the data value from the input.
第80个字符是单引号完全是偶然的,然后错误消息中的下一个字符也是单引号。它不是一个空字符串,代替了您期望从输入中读取的值“1”。实际上,我假设它正在从输入中读取数据值。
So the problem reported in the error is that you're using the SQL reserved word DEFAULT
as a column name. This Python script is not delimiting it. So the appearance of the reserved word in the INSERT statement confuses the parser.
因此错误中报告的问题是,您正在使用SQL保留字DEFAULT作为列名。这个Python脚本没有对它进行限制。因此,INSERT语句中的保留字的出现混淆了解析器。
I believe you can fix this in the Python script by formatting the column names inside back-ticks in the INSERT statement:
我相信您可以在Python脚本中修复这个问题,方法是将插入语句中后勾中的列名格式化:
def get_insert(table, header):
"""Generate the SQL for inserting rows
"""
field_names = ', '.join('`%s`' % col for col in header)
field_markers = ', '.join('%s' for col in header)
return 'INSERT INTO %s (%s) VALUES (%s);' % \
(table, field_names, field_markers)
You could alternatively edit your input CSV file to avoid using SQL reserved words in the column names defined in the header.
您也可以编辑您的输入CSV文件,以避免在头中定义的列名中使用SQL保留字。
#2
0
@BillKarwin, When I used Django admin page to see the same table that was loaded in mysql db (after it was modified to take DEFAULT
as a field name), it was throwing the "string index out of range" error. I couldn't pinpoint to the exact location where it is throwing the error. Is it because of the len(header)
code in the main
function?
@BillKarwin,当我使用Django管理页面查看在mysql db中加载的同一表(在它被修改为默认为字段名之后)时,它抛出了“字符串索引超出范围”错误。我无法准确地指出它在哪里抛出错误。是因为主函数中的len(header)代码吗?
#1
2
_mysql_exceptions.ProgrammingError: (1064, "You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near 'default) VALUES ('5', '0', '0', 'integer', '', '', '1', '', 'Base Parameters', '' at line 1")
_mysql_exceptions。编程错误:(1064,“您的SQL语法有错误;检查与MySQL服务器版本对应的手册,找到正确的语法以使用接近‘default’值(‘5’、‘0’、‘0’、‘integer’、‘1’、‘Base Parameters’、‘at line 1’)
What you are seeing is a fragment of an INSERT statement. It isn't showing you the whole INSERT statement, it cuts it off. You said you think it is not reading the '1' in the ROWNUM field of your input data, but you are misinterpreting the error message.
您看到的是插入语句的片段。它没有显示整个INSERT语句,而是将它截断。您说过,您认为它没有在输入数据的ROWNUM字段中读取“1”,但您误解了错误消息。
It's just a coincidence that you see two single-quotes next to each other in the error message. The error message is formatted like this:
在错误消息中,您看到两个单引号相邻,这只是一个巧合。错误消息的格式如下:
_mysql_exceptions.ProgrammingError: (1064, "You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near '...' at line 1")
_mysql_exceptions。编程错误:(1064,“您的SQL语法有错误;检查与MySQL服务器版本相对应的手册,找到正确的语法可以使用near '…在第1行”)
Where ...
will be a fragment of the long SQL statement, starting with the first token that confused the parser, and continuing in this case for 80 characters. This 80-character fragment is:
在那里……将是长SQL语句的一个片段,从迷惑解析器的第一个令牌开始,在本例中继续使用80个字符。这80字的片段:
default) VALUES ('5', '0', '0', 'integer', '', '', '1', '', 'Base Parameters', '
It's purely by accident that the 80th character is a single-quote, and then the next character in the error message is also a single-quote. It is not an empty string in place of the value '1'
you expected to be read from the input. In fact, I assume it is reading the data value from the input.
第80个字符是单引号完全是偶然的,然后错误消息中的下一个字符也是单引号。它不是一个空字符串,代替了您期望从输入中读取的值“1”。实际上,我假设它正在从输入中读取数据值。
So the problem reported in the error is that you're using the SQL reserved word DEFAULT
as a column name. This Python script is not delimiting it. So the appearance of the reserved word in the INSERT statement confuses the parser.
因此错误中报告的问题是,您正在使用SQL保留字DEFAULT作为列名。这个Python脚本没有对它进行限制。因此,INSERT语句中的保留字的出现混淆了解析器。
I believe you can fix this in the Python script by formatting the column names inside back-ticks in the INSERT statement:
我相信您可以在Python脚本中修复这个问题,方法是将插入语句中后勾中的列名格式化:
def get_insert(table, header):
"""Generate the SQL for inserting rows
"""
field_names = ', '.join('`%s`' % col for col in header)
field_markers = ', '.join('%s' for col in header)
return 'INSERT INTO %s (%s) VALUES (%s);' % \
(table, field_names, field_markers)
You could alternatively edit your input CSV file to avoid using SQL reserved words in the column names defined in the header.
您也可以编辑您的输入CSV文件,以避免在头中定义的列名中使用SQL保留字。
#2
0
@BillKarwin, When I used Django admin page to see the same table that was loaded in mysql db (after it was modified to take DEFAULT
as a field name), it was throwing the "string index out of range" error. I couldn't pinpoint to the exact location where it is throwing the error. Is it because of the len(header)
code in the main
function?
@BillKarwin,当我使用Django管理页面查看在mysql db中加载的同一表(在它被修改为默认为字段名之后)时,它抛出了“字符串索引超出范围”错误。我无法准确地指出它在哪里抛出错误。是因为主函数中的len(header)代码吗?