python如何将json格式的数据快速地转换成指定格式的数据?或者转换成sql文件?
下面的例子是将json格式的数据转换成以#_#分隔的文本数据,也可用于生成sql文件。
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
|
[root@bogon tutorial] # vi json2txt.py
#-*- coding: UTF-8 -*-
"""Convert a JSON-lines file into "#_#"-separated text records.

Every non-blank line of the input file is parsed as one JSON object and
written to the output file as a single CRLF-terminated line.  The script
runs unchanged under Python 2 and Python 3.

To generate SQL instead, replace format_record() with a function that
renders an INSERT statement for each item (and escapes the values).
"""
import codecs
import io
import json

SEPARATOR = "#_#"
LINE_END = "\r\n"


def format_record(item, separator=SEPARATOR):
    """Render one parsed JSON object as a single output line.

    item      -- mapping holding the keys 'parentTitle', 'parentLink',
                 'author', 'link' and 'title'
    separator -- text placed between consecutive fields

    Returns the joined fields followed by CRLF.
    Raises KeyError when one of the five keys is missing.
    """
    values = (
        item["parentTitle"],
        item["parentLink"],
        item["author"],
        item["link"],
        item["title"].strip(),  # only the title is trimmed of whitespace
    )
    # "%s" keeps non-string JSON values (numbers, null, ...) printable.
    return separator.join("%s" % (value,) for value in values) + LINE_END


def convert(src_path, dst_path, separator=SEPARATOR):
    """Stream src_path (JSON lines, UTF-8) into dst_path (UTF-8 text).

    Records are written one at a time, so memory use does not grow with the
    size of the input.  Blank lines are skipped instead of being passed to
    json.loads(), which would reject them.

    Returns the number of records written.
    Raises ValueError on a line that is not valid JSON and KeyError on a
    record that lacks a required field; output written so far is kept.
    """
    written = 0
    # io.open gives an explicit UTF-8 decode on both Python 2 and 3; the
    # with-blocks close both files even when a line fails to parse.
    with io.open(src_path, "r", encoding="utf-8") as src:
        # codecs.open writes in binary mode, so "\r\n" reaches the file as-is.
        with codecs.open(dst_path, "w", "utf-8") as dst:
            for line in src:
                if not line.strip():
                    continue
                dst.write(format_record(json.loads(line), separator))
                written += 1
    return written


if __name__ == "__main__":
    convert("./tencent_test.json", "tencent.txt")
    print("success")
|
注:如果数据量过大,不宜把全部内容拼接到一个字符串里再一次性写入文件。可以考虑逐行写入(上面的脚本即采用逐行写入),或者累积到一定数量后批量写入。
另外:python的strip(rm)用于删除字符串开头和结尾处属于rm的字符(并不是替换字符串);当rm为空时,默认删除首尾的空白符(包括'\n', '\r', '\t', ' ')。
测试:
json格式的数据
1
2
3
4
5
6
7
8
9
10
11
12
13
|
[root@bogon tutorial] # vi tencent_test.json
{ "author" : "作者" , "parentTitle" : "父标题" , "title" : "标题" , "pageUrl" : "pageurl" , "link" : "linkurl" , "parentLink" : "parenturl" }
{ "author" : "作者" , "parentTitle" : "父标题" , "title" : "标题" , "pageUrl" : "pageurl" , "link" : "linkurl" , "parentLink" : "parenturl" }
{ "author" : "作者" , "parentTitle" : "父标题" , "title" : "标题" , "pageUrl" : "pageurl" , "link" : "linkurl" , "parentLink" : "parenturl" }
{ "author" : "作者" , "parentTitle" : "父标题" , "title" : "标题" , "pageUrl" : "pageurl" , "link" : "linkurl" , "parentLink" : "parenturl" }
{ "author" : "作者" , "parentTitle" : "父标题" , "title" : "标题" , "pageUrl" : "pageurl" , "link" : "linkurl" , "parentLink" : "parenturl" }
{ "author" : "作者" , "parentTitle" : "父标题" , "title" : "标题" , "pageUrl" : "pageurl" , "link" : "linkurl" , "parentLink" : "parenturl" }
{ "author" : "作者" , "parentTitle" : "父标题" , "title" : "标题" , "pageUrl" : "pageurl" , "link" : "linkurl" , "parentLink" : "parenturl" }
{ "author" : "作者" , "parentTitle" : "父标题" , "title" : "标题" , "pageUrl" : "pageurl" , "link" : "linkurl" , "parentLink" : "parenturl" }
{ "author" : "作者" , "parentTitle" : "父标题" , "title" : "标题" , "pageUrl" : "pageurl" , "link" : "linkurl" , "parentLink" : "parenturl" }
{ "author" : "作者" , "parentTitle" : "父标题" , "title" : "标题" , "pageUrl" : "pageurl" , "link" : "linkurl" , "parentLink" : "parenturl" }
{ "author" : "作者" , "parentTitle" : "父标题" , "title" : "标题" , "pageUrl" : "pageurl" , "link" : "linkurl" , "parentLink" : "parenturl" }
|
运行python json2txt.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
|
[root@bogon tutorial] # python json2txt.py
success
[root@bogon tutorial] # more tencent.txt
父标题#_#parenturl#_#作者#_#linkurl#_#标题
父标题#_#parenturl#_#作者#_#linkurl#_#标题
父标题#_#parenturl#_#作者#_#linkurl#_#标题
父标题#_#parenturl#_#作者#_#linkurl#_#标题
父标题#_#parenturl#_#作者#_#linkurl#_#标题
父标题#_#parenturl#_#作者#_#linkurl#_#标题
父标题#_#parenturl#_#作者#_#linkurl#_#标题
父标题#_#parenturl#_#作者#_#linkurl#_#标题
父标题#_#parenturl#_#作者#_#linkurl#_#标题
父标题#_#parenturl#_#作者#_#linkurl#_#标题
父标题#_#parenturl#_#作者#_#linkurl#_#标题
|