1、读文件
1
2
3
4
5
|
import csv
# Open with newline="" (as the csv module requires, so that newlines embedded
# in quoted fields are parsed correctly) and let the context manager close the
# file once every row has been consumed.
with open("data.file", encoding="utf-8", newline="") as csv_file:
    csv_reader = csv.reader(csv_file)
    for row in csv_reader:
        # Each row is a list of strings, one element per column.
        print(row)
|
csv_reader把每一行数据转化成了一个list,list中每个元素是一个字符串。
2、写文件
读文件时,我们把csv文件读入列表中,写文件时会把列表中的元素写入到csv文件中。
1
2
3
4
|
# NOTE: `list` shadows the builtin; the name is kept because the follow-up
# snippet in this article writes the same variable.
list = ["1", "2", "3", "4"]
# newline="" is intentionally NOT passed here: this is the naive form whose
# extra-blank-line problem is discussed right after this snippet.
# The `with` block guarantees the row is flushed and the file is closed.
with open(outfile, "w") as out:
    csv_writer = csv.writer(out)
    csv_writer.writerow(list)
|
可能遇到的问题:在 Windows 上直接使用这种写法,会导致文件中每一行数据后面多出一个空行。
解决办法如下:
1
2
3
|
# newline="" stops the text layer from translating the "\r\n" that the csv
# writer already emits, which is what produced the extra blank lines.
# The `with` block guarantees the row is flushed and the file is closed.
with open(outfile, "w", newline="") as out:
    csv_writer = csv.writer(out, dialect="excel")
    csv_writer.writerow(list)
|
在 Stack Overflow 上找到了比较经典的解释:原来 Python 3 对 str 和 bytes 类型做了严格的区分,不像 Python 2 里某些函数可以混用。所以用 Python 3 调用 writerow 写文件时,不要用 wb 模式打开文件,只需使用 w 模式,并带上 newline=''。
3、示例
- 简单读写
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
|
import csv
class writer:
    """Small CSV writer that emits a fixed header row when constructed.

    The column names are the keys of ``self.dict``; the values are the
    default cell contents for a row. Callers typically update ``self.dict``
    and then pass ``self.dict.values()`` to :meth:`writer_to`.

    The instance can be used as a context manager so the underlying file is
    always closed.
    """

    def __init__(self, path="outfile.csv", encoding=None):
        """Open *path* for writing and write the header row.

        :param path: destination CSV file; defaults to ``"outfile.csv"``.
        :param encoding: text encoding passed to ``open``; ``None`` keeps the
            platform default.
        """
        # Column name -> default cell value. Every default mirrors its own
        # column name.
        self.dict = {
            "标题": "标题",
            "链接": "链接",
            "服务": "服务",
            "dsr": "dsr",
            "店铺名": "店铺名",
            "价格": "价格",
            "付款人数": "付款人数",
            "发货地": "发货地",
        }
        # Keep a reference to the file so it can be closed explicitly.
        self._out = open(path, "w", newline="", encoding=encoding)
        self.csv_writer = csv.writer(self._out, dialect="excel")
        # Iterating a dict yields its keys, so this writes the column names.
        self.csv_writer.writerow(self.dict)

    def writer_to(self, key_value):
        """Write one data row.

        :param key_value: iterable of cell values, in column order.
        """
        self.csv_writer.writerow(key_value)

    def close(self):
        """Flush and close the underlying file."""
        self._out.close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
if __name__ == "__main__":
    # Demo: override two of the default column values, show the merged
    # record, then append its values to the CSV as one data row.
    csv_out = writer()
    overrides = {"链接": "http://www.baidu.com", "标题": "我是标题"}
    csv_out.dict.update(overrides)
    print(csv_out.dict)
    row_values = csv_out.dict.values()
    csv_out.writer_to(row_values)
|
- 结合爬虫
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
|
import csv
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException, NoSuchElementException
from selenium.webdriver.common.action_chains import ActionChains
# Mutable holders shared by the functions below: slot 0 of `driver` is
# replaced with the Chrome WebDriver by open_chrome(), and slot 0 of
# `colspan` receives the category text scraped by operating().
driver = ["1", "2"]
colspan = ["1", "2"]
try:
    out = open("类目.csv", "w", newline="")
except PermissionError:
    print("文件被其他程序占用")
    # Wait for the user to acknowledge, then stop: without an output file
    # there is nothing to write to, and continuing would only fail later
    # with a NameError on `out`.
    input("")
    raise SystemExit(1)
csv_writer = csv.writer(out, dialect="excel")
# Header row: item ID, category.
csv_writer.writerow(["宝贝ID", "类目"])
def open_chrome():
    """Start Chrome, load the site, and block until the user confirms login."""
    browser = webdriver.Chrome()
    # Publish the browser through the shared holder before navigating.
    driver[0] = browser
    browser.get("https://www.dianchacha.com")
    input("请登陆后按回车:")
def EC_located(one_group, value):
    """Wait up to 10 seconds for element(s) matching a CSS selector.

    Exists to shorten call sites.

    :param one_group: ``"one"`` waits for a single element; any other value
        waits for all matching elements.
    :param value: the CSS selector to look for.
    :return: the located element (or list of elements); ``None`` if the wait
        timed out, in which case a message is printed.
    """
    wait = WebDriverWait(driver[0], 10)
    single = one_group == "one"
    try:
        if single:
            condition = EC.presence_of_element_located((By.CSS_SELECTOR, value))
        else:
            condition = EC.presence_of_all_elements_located((By.CSS_SELECTOR, value))
        return wait.until(condition)
    except TimeoutException:
        if single:
            print(value, "1元素未加载成功,等待超时")
        else:
            print(value, "1元素---组---未加载成功,等待超时")
        return None
def operating(ID, max_retries=3):
    """Load the info page for item *ID* and store its category in ``colspan[0]``.

    The page is re-requested when it reports that the item was not found or
    when the category cells fail to load, but only up to *max_retries* extra
    times, so a genuinely missing item cannot recurse or loop forever.

    :param ID: item ID, appended to the info-page URL.
    :param max_retries: number of additional attempts after the first one.
    :return: ``None``. On success ``colspan[0]`` holds the category text; if
        every attempt fails ``colspan[0]`` is reset to ``""`` so a stale
        category from a previous item is never reported for this one.
    """
    page_url = "https://www.dianchacha.com/item/info/index/iid/" + ID
    for _ in range(max_retries + 1):
        driver[0].get(page_url)
        if "未能找到亲的宝贝" in driver[0].page_source:
            continue
        colspans = EC_located("group", ".colspan-1")
        # EC_located returns None on timeout; the category is read from the
        # second matched element, so at least two are required.
        if colspans and len(colspans) > 1:
            colspan[0] = str(colspans[1].text).replace("宝贝类目: ", "")
            print(colspan)
            return
    colspan[0] = ""
    print(ID, "未能获取宝贝类目")
def writer_txt():
    """Append the current (item ID, category) pair to the CSV and report it."""
    item_id, category = url[0], colspan[0]
    csv_writer.writerow([item_id, category])
    print("保存", item_id, category, "成功")
# Holder for the item ID currently being processed: slot 0 is set by main()
# and read by writer_txt().
url = ["0", "1"]


def main():
    """Look up the category of every item ID listed in ``宝贝ID.txt``.

    Opens the browser, then for each non-empty line of the ID file scrapes
    the item's category and appends a row to the output CSV. The output file
    is closed even if processing stops early with an error.
    """
    try:
        open_chrome()
        id_path = "宝贝ID.txt"
        with open(id_path) as f:
            for line in f:
                # Drop the trailing newline (and surrounding whitespace) so
                # it ends up in neither the request URL nor the CSV cell.
                item_id = line.strip()
                if not item_id:
                    continue
                url[0] = item_id
                print(item_id)
                operating(url[0])
                writer_txt()
    finally:
        out.close()
    print("已完成")


if __name__ == "__main__":
    main()
|
以上就是利用python 读写csv文件的详细内容,更多关于python 读写csv文件的资料请关注服务器之家其它相关文章!
原文链接:https://www.addcoder.com/blog/article_detail/0wx4ritl/