方法一:
这种方法需要引入glob模块
glob模块是最简单的模块之一,内容非常少。用它可以查找符合特定规则的文件路径名,跟使用Windows下的文件搜索差不多。查找文件只用到三个匹配符:“*”、“?”、“[]”。“*”匹配0个或多个字符;“?”匹配单个字符;“[]”匹配指定范围内的字符,如:[0-9]匹配数字。
具体实现如下:
import pandas as pd
import glob
def hebing(pattern='F:/*.csv', output='F:/result.csv'):
    """Concatenate every CSV file matching *pattern* into *output*.

    The matching files are copied verbatim (header line included) in sorted
    path order and appended to *output*, so header lines of the later files
    end up inside the merged data.

    Args:
        pattern: glob pattern selecting the input files.
        output: path of the merged file; it is opened in append mode, so an
            existing file is extended rather than overwritten.
            NOTE(review): the original output file name is not visible in
            this file; 'F:/result.csv' is a placeholder -- confirm.
    """
    import os  # local import: the file's import block only has pandas/glob

    def _key(path):
        # Normalised absolute form so the same file compares equal however
        # it was spelled.
        return os.path.normcase(os.path.abspath(path))

    target = _key(output)
    # glob.glob() expands the wildcard into real paths; the merged file is
    # filtered out so a re-run does not feed the output back into itself.
    csv_list = [p for p in sorted(glob.glob(pattern)) if _key(p) != target]
    print(u'共发现%s个CSV文件'% len(csv_list))
    print(u'正在处理............')
    for path in csv_list:
        # Binary mode copies the bytes untouched (no decoding errors, no
        # newline translation).
        with open(path, 'rb') as src:
            content = src.read()
        with open(output, 'ab') as merged:
            merged.write(content)
            # Keep the next file's first line from being glued onto this
            # file's last row when the trailing newline is missing.
            if content and not content.endswith(b'\n'):
                merged.write(b'\n')
    print(u'合并完毕!')
def quchong(file):
    """Remove duplicate rows from the CSV at *file*, rewriting it in place.

    The first line of the file is taken as the header. Rows that are textual
    copies of that header (left behind when whole files, header included,
    were concatenated) are dropped together with ordinary duplicate rows.

    Args:
        file: path of the CSV file to clean; it is overwritten.
    """
    df = pd.read_csv(file, header=0)
    # A row whose every cell equals the corresponding column name is a
    # repeated header line, not data.
    header = pd.Series([str(col) for col in df.columns], index=df.columns)
    is_header_row = df.astype(str).eq(header, axis='columns').all(axis=1)
    datalist = df[~is_header_row].drop_duplicates()
    # index=False: otherwise the row index is written as an extra unnamed
    # column, and one more is added each time the file is processed.
    datalist.to_csv(file, index=False)
if __name__ == '__main__':
    # Merge first, then de-duplicate the merged file.
    hebing()
    # quchong() must be given the merged CSV file itself: the bare drive root
    # "F:/" is a directory and cannot be read as CSV. This path has to be the
    # file that hebing() writes.
    # NOTE(review): the original file name is not visible in this file;
    # 'result.csv' is a placeholder -- confirm.
    quchong("F:/result.csv")
方法二:
通过pandas包的read_csv和to_csv两个方法来完成。
import pandas as pd
import glob
# Destination of the merged data. It has to name a file: the bare drive root
# 'f:/' is a directory and cannot be opened for writing.
# NOTE(review): the original file name is not visible in this file;
# 'result.csv' is a placeholder -- confirm.
outputfile = 'f:/result.csv'
# glob.glob() expands the wildcard into a list of matching paths. Without it
# the name is bound to the pattern string itself, which would be iterated
# character by character and counted by its length.
csv_list = sorted(glob.glob('F:/*.csv'))
print(u'共发现%s个CSV文件'% len(csv_list))
print(u'正在处理............')
def hebing(inputs=None, output=None):
    """Append the rows of every input CSV to the merged output file.

    Each input is parsed with pandas and appended to the output. The column
    header is written only when the output file is still missing or empty,
    so the merged file carries exactly one header line.

    Args:
        inputs: iterable of CSV paths to merge; defaults to the module-level
            ``csv_list``.
        output: path of the merged file; defaults to the module-level
            ``outputfile``.
    """
    import os  # local import: the file's import block only has pandas/glob

    sources = csv_list if inputs is None else inputs
    target = outputfile if output is None else output

    def _key(path):
        # Normalised absolute form so the same file compares equal however
        # it was spelled.
        return os.path.normcase(os.path.abspath(path))

    target_key = _key(target)
    for inputfile in sources:
        # Never feed the merged file back into itself.
        if _key(inputfile) == target_key:
            continue
        # The context manager closes the handle once pandas has read it.
        with open(inputfile) as f:
            data = pd.read_csv(f)
        # Without a header line in the output, a later read_csv(header=0)
        # would consume the first data row as column names.
        write_header = (not os.path.exists(target)
                        or os.path.getsize(target) == 0)
        data.to_csv(target, mode='a', index=False, header=write_header)
    print('完成合并')
def quchong(file):
    """Remove duplicate rows from the CSV at *file*, rewriting it in place.

    The first line of the file is taken as the header; every later row that
    repeats an earlier one is dropped.

    Args:
        file: path of the CSV file to clean; it is overwritten.
    """
    df = pd.read_csv(file, header=0)
    datalist = df.drop_duplicates()
    # index=False: otherwise the row index is written as an extra unnamed
    # column, and one more is added each time the file is processed.
    datalist.to_csv(file, index=False)
    print('完成去重')
if __name__ == '__main__':
    # Script entry point: build the merged file first ...
    hebing()
    # ... then strip duplicate rows from that same file.
    quchong(outputfile)