import pandas as pd
import glob
# Merge the files only (no deduplication at this stage)
def marge(csv_list, outputfile):
    """Append the contents of every CSV in *csv_list* to *outputfile*.

    Each input is parsed with pandas and written back in append mode with its
    header row, so the output holds one header row per input file and keeps
    whatever *outputfile* already contained.

    Args:
        csv_list: Paths of the UTF-8 encoded CSV files to merge, in order.
        outputfile: Path of the CSV file that receives the appended rows.
    """
    for inputfile in csv_list:
        # The context manager closes each input as soon as it has been parsed.
        with open(inputfile, 'r', encoding="utf-8") as f:
            data = pd.read_csv(f)
        # mode='a' keeps the rows written for the earlier inputs.
        data.to_csv(outputfile, mode='a', index=False)
    print('完成合并')
# Deduplicate rows, keeping a single header row
def distinct(file, outputfile='result_new.csv'):
    """Drop duplicate rows from a CSV and write the remainder to *outputfile*.

    The file is read without a header row, so header lines are treated as
    ordinary data: when the same header line occurs several times only its
    first occurrence is kept. Duplicate data rows are dropped the same way.

    Args:
        file: Path of the CSV file to deduplicate.
        outputfile: Destination path for the deduplicated CSV. Defaults to
            'result_new.csv' in the current working directory.
    """
    df = pd.read_csv(file, header=None)
    datalist = df.drop_duplicates()
    # header=False: the surviving header line is already the first data row.
    datalist.to_csv(outputfile, index=False, header=False)
    print('完成去重')
if __name__ == '__main__':
    # Merged intermediate file. An empty path cannot be opened for writing,
    # so a concrete name is required here; adjust it as needed.
    output_csv_path = 'merged.csv'
    # Both generated files live in the scanned directory; leave them out of
    # the inputs so a rerun does not merge the script's own output.
    generated = {output_csv_path, 'result_new.csv'}
    # Sorted so the merge order does not depend on directory listing order.
    csv_list = [
        path for path in sorted(glob.glob('*.csv')) if path not in generated
    ]
    print(csv_list)
    if csv_list:
        # NOTE: marge() appends, so rows from an earlier run's merged file
        # are carried over; exact duplicates are removed by distinct().
        marge(csv_list, output_csv_path)
        distinct(output_csv_path)
# Fill missing values (df is a DataFrame loaded beforehand):
df = df.fillna(0)
# Part 2: Merging
import os
import glob
import pandas as pd
# Merge every file with the given extension in the current directory into
# a single CSV.
aa = 'csv'  # extension of the files to merge
# Output file name. It matches the glob pattern below, so it is filtered out
# of the inputs to keep a rerun from merging the previous result into itself.
out_path = 'combined_csv.csv'
# Sorted so the row order does not depend on directory listing order.
a = [i for i in sorted(glob.glob('*.{}'.format(aa))) if i != out_path]
if a:  # pd.concat raises ValueError on an empty list
    b = pd.concat([pd.read_csv(i) for i in a])  # stack the files row-wise
    # utf-8-sig writes a BOM at the start of the file.
    b.to_csv(out_path, index=False, encoding='utf-8-sig')
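# For Excel workbooks the steps are the same as for CSV files
# (read with pd.read_excel and write with DataFrame.to_excel instead).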