Python之pandas学习【9】:利用pandas筛选出CSV某几列的数据进行数据处理

时间:2024-10-30 07:45:53
  • import re
  • import pandas as pd
  • import time
  • class jk_jd():
  • # 方法1:传统for循环
  • def solution(self,data):
  • col = ['x','y'] # 定义要筛选的列
  • data_del = (data,columns=col) # 提取出要筛选的列,装到dataframe中
  • for i in range(len(data_del['x'])): # 遍历dataFrame中的每一行,进行正则表达式的匹配处理
  • data_del['x'][i] = ("<br>(.*)", "", data_del['x'][i]) # 删掉<br>后面的所有数据
  • data_del['x'][i] = ("【(.*?)】","",data_del['x'][i]) # 删掉【】中的所有数据
  • data_del.to_csv("") # 导出为CSV文件
  • # 方法2:pandas的map模块方式
  • def solution1(self,data):
  • col = ['x','y']
  • data_dele = (data,columns=col)
  • data_dele['x'] = data_dele['x'].map(lambda x:("<br>(.*)", "",("【(.*?)】","",x))\
  • .replace('\t','').replace('\n','').replace(' ',''))
  • data_dele.to_csv("")
  • if __name__ == '__main__':
  • data = pd.read_excel('')
  • s = jk_jd()
  • t1 = ()
  • data_dele = (data)
  • print('for循环需要时间: ',()-t1)
  • t2 = ()
  • data_dele1 = s.solution1(data)
  • print('map需要时间: ',()-t2)