VBA处理数据与Python Pandas处理数据案例比较分析

需求：

现有一个 csv文件，包含'CNUM'和'COMPANY'两列，数据里包含空行，且有内容重复的行数据。

要求：

1）去掉空行；
2）重复行数据只保留一行有效数据；
3）修改'COMPANY'列的名称为'Company_New‘；
4）并在其后增加六列，分别为'C_col',‘D_col',‘E_col',‘F_col',‘G_col',‘H_col'。

VBA处理数据与Python Pandas处理数据案例比较分析

一，使用 Python Pandas来处理：

				?

									import pandas as pd

									import numpy as np

									from pandas import DataFrame,Series

									def deal_with_data(filepath,newpath):

									  file_obj=open(filepath)

									  df=pd.read_csv(file_obj)  # 读取csv文件，创建 DataFrame

									  df=df.reindex(columns=['CNUM','COMPANY','C_col','D_col','E_col','F_col','G_col','H_col'],fill_value=None)  # 重新指定列索引

									  df.rename(columns={'COMPANY':'Company_New'}, inplace = True) # 修改列名

									  df=df.dropna(axis=0,how='all')         # 去除 NAN 即文件中的空行

									  df['CNUM'] = df['CNUM'].astype('int32')    # 将 CNUM 列的数据类型指定为 int32

									  df = df.drop_duplicates(subset=['CNUM', 'Company_New'], keep='first') # 去除重复行

									  df.to_csv(newpath,index=False,encoding='GBK')

									  file_obj.close()

									if __name__=='__main__':

									  file_path=r'C:UsersP78DesktoppythonCNUM_COMPANY.csv'

									  file_save_path=r'C:UsersP78DesktoppythonCNUM_COMPANY_OUTPUT.csv'

									  deal_with_data(file_path,file_save_path)

二，使用 VBA来处理：

				?

									Option Base 1

									Option Explicit

									Sub main()

									 On Error GoTo error_handling

									 Dim wb         As Workbook

									 Dim wb_out       As Workbook

									 Dim sht         As Worksheet

									 Dim sht_out       As Worksheet

									 Dim rng         As Range

									 Dim usedrows      As Byte

									 Dim usedrows_out    As Byte

									 Dim dict_cnum_company  As Object

									 Dim str_file_path    As String

									    Dim str_new_file_path  As String

									    'assign values to variables:

									    str_file_path = "C:UsersP78DesktopPythonCNUM_COMPANY.csv"

									    str_new_file_path = "C:UsersP78DesktopPythonCNUM_COMPANY_OUTPUT.csv"

									 Set wb = checkAndAttachWorkbook(str_file_path)

									 Set sht = wb.Worksheets("CNUM_COMPANY")

									 Set wb_out = Workbooks.Add

									 wb_out.SaveAs str_new_file_path, xlCSV 'create a csv file

									 Set sht_out = wb_out.Worksheets("CNUM_COMPANY_OUTPUT")

									 Set dict_cnum_company = CreateObject("Scripting.Dictionary")

									 usedrows = WorksheetFunction.Max(getLastValidRow(sht, "A"), getLastValidRow(sht, "B"))

									 'rename the header 'COMPANY' to 'Company_New',remove blank & duplicate lines/rows.

									 Dim cnum_company As String

									 cnum_company = ""

									 For Each rng In sht.Range("A1", "A" & usedrows)

									   If VBA.Trim(rng.Offset(0, 1).Value) = "COMPANY" Then

									     rng.Offset(0, 1).Value = "Company_New"

									   End If

									   cnum_company = rng.Value & "-" & rng.Offset(0, 1).Value

									   If VBA.Trim(cnum_company) <> "-" And Not dict_cnum_company.Exists(rng.Value & "-" & rng.Offset(0, 1).Value) Then

									     dict_cnum_company.Add rng.Value & "-" & rng.Offset(0, 1).Value, ""

									   End If

									 Next rng

									 'loop the keys of dict split the keyes by '-' into cnum array and company array.

									 Dim index_dict As Byte

									 Dim arr_cnum()

									 Dim arr_Company()

									 For index_dict = 0 To UBound(dict_cnum_company.keys)

									   ReDim Preserve arr_cnum(1 To UBound(dict_cnum_company.keys) + 1)

									   ReDim Preserve arr_Company(1 To UBound(dict_cnum_company.keys) + 1)

									   arr_cnum(index_dict + 1) = Split(dict_cnum_company.keys()(index_dict), "-")(0)

									   arr_Company(index_dict + 1) = Split(dict_cnum_company.keys()(index_dict), "-")(1)

									   Debug.Print index_dict

									 Next

									 'assigns the value of the arrays to the celles.

									 sht_out.Range("A1", "A" & UBound(arr_cnum)) = Application.WorksheetFunction.Transpose(arr_cnum)

									 sht_out.Range("B1", "B" & UBound(arr_Company)) = Application.WorksheetFunction.Transpose(arr_Company)

									 'add 6 columns to output csv file:

									 Dim arr_columns() As Variant

									 arr_columns = Array("C_col", "D_col", "E_col", "F_col", "G_col", "H_col")  '

									 sht_out.Range("C1:H1") = arr_columns

									 Call checkAndCloseWorkbook(str_file_path, False)

									 Call checkAndCloseWorkbook(str_new_file_path, True)

									Exit Sub

									error_handling:

									  Call checkAndCloseWorkbook(str_file_path, False)

									  Call checkAndCloseWorkbook(str_new_file_path, False)

									End Sub

									' 辅助函数：

									'Get last row of Column N in a Worksheet

									Function getLastValidRow(in_ws As Worksheet, in_col As String)

									  getLastValidRow = in_ws.Cells(in_ws.Rows.count, in_col).End(xlUp).Row

									End Function

									Function checkAndAttachWorkbook(in_wb_path As String) As Workbook

									  Dim wb As Workbook

									  Dim mywb As String

									  mywb = in_wb_path

									  For Each wb In Workbooks

									    If LCase(wb.FullName) = LCase(mywb) Then

									      Set checkAndAttachWorkbook = wb

									      Exit Function

									    End If

									  Next

									  Set wb = Workbooks.Open(in_wb_path, UpdateLinks:=0)

									  Set checkAndAttachWorkbook = wb

									End Function

									Function checkAndCloseWorkbook(in_wb_path As String, in_saved As Boolean)

									  Dim wb As Workbook

									  Dim mywb As String

									  mywb = in_wb_path

									  For Each wb In Workbooks

									    If LCase(wb.FullName) = LCase(mywb) Then

									      wb.Close savechanges:=in_saved

									      Exit Function

									    End If

									  Next

									End Function

三，输出结果：

VBA处理数据与Python Pandas处理数据案例比较分析

两种方法输出结果相同：

四，比较总结：

Python pandas 内置了大量处理数据的方法，我们不需要重复造*，用起来很方便，代码简洁的多。
Excel VBA 处理这个需求，使用了数组，字典等数据结构（实际需求中，数据量往往很大，所以一些地方没有直接使用遍历单元格的方法），以及处理字符串，数组和字典的很多方法，对文件的操作也很复杂，一旦出错，调试起来比python也较困难，代码已经尽量优化，但还是远比 Python要多。

到此这篇关于VBA处理数据与Python Pandas处理数据案例比较分析的文章就介绍到这了,更多相关VBA与Python Pandas处理数据内容请搜索服务器之家以前的文章或继续浏览下面的相关文章希望大家以后多多支持服务器之家！

原文链接：https://blog.csdn.net/qq_24937551/article/details/105338086