# -*- coding: utf-8 -*-
import os
import numpy as np
import pandas as pd
import time
# Script: count how many times each place name occurs in the NAME_CH column
# of the input CSV and write one row per distinct name to the output CSV.

WORK_DIR = 'G:\\GISworkspace\\Rprocess'
INPUT_CSV = 'level3_river_clipV6.csv'
OUTPUT_CSV = 'Cname_10km.csv'
NAME_COLUMN = 'NAME_CH'


def count_names(names):
    """Return one row per distinct name with its number of occurrences.

    Args:
        names: a pandas Series (or any sequence) of names.

    Returns:
        A DataFrame with columns ``name`` and ``num``. Each distinct name
        appears once, on the positional row index of its first occurrence,
        and ``num`` is how many times that name occurs in ``names``.
        Missing names (NaN/None) are omitted.
    """
    # Work on positional labels 0..n-1, as the original index-based loop did.
    names = pd.Series(names).reset_index(drop=True)
    # A missing value never compares equal to anything, so it cannot be
    # counted as a name; drop it instead of emitting a placeholder row.
    present = names.dropna()
    # value_counts() tallies every name in one pass; map() broadcasts the
    # tally back onto each row (replaces the O(n^2) pairwise comparison and
    # the chained-assignment writes into a pre-filled integer column).
    occurrences = present.map(present.value_counts())
    table = pd.DataFrame(
        {'name': present, 'num': occurrences.astype(int)},
        columns=['name', 'num'],
    )
    # keep='first' keeps only the first row on which each name appears.
    return table.drop_duplicates('name', keep='first')


def main():
    """Read the input CSV, count the names, write the result, print timing."""
    start = time.perf_counter()  # used to record the program's run time
    os.chdir(WORK_DIR)
    # Opened with open() (platform-default encoding) exactly as before; the
    # with-block guarantees the handle is closed even if parsing fails.
    with open(INPUT_CSV) as tx:
        df = pd.read_csv(tx)
    result = count_names(df[NAME_COLUMN])
    result.to_csv(OUTPUT_CSV)  # output
    end = time.perf_counter()
    # Elapsed seconds. The original author reported 4.7 min for an R version
    # and 31 s for the earlier nested-loop Python version.
    print(end - start)


if __name__ == '__main__':
    main()