脚本如下:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
|
from openpyxl import load_workbook
booksheet = workbook.active #获取当前活跃的sheet,默认是第一个sheet
#如果想获取别的sheet页采取下面这种方式,先获取所有sheet页名,在通过指定那一页。
# sheets = workbook.get_sheet_names() # 从名称获取sheet
# booksheet = workbook.get_sheet_by_name(sheets[0])
#获取sheet页的行数据
rows = booksheet.rows
#获取sheet页的列数据
columns = booksheet.columns
i = 0
# 迭代所有的行
for row in rows:
i = i + 1
line = [col.value for col in row]
cell_data_1 = booksheet.cell(row = i, column = 3 ).value #获取第i行1 列的数据
cell_data_2 = booksheet.cell(row = i, column = 4 ).value #获取第i行 2 列的数据
cell_data_3 = booksheet.cell(row = i, column = 8 ).value #获取第i行 3 列的数据
cell_data_4 = booksheet.cell(row = i, column = 18 ).value #获取第i行 4 列的数据
print (cell_data_1, cell_data_2, cell_data_3, cell_data_4)
|
实例:python读取excel数据做分类统计
某excel中记录了某个人的通话记录,下面程序将按照通话地点,通话类型等统计通话时间
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
|
# -*- coding:utf-8 -*-
import xlrd
import datetime
infos = []
info_file = xlrd.open_workbook( 'src.xls' ) #打开excel文件
info_sheet = info_file.sheets()[ 0 ] #通过索引顺序获取工作表
row_count = info_sheet.nrows #获取行数,列数ncols
for row in range ( 1 ,row_count):
time_string = info_sheet.cell(row, 3 ).value
time_s_sp = time_string.split( ':' )
infos.append( #该数组里放了row_count个字典
{
'type' :info_sheet.cell(row, 2 ).value, #获取单元格,通话类型,主叫被叫
'other_cellphone' :info_sheet.cell(row, 0 ).value, #对方号码,
'timespan' :datetime.timedelta(seconds = int (time_s_sp[ 2 ]),minutes = int (time_s_sp[ 1 ]),hours = int (time_s_sp[ 0 ])),
'gpscity' :info_sheet.cell(row, 5 ).value #通话是本地还是外地
}
)
time_all = datetime.timedelta(seconds = 0 ) #初始化
time_types = {}
time_classes = {}
time_numbers = {}
for infor in infos: #取出该数组里的字典
time_all + = infor[ 'timespan' ] #求总通话次数
infor_type = infor[ 'type' ]
if infor_type in time_types:
time_types[infor_type] + = infor[ 'timespan' ]
else :
time_types[infor_type] = infor[ 'timespan' ] #按通话类型统计通话时间
infor_class = infor[ 'gpscity' ]
if infor_class in time_classes:
time_classes[infor_class] + = infor[ 'timespan' ]
else :
time_classes[infor_class] = infor[ 'timespan' ] #这里相当于先分类赋值再++,按归属地统计通话时间
infor_number = infor[ 'other_cellphone' ]
if infor_number in time_numbers:
time_numbers[infor_number] + = infor[ 'timespan' ]
else :
time_numbers[infor_number] = infor[ 'timespan' ] #根据号码统计通话时间
print '总通话时间:%s' % time_all
print
print '总通话方式分类'
for k,v in time_types.items():
print k.encode( 'utf-8' ),v
print
print '通话类型分类:'
for k,v in time_classes.items():
print k.encode( 'utf-8' ),v
print
print '对方号码分类:'
for k,v in time_numbers.items():
print k,v
|
再优化下代码
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
|
# -*- coding:utf-8 -*-
import xlrd
from datetime import timedelta
def read_excel(file_excel): #读excel并将需要的数据分类放在数组里
infos = []
info_file = xlrd.open_workbook(file_excel)
info_sheet = info_file.sheets()[ 0 ]
row_count = info_sheet.nrows
for row in range ( 1 ,row_count):
time_string = info_sheet.cell(row, 3 ).value
time_s_sp = time_string.split( ':' )
infos.append(
{
'type' :info_sheet.cell(row, 2 ).value,
'other_cellphone' :info_sheet.cell(row, 0 ).value,
'timespan' :timedelta(seconds = int (time_s_sp[ 2 ]),minutes = int (time_s_sp[ 1 ]),hours = int (time_s_sp[ 0 ])),
'gpscity' :info_sheet.cell(row, 5 ).value
}
)
return infos
def count_cell(list_dirs,infotype): #统计总通话及分类统计结果,存在字典里
result_dir = {}
time_all = timedelta(seconds = 0 )
for list_dir in list_dirs:
time_all + = list_dir[ 'timespan' ]
info_type = list_dir[infotype]
if info_type not in result_dir:
result_dir[info_type] = list_dir[ 'timespan' ]
else :
result_dir[info_type] + = list_dir[ 'timespan' ]
return time_all,result_dir
def print_result(result_dir): #打印数据
for k,v in result_dir.items():
print k.encode( 'utf-8' ),v
if __name__ = = "__main__" :
list_dirs = read_excel( 'src.xls' )
time_all,result_type = count_cell(list_dirs, 'type' )
result_cell = count_cell(list_dirs, 'other_cellphone' )
result_gpscity = count_cell(list_dirs, 'gpscity' )
print '总通话时间:%s' % time_all
print '按照通话类型分类:'
print_result(result_type)
print '按照号码分类:'
print_result(result_cell[ 1 ])
print '按照归属地分类:'
print_result(result_gpscity[ 1 ])
|
以上就是本文的全部内容,希望对大家的学习有所帮助,也希望大家多多支持服务器之家。
原文链接:https://www.cnblogs.com/hui-shao/p/read_xlsx.html