快速解析超大XML不占用太大内存

时间:2022-08-10 08:55:36
 import xml.etree.ElementTree as ET

 def parse_res(xml_file):
     """Stream-parse a resource XML file into per-section lookup dictionaries.

     The file is read incrementally with ``ET.iterparse`` and every element
     is cleared as soon as it has been handled, so the content of processed
     elements is not kept in memory.

     Args:
         xml_file: A filename or file object, passed straight to
             ``ET.iterparse``.

     Returns:
         dict: One entry per recognised section tag found in the input:

         * ``'EnumRes'``: maps the second dot-separated part of each
           ``Enum`` id to a string ``"[value=name][value=name]..."`` built
           from the ``Item`` elements closed before that ``Enum``.
         * ``'MeasUnitRes'``, ``'CounterNameRes'``, ``'CounterUnitRes'``:
           map the second dot-separated part of each ``Item`` id to the
           ``Item`` name.
         * ``'DevTypeNameRes'``, ``'VendorRes'``, ``'MocRes'``: a dict with
           the keys ``'id'`` and ``'name'`` taken from the last ``Item`` of
           the section (empty when the section has no ``Item``).
         * ``'alarms'``: maps the text of each alarm's ``alarmId`` param to
           the alarm's ``name`` attribute. Alarms for which no ``alarmId``
           value was seen are skipped.

     Raises:
         KeyError: If a required attribute (``value``, ``name``, ``id``) is
             missing on an ``Item``, ``Enum`` or ``alarm`` element.
         IndexError: If an id that must be split contains no ``'.'``.
         xml.etree.ElementTree.ParseError: If the input is not well-formed.
     """
     res_dic = {}
     pending_items = []  # Item attributes / alarm pairs awaiting their parent
     pending_enums = []  # finished Enum entries awaiting the EnumRes end tag
     alarm_id = None  # alarmId text seen since the last closed alarm

     # Without an ``events`` argument iterparse reports only "end" events, so
     # every element arrives here after all of its children were handled.
     for _event, elem in ET.iterparse(xml_file):
         tag = elem.tag
         if tag == 'Item':
             pending_items.append(dict(elem.attrib))
         elif tag == 'Enum':
             enum_str = ''.join(
                 '[' + item['value'] + '=' + item['name'] + ']'
                 for item in pending_items
             )
             pending_enums.append({'id': elem.attrib['id'], 'name': enum_str})
             pending_items = []
         elif tag == 'EnumRes':
             res_dic['EnumRes'] = {
                 item['id'].split('.')[1]: item['name']
                 for item in pending_enums
             }
             pending_enums = []
         elif tag in ('MeasUnitRes', 'CounterNameRes', 'CounterUnitRes'):
             res_dic[tag] = {
                 item['id'].split('.')[1]: item['name']
                 for item in pending_items
             }
             pending_items = []
         # Sections the original author attributes to CommonInfo.Resource.xml.
         elif tag in ('DevTypeNameRes', 'VendorRes', 'MocRes'):
             # NOTE(review): every Item writes the same two keys, so only the
             # last Item of the section survives. Kept as in the original;
             # confirm that this is intended.
             section = {}
             for item in pending_items:
                 section['id'] = item['id'].split('.')[1]
                 section['name'] = item['name']
             res_dic[tag] = section
             pending_items = []
         # Elements the original author attributes to StaticList.xml.
         elif tag == 'param':
             # .get() so that params without a "name" attribute are ignored
             # instead of aborting the whole parse with a KeyError.
             if elem.attrib.get('name') == 'alarmId':
                 alarm_id = elem.text
         elif tag == 'alarm':
             # Skip alarms without an id rather than failing on an unset
             # variable or reusing the id of a previous alarm.
             if alarm_id is not None:
                 pending_items.append([alarm_id, elem.attrib['name']])
             alarm_id = None
         elif tag == 'alarms':
             res_dic[tag] = {item[0]: item[1] for item in pending_items}
             pending_items = []
         # Key step: drop the element's children, attributes and text as soon
         # as it has been handled so that memory is released promptly.
         elem.clear()
     return res_dic