首页 > 代码库 > 快速解析超大XML不占用太大内存

快速解析超大XML不占用太大内存

 1 import xml.etree.ElementTree as ET
 2 
 3 def parse_res(xml_file):
 4     res_dic = {}
 5     tmp_lst_lev1 = []
 6     tmp_lst_lev2 = []
 7     add_flag = False
 8     for event, elem in ET.iterparse(xml_file):
 9         if event == end:
10             if elem.tag == Item:
11                 tmp_lst_lev1.append(dict(elem.attrib))
12             elif elem.tag == Enum:
13                 enum_str = ‘‘.join([ ‘‘.join([[, item[value], =, item[name], ]]) for item in tmp_lst_lev1])
14                 res_enum = {}
15                 res_enum[id] = elem.attrib[id]
16                 res_enum[name] = enum_str
17                 tmp_lst_lev2.append(res_enum)
18                 tmp_lst_lev1 = []
19             elif elem.tag == EnumRes:
20                 res_dic[EnumRes] = {}
21                 tmp_dic = res_dic[EnumRes]
22                 for item in tmp_lst_lev2:
23                     tmp_dic[ item[id].split(.)[1] ] = item[name]
24                 tmp_lst_lev2 = []
25             elif elem.tag == MeasUnitRes or elem.tag == CounterNameRes or elem.tag == CounterUnitRes:
26                 res_dic[elem.tag] = {}
27                 tmp_dic = res_dic[elem.tag]
28                 for item in tmp_lst_lev1:
29                     tmp_dic[ item[id].split(.)[1] ] = item[name]
30                 tmp_lst_lev1 = []
31             #CommonInfo.Resource.xml
32             elif elem.tag == DevTypeNameRes or elem.tag == VendorRes or elem.tag == MocRes:
33                 res_dic[elem.tag] = {}
34                 tmp_dic = res_dic[elem.tag]
35                 for item in tmp_lst_lev1:
36                     tmp_dic[id] = item[id].split(.)[1]
37                     tmp_dic[name] = item[name]
38                 tmp_lst_lev1 = []
39             #StaticList.xml
40             elif elem.tag == param:
41                 if alarmId == elem.attrib[name]:
42                     id = elem.text
43             elif elem.tag == alarm:
44                 tmp_lst_lev1.append([id, elem.attrib[name]])
45             elif elem.tag == alarms:
46                 res_dic[elem.tag] = {}
47                 tmp_dic = res_dic[elem.tag]
48                 for item in tmp_lst_lev1:
49                     tmp_dic[ item[0] ] = item[1]
50                 tmp_lst_lev1 = []
51         elem.clear()   #关键在这一名,处理完节点及时清理内存
52     return res_dic

 

快速解析超大XML不占用太大内存