首页 > 代码库 > python中lxml的应用

python中lxml的应用

<?xml version="1.0" encoding="UTF-8"?>
首先下载lxml, http://www.lfd.uci.edu/~gohlke/pythonlibs/ ,然后添加引用
from lxml import _elementpath as DONTUSE
from lxml import etree


具体示例:
1.添加命名空间
# Set namespace: map the "xsi" prefix to the XML Schema instance namespace
# and create the "DcmStatistics" root element with that prefix mapping.
nsmap = {"xsi": "http://www.w3.org/2001/XMLSchema-instance" }
g_statisticsRoot = etree.Element("DcmStatistics", nsmap = nsmap)

2.添加xml schema引用
# Add the XSD reference: set the noNamespaceSchemaLocation attribute on the
# root element to "DcmStatistics.xsd". The attribute name is qualified with
# the XMLSchema-instance namespace using the "{namespace-uri}localname" form.
g_statisticsRoot.set("{http://www.w3.org/2001/XMLSchema-instance}noNamespaceSchemaLocation", "DcmStatistics.xsd" )

3.添加注释
# Add a comment: use addprevious() so the comment node is placed in front of
# the root node rather than inside it.
comment = etree.Comment("create by jiangong.li")
g_statisticsRoot.addprevious(comment)

4.尝试多种编码来解析xml
def decodingXml(xmlFile):
    """Parse *xmlFile* with lxml, trying UTF-8 first and GB18030 second.

    A fresh parser (blank text removed, comments kept) is built for each
    attempt with the candidate encoding forced on it.

    Returns:
        The object returned by ``etree.parse`` for the first attempt that
        does not raise, or ``None`` when both attempts raise (an error
        message is printed in that case).
    """
    for candidate in ("utf-8", "gb18030"):
        try:
            xmlParser = etree.XMLParser(
                remove_blank_text=True,
                encoding=candidate,
                remove_comments=False,
            )
            return etree.parse(xmlFile, xmlParser)
        except Exception:
            # Any failure simply moves on to the next candidate encoding.
            continue

    # Both encodings failed: report it and signal failure with None.
    print( "\nPAR XML ERROR, decoding error." )
    return None

5.遍历xml下的所有节点(包含起始节点自身及其各级后代节点,而不仅是直属的第一级子节点): iter()
     # Visit every element yielded by root.iter() and reset its `tail`
     # attribute to None.
     for element in root.iter():
          element.tail = None

6.遍历xml下的第一级子节点. iterchildren()
    for e in srcParentNode.iterchildren():
        if e is srcParentNode:
            continue

        name = ""
        #statistics node
        if  e.tag == "element":
            name = "Element"
        elif e.tag == "sequence":
            name = "Sequence"
        elif e.tag == "item":
            name = "Item"
        else:
            print( "\nUnsupported element type: %s\n" %(e.tag))
            name = e.tag
            # Only parse element/sequence/item
             continue     

7.添加子节点到尾部. append()
def getXmlElement(nodeName, parentNode):
    """Return the child of *parentNode* matching *nodeName*, creating it if absent.

    The lookup evaluates the XPath expression ``'./' + nodeName`` on
    *parentNode*. When it matches nothing, a new element named *nodeName*
    is created and appended to the end of *parentNode*.

    Args:
        nodeName: Tag name (used verbatim as an XPath step) of the child.
        parentNode: Element to search in and, if needed, append to.

    Returns:
        The first matching node, or the newly appended element.

    Raises:
        ValueError: If *parentNode* is None.
    """
    if parentNode is None:
        raise ValueError("parent node is None")

    # Plain ASCII quotes: the typographic quotes in the original expression
    # are not valid Python string delimiters.
    nodes = parentNode.xpath('./' + nodeName)
    if nodes:
        return nodes[0]

    node = etree.Element(nodeName)
    parentNode.append(node)
    return node

8.序列化为格式化的字节串(bytes)输出(指定 encoding="UTF-8" 时 tostring 返回的是 bytes,而不是 str)
# Serialize g_statisticsRoot as UTF-8 with an XML declaration and
# pretty-printing enabled.
# NOTE(review): the return value is discarded here; assign it to a variable
# (or print/write it) to actually use the serialized output.
etree.tostring(g_statisticsRoot, encoding= "UTF-8", xml_declaration=True , pretty_print=True, with_comments=True )

9.保存成xml文件
        statisticsResult =  open(g_xmlName, "bw+")
        statisticsResult.write(etree.tostring(g_statisticsRoot, encoding= "UTF-8", xml_declaration=True, pretty_print=True , with_comments=True))
        statisticsResult.flush()
        statisticsResult.close()