热搜：编程 Python Microsoft 编程语言 C++

python解析xml模块封装代码

发布时间：2024-04-26 08:31:26 来源：互联网

.

一个兢兢业业、无微不至的保姆。你用自己的一只手——雪，把那青绿的麦苗爱抚地搂在怀里，给它以温暖和关怀。麦苗在那层层棉被下静静地期待，期待着来春生长发育。蛇、鳝、熊等等，有的钻进了深泥，有的藏入树洞，休息一冬，养精蓄锐，方有新春来到时的精神振奋。你用自己的另一只手——寒风，举刀挥剑，把那些残害农作物的害虫，砍光杀尽，把那些残害人类、牲畜的虎豹豺狼驱赶进深山老林。冬呵！你严守阵地，不容侵犯；你，是一位最负责任的对友火热温情，对敌残忍无情的好保姆。

有如下的xml文件：


<?xml version="1.0" encoding="utf-8" ?> 
<root> 
<childs> 
<child name='first' >1</child> 
<child value="2">2</child> 
</childs> 
</root>

下面介绍python解析xml文件的几种方法，使用python模块实现。

方式1，python模块实现自动遍历所有节点：


#!/usr/bin/env python 
# -*- coding: utf-8 -*- 
from xml.sax.handler import ContentHandler 
from xml.sax import parse
class TestHandle(ContentHandler):
    """SAX content handler that traces parse events and collects text.

    Every start tag, end tag and run of character data is printed as it
    is reported by the parser.  Each run of character data (including
    whitespace between elements) is also appended to the caller-supplied
    list, so the caller can inspect the text after parsing finishes.

    Args:
        inlist: Mutable list that receives every character chunk, in
            document order.  The parser may split one text node into
            several chunks.
    """

    def __init__(self, inlist):
        # Explicit base call (not super()): ContentHandler is an old-style
        # class on Python 2, and this form works on both major versions.
        ContentHandler.__init__(self)
        self.inlist = inlist

    def startElement(self, name, attrs):
        """Print the element name and the names of its attributes."""
        print('name: %s attrs: %s' % (name, attrs.keys()))

    def endElement(self, name):
        """Print the name of the element that just closed."""
        print('endname %s' % name)

    def characters(self, chars):
        """Print a chunk of character data and record it in ``inlist``."""
        print('chars %s' % chars)
        self.inlist.append(chars)
if __name__ == '__main__':
    # Demo: stream-parse test.xml from the current directory, tracing each
    # SAX event, then show every character chunk that was collected.
    lt = []
    parse('test.xml', TestHandle(lt))
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(lt)

结果：
[html] view plaincopy
name: root attrs: []
chars name: childs attrs: []
chars name: child attrs: [u'name']
chars 1
endname child
chars name: child attrs: [u'value']
chars 2
endname child
chars endname childs
chars endname root
[u'\n', u'\n', u'1', u'\n', u'2', u'\n', u'\n']

方式2，python模块实现获取根节点，按需查找指定节点：


#!/usr/bin/env python 
# -*- coding: utf-8 -*- 
from xml.dom import minidom 
# Sample document used by the minidom demo below.  The XML declaration
# must be the very first characters of the string, otherwise the parser
# rejects the document.
xmlstr = '''<?xml version="1.0" encoding="UTF-8"?>
<hash>
 <request name='first'>/2/photos/square/type.xml</request>
 <error_code>21301</error_code>
 <error>auth faild!</error>
</hash>
'''
def doxml(xmlstr, show_help=True):
    """Parse *xmlstr* with minidom and print a walk of the root's children.

    The whole document and the root element are printed first.  Then the
    serialized form of every direct child of the root is printed; for
    element children the ``name`` attribute, node name, number of child
    nodes and the data of the first child node are printed as well.

    Args:
        xmlstr: XML document text to parse.
        show_help: When true (the default), also print ``help()`` output
            for every callable member of each element child.  Pass False
            to skip this very verbose section.

    Raises:
        xml.parsers.expat.ExpatError: If *xmlstr* is not well-formed XML.
    """
    dom = minidom.parseString(xmlstr)
    print('Dom:')
    print(dom.toxml())

    # documentElement is always the root element; firstChild could be a
    # comment, processing instruction or doctype that precedes it.
    root = dom.documentElement
    print('root:')
    print(root.toxml())

    for child in root.childNodes:
        print(child.toxml())
        # Only elements have attributes; text, comment and other node
        # types are printed above and otherwise skipped.
        if child.nodeType != child.ELEMENT_NODE:
            continue
        print('child node attribute name: %s' % child.getAttribute('name'))
        print('child node name: %s' % child.nodeName)
        print('child node len: %s' % len(child.childNodes))
        # firstChild is None for an empty element, and a nested element has
        # no ``data`` attribute; fall back to an empty string in both cases.
        print('child data: %s' % getattr(child.firstChild, 'data', ''))
        print('=======================================')
        if show_help:
            print('more help info to see:')
            for med in dir(child):
                member = getattr(child, med)
                # help() treats a plain string as a topic name to look up,
                # so only callable members are passed to it.
                if callable(member):
                    help(member)
if __name__ == '__main__':
    # Script entry point: run the minidom walk-through on the sample
    # document held in the module-level xmlstr.
    doxml(xmlstr)

结果：
[html] view plaincopy
Dom:
<?xml version="1.0" ?><hash>
<request name="first">/2/photos/square/type.xml</request>
<error_code>21301</error_code>
<error>auth faild!</error>
</hash>
root:
<hash>
<request name="first">/2/photos/square/type.xml</request>
<error_code>21301</error_code>
<error>auth faild!</error>
</hash> <request name="first">/2/photos/square/type.xml</request>
child node attribute name: first
child node name: request
child node len: 1
child data: /2/photos/square/type.xml
=======================================
more help info to see:
两种方法各有其优点，python的xml处理模块太多，目前只用到这2个。

=====补充分割线================
实际工作中发现python的minidom无法解析其它编码的xml，只能解析utf-8的编码，而且xml文件的头部声明也必须是utf-8，为其它编码会报错误。
网上的解决办法都是替换xml文件头部的编码声明，然后转换编码为utf-8再用minidom解析，实际测试为可行，不过有点累赘的感觉。

本节是 python解析xml模块封装代码的第二部分。
====写xml内容的分割线=========


#!/usr/bin/env python
#encoding: utf-8 
from xml.dom import minidom


class xmlwrite:
    """Build a small report XML document in memory and save it to a file.

    The document has a single root element (``api`` by default).  Nodes
    are added with :meth:`write_node` or one of the ``write_*`` helpers,
    and :meth:`save_xml` serializes the result as UTF-8.

    Parent locations are given as simple ``/``-separated paths relative
    to the root.  Each step is either ``Tag`` (the first child with that
    name) or ``Tag[i]`` (the zero-based *i*-th child with that name), for
    example ``/Case[1]/``.  ``/`` alone means the root element.

    Args:
        resultfile: Path of the file written by :meth:`save_xml`.
        rootname: Tag name of the document's root element.
    """

    def __init__(self, resultfile, rootname='api'):
        self.resultfile = resultfile
        self.rootname = rootname
        self.__create_xml_dom()

    def __create_xml_dom(self):
        """Create the empty document and remember its root element."""
        xmlimpl = minidom.getDOMImplementation()
        self.dom = xmlimpl.createDocument(None, self.rootname, None)
        self.root = self.dom.documentElement

    def __get_spec_node(self, xpath):
        """Return the node addressed by *xpath*, or None if it is missing.

        Raises:
            ValueError: If a step has a malformed index such as ``Tag[x]``.
        """
        node = self.root
        for step in xpath.split('/'):
            step = step.strip()
            if not step:
                # Leading, trailing or doubled slashes produce empty steps.
                continue
            bracket = step.find('[')
            if bracket > -1:
                tag = step[:bracket]
                index = int(step[bracket + 1:-1])
            else:
                # No index given: take the first child with this name.
                tag = step
                index = 0
            matches = [c for c in node.childNodes if c.nodeName == tag]
            if index < 0 or index >= len(matches):
                return None
            node = matches[index]
        return node

    def write_node(self, parent, nodename, value, attribute=None, CDATA=False):
        """Create element *nodename* and append it under *parent*.

        Args:
            parent: Path of the parent node (see the class docstring).  If
                it cannot be resolved, a notice is printed and the new
                element is appended to the root instead.
            nodename: Tag name of the new element.
            value: Text content; a falsy value creates an empty element.
            attribute: Optional dict of attribute names to values.
            CDATA: When true, store *value* in a CDATA section instead of
                a plain text node.
        """
        node = self.dom.createElement(nodename)
        if value:
            if CDATA:
                content = self.dom.createCDATASection(value)
            else:
                content = self.dom.createTextNode(value)
            node.appendChild(content)
        if attribute and isinstance(attribute, dict):
            for key, attr_value in attribute.items():
                node.setAttribute(key, attr_value)
        try:
            parentnode = self.__get_spec_node(parent)
        except (AttributeError, TypeError, ValueError):
            # Non-string path or malformed index: treat as "not found".
            parentnode = None
        if parentnode is None:
            print('Get parent Node Fail, Use the Root as parent Node')
            parentnode = self.root
        parentnode.appendChild(node)

    def write_start_time(self, time):
        """Append a ``StartTime`` element to the root."""
        self.write_node('/', 'StartTime', time)

    def write_end_time(self, time):
        """Append an ``EndTime`` element to the root."""
        self.write_node('/', 'EndTime', time)

    def write_pass_count(self, count):
        """Append a ``PassCount`` element to the root."""
        self.write_node('/', 'PassCount', count)

    def write_fail_count(self, count):
        """Append a ``FailCount`` element to the root."""
        self.write_node('/', 'FailCount', count)

    def write_case(self):
        """Append a new, empty ``Case`` element to the root."""
        self.write_node('/', 'Case', None)

    def write_case_no(self, index, value):
        """Append a ``No`` element to the *index*-th ``Case``."""
        self.write_node('/Case[%s]/' % index, 'No', value)

    def write_case_url(self, index, value):
        """Append a ``URL`` element to the *index*-th ``Case``."""
        self.write_node('/Case[%s]/' % index, 'URL', value)

    def write_case_dbdata(self, index, value):
        """Append a ``DBData`` element to the *index*-th ``Case``."""
        self.write_node('/Case[%s]/' % index, 'DBData', value)

    def write_case_apidata(self, index, value):
        """Append an ``APIData`` element to the *index*-th ``Case``."""
        self.write_node('/Case[%s]/' % index, 'APIData', value)

    def write_case_dbsql(self, index, value):
        """Append a ``DBSQL`` element (as CDATA) to the *index*-th ``Case``."""
        self.write_node('/Case[%s]/' % index, 'DBSQL', value, CDATA=True)

    def write_case_apixpath(self, index, value):
        """Append an ``APIXPath`` element to the *index*-th ``Case``."""
        self.write_node('/Case[%s]/' % index, 'APIXPath', value)

    def save_xml(self):
        """Serialize the document as UTF-8 and write it to ``resultfile``."""
        # toxml(encoding=...) returns encoded bytes, so the file is opened
        # in binary mode and its contents always match the declared encoding.
        with open(self.resultfile, 'wb') as myfile:
            myfile.write(self.dom.toxml(encoding='utf-8'))


if __name__ == '__main__':
    xr = xmlwrite(r'D:\test.xml')
    xr.write_start_time('2223')
    xr.write_end_time('444')
    xr.write_pass_count('22')
    xr.write_fail_count('33')
    xr.write_case()
    xr.write_case()
    xr.write_case_no(0, '0')
    xr.write_case_url(0, 'http://www.google.com')
    xr.write_case_url(0, 'http://www.google.com')
    xr.write_case_dbsql(0, 'select * from ')
    xr.write_case_dbdata(0, 'dbtata')
    xr.write_case_apixpath(0, '/xpath')
    xr.write_case_apidata(0, 'apidata')
    xr.write_case_no(1, '1')
    xr.write_case_url(1, 'http://www.baidu.com')
    xr.write_case_url(1, 'http://www.baidu.com')
    xr.write_case_dbsql(1, 'select 1 from ')
    xr.write_case_dbdata(1, 'dbtata1')
    xr.write_case_apixpath(1, '/xpath1')
    xr.write_case_apidata(1, 'apidata1')
    xr.save_xml()