Java解析XML的四种方法详解

时间:2021-08-14 19:24:40

XML现在已经成为一种通用的数据交换格式,它的平台无关性,语言无关性,系统无关性,给数据集成与交互带来了极大的方便。对于XML本身的语法知识与技术细节,需要阅读相关的技术文献,这里面包括的内容有DOM(Document Object Model),DTD(Document Type Definition),SAX(Simple API for XML),XSD(Xml Schema Definition),XSLT(Extensible Stylesheet Language Transformations)。


XML在不同的语言里解析方式都是一样的,只不过实现的语法不同而已。基本的解析方式有两种,一种叫SAX,另一种叫DOM。SAX是基于事件流的解析,DOM是基于XML文档树结构的解析。假设我们XML的内容和结构如下:


view sourceprint?
1 <?xml version="1.0" encoding="UTF-8"?>  
2 <employees>  
3     <employee>  
4         <name>ddviplinux</name>  
5         <sex>m</sex>  
6         <age>30</age>  
7     </employee>  
8 </employees>


本文使用Java语言来实现DOM与SAX的XML文档生成与解析。首先定义一个操作XML文档的接口XmlDocument,它声明了建立XML文档与解析XML文档的两个方法。


view sourceprint?
/**
 * Contract shared by the demo classes: each implementation can create an XML
 * document on disk and parse one back.
 */
public interface XmlDocument {
    /**
     * Creates an XML document.
     *
     * @param fileName full path of the file to write
     */
    void createXml(String fileName);

    /**
     * Parses an XML document.
     *
     * @param fileName full path of the file to read
     */
    void parserXml(String fileName);
}


1、DOM生成和解析XML文档
DOM 为 XML 文档的已解析版本定义了一组接口。解析器读入整个文档,然后构建一个驻留内存的树结构,然后代码就可以使用 DOM 接口来操作这个树结构。优点:整个文档树在内存中,便于操作;支持删除、修改、重新排列等多种功能;缺点:将整个文档调入内存(包括无用的节点),浪费时间和空间;使用场合:一旦解析了文档还需多次访问这些数据;硬件资源充足(内存、CPU)。
view sourceprint?
001 package com.alisoft.facepay.framework.bean;
002   
003 import java.io.FileInputStream;
004 import java.io.FileNotFoundException;
005 import java.io.FileOutputStream;
006 import java.io.IOException;
007 import java.io.InputStream;
008 import java.io.PrintWriter;
009 import javax.xml.parsers.DocumentBuilder;
010 import javax.xml.parsers.DocumentBuilderFactory;
011 import javax.xml.parsers.ParserConfigurationException;
012 import javax.xml.transform.OutputKeys;
013 import javax.xml.transform.Transformer;
014 import javax.xml.transform.TransformerConfigurationException;
015 import javax.xml.transform.TransformerException;
016 import javax.xml.transform.TransformerFactory;
017 import javax.xml.transform.dom.DOMSource;
018 import javax.xml.transform.stream.StreamResult;
019 import org.w3c.dom.Document;
020 import org.w3c.dom.Element;
021 import org.w3c.dom.Node;
022 import org.w3c.dom.NodeList;
023 import org.xml.sax.SAXException;
024   
025 public class DomDemo implements XmlDocument {
026     private Document document;
027     private String fileName;
028   
029     public void init() {
030         try{
031             DocumentBuilderFactory factory = DocumentBuilderFactory
032                     .newInstance();
033             DocumentBuilder builder = factory.newDocumentBuilder();
034             this.document = builder.newDocument();
035         }catch (ParserConfigurationException e) {
036             System.out.println(e.getMessage());
037         }
038     }
039   
040     public void createXml(String fileName) {
041         Element root =this.document.createElement("employees");
042         this.document.appendChild(root);
043         Element employee =this.document.createElement("employee");
044         Element name =this.document.createElement("name");
045         name.appendChild(this.document.createTextNode("丁宏亮"));
046         employee.appendChild(name);
047         Element sex =this.document.createElement("sex");
048         sex.appendChild(this.document.createTextNode("m"));
049         employee.appendChild(sex);
050         Element age =this.document.createElement("age");
051         age.appendChild(this.document.createTextNode("30"));
052         employee.appendChild(age);
053         root.appendChild(employee);
054         TransformerFactory tf = TransformerFactory.newInstance();
055         try{
056             Transformer transformer = tf.newTransformer();
057             DOMSource source =new DOMSource(document);
058             transformer.setOutputProperty(OutputKeys.ENCODING,"gb2312");
059             transformer.setOutputProperty(OutputKeys.INDENT,"yes");
060             PrintWriter pw =new PrintWriter(newFileOutputStream(fileName));
061             StreamResult result =new StreamResult(pw);
062             transformer.transform(source, result);
063             System.out.println("生成XML文件成功!");
064         }catch (TransformerConfigurationException e) {
065             System.out.println(e.getMessage());
066         }catch (IllegalArgumentException e) {
067             System.out.println(e.getMessage());
068         }catch (FileNotFoundException e) {
069             System.out.println(e.getMessage());
070         }catch (TransformerException e) {
071             System.out.println(e.getMessage());
072         }
073     }
074   
075     public void parserXml(String fileName) {
076         try{
077             DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
078             DocumentBuilder db = dbf.newDocumentBuilder();
079             Document document = db.parse(fileName);
080             NodeList employees = document.getChildNodes();
081             for(int i = 0; i < employees.getLength(); i++) {
082                 Node employee = employees.item(i);
083                 NodeList employeeInfo = employee.getChildNodes();
084                 for(int j = 0; j < employeeInfo.getLength(); j++) {
085                     Node node = employeeInfo.item(j);
086                     NodeList employeeMeta = node.getChildNodes();
087                     for(int k = 0; k < employeeMeta.getLength(); k++) {
088                         System.out.println(employeeMeta.item(k).getNodeName()
089                                 +":" + employeeMeta.item(k).getTextContent());
090                     }
091                 }
092             }
093             System.out.println("解析完毕");
094         }catch (FileNotFoundException e) {
095             System.out.println(e.getMessage());
096         }catch (ParserConfigurationException e) {
097             System.out.println(e.getMessage());
098         }catch (SAXException e) {
099             System.out.println(e.getMessage());
100         }catch (IOException e) {
101             System.out.println(e.getMessage());
102         }
103     }
104 }


2、SAX生成和解析XML文档
为解决DOM的问题,出现了SAX。SAX ,事件驱动。当解析器发现元素开始、元素结束、文本、文档的开始或结束等时,发送事件,程序员编写响应这些事件的代码,保存数据。优点:不用事先调入整个文档,占用资源少;SAX解析器代码比DOM解析器代码小,适于Applet,下载。缺点:不是持久的;事件过后,若没保存数据,那么数据就丢了;无状态性;从事件中只能得到文本,但不知该文本属于哪个元素;使用场合:Applet;只需XML文档的少量内容,很少回头访问;机器内存少;
Java代码:


view sourceprint?
001 import java.io.FileInputStream;   
002 import java.io.FileNotFoundException;   
003 import java.io.IOException;   
004 import java.io.InputStream;   
005 import javax.xml.parsers.ParserConfigurationException;   
006 import javax.xml.parsers.SAXParser;   
007 import javax.xml.parsers.SAXParserFactory;   
008 import org.xml.sax.Attributes;   
009 import org.xml.sax.SAXException;   
010 import org.xml.sax.helpers.DefaultHandler;   
011    
012 public class SaxDemo implements XmlDocument {   
013     public void createXml(String fileName) {   
014         System.out.println("<<"+filename+">>");   
015     }   
016     public void parserXml(String fileName) {   
017         SAXParserFactory saxfac = SAXParserFactory.newInstance();   
018         try{   
019             SAXParser saxparser = saxfac.newSAXParser();   
020             InputStream is =new FileInputStream(fileName);   
021             saxparser.parse(is,new MySAXHandler());   
022         }catch (ParserConfigurationException e) {   
023             e.printStackTrace();   
024         }catch (SAXException e) {   
025             e.printStackTrace();   
026         }catch (FileNotFoundException e) {   
027             e.printStackTrace();   
028         }catch (IOException e) {   
029             e.printStackTrace();   
030         }   
031     }   
032 }   
/**
 * SAX handler that echoes parse events to standard output.
 *
 * <p>It prints a banner at document start and end, the name of each
 * {@code employee} element when it opens, every chunk of character data, and,
 * when an element closes, each of that element's attributes as its qualified
 * name immediately followed by its value. The {@code employees} element is
 * skipped, so its attributes are never printed.
 */
public class MySAXHandler extends DefaultHandler {
    /** True once any element carrying attributes has been opened. */
    boolean hasAttribute = false;
    /** Snapshot of the attributes of the most recently opened element that had any. */
    Attributes attributes = null;
    /** One entry per open element: a snapshot of its attributes, or null when there is nothing to print. */
    private final java.util.LinkedList<Attributes> openElements =
            new java.util.LinkedList<Attributes>();

    public void startDocument() throws SAXException {
        openElements.clear();
        System.out.println("文档开始打印了");
    }

    public void endDocument() throws SAXException {
        System.out.println("文档打印结束了");
    }

    public void startElement(String uri, String localName, String qName,
            Attributes attributes) throws SAXException {
        if (qName.equals("employees")) {
            // Keep the stack balanced with endElement even for the skipped element.
            openElements.addLast(null);
            return;
        }
        if (qName.equals("employee")) {
            System.out.println(qName);
        }
        Attributes snapshot = null;
        if (attributes.getLength() > 0) {
            // The parser may reuse the Attributes object once this callback
            // returns, so copy it instead of keeping the reference.
            snapshot = new org.xml.sax.helpers.AttributesImpl(attributes);
            this.attributes = snapshot;
            this.hasAttribute = true;
        }
        openElements.addLast(snapshot);
    }

    public void endElement(String uri, String localName, String qName) throws SAXException {
        if (openElements.isEmpty()) {
            return;
        }
        // Print the attributes of the element being closed, not of whichever
        // element happened to be seen last.
        Attributes own = openElements.removeLast();
        if (own != null) {
            for (int i = 0; i < own.getLength(); i++) {
                System.out.println(own.getQName(i) + own.getValue(i));
            }
        }
    }

    public void characters(char[] ch, int start, int length) throws SAXException {
        System.out.println(new String(ch, start, length));
    }
}
065   
066 import java.io.FileInputStream; 
067 import java.io.FileNotFoundException; 
068 import java.io.IOException; 
069 import java.io.InputStream; 
070 import javax.xml.parsers.ParserConfigurationException; 
071 import javax.xml.parsers.SAXParser; 
072 import javax.xml.parsers.SAXParserFactory; 
073 import org.xml.sax.Attributes; 
074 import org.xml.sax.SAXException; 
075 import org.xml.sax.helpers.DefaultHandler; 
076   
077 public class SaxDemo implements XmlDocument { 
078     public void createXml(String fileName) { 
079         System.out.println("<<"+filename+">>"); 
080     
081     public void parserXml(String fileName) { 
082         SAXParserFactory saxfac = SAXParserFactory.newInstance(); 
083         try
084             SAXParser saxparser = saxfac.newSAXParser(); 
085             InputStream is =new FileInputStream(fileName); 
086             saxparser.parse(is,new MySAXHandler()); 
087         }catch (ParserConfigurationException e) { 
088             e.printStackTrace(); 
089         }catch (SAXException e) { 
090             e.printStackTrace(); 
091         }catch (FileNotFoundException e) { 
092             e.printStackTrace(); 
093         }catch (IOException e) { 
094             e.printStackTrace(); 
095         
096     
097
/**
 * SAX handler that echoes parse events to standard output.
 *
 * <p>It prints a banner at document start and end, the name of each
 * {@code employee} element when it opens, every chunk of character data, and,
 * when an element closes, each of that element's attributes as its qualified
 * name immediately followed by its value. The {@code employees} element is
 * skipped, so its attributes are never printed.
 */
public class MySAXHandler extends DefaultHandler {
    /** True once any element carrying attributes has been opened. */
    boolean hasAttribute = false;
    /** Snapshot of the attributes of the most recently opened element that had any. */
    Attributes attributes = null;
    /** One entry per open element: a snapshot of its attributes, or null when there is nothing to print. */
    private final java.util.LinkedList<Attributes> openElements =
            new java.util.LinkedList<Attributes>();

    public void startDocument() throws SAXException {
        openElements.clear();
        System.out.println("文档开始打印了");
    }

    public void endDocument() throws SAXException {
        System.out.println("文档打印结束了");
    }

    public void startElement(String uri, String localName, String qName,
            Attributes attributes) throws SAXException {
        if (qName.equals("employees")) {
            // Keep the stack balanced with endElement even for the skipped element.
            openElements.addLast(null);
            return;
        }
        if (qName.equals("employee")) {
            System.out.println(qName);
        }
        Attributes snapshot = null;
        if (attributes.getLength() > 0) {
            // The parser may reuse the Attributes object once this callback
            // returns, so copy it instead of keeping the reference.
            snapshot = new org.xml.sax.helpers.AttributesImpl(attributes);
            this.attributes = snapshot;
            this.hasAttribute = true;
        }
        openElements.addLast(snapshot);
    }

    public void endElement(String uri, String localName, String qName) throws SAXException {
        if (openElements.isEmpty()) {
            return;
        }
        // Print the attributes of the element being closed, not of whichever
        // element happened to be seen last.
        Attributes own = openElements.removeLast();
        if (own != null) {
            for (int i = 0; i < own.getLength(); i++) {
                System.out.println(own.getQName(i) + own.getValue(i));
            }
        }
    }

    public void characters(char[] ch, int start, int length) throws SAXException {
        System.out.println(new String(ch, start, length));
    }
}
3、DOM4J生成和解析XML文档
DOM4J 是一个非常优秀的 Java XML API,具有性能优异、功能强大和极其易于使用的特点,同时它也是一个开放源代码的软件。如今你可以看到越来越多的 Java 软件都在使用 DOM4J 来读写 XML,特别值得一提的是连 Sun 的 JAXM 也在用 DOM4J。
Java代码:



view sourceprint?
01 import java.io.File;
02 import java.io.FileWriter;
03 import java.io.IOException;
04 import java.io.Writer;
05 import java.util.Iterator;
06 import org.dom4j.Document;
07 import org.dom4j.DocumentException;
08 import org.dom4j.DocumentHelper;
09 import org.dom4j.Element;
10 import org.dom4j.io.SAXReader;
11 import org.dom4j.io.XMLWriter;
12   
13 /** * * @author hongliang.dinghl * Dom4j 生成XML文档与解析XML文档 */
14 public class Dom4jDemo implements XmlDocument {
15     public void createXml(String fileName) {
16         Document document = DocumentHelper.createDocument();
17         Element employees = document.addElement("employees");
18         Element employee = employees.addElement("employee");
19         Element name = employee.addElement("name");
20         name.setText("ddvip");
21         Element sex = employee.addElement("sex");
22         sex.setText("m");
23         Element age = employee.addElement("age");
24         age.setText("29");
25         try{
26             Writer fileWriter =new FileWriter(fileName);
27             XMLWriter xmlWriter =new XMLWriter(fileWriter);
28             xmlWriter.write(document);
29             xmlWriter.close();
30         }catch (IOException e) {
31             System.out.println(e.getMessage());
32         }
33     }
34   
35     public void parserXml(String fileName) {
36         File inputXml =new File(fileName);
37         SAXReader saxReader =new SAXReader();
38         try{
39             Document document = saxReader.read(inputXml);
40             Element employees = document.getRootElement();
41             for(Iterator i = employees.elementIterator(); i.hasNext();) {
42                 Element employee = (Element) i.next();
43                 for(Iterator j = employee.elementIterator(); j.hasNext();) {
44                     Element node = (Element) j.next();
45                     System.out.println(node.getName() +":" + node.getText());
46                 }
47             }
48         }catch (DocumentException e) {
49             System.out.println(e.getMessage());
50         }
51         System.out.println("dom4j parserXml");
52     }
53 }
4、JDOM生成和解析XML
为减少DOM、SAX的编码量,出现了JDOM;优点:20-80原则,极大减少了代码量。使用场合:要实现的功能简单,如解析、创建等,但在底层,JDOM还是使用SAX(最常用)、DOM、Xalan来处理文档。
view sourceprint?
01 import java.io.FileNotFoundException;
02 import java.io.FileOutputStream;
03 import java.io.IOException;
04 import java.util.List;
05 import org.jdom.Document;
06 import org.jdom.Element;
07 import org.jdom.JDOMException;
08 import org.jdom.input.SAXBuilder;
09 import org.jdom.output.XMLOutputter;
10   
11 public class JDomDemo implements XmlDocument {
12     public void createXml(String fileName) {
13         Document document;
14         Element root;
15         root =new Element("employees");
16         document =new Document(root);
17         Element employee =new Element("employee");
18         root.addContent(employee);
19         Element name =new Element("name");
20         name.setText("ddvip");
21         employee.addContent(name);
22         Element sex =new Element("sex");
23         sex.setText("m");
24         employee.addContent(sex);
25         Element age =new Element("age");
26         age.setText("23");
27         employee.addContent(age);
28         XMLOutputter XMLOut =new XMLOutputter();
29         try{
30             XMLOut.output(document,new FileOutputStream(fileName));
31         }catch (FileNotFoundException e) {
32             e.printStackTrace();
33         }catch (IOException e) {
34             e.printStackTrace();
35         }
36     }
37   
38     public void parserXml(String fileName) {
39         SAXBuilder builder =new SAXBuilder(false);
40         try{
41             Document document = builder.build(fileName);
42             Element employees = document.getRootElement();
43             List employeeList = employees.getChildren("employee");
44             for(int i = 0; i < employeeList.size(); i++) {
45                 Element employee = (Element) employeeList.get(i);
46                 List employeeInfo = employee.getChildren();
47                 for(int j = 0; j < employeeInfo.size(); j++) {
48                     System.out.println(((Element) employeeInfo.get(j))
49                             .getName()
50                             +":" + ((Element) employeeInfo.get(j)).getValue());
51                 }
52             }
53         }catch (JDOMException e) {
54             e.printStackTrace();
55         }catch (IOException e) {
56             e.printStackTrace();
57         }
58     }
59 }



关键字: javaxml java解析xml dom解析 xslt sax JDOM DOM4J