Java知识积累——Sax解析xml文档

时间:2022-10-31 13:21:33

整体思路:Sax解析是将xml文档以流(Stream)的方式读入,边读边解析,不像Dom解析那样把整个文档一次性加载到内存中,因此内存占用小,更适合解析较大的文档。Sax解析需要继承DefaultHandler,并重写其4个回调方法:startDocument(文档开始时调用)、startElement(遇到开始标签<>时调用)、endElement(遇到结束标签</>时调用)、characters(遇到TextNode即字符内容时调用)。需要注意,同一段文本可能被解析器分成多次characters回调传入,标签之间的空白和换行也会触发该回调。下面看一个例子:此程序将xml文件中的各个元素标签解析成对应的TagBean实例,每对标签会被解析成一个实例,并且按树状结构存储标签之间的父子关系。因为我的目的是解析Spring IoC的配置信息,所以这4个方法的处理都是基于Spring IoC的配置规则实现的;当你们解析其他特定的xml文档时,只要修改这4个方法中的解析规则即可。

TagBean的代码:

 /**
  * One XML element in the parsed tree: the element name, its attributes and
  * its child elements in the order they were added.
  */
 public class TagBean {
     /** Element name; stays null until {@link #setName(String)} is called. */
     private String name;
     /** Attribute name to value; created lazily, so it may be null. */
     private Map<String,String> attrList = null;
     /** Direct children, in insertion order. */
     private ArrayList<TagBean> childList = new ArrayList<TagBean>();

     /**
      * Prints this element and all of its descendants to standard output,
      * one element per line.
      *
      * Each line is {@code count} spaces, the name, a colon, and then every
      * attribute as {@code key--value} followed by a tab. Children are
      * printed one level deeper.
      *
      * @param count number of leading spaces for this element's line
      */
     public void print(int count) {
         for (int i = 0; i < count; i++) {
             System.out.print(" ");
         }
         System.out.print(name + ":");
         if (attrList != null) {
             // entrySet() yields key and value together, avoiding a lookup per key.
             for (Map.Entry<String,String> attr : attrList.entrySet()) {
                 System.out.print(attr.getKey() + "--" + attr.getValue() + "\t");
             }
         }
         System.out.println();

         for (TagBean child : childList) {
             child.print(count + 1);
         }
     }

     /**
      * @return the element name, or null if none has been set
      */
     public String getName() {
         return name;
     }

     /**
      * @param name the element name to store
      */
     public void setName(String name) {
         this.name = name;
     }

     /**
      * Returns the live attribute map, creating an empty one on first use so
      * that callers can always {@code put} into the result.
      *
      * @return the attribute map, never null
      */
     public Map<String, String> getAttrList() {
         if (attrList == null) {
             attrList = new HashMap<String,String>();
         }
         return attrList;
     }

     /**
      * Replaces the attribute map. The given map is stored as-is, not copied.
      *
      * @param attrList the new attribute map; may be null
      */
     public void setAttrList(Map<String, String> attrList) {
         this.attrList = attrList;
     }

     /**
      * @return the live list of direct children, never null
      */
     public ArrayList<TagBean> getChildList() {
         return childList;
     }

     /**
      * Appends one child element. Despite the setter-style name, this adds
      * to the child list rather than replacing it.
      *
      * @param childTag the child to append
      */
     public void setChildList(TagBean childTag) {
         this.childList.add(childTag);
     }

 }

解析服务的代码:

 1 public class SaxParsingService extends DefaultHandler{
 2     private ArrayList<TagBean> tagList = null;  
 3     private TagBean tag = null;
 5     private Stack<String> tagNameStack = null;//标记当前在处理的tag的隶属关系,遇到<>时入栈,遇到 </>时出栈
 6     private Stack<TagBean> tagBeanStack = null;
 7     private TagBean rootTag = null;
 8       
 9     public TagBean getRootTag(InputStream xmlStream) throws Exception{  
10         //获取sax分析器
11         SAXParserFactory factory = SAXParserFactory.newInstance();  
12         SAXParser parser = factory.newSAXParser();
13         //根据具体程序要求进行分析
14         SaxParsingService handler = new SaxParsingService();  
15         parser.parse(xmlStream, handler);  //给分析器传入文件路径和分析规则
16         return handler.getRootTag(); //分析结束,获得信息 
17     }  
18       
19     public ArrayList<TagBean> getTagList(){  
20         return tagList;
21     }  
22       
23     @Override  //当文件解析开始时,会调用此函数
24     public void startDocument() throws SAXException {  
25         tagList = new ArrayList<TagBean>();
26         tagNameStack = new Stack<String>();
27         tagBeanStack = new Stack<TagBean>();
28     }  
29   
30     @Override  //当碰到ElementNode时(如<books>),会调用此方法
31     public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {  
32         //System.out.println(qName+" start");
33         tagNameStack.push(qName);
34         
35         tag = new TagBean();
36         tag.setName(qName);
37         
38         //存储标签的属性信息
39         int index = attributes.getLength();
40         if(index > 0){
41             Map<String,String> attrList = new HashMap<String,String>();
42             for(int i = 0 ; i < index ; i++ ){
43                 attrList.put(attributes.getQName(i),attributes.getValue(i));
44             }
45             tag.setAttrList(attrList);
46         }
47         
48         //将正在解析的节点名称赋给preTag
49         //preTag = qName;
50         
51         //处理子标签问题
52         tagBeanStack.push(tag);
53         
54         //保存beans的引用
55         if("beans".equals(qName)){
56             rootTag = tag;
57         }
58     }  
59   
60     @Override  //当遇到</>时调用此方法
61     public void endElement(String uri, String localName, String qName)  throws SAXException {  
62         //System.out.println(qName+" end");
63         
64         //处理了</>后,需要弹栈,并且构建子标签与父标签的关系
65         tagNameStack.pop();
66         tag = tagBeanStack.pop();
67         if(!tag.getName().equals("beans")){
68             tagBeanStack.peek().setChildList(tag);
69         }
70         
71         //preTag = null;
72     }  
73       
74     @Override  //当遇到TextNode时调用,比如空格、值
75     public void characters(char[] ch, int start, int length) throws SAXException {  
76         //System.out.println("char here");
77         String content = new String(ch,start,length);  
78         content = content.trim();
79         
80         if(content.length() > 0){
81             tagNameStack.peek();
82             tag = tagBeanStack.peek();
83             tag.getAttrList().put(tagNameStack.peek(), content);
84         }
85         
86     }
87     
88     public TagBean getRootTag(){
89         return this.rootTag;
90     }
91 }

程序入口,Main类:

 1 public class Main {
 2       public void testSAX() throws Throwable{  
 3           SaxParsingService sax;  
 4           InputStream input = null;
 5           try {
 6              sax = new SaxParsingService();
 7              input = this.getClass().getClassLoader().getResourceAsStream(“test.xml”);
 8              
 9              //解析input的xml文件,存储成树结构的tag链
10              TagBean rootTag = sax.getRootTag(input);
11              
12              //输出解析后存储的标签树结构
13              rootTag.print(0);
14          } catch (Exception e) {
15              e.printStackTrace();
16          } finally{
17              input.close();
18          }
19       }
20       public static void main(String[] args) throws Throwable {
21         new Main().testSAX();
22       }
23 }

 

需要解析的文档:

 1 <?xml version="1.0" encoding="UTF-8"?>
 2 <beans 
 3     xmlns="http://www.springframework.org/schema/beans"
 4     xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
 5     xsi:schemaLocation="http://www.springframework.org/schema/beans http://www.springframework.org/schema/beans/spring-beans-3.2.xsd">
 6     <bean id = "beanB" class = "org.beans.BeanB">
 7         <property name = "ba">
 8             <ref bean = "beanA"/>
 9         </property>
10         <property name = "content">
11             <value>There is no problem!</value>
12         </property>
13         <property name="bc" ref="beanC"/>
14         <property name="listRef">
15             <list>
16                 <value>First</value>
17                 <value>Second</value>
18             </list>
19         </property>
20         <property name="listBeanRef">
21             <list>
22                 <ref bean = "beanA1"/>
23                 <ref bean = "beanA2"/>
24             </list>
25         </property>
26         <property name="max" value="15" />
27         <property name = "valueProps">
28             <props>
29                 <prop key = "strValueKey1">something1</prop>
30                 <prop key = "strValueKey2">something2</prop>
31             </props>
32         </property>
33         <property name="mapRef">
34             <map>
35                 <entry>
36                     <key>
37                         <value>key1</value>
38                     </key>
39                     <value>v1</value>
40                 </entry>
41                 <entry key="key2">
42                     <value>v2</value>
43                 </entry>
44                 <entry key="key3" value="v3"/>
45             </map>
46         </property>
47     </bean>
48     <bean id = "beanA" class = "org.beans.BeanA"/>
49     <bean id = "beanC" class = "org.beans.BeanC">
50         <constructor-arg >
51             <value>11</value>
52         </constructor-arg>
53         <constructor-arg>
54             <ref bean = "beanA3"/>
55         </constructor-arg>
56     </bean>
57     <bean id = "beanA1" class = "org.beans.BeanA">
58         <constructor-arg index = "1" value = "22"/>
59         <constructor-arg index = "0" ref = "beanA4"/>    
60     </bean>
61     <bean id = "beanA2" class = "org.beans.BeanA">
62         <constructor-arg index = "1" >
63             <value>33</value>
64         </constructor-arg>
65         <constructor-arg  ref = "beanA4"/>
66         <constructor-arg>
67             <value>44</value>
68         </constructor-arg>
69         <constructor-arg index = "0" value = "55"/>
70     </bean>
71 </beans>

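解析后,调用rootTag的print方法,打印出的树状数据结构如下(每深一层多缩进一个空格;属性按“属性名--属性值”的形式输出;标签内的文本以标签名作为键保存,例如 value:value--First;由于属性保存在HashMap中,输出顺序不一定与文档中的书写顺序一致):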

beans:xmlns:xsi--http://www.w3.org/2001/XMLSchema-instance    xmlns--http://www.springframework.org/schema/beans    xsi:schemaLocation--http://www.springframework.org/schema/beans http://www.springframework.org/schema/beans/spring-beans-3.2.xsd    
 bean:id--beanB    class--org.beans.BeanB    
  property:name--ba    
   ref:bean--beanA    
  property:name--content    
   value:value--There is no problem!    
  property:ref--beanC    name--bc    
  property:name--listRef    
   list:
    value:value--First    
    value:value--Second    
  property:name--listBeanRef    
   list:
    ref:bean--beanA1    
    ref:bean--beanA2    
  property:name--max    value--15    
  property:name--valueProps    
   props:
    prop:prop--something1    key--strValueKey1    
    prop:prop--something2    key--strValueKey2    
  property:name--mapRef    
   map:
    entry:
     key:
      value:value--key1    
     value:value--v1    
    entry:key--key2    
     value:value--v2    
    entry:value--v3    key--key3    
 bean:id--beanA    class--org.beans.BeanA    
 bean:id--beanC    class--org.beans.BeanC    
  constructor-arg:
   value:value--11    
  constructor-arg:
   ref:bean--beanA3    
 bean:id--beanA1    class--org.beans.BeanA    
  constructor-arg:index--1    value--22    
  constructor-arg:ref--beanA4    index--0    
 bean:id--beanA2    class--org.beans.BeanA    
  constructor-arg:index--1    
   value:value--33    
  constructor-arg:ref--beanA4    
  constructor-arg:
   value:value--44    
  constructor-arg:index--0    value--55

如有疑问,可留言沟通~