怎样在Android中解析doc、docx、xls、xlsx格式文件

时间:2024-03-13 22:05:49

怎样在Android中解析doc、docx、xls、xlsx格式文件

http://www.open-open.com/home/space-37924-do-blog-id-5872.html
解析 doc 文件需要依赖 tm-extractors-0.4.jar 这个包;
解析 xls 文件需要依赖 jxl.jar 这个包。docx 和 xlsx 文件本质上是 ZIP 压缩的 XML,下面的代码直接用 ZipFile 和 XmlPullParser 解析,不需要额外的 jar 包。
01 public static String readDOC(String path) {
02                 // 创建输入流读取doc文件
03                 FileInputStream in;
04                 String text = null;
05 //                Environment.getExternalStorageDirectory().getAbsolutePath()+ "/aa.doc")
06                 try {
07                         in = new FileInputStream(new File(path));
08                         int a= in.available();
09                         WordExtractor extractor = null;
10                         // 创建WordExtractor
11                         extractor = new WordExtractor();
12                         // 对doc文件进行提取
13                         text = extractor.extractText(in);
14                         System.out.println("解析得到的东西"+text);
15                 } catch (FileNotFoundException e) {
16                         e.printStackTrace();
17                 } catch (Exception e) {
18                         e.printStackTrace();
19                 }
20                 if (text == null) {
21                         text = "解析文件出现问题";
22                 }
23                 return text;
24         }
       解析xls
01 public static String readXLS(String path) {
02                 String str = "";
03                 try {
04                         Workbook workbook = null;
05                         workbook = Workbook.getWorkbook(new File(path));
06                         Sheet sheet = workbook.getSheet(0);
07                         Cell cell = null;
08                         int columnCount = sheet.getColumns();
09                         int rowCount = sheet.getRows();
10                         for (int i = 0; i < rowCount; i++) {
11                                 for (int j = 0; j < columnCount; j++) {
12                                         cell = sheet.getCell(j, i);
13                                         String temp2 = "";
14                                         if (cell.getType() == CellType.NUMBER) {
15                                                 temp2 = ((NumberCell) cell).getValue() + "";
16                                         } else if (cell.getType() == CellType.DATE) {
17                                                 temp2 = "" + ((DateCell) cell).getDate();
18                                         } else {
19                                                 temp2 = "" + cell.getContents();
20                                         }
21                                         str = str + "  " + temp2;
22                                 }
23                                 str += "\n";
24                         }
25                         workbook.close();
26                 } catch (Exception e) {
27                 }
28                 if (str == null) {
29                         str = "解析文件出现问题";
30                 }
31                 return str;
32  
33         }

 

解析docx

 

01 public static String readDOCX(String path) {
02                 String river = "";
03                 try {
04                         ZipFile xlsxFile = new ZipFile(new File(path));
05                         ZipEntry sharedStringXML = xlsxFile.getEntry("word/document.xml");
06                         InputStream inputStream = xlsxFile.getInputStream(sharedStringXML);
07                         XmlPullParser xmlParser = Xml.newPullParser();
08                         xmlParser.setInput(inputStream, "utf-8");
09                         int evtType = xmlParser.getEventType();
10                         while (evtType != XmlPullParser.END_DOCUMENT) {
11                                 switch (evtType) {
12                                 case XmlPullParser.START_TAG:
13                                         String tag = xmlParser.getName();
14                                         System.out.println(tag);
15                                         if (tag.equalsIgnoreCase("t")) {
16                                                 river += xmlParser.nextText() + "\n";
17                                         }
18                                         break;
19                                 case XmlPullParser.END_TAG:
20                                         break;
21                                 default:
22                                         break;
23                                 }
24                                 evtType = xmlParser.next();
25                         }
26                 } catch (ZipException e) {
27                         e.printStackTrace();
28                 } catch (IOException e) {
29                         e.printStackTrace();
30                 } catch (XmlPullParserException e) {
31                         e.printStackTrace();
32                 }
33                 if (river == null) {
34                         river = "解析文件出现问题";
35                 }
36  
37                 return river;
38         }
        解析xlsx

 

01 public static String readXLSX(String path) {
02                 String str = "";
03                 String v = null;
04                 boolean flat = false;
05                 List<String> ls = new ArrayList<String>();
06                 try {
07                         ZipFile xlsxFile = new ZipFile(new File(path));
08                         ZipEntry sharedStringXML = xlsxFile
09                                         .getEntry("xl/sharedStrings.xml");
10                         InputStream inputStream = xlsxFile.getInputStream(sharedStringXML);
11                         XmlPullParser xmlParser = Xml.newPullParser();
12                         xmlParser.setInput(inputStream, "utf-8");
13                         int evtType = xmlParser.getEventType();
14                         while (evtType != XmlPullParser.END_DOCUMENT) {
15                                 switch (evtType) {
16                                 case XmlPullParser.START_TAG:
17                                         String tag = xmlParser.getName();
18                                         if (tag.equalsIgnoreCase("t")) {
19                                                 ls.add(xmlParser.nextText());
20                                         }
21                                         break;
22                                 case XmlPullParser.END_TAG:
23                                         break;
24                                 default:
25                                         break;
26                                 }
27                                 evtType = xmlParser.next();
28                         }
29                         ZipEntry sheetXML = xlsxFile.getEntry("xl/worksheets/sheet1.xml");
30                         InputStream inputStreamsheet = xlsxFile.getInputStream(sheetXML);
31                         XmlPullParser xmlParsersheet = Xml.newPullParser();
32                         xmlParsersheet.setInput(inputStreamsheet, "utf-8");
33                         int evtTypesheet = xmlParsersheet.getEventType();
34                         while (evtTypesheet != XmlPullParser.END_DOCUMENT) {
35                                 switch (evtTypesheet) {
36                                 case XmlPullParser.START_TAG:
37                                         String tag = xmlParsersheet.getName();
38                                         if (tag.equalsIgnoreCase("row")) {
39                                         } else if (tag.equalsIgnoreCase("c")) {
40                                                 String t = xmlParsersheet.getAttributeValue(null, "t");
41                                                 if (t != null) {
42                                                         flat = true;
43                                                         System.out.println(flat + "有");
44                                                 } else {
45                                                         System.out.println(flat + "没有");
46                                                         flat = false;
47                                                 }
48                                         } else if (tag.equalsIgnoreCase("v")) {
49                                                 v = xmlParsersheet.nextText();
50                                                 if (v != null) {
51                                                         if (flat) {
52                                                                 str += ls.get(Integer.parseInt(v)) + "  ";
53                                                         } else {
54                                                                 str += v + "  ";
55                                                         }
56                                                 }
57                                         }
58                                         break;
59                                 case XmlPullParser.END_TAG:
60                                         if (xmlParsersheet.getName().equalsIgnoreCase("row")
61                                                         && v != null) {
62                                                 str += "\n";
63                                         }
64                                         break;
65                                 }
66                                 evtTypesheet = xmlParsersheet.next();
67                         }
68                         System.out.println(str);
69                 } catch (ZipException e) {
70                         e.printStackTrace();
71                 } catch (IOException e) {
72                         e.printStackTrace();
73                 } catch (XmlPullParserException e) {
74                         e.printStackTrace();
75                 }
76                 if (str == null) {
77                         str = "解析文件出现问题";
78                 }
79  
80                 return str;