java实现word文件转html文件

最近在项目开发中，用户提出要在电脑上没有安装 Office 的情况下，也能在浏览器中打开 word 文件。最后确定的逻辑如下：用户选择想要查看的文件，页面 js 判断文件是否为 word；不是 word 则直接执行下载，是 word 则由后端根据文件后缀调用对应的转换方法。如果对应的 html 文件已存在，直接返回该 html 文件的地址；不存在则先生成 html 文件再返回地址。js 通过 open() 打开新的页签，展示 word 文件内容。新人一枚，如果代码中存在错误或有更好的实现，万望指正！

相关jar包

java实现word文件转html文件

代码（Java）：

									import java.io.ByteArrayOutputStream;
									import java.io.File;
									import java.io.FileInputStream;
									import java.io.FileNotFoundException;
									import java.io.FileOutputStream;
									import java.io.IOException;
									import java.io.InputStream;
									import java.io.OutputStream;

									import javax.xml.parsers.DocumentBuilderFactory;
									import javax.xml.parsers.ParserConfigurationException;
									import javax.xml.transform.OutputKeys;
									import javax.xml.transform.Transformer;
									import javax.xml.transform.TransformerException;
									import javax.xml.transform.TransformerFactory;
									import javax.xml.transform.dom.DOMSource;
									import javax.xml.transform.stream.StreamResult;

									import org.apache.poi.hwpf.HWPFDocument;
									import org.apache.poi.hwpf.converter.PicturesManager;
									import org.apache.poi.hwpf.converter.WordToHtmlConverter;
									import org.apache.poi.hwpf.usermodel.PictureType;
									import org.apache.poi.xwpf.converter.core.BasicURIResolver;
									import org.apache.poi.xwpf.converter.core.FileImageExtractor;
									import org.apache.poi.xwpf.converter.core.FileURIResolver;
									import org.apache.poi.xwpf.converter.xhtml.XHTMLConverter;
									import org.apache.poi.xwpf.converter.xhtml.XHTMLOptions;
									import org.apache.poi.xwpf.usermodel.XWPFDocument;
									import org.w3c.dom.Document;

									/**

									 * word 转换成html 2017-2-27 

									 */

									public class wordtohtml {

									  /**

									   * 将word2003转换为html文件 2017-2-27 

									   * @param wordpath word文件路径

									   * @param wordname word文件名称无后缀

									   * @param suffix  word文件后缀

									   * @throws ioexception

									   * @throws transformerexception

									   * @throws parserconfigurationexception

									   */

									  public string word2003tohtml(string wordpath,string wordname,string suffix) throws ioexception, transformerexception, parserconfigurationexception {

									    string htmlpath = wordpath + file.separator + wordname + "_show" + file.separator;

									    string htmlname = wordname + ".html";

									    final string imagepath = htmlpath + "image" + file.separator;

									    //判断html文件是否存在

									    file htmlfile = new file(htmlpath + htmlname);

									    if(htmlfile.exists()){ 

									      return htmlfile.getabsolutepath();

									    }

									    //原word文档

									    final string file = wordpath + file.separator + wordname + suffix;

									    inputstream input = new fileinputstream(new file(file));

									    hwpfdocument worddocument = new hwpfdocument(input);

									    wordtohtmlconverter wordtohtmlconverter = new wordtohtmlconverter(documentbuilderfactory.newinstance().newdocumentbuilder().newdocument());

									    //设置图片存放的位置

									    wordtohtmlconverter.setpicturesmanager(new picturesmanager() {

									      public string savepicture(byte[] content, picturetype picturetype, string suggestedname, float widthinches, float heightinches) {

									        file imgpath = new file(imagepath);

									        if(!imgpath.exists()){//图片目录不存在则创建

									          imgpath.mkdirs();

									        }

									        file file = new file(imagepath + suggestedname);

									        try {

									          outputstream os = new fileoutputstream(file);

									          os.write(content);

									          os.close();

									        } catch (filenotfoundexception e) {

									          e.printstacktrace();

									        } catch (ioexception e) {

									          e.printstacktrace();

									        }

									        //图片在html文件上的路径 相对路径

									        return "image/" + suggestedname;

									      }

									    });

									    //解析word文档

									    wordtohtmlconverter.processdocument(worddocument);

									    document htmldocument = wordtohtmlconverter.getdocument();

									    //生成html文件上级文件夹

									    file folder = new file(htmlpath);

									    if(!folder.exists()){ 

									      folder.mkdirs(); 

									    }

									    //生成html文件地址

									    outputstream outstream = new fileoutputstream(htmlfile);

									    domsource domsource = new domsource(htmldocument);

									    streamresult streamresult = new streamresult(outstream);

									    transformerfactory factory = transformerfactory.newinstance();

									    transformer serializer = factory.newtransformer();

									    serializer.setoutputproperty(outputkeys.encoding, "utf-8");

									    serializer.setoutputproperty(outputkeys.indent, "yes");

									    serializer.setoutputproperty(outputkeys.method, "html");

									    serializer.transform(domsource, streamresult);

									    outstream.close();

									    return htmlfile.getabsolutepath();

									  }

									  /**

									   * 2007版本word转换成html 2017-2-27

									   * @param wordpath word文件路径

									   * @param wordname word文件名称无后缀

									   * @param suffix  word文件后缀

									   * @return

									   * @throws ioexception

									   */

									  public string word2007tohtml(string wordpath,string wordname,string suffix) throws ioexception {

									    string htmlpath = wordpath + file.separator + wordname + "_show" + file.separator;

									    string htmlname = wordname + ".html";

									    string imagepath = htmlpath + "image" + file.separator;

									    //判断html文件是否存在

									    file htmlfile = new file(htmlpath + htmlname);

									    if(htmlfile.exists()){ 

									      return htmlfile.getabsolutepath();

									    }

									    //word文件

									    file wordfile = new file(wordpath + file.separator + wordname + suffix); 

									    // 1) 加载word文档生成 xwpfdocument对象 

									    inputstream in = new fileinputstream(wordfile); 

									    xwpfdocument document = new xwpfdocument(in); 

									    // 2) 解析 xhtml配置 (这里设置iuriresolver来设置图片存放的目录) 

									    file imgfolder = new file(imagepath);

									    xhtmloptions options = xhtmloptions.create();

									    options.setextractor(new fileimageextractor(imgfolder));

									    //html中图片的路径 相对路径 

									    options.uriresolver(new basicuriresolver("image"));

									    options.setignorestylesifunused(false); 

									    options.setfragment(true); 

									    // 3) 将 xwpfdocument转换成xhtml

									    //生成html文件上级文件夹

									    file folder = new file(htmlpath);

									    if(!folder.exists()){ 

									      folder.mkdirs(); 

									    }

									    outputstream out = new fileoutputstream(htmlfile); 

									    xhtmlconverter.getinstance().convert(document, out, options);

									    return htmlfile.getabsolutepath(); 

									  } 

									}

文件目录：

java实现word文件转html文件

以上就是本文的全部内容，希望对大家的学习有所帮助，也希望大家多多支持服务器之家。

秒客网

java实现word文件转html文件

相关文章