java使用poi转换doc/docx为pdf

时间:2024-01-30 11:11:59

为了方便前端预览 Word 文件,上传后将其转换为 PDF(也可以在预览时生成临时文件)。注意:如果 Word 中插入了表格,表格内的字体必须全部为宋体,否则转换出来的表格内容为空。

引用jar包

<!-- Apache POI: OOXML (.docx) support -->
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml</artifactId>
    <version>3.17</version>
</dependency>
<!-- Apache POI: core library -->
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi</artifactId>
    <version>3.17</version>
</dependency>
<!-- xdocreport: XWPF (docx) to PDF converter -->
<dependency>
    <groupId>fr.opensagres.xdocreport</groupId>
    <artifactId>fr.opensagres.poi.xwpf.converter.pdf-gae</artifactId>
    <version>2.0.1</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.poi/poi-scratchpad -->
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-scratchpad</artifactId>
    <version>3.17</version>
</dependency>
<!-- Apache POI: OOXML schema classes -->
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml-schemas</artifactId>
    <version>3.17</version>
</dependency>

 

docx 的转换比较简单,代码如下:

        FileInputStream fileInputStream = null;
        FileOutputStream  fileOutputStream=null;
        try {
            // 读取docx文件
            fileInputStream = new FileInputStream(inPath);
            XWPFDocument xwpfDocument = new XWPFDocument(fileInputStream);
            PdfOptions pdfOptions = PdfOptions.create();
            // 输出路径
            fileOutputStream = new FileOutputStream(outPath);
            // 调用转换
           PdfConverter.getInstance().convert(xwpfDocument,fileOutputStream,pdfOptions);
        } catch (IOException e) {
            e.printStackTrace();
        }finally {
                fileInputStream.close();
                fileOutputStream.close();
        }                    

doc 不能直接通过 POI 转换为 PDF。看到有教程是先转 HTML 再转 PDF,但这样需要结合 iText,还要处理 HTML 标签闭合问题,觉得太麻烦(好吧,其实是我懒)。

转换思路:找到一个可以把 doc 转成 docx 的 jar 包:spire.doc.free-3.9.0.jar

网址:https://www.e-iceblue.cn/Introduce/Free-Spire-Doc-JAVA.html        下载后引入或安装到本地maven仓库添加pom即可

官方有提示:免费版有篇幅限制。在加载或保存Word 文档时,要求 Word 文档不超过 500 个段落,25 个表格。同时将 Word 文档转换为 PDF 和 XPS 等格式时,仅支持转换前三页。

对于本文的场景来说,500 个段落足够了,但只能转换前三页有点少,所以这里仅使用它的 doc 转 docx 功能,代码如下:

 /**
     * Converts a .doc file to .docx by loading it into a Spire.Doc
     * {@code Document} and saving it as {@code FileFormat.Docx_2013}.
     *
     * @param path    path of the source .doc file
     * @param outPath path the generated .docx file is written to
     */
    public static void docToDOcx(String path, String outPath) {
        Document dc = new Document();
        try {
            dc.loadFromFile(path);
            dc.saveToFile(outPath, FileFormat.Docx_2013);
        } finally {
            // Release the document even when loading or saving fails.
            dc.close();
        }
    }

 

完整的工具类会先判断文档类型以做兼容处理:如果是 doc,就先生成临时的 docx 文档再进行转换。代码如下:

/**
 * Utility for converting Word documents (.doc / .docx) to PDF.
 *
 * <p>.docx files are converted directly through POI's {@code XWPFDocument} and
 * the xdocreport {@code PdfConverter}. .doc files are first converted to a
 * temporary .docx, which is removed once the conversion has finished.
 */
public class PdfUtil {
    /**
     * File extension of Word 2003-and-earlier documents.
     */
    private static final String word2003L = ".doc";
    /**
     * File extension of Word 2007-and-later documents.
     */
    private final static String word2007U = ".docx";

    /**
     * Converts a Word document to PDF.
     *
     * @param inPath  path of the source .doc or .docx file; the type is decided
     *                by the (case-insensitive) file extension
     * @param outPath path the PDF is written to
     * @return {@code true} when the PDF was written; {@code false} when either
     *         path is {@code null} or {@code inPath} does not end in
     *         {@code .doc} / {@code .docx}
     * @throws IOException if reading the source, converting, or writing the
     *                     PDF fails
     */
    public static boolean doc2pdf(String inPath, String outPath) throws IOException {
        if (inPath == null || outPath == null) {
            return false;
        }
        // lastIndexOf returns -1 when the path has no dot; substring(-1) would
        // throw, so treat such paths as an unsupported type instead.
        int dotIndex = inPath.lastIndexOf('.');
        if (dotIndex < 0) {
            return false;
        }
        String fileType = inPath.substring(dotIndex).toLowerCase();

        if (word2007U.equals(fileType)) {
            convertDocxToPdf(inPath, outPath);
            return true;
        }
        if (!word2003L.equals(fileType)) {
            return false;
        }

        // Use a dedicated temp file for the intermediate docx. Writing to
        // inPath + "x" would overwrite, and afterwards delete, any real .docx
        // that happens to sit next to the source .doc.
        File tempDocx = File.createTempFile("doc2pdf-", word2007U);
        try {
            docToDOcx(inPath, tempDocx.getPath());
            convertDocxToPdf(tempDocx.getPath(), outPath);
        } finally {
            // Always remove the intermediate file, including on failure.
            deleteDocx(tempDocx.getPath());
        }
        return true;
    }

    /**
     * Converts a .docx file to PDF, closing the document and both streams
     * whether or not the conversion succeeds.
     *
     * @param docxPath path of the source .docx file
     * @param pdfPath  path the PDF is written to
     * @throws IOException if the file cannot be read, converted or written
     */
    private static void convertDocxToPdf(String docxPath, String pdfPath) throws IOException {
        try (FileInputStream fileInputStream = new FileInputStream(docxPath);
             XWPFDocument xwpfDocument = new XWPFDocument(fileInputStream);
             FileOutputStream fileOutputStream = new FileOutputStream(pdfPath)) {
            PdfOptions pdfOptions = PdfOptions.create();
            PdfConverter.getInstance().convert(xwpfDocument, fileOutputStream, pdfOptions);
        }
    }

    /**
     * Converts a .doc file to .docx by loading it into a Spire.Doc
     * {@code Document} and saving it as {@code FileFormat.Docx_2013}.
     *
     * @param path    path of the source .doc file
     * @param outPath path the generated .docx file is written to
     */
    public static void docToDOcx(String path, String outPath) {
        Document dc = new Document();
        try {
            dc.loadFromFile(path);
            dc.saveToFile(outPath, FileFormat.Docx_2013);
        } finally {
            // Release the document even when loading or saving fails.
            dc.close();
        }
    }

    /**
     * Deletes the temporary docx produced during conversion. The result of the
     * delete call is ignored, so a missing file is not treated as an error.
     *
     * @param path path of the file to delete
     */
    public static void deleteDocx(String path) {
        File file = new File(path);
        file.delete();
    }
}