java实现pdf转word

时间:2024-03-07 12:25:12

java实现pdf转word

    • 前言
    • pom文件
    • 启动入口
    • 过滤器对象
    • ConvertPdfToWordWithFlowableStructure转换实现类

前言

1.java实现pdf转word。
2.纯免费开源。
3.pdf解析完会生成word文件和图片文件夹。
4.无页数限制:文本内容提取后写入word文件,同时每一页会被渲染成一张png图片保存到图片文件夹中。
5.弊端:需手动将图片与文本整合成一个word文件。
仅提供一个pdf转word的实现方案,代码粗糙,老铁轻喷。
jar包地址:https://download.csdn.net/download/wyazyf/88917191

pom文件

<?xml version="1.0" encoding="UTF-8"?>
<!--
  Maven build descriptor for the PDF-to-Word demo.
  Targets Java 8 and pulls in PDFBox (text extraction and page rendering),
  Apache POI, iText and the IntelliJ forms runtime.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>org.example</groupId>
    <artifactId>wy</artifactId>
    <version>1.0-SNAPSHOT</version>

    <properties>
        <!-- Compile for Java 8 source and bytecode level. -->
        <maven.compiler.source>8</maven.compiler.source>
        <maven.compiler.target>8</maven.compiler.target>
    </properties>
    <dependencies>
        <!-- Apache POI core. -->
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi</artifactId>
            <version>4.1.1</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/org.apache.poi/poi-ooxml -->
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi-ooxml</artifactId>
            <version>4.1.1</version>
        </dependency>

        <!-- NOTE(review): the Java sources accompanying this pom do not appear to reference
             iText; confirm whether these two artifacts are still needed. -->
        <!-- https://mvnrepository.com/artifact/com.itextpdf/itextpdf -->
        <dependency>
            <groupId>com.itextpdf</groupId>
            <artifactId>itextpdf</artifactId>
            <version>5.5.13.1</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/com.itextpdf/itext-asian -->
        <dependency>
            <groupId>com.itextpdf</groupId>
            <artifactId>itext-asian</artifactId>
            <version>5.2.0</version>
        </dependency>
        <!-- PDFBox: PDDocument, PDFTextStripper and PDFRenderer.
             NOTE(review): 2.0.0 is the first 2.x release; consider a later 2.0.x patch release. -->
        <dependency>
            <groupId>org.apache.pdfbox</groupId>
            <artifactId>pdfbox</artifactId>
            <version>2.0.0</version>
        </dependency>
        <!-- PDFBox tools: provides ImageIOUtil used to write the rendered pages. -->
        <dependency>
            <groupId>org.apache.pdfbox</groupId>
            <artifactId>pdfbox-tools</artifactId>
            <version>2.0.0</version>
        </dependency>
        <!-- ImageIO plugin for JBIG2-compressed images inside PDFs. -->
        <dependency>
            <groupId>org.apache.pdfbox</groupId>
            <artifactId>jbig2-imageio</artifactId>
            <version>3.0.2</version>
        </dependency>
        <!-- Presumably the runtime for the IntelliJ GUI Designer form behind the Swing
             entry point; confirm against the .form file. -->
        <dependency>
            <groupId>com.intellij</groupId>
            <artifactId>forms_rt</artifactId>
            <version>7.0.3</version>
        </dependency>
    </dependencies>
    <repositories>
        <repository>
            <id>alimaven</id>
            <name>aliyun maven</name>
            <!-- HTTPS is required: Maven 3.8.1+ blocks plain-http repositories by default. -->
            <url>https://maven.aliyun.com/repository/public</url>
        </repository>
    </repositories>
</project>

启动入口

import com.sun.deploy.util.StringUtils;
import org.apache.poi.util.StringUtil;

import javax.swing.*;
import java.awt.event.ActionEvent;
import java.awt.event.ActionListener;
import java.io.File;
import java.io.IOException;

/**
 * Swing entry point: one button opens a PDF file chooser, the other converts the
 * chosen file with {@link ConvertPdfToWordWithFlowableStructure}.
 *
 * <p>NOTE(review): nothing in this source assigns {@code Panel}, {@code JLabel},
 * {@code button} or {@code selectButton}; they are presumably bound by an IntelliJ
 * GUI Designer form (the build depends on {@code forms_rt}) — confirm. Their names
 * are therefore left untouched.
 *
 * <p>NOTE(review): the file-level imports of {@code com.sun.deploy.util.StringUtils}
 * and {@code org.apache.poi.util.StringUtil} are not used by this class.
 *
 * @author wy
 * @create 2024-02-20 15:57
 */
public class test {
    private JPanel Panel;
    private JLabel JLabel;
    private JButton button;
    private JButton selectButton;
    /** Chooser holding the last confirmed selection; {@code null} until a file has been approved. */
    private JFileChooser jf ;

    /**
     * Builds the window around the form's root panel and shows it.
     *
     * @param args ignored
     */
    public static void main(String[] args) {

        JFrame frame = new JFrame("test");
        JPanel panel = new test().Panel;
        panel.setSize(500,300);
        frame.setContentPane(panel);
        frame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);

        frame.pack();
        // The explicit size wins over the packed size.
        frame.setSize(400, 200);
        frame.setVisible(true);

    }

    /** Wires the two buttons to their handlers. */
    public test() {
        selectButton.addActionListener(this::onSelectFile);
        button.addActionListener(this::onConvert);
    }

    /**
     * Opens a chooser restricted to PDF files and remembers it only when the user
     * confirmed the dialog, so a cancelled dialog never leaves a stale selection behind.
     */
    private void onSelectFile(ActionEvent e) {
        JFileChooser chooser = new JFileChooser();
        chooser.setFileFilter(new FileFilterTest());
        int result = chooser.showOpenDialog(null);
        jf = (result == JFileChooser.APPROVE_OPTION) ? chooser : null;
    }

    /**
     * Converts the selected PDF. Warns and stops when nothing usable is selected,
     * instead of continuing into a NullPointerException.
     */
    private void onConvert(ActionEvent e) {
        File selectedFile = (jf == null) ? null : jf.getSelectedFile();
        String filePath = (selectedFile == null) ? null : selectedFile.getPath();
        if (filePath == null || filePath.isEmpty()) {
            JOptionPane.showMessageDialog(Panel, "请选择一个pdf文件", "标题",JOptionPane.WARNING_MESSAGE);
            return;
        }
        try {
            ConvertPdfToWordWithFlowableStructure pdfToWord = new ConvertPdfToWordWithFlowableStructure();
            pdfToWord.pdfToWordOrPhoto(filePath);
        } catch (IOException ioException) {
            ioException.printStackTrace();
            // Surface the failure in the UI as well; stderr is invisible to a GUI user.
            JOptionPane.showMessageDialog(Panel, "转换失败: " + ioException.getMessage(), "标题",
                    JOptionPane.ERROR_MESSAGE);
        }
    }

}

过滤器对象

/**
 * File-chooser filter that shows directories (so the user can navigate) and files
 * whose name ends in {@code .pdf}, ignoring case.
 *
 * @author wy
 * @create 2024-02-20 16:24
 */
public class FileFilterTest extends javax.swing.filechooser.FileFilter{
    /**
     * @param f candidate file or directory
     * @return {@code true} for directories and for names ending in ".pdf" in any letter case;
     *         {@code false} otherwise, including for {@code null}
     */
    @Override
    public boolean accept(java.io.File f) {
        if (f == null) {
            return false;
        }
        if (f.isDirectory()) {
            return true;
        }
        // Locale.ROOT keeps the comparison independent of the default locale (e.g. Turkish dotless i).
        return f.getName().toLowerCase(java.util.Locale.ROOT).endsWith(".pdf");
    }

    /** @return the label shown in the chooser's file-type drop-down */
    @Override
    public String getDescription(){
        return ".pdf";
    }
}

ConvertPdfToWordWithFlowableStructure转换实现类

import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.rendering.ImageType;
import org.apache.pdfbox.rendering.PDFRenderer;
import org.apache.pdfbox.text.PDFTextStripper;
import org.apache.pdfbox.tools.imageio.ImageIOUtil;

import java.awt.image.BufferedImage;
import java.io.*;

/**
 * Splits a PDF into a text file and a folder of page images.
 *
 * <p>The text of all pages is extracted with {@link PDFTextStripper} and written as
 * UTF-8 to a file named like the PDF but ending in {@code .doc}. As written, the
 * output is the extracted text only, not a formatted Word document. Every page is
 * also rendered to a PNG inside a sibling folder named after the PDF (without
 * extension), numbered from 0.
 */
public class ConvertPdfToWordWithFlowableStructure {

    /** Resolution in dots per inch, used for rendering and passed on when writing each PNG. */
    private static final int RENDER_DPI = 300;

    /**
     * Converts {@code pdfFile} into {@code <base>.doc} plus {@code <base>/<page>.png} images,
     * where {@code <base>} is the input path without its extension.
     *
     * @param pdfFile path of the PDF to convert
     * @throws IOException if the PDF cannot be read or any output cannot be written
     */
    public void pdfToWordOrPhoto(String pdfFile) throws IOException {
        String basePath = stripExtension(pdfFile);
        // try-with-resources closes the document even when extraction or rendering fails.
        try (PDDocument doc = PDDocument.load(new File(pdfFile))) {
            int pageCount = doc.getNumberOfPages();

            writeText(doc, pageCount, basePath + ".doc");
            System.out.println("pdf文字转换word成功!");

            renderPages(doc, pageCount, new File(basePath));
            System.out.println("pdf图片下载成功!");
        }
    }

    /**
     * Removes the file extension, looking only at the last path segment so that a dot
     * in a directory name is never mistaken for the extension. A path whose file name
     * has no extension (or only a leading dot) is returned unchanged.
     */
    private static String stripExtension(String path) {
        int lastSeparator = Math.max(path.lastIndexOf('/'), path.lastIndexOf('\\'));
        int lastDot = path.lastIndexOf('.');
        return lastDot > lastSeparator + 1 ? path.substring(0, lastDot) : path;
    }

    /** Writes the position-sorted text of pages 1..pageCount to {@code targetPath} as UTF-8. */
    private static void writeText(PDDocument doc, int pageCount, String targetPath) throws IOException {
        // FileOutputStream creates or truncates the target, so no createNewFile() is needed.
        try (FileOutputStream out = new FileOutputStream(targetPath);
             Writer writer = new OutputStreamWriter(out, "UTF-8")) {
            PDFTextStripper stripper = new PDFTextStripper();
            stripper.setSortByPosition(true);
            stripper.setStartPage(1);
            stripper.setEndPage(pageCount);
            stripper.writeText(doc, writer);
        }
    }

    /** Renders each page as an RGB PNG named {@code <index>.png} (0-based) inside {@code imageDir}. */
    private static void renderPages(PDDocument doc, int pageCount, File imageDir) throws IOException {
        if (pageCount <= 0) {
            return;
        }
        if (!imageDir.isDirectory() && !imageDir.mkdirs()) {
            throw new IOException("Cannot create image directory: " + imageDir.getPath());
        }

        PDFRenderer renderer = new PDFRenderer(doc);
        for (int page = 0; page < pageCount; page++) {
            BufferedImage image = renderer.renderImageWithDPI(page, RENDER_DPI, ImageType.RGB);
            String imagePath = new File(imageDir, page + ".png").getPath();
            // writeImage overwrites an existing file and reports failure through its return value.
            if (!ImageIOUtil.writeImage(image, imagePath, RENDER_DPI)) {
                throw new IOException("Cannot write page image: " + imagePath);
            }
        }
    }
}