java实现pdf转word
- 前言
- pom文件
- 启动入口
- 过滤器对象
- ConvertPdfToWordWithFlowableStructure转换实现类
前言
1.java实现pdf转word。
2.纯免费开源。
3.pdf解析完会生成word文件和图片文件夹。
4.无页码限制:文本由PDFTextStripper提取后以UTF-8纯文本写入同名的.doc文件;每一页会按300 DPI渲染成一张PNG图片(文件名为从0开始的页序号),保存到与pdf同名的文件夹中。
5.弊端:需手动将图片与文本整合成一个word文件。
仅提供一个pdf转word的实现方案,代码粗糙,老铁轻喷。
jar包地址:https://download.csdn.net/download/wyazyf/88917191
pom文件
<?xml version="1.0" encoding="UTF-8"?>
<!-- Build descriptor for the PDF-to-Word sample: PDFBox performs text extraction and page
     rendering; forms_rt is the runtime for the IntelliJ GUI Designer form. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>org.example</groupId>
    <artifactId>wy</artifactId>
    <version>1.0-SNAPSHOT</version>
    <properties>
        <maven.compiler.source>8</maven.compiler.source>
        <maven.compiler.target>8</maven.compiler.target>
    </properties>
    <dependencies>
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi</artifactId>
            <version>4.1.1</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/org.apache.poi/poi-ooxml -->
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi-ooxml</artifactId>
            <version>4.1.1</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/com.itextpdf/itextpdf -->
        <dependency>
            <groupId>com.itextpdf</groupId>
            <artifactId>itextpdf</artifactId>
            <version>5.5.13.1</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/com.itextpdf/itext-asian -->
        <dependency>
            <groupId>com.itextpdf</groupId>
            <artifactId>itext-asian</artifactId>
            <version>5.2.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.pdfbox</groupId>
            <artifactId>pdfbox</artifactId>
            <version>2.0.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.pdfbox</groupId>
            <artifactId>pdfbox-tools</artifactId>
            <version>2.0.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.pdfbox</groupId>
            <artifactId>jbig2-imageio</artifactId>
            <version>3.0.2</version>
        </dependency>
        <dependency>
            <groupId>com.intellij</groupId>
            <artifactId>forms_rt</artifactId>
            <version>7.0.3</version>
        </dependency>
    </dependencies>
    <repositories>
        <repository>
            <id>alimaven</id>
            <name>aliyun maven</name>
            <!-- HTTPS is required: Maven 3.8.1+ blocks plain-HTTP external repositories by default. -->
            <url>https://maven.aliyun.com/nexus/content/groups/public/</url>
        </repository>
    </repositories>
</project>
启动入口
import com.sun.deploy.util.StringUtils;
import org.apache.poi.util.StringUtil;
import javax.swing.*;
import java.awt.event.ActionEvent;
import java.awt.event.ActionListener;
import java.io.File;
import java.io.IOException;
/**
 * Swing entry point: lets the user pick a PDF file and then converts it with
 * {@link ConvertPdfToWordWithFlowableStructure}.
 *
 * @author wy
 * @create 2024-02-20 15:57
 */
public class test {
    // NOTE(review): these components are never assigned in this class; presumably they are
    // injected by an IntelliJ GUI Designer form bound to these exact field names (the POM
    // depends on forms_rt) -- confirm before renaming any of them.
    private JPanel Panel;
    private JLabel JLabel;
    private JButton button;
    private JButton selectButton;
    /** Chooser from the most recent "select" click; {@code null} until the user opens it once. */
    private JFileChooser jf;

    /**
     * Builds the main window around the form panel and shows it.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        JFrame frame = new JFrame("test");
        JPanel panel = new test().Panel;
        panel.setSize(500, 300);
        frame.setContentPane(panel);
        frame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
        frame.pack();
        frame.setSize(400, 200);
        frame.setVisible(true);
    }

    /**
     * Wires the two buttons: one opens a PDF-only file chooser, the other runs the conversion
     * on the chosen file.
     */
    public test() {
        selectButton.addActionListener(new ActionListener() {
            @Override
            public void actionPerformed(ActionEvent e) {
                // A fresh chooser per click: cancelling the dialog therefore clears any
                // previous selection (getSelectedFile() is null on a new chooser).
                FileFilterTest fileFilter = new FileFilterTest();
                jf = new JFileChooser();
                jf.setFileFilter(fileFilter);
                jf.showOpenDialog(null);
            }
        });
        button.addActionListener(new ActionListener() {
            @Override
            public void actionPerformed(ActionEvent e) {
                // The chooser may never have been opened, or the dialog may have been
                // cancelled; in both cases there is nothing to convert.
                File selectedFile = (jf == null) ? null : jf.getSelectedFile();
                if (selectedFile == null) {
                    showSelectPdfWarning();
                    return;
                }
                String filePath = selectedFile.getPath();
                if (filePath.isEmpty()) {
                    showSelectPdfWarning();
                    return;
                }
                try {
                    // NOTE(review): this runs on the event dispatch thread, so the UI is
                    // blocked for the duration of the conversion.
                    ConvertPdfToWordWithFlowableStructure pdfToWord = new ConvertPdfToWordWithFlowableStructure();
                    pdfToWord.pdfToWordOrPhoto(filePath);
                } catch (IOException ioException) {
                    ioException.printStackTrace();
                    // Surface the failure to the user instead of only logging it.
                    JOptionPane.showMessageDialog(Panel, "转换失败: " + ioException.getMessage(), "标题", JOptionPane.ERROR_MESSAGE);
                }
            }
        });
    }

    /** Shows the "please select a pdf file" warning dialog. */
    private void showSelectPdfWarning() {
        JOptionPane.showMessageDialog(Panel, "请选择一个pdf文件", "标题", JOptionPane.WARNING_MESSAGE);
    }
}
过滤器对象
/**
 * File-chooser filter that shows directories (so the user can navigate) and files whose
 * name ends with {@code .pdf}, ignoring case.
 *
 * @author wy
 * @create 2024-02-20 16:24
 */
public class FileFilterTest extends javax.swing.filechooser.FileFilter {

    /** Extension accepted by this filter; also used as its description. */
    private static final String PDF_EXTENSION = ".pdf";

    /**
     * Decides whether an entry is listed in the chooser.
     *
     * @param f the candidate file or directory
     * @return {@code true} for directories and for names ending in ".pdf" (any letter case);
     *         {@code false} otherwise, including for {@code null}
     */
    @Override
    public boolean accept(java.io.File f) {
        if (f == null) {
            return false;
        }
        if (f.isDirectory()) {
            return true;
        }
        // Locale.ROOT keeps the lower-casing independent of the user's locale
        // (e.g. the Turkish dotless i), so "X.PDF" always matches.
        return f.getName().toLowerCase(java.util.Locale.ROOT).endsWith(PDF_EXTENSION);
    }

    /**
     * @return the label shown in the chooser's file-type drop-down
     */
    @Override
    public String getDescription() {
        return PDF_EXTENSION;
    }
}
ConvertPdfToWordWithFlowableStructure转换实现类
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.rendering.ImageType;
import org.apache.pdfbox.rendering.PDFRenderer;
import org.apache.pdfbox.text.PDFTextStripper;
import org.apache.pdfbox.tools.imageio.ImageIOUtil;
import java.awt.image.BufferedImage;
import java.io.*;
/**
 * Splits a PDF into two outputs next to the source file: a UTF-8 text file carrying a
 * {@code .doc} extension, and a folder with one PNG rendering per page.
 */
public class ConvertPdfToWordWithFlowableStructure {

    /** Resolution, in dots per inch, used both for rendering pages and as PNG metadata. */
    private static final int RENDER_DPI = 300;

    /**
     * Extracts the text of every page into {@code <base>.doc} and renders every page to
     * {@code <base>/<pageIndex>.png}, where {@code <base>} is {@code pdfFile} without its
     * extension.
     *
     * @param pdfFile path of the PDF to convert
     * @throws IOException if the PDF cannot be read or any output cannot be written
     */
    public void pdfToWordOrPhoto(String pdfFile) throws IOException {
        // try-with-resources guarantees the document is closed even when a later step fails.
        try (PDDocument doc = PDDocument.load(new File(pdfFile))) {
            int pageCount = doc.getNumberOfPages();
            String basePath = stripExtension(pdfFile);

            writeText(doc, pageCount, basePath + ".doc");
            System.out.println("pdf文字转换word成功!");

            renderPages(doc, pageCount, basePath);
            System.out.println("pdf图片下载成功!");
        }
    }

    /** Returns {@code path} without its file extension; dots in parent directories are ignored. */
    private static String stripExtension(String path) {
        int dot = path.lastIndexOf('.');
        int lastSeparator = Math.max(path.lastIndexOf('/'), path.lastIndexOf('\\'));
        // Only a dot located inside the last path segment marks an extension.
        return dot > lastSeparator ? path.substring(0, dot) : path;
    }

    /** Writes the position-sorted text of pages 1..pageCount to {@code targetPath} as UTF-8. */
    private static void writeText(PDDocument doc, int pageCount, String targetPath) throws IOException {
        // FileOutputStream creates or truncates the target, so no explicit createNewFile is needed.
        try (FileOutputStream fos = new FileOutputStream(targetPath);
             Writer writer = new OutputStreamWriter(fos, "UTF-8")) {
            PDFTextStripper stripper = new PDFTextStripper();
            stripper.setSortByPosition(true);
            stripper.setStartPage(1);
            stripper.setEndPage(pageCount);
            stripper.writeText(doc, writer);
        }
    }

    /** Renders each page to {@code <imageDirPath>/<zero-based page index>.png}. */
    private static void renderPages(PDDocument doc, int pageCount, String imageDirPath) throws IOException {
        if (pageCount == 0) {
            return;
        }
        File imageDir = new File(imageDirPath);
        // mkdirs() returns false when the directory already exists, hence the second check.
        if (!imageDir.mkdirs() && !imageDir.isDirectory()) {
            throw new IOException("Cannot create image directory: " + imageDir);
        }
        PDFRenderer renderer = new PDFRenderer(doc);
        for (int pageIndex = 0; pageIndex < pageCount; pageIndex++) {
            BufferedImage pageImage = renderer.renderImageWithDPI(pageIndex, RENDER_DPI, ImageType.RGB);
            String imagePath = imageDirPath + "/" + pageIndex + "." + "png";
            // writeImage overwrites an existing file and reports a missing image writer via false.
            if (!ImageIOUtil.writeImage(pageImage, imagePath, RENDER_DPI)) {
                throw new IOException("No image writer available for: " + imagePath);
            }
        }
    }
}