利用jpedal进行pdf转换成jpeg,jpg,png,tiff,tif等格式的图片

时间:2023-12-21 19:12:32

项目中运用到pdf文件转换成image图片,开始时使用pdfbox开源库进行图片转换,但是转换出来的文件中含有部分乱码的情况.下面是pdfBox 的pdf转换图片的代码示例.

// Render every page of a PDF to image files with PDFBox's PDFImageWriter.
try {
    String password = null;   // no password is supplied for this sample document
    int startPage = 1;        // first page to render
    String imageType = "jpg"; // output image format, also used as the file extension
    File imageFile = new File("E:\\upload\\pdf\\20140424\\Servlet." + imageType);
    File pdfFile = new File("E:\\upload\\pdf\\20140424\\Servlet.pdf");
    PDDocument document = PDDocument.load(pdfFile);
    try {
        // last page to render = number of pages reported by the document
        int endPage = document.getPageCount();
        PDFImageWriter imageWriter = new PDFImageWriter();
        imageWriter.writeImage(document, imageType, password, startPage, endPage, imageFile.getAbsolutePath());
    } finally {
        // close the document even when rendering fails
        document.close();
    }
} catch (IOException e) {
    e.printStackTrace();
}

比较了其他的开源库之后,准备采用jpedal。但是jpedal的资料非常少,除了官方网站外,即使是英文资料也很少。而且官方提供的代码示例中的一些方法在lgpl授权的
jpedal的代码库中不存在。下面是收集到的一些资料

1、jpedal文档:http://javadoc.idrsolutions.com/org/jpedal/PdfDecoder.html

2、简单调用示例:http://www.idrsolutions.com/java-pdf-code-faq/#pdf2img
3、lgpl授权的jpedal库的下载地址:http://sourceforge.net/projects/jpedal/
4、转换示例地址:http://files.idrsolutions.com/samplecode/org/jpedal/examples/images/ConvertPagesToImages.java.html

5、高清图片转换示例地址:http://files.idrsolutions.com/samplecode/org/jpedal/examples/images/ConvertPagesToHiResImages.java.html

于是稍微修改了官方的转换示例,下面是经过测试可以使用的转换代码

import cn.com.pujiConvert.util.Common;

import com.sun.imageio.plugins.jpeg.JPEGImageWriter;
import org.jpedal.*;
import org.jpedal.color.ColorSpaces;
import org.jpedal.constants.PageInfo;
import org.jpedal.exception.PdfException;
import org.jpedal.external.Options;
import org.jpedal.fonts.FontMappings;
import org.jpedal.objects.PdfFileInformation;
import org.jpedal.utils.LogWriter;
import org.w3c.dom.Element;

import javax.imageio.IIOImage;
import javax.imageio.ImageIO;
import javax.imageio.ImageTypeSpecifier;
import javax.imageio.ImageWriter;
import javax.imageio.metadata.IIOMetadata;
import javax.imageio.plugins.jpeg.JPEGImageWriteParam;
import javax.imageio.stream.ImageOutputStream;
import java.awt.*;
import java.awt.image.BufferedImage;
import java.io.*;
import java.util.Iterator; public class ConvertPagesToImages{ /**
* show if image transparent
*/
boolean isTransparent=false; /**output where we put files */
private String user_dir = System.getProperty("user.dir"); /**use 96 dpi as default so pages correct size (72 will be smaller) */
private float pageScaling =1.33f; /**flag to show if we print messages */
public static boolean outputMessages = false; String output_dir=null; /**correct separator for OS */
String separator = System.getProperty("file.separator"); /**the decoder object which decodes the pdf and returns a data object */
PdfDecoder decode_pdf = null; //type of image to save thumbnails
private String format = "png"; /** holding all creators that produce OCR pdf's ocr*/
private String[] ocr = {"TeleForm"}; /**scaling to use - default is 100 percent */
private int scaling=100; /**file password or null */
private String password=null; //only used if between 0 and 1
private float JPEGcompression=-1f; private int pageCount = 0; public ConvertPagesToImages() { } public void init(String file_name, int scaling, String format, String output_dir, String password, int pageCount){
/*缩小比率*/
this.scaling = scaling;
/*图片格式*/
this.format = format;
/*输出目录*/
this.output_dir = output_dir;
/*pdf密码*/
this.password = password;
/*输出图片数*/
this.pageCount = pageCount; /*判断文件是否存在*/
File pdf_file = new File(file_name);
if (!pdf_file.exists()) {
System.out.println("File " + pdf_file + " not found");
System.out.println("May need full path"); return;
} extraction(file_name, output_dir);
} private void extraction(String file_name, String output_dir) {
this.output_dir=output_dir; if (!user_dir.endsWith(separator)){
user_dir = user_dir + separator;
} if (file_name.toLowerCase().endsWith(".pdf")) { if(output_dir==null){
output_dir=user_dir + "thumbnails" + separator;
} decodeFile(file_name,output_dir);
} else {
String[] files = null;
File inputFiles; if (!file_name.endsWith(separator)){
file_name = file_name + separator;
} try {
inputFiles = new File(file_name); if (!inputFiles.isDirectory()) {
System.err.println(file_name + " is not a directory. Exiting program");
}else{
files = inputFiles.list();
}
} catch (Exception ee) {
LogWriter.writeLog("Exception trying to access file " + ee.getMessage()); } if(files!=null){
for (String file : files) { if (file.toLowerCase().endsWith(".pdf")) {
if (outputMessages){
System.out.println(file_name + file);
} decodeFile(file_name + file, output_dir);
}
}
}
} if(outputMessages){
System.out.println("Thumbnails created");
}
} /**
* routine to decode a file
*/
private void decodeFile(String file_name,String output_dir) {
String name = "demo"; //set a default just in case int pointer = file_name.lastIndexOf(separator); if(pointer==-1){
pointer = file_name.lastIndexOf('/');
} if (pointer != -1){
name = file_name.substring(pointer + 1, file_name.length() - 4);
}else if((file_name.toLowerCase().endsWith(".pdf"))){
name=file_name.substring(0,file_name.length()-4);
} //fix for odd files on Linux created when you view pages
if(name.startsWith(".")){
return;
} //create output dir for images
if(output_dir==null){
output_dir = user_dir + "thumbnails" + separator ;
} //PdfDecoder returns a PdfException if there is a problem
try {
if(decode_pdf==null){
decode_pdf = new PdfDecoder(true);
} /**optional JAI code for faster rendering*/
org.jpedal.external.ImageHandler myExampleImageHandler=new org.jpedal.examples.handlers.ExampleImageDrawOnScreenHandler();
decode_pdf.addExternalHandler(myExampleImageHandler, Options.ImageHandler); //mappings for non-embedded fonts to use
FontMappings.setFontReplacements(); //true as we are rendering page
decode_pdf.setExtractionMode(0, pageScaling);
//don't bother to extract text and images /**
* open the file (and read metadata including pages in file)
*/
if (outputMessages){
System.out.println("Opening file :" + file_name);
} if(password != null && password != ""){
decode_pdf.openPdfFile(file_name,password);
}else{
decode_pdf.openPdfFile(file_name);
} } catch (Exception e) {
System.err.println("8.Exception " + e + " in pdf code in "+file_name);
} /**
* extract data from pdf (if allowed).
*/
if(decode_pdf.isEncrypted() && !decode_pdf.isFileViewable()){
throw new RuntimeException("Wrong password password used=>"+password+ '<');
}else if ((decode_pdf.isEncrypted()&&(!decode_pdf.isPasswordSupplied())) && (!decode_pdf.isExtractionAllowed())) {
throw new RuntimeException("Extraction not allowed");
} else {
extractPageAsImage(file_name, output_dir, name, isTransparent);
} /**close the pdf file */
decode_pdf.closePdfFile();
} private void extractPageAsImage(String file_name, String output_dir, String name, boolean isTransparent) {
//create a directory if it doesn't exist
File output_path = new File(output_dir);
if (!output_path.exists()){
output_path.mkdirs();
} boolean isSingleOutputFile=false;
boolean compressTiffs = false;
String rawJPEGComp = null;
String jpgFlag = "96"; //page range
int start = 1, end = decode_pdf.getPageCount(); end = (pageCount == 0) ? end : pageCount; if (outputMessages){
System.out.println("Thumbnails will be in " + output_dir);
} try {
BufferedImage[] multiPages = new BufferedImage[1 + (end - start)]; for (int page = start; page < end + 1; page++){
getPage(output_dir, name, isTransparent, isSingleOutputFile,rawJPEGComp, jpgFlag, compressTiffs, start, end,multiPages, page);
}
} catch (Exception e) {
decode_pdf.closePdfFile();
throw new RuntimeException("Exception " + e.getMessage()+" with thumbnails on File="+file_name);
}
} private void getPage(
String output_dir,
String name,
boolean isTransparent,
boolean isSingleOutputFile,
String rawJPEGComp,
String jpgFlag,
boolean compressTiffs,
int start,
int end,
BufferedImage[] multiPages,
int page
) throws PdfException, IOException, FileNotFoundException {
if (outputMessages ){
System.out.println("Page " + page);
} /**
* 补0操作
*/
String pageAsString = String.valueOf(page);
String maxPageSize = String.valueOf(end);
int padding = maxPageSize.length()-pageAsString.length(); for(int ii = 0; ii < padding; ii++){
pageAsString = '0' + pageAsString;
} String image_name;
if(isSingleOutputFile){
image_name =name;
}else{
image_name =name+"_page_" + pageAsString;
} /**
* get PRODUCER and if OCR disable text printing
*/
PdfFileInformation currentFileInformation = decode_pdf.getFileInformationData(); String[] values=currentFileInformation.getFieldValues();
String[] fields=PdfFileInformation.getFieldNames(); for(int i=0;i<fields.length;i++){
if(fields[i].equals("Creator")){
for (String anOcr : ocr) {
if (values[i].equals(anOcr)) {
decode_pdf.setRenderMode(PdfDecoder.RENDERIMAGES);
}
}
}
} BufferedImage image_to_save;
if(!isTransparent){
image_to_save=decode_pdf.getPageAsImage(page);
}else{
//use this if you want a transparent image
image_to_save =decode_pdf.getPageAsTransparentImage(page); //java adds odd tint if you save this as JPEG which does not have transparency
// so put as RGB on white background
// (or save as PNG or TIFF which has transparency)
// or just call decode_pdf.getPageAsImage(page)
if(image_to_save!=null && format.toLowerCase().startsWith("jp")){ BufferedImage rawVersion=image_to_save; int w=rawVersion.getWidth(), h=rawVersion.getHeight();
//blank canvas
image_to_save = new BufferedImage(w,h , BufferedImage.TYPE_INT_RGB); //
Graphics2D g2 = image_to_save.createGraphics();
//white background
g2.setPaint(Color.WHITE);
g2.fillRect(0,0,w,h);
//paint on image
g2.drawImage(rawVersion, 0, 0,null);
}
} /*if just gray we can reduce memory usage by converting image to Grayscale @SuppressWarnings("rawtypes")
Iterator colorspacesUsed = decode_pdf.getPageInfo(PageInfo.COLORSPACES); int nextID;
boolean isGrayOnly=colorspacesUsed!=null; //assume true and disprove
while(colorspacesUsed!=null && colorspacesUsed.hasNext()){
nextID= (Integer) (colorspacesUsed.next()); if(nextID!= ColorSpaces.DeviceGray && nextID!=ColorSpaces.CalGray){
isGrayOnly=false;
}
} //draw onto GRAY image to reduce colour depth
if(isGrayOnly){
BufferedImage image_to_save2=new BufferedImage(image_to_save.getWidth(),image_to_save.getHeight(), BufferedImage.TYPE_BYTE_GRAY);
image_to_save2.getGraphics().drawImage(image_to_save,0,0,null);
image_to_save = image_to_save2;
} //put image in array if multi-images
if(isSingleOutputFile){
multiPages[page-start] = image_to_save;
} if (image_to_save != null) { /**BufferedImage does not support any dpi concept. A higher dpi can be created
* using JAI to convert to a higher dpi image*/ //shrink the page to 50% with graphics2D transformation
//- add your own parameters as needed
//you may want to replace null with a hints object if you
//want to fine tune quality. /** example 1 biliniear scaling
AffineTransform scale = new AffineTransform();
scale.scale(.5, .5); //50% as a decimal
AffineTransformOp scalingOp =new AffineTransformOp(scale, null);
image_to_save =scalingOp.filter(image_to_save, null); */ /** example 2 bicubic scaling - better quality but slower
to preserve aspect ratio set newWidth or newHeight to -1*/ /**allow user to specify maximum dimension for thumbnail*/
int maxDimension = -1; if(scaling!=100 || maxDimension != -1){
int newWidth=image_to_save.getWidth()*scaling/100;
int newHeight=image_to_save.getHeight()*scaling/100; Image scaledImage;
if(maxDimension != -1 && (newWidth > maxDimension || newHeight > maxDimension)){
if(newWidth > newHeight){
newWidth = maxDimension;
scaledImage= image_to_save.getScaledInstance(newWidth,-1,BufferedImage.SCALE_SMOOTH);
} else {
newHeight = maxDimension;
scaledImage= image_to_save.getScaledInstance(-1,newHeight,BufferedImage.SCALE_SMOOTH);
}
} else {
scaledImage= image_to_save.getScaledInstance(newWidth,-1,BufferedImage.SCALE_SMOOTH);
} if(format.toLowerCase().startsWith("jp")){
image_to_save = new BufferedImage(scaledImage.getWidth(null),scaledImage.getHeight(null) , BufferedImage.TYPE_INT_RGB);
}else{
image_to_save = new BufferedImage(scaledImage.getWidth(null),scaledImage.getHeight(null) , BufferedImage.TYPE_INT_ARGB);
} Graphics2D g2 = image_to_save.createGraphics(); g2.drawImage(scaledImage, 0, 0,null);
} if (format.startsWith("jp")) {
saveAsJPEG(jpgFlag, image_to_save, JPEGcompression, new BufferedOutputStream(new FileOutputStream(output_dir + pageAsString + image_name + '.' + format)));
} else {
//save image
decode_pdf.getObjectStore().saveStoredImage(
output_dir + pageAsString + image_name,
image_to_save,
true,
false,
format);
}
} //flush images in case we do more than 1 page so only contains
//images from current page
decode_pdf.flushObjectValues(true);
} private static void saveAsJPEG(String jpgFlag,BufferedImage image_to_save, float JPEGcompression, BufferedOutputStream fos) throws IOException {
JPEGImageWriter imageWriter = (JPEGImageWriter) ImageIO.getImageWritersBySuffix("jpeg").next();
ImageOutputStream ios = ImageIO.createImageOutputStream(fos);
imageWriter.setOutput(ios); IIOMetadata imageMetaData = imageWriter.getDefaultImageMetadata(new ImageTypeSpecifier(image_to_save), null); if (Common.isInteger(jpgFlag)){ int dpi = 96; try {
dpi = Integer.parseInt(jpgFlag);
} catch (Exception e) {
e.printStackTrace();
} Element tree = (Element) imageMetaData.getAsTree("javax_imageio_jpeg_image_1.0");
Element jfif = (Element)tree.getElementsByTagName("app0JFIF").item(0);
jfif.setAttribute("Xdensity", Integer.toString(dpi));
jfif.setAttribute("Ydensity", Integer.toString(dpi));
} JPEGImageWriteParam jpegParams = (JPEGImageWriteParam) imageWriter.getDefaultWriteParam();
if(JPEGcompression>=0 && JPEGcompression<=1f){
jpegParams.setCompressionMode(JPEGImageWriteParam.MODE_EXPLICIT);
jpegParams.setCompressionQuality(JPEGcompression); } imageWriter.write(imageMetaData, new IIOImage(image_to_save, null, null), jpegParams);
ios.close();
imageWriter.dispose();
} public static void main(String[] args) {
long start=System.currentTimeMillis(); String pdfPath = "E:\\upload\\pdf\\20140424\\Servlet.pdf";
int scaling = -1;
String format = "jpg";
String output_dir = "E:\\upload\\pdf\\20140424\\jpg\\";
String password = null;
int pageCount = 10; ConvertPagesToImages convertPagesToImages = new ConvertPagesToImages();
convertPagesToImages.init(pdfPath, scaling, format, output_dir, password, pageCount); System.out.println("花费时间为="+(System.currentTimeMillis()-start)/1000 + "秒");
}
}

功能说明:

1、支持对文件夹下的所有pdf转换成图片,同时也支持对单个pdf进行转换操作。

2、支持转换成jpg,jpeg,tiff,tif,png格式的图片

3、支持指定转换的图片数。

4、支持指定图片的存储位置

传入参数说明

1、pdfPath pdf文件绝对路径,可以是pdf所在的目录也可以是pdf文件路径 
 2、format  图片格式 (支持jpg,jpeg,tiff,tif,png) ,传参时不能带有点号
 3、scaling 图片比率从1到100(100 = 全尺寸) 支持设置为-1 将保持高质量
 4、output_dir 输出路径,输出路径为绝对路径
 5、password 文件密码 若没有传入null值