html转pdf两种方式 - HuaChenYing

时间:2024-02-17 13:22:37

html转pdf<ITextRenderer/XMLWorkerHelper>两种方式

Maven
<!-- iText 5 core library (com.itextpdf.text.* classes used by the Java code below) -->
<dependency>
    <groupId>com.itextpdf</groupId>
    <artifactId>itextpdf</artifactId>
    <version>5.5.10</version>
</dependency>

<!-- Asian (CJK) font resources for iText; presumably what supplies the
     "STSong-Light" / "UniGB-UCS2-H" font used by AsianFontProvider -->
<dependency>
    <groupId>com.itextpdf</groupId>
    <artifactId>itext-asian</artifactId>
    <version>5.2.0</version>
</dependency>

<!-- Flying Saucer PDF renderer on top of iText 5 (org.xhtmlrenderer.pdf.ITextRenderer) -->
<dependency>
    <groupId>org.xhtmlrenderer</groupId>
    <artifactId>flying-saucer-pdf-itext5</artifactId>
    <version>9.1.6</version>
</dependency>

<!-- iText XML Worker (com.itextpdf.tool.xml.XMLWorkerHelper) -->
<dependency>
    <groupId>com.itextpdf.tool</groupId>
    <artifactId>xmlworker</artifactId>
    <version>5.5.8</version>
</dependency>
Java代码
import com.itextpdf.text.Document;
import com.itextpdf.text.PageSize;
import com.itextpdf.text.pdf.BaseFont;
import com.itextpdf.text.pdf.PdfWriter;
import com.itextpdf.tool.xml.XMLWorkerHelper;
import org.xhtmlrenderer.pdf.ITextFontResolver;
import org.xhtmlrenderer.pdf.ITextRenderer;

import java.io.*;
import java.nio.charset.Charset;

/**
 * Demonstrates two ways of converting an HTML file into a PDF file:
 * iText's {@code XMLWorkerHelper} and Flying Saucer's {@code ITextRenderer}.
 *
 * <p>Both methods read {@link #HTML} and write {@link #PDF_PATH}. Failures are
 * reported on standard error and are not rethrown.
 */
public class Html2Pdf {

    /** Path of the HTML file to convert. */
    public static final String HTML = "X:\\xxx.html";

    /** Path of the PDF file to generate. */
    public static final String PDF_PATH = "X:\\xx.pdf";

    public static void main(String[] args) {
        // Alternative: Html2Pdf.XMLWorkerHelperHtmlToPDF();
        Html2Pdf.itextHtmlToPDF();
    }

    /**
     * Converts {@link #HTML} to {@link #PDF_PATH} with {@code XMLWorkerHelper},
     * using {@code AsianFontProvider} for fonts and UTF-8 as the input charset.
     *
     * <p>To convert an HTML string instead of a file, pass
     * {@code new ByteArrayInputStream(htmlString.getBytes("UTF-8"))} as the
     * HTML input stream of {@code parseXHtml}.
     */
    public static void XMLWorkerHelperHtmlToPDF() {
        long startTime = System.currentTimeMillis();

        // The HTML input is opened first so that a missing source file does not
        // leave an empty PDF behind; both streams are closed on every exit path.
        try (InputStream htmlIn = new FileInputStream(HTML);
             OutputStream pdfOut = new FileOutputStream(PDF_PATH)) {

            Document document = new Document(PageSize.A2);
            PdfWriter pdfWriter = PdfWriter.getInstance(document, pdfOut);
            document.open();
            document.addAuthor("pdf作者");
            document.addCreator("pdf创建者");
            document.addSubject("pdf主题");
            document.addCreationDate();
            document.addTitle("pdf标题,可在html中指定title");

            // No separate CSS file is supplied; a typed null keeps the call unambiguous.
            final InputStream noCssFile = null;
            XMLWorkerHelper worker = XMLWorkerHelper.getInstance();
            worker.parseXHtml(pdfWriter, document, htmlIn, noCssFile,
                    Charset.forName("UTF-8"), new AsianFontProvider());

            // Close inside the try block: close() finalizes the PDF and may itself
            // throw (e.g. when the document has no pages), which must be caught
            // here rather than escape from a finally block.
            document.close();

            long endTime = System.currentTimeMillis();
            System.out.print("XMLWorkerHelper parse Html to Pdf End -> " + (endTime - startTime));
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Converts {@link #HTML} to {@link #PDF_PATH} with Flying Saucer's
     * {@code ITextRenderer}, registering the SimSun font for Chinese text.
     *
     * <p>To render an HTML string instead of a file, call
     * {@code renderer.setDocumentFromString(htmlString)} in place of
     * {@code renderer.setDocument(url)}. Only one of the two may be called:
     * each call replaces the document set by the previous one.
     */
    public static void itextHtmlToPDF() {
        long startTime = System.currentTimeMillis();

        try (OutputStream os = new FileOutputStream(PDF_PATH)) {
            ITextRenderer renderer = new ITextRenderer();
            ITextFontResolver fontResolver = renderer.getFontResolver();
            // Register the Chinese font (SimSun)
            fontResolver.addFont("C:/Windows/Fonts/SimSun.ttc", BaseFont.IDENTITY_H, BaseFont.NOT_EMBEDDED);

            // Load the document from the URL of the HTML file
            String url = new File(HTML).toURI().toURL().toString();
            renderer.setDocument(url);

            renderer.layout();
            renderer.createPDF(os);

            long endTime = System.currentTimeMillis();
            System.out.print("Itext parse Html to Pdf End -> " + (endTime - startTime));
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

}

import com.itextpdf.text.BaseColor;
import com.itextpdf.text.Font;
import com.itextpdf.text.pdf.BaseFont;
import com.itextpdf.tool.xml.XMLWorkerFontProvider;

/**
* XMLWorkerHelper字库类
*/
public class AsianFontProvider extends XMLWorkerFontProvider {

public Font getFont(final String fontname, final String encoding,
final boolean embedded, final float size, final int style,
final BaseColor color) {
BaseFont bf = null;
try {
bf = BaseFont.createFont("STSong-Light", "UniGB-UCS2-H",
BaseFont.NOT_EMBEDDED);
} catch (Exception e) {
e.printStackTrace();
}
Font font = new Font(bf, size, style, color);
font.setColor(color);
return font;
}
}
 

总结

  PDF生成时,对html代码规范要求较高,所有的标签必须有闭合标签,否则生成过程中会报异常。异常提示会明确指出对应标签缺少闭合标签。

  XMLWorkerHelper方式的缺点:对css样式支持不友好,会出现样式丢失、图片丢失,甚至文本丢失的情况。目前没有找到具体的解决方案,相关资料也较难查阅,且在生成过程中发现其耗时约为Itext(ITextRenderer)方式的两倍。故直接放弃使用。

  Itext方式生成注意要点

    中文支持,需要在java代码及html代码中同步配置

    控制PDF页面大小,需要在html代码中配置

    <style>

      body{font-family:SimSun} /* 支持中文字体, SimSun宋体 */

      @page{size:240mm 360mm;} /* 设置PDF页面大小,此配置只对生成PDF文件有效,不会对页面显示生效 */

    </style>

 

补充

  判断操作环境加载系统字体

// 设置中文字体
if ("LINUX".equalsIgnoreCase(System.getProperty("os.name"))) {
  fontResolver.addFont("/usr/share/fonts/truetype/simsun.ttc", BaseFont.IDENTITY_H, BaseFont.EMBEDDED);
} else {
  fontResolver.addFont("c:/windows/fonts/simsun.ttc", BaseFont.IDENTITY_H, BaseFont.NOT_EMBEDDED);
}

以上代码会发生问题

  项目发布至LINUX环境时,如果系统中无中文字体,会无法找到字体,报IO异常

    java.io.IOException: /usr/share/fonts/truetype/simsun.ttc not found as file or resource.

如果无法安装字体至环境中时,使用下载字体至本地项目中,加载项目中存放字体的方法

  下载SimSun.ttc文件

    在浏览器或资源管理器的地址栏中输入c:/windows/fonts/simsun.ttc,即可下载(或复制)该字体文件

    将SimSun.ttc文件放入项目src/main/resources目录下,举例为新建文件夹fonts

    配置字体PATH为 

// Location of the SimSun font file placed under the project's resources (fonts folder).
private static final String FONT_PATH = "/fonts/SimSun.ttc";

    配置中文字体方式为

// Register the font at FONT_PATH with Identity-H encoding, without embedding it in the PDF.
fontResolver.addFont(FONT_PATH, BaseFont.IDENTITY_H, BaseFont.NOT_EMBEDDED);

 

至此,WINDOWS环境运行成功。LINUX环境运行情况待测。