html转pdf<ITextRenderer/XMLWorkerHelper>两种方式
Maven
<dependency> <groupId>com.itextpdf</groupId> <artifactId>itextpdf</artifactId> <version>5.5.10</version> </dependency> <dependency> <groupId>com.itextpdf</groupId> <artifactId>itext-asian</artifactId> <version>5.2.0</version> </dependency> <dependency> <groupId>org.xhtmlrenderer</groupId> <artifactId>flying-saucer-pdf-itext5</artifactId> <version>9.1.6</version> </dependency> <dependency> <groupId>com.itextpdf.tool</groupId> <artifactId>xmlworker</artifactId> <version>5.5.8</version> </dependency>
Java代码
import com.itextpdf.text.Document; import com.itextpdf.text.PageSize; import com.itextpdf.text.pdf.BaseFont; import com.itextpdf.text.pdf.PdfWriter; import com.itextpdf.tool.xml.XMLWorkerHelper; import org.xhtmlrenderer.pdf.ITextFontResolver; import org.xhtmlrenderer.pdf.ITextRenderer; import java.io.*; import java.nio.charset.Charset; public class Html2Pdf { // HTML文件路径 public static final String HTML = "X:\\xxx.html"; // 生成PDF路径 public static final String PDF_PATH = "X:\\xx.pdf"; public static void main(String[] args) { // Html2Pdf.XMLWorkerHelperHtmlToPDF(); Html2Pdf.itextHtmlToPDF(); } public static void XMLWorkerHelperHtmlToPDF() { Document document = null; try { Long startTime = System.currentTimeMillis(); document = new Document(PageSize.A2); PdfWriter pdfWriter = PdfWriter.getInstance(document, new FileOutputStream(PDF_PATH)); document.open(); document.addAuthor("pdf作者"); document.addCreator("pdf创建者"); document.addSubject("pdf主题"); document.addCreationDate(); document.addTitle("pdf标题,可在html中指定title"); XMLWorkerHelper worker = XMLWorkerHelper.getInstance(); InputStream inputStream = null; // 方式一/文件生成PDF worker.parseXHtml(pdfWriter, document, new FileInputStream(HTML), inputStream, Charset.forName("UTF-8"), new AsianFontProvider()); // 方式二/HTML代码字符串生成PDF // worker.parseXHtml(pdfWriter, document, new ByteArrayInputStream(htmlString.getBytes("UTF-8")),inputStream,Charset.forName("UTF-8"),new AsianFontProvider()); Long endTime = System.currentTimeMillis(); System.out.print("XMLWorkerHelper parse Html to Pdf End -> " + (endTime -startTime)); } catch (Exception e) { e.printStackTrace(); } finally { document.close(); } } public static void itextHtmlToPDF() { OutputStream os = null; try { Long startTime = System.currentTimeMillis(); os = new FileOutputStream(PDF_PATH); ITextRenderer renderer = new ITextRenderer(); ITextFontResolver fontResolver = renderer.getFontResolver(); // 设置中文字体/宋体 fontResolver.addFont("C:/Windows/Fonts/SimSun.ttc", BaseFont.IDENTITY_H, 
BaseFont.NOT_EMBEDDED); // 获取HTML文件的URL String url = new File(HTML).toURI().toURL().toString(); // 方式一/URL方式生成PDF renderer.setDocument(url); // 方式二/HTML代码字符串方式生成PDF // HTML代码字符串 String htmlString = "<html></html>"; renderer.setDocumentFromString(htmlString); renderer.layout(); renderer.createPDF(os); Long endTime = System.currentTimeMillis(); System.out.print("Itext parse Html to Pdf End -> " + (endTime -startTime)); } catch (Exception e) { e.printStackTrace(); } finally { if (null != os) { try { os.close(); } catch (IOException e) { e.printStackTrace(); } } } } }
import com.itextpdf.text.BaseColor;
import com.itextpdf.text.Font;
import com.itextpdf.text.pdf.BaseFont;
import com.itextpdf.tool.xml.XMLWorkerFontProvider;
/**
* XMLWorkerHelper字库类
*/
public class AsianFontProvider extends XMLWorkerFontProvider {
public Font getFont(final String fontname, final String encoding,
final boolean embedded, final float size, final int style,
final BaseColor color) {
BaseFont bf = null;
try {
bf = BaseFont.createFont("STSong-Light", "UniGB-UCS2-H",
BaseFont.NOT_EMBEDDED);
} catch (Exception e) {
e.printStackTrace();
}
Font font = new Font(bf, size, style, color);
font.setColor(color);
return font;
}
}
总结
PDF生成时,对html代码规范要求较高,所有的标签必须有闭合标签,否则生成过程中会报异常。异常提示会明确指出对应标签缺少闭合标签。
XMLWorkerHelper方式的缺点:对css样式支持不友好,会出现样式丢失、图片丢失,甚至文本丢失的情况。目前没有找到具体解决方案,相关资料也较难查阅,且在生成过程中发现其耗时约为Itext(ITextRenderer)方式的两倍,故直接放弃使用。
Itext方式生成注意要点
中文支持,需要在java代码及html代码中同步配置
控制PDF页面大小,需要在html代码中配置
<style> body{font-family:SimSun} /* 支持中文字体, SimSun宋体 */ @page{size:240mm 360mm;} /* 设置PDF页面大小,此配置只对生成PDF文件有效,不会对页面显示生效 */ </style>
补充
判断操作环境加载系统字体
// 设置中文字体 if ("LINUX".equalsIgnoreCase(System.getProperty("os.name"))) { fontResolver.addFont("/usr/share/fonts/truetype/simsun.ttc", BaseFont.IDENTITY_H, BaseFont.EMBEDDED); } else { fontResolver.addFont("c:/windows/fonts/simsun.ttc", BaseFont.IDENTITY_H, BaseFont.NOT_EMBEDDED); }
以上代码会发生问题
项目发布至LINUX环境时,如果系统中无中文字体,会无法找到字体,报IO异常
java.io.IOException: /usr/share/fonts/truetype/simsun.ttc not found as file or resource.
如果无法在运行环境中安装字体,可以将字体文件下载并放入项目中,改为加载项目内存放的字体文件,方法如下
下载SimSun.ttc文件
复制c:/windows/fonts/simsun.ttc至浏览器或资源管理器中,自动下载
在项目src/main/resources目录下新建文件夹(此处以fonts为例),并将SimSun.ttc文件放入该文件夹
配置字体PATH为
private static String FONT_PATH = "/fonts/SimSun.ttc";
配置中文字体方式为
fontResolver.addFont(FONT_PATH, BaseFont.IDENTITY_H, BaseFont.NOT_EMBEDDED);
至此,WINDOWS环境运行成功。LINUX环境运行情况待测。