方案背景
2022 年时,团队权限比较受限,既不能开新页面,也不能开新服务;面对同样的图片生成需求,且性能要求比较高,当时采用了 html2canvas 的前端生成方案,覆盖了十几个核心业务模块,在 Web、H5、App、Electron PC App 下,都能在 300ms~2500ms 内完成图片的生成和下载。
今年遇到了同样的需求,但业务页面内嵌了一个由几十张图表组成的可视化大屏 iframe,html2canvas 处理 iframe 的场景比较吃力;同时团队权限也已经允许使用服务端截屏方案,于是这次改用 Puppeteer。
生成 PDF 的额外说明
直接用 Puppeteer 生成 PDF 走不通,因此方案里改为:先用 Puppeteer 截出整张图片,再用 sharp 手动地把图片按 PDF 页面尺寸的等比例大小裁切成若干份,最后用 pdfkit 把裁切出来的图片等比缩放后逐页放进 PDF 里。
代码
代码为 POC 方案,离生产可用还是要多做一些稳定性和可用性的支持工作,因此以下代码仅作学习参考
其中流程代码参考意义不大;核心代码里的裁切图片、自动滚动、等待指定 iframe 加载,以及获取指定元素上的属性值,比较有参考意义。
// 流程代码
import { ConsoleLogger } from '@nestjs/common';
import puppeteer, { Viewport } from 'puppeteer';
import { Browser, CookieParam } from 'puppeteer';
import * as helper from './helper';
// Module-level logger; it is constructed with the 'WebViewer' context string.
const logger = new ConsoleLogger('WebViewer');
/**
 * Output formats a caller can request from `WebViewer.shot`.
 * Each member's value is the lowercase format name as a string.
 */
export enum TargetFileType {
png = 'png',
jpeg = 'jpeg',
webp = 'webp',
// `shot` converts the captured screenshot into a PDF when this type is requested.
pdf = 'pdf',
}
/** Parameters describing one capture request passed to `WebViewer.shot`. */
export interface ShotParam {
// Requested output format; 'pdf' triggers the image-to-PDF conversion in `shot`.
type: TargetFileType;
// Address of the page to open and capture.
url: string;
// NOTE(review): not read by any code visible in this file — confirm whether callers use it.
targetFile?: string;
// Cookies applied to the page before it navigates to `url`.
cookies?: CookieParam[];
// Logged at the start of `shot`, then overwritten by `shot` with the viewport it computes.
viewport?: Viewport;
// Navigation timeout for `page.goto`; when falsy, the viewer's `socketTimeout` option is used.
timeout?: number;
}
/**
 * Opens pages in a Puppeteer-controlled browser and captures them as an image
 * buffer, optionally converted to a PDF.
 *
 * The browser is launched lazily on the first `shot()` call and reused by
 * later calls until `close()` is invoked.
 */
export class WebViewer {
  private browser: Browser | null = null;
  private inited = false;
  /** Launch currently in progress, shared so concurrent callers start only one browser. */
  private launching: Promise<void> | null = null;
  static defaultInstance = new WebViewer();

  /**
   * @param options Timeouts for the viewer. When omitted, `socketTimeout`
   *   defaults to two minutes and `generateTimeout` to 0.
   */
  constructor(private readonly options?: WebViewerOptions) {
    this.options = options || {
      socketTimeout: 2 * 60 * 1000,
      generateTimeout: 0,
    };
  }

  /** Launches the browser and marks this viewer as initialised. */
  async init(): Promise<void> {
    this.browser = await puppeteer.launch({
      // defaultViewport: { width: 1920, height: 1080 },
      // headless: 'shell',
      // NOTE(review): a headed browser is launched here; confirm this is
      // intended for the deployment environment.
      headless: false,
      pipe: true,
      args: [
        '--disable-gpu',
        '--disable-dev-shm-usage',
        '--disable-setuid-sandbox',
        '--no-first-run',
        '--no-sandbox',
        '--no-zygote',
        '--full-memory-crash-report',
        '--unlimited-storage',
      ],
    });
    this.inited = true;
  }

  /**
   * Captures the page at `param.url`.
   *
   * Steps: open a page, apply cookies, navigate, wait for the embedded frame,
   * widen the viewport to fit `.html-table` (at least 1920px), wait until the
   * `<html>` element exposes a numeric `data-height`, grow the viewport to the
   * body height, wait for `#datart-rendered`, then take the screenshot.
   *
   * Side effect: `param.viewport` is overwritten with the viewport that was
   * actually used.
   *
   * @param param Capture request.
   * @returns The screenshot bytes in the requested image format, or PDF bytes
   *   when `param.type` is `pdf`.
   * @throws Error when the browser cannot be launched, when `<body>` cannot be
   *   measured, or when any Puppeteer step fails. The page is closed in every case.
   */
  async shot(param: ShotParam): Promise<Buffer> {
    const browser = await this.ensureBrowser();
    logger.log(
      `Start to shot url: ${param.url}, type: ${
        param.type
      }, viewport: { heigth:${param.viewport?.height || 0}, width:${
        param.viewport?.width || 0
      }} `,
    );
    const page = await browser.newPage();
    try {
      page.on('response', (response) => {
        logger.debug(response.url());
      });
      page.on('close', () => {
        logger.debug('Current page has been closed.');
      });
      if (param.cookies) {
        await page.setCookie(...param.cookies);
      }
      await page.goto(param.url, {
        // `||` is kept on purpose: a timeout of 0 falls back to the socket timeout.
        timeout: param?.timeout || this.options?.socketTimeout,
        waitUntil: 'networkidle0',
      });
      await helper.waitForFrame(page);

      // First pass: make the viewport wide enough for the table (plus padding).
      const minWidth = 1920;
      const tableWidth = await page.$eval(
        '.html-table',
        (el) => el.scrollWidth + 36,
      );
      const width = Math.max(tableWidth, minWidth);
      param.viewport = { width, height: 1080 };
      await page.setViewport(param.viewport);

      // Wait until the page publishes a numeric height on <html data-height>.
      await helper.getValueFromElementDataset(
        page,
        'html',
        'height',
        async (value: string) => !Number.isNaN(Number(value)),
      );

      // Second pass: make the viewport as tall as the body so that a plain
      // screenshot covers the whole content.
      const bodyHandle = await page.$('body');
      if (!bodyHandle) {
        throw new Error('WebViewer: <body> element not found');
      }
      let bodyBox: { height: number } | null;
      try {
        bodyBox = await bodyHandle.boundingBox();
      } finally {
        await bodyHandle.dispose();
      }
      if (!bodyBox) {
        throw new Error('WebViewer: <body> has no bounding box');
      }
      param.viewport = { width, height: Math.floor(bodyBox.height) + 1 };
      await page.setViewport(param.viewport);

      await page.waitForSelector('#datart-rendered');
      await helper.sleep(300);

      // Honour the requested image format; PDF output is built from a PNG.
      let buffer = await page.screenshot({
        type: WebViewer.screenshotFormat(param.type),
      });
      if (param.type === 'pdf') {
        buffer = await helper.generatePdf(buffer);
      }
      await helper.sleep(300);
      return buffer;
    } finally {
      // Always release the page, including when a step above threw.
      if (!page.isClosed()) {
        try {
          await page.close();
        } catch (error) {
          logger.warn(`Failed to close page: ${String(error)}`);
        }
      }
    }
  }

  /** Closes the browser (if any) and resets state so a later `shot()` relaunches it. */
  async close(): Promise<void> {
    const browser = this.browser;
    this.browser = null;
    this.inited = false;
    await browser?.close();
  }

  /** Returns the running browser, launching it once if needed. */
  private async ensureBrowser(): Promise<Browser> {
    if (!this.inited || !this.browser) {
      if (!this.launching) {
        this.launching = this.init().finally(() => {
          this.launching = null;
        });
      }
      await this.launching;
    }
    if (!this.browser) {
      throw new Error('WebViewer: browser is not available');
    }
    return this.browser;
  }

  /** Maps the requested output type to the image format used for the screenshot. */
  private static screenshotFormat(
    type: ShotParam['type'],
  ): 'png' | 'jpeg' | 'webp' {
    switch (`${type}`) {
      case 'jpeg':
        return 'jpeg';
      case 'webp':
        return 'webp';
      default:
        // 'png' itself, and 'pdf' (the PDF is assembled from a PNG capture).
        return 'png';
    }
  }
}
/** Construction options for `WebViewer`. */
export interface WebViewerOptions {
// Fallback navigation timeout used by `shot` when the request has no `timeout`.
socketTimeout: number;
// NOTE(review): not read by any code visible in this file — confirm intended use.
generateTimeout?: number;
}
// 核心代码(即上文通过 `import * as helper from './helper'` 引入的 helper 模块)
import { Page } from 'puppeteer';
import * as sharp from 'sharp';
import * as pdfkit from 'pdfkit';
import * as getStream from 'get-stream';
/**
 * Resolves with the first frame of `page` whose name equals `frameName`.
 *
 * The current frames are checked immediately; after that the check is repeated
 * whenever a frame is attached or navigated, so a frame whose name only
 * becomes available after navigation is still found. Listeners are removed
 * once the frame has been found.
 *
 * The returned promise never rejects and does not time out on its own.
 *
 * @param page Page whose frames are inspected.
 * @param frameName Frame name to wait for; defaults to 'datart'.
 * @returns The matching frame.
 */
function waitForFrame(
  page: Page,
  frameName = 'datart',
): Promise<ReturnType<Page['frames']>[number]> {
  return new Promise<ReturnType<Page['frames']>[number]>((resolve) => {
    const checkFrame = (): void => {
      const frame = page.frames().find((f) => f.name() === frameName);
      if (!frame) {
        return;
      }
      page.off('frameattached', checkFrame);
      page.off('framenavigated', checkFrame);
      resolve(frame);
    };
    // Subscribe before the first check so no event can slip in between.
    page.on('frameattached', checkFrame);
    page.on('framenavigated', checkFrame);
    checkFrame();
  });
}
/**
 * Scrolls the element matched by `selector` downwards in fixed steps, inside
 * the page, until the accumulated scroll distance reaches the element's
 * `scrollHeight`.
 *
 * @param page Page in which the scrolling is performed.
 * @param selector CSS selector of the scrollable element.
 * @param distance Pixels scrolled per step; defaults to 100.
 * @param intervalMs Delay between steps in milliseconds; defaults to 100.
 * @returns `true` once the bottom has been reached, `false` when no element
 *   matches `selector` (instead of waiting forever).
 * @throws RangeError when `distance` is not a positive number.
 */
async function autoScroll(
  page: Page,
  selector: string,
  distance = 100,
  intervalMs = 100,
): Promise<boolean> {
  if (!(distance > 0)) {
    // A non-positive step would never reach the bottom.
    throw new RangeError(`autoScroll: distance must be > 0, got ${distance}`);
  }
  return page.evaluate(
    (selector: string, distance: number, intervalMs: number) =>
      new Promise<boolean>((resolve) => {
        // Total distance scrolled so far.
        let totalHeight = 0;
        const timer = setInterval(() => {
          const dom = document.querySelector(selector);
          if (!dom) {
            // Nothing to scroll: stop the timer and settle the promise.
            clearInterval(timer);
            resolve(false);
            return;
          }
          // Full height of the element, including the part scrolled out of view.
          const scrollHeight = dom.scrollHeight;
          dom.scrollBy(0, distance);
          totalHeight += distance;
          // The counter keeps growing even after the scrollbar hits the end,
          // so reaching scrollHeight means the bottom has been passed.
          if (totalHeight >= scrollHeight) {
            clearInterval(timer);
            resolve(true);
          }
        }, intervalMs);
      }),
    selector,
    distance,
    intervalMs,
  );
}
/**
 * Polls `dataset[key]` of the element matched by `selector` until
 * `checkValue` accepts the value, then returns it.
 *
 * The first read happens immediately; later reads are spaced by the polling
 * interval and never overlap. Errors thrown by the page lookup or by
 * `checkValue` reject the returned promise instead of being lost.
 *
 * @param page Page that contains the element.
 * @param selector CSS selector of the element to read.
 * @param key Dataset key, i.e. the `data-*` attribute name in camelCase.
 * @param checkValue Async predicate; polling stops when it resolves to `true`.
 *   It is also called when the attribute is absent, in which case the value
 *   it receives is `undefined` at runtime.
 * @param pollOptions Optional tuning: `intervalMs` (default 500) is the delay
 *   between reads; `timeoutMs` (default 0 = no limit) bounds the total wait.
 * @returns The accepted dataset value.
 * @throws Error when `timeoutMs` elapses before a value is accepted.
 */
async function getValueFromElementDataset(
  page: Page,
  selector: string,
  key: string,
  checkValue: (value: string) => Promise<boolean>,
  pollOptions: { intervalMs?: number; timeoutMs?: number } = {},
): Promise<string | undefined> {
  const intervalMs = pollOptions.intervalMs ?? 500;
  const timeoutMs = pollOptions.timeoutMs ?? 0;
  const startedAt = Date.now();

  for (;;) {
    const value: string | undefined = await page.$eval(
      selector,
      (el: HTMLElement, key: string) => {
        return el.dataset[key];
      },
      key,
    );
    // The value is forwarded unchanged (even when missing) so the predicate
    // keeps full control over what counts as "ready".
    if (!checkValue || (await checkValue(value as string))) {
      return value;
    }
    if (timeoutMs > 0 && Date.now() - startedAt >= timeoutMs) {
      throw new Error(
        `Timed out after ${timeoutMs}ms waiting for data-${key} on "${selector}"`,
      );
    }
    await new Promise((wake) => setTimeout(wake, intervalMs));
  }
}
/**
 * Cuts an image into vertical slices so that each slice fills at most one
 * page once the image is scaled to `pageWidth`.
 *
 * The image is resized to `pageWidth` only to learn its scaled height; the
 * slices themselves are extracted from the original, full-resolution image.
 *
 * @param pageWidth Target page width (the image is scaled to this width).
 * @param pageHeight Target page height, in the same unit as `pageWidth`.
 * @param buffer Encoded source image.
 * @returns `images`: the slices from top to bottom at original resolution;
 *   `scale`: original height divided by scaled height.
 * @throws RangeError when `pageWidth` or `pageHeight` is not positive.
 * @throws Error when the image dimensions cannot be read.
 */
async function clipImage(
  pageWidth: number,
  pageHeight: number,
  buffer: Buffer,
): Promise<{ images: Buffer[]; scale: number }> {
  if (!(pageWidth > 0) || !(pageHeight > 0)) {
    throw new RangeError(
      `clipImage: page size must be positive, got ${pageWidth}x${pageHeight}`,
    );
  }

  // Read the original dimensions from the header instead of re-encoding the
  // whole image.
  const { width: imageOriginWidth, height: imageOriginHeight } = await sharp(
    buffer,
  ).metadata();
  if (!imageOriginWidth || !imageOriginHeight) {
    throw new Error('clipImage: unable to read image dimensions');
  }

  // The resize is kept so the scaled height uses sharp's own rounding.
  const resized = await sharp(buffer)
    .resize(pageWidth)
    .toBuffer({ resolveWithObject: true });
  const imageHeight = resized.info.height;
  const scale = imageOriginHeight / imageHeight;

  const images: Buffer[] = [];
  for (let startY = 0; startY < imageHeight; startY += pageHeight) {
    const sliceHeight = Math.min(pageHeight, imageHeight - startY);
    // Clamp both values: floating-point rounding of `x * scale` could
    // otherwise push the region one pixel past the image, which sharp rejects.
    const top = Math.min(Math.floor(startY * scale), imageOriginHeight - 1);
    const height = Math.min(
      Math.ceil(sliceHeight * scale),
      imageOriginHeight - top,
    );
    const slice = await sharp(buffer)
      .extract({ left: 0, top, width: imageOriginWidth, height })
      .toBuffer();
    images.push(slice);
  }
  return { images, scale };
}
/**
 * Builds a multi-page PDF from one tall image.
 *
 * The image is cut into page-sized slices by `clipImage`, and each slice is
 * drawn at the top-left corner of its own page, scaled to the page width.
 *
 * @param imageBuffer Encoded source image.
 * @returns The bytes of the generated PDF.
 * @throws Whatever `clipImage` or the PDF library throws; failures reject the
 *   returned promise rather than leaving it pending.
 */
const generatePdf = async (imageBuffer: Buffer): Promise<Buffer> => {
  const doc = new pdfkit();
  // Width and height of a page in the new document.
  const pageWidth = doc.page.width;
  const pageHeight = doc.page.height;
  // Scale the image to the page width and cut it into page-high slices.
  const { images, scale } = await clipImage(pageWidth, pageHeight, imageBuffer);

  images.forEach((image, pageIndex) => {
    // The document starts with one page; every later slice gets a new page,
    // which also becomes the page that subsequent drawing targets.
    if (pageIndex > 0) {
      doc.addPage();
    }
    doc.image(image, 0, 0, {
      width: pageWidth,
      scale: 1 / scale,
    });
  });

  doc.end();
  return getStream.buffer(doc);
};
/**
 * Returns a promise that resolves (with no value) after `time` milliseconds.
 *
 * @param time Delay in milliseconds, passed straight to `setTimeout`.
 */
function sleep(time: number): Promise<unknown> {
  return new Promise((wake) => {
    setTimeout(wake, time);
  });
}
// Named exports of the helper functions defined above.
export {
sleep,
clipImage,
autoScroll,
generatePdf,
waitForFrame,
getValueFromElementDataset,
};