参考资料: jQuery-Word-Export导出word_jquery.wordexport.js下载-****博客
近期又需要自己做个 Html2Doc 的解决方案,因为客户又不想要 Html2pdf 的下载了,当初还给我费尽心思解决Html转pdf时中文输出的问题(html转pdf文件下载之最合理的方法支持中文_jspdf.umd.min.js-****博客)以及手机端下载pdf的问题(手机端 Android WebView 获取 blob 链接文件名并下载网页动态生成的 pdf 文件且调用外部程序打开_blob链接怎么打开-****博客)
查了下,基于 IIS 的解决方案,感觉还是参考资料中的相对靠谱,测试了一下,页面上的例子正确工作,输出文件正确,之后逐步添加不同元素测试也正常,但是到加了 img 时就报错了:
问 AI 说是叫检查 jquery.word.export.js 的第 33行:
$('<canvas>').attr("id", "test_word_img_" + i).width(w).height(h).insertAfter(img_id);
看了前后代码,发现是 img 元素没有 id 造成的,这样的话,就需要修复 jquery.word.export.js,以下是修复后的代码,不管 img 有没有 id 都能正确输出了,当然,是交给通义千问去修复的:
if (typeof jQuery !== "undefined" && typeof saveAs !== "undefined") {
(function ($) {
$.fn.wordExport = function (fileName) {
fileName = typeof fileName !== 'undefined' ? fileName : "jQuery-Word-Export";
var static = {
mhtml: {
top: "Mime-Version: 1.0\nContent-Base: " + location.href + "\nContent-Type: Multipart/related; boundary=\"NEXT.ITEM-BOUNDARY\";type=\"text/html\"\n\n--NEXT.ITEM-BOUNDARY\nContent-Type: text/html; charset=\"utf-8\"\nContent-Location: " + location.href + "\n\n<!DOCTYPE html>\n<html>\n_html_</html>",
head: "<head>\n<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">\n<style>\n_styles_\n</style>\n</head>\n",
body: "<body>_body_</body>"
}
};
var options = {
maxWidth: 624
};
// Clone selected element before manipulating it
var markup = $(this).clone();
// Remove hidden elements from the output
markup.each(function () {
var self = $(this);
if (self.is(':hidden'))
self.remove();
});
// Embed all images using Data URLs
var images = Array();
var img = markup.find('img');
for (var i = 0; i < img.length; i++) {
// Calculate dimensions of output image
var w = Math.min(img[i].width, options.maxWidth);
var h = img[i].height * (w / img[i].width);
// Ensure each image has a unique id
var img_id = img[i].id || "dynamic_img_" + i;
if (!img[i].id) {
$(img[i]).attr("id", img_id);
}
$('<canvas>').attr("id", "test_word_img_" + i).width(w).height(h).insertAfter("#" + img_id);
// Create canvas for converting image to data URL
var canvas = document.createElement("CANVAS");
canvas.width = w;
canvas.height = h;
// Draw image to canvas
var context = canvas.getContext('2d');
context.drawImage(img[i], 0, 0, w, h);
// Get data URL encoding of image
var uri = canvas.toDataURL("image/png");
// Save encoded image to array
images[i] = {
type: uri.substring(uri.indexOf(":") + 1, uri.indexOf(";")),
encoding: uri.substring(uri.indexOf(";") + 1, uri.indexOf(",")),
location: "#" + img_id,
data: uri.substring(uri.indexOf(",") + 1)
};
// Replace original image with canvas in markup
$("#" + img_id).replaceWith(canvas);
}
// Prepare bottom of mhtml file with image data
var mhtmlBottom = "\n";
for (var i = 0; i < images.length; i++) {
mhtmlBottom += "--NEXT.ITEM-BOUNDARY\n";
mhtmlBottom += "Content-Location: " + images[i].location + "\n";
mhtmlBottom += "Content-Type: " + images[i].type + "\n";
mhtmlBottom += "Content-Transfer-Encoding: " + images[i].encoding + "\n\n";
mhtmlBottom += images[i].data + "\n\n";
}
mhtmlBottom += "--NEXT.ITEM-BOUNDARY--";
//TODO: load css from included stylesheet
var styles = "";
// Aggregate parts of the file together
var fileContent = static.mhtml.top.replace("_html_", static.mhtml.head.replace("_styles_", styles) + static.mhtml.body.replace("_body_", markup.html())) + mhtmlBottom;
// Create a Blob with the file contents
var blob = new Blob([fileContent], {
type: "application/msword;charset=utf-8"
});
saveAs(blob, fileName + ".doc");
};
})(jQuery);
} else {
if (typeof jQuery === "undefined") {
console.error("jQuery Word Export: missing dependency (jQuery)");
}
if (typeof saveAs === "undefined") {
console.error("jQuery Word Export: missing dependency (FileSaver.js)");
}
}
中间还遇到 img 标签中 src 使用其他网站的链接也会报错:
但是这是由于跨域问题,因此将图片放置到网站本地即可:
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Export to DOCX</title>
<script src="./js/jquery-3.3.1.min.js"></script>
<script src="./js/FileSaver.js"></script>
<script src="./js/jquery.wordexport.js"></script>
</head>
<body>
<div id="contentAA">
<h1>这是一个标题</h1>
<h1> Your content here...</h1>
<p>这是段落内容。</p>
<img src="./img/01.png" alt="示例图片">
<table border="1">
<tr><th>表头1</th><th>表头2</th></tr>
<tr><td>数据1</td><td>数据2</td></tr>
</table>
<img src="./img/02.png" alt="示例图片">
</div>
<hr/>
<button id="export">导出</button>
<script>
$("#export").click(function(){
$("#contentAA").wordExport();
});
</script>
</body>
</html>
图片也正常导出了,打开导出的文件看了一下,发现是个加了壳的 Html 文件,仔细看了下格式,发现这是 Mhtml 格式的,也即 .MHT 单个网页文件格式:
Mime-Version: 1.0
Content-Base: http://127.0.0.1/html2doc02.html
Content-Type: Multipart/related; boundary="NEXT.ITEM-BOUNDARY";type="text/html"
--NEXT.ITEM-BOUNDARY
Content-Type: text/html; charset="utf-8"
Content-Location: http://127.0.0.1/html2doc02.html
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
<style>
</style>
</head>
<body>
<h1>这是一个标题</h1>
<h1> Your content here...</h1>
<p>这是段落内容。</p>
<img src="./img/01.png" alt="示例图片" id="dynamic_img_0">
<table border="1">
<tbody><tr><th>表头1</th><th>表头2</th></tr>
<tr><td>数据1</td><td>数据2</td></tr>
</tbody></table>
<img src="./img/02.png" alt="示例图片" id="dynamic_img_1">
</body></html>
--NEXT.ITEM-BOUNDARY
Content-Location: #dynamic_img_0
Content-Type: image/png
Content-Transfer-Encoding: base64
iVBORw0KGgoAAAANSUhEUgAAAFAAAAAwCAYAAACG5f33AAAAAXNSR0IArs4c6QAABRFJREFUaEPtm71rFEEUwN8Zc8ldbAykUjuttJBYSCzEgKLiByiksEghiqKi4EchEQwRjFFiAkoExUKwEKIRESwEC/FvUBS0EKKFJCTx43I5v1beyCxz72Z33ps99i5w297szHu/fd+TZIIgCKDxeBPINAB6s1MvNgAm49cAmJBfA2DNAP5+8xZ+PnoMP5+/gL+TnxLJsWTVSsidPgktPfsT7VOLlyti4K+Xr6A4eA3+fJmCYGYmNZkyLS2w/MPr1M6LOwgZLNy6A9me/c6PWgFwrnMT/J2eBqhBddM++b4uACoGU1PA+agVAGdWrQEsDDMOVZZ0dEB2x1Zo3rMLmrs2eiuO5+mnXgBKZLICTFMhibBRX6n08DEUR2+qn5tWrlAftbX3QCofddEDLI7cCOGZxDAp5c6c8oIo+ag1Bzi7eh0EpZJSNHf+HOROHGUpjYG+0NcfWwH4ZvdUAWpFFACPUmR+4DIs3L33vy0SZGId6DXt5i2bYdnYKPw4cRpQJv1I9tTvpArQVMRHWLQ+tEJu3LVZXsve3ZAfGVIfAPeb7+uH0vhEuKfEsvGl1ADS+IOBOz94ieWC5iKuwAgPLSz49i18HeG1jY1WnEktO3/lkrOmS9UCKbwoRTg0uQCp25qWR89BS5xbuyGMr2idXIhceVTYoeMs18s2F1Lx5+4t5UI+j+tM3BPP/d57KNyeY+3FsdtQHBoWx0OOPGGMlQAsc6FMhtWtcDIhR2Bqfdyim0JsG7nqdGWOPGKAPvC4mdAl8ML9BzDfd1FkfaYnmKUSJ9G55DH3Zrvw3KbusObitHrUleMsxiWwaX0+sZZaoct6XfJ4ATSFYMUf0iEkAWgqhBMbn1grgSJZy7ZAalF65GPrOyUZOkwOGFMBoO36UEWMkigUlcQke0jWegM0XdrsOyk8V4ZW+3z6HCYktK5c/4VwGECzr8v9OABdiaRqAOMOMgtVVQ+1tysIwexsqIMLHi7E0DA/NFwxPtPdg2/2pSAliSQRQHpQVPGJheqPw8fL+k5TaA48c31UzWYqw4m9URYoSSSJABbOni/rI+PS/sKDcSgODEJQKJTJncnnoRUnKwd7RXW1rS+WKOM6jLsXd521E6Eug4to3NEDTNdlEhbRrceOiIabVHiJMnUB0BQYBcK6K9uzT12yLO1cD6Wnz6wzuOzunaq8KE08idTD1ZXQghk/3KIHiHXX165udclie2hDbxsnlRWeMTM/W8FsAnRlz7qwwLIk0tQEyz++K7MCLWTcJATXxIGMKkVsBbMke9YFQFu2MpVAIc1syL1D1fvGZVKbu0qyZxxAST0pCRvWPy6iG+iaL7t9mxpemq2U5A7V10IkCkWdIaknJeexAEYJZQv6Lkhxv0cJLlGI04m46knJed4A6XjdnJJw3ZoqGyW4782dub8EimStN0DqEnpKYoLlzN44Svre3HH2tllsKgBNq9AuYbv0kTT/UYLTDsWnnJFAkaz1tkCaVW3wXLGG68K4Lkk5I43VqQCkypvjLVrqcBNLnOA+dxv6XOlEuyYApRNrn9hDrdCcG0Z9JFr/cSbaNQHItTKfMka/Q63Q50xOTF6UALmdQhKI3JhsWroLuDOJ4ATFNbZyWQJnrCXpFFwDC5s8XHj4Lqft1GdYAdLe1wWo2r9LlK322dL9rACTuIlUANt6l9tU44xq7RH5n0rVyKoopNTdFpP1oX6Nf/VKaIoNgA2ACQkkfP0f773NrWxc5+8AAAAASUVORK5CYII=
--NEXT.ITEM-BOUNDARY
Content-Location: #dynamic_img_1
Content-Type: image/png
Content-Transfer-Encoding: base64
iVBORw0KGgoAAAANSUhEUgAAABMAAAALCAYAAACd1bY6AAAAAXNSR0IArs4c6QAAAHRJREFUOE9j/P///38GKgFGUgxT3uSLYu1dv80ofBTD0BXDVMI0geSxsWHqMAxDtw2XAcjiGIbBXIXPMGQ/ETQM3SCQZmyacFkM9yYxLkMOU2wWY40AZIUwl+GyDNnrGEkDW4yCDMcV08gxTlI6I5S2qWoYAMYcbeBk+Zz2AAAAAElFTkSuQmCC
--NEXT.ITEM-BOUNDARY--
既然如此,那么就有很多方法可以输出这种格式的文件了。不一定非得用 jQuery-Word-Export 了,而且从代码看 jquery.wordexport.js ,还可以做很多优化和自定义,现在这个只能勉强算是验证,还不能投入实用。
刚刚测试了 <img src="data:image/png;base64, 格式的图片,发现无法输出后无法在 word 2003 中显示,是因为没有处理 Src 和图片资源区 Content-Location: #dynamic_img_0 这样的设置不对造成的,按照 MHTML 格式规范,手工修改了一下下载的文件,在 Word 2003 里面就能正确显示图片了:
Mime-Version: 1.0
Content-Type: Multipart/related; boundary="NEXT.ITEM-BOUNDARY";type="text/html"
--NEXT.ITEM-BOUNDARY
Content-Type: text/html; charset="utf-8"
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
<style>
</style>
</head>
<body>
<h1>这是一个标题</h1>
<p>这是段落内容。</p>
<img src="02.png" alt="示例图片">
<table border="1">
<tbody><tr><th>表头1</th><th>表头2</th></tr>
<tr><td>数据1</td><td>数据2</td></tr>
</tbody></table>
<img src="01.png" alt="示例图片">
</body></html>
--NEXT.ITEM-BOUNDARY
Content-Type: image/png
Content-Transfer-Encoding: base64
Content-Location: 01.png
iVBORw0KGgoAAAANSUhEUgAAAFAAAAAwCAYAAACG5f33AAAAAXNSR0IArs4c6QAABRFJREFUaEPtm71rFEEUwN8Zc8ldbAykUjuttJBYSCzEgKLiByiksEghiqKi4EchEQwRjFFiAkoExUKwEKIRESwEC/FvUBS0EKKFJCTx43I5v1beyCxz72Z33ps99i5w297szHu/fd+TZIIgCKDxeBPINAB6s1MvNgAm49cAmJBfA2DNAP5+8xZ+PnoMP5+/gL+TnxLJsWTVSsidPgktPfsT7VOLlyti4K+Xr6A4eA3+fJmCYGYmNZkyLS2w/MPr1M6LOwgZLNy6A9me/c6PWgFwrnMT/J2eBqhBddM++b4uACoGU1PA+agVAGdWrQEsDDMOVZZ0dEB2x1Zo3rMLmrs2eiuO5+mnXgBKZLICTFMhibBRX6n08DEUR2+qn5tWrlAftbX3QCofddEDLI7cCOGZxDAp5c6c8oIo+ag1Bzi7eh0EpZJSNHf+HOROHGUpjYG+0NcfWwH4ZvdUAWpFFACPUmR+4DIs3L33vy0SZGId6DXt5i2bYdnYKPw4cRpQJv1I9tTvpArQVMRHWLQ+tEJu3LVZXsve3ZAfGVIfAPeb7+uH0vhEuKfEsvGl1ADS+IOBOz94ieWC5iKuwAgPLSz49i18HeG1jY1WnEktO3/lkrOmS9UCKbwoRTg0uQCp25qWR89BS5xbuyGMr2idXIhceVTYoeMs18s2F1Lx5+4t5UI+j+tM3BPP/d57KNyeY+3FsdtQHBoWx0OOPGGMlQAsc6FMhtWtcDIhR2Bqfdyim0JsG7nqdGWOPGKAPvC4mdAl8ML9BzDfd1FkfaYnmKUSJ9G55DH3Zrvw3KbusObitHrUleMsxiWwaX0+sZZaoct6XfJ4ATSFYMUf0iEkAWgqhBMbn1grgSJZy7ZAalF65GPrOyUZOkwOGFMBoO36UEWMkigUlcQke0jWegM0XdrsOyk8V4ZW+3z6HCYktK5c/4VwGECzr8v9OABdiaRqAOMOMgtVVQ+1tysIwexsqIMLHi7E0DA/NFwxPtPdg2/2pSAliSQRQHpQVPGJheqPw8fL+k5TaA48c31UzWYqw4m9URYoSSSJABbOni/rI+PS/sKDcSgODEJQKJTJncnnoRUnKwd7RXW1rS+WKOM6jLsXd521E6Eug4to3NEDTNdlEhbRrceOiIabVHiJMnUB0BQYBcK6K9uzT12yLO1cD6Wnz6wzuOzunaq8KE08idTD1ZXQghk/3KIHiHXX165udclie2hDbxsnlRWeMTM/W8FsAnRlz7qwwLIk0tQEyz++K7MCLWTcJATXxIGMKkVsBbMke9YFQFu2MpVAIc1syL1D1fvGZVKbu0qyZxxAST0pCRvWPy6iG+iaL7t9mxpemq2U5A7V10IkCkWdIaknJeexAEYJZQv6Lkhxv0cJLlGI04m46knJed4A6XjdnJJw3ZoqGyW4782dub8EimStN0DqEnpKYoLlzN44Svre3HH2tllsKgBNq9AuYbv0kTT/UYLTDsWnnJFAkaz1tkCaVW3wXLGG68K4Lkk5I43VqQCkypvjLVrqcBNLnOA+dxv6XOlEuyYApRNrn9hDrdCcG0Z9JFr/cSbaNQHItTKfMka/Q63Q50xOTF6UALmdQhKI3JhsWroLuDOJ4ATFNbZyWQJnrCXpFFwDC5s8XHj4Lqft1GdYAdLe1wWo2r9LlK322dL9rACTuIlUANt6l9tU44xq7RH5n0rVyKoopNTdFpP1oX6Nf/VKaIoNgA2ACQkkfP0f773NrWxc5+8AAAAASUVORK5CYII=
--NEXT.ITEM-BOUNDARY
Content-Type: image/png
Content-Transfer-Encoding: base64
Content-Location: 02.png
iVBORw0KGgoAAAANSUhEUgAAABMAAAALCAYAAACd1bY6AAAAAXNSR0IArs4c6QAAAHRJREFUOE9j/P///38GKgFGUgxT3uSLYu1dv80ofBTD0BXDVMI0geSxsWHqMAxDtw2XAcjiGIbBXIXPMGQ/ETQM3SCQZmyacFkM9yYxLkMOU2wWY40AZIUwl+GyDNnrGEkDW4yCDMcV08gxTlI6I5S2qWoYAMYcbeBk+Zz2AAAAAElFTkSuQmCC
--NEXT.ITEM-BOUNDARY--
下一步就是继续修正 jquery.word.export.js 让它能正确按照这个格式输出文件,并且正确处理 <img 的 Src 属性。
下面补充一下 FileSaver.js ,省得还要去 github下载,好麻烦:
/* FileSaver.js
* A saveAs() FileSaver implementation.
* 1.3.2
* 2016-06-16 18:25:19
*
* By Eli Grey, http://eligrey.com
* License: MIT
* See https://github.com/eligrey/FileSaver.js/blob/master/LICENSE.md
*/
/*global self */
/*jslint bitwise: true, indent: 4, laxbreak: true, laxcomma: true, smarttabs: true, plusplus: true */
/*! @source http://purl.eligrey.com/github/FileSaver.js/blob/master/FileSaver.js */
var saveAs = saveAs || (function(view) {
"use strict";
// IE <10 is explicitly unsupported
if (typeof view === "undefined" || typeof navigator !== "undefined" && /MSIE [1-9]\./.test(navigator.userAgent)) {
return;
}
var
doc = view.document
// only get URL when necessary in case Blob.js hasn't overridden it yet
, get_URL = function() {
return view.URL || view.webkitURL || view;
}
, save_link = doc.createElementNS("http://www.w3.org/1999/xhtml", "a")
, can_use_save_link = "download" in save_link
, click = function(node) {
var event = new MouseEvent("click");
node.dispatchEvent(event);
}
, is_safari = /constructor/i.test(view.HTMLElement)
, is_chrome_ios =/CriOS\/[\d]+/.test(navigator.userAgent)
, throw_outside = function(ex) {
(view.setImmediate || view.setTimeout)(function() {
throw ex;
}, 0);
}
, force_saveable_type = "application/octet-stream"
// the Blob API is fundamentally broken as there is no "downloadfinished" event to subscribe to
, arbitrary_revoke_timeout = 1000 * 40 // in ms
, revoke = function(file) {
var revoker = function() {
if (typeof file === "string") { // file is an object URL
get_URL().revokeObjectURL(file);
} else { // file is a File
file.remove();
}
};
setTimeout(revoker, arbitrary_revoke_timeout);
}
, dispatch = function(filesaver, event_types, event) {
event_types = [].concat(event_types);
var i = event_types.length;
while (i--) {
var listener = filesaver["on" + event_types[i]];
if (typeof listener === "function") {
try {
listener.call(filesaver, event || filesaver);
} catch (ex) {
throw_outside(ex);
}
}
}
}
, auto_bom = function(blob) {
// prepend BOM for UTF-8 XML and text/* types (including HTML)
// note: your browser will automatically convert UTF-16 U+FEFF to EF BB BF
if (/^\s*(?:text\/\S*|application\/xml|\S*\/\S*\+xml)\s*;.*charset\s*=\s*utf-8/i.test(blob.type)) {
return new Blob([String.fromCharCode(0xFEFF), blob], {type: blob.type});
}
return blob;
}
, FileSaver = function(blob, name, no_auto_bom) {
if (!no_auto_bom) {
blob = auto_bom(blob);
}
// First try a.download, then web filesystem, then object URLs
var
filesaver = this
, type = blob.type
, force = type === force_saveable_type
, object_url
, dispatch_all = function() {
dispatch(filesaver, "writestart progress write writeend".split(" "));
}
// on any filesys errors revert to saving with object URLs
, fs_error = function() {
if ((is_chrome_ios || (force && is_safari)) && view.FileReader) {
// Safari doesn't allow downloading of blob urls
var reader = new FileReader();
reader.onloadend = function() {
var url = is_chrome_ios ? reader.result : reader.result.replace(/^data:[^;]*;/, 'data:attachment/file;');
var popup = view.open(url, '_blank');
if(!popup) view.location.href = url;
url=undefined; // release reference before dispatching
filesaver.readyState = filesaver.DONE;
dispatch_all();
};
reader.readAsDataURL(blob);
filesaver.readyState = filesaver.INIT;
return;
}
// don't create more object URLs than needed
if (!object_url) {
object_url = get_URL().createObjectURL(blob);
}
if (force) {
view.location.href = object_url;
} else {
var opened = view.open(object_url, "_blank");
if (!opened) {
// Apple does not allow window.open, see https://developer.apple.com/library/safari/documentation/Tools/Conceptual/SafariExtensionGuide/WorkingwithWindowsandTabs/WorkingwithWindowsandTabs.html
view.location.href = object_url;
}
}
filesaver.readyState = filesaver.DONE;
dispatch_all();
revoke(object_url);
}
;
filesaver.readyState = filesaver.INIT;
if (can_use_save_link) {
object_url = get_URL().createObjectURL(blob);
setTimeout(function() {
save_link.href = object_url;
save_link.download = name;
click(save_link);
dispatch_all();
revoke(object_url);
filesaver.readyState = filesaver.DONE;
});
return;
}
fs_error();
}
, FS_proto = FileSaver.prototype
, saveAs = function(blob, name, no_auto_bom) {
return new FileSaver(blob, name || blob.name || "download", no_auto_bom);
}
;
// IE 10+ (native saveAs)
if (typeof navigator !== "undefined" && navigator.msSaveOrOpenBlob) {
return function(blob, name, no_auto_bom) {
name = name || blob.name || "download";
if (!no_auto_bom) {
blob = auto_bom(blob);
}
return navigator.msSaveOrOpenBlob(blob, name);
};
}
FS_proto.abort = function(){};
FS_proto.readyState = FS_proto.INIT = 0;
FS_proto.WRITING = 1;
FS_proto.DONE = 2;
FS_proto.error =
FS_proto.onwritestart =
FS_proto.onprogress =
FS_proto.onwrite =
FS_proto.onabort =
FS_proto.onerror =
FS_proto.onwriteend =
null;
return saveAs;
}(
typeof self !== "undefined" && self
|| typeof window !== "undefined" && window
|| this.content
));
// `self` is undefined in Firefox for Android content script context
// while `this` is nsIContentFrameMessageManager
// with an attribute `content` that corresponds to the window
if (typeof module !== "undefined" && module.exports) {
module.exports.saveAs = saveAs;
} else if ((typeof define !== "undefined" && define !== null) && (define.amd !== null)) {
define([], function() {
return saveAs;
});
}