NodeJs之word文件生成与解析

一,介绍与需求

1.1,介绍

1,officegen 模块可以为Microsoft Office 2007及更高版本生成Office Open XML文件。此模块不依赖于任何框架，您不需要安装Microsoft Office，因此您可以将它用于任何类型的 JavaScript 应用程序。输出也是流而不是文件，不依赖于任何输出工具。此模块应适用于支持Node.js 0.10或更高版本的任何环境，包括Linux，OSX和Windows。

2,textract 是一个用于从各类文件(如 .docx)中提取文本的 Node.js 模块。

3,pdf2json 是一个 Node.js 模块，用于解析 PDF 并将其从二进制格式转换为 JSON 格式。它基于 pdf.js 构建，并对其进行了扩展，使其能够在浏览器之外解析交互式表单元素和文本内容。其目标是：封装在 Web 服务中时，支持在服务器端解析带有交互式表单元素的 PDF；作为命令行工具使用时，支持将本地 PDF 解析为 JSON 文件。

1.2,需求

二,文件生成导出

第一步:安装officegen

1 cnpm install officegen --save

第二步:引入officegen

1 var officegen = require(\'officegen\');
2 var fs = require(\'fs\');
3 var docx = officegen(\'docx\');//word
4 var pptx = officegen(\'pptx\');//pptx

第三步:使用officegen docx

 1 ...
 2 
 3   docx.on(\'finalize\', function (written) {
 4         console.log(\'Finish to create Word file.\nTotal bytes created: \' + written + \'\n\');
 5     });
 6 
 7 
 8     docx.on(\'error\', function (err) {
 9         console.log(err);
10     });
11 
12 ...
13   
14 //var tows = [\'id\', \'provinceZh\', \'leaderZh\', \'cityZh\', \'cityEn\'];//创建一个和表头对应且名称与数据库字段对应数据，便于循环取出数据
15             var pObj = docx.createP({ align: \'center\' });// 创建行 设置居中 大标题
16             pObj.addText(\'全国所有城市\', { bold: true, font_face: \'Arial\', font_size: 18 });// 添加文字 设置字体样式 加粗 大小
17 
18             // let towsLen = tows.length
19             let dataLen = data.length
20             for (var i = 0; i < dataLen; i++) {//循环数据库得到的数据，因为取出的数据格式为
21                 //[{"id" : "101010100","provinceZh" : "北京","leaderZh" : "北京","cityZh" : "北京","cityEn" : "beijing"},{…………},{…………}]
22                 /************************* 文本 *******************************/
23                 // var pObj = docx.createP();//创建一行
24                 // pObj.addText(`(${i+1}), `,{ bold: true, font_face: \'Arial\',});
25                 // pObj.addText(`省级:`,{ bold: true, font_face: \'Arial\',});
26                 // pObj.addText(`${data[i][\'provinceZh\']}  `,);
27                 // pObj.addText(`市级：`,{ bold: true, font_face: \'Arial\',});
28                 // pObj.addText(`${data[i][\'leaderZh\']}  `);
29                 // pObj.addText(`县区：`,{ bold: true, font_face: \'Arial\',});
30                 // pObj.addText(`${data[i][\'cityZh\']}`);
31 
32                 /************************* 表格 *******************************/
33                 let SingleRow = [data[i][\'id\'], data[i][\'provinceZh\'], data[i][\'leaderZh\'], data[i][\'cityZh\']]
34                 table.push(SingleRow)
35             }
36             docx.createTable(table, tableStyle);
37             var out = fs.createWriteStream(\'out.docx\');// 文件写入
38             out.on(\'error\', function (err) {
39                 console.log(err);
40             });
41             var result = docx.generate(out);// 服务端生成word
42             res.writeHead(200, {
43                 // 注意这里的type设置，导出不同文件type值不同application/vnd.openxmlformats-officedocument.wordprocessingml.document
44                 "Content-Type": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
45                 \'Content-disposition\': \'attachment; filename=out\' + moment(new Date().getTime()).format(\'YYYYMMDDhhmmss\') + \'.docx\'
46             });
47             docx.generate(res);// 客户端导出word

第四步:抛出接口

  1 router.put(\'/download/word\', function (req, res) {
  2     console.log(\'exportWord-------------\');
  3     docx.on(\'finalize\', function (written) {
  4         console.log(\'Finish to create Word file.\nTotal bytes created: \' + written + \'\n\');
  5     });
  6 
  7 
  8     docx.on(\'error\', function (err) {
  9         console.log(err);
 10     });
 11     let fields = {
 12         id: \'\',
 13         provinceZh: \'\',
 14         leaderZh: \'\',
 15         cityZh: \'\',
 16         cityEn: \'\'
 17     }
 18     var table = [
 19         [{
 20             val: "No.",
 21             opts: {
 22                 align: "center",
 23                 vAlign: "center",
 24                 sz: \'36\',
 25                 // cellColWidth: 42,
 26                 // b:true,
 27                 // sz: \'48\',
 28                 // shd: {
 29                 //   fill: "7F7F7F",
 30                 //   themeFill: "text1",
 31                 //   "themeFillTint": "80"
 32                 // },
 33                 // fontFamily: "Avenir Book"
 34             }
 35         }, {
 36             val: "省份",
 37             opts: {
 38                 align: "center",
 39                 vAlign: "center",
 40                 sz: \'36\',
 41                 // b:true,
 42                 // color: "A00000",
 43                 // align: "right",
 44                 // shd: {
 45                 //   fill: "92CDDC",
 46                 //   themeFill: "text1",
 47                 //   "themeFillTint": "80"
 48                 // }
 49             }
 50         }, {
 51             val: "市",
 52             opts: {
 53                 align: "center",
 54                 vAlign: "center",
 55                 sz: \'36\',
 56                 // cellColWidth: 42,
 57                 // b:true,
 58                 // sz: \'48\',
 59                 // shd: {
 60                 //   fill: "92CDDC",
 61                 //   themeFill: "text1",
 62                 //   "themeFillTint": "80"
 63                 // }
 64             }
 65         }, {
 66             val: "区/县",
 67             opts: {
 68                 align: "center",
 69                 vAlign: "center",
 70                 sz: \'36\',
 71                 // cellColWidth: 42,
 72                 // b:true,
 73                 // sz: \'48\',
 74                 // shd: {
 75                 //   fill: "92CDDC",
 76                 //   themeFill: "text1",
 77                 //   "themeFillTint": "80"
 78                 // }
 79             }
 80         }],
 81     ]
 82 
 83     var tableStyle = {
 84         tableColWidth: 2400,
 85         tableSize: 24,
 86         tableColor: "ada",
 87         tableAlign: "center",
 88         tableVAlign: "center",
 89         tableFontFamily: "Comic Sans MS",
 90         borders: true
 91     }
 92 
 93     MongoDbAction.getFieldsByConditions(\'AllCity\', {}, fields, function (err, data) {//根据需求查询想要的字段
 94         if (err) {
 95             //执行出错
 96         } else {
 97             //var tows = [\'id\', \'provinceZh\', \'leaderZh\', \'cityZh\', \'cityEn\'];//创建一个和表头对应且名称与数据库字段对应数据，便于循环取出数据
 98             var pObj = docx.createP({ align: \'center\' });// 创建行 设置居中 大标题
 99             pObj.addText(\'全国所有城市\', { bold: true, font_face: \'Arial\', font_size: 18 });// 添加文字 设置字体样式 加粗 大小
100 
101             // let towsLen = tows.length
102             let dataLen = data.length
103             for (var i = 0; i < dataLen; i++) {//循环数据库得到的数据，因为取出的数据格式为
104                 //[{"id" : "101010100","provinceZh" : "北京","leaderZh" : "北京","cityZh" : "北京","cityEn" : "beijing"},{…………},{…………}]
105                 /************************* 文本 *******************************/
106                 // var pObj = docx.createP();//创建一行
107                 // pObj.addText(`(${i+1}), `,{ bold: true, font_face: \'Arial\',});
108                 // pObj.addText(`省级:`,{ bold: true, font_face: \'Arial\',});
109                 // pObj.addText(`${data[i][\'provinceZh\']}  `,);
110                 // pObj.addText(`市级：`,{ bold: true, font_face: \'Arial\',});
111                 // pObj.addText(`${data[i][\'leaderZh\']}  `);
112                 // pObj.addText(`县区：`,{ bold: true, font_face: \'Arial\',});
113                 // pObj.addText(`${data[i][\'cityZh\']}`);
114 
115                 /************************* 表格 *******************************/
116                 let SingleRow = [data[i][\'id\'], data[i][\'provinceZh\'], data[i][\'leaderZh\'], data[i][\'cityZh\']]
117                 table.push(SingleRow)
118             }
119             docx.createTable(table, tableStyle);
120             var out = fs.createWriteStream(\'out.docx\');// 文件写入
121             out.on(\'error\', function (err) {
122                 console.log(err);
123             });
124             var result = docx.generate(out);// 服务端生成word
125             res.writeHead(200, {
126                 // 注意这里的type设置，导出不同文件type值不同application/vnd.openxmlformats-officedocument.wordprocessingml.document
127                 "Content-Type": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
128                 \'Content-disposition\': \'attachment; filename=out\' + moment(new Date().getTime()).format(\'YYYYMMDDhhmmss\') + \'.docx\'
129             });
130             docx.generate(res);// 客户端导出word
131         }
132     });
133 
134 });

第五步:前端调用

下载调用方法

 1    downloadWordOper() {
 2         // var url =  "http://localhost:8880/api/v1/yingqi/download/word";
 3         // window.location = url;//这里不能使用get方法跳转，否则下载不成功
 4             this.$http(downloadWord()).then((res)=>{
 5               //这里res.data是返回的blob对象
 6               var blob = new Blob([res.data], {type: \'application/vnd.openxmlformats-officedocument.wordprocessingml.document;charset=utf-8\'}); //application/vnd.openxmlformats-officedocument.wordprocessingml.document这里表示doc类型
 7               downloadFile(blob,\'word\',\'docx\')
 8             })
 9      
10     },

downloadFile方法代码如下:

 1   /**
 2    *下载文件
 3    * @param blob  ：返回数据的blob对象
 4    * @param tagFileName  ：下载后文件名标记
 5    * @param fileType  ：文件类 word(docx) excel(xlsx) ppt等
 6    */
 7   export function downloadFile(blob,tagFileName,fileType) {
 8     var downloadElement = document.createElement(\'a\');
 9     var href = window.URL.createObjectURL(blob); //创建下载的链接
10     downloadElement.href = href;
11     downloadElement.download = tagFileName+moment(new Date().getTime()).format(\'YYYYMMDDhhmmss\')+\'.\'+fileType; //下载后文件名
12     document.body.appendChild(downloadElement);
13     downloadElement.click(); //点击下载
14     document.body.removeChild(downloadElement); //下载完成移除元素
15     window.URL.revokeObjectURL(href); //释放掉blob对象
16   }

第六步:下载后的效果

ppt生成下载类似,只是设置的writeHead类型与使用的方法不一样

 1 router.put(\'/download/createPpt\', function (req, res) {
 2     console.log(\'exportPpt-------------\');
 3     pptx.on(\'finalize\', function (written) {
 4         console.log(\'Finish to create ppt file.\nTotal bytes created: \' + written + \'\n\');
 5     });
 6 
 7 
 8     pptx.on(\'error\', function (err) {
 9         console.log(err);
10     });
11 
12     let slide1 = pptx.makeNewSlide();//创建一个新幻灯片
13     slide1.title = \'PPT文件\';
14     slide1.addText(\'Office generator\', {
15         y: 66, x: \'c\', cx: \'50%\', cy: 60, font_size: 48,
16         color: \'0000ff\'
17     });
18 
19     slide1.addText(\'Big Red\', {
20         y: 250, x: 10, cx: \'70%\',
21         font_face: \'Wide Latin\', font_size: 54,
22         color: \'cc0000\', bold: true, underline: true
23     });
24 
25     var out = fs.createWriteStream(\'out.pptx\');// 文件写入
26     out.on(\'error\', function (err) {
27         console.log(\'error2===\',err);
28     });
29     var result = pptx.generate(out);// 服务端生成ppt
30     res.writeHead(200, {
31         // 注意这里的type设置，导出不同文件type值不同application/vnd.openxmlformats-officedocument.presentationml.presentation
32         // "Content-Type": "application/vnd.openxmlformats-officedocument.presentationml.presentation",
33         // \'Content-disposition\': \'attachment; filename=out\' + moment(new Date().getTime()).format(\'YYYYMMDDhhmmss\') + \'.pptx\'
34         "Content-Type": "application/vnd.openxmlformats-officedocument.presentationml.presentation",
35         \'Content-disposition\': \'attachment; filename=surprise.pptx\'
36     });
37     pptx.generate(res);// 客户端导出ppt
38 
39 });

三,文件上传解析

3.1,word文档解析

第一步:安装textract

1 cnpm install textract --save

第二步:引入textract

1 //引入textract解析word模块
2 var textract = require(\'textract\');//对于docx文件，您可以使用textract，它将从.docx文件中提取文本。
3 var fs = require(\'fs\');

第三步:解析文档

 1 function parseWord(excelConfig, res) {
 2     textract.fromFileWithPath(excelConfig.excel_Dir, function (error, text) {
 3         if (error) {
 4             res.status(200).json({
 5                 httpCode: 200,
 6                 message: \'导入解析失败\',
 7                 data: error,
 8                 returnValue: 0
 9             });
10         } else {
11             res.status(200).json({
12                 httpCode: 200,
13                 message: \'导入成功\',
14                 data: {
15                     result: text
16                 },
17                 returnValue: 1
18             });
19         }
20     })
21 }

第四步:解析后删除文档

// Remove the uploaded file once it has been parsed.
fs.unlink(excelConfig.excel_Dir, function (err) {
    if (err) {
        // Throwing inside an asynchronous callback cannot be caught by the
        // caller and would take the whole process down; log the failure instead.
        console.log("删除文件" + excelConfig.excel_Dir + "失败", err);
        return;
    }
    console.log("删除文件" + excelConfig.excel_Dir + "成功");
});

第五步:抛出接口调用后的效果

3.2,pdf文档解析

第一步:安装pdf2json

1 cnpm install pdf2json --save

第二步:引入pdf2json

1 var PDFParser = require("pdf2json");
2 var fs = require(\'fs\');

第三步:解析文档

 1 function parsePdf(excelConfig, res) {
 2     var pdfParser = new PDFParser(this, 1);
 3     pdfParser.loadPDF(excelConfig.excel_Dir);
 4     pdfParser.on("pdfParser_dataError", errData => {
 5         res.status(200).json({
 6             httpCode: 200,
 7             message: \'导入解析失败\',
 8             data: errData,
 9             returnValue: 0
10         });
11     });
12     pdfParser.on("pdfParser_dataReady", pdfData => {
13         let data = pdfParser.getRawTextContent()
14         fs.writeFile(\'./uploads/test.txt\', data, function (err) {
15             if (err) {
16                 throw err;
17             }
18         });
19         res.status(200).json({
20             httpCode: 200,
21             message: \'导入成功\',
22             data: {
23                 result: data
24             },
25             returnValue: 1
26         });
27     });
28 }

第四步:解析后删除文档

// Remove the uploaded file once it has been parsed.
fs.unlink(excelConfig.excel_Dir, function (err) {
    if (err) {
        // Throwing inside an asynchronous callback cannot be caught by the
        // caller and would take the whole process down; log the failure instead.
        console.log("删除文件" + excelConfig.excel_Dir + "失败", err);
        return;
    }
    console.log("删除文件" + excelConfig.excel_Dir + "成功");
});

第五步:抛出接口调用后的效果

秒客网

NodeJs之word文件生成与解析

NodeJs之word文件生成与解析

一,介绍与需求

1.1,介绍

1.2,需求

二,文件生成导出

三,文件上传解析

3.1,word文档解析

3.2,pdf文档解析

相关文章