I built this app with the help of this question I did previously.
我在之前做过的这个问题的帮助下构建了这个应用程序。
app.js:
app.js:
var mongolib = require('./middlewares/db.js');
var downloaderCoverageWho = require('./routers/downloaderCoverageWho.js');
var downloaderCoverageIta = require('./routers/downloaderCoverageIta.js');
const start = async function() {
const conn = await mongolib.connectToMongoDb();
const coverages = await mongolib.createACollection('coverages');
const isPageHasUpdates = true;
if(isPageHasUpdates) {
await downloadCoverageIta();
await downloadCoverageWho();
}
await mongolib.closeConnection();
await console.log('d3.js creation...');
return 'FINISH';
}
start()
.then(res => console.log(res))
.catch(err => console.log(err));
async function downloadCoverageWho() {
await downloaderCoverageWho.download();
console.log('Finish');
}
async function downloadCoverageIta() {
await downloaderCoverageIta.download();
console.log('Finish');
}
db.js:
db.js:
var fs = require('fs');
var MongoClient = require('mongodb').MongoClient;
const url = 'mongodb://localhost:27017/';
const dbName = 'db';
var collCovName = 'coverages';
var collCov;
let myClient;
let myConn;
var methods = {};
methods.getConnection = function() {
return myConn;
}
methods.connectToMongoDb = async function() {
return MongoClient.connect(url + dbName)
.then(function(conn) {
console.log('Connected to MongoDB');
myClient = conn;
myConn = myClient.db(dbName);
return myConn;
})
.catch(function(err) {
console.log('Error during connection');
throw err;
});
}
methods.closeConnection = async function() {
myClient.close()
.then(function() {
console.log('Connection closed');
})
.catch(function(err) {
console.log('Error closing connection');
throw err;
});
}
methods.createACollection = async function(collectionName) {
return myConn.createCollection(collectionName)
.then(function() {
console.log('Collection', collectionName, 'created');
})
.catch(function(err) {
console.log('Error during creation of collection', collectionName);
throw err;
});
}
methods.insert = async function(collectionName, obj) {
return myConn.collection(collectionName).updateOne(obj, {$set: obj}, {upsert: true})
.then(function(res) {
console.log('Inserted 1 element in', collectionName);
})
.catch(function(err) {
console.log('Error during insertion in', collectionName);
throw err;
});
}
module.exports = methods;
downloadCoverageIta.js:
downloadCoverageIta.js:
var cheerio = require('cheerio');
var express = require('express');
var fs = require('fs');
var request = require('request');
var textract = require('textract');
var util = require('../helpers/util.js');
var mongolib = require('../middlewares/db.js');
var methods = {};
var outDir = './output/';
var finalFilename = outDir + 'coverage-ita.json'
var urls = [
{year: '2013', link: 'http://www.salute.gov.it/imgs/C_17_tavole_20_allegati_iitemAllegati_0_fileAllegati_itemFile_1_file.pdf'},
{year: '2012', link: 'http://www.salute.gov.it/imgs/C_17_tavole_20_allegati_iitemAllegati_5_fileAllegati_itemFile_0_file.pdf'},
{year: '2011', link: 'http://www.salute.gov.it/imgs/C_17_tavole_20_allegati_iitemAllegati_6_fileAllegati_itemFile_0_file.pdf'},
{year: '2010', link: 'http://www.salute.gov.it/imgs/C_17_tavole_20_allegati_iitemAllegati_7_fileAllegati_itemFile_0_file.pdf'},
{year: '2009', link: 'http://www.salute.gov.it/imgs/C_17_tavole_20_allegati_iitemAllegati_8_fileAllegati_itemFile_0_file.pdf'},
{year: '2008', link: 'http://www.salute.gov.it/imgs/C_17_tavole_20_allegati_iitemAllegati_15_fileAllegati_itemFile_0_file.pdf'},
{year: '2007', link: 'http://www.salute.gov.it/imgs/C_17_tavole_20_allegati_iitemAllegati_14_fileAllegati_itemFile_0_file.pdf'},
{year: '2006', link: 'http://www.salute.gov.it/imgs/C_17_tavole_20_allegati_iitemAllegati_13_fileAllegati_itemFile_0_file.pdf'},
{year: '2005', link: 'http://www.salute.gov.it/imgs/C_17_tavole_20_allegati_iitemAllegati_12_fileAllegati_itemFile_0_file.pdf'},
{year: '2004', link: 'http://www.salute.gov.it/imgs/C_17_tavole_20_allegati_iitemAllegati_11_fileAllegati_itemFile_0_file.pdf'},
{year: '2003', link: 'http://www.salute.gov.it/imgs/C_17_tavole_20_allegati_iitemAllegati_10_fileAllegati_itemFile_0_file.pdf'},
{year: '2002', link: 'http://www.salute.gov.it/imgs/C_17_tavole_20_allegati_iitemAllegati_9_fileAllegati_itemFile_0_file.pdf'},
{year: '2001', link: 'http://www.salute.gov.it/imgs/C_17_tavole_20_allegati_iitemAllegati_1_fileAllegati_itemFile_0_file.pdf'},
{year: '2000', link: 'http://www.salute.gov.it/imgs/C_17_tavole_20_allegati_iitemAllegati_0_fileAllegati_itemFile_0_file.pdf'}
];
var jsons = [];
methods.download = async function(req, res) {
jsons = await extractText()
.then(function() {
console.log('Extract text success');
})
.catch(function() {
console.log('Extract text error');
});
};
async function extractText() {
var config = {
preserveLineBreaks: true
};
//await extractTextTest();
await urls.forEach(async function(url) {
await textract.fromUrl(url.link, config, async function(error, text) {
if(error) {
throw error;
}
switch(url.year) {
case '2000':
case '2001':
case '2002':
case '2003':
case '2004':
case '2005':
case '2006':
case '2007':
case '2008':
case '2009':
case '2010':
case '2011':
case '2012':
await extractTextType1(url, text)
.then(function() {
console.log('extractTextType1 success');
})
.catch(function() {
console.log('extractTextType1 error');
});
break;
case '2013':
extractTextType2(url, text)
.then(function() {
console.log('extractTextType2 success');
})
.catch(function() {
console.log('extractTextType2 error');
});
break;
default:
console.log('Error: no case');
}
});
});
}
async function extractTextTest() { // THIS WORKS
var obj = {A: 'aa', B: 'bb', C: 'cc'};
await mongolib.insert('coverages', obj);
}
async function extractTextType1(url, text) {
var matrix = [];
var map = [];
var vaccines = [];
var regionsTemp = [];
var regions = [];
var regionLength = [1, 2, 1, 2, 3, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1];
// text to matrix
var textArray = text.split('\n');
for(var i = 0; i < 23; i++) {
matrix[i] = textArray[i].split(' ');
}
matrix[0].shift();
vaccines = matrix[0];
map[0] = vaccines;
for(var i = 0; i < regionLength.length; i++) {
var j = i + 1;
var indexToRemove = 0;
var numberToRemove = regionLength[i];
var region = matrix[j].splice(indexToRemove, numberToRemove);
regionsTemp.push(region);
map[j+1] = matrix[j];
}
for(var i = 0; i < regionsTemp.length; i++) {
var region = '';
if(regionLength[i] > 1) {
region = regionsTemp[i].join(' ');
}
else {
region = regionsTemp[i].join('');
}
regions.push(region);
}
map[1] = regions;
vaccines = map.shift();
regions = map.shift();
var thisJson = await map.reduce(function(result, v, i) {
v.forEach(function(o, k) {
var obj = util.createJsonObjectCoverage(url.year, 'Italy', vaccines[k], regions[i], o);
// save on db
mongolib.insert('coverages', obj); // HERE
result.push(obj);
});
return result;
}, jsons);
util.printOnFile(jsons, finalFilename);
}
function extractTextType2(url, text) {
var matrix = [];
var map = [];
var vaccines = [];
var regions = [];
var textArray = text.split('\n');
for(var i = 0; i < 36; i++) {
matrix[i] = textArray[i].split(' ');
}
vaccines.push(matrix[0][1].replace(/\(a\)/g, '').replace(/\(b\)/g, '').replace(/\(c\)/g, '').replace(/\r/g, ''));
for(var i = 1; i < 10; i++) {
vaccines.push(matrix[i][0].replace(/\(a\)/g, '').replace(/\(b\)/g, '').replace(/\(c\)/g, '').replace(/\r/g, ''));
}
var meningo = ''.concat(matrix[10][0], matrix[11]).replace(/\(a\)/g, '').replace(/\(b\)/g, '').replace(/\(c\)/g, '').replace(/\r/g, '');
vaccines.push(meningo);
var pneumo = ''.concat(matrix[12][0], ' ', matrix[13]).replace(/\(a\)/g, '').replace(/\(b\)/g, '').replace(/\(c\)/g, '').replace(/\r/g, '');
vaccines.push(pneumo);
map[0] = vaccines;
for(var i = 14; i < matrix.length; i++) {
regions.push(matrix[i][0]);
}
map[1] = regions;
for(var i = 14; i < matrix.length; i++) {
matrix[i].shift();
map.push(matrix[i]);
}
vaccines = map.shift();
regions = map.shift();
var thisJson = map.reduce(function(result, v, i) {
v.forEach(function(o, k) {
var obj = util.createJsonObjectCoverage(url.year, 'Italy', vaccines[k], regions[i], o);
// save on db
mongolib.insert('coverages', obj); // HERE
result.push(obj);
});
return result;
}, jsons);
util.printOnFile(jsons, finalFilename);
}
module.exports = methods;
downloaderCoverageWho.js:
downloaderCoverageWho.js:
var cheerio = require('cheerio');
var express = require('express');
var fs = require('fs');
var request = require('request');
var util = require('../helpers/util.js');
var mongolib = require('../middlewares/db.js');
var methods = {};
var countries = {
'Albania': 'ALB',
'Austria': 'AUT'
};
var outDir = './output/';
var finalData = outDir + 'coverage-eu.json'
var jsons = [];
methods.download = async function(req, res) {
for(country in countries) {
var url = 'http://apps.who.int/immunization_monitoring/globalsummary/coverages?c=' + countries[country];
request(url, (function(country) {
var thisCountry = country;
return function(error, res, html) {
if(error) {
throw error;
}
$ = cheerio.load(html);
var years = [];
var vaccines = [];
var coverages = [];
$('.ts .year').each(function() {
years.push($(this).text().trim());
});
$('.ts .odd td a, .ts .even td a').each(function() {
vaccines.push($(this).text().trim());
});
$('.ts .odd .statistics_small, .ts .even .statistics_small').each(function() {
coverages.push($(this).text().trim());
});
const numYears = years.length;
const numVaccines = vaccines.length;
for(var vaccineIdx = 0; vaccineIdx < numVaccines; vaccineIdx++) {
for(var yearIdx = 0; yearIdx < numYears; yearIdx++) {
let obj = {
year: years[yearIdx],
country: country,
region: "",
vaccine: vaccines[vaccineIdx],
coverage: coverages[vaccineIdx*numYears + yearIdx]
}
jsons.push(obj);
// save on db
mongolib.insert('coverages', obj); // PROBLEM HERE
}
}
util.printOnFile(jsons, finalData);
}
})(country));
}
};
module.exports = methods;
When I run the code, I get this error:
当我运行代码时,我收到此错误:
(node:11952) UnhandledPromiseRejectionWarning: Unhandled promise rejection (rejection id: 2950): MongoError: server instance pool was destroyed
(node:11952)UnhandledPromiseRejectionWarning:未处理的promise拒绝(拒绝ID:2950):MongoError:服务器实例池被销毁
I think there is the same problem in both files (downloaderCoverageIta
and downloaderCoverageWho
).
我认为这两个文件都有同样的问题(downloaderCoverageIta和downloaderCoverageWho)。
I read here that probably I'm calling db.close()
before my inserts
have completed but this is not true. I don't know how to fix it.
我在这里读到可能在插入完成之前我正在调用db.close(),但事实并非如此。我不知道如何解决它。
It's the first time I use async/await
. How can I solve?
这是我第一次使用async / await。我怎么解决?
1 个解决方案
#1
0
I think db.closeConnection()
is called before all request responses are handled, and database access is finished.
我认为在处理所有请求响应和数据库访问完成之前调用db.closeConnection()。
The cause for this behaviour may be, that you are only waiting for creating the requests, but you are not waiting for their result (response).
这种行为的原因可能是,您只是在等待创建请求,但您不是在等待他们的结果(响应)。
You start the request with a statement like this inside an async function:
您在异步函数中使用这样的语句启动请求:
request(url, (function(country) {
...
})(...));
That means, the async function will wait for the request(...)
function call, but not for the callback function which handles the response.
这意味着,异步函数将等待请求(...)函数调用,但不会等待处理响应的回调函数。
You can also use request(...)
in the following way:
您还可以通过以下方式使用request(...):
const response = await request(url);
Now your async function will wait for the response.
现在你的异步函数将等待响应。
#1
0
I think db.closeConnection()
is called before all request responses are handled, and database access is finished.
我认为在处理所有请求响应和数据库访问完成之前调用db.closeConnection()。
The cause for this behaviour may be, that you are only waiting for creating the requests, but you are not waiting for their result (response).
这种行为的原因可能是,您只是在等待创建请求,但您不是在等待他们的结果(响应)。
You start the request with a statement like this inside an async function:
您在异步函数中使用这样的语句启动请求:
request(url, (function(country) {
...
})(...));
That means, the async function will wait for the request(...)
function call, but not for the callback function which handles the response.
这意味着,异步函数将等待请求(...)函数调用,但不会等待处理响应的回调函数。
You can also use request(...)
in the following way:
您还可以通过以下方式使用request(...):
const response = await request(url);
Now your async function will wait for the response.
现在你的异步函数将等待响应。