The following script works well on my Node.js server, but occasionally it returns a garbled response like the one below when I scrape some Cyrillic websites.
以下脚本在我的 Node.js 服务器上运行良好,但在抓取某些西里尔文网站时,偶尔会返回如下所示的乱码响应。
Script
// Scrape the title, description and image candidates from `url` with x-ray.
// `x` (an x-ray instance) and `url` are expected to be defined by the surrounding code.
x(url, {
  name: 'title',
  ogDescription: 'meta[property="og:description"]@content',
  metaDescription: 'meta[name="description"]@content',
  ogImage: 'meta[property="og:image"]@content',
  // The attribute value must be one quoted string; a nested name="..." is not a valid selector.
  twitterImage: 'meta[name="twitter:image:src"]@content',
  metaImage: 'meta[name="image"]@content',
  headImage: 'head img@src',
  contentImage_1: '.content img@src',
  contentImage_2: '.image img@src'
})(function (err, obj) {
  // Guard against a failed scrape: reading fields off `obj` would throw if it is undefined.
  if (err) {
    console.error(err);
    return;
  }

  // Collect the candidate values for each field, in order of preference.
  const firstData = {
    name: [
      obj.name
    ],
    description: [
      obj.metaDescription,
      obj.ogDescription
    ],
    image: [
      obj.ogImage,
      obj.twitterImage,
      obj.metaImage,
      obj.headImage,
      obj.contentImage_1,
      obj.contentImage_2
    ]
  };

  console.log(firstData);
});
Example of response with incorrect encoding
编码错误的响应示例
firstData { name: [ '(Rock, Pop) [15LP] [24/96] Queen - Studio Collection - 2015,
FLAC (tracks) :: RuTracker.org' ],
description:
[ 'RuTracker.org » ���������� ��� (����������� ���������) »
������� ������� (Rock, Pop) [15LP] [24/96] Queen -
Studio Collection - 2015, FLAC (tracks)',
undefined ],
image: [ undefined, undefined, undefined, undefined, undefined, undefined ] }
How do I fix this?
我该如何解决?
1 个解决方案
#1
0
You can use the `request` library as x-ray's driver and convert the response body with iconv inside it, like this:
你可以使用 request 库作为 x-ray 的驱动程序,并在其中用 iconv 转换响应体,如下所示:
// Custom x-ray driver: fetch pages with `request` as raw bytes and transcode
// them from Windows-1251 to UTF-8 with iconv before x-ray parses them.
// `x` (an x-ray instance) and `url` are expected to be defined by the surrounding code.
const Iconv = require('iconv').Iconv;

// With the 'binary' encoding each character of the body string holds one raw
// byte, so the original bytes can be recovered in conv() below.
const options = { encoding: 'binary' };
const iconv = new Iconv('Windows-1251', 'utf8');

/**
 * Re-decode a response body that was read with the 'binary' encoding.
 * @param {string} body - response body as a binary (one byte per character) string
 * @returns {string} the body converted from Windows-1251; falsy input is returned unchanged
 */
const conv = function (body) {
  if (!body) return body;
  return iconv.convert(Buffer.from(body, 'binary')).toString();
};

const request = require('request').defaults(options);

/**
 * x-ray driver that downloads `context.url` and hands the converted body to x-ray.
 * @param {object} context - x-ray request context; only `context.url` is read here
 * @param {function} callback - called as callback(err, body)
 */
const driver = function driver(context, callback) {
  request(context.url, function (err, response, body) {
    if (err) return callback(err, body);
    try {
      body = conv(body);
    } catch (convErr) {
      // Forward a conversion failure instead of throwing from an async callback.
      return callback(convErr);
    }
    return callback(null, body);
  });
};

x.driver(driver);

x(url, {
  name: 'title',
  ogDescription: 'meta[property="og:description"]@content',
  metaDescription: 'meta[name="description"]@content',
  ogImage: 'meta[property="og:image"]@content',
  // The attribute value must be one quoted string; a nested name="..." is not a valid selector.
  twitterImage: 'meta[name="twitter:image:src"]@content',
  metaImage: 'meta[name="image"]@content',
  headImage: 'head img@src',
  contentImage_1: '.content img@src',
  contentImage_2: '.image img@src'
})(function (err, obj) {
  // Guard against a failed scrape: reading fields off `obj` would throw if it is undefined.
  if (err) {
    console.error(err);
    return;
  }

  // Collect the candidate values for each field, in order of preference.
  const firstData = {
    name: [
      obj.name
    ],
    description: [
      obj.metaDescription,
      obj.ogDescription
    ],
    image: [
      obj.ogImage,
      obj.twitterImage,
      obj.metaImage,
      obj.headImage,
      obj.contentImage_1,
      obj.contentImage_2
    ]
  };

  console.log(firstData);
});
#1
0
You can use the `request` library as x-ray's driver and convert the response body with iconv inside it, like this:
你可以使用 request 库作为 x-ray 的驱动程序,并在其中用 iconv 转换响应体,如下所示:
// Custom x-ray driver: fetch pages with `request` as raw bytes and transcode
// them from Windows-1251 to UTF-8 with iconv before x-ray parses them.
// `x` (an x-ray instance) and `url` are expected to be defined by the surrounding code.
const Iconv = require('iconv').Iconv;

// With the 'binary' encoding each character of the body string holds one raw
// byte, so the original bytes can be recovered in conv() below.
const options = { encoding: 'binary' };
const iconv = new Iconv('Windows-1251', 'utf8');

/**
 * Re-decode a response body that was read with the 'binary' encoding.
 * @param {string} body - response body as a binary (one byte per character) string
 * @returns {string} the body converted from Windows-1251; falsy input is returned unchanged
 */
const conv = function (body) {
  if (!body) return body;
  return iconv.convert(Buffer.from(body, 'binary')).toString();
};

const request = require('request').defaults(options);

/**
 * x-ray driver that downloads `context.url` and hands the converted body to x-ray.
 * @param {object} context - x-ray request context; only `context.url` is read here
 * @param {function} callback - called as callback(err, body)
 */
const driver = function driver(context, callback) {
  request(context.url, function (err, response, body) {
    if (err) return callback(err, body);
    try {
      body = conv(body);
    } catch (convErr) {
      // Forward a conversion failure instead of throwing from an async callback.
      return callback(convErr);
    }
    return callback(null, body);
  });
};

x.driver(driver);

x(url, {
  name: 'title',
  ogDescription: 'meta[property="og:description"]@content',
  metaDescription: 'meta[name="description"]@content',
  ogImage: 'meta[property="og:image"]@content',
  // The attribute value must be one quoted string; a nested name="..." is not a valid selector.
  twitterImage: 'meta[name="twitter:image:src"]@content',
  metaImage: 'meta[name="image"]@content',
  headImage: 'head img@src',
  contentImage_1: '.content img@src',
  contentImage_2: '.image img@src'
})(function (err, obj) {
  // Guard against a failed scrape: reading fields off `obj` would throw if it is undefined.
  if (err) {
    console.error(err);
    return;
  }

  // Collect the candidate values for each field, in order of preference.
  const firstData = {
    name: [
      obj.name
    ],
    description: [
      obj.metaDescription,
      obj.ogDescription
    ],
    image: [
      obj.ogImage,
      obj.twitterImage,
      obj.metaImage,
      obj.headImage,
      obj.contentImage_1,
      obj.contentImage_2
    ]
  };

  console.log(firstData);
});