I need to scrape real estate ads into a nidax.json file. I start from the page that lists all ads and follow the link to each individual ad to extract the data I need. I am using the Node.js x-ray scraper, but for some reason it does not work.
我需要将房地产广告抓取到 nidax.json 文件中。我先打开列出所有广告的页面,再通过指向各个广告的链接获取所需的数据。我使用的是 Node.js 的 x-ray 爬虫库,但由于某种原因它无法正常工作。
Sometimes it returns nothing, sometimes it returns just links to individual ads.
有时候它什么都不返回,有时它只返回单个广告的链接。
var Xray = require('x-ray');
var x = Xray();

// Scrape the listing page, scoped to each "read more" span, and write the
// collected entries to nidax.json.
// NOTE(review): the original scope selector contained a stray backtick
// ("div.read-more` span"); it is replaced with the child combinator ">" to
// mirror the nested selector below -- confirm against the page markup.
x('http://nidax-nekretnine.rs/nekretnine/', 'div.kutija-veca_dno > div.read-more > span', [{
  // Link to the individual ad, taken from the anchor inside the scoped span.
  url: 'a@href',
  // Follow the ad link and read the fields from the individual ad page.
  items: x('div.kutija-veca_dno > div.read-more > span > a@href', {
    location: 'body > div.contentarea-novo > div > div.info-part > div.one-third div.osnovni-podaci > p:nth-child(2) > span.orange-text'
  })
}]).write('nidax.json');
1 个解决方案
#1
0
You can subscribe to the following pull request to be notified when it is approved.
您可以订阅以下拉取请求,以便在它获得批准时收到通知。
Meanwhile, I recommend applying the fix directly to your downloaded x-ray module. It is a one-line change that I tested in two projects, and it simply works. Look at line 237 of the index.js file and note the "return" after the long comment:
同时,我建议您直接在已下载的 x-ray 模块中应用该修复。它只需改动一行代码,我已在两个项目中测试过,确实有效。请查看 index.js 文件的第 237 行,注意长注释之后的 "return":
/**
 * Build a walker that resolves every entry of `selector` against loaded HTML.
 *
 * @param {Function} xray - factory invoked as `xray(scope, template)`; the
 *   function it returns is called with one matched element and a callback.
 * @param {*} selector - handed to `walk`, which feeds its entries one at a
 *   time to the visitor defined below.
 * @param {*} scope - selector passed to `$.find` / `$` and to `root` / `xray`.
 * @param {*} filters - forwarded untouched to `resolve`.
 * @returns {Function} `walkHTML($, fn)`; `fn` receives `(err)` on failure or
 *   `(null, result, $)` on success.
 */
function WalkHTML (xray, selector, scope, filters) {
  return function walkHTML ($, fn) {
    // Final callback given to `walk`: report the error or the walked result.
    function finish (err, result) {
      if (err) return fn(err)
      fn(null, result, $)
    }

    // Run `template` against every element matched by `scope` and report the
    // compacted, index-ordered results once every element has answered.
    function collect (template, done) {
      var $matches = $.find ? $.find(scope) : $(scope)
      var remaining = $matches.length
      var results = []

      // Nothing matched: answer immediately with the empty list.
      if (!remaining) return done(null, results)

      $matches.each(function (index) {
        var $element = $matches.eq(index)
        var crawl = xray(scope, template)
        crawl($element, function (err, item) {
          if (err) return done(err)
          results[index] = item
          remaining -= 1
          if (!remaining) return done(null, compact(results))
        })
      })
      // Deliberately no call to `done` here (nested-crawling fix, see
      // 'bugfix/nested-crawling' #111): completion must only be signalled by
      // the last pending callback above, otherwise the walk finishes before
      // all pending requests have been retrieved.
    }

    // Visitor given to `walk`: resolve a single selector entry.
    function visit (entry, key, done) {
      var kind = typeof entry

      if (kind === 'string') {
        return done(null, resolve($, root(scope), entry, filters))
      }

      if (kind === 'function') {
        return entry($, function (err, result) {
          return err ? done(err) : done(null, result)
        })
      }

      if (isArray(entry)) {
        var head = entry[0]
        if (typeof head === 'string') {
          return done(null, resolve($, root(scope), entry, filters))
        }
        if (typeof head === 'object') {
          return collect(head, done)
        }
      }

      // Anything else contributes no value.
      return done()
    }

    walk(selector, visit, finish)
  }
}
#1
0
You can subscribe to the following pull request to be notified when it is approved.
您可以订阅以下拉取请求,以便在它获得批准时收到通知。
Meanwhile, I recommend applying the fix directly to your downloaded x-ray module. It is a one-line change that I tested in two projects, and it simply works. Look at line 237 of the index.js file and note the "return" after the long comment:
同时,我建议您直接在已下载的 x-ray 模块中应用该修复。它只需改动一行代码,我已在两个项目中测试过,确实有效。请查看 index.js 文件的第 237 行,注意长注释之后的 "return":
/**
 * Build a walker that resolves every entry of `selector` against loaded HTML.
 *
 * @param {Function} xray - factory invoked as `xray(scope, template)`; the
 *   function it returns is called with one matched element and a callback.
 * @param {*} selector - handed to `walk`, which feeds its entries one at a
 *   time to the visitor defined below.
 * @param {*} scope - selector passed to `$.find` / `$` and to `root` / `xray`.
 * @param {*} filters - forwarded untouched to `resolve`.
 * @returns {Function} `walkHTML($, fn)`; `fn` receives `(err)` on failure or
 *   `(null, result, $)` on success.
 */
function WalkHTML (xray, selector, scope, filters) {
  return function walkHTML ($, fn) {
    // Final callback given to `walk`: report the error or the walked result.
    function finish (err, result) {
      if (err) return fn(err)
      fn(null, result, $)
    }

    // Run `template` against every element matched by `scope` and report the
    // compacted, index-ordered results once every element has answered.
    function collect (template, done) {
      var $matches = $.find ? $.find(scope) : $(scope)
      var remaining = $matches.length
      var results = []

      // Nothing matched: answer immediately with the empty list.
      if (!remaining) return done(null, results)

      $matches.each(function (index) {
        var $element = $matches.eq(index)
        var crawl = xray(scope, template)
        crawl($element, function (err, item) {
          if (err) return done(err)
          results[index] = item
          remaining -= 1
          if (!remaining) return done(null, compact(results))
        })
      })
      // Deliberately no call to `done` here (nested-crawling fix, see
      // 'bugfix/nested-crawling' #111): completion must only be signalled by
      // the last pending callback above, otherwise the walk finishes before
      // all pending requests have been retrieved.
    }

    // Visitor given to `walk`: resolve a single selector entry.
    function visit (entry, key, done) {
      var kind = typeof entry

      if (kind === 'string') {
        return done(null, resolve($, root(scope), entry, filters))
      }

      if (kind === 'function') {
        return entry($, function (err, result) {
          return err ? done(err) : done(null, result)
        })
      }

      if (isArray(entry)) {
        var head = entry[0]
        if (typeof head === 'string') {
          return done(null, resolve($, root(scope), entry, filters))
        }
        if (typeof head === 'object') {
          return collect(head, done)
        }
      }

      // Anything else contributes no value.
      return done()
    }

    walk(selector, visit, finish)
  }
}