// Node.js 处理编码的爬虫
//
// 时间:2021-08-19 21:32:20
var express=require('express');
var http=require('http');
var cheerio=require('cheerio');
var fs=require('fs')
var iconv = require('iconv-lite')
// Express application; its only route answers GET / with a fixed greeting.
const app = express();

app.get('/', (request, response) => {
  response.send('hello world');
});
app.listen(3000);

// Index page whose "#news" links are crawled.
const Nbaurl = 'http://china.nba.com/news/';
// Text of the index page; assigned once the whole response has arrived.
let html = '';
// Article URLs collected from the index page, resolved against Nbaurl.
const arr = [];

// http.get passes the response stream to its callback as the only argument.
http.get(Nbaurl, function (res) {
  // Keep the raw chunks and decode once at the end, so a multi-byte character
  // split across two chunks is not corrupted by per-chunk string conversion.
  const chunks = [];
  res.on('data', function (chunk) {
    chunks.push(chunk);
  });
  res.on('end', function () {
    html = Buffer.concat(chunks).toString();
    const $ = cheerio.load(html);
    $('#news').find('a').each(function () {
      const href = $(this).attr('href');
      // Skip anchors without an href and "javascript" pseudo-links.
      if (!href || href.indexOf('javascript') !== -1) {
        return;
      }
      try {
        // Resolve relative and protocol-relative links; http.get needs an absolute URL.
        arr.push(new URL(href, Nbaurl).href);
      } catch (err) {
        // Unparseable href: report it and move on to the next link.
        console.info(err);
      }
    });
    create(arr);
  });
  res.on('error', function (err) {
    console.info(err);
  });
}).on('error', function (err) {
  // Connection-level failures are emitted on the request object, not on the response.
  console.info(err);
});

/**
 * Download article pages, decode them from GB2312 and save the markup of the
 * "#MainL" element of each page to `./new/new<index>.html`.
 *
 * The `./new` directory must already exist; it is not created here.
 *
 * @param {string[]} arrhref - Absolute URLs of the pages to download.
 * @param {number} [limit=10] - Maximum number of pages taken from the start of `arrhref`.
 * @returns {void}
 */
function create(arrhref, limit = 10) {
  // Never read past the end of the list when fewer than `limit` links were found.
  const count = Math.min(arrhref.length, limit);
  for (let i = 0; i < count; i++) {
    let request;
    try {
      request = http.get(arrhref[i], function (res) {
        const chunks = [];
        res.on('data', function (chunk) {
          chunks.push(chunk);
        });
        res.on('end', function () {
          const htmldata = iconv.decode(Buffer.concat(chunks), 'gb2312');
          const $ = cheerio.load(htmldata);
          // fs.writeFile expects a string or Buffer, so serialize the selection first.
          const savedata = $.html($('#MainL'));
          fs.writeFile(`./new/new${i}.html`, savedata, function (err) {
            if (err) {
              console.log(err);
            }
          });
        });
        res.on('error', function (err) {
          console.info(err);
        });
      });
    } catch (err) {
      // http.get throws synchronously for URLs it cannot request (e.g. a
      // non-http protocol); report it and continue with the remaining links.
      console.info(err);
      continue;
    }
    request.on('error', function (err) {
      console.info(err);
    });
  }
}
// ⚠️ 运行前需要先安装相关依赖(express、cheerio、iconv-lite) ⚠️ 并在运行目录下新建 new 文件夹