node爬取页面元素

时间:2022-03-01 19:52:44

/**
* Created by on 2018/12/25.
*/
const http = require("https");
const fs = require('fs'); //读取html插件
const cheerio = require("cheerio"); //读取的文件路径
const filePath = './山炮表格.xlsx'; //表格插件
const xlsx = require('node-xlsx'); const sheets = xlsx.parse('./山炮表格.xlsx');//获取到所有sheets let allUserId = [];
sheets.forEach(function (sheet) {
// console.log(sheet['name']);
for (var rowId in sheet['data']) {
// console.log(rowId);
var row = sheet['data'][rowId];
// console.log(row);
allUserId.push(row);
}
});
// console.log(allUserId); //微博搜索用户页
const findUserUrl = 'https://s.weibo.com/user';
let hasVUser = []
let index = 1;
function getV(allIds, index) {
let theCurrentId = allIds[index][1];
let theUrl = 'https://s.weibo.com/user?q=' + theCurrentId + '&Refer=SUer_box';
console.log(theUrl)
http.get(''+theUrl, res => {
let html = "";
// 获取页面数据
res.on("data", function (data) {
html += data;
});
// 数据获取结束
res.on("end", function () {
let $ = cheerio.load(html);
// console.log($('.icon-vip').length);
if($('.icon-vip').length>0){
hasVUser.push(theCurrentId);
}
if(index<allIds.length-1){
index++
getV(allIds,index);
}else{
console.log(hasVUser,'所有带v')
}
});
});
} getV(allUserId,index);
});
{
"dependencies": {},
"devDependencies": {
"cheerio": "^1.0.0-rc.2",
"fs": "0.0.1-security",
"node-xlsx": "^0.12.1"
}
}