// Hand-written scraper: collects the full article list of a single WeChat official account.
// Shared state for the scraper. All of it is read and written by the
// functions defined below.

// Collected articles, keyed by article URL so that re-scanning the page on
// every tick does not create duplicates.
const map = new Map();
// Flat list of the collected records; this is what gets serialized on export.
const lis = [];
// Number of collected articles before the current tick, used to detect that
// nothing new was found.
let before = 0;
// Handle returned by setInterval for the polling timer (0 = not started).
let inter = 0;
// Vertical scroll offset, in pixels, requested on the most recent tick.
let scrollToY = 0;
/**
 * Starts the scrape: runs `collect_data` every 2 seconds and stores the timer
 * handle in `inter` so that it can be stopped later.
 *
 * Safe to call more than once: a timer started by an earlier call is cleared
 * first. Without that, its handle would be overwritten and the old timer
 * could never be stopped.
 */
function scrollMyPage() {
  // Clearing an unknown or already-cleared handle (including the initial 0)
  // is a no-op.
  clearInterval(inter);
  inter = setInterval(collect_data, 2000);
}
/**
 * One polling tick. Harvests every article currently rendered on the page
 * into `map`, then either finishes (nothing new since the previous tick) or
 * scrolls further down so that the page can load the next batch.
 *
 * Reads and writes the shared `map`, `before` and `scrollToY` state. Once the
 * collection stops growing it calls `clear_inter()` and `save_data()`.
 */
function collect_data() {
  before = map.size;

  const msgs = document.getElementsByClassName("weui_media_box");
  for (const msg of msgs) {
    const titleEl = msg.getElementsByClassName("weui_media_title")[0];
    // The link is read from the element's "hrefs" attribute.
    const url = titleEl ? titleEl.getAttribute("hrefs") : null;
    if (!url) {
      // Boxes without a link cannot be keyed; storing them would make them
      // all collide on the same empty key, so report and skip them.
      console.log("weui_media_box without an article link, skipped:", msg);
      continue;
    }

    // A missing date element must not abort the whole tick.
    const dateEl = msg.getElementsByClassName("weui_media_extra_info")[0];
    map.set(url, {
      dt: dateEl ? dateEl.innerText : "",
      url: url,
      title: titleEl.innerText,
    });
  }

  // Nothing new was found on this tick: treat the list as complete.
  if (before === map.size) {
    clear_inter();
    save_data();
    return;
  }

  scrollToY += 4000;
  window.scrollTo(0, scrollToY);
}
/**
 * Exports everything collected in `map` as a JSON file download.
 *
 * The records are copied into the shared `lis` array (rebuilt on every call,
 * so calling this twice does not export duplicates), serialized into a Blob,
 * and offered for download through a temporary hidden <a> element. The file
 * is named "<timestamp in ms>.json".
 */
function save_data() {
  // Rebuild in place so `lis` always mirrors `map` exactly.
  lis.length = 0;
  for (const record of map.values()) {
    lis.push(record);
  }

  // Blob holding the JSON text, and an object URL pointing at it.
  const blob = new Blob([JSON.stringify(lis)]);
  const blobUrl = URL.createObjectURL(blob);

  // Hidden anchor whose `download` attribute supplies the file name.
  const link = document.createElement("a");
  link.download = `${Date.now()}.json`;
  link.style.display = "none";
  link.href = blobUrl;

  document.body.appendChild(link);
  link.click();
  document.body.removeChild(link);

  // Release the blob once the download has had time to start; revoking
  // right away risks cancelling it.
  setTimeout(function () {
    URL.revokeObjectURL(blobUrl);
  }, 10000);
}
/**
 * Stops the polling timer whose handle is currently stored in `inter`.
 */
function clear_inter() {
  const activeTimer = inter;
  clearInterval(activeTimer);
}
// Kick off the periodic collection as soon as the script is evaluated.
scrollMyPage();