手写抓取单个微信公众号的全部文章列表

时间:2025-03-20 18:17:57
// Scrapes the full article list of a single WeChat official account.
// Intended to be pasted into the browser console on a page that renders the
// account's articles as `weui_media_box` elements. Every 2 seconds it collects
// the entries rendered so far and scrolls further down; once a pass finds
// nothing new it stops and downloads the collected list as a JSON file.

/** Articles collected so far, keyed by URL so re-scanned entries are deduplicated. */
let map = new Map();
/** Array snapshot of the collected articles; rebuilt by save_data(). */
let lis = [];
/** Size of `map` at the start of the current pass (used to detect "no new data"). */
let before = 0;
/** Handle of the polling interval started by scrollMyPage(). */
let inter = 0;
/** Vertical scroll target in pixels; advanced by 4000 on every pass. */
let scrollToY = 0;

/**
 * Starts the polling loop: collect_data() runs every 2000 ms until it stops itself.
 */
function scrollMyPage() {
  inter = setInterval(collect_data, 2000);
}

/**
 * Runs one collection pass.
 *
 * Reads every `weui_media_box` element currently in the document and stores
 * `{ dt, url, title }` in `map` under its URL. If the pass added nothing new,
 * the interval is cleared and the result is downloaded; otherwise the page is
 * scrolled a further 4000px so more entries can load before the next pass.
 */
function collect_data() {
  before = map.size;

  const msgs = document.getElementsByClassName("weui_media_box");
  for (const msg of msgs) {
    const dt_ele = msg.getElementsByClassName("weui_media_extra_info")[0];
    const title_ele = msg.getElementsByClassName("weui_media_title")[0];

    // The article link is stored in the non-standard `hrefs` attribute of the title element.
    const url = title_ele ? title_ele.getAttribute("hrefs") : null;
    if (url === null || url === "") {
      // Entries without a link cannot be keyed; log and skip them instead of
      // storing a bogus record under the empty-string key.
      console.log("skipping entry without an article link");
      console.log(msg);
      continue;
    }

    // A missing date element must not abort the whole pass; fall back to "".
    const dt = dt_ele ? dt_ele.innerText : "";
    const title = title_ele.innerText;
    map.set(url, { dt: dt, url: url, title: title });
  }

  // Nothing new appeared since the previous pass: assume the list is exhausted.
  if (before === map.size) {
    clear_inter();
    save_data();
    return; // finished, so do not scroll any further
  }

  scrollToY = scrollToY + 4000;
  window.scrollTo(0, scrollToY);
}

/**
 * Downloads the collected articles as `<timestamp>.json`.
 *
 * `lis` is rebuilt from `map` on every call, so calling this more than once
 * never duplicates records in the output.
 */
function save_data() {
  lis = Array.from(map.values());

  // Create a hidden <a> element that is used only to trigger the download.
  const elementA = document.createElement("a");
  // The file name is the current timestamp plus the ".json" suffix.
  elementA.download = Date.now() + ".json";
  elementA.style.display = "none";
  // Build a Blob whose content is the JSON-serialised article list.
  const blob = new Blob([JSON.stringify(lis)], { type: "application/json" });
  // Point the <a> element at an object URL referring to that Blob.
  elementA.href = URL.createObjectURL(blob);
  document.body.appendChild(elementA);
  elementA.click();
  document.body.removeChild(elementA);
}

/**
 * Stops the polling loop started by scrollMyPage().
 */
function clear_inter() {
  clearInterval(inter);
}

// Start immediately when run in a page; do nothing where no DOM exists.
if (typeof document !== "undefined") {
  scrollMyPage();
}