Ai
1 Star 0 Fork 0

baihaowen/js可视爬取

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
克隆/下载
index.js 1.53 KB
一键复制 编辑 原始数据 按行查看 历史
baihaowen 提交于 2024-04-11 15:44 +08:00 . '修改index.js'
const puppeteer = require("puppeteer");
const fs = require("fs");
const { log } = require("console");
/**
 * Opens a Bilibili video page in a visible browser, repeatedly scrolls to the
 * bottom so more replies get loaded, collects the text of every root-reply
 * element, and writes the collected texts to `data.json` as a JSON array.
 *
 * Progress is printed as `<round>/<max>` after each scroll round. Any failure
 * is logged and reported through a non-zero process exit code; the browser is
 * always closed.
 */
(async () => {
  // CSS path to the text node of each root reply.
  const REPLY_SELECTOR =
    "div > div > div > div.reply-warp > div.reply-list > div > div.root-reply-container > div.content-warp > div.root-reply > span > span";
  const VIDEO_URL =
    "https://www.bilibili.com/video/BV1ym42177Hy/?spm_id_from=333.1007.top_right_bar_window_dynamic.content.click&vd_source=3fb379bdd5d3e5bd63fada6189f6d4b6";
  const MAX_SCROLL_ROUNDS = 50; // upper bound on scroll rounds
  const SCROLL_SETTLE_MS = 1000; // wait for the page to load new content
  // Stop early once this many consecutive rounds bring neither new replies
  // nor a taller page; a single quiet round may just be a slow network.
  const MAX_STALLED_ROUNDS = 3;

  // Plain timer instead of page.waitForTimeout, which newer Puppeteer
  // releases no longer provide.
  const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

  const titles = [];
  const browser = await puppeteer.launch({
    headless: false,
    userDataDir: "./data",
  });

  try {
    const page = await browser.newPage();
    await page.goto(VIDEO_URL);
    await page.waitForSelector(REPLY_SELECTOR);

    // Text of every reply element currently present in the DOM.
    const readReplies = () =>
      page.$$eval(REPLY_SELECTOR, (nodes) => nodes.map((node) => node.innerText));

    // Append only the tail we have not stored yet. Each snapshot contains the
    // replies from earlier rounds as well, so concatenating whole snapshots
    // would duplicate them.
    // NOTE(review): assumes the page appends replies and keeps old ones in
    // the DOM - confirm against the live page.
    const appendNewReplies = (snapshot) => {
      const fresh = snapshot.slice(titles.length);
      titles.push(...fresh);
      return fresh.length;
    };

    appendNewReplies(await readReplies());

    let stalledRounds = 0;
    for (let round = 1; round <= MAX_SCROLL_ROUNDS; round++) {
      const previousHeight = await page.evaluate(() => document.body.scrollHeight);
      await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight));
      await sleep(SCROLL_SETTLE_MS);

      const addedCount = appendNewReplies(await readReplies());
      const currentHeight = await page.evaluate(() => document.body.scrollHeight);
      console.log(`${round}/${MAX_SCROLL_ROUNDS}`);

      const madeProgress = addedCount > 0 || currentHeight !== previousHeight;
      stalledRounds = madeProgress ? 0 : stalledRounds + 1;
      if (stalledRounds >= MAX_STALLED_ROUNDS) {
        break;
      }
    }
  } finally {
    // Release the browser even when navigation or scraping fails.
    await browser.close();
  }

  console.log(titles);
  await fs.promises.writeFile("data.json", JSON.stringify(titles, null, "\t"));
})().catch((err) => {
  console.log(err);
  process.exitCode = 1;
});
Loading...
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
1
https://gitee.com/baihaowen/javascript-visual-crawling.git
git@gitee.com:baihaowen/javascript-visual-crawling.git
baihaowen
javascript-visual-crawling
js可视爬取
master

搜索帮助