# Provenance: this script was introduced as the new file AI/AIFunctionSe by
# git commit d8cf27a39bc4656eeca46dc34bf267632a86977f
# ("add AI/AIFunctionSe.", Tue, 12 Mar 2024 13:23:57 +0000,
# Signed-off-by: qqh <1198369927@qq.com>).
"""Download the chapters listed on a zongheng.com chapter-index page.

The script fetches the index page at ``url``, collects every link found under
``<ul class="chapter-list">``, downloads each linked chapter page, and writes
the text of its paragraphs to ``<OUTPUT_DIR>/<chapter title>.txt``.
"""
import re
from pathlib import Path
from urllib.parse import urljoin

import requests
from lxml import etree

url = 'https://book.zongheng.com/showchapter/1284449.html'
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
        '(KHTML,like Gecko) Chrome/120.0.0.0 Safari/537.36'
    )
}

# Directory that receives one .txt file per chapter.
OUTPUT_DIR = Path('E:\\gyy\\novel')

# Seconds to wait for the server before giving up; without a timeout a stalled
# connection would block the script indefinitely.
REQUEST_TIMEOUT = 10

# Characters that Windows does not allow in a file name, plus control chars.
_ILLEGAL_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|\x00-\x1f]')


def _safe_filename(title, fallback):
    """Return *title* made usable as a Windows file name, or *fallback* if empty.

    Illegal characters are replaced with ``_``; surrounding whitespace and
    trailing dots/spaces (which Windows also rejects) are removed.
    """
    cleaned = _ILLEGAL_FILENAME_CHARS.sub('_', title).strip().rstrip('. ')
    return cleaned or fallback


def _fetch_html(session, page_url):
    """Fetch *page_url* and return it parsed as an lxml HTML tree.

    Raises:
        requests.RequestException: on connection errors, timeouts, or a
            non-2xx HTTP status.
    """
    response = session.get(page_url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    # Force UTF-8 for every page (the index page was already decoded this way)
    # instead of relying on requests' guess from the response headers.
    response.encoding = 'utf-8'
    return etree.HTML(response.text)


def main():
    """Download every chapter linked from the index page into OUTPUT_DIR."""
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

    with requests.Session() as session:
        session.headers.update(headers)
        index_page = _fetch_html(session, url)

        # Read title and href from the same <a> element so the two can never
        # get out of step (two independent XPath lists could differ in length).
        links = index_page.xpath('//ul[@class="chapter-list"]/li/a')
        for number, link in enumerate(links, start=1):
            href = link.get('href')
            if not href:
                continue
            title = ''.join(link.itertext()).strip()
            # Resolves relative and protocol-relative hrefs; absolute URLs
            # pass through unchanged.
            chapter_url = urljoin(url, href)

            print(f"正在爬取章节:{title}")

            try:
                chapter_page = _fetch_html(session, chapter_url)
            except requests.RequestException as exc:
                # One unreachable chapter should not abort the whole download.
                print(f"章节下载失败,已跳过:{title}({exc})")
                continue

            # NOTE(review): this positional XPath (div[4]) depends on the exact
            # page layout; an empty result yields an empty file.
            content = chapter_page.xpath('//*[@id="Jcontent"]/div/div[4]/p/text()')
            text = '\n'.join(content)

            file_name = _safe_filename(title, f'chapter_{number}')
            with open(OUTPUT_DIR / f'{file_name}.txt', 'w', encoding='utf-8') as f:
                f.write(text)


if __name__ == '__main__':
    main()