"""Fetch openEuler QuickIssue issues by keyword and export them to an Excel file."""
import requests
import pandas as pd
from tqdm import tqdm
def fetch_issues(base_url, search_term, max_pages=None):
    """Fetch paginated issue data for one search keyword.

    :param base_url: base URL, expected to end with ``?`` or ``&`` so that
        ``search=`` and ``page=`` can be appended directly
    :param search_term: search keyword (non-ASCII is percent-encoded by requests)
    :param max_pages: maximum number of pages to fetch (None = all pages)
    :return: DataFrame with ``title`` and ``link`` columns (empty on failure)
    """
    all_issues = []
    page = 1
    total_pages = None
    # Build the URL carrying the search parameter.
    search_url = f"{base_url}search={search_term}"
    with tqdm(desc=f"Fetching '{search_term}' issues") as pbar:
        while True:
            url = f"{search_url}&page={page}"
            try:
                response = requests.get(url, timeout=10)
                response.raise_for_status()
                data = response.json()
                if page == 1:
                    # Derive the page count from the first response.
                    total_items = data.get("total", 0)
                    per_page = data.get("per_page", 10)
                    total_pages = (total_items + per_page - 1) // per_page
                    # BUG FIX: max_pages was accepted but never applied;
                    # cap the number of pages actually fetched.
                    if max_pages is not None:
                        total_pages = min(total_pages, max_pages)
                    pbar.total = total_pages
                # Keep only the title and link fields of each issue.
                for issue in data.get("data", []):
                    all_issues.append({
                        "title": issue.get("title", ""),
                        "link": issue.get("link", ""),
                    })
                pbar.update(1)
                # Stop at the (possibly capped) last page or when the
                # server returns an empty data list.
                if page >= total_pages or not data.get("data"):
                    break
                page += 1
            except (requests.RequestException, ValueError) as e:
                # Best-effort: report the failure and return what we have
                # so far instead of aborting the whole batch.
                # (ValueError covers a non-JSON response body.)
                print(f"\nError fetching {search_term} page {page}: {e}")
                break
    return pd.DataFrame(all_issues)
def batch_fetch_issues(base_url, search_terms, max_pages=None):
    """Fetch issues for several search keywords and merge the results.

    :param base_url: base URL passed through to :func:`fetch_issues`
    :param search_terms: list of search keywords
    :param max_pages: per-keyword page limit forwarded to :func:`fetch_issues`
    :return: concatenated DataFrame, tagged with a ``search_term`` column;
        empty DataFrame when nothing was fetched
    """
    frames = []
    for keyword in search_terms:
        result = fetch_issues(base_url, keyword, max_pages)
        if result.empty:
            continue
        # Tag each row with the keyword that produced it.
        result["search_term"] = keyword
        frames.append(result)
    if not frames:
        return pd.DataFrame()
    return pd.concat(frames, ignore_index=True)
def save_to_excel(df, filename):
    """Write the collected issues to an Excel file with a row-number column.

    :param df: DataFrame holding ``search_term``, ``title`` and ``link`` columns
    :param filename: output .xlsx path
    """
    if df.empty:
        print("没有数据可保存")
        return
    # BUG FIX: work on a copy — df.insert() mutated the caller's frame and
    # would raise on a repeated call (duplicate "序号" column).
    out = df.copy()
    # Add a 1-based sequence-number column at the front.
    out.insert(0, "序号", range(1, len(out) + 1))
    # Keep only the columns we want in the spreadsheet, in order.
    final_df = out[["序号", "search_term", "title", "link"]]
    try:
        final_df.to_excel(filename, index=False)
        # BUG FIX: the success message printed the literal "(unknown)"
        # instead of the target filename (mangled f-string placeholder).
        print(f"成功保存 {len(final_df)} 条数据到 {filename}")
    except Exception as e:
        # Best-effort: report (e.g. missing openpyxl engine or bad path)
        # without crashing the caller.
        print(f"保存文件时出错: {e}")
if __name__ == "__main__":
    # Base URL: every query parameter except search= and page=, which
    # fetch_issues appends itself (note the trailing '&').
    BASE_URL = "https://quickissue.openeuler.org/api-issues/issues/?per_page=10&issue_type=&priority=&direction=desc&create=&author=&sort=created_at&assignee=&label=&branch=&repo=&issue_state=&milestone=&exclusion=&"
    # Keywords to search for, one fetch pass per keyword.
    KEYWORDS = ["技术支持", "安装问题", "兼容性"]
    # Fetch at most 3 pages per keyword.
    combined_df = batch_fetch_issues(BASE_URL, KEYWORDS, max_pages=3)
    if combined_df.empty:
        print("未获取到任何数据")
    else:
        # Persist the merged results, then show per-keyword counts.
        save_to_excel(combined_df, "openEuler_issues_optimized.xlsx")
        print("\n各搜索关键词获取数量:")
        print(combined_df["search_term"].value_counts())