7 Star 1 Fork 6

zengchen1024/repo-test

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
topn_issue_data.py 3.60 KB
一键复制 编辑 原始数据 按行查看 历史
i-robot 提交于 2025-05-13 10:08 +08:00 . add file
import requests
import pandas as pd
from tqdm import tqdm
def fetch_issues(base_url, search_term, max_pages=None):
    """
    Fetch issue data for one search term, page by page.

    :param base_url: base URL (without the ``search`` and ``page`` params)
    :param search_term: search keyword
    :param max_pages: maximum number of pages to fetch (None = all pages)
    :return: DataFrame with ``title`` and ``link`` columns
    """
    all_issues = []
    page = 1
    total_pages = None
    # Build the URL carrying the search parameter
    search_url = f"{base_url}search={search_term}"
    with tqdm(desc=f"Fetching '{search_term}' issues") as pbar:
        while True:
            url = f"{search_url}&page={page}"
            try:
                response = requests.get(url, timeout=10)
                response.raise_for_status()
                data = response.json()
                if page == 1:
                    # Derive the page count from the first response
                    total_items = data.get("total", 0)
                    per_page = data.get("per_page", 10)
                    total_pages = (total_items + per_page - 1) // per_page
                    # BUGFIX: honor the max_pages cap (was accepted but ignored)
                    if max_pages is not None:
                        total_pages = min(total_pages, max_pages)
                    pbar.total = total_pages
                # Keep only the title and link fields
                for issue in data.get("data", []):
                    all_issues.append({
                        "title": issue.get("title", ""),
                        "link": issue.get("link", ""),
                    })
                pbar.update(1)
                # Stop at the last (or capped) page, or when the API returns no rows
                if page >= total_pages or not data.get("data"):
                    break
                page += 1
            except (requests.RequestException, ValueError) as e:
                # Network/HTTP failure or malformed JSON: report and return what we have
                print(f"\nError fetching {search_term} page {page}: {e}")
                break
    return pd.DataFrame(all_issues)
def batch_fetch_issues(base_url, search_terms, max_pages=None):
    """
    Fetch issues for several search terms and merge the results.

    :param base_url: base URL (without the ``search`` and ``page`` params)
    :param search_terms: list of search keywords
    :param max_pages: maximum pages per keyword
    :return: combined DataFrame (empty when nothing was fetched)
    """
    frames = []
    for keyword in search_terms:
        frame = fetch_issues(base_url, keyword, max_pages)
        if frame.empty:
            continue
        frame["search_term"] = keyword  # tag each row with its keyword
        frames.append(frame)
    return pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
def save_to_excel(df, filename):
    """
    Save the data to an Excel file, adding a sequence-number column.

    :param df: DataFrame holding the data (not modified by this function)
    :param filename: output file name
    """
    if df.empty:
        print("没有数据可保存")
        return
    # BUGFIX: work on a copy so the caller's DataFrame is not mutated
    out = df.copy()
    # Prepend a 1-based sequence-number column
    out.insert(0, "序号", range(1, len(out) + 1))
    # Keep only the columns we want to export
    final_df = out[["序号", "search_term", "title", "link"]]
    try:
        final_df.to_excel(filename, index=False)
        # BUGFIX: interpolate the actual filename (was a garbled literal)
        print(f"成功保存 {len(final_df)} 条数据到 {filename}")
    except Exception as e:
        # Best-effort save: report the failure instead of crashing the script
        print(f"保存文件时出错: {e}")
if __name__ == "__main__":
    # Base API URL; fetch_issues appends the search and page parameters.
    base_url = (
        "https://quickissue.openeuler.org/api-issues/issues/?per_page=10"
        "&issue_type=&priority=&direction=desc&create=&author="
        "&sort=created_at&assignee=&label=&branch=&repo="
        "&issue_state=&milestone=&exclusion=&"
    )
    # Keywords to search for
    search_terms = ["技术支持", "安装问题", "兼容性"]
    # Fetch at most 3 pages per keyword
    combined_df = batch_fetch_issues(base_url, search_terms, max_pages=3)
    if combined_df.empty:
        print("未获取到任何数据")
    else:
        # Export to Excel, then show per-keyword counts
        save_to_excel(combined_df, "openEuler_issues_optimized.xlsx")
        print("\n各搜索关键词获取数量:")
        print(combined_df["search_term"].value_counts())
Loading...
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
1
https://gitee.com/zengchen1024/repo-test.git
git@gitee.com:zengchen1024/repo-test.git
zengchen1024
repo-test
repo-test
master

搜索帮助