From 9dbf20eef1c136505c3f8e8bae723b674a506cc3 Mon Sep 17 00:00:00 2001 From: Shinwell Hu Date: Thu, 19 Dec 2024 18:19:34 +0800 Subject: [PATCH 01/20] refactor code --- advisors/oe_review.py | 277 ++++++++++++++++++++++++++++----------- advisors/package_type.py | 2 +- 2 files changed, 204 insertions(+), 75 deletions(-) diff --git a/advisors/oe_review.py b/advisors/oe_review.py index 583811e1..ff8f7726 100755 --- a/advisors/oe_review.py +++ b/advisors/oe_review.py @@ -18,12 +18,14 @@ import re import sys import argparse import subprocess -import shutil +import collections +import queue import tempfile import urllib import urllib.error import urllib.request import urllib.parse +import http.client import yaml import json import requests @@ -32,6 +34,9 @@ import configparser from openai import OpenAI from advisors import gitee +GLOBAL_MAX_RETRY = 1000 +GLOBAL_TIMEOUT = 60 + OE_REVIEW_PR_PROMPT=""" You are a code reviewer of a openEuler Pull Request, providing feedback on the code changes below. As a code reviewer, your task is: @@ -60,7 +65,8 @@ a rating number in range 1-100. import threading import time -class ThreadSafeQueue: +""" +class ThreadSafeQueueSimple: def __init__(self): self.queue = [] # Your data structure (list) can be replaced with any other like deque from collections self.lock = threading.Lock() @@ -81,20 +87,54 @@ class ThreadSafeQueue: def qsize(self): with self.lock: return len(self.queue) +""" +class ThreadSafeQueueComplex: + def __init__(self, maxsize=0): + self.queue = collections.deque() # 使用deque替代list + self.lock = threading.Lock() + self.condition = threading.Condition(self.lock) + self.maxsize = maxsize # 添加最大容量限制 + + def put(self, item, block=True, timeout=None): + with self.condition: + while self.maxsize > 0 and len(self.queue) >= self.maxsize: + if not block: + raise self.queue.Full + if timeout is not None: + if not self.condition.wait(timeout): + raise self.queue.Full + else: + self.condition.wait() + self.queue.append(item) + self.condition.notify() + + def get(self, block=True, timeout=None): + with self.condition: + while len(self.queue) == 0: + if not block: + raise queue.Empty + if timeout is not None: + if not self.condition.wait(timeout): + raise queue.Empty + else: + self.condition.wait() + item = self.queue.popleft() # 使用popleft()而不是pop(0) + self.condition.notify() + return item # 建三个队列,一个是待处理PR列表,一个是经过预处理的PR列表,一个是待提交PR列表 # 批处理,首先关闭所有可以关闭的PR,直接合并sync且没有ci_failed的PR # 然后对所有其他的PR再进行review # define 3 queues to be shared across threads # List of PRs to be reviewed, by review_repos() -PENDING_PRS = ThreadSafeQueue() +PENDING_PRS = queue.Queue() # review_pr() get pr from PENDING_PRS, if can be obviously handled, put comment into submitting_prs, otherwise, move to NEED_REVIEW_PRS # that are being preprocessed for review -NEED_REVIEW_PRS = ThreadSafeQueue() -MANUAL_REVIEW_PRS = ThreadSafeQueue() +NEED_REVIEW_PRS = queue.Queue() +MANUAL_REVIEW_PRS = queue.Queue() # PRs that are being submitted -SUBMITTING_PRS = ThreadSafeQueue() +SUBMITTING_PRS = queue.Queue() #def generate_review_from_ollama(pr_content, prompt, model="llama3.1:8b"): def generate_review_from_ollama(pr_content, prompt, model): @@ -174,7 +214,7 @@ def load_config(): cf.read(cf_path) return cf else: - print("ERROR: no such file:"+cf_path) + print("ERROR: miss config file:"+cf_path) return None def edit_content(text, editor): @@ -222,34 +262,56 @@ def easy_classify(pull_request): pass return suggest_action, suggest_reason -def sort_pr(user_gitee): +def filter_pr(pull_request, filter): + #print(filter) + for label in pull_request["labels"]: + if label["name"] in filter["labels"]: + return True + if pull_request["user"]["login"] in filter["submitters"]: + return True + if pull_request["head"]["repo"]["path"] in filter["repos"]: + return True + return False + +def sort_pr(user_gitee, filter): + wait_error = 0 while True: - item = PENDING_PRS.get() - if not item: + try: + review_item = PENDING_PRS.get(timeout=GLOBAL_TIMEOUT) + except queue.Empty as e: + print("No PR to be sorted in a while.") + if wait_error >= GLOBAL_MAX_RETRY: + break + else: + wait_error = wait_error + 1 + continue + if not review_item: + PENDING_PRS.task_done() break #print(f"Got {item} from queue") - pull_request = user_gitee.get_pr(item["repo"], item["number"], item["owner"]) + pull_request = user_gitee.get_pr(review_item["repo"], review_item["number"], review_item["owner"]) + + if filter_pr(pull_request, filter): + continue suggest_action, suggest_reason = easy_classify(pull_request) + PENDING_PRS.task_done() if suggest_action == "": - need_review_pr = {} - need_review_pr['pull_request'] = pull_request - need_review_pr['pr_info'] = item - NEED_REVIEW_PRS.put(need_review_pr) + review_item['pull_request'] = pull_request + NEED_REVIEW_PRS.put(review_item) else: review_comment_raw = suggest_action + "\n" + suggest_reason - submitting_pr = {} - submitting_pr['review_comment'] = review_comment_raw - submitting_pr['pull_request'] = pull_request - submitting_pr['pr_info'] = item - submitting_pr['suggest_action'] = suggest_action - submitting_pr['suggest_reason'] = suggest_reason - SUBMITTING_PRS.put(submitting_pr) + review_item['review_comment'] = review_comment_raw + review_item['pull_request'] = pull_request + review_item['suggest_action'] = suggest_action + review_item['suggest_reason'] = suggest_reason + SUBMITTING_PRS.put(review_item) NEED_REVIEW_PRS.put(None) - print("sort pr exits") + print("sort pr finished") + NEED_REVIEW_PRS.join() def ai_review_impl(user_gitee, repo, pull_id, group, ai_flag, ai_model): pr_diff = user_gitee.get_diff(repo, pull_id, group) @@ -263,27 +325,35 @@ def ai_review_impl(user_gitee, repo, pull_id, group, ai_flag, ai_model): return pr_diff, review, review_rating def ai_review(user_gitee, ai_flag, ai_model): + wait_error = 0 while True: - item = NEED_REVIEW_PRS.get() + try: + review_item = NEED_REVIEW_PRS.get(timeout=GLOBAL_TIMEOUT) + except queue.Empty as e: + print("No PR to be reviewed by AI in a while.") + if wait_error > GLOBAL_MAX_RETRY: + break + else: + wait_error = wait_error + 1 + continue #print("ai review works") - if not item: + if not review_item: + NEED_REVIEW_PRS.task_done() break - pr_info = item["pr_info"] - pr_diff, review, review_rating = ai_review_impl(user_gitee, pr_info['repo'], pr_info['number'], pr_info['owner'], ai_flag, ai_model) + pr_diff, review, review_rating = ai_review_impl(user_gitee, review_item['repo'], review_item['number'], review_item['owner'], ai_flag, ai_model) if pr_diff == "": continue - manual_review_pr = {} - manual_review_pr['pr_info'] = pr_info - manual_review_pr['pull_request'] = item['pull_request'] - manual_review_pr['pr_diff'] = pr_diff - manual_review_pr['review'] = review - manual_review_pr['review_rating'] = review_rating - MANUAL_REVIEW_PRS.put(manual_review_pr) + review_item['pr_diff'] = pr_diff + review_item['review'] = review + review_item['review_rating'] = review_rating + NEED_REVIEW_PRS.task_done() + MANUAL_REVIEW_PRS.put(review_item) MANUAL_REVIEW_PRS.put(None) - print("ai review exits") + print("ai review finished") + MANUAL_REVIEW_PRS.join() def clean_advisor_comment(comment): """ @@ -291,6 +361,7 @@ def clean_advisor_comment(comment): """ comment = comment.replace('[🔵]', 'ongoing').replace('[🟡]', 'question') comment = comment.replace('[◯]', 'NA').replace('[🔴]', 'nogo').replace('[🟢]', 'GO') + comment = comment.replace('[😀]', 'smile').replace('[white_checkmark]', 'smile') return comment def manually_review_impl(user_gitee, pr_info, pull_request, review, review_rating, pr_diff, editor): @@ -307,44 +378,56 @@ def manually_review_impl(user_gitee, pr_info, pull_request, review, review_ratin comments = user_gitee.get_pr_comments_all(pr_info['owner'], pr_info['repo'], pr_info['number']) for comment in comments: if comment['user']['name'] == "openeuler-ci-bot": - if comment['body'].startswith("\n**以下为 openEuler-Advisor"): advisor_comment = comment['body'] - elif comment['user']['name'] == "openeuler-sync-bot": sync_comment += comment["body"] + "\n" else: history_comment += comment["user"]["name"] + ":\n" history_comment += comment["body"] + "\n" + review_content += "\n# Branch Status\n" + sync_comment review_content += "\n# History\n" + history_comment - review_content += "\n# Advisor\n" + clean_advisor_comment(advisor_comment) review_comment_raw = edit_content(review_content + '\n\n# ReviewBot\n\n' + review + '\n\n# ReviewRating\n\n' + review_rating + '\n\n' + pr_diff, editor) return review_comment_raw def manually_review(user_gitee, editor): + wait_error = 0 while True: - item = MANUAL_REVIEW_PRS.get() - #print("manually review works") - if not item: + try: + review_item = MANUAL_REVIEW_PRS.get(timeout=GLOBAL_TIMEOUT) + except queue.Empty as e: + print("No PR to be review by hand in a while.") + if wait_error >= GLOBAL_MAX_RETRY: + break + else: + wait_error = wait_error + 1 + continue + + if not review_item: + MANUAL_REVIEW_PRS.task_done() break - pull_request = item['pull_request'] - pr_info = item['pr_info'] - review = item['review'] - review_rating = item['review_rating'] - pr_diff = item['pr_diff'] - review_comment_raw = manually_review_impl(user_gitee, pr_info, pull_request, review, review_rating, pr_diff, editor) + pr_info = {} + pr_info["owner"] = review_item['owner'] + pr_info["repo"] = review_item["repo"] + pr_info["number"] = review_item["number"] + + review_comment_raw = manually_review_impl(user_gitee, pr_info, + review_item['pull_request'], + review_item['review'], + review_item['review_rating'], + review_item['pr_diff'], + editor) - submitting_pr = {} - submitting_pr['review_comment'] = review_comment_raw - submitting_pr['pr_info'] = pr_info - submitting_pr['pull_request'] = pull_request - SUBMITTING_PRS.put(submitting_pr) + review_item['review_comment'] = review_comment_raw + MANUAL_REVIEW_PRS.task_done() + SUBMITTING_PRS.put(review_item) SUBMITTING_PRS.put(None) - print("manually review exits") + print("manually review finished") + SUBMITTING_PRS.join() def submit_review_impl(user_gitee, pr_info, pull_request, review_comment, suggest_action="", suggest_reason=""): result = " is handled and review is published." @@ -383,22 +466,39 @@ def submit_review_impl(user_gitee, pr_info, pull_request, review_comment, sugges print("!{number}: {title}{res}".format(number=pr_info["number"], title=pull_request["title"], res=result)) def submmit_review(user_gitee): + wait_error = 0 while True: - item = SUBMITTING_PRS.get() + try: + review_item = SUBMITTING_PRS.get(timeout=GLOBAL_TIMEOUT) + except queue.Empty as e: + print("No PR review to be summited in a while.") + if wait_error >= GLOBAL_MAX_RETRY: + break + else: + wait_error = wait_error + 1 + continue #print("submit review works") #print(item) - if not item: + if not review_item: + SUBMITTING_PRS.task_done() break - review_comment = item['review_comment'] - pr_info = item['pr_info'] - pull_request = item['pull_request'] - suggest_action = item.get('suggest_action', "") - suggest_reason = item.get('suggest_reason', "") - - submit_review_impl(user_gitee, pr_info, pull_request, review_comment, suggest_action, suggest_reason) - print("submit review exits") - -def review_pr_new(user_gitee, repo_name, pull_id, group, editor, ai_flag, ai_model): + + pr_info = {} + pr_info['owner'] = review_item['owner'] + pr_info['repo'] = review_item['repo'] + pr_info['number'] = review_item['number'] + + suggest_action = review_item.get('suggest_action', "") + suggest_reason = review_item.get('suggest_reason', "") + + submit_review_impl(user_gitee, pr_info, + review_item['pull_request'], + review_item['review_comment'], + suggest_action, suggest_reason) + SUBMITTING_PRS.task_done() + print("submit review finish") + +def review_pr_new(user_gitee, repo_name, pull_id, group, editor, ai_flag, ai_model, filter): """ New Implementation of Review Pull Request, reuse code from threading implementation """ @@ -409,6 +509,10 @@ def review_pr_new(user_gitee, repo_name, pull_id, group, editor, ai_flag, ai_mod pull_request = user_gitee.get_pr(repo_name, pull_id, group) + if filter_pr(pull_request, filter): + print("PR has been filtered, do not review") + return + print("Doing review") suggest_action, suggest_reason = easy_classify(pull_request) pr_diff, review, review_rating = ai_review_impl(user_gitee, repo_name, pull_id, group, ai_flag, ai_model) review_comment = manually_review_impl(user_gitee, pr_info, pull_request, review, review_rating, pr_diff, editor) @@ -437,20 +541,42 @@ def review_repo(user_gitee, owner, repo): #print(pending_pr) PENDING_PRS.put(pending_pr) +def print_progress(current, total, percentage): + #print current progress when cur is 10%, 20% ... till 100% + #keep silent otherwise + if (current / total) * 100 > percentage: + print(f'generate_pending_prs in {percentage}%') + return True + else: + return False + def generate_pending_prs(user_gitee, sig): """ Generate pending PRs """ src_oe_repos = user_gitee.get_repos_by_sig(sig) + oe_repos = user_gitee.get_openeuler_repos_by_sig(sig) + + total_len = len(src_oe_repos) + len(oe_repos) + current_percentage = 10 + counter = 0 + + print(f"start generate list of pending pr.") for repo in src_oe_repos: + counter = counter + 1 + if print_progress(counter, total_len, current_percentage): + current_percentage = current_percentage + 10 review_repo(user_gitee, 'src-openeuler', repo) - oe_repos = user_gitee.get_openeuler_repos_by_sig(sig) for repo in oe_repos: + counter = counter + 1 + if print_progress(counter, total_len, current_percentage): + current_percentage = current_percentage + 10 review_repo(user_gitee, 'openeuler', repo) PENDING_PRS.put(None) - print("DONE PENDING GENERATE") + print("generate_pending_pr finished") + PENDING_PRS.join() return 0 def get_responsible_sigs(user_gitee): @@ -471,7 +597,7 @@ def get_responsible_sigs(user_gitee): result.append((sig_info["name"])) return result -def review_sig(user_gitee, sig, editor, ai_flag, ai_model): +def review_sig(user_gitee, sig, editor, ai_flag, ai_model, filter): """ Review sig 1. Generate pending PRs for sig @@ -483,7 +609,7 @@ def review_sig(user_gitee, sig, editor, ai_flag, ai_model): print("Reviewing sig: {}".format(sig)) generate_pending_prs_thread = threading.Thread(target=generate_pending_prs, args=(user_gitee, sig)) - sort_pr_thread = threading.Thread(target=sort_pr, args=(user_gitee,)) + sort_pr_thread = threading.Thread(target=sort_pr, args=(user_gitee, filter)) ai_review_thread = threading.Thread(target=ai_review, args=(user_gitee, ai_flag, ai_model)) manually_review_thread = threading.Thread(target=manually_review, args=(user_gitee, editor)) submmit_review_thread = threading.Thread(target=submmit_review, args=(user_gitee,)) @@ -530,14 +656,18 @@ def main(): if args.model: ai_model = args.model + filter = {} + filter['labels'] = set(cf.get('filter', 'labels').split()) + filter['submitters'] = set(cf.get('filter', 'submitters').split()) + filter['repos'] = set(cf.get('filter', 'repos').split()) + if args.active_user: if args.sig == "": sigs = get_responsible_sigs(user_gitee) for sig in sigs: - review_sig(user_gitee, sig, editor, not args.no_ai, ai_model) + review_sig(user_gitee, sig, editor, not args.no_ai, ai_model, filter) else: - review_sig(user_gitee, args.sig, editor, not args.no_ai, ai_model) - + review_sig(user_gitee, args.sig, editor, not args.no_ai, ai_model, filter) else: params = extract_params(args) if not params: @@ -545,8 +675,7 @@ def main(): group = params[0] repo_name = params[1] pull_id = params[2] - print(args.no_ai) - review_pr_new(user_gitee, repo_name, pull_id, group, editor, not args.no_ai, ai_model) + review_pr_new(user_gitee, repo_name, pull_id, group, editor, not args.no_ai, ai_model, filter) return 0 diff --git a/advisors/package_type.py b/advisors/package_type.py index 9d5f27da..cabf248f 100755 --- a/advisors/package_type.py +++ b/advisors/package_type.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 # ****************************************************************************** # Copyright (c) Huawei Technologies Co., Ltd. 2020-2020. All rights reserved. # licensed under the Mulan PSL v2. -- Gitee From 770e8ba54e0027faedfd99cfdcfc030036928792 Mon Sep 17 00:00:00 2001 From: Shinwell Hu Date: Fri, 20 Dec 2024 11:09:34 +0800 Subject: [PATCH 02/20] code refactor --- advisors/oe_review.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/advisors/oe_review.py b/advisors/oe_review.py index ff8f7726..9004174c 100755 --- a/advisors/oe_review.py +++ b/advisors/oe_review.py @@ -65,7 +65,7 @@ a rating number in range 1-100. import threading import time -""" + class ThreadSafeQueueSimple: def __init__(self): self.queue = [] # Your data structure (list) can be replaced with any other like deque from collections @@ -87,7 +87,6 @@ class ThreadSafeQueueSimple: def qsize(self): with self.lock: return len(self.queue) -""" class ThreadSafeQueueComplex: def __init__(self, maxsize=0): @@ -129,9 +128,10 @@ class ThreadSafeQueueComplex: # define 3 queues to be shared across threads # List of PRs to be reviewed, by review_repos() PENDING_PRS = queue.Queue() -# review_pr() get pr from PENDING_PRS, if can be obviously handled, put comment into submitting_prs, otherwise, move to NEED_REVIEW_PRS +# sort_pr() get pr from PENDING_PRS, if can be obviously handled, put comment into submitting_prs, otherwise, move to NEED_REVIEW_PRS # that are being preprocessed for review NEED_REVIEW_PRS = queue.Queue() +# manually_review() get pr from NEED_REVIEW_PRS, and edit comment MANUAL_REVIEW_PRS = queue.Queue() # PRs that are being submitted SUBMITTING_PRS = queue.Queue() -- Gitee From a332138c76502a2ad34338085be9b138c0fa718c Mon Sep 17 00:00:00 2001 From: Shinwell Hu Date: Fri, 20 Dec 2024 17:56:24 +0800 Subject: [PATCH 03/20] refactor code --- advisors/oe_review.py | 41 ++++++++++++++++++++++++++++------------- 1 file changed, 28 insertions(+), 13 deletions(-) diff --git a/advisors/oe_review.py b/advisors/oe_review.py index 9004174c..b2e49606 100755 --- a/advisors/oe_review.py +++ b/advisors/oe_review.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 # ****************************************************************************** # Copyright (c) Huawei Technologies Co., Ltd. 2020-2024. All rights reserved. # licensed under the Mulan PSL v2. @@ -269,8 +269,11 @@ def filter_pr(pull_request, filter): return True if pull_request["user"]["login"] in filter["submitters"]: return True - if pull_request["head"]["repo"]["path"] in filter["repos"]: - return True + for filter_repo in filter["repos"]: + if re.match(filter_repo, pull_request["head"]["repo"]["path"]): + return True + #if pull_request["head"]["repo"]["path"] in filter["repos"]: + # return True return False def sort_pr(user_gitee, filter): @@ -279,7 +282,7 @@ def sort_pr(user_gitee, filter): try: review_item = PENDING_PRS.get(timeout=GLOBAL_TIMEOUT) except queue.Empty as e: - print("No PR to be sorted in a while.") + print("PENDING_PRS queue is empty for a while.") if wait_error >= GLOBAL_MAX_RETRY: break else: @@ -312,6 +315,7 @@ def sort_pr(user_gitee, filter): NEED_REVIEW_PRS.put(None) print("sort pr finished") NEED_REVIEW_PRS.join() + print("NEED_REVIEW_PRS join finished") def ai_review_impl(user_gitee, repo, pull_id, group, ai_flag, ai_model): pr_diff = user_gitee.get_diff(repo, pull_id, group) @@ -330,7 +334,7 @@ def ai_review(user_gitee, ai_flag, ai_model): try: review_item = NEED_REVIEW_PRS.get(timeout=GLOBAL_TIMEOUT) except queue.Empty as e: - print("No PR to be reviewed by AI in a while.") + print("NEED_REVIEW_PRS queue is empty for a while.") if wait_error > GLOBAL_MAX_RETRY: break else: @@ -354,6 +358,7 @@ def ai_review(user_gitee, ai_flag, ai_model): MANUAL_REVIEW_PRS.put(None) print("ai review finished") MANUAL_REVIEW_PRS.join() + print("MANUAL_REVIEW_PRS join finished") def clean_advisor_comment(comment): """ @@ -398,7 +403,7 @@ def manually_review(user_gitee, editor): try: review_item = MANUAL_REVIEW_PRS.get(timeout=GLOBAL_TIMEOUT) except queue.Empty as e: - print("No PR to be review by hand in a while.") + print("MANUAL_REVIEW_PRS queue is empty for a while.") if wait_error >= GLOBAL_MAX_RETRY: break else: @@ -428,6 +433,7 @@ def manually_review(user_gitee, editor): SUBMITTING_PRS.put(None) print("manually review finished") SUBMITTING_PRS.join() + print("SUBMITTING_PRS join finished") def submit_review_impl(user_gitee, pr_info, pull_request, review_comment, suggest_action="", suggest_reason=""): result = " is handled and review is published." @@ -436,11 +442,16 @@ def submit_review_impl(user_gitee, pr_info, pull_request, review_comment, sugges print("!{number}: {title} is ignored".format(number=pr_info["number"], title=pull_request["title"])) return + last_comment = user_gitee.get_pr_comments_all(pr_info['owner'], pr_info['repo'], pr_info['number']) + review_to_submit = "" for line in review_comment.split("\n"): if line == "====": if review_to_submit == "": continue + if last_comment[-1]['body'] == review_to_submit: + print("!{number}: {title} is ignored".format(number=pr_info["number"], title=pull_request["title"])) + continue try: user_gitee.create_pr_comment(pr_info['repo'], pr_info['number'], review_to_submit, pr_info['owner']) except http.client.RemoteDisconnected as e: @@ -449,10 +460,13 @@ def submit_review_impl(user_gitee, pr_info, pull_request, review_comment, sugges else: review_to_submit += line + "\n" else: - try: - user_gitee.create_pr_comment(pr_info['repo'], pr_info['number'], review_to_submit, pr_info['owner']) - except http.client.RemoteDisconnected as e: - print("Failed to sumit review comment: {error}".format(error=e)) + if review_to_submit == last_comment[-1]['body']: + print("!{number}: {title} is ignored".format(number=pr_info["number"], title=pull_request["title"])) + else: + try: + user_gitee.create_pr_comment(pr_info['repo'], pr_info['number'], review_to_submit, pr_info['owner']) + except http.client.RemoteDisconnected as e: + print("Failed to sumit review comment: {error}".format(error=e)) if suggest_action == "/close": @@ -471,7 +485,7 @@ def submmit_review(user_gitee): try: review_item = SUBMITTING_PRS.get(timeout=GLOBAL_TIMEOUT) except queue.Empty as e: - print("No PR review to be summited in a while.") + print("SUBMITTING_PRS queue is empty for a while.") if wait_error >= GLOBAL_MAX_RETRY: break else: @@ -498,7 +512,7 @@ def submmit_review(user_gitee): SUBMITTING_PRS.task_done() print("submit review finish") -def review_pr_new(user_gitee, repo_name, pull_id, group, editor, ai_flag, ai_model, filter): +def review_pr(user_gitee, repo_name, pull_id, group, editor, ai_flag, ai_model, filter): """ New Implementation of Review Pull Request, reuse code from threading implementation """ @@ -577,6 +591,7 @@ def generate_pending_prs(user_gitee, sig): PENDING_PRS.put(None) print("generate_pending_pr finished") PENDING_PRS.join() + print("PENDING_PRS join finished") return 0 def get_responsible_sigs(user_gitee): @@ -675,7 +690,7 @@ def main(): group = params[0] repo_name = params[1] pull_id = params[2] - review_pr_new(user_gitee, repo_name, pull_id, group, editor, not args.no_ai, ai_model, filter) + review_pr(user_gitee, repo_name, pull_id, group, editor, not args.no_ai, ai_model, filter) return 0 -- Gitee From 62b58fa61fbfc147a790882851132afb865105db Mon Sep 17 00:00:00 2001 From: Shinwell Hu Date: Mon, 23 Dec 2024 11:14:15 +0800 Subject: [PATCH 04/20] code refactor --- .gitignore | 3 ++- advisors/oe_review.py | 6 +++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/.gitignore b/.gitignore index 2b1a25d5..372f5a91 100644 --- a/.gitignore +++ b/.gitignore @@ -3,4 +3,5 @@ # 排除python 生成的中间文件 *.pyc # 排查python 生成的中间文件夹 -__pycache__ \ No newline at end of file +__pycache__ +advisors/openEuler-Advisor.code-workspace diff --git a/advisors/oe_review.py b/advisors/oe_review.py index b2e49606..faca1056 100755 --- a/advisors/oe_review.py +++ b/advisors/oe_review.py @@ -294,13 +294,13 @@ def sort_pr(user_gitee, filter): #print(f"Got {item} from queue") pull_request = user_gitee.get_pr(review_item["repo"], review_item["number"], review_item["owner"]) + PENDING_PRS.task_done() if filter_pr(pull_request, filter): continue suggest_action, suggest_reason = easy_classify(pull_request) - PENDING_PRS.task_done() if suggest_action == "": review_item['pull_request'] = pull_request NEED_REVIEW_PRS.put(review_item) @@ -346,6 +346,7 @@ def ai_review(user_gitee, ai_flag, ai_model): break pr_diff, review, review_rating = ai_review_impl(user_gitee, review_item['repo'], review_item['number'], review_item['owner'], ai_flag, ai_model) + NEED_REVIEW_PRS.task_done() if pr_diff == "": continue @@ -353,7 +354,6 @@ def ai_review(user_gitee, ai_flag, ai_model): review_item['pr_diff'] = pr_diff review_item['review'] = review review_item['review_rating'] = review_rating - NEED_REVIEW_PRS.task_done() MANUAL_REVIEW_PRS.put(review_item) MANUAL_REVIEW_PRS.put(None) print("ai review finished") @@ -366,7 +366,7 @@ def clean_advisor_comment(comment): """ comment = comment.replace('[🔵]', 'ongoing').replace('[🟡]', 'question') comment = comment.replace('[◯]', 'NA').replace('[🔴]', 'nogo').replace('[🟢]', 'GO') - comment = comment.replace('[😀]', 'smile').replace('[white_checkmark]', 'smile') + comment = comment.replace('[😀]', 'smile').replace('[:white_check_mark:]', 'GO') return comment def manually_review_impl(user_gitee, pr_info, pull_request, review, review_rating, pr_diff, editor): -- Gitee From f1dbef734f789eeef702891dfeaeecbd05db9688 Mon Sep 17 00:00:00 2001 From: Shinwell Hu Date: Mon, 23 Dec 2024 14:45:44 +0800 Subject: [PATCH 05/20] get pr from quickissue --- advisors/oe_review.py | 133 +++++++++++++++++++++++++++++++++--------- 1 file changed, 107 insertions(+), 26 deletions(-) diff --git a/advisors/oe_review.py b/advisors/oe_review.py index faca1056..d3e0103c 100755 --- a/advisors/oe_review.py +++ b/advisors/oe_review.py @@ -30,12 +30,14 @@ import yaml import json import requests import configparser +import math from openai import OpenAI from advisors import gitee GLOBAL_MAX_RETRY = 1000 GLOBAL_TIMEOUT = 60 +GLOBAL_VERBOSE = False OE_REVIEW_PR_PROMPT=""" You are a code reviewer of a openEuler Pull Request, providing feedback on the code changes below. @@ -65,6 +67,9 @@ a rating number in range 1-100. import threading import time +def print_verbose(msg): + if GLOBAL_VERBOSE: + print(msg) class ThreadSafeQueueSimple: def __init__(self): @@ -192,7 +197,8 @@ def args_parser(): arguments parser """ pars = argparse.ArgumentParser() - pars.add_argument("-q", "--quiet", action='store_true', default=False, help="No log print") + pars.add_argument("-q", "--quite", action='store_true', default=False, help="Disable all log print") + pars.add_argument("-v", "--verbose", action='store_true', default=False, help="Print Verbose Log") pars.add_argument("-a", "--active_user", action='store_true', default=False, help="Review all PRs in repositories as maintainer or committer") pars.add_argument("-n", "--repo", type=str, help="Repository name that include group") pars.add_argument("-p", "--pull", type=str, help="Number ID of Pull Request") @@ -222,7 +228,11 @@ def edit_content(text, editor): with os.fdopen(fd, 'w') as tmp: tmp.write(text) tmp.flush() - subprocess.call([editor["editor"], editor["editor-option"], path]) + + result = subprocess.run([editor["editor"], editor["editor-option"], path], capture_output=True, text=True) + print_verbose(result.stdout) + print_verbose(result.stderr) + text_new = open(path).read() return text_new @@ -263,7 +273,7 @@ def easy_classify(pull_request): return suggest_action, suggest_reason def filter_pr(pull_request, filter): - #print(filter) + print_verbose("filter is: "+str(filter)) for label in pull_request["labels"]: if label["name"] in filter["labels"]: return True @@ -272,8 +282,6 @@ def filter_pr(pull_request, filter): for filter_repo in filter["repos"]: if re.match(filter_repo, pull_request["head"]["repo"]["path"]): return True - #if pull_request["head"]["repo"]["path"] in filter["repos"]: - # return True return False def sort_pr(user_gitee, filter): @@ -313,9 +321,9 @@ def sort_pr(user_gitee, filter): SUBMITTING_PRS.put(review_item) NEED_REVIEW_PRS.put(None) - print("sort pr finished") + print_verbose("sort pr finished") NEED_REVIEW_PRS.join() - print("NEED_REVIEW_PRS join finished") + print_verbose("NEED_REVIEW_PRS join finished") def ai_review_impl(user_gitee, repo, pull_id, group, ai_flag, ai_model): pr_diff = user_gitee.get_diff(repo, pull_id, group) @@ -334,13 +342,12 @@ def ai_review(user_gitee, ai_flag, ai_model): try: review_item = NEED_REVIEW_PRS.get(timeout=GLOBAL_TIMEOUT) except queue.Empty as e: - print("NEED_REVIEW_PRS queue is empty for a while.") + print_verbose("NEED_REVIEW_PRS queue is empty for a while.") if wait_error > GLOBAL_MAX_RETRY: break else: wait_error = wait_error + 1 continue - #print("ai review works") if not review_item: NEED_REVIEW_PRS.task_done() break @@ -356,9 +363,9 @@ def ai_review(user_gitee, ai_flag, ai_model): review_item['review_rating'] = review_rating MANUAL_REVIEW_PRS.put(review_item) MANUAL_REVIEW_PRS.put(None) - print("ai review finished") + print_verbose("ai review finished") MANUAL_REVIEW_PRS.join() - print("MANUAL_REVIEW_PRS join finished") + print_verbose("MANUAL_REVIEW_PRS join finished") def clean_advisor_comment(comment): """ @@ -403,7 +410,7 @@ def manually_review(user_gitee, editor): try: review_item = MANUAL_REVIEW_PRS.get(timeout=GLOBAL_TIMEOUT) except queue.Empty as e: - print("MANUAL_REVIEW_PRS queue is empty for a while.") + print_verbose("MANUAL_REVIEW_PRS queue is empty for a while.") if wait_error >= GLOBAL_MAX_RETRY: break else: @@ -431,15 +438,16 @@ def manually_review(user_gitee, editor): SUBMITTING_PRS.put(review_item) SUBMITTING_PRS.put(None) - print("manually review finished") + print_verbose("manually review finished") SUBMITTING_PRS.join() - print("SUBMITTING_PRS join finished") + print_verbose("SUBMITTING_PRS join finished") def submit_review_impl(user_gitee, pr_info, pull_request, review_comment, suggest_action="", suggest_reason=""): result = " is handled and review is published." + print("{owner}/{repo}!{number}: {title}".format(owner=pr_info["owner"], repo=pr_info["repo"], number=pr_info["number"], title=pull_request["title"])) if review_comment == "": - print("!{number}: {title} is ignored".format(number=pr_info["number"], title=pull_request["title"])) + print(" - review comment is ignored due to empty content") return last_comment = user_gitee.get_pr_comments_all(pr_info['owner'], pr_info['repo'], pr_info['number']) @@ -450,7 +458,7 @@ def submit_review_impl(user_gitee, pr_info, pull_request, review_comment, sugges if review_to_submit == "": continue if last_comment[-1]['body'] == review_to_submit: - print("!{number}: {title} is ignored".format(number=pr_info["number"], title=pull_request["title"])) + print(" - review comment is ignored due to duplication with last comment") continue try: user_gitee.create_pr_comment(pr_info['repo'], pr_info['number'], review_to_submit, pr_info['owner']) @@ -461,7 +469,7 @@ def submit_review_impl(user_gitee, pr_info, pull_request, review_comment, sugges review_to_submit += line + "\n" else: if review_to_submit == last_comment[-1]['body']: - print("!{number}: {title} is ignored".format(number=pr_info["number"], title=pull_request["title"])) + print(" - review comment is ignored due to duplication with last comment") else: try: user_gitee.create_pr_comment(pr_info['repo'], pr_info['number'], review_to_submit, pr_info['owner']) @@ -477,7 +485,7 @@ def submit_review_impl(user_gitee, pr_info, pull_request, review_comment, sugges result = " is approved due to {reason}.".format(reason=suggest_reason) else: pass - print("!{number}: {title}{res}".format(number=pr_info["number"], title=pull_request["title"], res=result)) + print(" - PR{res}".format(res=result)) def submmit_review(user_gitee): wait_error = 0 @@ -510,7 +518,7 @@ def submmit_review(user_gitee): review_item['review_comment'], suggest_action, suggest_reason) SUBMITTING_PRS.task_done() - print("submit review finish") + print_verbose("submit review finish") def review_pr(user_gitee, repo_name, pull_id, group, editor, ai_flag, ai_model, filter): """ @@ -526,13 +534,13 @@ def review_pr(user_gitee, repo_name, pull_id, group, editor, ai_flag, ai_model, if filter_pr(pull_request, filter): print("PR has been filtered, do not review") return - print("Doing review") + print_verbose("Doing review") suggest_action, suggest_reason = easy_classify(pull_request) pr_diff, review, review_rating = ai_review_impl(user_gitee, repo_name, pull_id, group, ai_flag, ai_model) review_comment = manually_review_impl(user_gitee, pr_info, pull_request, review, review_rating, pr_diff, editor) submit_review_impl(user_gitee, pr_info, pull_request, review_comment, suggest_action, suggest_reason) - print("Finish Review") + print_verbose("Finish Review") def review_repo(user_gitee, owner, repo): """" @@ -564,7 +572,7 @@ def print_progress(current, total, percentage): else: return False -def generate_pending_prs(user_gitee, sig): +def generate_pending_prs_old(user_gitee, sig): """ Generate pending PRs """ @@ -575,7 +583,7 @@ def generate_pending_prs(user_gitee, sig): current_percentage = 10 counter = 0 - print(f"start generate list of pending pr.") + print_verbose(f"start generate list of pending pr.") for repo in src_oe_repos: counter = counter + 1 if print_progress(counter, total_len, current_percentage): @@ -589,9 +597,9 @@ def generate_pending_prs(user_gitee, sig): review_repo(user_gitee, 'openeuler', repo) PENDING_PRS.put(None) - print("generate_pending_pr finished") + print_verbose("generate_pending_pr finished") PENDING_PRS.join() - print("PENDING_PRS join finished") + print_verbose("PENDING_PRS join finished") return 0 def get_responsible_sigs(user_gitee): @@ -612,6 +620,76 @@ def get_responsible_sigs(user_gitee): result.append((sig_info["name"])) return result +def get_quickissue(url): + try: + result = urllib.request.urlopen(url) + json_resp = json.loads(result.read().decode("utf-8")) + return json_resp + except urllib.error.HTTPError as error: + print("get_quickissue failed to access: %s" % (url)) + print("get_quickissue failed: %d, %s" % (error.code, error.reason)) + return None + +def get_quickissue_pulls_by_sig(sig): + """ + GET from quckissue api + """ + quickissue_base_url = "https://quickissue.openeuler.org/api-issues/pulls" + + query_url = quickissue_base_url + "?sig=" + sig + "&page=1&per_page=100&sort=created_at&state=open" + results = [] + total = 0 + pages = 1 + json_resp = get_quickissue(query_url) + if json_resp == None: + return results, total + elif json_resp["data"] == None: + return results, total + + total = json_resp["total"] + + for d in json_resp["data"]: + res = {} + res['owner'] = d["repo"].split("/")[0] + res['repo'] = d["repo"].split("/")[1] + res['number'] = d["link"].split("/")[-1] + results.append(res) + + pages = math.ceil(json_resp["total"] / json_resp["per_page"]) + + for page in range(2, pages + 1): + query_url = quickissue_base_url + "?sig=" + sig + "&page=" + str(page) + "&per_page=100&sort=created_at&state=open" + json_resp = get_quickissue(query_url) + if json_resp == None: + return results, total + elif json_resp["data"] == None: + return results, total + for d in json_resp["data"]: + res = {} + res['owner'] = d["repo"].split("/")[0] + res['repo'] = d["repo"].split("/")[1] + res['number'] = d["link"].split("/")[-1] + results.append(res) + return results, total + +def generate_pending_prs(user_gitee, sig): + """ + Generating pending PR via quickissue + """ + results, total = get_quickissue_pulls_by_sig(sig) + + print_verbose(f"start generate list of pending pr.") + + #print(results) + for result in results: + PENDING_PRS.put(result) + + PENDING_PRS.put(None) + print_verbose("generate_pending_pr finished") + PENDING_PRS.join() + print_verbose("PENDING_PRS join finished") + return 0 + def review_sig(user_gitee, sig, editor, ai_flag, ai_model, filter): """ Review sig @@ -648,7 +726,10 @@ def main(): args = args_parser() cf = load_config() - if args.quiet: + if args.verbose: + GLOBAL_VERBOSE = True + + if args.quite: sys.stdout = open('/dev/null', 'w') sys.stderr = sys.stdout -- Gitee From 40ee56850dd25d196c8f7708a8d626739a229f9e Mon Sep 17 00:00:00 2001 From: Shinwell Hu Date: Mon, 23 Dec 2024 17:26:18 +0800 Subject: [PATCH 06/20] add requirements.txt --- advisors/oe_review.py | 2 +- requirements.txt | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) create mode 100644 requirements.txt diff --git a/advisors/oe_review.py b/advisors/oe_review.py index d3e0103c..5e29bb82 100755 --- a/advisors/oe_review.py +++ b/advisors/oe_review.py @@ -680,7 +680,7 @@ def generate_pending_prs(user_gitee, sig): print_verbose(f"start generate list of pending pr.") - #print(results) + print_verbose("Pending PRs of {sig}: {results}".format(sig=sig, results=results)) for result in results: PENDING_PRS.put(result) diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 00000000..4fbbec60 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,4 @@ +openai +pyrpm +pyyaml +requests>=2.32.3 -- Gitee From 9f3a1941e936cbfa6431fe7a40c8a32478f8ed57 Mon Sep 17 00:00:00 2001 From: Shinwell Hu Date: Wed, 25 Dec 2024 11:57:32 +0800 Subject: [PATCH 07/20] code refactor --- advisors/oe_review.py | 141 ++++++++++++++++++++++++++++++++++++++++-- requirements.txt | 1 + 2 files changed, 137 insertions(+), 5 deletions(-) diff --git a/advisors/oe_review.py b/advisors/oe_review.py index 5e29bb82..3a185f92 100755 --- a/advisors/oe_review.py +++ b/advisors/oe_review.py @@ -33,13 +33,37 @@ import configparser import math from openai import OpenAI +import chromadb + from advisors import gitee + GLOBAL_MAX_RETRY = 1000 GLOBAL_TIMEOUT = 60 GLOBAL_VERBOSE = False OE_REVIEW_PR_PROMPT=""" +You are a code reviewer of a openEuler Pull Request, providing feedback on the code changes below. + As a code reviewer, your task is: + - Review the code changes (diffs) in the patch and provide feedback. + - If changelog is updated, it should describe visible changes to end-users or developers, not simply say "upgrade to blahblah". + - If there are any bugs, highlight them. + - Does the code do what it says in the commit messages? + - Do not highlight minor issues and nitpicks. + - Use bullet points if you have multiple comments. + - If no suggestions are provided, please give good feedback. + - please use chinese to give feedback. + - Based on the feedback all above, if the Pull Request is good, you need to decide merge the Pull Request by respones "/lgtm /approve". + - If the Pull Request is not good, you need to reject it by response "/close". + - Give this decision in the first line. +Following is a previous example of Pull Request review, You can use it as a reference. +{example} +Now you are provided with the pull request changes in complete format. +It includes the repository name, target branch, pull request title, pull request body, +Patch of the Pull Request and Review History of the Pull Request. +""" + +OE_REVIEW_PR_PROMPT_OLD=""" You are a code reviewer of a openEuler Pull Request, providing feedback on the code changes below. As a code reviewer, your task is: - Above all, you need to decide "/close" the PR, or "/lgtm" and "/approve" the PR. @@ -63,11 +87,18 @@ patches from easy patches, core changes from leaf changes. Please evaluate the b a rating number in range 1-100. """ +CHROMADB_DB_PATH = os.path.expanduser("~/.config/openEuler-Advisor/chromadb") +CHROMADB_COLLECTION_NAME = "oe_review" + +g_chromadb_client = None +g_chromadb_collection = None + # define data structure that contains queue and mutex lock for thread sharing import threading import time def print_verbose(msg): + global GLOBAL_VERBOSE if GLOBAL_VERBOSE: print(msg) @@ -151,12 +182,16 @@ def generate_review_from_ollama(pr_content, prompt, model): headers["Content-Type"] = "application/json;charset=UTF-8" url = f"{base_url}/generate" - + num_ctx = math.ceil((len(pr_content) + len(prompt)) / 2048) * 2048 values = {} values["model"] = model values['prompt'] = pr_content values['system'] = prompt values['stream'] = False + values['options'] = {"num_ctx": num_ctx} + print_verbose("ollama request model: "+model) + print_verbose("ollama request prompt: "+prompt) + print_verbose("ollama request content: "+pr_content) response = requests.post(url, headers=headers, json=values) return response.json().get('response', '') @@ -272,6 +307,27 @@ def easy_classify(pull_request): pass return suggest_action, suggest_reason +def review_history(user_gitee, owner, repo, number, pull_request): + review_comment = {} + review_comment['target_branch'] = pull_request["base"]["ref"] + history_comment = "" + sync_comment = "" + advisor_comment = "" + comments = user_gitee.get_pr_comments_all(owner, repo, number) + for comment in comments: + if comment['user']['name'] == "openeuler-ci-bot": + if comment['body'].startswith("\n**以下为 openEuler-Advisor"): + advisor_comment = comment['body'] + elif comment['user']['name'] == "openeuler-sync-bot": + sync_comment += comment["body"] + "\n" + else: + history_comment += comment["user"]["name"] + ":\n" + history_comment += comment["body"] + "\n" + review_comment['history_comment'] = history_comment + review_comment['sync_comment'] = sync_comment + review_comment['advisor_comment'] = advisor_comment + return review_comment + def filter_pr(pull_request, filter): print_verbose("filter is: "+str(filter)) for label in pull_request["labels"]: @@ -309,6 +365,9 @@ def sort_pr(user_gitee, filter): suggest_action, suggest_reason = easy_classify(pull_request) + review_comment = review_history(user_gitee, review_item['owner'], review_item['repo'], review_item['number'], pull_request) + review_item['review_comment'] = review_comment + if suggest_action == "": review_item['pull_request'] = pull_request NEED_REVIEW_PRS.put(review_item) @@ -325,14 +384,50 @@ def sort_pr(user_gitee, filter): NEED_REVIEW_PRS.join() print_verbose("NEED_REVIEW_PRS join finished") -def ai_review_impl(user_gitee, repo, pull_id, group, ai_flag, ai_model): +def ai_review_impl(user_gitee, repo, pull_id, group, ai_flag, ai_model, review_comment, pull_request): + global g_chromadb_client + global g_chromadb_collection + pr_diff = user_gitee.get_diff(repo, pull_id, group) if not pr_diff: print("Failed to get PR:%s of repository:%s/%s, make sure the PR is exist." % (pull_id, group, repo)) return "", "", "" if not ai_flag: return pr_diff, "", "" - review = generate_review_from_ollama(pr_diff, OE_REVIEW_PR_PROMPT, ai_model) + + if g_chromadb_client is None: + g_chromadb_client = chromadb.PersistentClient(path=CHROMADB_DB_PATH) + g_chromadb_collection = g_chromadb_client.get_or_create_collection(CHROMADB_COLLECTION_NAME) + + chomadb_query_text = pr_diff + chromadb_result = g_chromadb_collection.query( + query_texts=[chomadb_query_text], + n_results=2, + include=["documents"] + ) + + print_verbose(chromadb_result["documents"]) + + if len(chromadb_result["documents"][0]) == 0: + review_example = chromadb_result["documents"][0] + else: + review_example = chromadb_result["documents"][0][0] + + review_content = """ + Pull Request to {owner}/{repo}:{target_branch} + Pull Request Title: {title} + Pull Request Body: {body} + Patch of the Pull Request: + {pr_diff} + Review History of the Pull Request: + {history_comment} + """.format(owner=group, repo=repo, target_branch=pull_request["base"]["ref"], + title=pull_request["title"], body=pull_request["body"], + pr_diff=pr_diff, history_comment=review_comment["history_comment"]) + review_prompt = OE_REVIEW_PR_PROMPT.format(example=review_example) + + #review = generate_review_from_ollama(pr_diff, OE_REVIEW_PR_PROMPT, ai_model) + review = generate_review_from_ollama(review_content, review_prompt, ai_model) review_rating = generate_review_from_ollama(pr_diff, OE_REVIEW_RATING_PROMPT, ai_model) return pr_diff, review, review_rating @@ -352,7 +447,8 @@ def ai_review(user_gitee, ai_flag, ai_model): NEED_REVIEW_PRS.task_done() break - pr_diff, review, review_rating = ai_review_impl(user_gitee, review_item['repo'], review_item['number'], review_item['owner'], ai_flag, ai_model) + pr_diff, review, review_rating = ai_review_impl(user_gitee, review_item['repo'], review_item['number'], review_item['owner'], + ai_flag, ai_model, review_item['review_comment'], review_item['pull_request']) NEED_REVIEW_PRS.task_done() if pr_diff == "": @@ -402,6 +498,34 @@ def manually_review_impl(user_gitee, pr_info, pull_request, review, review_ratin review_content += "\n# History\n" + history_comment review_content += "\n# Advisor\n" + clean_advisor_comment(advisor_comment) review_comment_raw = edit_content(review_content + '\n\n# ReviewBot\n\n' + review + '\n\n# ReviewRating\n\n' + review_rating + '\n\n' + pr_diff, editor) + + global g_chromadb_client + global g_chromadb_collection + # save review_comment_raw to chromadb + if g_chromadb_client is None: + g_chromadb_client = chromadb.PersistentClient(path=CHROMADB_DB_PATH) + g_chromadb_collection = g_chromadb_client.get_or_create_collection(CHROMADB_COLLECTION_NAME) + + chromadb_document = """ + Pull Request to {owner}/{repo}:{target_branch} + Pull Request Title: {title} + Pull Request Body: {body} + Patch of the Pull Request: + {pr_diff} + Review History of the Pull Request: + {history_comment} + My Review Comment: + {review_comment} + """.format(owner=pr_info["owner"], repo=pr_info["repo"], target_branch=target_branch, + title=pull_request["title"], body=pull_request["body"], + pr_diff=pr_diff, history_comment=history_comment, + review_comment=review_comment_raw) + + g_chromadb_collection.upsert( + documents=[chromadb_document], + metadatas=[{"owner": pr_info["owner"], "repo": pr_info["repo"]}], + ids=[str(pr_info["owner"]) + "/" + str(pr_info["repo"]) + "/" + str(pr_info["number"])] + ) return review_comment_raw def manually_review(user_gitee, editor): @@ -536,7 +660,8 @@ def review_pr(user_gitee, repo_name, pull_id, group, editor, ai_flag, ai_model, return print_verbose("Doing review") suggest_action, suggest_reason = easy_classify(pull_request) - pr_diff, review, review_rating = ai_review_impl(user_gitee, repo_name, pull_id, group, ai_flag, ai_model) + review_history_comment = review_history(user_gitee, group, repo_name, pull_id, pull_request) + pr_diff, review, review_rating = ai_review_impl(user_gitee, repo_name, pull_id, group, ai_flag, ai_model, review_history_comment, pull_request) review_comment = manually_review_impl(user_gitee, pr_info, pull_request, review, review_rating, pr_diff, editor) submit_review_impl(user_gitee, pr_info, pull_request, review_comment, suggest_action, suggest_reason) @@ -727,6 +852,7 @@ def main(): cf = load_config() if args.verbose: + global GLOBAL_VERBOSE GLOBAL_VERBOSE = True if args.quite: @@ -757,6 +883,11 @@ def main(): filter['submitters'] = set(cf.get('filter', 'submitters').split()) filter['repos'] = set(cf.get('filter', 'repos').split()) + global g_chromadb_client + global g_chromadb_collection + g_chromadb_client = chromadb.PersistentClient(path=CHROMADB_DB_PATH) + g_chromadb_collection = g_chromadb_client.get_or_create_collection(CHROMADB_COLLECTION_NAME) + if args.active_user: if args.sig == "": sigs = get_responsible_sigs(user_gitee) diff --git a/requirements.txt b/requirements.txt index 4fbbec60..f1361ba8 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,3 +2,4 @@ openai pyrpm pyyaml requests>=2.32.3 +chromadb -- Gitee From b43240604efbe575ef2f814aa38ff5a2697715f2 Mon Sep 17 00:00:00 2001 From: Shinwell Hu Date: Tue, 21 Jan 2025 08:43:37 +0800 Subject: [PATCH 08/20] code refactory and fix typo --- advisors/oe_review.py | 66 +++++-------------------------------------- 1 file changed, 7 insertions(+), 59 deletions(-) diff --git a/advisors/oe_review.py b/advisors/oe_review.py index 3a185f92..9ebd5818 100755 --- a/advisors/oe_review.py +++ b/advisors/oe_review.py @@ -38,7 +38,7 @@ import chromadb from advisors import gitee -GLOBAL_MAX_RETRY = 1000 +GLOBAL_MAX_RETRY = 60 * 24 * 3 GLOBAL_TIMEOUT = 60 GLOBAL_VERBOSE = False @@ -102,62 +102,6 @@ def print_verbose(msg): if GLOBAL_VERBOSE: print(msg) -class ThreadSafeQueueSimple: - def __init__(self): - self.queue = [] # Your data structure (list) can be replaced with any other like deque from collections - self.lock = threading.Lock() - self.condition = threading.Condition(self.lock) - - def put(self, item): - with self.condition: - self.queue.append(item) - self.condition.notify_all() # Notify all waiting threads that new item is added - - def get(self): - with self.condition: - while len(self.queue) == 0: - self.condition.wait() # Wait until there are items in the queue - item = self.queue.pop(0) - return item - - def qsize(self): - with self.lock: - return len(self.queue) - -class ThreadSafeQueueComplex: - def __init__(self, maxsize=0): - self.queue = collections.deque() # 使用deque替代list - self.lock = threading.Lock() - self.condition = threading.Condition(self.lock) - self.maxsize = maxsize # 添加最大容量限制 - - def put(self, item, block=True, timeout=None): - with self.condition: - while self.maxsize > 0 and len(self.queue) >= self.maxsize: - if not block: - raise self.queue.Full - if timeout is not None: - if not self.condition.wait(timeout): - raise self.queue.Full - else: - self.condition.wait() - self.queue.append(item) - self.condition.notify() - - def get(self, block=True, timeout=None): - with self.condition: - while len(self.queue) == 0: - if not block: - raise queue.Empty - if timeout is not None: - if not self.condition.wait(timeout): - raise queue.Empty - else: - self.condition.wait() - item = self.queue.popleft() # 使用popleft()而不是pop(0) - self.condition.notify() - return item - # 建三个队列,一个是待处理PR列表,一个是经过预处理的PR列表,一个是待提交PR列表 # 批处理,首先关闭所有可以关闭的PR,直接合并sync且没有ci_failed的PR # 然后对所有其他的PR再进行review @@ -727,7 +671,7 @@ def generate_pending_prs_old(user_gitee, sig): print_verbose("PENDING_PRS join finished") return 0 -def get_responsible_sigs(user_gitee): +def get_responsible_sigs(user_gitee, filter): """ Get responsible sigs from config file """ @@ -736,6 +680,9 @@ def get_responsible_sigs(user_gitee): for sig in sigs: if sig == "sig-minzuchess" or sig == "README.md": continue + if sig in filter["sigs"]: + print_verbose(f"sig {sig} is filtered".format(sig=sig)) + continue sig_info_str = user_gitee.get_sig_info(sig) if sig_info_str == None: continue @@ -882,6 +829,7 @@ def main(): filter['labels'] = set(cf.get('filter', 'labels').split()) filter['submitters'] = set(cf.get('filter', 'submitters').split()) filter['repos'] = set(cf.get('filter', 'repos').split()) + filter['sigs'] = set(cf.get('filter', 'sigs').split()) global g_chromadb_client global g_chromadb_collection @@ -890,7 +838,7 @@ def main(): if args.active_user: if args.sig == "": - sigs = get_responsible_sigs(user_gitee) + sigs = get_responsible_sigs(user_gitee, filter) for sig in sigs: review_sig(user_gitee, sig, editor, not args.no_ai, ai_model, filter) else: -- Gitee From 49d9f4428469f6ff47d52ad4c90d30ba502bc3a2 Mon Sep 17 00:00:00 2001 From: Shinwell Hu Date: Wed, 29 Jan 2025 19:41:52 +0800 Subject: [PATCH 09/20] update to support deepseek.com --- advisors/oe_review.py | 187 ++++++++++++++++++++++++++++++++++++------ develop_env.sh | 13 +++ 2 files changed, 174 insertions(+), 26 deletions(-) diff --git a/advisors/oe_review.py b/advisors/oe_review.py index 9ebd5818..abcc1202 100755 --- a/advisors/oe_review.py +++ b/advisors/oe_review.py @@ -33,6 +33,7 @@ import configparser import math from openai import OpenAI +import openai import chromadb from advisors import gitee @@ -97,15 +98,71 @@ g_chromadb_collection = None import threading import time +# define data structure to contain AI model information +class oe_review_ai_model: + def __init__(self, type): + if type == "local": + self._type = type + self._base_url = "http://localhost:11434/api" + self._model_name = "llama3.1:8b" + elif type == "deepseek": + self._type = "deepseek" + self._base_url = "https://api.deepseek.com" + self._model_name = "deepseek-chat" + self._api_key = "" + elif type == "bailian": + self._type = "bailian" + self._base_url = "https://dashscope.aliyuncs.com/compatible-mode/v1" + self._model_name = "deepseek-v3" + self._api_key = "" + elif type == "no": + self._type = "no" + else: + self._type = type + + @property + def type(self): + return self._type + @type.setter + def type(self, new_value): + self._type = new_value + + @property + def base_url(self): + return self._base_url + @base_url.setter + def base_url(self, new_value): + self._base_url = new_value + + @property + def model_name(self): + return self._model_name + @model_name.setter + def model_name(self, new_value): + self._model_name = new_value + + @property + def api_key(self): + if self._type != "local" and self.type != "no": + return self._api_key + else: + return "" + @api_key.setter + def api_key(self, new_value): + if self._type != "local" and self.type != "no": + self._api_key = new_value + else: + pass # we dont need api_key for local or no + def print_verbose(msg): global GLOBAL_VERBOSE if GLOBAL_VERBOSE: print(msg) -# 建三个队列,一个是待处理PR列表,一个是经过预处理的PR列表,一个是待提交PR列表 +# 建四个队列,一个是待处理PR队列,一个是经过预处理的PR队列,一个是待人工审核的PR队列,一个是待提交PR队列 # 批处理,首先关闭所有可以关闭的PR,直接合并sync且没有ci_failed的PR # 然后对所有其他的PR再进行review -# define 3 queues to be shared across threads +# define 4 queues to be shared across threads # List of PRs to be reviewed, by review_repos() PENDING_PRS = queue.Queue() # sort_pr() get pr from PENDING_PRS, if can be obviously handled, put comment into submitting_prs, otherwise, move to NEED_REVIEW_PRS @@ -117,7 +174,7 @@ MANUAL_REVIEW_PRS = queue.Queue() SUBMITTING_PRS = queue.Queue() #def generate_review_from_ollama(pr_content, prompt, model="llama3.1:8b"): -def generate_review_from_ollama(pr_content, prompt, model): +def generate_review_from_ollama(pr_content, prompt, ai_model): base_url = "http://localhost:11434/api" json_resp = [] resp = None @@ -128,6 +185,7 @@ def generate_review_from_ollama(pr_content, prompt, model): url = f"{base_url}/generate" num_ctx = math.ceil((len(pr_content) + len(prompt)) / 2048) * 2048 values = {} + model = ai_model.model_name values["model"] = model values['prompt'] = pr_content values['system'] = prompt @@ -139,6 +197,39 @@ def generate_review_from_ollama(pr_content, prompt, model): response = requests.post(url, headers=headers, json=values) return response.json().get('response', '') +def generate_review_from_openai(pr_content, prompt, model): + #Get URL and API Key from config file + print_verbose("api_key: " + model.api_key) + print_verbose("base_url: " + model.base_url) + print_verbose("model_name: " + model.model_name) + client = OpenAI(api_key=model.api_key, base_url=model.base_url) + try: + response = client.chat.completions.create( + model = model.model_name, + messages = [ + {'role': 'system', 'content': urllib.parse.quote(prompt)}, + {'role': 'user', 'content': urllib.parse.quote(pr_content)}, + ], + stream = False + ) + print(response.model_dump_json()) + return (response.choices[0].message.content) + except openai.APIError as e: + print(f"API Error: {e.status_code} - {e.message}") + except openai.APIConnectionError as e: + print(f"Connection error: {e}") + except openai.RateLimitError as e: + print(f"Rate limit exceeded: {e}") + except openai.AuthenticationError as e: + print(f"Authentication failed: {e}") + except openai.BadRequestError as e: + print(f"Invalid request: {e}") + except openai.OpenAIError as e: + print(f"OpenAI error: {e}") + except Exception as e: + print(f"Unexpected error: {type(e).__name__}, {str(e)}") + + def check_pr_url(url): """ check whether the URL of Pull Request is valid @@ -185,7 +276,7 @@ def args_parser(): pars.add_argument("-s", "--sig", type=str, default="", help="When active_user is set, review all PRs in specified SIG") pars.add_argument("-m", "--model", type=str, help="Model of selection to generate review") pars.add_argument("-e", "--editor", type=str, help="Editor of choice to edit content, default to nvim") - pars.add_argument("-b", "--no_ai", action='store_true', default=False, help="No AI to generate review") + pars.add_argument("-i", "--intelligent", type=str, help="Select Intelligent from local/deepseek/no") pars.add_argument("-o", "--editor-option", type=str, help="Commandline option for editor") return pars.parse_args() @@ -203,14 +294,21 @@ def load_config(): return None def edit_content(text, editor): + print_verbose("starting edit_content") fd, path = tempfile.mkstemp(suffix=".tmp", prefix="oe_review") with os.fdopen(fd, 'w') as tmp: tmp.write(text) tmp.flush() - result = subprocess.run([editor["editor"], editor["editor-option"], path], capture_output=True, text=True) - print_verbose(result.stdout) - print_verbose(result.stderr) + print_verbose(editor["editor-option"]) + if editor["editor-option"] == '""': + result = subprocess.Popen([editor["editor"]] + [path]) + result.wait() + else: + result = subprocess.run([editor["editor"], editor["editor-option"], path]) + #result = subprocess.run([editor["editor"], editor["editor-option"], path], capture_output=True, text=True) + print_verbose(result.stdout) + print_verbose(result.stderr) text_new = open(path).read() return text_new @@ -328,21 +426,25 @@ def sort_pr(user_gitee, filter): NEED_REVIEW_PRS.join() print_verbose("NEED_REVIEW_PRS join finished") -def ai_review_impl(user_gitee, repo, pull_id, group, ai_flag, ai_model, review_comment, pull_request): +def ai_review_impl(user_gitee, repo, pull_id, group, ai_model, review_comment, pull_request): global g_chromadb_client global g_chromadb_collection + print_verbose("start getting diff") pr_diff = user_gitee.get_diff(repo, pull_id, group) if not pr_diff: print("Failed to get PR:%s of repository:%s/%s, make sure the PR is exist." % (pull_id, group, repo)) return "", "", "" - if not ai_flag: + + if ai_model.type == "no": return pr_diff, "", "" + print_verbose("initialize chromadb instance") if g_chromadb_client is None: g_chromadb_client = chromadb.PersistentClient(path=CHROMADB_DB_PATH) g_chromadb_collection = g_chromadb_client.get_or_create_collection(CHROMADB_COLLECTION_NAME) + print_verbose(f"start querying chromadb") chomadb_query_text = pr_diff chromadb_result = g_chromadb_collection.query( query_texts=[chomadb_query_text], @@ -350,7 +452,7 @@ def ai_review_impl(user_gitee, repo, pull_id, group, ai_flag, ai_model, review_c include=["documents"] ) - print_verbose(chromadb_result["documents"]) + print_verbose(f"chromadb search result: {chromadb_result['documents']}") if len(chromadb_result["documents"][0]) == 0: review_example = chromadb_result["documents"][0] @@ -370,12 +472,18 @@ def ai_review_impl(user_gitee, repo, pull_id, group, ai_flag, ai_model, review_c pr_diff=pr_diff, history_comment=review_comment["history_comment"]) review_prompt = OE_REVIEW_PR_PROMPT.format(example=review_example) - #review = generate_review_from_ollama(pr_diff, OE_REVIEW_PR_PROMPT, ai_model) - review = generate_review_from_ollama(review_content, review_prompt, ai_model) - review_rating = generate_review_from_ollama(pr_diff, OE_REVIEW_RATING_PROMPT, ai_model) + print_verbose(f"review_prompt is: {review_prompt}") + + if ai_model.type == "local": + #review = generate_review_from_ollama(pr_diff, OE_REVIEW_PR_PROMPT, ai_model) + review = generate_review_from_ollama(review_content, review_prompt, ai_model) + review_rating = generate_review_from_ollama(pr_diff, OE_REVIEW_RATING_PROMPT, ai_model) + elif ai_model.type == "deepseek" or ai_model.type == "bailian": + review = generate_review_from_openai(review_content, review_prompt, ai_model) + review_rating = generate_review_from_openai(pr_diff, OE_REVIEW_RATING_PROMPT, ai_model) return pr_diff, review, review_rating -def ai_review(user_gitee, ai_flag, ai_model): +def ai_review(user_gitee, ai_model): wait_error = 0 while True: try: @@ -392,7 +500,7 @@ def ai_review(user_gitee, ai_flag, ai_model): break pr_diff, review, review_rating = ai_review_impl(user_gitee, review_item['repo'], review_item['number'], review_item['owner'], - ai_flag, ai_model, review_item['review_comment'], review_item['pull_request']) + ai_model, review_item['review_comment'], review_item['pull_request']) NEED_REVIEW_PRS.task_done() if pr_diff == "": @@ -441,6 +549,11 @@ def manually_review_impl(user_gitee, pr_info, pull_request, review, review_ratin review_content += "\n# Branch Status\n" + sync_comment review_content += "\n# History\n" + history_comment review_content += "\n# Advisor\n" + clean_advisor_comment(advisor_comment) + + if review is None: + review = "" + if review_rating is None: + review_rating = "" review_comment_raw = edit_content(review_content + '\n\n# ReviewBot\n\n' + review + '\n\n# ReviewRating\n\n' + review_rating + '\n\n' + pr_diff, editor) global g_chromadb_client @@ -588,7 +701,7 @@ def submmit_review(user_gitee): SUBMITTING_PRS.task_done() print_verbose("submit review finish") -def review_pr(user_gitee, repo_name, pull_id, group, editor, ai_flag, ai_model, filter): +def review_pr(user_gitee, repo_name, pull_id, group, editor, ai_model, filter): """ New Implementation of Review Pull Request, reuse code from threading implementation """ @@ -604,8 +717,10 @@ def review_pr(user_gitee, repo_name, pull_id, group, editor, ai_flag, ai_model, return print_verbose("Doing review") suggest_action, suggest_reason = easy_classify(pull_request) + print_verbose(f"suggest_action: {suggest_action}") review_history_comment = review_history(user_gitee, group, repo_name, pull_id, pull_request) - pr_diff, review, review_rating = ai_review_impl(user_gitee, repo_name, pull_id, group, ai_flag, ai_model, review_history_comment, pull_request) + print_verbose(f"review_history: {review_history_comment}") + pr_diff, review, review_rating = ai_review_impl(user_gitee, repo_name, pull_id, group, ai_model, review_history_comment, pull_request) review_comment = manually_review_impl(user_gitee, pr_info, pull_request, review, review_rating, pr_diff, editor) submit_review_impl(user_gitee, pr_info, pull_request, review_comment, suggest_action, suggest_reason) @@ -762,7 +877,7 @@ def generate_pending_prs(user_gitee, sig): print_verbose("PENDING_PRS join finished") return 0 -def review_sig(user_gitee, sig, editor, ai_flag, ai_model, filter): +def review_sig(user_gitee, sig, editor, ai_model, filter): """ Review sig 1. Generate pending PRs for sig @@ -775,7 +890,7 @@ def review_sig(user_gitee, sig, editor, ai_flag, ai_model, filter): print("Reviewing sig: {}".format(sig)) generate_pending_prs_thread = threading.Thread(target=generate_pending_prs, args=(user_gitee, sig)) sort_pr_thread = threading.Thread(target=sort_pr, args=(user_gitee, filter)) - ai_review_thread = threading.Thread(target=ai_review, args=(user_gitee, ai_flag, ai_model)) + ai_review_thread = threading.Thread(target=ai_review, args=(user_gitee, ai_model)) manually_review_thread = threading.Thread(target=manually_review, args=(user_gitee, editor)) submmit_review_thread = threading.Thread(target=submmit_review, args=(user_gitee,)) @@ -798,6 +913,9 @@ def main(): args = args_parser() cf = load_config() + my_model = None + no_ai = False + if args.verbose: global GLOBAL_VERBOSE GLOBAL_VERBOSE = True @@ -821,9 +939,26 @@ def main(): if args.editor_option: editor["editor-option"] = args.editor_option - ai_model = cf.get('model', 'name') - if args.model: - ai_model = args.model + print_verbose(f"editor option is: {editor['editor-option']}") + if args.intelligent == "local": + my_model = oe_review_ai_model("local") + my_model.model_name = cf.get('model', 'name') + if args.model: + my_model.model_name = args.model + elif args.intelligent == "deepseek": + my_model = oe_review_ai_model("deepseek") + my_model.model_name = cf.get('deepseek', 'name') + my_model.api_key = cf.get('deepseek', 'api_key') + my_model.base_url = cf.get('deepseek', 'base_url') + elif args.intelligent == "bailian": + my_model = oe_review_ai_model("bailian") + my_model.model_name = cf.get('bailian', 'name') + my_model.api_key = cf.get('bailian', 'api_key') + my_model.base_url = cf.get('bailian', 'base_url') + elif args.intelligent == "no": + my_model = oe_review_ai_model("no") + else: + my_model = oe_review_ai_model("no") filter = {} filter['labels'] = set(cf.get('filter', 'labels').split()) @@ -840,17 +975,17 @@ def main(): if args.sig == "": sigs = get_responsible_sigs(user_gitee, filter) for sig in sigs: - review_sig(user_gitee, sig, editor, not args.no_ai, ai_model, filter) + review_sig(user_gitee, sig, editor, my_model, filter) else: - review_sig(user_gitee, args.sig, editor, not args.no_ai, ai_model, filter) + review_sig(user_gitee, args.sig, editor, my_model, filter) else: params = extract_params(args) if not params: - return 1 + return 1 group = params[0] repo_name = params[1] pull_id = params[2] - review_pr(user_gitee, repo_name, pull_id, group, editor, not args.no_ai, ai_model, filter) + review_pr(user_gitee, repo_name, pull_id, group, editor, my_model, filter) return 0 diff --git a/develop_env.sh b/develop_env.sh index 15737258..7d632af1 100755 --- a/develop_env.sh +++ b/develop_env.sh @@ -16,3 +16,16 @@ if [ $existed -eq 0 ]; then export PYTHONPATH=${PYTHONPATH}:${advisor_path} fi echo "PYTHONPATH=${PYTHONPATH}" + +existed=0 +for p in ${PATH} +do + if [ "${advisor_path}/advisors" = $p ]; then + existed=1 + fi +done + +if [ $existed -eq 0 ]; then + export PATH=${PATH}:${advisor_path}/advisors:${advisor_path}/command +fi +echo "PATH=${PATH}" \ No newline at end of file -- Gitee From c5acf6acc21bdb756b31704c2726316d75b572de Mon Sep 17 00:00:00 2001 From: Shinwell Hu Date: Wed, 29 Jan 2025 22:23:07 +0800 Subject: [PATCH 10/20] ignore reponse dumping while not in verbose mode --- advisors/oe_review.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/advisors/oe_review.py b/advisors/oe_review.py index abcc1202..59cfe76d 100755 --- a/advisors/oe_review.py +++ b/advisors/oe_review.py @@ -212,7 +212,7 @@ def generate_review_from_openai(pr_content, prompt, model): ], stream = False ) - print(response.model_dump_json()) + print_verbose(f"response is {response.model_dump_json()}") return (response.choices[0].message.content) except openai.APIError as e: print(f"API Error: {e.status_code} - {e.message}") -- Gitee From 2bf1ec5ed6bb165c35e603606724f47d012668c8 Mon Sep 17 00:00:00 2001 From: Shinwell Hu Date: Sat, 1 Feb 2025 15:18:38 +0800 Subject: [PATCH 11/20] support siliconflow --- advisors/oe_review.py | 42 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-) diff --git a/advisors/oe_review.py b/advisors/oe_review.py index 59cfe76d..06ee016c 100755 --- a/advisors/oe_review.py +++ b/advisors/oe_review.py @@ -115,6 +115,11 @@ class oe_review_ai_model: self._base_url = "https://dashscope.aliyuncs.com/compatible-mode/v1" self._model_name = "deepseek-v3" self._api_key = "" + elif type == "siliconflow": + self._type = "siliconflow" + self._base_url = "https://api.siliconflow.cn/v1/chat/completions" + self._model_name = "deepseek-r1" + self._api_key = "sk-riamcwezutshhoxdifdddyfipvhuhxofmuqtejmbdmokjjda" elif type == "no": self._type = "no" else: @@ -197,6 +202,29 @@ def generate_review_from_ollama(pr_content, prompt, ai_model): response = requests.post(url, headers=headers, json=values) return response.json().get('response', '') +def generate_review_from_request(pr_content, prompt, model): + payload = { + "model": model.model_name, + "messages": [ + { "role": "user", + "content": urllib.parse.quote(pr_content) + }, + { 'role': 'system', + 'content': urllib.parse.quote(prompt) + }, + ], + "stream": False, + } + headers = { + "Authorization": f"Bearer {model.api_key}", + "Content-Type": "application/json" + } + # print_verbose(f"payload is: {payload}") + # print_verbose(f"header is {headers}") + response = requests.request("POST", model.base_url, json=payload, headers=headers) + + return (response.text) + def generate_review_from_openai(pr_content, prompt, model): #Get URL and API Key from config file print_verbose("api_key: " + model.api_key) @@ -435,7 +463,9 @@ def ai_review_impl(user_gitee, repo, pull_id, group, ai_model, review_comment, p if not pr_diff: print("Failed to get PR:%s of repository:%s/%s, make sure the PR is exist." % (pull_id, group, repo)) return "", "", "" - + else: + print_verbose(f"pr_diff is {pr_diff}") + if ai_model.type == "no": return pr_diff, "", "" @@ -459,6 +489,8 @@ def ai_review_impl(user_gitee, repo, pull_id, group, ai_model, review_comment, p else: review_example = chromadb_result["documents"][0][0] + print_verbose(f"review example is {review_example}") + review_content = """ Pull Request to {owner}/{repo}:{target_branch} Pull Request Title: {title} @@ -481,6 +513,9 @@ def ai_review_impl(user_gitee, repo, pull_id, group, ai_model, review_comment, p elif ai_model.type == "deepseek" or ai_model.type == "bailian": review = generate_review_from_openai(review_content, review_prompt, ai_model) review_rating = generate_review_from_openai(pr_diff, OE_REVIEW_RATING_PROMPT, ai_model) + elif ai_model.type == "siliconflow": + review = generate_review_from_request(review_content, review_prompt, ai_model) + review_rating = generate_review_from_request(pr_diff, OE_REVIEW_RATING_PROMPT, ai_model) return pr_diff, review, review_rating def ai_review(user_gitee, ai_model): @@ -955,6 +990,11 @@ def main(): my_model.model_name = cf.get('bailian', 'name') my_model.api_key = cf.get('bailian', 'api_key') my_model.base_url = cf.get('bailian', 'base_url') + elif args.intelligent == "siliconflow": + my_model = oe_review_ai_model("siliconflow") + my_model.model_name = cf.get('siliconflow', 'name') + my_model.api_key = cf.get('siliconflow', 'api_key') + my_model.base_url = cf.get('siliconflow', 'base_url') elif args.intelligent == "no": my_model = oe_review_ai_model("no") else: -- Gitee From 7ebe9ebdc3da25068f7a83123b7450f01eba4e28 Mon Sep 17 00:00:00 2001 From: Shinwell Hu Date: Sat, 1 Feb 2025 16:47:46 +0800 Subject: [PATCH 12/20] use openai interface intead of requests --- advisors/oe_review.py | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/advisors/oe_review.py b/advisors/oe_review.py index 06ee016c..79000bd6 100755 --- a/advisors/oe_review.py +++ b/advisors/oe_review.py @@ -105,21 +105,25 @@ class oe_review_ai_model: self._type = type self._base_url = "http://localhost:11434/api" self._model_name = "llama3.1:8b" + self._method = "ollama" elif type == "deepseek": self._type = "deepseek" self._base_url = "https://api.deepseek.com" self._model_name = "deepseek-chat" self._api_key = "" + self._method = "openai" elif type == "bailian": self._type = "bailian" self._base_url = "https://dashscope.aliyuncs.com/compatible-mode/v1" self._model_name = "deepseek-v3" self._api_key = "" + self._method = "openai" elif type == "siliconflow": self._type = "siliconflow" self._base_url = "https://api.siliconflow.cn/v1/chat/completions" self._model_name = "deepseek-r1" - self._api_key = "sk-riamcwezutshhoxdifdddyfipvhuhxofmuqtejmbdmokjjda" + self._api_key = "" + self._method = "openai" elif type == "no": self._type = "no" else: @@ -159,6 +163,13 @@ class oe_review_ai_model: else: pass # we dont need api_key for local or no + @property + def method(self): + return self._method + @method.setter + def method(self, new_value): + self._method = new_value + def print_verbose(msg): global GLOBAL_VERBOSE if GLOBAL_VERBOSE: @@ -510,10 +521,11 @@ def ai_review_impl(user_gitee, repo, pull_id, group, ai_model, review_comment, p #review = generate_review_from_ollama(pr_diff, OE_REVIEW_PR_PROMPT, ai_model) review = generate_review_from_ollama(review_content, review_prompt, ai_model) review_rating = generate_review_from_ollama(pr_diff, OE_REVIEW_RATING_PROMPT, ai_model) - elif ai_model.type == "deepseek" or ai_model.type == "bailian": +# elif ai_model.type == "deepseek" or ai_model.type == "bailian" or ai_model.type == "siliconflow": + elif ai_model.method == "openai": review = generate_review_from_openai(review_content, review_prompt, ai_model) review_rating = generate_review_from_openai(pr_diff, OE_REVIEW_RATING_PROMPT, ai_model) - elif ai_model.type == "siliconflow": + elif ai_model.method == "requests": review = generate_review_from_request(review_content, review_prompt, ai_model) review_rating = generate_review_from_request(pr_diff, OE_REVIEW_RATING_PROMPT, ai_model) return pr_diff, review, review_rating @@ -978,28 +990,32 @@ def main(): if args.intelligent == "local": my_model = oe_review_ai_model("local") my_model.model_name = cf.get('model', 'name') - if args.model: - my_model.model_name = args.model elif args.intelligent == "deepseek": my_model = oe_review_ai_model("deepseek") my_model.model_name = cf.get('deepseek', 'name') my_model.api_key = cf.get('deepseek', 'api_key') my_model.base_url = cf.get('deepseek', 'base_url') + my_model.method = cf.get('deepseek', 'method') elif args.intelligent == "bailian": my_model = oe_review_ai_model("bailian") my_model.model_name = cf.get('bailian', 'name') my_model.api_key = cf.get('bailian', 'api_key') my_model.base_url = cf.get('bailian', 'base_url') + my_model.method = cf.get('bailian', 'method') elif args.intelligent == "siliconflow": my_model = oe_review_ai_model("siliconflow") my_model.model_name = cf.get('siliconflow', 'name') my_model.api_key = cf.get('siliconflow', 'api_key') my_model.base_url = cf.get('siliconflow', 'base_url') + my_model.method = cf.get('siliconflow', 'method') elif args.intelligent == "no": my_model = oe_review_ai_model("no") else: my_model = oe_review_ai_model("no") + if args.model: + my_model.model_name = args.model + filter = {} filter['labels'] = set(cf.get('filter', 'labels').split()) filter['submitters'] = set(cf.get('filter', 'submitters').split()) -- Gitee From 2c4b225a369a32c87b746f9fdd6bdc8bf408c1f1 Mon Sep 17 00:00:00 2001 From: Shinwell Hu Date: Sun, 2 Feb 2025 13:44:31 +0800 Subject: [PATCH 13/20] simplify code --- .gitignore | 2 +- advisors/oe_review.py | 43 +++++++++++++++++-------------------------- 2 files changed, 18 insertions(+), 27 deletions(-) diff --git a/.gitignore b/.gitignore index 372f5a91..dbc090ca 100644 --- a/.gitignore +++ b/.gitignore @@ -4,4 +4,4 @@ *.pyc # 排查python 生成的中间文件夹 __pycache__ -advisors/openEuler-Advisor.code-workspace +oE_ENV/ \ No newline at end of file diff --git a/advisors/oe_review.py b/advisors/oe_review.py index 79000bd6..5d783161 100755 --- a/advisors/oe_review.py +++ b/advisors/oe_review.py @@ -517,11 +517,9 @@ def ai_review_impl(user_gitee, repo, pull_id, group, ai_model, review_comment, p print_verbose(f"review_prompt is: {review_prompt}") - if ai_model.type == "local": - #review = generate_review_from_ollama(pr_diff, OE_REVIEW_PR_PROMPT, ai_model) + if ai_model.method == "ollama": review = generate_review_from_ollama(review_content, review_prompt, ai_model) review_rating = generate_review_from_ollama(pr_diff, OE_REVIEW_RATING_PROMPT, ai_model) -# elif ai_model.type == "deepseek" or ai_model.type == "bailian" or ai_model.type == "siliconflow": elif ai_model.method == "openai": review = generate_review_from_openai(review_content, review_prompt, ai_model) review_rating = generate_review_from_openai(pr_diff, OE_REVIEW_RATING_PROMPT, ai_model) @@ -987,33 +985,26 @@ def main(): editor["editor-option"] = args.editor_option print_verbose(f"editor option is: {editor['editor-option']}") - if args.intelligent == "local": - my_model = oe_review_ai_model("local") - my_model.model_name = cf.get('model', 'name') - elif args.intelligent == "deepseek": - my_model = oe_review_ai_model("deepseek") - my_model.model_name = cf.get('deepseek', 'name') - my_model.api_key = cf.get('deepseek', 'api_key') - my_model.base_url = cf.get('deepseek', 'base_url') - my_model.method = cf.get('deepseek', 'method') - elif args.intelligent == "bailian": - my_model = oe_review_ai_model("bailian") - my_model.model_name = cf.get('bailian', 'name') - my_model.api_key = cf.get('bailian', 'api_key') - my_model.base_url = cf.get('bailian', 'base_url') - my_model.method = cf.get('bailian', 'method') - elif args.intelligent == "siliconflow": - my_model = oe_review_ai_model("siliconflow") - my_model.model_name = cf.get('siliconflow', 'name') - my_model.api_key = cf.get('siliconflow', 'api_key') - my_model.base_url = cf.get('siliconflow', 'base_url') - my_model.method = cf.get('siliconflow', 'method') - elif args.intelligent == "no": + + if args.intelligent == "no": my_model = oe_review_ai_model("no") else: - my_model = oe_review_ai_model("no") + if not cf.has_section(args.intelligent): + print("Section of config not found in config file.") + return 1 + else: + try: + my_model = oe_review_ai_model(args.intelligent) + my_model.model_name = cf.get(args.intelligent, 'model') + my_model.api_key = cf.get(args.intelligent, 'api_key') + my_model.base_url = cf.get(args.intelligent, 'base_url') + my_model.method = cf.get(args.intelligent, 'method') + except configparser.NoOptionError as e: + print(f"Config option is missing: {e}") + return 1 if args.model: + print_verbose(f"command line model is overriding config file") my_model.model_name = args.model filter = {} -- Gitee From 358cef7c4086c83d7a27a727bd02c44078be0154 Mon Sep 17 00:00:00 2001 From: Shinwell Hu Date: Sun, 2 Feb 2025 14:15:10 +0800 Subject: [PATCH 14/20] add document for oe_review --- doc/oe_review.md | 138 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 138 insertions(+) create mode 100644 doc/oe_review.md diff --git a/doc/oe_review.md b/doc/oe_review.md new file mode 100644 index 00000000..bff67b9b --- /dev/null +++ b/doc/oe_review.md @@ -0,0 +1,138 @@ +# openEuler Pull Request Review Tool 用户使用指南 + +## 简介 +oe_review(全称 openEuler Pull Request Review Tool)是一个用于自动化审核 openEuler 项目中的 Pull Request (PR) 的工具。它通过结合 AI 模型和人工审核,帮助开发者快速、高效地处理 PR。该工具支持多种 AI 模型,包括本地模型和云端模型,并提供了灵活的配置选项。 + +## 安装与配置 + +### 1. 安装依赖 +确保您的系统已安装以下依赖: + +Python 3.x +requests 库 +yaml 库 +chromadb 库 +openai 库 + +建议考虑通过如下命令安装和初始化: +```bash +python3 -m venv oE_ENV # 在当前目录下初始化虚拟环境 +source oE_ENV/bin/active # 使用 oE_ENV 作为 python 运行的虚拟环境 +source developer.sh # 引入 openEuler-Advisor 进入 python 运行环境 +pip install requests pyyaml pyrpm chromadb openai +``` + +### 2. 配置文件 +创建 ~/.config/openEuler-Advisor/config.ini 配置文件,内容如下: + +```config +[editor] +# 所有 PR 审视内容提交前会由审核者本地编辑,因此需要配置一个本地的编辑器。 +# 比如 console 下的 vim 或者 osx 上的 neovide。此处需要确保 编辑器 在运行过程中阻塞程序逻辑继续执行,比如对于 +# neovide 来说,要配置 option 为 --no-fork 来保证这一点。 +command = vim +option = "" + +[filter] +# 基于 label,提交者,代码仓或者sig 来过滤要处理的 PR +labels = lgtm-shinwell +# 比如通过 lgtm-shinwell,过滤掉 community 中 shinwell 已经发表过 /lgtm 意见的 PR +submitters = user1 +repos = repo1 repo2 +sigs = sig1 sig2 + +[community] +# 指明本地准备的 openEuler 社区管理代码仓 + community = "~/Projects/openEuler/community" + release-manage = "~/Projects/openEuler/release-management" + +[local] +# 支持本地运行的大模型服务 + method = ollama + base_url = http://localhost:11434/api + model = "llama3.1:8b" + +# 以下可按实际需要配置 +[deepseek] +model = deepseek-chat +api_key = your_api_key_here +base_url = https://api.deepseek.com +method = openai + +[bailian] +model = deepseek-v3 +api_key = your_api_key_here +base_url = https://dashscope.aliyuncs.com/compatible-mode/v1 +method = openai + +[siliconflow] +model = deepseek-r1 +api_key = your_api_key_here +base_url = https://api.siliconflow.cn/v1/ +method = openai +``` + +### 3. 设置GITEE API 的 token 变量 +保存在 ~/.gitee_personal_token.json +```json +{"access_token":"place_your_access_token_here", "user":"shinwell_hu"} +``` + +## 使用说明 +### 1. 命令行参数 +该工具支持以下命令行参数: + +-q, --quite: 禁用所有日志输出。 +-v, --verbose: 启用详细日志输出。 +-a, --active_user: 以维护者或提交者身份审核所有仓库中的 PR。 +-n, --repo: 指定仓库名称(包括组名)。 +-p, --pull: 指定 PR 的 ID。 +-u, --url: 指定 PR 的 URL。 +-s, --sig: 当 --active_user 启用时,审核指定 SIG 中的所有 PR。 +-m, --model: 选择用于生成审核的 AI 模型。 +-e, --editor: 选择用于编辑内容的编辑器,默认为 nvim。 +-i, --intelligent: 选择智能模型(local、deepseek、no)。 +-o, --editor-option: 编辑器的命令行选项。 + +### 2. 审核单个 PR +要审核单个 PR,可以使用以下命令: + +python3 advisors/oe_review.py -n src-openeuler/repo_name -p 123 -i local +或者使用 PR 的 URL: + +python3 advisors/oe_review.py -u https://gitee.com/src-openeuler/repo_name/pulls/123 -i deepseek + +### 3. 审核整个 SIG 的 PR +如果您是某个 SIG 的维护者或提交者,可以使用以下命令审核该 SIG 中的所有 PR: + +python3 advisors/oe_review.py -a -s sig_name -i no + +### 4. 使用不同的 AI 模型 +您可以通过 -i 参数选择不同的 AI 模型。例如,使用 deepseek 模型: + +python3 advisors/oe_review.py -n src-openeuler/repo_name -p 123 -i deepseek + +### 5. 手动编辑审核内容 +在审核过程中,工具会调用指定的编辑器(默认为 nvim)来编辑审核内容。您可以通过 -e 参数指定其他编辑器: + +python3 advisors/oe_review.py -n src-openeuler/repo_name -p 123 -e vim + +## 审核流程 +生成待审核 PR 列表: 工具会从指定的仓库或 SIG 中获取所有待审核的 PR。 +简单分类: 工具会根据 PR 的标签和状态进行简单分类,决定是否可以直接关闭或合并。 +AI 审核: 对于需要进一步审核的 PR,工具会调用 AI 模型生成审核意见。 +人工审核: 工具会调用指定的编辑器,允许用户手动编辑 AI 生成的审核意见。 +提交审核: 工具会将最终的审核意见提交到 Gitee。 + +## 常见问题 +### 1. 如何配置 AI 模型? +在配置文件中,您可以为不同的 AI 模型(如 deepseek、bailian、siliconflow)配置 API Key 和模型名称。确保您已正确填写这些信息。 + +### 2. 如何过滤不需要审核的 PR? +在配置文件的 [filter] 部分,您可以指定需要过滤的标签、提交者和仓库。工具会自动跳过这些 PR。 + +### 3. 如何查看详细的日志输出? +使用 -v 或 --verbose 参数可以启用详细日志输出,帮助您调试和了解工具的运行情况。 + +## 结论 +openEuler Pull Request Review Tool 是一个强大的工具,能够帮助开发者高效地审核和管理 openEuler 项目中的 PR。通过结合 AI 模型和人工审核,它能够显著提高审核效率,减少人工工作量。希望本指南能帮助您更好地使用该工具。 \ No newline at end of file -- Gitee From e9bb15a42ac01958f14df9f3e005eda882cb8351 Mon Sep 17 00:00:00 2001 From: Shinwell Hu Date: Sun, 2 Feb 2025 14:18:18 +0800 Subject: [PATCH 15/20] fix --- doc/oe_review.md | 70 +++++++++++++++++++++++++----------------------- 1 file changed, 36 insertions(+), 34 deletions(-) diff --git a/doc/oe_review.md b/doc/oe_review.md index bff67b9b..b06f61c9 100644 --- a/doc/oe_review.md +++ b/doc/oe_review.md @@ -8,11 +8,11 @@ oe_review(全称 openEuler Pull Request Review Tool)是一个用于自动化 ### 1. 安装依赖 确保您的系统已安装以下依赖: -Python 3.x -requests 库 -yaml 库 -chromadb 库 -openai 库 +- Python 3.x +- requests 库 +- yaml 库 +- chromadb 库 +- openai 库 建议考虑通过如下命令安装和初始化: ```bash @@ -43,14 +43,14 @@ sigs = sig1 sig2 [community] # 指明本地准备的 openEuler 社区管理代码仓 - community = "~/Projects/openEuler/community" - release-manage = "~/Projects/openEuler/release-management" +community = "~/Projects/openEuler/community" +release-manage = "~/Projects/openEuler/release-management" [local] # 支持本地运行的大模型服务 - method = ollama - base_url = http://localhost:11434/api - model = "llama3.1:8b" +method = ollama +base_url = http://localhost:11434/api +model = "llama3.1:8b" # 以下可按实际需要配置 [deepseek] @@ -82,47 +82,49 @@ method = openai ### 1. 命令行参数 该工具支持以下命令行参数: --q, --quite: 禁用所有日志输出。 --v, --verbose: 启用详细日志输出。 --a, --active_user: 以维护者或提交者身份审核所有仓库中的 PR。 --n, --repo: 指定仓库名称(包括组名)。 --p, --pull: 指定 PR 的 ID。 --u, --url: 指定 PR 的 URL。 --s, --sig: 当 --active_user 启用时,审核指定 SIG 中的所有 PR。 --m, --model: 选择用于生成审核的 AI 模型。 --e, --editor: 选择用于编辑内容的编辑器,默认为 nvim。 --i, --intelligent: 选择智能模型(local、deepseek、no)。 --o, --editor-option: 编辑器的命令行选项。 +. -q, --quite: 禁用所有日志输出。 +. -v, --verbose: 启用详细日志输出。 +. -a, --active_user: 以维护者或提交者身份审核所有仓库中的 PR。 +. -n, --repo: 指定仓库名称(包括组名)。 +. -p, --pull: 指定 PR 的 ID。 +. -u, --url: 指定 PR 的 URL。 +. -s, --sig: 当 --active_user 启用时,审核指定 SIG 中的所有 PR。 +. -m, --model: 选择用于生成审核的 AI 模型。 +. -e, --editor: 选择用于编辑内容的编辑器,默认为 nvim。 +. -i, --intelligent: 选择智能模型(local、deepseek、no)。 +. -o, --editor-option: 编辑器的命令行选项。 ### 2. 审核单个 PR 要审核单个 PR,可以使用以下命令: +```bash python3 advisors/oe_review.py -n src-openeuler/repo_name -p 123 -i local +``` 或者使用 PR 的 URL: - +```bash python3 advisors/oe_review.py -u https://gitee.com/src-openeuler/repo_name/pulls/123 -i deepseek - +``` ### 3. 审核整个 SIG 的 PR 如果您是某个 SIG 的维护者或提交者,可以使用以下命令审核该 SIG 中的所有 PR: - +```bash python3 advisors/oe_review.py -a -s sig_name -i no - +``` ### 4. 使用不同的 AI 模型 您可以通过 -i 参数选择不同的 AI 模型。例如,使用 deepseek 模型: - +```bash python3 advisors/oe_review.py -n src-openeuler/repo_name -p 123 -i deepseek - +``` ### 5. 手动编辑审核内容 在审核过程中,工具会调用指定的编辑器(默认为 nvim)来编辑审核内容。您可以通过 -e 参数指定其他编辑器: - +```bash python3 advisors/oe_review.py -n src-openeuler/repo_name -p 123 -e vim - +``` ## 审核流程 -生成待审核 PR 列表: 工具会从指定的仓库或 SIG 中获取所有待审核的 PR。 -简单分类: 工具会根据 PR 的标签和状态进行简单分类,决定是否可以直接关闭或合并。 -AI 审核: 对于需要进一步审核的 PR,工具会调用 AI 模型生成审核意见。 -人工审核: 工具会调用指定的编辑器,允许用户手动编辑 AI 生成的审核意见。 -提交审核: 工具会将最终的审核意见提交到 Gitee。 +- 生成待审核 PR 列表: 工具会从指定的仓库或 SIG 中获取所有待审核的 PR。 +- 简单分类: 工具会根据 PR 的标签和状态进行简单分类,决定是否可以直接关闭或合并。 +- AI 审核: 对于需要进一步审核的 PR,工具会调用 AI 模型生成审核意见。 +- 人工审核: 工具会调用指定的编辑器,允许用户手动编辑 AI 生成的审核意见。 +- 提交审核: 工具会将最终的审核意见提交到 Gitee。 ## 常见问题 ### 1. 如何配置 AI 模型? -- Gitee From 4f905b0d577e5c4527f9fe7929c75b9e760e466e Mon Sep 17 00:00:00 2001 From: Shinwell Hu Date: Sun, 2 Feb 2025 14:20:40 +0800 Subject: [PATCH 16/20] clean up document --- doc/oe_review.md | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/doc/oe_review.md b/doc/oe_review.md index b06f61c9..845845f8 100644 --- a/doc/oe_review.md +++ b/doc/oe_review.md @@ -82,17 +82,17 @@ method = openai ### 1. 命令行参数 该工具支持以下命令行参数: -. -q, --quite: 禁用所有日志输出。 -. -v, --verbose: 启用详细日志输出。 -. -a, --active_user: 以维护者或提交者身份审核所有仓库中的 PR。 -. -n, --repo: 指定仓库名称(包括组名)。 -. -p, --pull: 指定 PR 的 ID。 -. -u, --url: 指定 PR 的 URL。 -. -s, --sig: 当 --active_user 启用时,审核指定 SIG 中的所有 PR。 -. -m, --model: 选择用于生成审核的 AI 模型。 -. -e, --editor: 选择用于编辑内容的编辑器,默认为 nvim。 -. -i, --intelligent: 选择智能模型(local、deepseek、no)。 -. -o, --editor-option: 编辑器的命令行选项。 +- -q, --quite: 禁用所有日志输出。 +- -v, --verbose: 启用详细日志输出。 +- -a, --active_user: 以维护者或提交者身份审核所有仓库中的 PR。 +- -n, --repo: 指定仓库名称(包括组名)。 +- -p, --pull: 指定 PR 的 ID。 +- -u, --url: 指定 PR 的 URL。 +- -s, --sig: 当 --active_user 启用时,审核指定 SIG 中的所有 PR。 +- -m, --model: 选择用于生成审核的 AI 模型。 +- -e, --editor: 选择用于编辑内容的编辑器,默认为 nvim。 +- -i, --intelligent: 选择智能模型(local、deepseek、no)。 +- -o, --editor-option: 编辑器的命令行选项。 ### 2. 审核单个 PR 要审核单个 PR,可以使用以下命令: @@ -122,8 +122,9 @@ python3 advisors/oe_review.py -n src-openeuler/repo_name -p 123 -e vim ## 审核流程 - 生成待审核 PR 列表: 工具会从指定的仓库或 SIG 中获取所有待审核的 PR。 - 简单分类: 工具会根据 PR 的标签和状态进行简单分类,决定是否可以直接关闭或合并。 -- AI 审核: 对于需要进一步审核的 PR,工具会调用 AI 模型生成审核意见。 +- AI 审核: 对于需要进一步审核的 PR,工具从本地向量数据库中获取类似审核信息,作为范例提供给 AI 模型,并生成建议审核意见。 - 人工审核: 工具会调用指定的编辑器,允许用户手动编辑 AI 生成的审核意见。 +- 保存审核意见:工具会将人工审核的意见,连同PR相关信息一起保存到本地的向量数据库 - 提交审核: 工具会将最终的审核意见提交到 Gitee。 ## 常见问题 -- Gitee From e5474f3a3b6bac334975d24be7755269c705a724 Mon Sep 17 00:00:00 2001 From: Shinwell Hu Date: Sat, 15 Feb 2025 11:35:38 +0800 Subject: [PATCH 17/20] new function --- advisors/cleanup_repo.py | 88 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 88 insertions(+) create mode 100755 advisors/cleanup_repo.py diff --git a/advisors/cleanup_repo.py b/advisors/cleanup_repo.py new file mode 100755 index 00000000..b1b19f27 --- /dev/null +++ b/advisors/cleanup_repo.py @@ -0,0 +1,88 @@ +#!/usr/bin/env python3 +#****************************************************************************** +# Copyright (c) Huawei Technologies Co., Ltd. 2020-2020. All rights reserved. +# licensed under the Mulan PSL v2. +# You can use this software according to the terms and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# http://license.coscl.org.cn/MulanPSL2 +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR +# PURPOSE. +# See the Mulan PSL v2 for more details. +# +# ******************************************************************************/ +""" +This is a command line tool for adding new repo +""" + +import sys +import argparse +import yaml + + +def main(): + """ + Main entrance for command line + """ + par = argparse.ArgumentParser() + par.add_argument("-r", "--repo", help="YAML file for repositories", type=str, required=True) + par.add_argument("-i", "--sigs", help="YAML file for sigs", type=str, required=True) + par.add_argument("-s", "--sig", help="Sig manage this repo", type=str, required=True) + par.add_argument("-n", "--name", help="Name for new repo", type=str, required=True) + par.add_argument("-d", "--desc", help="Description for new repo", type=str, required=True) + par.add_argument("-u", "--upstream", help="Upstream for new repo", type=str, required=True) + + args = par.parse_args() + + with open(args.sigs) as sigs_file: + sigs = yaml.load(sigs_file.read(), Loader=yaml.Loader) + if not sigs: + print("Failed to load {file}".format(file=args.sigs)) + sys.exit(1) + + with open(args.repo) as repo_file: + repo = yaml.load(repo_file.read(), Loader=yaml.Loader) + if not repo: + print("Failed to load {file}".format(file=args.repo)) + sys.exit(1) + + repo_info = {} + repo_info["name"] = args.name + repo_info["description"] = args.desc + repo_info["upstream"] = args.upstream + repo_info["protected_branches"] = ["master"] + repo_info["type"] = "public" + + exist = [x for x in repo["repositories"] if x["name"] == args.name] + if exist != []: + print("Repo already exist") + sys.exit(1) + + if repo["community"] == "openeuler": + repo["repositories"].append(repo_info) + elif repo["community"] == "src-openeuler": + repo_info["upstream"] = args.upstream + repo["repositories"].append(repo_info) + + repo["repositories"].sort(key=lambda r: r["name"]) + + valid_sig = False + for sig in sigs["sigs"]: + if sig["name"] == args.sig: + sig["repositories"].append(repo["community"] + "/" + args.name) + sig["repositories"].sort() + valid_sig = True + continue + + if valid_sig: + with open(args.repo, "w") as repo_file: + yaml.dump(repo, repo_file) + with open(args.sigs, "w") as sigs_file: + yaml.dump(sigs, sigs_file) + else: + print("SIG name is not valid") + sys.exit(1) + + +if __name__ == "__main__": + main() -- Gitee From 8bb60ad636f5ef1a7fb47d7a3915e6f834a4fa49 Mon Sep 17 00:00:00 2001 From: Shinwell Hu Date: Thu, 27 Feb 2025 13:43:50 +0800 Subject: [PATCH 18/20] reorder to move review result at head --- advisors/oe_review.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/advisors/oe_review.py b/advisors/oe_review.py index 5d783161..86602e4a 100755 --- a/advisors/oe_review.py +++ b/advisors/oe_review.py @@ -599,7 +599,8 @@ def manually_review_impl(user_gitee, pr_info, pull_request, review, review_ratin review = "" if review_rating is None: review_rating = "" - review_comment_raw = edit_content(review_content + '\n\n# ReviewBot\n\n' + review + '\n\n# ReviewRating\n\n' + review_rating + '\n\n' + pr_diff, editor) + #review_comment_raw = edit_content(review_content + '\n\n# ReviewBot\n\n' + review + '\n\n# ReviewRating\n\n' + review_rating + '\n\n' + pr_diff, editor) + review_comment_raw = edit_content('# ReviewBot\n\n' + review + '\n\n# ReviewRating\n\n' + review_rating + '\n\n' + review_content + '\n\n' + pr_diff, editor) global g_chromadb_client global g_chromadb_collection -- Gitee From 28fbef119e4bfe31fd7880579dcec91de6460956 Mon Sep 17 00:00:00 2001 From: Shinwell Hu Date: Thu, 27 Mar 2025 13:28:30 +0800 Subject: [PATCH 19/20] Simplify and refactor of code --- .gitignore | 3 +- advisors/oe_review.py | 173 +++++++++++++++++++++--------------------- 2 files changed, 87 insertions(+), 89 deletions(-) diff --git a/.gitignore b/.gitignore index dbc090ca..6940ff09 100644 --- a/.gitignore +++ b/.gitignore @@ -4,4 +4,5 @@ *.pyc # 排查python 生成的中间文件夹 __pycache__ -oE_ENV/ \ No newline at end of file +oE_ENV/ +.venv/ \ No newline at end of file diff --git a/advisors/oe_review.py b/advisors/oe_review.py index 86602e4a..892200b7 100755 --- a/advisors/oe_review.py +++ b/advisors/oe_review.py @@ -18,7 +18,6 @@ import re import sys import argparse import subprocess -import collections import queue import tempfile import urllib @@ -96,7 +95,6 @@ g_chromadb_collection = None # define data structure that contains queue and mutex lock for thread sharing import threading -import time # define data structure to contain AI model information class oe_review_ai_model: @@ -214,27 +212,25 @@ def generate_review_from_ollama(pr_content, prompt, ai_model): return response.json().get('response', '') def generate_review_from_request(pr_content, prompt, model): + """Send review request to API endpoint and return response""" + messages = [ + {"role": "user", "content": urllib.parse.quote(pr_content)}, + {"role": "system", "content": urllib.parse.quote(prompt)} + ] + payload = { "model": model.model_name, - "messages": [ - { "role": "user", - "content": urllib.parse.quote(pr_content) - }, - { 'role': 'system', - 'content': urllib.parse.quote(prompt) - }, - ], - "stream": False, + "messages": messages, + "stream": False } + headers = { "Authorization": f"Bearer {model.api_key}", "Content-Type": "application/json" } - # print_verbose(f"payload is: {payload}") - # print_verbose(f"header is {headers}") - response = requests.request("POST", model.base_url, json=payload, headers=headers) - return (response.text) + response = requests.post(model.base_url, json=payload, headers=headers) + return response.text def generate_review_from_openai(pr_content, prompt, model): #Get URL and API Key from config file @@ -286,19 +282,15 @@ def extract_params(args): if args.url and len(args.url) > 0: res = check_pr_url(args.url) if res: - group = res.group(3) - repo_name = res.group(4) - pull_id = res.group(5) - return (group, repo_name, pull_id) + return (res.group(3), res.group(4), res.group(5)) print("ERROR: URL is wrong, please check!") return () - if args.repo and args.pull and len(args.repo) > 0 and len(args.pull) > 0: - group = args.repo.split('/')[0] - repo_name = args.repo.split('/')[1] - pull_id = args.pull - return group, repo_name, pull_id - print("WARNING: please specify the URL of PR or repository name and PR's ID.\ - \nDetails use -h/--help option.") + + if args.repo and args.pull: + group, repo_name = args.repo.split('/') + return (group, repo_name, args.pull) + + print("WARNING: please specify the URL of PR or repository name and PR's ID.\nDetails use -h/--help option.") return () def args_parser(): @@ -333,24 +325,42 @@ def load_config(): return None def edit_content(text, editor): + """ + Edit content using the specified editor. + + Args: + text (str): The text content to edit + editor (dict): Dictionary containing editor and editor options + + Returns: + str: The edited text content + """ print_verbose("starting edit_content") - fd, path = tempfile.mkstemp(suffix=".tmp", prefix="oe_review") - with os.fdopen(fd, 'w') as tmp: - tmp.write(text) - tmp.flush() - - print_verbose(editor["editor-option"]) - if editor["editor-option"] == '""': - result = subprocess.Popen([editor["editor"]] + [path]) - result.wait() - else: - result = subprocess.run([editor["editor"], editor["editor-option"], path]) - #result = subprocess.run([editor["editor"], editor["editor-option"], path], capture_output=True, text=True) - print_verbose(result.stdout) - print_verbose(result.stderr) + + # Create temporary file + fd, temp_path = tempfile.mkstemp(suffix=".tmp", prefix="oe_review") + + # Write content to temp file + with os.fdopen(fd, 'w') as temp_file: + temp_file.write(text) + temp_file.flush() - text_new = open(path).read() - return text_new + print_verbose(editor["editor-option"]) + + # Launch editor based on options + if editor["editor-option"] == '""': + # Simple editor launch + editor_process = subprocess.Popen([editor["editor"], temp_path]) + editor_process.wait() + else: + # Launch with additional options + result = subprocess.run([editor["editor"], editor["editor-option"], temp_path]) + print_verbose(result.stdout) + print_verbose(result.stderr) + + # Read and return edited content + with open(temp_path) as edited_file: + return edited_file.read() def easy_classify(pull_request): suggest_action = "" @@ -389,25 +399,18 @@ def easy_classify(pull_request): return suggest_action, suggest_reason def review_history(user_gitee, owner, repo, number, pull_request): - review_comment = {} - review_comment['target_branch'] = pull_request["base"]["ref"] - history_comment = "" - sync_comment = "" - advisor_comment = "" comments = user_gitee.get_pr_comments_all(owner, repo, number) - for comment in comments: - if comment['user']['name'] == "openeuler-ci-bot": - if comment['body'].startswith("\n**以下为 openEuler-Advisor"): - advisor_comment = comment['body'] - elif comment['user']['name'] == "openeuler-sync-bot": - sync_comment += comment["body"] + "\n" - else: - history_comment += comment["user"]["name"] + ":\n" - history_comment += comment["body"] + "\n" - review_comment['history_comment'] = history_comment - review_comment['sync_comment'] = sync_comment - review_comment['advisor_comment'] = advisor_comment - return review_comment + + return { + 'target_branch': pull_request["base"]["ref"], + 'advisor_comment': next((c['body'] for c in comments + if c['user']['name'] == "openeuler-ci-bot" + and c['body'].startswith("\n**以下为 openEuler-Advisor")), ""), + 'sync_comment': ''.join(c['body'] + '\n' for c in comments + if c['user']['name'] == "openeuler-sync-bot"), + 'history_comment': ''.join(f"{c['user']['name']}:\n{c['body']}\n" for c in comments + if c['user']['name'] not in ["openeuler-ci-bot", "openeuler-sync-bot"]) + } def filter_pr(pull_request, filter): print_verbose("filter is: "+str(filter)) @@ -868,41 +871,35 @@ def get_quickissue_pulls_by_sig(sig): GET from quckissue api """ quickissue_base_url = "https://quickissue.openeuler.org/api-issues/pulls" - - query_url = quickissue_base_url + "?sig=" + sig + "&page=1&per_page=100&sort=created_at&state=open" results = [] total = 0 - pages = 1 + + def process_response(json_resp): + if not json_resp or not json_resp.get("data"): + return False + for d in json_resp["data"]: + repo_parts = d["repo"].split("/") + results.append({ + 'owner': repo_parts[0], + 'repo': repo_parts[1], + 'number': d["link"].split("/")[-1] + }) + return True + + # Get first page + query_url = f"{quickissue_base_url}?sig={sig}&page=1&per_page=100&sort=created_at&state=open" json_resp = get_quickissue(query_url) - if json_resp == None: + if not process_response(json_resp): return results, total - elif json_resp["data"] == None: - return results, total - - total = json_resp["total"] - - for d in json_resp["data"]: - res = {} - res['owner'] = d["repo"].split("/")[0] - res['repo'] = d["repo"].split("/")[1] - res['number'] = d["link"].split("/")[-1] - results.append(res) - pages = math.ceil(json_resp["total"] / json_resp["per_page"]) + total = json_resp["total"] + pages = math.ceil(total / json_resp["per_page"]) + # Get remaining pages for page in range(2, pages + 1): - query_url = quickissue_base_url + "?sig=" + sig + "&page=" + str(page) + "&per_page=100&sort=created_at&state=open" - json_resp = get_quickissue(query_url) - if json_resp == None: - return results, total - elif json_resp["data"] == None: - return results, total - for d in json_resp["data"]: - res = {} - res['owner'] = d["repo"].split("/")[0] - res['repo'] = d["repo"].split("/")[1] - res['number'] = d["link"].split("/")[-1] - results.append(res) + query_url = f"{quickissue_base_url}?sig={sig}&page={page}&per_page=100&sort=created_at&state=open" + process_response(get_quickissue(query_url)) + return results, total def generate_pending_prs(user_gitee, sig): -- Gitee From bf4760e2bc3cafd5a68b9f8aa4199426fb97ec8c Mon Sep 17 00:00:00 2001 From: Shinwell Hu Date: Fri, 28 Mar 2025 17:48:56 +0800 Subject: [PATCH 20/20] refactor code --- advisors/oe_review.py | 33 ++++++++++++--------------------- 1 file changed, 12 insertions(+), 21 deletions(-) diff --git a/advisors/oe_review.py b/advisors/oe_review.py index 892200b7..b956d672 100755 --- a/advisors/oe_review.py +++ b/advisors/oe_review.py @@ -264,7 +264,6 @@ def generate_review_from_openai(pr_content, prompt, model): except Exception as e: print(f"Unexpected error: {type(e).__name__}, {str(e)}") - def check_pr_url(url): """ check whether the URL of Pull Request is valid @@ -809,29 +808,22 @@ def generate_pending_prs_old(user_gitee, sig): """ Generate pending PRs """ - src_oe_repos = user_gitee.get_repos_by_sig(sig) - oe_repos = user_gitee.get_openeuler_repos_by_sig(sig) + print_verbose("start generate list of pending pr.") - total_len = len(src_oe_repos) + len(oe_repos) - current_percentage = 10 - counter = 0 - - print_verbose(f"start generate list of pending pr.") - for repo in src_oe_repos: - counter = counter + 1 - if print_progress(counter, total_len, current_percentage): - current_percentage = current_percentage + 10 - review_repo(user_gitee, 'src-openeuler', repo) - - for repo in oe_repos: - counter = counter + 1 - if print_progress(counter, total_len, current_percentage): - current_percentage = current_percentage + 10 - review_repo(user_gitee, 'openeuler', repo) + repos = { + 'src-openeuler': user_gitee.get_repos_by_sig(sig), + 'openeuler': user_gitee.get_openeuler_repos_by_sig(sig) + } + + total = sum(len(r) for r in repos.values()) + for owner, repo_list in repos.items(): + for i, repo in enumerate(repo_list, 1): + if print_progress(i, total, (i/total)*100): + review_repo(user_gitee, owner, repo) PENDING_PRS.put(None) print_verbose("generate_pending_pr finished") - PENDING_PRS.join() + PENDING_PRS.join() print_verbose("PENDING_PRS join finished") return 0 @@ -1034,7 +1026,6 @@ def main(): return 0 - if __name__ == "__main__": sys.exit(main()) -- Gitee