From 4853dbb35f79defe4c948b7e93b800be86151f7a Mon Sep 17 00:00:00 2001 From: smileknife Date: Thu, 10 Dec 2020 14:28:05 +0800 Subject: [PATCH] [which_archived]fix issue: fetch decommissioned GNU software failed Signed-off-by: smileknife --- advisors/which_archived.py | 60 +++---- advisors/yaml2url.py | 327 +++++++++++++++++++------------------ 2 files changed, 188 insertions(+), 199 deletions(-) diff --git a/advisors/which_archived.py b/advisors/which_archived.py index 81e468ed..0464ee53 100755 --- a/advisors/which_archived.py +++ b/advisors/which_archived.py @@ -17,6 +17,8 @@ @date: 2020/10/1 @notice: this tool check websites in 'helper/community_archived.yaml', user needs to configure first before running. +step1: copy helper/community_archived.yaml to ~/.community_archived.yaml +step2: edit ~/.community_archived.yaml """ import sys import os @@ -24,7 +26,6 @@ import json import logging import argparse import signal -import re import urllib.request import urllib.error import requests @@ -39,8 +40,6 @@ from advisors import yaml2url urllib3.disable_warnings() GET_METHOD_PEOJECTS = "/projects" -SIGS_URL = "https://gitee.com/openeuler/community/raw/master/sig/sigs.yaml" -COMMUNITY_ARCHIVED_YAML = "helper/community_archived.yaml" headers = {'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW 64; rv:23.0) Gecko/20100101 Firefox/23.0'} gitlab_list = ['gnome', 'freedesktop'] RECORDER_YAML = ".query_result_lasttime" @@ -81,16 +80,6 @@ def gitlab_list_project(urlbase, token, params, group_path=""): return __gitlab_get_method(query_url, token, params) -def get_sigs(): - """ - get sigs from oe - """ - req = urllib.request.Request(url=SIGS_URL, headers=headers) - res = urllib.request.urlopen(req) - sigs = yaml.load(res.read().decode("utf-8"), Loader=yaml.Loader) - return sigs['sigs'] - - def record_pkginfo(py_object): """ record package info for running quickly next time @@ -112,27 +101,23 @@ def read_pkginfo_lasttime(): return {} -def get_oe_repo_dict(cwd_path, nocached): +def get_oe_repo_dict(cwd_path, use_cache): """ get oe repo list from sigs.yaml """ logging.debug("begin to query oe.") - data = get_sigs() oe_repo_dict = {} - try: - my_gitee = gitee.Gitee() - except NameError: - sys.exit(1) - last_record_dict = {} - if not nocached: + my_gitee = gitee.Gitee() + data = my_gitee.get_sigs()['sigs'] + if use_cache: last_record_dict = read_pkginfo_lasttime() if len(last_record_dict) == 0: logging.info("last recorder not exist.") for repos in data: for repo in repos['repositories']: if repo.startswith('src-openeuler/'): - name = repo.replace('src-openeuler/', '') + name = repo.split('/')[1] repo_url = last_record_dict.get(name, None) if repo_url: logging.info("%s has record.", name) @@ -140,10 +125,9 @@ def get_oe_repo_dict(cwd_path, nocached): pkginfo = get_pkg_info(my_gitee, name, cwd_path) if pkginfo: repo_url = yaml2url.yaml2url(pkginfo) - else: - repo_url = 'none' - repo = {name: repo_url} - oe_repo_dict.update(repo) + if not repo_url: + repo_url = 'none' + oe_repo_dict.update({name: repo_url}) logging.info("total %d repositories in src-openeuler", len(oe_repo_dict)) record_pkginfo(oe_repo_dict) return oe_repo_dict @@ -154,7 +138,8 @@ def load_config(): load configuration """ try: - with open(COMMUNITY_ARCHIVED_YAML, 'r', encoding = 'utf-8') as archived_file: + config = os.path.expanduser("~/.community_archived.yaml") + with open(config, 'r', encoding = 'utf-8') as archived_file: return yaml.load(archived_file.read(), Loader = yaml.Loader) except OSError as reason: print("Load yaml failed!" + str(reason)) @@ -201,8 +186,8 @@ def arg_parser(): ['gnome', 'freedesktop', 'gnu'], default = "", help = "community name.") parser.add_argument('-d', '--default', type = str, default = os.getcwd(), help="The fallback place to look for YAML information") - parser.add_argument('-x', '--nocached', action = 'store_true', \ - default = False, help = 'not use result of last query') + parser.add_argument('-x', '--cached', action = 'store_true', \ + default = False, help = 'use result of last query') parser.set_defaults(func=cmd_check) sub_parser = parser.add_subparsers(title="sub-command list") parser_list = sub_parser.add_parser("list", help="list archived projects in upstream.") @@ -230,13 +215,11 @@ def parse_gnu_html(url, repo_url_list): file = urllib.request.urlopen(url, timeout=5) data = file.read() soup = bs4.BeautifulSoup(data.decode('utf-8'), 'html.parser') - tag = soup.find(text=re.compile("decommissioned")) - while tag is not None and getattr(tag, 'name', None) != 'p': + res = soup.find_all(class_="package-list emph-box") + tags = res[-1].children + for tag in tags: if getattr(tag, 'name', None) == 'a': repo_url_list.append(GNU_SOFTWARE_PAGE + tag.string) - tag = tag.nextSibling - # delete invalid element '' - del repo_url_list[0] def get_upstream_repo_url_list(name=""): @@ -247,8 +230,7 @@ def get_upstream_repo_url_list(name=""): data = load_config() if data is None or len(data) == 0: - print("Load \'%s\' failed, please check!" % COMMUNITY_ARCHIVED_YAML) - sys.exit(1) + return [] if name == "": for entry in data.values(): if entry['type'] == 'REST': @@ -274,6 +256,8 @@ def cmd_list(args): cmd list handler """ url_list = get_upstream_repo_url_list(args.name) + if not url_list: + return 1 for repo_url in url_list: print(repo_url) print("Total %d projects" % len(url_list)) @@ -285,8 +269,10 @@ def cmd_check(args): cmd check handler """ result = {} - oe_repo_dict = get_oe_repo_dict(args.default, args.nocached) + oe_repo_dict = get_oe_repo_dict(args.default, args.cached) url_list = get_upstream_repo_url_list(args.name) + if not url_list: + return 1 for key1, value1 in oe_repo_dict.items(): if value1 in url_list: result.update({key1:value1}) diff --git a/advisors/yaml2url.py b/advisors/yaml2url.py index 94647861..7dd16fff 100755 --- a/advisors/yaml2url.py +++ b/advisors/yaml2url.py @@ -1,162 +1,165 @@ -# !/usr/bin/python3 -# ****************************************************************************** -# Copyright (c) Huawei Technologies Co., Ltd. 2020-2020. All rights reserved. -# licensed under the Mulan PSL v2. -# You can use this software according to the terms and conditions of the Mulan PSL v2. -# You may obtain a copy of Mulan PSL v2 at: -# http://license.coscl.org.cn/MulanPSL2 -# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR -# PURPOSE. -# See the Mulan PSL v2 for more details. -# -# ******************************************************************************/ -""" -This is an script for get url from repo name -""" - -from urllib.parse import urljoin - - -def __get_hg_url(pkg_info): - """ - Get hg repo url of package - """ - url = urljoin(pkg_info["src_repo"] + "/", "json-tags") - return url - - -def __get_hg_raw_url(pkg_info): - """ - Get hg raw repo url of package - """ - url = urljoin(pkg_info["src_repo"] + "/", "raw-tags") - return url - - -def __get_github_url(pkg_info): - """ - Get github repo url of package - """ - url = "https://github.com/" + pkg_info["src_repo"] + ".git" - return url - - -def __get_gnome_url(pkg_info): - """ - Get gnome repo url of package - """ - src_repos = pkg_info["src_repo"].split("/") - if len(src_repos) == 1: - url = "https://gitlab.gnome.org/GNOME/" + pkg_info["src_repo"] + ".git" - else: - url = "https://gitlab.gnome.org/" + pkg_info["src_repo"] + ".git" - return url - - -def __get_git_url(pkg_info): - """ - Get git repo url of package - """ - url = pkg_info["src_repo"] - return url - - -def __get_svn_url(pkg_info): - """ - Get svn repo url of package - """ - tag_dir = pkg_info.get("tag_dir", "tags") - url = pkg_info["src_repo"] + "/" + tag_dir - return url - - -def __get_metacpan_url(pkg_info): - """ - Get metacpan repo url of package - """ - url = urljoin("https://metacpan.org/release/", pkg_info["src_repo"]) - return url - - -def __get_gitee_url(pkg_info): - """ - Get gitee repo url of package - """ - url = "https://gitee.com/" + pkg_info["src_repo"] + ".git" - return url - - -def __get_gnu_ftp_url(pkg_info): - """ - Get gnu ftp repo url of package - """ - url = urljoin("https://ftp.gnu.org/gnu/", pkg_info["src_repo"] + "/") - return url - - -def __get_ftp_url(pkg_info): - """ - Get ftp repo url of package - """ - url = urljoin('ftp', pkg_info["src_repo"] + "/") - return url - - -def __get_pypi_url(pkg_info): - """ - Get pypi repo url of package - """ - url = urljoin("https://pypi.org/pypi/", pkg_info["src_repo"] + "/json") - return url - - -def __get_rubygem_url(pkg_info): - """ - Get rubygem repo url of package - """ - url = urljoin("https://rubygems.org/api/v1/versions/", pkg_info["src_repo"] + ".json") - return url - - -def __get_sourceforge_url(pkg_info): - """ - Get git repo url of package - """ - url = pkg_info["src_repo"] - return url - - -def yaml2url(pkg_info): - """ - Get url from yaml - """ - vc_type = pkg_info.get("version_control", None) - if vc_type is None: - print("Missing version_control in YAML file") - return None - - switcher = { - "hg": __get_hg_url, - "hg-raw": __get_hg_raw_url, - "github": __get_github_url, - "git": __get_git_url, - "gitlab.gnome": __get_gnome_url, - "svn": __get_svn_url, - "metacpan": __get_metacpan_url, - "pypi": __get_pypi_url, - "rubygem": __get_rubygem_url, - "gitee": __get_gitee_url, - "gnu-ftp": __get_gnu_ftp_url, - "ftp": __get_ftp_url, - "sourceforge": __get_sourceforge_url - } - - get_url_method = switcher.get(vc_type, None) - if get_url_method: - url = get_url_method(pkg_info) - else: - print("Unsupport version control method {vc}".format(vc=vc_type)) - return None - - return url +# !/usr/bin/python3 +# ****************************************************************************** +# Copyright (c) Huawei Technologies Co., Ltd. 2020-2020. All rights reserved. +# licensed under the Mulan PSL v2. +# You can use this software according to the terms and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# http://license.coscl.org.cn/MulanPSL2 +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR +# PURPOSE. +# See the Mulan PSL v2 for more details. +# +# ******************************************************************************/ +""" +This is an script for get url from repo name +""" + +from urllib.parse import urljoin + + +def __get_hg_url(pkg_info): + """ + Get hg repo url of package + """ + url = urljoin(pkg_info["src_repo"] + "/", "json-tags") + return url + + +def __get_hg_raw_url(pkg_info): + """ + Get hg raw repo url of package + """ + url = urljoin(pkg_info["src_repo"] + "/", "raw-tags") + return url + + +def __get_github_url(pkg_info): + """ + Get github repo url of package + """ + url = "https://github.com/" + pkg_info["src_repo"] + ".git" + return url + + +def __get_gnome_url(pkg_info): + """ + Get gnome repo url of package + """ + src_repos = pkg_info["src_repo"].split("/") + if len(src_repos) == 1: + url = "https://gitlab.gnome.org/GNOME/" + pkg_info["src_repo"] + ".git" + else: + url = "https://gitlab.gnome.org/" + pkg_info["src_repo"] + ".git" + return url + + +def __get_git_url(pkg_info): + """ + Get git repo url of package + """ + url = pkg_info["src_repo"] + return url + + +def __get_svn_url(pkg_info): + """ + Get svn repo url of package + """ + tag_dir = pkg_info.get("tag_dir", "tags") + url = pkg_info["src_repo"] + "/" + tag_dir + return url + + +def __get_metacpan_url(pkg_info): + """ + Get metacpan repo url of package + """ + url = urljoin("https://metacpan.org/release/", pkg_info["src_repo"]) + return url + + +def __get_gitee_url(pkg_info): + """ + Get gitee repo url of package + """ + url = "https://gitee.com/" + pkg_info["src_repo"] + ".git" + return url + + +def __get_gnu_ftp_url(pkg_info): + """ + Get gnu ftp repo url of package + """ + url = urljoin("https://ftp.gnu.org/gnu/", pkg_info["src_repo"] + "/") + return url + + +def __get_ftp_url(pkg_info): + """ + Get ftp repo url of package + """ + url = urljoin('ftp', pkg_info["src_repo"] + "/") + return url + + +def __get_pypi_url(pkg_info): + """ + Get pypi repo url of package + """ + url = urljoin("https://pypi.org/pypi/", pkg_info["src_repo"] + "/json") + return url + + +def __get_rubygem_url(pkg_info): + """ + Get rubygem repo url of package + """ + url = urljoin("https://rubygems.org/api/v1/versions/", pkg_info["src_repo"] + ".json") + return url + + +def __get_sourceforge_url(pkg_info): + """ + Get git repo url of package + """ + url = pkg_info["src_repo"] + return url + + +def yaml2url(pkg_info): + """ + Get url from yaml + """ + if not isinstance(pkg_info, dict): + print("ERROR: parameter pkg_info type error") + return None + vc_type = pkg_info.get("version_control", None) + if vc_type is None: + print("Missing version_control in YAML file") + return None + + switcher = { + "hg": __get_hg_url, + "hg-raw": __get_hg_raw_url, + "github": __get_github_url, + "git": __get_git_url, + "gitlab.gnome": __get_gnome_url, + "svn": __get_svn_url, + "metacpan": __get_metacpan_url, + "pypi": __get_pypi_url, + "rubygem": __get_rubygem_url, + "gitee": __get_gitee_url, + "gnu-ftp": __get_gnu_ftp_url, + "ftp": __get_ftp_url, + "sourceforge": __get_sourceforge_url + } + + get_url_method = switcher.get(vc_type, None) + if get_url_method: + url = get_url_method(pkg_info) + else: + print("Unsupport version control method {vc}".format(vc=vc_type)) + return None + + return url -- Gitee