From 0572cd0f90f5cd117fd6fe6fb95bd3a904b495a5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=B1=AA=E9=87=8D=E9=98=B3?= <15757101689@163.com> Date: Mon, 31 Jul 2023 09:05:18 +0000 Subject: [PATCH 01/23] update tools/analyze_xml/handle_xml.py. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 汪重阳 <15757101689@163.com> --- tools/analyze_xml/handle_xml.py | 58 +++++++++++++++++++++++---------- 1 file changed, 40 insertions(+), 18 deletions(-) diff --git a/tools/analyze_xml/handle_xml.py b/tools/analyze_xml/handle_xml.py index dbccca8..ca5f162 100644 --- a/tools/analyze_xml/handle_xml.py +++ b/tools/analyze_xml/handle_xml.py @@ -1,18 +1,38 @@ #!/usr/bin/python3 import argparse -from xml.dom.minidom import parse import xml.dom.minidom from openpyxl import Workbook import pandas as pd from sklearn.feature_extraction.text import CountVectorizer from sklearn.naive_bayes import MultinomialNB from sklearn.pipeline import make_pipeline +import yaml +import urllib.request +import gzip +import importlib from build_log import Log logger = Log() -def create_csv(xml_file, all_rpm_name_file): +def install_module(): + # 导入模块 + module_list = ["argparse", "openpyxl", "pandas", "sklearn", "yaml", "gzip", "importlib"] + for module in module_list: + importlib.import_module(module) + + +def create_csv(yaml_file): + # 配置文件 + with open(yaml_file, 'r') as f: + data = yaml.safe_load(f) + url = data["xml_url"] + xml_file = "primary.xml" + urllib.request.urlretrieve(url, xml_file + '.gz') + with gzip.open(xml_file + '.gz', 'rb') as f_in: + with open(xml_file, 'wb') as f_out: + f_out.write(f_in.read()) + # 使用minidom解析器打开 XML 文档 dom_tree = xml.dom.minidom.parse(xml_file) collection = dom_tree.documentElement @@ -36,16 +56,17 @@ def create_csv(xml_file, all_rpm_name_file): name = package.getElementsByTagName('name')[0] rpm_name = name.childNodes[0].data - # 所需要的包名文件 - with open(all_rpm_name_file, 'r', encoding="utf-8") as f: - for line in f: - if 
line.strip() == rpm_name: - logger.info(rpm_name) - description = package.getElementsByTagName('description')[0] - group = package.getElementsByTagName('rpm:group')[0] - summary = package.getElementsByTagName('summary')[0] - ws.append([rpm_name, group.childNodes[0].data, group.childNodes[0].data, summary.childNodes[0].data, - description.childNodes[0].data.replace('\n', '')]) + + description = package.getElementsByTagName('description')[0] + group = package.getElementsByTagName('rpm:group')[0] + summary = package.getElementsByTagName('summary')[0] + print(rpm_name, description.childNodes) + if not description.childNodes: + ws.append([rpm_name, group.childNodes[0].data, group.childNodes[0].data, summary.childNodes[0].data, + len(description.childNodes)]) + continue + ws.append([rpm_name, group.childNodes[0].data, group.childNodes[0].data, summary.childNodes[0].data, + description.childNodes[0].data.replace('\n', '')]) for cell in ws['B']: if cell.value is not None and isinstance(cell.value, str): @@ -55,13 +76,13 @@ def create_csv(xml_file, all_rpm_name_file): wb.save('file_num.xlsx') # 将xlsx转换成csv data_xls = pd.read_excel('file_num.xlsx', engine='openpyxl') - data_xls.to_csv('file_num.csv', encoding='utf-8') + data_xls.to_csv('file_num.csv', encoding='utf-8', index=False) return "file_num.csv" -def analyza_csv(xml_file, all_rpm_name_file): - csv_file = create_csv(xml_file, all_rpm_name_file) +def analyze_csv(yaml_file): + csv_file = create_csv(yaml_file) # 加载数据 # 将此路径替换为你的csv文件的路径 file_path = csv_file @@ -104,12 +125,13 @@ def init_args(): :return: """ parser = argparse.ArgumentParser() - parser.add_argument("-x", type=str, dest="xml_file_name", help="xml file name") - parser.add_argument("-r", type=str, dest="rpm_file_name", help="所需要解析的包名") + parser.add_argument("-f", type=str, dest="xml_url", help="xml url") return parser.parse_args() if "__main__" == __name__: args = init_args() - analyza_csv(args.xml_file_name, args.rpm_file_name) \ No newline at end of file + 
+ install_module() + analyze_csv(args.xml_url) \ No newline at end of file -- Gitee From 88a6a2e6fc2fb6658edecb55481ea40c3d63e6a5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=B1=AA=E9=87=8D=E9=98=B3?= <15757101689@163.com> Date: Mon, 31 Jul 2023 09:08:52 +0000 Subject: [PATCH 02/23] update tools/analyze_xml/handle_xml.py. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 汪重阳 <15757101689@163.com> --- tools/analyze_xml/handle_xml.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tools/analyze_xml/handle_xml.py b/tools/analyze_xml/handle_xml.py index ca5f162..794c914 100644 --- a/tools/analyze_xml/handle_xml.py +++ b/tools/analyze_xml/handle_xml.py @@ -16,14 +16,12 @@ logger = Log() def install_module(): - # 导入模块 module_list = ["argparse", "openpyxl", "pandas", "sklearn", "yaml", "gzip", "importlib"] for module in module_list: importlib.import_module(module) def create_csv(yaml_file): - # 配置文件 with open(yaml_file, 'r') as f: data = yaml.safe_load(f) url = data["xml_url"] @@ -134,4 +132,4 @@ if "__main__" == __name__: args = init_args() install_module() - analyze_csv(args.xml_url) \ No newline at end of file + analyze_csv(args.xml_url) -- Gitee From ef5a4cfb654b7d6ae4e2a9527823eed1c62cab79 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=B1=AA=E9=87=8D=E9=98=B3?= <15757101689@163.com> Date: Mon, 31 Jul 2023 09:10:06 +0000 Subject: [PATCH 03/23] =?UTF-8?q?=E6=9B=B4=E6=96=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 汪重阳 <15757101689@163.com> --- tools/analyze_xml/build_log.py | 50 ++++++++++++++++++++++++++++++++++ tools/analyze_xml/config.yaml | 1 + 2 files changed, 51 insertions(+) create mode 100644 tools/analyze_xml/build_log.py create mode 100644 tools/analyze_xml/config.yaml diff --git a/tools/analyze_xml/build_log.py b/tools/analyze_xml/build_log.py new file mode 100644 index 0000000..7e863f1 --- /dev/null +++ 
b/tools/analyze_xml/build_log.py @@ -0,0 +1,50 @@ +# coding:utf-8 + +import logging +import colorlog + + +class Log: + def __init__(self, name=None, log_level=logging.INFO): + """ + @param name: log name + @log_level: log level + """ + self.logger = logging.getLogger(name) + + self.logger.handlers = [] + + self.logger.setLevel(log_level) + + console_fmt = '[%(asctime)s]-[%(name)s]-[line:%(lineno)d]-[%(levelname)s]: %(log_color)s%(message)s' + + color_config = { + 'DEBUG': 'cyan', + 'INFO': 'green', + 'WARNING': 'yellow', + 'ERROR': 'red', + 'CRITICAL': 'purple', + } + + console_formatter = colorlog.ColoredFormatter(fmt=console_fmt, log_colors=color_config) + + console_handler = logging.StreamHandler() + + console_handler.setFormatter(console_formatter) + + self.logger.addHandler(console_handler) + + def debug(self, message): + self.logger.debug(message) + + def info(self, message): + self.logger.info(message) + + def warning(self, message): + self.logger.warning(message) + + def error(self, message): + self.logger.error(message) + + def critical(self, message): + self.logger.critical(message) diff --git a/tools/analyze_xml/config.yaml b/tools/analyze_xml/config.yaml new file mode 100644 index 0000000..f55dcfa --- /dev/null +++ b/tools/analyze_xml/config.yaml @@ -0,0 +1 @@ +xml_url: https://ftp.lysator.liu.se/pub/opensuse/source/tumbleweed/repo/oss/repodata/4f3104fa2e0bbb9381fd5b27dcb527314fa7cf14dc197d4b8824a72001043b5e7208e357e77fcd0a8224311dc084fb39a11ffc919c4aa8ad6f9d1dbd37e0f8c7-primary.xml.gz \ No newline at end of file -- Gitee From d3f6e7926d5078cf88baf7fb18e4c2f4fcc225d0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=B1=AA=E9=87=8D=E9=98=B3?= <15757101689@163.com> Date: Mon, 31 Jul 2023 09:21:15 +0000 Subject: [PATCH 04/23] update tools/analyze_xml/handle_xml.py. 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 汪重阳 <15757101689@163.com> --- tools/analyze_xml/handle_xml.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/analyze_xml/handle_xml.py b/tools/analyze_xml/handle_xml.py index 794c914..59f7985 100644 --- a/tools/analyze_xml/handle_xml.py +++ b/tools/analyze_xml/handle_xml.py @@ -58,7 +58,6 @@ def create_csv(yaml_file): description = package.getElementsByTagName('description')[0] group = package.getElementsByTagName('rpm:group')[0] summary = package.getElementsByTagName('summary')[0] - print(rpm_name, description.childNodes) if not description.childNodes: ws.append([rpm_name, group.childNodes[0].data, group.childNodes[0].data, summary.childNodes[0].data, len(description.childNodes)]) -- Gitee From b6d961aab4722fb736fbd89860cdf5141118ee43 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=B1=AA=E9=87=8D=E9=98=B3?= <15757101689@163.com> Date: Tue, 1 Aug 2023 01:15:11 +0000 Subject: [PATCH 05/23] update tools/analyze_xml/handle_xml.py. 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 汪重阳 <15757101689@163.com> --- tools/analyze_xml/handle_xml.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/tools/analyze_xml/handle_xml.py b/tools/analyze_xml/handle_xml.py index 59f7985..e1dd552 100644 --- a/tools/analyze_xml/handle_xml.py +++ b/tools/analyze_xml/handle_xml.py @@ -1,22 +1,23 @@ #!/usr/bin/python3 import argparse +import gzip +import importlib +import pandas as pd +import urllib.request import xml.dom.minidom +import yaml from openpyxl import Workbook -import pandas as pd from sklearn.feature_extraction.text import CountVectorizer from sklearn.naive_bayes import MultinomialNB from sklearn.pipeline import make_pipeline -import yaml -import urllib.request -import gzip -import importlib + from build_log import Log logger = Log() def install_module(): - module_list = ["argparse", "openpyxl", "pandas", "sklearn", "yaml", "gzip", "importlib"] + module_list = ["argparse", "openpyxl", "pandas", "sklearn", "yaml", "gzip", "importlib"] for module in module_list: importlib.import_module(module) -- Gitee From 159c06acf633fcc81a450a2c84c62ffc5cb6bce4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=B1=AA=E9=87=8D=E9=98=B3?= <15757101689@163.com> Date: Tue, 1 Aug 2023 01:28:57 +0000 Subject: [PATCH 06/23] update tools/analyze_xml/handle_xml.py. 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 汪重阳 <15757101689@163.com> --- tools/analyze_xml/handle_xml.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/analyze_xml/handle_xml.py b/tools/analyze_xml/handle_xml.py index e1dd552..9193d4e 100644 --- a/tools/analyze_xml/handle_xml.py +++ b/tools/analyze_xml/handle_xml.py @@ -3,9 +3,9 @@ import argparse import gzip import importlib import pandas as pd +import yaml import urllib.request import xml.dom.minidom -import yaml from openpyxl import Workbook from sklearn.feature_extraction.text import CountVectorizer from sklearn.naive_bayes import MultinomialNB -- Gitee From fde7e4aeab896bf72378fd18cd39df8a15a2c409 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=B1=AA=E9=87=8D=E9=98=B3?= <15757101689@163.com> Date: Tue, 1 Aug 2023 02:18:10 +0000 Subject: [PATCH 07/23] update tools/analyze_xml/handle_xml.py. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 汪重阳 <15757101689@163.com> --- tools/analyze_xml/handle_xml.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/tools/analyze_xml/handle_xml.py b/tools/analyze_xml/handle_xml.py index 9193d4e..af733b2 100644 --- a/tools/analyze_xml/handle_xml.py +++ b/tools/analyze_xml/handle_xml.py @@ -1,11 +1,12 @@ #!/usr/bin/python3 +import urllib.request +import xml.dom.minidom import argparse import gzip import importlib +import os import pandas as pd import yaml -import urllib.request -import xml.dom.minidom from openpyxl import Workbook from sklearn.feature_extraction.text import CountVectorizer from sklearn.naive_bayes import MultinomialNB @@ -26,9 +27,10 @@ def create_csv(yaml_file): with open(yaml_file, 'r') as f: data = yaml.safe_load(f) url = data["xml_url"] - xml_file = "primary.xml" + xml_file = data["xml_file"] urllib.request.urlretrieve(url, xml_file + '.gz') with gzip.open(xml_file + '.gz', 'rb') as 
f_in: + os.chmod(xml_file, 0o644) with open(xml_file, 'wb') as f_out: f_out.write(f_in.read()) @@ -71,12 +73,12 @@ def create_csv(yaml_file): cell.value = cell.value.replace('Unspecified', '') # 所需要保存的表的路径 - wb.save('file_num.xlsx') + wb.save(data["result_xlsx"]) # 将xlsx转换成csv - data_xls = pd.read_excel('file_num.xlsx', engine='openpyxl') - data_xls.to_csv('file_num.csv', encoding='utf-8', index=False) + data_xls = pd.read_excel(data["result_xlsx"], engine='openpyxl') + data_xls.to_csv(data["result_csv"], encoding='utf-8', index=False) - return "file_num.csv" + return data["result_csv"] def analyze_csv(yaml_file): @@ -132,4 +134,4 @@ if "__main__" == __name__: args = init_args() install_module() - analyze_csv(args.xml_url) + analyze_csv(args.xml_url) \ No newline at end of file -- Gitee From f0e15d20990cb5add0a5769c646b648fb27cf63e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=B1=AA=E9=87=8D=E9=98=B3?= <15757101689@163.com> Date: Tue, 1 Aug 2023 02:55:43 +0000 Subject: [PATCH 08/23] update tools/analyze_xml/handle_xml.py. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 汪重阳 <15757101689@163.com> --- tools/analyze_xml/handle_xml.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/analyze_xml/handle_xml.py b/tools/analyze_xml/handle_xml.py index af733b2..8d886a7 100644 --- a/tools/analyze_xml/handle_xml.py +++ b/tools/analyze_xml/handle_xml.py @@ -33,7 +33,7 @@ def create_csv(yaml_file): os.chmod(xml_file, 0o644) with open(xml_file, 'wb') as f_out: f_out.write(f_in.read()) - + os.chmod(xml_file, 0o755) # 使用minidom解析器打开 XML 文档 dom_tree = xml.dom.minidom.parse(xml_file) collection = dom_tree.documentElement -- Gitee From 91fbd3df102fee4a319e2c4b7aeb2d7cd76042ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=B1=AA=E9=87=8D=E9=98=B3?= <15757101689@163.com> Date: Tue, 1 Aug 2023 03:04:48 +0000 Subject: [PATCH 09/23] update tools/analyze_xml/handle_xml.py. 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 汪重阳 <15757101689@163.com> --- tools/analyze_xml/handle_xml.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/analyze_xml/handle_xml.py b/tools/analyze_xml/handle_xml.py index 8d886a7..a76f27c 100644 --- a/tools/analyze_xml/handle_xml.py +++ b/tools/analyze_xml/handle_xml.py @@ -30,7 +30,6 @@ def create_csv(yaml_file): xml_file = data["xml_file"] urllib.request.urlretrieve(url, xml_file + '.gz') with gzip.open(xml_file + '.gz', 'rb') as f_in: - os.chmod(xml_file, 0o644) with open(xml_file, 'wb') as f_out: f_out.write(f_in.read()) os.chmod(xml_file, 0o755) -- Gitee From 7f04eec1814171003b5f0f2d35eec1077ced77e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=B1=AA=E9=87=8D=E9=98=B3?= <15757101689@163.com> Date: Tue, 1 Aug 2023 06:44:30 +0000 Subject: [PATCH 10/23] update tools/analyze_xml/handle_xml.py. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 汪重阳 <15757101689@163.com> --- tools/analyze_xml/handle_xml.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tools/analyze_xml/handle_xml.py b/tools/analyze_xml/handle_xml.py index a76f27c..d868617 100644 --- a/tools/analyze_xml/handle_xml.py +++ b/tools/analyze_xml/handle_xml.py @@ -30,9 +30,10 @@ def create_csv(yaml_file): xml_file = data["xml_file"] urllib.request.urlretrieve(url, xml_file + '.gz') with gzip.open(xml_file + '.gz', 'rb') as f_in: - with open(xml_file, 'wb') as f_out: - f_out.write(f_in.read()) - os.chmod(xml_file, 0o755) + f_out = os.open(xml_file, os.O_CREAT) + with os.fdopen(f_out, 'w') as f: + f.write(f_in) + os.chmod(xml_file, 0o755) # 使用minidom解析器打开 XML 文档 dom_tree = xml.dom.minidom.parse(xml_file) collection = dom_tree.documentElement -- Gitee From 2269aa636182f32c13f720a3d5405134295fa191 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=B1=AA=E9=87=8D=E9=98=B3?= <15757101689@163.com> Date: Tue, 1 Aug 2023 
07:14:09 +0000 Subject: [PATCH 11/23] update tools/analyze_xml/handle_xml.py. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 汪重阳 <15757101689@163.com> --- tools/analyze_xml/handle_xml.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tools/analyze_xml/handle_xml.py b/tools/analyze_xml/handle_xml.py index d868617..aa3f4ed 100644 --- a/tools/analyze_xml/handle_xml.py +++ b/tools/analyze_xml/handle_xml.py @@ -30,10 +30,8 @@ def create_csv(yaml_file): xml_file = data["xml_file"] urllib.request.urlretrieve(url, xml_file + '.gz') with gzip.open(xml_file + '.gz', 'rb') as f_in: - f_out = os.open(xml_file, os.O_CREAT) - with os.fdopen(f_out, 'w') as f: - f.write(f_in) - os.chmod(xml_file, 0o755) + with os.fdopen(xml_file, 'w', encoding='utf-8', closefd=True) as f_out: + f_out.write(f_in.read()) # 使用minidom解析器打开 XML 文档 dom_tree = xml.dom.minidom.parse(xml_file) collection = dom_tree.documentElement -- Gitee From 8aa12b2e75886403f17573d3892d8bbea1896538 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=B1=AA=E9=87=8D=E9=98=B3?= <15757101689@163.com> Date: Tue, 1 Aug 2023 07:35:35 +0000 Subject: [PATCH 12/23] update tools/analyze_xml/handle_xml.py. 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 汪重阳 <15757101689@163.com> --- tools/analyze_xml/handle_xml.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/analyze_xml/handle_xml.py b/tools/analyze_xml/handle_xml.py index aa3f4ed..5af6630 100644 --- a/tools/analyze_xml/handle_xml.py +++ b/tools/analyze_xml/handle_xml.py @@ -30,8 +30,10 @@ def create_csv(yaml_file): xml_file = data["xml_file"] urllib.request.urlretrieve(url, xml_file + '.gz') with gzip.open(xml_file + '.gz', 'rb') as f_in: - with os.fdopen(xml_file, 'w', encoding='utf-8', closefd=True) as f_out: + fd = os.open(xml_file, os.O_CREAT) + with os.fdopen(fd, 'w', encoding='utf-8', closefd=True) as f_out: f_out.write(f_in.read()) + os.close(fd) # 使用minidom解析器打开 XML 文档 dom_tree = xml.dom.minidom.parse(xml_file) collection = dom_tree.documentElement -- Gitee From e3c05ea10db48e8d54e028a01e6cc927091f3882 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=B1=AA=E9=87=8D=E9=98=B3?= <15757101689@163.com> Date: Tue, 1 Aug 2023 07:41:31 +0000 Subject: [PATCH 13/23] update tools/analyze_xml/handle_xml.py. 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 汪重阳 <15757101689@163.com> --- tools/analyze_xml/handle_xml.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tools/analyze_xml/handle_xml.py b/tools/analyze_xml/handle_xml.py index 5af6630..aa3f4ed 100644 --- a/tools/analyze_xml/handle_xml.py +++ b/tools/analyze_xml/handle_xml.py @@ -30,10 +30,8 @@ def create_csv(yaml_file): xml_file = data["xml_file"] urllib.request.urlretrieve(url, xml_file + '.gz') with gzip.open(xml_file + '.gz', 'rb') as f_in: - fd = os.open(xml_file, os.O_CREAT) - with os.fdopen(fd, 'w', encoding='utf-8', closefd=True) as f_out: + with os.fdopen(xml_file, 'w', encoding='utf-8', closefd=True) as f_out: f_out.write(f_in.read()) - os.close(fd) # 使用minidom解析器打开 XML 文档 dom_tree = xml.dom.minidom.parse(xml_file) collection = dom_tree.documentElement -- Gitee From 3a08ee4d3c12a3f2d4543b61e6dee0fffc01569a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=B1=AA=E9=87=8D=E9=98=B3?= <15757101689@163.com> Date: Tue, 1 Aug 2023 07:55:50 +0000 Subject: [PATCH 14/23] update tools/analyze_xml/handle_xml.py. 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 汪重阳 <15757101689@163.com> --- tools/analyze_xml/handle_xml.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/analyze_xml/handle_xml.py b/tools/analyze_xml/handle_xml.py index aa3f4ed..f32f30c 100644 --- a/tools/analyze_xml/handle_xml.py +++ b/tools/analyze_xml/handle_xml.py @@ -12,9 +12,9 @@ from sklearn.feature_extraction.text import CountVectorizer from sklearn.naive_bayes import MultinomialNB from sklearn.pipeline import make_pipeline -from build_log import Log +import logging.config -logger = Log() +logger = logging.getLogger("xml") def install_module(): -- Gitee From 68a05856807be8a71222f4a5923055021c6ee5a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=B1=AA=E9=87=8D=E9=98=B3?= <15757101689@163.com> Date: Tue, 1 Aug 2023 07:55:57 +0000 Subject: [PATCH 15/23] =?UTF-8?q?=E5=88=A0=E9=99=A4=E6=96=87=E4=BB=B6=20to?= =?UTF-8?q?ols/analyze=5Fxml/build=5Flog.py?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tools/analyze_xml/build_log.py | 50 ---------------------------------- 1 file changed, 50 deletions(-) delete mode 100644 tools/analyze_xml/build_log.py diff --git a/tools/analyze_xml/build_log.py b/tools/analyze_xml/build_log.py deleted file mode 100644 index 7e863f1..0000000 --- a/tools/analyze_xml/build_log.py +++ /dev/null @@ -1,50 +0,0 @@ -# coding:utf-8 - -import logging -import colorlog - - -class Log: - def __init__(self, name=None, log_level=logging.INFO): - """ - @param name: log name - @log_level: log level - """ - self.logger = logging.getLogger(name) - - self.logger.handlers = [] - - self.logger.setLevel(log_level) - - console_fmt = '[%(asctime)s]-[%(name)s]-[line:%(lineno)d]-[%(levelname)s]: %(log_color)s%(message)s' - - color_config = { - 'DEBUG': 'cyan', - 'INFO': 'green', - 'WARNING': 'yellow', - 'ERROR': 'red', - 'CRITICAL': 'purple', - } - - console_formatter = 
colorlog.ColoredFormatter(fmt=console_fmt, log_colors=color_config) - - console_handler = logging.StreamHandler() - - console_handler.setFormatter(console_formatter) - - self.logger.addHandler(console_handler) - - def debug(self, message): - self.logger.debug(message) - - def info(self, message): - self.logger.info(message) - - def warning(self, message): - self.logger.warning(message) - - def error(self, message): - self.logger.error(message) - - def critical(self, message): - self.logger.critical(message) -- Gitee From 221717cd71a6f23648f3cb54f623265811f5c513 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=B1=AA=E9=87=8D=E9=98=B3?= <15757101689@163.com> Date: Tue, 1 Aug 2023 08:17:44 +0000 Subject: [PATCH 16/23] update tools/analyze_xml/handle_xml.py. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 汪重阳 <15757101689@163.com> --- tools/analyze_xml/handle_xml.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/analyze_xml/handle_xml.py b/tools/analyze_xml/handle_xml.py index f32f30c..34af8da 100644 --- a/tools/analyze_xml/handle_xml.py +++ b/tools/analyze_xml/handle_xml.py @@ -1,4 +1,5 @@ #!/usr/bin/python3 +import logging.config import urllib.request import xml.dom.minidom import argparse @@ -12,7 +13,6 @@ from sklearn.feature_extraction.text import CountVectorizer from sklearn.naive_bayes import MultinomialNB from sklearn.pipeline import make_pipeline -import logging.config logger = logging.getLogger("xml") @@ -30,7 +30,7 @@ def create_csv(yaml_file): xml_file = data["xml_file"] urllib.request.urlretrieve(url, xml_file + '.gz') with gzip.open(xml_file + '.gz', 'rb') as f_in: - with os.fdopen(xml_file, 'w', encoding='utf-8', closefd=True) as f_out: + with os.fdopen(1, 'wb', closefd=True) as f_out: f_out.write(f_in.read()) # 使用minidom解析器打开 XML 文档 dom_tree = xml.dom.minidom.parse(xml_file) @@ -111,7 +111,7 @@ def analyze_csv(yaml_file): data.loc[data['group-new'].isna(), 'group-new'] = 
predicted_categories # 保存更新后的表格到新的csv文件 - data.to_csv('updated_file1.csv', index=False) + data.to_csv(csv_file, index=False) logger.info("Categories have been predicted and the updated file has been saved as 'updated_file.csv'") else: logger.info("All packages are already categorized.") -- Gitee From 856417b6943cd8528660f921e283e08e237af467 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=B1=AA=E9=87=8D=E9=98=B3?= <15757101689@163.com> Date: Wed, 2 Aug 2023 02:50:15 +0000 Subject: [PATCH 17/23] =?UTF-8?q?=E6=96=B0=E5=BB=BA=20suse=E5=AE=8F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- "tools/suse\345\256\217/.keep" | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 "tools/suse\345\256\217/.keep" diff --git "a/tools/suse\345\256\217/.keep" "b/tools/suse\345\256\217/.keep" new file mode 100644 index 0000000..e69de29 -- Gitee From 70650ad78128a51322e937703f98060153588d7d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=B1=AA=E9=87=8D=E9=98=B3?= <15757101689@163.com> Date: Wed, 2 Aug 2023 02:50:51 +0000 Subject: [PATCH 18/23] =?UTF-8?q?rename=20tools/suse=E5=AE=8F/.keep=20to?= =?UTF-8?q?=20tools/suse=E5=AE=8F/suse=E5=AE=8F=E5=91=BD=E4=BB=A4=E4=BF=AE?= =?UTF-8?q?=E6=94=B9.md.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 汪重阳 <15757101689@163.com> --- ...45\256\217\345\221\275\344\273\244\344\277\256\346\224\271.md" | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename "tools/suse\345\256\217/.keep" => "tools/suse\345\256\217/suse\345\256\217\345\221\275\344\273\244\344\277\256\346\224\271.md" (100%) diff --git "a/tools/suse\345\256\217/.keep" "b/tools/suse\345\256\217/suse\345\256\217\345\221\275\344\273\244\344\277\256\346\224\271.md" similarity index 100% rename from "tools/suse\345\256\217/.keep" rename to "tools/suse\345\256\217/suse\345\256\217\345\221\275\344\273\244\344\277\256\346\224\271.md" -- Gitee From 
b7027886bb8ab078ddff0c801393450c1fa95c30 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=B1=AA=E9=87=8D=E9=98=B3?= <15757101689@163.com> Date: Wed, 2 Aug 2023 03:41:11 +0000 Subject: [PATCH 19/23] =?UTF-8?q?update=20tools/suse=E5=AE=8F/suse?= =?UTF-8?q?=E5=AE=8F=E5=91=BD=E4=BB=A4=E4=BF=AE=E6=94=B9.md.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 汪重阳 <15757101689@163.com> --- ...75\344\273\244\344\277\256\346\224\271.md" | 72 +++++++++++++++++++ 1 file changed, 72 insertions(+) diff --git "a/tools/suse\345\256\217/suse\345\256\217\345\221\275\344\273\244\344\277\256\346\224\271.md" "b/tools/suse\345\256\217/suse\345\256\217\345\221\275\344\273\244\344\277\256\346\224\271.md" index e69de29..6378144 100644 --- "a/tools/suse\345\256\217/suse\345\256\217\345\221\275\344\273\244\344\277\256\346\224\271.md" +++ "b/tools/suse\345\256\217/suse\345\256\217\345\221\275\344\273\244\344\277\256\346\224\271.md" @@ -0,0 +1,72 @@ + +## 脚本和宏的修改 +#### 一 python宏 +##### 1 缺少符号系列宏 +例如: +- %{python_version_nodots} +- 0%{suse_version} + +修改方法: +- %{python_version_nodots} --> 0%{?python_version_nodots} +- 0%{suse_version} --> 0%{?suse_version} +##### 2 多符号系列宏 +例如: +- %{#python_sitelib} +- %{$python_sitelib} +- %{#python_sitearch} +- %{$python_sitearch} + +修改方法: + + **这里需要注意的是构建环境用到的python版本,如果是python3,则修改为%{python3_sitelib} +如果是Python2,则修改成%{python2_sitelib};也可以通过软连接指定系统版本方法。** +- %{#python_sitelib} --> %{python3_sitelib} +- %{$python_sitelib} --> %{python3_sitelib} +- %{#python_sitearch} --> %{python3_sitearch} +- %{$python_sitearch} --> %{python3_sitearch} +##### 3 依赖为python模块 +例如: +- BuildRequires: %{python_module setuptools} +- BuildRequires: %{python_module pip} +- BuildRequires: %{python_module wheel} + + **这里无法识别%{python_module setuptools}系列宏** + +修改方法: +- %{python_module setuptools} --> python-setuptools +- %{python_module pip} --> python-pip +- %{python_module wheel} --> python-wheel + +rpmbuild中可以用sed方法实现统一修改: +- sed -i 
's/%{python_module \(.*\)}/python-\1/g' xxx.spec + +##### 4 %files 阶段无法识别的宏 +例如: +- %files %{python_files} +- %files %{python_files doc} +- %files %{python_files devel} + +这里无法识别 + + + + + + + + + + + + + + + + + + + + + + + -- Gitee From d6936937d6668d08cf0a06dd08aae5a2c4a04598 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=B1=AA=E9=87=8D=E9=98=B3?= <15757101689@163.com> Date: Wed, 2 Aug 2023 03:54:27 +0000 Subject: [PATCH 20/23] =?UTF-8?q?update=20tools/suse=E5=AE=8F/suse?= =?UTF-8?q?=E5=AE=8F=E5=91=BD=E4=BB=A4=E4=BF=AE=E6=94=B9.md.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 汪重阳 <15757101689@163.com> --- ...75\344\273\244\344\277\256\346\224\271.md" | 31 +++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git "a/tools/suse\345\256\217/suse\345\256\217\345\221\275\344\273\244\344\277\256\346\224\271.md" "b/tools/suse\345\256\217/suse\345\256\217\345\221\275\344\273\244\344\277\256\346\224\271.md" index 6378144..78aefa3 100644 --- "a/tools/suse\345\256\217/suse\345\256\217\345\221\275\344\273\244\344\277\256\346\224\271.md" +++ "b/tools/suse\345\256\217/suse\345\256\217\345\221\275\344\273\244\344\277\256\346\224\271.md" @@ -38,7 +38,9 @@ - %{python_module wheel} --> python-wheel rpmbuild中可以用sed方法实现统一修改: -- sed -i 's/%{python_module \(.*\)}/python-\1/g' xxx.spec +``` +sed -i 's/%{python_module \(.*\)}/python-\1/g' $spec_dir/*.spec +``` ##### 4 %files 阶段无法识别的宏 例如: @@ -46,8 +48,33 @@ rpmbuild中可以用sed方法实现统一修改: - %files %{python_files doc} - %files %{python_files devel} -这里无法识别 + **这里会出现%{python_files} does not exist的报错** +修改方法: +- %files %{python_files} --> %files +- %files %{python_files doc} --> %files +- %files %{python_files devel} --> %files + +rpmbuild中可以用sed方法实现统一修改: + +``` +sed -i 's/%{python_files \(.*\)}//g' $spec_dir/*.spec +``` + +##### 5 build和install阶段无法识别的宏 +例如: +- %python_build +- %python_install +- %pyproject_wheel +- %pyproject_install + +修改方法: + + 
**这里也需要注意构建所需要的python版本,以python3为例** +- %python_build --> %py3_build +- %python_install --> %py3_install + + _%pyproject_wheel和%pyproject_install暂时没找到能顶替的宏,这里用的是宏命令展开的方式来替代宏_ -- Gitee From 796f597f60fc855861f452450b4dabe13c1e5123 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=B1=AA=E9=87=8D=E9=98=B3?= <15757101689@163.com> Date: Wed, 2 Aug 2023 10:14:11 +0000 Subject: [PATCH 21/23] =?UTF-8?q?update=20tools/suse=E5=AE=8F/suse?= =?UTF-8?q?=E5=AE=8F=E5=91=BD=E4=BB=A4=E4=BF=AE=E6=94=B9.md.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 汪重阳 <15757101689@163.com> --- ...75\344\273\244\344\277\256\346\224\271.md" | 49 ++++++++++++++++--- 1 file changed, 41 insertions(+), 8 deletions(-) diff --git "a/tools/suse\345\256\217/suse\345\256\217\345\221\275\344\273\244\344\277\256\346\224\271.md" "b/tools/suse\345\256\217/suse\345\256\217\345\221\275\344\273\244\344\277\256\346\224\271.md" index 78aefa3..33c40f4 100644 --- "a/tools/suse\345\256\217/suse\345\256\217\345\221\275\344\273\244\344\277\256\346\224\271.md" +++ "b/tools/suse\345\256\217/suse\345\256\217\345\221\275\344\273\244\344\277\256\346\224\271.md" @@ -1,14 +1,24 @@ ## 脚本和宏的修改 -#### 一 python宏 +#### 二 通用宏 +- %{?suse_version} +- 0%{suse_version} +- 0%{suse_version} --> 0%{?suse_version} + + +#### 二 python宏 ##### 1 缺少符号系列宏 例如: - %{python_version_nodots} -- 0%{suse_version} +- %python_version_nodots +- %ifpython2 +- %ifpython3 修改方法: - %{python_version_nodots} --> 0%{?python_version_nodots} -- 0%{suse_version} --> 0%{?suse_version} +- %python_version_nodots --> 0%{?python_version_nodots} +- %ifpython2 --> %if python2 +- %ifpython3 --> %if python3 ##### 2 多符号系列宏 例如: - %{#python_sitelib} @@ -65,29 +75,52 @@ sed -i 's/%{python_files \(.*\)}//g' $spec_dir/*.spec 例如: - %python_build - %python_install +- %python_clone +- %python3_expand %fdupes +- %python_expand %fdupes - %pyproject_wheel - %pyproject_install + 修改方法: **这里也需要注意构建所需要的python版本,以python3为例** - 
%python_build --> %py3_build - %python_install --> %py3_install +- %python3_expand %fdupes --> %fdupes +- %python_expand %fdupes --> %fdupes _%pyproject_wheel和%pyproject_install暂时没找到能顶替的宏,这里用的是宏命令展开的方式来替代宏_ +``` +- %pyproject_wheel --> /usr/bin/python3 -mpip wheel --verbose --progress-bar off --disable-pip-version-check --use-pep517 --no-build-isolation --no-deps --wheel-dir \./build \. +- %pyproject_install -->/usr/bin/python3 -mpip install --verbose --progress-bar off --disable-pip-version-check --root %{buildroot} --no-compile --ignore-installed --no-deps --no-index --find-links \./build name==version +``` + _%python_clone暂时没找到能顶替的宏,暂时将这行内容删除,不影响编译_ +##### 6 chekc阶段无法识别的宏 +例如: +- %pyunittest discover -v +- python-testsuite +- python3-testsuite +- %pytest +处理方法: + **_可以考虑跳过check阶段_** +##### 7 无法识别的一些宏 +例如: +- %{psuffix} +- %python_subpackages +- %python_exec +- %pycache_only +- %python_alternative +处理方法: - - - - - +_目前无法找到能替代的宏命令,在不影响编译情况下,做注释或删除操作_ -- Gitee From c01c391730d3e56d7b68ac386ba4f14a2d4c1a1b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=B1=AA=E9=87=8D=E9=98=B3?= <15757101689@163.com> Date: Thu, 3 Aug 2023 01:39:31 +0000 Subject: [PATCH 22/23] =?UTF-8?q?update=20tools/suse=E5=AE=8F/suse?= =?UTF-8?q?=E5=AE=8F=E5=91=BD=E4=BB=A4=E4=BF=AE=E6=94=B9.md.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 汪重阳 <15757101689@163.com> --- ...5\221\275\344\273\244\344\277\256\346\224\271.md" | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git "a/tools/suse\345\256\217/suse\345\256\217\345\221\275\344\273\244\344\277\256\346\224\271.md" "b/tools/suse\345\256\217/suse\345\256\217\345\221\275\344\273\244\344\277\256\346\224\271.md" index 33c40f4..61d5972 100644 --- "a/tools/suse\345\256\217/suse\345\256\217\345\221\275\344\273\244\344\277\256\346\224\271.md" +++ "b/tools/suse\345\256\217/suse\345\256\217\345\221\275\344\273\244\344\277\256\346\224\271.md" @@ -1,11 +1,21 @@ ## 脚本和宏的修改 -#### 
二 通用宏 +#### 一 通用宏 +##### 1 缺少符号 - %{?suse_version} - 0%{suse_version} + +修改方法 - 0%{suse_version} --> 0%{?suse_version} +- %{?suse_version} --> 0%{?suse_version} +##### 2 install和build阶段 +- %cmake_install +- %cmake_build +修改方法 +- %cmake_install --> %make_install +- %cmake_build --> %make_build #### 二 python宏 ##### 1 缺少符号系列宏 例如: -- Gitee From c44ec24d608eb2220d13a697a71a6e74e03e2a78 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=B1=AA=E9=87=8D=E9=98=B3?= <15757101689@163.com> Date: Sat, 5 Aug 2023 07:09:38 +0000 Subject: [PATCH 23/23] =?UTF-8?q?update=20tools/suse=E5=AE=8F/suse?= =?UTF-8?q?=E5=AE=8F=E5=91=BD=E4=BB=A4=E4=BF=AE=E6=94=B9.md.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 汪重阳 <15757101689@163.com> --- ...75\344\273\244\344\277\256\346\224\271.md" | 37 +++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git "a/tools/suse\345\256\217/suse\345\256\217\345\221\275\344\273\244\344\277\256\346\224\271.md" "b/tools/suse\345\256\217/suse\345\256\217\345\221\275\344\273\244\344\277\256\346\224\271.md" index 61d5972..42a4ba4 100644 --- "a/tools/suse\345\256\217/suse\345\256\217\345\221\275\344\273\244\344\277\256\346\224\271.md" +++ "b/tools/suse\345\256\217/suse\345\256\217\345\221\275\344\273\244\344\277\256\346\224\271.md" @@ -2,6 +2,7 @@ ## 脚本和宏的修改 #### 一 通用宏 ##### 1 缺少符号 +例如: - %{?suse_version} - 0%{suse_version} @@ -10,12 +11,48 @@ - %{?suse_version} --> 0%{?suse_version} ##### 2 install和build阶段 +例如: - %cmake_install - %cmake_build +- %{__make} %{?_smp_mflags} +- %{__make} test 修改方法 - %cmake_install --> %make_install - %cmake_build --> %make_build +- %{__make} %{?_smp_mflags} --> %{make_build} +- %{__make} test --> make test + + +##### 3 files阶段打包无法识别的宏 +例如: +- %{?ext_man} +- %{ext_man} + +问题:在打包阶段无法找到文件路径 + +修改方法: +- %{?ext_man} --> .gz +- %{ext_man} --> .gz + +##### 4 perl包在build和install阶段无法识别的宏 +例如: +- %{__perl} Makefile.PL INSTALLDIRS=vendor +- %{__perl} Makefile.PL INSTALLDIRS=vendor 
OPTIMIZE="%{optflags}" +- perl Makefile.PL INSTALLDIRS=vendor +- %perl_make_install +- %perl_process_packlist +- %perl_gen_filelist + +修改方法: + +- `%{__perl} Makefile.PL INSTALLDIRS=vendor --> %{__perl} Makefile.PL INSTALLDIRS=vendor NO_PACKLIST=1 NO_PERLLOCAL=1` +- `%{__perl} Makefile.PL INSTALLDIRS=vendor OPTIMIZE="%{optflags}" --> %{__perl} Makefile.PL INSTALLDIRS=vendor OPTIMIZE="%{optflags}" NO_PACKLIST=1 NO_PERLLOCAL=1` +- `perl Makefile.PL INSTALLDIRS=vendor --> perl Makefile.PL INSTALLDIRS=vendor NO_PERLLOCAL=1 NO_PACKLIST=1` +- `%perl_make_install --> %{make_install}` +- `%perl_process_packlist --> %{_fixperms} %{buildroot}` +- `%perl_gen_filelist --> 删除` + #### 二 python宏 ##### 1 缺少符号系列宏 例如: -- Gitee