diff --git a/tools/analyze_xml/config.yaml b/tools/analyze_xml/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f55dcfa5c3de520ea2b8289ecc332ce918ea9057 --- /dev/null +++ b/tools/analyze_xml/config.yaml @@ -0,0 +1 @@ +xml_url: https://ftp.lysator.liu.se/pub/opensuse/source/tumbleweed/repo/oss/repodata/4f3104fa2e0bbb9381fd5b27dcb527314fa7cf14dc197d4b8824a72001043b5e7208e357e77fcd0a8224311dc084fb39a11ffc919c4aa8ad6f9d1dbd37e0f8c7-primary.xml.gz \ No newline at end of file diff --git a/tools/analyze_xml/handle_xml.py b/tools/analyze_xml/handle_xml.py index dbccca867ff2aa7b2b299158f2ca6d1f05014d44..34af8da0471127282d4ad3f2146382650772ffb0 100644 --- a/tools/analyze_xml/handle_xml.py +++ b/tools/analyze_xml/handle_xml.py @@ -1,18 +1,37 @@ #!/usr/bin/python3 -import argparse -from xml.dom.minidom import parse +import logging.config +import urllib.request import xml.dom.minidom -from openpyxl import Workbook +import argparse +import gzip +import importlib +import os import pandas as pd +import yaml +from openpyxl import Workbook from sklearn.feature_extraction.text import CountVectorizer from sklearn.naive_bayes import MultinomialNB from sklearn.pipeline import make_pipeline -from build_log import Log -logger = Log() + +logger = logging.getLogger("xml") -def create_csv(xml_file, all_rpm_name_file): +def install_module(): + module_list = ["argparse", "openpyxl", "pandas", "sklearn", "yaml", "gzip", "importlib"] + for module in module_list: + importlib.import_module(module) + + +def create_csv(yaml_file): + with open(yaml_file, 'r') as f: + data = yaml.safe_load(f) + url = data["xml_url"] + xml_file = data["xml_file"] + urllib.request.urlretrieve(url, xml_file + '.gz') + with gzip.open(xml_file + '.gz', 'rb') as f_in: + with os.fdopen(1, 'wb', closefd=True) as f_out: + f_out.write(f_in.read()) # 使用minidom解析器打开 XML 文档 dom_tree = xml.dom.minidom.parse(xml_file) collection = dom_tree.documentElement @@ -36,32 +55,32 @@ def create_csv(xml_file, 
all_rpm_name_file): name = package.getElementsByTagName('name')[0] rpm_name = name.childNodes[0].data - # 所需要的包名文件 - with open(all_rpm_name_file, 'r', encoding="utf-8") as f: - for line in f: - if line.strip() == rpm_name: - logger.info(rpm_name) - description = package.getElementsByTagName('description')[0] - group = package.getElementsByTagName('rpm:group')[0] - summary = package.getElementsByTagName('summary')[0] - ws.append([rpm_name, group.childNodes[0].data, group.childNodes[0].data, summary.childNodes[0].data, - description.childNodes[0].data.replace('\n', '')]) + + description = package.getElementsByTagName('description')[0] + group = package.getElementsByTagName('rpm:group')[0] + summary = package.getElementsByTagName('summary')[0] + if not description.childNodes: + ws.append([rpm_name, group.childNodes[0].data, group.childNodes[0].data, summary.childNodes[0].data, + len(description.childNodes)]) + continue + ws.append([rpm_name, group.childNodes[0].data, group.childNodes[0].data, summary.childNodes[0].data, + description.childNodes[0].data.replace('\n', '')]) for cell in ws['B']: if cell.value is not None and isinstance(cell.value, str): cell.value = cell.value.replace('Unspecified', '') # 所需要保存的表的路径 - wb.save('file_num.xlsx') + wb.save(data["result_xlsx"]) # 将xlsx转换成csv - data_xls = pd.read_excel('file_num.xlsx', engine='openpyxl') - data_xls.to_csv('file_num.csv', encoding='utf-8') + data_xls = pd.read_excel(data["result_xlsx"], engine='openpyxl') + data_xls.to_csv(data["result_csv"], encoding='utf-8', index=False) - return "file_num.csv" + return data["result_csv"] -def analyza_csv(xml_file, all_rpm_name_file): - csv_file = create_csv(xml_file, all_rpm_name_file) +def analyze_csv(yaml_file): + csv_file = create_csv(yaml_file) # 加载数据 # 将此路径替换为你的csv文件的路径 file_path = csv_file @@ -92,7 +111,7 @@ def analyza_csv(xml_file, all_rpm_name_file): data.loc[data['group-new'].isna(), 'group-new'] = predicted_categories # 保存更新后的表格到新的csv文件 - 
data.to_csv('updated_file1.csv', index=False) + data.to_csv(csv_file, index=False) logger.info("Categories have been predicted and the updated file has been saved as 'updated_file.csv'") else: logger.info("All packages are already categorized.") @@ -104,12 +123,13 @@ def init_args(): :return: """ parser = argparse.ArgumentParser() - parser.add_argument("-x", type=str, dest="xml_file_name", help="xml file name") - parser.add_argument("-r", type=str, dest="rpm_file_name", help="所需要解析的包名") + parser.add_argument("-f", type=str, dest="xml_url", help="xml url") return parser.parse_args() if "__main__" == __name__: args = init_args() - analyza_csv(args.xml_file_name, args.rpm_file_name) \ No newline at end of file + + install_module() + analyze_csv(args.xml_url) \ No newline at end of file diff --git "a/tools/suse\345\256\217/suse\345\256\217\345\221\275\344\273\244\344\277\256\346\224\271.md" "b/tools/suse\345\256\217/suse\345\256\217\345\221\275\344\273\244\344\277\256\346\224\271.md" new file mode 100644 index 0000000000000000000000000000000000000000..42a4ba452951c7a5a44367472be5af19f4944bdd --- /dev/null +++ "b/tools/suse\345\256\217/suse\345\256\217\345\221\275\344\273\244\344\277\256\346\224\271.md" @@ -0,0 +1,179 @@ + +## 脚本和宏的修改 +#### 一 通用宏 +##### 1 缺少符号 +例如: +- %{?suse_version} +- 0%{suse_version} + +修改方法 +- 0%{suse_version} --> 0%{?suse_version} +- %{?suse_version} --> 0%{?suse_version} + +##### 2 install和build阶段 +例如: +- %cmake_install +- %cmake_build +- %{__make} %{?_smp_mflags} +- %{__make} test + +修改方法 +- %cmake_install --> %make_install +- %cmake_build --> %make_build +- %{__make} %{?_smp_mflags} --> %{make_build} +- %{__make} test --> make test + + +##### 3 files阶段打包无法识别的宏 +例如: +- %{?ext_man} +- %{ext_man} + +问题:在打包阶段无法找到文件路径 + +修改方法: +- %{?ext_man} --> .gz +- %{ext_man} --> .gz + +##### 4 perl包在build和install宏无法识别 +例如: +- %{__perl} Makefile.PL INSTALLDIRS=vendor +- %{__perl} Makefile.PL INSTALLDIRS=vendor OPTIMIZE="%{optflags}" +- perl Makefile.PL 
INSTALLDIRS=vendor +- %perl_make_install +- %perl_process_packlist +- %perl_gen_filelist + +修改方法: + +- `%{__perl} Makefile.PL INSTALLDIRS=vendor --> %{__perl} Makefile.PL INSTALLDIRS=vendor NO_PACKLIST=1 NO_PERLLOCAL=1` +- `%{__perl} Makefile.PL INSTALLDIRS=vendor OPTIMIZE="%{optflags}" --> %{__perl} Makefile.PL INSTALLDIRS=vendor OPTIMIZE="%{optflags}" NO_PACKLIST=1 NO_PERLLOCAL=1` +- `perl Makefile.PL INSTALLDIRS=vendor --> perl Makefile.PL INSTALLDIRS=vendor NO_PERLLOCAL=1 NO_PACKLIST=1` +- `%perl_make_install --> %{make_install}` +- `%perl_process_packlist --> %{_fixperms} %{buildroot}` +- `%perl_gen_filelist --> 删除` + +#### 二 python宏 +##### 1 缺少符号系列宏 +例如: +- %{python_version_nodots} +- %python_version_nodots +- %ifpython2 +- %ifpython3 + +修改方法: +- %{python_version_nodots} --> 0%{?python_version_nodots} +- %python_version_nodots --> 0%{?python_version_nodots} +- %ifpython2 --> %if python2 +- %ifpython3 --> %if python3 +##### 2 多符号系列宏 +例如: +- %{#python_sitelib} +- %{$python_sitelib} +- %{#python_sitearch} +- %{$python_sitearch} + +修改方法: + + **这里需要注意的是构建环境用到的python版本,如果是python3,则修改为%{python3_sitelib}; +如果是python2,则修改成%{python2_sitelib};也可以通过软链接指定系统版本的方法。** +- %{#python_sitelib} --> %{python3_sitelib} +- %{$python_sitelib} --> %{python3_sitelib} +- %{#python_sitearch} --> %{python3_sitearch} +- %{$python_sitearch} --> %{python3_sitearch} +##### 3 依赖为python模块 +例如: +- BuildRequires: %{python_module setuptools} +- BuildRequires: %{python_module pip} +- BuildRequires: %{python_module wheel} + + **这里无法识别%{python_module setuptools}系列宏** + +修改方法: +- %{python_module setuptools} --> python-setuptools +- %{python_module pip} --> python-pip +- %{python_module wheel} --> python-wheel + +rpmbuild中可以用sed方法实现统一修改: +``` +sed -i 's/%{python_module \([^}]*\)}/python-\1/g' $spec_dir/*.spec +``` + +##### 4 %files 阶段无法识别的宏 +例如: +- %files %{python_files} +- %files %{python_files doc} +- %files %{python_files devel} + + **这里会出现%{python_files} does not exist的报错** + +修改方法: +- %files
%{python_files} --> %files +- %files %{python_files doc} --> %files +- %files %{python_files devel} --> %files + +rpmbuild中可以用sed方法实现统一修改: + +``` +sed -i 's/%{python_files[^}]*}//g' $spec_dir/*.spec +``` + +##### 5 build和install阶段无法识别的宏 +例如: +- %python_build +- %python_install +- %python_clone +- %python3_expand %fdupes +- %python_expand %fdupes +- %pyproject_wheel +- %pyproject_install + + +修改方法: + + **这里也需要注意构建所需要的python版本,以python3为例** +- %python_build --> %py3_build +- %python_install --> %py3_install +- %python3_expand %fdupes --> %fdupes +- %python_expand %fdupes --> %fdupes + + _%pyproject_wheel和%pyproject_install暂时没找到能顶替的宏,这里用的是宏命令展开的方式来替代宏_ + +``` +- %pyproject_wheel --> /usr/bin/python3 -mpip wheel --verbose --progress-bar off --disable-pip-version-check --use-pep517 --no-build-isolation --no-deps --wheel-dir \./build \. +- %pyproject_install --> /usr/bin/python3 -mpip install --verbose --progress-bar off --disable-pip-version-check --root %{buildroot} --no-compile --ignore-installed --no-deps --no-index --find-links \./build name==version +``` + _%python_clone暂时没找到能顶替的宏,暂时将这行内容删除,不影响编译_ + +##### 6 check阶段无法识别的宏 +例如: +- %pyunittest discover -v +- python-testsuite +- python3-testsuite +- %pytest + +处理方法: + + **_可以考虑跳过check阶段_** + + +##### 7 无法识别的一些宏 +例如: +- %{psuffix} +- %python_subpackages +- %python_exec +- %pycache_only +- %python_alternative + +处理方法: + +_目前无法找到能替代的宏命令,在不影响编译情况下,做注释或删除操作_ + + + + + + + + +