diff --git a/migrationTools/__init__.py b/migrationTools/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..5becc17c04a9e3ad1c2a15f53252b7bb5a7517e7 --- /dev/null +++ b/migrationTools/__init__.py @@ -0,0 +1 @@ +__version__ = "1.0.0" diff --git a/migrationTools/data/repo-sqlite/openEuler/aarch64/63ab13615c7e35a77bfb0719b6c2af5c4298a609132950963f4a0ea99b341112-primary.sqlite b/migrationTools/data/repo-sqlite/openEuler/aarch64/63ab13615c7e35a77bfb0719b6c2af5c4298a609132950963f4a0ea99b341112-primary.sqlite new file mode 100644 index 0000000000000000000000000000000000000000..a0ea7928b2241ae67104d5f908717b70f83e2671 Binary files /dev/null and b/migrationTools/data/repo-sqlite/openEuler/aarch64/63ab13615c7e35a77bfb0719b6c2af5c4298a609132950963f4a0ea99b341112-primary.sqlite differ diff --git a/migrationTools/data/repo-sqlite/openEuler/x86_64/2ad7cabc63634d5c75336929453bf9ff2054434547844df0c279c4759cd05409-primary.sqlite b/migrationTools/data/repo-sqlite/openEuler/x86_64/2ad7cabc63634d5c75336929453bf9ff2054434547844df0c279c4759cd05409-primary.sqlite new file mode 100644 index 0000000000000000000000000000000000000000..01cb7c549084339084d3d79188c85ab751f8f526 Binary files /dev/null and b/migrationTools/data/repo-sqlite/openEuler/x86_64/2ad7cabc63634d5c75336929453bf9ff2054434547844df0c279c4759cd05409-primary.sqlite differ diff --git a/migrationTools/data/report-template/index.html b/migrationTools/data/report-template/index.html new file mode 100644 index 0000000000000000000000000000000000000000..04c831ce35e3f75809419607a23ca0238c0bb3e3 --- /dev/null +++ b/migrationTools/data/report-template/index.html @@ -0,0 +1,24 @@ + + + + + + + + + + <%= htmlWebpackPlugin.options.title %> + + + + + +
# =============================================================================
# File: migrationTools/scanRPM/__init__.py   (empty in the patch)
# =============================================================================

# =============================================================================
# File: migrationTools/scanRPM/db_operates.py
# =============================================================================

from sqlite3.dbapi2 import Cursor
import sys
import sqlite3


class DBOperate(object):
    ''' Context-manager wrapper around a single SQLite connection.

    A statement that fails inside execute_sql()/executemany_sql() is rolled
    back automatically.

    Example:
        with DBOperate("./centos7-primary.sqlite") as db:
            db.execute_sql("select * from conflicts where name=?", ("foo",))
            print(db.cursor.fetchall())

    Attributes:
        db_name: path handed to sqlite3.connect().
        connect: the open sqlite3 connection.
        cursor: the single cursor every method of this class executes on.
    '''

    def __new__(cls = None, *args, **kwargs):
        # The class is a singleton: the first call creates the instance,
        # every later call returns that same object (and __init__ then runs
        # on it again).
        if not hasattr(cls, '_instance'):
            cls._instance = super(DBOperate, cls).__new__(cls)
        return cls._instance

    def __init__(self, db_name):
        # __init__ runs on the shared instance for every DBOperate(...) call,
        # so release the connection left by a previous call before replacing
        # it instead of leaving it open until garbage collection.
        previous = getattr(self, 'connect', None)
        if previous is not None:
            try:
                previous.close()
            except sqlite3.Error:
                pass  # best effort: the old handle is being discarded anyway
        self.db_name = db_name
        self.connect = sqlite3.connect(self.db_name)
        self.cursor = self.connect.cursor()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Always close; exceptions raised inside the with-body propagate.
        self.connect.close()

    def select(self, sql, params=()):
        ''' Execute a query and return the cursor positioned on its rows.

        Args:
            sql: statement text, optionally containing "?" placeholders.
            params: values bound to the placeholders.
        '''
        return self.cursor.execute(sql, params)

    def execute_sql(self, sql, params=()):
        ''' Execute one statement and commit; roll back if SQLite rejects it.

        Args:
            sql: statement text, optionally containing "?" placeholders.
            params: values bound to the placeholders.

        Returns:
            True when the statement ran and was committed, False when it
            failed and was rolled back (the error is reported on stderr).
        '''
        try:
            self.cursor.execute(sql, params)
            self.connect.commit()
        except sqlite3.Error as err:
            self.connect.rollback()
            print('error: execute failed: %s' % err, file=sys.stderr)
            return False
        return True

    def executemany_sql(self, sql, data_list):
        ''' Run one statement for every parameter tuple in data_list.

        Example:
            sql = 'insert into filelist (pkgKey, dirname, filenames, filetypes) values (?, ?, ?, ?);'
            data_list = [(1, '...', '...', 'f'), (2, '...', '...', 'd')]

        Any failure rolls the batch back and terminates the process with
        exit status 1.
        '''
        try:
            self.cursor.executemany(sql, data_list)
            self.connect.commit()
        except Exception as err:
            self.connect.rollback()
            print('error: executemany failed')
            print(err, file=sys.stderr)
            sys.exit(1)


# =============================================================================
# File: migrationTools/scanRPM/scan_rpm.py
# =============================================================================

import json
import os
import platform
import shutil
import subprocess as sup

from migrationTools.scanRPM.db_operates import DBOperate
from migrationTools.utils.config import PathConf
from migrationTools.utils.logger import Logger
import migrationTools.utils.html as html

logger = Logger(__name__)

# Environment handed to every rpm invocation (same value the tool always used).
_RPM_ENV = {"LANG": "en_US.UTF-8"}

# Repository database bundled for each supported architecture, relative to
# PathConf.data_path.
_REPO_DB_BY_ARCH = {
    "aarch64": "/repo-sqlite/openEuler/aarch64/63ab13615c7e35a77bfb0719b6c2af5c4298a609132950963f4a0ea99b341112-primary.sqlite",
    "x86_64": "/repo-sqlite/openEuler/x86_64/2ad7cabc63634d5c75336929453bf9ff2054434547844df0c279c4759cd05409-primary.sqlite",
}

# Provide entries containing one of these markers are not compared.
_IGNORED_PROVIDE_MARKERS = ("application()", "metainfo()", "mimehandler(")


class ProvideMapItem(object):
    ''' Comparison record for one provide of one package.

    Attributes:
        origin_provide: dict, name ('p') and version ('v') of the provide on
            the current system.
        openeuler_pkg_provide: dict describing the matching openEuler provide;
            'p' is 1/0 (name identical or not) or None when nothing matched,
            'v' is the openEuler version or None.
        openeuler_pkg_key: int, pkgKey of the openEuler package; used only
            for internal matching, not written to the report.
        openeuler_pkg_name: str, name of the openEuler package supplying the
            provide.
    '''
    def __init__(self):
        self.origin_provide = {'p': None, 'v': None}  # p: provide, v: version
        self.openeuler_pkg_provide = {'p': None, 'v': None}
        self.openeuler_pkg_key = 0
        self.openeuler_pkg_name = None


def _major_version(version: str) -> str:
    ''' Return the part of a version string before its first dot. '''
    return version.split('.', 1)[0]


class ParsedPkgInfo(object):
    ''' Analysis result for one installed package.

    Attributes:
        pkg_name: str, package name on the current (to-be-migrated) system.
        is_version_leaped: bool, starts as True and is cleared as soon as one
            provide has the same major version on both systems.
        tags: list of str labels derived from the comparison.
        pkg_provides_map: list of ProvideMapItem, one per provide of the
            package.
    '''
    def __init__(self, pkg_name: str, add_tags: bool = True):
        # add_tags is accepted for interface compatibility; tags are always
        # computed.
        self.pkg_name = pkg_name
        self.is_version_leaped = True
        self.tags = []
        self.pkg_provides_map = []

        self.fill_provides_item()

    def fill_provides_item(self):
        ''' Populate pkg_provides_map, is_version_leaped and tags.

        Raises:
            RuntimeError: no repository database is bundled for
                PathConf.arch.
        '''
        provides = get_pkg_provides_by_name(self.pkg_name)

        try:
            repodb_path = PathConf.data_path + _REPO_DB_BY_ARCH[PathConf.arch]
        except KeyError:
            raise RuntimeError(
                "no openEuler repository database for architecture %r"
                % (PathConf.arch,)) from None

        with DBOperate(repodb_path) as db:
            for provide in provides:
                item = ProvideMapItem()
                item.origin_provide = provide
                self._match_openeuler_provide(db, item)
                self.pkg_provides_map.append(item)

        self._fill_tags()

    def _match_openeuler_provide(self, db, item):
        ''' Look item.origin_provide up in the openEuler repository database. '''
        provide = item.origin_provide
        # Bound parameters: provide names come from rpm output and must not
        # be spliced into the SQL text.
        if not db.execute_sql("select * from provides where name=?",
                              (provide['p'],)):
            return
        # Only the first match is used. The earlier nested loops shared one
        # cursor, so the inner query discarded the remaining outer rows; this
        # keeps that effective behaviour but makes it explicit.
        row = db.cursor.fetchone()
        if row is None:
            return

        matched = {'p': 1 if provide['p'] == row[0] else 0}
        if provide['v'] is not None and row[3] is not None:
            matched['v'] = row[3]
            # Compare whole major components so that "10.x" and "1.x" are
            # not treated as the same major version.
            if _major_version(provide['v']) == _major_version(row[3]):
                self.is_version_leaped = False
        else:
            matched['v'] = None
        item.openeuler_pkg_provide = matched
        item.openeuler_pkg_key = row[5]

        if not db.execute_sql("select * from packages where pkgKey=?",
                              (item.openeuler_pkg_key,)):
            return
        pkg_row = db.cursor.fetchone()
        if pkg_row is not None:
            # Report name format: %{name}-%{version}.%{release}
            item.openeuler_pkg_name = (pkg_row[2] + "-" + pkg_row[4] + "."
                                       + pkg_row[6])

    def _fill_tags(self):
        ''' Derive the coverage/version tags from pkg_provides_map. '''
        flags = [m.openeuler_pkg_provide['p'] for m in self.pkg_provides_map]
        if not any(flag == 1 for flag in flags):
            self.tags.append("Nothing provided")
        elif any(flag is None or flag == 0 for flag in flags):
            self.tags.append("Partially provided")
        else:
            self.tags.append("All provided")
        if self.is_version_leaped:
            self.tags.append("Version leaped")


def get_current_pkg_list() -> list:
    ''' List every rpm package installed on the running system.

    A single `rpm -qa` prints each installed package in rpm's default query
    format -- the format `rpm -q <name>` produced in the previous
    per-package implementation -- including every installed version of
    multi-version packages such as kernel.

    Returns:
        list of str, one entry per installed package; empty when rpm prints
        nothing.
    '''
    proc = sup.run(["rpm", "-qa"], stdout=sup.PIPE, env=_RPM_ENV)
    return [line for line in proc.stdout.decode("utf-8").splitlines() if line]


def _parse_provide(line: str) -> dict:
    ''' Split one `rpm -q --provides` line into {'p': name, 'v': version}.

    "name = [epoch:]version[-release]" yields the bare version; a line
    without " = " yields 'v': None.
    '''
    name, sep, evr = line.partition(" = ")
    if not sep:
        return {'p': line, 'v': None}
    version_release = evr.split(':', 1)[1] if ':' in evr else evr
    return {'p': name, 'v': version_release.split('-', 1)[0]}


def get_pkg_provides_by_name(pkg_name: str) -> list:
    ''' Return the provides of an installed package.

    Args:
        pkg_name: package to query with `rpm -q --provides`.

    Returns:
        list of dict, e.g. {'p': 'anaconda-core', 'v': '33.16.3.26'}.
        Empty when rpm exits non-zero (for instance the package is not
        installed) so that rpm's error text is never parsed as a provide.
    '''
    proc = sup.run(["rpm", "-q", "--provides", pkg_name],
                   stdout=sup.PIPE,
                   env=_RPM_ENV)
    if proc.returncode != 0:
        logger.warning(f"rpm could not list provides of {pkg_name!r} "
                       f"(exit status {proc.returncode})")
        return []

    provides = []
    for raw_line in proc.stdout.decode("utf-8").splitlines():
        line = raw_line.strip()
        if not line:
            continue
        # Arch-qualified entries such as "name(x86-64)" are deliberately not
        # filtered: for some packages they are the only provide of that name.
        if any(marker in line for marker in _IGNORED_PROVIDE_MARKERS):
            continue
        provides.append(_parse_provide(line))
    return provides


def parsed_pkg_to_json(parsed_pkgs: list) -> str:
    ''' Serialise a list of ParsedPkgInfo objects to a JSON array string.

    Keys: pn (pkg_name), vl (is_version_leaped), tags, ppm
    (pkg_provides_map); inside ppm: upn (openeuler_pkg_name, stringified, so
    a missing name is the text "None"), op (origin_provide), upp
    (openeuler_pkg_provide).

    The structure is built as plain dicts and encoded by json.dumps so that
    every string is escaped correctly.
    '''
    report = []
    for pkg in parsed_pkgs:
        logger.debug(pkg.pkg_name)
        report.append({
            "pn": pkg.pkg_name,
            "vl": bool(pkg.is_version_leaped),
            "tags": pkg.tags,
            "ppm": [{
                "upn": str(item.openeuler_pkg_name),
                "op": item.origin_provide,
                "upp": item.openeuler_pkg_provide,
            } for item in pkg.pkg_provides_map],
        })
    return json.dumps(report)


def _current_os_description() -> str:
    ''' Return "<distribution name> <version>" for the running system.

    platform.linux_distribution() is used where it still exists; it was
    removed in Python 3.8, so newer interpreters read /etc/os-release and
    fall back to platform.system()/platform.release().
    '''
    legacy = getattr(platform, "linux_distribution", None)
    if legacy is not None:
        dist = legacy()
        return dist[0] + " " + dist[1]

    fields = {}
    try:
        with open("/etc/os-release", encoding="utf-8") as release_file:
            for line in release_file:
                key, sep, value = line.strip().partition("=")
                if sep:
                    fields[key] = value.strip('"\'')
    except OSError:
        pass  # file missing or unreadable: use the generic fallbacks below
    return (fields.get("NAME", platform.system()) + " "
            + fields.get("VERSION_ID", platform.release()))


def _js_template_literal_body(text: str) -> str:
    ''' Escape text so a JS template literal evaluates to exactly text. '''
    return (text.replace('\\', '\\\\').replace('`', '\\`')
            .replace('${', '\\${'))


def scan_rpms(output_json_filename: str,
              only_show_leap=True,
              exclude_fonts=True,
              exclude_kernel_modules=True,
              add_tags=True):
    ''' Scan the installed RPM packages and write the report data file.

    Args:
        output_json_filename: output file name, including its directory.
        only_show_leap: keep only packages whose is_version_leaped is True.
        exclude_fonts: drop packages whose name contains 'fonts'.
        exclude_kernel_modules: drop packages whose name contains
            'kernel-modules'.
        add_tags: forwarded to ParsedPkgInfo.

    A failure to write the file is logged, not raised.
    '''
    installed_pkgs = get_current_pkg_list()
    if exclude_fonts:
        installed_pkgs = [p for p in installed_pkgs if 'fonts' not in p]
    if exclude_kernel_modules:
        installed_pkgs = [p for p in installed_pkgs
                          if 'kernel-modules' not in p]

    parsed_pkgs = [ParsedPkgInfo(name, add_tags) for name in installed_pkgs]
    if only_show_leap:
        parsed_pkgs = [p for p in parsed_pkgs if p.is_version_leaped]

    json_str = parsed_pkg_to_json(parsed_pkgs)
    current_OS = _current_os_description()
    target_OS = "openEuler"

    try:
        # `with` closes the file on every path and, unlike the previous
        # try/finally, does not touch an unbound name when open() fails.
        with open(output_json_filename, mode='w', encoding='utf-8') as of:
            of.write('utmt_report_mode="rpmscan";\n')
            of.write('ut_current_system_info = { system: '
                     + json.dumps(current_OS) + ', '
                     + 'targetOS: ' + json.dumps(target_OS) + '};\n')
            of.write('utmt_report_data=`'
                     + _js_template_literal_body(json_str) + '`')
    except OSError as e:
        logger.error(f"cannot write {output_json_filename}: {e}")


def generate_rpm_list_js():
    ''' Generate the RPM report: a data .js file plus its entry .html page. '''
    report_dir = PathConf.report_dir
    report_name = f"rpm_info_report_{PathConf.timestamp}"

    html.copy_html_resource()
    datafiledir = os.path.join(report_dir, "datafile")
    os.makedirs(datafiledir, exist_ok=True)

    # Data file and the HTML entry page that loads it.
    jsfile_path = os.path.join(datafiledir, f"{report_name}.js")
    html_path = os.path.join(report_dir, f"{report_name}.html")

    html_document = html.gen_html_template(report_name)

    scan_rpms(jsfile_path,
              only_show_leap=False,
              exclude_fonts=False,
              exclude_kernel_modules=False)

    print('jsfile = %s' % (jsfile_path))
    # The template is read as UTF-8, so it is written back as UTF-8 too.
    with open(html_path, 'w', encoding='utf-8') as scanrpms_html_file:
        scanrpms_html_file.write(html_document)
    logger.info(f"report has been generated: {html_path}.")


def main():
    ''' Stand-alone entry point: dump the unfiltered scan to ./jsonoutput.json. '''
    scan_rpms("./jsonoutput.json",
              only_show_leap=False,
              exclude_fonts=False,
              exclude_kernel_modules=False)


if __name__ == "__main__":
    main()


# =============================================================================
# File: migrationTools/utils/config.py
# =============================================================================

import datetime
import os
import platform
import sys


class PathConf():
    ''' Paths and run-wide constants, computed once when the class is created. '''
    # Timestamp used to tell output files of different runs apart.
    timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    arch = platform.machine()
    # Two directory levels above this file.
    __base_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    data_path = os.path.join(__base_path, 'data')

    # All output is written below the directory of the launched script.
    __curdir = os.path.dirname(os.path.realpath(sys.argv[0]))
    output_path = os.path.join(__curdir, 'utmtc-output')

    # Public locations.
    log_dir = os.path.join(output_path, "log")
    log_file = os.path.join(log_dir, f"{timestamp}.log")
    run_dir = os.path.join(output_path, "run", timestamp)
    report_dir = os.path.join(output_path, "report")
    report_template_file = os.path.join(data_path, "report-template",
                                        "index.html")


# =============================================================================
# File: migrationTools/utils/html.py
# =============================================================================

import os
from re import L
import shutil
import fcntl
import time
from migrationTools.utils.config import PathConf


def gen_html_template(datapath: str) -> str:
    ''' Build the report entry page from the bundled HTML template.

    Args:
        datapath: name or path of the report data file; only its base name
            is used and ".js" is appended when missing.

    Returns:
        The template text with the data-file, css and js references
        rewritten to relative paths.
    '''
    with open(PathConf.report_template_file, 'r', encoding='utf-8') as f:
        template_str = f.read()

    result_js = os.path.basename(datapath)
    if not result_js.endswith('.js'):
        result_js += '.js'

    html = template_str.replace('/datafile/scanresult.js',
                                './datafile/' + result_js)
    html = html.replace('="/css', '="./resource/css')
    html = html.replace('="/js', '="./resource/js')
    return html


class LockDirectory(object):
    ''' Context manager holding an exclusive flock() on a directory.

    Entering blocks until the lock is granted; leaving closes the directory
    descriptor, which also drops the lock.
    '''
    def __init__(self, directory):
        # An explicit exception instead of `assert`, which disappears under
        # `python -O`.
        if not os.path.exists(directory):
            raise FileNotFoundError(directory)
        self.directory = directory

    def __enter__(self):
        self.dir_fd = os.open(self.directory, os.O_RDONLY)
        try:
            # Blocking request: the previous non-blocking attempt slept 10 s
            # on contention and then carried on without holding the lock.
            fcntl.flock(self.dir_fd, fcntl.LOCK_EX)
        except OSError:
            os.close(self.dir_fd)
            raise
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        os.close(self.dir_fd)


def copy_html_resource():
    ''' Replace <report_dir>/resource with a fresh copy of the template's. '''
    resource_dir = os.path.join(PathConf.data_path, "report-template",
                                'resource')
    dst_dir = os.path.join(PathConf.report_dir, "resource")
    os.makedirs(PathConf.report_dir, exist_ok=True)
    # The delete-then-copy sequence runs under the directory lock.
    with LockDirectory(PathConf.report_dir):
        if os.path.exists(dst_dir):
            shutil.rmtree(dst_dir)
        shutil.copytree(resource_dir, dst_dir)
# =============================================================================
# File: migrationTools/utils/logger.py
# =============================================================================

from codecs import encode
import logging
import logging.handlers
import os

from migrationTools.utils.config import PathConf


class Logger(object):
    ''' Wrapper around logging.getLogger(name) with a console and a file handler.

    The file handler is a RotatingFileHandler on PathConf.log_file opened
    lazily (delay=True); every logging method first makes sure
    PathConf.log_dir exists.

    Args:
        name: name passed to logging.getLogger().
        ch_level: level of the console handler.
        fh_level: level of the file handler.
    '''
    def __init__(self, name, ch_level=logging.INFO, fh_level=logging.DEBUG):
        self.logger = logging.getLogger(name)
        self.logger.setLevel(logging.DEBUG)

        # The named logger is shared; once it has handlers, building another
        # pair would only create objects that are thrown away.
        if self.logger.handlers:
            return

        ch = logging.StreamHandler()
        ch.setLevel(ch_level)
        ch.setFormatter(
            logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))

        fh = logging.handlers.RotatingFileHandler(PathConf.log_file,
                                                  encoding='utf-8',
                                                  maxBytes=1024 * 1024 * 10,
                                                  backupCount=5,
                                                  delay=True)
        fh.setLevel(fh_level)
        fh.setFormatter(
            logging.Formatter(
                '%(asctime)s - %(name)s - %(levelname)s - %(message)s'))

        self.logger.addHandler(ch)
        self.logger.addHandler(fh)

    def _check_log_dir(self):
        ''' Create the log directory if it does not exist yet. '''
        # exist_ok avoids the FileExistsError the exists()-then-makedirs()
        # sequence raises when another process creates it in between.
        os.makedirs(PathConf.log_dir, exist_ok=True)

    def info(self, msg, *args, **kwargs):
        ''' Log msg at INFO level; extra arguments go to logging unchanged. '''
        self._check_log_dir()
        self.logger.info(msg, *args, **kwargs)

    def debug(self, msg, *args, **kwargs):
        ''' Log msg at DEBUG level; extra arguments go to logging unchanged. '''
        self._check_log_dir()
        self.logger.debug(msg, *args, **kwargs)

    def warning(self, msg, *args, **kwargs):
        ''' Log msg at WARNING level; extra arguments go to logging unchanged. '''
        self._check_log_dir()
        self.logger.warning(msg, *args, **kwargs)

    def error(self, msg, *args, **kwargs):
        ''' Log msg at ERROR level; extra arguments go to logging unchanged. '''
        self._check_log_dir()
        self.logger.error(msg, *args, **kwargs)

    def critical(self, msg, *args, **kwargs):
        ''' Log msg at CRITICAL level; extra arguments go to logging unchanged. '''
        self._check_log_dir()
        self.logger.critical(msg, *args, **kwargs)