From 3189b38a3f025620dab61a6e1dd5f478a071bcb3 Mon Sep 17 00:00:00 2001 From: wangchuangGG Date: Tue, 23 Jun 2020 19:29:38 +0800 Subject: [PATCH 1/5] init checklicenses --- advisors/check_licenses.py | 341 ++++++++++++++++++++++++++++++++++ advisors/download.py | 43 +++++ advisors/license_translations | 103 ++++++++++ 3 files changed, 487 insertions(+) create mode 100755 advisors/check_licenses.py create mode 100755 advisors/download.py create mode 100644 advisors/license_translations diff --git a/advisors/check_licenses.py b/advisors/check_licenses.py new file mode 100755 index 00000000..f08d81dc --- /dev/null +++ b/advisors/check_licenses.py @@ -0,0 +1,341 @@ +#!/usr/bin/python3 +""" +(1) This is a script that checks whether the licenses in the LICENSE file + in the tar package and the licenses in the SPEC file are the same. + If they are the same, output: + "licenses from LICENSES are same as form SPEC:[xxx, yyy] <==> [xxx, zzz]" + + If they are not the same, output: + "licenses from LICENSES are not same as form SPEC:[xxx, yyy] <==> [xxx, yyy]" + +(2) This script depends on download.py and license_translations, + you can add keywords for licenses in license_translations. + +(3) Command parameters + Required parameters: + -t Specify the path or url of the tar package + (e.g. /home/test.tar.gz or https://example.com/test.tar.gz) + -s Specify the path of the spec file + (e.g. /home/test.spec) + + Optional parameters: + -w With this parameter, if the licenses in the tar + and the licenses in the spec file are are not the same, + modify the spec file directly. + -d Specify the decompression path of the tar package, + default: /var/tmp/tmp_tarball +""" +import argparse +import configparser +import os +import re +import sys +import hashlib +import tarfile +import bz2 +import shutil +import download +import chardet +import logging +logging.basicConfig(format='%(message)s', level=logging.INFO) + +licenses_for_license = [] +licenses_for_spec = [] +license_translations = {} +def main(): + """ Entry point for check_licenses.""" + parser = argparse.ArgumentParser() + parser.add_argument("-t", "--tarball", default="", nargs="?", + help="tarball path or url (e.g." + "/home/test.tar.gz" + " or http://example.com/test.tar.gz)") + parser.add_argument("-s", "--specfile", default="", nargs="?", + help="SPEC file path (e.g. /home/mytar.spec)") + parser.add_argument("-w", "--writespec", dest="writespec", action="store_true", + default=False, + help="Overwrite the licenses of SPEC file") + parser.add_argument("-d", "--downloadpath", default="", nargs="?", + help="The dest download or extract path of tarball" + " (e.g. /home/tmp_tarball default: /var/tmp/tmp_tarball)") + args = parser.parse_args() + + if not args.tarball: + parser.error(argparse.ArgumentTypeError( + "the tarball path or url argument['-t'] is required")) + + if not args.specfile: + parser.error(argparse.ArgumentTypeError( + "the spec file argument['-s'] is required")) + + if args.downloadpath: + download_path = args.downloadpath + else: + download_path = "/var/tmp/tmp_tarball" + if os.path.exists(download_path): + shutil.rmtree(download_path) + os.makedirs(download_path, exist_ok=True) + process_licenses(args, download_path) + + +def get_contents(filename): + """ + Get file contents and return values. + If read failed return None. + """ + with open(filename, "rb") as f: + return f.read() + return None + + +def get_tarball_from_url(upstream_url, download_path, tarfile): + """ + Get tar package from url. + return: tar package path. + """ + tarball_path = download_path + "/" + tarfile + if not os.path.isfile(tarball_path): + download.do_curl(upstream_url, dest=tarball_path) + return tarball_path + + +def extract_tar(tarball_path, extraction_path): + """ + Extract tar package in extraction_path. + If extract failed the program will exit. + """ + if not os.path.isfile(tarball_path): + logging.error("{} is not a tarball file".format(tarball_path)) + exit(1) + with tarfile.open(tarball_path) as content: + content.extractall(path=extraction_path) + + +def decode_license(license, charset): + """ + Decode the license string. + return the license string or nothing. + """ + if not charset: + return + return license.decode(charset) + + +def add_license_from_spec_file(license): + """ + Add license to licenses_for_spec. + """ + if license in licenses_for_spec: + logging.debug("the license was in licenses_for_spec:{}".format(license)) + else: + licenses_for_spec.append(license) + + +def add_license_from_license_file(license): + """ + Add license to licenses_for_license. + """ + if license in licenses_for_license: + logging.debug("the license was in licenses_for_license:{}\n".format(license)) + else: + licenses_for_license.append(license) + + +def scan_licenses(copying): + """ + Scan licenses from copying file and add to licenses_for_license. + if get contents failed or decode data failed, return nothing. + """ + try: + data = get_contents(copying) + except FileNotFoundError: + return + data = decode_license(data, chardet.detect(data)['encoding']) + if not data: + return + for word in license_translations: + if word in data: + real_word = license_translations.get(word, word) + add_license_from_license_file(real_word) + logging.debug("all licenses from license file is:{}".format(licenses_for_license)) + + +def scan_licenses_in_LICENSE(srcdir): + """ + Find LICENSE files and scan. + """ + targets = ["copyright", + "copyright.txt", + "apache-2.0", + "artistic.txt", + "libcurllicense", + "gpl.txt", + "gpl2.txt", + "gplv2.txt", + "notice", + "copyrights", + "licence", + "about_bsd.txt"] + target_pat = re.compile(r"^((copying)|(licen[cs]e))|(licen[cs]e)(\.(txt|xml))?$") + files = os.listdir(srcdir) + for filename in files: + if not os.path.isdir(filename): + if filename.lower() in targets or target_pat.search(filename.lower()): + scan_licenses(os.path.join(srcdir, filename)) + + +def clean_license_string(lic): + """ + Clean up license string by replace substrings. + """ + reps = [(" (", "("), + (" v2", "-2"), + (" v3", "-3"), + (" 2", "-2"), + (" 3", "-3"), + (" <", "<"), + (" >", ">"), + ("= ", "="), + ("GPL(>=-2)", "GPL-2.0+"), + ("Modified", ""), + ("OSI", ""), + ("Approved", ""), + ("Simplified", ""), + ("file", ""), + ("LICENSE", "")] + + for sub, rep in reps: + lic = lic.replace(sub, rep) + return lic + + +def scan_licenses_in_SPEC(specfile): + """ + Find spec file and scan. + If no spec file or open file failed, + the program will exit with an error. + """ + if not specfile.endswith(".spec"): + logging.error("{} is not a spec file".format(specfile)) + exit(1) + try: + with open(specfile, 'r') as specfd: + lines = specfd.readlines() + except FileNotFoundError: + logging.error("no SPEC file found!") + exit(1) + for line in lines: + if line.startswith("#"): + continue + excludes = ["and", "AND"] + if line.startswith("License"): + splits = line.split(":")[1:] + words = ":".join(splits).strip() + if words in license_translations: + real_words = license_translations.get(words, words) + add_license_from_spec_file(real_words) + else: + words = clean_license_string(words).split() + for word in words: + if word not in excludes: + real_word = license_translations.get(word, word) + logging.debug("after translate license_string ==> " + "real_license: {} ==> {}".format(word, real_word)) + add_license_from_spec_file(real_word) + logging.debug("\nall licenses from SPEC file is:{}".format(licenses_for_spec)) + + +def check_licenses_is_same(): + """ + Check if the licenses from SPEC is the same as the licenses from LICENSE file. + if same, return True. + if not same return False. + """ + for lic_from_licenses in licenses_for_license: + if lic_from_licenses not in licenses_for_spec: + return False + for lic_from_spec in licenses_for_spec: + if lic_from_spec not in licenses_for_license: + return False + return True + + +def overwrite_spec(specfile): + """ + Write License in SPEC file. + If open file failed, return nothing. + """ + licenses_for_wirte = "License:\t" + for lic in licenses_for_license: + if lic == licenses_for_license[0]: + licenses_for_wirte += lic + else: + licenses_for_wirte += " and " + lic + licenses_for_wirte += "\n" + + try: + with open(specfile, 'r') as specfd: + lines = specfd.readlines() + specfd.close() + except FileNotFoundError: + return + f = open(specfile, 'w') + for line in lines: + if line.startswith("License"): + f.write(licenses_for_wirte) + else: + f.write(line) + f.close() + logging.info("licenses wirte to spec success") + + +def read_licenses_translate_conf(filename): + """ + Read the licenses_translate file. + """ + conf_dir = os.path.dirname(os.path.abspath(__file__)) + conf_path = os.path.join(conf_dir, filename) + if not os.path.isfile(conf_path): + logging.info("not found the config file:{}".format(conf_path)) + return + with open(conf_path, "r") as conf_file: + for line in conf_file: + if line.startswith("#"): + continue + key_name, final_name = line.rsplit(", ", 1) + license_translations[key_name] = final_name.rstrip() + + +def process_licenses(args, download_path): + """ + Begin process licenses in tar package and spec file. + """ + read_licenses_translate_conf("license_translations") + + if os.path.exists(args.tarball): + tarball_path = args.tarball + else: + tarball_path = get_tarball_from_url(args.tarball, download_path, os.path.basename(args.tarball)) + extract_tar(tarball_path, download_path) + + tarball_name = os.path.basename(tarball_path) + extract_tar_name = os.path.splitext(tarball_name)[0] + extract_file_name = os.path.splitext(extract_tar_name)[0] + scan_licenses_in_LICENSE(os.path.join(download_path, extract_file_name)) + + specfile = args.specfile + scan_licenses_in_SPEC(specfile) + + if check_licenses_is_same(): + logging.info("licenses from LICENSES are same as form SPEC:" + "{} <==> {}".format(licenses_for_license, licenses_for_spec)) + else: + logging.info("licenses from LICENSES are not same as form SPEC:" + "{} <==> {}".format(licenses_for_license, licenses_for_spec)) + if args.writespec: + overwrite_spec(specfile) + exit(0) + exit(0) + + +if __name__ == '__main__': + main() diff --git a/advisors/download.py b/advisors/download.py new file mode 100755 index 00000000..301f6901 --- /dev/null +++ b/advisors/download.py @@ -0,0 +1,43 @@ +#!/usr/bin/true +""" +download tar package with url +""" +import os +import sys +import io +import pycurl + +def do_curl(url, dest=None): + """ + Perform a curl operation for url. + If perform failure or write to dest failure, + the program exiting with an error. + """ + c = pycurl.Curl() + c.setopt(c.URL, url) + c.setopt(c.FOLLOWLOCATION, True) + c.setopt(c.FAILONERROR, True) + c.setopt(c.CONNECTTIMEOUT, 10) + c.setopt(c.TIMEOUT, 600) + c.setopt(c.LOW_SPEED_LIMIT, 1) + c.setopt(c.LOW_SPEED_TIME, 10) + buf = io.BytesIO() + c.setopt(c.WRITEDATA, buf) + try: + c.perform() + except pycurl.error as e: + print("Unable to fetch {}: {} or tarball path is wrong".format(url, e)) + sys.exit(1) + finally: + c.close() + + if dest: + try: + with open(dest, 'wb') as fp: + fp.write(buf.getvalue()) + except IOError as e: + if os.path.exists(dest): + os.unlink(dest) + print("Unable to write to {}: {}".format(dest, e)) + sys.exit(1) + diff --git a/advisors/license_translations b/advisors/license_translations new file mode 100644 index 00000000..c971658e --- /dev/null +++ b/advisors/license_translations @@ -0,0 +1,103 @@ +# : +2-clause, BSD-2-Clause +AGPL-3, AGPL-3.0 +APL-2.0, Apache-2.0 +APL2, Apache-2.0 +APL2.0, Apache-2.0 +ASL 2.0, Apache-2.0 +ASL-2, Apache-2.0 +ASL-2.0, Apache-2.0 +Apache 2.0, Apache-2.0 +Apache License 2.0, Apache-2.0 +Apache License, Version 2.0, Apache-2.0 +Apache, Apache-2.0 +Apache-2, Apache-2.0 +Apache2.0, Apache-2.0 +Apachev2, Apache-2.0 +Artistic-1.0+GPL-1.0, Artistic-1.0 GPL-1.0 +BSD(3-clause), BSD-3-Clause +BSD_2_clause, BSD-2-Clause +BSD_3_clause, BSD-3-Clause +Boost, BSL-1.0 +CC0, CC0-1.0 +CPL, CPL-1.0 +Expat, MIT +GFDL1.1, GFDL-1.1 +GPL(==-2), GPL-2.0 +GPL(>=-2), GPL-2.0+ +GPL(>=-2.0), GPL-2.0+ +GPL(>=-2.1), GPL-2.0 +GPL(>=-3), GPL-3.0 +GPL(>=2), GPL-2.0+ +GPL(>=3), GPL-3.0+ +GPL-2+, GPL-2.0+ +GPL-2, GPL-2.0 +GPL-2.0+, GPL-2.0+ +GPL-2.0+LGPL-2.1, GPL-2.0 LGPL-2.1 +GPL-2.0, GPL-2.0 +GPL-2.0-or-later, GPL-2.0+ +GPL-3+, GPL-3.0 +GPL-3, GPL-3.0 +GPL-3.0+, GPL-3.0+ +GPL-3.0, GPL-3.0 +GPL2, GPL-2.0 +GPL3, GPL-3.0 +GPLV2, GPL-2.0 +GPLV3, GPL-3.0 +GPLv2+, GPL-2.0+ +GPLv2, GPL-2.0 +GPLv3+, GPL-3.0+ +GPLv3, GPL-3.0 + ISCL , ISC +LGPL(>=-2), LGPL-2.0+ +LGPL(>=-2.1), LGPL-2.1 +LGPL(>=2), LGPL-2.0+ +LGPL-2, LGPL-2.0 +LGPL-2.0+, LGPL-2.0+ +LGPL-2.1+, LGPL-2.1+ +LGPL-2.1-or-later, LGPL-2.1+ +LGPL-3+, LGPL-3.0+ +LGPL-3, LGPL-3.0 +LGPLv2+, LGPL-2.1+ +LGPLv2, LGPL-2.0 +LGPLv2.1+, LGPL-2.1+ +LGPLv2.1, LGPL-2.1 +LGPLv3+, LGPL-3.0+ +LGPLv3, LGPL-3.0 +MIT/X, MIT +MPL-2, MPL-2.0 +MPL2, MPL-2.0 +MPLv1.1, MPL-1.1 +MPLv2, MPL-2.0 +MPLv2.0, MPL-2.0 +MPLv2.0,, MPL-2.0 +PSF, Python-2.0 +Perl, Artistic-1.0-Perl +Python, Python-2.0 +VIM, Vim +ZLIB, Zlib +ZPL 2.1, ZPL-2.1 + ZPL, ZPL-2.0 +apache, Apache-2.0 +artistic2, Artistic-2.0 +artistic_2, Artistic-2.0 +gplv3, GPL-3.0 +http://creativecommons.org/licenses/BSD/, BSD-2-Clause +http://opensource.org/licenses/MIT, MIT +http://www.apache.org/licenses/LICENSE-2.0, Apache-2.0 + lgpl, LGPL-2.1 + mit, MIT + MIT, MIT +(mit), MIT +(MIT), MIT + perl, Artistic-1.0-Perl +(per1), Artistic-1.0-Perl +w3c, W3C +zlib, Zlib +zlib/libpng, zlib-acknowledgement +OFL, OFL +GNU LESSER GENERAL PUBLIC LICENSE, LGPL-2.1+ +The name of the author may not be used to endorse, BSD + BSD, BSD +version 3 of the GNU General Public License, GPL-3.0+ +either version 2, GPL-2.0+ -- Gitee From a01a5af7b97cd37649ef31c8df361e3ffc9367fb Mon Sep 17 00:00:00 2001 From: wangchuangGG Date: Sun, 28 Jun 2020 14:47:53 +0800 Subject: [PATCH 2/5] Modify Log --- advisors/check_licenses.py | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/advisors/check_licenses.py b/advisors/check_licenses.py index f08d81dc..9e0cbf7f 100755 --- a/advisors/check_licenses.py +++ b/advisors/check_licenses.py @@ -87,12 +87,12 @@ def get_contents(filename): return None -def get_tarball_from_url(upstream_url, download_path, tarfile): +def get_tarball_from_url(upstream_url, download_path, tarpackage): """ Get tar package from url. return: tar package path. """ - tarball_path = download_path + "/" + tarfile + tarball_path = download_path + "/" + tarpackage if not os.path.isfile(tarball_path): download.do_curl(upstream_url, dest=tarball_path) return tarball_path @@ -104,30 +104,30 @@ def extract_tar(tarball_path, extraction_path): If extract failed the program will exit. """ if not os.path.isfile(tarball_path): - logging.error("{} is not a tarball file".format(tarball_path)) + logging.error("%s is not a tarball file", tarball_path) exit(1) with tarfile.open(tarball_path) as content: content.extractall(path=extraction_path) -def decode_license(license, charset): +def decode_license(license_string, charset): """ Decode the license string. return the license string or nothing. """ if not charset: return - return license.decode(charset) + return license_string.decode(charset) -def add_license_from_spec_file(license): +def add_license_from_spec_file(spec_license): """ Add license to licenses_for_spec. """ - if license in licenses_for_spec: - logging.debug("the license was in licenses_for_spec:{}".format(license)) + if spec_license in licenses_for_spec: + logging.debug("the license was in licenses_for_spec: %s", spec_license) else: - licenses_for_spec.append(license) + licenses_for_spec.append(spec_license) def add_license_from_license_file(license): @@ -135,7 +135,7 @@ def add_license_from_license_file(license): Add license to licenses_for_license. """ if license in licenses_for_license: - logging.debug("the license was in licenses_for_license:{}\n".format(license)) + logging.debug("the license was in licenses_for_license: %s\n", license) else: licenses_for_license.append(license) @@ -156,7 +156,7 @@ def scan_licenses(copying): if word in data: real_word = license_translations.get(word, word) add_license_from_license_file(real_word) - logging.debug("all licenses from license file is:{}".format(licenses_for_license)) + logging.debug("all licenses from license file is: %s", licenses_for_license) def scan_licenses_in_LICENSE(srcdir): @@ -215,7 +215,7 @@ def scan_licenses_in_SPEC(specfile): the program will exit with an error. """ if not specfile.endswith(".spec"): - logging.error("{} is not a spec file".format(specfile)) + logging.error("%s is not a spec file", specfile) exit(1) try: with open(specfile, 'r') as specfd: @@ -239,9 +239,9 @@ def scan_licenses_in_SPEC(specfile): if word not in excludes: real_word = license_translations.get(word, word) logging.debug("after translate license_string ==> " - "real_license: {} ==> {}".format(word, real_word)) + "real_license: %s ==> %s", word, real_word) add_license_from_spec_file(real_word) - logging.debug("\nall licenses from SPEC file is:{}".format(licenses_for_spec)) + logging.debug("\nall licenses from SPEC file is: %s", licenses_for_spec) def check_licenses_is_same(): @@ -295,7 +295,7 @@ def read_licenses_translate_conf(filename): conf_dir = os.path.dirname(os.path.abspath(__file__)) conf_path = os.path.join(conf_dir, filename) if not os.path.isfile(conf_path): - logging.info("not found the config file:{}".format(conf_path)) + logging.info("not found the config file: %s", conf_path) return with open(conf_path, "r") as conf_file: for line in conf_file: @@ -327,10 +327,10 @@ def process_licenses(args, download_path): if check_licenses_is_same(): logging.info("licenses from LICENSES are same as form SPEC:" - "{} <==> {}".format(licenses_for_license, licenses_for_spec)) + "%s <==> %s", licenses_for_license, licenses_for_spec) else: logging.info("licenses from LICENSES are not same as form SPEC:" - "{} <==> {}".format(licenses_for_license, licenses_for_spec)) + "%s <==> %s", licenses_for_license, licenses_for_spec) if args.writespec: overwrite_spec(specfile) exit(0) -- Gitee From e71ba0dd69d486dc35f44e0fff90609b7477f80f Mon Sep 17 00:00:00 2001 From: wangchuangGG Date: Sun, 28 Jun 2020 15:08:06 +0800 Subject: [PATCH 3/5] Modify Log --- advisors/check_licenses.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/advisors/check_licenses.py b/advisors/check_licenses.py index 9e0cbf7f..06b7b942 100755 --- a/advisors/check_licenses.py +++ b/advisors/check_licenses.py @@ -120,24 +120,24 @@ def decode_license(license_string, charset): return license_string.decode(charset) -def add_license_from_spec_file(spec_license): +def add_license_from_spec_file(license_string): """ Add license to licenses_for_spec. """ - if spec_license in licenses_for_spec: - logging.debug("the license was in licenses_for_spec: %s", spec_license) + if license_string in licenses_for_spec: + logging.debug("the license was in licenses_for_spec: %s", license_string) else: - licenses_for_spec.append(spec_license) + licenses_for_spec.append(license_string) -def add_license_from_license_file(license): +def add_license_from_license_file(license_string): """ Add license to licenses_for_license. """ - if license in licenses_for_license: - logging.debug("the license was in licenses_for_license: %s\n", license) + if license_string in licenses_for_license: + logging.debug("the license was in licenses_for_license: %s\n", license_string) else: - licenses_for_license.append(license) + licenses_for_license.append(license_string) def scan_licenses(copying): -- Gitee From 1b01b6bddeeda54d0eae76a7bac348a50100de50 Mon Sep 17 00:00:00 2001 From: wangchuangGG Date: Mon, 29 Jun 2020 19:39:09 +0800 Subject: [PATCH 4/5] update advisors/check_licenses.py. --- advisors/check_licenses.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/advisors/check_licenses.py b/advisors/check_licenses.py index 06b7b942..c283ab68 100755 --- a/advisors/check_licenses.py +++ b/advisors/check_licenses.py @@ -25,6 +25,21 @@ -d Specify the decompression path of the tar package, default: /var/tmp/tmp_tarball """ +#****************************************************************************** +# Copyright (c) Huawei Technologies Co., Ltd. 2020-2020. All rights reserved. +# licensed under the Mulan PSL v2. +# You can use this software according to the terms and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# http://license.coscl.org.cn/MulanPSL2 +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR +# PURPOSE. +# See the Mulan PSL v2 for more details. +# Author: wangchuangGG +# Create: 2020-06-27 +# Description: provide a tool to check licenses in tar package and spec file +# ******************************************************************************/ + import argparse import configparser import os -- Gitee From 32582ea95f042c436d8466fcc54cac02faeea134 Mon Sep 17 00:00:00 2001 From: wangchuangGG Date: Mon, 29 Jun 2020 19:40:04 +0800 Subject: [PATCH 5/5] update advisors/download.py. --- advisors/download.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/advisors/download.py b/advisors/download.py index 301f6901..d73fe88b 100755 --- a/advisors/download.py +++ b/advisors/download.py @@ -2,6 +2,21 @@ """ download tar package with url """ +#****************************************************************************** +# Copyright (c) Huawei Technologies Co., Ltd. 2020-2020. All rights reserved. +# licensed under the Mulan PSL v2. +# You can use this software according to the terms and conditions of the Mulan PSL v2. +# You may obtain a copy of Mulan PSL v2 at: +# http://license.coscl.org.cn/MulanPSL2 +# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR +# PURPOSE. +# See the Mulan PSL v2 for more details. +# Author: wangchuangGG +# Create: 2020-06-27 +# Description: provide a tool to download tar package with url +# ******************************************************************************/ + import os import sys import io -- Gitee