From ee6c53769b4f4749d901efcc504ff457f16e42ac Mon Sep 17 00:00:00 2001 From: lvmingfu Date: Mon, 21 Feb 2022 09:49:55 +0800 Subject: [PATCH] add white function in lint_link.py --- .../docs/source_en/shor_algorithm.md | 2 +- tools/link_detection/README_CN.md | 17 +++++++ tools/link_detection/filter_linklint.txt | 14 ++++++ tools/link_detection/link_lint.py | 45 +++++++++++++++---- 4 files changed, 68 insertions(+), 10 deletions(-) create mode 100644 tools/link_detection/filter_linklint.txt diff --git a/docs/mindquantum/docs/source_en/shor_algorithm.md b/docs/mindquantum/docs/source_en/shor_algorithm.md index e84e07b0f2..282ade39f2 100644 --- a/docs/mindquantum/docs/source_en/shor_algorithm.md +++ b/docs/mindquantum/docs/source_en/shor_algorithm.md @@ -1,6 +1,6 @@ # Shor's algorithm based on MindQuantum -[![View Source](https://gitee.com/mindspore/docs/raw/master/resource/_static/logo_source_en.png)](https://gitee.com/mindspore/docs/tree/master/docs/mindquantum/docs/source_en/shor_algorithm.md) +[![View Source](https://gitee.com/mindspore/docs/raw/master/resource/_static/logo_source_en.png)](https://gitee.com/mindspore/docs/blob/master/docs/mindquantum/docs/source_en/shor_algorithm.md) ## Introduction to Shor's Algorithm diff --git a/tools/link_detection/README_CN.md b/tools/link_detection/README_CN.md index d9ca6befb3..271abab0ed 100644 --- a/tools/link_detection/README_CN.md +++ b/tools/link_detection/README_CN.md @@ -51,3 +51,20 @@ docs/tutorials/source_zh_cn/intermediate/text/sentimentnet.ipynb:line_22:404: Er - 报错的行数:`line_22`。即检测文件中第22行报错。 - 报错代码:`404`。即该行中存在状态码是404的链接,即不存在该网址。 + +## 检测白名单设置 + +`filter_linklint.txt`文件中存储着链接检测的白名单,每行可写一个链接列入白名单。 +该文件默认放在与`link_lint.py`同目录。也可以通过命令传入`--white_path={白名单文件地址}`载入指定地址的白名单文件。 + +```bash +python link_lint.py --white_path=xxx/xxx/xx.txt xxx xxx +``` + +白名单内容可书写如下: + +```text +https://xxxxx.com +https://xxx.com/xxx.html +... 
+``` \ No newline at end of file diff --git a/tools/link_detection/filter_linklint.txt b/tools/link_detection/filter_linklint.txt new file mode 100644 index 0000000000..47d257eeb4 --- /dev/null +++ b/tools/link_detection/filter_linklint.txt @@ -0,0 +1,14 @@ +https:// +https://xxx +http://127.0.0.1:8080 +https://127.0.0.0:6666 +https://127.0.0.0 +http://127.0.0.1:1500/model/lenet/version/1:predict +http://localhost:%d +http://xxx/v1/mindinsight/profile/cluster-flops +http://xxxx/v1/mindinsight/debugger/sessions/xxxx/update-watchpoint +http://%s:%s%s +http://localhost:5500/x/:add_common +http://xxxx/v1/mindinsight/profile/memory-graphics +https://gitee.com/mindspore/docs/blob/xxx +http://192.168.216.124:11202/scaleout diff --git a/tools/link_detection/link_lint.py b/tools/link_detection/link_lint.py index fdedbe9fb8..6725c007b3 100644 --- a/tools/link_detection/link_lint.py +++ b/tools/link_detection/link_lint.py @@ -36,7 +36,7 @@ def get_urls(content): url_list = [] urls = re.findall(re_url, content) for url in urls: - url_list.append(url[0]+url[1].replace("\n", "")) + url_list.append(url[0]+url[1].split("\n\n")[0].replace("\n", "")) return url_list def check_url_status(url): @@ -94,7 +94,9 @@ def run_check(file): 检测文件中的urls链接 """ data = get_content(file) - urls = get_urls(data) + file_urls = get_urls(data) + white_urls = get_white_urls() + urls = set(file_urls) - set(white_urls) pool = [] for url in urls: k = threading.Thread(target=update_url_status_to_json, args=(url,)) @@ -104,16 +106,40 @@ def run_check(file): j.join() generate_info(file) - os.remove("url_status.json") + if os.path.exists("url_status.json"): + os.remove("url_status.json") + +def get_white_urls(white_file="filter_linklint.txt"): + """获取白名单中的链接""" + for i in sys.argv[1:]: + if "--white_path=" in i: + white_file = i.split("=")[-1] + if os.path.exists(white_file): + try: + with open(white_file, "r", encoding="utf-8") as f: + urls = f.readlines() + except Exception: + with open(white_file, "r", 
encoding="GBK") as f: + urls = f.readlines() + else: + urls = [] + return urls def generate_info(file): """ 输出404链接的信息 """ - with open("url_status.json", "r") as f: - url_status = json.load(f) - with open(file, "r", encoding="utf-8") as f: - lines = f.readlines() + if os.path.exists("url_status.json"): + with open("url_status.json", "r") as f: + url_status = json.load(f) + else: + url_status = {"https://www.mindspore.cn": 200} + try: + with open(file, "r", encoding="utf-8") as f: + lines = f.readlines() + except Exception: + with open(file, "r", encoding="GBK") as f: + lines = f.readlines() for line_num, line_content in enumerate(lines, 1): for i in get_urls(line_content): if url_status[i] == 404: @@ -125,9 +151,10 @@ if __name__ == "__main__": for check_path_ in sys.argv[1:]: - if os.path.isfile(check_path_): + extension = ["md", "py", "rst", "ipynb", "js", "html", "c", "cc", "txt"] + if os.path.isfile(check_path_) and check_path_.split(".")[-1] in extension: run_check(check_path_) elif os.path.isdir(check_path_): - check_f_ = [file for file in find_file(check_path_, files=[])] + check_f_ = [file for file in find_file(check_path_, files=[]) if file.split(".")[-1] in extension] for one_f in check_f_: run_check(one_f) -- Gitee