From dfc046c5367d4ef6cdc6b246aa55fdfb5c0a45c5 Mon Sep 17 00:00:00 2001 From: zhangwu Date: Thu, 19 Oct 2023 16:57:48 +0800 Subject: [PATCH] =?UTF-8?q?capi=E8=A7=A3=E6=9E=90=E5=B7=A5=E5=85=B7?= =?UTF-8?q?=E6=A1=86=E6=9E=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: zhangwu --- capi_parser/.gitignore | 2 + capi_parser/readme.md | 3 + capi_parser/requirements.txt | Bin 0 -> 96 bytes capi_parser/src/bin/config.py | 38 +++ .../src/coreImpl/parser/generating_tables.py | 64 ++++ .../src/coreImpl/parser/parse_include.py | 283 ++++++++++++++++++ capi_parser/src/coreImpl/parser/parser.py | 139 +++++++++ capi_parser/src/main.py | 21 ++ capi_parser/src/utils/constants.py | 7 + 9 files changed, 557 insertions(+) create mode 100644 capi_parser/.gitignore create mode 100644 capi_parser/readme.md create mode 100644 capi_parser/requirements.txt create mode 100644 capi_parser/src/bin/config.py create mode 100644 capi_parser/src/coreImpl/parser/generating_tables.py create mode 100644 capi_parser/src/coreImpl/parser/parse_include.py create mode 100644 capi_parser/src/coreImpl/parser/parser.py create mode 100644 capi_parser/src/main.py create mode 100644 capi_parser/src/utils/constants.py diff --git a/capi_parser/.gitignore b/capi_parser/.gitignore new file mode 100644 index 000000000..05d361948 --- /dev/null +++ b/capi_parser/.gitignore @@ -0,0 +1,2 @@ +/**/__pycache__/* +.idea/ \ No newline at end of file diff --git a/capi_parser/readme.md b/capi_parser/readme.md new file mode 100644 index 000000000..c2d1e5556 --- /dev/null +++ b/capi_parser/readme.md @@ -0,0 +1,3 @@ +1.使用该工具前需要修改[constants.py](./src/utils/constants.py)文件下的StringConstant.LIB_CLANG_PATH和StringConstant.LINK_INCLUDE_PATH; +StringConstant.LIB_CLANG_PATH:clang共享库 +StringConstant.LINK_INCLUDE_PATH:所需要引入的头文件路径目录 diff --git a/capi_parser/requirements.txt b/capi_parser/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..e623e5c24d68c5da8aed925fa4b4411dc162084f GIT binary patch literal 96 zcmezWFPR~SA(0`EA)Ub%2n`ub8T1$ofY_XYmw}5RpP_&u6)0Q4P{~jMR0&dJ43sql WVk5AM0>', '+', '-', '*', '/'] + for token in tokens: + if token.spelling in spelling_arr: + data["operator"] = token.spelling + + +def distinction_member(cursor, data): # 区别结构体和联合体成员 + parent_kind = find_parent(cursor) # 查找父节点类型 + if parent_kind == CursorKind.UNION_DECL: + data["member"] = "union_member" + elif parent_kind == CursorKind.STRUCT_DECL: + data["member"] = "struct_member" + + +def processing_parm(cursor, data): # 函数参数节点处理 + if cursor.spelling: # 函数参数是否带参数名 + data["name"] = cursor.spelling + else: + data["name"] = "arg_no_spelling" + + if cursor.type.get_pointee().kind == TypeKind.FUNCTIONPROTO: # 参数为函数指针,获取对应的返回类型 + data["func_pointer_result_type"] = cursor.type.get_pointee().get_result().spelling + + +def processing_enum(cursor, data): # 获取枚举值 + data["value"] = cursor.enum_value + + +def processing_def(cursor, data): # 处理宏定义 + marco_ext = cursor.extent + tokens = cursor.translation_unit.get_tokens(extent=marco_ext) # 找到对应的宏定义位置 + tokens = list(tokens) # Generator转为list + processing_complex_def(tokens, data) # 获取宏名和宏文本 + data["type"] = "def_no_type" + + +def processing_func(cursor, data): # 处理函数 + data["return_type"] = cursor.result_type.spelling # 增加返回类型键值对 + judgment_extern(cursor, data) + + +def processing_type(cursor, data): # 没有类型的节点处理 + if cursor.kind == CursorKind.MACRO_INSTANTIATION: # 也属于宏定义 --宏引用 + data["type"] = "insta_no_type" + + elif cursor.kind == CursorKind.INCLUSION_DIRECTIVE: # 头文件也没type,规定 + data["type"] = "inclusion_no_type" + return + + +def processing_name(cursor, data): # 没有名的节点处理 + if cursor.kind == CursorKind.PAREN_EXPR: # 括号表达式() + data["paren"] = "()" + data["name"] = "paren_expr_no_spelling" + + elif cursor.kind == CursorKind.UNEXPOSED_EXPR: # 未公开表达式,用于表示未明确定义的表达式 + data["name"] = "unexposed_expr_no_spelling" + + +def processing_char(cursor, data): # 字符节点处理 + tokens = list(cursor.get_tokens()) + char_value = (tokens[0].spelling.strip("'")) + data["name"] = char_value + + +special_node_process = { + CursorKind.ENUM_CONSTANT_DECL.name: processing_enum, + CursorKind.MACRO_DEFINITION.name: processing_def, + CursorKind.FUNCTION_DECL.name: processing_func, + CursorKind.VAR_DECL.name: judgment_extern, + CursorKind.PARM_DECL.name: processing_parm, + CursorKind.FIELD_DECL.name: distinction_member, + CursorKind.MACRO_INSTANTIATION.name: processing_type, + CursorKind.INCLUSION_DIRECTIVE.name: processing_type, + CursorKind.BINARY_OPERATOR.name: binary_operator, + CursorKind.PAREN_EXPR.name: processing_name, + CursorKind.UNEXPOSED_EXPR.name: processing_name, + CursorKind.CHARACTER_LITERAL.name: processing_char +} + + +def processing_special_node(cursor, data): # 处理需要特殊处理的节点 + if cursor.kind.name in special_node_process.keys(): + node_process = special_node_process[cursor.kind.name] + node_process(cursor, data) # 调用对应节点处理函数 + + +def ast_to_dict(cursor, comment=None): # 解析数据的整理 + data = { # 通用 + "name": cursor.spelling, + "kind": cursor.kind.name, + "type": cursor.type.spelling, + } + + if cursor.raw_comment: # 是否有注释信息,有就取,没有过 + data["comment"] = cursor.raw_comment + else: + pass + + if cursor.kind == CursorKind.TRANSLATION_UNIT: # 把最开始的注释放在根节点这,如果有的话 + if comment: + data["comment"] = comment[0] + + else: + processing_special_node(cursor, data) # 节点处理 + + children = list(cursor.get_children()) # 判断是否有子节点,有就追加children,没有根据情况来 + if len(children) > 0: + if cursor.kind == CursorKind.FUNCTION_DECL: # 函数参数 + name = "parm" + elif cursor.kind == CursorKind.ENUM_DECL or cursor.kind == CursorKind.STRUCT_DECL or cursor.kind == CursorKind.UNION_DECL: + name = "members" + else: + name = "children" + data[name] = [] + for child in children: + if child.location.file is not None and child.kind != CursorKind.UNEXPOSED_ATTR: # 剔除多余宏定义和跳过UNEXPOSED_ATTR节点 + child_data = ast_to_dict(child) + data[name].append(child_data) + else: + processing_no_child(cursor, data) # 处理没有子节点的节点 + return data + + +def preorder_travers_ast(cursor, total, comment): # 获取属性 + ast_dict = ast_to_dict(cursor, comment) # 获取节点属性 + total.append(ast_dict) # 追加到数据统计列表里面 + + +def get_start_comments(include_path): # 获取每个头文件的最开始注释 + with open(include_path, 'r', encoding='utf-8') as f: + f.seek(0) + content = f.read() + pattern = r'/\*[^/]*\*/\s*/\*[^/]*\*/\s*(?=#ifndef)' + matches = re.findall(pattern, content, re.DOTALL | re.MULTILINE) + if matches is None: + pattern = r'/\*[^/]*\*/\s*(?=#ifndef)' + matches = re.findall(pattern, content, re.DOTALL | re.MULTILINE) + return matches + else: + return None + + +def api_entrance(share_lib, include_path=None, link_path=None): # 统计入口 + # clang.cindex需要用到libclang.dll共享库 所以配置共享库 + if Config.loaded: + print("config.loaded == true") + else: + Config.set_library_file(share_lib) + print("lib.dll: install path") + + # 创建AST索引 + index = Index.create() + print('=' * 50) + # options赋值为如下,代表宏定义解析数据也要 + args = ['-I{}'.format(path) for path in link_path] + options = clang.cindex.TranslationUnit.PARSE_DETAILED_PROCESSING_RECORD + print(args) + + data_total = [] # 列表对象-用于统计 + for i in range(len(include_path)): # 对每个头文件做处理 + file = r'{}'.format(include_path[i]) + print('文件名:{}'.format(file)) + tu = index.parse(include_path[i], args=args, options=options) + print(tu) + print('=' * 50) + ast_root_node = tu.cursor # 获取根节点 + print(ast_root_node) + matches = get_start_comments(include_path[i]) # 接收文件最开始的注释 + # 前序遍历AST + preorder_travers_ast(ast_root_node, data_total, matches) # 调用处理函数 + print('=' * 50) + + return data_total + + +def get_include_file(libclang, include_file_path, link_path): # 库路径、.h文件路径、链接头文件路径 + # libclang.dll库路径 + libclang_path = libclang + # c头文件的路径 + file_path = include_file_path + # 头文件链接路径 + link_include_path = link_path # 可以通过列表传入 + data = api_entrance(libclang_path, file_path, link_include_path) # 调用接口 + + return data + diff --git a/capi_parser/src/coreImpl/parser/parser.py b/capi_parser/src/coreImpl/parser/parser.py new file mode 100644 index 000000000..3f6e27135 --- /dev/null +++ b/capi_parser/src/coreImpl/parser/parser.py @@ -0,0 +1,139 @@ +import os # 可用于操作目录文件 +import glob # 可用于查找指定目录下指定后缀的文件 +import re # 正则表达是模块--可用于操作文件里面的内容 +from coreImpl.parser import parse_include, generating_tables # 引入解析文件 # 引入得到结果表格文件 +import json +from utils.constants import StringConstant + + +def find_gn_file(directory): # 找指定目录下所有GN文件 + gn_files = [] + for root, dirs, files in os.walk(directory): # dirpath, dirnames, filenames(对应信息) + for file in files: + if file.endswith(".gn"): + gn_files.append(os.path.join(root, file)) + return gn_files + + +def find_function_file(file, function_name): # 在GN文件中查找指定函数并在有函数名,获取对应sources的值 + with open(file, 'r') as f: + content = f.read() # 获取文件内容 + pattern = r'\b' + re.escape(function_name) + r'\b' # '\b'确保函数名的完全匹配 + matches = re.findall(pattern, content) + f.seek(0) # 回到文件开始位置 + if len(matches): # 是否匹配成功 + sources = [] # 转全部匹配的sources的.h(可能不止一个-headers函数) + end = 0 + for i in range(len(matches)): + # 匹配sources = \[[^\]]*\](匹配方括号内的内容,其中包括一个或多个非右括号字符),\s*:匹配0个或多个空白字符 + pattern = r'sources\s*=\s*\[[^\]]*\]' + sources_match = re.search(pattern, content) + if sources_match: + sources_value = sources_match.group(0) # 获取完整匹配的字符串 + sources_value = re.sub(r'\s', '', sources_value) # 去除源字符串的空白字符(换行符)和空格 + pattern = r'"([^"]+h)"' # 匹配引号中的内容,找对应的.h + source = re.findall(pattern, sources_value) + sources.extend(source) + end += sources_match.end() # 每次找完一个sources的.h路径,记录光标结束位置 + f.seek(end) # 移动光标在该结束位置 + content = f.read() # 从当前位置读取问价内容,防止重复 + return len(matches) > 0, sources + else: + return None, None # gn文件没有对应的函数 + + +def find_json_file(gn_file_match): # 找gn文件同级目录下的.json文件 + match_json_file = [] + directory = os.path.dirname(gn_file_match) + for file in glob.glob(os.path.join(directory, "*.json")): # 统计.json文件 + match_json_file.append(file) + return match_json_file + + +def dire_func(gn_file, func_name): # 统计数据的 + matches_file_total = [] # 统计有ohos_ndk_headers函数的gn文件 + json_file_total = [] # 统计跟含有函数的gn文件同级的json文件 + source_include = [] # 统计sources里面的.h + length, source = find_function_file(gn_file, func_name) # 找到包含函数的gn文件和同级目录下的.json文件 + if length: # 保证两个都不为空,source可能为空(因为gn能没有函数名) + source_include = source # 获取头文件列表 + matches_file_total.append(gn_file) # 调用匹配函数的函数(说明有对应的函数、source) + json_file_total.extend(find_json_file(gn_file)) # 找json + + return matches_file_total, json_file_total, source_include + + +def change_json_file(dict_data, name): # 生成json文件 + file_name = name + '_new' + '.json' # json文件名 + with open(file_name, 'w', encoding='UTF-8') as f: # encoding='UTF-8'能显示中文 + # ensure_ascii=False确保能显示中文,indent=4(格式控制)使生成的json样式跟字典一样 + json.dump(dict_data, f, ensure_ascii=False, indent=4) + + return file_name + + +def change_abs(include_files, dire_path): # 获取.h绝对路径 + abs_path = [] + for j in range(len(include_files)): # 拼接路径,生成绝对路径 + # os.path.normpath(path):规范或者是格式化路径,它会把所有路径分割符按照操作系统进行替换 + # 把规范路径和gn文件对应的目录路径拼接 + if os.path.isabs(include_files[j]): # 是否是绝对路径,是就拼接路径盘,不是就拼接gn目录路径 + head = os.path.splitdrive(dire_path) # 获取windows盘路径 + include_file = os.path.normpath(include_files[j]) + include_file = include_file.replace('\\\\', '\\') # 去掉绝对路径的双\\ + include_file = os.path.join(head[0], include_file) # 拼接盘和路径 + abs_path.append(include_file) + else: + abs_path.append(os.path.join(dire_path, os.path.normpath(include_files[j]))) + print("头文件绝对路径:\n", abs_path) + print("=" * 50) + return abs_path + + +def get_result_table(json_files, abs_path, lib_path, link_path): # 进行处理,生成表格 + if json_files: + file_name = os.path.split(json_files[0]) # 取第一个json名,但我是用列表装的 + file_name = os.path.splitext(file_name[1]) # 取下标1对应的元素(元组) + data = parse_include.get_include_file(lib_path, abs_path, link_path) # 获取解析返回的数据 + parse_json_name = change_json_file(data, file_name[0]) # 生成json文件 + result_list, head_name = generating_tables.get_json_file(parse_json_name, json_files) # 解析完后,传两个json文件,对比两个json文件,最后生成数据表格 + return result_list, head_name + else: + return None, None + + +def main_entrance(directory_path, function_names, lib_path, link_path): # 主入口 + gn_file_total = find_gn_file(directory_path) # 查找gn文件 + print("gn文件:", gn_file_total) + + for i in range(len(gn_file_total)): # 处理每个gn文件 + match_files, json_files, include_files = dire_func(gn_file_total[i], function_names) + dire_path = os.path.dirname(gn_file_total[i]) # 获取gn文件路径 + + print("目录路径: {}".format(dire_path)) + + print("同级json文件:\n", json_files) + print("头文件:\n", include_files) + + if match_files: # 符合条件的gn文件 + abs_path = change_abs(include_files, dire_path) # 接收.h绝对路径 + result_list, head_name = get_result_table(json_files, abs_path, lib_path, link_path) # 接收是否获转为表格信息 + if result_list: + generating_tables.generate_excel(result_list, head_name) + print("有匹配项,已生成表格") + else: + print("没有匹配项 or gn文件下无json文件") + else: + print("gn文件无header函数") + + +def parser(directory_path): # 目录路径 + function_name = StringConstant.FUNK_NAME.value # 匹配的函数名 + + libclang_path = StringConstant.LIB_CLANG_PATH.value # 共享库路径 + link_include_path = StringConstant.LINK_INCLUDE_PATH.value # 链接头文件路径 + + main_entrance(directory_path, function_name, libclang_path, link_include_path) # 调用入口函数 + + + diff --git a/capi_parser/src/main.py b/capi_parser/src/main.py new file mode 100644 index 000000000..f7b0ba128 --- /dev/null +++ b/capi_parser/src/main.py @@ -0,0 +1,21 @@ + +import argparse +from bin import config + + +parser = argparse.ArgumentParser( + prog=config.Config.name, description=config.Config.description) +for command in config.Config.commands: + abbr = command.get("abbr") + name = command.get("name") + choices = command.get("choices") + required = (True if command.get("required") else False) + type = command.get("type") + default = command.get("default") + help = command.get("help") + parser.add_argument(abbr, name, choices=choices, + required=required, type=type, default=default, help=help) + + +# args = parser.parse_args() +config.run_tools(parser.parse_args()) diff --git a/capi_parser/src/utils/constants.py b/capi_parser/src/utils/constants.py new file mode 100644 index 000000000..02dc14594 --- /dev/null +++ b/capi_parser/src/utils/constants.py @@ -0,0 +1,7 @@ +import enum + + +class StringConstant(enum.Enum): + LIB_CLANG_PATH = r'D:\Environment\LLVM\bin\libclang.dll' + FUNK_NAME = "ohos_ndk_headers" + LINK_INCLUDE_PATH = [r'E:\interface_sdk_c\interface_sdk_c'] -- Gitee