diff --git a/capi_parser/.gitignore b/capi_parser/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..05d3619486129b5ef047908b00945f70eda13d97
--- /dev/null
+++ b/capi_parser/.gitignore
@@ -0,0 +1,2 @@
+/**/__pycache__/*
+.idea/
\ No newline at end of file
diff --git a/capi_parser/readme.md b/capi_parser/readme.md
new file mode 100644
index 0000000000000000000000000000000000000000..c2d1e555660e0dd18f84ec8e4e92f67c38246c3b
--- /dev/null
+++ b/capi_parser/readme.md
@@ -0,0 +1,3 @@
+1.使用该工具前需要修改[constants.py](./src/utils/constants.py)文件下的StringConstant.LIB_CLANG_PATH和StringConstant.LINK_INCLUDE_PATH;
+StringConstant.LIB_CLANG_PATH:clang共享库
+StringConstant.LINK_INCLUDE_PATH:所需要引入的头文件路径目录
diff --git a/capi_parser/requirements.txt b/capi_parser/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e623e5c24d68c5da8aed925fa4b4411dc162084f
Binary files /dev/null and b/capi_parser/requirements.txt differ
diff --git a/capi_parser/src/bin/config.py b/capi_parser/src/bin/config.py
new file mode 100644
index 0000000000000000000000000000000000000000..450d9c168f62eced5b15393347a52bc61eb92733
--- /dev/null
+++ b/capi_parser/src/bin/config.py
@@ -0,0 +1,71 @@
+"""Command-line configuration and tool dispatch for the C API parser."""
+import enum
+
+from coreImpl.parser import parser
+
+
+class ToolNameType(enum.Enum):
+    """Tools that can be selected with ``--tool-name``."""
+
+    # NOTE: "COOLLECT" is a misspelling of "COLLECT"; the name is kept because
+    # the member is looked up by name.
+    COOLLECT = 'collect'
+    DIFF = 'diff'
+
+
+# Values accepted by ``--tool-name``.
+toolNameTypeSet = [member.value for member in ToolNameType]
+
+
+class FormatType(enum.Enum):
+    """Output formats declared for the tool."""
+
+    JSON = 'json'
+    EXCEL = 'excel'
+
+
+# Values of all declared output formats.
+formatSet = [member.value for member in FormatType]
+
+
+def run_tools(options):
+    """Run the tool named by ``options.tool_name``.
+
+    Args:
+        options: Parsed command-line arguments. ``tool_name`` selects the tool
+            and ``parser_path`` is the directory passed to the collector.
+    """
+    tool_name = options.tool_name
+    if tool_name == ToolNameType.COOLLECT.value:
+        parser.parser(options.parser_path)
+    elif tool_name == ToolNameType.DIFF.value:
+        print("开发中。。。")
+    else:
+        print("工具名称错误")
+
+
+class Config(object):
+    """Program metadata and the argument definitions read by ``main.py``."""
+
+    name = 'parser'
+    version = '0.1.0'
+    description = 'Compare the parser the NDKS'
+    commands = [
+        {
+            "name": "--tool-name",
+            "abbr": "-N",
+            "required": True,
+            "choices": toolNameTypeSet,
+            "type": str,
+            # A plain string, so that the default is one of ``choices``.
+            "default": ToolNameType.COOLLECT.value,
+            "help": "工具名称",
+        },
+        {
+            "name": "--parser-path",
+            "abbr": "-P",
+            "required": True,
+            "type": str,
+            "help": "解析路径",
+        },
+    ]
diff --git a/capi_parser/src/coreImpl/parser/generating_tables.py b/capi_parser/src/coreImpl/parser/generating_tables.py
new file mode 100644
index 0000000000000000000000000000000000000000..1d6ef90bb271422b0b3fb65e5c6bf5de7cbe76e2
--- /dev/null
+++ b/capi_parser/src/coreImpl/parser/generating_tables.py
@@ -0,0 +1,102 @@
+"""Compare parsed API data with existing JSON files and export the result."""
+import json
+import os
+
+import pandas as pd  # builds the Excel sheet
+
+
+def compare_json_file(js_file1, js_file2):
+    """Return the extern APIs of ``js_file1`` whose names occur in ``js_file2``.
+
+    Args:
+        js_file1: Path of the JSON file written by the header parser.
+        js_file2: Path of a JSON file holding a list of objects that carry a
+            ``name`` key.
+
+    Returns:
+        A list with one entry per matching (API, object) pair.
+    """
+    with open(js_file1, 'r', encoding='utf-8') as js1:
+        data1 = json.load(js1)
+    # Read both files as UTF-8 rather than the platform default encoding.
+    with open(js_file2, 'r', encoding='utf-8') as js2:
+        data2 = json.load(js2)
+    compare_result = []
+    for api in filter_compare(data1):
+        for item in data2:
+            # Entries with a missing or empty name can never match.
+            name = item.get("name")
+            if name and api["name"] == name:
+                compare_result.append(api)
+    return compare_result
+
+
+def filter_compare(data1):
+    """Collect the extern function and variable declarations.
+
+    Args:
+        data1: List of translation-unit dictionaries written by the parser.
+
+    Returns:
+        The matching child nodes without their ``parm`` and ``is_extern``
+        keys. The nodes inside ``data1`` are modified in place.
+    """
+    result_api = []
+    for root in data1:
+        # A root node without children has no "children" key.
+        for node in root.get("children", []):
+            if node["kind"] not in ('FUNCTION_DECL', 'VAR_DECL'):
+                continue
+            if not node.get("is_extern"):
+                continue
+            if node["kind"] == 'FUNCTION_DECL':
+                # Parameters are not needed in the table. A function without
+                # child nodes has no "parm" key, so do not require it.
+                node.pop("parm", None)
+            # Everything kept is extern, so the flag carries no information.
+            del node["is_extern"]
+            result_api.append(node)
+    return result_api
+
+
+def generate_excel(array, name):
+    """Write the comparison result to an Excel file.
+
+    Args:
+        array: List of node dictionaries to export.
+        name: Path of the ``.xlsx`` file to create.
+    """
+    pf = pd.DataFrame.from_dict(array, orient='columns')
+
+    order = ['name', 'kind', 'type', 'return_type']  # column order
+    # reindex() tolerates a missing column (e.g. no "return_type" when only
+    # variables matched); plain indexing raises KeyError in that case.
+    pf = pf.reindex(columns=order)
+    columns_map = {  # Chinese column titles used in the sheet
+        'name': '名称',
+        'kind': '节点类型',
+        'type': '类型',
+        'return_type': '返回类型',
+    }
+    pf = pf.rename(columns=columns_map)
+    pf.to_excel(name, index=False)
+
+
+def get_json_file(json_file_new, json_file):
+    """Compare the generated JSON file with each file in ``json_file``.
+
+    Args:
+        json_file_new: Path of the JSON file generated by the parser.
+        json_file: List of JSON file paths to compare against.
+
+    Returns:
+        A tuple ``(result_list, head_name)``: the matches from all
+        comparisons and the generated file's path with an ``.xlsx`` suffix.
+    """
+    json_file1 = r'{}'.format(json_file_new)
+    head_name = os.path.splitext(json_file1)[0] + '.xlsx'
+    result_list = []
+    for path in json_file:
+        # Accumulate the matches; plain assignment kept only the last file's.
+        result_list.extend(compare_json_file(json_file1, path))
+    return result_list, head_name
diff --git a/capi_parser/src/coreImpl/parser/parse_include.py b/capi_parser/src/coreImpl/parser/parse_include.py
new file mode 100644
index 0000000000000000000000000000000000000000..c4c603e3c3ff249fb19d5ff2e1eb0f40c5191f92
--- /dev/null
+++ b/capi_parser/src/coreImpl/parser/parse_include.py
@@ -0,0 +1,332 @@
+"""Turn C header files into JSON-friendly dictionaries with libclang."""
+import json
+import re
+
+import clang.cindex
+from clang.cindex import Config  # libclang configuration
+from clang.cindex import CursorKind  # kind of an AST node
+from clang.cindex import Index  # entry point for parsing
+from clang.cindex import TypeKind  # kind of a node's type
+
+
+def find_parent(cursor):
+    """Return the kind of the declaration that encloses ``cursor``.
+
+    Returns:
+        The semantic parent's kind when it is a variable, struct or union
+        declaration; otherwise the grandparent's kind. ``None`` when the
+        parent, or the grandparent that is needed, does not exist.
+    """
+    cursor_parent = cursor.semantic_parent
+    if cursor_parent is None:
+        return None
+    if cursor_parent.kind in (CursorKind.VAR_DECL, CursorKind.STRUCT_DECL,
+                              CursorKind.UNION_DECL):
+        return cursor_parent.kind
+    parent = cursor_parent.semantic_parent
+    return parent.kind if parent is not None else None
+
+
+def processing_no_child(cursor, data):
+    """Name an integer-literal leaf after its context and record its value.
+
+    Nodes of any other kind are left untouched.
+    """
+    # Compare the kinds themselves: ``cursor.kind.name`` is a string and never
+    # equals a CursorKind member, which made this function a no-op.
+    if cursor.kind != CursorKind.INTEGER_LITERAL:
+        return
+    parent_kind = find_parent(cursor)
+    if parent_kind == CursorKind.STRUCT_DECL:
+        data["name"] = 'struct_int_no_spelling'
+    elif parent_kind == CursorKind.UNION_DECL:
+        data["name"] = 'union_int_no_spelling'
+    elif parent_kind == CursorKind.ENUM_DECL:
+        data["name"] = 'enum_int_no_spelling'
+    elif parent_kind == CursorKind.VAR_DECL:
+        data["name"] = 'var_int_no_spelling'
+    else:
+        data["name"] = "integer_no_spelling"
+    for token in cursor.get_tokens():
+        data["integer_value"] = token.spelling  # text of the literal
+
+
+def processing_complex_def(tokens, data):
+    """Record the name and the replacement text of a macro definition.
+
+    When the tokens after the macro name contain an identifier and a ``)``,
+    everything up to the first ``)`` is joined into the name (``F(a,b)``) and
+    the rest becomes the text. Otherwise the name in ``data`` is kept and all
+    tokens after the first form the text.
+
+    Args:
+        tokens: List of the definition's tokens, macro name first.
+        data: Node dictionary to update.
+    """
+    # NOTE(review): an object-like macro whose body holds an identifier and a
+    # ")" (e.g. "#define X (A + 1)") also takes the first branch - confirm
+    # that this is intended.
+    tokens_new = tokens[1:]  # skip the macro name
+    logo_com = 0  # 1 when a parenthesised name was found
+    count_com = 0  # number of tokens that make up that name
+    if any(token.kind.name == 'IDENTIFIER' for token in tokens_new):
+        for index, token in enumerate(tokens_new):
+            if token.spelling == ')':
+                logo_com = 1
+                count_com = index + 2  # macro name plus tokens up to ")"
+                data["name"] = ''.join(
+                    item.spelling for item in tokens[:count_com])
+                break
+    get_def_text(tokens, data, logo_com, count_com)
+
+
+def get_def_text(tokens, data, logo_compose, count_compose):
+    """Store the macro's replacement text in ``data["text"]`` if there is any.
+
+    Args:
+        tokens: List of the definition's tokens, macro name first.
+        data: Node dictionary to update.
+        logo_compose: ``1`` when the name spans ``count_compose`` tokens.
+        count_compose: Index of the first token of the text in that case.
+    """
+    start = count_compose if logo_compose == 1 else 1
+    marco_expansion = ''.join(token.spelling for token in tokens[start:])
+    if marco_expansion:
+        data["text"] = marco_expansion
+
+
+def judgment_extern(cursor, data):
+    """Set ``data["is_extern"]`` to whether the node has an ``extern`` token."""
+    data["is_extern"] = any(
+        token.spelling == 'extern' for token in cursor.get_tokens())
+
+
+def binary_operator(cursor, data):
+    """Give a binary-operator node a placeholder name and record its operator.
+
+    Only ``<<``, ``>>``, ``+``, ``-``, ``*`` and ``/`` are recognised; when
+    several of them occur among the node's tokens the last one is stored.
+    """
+    data["name"] = "binary_ope_no_spelling"
+    spelling_arr = ['<<', '>>', '+', '-', '*', '/']
+    for token in cursor.get_tokens():
+        if token.spelling in spelling_arr:
+            data["operator"] = token.spelling
+
+
+def distinction_member(cursor, data):
+    """Mark a field as a struct member or a union member."""
+    parent_kind = find_parent(cursor)
+    if parent_kind == CursorKind.UNION_DECL:
+        data["member"] = "union_member"
+    elif parent_kind == CursorKind.STRUCT_DECL:
+        data["member"] = "struct_member"
+
+
+def processing_parm(cursor, data):
+    """Name a function parameter and describe function-pointer parameters."""
+    # Parameters may be declared without a name.
+    data["name"] = cursor.spelling if cursor.spelling else "arg_no_spelling"
+
+    pointee = cursor.type.get_pointee()
+    if pointee.kind == TypeKind.FUNCTIONPROTO:
+        # Function pointer: record the return type of the pointed-to function.
+        data["func_pointer_result_type"] = pointee.get_result().spelling
+
+
+def processing_enum(cursor, data):
+    """Record the value of an enum constant."""
+    data["value"] = cursor.enum_value
+
+
+def processing_def(cursor, data):
+    """Record name, text and a placeholder type for a macro definition."""
+    # Tokenise exactly the source range covered by the definition.
+    tokens = list(cursor.translation_unit.get_tokens(extent=cursor.extent))
+    processing_complex_def(tokens, data)
+    data["type"] = "def_no_type"
+
+
+def processing_func(cursor, data):
+    """Record a function's return type and whether it is ``extern``."""
+    data["return_type"] = cursor.result_type.spelling
+    judgment_extern(cursor, data)
+
+
+def processing_type(cursor, data):
+    """Give macro expansions and include directives a placeholder type."""
+    if cursor.kind == CursorKind.MACRO_INSTANTIATION:
+        data["type"] = "insta_no_type"
+    elif cursor.kind == CursorKind.INCLUSION_DIRECTIVE:
+        data["type"] = "inclusion_no_type"
+
+
+def processing_name(cursor, data):
+    """Give parenthesised and unexposed expressions a placeholder name."""
+    if cursor.kind == CursorKind.PAREN_EXPR:
+        data["paren"] = "()"
+        data["name"] = "paren_expr_no_spelling"
+    elif cursor.kind == CursorKind.UNEXPOSED_EXPR:
+        data["name"] = "unexposed_expr_no_spelling"
+
+
+def processing_char(cursor, data):
+    """Use the character of a character literal as the node name."""
+    tokens = list(cursor.get_tokens())
+    if tokens:  # keep the default name when the node has no tokens
+        data["name"] = tokens[0].spelling.strip("'")
+
+
+# Maps a cursor kind name to the function that adds the kind-specific keys.
+special_node_process = {
+    CursorKind.ENUM_CONSTANT_DECL.name: processing_enum,
+    CursorKind.MACRO_DEFINITION.name: processing_def,
+    CursorKind.FUNCTION_DECL.name: processing_func,
+    CursorKind.VAR_DECL.name: judgment_extern,
+    CursorKind.PARM_DECL.name: processing_parm,
+    CursorKind.FIELD_DECL.name: distinction_member,
+    CursorKind.MACRO_INSTANTIATION.name: processing_type,
+    CursorKind.INCLUSION_DIRECTIVE.name: processing_type,
+    CursorKind.BINARY_OPERATOR.name: binary_operator,
+    CursorKind.PAREN_EXPR.name: processing_name,
+    CursorKind.UNEXPOSED_EXPR.name: processing_name,
+    CursorKind.CHARACTER_LITERAL.name: processing_char
+}
+
+
+def processing_special_node(cursor, data):
+    """Run the kind-specific handler for ``cursor`` if one is registered."""
+    node_process = special_node_process.get(cursor.kind.name)
+    if node_process is not None:
+        node_process(cursor, data)
+
+
+def ast_to_dict(cursor, comment=None):
+    """Convert ``cursor`` and its descendants into a nested dictionary.
+
+    Args:
+        cursor: The AST node to convert.
+        comment: Comments found at the top of the file; the first one is
+            attached to the translation-unit node.
+
+    Returns:
+        A dictionary with ``name``, ``kind`` and ``type`` plus kind-specific
+        keys. Child nodes are stored under ``parm`` (functions), ``members``
+        (enums, structs, unions) or ``children`` (everything else).
+    """
+    data = {
+        "name": cursor.spelling,
+        "kind": cursor.kind.name,
+        "type": cursor.type.spelling,
+    }
+
+    if cursor.raw_comment:
+        data["comment"] = cursor.raw_comment
+
+    if cursor.kind == CursorKind.TRANSLATION_UNIT:
+        if comment:
+            data["comment"] = comment[0]
+    else:
+        processing_special_node(cursor, data)
+
+    children = list(cursor.get_children())
+    if children:
+        if cursor.kind == CursorKind.FUNCTION_DECL:
+            name = "parm"
+        elif cursor.kind in (CursorKind.ENUM_DECL, CursorKind.STRUCT_DECL,
+                             CursorKind.UNION_DECL):
+            name = "members"
+        else:
+            name = "children"
+        data[name] = []
+        for child in children:
+            # Skip nodes that have no source file and UNEXPOSED_ATTR nodes.
+            if (child.location.file is not None
+                    and child.kind != CursorKind.UNEXPOSED_ATTR):
+                data[name].append(ast_to_dict(child))
+    else:
+        processing_no_child(cursor, data)
+    return data
+
+
+def preorder_travers_ast(cursor, total, comment):
+    """Convert the tree rooted at ``cursor`` and append it to ``total``."""
+    total.append(ast_to_dict(cursor, comment))
+
+
+def get_start_comments(include_path):
+    """Return the comment block(s) placed right before ``#ifndef``.
+
+    Args:
+        include_path: Path of the header file to read.
+
+    Returns:
+        A list of matched comment strings; empty when nothing matches.
+    """
+    with open(include_path, 'r', encoding='utf-8') as f:
+        content = f.read()
+    flags = re.DOTALL | re.MULTILINE
+    # Prefer two consecutive block comments, then fall back to a single one.
+    pattern = r'/\*[^/]*\*/\s*/\*[^/]*\*/\s*(?=#ifndef)'
+    matches = re.findall(pattern, content, flags)
+    # findall() returns a list, never None, so test for emptiness; the old
+    # ``is None`` test made this function always return None.
+    if not matches:
+        pattern = r'/\*[^/]*\*/\s*(?=#ifndef)'
+        matches = re.findall(pattern, content, flags)
+    return matches
+
+
+def api_entrance(share_lib, include_path=None, link_path=None):
+    """Parse each header and return the list of converted syntax trees.
+
+    Args:
+        share_lib: Path of the libclang shared library.
+        include_path: Header files to parse.
+        link_path: Directories passed to clang as ``-I`` include paths.
+
+    Returns:
+        A list with one dictionary per parsed header.
+    """
+    # The library path is set only while libclang is not loaded yet.
+    if Config.loaded:
+        print("config.loaded == true")
+    else:
+        Config.set_library_file(share_lib)
+        print("lib.dll: install path")
+
+    index = Index.create()
+    print('=' * 50)
+    # The default value None means "no entries" for both lists.
+    args = ['-I{}'.format(path) for path in link_path or []]
+    # Keep preprocessing records so that macro definitions appear in the AST.
+    options = clang.cindex.TranslationUnit.PARSE_DETAILED_PROCESSING_RECORD
+    print(args)
+
+    data_total = []
+    for header in include_path or []:
+        print('文件名:{}'.format(header))
+        tu = index.parse(header, args=args, options=options)
+        print(tu)
+        print('=' * 50)
+        ast_root_node = tu.cursor
+        print(ast_root_node)
+        matches = get_start_comments(header)  # leading file comments
+        preorder_travers_ast(ast_root_node, data_total, matches)
+        print('=' * 50)
+
+    return data_total
+
+
+def get_include_file(libclang, include_file_path, link_path):
+    """Parse the given headers; a thin wrapper around ``api_entrance``.
+
+    Args:
+        libclang: Path of the libclang shared library.
+        include_file_path: Header files to parse.
+        link_path: List of include directories.
+
+    Returns:
+        The list returned by ``api_entrance``.
+    """
+    return api_entrance(libclang, include_file_path, link_path)
diff --git a/capi_parser/src/coreImpl/parser/parser.py b/capi_parser/src/coreImpl/parser/parser.py
new file mode 100644
index 0000000000000000000000000000000000000000..3f6e271350f747560c51c1571255686b6862a220
--- /dev/null
+++ b/capi_parser/src/coreImpl/parser/parser.py
@@ -0,0 +1,172 @@
+"""Find GN build files, parse the headers they list and build the tables."""
+import glob  # finds files by pattern
+import json
+import os
+import re
+
+from coreImpl.parser import generating_tables, parse_include
+from utils.constants import StringConstant
+
+
+def find_gn_file(directory):
+    """Return the paths of all ``.gn`` files below ``directory``."""
+    gn_files = []
+    for root, _dirs, files in os.walk(directory):
+        gn_files.extend(
+            os.path.join(root, file) for file in files if file.endswith(".gn"))
+    return gn_files
+
+
+def find_function_file(file, function_name):
+    """Collect the headers listed in a GN file that uses ``function_name``.
+
+    For every occurrence of ``function_name`` the next ``sources = [...]``
+    list is read and its quoted entries ending in ``h`` are collected.
+
+    Args:
+        file: Path of the GN file.
+        function_name: Name that must occur as a whole word.
+
+    Returns:
+        ``(True, sources)`` when the name occurs, else ``(None, None)``.
+    """
+    with open(file, 'r', encoding='utf-8') as f:
+        content = f.read()
+    # "\b" makes sure that only the complete name matches.
+    matches = re.findall(r'\b' + re.escape(function_name) + r'\b', content)
+    if not matches:
+        return None, None  # the GN file does not use the function
+
+    sources = []
+    position = 0
+    for _ in matches:
+        # Continue after the previous list. Slicing the string replaces the
+        # seek()/read() calls, which mixed character counts with file
+        # positions.
+        sources_match = re.search(
+            r'sources\s*=\s*\[[^\]]*\]', content[position:])
+        if sources_match is None:
+            break
+        sources_value = re.sub(r'\s', '', sources_match.group(0))
+        sources.extend(re.findall(r'"([^"]+h)"', sources_value))
+        position += sources_match.end()
+    return True, sources
+
+
+def find_json_file(gn_file_match):
+    """Return the ``.json`` files located next to the given GN file."""
+    directory = os.path.dirname(gn_file_match)
+    return glob.glob(os.path.join(directory, "*.json"))
+
+
+def dire_func(gn_file, func_name):
+    """Gather the data needed to process one GN file.
+
+    Returns:
+        A tuple ``(matches_file_total, json_file_total, source_include)``:
+        the GN file, the JSON files next to it and the headers from its
+        ``sources``. All three are empty lists when ``func_name`` does not
+        occur in the GN file.
+    """
+    matches_file_total = []
+    json_file_total = []
+    source_include = []
+    length, source = find_function_file(gn_file, func_name)
+    if length:
+        source_include = source
+        matches_file_total.append(gn_file)
+        json_file_total.extend(find_json_file(gn_file))
+    return matches_file_total, json_file_total, source_include
+
+
+def change_json_file(dict_data, name):
+    """Dump ``dict_data`` to ``<name>_new.json`` and return that file name."""
+    file_name = name + '_new' + '.json'
+    with open(file_name, 'w', encoding='UTF-8') as f:
+        # ensure_ascii=False keeps non-ASCII text readable in the file.
+        json.dump(dict_data, f, ensure_ascii=False, indent=4)
+    return file_name
+
+
+def change_abs(include_files, dire_path):
+    """Turn the header paths from a GN file into absolute paths.
+
+    Args:
+        include_files: Header paths taken from the ``sources`` list.
+        dire_path: Directory of the GN file.
+
+    Returns:
+        A list of paths: absolute entries are prefixed with the drive of
+        ``dire_path``, relative entries are joined onto ``dire_path``.
+    """
+    abs_path = []
+    drive = os.path.splitdrive(dire_path)[0]
+    for raw_path in include_files:
+        # normpath() normalises the separators for the current system.
+        include_file = os.path.normpath(raw_path)
+        if os.path.isabs(raw_path):
+            include_file = include_file.replace('\\\\', '\\')
+            abs_path.append(os.path.join(drive, include_file))
+        else:
+            abs_path.append(os.path.join(dire_path, include_file))
+    print("头文件绝对路径:\n", abs_path)
+    print("=" * 50)
+    return abs_path
+
+
+def get_result_table(json_files, abs_path, lib_path, link_path):
+    """Parse the headers, save the result as JSON and compare it.
+
+    Returns:
+        ``(result_list, head_name)`` from the comparison, or ``(None, None)``
+        when ``json_files`` is empty.
+    """
+    if not json_files:
+        return None, None
+    # The generated file is named after the first JSON file.
+    base_name = os.path.splitext(os.path.split(json_files[0])[1])[0]
+    data = parse_include.get_include_file(lib_path, abs_path, link_path)
+    parse_json_name = change_json_file(data, base_name)
+    return generating_tables.get_json_file(parse_json_name, json_files)
+
+
+def main_entrance(directory_path, function_names, lib_path, link_path):
+    """Process every GN file below ``directory_path``.
+
+    Args:
+        directory_path: Root directory that is searched for GN files.
+        function_names: Name of the GN function to look for.
+        lib_path: Path of the libclang shared library.
+        link_path: List of include directories.
+    """
+    gn_file_total = find_gn_file(directory_path)
+    print("gn文件:", gn_file_total)
+
+    for gn_file in gn_file_total:
+        match_files, json_files, include_files = dire_func(gn_file, function_names)
+        dire_path = os.path.dirname(gn_file)
+
+        print("目录路径: {}".format(dire_path))
+        print("同级json文件:\n", json_files)
+        print("头文件:\n", include_files)
+
+        if not match_files:
+            print("gn文件无header函数")
+            continue
+        abs_path = change_abs(include_files, dire_path)
+        result_list, head_name = get_result_table(
+            json_files, abs_path, lib_path, link_path)
+        if result_list:
+            generating_tables.generate_excel(result_list, head_name)
+            print("有匹配项,已生成表格")
+        else:
+            print("没有匹配项 or gn文件下无json文件")
+
+
+def parser(directory_path):
+    """Run the collector on ``directory_path`` with the configured settings."""
+    function_name = StringConstant.FUNK_NAME.value
+    libclang_path = StringConstant.LIB_CLANG_PATH.value
+    link_include_path = StringConstant.LINK_INCLUDE_PATH.value
+    main_entrance(
+        directory_path, function_name, libclang_path, link_include_path)
diff --git a/capi_parser/src/main.py b/capi_parser/src/main.py
new file mode 100644
index 0000000000000000000000000000000000000000..f7b0ba128fcc957702a5014a38aa34cf252b641c
--- /dev/null
+++ b/capi_parser/src/main.py
@@ -0,0 +1,25 @@
+"""Command-line entry point of the C API parser."""
+import argparse
+
+from bin import config
+
+
+def main():
+    """Build the argument parser from ``Config.commands`` and run the tool."""
+    parser = argparse.ArgumentParser(
+        prog=config.Config.name, description=config.Config.description)
+    for command in config.Config.commands:
+        parser.add_argument(
+            command.get("abbr"),
+            command.get("name"),
+            choices=command.get("choices"),
+            required=bool(command.get("required")),
+            type=command.get("type"),
+            default=command.get("default"),
+            help=command.get("help"),
+        )
+    config.run_tools(parser.parse_args())
+
+
+if __name__ == '__main__':
+    main()
diff --git a/capi_parser/src/utils/constants.py b/capi_parser/src/utils/constants.py
new file mode 100644
index 0000000000000000000000000000000000000000..02dc14594055ad92299976bba19901a6af104aaf
--- /dev/null
+++ b/capi_parser/src/utils/constants.py
@@ -0,0 +1,13 @@
+"""Settings to adapt before running the tool (see readme.md)."""
+import enum
+
+
+class StringConstant(enum.Enum):
+    """Paths and names used by the parser."""
+
+    # Path of the libclang shared library.
+    LIB_CLANG_PATH = r'D:\Environment\LLVM\bin\libclang.dll'
+    # Name of the GN function whose ``sources`` list the headers to parse.
+    FUNK_NAME = "ohos_ndk_headers"
+    # Include directories passed to clang when parsing the headers.
+    LINK_INCLUDE_PATH = [r'E:\interface_sdk_c\interface_sdk_c']