diff --git a/Makefile b/Makefile index 20275dc1959c116c3690eb0ae0f6345314522a25..98f7b9eb38028992d2677747212296a61cbc4c4a 100644 --- a/Makefile +++ b/Makefile @@ -49,6 +49,7 @@ binary: clean: make -C $(SRC)/lib clean make -C $(SRC)/tools/monitor/mservice/master clean + make -C $(SRC)/tools/monitor/unity clean rm -rf $(OBJPATH) clean_middle: make -C $(SRC)/lib clean diff --git a/doc/venv.md b/doc/venv.md new file mode 100644 index 0000000000000000000000000000000000000000..386717103c18ad1000eb731e15978f47c7a824a9 --- /dev/null +++ b/doc/venv.md @@ -0,0 +1,17 @@ +# 1、搭建虚拟环境 + pip3.9 uninstall virtualenv -y 卸载3.9版本的virtualenv + pip3.6 install virtualenv -i "${ALIYUN_MIRROR}" 安装virtualenv + -i 指定镜像源 + 进入项目根目录,执行:virtualenv virtualenv + +# 2、requirement配置 + 将所需的包全部列在requirement.txt 参考:https://code.alibaba-inc.com/SystemTools/sysAK/blob/opensource_branch/source/tools/combine/sar/requirements.txt + +# 3、调整venv.sh 参数 + 调整venv.sh 脚本,修改venv.sh中的参数50行执行命令 + 两种打包方式: + 1)pyinstaller -F sample.py --add-data './config.yaml:./' -y + 2)写一个pyinstaller的spec文件:pyinstaller spec.spec + +# 4、打包 + 执行venv.sh \ No newline at end of file diff --git a/doc/venv/config.yaml b/doc/venv/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..10470f70c91199dfafac6efdb194efe35b92bc75 --- /dev/null +++ b/doc/venv/config.yaml @@ -0,0 +1 @@ +host_config: http://127.0.0.1:8400 \ No newline at end of file diff --git a/doc/venv/requirement.txt b/doc/venv/requirement.txt new file mode 100644 index 0000000000000000000000000000000000000000..55f1eba31a76cc46dc95499c04289df1af61a118 --- /dev/null +++ b/doc/venv/requirement.txt @@ -0,0 +1,3 @@ +requests==2.27.1 +pyinstaller==3.5 +pyyaml==6.0.1 \ No newline at end of file diff --git a/doc/venv/sample.py b/doc/venv/sample.py new file mode 100644 index 0000000000000000000000000000000000000000..5e63acd4ec0b7df555188b13664955d40161383e --- /dev/null +++ b/doc/venv/sample.py @@ -0,0 +1,31 @@ +import requests +import yaml +import os + 
+BASE_DIR = os.path.dirname(os.path.abspath(__file__)) + + +def yaml_config(): + """load response code status""" + yaml_name = "config.yaml" + yaml_path = os.path.join(BASE_DIR, yaml_name) + with open(yaml_path, 'r') as f: + config = yaml.load(f, Loader=yaml.FullLoader) + return config + + +config = yaml_config() +host_config = config["host_config"] + + +def get_request(url, params=None): + try: + response = requests.get(url, params=params) + response.raise_for_status() + return response.json() + except requests.exceptions.RequestException as e: + print(e) + + +if __name__ == "__main__": + get_request(url = host_config) diff --git a/doc/venv/venv.sh b/doc/venv/venv.sh new file mode 100644 index 0000000000000000000000000000000000000000..69926eb1d5a6246dad7ea0f32e846d712886c104 --- /dev/null +++ b/doc/venv/venv.sh @@ -0,0 +1,60 @@ +#!/bin/bash + +ALIYUN_MIRROR="https://mirrors.aliyun.com/pypi/simple/" +export SERVER_HOME=$(pwd) + +VIRTUALENV_HOME="${SERVER_HOME}/virtualenv" + +touch_virtualenv() { + pip3.9 uninstall virtualenv -y + pip3.6 install virtualenv -i "${ALIYUN_MIRROR}" + if [ -d ${VIRTUALENV_HOME} ]; then + echo "virtualenv exists, skip" + echo "INFO: activate virtualenv..." + source ${VIRTUALENV_HOME}/bin/activate || exit 1 + else + virtualenv ${VIRTUALENV_HOME} + if [ "$?" = 0 ]; then + echo "INFO: create virtualenv success" + else + echo "ERROR: create virtualenv failed" + exit 1 + fi + echo "INFO: activate virtualenv..." + source ${VIRTUALENV_HOME}/bin/activate || exit 1 + check_requirements + fi +} + + +check_requirements() { + echo "INFO: begin install requirements..." + if ! [ -d ${SERVER_HOME}/logs/ ]; then + mkdir -p ${SERVER_HOME}/logs/ || exit 1 + fi + + local requirements_log="${SERVER_HOME}/logs/requirements.log" + local requirements="requirements.txt" + touch "$requirements_log" || exit + pip3.6 install --upgrade pip + pip3.6 install -r ${requirements} -i "${ALIYUN_MIRROR}" |tee -a "${requirements_log}" || exit 1 + local pip_res=$? 
+ if [ $pip_res -ne 0 ]; then + echo "ERROR: requirements not satisfied and auto install failed, please check ${requirements_log}" + exit 1 + fi +} + +pyinstaller_sar() { + echo "INFO: begin pyinstaller sar..." + pyinstaller -F sample.py --add-data './config.yaml:./' -y + deactivate + rm -rf ${VIRTUALENV_HOME} +} + +deploy() { + touch_virtualenv + pyinstaller_sar +} + +deploy diff --git a/rpm/.sysak.diag.config b/rpm/.sysak.diag.config new file mode 100644 index 0000000000000000000000000000000000000000..ee8bbb195091d72fb01955f4d37ad65d41417805 --- /dev/null +++ b/rpm/.sysak.diag.config @@ -0,0 +1,15 @@ +系统健康检查/系统健康检查:default{sysak ossre_client -d} +调度诊断中心/系统负载诊断:default{sysak loadtask -s -g } +调度诊断中心/调度抖动诊断:default{sysak -g schedmoni -s 20 20}:cmd{sysak -g schedmoni [-s timeout(s)] [threshold(s)]} +调度诊断中心/应用profile分析:default{sysak raptor oncpu --server local --app-name aa --exit-time 5}:cmd{sysak raptor oncpu --server local --app-name aa [--exit-time timeout(min)]} +存储诊断中心/IO时延分析:default{sysak -g iosdiag latency -t 1000 -T 10}:cmd{sysak -g iosdiag latency [-t threshold(ms)] [-T timeout(s)] [disk] } +存储诊断中心/IO流量分析:default{sysak iofsstat -m -P -c 15 -T 15}:cmd{sysak iofsstat -m -P [-c timeout(s)] [-T timeout(s)] [-d disk]} +存储诊断中心/IO HANG分析:default{sysak -g iosdiag hangdetect -t 5000 -T 10}:cmd{sysak -g iosdiag hangdetect [-t threshold(ms)] [-T timeout(s)] [disk]} +网络诊断中心/丢包诊断:default{sysak -g rtrace drop --duration 10}:cmd{sysak -g rtrace drop [--duration timeout(s)]} +网络诊断中心/抖动诊断:default{sysak -g rtrace latency --json --duration 10 --proto icmp}:cmd{{sysak -g rtrace latency --json [--duration timeout(s)] --proto icmp}} +网络诊断中心/重传诊断:default{sysak -g rtrace retran --duration 10}:cmd{sysak -g rtrace retran [--duration timeout(s)]} +网络诊断中心/时延诊断:default{sysak pingtrace -c 127.0.0.1 -t 10}:cmd{sysak pingtrace [-c target_ip] [-t timeout(s)] [-i interval_time(us)] [-C packet_num]} +内存诊断中心/内存大盘:default{sysak memgraph -g -l -f -k -c 3} +内存诊断中心/Cache分析:default{sysak podmem -r 
100 -s}:cmd{sysak podmem -r 100 [-s / -a / -p pod / -f cgroup / -c container_id]} +内存诊断中心/OOM诊断:default{sysak oomcheck -j}:cmd{sysak oomcheck -j [-t check_time(YYYY-MM-DD HH:MM:SS)]} +链路诊断中心/RT时延分析:default{sysak -g rtdelay -u}:cmd{sysak -g rtdelay -u [-p pid] [-s server_pid] [-d timeout(s)]} diff --git a/rpm/sysak-build-nodep-aarch64.sh b/rpm/sysak-build-nodep-aarch64.sh index f2680985ce2055fbc75b2cae2832a8232f03275c..e07bba21c831f1676a14105f0aaeae08919c924d 100755 --- a/rpm/sysak-build-nodep-aarch64.sh +++ b/rpm/sysak-build-nodep-aarch64.sh @@ -47,6 +47,7 @@ mkdir -p \$RPM_BUILD_ROOT/usr/lib/systemd/system/ #/bin/cp -f $BUILD_DIR/.sysak_components/tools/dist/app/collector/plugin.yaml \$RPM_BUILD_ROOT/etc/sysak/ /bin/cp $SOURCE_DIR/rpm/sysak.service \$RPM_BUILD_ROOT/usr/lib/systemd/system/ /bin/cp $SOURCE_DIR/rpm/sysak_server.conf \$RPM_BUILD_ROOT/usr/local/sysak/ +/bin/cp $SOURCE_DIR/rpm/.sysak.diag.config \$RPM_BUILD_ROOT/etc/sysak/ %preun systemctl stop sysak diff --git a/rpm/sysak-build-nodep.sh b/rpm/sysak-build-nodep.sh index e194c14e5027c96dfe5e4e966cfe54ec04a000c6..de1d632e27d0db641ace54552b7a410ff5f02c7f 100755 --- a/rpm/sysak-build-nodep.sh +++ b/rpm/sysak-build-nodep.sh @@ -50,6 +50,8 @@ mkdir -p \$RPM_BUILD_ROOT/usr/lib/systemd/system/ /bin/cp -f $BUILD_DIR/.sysak_components/tools/dist/app/etc/* \$RPM_BUILD_ROOT/etc/sysak/ /bin/cp $SOURCE_DIR/rpm/sysak.service \$RPM_BUILD_ROOT/usr/lib/systemd/system/ /bin/cp $SOURCE_DIR/rpm/sysak_server.conf \$RPM_BUILD_ROOT/usr/local/sysak/ +/bin/cp $SOURCE_DIR/rpm/.sysak.diag.config \$RPM_BUILD_ROOT/etc/sysak/ +/usr/bin/rm -rf \$RPM_BUILD_ROOT/usr/local/sysak/.sysak_components/tools/$LINUX_VERSION %preun systemctl stop sysak diff --git a/rpm/sysak-build.sh b/rpm/sysak-build.sh index e68b7ed89470ef430cfb15cc5b8bb03e97924719..8e236d502550ce67fa427b40ec8190bc7563dab8 100755 --- a/rpm/sysak-build.sh +++ b/rpm/sysak-build.sh @@ -45,6 +45,7 @@ mkdir -p \$RPM_BUILD_ROOT/usr/lib/systemd/system/ /bin/cp 
$SOURCE_DIR/rpm/sysak.service \$RPM_BUILD_ROOT/usr/lib/systemd/system/ /bin/cp -f $BUILD_DIR/.sysak_components/tools/monitor/sysakmon.conf \$RPM_BUILD_ROOT/usr/local/sysak/ /bin/cp $SOURCE_DIR/rpm/sysak_server.conf \$RPM_BUILD_ROOT/usr/local/sysak/ +/bin/cp $SOURCE_DIR/rpm/.sysak.diag.config \$RPM_BUILD_ROOT/etc/sysak/ %preun diff --git a/source/lib/internal/ebpf/coolbpf b/source/lib/internal/ebpf/coolbpf index d0fabf821d2d8eb9d27da8eefd4797dc181cc92b..ba65096ff73cb6350c5a383fc6d9092f1b25a4fb 160000 --- a/source/lib/internal/ebpf/coolbpf +++ b/source/lib/internal/ebpf/coolbpf @@ -1 +1 @@ -Subproject commit d0fabf821d2d8eb9d27da8eefd4797dc181cc92b +Subproject commit ba65096ff73cb6350c5a383fc6d9092f1b25a4fb diff --git a/source/lib/uapi/kcore_utils.c b/source/lib/uapi/kcore_utils.c index e58b45c21151eae4508d706367b3036a5b02ed71..e745945b0cec27040985efd444cdce8fdbef30af 100644 --- a/source/lib/uapi/kcore_utils.c +++ b/source/lib/uapi/kcore_utils.c @@ -246,7 +246,7 @@ static int download_btf() memset(sysak_path, 0, sizeof(sysak_path)); strcpy(sysak_path, tmp); strcat(sysak_path, "/tools/"); - strcat(sysak_path, kernel); + //strcat(sysak_path, kernel); } snprintf(dw, LEN + LEN + LEN, "wget -T 5 -t 2 -q -O %s/vmlinux-%s https://sysom-cn-%s.oss-cn-%s%s.aliyuncs.com/home/hive/btf/%s/vmlinux-%s", sysak_path, kernel, ®ion[3],®ion[3], timeout,arch, kernel); @@ -278,7 +278,7 @@ char *prepare_btf_file() do_cmd(cmd, ver, LEN); if (getenv("SYSAK_WORK_PATH") != NULL) - sprintf(btf,"%s/tools/%s/vmlinux-%s", getenv("SYSAK_WORK_PATH"), ver, ver); + sprintf(btf,"%s/tools/vmlinux-%s", getenv("SYSAK_WORK_PATH"), ver); else sprintf(btf,"/boot/vmlinux-%s", ver); diff --git a/source/mk/pyinstaller.mk b/source/mk/pyinstaller.mk index c499222fbab6b376e065adf1b1976c1ae12fec02..e37f1b81b37c40d6defbaecd1f2b9cd2ed934b81 100644 --- a/source/mk/pyinstaller.mk +++ b/source/mk/pyinstaller.mk @@ -8,7 +8,7 @@ all: $(target) target_rule $(target): $@ rm -Rf build dist - pyinstaller $@.spec + pyinstaller 
--clean $@.spec cp dist/$@ $(TARGET_PATH)/ include $(SRC)/mk/target.inc diff --git a/source/sysak.c b/source/sysak.c old mode 100644 new mode 100755 index 67e0db5dd9fc11b365dba52e6695c72354d6aa8e..b70a51691ac25507090a9f8eff11a565d7e0d31e --- a/source/sysak.c +++ b/source/sysak.c @@ -12,6 +12,7 @@ #define KVERSION 64 #define MAX_SUBCMD_ARGS 512 #define MAX_DEPEND_LEN 128 +#define DEFAULT_LEN 128 #define MAX_NAME_LEN 64 #define MAX_WORK_PATH_LEN 512 #define ERR_NOSUBTOOL 2 @@ -30,7 +31,29 @@ enum TOOL_TYPE { MAX_TOOL_TYPE }; -struct tool_info { +struct diagnose_info +{ + char default_cmd[MAX_SUBCMD_ARGS]; + char cmd[MAX_SUBCMD_ARGS]; + char name[MAX_NAME_LEN]; + char helpinfo[MAX_SUBCMD_ARGS]; +}; + +struct diagnose_tool +{ + struct diagnose_info tool; + struct diagnose_tool *next; +}; + +struct diagnose_menu_node +{ + struct diagnose_menu_node *next; + struct diagnose_tool *diagnose_tool_head; + char diagnose_menu_name[MAX_NAME_LEN]; +}; + +struct tool_info +{ char module[MAX_NAME_LEN]; char name[MAX_NAME_LEN]; char helpinfo[MAX_SUBCMD_ARGS]; @@ -47,7 +70,7 @@ struct tool_list { }; char *module = "/sysak.ko"; -char *log_path="/var/log/sysak"; +char *log_path = "/var/log/sysak"; char *system_modules = "/proc/modules"; char *sysak_root_path = "/usr/local/sysak"; char *python_bin = "/usr/bin/python"; @@ -58,7 +81,7 @@ char kern_version[KVERSION]; char machine[KVERSION]; char prev_dep[MAX_DEPEND_LEN]; char post_dep[MAX_DEPEND_LEN]; -char run_depend[MAX_DEPEND_LEN]= {0}; +char run_depend[MAX_DEPEND_LEN] = {0}; char tools_path[MAX_WORK_PATH_LEN]; char tools_exec[MAX_WORK_PATH_LEN] = {0}; char sysak_rule[MAX_WORK_PATH_LEN]; @@ -70,6 +93,7 @@ char sysak_components_server[MAX_WORK_PATH_LEN]; char sysak_oss_server[MAX_WORK_PATH_LEN]; char download_cmd[MAX_WORK_PATH_LEN]; char region[MAX_NAME_LEN] = {0}; +char sysak_diagnose_rule[MAX_WORK_PATH_LEN]; bool pre_module = false; bool post_module = false; @@ -86,17 +110,21 @@ static struct tool_list tool_lists[MAX_TOOL_TYPE]={ 
{"sysak tools for bclinux", NULL} }; +static struct diagnose_menu_node *diag_list_head, *diag_menu_choose; +static struct diagnose_tool *diag_tool_choose = NULL; + static void usage(void) { fprintf(stdout, - "Usage: sysak [cmd] [subcmd [cmdargs]]\n" - " cmd:\n" - " list [-a], show subcmds\n" - " -h/help, help informati on for specify subcmd\n" - " -g, auto download btf and components from anolis mirrors\n" - " -oss, auto download btf and components from oss\n" - " -d, only download btf and components. example: sysak -oss -d\n" - " subcmd: see the result of list\n"); + "Usage: sysak [cmd] [subcmd [cmdargs]]\n" + " cmd:\n" + " list [-a], show subcmds\n" + " -h/help, help informati on for specify subcmd\n" + " -g, auto download btf and components from anolis mirrors\n" + " -oss, auto download btf and components from oss\n" + " -d, only download btf and components. example: sysak -oss -d\n" + " -c, show diagnosis center\n" + " subcmd: see the result of list\n"); } static void strim(char *str) @@ -106,17 +134,17 @@ static void strim(char *str) while (pstr && *pstr) { if (isspace(*pstr)) { - *pstr = 0; - has_space = true; - } + *pstr = 0; + has_space = true; + } else if (!start) { - start = pstr; - } - pstr++; + start = pstr; + } + pstr++; } if (start && start != str) - strcpy(str, start); + strcpy(str, start); } static void kern_release(void) @@ -221,7 +249,7 @@ static bool get_module_tag(void) strcpy(module_tag, pstr); strim(module_tag); if (strlen(module_tag) == 0) - return false; + return false; return true; } } @@ -259,7 +287,7 @@ static int down_install_ext_tools(const char *tool) char *pstr; sprintf(download_cmd, "wget %s/sysak/ext_tools/%s/%s/rule -P %s &>/dev/null", - sysak_components_server, machine, tool, tools_path); + sysak_components_server, machine, tool, tools_path); //printf("%s ... 
\n", download_cmd); ret = system(download_cmd); if (ret < 0) @@ -328,8 +356,13 @@ static int down_install(const char *component_name) char ko_path[MAX_WORK_PATH_LEN]; char ko_file[MAX_WORK_PATH_LEN]; char btf_file[MAX_WORK_PATH_LEN]; + int retry_cnt = 0; int ret = 0; + sprintf(ko_path, "%s/%s", module_path, kern_version); + sprintf(ko_file, "%s/%s/%s", module_path, kern_version, "sysak.ko"); + sprintf(btf_file, "%s/vmlinux-%s", tools_path, kern_version); + if (!get_server_addr()) return -1; @@ -337,40 +370,63 @@ static int down_install(const char *component_name) if (!get_module_tag()) return -1; - sprintf(ko_path, "%s/%s", module_path, kern_version); - sprintf(ko_file, "%s/%s/%s", module_path, kern_version, "sysak.ko"); - sprintf(btf_file, "%s/%s/vmlinux-%s", tools_path, kern_version, kern_version); if (access(ko_path,0) != 0) mkdir(ko_path, 0755 ); //sprintf(download_cmd, "wget %s/sysak/sysak_modules/%s/%s/sysak.ko -P %s/%s 1&>/dev/null", // sysak_components_server, machine, module_tag, module_path, kern_version); if (oss_get_components){ +retry_ko_oss: sprintf(download_cmd, "wget -T 5 -t 2 -q -O %s/%s/sysak.ko %s-%s.oss-%s-internal.aliyuncs.com/home/hive/sysak/modules/%s/sysak-%s.ko", module_path, kern_version, sysak_oss_server, ®ion[0], ®ion[0], machine, kern_version); } else - sprintf(download_cmd, "wget %s/sysak/modules/%s/sysak-%s.ko -O %s/%s/sysak.ko &>/dev/null", +retry_ko: + sprintf(download_cmd, "wget -T 5 -t 2 -q %s/sysak/modules/%s/sysak-%s.ko -O %s/%s/sysak.ko &>/dev/null", sysak_components_server, machine, kern_version, module_path, kern_version); //printf("%s ... 
\n", download_cmd); + ret = system(download_cmd); if (access(ko_file,0) == 0) ret = 0; + else if (retry_cnt == 0){ + if (oss_get_components){ + retry_cnt++; + goto retry_ko; + } + else { + retry_cnt++; + goto retry_ko_oss; + } + } return ret; + } else if (strcmp(component_name, "btf") == 0) { //sprintf(download_cmd, "wget %s/coolbpf/btf/%s/vmlinux-%s -P %s/%s 1&>/dev/null", // sysak_components_server, machine, kern_version, tools_path, kern_version); if (oss_get_components){ +retry_btf_oss: sprintf(download_cmd, "wget -T 5 -t 2 -q -P %s %s-%s.oss-%s-internal.aliyuncs.com/home/hive/btf/%s/vmlinux-%s", tools_path, sysak_oss_server, ®ion[0], ®ion[0], machine, kern_version); } else - sprintf(download_cmd, "wget %s/coolbpf/btf/%s/vmlinux-%s -P %s &>/dev/null", +retry_btf: + sprintf(download_cmd, "wget -T 5 -t 2 -q %s/coolbpf/btf/%s/vmlinux-%s -P %s &>/dev/null", sysak_components_server, machine, kern_version, tools_path); //printf("%s ... \n", download_cmd); ret = system(download_cmd); if (access(btf_file,0) == 0) ret = 0; + else if (retry_cnt == 0){ + if (oss_get_components){ + retry_cnt++; + goto retry_btf; + } + else { + retry_cnt++; + goto retry_btf_oss; + } + } return ret; } else { return down_install_ext_tools(component_name); @@ -413,7 +469,7 @@ static int pre_down_install(const char *module, const char *btf, const char *com if (ret < 0) ret = -EEXIST; } else { - ret = -EEXIST; + ret = -EEXIST; } return ret; } @@ -579,7 +635,7 @@ static int exectue(int argc, char *argv[]) snprintf(subcmd_exec_final, sizeof(subcmd_exec_final), "%s;%s", sysak_work_path, tools_exec); ret = my_system(subcmd_exec_final); if (ret < 0) - return ret; + return ret; if (post_module) mod_ctrl(false); @@ -709,6 +765,139 @@ static void subcmd_list(bool show_all) print_each_tool(show_all); } +static int build_diagnose_cmd_info_from_file(FILE *fp) +{ + struct diagnose_menu_node *menu_node; + struct diagnose_tool *node; + char buf[MAX_NAME_LEN + MAX_SUBCMD_ARGS], *default_str, *cmd_str; + char 
tools_name[MAX_NAME_LEN], menu_name[MAX_NAME_LEN]; + char tools_class_module[MAX_NAME_LEN]; + int ret = 0, flag = 0, menu_num = 0; + + // diag_list_head + + while (fgets(buf, sizeof(buf), fp)) + { + sscanf(buf, "%[^/]/%[^:]", menu_name, tools_name); + + if (!menu_name) + continue; + + menu_node = diag_list_head; + flag = 0; + while (menu_node && flag == 0) + { + if (strcmp(menu_name, menu_node->diagnose_menu_name) == 0) + { + flag = 1; + break; + } + menu_node = menu_node->next; + } + + if (!flag) + { + // create new menu + menu_node = malloc(sizeof(struct diagnose_menu_node)); + memset(menu_node, 0, sizeof(struct diagnose_menu_node)); + strcpy(menu_node->diagnose_menu_name, menu_name); + menu_node->next = diag_list_head; + diag_list_head = menu_node; + } + + node = malloc(sizeof(struct diagnose_tool)); + if (!node) + { + fclose(fp); + return -1; + } + memset(node, 0, sizeof(struct diagnose_tool)); + + strcpy(node->tool.name, tools_name); + default_str = strstr(buf, ":default{"); + cmd_str = strstr(buf, ":cmd{"); + + if (default_str) + sscanf(default_str, ":default{%[^}]}", node->tool.default_cmd); + if (cmd_str) + sscanf(cmd_str, ":cmd{%[^}]}", node->tool.cmd); + + if (menu_node->diagnose_tool_head == NULL) + { + menu_node->diagnose_tool_head = node; + } + else + { + node->next = menu_node->diagnose_tool_head; + menu_node->diagnose_tool_head = node; + } + ret++; + } + + fclose(fp); + return ret; +} + +static int build_diagnose_cmd_info() +{ + FILE *fp; + int ret = 0; + + if (access(sysak_diagnose_rule, 0) != 0) + { + printf(".sysak.diag.config not found!\n"); + return 0; + } + + fp = fopen(sysak_diagnose_rule, "r"); + if (fp) + ret += build_diagnose_cmd_info_from_file(fp); + + return ret; +} + +static void show_diagnose_menu() +{ + struct diagnose_menu_node *menu_node = diag_list_head; + int num = 0; + printf("\n"); + while (menu_node) + { + num++; + printf("%d %s\n", num, menu_node->diagnose_menu_name); + menu_node = menu_node->next; + } +} + +static void 
show_diagnose_tool_menu() +{ + struct diagnose_tool *node = diag_menu_choose->diagnose_tool_head; + int num = 0; + printf("\n"); + while (node) + { + num++; + printf(" %d %s\n", num, node->tool.name); + node = node->next; + } +} + +static int choose_diagnose_menu() +{ + int num; + printf("\nchoose a menu number:"); + scanf("%d", &num); + return num; +} + +static int choose_diagnose_tool_menu() +{ + int num; + printf("\nchoose a tool number:"); + scanf("%d", &num); + return num; +} + static bool tool_rule_parse(char *path, char *tool) { char *pstr = NULL; @@ -733,7 +922,7 @@ static bool tool_rule_parse(char *path, char *tool) sscanf(buf,"%[^:]:%[^:]", class_name, tools_name); if (strstr(class_name,"combine")) if (strstr(class_name,"/")) - continue; + continue; if (strcmp(tools_name, tool)) { continue; @@ -771,12 +960,82 @@ static bool tool_lookup(char *tool) } if (!tool_rule_parse(sysak_other_rule, tool) && - !tool_rule_parse(sysak_rule, tool)) + !tool_rule_parse(sysak_rule, tool)) return false; return true; } +int copy_file(char *dest_file, char *src_file) +{ + int cnt = 0; + FILE *fp1 = fopen(dest_file,"w"); + FILE *fp2 = fopen(src_file,"r"); + + if(fp1 == NULL) { + printf("%s:fopen failed!", dest_file); + return -1; + } + if(fp2 == NULL) { + printf("%s: fopen failed!", src_file); + return -1; + } + + char buffer = fgetc(fp2); + + while(!feof(fp2)) { + cnt++; + fputc(buffer,fp1); + buffer = fgetc(fp2); + } + fclose(fp1); + fclose(fp2); + return cnt; +} + +int has_string(char *dest_file, char *substring) +{ + FILE *fp; + int line=0; + char file_str[DEFAULT_LEN]; + + fp=fopen(dest_file,"r"); + if(fp==NULL) + { + return -1; + } + + while(fgets(file_str,sizeof(file_str),fp)) + { + line++; + if(strstr(file_str,substring)) + { + return 1; + } + } + fclose(fp); + return 0; +} + +void btf_support_check(void){ + char config[DEFAULT_LEN]; + char local_btf[DEFAULT_LEN] = "/sys/kernel/btf/vmlinux"; + char tool_btf[DEFAULT_LEN]; + char *config_name = "CONFIG_BPF=y"; + + if 
(access(local_btf,0) == 0){ + snprintf(tool_btf, sizeof(tool_btf), "%s/vmlinux-%s", tools_path, kern_version); + if (copy_file(tool_btf, local_btf) > 0){ + btf_depend = false; + return; + } + } + + snprintf(config, sizeof(config), "/boot/config-%s", kern_version); + if (!has_string(config, config_name)) + btf_depend = false; +} + static int subcmd_parse(int argc, char *argv[]) { int i; @@ -789,8 +1048,8 @@ static int subcmd_parse(int argc, char *argv[]) if (!strcmp(argv[0], "-d")) { only_download = true; pre_module = true; - btf_depend = true; - goto exec; + btf_depend = true; + goto exec; } if (strstr(prev_dep, "btf") != NULL) { @@ -816,9 +1075,76 @@ static int subcmd_parse(int argc, char *argv[]) } } exec: + btf_support_check(); return exectue(argc, argv); } +static void get_menu_node(int menu_num) +{ + diag_menu_choose = diag_list_head; + int num = 1; + while (diag_menu_choose && num < menu_num) + { + num++; + diag_menu_choose = diag_menu_choose->next; + } + if (diag_menu_choose) + printf("\n%s:", diag_menu_choose->diagnose_menu_name); + else + printf("please choose a valid menu number!\n"); +} + +static void get_diagnose_tool_node(int tool_num) +{ + diag_tool_choose = diag_menu_choose->diagnose_tool_head; + int num = 1; + while (diag_tool_choose && num < tool_num) + { + num++; + diag_tool_choose = diag_tool_choose->next; + } + if (!diag_tool_choose) + printf("please choose a valid tool number!\n"); +} + +static void show_diagnose_usage() +{ + printf("\nusage of %s:\n", diag_tool_choose->tool.name); + // printf("strlen=%d",strlen(diag_tool_choose->tool.cmd)); + if (strlen(diag_tool_choose->tool.cmd)) + { + printf("\n%16s %s\n", " ", diag_tool_choose->tool.cmd); + printf("\ndefault cmd:\n%16s%s \n\n", " ", diag_tool_choose->tool.default_cmd); + } + else + printf("\n%16s %s\n\n", " ", diag_tool_choose->tool.default_cmd); +} + +static int diagnose_page() +{ + if (build_diagnose_cmd_info() == 0) + { + printf("diagnose tool not found"); + return 0; + } + 
show_diagnose_menu(); + int menu_num, tool_num; + while (diag_menu_choose == NULL) + { + menu_num = choose_diagnose_menu(); + get_menu_node(menu_num); + } + show_diagnose_tool_menu(); + while (diag_tool_choose == NULL) + { + tool_num = choose_diagnose_tool_menu(); + get_diagnose_tool_node(tool_num); + } + show_diagnose_usage(); + + return 0; +} + static int parse_arg(int argc, char *argv[]) { bool show_all = false; @@ -836,7 +1162,14 @@ static int parse_arg(int argc, char *argv[]) return 0; } - if (!strcmp(argv[1], "help") || !strcmp(argv[1], "-h") || !strcmp(argv[1], "--help")) { + if (!strcmp(argv[1], "-c")) + { + diagnose_page(); + return 0; + } + + if (!strcmp(argv[1], "help") || !strcmp(argv[1], "-h") || !strcmp(argv[1], "--help")) + { usage(); return 0; } @@ -904,7 +1237,7 @@ static void set_path(char *argv[]) if (access(components_path,0) != 0) snprintf(components_path, sizeof(tools_path), "%s%s", - sysak_root_path, "/.sysak_components"); + sysak_root_path, "/.sysak_components"); snprintf(tools_path, sizeof(tools_path), "%s%s", components_path, "/tools/"); @@ -913,7 +1246,8 @@ static void set_path(char *argv[]) snprintf(sysak_rule, sizeof(sysak_rule), "%s%s", components_path, "/tools/.sysak.rules"); snprintf(sysak_other_rule, sizeof(sysak_other_rule), "%s%s%s%s", - components_path, "/tools/",kern_version,"/.sysak.rules"); + components_path, "/tools/", kern_version, "/.sysak.rules"); + snprintf(sysak_diagnose_rule, sizeof(sysak_diagnose_rule), "%s", "/etc/sysak/.sysak.diag.config"); snprintf(sysak_work_path, sizeof(sysak_work_path), "%s%s", "export SYSAK_WORK_PATH=", components_path); } diff --git a/source/tools/combine/inj/Makefile b/source/tools/combine/inj/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..89b8ad7e28263a65e6ab50cc7437bb679f762fac --- /dev/null +++ b/source/tools/combine/inj/Makefile @@ -0,0 +1,5 @@ +target := inj + +mods := main.o + +include $(SRC)/mk/csrc.mk diff --git a/source/tools/combine/inj/README.md 
b/source/tools/combine/inj/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7d7b3413f38e7272190e3f5b2c2c0a42719998ac --- /dev/null +++ b/source/tools/combine/inj/README.md @@ -0,0 +1,2 @@ +# 功能说明 +系统及应用故障注入 diff --git a/source/tools/combine/inj/main.c b/source/tools/combine/inj/main.c new file mode 100644 index 0000000000000000000000000000000000000000..f8a6acf1715e951eaca871f6de46e7ea5fe8c033 --- /dev/null +++ b/source/tools/combine/inj/main.c @@ -0,0 +1,436 @@ +#include +#include +#include +#include +#include + +typedef int (*INJ_FUNC)(void *args); +struct inj_op { + const char *inj_name; + INJ_FUNC inject; + INJ_FUNC check_result; +}; + +int inject_oops(void *args); +int inject_panic(void *args); +int inject_hung_task_panic(void *args); +int inject_high_load(void *args); +int inject_high_sys(void *args); +int inject_softlockup(void *args); +int inject_taskhang(void *args); +int inject_taskloop(void *args); +int inject_tasklimit(void *args); +int inject_fdlimit(void *args); +int inject_oom(void *args); +int inject_packdrop(void *args); +int inject_runqlat(void *args); +int inject_iolat(void *args); +int inject_netlat(void *args); + +int check_high_load(void *args); +int check_high_sys(void *args); +int check_softlockup(void *args); +int check_taskhang(void *args); +int check_taskloop(void *args); +int check_tasklimit(void *args); +int check_fdlimit(void *args); +int check_oom(void *args); +int check_packdrop(void *args); +int check_runqlat(void *args); +int check_iolat(void *args); +int check_netlat(void *args); + +struct inj_op inj_ops[] = { + {"oops", inject_oops, NULL}, + {"panic", inject_panic, NULL}, + {"hung_task_panic", inject_hung_task_panic, NULL}, + {"high_load", inject_high_load, check_high_load}, + {"high_sys", inject_high_sys, check_high_sys}, + {"softlockup", inject_softlockup, check_softlockup}, + {"taskhang", inject_taskhang, check_taskhang}, + {"taskloop", inject_taskloop, check_taskloop}, + {"tasklimit", 
inject_tasklimit, check_tasklimit}, + {"fdlimit", inject_fdlimit, check_fdlimit}, + {"oom", inject_oom, check_oom}, + {"packdrop", inject_packdrop, check_packdrop}, + {"runqlat", inject_runqlat, check_runqlat}, + {"iolat", inject_iolat, check_iolat}, + {"netlat", inject_netlat, check_netlat}, +}; + +#define NUM_INJ_OPS (sizeof(inj_ops)/sizeof(struct inj_op)) + +static char *work_path; + +static void usage(void) +{ + int i; + + printf("list for problem injection:\n"); + for (i = 0; i < NUM_INJ_OPS; i++) { + printf(" %s\n", inj_ops[i].inj_name); + } +} + +static void get_work_path(void) +{ + work_path = getenv("SYSAK_WORK_PATH"); +} + +static int exec_extern_tool(const char *name, const char *arg) +{ + char filepath[256]; + if (!work_path) + return -1; + + snprintf(filepath, 255, "%s/tools/%s", work_path, name); + filepath[255] = 0; + if (access((filepath), X_OK)) { + printf("file %s not exist\n",filepath); + return -1; + } + + if (arg) + snprintf(filepath, 255, "bash -c \"%s/tools/%s %s\" &", work_path, name, arg); + else + snprintf(filepath, 255, "bash -c %s/tools/%s &", work_path, name); + + filepath[255] = 0; + return system(filepath); +} + +int inject_oops(void *args) +{ + int pid = fork(); + + if (pid < 0) { + return -1; + } else if (pid == 0) { + /*wait for parent return*/ + sleep(2); + system("echo c > /proc/sysrq-trigger"); + return 0; + } + + return 0; +} + +int inject_panic(void *args) +{ + return 0; +} + +int inject_hung_task_panic(void *args) +{ + return 0; +} + +int inject_high_load(void *args) +{ + return 0; +} + +int inject_high_sys(void *args) +{ + return exec_extern_tool("high_sys", NULL); +} + +int inject_softlockup(void *args) +{ + return 0; +} + +int inject_taskhang(void *args) +{ + return 0; +} + +int inject_taskloop(void *args) +{ + return 0; +} + +static int get_system_process_limit(void) +{ + int pid_max = -1; + int threads_max; + FILE *fp; + char buf[256]; + + fp = fopen("/proc/sys/kernel/pid_max", "r"); + if (!fp) { + printf("open pid_max 
failed\n"); + return -1; + } + + memset(buf, 0, 256); + if (fgets(buf, 256, fp)) { + pid_max = atoi(buf); + } + + fclose(fp); + if (pid_max < 0) + return -1; + + fp = fopen("/proc/sys/kernel/threads-max", "r"); + if (!fp) { + printf("open threads-max failed\n"); + return -1; + } + + memset(buf, 0, 256); + if (fgets(buf, 256, fp)) { + threads_max = atoi(buf); + } + + fclose(fp); + if (threads_max < 0) + return -1; + + return pid_max < threads_max ? pid_max: threads_max; +} + +static int get_threads_nr(void) +{ + FILE *fp; + char buf[256]; + int threads, dummy, ret; + float fdummy; + + fp = fopen("/proc/loadavg", "r"); + if (!fp) + return -1; + + fgets(buf, 256, fp); + ret = sscanf(buf, "%f %f %f %d/%d %d", &fdummy, &fdummy, &fdummy, &dummy, &threads, &dummy); + + if (ret < 1) + return -1; + + return threads; +} + + +int inject_tasklimit(void *args) +{ + char buff[32]; + int task_nr = get_system_process_limit() - get_threads_nr(); + + if (task_nr > 0) { + snprintf(buff, 32, "%d", task_nr); + args = buff; + } else + args = NULL; + return exec_extern_tool("process_limit", args); +} + +int inject_fdlimit(void *args) +{ + return 0; +} + +int inject_oom(void *args) +{ + return exec_extern_tool("goom", NULL); +} + +int inject_packdrop(void *args) +{ + return 0; +} + +int inject_runqlat(void *args) +{ + return 0; +} + +int inject_iolat(void *args) +{ + return 0; +} + +int inject_netlat(void *args) +{ + return 0; +} + +int check_high_load(void *args) +{ + return 0; +} + +struct proc_stat { + unsigned long user; + unsigned long nice; + unsigned long sys; + unsigned long idle; + unsigned long iowait; + unsigned long irq; + unsigned long softirq; +}; + +static int read_proc_stat(struct proc_stat *stat) +{ + FILE *fp; + char buf[256]; + int ret; + + fp = fopen("/proc/stat", "r"); + if (!fp) + return -1; + + fgets(buf, 256, fp); + ret = sscanf(buf, "cpu %lu %lu %lu %lu %lu %lu %lu", + &stat->user, &stat->nice, &stat->sys, &stat->idle, + &stat->iowait, &stat->irq, &stat->softirq); 
+ + if (ret < 7) + return -1; + + return 0; +} + +static int calculate_sys_util(void) +{ + struct proc_stat stat1, stat2; + unsigned long total = 0, sys; + float sys_util; + + if (read_proc_stat(&stat1)) + return -1; + sleep(1); + if (read_proc_stat(&stat2)) + return -1; + + sys = stat2.sys - stat1.sys; + + total += stat2.user - stat1.user; + total += stat2.nice - stat1.nice; + total += sys; + total += stat2.idle - stat1.idle; + total += stat2.iowait - stat1.iowait; + total += stat2.irq - stat1.irq; + total += stat2.softirq - stat1.softirq; + + sys_util = (float)sys / total; + return (int)(sys_util*100); +} + +int check_high_sys(void *args) +{ + int i; + + for (i = 0; i < 5; i++) { + if (calculate_sys_util() > 5) + return 0; + sleep(1); + } + + return -1; +} + +int check_softlockup(void *args) +{ + return 0; +} + +int check_taskhang(void *args) +{ + return 0; +} + +int check_taskloop(void *args) +{ + return 0; +} + +int check_tasklimit(void *args) +{ + int i, limit; + + limit = get_system_process_limit(); + if (limit < 0) + return -1; + + limit -= 100; + for (i = 0; i < 20; i++) { + if (get_threads_nr() > limit) + return 0; + sleep(1); + } + + return -1; +} + +int check_fdlimit(void *args) +{ + return 0; +} + +int check_oom(void *args) +{ + return 0; +} + +int check_packdrop(void *args) +{ + return 0; +} + +int check_runqlat(void *args) +{ + return 0; +} + +int check_iolat(void *args) +{ + return 0; +} + +int check_netlat(void *args) +{ + return 0; +} + +int main(int argc, char *argv[]) +{ + int ret = -EINVAL, i; + char *args = NULL; + + if (argc < 2) + return ret; + + if (argc == 2 && !strcmp(argv[1], "-h")) { + usage(); + return 0; + } + + get_work_path(); + + if (argc == 3) + args = argv[2]; + + for (i = 0; i < NUM_INJ_OPS; i++) { + if (!strcmp(argv[1], inj_ops[i].inj_name)) { + ret = inj_ops[i].inject(args); + if (ret == 0){ + if (inj_ops[i].check_result) + ret = inj_ops[i].check_result(args); + } + + break; + } + } + + if (ret) { + if (i >= NUM_INJ_OPS) + 
printf("Invalid inputs\n"); + + printf("Failed\n"); + } else { + printf("Done\n"); + } + + return 0; +} + diff --git a/source/tools/combine/iosdiag/data_analysis/iosdiag_data_analysis.py b/source/tools/combine/iosdiag/data_analysis/iosdiag_data_analysis.py index 9daea5a9d37379de2d465917225fe0c60a4a6553..3bb249d8009775f642ffbf8b26187b37d80016b4 100644 --- a/source/tools/combine/iosdiag/data_analysis/iosdiag_data_analysis.py +++ b/source/tools/combine/iosdiag/data_analysis/iosdiag_data_analysis.py @@ -36,14 +36,15 @@ class latencyAnalysis: self.totalDiskCnt = 0 self.threshold = 0 self.componentDicts = OrderedDict([('os(block)',0),('os(driver)',1),\ - ('disk',2),('os(complete)',3)]) + ('disk',2),('os(complete)',3),('os(done)',4)]) self.delayStatJsonStr = \ '{ \ "diskname":"","delays":[ \ {"component":"os(block)","percent":"","max":0,"min":1000000000,"avg":0},\ {"component":"os(driver)","percent":"","max":0,"min":1000000000,"avg":0},\ {"component":"disk","percent":"","max":0,"min":1000000000,"avg":0}, \ - {"component":"os(complete)","percent":"","max":0,"min":1000000000,"avg":0}]\ + {"component":"os(complete)","percent":"","max":0,"min":1000000000,"avg":0},\ + {"component":"os(done)","percent":"","max":0,"min":1000000000,"avg":0}]\ }' newDelayStatDict = json.loads("["+self.delayStatJsonStr + "]", object_pairs_hook=OrderedDict) self.delayStatDicts.setdefault('summary', newDelayStatDict) diff --git a/source/tools/combine/iosdiag/entry/iosdiag.sh b/source/tools/combine/iosdiag/entry/iosdiag.sh index ccbe458f588f9aaacd73f85c55f87df46459f7f9..aeead6d0d8cf3a87b2ea0abd7d301fae50babb3c 100755 --- a/source/tools/combine/iosdiag/entry/iosdiag.sh +++ b/source/tools/combine/iosdiag/entry/iosdiag.sh @@ -15,10 +15,12 @@ fi TOOLS_PATH=$WORK_PATH/tools/`uname -r` LIB_PATH=$WORK_PATH/lib/`uname -r` latency_bin=$WORK_PATH/tools/latency +iolatency_bin=$WORK_PATH/tools/tracing_latency hangdetect_bin=$WORK_PATH/tools/hangdetect data_analysis=$WORK_PATH/tools/iosdiag_data_analysis 
-logfile="/var/log/sysak/iosdiag/$1/result.log.seq" threshold_arg="-t 1000" +kernel_version=$(uname -r) +config_file="/boot/config-$kernel_version" function usage() { echo "Usage: sysak iosdiag [options] subcmd [cmdargs]" @@ -32,6 +34,16 @@ function usage() { echo " -s latency|[hangdetect], stop diagnosis" } +check_bpf_config() { + if grep -q "CONFIG_BPF=y" "$config_file"; then + # echo "found CONFIG_BPF=y" + support_bpf="Yes" + else + # echo "CONFIG_BPF=y not found" + support_bpf="No" + fi +} + upload_data() { datapath=$(dirname $logfile) cd $datapath @@ -57,7 +69,10 @@ datafile_analysis() { elif [ -e "/usr/bin/python3.5" ]; then run_python="/usr/bin/python3.5" fi - $run_python $data_analysis --$1 -s -f $logfile $threshold_arg + if [ -e "$data_analysis" ] + then + $run_python $data_analysis --$1 -s -f $logfile $threshold_arg + fi fi } @@ -121,24 +136,41 @@ disable_hangdetect() { } enable_latency() { - if [ ! -e "$latency_bin" ]; then - echo "$latency_bin not found" - echo "iosdiag latency not support '$(uname -r)', please report to the developer" - exit -1 - fi - { - flock -n 3 - [ $? -eq 1 ] && { echo "another latency is running."; exit -1; } - trap disable_latency SIGINT SIGTERM SIGQUIT - #mkdir -p `dirname $datafile` - chmod +x $latency_bin - rm -f $(dirname $logfile)/result.log* - #$SYSAK_WORK_PATH/../sysak btf - $latency_bin $* & - latency_pid=$! - wait $latency_pid - disable_latency - } 3<> /tmp/latency.lock + check_bpf_config + args=("$@") + num_args=${#args[@]} + if [ "$support_bpf" = "Yes" ]; then + if [ ! -e "$latency_bin" ]; then + echo "$latency_bin not found" + echo "iosdiag latency not support '$(uname -r)', please report to the developer" + exit -1 + fi + else + if [ ! 
-e "$iolatency_bin" ]; then + echo "$iolatency_bin not found" + echo "iosdiag iolatency not support '$(uname -r)', please report to the developer" + exit -1 + fi + latency_bin=$iolatency_bin + if (( $num_args % 2 == 1 && $num_args > 1 )); then + last_arg=${args[$num_args-1]} + unset args[num_args-1] + args+=("-d" "$last_arg") + fi + fi + + # echo ${args[@]} + { + flock -n 3 + [ $? -eq 1 ] && { echo "another latency is running."; exit -1; } + trap disable_latency SIGINT SIGTERM SIGQUIT + chmod +x $latency_bin + rm -f $(dirname $logfile)/result.log* + $latency_bin ${args[@]} & + latency_pid=$! + wait $latency_pid + disable_latency + } 3<> /tmp/latency.lock } disable_latency() { @@ -162,7 +194,6 @@ disable_latency() { exit 0 } - #execute command,every command need such args: # -h/--help: command usage # -f/--file: output files, default stdout @@ -178,7 +209,7 @@ function execute() { } diag_stop= -while getopts 'hs:u:' OPT; do +while getopts 'hos:u:' OPT; do case $OPT in "u") url=$OPTARG @@ -202,6 +233,7 @@ fi subcmd=${@:$OPTIND:1} subargs=${*:$OPTIND+1}; -[ "$subcmd" != "latency" -a "$subcmd" != "hangdetect" ] && { echo "not support subcmd $subcmd!!!"; usage; exit -1; } +[[ "$subcmd" != "latency" && "$subcmd" != "hangdetect" ]] && { echo "not support subcmd $subcmd!!!"; usage; exit -1; } +logfile="/var/log/sysak/iosdiag/$subcmd/result.log.seq" execute $subcmd $subargs diff --git a/source/tools/combine/iosdiag/iolat/Makefile b/source/tools/combine/iosdiag/iolat/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..a8bffafab2571952f4b198f305a4fc80af91f7ff --- /dev/null +++ b/source/tools/combine/iosdiag/iolat/Makefile @@ -0,0 +1,5 @@ +target = tracing_latency + +mods += $(target) + +include $(SRC)/mk/pyinstaller.mk diff --git a/source/tools/combine/iosdiag/iolat/iotracingClass.py b/source/tools/combine/iosdiag/iolat/iotracingClass.py new file mode 100644 index 0000000000000000000000000000000000000000..dd2e30f90329f6a4e31f8ee43939ba44f04de3bb --- 
/dev/null +++ b/source/tools/combine/iosdiag/iolat/iotracingClass.py @@ -0,0 +1,338 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- + +import os +import sys +import signal +import string +import time +import re +import json +from collections import OrderedDict +from subprocess import PIPE, Popen + + +def execCmd(cmd): + p = Popen(cmd, shell=True, stdout=PIPE, stderr=PIPE) + return p.stdout.read().decode('utf-8') + + +def echoFile(filename, txt): + execCmd("echo \""+txt+"\" > "+filename) + + +def echoFileAppend(filename, txt): + execCmd("echo \'"+txt+"\' >> "+filename) + + +def getDevt(devname): + try: + with open('/sys/class/block/' + devname + '/dev') as f: + dev = f.read().split(':') + return ((int(dev[0]) << 20) + int(dev[1])) + except Exception: + return -1 + +def getDevtRegion(devname): + if os.path.exists('/sys/block/'+devname): + isPart = False + elif os.path.exists('/sys/class/block/'+devname): + isPart = True + else: + return [-1, -1] + master = devname if not isPart else \ + os.readlink('/sys/class/block/'+devname).split('/')[-2] + partList = list( + filter(lambda x: master in x, + os.listdir('/sys/class/block/'+master))) + if not partList: + partList = [] + partList.append(master) + return [getDevt(p) for p in partList] + +class iotracingClass(): + def __init__(self, devname, thresh, log): + self.devt = min(getDevtRegion(devname)) if devname is not None else -1 + self.devname = devname + self.thresh = int(thresh) if thresh else 5000 + self.kprobeEvent = "/sys/kernel/debug/tracing/kprobe_events" + self.tracingDir = "/sys/kernel/debug/tracing/instances/iotracing" + self.blkTraceDir = self.tracingDir+"/events/block" + self.kprobeDir = self.tracingDir+"/events/kprobes" + self.tracepoints = \ + ['block_getrq', 'block_rq_insert', 'block_rq_issue', + 'block_rq_complete'] + arch = execCmd('lscpu | grep Architecture').split(":", 1)[1].strip() + regs = { + "arm":['%r0','%r1'], "x86":['%di', '%si'], "aarch64":['%x0','%x1']} + argv0 = argv1 = '' + for key,val in 
regs.items(): + if arch.startswith(key): + argv0 = val[0] + argv1 = val[1] + break + if argv0 == '': + raise ValueError('arch %s not support' % arch) + kprobepoints = [ + ('p scsi_dispatch_cmd dev=+0xc(+0xc8(+0x100(%s))):string'\ + ' sector=+0x68(+0x100(%s)):u64' %(argv0, argv0), + 'p scsi_done dev=+0xc(+0xc8(+0x100(%s))):string'\ + ' sector=+0x68(+0x100(%s)):u64' %(argv0, argv0)), + ('p nvme_queue_rq rq=+0x0(%s):u64 kick=+0x10(%s):u8'\ + ' dev=+0xc(+0xc8(+0x0(%s))):string'\ + ' sector=+0x68(+0x0(%s)):u64' %(argv1, argv1, argv1, argv1), + 'p blk_mq_complete_request rq=%s:u64 sector=+0x68(%s):u64' %( + argv0, argv0)), + ('p virtio_queue_rq rq=+0x0(%s):u64 kick=+0x10(%s):u8'\ + ' dev=+0xc(+0xc8(+0x0(%s))):string'\ + ' sector=+0x68(+0x0(%s)):u64' %(argv1, argv1, argv1, argv1), + 'p blk_mq_complete_request rq=%s:u64 sector=+0x68(%s):u64' %( + argv0, argv0)), + ('p blk_mq_complete_request rq=+0x0(%s):u64 '\ + ' sector=+0x68(+0x0(%s)):u64' %(argv0, argv0), + 'p blk_account_io_done dev=+0xc(+0xc8(%s)):string'\ + ' sector=+0x68(%s):u64' %( + argv0, argv0))] + try: + link = os.readlink('/sys/class/block/'+devname) + except Exception: + link = "none" + module = [l for l in ["virtio", "nvme", "target"] if l in link] + mod = module[0] if len(module) > 0 else 'none' + if mod == 'target': + mod = 'scsi' + self.kprobepoints = [] + for k in kprobepoints: + if mod in k[0] or mod == 'none': + self.kprobepoints = list(set(self.kprobepoints+list(k))) + if mod == 'none': + self.devname = None + + #self.stage = { + # "1":"os(block_G2I)", "2":"os(block_I2D)", "3":"os(driver)", + # "4":"disk", "5":"os(complete)"} + self.stage = { + "1":"os(block)", "2":"os(driver)", "3":"disk", "4":"os(complete)", "5":"os(done)"} + self.pointsCnt = len(self.stage) + 1 + self.diskInfoDicts = {} + diskList = os.listdir('/sys/class/block/') + for disk in diskList: + try: + with open('/sys/class/block/' + disk + '/dev') as f: + dev = f.read().split(':') + devt = (int(dev[0]) << 20) + int(dev[1]) + 
self.diskInfoDicts[str(devt)] = disk + except Exception: + continue + if not log: + log = '/var/log/sysak/iosdiag/latency/result.log.seq' + logdir = os.path.dirname(log) + if not os.path.exists(logdir): + os.mkdir(logdir) + self.fJson = open(log, 'w+') + pattern = r'\[([^\]]*\s)' + result = \ + re.sub(pattern, '', execCmd('echo test > /dev/kmsg;cat /proc/uptime;dmesg | tail -1')) + txt = re.split(' |\n',result) + self.timeDiff = \ + float(txt[0]) - float(txt[2].strip('[').strip(']')) + + def writeDataToJson(self, data): + self.fJson.write(data+'\n') + + def getDevNameByDevt(self, devt): + try: + return self.diskInfoDicts[str(devt)] + except Exception: + return '-' + + def config(self): + devt = self.devt + if not os.path.exists(self.tracingDir): + os.mkdir(self.tracingDir) + for point in self.tracepoints: + if devt > 0: + echoFile(self.blkTraceDir+"/"+point+"/filter", "dev=="+str(devt)) + else: + echoFile(self.blkTraceDir+"/"+point+"/filter", "") + echoFile(self.blkTraceDir+"/"+point+"/enable", "1") + + for exp in self.kprobepoints: + probe = 'p_'+exp.split()[1]+'_0' + enableKprobe = self.kprobeDir+"/"+probe+"/enable" + filterKprobe = self.kprobeDir+"/"+probe+"/filter" + if os.path.exists(enableKprobe): + echoFile(enableKprobe, "0") + if devt > 0: + echoFile(filterKprobe, "0") + echoFileAppend(self.kprobeEvent, '-:%s' % probe) + + echoFileAppend(self.kprobeEvent, exp) + if devt > 0: + if 'dev=' in exp: + echoFile(filterKprobe, "dev==\""+self.devname+"\"") + else: + echoFile(filterKprobe, "") + echoFile(enableKprobe, "1") + + def start(self): + echoFile(self.tracingDir+"/trace", "") + echoFile(self.tracingDir+"/tracing_on", "1") + + def stop(self): + echoFile(self.tracingDir+"/tracing_on", "0") + + def clear(self): + for point in self.tracepoints: + echoFile(self.blkTraceDir+"/"+point+"/enable", "0") + if self.devt > 0: + echoFile(self.blkTraceDir+"/"+point+"/filter", "0") + + for exp in self.kprobepoints: + probe = 'p_'+exp.split()[1]+'_0' + enableKprobe = 
self.kprobeDir+"/"+probe+"/enable" + if not os.path.exists(enableKprobe): + continue + echoFile(enableKprobe, "0") + if self.devt > 0: + filterKprobe = self.kprobeDir+"/"+probe+"/filter" + echoFile(filterKprobe, "0") + echoFileAppend(self.kprobeEvent, '-:%s' % probe) + self.fJson.close() + + def paserBlockTracepoints(self, oneIO, ios, point): + commSpaceCnt = 0 + for i in range(len(oneIO)-1, -1, -1): + if oneIO[i].startswith('['): + break + commSpaceCnt += 1 + refIdx = oneIO.index(point+':') + time = float(oneIO[refIdx-1].strip(':'))*1000000 + devinfo = oneIO[refIdx+1].split(',') + dev = ((int(devinfo[0]) << 20) + int(devinfo[1])) + diskname = self.getDevNameByDevt(dev) + if diskname == '-': + return + sector = int(oneIO[-4-commSpaceCnt]) + key = str(sector)+':'+diskname + if point != 'block_getrq': + try: + ios[key]["t"][point] = time + except Exception: + pass + return + cpu = int(oneIO[refIdx-3].lstrip('[').rstrip(']')) + comm = ' '.join( + oneIO[(len(oneIO)-1-commSpaceCnt):]).lstrip('[').rstrip(']') + pid = oneIO[refIdx-4].rsplit('-', 1)[1].strip() + iotype = oneIO[refIdx+2] + size = int(oneIO[-2-commSpaceCnt]) * 512 + ios[key] = { + "comm":comm, "pid":pid, "sector":sector, "diskname":diskname, + "datalen":size, "iotype":iotype, "cpu":cpu, + "t":OrderedDict({point:time})} + + def paserKbprobepoints(self, oneIO, ios, point): + # <...>-72446 [003] .... 
52607435.533155: block_getrq: 253,0 W 74298000 + 8 [kworker/u8:3] + kick = 1 + diskname = \ + oneIO[-2].split('=')[1].strip('\"') if 'dev' in oneIO[-2] else '-' + refIdx = oneIO.index(point+':') + time = float(oneIO[refIdx-1].strip(':'))*1000000 + sector = oneIO[-1].split('=')[1] + try: + sector = int(sector, 16) + except ValueError: + sector = int(sector) + key = str(sector)+':'+diskname + if 'rq=' in oneIO[-2] or 'rq=' in oneIO[-4]: + rqField = oneIO[-2] if 'rq=' in oneIO[-2] else oneIO[-4] + rq = rqField.split('=')[1] + keyR = str(sector)+':'+rq + if 'queue_rq' in point: + kick = int(oneIO[-3].split('=')[1]) + ios['keyR'][keyR] = key + try: + key = ios['keyR'][keyR] + except Exception: + pass + if kick: + try: + ios[key]["t"][point] = time + except Exception: + pass + + def showJson(self, ios): + del ios['keyR'] + for io in ios.values(): + if len(io['t']) < self.pointsCnt: + continue + io['t'] = io['t'].values() + total_delay = io["t"][-1] - io["t"][0] + if (self.thresh*1000) > total_delay: + continue + with open('/proc/uptime') as f: + uptime = float(f.read().split()[0]) + now = time.time() + timestamp = now - uptime + io['t'][0]/1000000.0 + self.timeDiff + startTime = time.strftime( + '%Y-%m-%d %H:%M:%S', time.localtime(timestamp)) + startTime = startTime+'.'+str(timestamp).split('.')[1][:3] + io["time"] = startTime + delay = { + "time":startTime, "diskname":io["diskname"], + "totaldelay":total_delay, "delays":[]} + max_delay = 0 + for i in range(1, len(io["t"]), 1): + d = io["t"][i] - io["t"][i-1] + if d >= max_delay: + max_delay = d + component = self.stage[str(i)] + delay["delays"].append( + {"component":self.stage[str(i)], "delay":d}) + io["abnormal"] = ( + '%s delay (%d:%d us)' % ( + component, max_delay, total_delay)) + del io["t"] + self.writeDataToJson( + json.dumps(io)+'\n'+json.dumps(delay)) + + def show(self): + ios = {} + ios['keyR'] = {} + with open(self.tracingDir+"/trace") as f: + traceText = f.readlines() + # <...>-72446 [003] .... 
52607435.533155: block_getrq: 253,0 W 74298000 + 8 [kworker/u8:3] + for entry in traceText: + oneIO = entry.split() + if len(oneIO) < 1: + return + if 'block_getrq' in entry: + self.paserBlockTracepoints(oneIO, ios, 'block_getrq') + elif 'block_rq_insert' in entry: + #self.paserBlockTracepoints(oneIO, ios, 'block_rq_insert') + pass + elif 'block_rq_issue' in entry: + self.paserBlockTracepoints(oneIO, ios, 'block_rq_issue') + elif 'scsi_dispatch_cmd' in entry: + self.paserKbprobepoints(oneIO, ios, 'p_scsi_dispatch_cmd_0') + elif 'scsi_done' in entry: + self.paserKbprobepoints(oneIO, ios, 'p_scsi_done_0') + elif 'nvme_queue_rq' in entry: + self.paserKbprobepoints(oneIO, ios, 'p_nvme_queue_rq_0') + elif 'virtio_queue_rq' in entry: + self.paserKbprobepoints(oneIO, ios, 'p_virtio_queue_rq_0') + elif 'blk_mq_complete_request' in entry: + self.paserKbprobepoints(oneIO, ios, 'p_blk_mq_complete_request_0') + elif 'block_rq_complete' in entry: + self.paserBlockTracepoints(oneIO, ios, 'block_rq_complete') + elif 'blk_account_io_done' in entry: + self.paserKbprobepoints(oneIO, ios, 'p_blk_account_io_done_0') + self.showJson(ios) + + + def entry(self, interval): + self.start() + time.sleep(float(interval)) + self.stop() + self.show() diff --git a/source/tools/combine/iosdiag/iolat/tracing_latency.py b/source/tools/combine/iosdiag/iolat/tracing_latency.py new file mode 100644 index 0000000000000000000000000000000000000000..8f411d9f244866d4484228421c9710822bc8ed36 --- /dev/null +++ b/source/tools/combine/iosdiag/iolat/tracing_latency.py @@ -0,0 +1,49 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- + +import os +import sys +import signal +import string +import argparse +from iotracingClass import iotracingClass + +global_stop = False +def signal_exit_handler(signum, frame): + global global_stop + global_stop = True + +def main(): + if os.geteuid() != 0: + print ("This program must be run as root. Aborting.") + sys.exit(0) + examples = """e.g. 
+ ./tracing_latency.py -t 10 -T 10 -d vda + Report io delay over 10ms for vda 10s + """ + parser = argparse.ArgumentParser( + description="Report IO delay for disk.", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=examples) + parser.add_argument('-T','--Timeout', help='Specify the timeout for program exit(secs).') + parser.add_argument('-t','--thresh', help='Specify the delay-thresh to report.') + parser.add_argument('-d','--device', help='Specify the disk name.') + parser.add_argument('-f','--log', help='Specify the json-format output.') + args = parser.parse_args() + + signal.signal(signal.SIGINT, signal_exit_handler) + signal.signal(signal.SIGHUP, signal_exit_handler) + signal.signal(signal.SIGTERM, signal_exit_handler) + if args.Timeout is not None: + timeoutSec = args.Timeout if int(args.Timeout) > 0 else 10 + signal.signal(signal.SIGALRM, signal_exit_handler) + signal.alarm(int(timeoutSec)) + c = iotracingClass(args.device, args.thresh, args.log) + c.config() + + while global_stop != True: + c.entry(1) + c.clear() + +if __name__ == "__main__": + main() diff --git a/source/tools/combine/iosdiag/iolat/tracing_latency.spec b/source/tools/combine/iosdiag/iolat/tracing_latency.spec new file mode 100644 index 0000000000000000000000000000000000000000..3492a93a92ce31df82e19164e7d17a15176821cd --- /dev/null +++ b/source/tools/combine/iosdiag/iolat/tracing_latency.spec @@ -0,0 +1,33 @@ +# -*- mode: python ; coding: utf-8 -*- + +block_cipher = None + + +a = Analysis(['tracing_latency.py'], + pathex=["./"], + binaries=[], + datas=[], + hiddenimports=[], + hookspath=[], + runtime_hooks=[], + excludes=[], + win_no_prefer_redirects=False, + win_private_assemblies=False, + cipher=block_cipher, + noarchive=False) +pyz = PYZ(a.pure, a.zipped_data, + cipher=block_cipher) +exe = EXE(pyz, + a.scripts, + a.binaries, + a.zipfiles, + a.datas, + [], + name='tracing_latency', + debug=False, + bootloader_ignore_signals=False, + strip=False, + upx=True, + 
upx_exclude=[], + runtime_tmpdir=None, + console=True ) \ No newline at end of file diff --git a/source/tools/combine/iosdiag/latency/Makefile b/source/tools/combine/iosdiag/latency/Makefile index 7ad4403cafe09ddd5d6c102927c13a1aa4027795..e600b736df0a5a6e67310a8f2faf4adb59c14798 100644 --- a/source/tools/combine/iosdiag/latency/Makefile +++ b/source/tools/combine/iosdiag/latency/Makefile @@ -2,7 +2,7 @@ newdirs += . csrcs += $(filter-out $(wildcard *.bpf.c), $(wildcard *.c)) bpfsrcs += $(wildcard *.bpf.c) -CFLAGS += -static +CFLAGS += -static -pthread target = latency diff --git a/source/tools/combine/iosdiag/latency/aggregator.c b/source/tools/combine/iosdiag/latency/aggregator.c new file mode 100644 index 0000000000000000000000000000000000000000..54baff25d7184818586ad0e17ac848d63a91a638 --- /dev/null +++ b/source/tools/combine/iosdiag/latency/aggregator.c @@ -0,0 +1,221 @@ +#include +#include +#include +#include +#include +#include "iosdiag.h" +#include "format_json.h" + + +int req_array_length = 0; +int req_capacity = 10; +struct iosdiag_req* req_array; + +void reset_req_statistics() +{ + req_array_length = 0; + req_capacity = 10; + req_array = (struct iosdiag_req*)realloc(req_array, req_capacity * sizeof(struct iosdiag_req)); +} + +void expand_req_array() +{ + req_capacity *= 2; + req_array = (struct iosdiag_req*)realloc(req_array, req_capacity * sizeof(struct iosdiag_req)); +} + +/* Update various statistical variables */ +void update_component_delay(struct iosdiag_req *iop, unsigned long *sum_delay, unsigned long *max_delay, int status) +{ + int m = 1; + unsigned long delay = 0; + for (; m < MAX_POINT; m++) { + if (iop->ts[m] > iop->ts[m-1]) { + delay = iop->ts[m] / 1000 - iop->ts[m-1] / 1000; + sum_delay[m-1] += delay; + if (status) { + max_delay[m-1] = delay; + } + } + } +} + +void init_aggregation_metrics(struct aggregation_metrics* ams, struct iosdiag_req *iop, int max_index) +{ + /* Initialize some statistical variables */ + ams->sum_data_len = 
iop->data_len; + ams->sum_max_delay = get_max_delay(iop); + ams->max_delay = ams->sum_max_delay; + ams->sum_total_delay = get_total_delay(iop); + ams->max_total_delay = ams->sum_total_delay; + ams->max_total_dalay_idx = max_index; + ams->maxdelay_component = get_max_delay_component(iop); + ams->max_total_delay_diskname = iop->diskname; + ams->sum_component_delay = + (unsigned long*)malloc(5 * sizeof(unsigned long)); + memset(ams->sum_component_delay, 0, 5 * sizeof(unsigned long)); + ams->max_component_delay = + (unsigned long*)malloc(5 * sizeof(unsigned long)); + memset(ams->max_component_delay, 0, 5 * sizeof(unsigned long)); + ams->count = 1; + update_component_delay(iop, ams->sum_component_delay, ams->max_component_delay, 1); +} + +int check_aggregation_conditions(struct iosdiag_req *iop1, struct iosdiag_req *iop2) +{ + int res = 0; + if (strcmp(iop1->diskname, iop2->diskname) == 0 && + strcmp(iop1->comm, iop2->comm) == 0 && + strcmp(iop1->op, iop2->op) == 0 && + strcmp(get_max_delay_component(iop1), get_max_delay_component(iop2)) == 0 && + // iop1->cpu[0] == iop2->cpu[0] && + // iop1->cpu[1] == iop2->cpu[1] && + // iop1->cpu[2] == iop2->cpu[2] && + // iop1->cpu[3] == iop2->cpu[3] && + iop1->queue_id == iop2->queue_id) { + res = 1; + } + return res; +} + +int check_aggregated(struct iosdiag_req *iop) +{ + if (strlen(iop->diskname) == 0 || strlen(iop->comm) == 0 || strlen(iop->op) == 0) { + return 1; + } + return 0; +} + +/* Update various statistical variables */ +void aggregate_events(struct aggregation_metrics* ams, struct iosdiag_req *iop, int new_index) +{ + ams->count ++; + unsigned long aggregated_total_delay = get_total_delay(iop); + unsigned long aggregated_max_delay = get_max_delay(iop); + ams->sum_total_delay += aggregated_total_delay; + ams->sum_max_delay += aggregated_max_delay; + ams->sum_data_len += iop->data_len; + // printf("max_total2 %d: %lu\n", new_index, aggregated_total_delay); + + if (ams->max_total_delay < aggregated_total_delay) { + 
ams->max_delay = aggregated_max_delay; + ams->max_total_delay = aggregated_total_delay; + ams->max_total_dalay_idx = new_index; + update_component_delay(iop, ams->sum_component_delay, ams->max_component_delay, 1); + } else { + update_component_delay(iop, ams->sum_component_delay, ams->max_component_delay, 0); + } + //printf("put %d into blank\n", j); + memset(iop->diskname, '\0', sizeof(iop->diskname)); +} + +/* Update various statistical variables */ +void post_aggregation_statistics(struct aggregation_metrics* ams) +{ + int m = 0; + // ams->sum_data_len /= ams->count; + ams->sum_max_delay /= ams->count; + ams->sum_total_delay /= ams->count; + for (; m < MAX_POINT - 1; m++) { + ams->sum_component_delay[m] /= ams->count; + // printf("sum_component_delay: %d, %lu\n", ams->count, ams->sum_component_delay[m]); + } +} + +void aggregation_summary_convert_to_unity(char* dest, struct iosdiag_req *iop, struct iosdiag_req *iop_max, struct aggregation_metrics* ams) +{ + sprintf(dest, + "sysom_iolatency,diskname=%s," + "comm=%s," + "iotype=%s," + "pid=%d," + "ppid=%d," + "queue_id=%d," + "maxdelay_component=%s " + "max_delay=%lu," + "total_delay=%lu," + "sum_datalen=%d," + "datalen=%d," + "initiated_cpu=%d," + "issue_cpu=%d," + "respond_cpu=%d," + "soft_interrupt_cpu=%d," + "block=%lu," + "driver=%lu," + "disk=%lu," + "complete=%lu," + "done=%lu," + "count=%d", + iop->diskname, + iop->comm, + iop->op, + iop->tid, + iop->pid, + iop->queue_id, + ams->maxdelay_component, + ams->sum_max_delay, + ams->sum_total_delay, + ams->sum_data_len, + ams->sum_data_len/ams->count, + iop->cpu[0], + iop->cpu[1], + iop->cpu[2], + iop->cpu[3], + ams->sum_component_delay[0], + ams->sum_component_delay[1], + ams->sum_component_delay[2], + ams->sum_component_delay[3], + ams->sum_component_delay[4], + ams->count + ); + sprintf(dest + strlen(dest), "%s", "\n"); + + sprintf(dest + strlen(dest), + "sysom_iolatency_max,diskname=%s," + "comm=%s," + "iotype=%s," + "pid=%d," + "ppid=%d," + "queue_id=%d," + 
"maxdelay_component=%s " + "max_delay=%lu," + "total_delay=%lu," + "sum_datalen=%d," + "datalen=%d," + "initiated_cpu=%d," + "issue_cpu=%d," + "respond_cpu=%d," + "soft_interrupt_cpu=%d," + "block=%lu," + "driver=%lu," + "disk=%lu," + "complete=%lu," + "done=%lu," + "count=%d", + ams->max_total_delay_diskname, + iop_max->comm, + iop_max->op, + iop_max->tid, + iop_max->pid, + iop_max->queue_id, + ams->maxdelay_component, + ams->max_delay, + ams->max_total_delay, + ams->sum_data_len, + iop_max->data_len, + iop_max->cpu[0], + iop_max->cpu[1], + iop_max->cpu[2], + iop_max->cpu[3], + ams->max_component_delay[0], + ams->max_component_delay[1], + ams->max_component_delay[2], + ams->max_component_delay[3], + ams->max_component_delay[4], + ams->count + ); +} + + + + diff --git a/source/tools/combine/iosdiag/latency/aggregator.h b/source/tools/combine/iosdiag/latency/aggregator.h new file mode 100644 index 0000000000000000000000000000000000000000..4438214866db74ed39ceefa5e24a93f38185db47 --- /dev/null +++ b/source/tools/combine/iosdiag/latency/aggregator.h @@ -0,0 +1,23 @@ +#ifndef _AGGREGATOR_H +#define _AGGREGATOR_H + +#define AGGREGATOR_INTERVAL 1 + +extern int req_array_length; +extern int req_capacity; +extern struct iosdiag_req* req_array; +pthread_mutex_t req_mutex; + +void reset_req_statistics(); +void expand_req_array(); +void init_aggregation_metrics(struct aggregation_metrics* ams, struct iosdiag_req *iop, int max_index); +void update_component_delay(struct iosdiag_req *iop, unsigned long *sum_delay, unsigned long *max_delay, int status); +int check_aggregation_conditions(struct iosdiag_req *iop1, struct iosdiag_req *iop2); +int check_aggregated(struct iosdiag_req *iop); +void aggregate_events(struct aggregation_metrics* ams, struct iosdiag_req *iop, int new_index); +void aggregation_summary_convert_to_unity(char* dest, struct iosdiag_req *iop, struct iosdiag_req *iop_max, struct aggregation_metrics* ams); +void post_aggregation_statistics(struct 
aggregation_metrics* ams); + +#endif + + diff --git a/source/tools/combine/iosdiag/latency/bpf_iosdiag_common.h b/source/tools/combine/iosdiag/latency/bpf_iosdiag_common.h index 1de5d0ba55c5c99d66136907a5ac3fc37af3f0b9..93d3ff9df00f01e7c4f927bd7739984be8f98a69 100644 --- a/source/tools/combine/iosdiag/latency/bpf_iosdiag_common.h +++ b/source/tools/combine/iosdiag/latency/bpf_iosdiag_common.h @@ -6,6 +6,7 @@ #include #include #include +#include #include "iosdiag.h" struct bpf_map_def SEC("maps") iosdiag_maps = { @@ -45,12 +46,6 @@ inline int get_target_devt(void) return 0; } -inline void -init_iosdiag_key(unsigned long sector, struct iosdiag_key *key) -{ - key->sector = sector; -} - struct request___below_516 { struct gendisk *rq_disk; }; @@ -74,18 +69,37 @@ inline struct gendisk *get_rq_disk(struct request *req) return rq_disk; } -inline int +__always_inline void +init_iosdiag_key(unsigned long sector, unsigned int dev, struct iosdiag_key *key) +{ + key->sector = sector; + key->dev = dev; +} + +__always_inline int trace_io_driver_route(struct pt_regs *ctx, struct request *req, enum ioroute_type type) { struct iosdiag_req *ioreq; struct iosdiag_req new_ioreq = {0}; + struct iosdiag_req data = {0}; struct iosdiag_key key = {0}; unsigned long long now = bpf_ktime_get_ns(); - sector_t sector; struct gendisk *rq_disk; + int complete = 0; + + sector_t sector; + dev_t devt = 0; + int major = 0; + int first_minor = 0; + struct gendisk *gd = get_rq_disk(req); + bpf_probe_read(&major, sizeof(int), &gd->major); + bpf_probe_read(&first_minor, sizeof(int), &gd->first_minor); + devt = ((major) << 20) | (first_minor); bpf_probe_read(§or, sizeof(sector_t), &req->__sector); - init_iosdiag_key(sector, &key); + + init_iosdiag_key(sector, devt, &key); + ioreq = (struct iosdiag_req *)bpf_map_lookup_elem(&iosdiag_maps, &key); if (ioreq) { if (!ioreq->ts[type]) @@ -94,8 +108,18 @@ trace_io_driver_route(struct pt_regs *ctx, struct request *req, enum ioroute_typ rq_disk = 
get_rq_disk(req); bpf_probe_read(ioreq->diskname, sizeof(ioreq->diskname), &rq_disk->disk_name); } - if (type == IO_RESPONCE_DRIVER_POINT) - ioreq->cpu[1] = bpf_get_smp_processor_id(); + if (type == IO_RESPONCE_DRIVER_POINT) { + ioreq->cpu[2] = bpf_get_smp_processor_id(); + } + if (type == IO_DONE_POINT){ + if (ioreq->ts[IO_ISSUE_DEVICE_POINT] && + ioreq->ts[IO_RESPONCE_DRIVER_POINT]) + complete = 1; + } + if (complete) { + memcpy(&data, ioreq, sizeof(data)); + bpf_perf_event_output(ctx, &iosdiag_maps_notify, 0xffffffffULL, &data, sizeof(data)); + } } else return 0; bpf_map_update_elem(&iosdiag_maps, &key, ioreq, BPF_ANY); @@ -117,18 +141,26 @@ int tracepoint_block_getrq(struct block_getrq_args *args) struct iosdiag_req new_ioreq = {0}; struct iosdiag_key key = {0}; unsigned long long now = bpf_ktime_get_ns(); - pid_t pid = bpf_get_current_pid_tgid(); + pid_t pid = pid(); + pid_t tid = tid(); u32 target_devt = get_target_devt(); if (target_devt && args->dev != target_devt) return 0; - new_ioreq.cpu[0] = new_ioreq.cpu[1] = new_ioreq.cpu[2] = -1; - init_iosdiag_key(args->sector, &key); + new_ioreq.cpu[0] = -1; + new_ioreq.cpu[1] = -1; + new_ioreq.cpu[2] = -1; + new_ioreq.cpu[3] = -1; + + //bpf_printk("block_getrq: %d\n", args->dev); + init_iosdiag_key(args->sector, args->dev, &key); if (pid) memcpy(new_ioreq.comm, args->comm, sizeof(args->comm)); + // IO_START_POINT new_ioreq.ts[IO_START_POINT] = now; new_ioreq.pid = pid; + new_ioreq.tid = tid; memcpy(new_ioreq.op, args->rwbs, sizeof(args->rwbs)); new_ioreq.sector = args->sector; new_ioreq.data_len = args->nr_sector * 512; @@ -154,14 +186,14 @@ int tracepoint_block_rq_issue(struct block_rq_issue_args *args) struct iosdiag_req *ioreq; struct iosdiag_key key = {0}; unsigned long long now = bpf_ktime_get_ns(); - pid_t pid = bpf_get_current_pid_tgid(); + // pid_t pid = bpf_get_current_pid_tgid(); int type = IO_ISSUE_DRIVER_POINT; u32 target_devt = get_target_devt(); if (target_devt && args->dev != target_devt) return 0; 
- init_iosdiag_key(args->sector, &key); + init_iosdiag_key(args->sector, args->dev, &key); ioreq = (struct iosdiag_req *)bpf_map_lookup_elem(&iosdiag_maps, &key); if (ioreq) { if (ioreq->ts[type]) @@ -172,6 +204,7 @@ int tracepoint_block_rq_issue(struct block_rq_issue_args *args) ioreq->data_len = args->bytes; else if (args->nr_sector) ioreq->data_len = args->nr_sector * 512; + ioreq->cpu[1] = bpf_get_smp_processor_id(); } else return 0; bpf_map_update_elem(&iosdiag_maps, &key, ioreq, BPF_ANY); @@ -199,24 +232,51 @@ int tracepoint_block_rq_complete(struct block_rq_complete_args *args) int complete = 0; if (target_devt && args->dev != target_devt) + //bpf_printk("block_rq_complete: %d, %d\n", args->dev, target_devt); return 0; - - init_iosdiag_key(args->sector, &key); + + init_iosdiag_key(args->sector, args->dev, &key); ioreq = (struct iosdiag_req *)bpf_map_lookup_elem(&iosdiag_maps, &key); if (ioreq) { if (!ioreq->ts[IO_COMPLETE_TIME_POINT]) ioreq->ts[IO_COMPLETE_TIME_POINT] = now; - if (ioreq->ts[IO_ISSUE_DEVICE_POINT] && - ioreq->ts[IO_RESPONCE_DRIVER_POINT]) - complete = 1; - ioreq->cpu[2] = bpf_get_smp_processor_id(); + ioreq->cpu[3] = bpf_get_smp_processor_id(); } else return 0; - if (complete) { - memcpy(&data, ioreq, sizeof(data)); - bpf_perf_event_output(args, &iosdiag_maps_notify, 0xffffffffULL, &data, sizeof(data)); + + bpf_map_update_elem(&iosdiag_maps, &key, ioreq, BPF_ANY); + return 0; +} + +SEC("kprobe/blk_account_io_done") +int kprobe_blk_account_io_done(struct pt_regs *ctx) +{ + struct request *req = (struct request *)PT_REGS_PARM1(ctx); + struct iosdiag_key key = {0}; + + sector_t sector; + dev_t devt = 0; + + int major = 0; + int first_minor = 0; + + struct gendisk *gd = get_rq_disk(req); + bpf_probe_read(&major, sizeof(int), &gd->major); + bpf_probe_read(&first_minor, sizeof(int), &gd->first_minor); + devt = ((major) << 20) | (first_minor); + bpf_probe_read(§or, sizeof(sector_t), &req->__sector); + + init_iosdiag_key(sector, devt, &key); + + if 
(!req) { + //bpf_printk("kprobe_blk_account_io_done: con't get request"); + return 0; } + + trace_io_driver_route(ctx, req, IO_DONE_POINT); bpf_map_delete_elem(&iosdiag_maps, &key); return 0; } + #endif + diff --git a/source/tools/combine/iosdiag/latency/cnf_put.c b/source/tools/combine/iosdiag/latency/cnf_put.c new file mode 100644 index 0000000000000000000000000000000000000000..24ec7ddf699a9bc0010c64d637e73312cd41c7c9 --- /dev/null +++ b/source/tools/combine/iosdiag/latency/cnf_put.c @@ -0,0 +1,57 @@ +#include +#include +#include +#include "cnf_put.h" + +int upload_num = 0; +int upload_capacity = 10; +char** upload_array; + +int cnfPut_init(struct cnfPut* self, const char* path) { + self->_sock = socket(AF_UNIX, SOCK_DGRAM, 0); + if (self->_sock == -1) { + fprintf(stderr, "pipe path is not exist. please check Netinfo is running.\n"); + return -1; + } + memset(&self->_server_addr, 0, sizeof(self->_server_addr)); + self->_server_addr.sun_family = AF_UNIX; + strncpy(self->_server_addr.sun_path, PIPE_PATH, sizeof(self->_server_addr.sun_path) - 1); + return 0; +} + +int cnfPut_puts(struct cnfPut* self, const char* s) { + if (strlen(s) > MAX_BUFF) { + fprintf(stderr, "message len %zu is too long, should be less than %d\n", strlen(s), MAX_BUFF); + return -1; + } + + if (connect(self->_sock, (struct sockaddr*)&self->_server_addr, sizeof(self->_server_addr)) == -1) { + return -1; + } + + if (send(self->_sock, s, strlen(s), 0) == -1) { + fprintf(stderr, "send %s fail.\n", s); + return -1; + } + return 0; +} + +void cnfPut_destroy(struct cnfPut *cnfput) { + if (cnfput->_sock != -1) { + close(cnfput->_sock); + cnfput->_sock = -1; + } +} + +void reset_upload_statistics() +{ + upload_num = 0; + upload_capacity = 10; + upload_array = malloc(upload_capacity * sizeof(char*)); +} + +void expand_upload_array() +{ + upload_capacity *= 2; + upload_array = realloc(upload_array, upload_capacity * sizeof(char*)); +} diff --git a/source/tools/combine/iosdiag/latency/cnf_put.h 
b/source/tools/combine/iosdiag/latency/cnf_put.h new file mode 100644 index 0000000000000000000000000000000000000000..2e2a753fbddaf47a22069ac844b7b54eb5b57da6 --- /dev/null +++ b/source/tools/combine/iosdiag/latency/cnf_put.h @@ -0,0 +1,30 @@ +#ifndef _CNF_PUT_H +#define _CNF_PUT_H + +#include +#include +#include +#include "format_json.h" + +#define PIPE_PATH "/var/sysom/outline" +#define MAX_BUFF 128*1024 +#define UPLOAD_INTERVAL 3 + +extern int upload_num; +extern int upload_capacity; +extern char** upload_array; +pthread_mutex_t upload_mutex; + +struct cnfPut { + int _sock; + struct sockaddr_un _server_addr; +}; + +int cnfPut_init(struct cnfPut* self, const char* path); +int cnfPut_puts(struct cnfPut* self, const char* s); +void cnfPut_destroy(struct cnfPut *cnfput); +void reset_upload_statistics(); +void expand_upload_array(); + +#endif + diff --git a/source/tools/combine/iosdiag/latency/collect.c b/source/tools/combine/iosdiag/latency/collect.c index 2d752eb24aff8c01b40778790d27d79115ce9344..783c55208f5c0dd3bfbad3bdc309efbfc9ac2e81 100644 --- a/source/tools/combine/iosdiag/latency/collect.c +++ b/source/tools/combine/iosdiag/latency/collect.c @@ -12,8 +12,11 @@ #include #include #include +#include #include "ebpf_load.h" #include "iosdiag.h" +#include "cnf_put.h" +#include "aggregator.h" #include "format_json.h" #include @@ -55,11 +58,51 @@ static int exec_shell_cmd(char *cmd) return 0; } +pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; +pthread_cond_t cond = PTHREAD_COND_INITIALIZER; +int exit_flag = 0; +/* Make sure all threads exit safely */ +void signal_handler(int signum) +{ + printf("Received signal %d\n", signum); + + pthread_mutex_lock(&mutex); + exit_flag = 1; + pthread_mutex_unlock(&mutex); + + pthread_cond_broadcast(&cond); + + printf("Waiting for all threads to exit...\n"); + printf("Exiting...\n"); + exit(0); +} + +/* Ensure that the data of all IO path points are captured */ +static int check_catch_points(struct iosdiag_req *iop) +{ + int i = 
0; + for (i = 0; i < (MAX_POINT - 1); i++) { + if (iop->ts[i] == 0) { + return 0; + } + } + + if (!iop->ts[IO_DONE_POINT]){ + iop->ts[IO_DONE_POINT] = iop->ts[IO_COMPLETE_TIME_POINT]; + } + + return 1; +} + static int over_threshold(struct iosdiag_req *iop) { unsigned long threshold_ns = get_threshold_us() * 1000; - unsigned long delay_ns = iop->ts[IO_COMPLETE_TIME_POINT] - - iop->ts[IO_START_POINT]; + unsigned long delay_ns = 0; + + if (iop->ts[MAX_POINT-1] > iop->ts[IO_START_POINT]) { + delay_ns = iop->ts[MAX_POINT-1] - + iop->ts[IO_START_POINT]; + } if (delay_ns >= threshold_ns) return 1; @@ -81,6 +124,180 @@ static void iosdiag_store_result(void *ctx, int cpu, void *data, __u32 size) } } +/* The aggregation thread aggregates multiple IO events generated +during the aggregation cycle. */ +void event_aggregator() +{ + pthread_mutex_lock(&req_mutex); + req_array = (struct iosdiag_req*)malloc(req_capacity * sizeof(struct iosdiag_req)); + pthread_mutex_unlock(&req_mutex); + + int pre_count = 0; + int post_count = 0; + unsigned long byte_count = 0; + + while (1) { + time_t startTime = time(NULL); + /* Check the exit signal */ + pthread_mutex_lock(&mutex); + if (exit_flag) { + pthread_mutex_unlock(&mutex); + break; + } + pthread_mutex_unlock(&mutex); + + /* Start aggregating received IO events */ + int lockResult = pthread_mutex_lock(&req_mutex); + + if (req_array_length > 1) { + int i, j; + for (i = 0; i < req_array_length; i++) { + /* Filter to IO events that have participated in aggregation */ + if (check_aggregated(&req_array[i])) { + continue; + } + struct aggregation_metrics agg_metrics = {0}; + init_aggregation_metrics(&agg_metrics, &req_array[i], i); + + for (j = i + 1; j < req_array_length; j++) { + if (check_aggregation_conditions(&req_array[i], &req_array[j])){ + aggregate_events(&agg_metrics, &req_array[j], j); + } + } + post_aggregation_statistics(&agg_metrics); + + set_check_time_date(); + char *latency_summary = malloc(JSON_BUFFER_SIZE); + 
memset(latency_summary, 0x0, JSON_BUFFER_SIZE); + + pthread_mutex_lock(&upload_mutex); + if (upload_num >= upload_capacity) { + expand_upload_array(); + } + + upload_array[upload_num] = malloc(JSON_BUFFER_SIZE); + + aggregation_summary_convert_to_unity(latency_summary, &req_array[i], + &req_array[agg_metrics.max_total_dalay_idx], &agg_metrics); + + free(agg_metrics.max_component_delay); + free(agg_metrics.sum_component_delay); + + strcpy(upload_array[upload_num], latency_summary); + upload_num++; + + pthread_mutex_unlock(&upload_mutex); + free(latency_summary); + } + reset_req_statistics(); + pthread_mutex_unlock(&req_mutex); + } else if (req_array_length == 1){ + set_check_time_date(); + char *latency_summary = malloc(JSON_BUFFER_SIZE); + memset(latency_summary, 0x0, JSON_BUFFER_SIZE); + summary_convert_to_unity(latency_summary, &req_array[0]); + reset_req_statistics(); + pthread_mutex_unlock(&req_mutex); + + pthread_mutex_lock(&upload_mutex); + if (upload_num >= upload_capacity) { + expand_upload_array(); + } + + upload_array[upload_num] = malloc(JSON_BUFFER_SIZE); + strcpy(upload_array[upload_num], latency_summary); + upload_num++; + + pthread_mutex_unlock(&upload_mutex); + free(latency_summary); + } + if (lockResult == 0) { + pthread_mutex_unlock(&req_mutex); + } + time_t endTime = time(NULL); + time_t sleepTime = AGGREGATOR_INTERVAL - (endTime - startTime); + if (sleepTime > 0) { + sleep(sleepTime); + } + } +} + +void event_upload_thread() +{ + pthread_mutex_lock(&upload_mutex); + upload_array = malloc(upload_capacity * sizeof(char*)); + pthread_mutex_unlock(&upload_mutex); + + struct cnfPut cnfput; + if (cnfPut_init(&cnfput, PIPE_PATH)){ + fprintf(stderr, "CnfPut init fail: %s\n", PIPE_PATH); + } + + while (1) { + int i = 0; + time_t startTime = time(NULL); + char *latency_summaries = malloc(JSON_BUFFER_SIZE); + memset(latency_summaries, 0x0, JSON_BUFFER_SIZE); + pthread_mutex_lock(&upload_mutex); + if (upload_num > 0) { + for (i = 0; i < upload_num; i++) { + 
if (strlen(latency_summaries) + strlen(upload_array[i]) >= JSON_BUFFER_SIZE){ + if (cnfPut_puts(&cnfput, latency_summaries)){ + fprintf(stderr, "CnfPut put fail: %s\n", PIPE_PATH); + cnfPut_destroy(&cnfput); + } + memset(latency_summaries, 0x0, JSON_BUFFER_SIZE); + } + sprintf(latency_summaries + strlen(latency_summaries), "%s", upload_array[i]); + if (i < upload_num - 1 && (strlen(latency_summaries) + strlen(upload_array[i+1]) + < JSON_BUFFER_SIZE)) { + sprintf(latency_summaries + strlen(latency_summaries), "%s", "\n"); + } + } + + if (cnfPut_puts(&cnfput, latency_summaries)){ + fprintf(stderr, "CnfPut put fail: %s\n", PIPE_PATH); + cnfPut_destroy(&cnfput); + } + + for (i = 0; i < upload_num; i++) { + free(upload_array[i]); + } + free(upload_array); + reset_upload_statistics(); + } + pthread_mutex_unlock(&upload_mutex); + free(latency_summaries); + + time_t endTime = time(NULL); + time_t sleepTime = UPLOAD_INTERVAL - (endTime - startTime); + if (sleepTime > 0) { + sleep(sleepTime); + } + } +} + +static void iosdiag_upload_result(void *ctx, int cpu, void *data, __u32 size) +{ + struct iosdiag_req *iop = (struct iosdiag_req *)data; + + if (check_catch_points(iop) ) + { + if (over_threshold(iop)) { + pthread_mutex_lock(&req_mutex); + + if (req_array_length >= req_capacity) { + expand_req_array(); + } + + req_array[req_array_length] = *iop; + req_array_length++; + + pthread_mutex_unlock(&req_mutex); + } + } +} + static void iosdiag_collect(void) { struct perf_buffer_opts pb_opts = {}; @@ -100,6 +317,27 @@ static void iosdiag_collect(void) printf("done\n"); } +static void iosdiag_collect_normalization(void) +{ + struct perf_buffer_opts pb_opts = {}; + struct perf_buffer *pb; + + pb_opts.sample_cb = iosdiag_upload_result; + + pb = perf_buffer__new(iosdiag_maps_notify, 1, &pb_opts); + printf("running...\n"); + fflush(stdout); + g_json_buf = malloc(JSON_BUFFER_SIZE); + memset(g_json_buf, 0x0, JSON_BUFFER_SIZE); + + while (!g_stop) + perf_buffer__poll(pb, 100); + + 
perf_buffer__free(pb); + free(g_json_buf); + printf("done\n"); +} + static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args) { if (enable_debug_log()) @@ -113,7 +351,13 @@ static void iosdiag_stop(int signo) g_stop = 1; } -#define LOAD_IOSDIAG_BPF(name, load_map) \ +typedef struct { + char bpf_name[20]; + int bpf_load_map; + int bpf_period; +} LoadIosdiagArgs; + +#define LOAD_IOSDIAG_BPF(name, load_map, period) \ ({ \ __label__ out; \ int __ret = 0; \ @@ -146,11 +390,107 @@ static void iosdiag_stop(int signo) } \ if (!__ret) \ printf("load %s bpf success\n", #name); \ - name##_bpf_load = 1; \ + name##_bpf_load = 1; \ + if (period > 0) { \ + pthread_t attach_thread; \ + LoadIosdiagArgs *args = malloc(sizeof(LoadIosdiagArgs)); \ + strcpy(args->bpf_name, #name); \ + args->bpf_load_map = load_map; \ + args->bpf_period = period; \ + pthread_create(&attach_thread, NULL, attach_periodically, args); \ + } \ out: \ __ret; \ }) +void* attach_periodically(void* args) +{ + LoadIosdiagArgs* attach_args = (LoadIosdiagArgs*)args; + char* name = attach_args->bpf_name; + int load_map = attach_args->bpf_load_map; + int period = attach_args->bpf_period; + // printf("arg: name = %s, load_map = %d, period = %d\n", name, load_map, period); + + while (1) { + pthread_mutex_lock(&mutex); + if (exit_flag) { + pthread_mutex_unlock(&mutex); + break; + } + pthread_mutex_unlock(&mutex); + + sleep(period); + if (iosdiag_virtblk_bpf_load && !strcmp(name, "iosdiag_virtblk")){ + iosdiag_virtblk_bpf__detach(iosdiag_virtblk); + printf("dettach %s\n", "iosdiag_virtblk"); + }else if (iosdiag_nvme_bpf_load && !strcmp(name, "iosdiag_nvme")){ + iosdiag_nvme_bpf__detach(iosdiag_nvme); + printf("dettach %s\n", "iosdiag_nvme"); + }else if (iosdiag_scsi_bpf_load && !strcmp(name, "iosdiag_scsi")){ + iosdiag_scsi_bpf__detach(iosdiag_scsi); + printf("dettach %s\n", "iosdiag_scsi"); + }else if (iosdiag_scsi_mq_bpf_load && !strcmp(name, "iosdiag_scsi_mq")){ + 
iosdiag_scsi_mq_bpf__detach(iosdiag_scsi_mq); + printf("dettach %s\n", "iosdiag_scsi_mq"); + } + + sleep(period); + if (!strcmp(name, "iosdiag_virtblk")) { + printf("restart %s load bpf\n", "iosdiag_virtblk"); + if (iosdiag_virtblk_bpf__attach(iosdiag_virtblk)) { + printf("load %s bpf prog fail\n", "iosdiag_virtblk"); + iosdiag_virtblk_bpf__destroy(iosdiag_virtblk); + return NULL; + } + if (load_map) { + iosdiag_map = bpf_map__fd(iosdiag_virtblk->maps.iosdiag_maps); + iosdiag_maps_notify = bpf_map__fd(iosdiag_virtblk->maps.iosdiag_maps_notify); + iosdiag_maps_targetdevt = bpf_map__fd(iosdiag_virtblk->maps.iosdiag_maps_targetdevt); + } + }else if (!strcmp(name, "iosdiag_nvme")){ + printf("restart %s load bpf\n", "iosdiag_nvme"); + if (iosdiag_nvme_bpf__attach(iosdiag_nvme)) { + printf("attach bpf prog error\n"); + printf("load %s bpf fail\n", "iosdiag_nvme"); + iosdiag_nvme_bpf__destroy(iosdiag_nvme); + return NULL; + } + if (load_map) { + iosdiag_map = bpf_map__fd(iosdiag_nvme->maps.iosdiag_maps); + iosdiag_maps_notify = bpf_map__fd(iosdiag_nvme->maps.iosdiag_maps_notify); + iosdiag_maps_targetdevt = bpf_map__fd(iosdiag_nvme->maps.iosdiag_maps_targetdevt); + } + }else if (!strcmp(name, "iosdiag_scsi")){ + printf("restart %s load bpf\n", "iosdiag_scsi"); + if (iosdiag_scsi_bpf__attach(iosdiag_scsi)) { + printf("attach bpf prog error\n"); + printf("load %s bpf fail\n", "iosdiag_scsi"); + iosdiag_scsi_bpf__destroy(iosdiag_scsi); + return NULL; + } + if (load_map) { + iosdiag_map = bpf_map__fd(iosdiag_scsi->maps.iosdiag_maps); + iosdiag_maps_notify = bpf_map__fd(iosdiag_scsi->maps.iosdiag_maps_notify); + iosdiag_maps_targetdevt = bpf_map__fd(iosdiag_scsi->maps.iosdiag_maps_targetdevt); + } + }else if (!strcmp(name, "iosdiag_scsi_mq")){ + printf("restart %s load bpf\n", "iosdiag_scsi_mq"); + if (iosdiag_scsi_mq_bpf__attach(iosdiag_scsi_mq)) { + printf("attach bpf prog error\n"); + printf("load %s bpf fail\n", "iosdiag_scsi_mq"); + 
iosdiag_scsi_mq_bpf__destroy(iosdiag_scsi_mq); + return NULL; + } + if (load_map) { + iosdiag_map = bpf_map__fd(iosdiag_scsi_mq->maps.iosdiag_maps); + iosdiag_maps_notify = bpf_map__fd(iosdiag_scsi_mq->maps.iosdiag_maps_notify); + iosdiag_maps_targetdevt = bpf_map__fd(iosdiag_scsi_mq->maps.iosdiag_maps_targetdevt); + } + } + } + pthread_exit(NULL); +} + static unsigned int get_devt_by_devname(char *devname) { char sys_file[64]; @@ -198,43 +538,45 @@ static char *get_module_name_by_devname(char *devname) return "none"; } -int iosdiag_init(char *devname) +int iosdiag_init(char *devname, unsigned int attach_interval) { struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; int key = 0; unsigned int target_devt = get_devt_by_devname(devname); char *module_name = get_module_name_by_devname(devname); - + if (attach_interval > 0) { + signal(SIGINT, signal_handler); + } setrlimit(RLIMIT_MEMLOCK, &r); libbpf_set_print(libbpf_print_fn); if (!strcmp(module_name, "virtblk")) { - if (LOAD_IOSDIAG_BPF(iosdiag_virtblk, 1)) - return -1; + if (LOAD_IOSDIAG_BPF(iosdiag_virtblk, 1, attach_interval)) + return -1; } else if (!strcmp(module_name, "nvme")) { - if (LOAD_IOSDIAG_BPF(iosdiag_nvme, 1)) + if (LOAD_IOSDIAG_BPF(iosdiag_nvme, 1, attach_interval)) return -1; } else if (!strcmp(module_name, "scsi")) { - if (LOAD_IOSDIAG_BPF(iosdiag_scsi, 1)) { - if (LOAD_IOSDIAG_BPF(iosdiag_scsi_mq, 1)) + if (LOAD_IOSDIAG_BPF(iosdiag_scsi, 1, attach_interval)) { + if (LOAD_IOSDIAG_BPF(iosdiag_scsi_mq, 1, attach_interval)) return -1; } } else { - if (LOAD_IOSDIAG_BPF(iosdiag_virtblk, 1)) { - if (LOAD_IOSDIAG_BPF(iosdiag_nvme, 1)) { - if (LOAD_IOSDIAG_BPF(iosdiag_scsi, 1)) { - if (LOAD_IOSDIAG_BPF(iosdiag_scsi_mq, 1)) + if (LOAD_IOSDIAG_BPF(iosdiag_virtblk, 1, attach_interval)) { + if (LOAD_IOSDIAG_BPF(iosdiag_nvme, 1, attach_interval)) { + if (LOAD_IOSDIAG_BPF(iosdiag_scsi, 1, attach_interval)) { + if (LOAD_IOSDIAG_BPF(iosdiag_scsi_mq, 1, attach_interval)) return -1; } } else { - if 
(LOAD_IOSDIAG_BPF(iosdiag_scsi, 0)) { - LOAD_IOSDIAG_BPF(iosdiag_scsi_mq, 0); + if (LOAD_IOSDIAG_BPF(iosdiag_scsi, 0, attach_interval)) { + LOAD_IOSDIAG_BPF(iosdiag_scsi_mq, 0, attach_interval); } } } else { - LOAD_IOSDIAG_BPF(iosdiag_nvme, 0); - if (LOAD_IOSDIAG_BPF(iosdiag_scsi, 0)) { - LOAD_IOSDIAG_BPF(iosdiag_scsi_mq, 0); + LOAD_IOSDIAG_BPF(iosdiag_nvme, 0, attach_interval); + if (LOAD_IOSDIAG_BPF(iosdiag_scsi, 0, attach_interval)) { + LOAD_IOSDIAG_BPF(iosdiag_scsi_mq, 0, attach_interval); } } } @@ -243,32 +585,45 @@ int iosdiag_init(char *devname) bpf_map__fd(iosdiag_virtblk->maps.iosdiag_virtblk_maps); if (target_devt) bpf_map_update_elem(iosdiag_maps_targetdevt, &key, &target_devt, BPF_ANY); - return 0; + + return 0; } -int iosdiag_run(int timeout, char *output_file) +int iosdiag_run(int timeout, int mode, char *output_file) { - char filepath[256]; - char cmd[272]; - - if (strlen(output_file) > (sizeof(filepath) - 1)) { - printf("error: output file name(%s) too large(max %lu bytes)\n", - output_file, sizeof(filepath)); - return -1; - } - strcpy(filepath, output_file); - sprintf(cmd, "mkdir %s -p", dirname(filepath)); - exec_shell_cmd(cmd); - g_log_fd = open(output_file, O_RDWR | O_CREAT, 0755); - if (g_log_fd < 0) { - printf("error: create output file \"%s\" fail\n", output_file); - return -1; - } signal(SIGINT, iosdiag_stop); signal(SIGALRM, iosdiag_stop); - if (timeout) - alarm(timeout); - iosdiag_collect(); + signal(SIGALRM, signal_handler); + alarm(timeout); + + pthread_t aggregator_thread; + pthread_create(&aggregator_thread, NULL, event_aggregator, NULL); + + if (mode == DIAGNOSTIC_MODE) { + char filepath[256]; + char cmd[272]; + + if (strlen(output_file) > (sizeof(filepath) - 1)) { + printf("error: output file name(%s) too large(max %lu bytes)\n", + output_file, sizeof(filepath)); + return -1; + } + strcpy(filepath, output_file); + sprintf(cmd, "mkdir %s -p", dirname(filepath)); + exec_shell_cmd(cmd); + g_log_fd = open(output_file, O_RDWR | O_CREAT, 
0755); + if (g_log_fd < 0) { + printf("error: create output file \"%s\" fail\n", output_file); + return -1; + } + iosdiag_collect(); + }else{ + pthread_t export_metrics_thread; + pthread_create(&export_metrics_thread, NULL, event_upload_thread, NULL); + iosdiag_collect_normalization(); + pthread_join(export_metrics_thread, NULL); + } + close(g_log_fd); return 0; } @@ -285,3 +640,4 @@ void iosdiag_exit(char *module_name) iosdiag_scsi_mq_bpf__destroy(iosdiag_scsi_mq); } + diff --git a/source/tools/combine/iosdiag/latency/format_json.c b/source/tools/combine/iosdiag/latency/format_json.c index e91bf02f5cb33826094c7cfe898e3518ec55ed6c..e5f44de29b20aa0ca2b7fd0dfa6ba09f3463f405 100644 --- a/source/tools/combine/iosdiag/latency/format_json.c +++ b/source/tools/combine/iosdiag/latency/format_json.c @@ -1,5 +1,4 @@ #include -#include #include #include #include @@ -13,9 +12,11 @@ #include #include #include +#include #include "iosdiag.h" #include "format_json.h" -#include +#include "aggregator.h" + #define min(x, y) ((x) > (y) ? 
(y) : (x)) struct ts_info { @@ -29,6 +30,7 @@ struct ts_info g_points[] = { {"issue_device", IO_ISSUE_DEVICE_POINT}, {"device_complete", IO_RESPONCE_DRIVER_POINT}, {"complete", IO_COMPLETE_TIME_POINT}, + {"done", IO_DONE_POINT}, }; struct ts_info g_delays[] = { @@ -37,6 +39,7 @@ struct ts_info g_delays[] = { {"driver", IO_ISSUE_DEVICE_POINT}, {"disk", IO_RESPONCE_DRIVER_POINT}, {"complete", IO_COMPLETE_TIME_POINT}, + {"done", IO_DONE_POINT}, }; static char g_check_date[24]; @@ -52,7 +55,7 @@ static char *point_idx_to_str(int idx) return NULL; } -static char *delay_idx_to_str(int idx) +char *delay_idx_to_str(int idx) { int i = 0; @@ -85,33 +88,52 @@ static char *get_check_time_date(void) return g_check_date; } -static unsigned long get_total_delay(struct iosdiag_req *iop) +unsigned long get_total_delay(struct iosdiag_req *iop) { return iop->ts[MAX_POINT - 1] / 1000 - iop->ts[IO_START_POINT] / 1000; } -static unsigned long get_max_delay(struct iosdiag_req *iop) +unsigned long get_max_delay(struct iosdiag_req *iop) { - int i; + int i, n; + int skip = 0; unsigned long delay; unsigned long max_delay = 0; - for (i = IO_START_POINT + 1; i < MAX_POINT; i++) { - delay = iop->ts[i] / 1000 - iop->ts[i - 1] / 1000; + //for (i = IO_START_POINT + 1; i < MAX_POINT; i++) { + for (i = IO_START_POINT + 1, n = 0; i < MAX_POINT; i++) { + if (!skip) + n = i - 1; + if (iop->ts[i] > iop->ts[n]) { + delay = iop->ts[i] / 1000 - iop->ts[n] / 1000; + skip = 0; + } else { + skip = 1; + continue; + } if (max_delay < delay) max_delay = delay; } return max_delay; } -static char *get_max_delay_component(struct iosdiag_req *iop) +char *get_max_delay_component(struct iosdiag_req *iop) { - int i, idx = 0; + int i, idx, n = 0; + int skip = 0; unsigned long delay; unsigned long max_delay = 0; for (i = IO_START_POINT + 1; i < MAX_POINT; i++) { - delay = iop->ts[i] / 1000 - iop->ts[i - 1] / 1000; + if (!skip) + n = i - 1; + if (iop->ts[i] > iop->ts[n]) { + delay = iop->ts[i] / 1000 - iop->ts[n] / 1000; 
+ skip = 0; + } else { + skip = 1; + continue; + } if (max_delay < delay) { max_delay = delay; idx = i; @@ -191,7 +213,7 @@ void delay_convert_to_json(void *dest, void *src) void summary_convert_to_json(void *dest, void *src) { - char cpu[24] = {0}; + char cpu[32] = {0}; char component[16] = {0}; struct iosdiag_req *iop = src; char *maxdelay_component = get_max_delay_component(iop); @@ -203,11 +225,11 @@ void summary_convert_to_json(void *dest, void *src) maxdelay_component = component; } - if (iop->cpu[0] == iop->cpu[1] && iop->cpu[1] == iop->cpu[2]) + if (iop->cpu[0] == iop->cpu[1] && iop->cpu[1] == iop->cpu[2] && iop->cpu[2] == iop->cpu[3]) sprintf(cpu, "%d", iop->cpu[0]); else - sprintf(cpu, "%d -> %d -> %d", - iop->cpu[0], iop->cpu[1], iop->cpu[2]); + sprintf(cpu, "%d -> %d -> %d -> %d", + iop->cpu[0], iop->cpu[1], iop->cpu[2], iop->cpu[3]); //blk_rq_op_name(iop->cmd_flags, buf, sizeof(buf)); sprintf(dest, "{\"time\":\"%s\"," @@ -232,3 +254,128 @@ void summary_convert_to_json(void *dest, void *src) cpu); } +void summary_convert_to_unity(void *dest, void *src) +{ + int i, n; + int skip = 0; + unsigned long delay; + char cpu[24] = {0}; + char component[16] = {0}; + struct iosdiag_req *iop = src; + char *maxdelay_component = get_max_delay_component(iop); + unsigned long max_delay = get_max_delay(iop); + unsigned long total_delay = get_total_delay(iop); + if (!is_disk_delay(iop)) { + sprintf(component, "os(%s)", maxdelay_component); + maxdelay_component = component; + } + sprintf(dest, + "sysom_iolatency,diskname=%s," + "comm=%s," + "iotype=%s," + "pid=%d," + "ppid=%d," + "queue_id=%d," + "maxdelay_component=%s " + "max_delay=%lu," + "total_delay=%lu," + "sum_datalen=%d," + "datalen=%d," + "initiated_cpu=%d," + "issue_cpu=%d," + "respond_cpu=%d," + "soft_interrupt_cpu=%d," + "count=%d", + iop->diskname, + iop->comm, + iop->op, + iop->tid, + iop->pid, + iop->queue_id, + maxdelay_component, + max_delay, + total_delay, + iop->data_len, + iop->data_len, + iop->cpu[0], 
+ iop->cpu[1], + iop->cpu[2], + iop->cpu[3], + 1 + ); + + for (i = 0, n = 0; i < MAX_POINT; i++) { + if (i == IO_START_POINT) { + continue; + } else { + if (!skip) + n = i - 1; + if (iop->ts[i] > iop->ts[n]) { + delay = iop->ts[i] / 1000 - iop->ts[n] / 1000; + skip = 0; + } else { + skip = 1; + continue; + } + } + sprintf(dest + strlen(dest), + ",%s=%lu", + delay_idx_to_str(i), delay); + } + + sprintf(dest + strlen(dest), "%s", "\n"); + + sprintf(dest + strlen(dest), + "sysom_iolatency_max,diskname=%s," + "comm=%s," + "iotype=%s," + "pid=%d," + "ppid=%d," + "queue_id=%d," + "maxdelay_component=%s " + "max_delay=%lu," + "total_delay=%lu," + "sum_datalen=%d," + "datalen=%d," + "initiated_cpu=%d," + "issue_cpu=%d," + "respond_cpu=%d," + "soft_interrupt_cpu=%d," + "count=%d", + iop->diskname, + iop->comm, + iop->op, + iop->tid, + iop->pid, + iop->queue_id, + maxdelay_component, + max_delay, + total_delay, + iop->data_len, + iop->data_len, + iop->cpu[0], + iop->cpu[1], + iop->cpu[2], + iop->cpu[3], + 1 + ); + + for (i = 0, n = 0; i < MAX_POINT; i++) { + if (i == IO_START_POINT) { + continue; + } else { + if (!skip) + n = i - 1; + if (iop->ts[i] > iop->ts[n]) { + delay = iop->ts[i] / 1000 - iop->ts[n] / 1000; + skip = 0; + } else { + skip = 1; + continue; + } + } + sprintf(dest + strlen(dest), + ",%s=%lu", + delay_idx_to_str(i), delay); + } +} diff --git a/source/tools/combine/iosdiag/latency/format_json.h b/source/tools/combine/iosdiag/latency/format_json.h index ece2b8c9a234aacc646cf605a7a165dc787f4968..b0ad40537dace9c52c00109ea248da80905cccfb 100644 --- a/source/tools/combine/iosdiag/latency/format_json.h +++ b/source/tools/combine/iosdiag/latency/format_json.h @@ -4,7 +4,12 @@ #define JSON_BUFFER_SIZE 4096 void set_check_time_date(void); void summary_convert_to_json(void *dest, void *src); +void summary_convert_to_unity(void *dest, void *src); void delay_convert_to_json(void *dest, void *src); void point_convert_to_json(void *dest, void *src); +char 
*delay_idx_to_str(int idx); +unsigned long get_total_delay(struct iosdiag_req *iop); +unsigned long get_max_delay(struct iosdiag_req *iop); +char *get_max_delay_component(struct iosdiag_req *iop); #endif diff --git a/source/tools/combine/iosdiag/latency/iosdiag.h b/source/tools/combine/iosdiag/latency/iosdiag.h index 89288642033af36dc475e7238c727f24be616026..ca6cc746b26cbc842f5dcf4315f09940b67f519c 100644 --- a/source/tools/combine/iosdiag/latency/iosdiag.h +++ b/source/tools/combine/iosdiag/latency/iosdiag.h @@ -15,15 +15,23 @@ enum ioroute_type{ IO_ISSUE_DEVICE_POINT, IO_RESPONCE_DRIVER_POINT, IO_COMPLETE_TIME_POINT, + IO_DONE_POINT, MAX_POINT, }; +enum operating_mode{ + DIAGNOSTIC_MODE, + MONITOR_MODE, +}; + struct iosdiag_req { pid_t pid; + pid_t tid; + unsigned int queue_id; char comm[16]; char diskname[32]; unsigned long long ts[MAX_POINT]; - unsigned int cpu[3]; + unsigned int cpu[4]; //unsigned int complete; //unsigned int cmd_flags; char op[8]; @@ -31,6 +39,20 @@ struct iosdiag_req { unsigned long sector; }; +struct aggregation_metrics { + unsigned int sum_data_len; + unsigned long sum_max_delay; + unsigned long max_delay; + unsigned long sum_total_delay; + unsigned long max_total_delay; + unsigned int max_total_dalay_idx; + char* maxdelay_component; + char* max_total_delay_diskname; + unsigned long* sum_component_delay; + unsigned long* max_component_delay; + int count; +}; + struct iosdiag_key { #if 0 int cpu; @@ -41,7 +63,7 @@ struct iosdiag_key { unsigned long sector; }; -int iosdiag_init(char *module_name); -int iosdiag_run(int timeout, char *output_file); +int iosdiag_init(char *module_name, unsigned int attach_interval); +int iosdiag_run(int timeout, int mode, char *output_file); void iosdiag_exit(char *module_name); #endif diff --git a/source/tools/combine/iosdiag/latency/iosdiag_nvme.bpf.c b/source/tools/combine/iosdiag/latency/iosdiag_nvme.bpf.c index 12ea1a1494db76e6167b024bae4310d31567f4d2..950bf36476a29180cdf537177b7e5d27e5b9d54d 100644 --- 
a/source/tools/combine/iosdiag/latency/iosdiag_nvme.bpf.c +++ b/source/tools/combine/iosdiag/latency/iosdiag_nvme.bpf.c @@ -31,5 +31,3 @@ int kprobe_nvme_pci_complete_rq(struct pt_regs *ctx) } return trace_io_driver_route(ctx, req, IO_RESPONCE_DRIVER_POINT); } -char _license[] SEC("license") = "GPL"; - diff --git a/source/tools/combine/iosdiag/latency/iosdiag_scsi.bpf.c b/source/tools/combine/iosdiag/latency/iosdiag_scsi.bpf.c index ca4d116ba93e744af752bc2a5b84b88461fb8e7f..51418930ea41bb7d3bea66a1a7e8884ecc2ff182 100644 --- a/source/tools/combine/iosdiag/latency/iosdiag_scsi.bpf.c +++ b/source/tools/combine/iosdiag/latency/iosdiag_scsi.bpf.c @@ -31,5 +31,3 @@ int kprobe_scsi_done(struct pt_regs *ctx) } return trace_io_driver_route(ctx, req, IO_RESPONCE_DRIVER_POINT); } -char _license[] SEC("license") = "GPL"; - diff --git a/source/tools/combine/iosdiag/latency/iosdiag_scsi_mq.bpf.c b/source/tools/combine/iosdiag/latency/iosdiag_scsi_mq.bpf.c index f2b77f72fd19667293f64dfe1c106213ac2677c2..9b0ec8dc33608f4ed5672df3626726ef42bc0337 100644 --- a/source/tools/combine/iosdiag/latency/iosdiag_scsi_mq.bpf.c +++ b/source/tools/combine/iosdiag/latency/iosdiag_scsi_mq.bpf.c @@ -31,5 +31,3 @@ int kprobe_scsi_mq_done(struct pt_regs *ctx) } return trace_io_driver_route(ctx, req, IO_RESPONCE_DRIVER_POINT); } -char _license[] SEC("license") = "GPL"; - diff --git a/source/tools/combine/iosdiag/latency/iosdiag_virtblk.bpf.c b/source/tools/combine/iosdiag/latency/iosdiag_virtblk.bpf.c index 57f2d40445786088e599137e158dcbacaa8e4783..5b64ca1140ee65e9d624b1e7adbaf1776fd8520e 100644 --- a/source/tools/combine/iosdiag/latency/iosdiag_virtblk.bpf.c +++ b/source/tools/combine/iosdiag/latency/iosdiag_virtblk.bpf.c @@ -9,11 +9,27 @@ struct bpf_map_def SEC("maps") iosdiag_virtblk_maps = { SEC("kprobe/virtio_queue_rq") int kprobe_virtio_queue_rq(struct pt_regs *ctx) -{ +{ + struct blk_mq_hw_ctx *hctx = + (struct blk_mq_hw_ctx *)PT_REGS_PARM1(ctx); struct blk_mq_queue_data *bd = (struct 
blk_mq_queue_data *)PT_REGS_PARM2(ctx); + bool kick; unsigned long req_addr; + unsigned int queue_id; + struct request *req; + + struct iosdiag_req *ioreq; + struct iosdiag_req new_ioreq = {0}; + struct iosdiag_key key = {0}; + sector_t sector; + // unsigned long q = 0; + // unsigned long dev = 0; + dev_t devt = 0; + int major; + int first_minor; + pid_t pid = bpf_get_current_pid_tgid(); bpf_probe_read(&kick, sizeof(bool), &bd->last); @@ -22,10 +38,32 @@ int kprobe_virtio_queue_rq(struct pt_regs *ctx) bpf_probe_read(&req_addr, sizeof(struct request *), &bd->rq); if (!req_addr) { - //bpf_printk("kprobe_virtio_queue_rq: con't get request"); return 0; } + + bpf_probe_read(&queue_id, sizeof(unsigned int), &hctx->queue_num); + bpf_probe_read(&req, sizeof(struct request *), &bd->rq); + if (!req) { + return 0; + } + + struct gendisk *gd = get_rq_disk(req); + bpf_probe_read(&major, sizeof(int), &gd->major); + bpf_probe_read(&first_minor, sizeof(int), &gd->first_minor); + devt = ((major) << 20) | (first_minor); + + bpf_probe_read(§or, sizeof(sector_t), &req->__sector); + + init_iosdiag_key(sector, devt, &key); + ioreq = (struct iosdiag_req *)bpf_map_lookup_elem(&iosdiag_maps, &key); + if (ioreq) { + ioreq->queue_id = queue_id; + } else + return 0; + bpf_map_update_elem(&iosdiag_virtblk_maps, &pid, &req_addr, BPF_ANY); + bpf_map_update_elem(&iosdiag_maps, &key, ioreq, BPF_ANY); + return 0; } @@ -57,5 +95,4 @@ int kprobe_blk_mq_complete_request(struct pt_regs *ctx) } return trace_io_driver_route(ctx, req, IO_RESPONCE_DRIVER_POINT); } -char _license[] SEC("license") = "GPL"; diff --git a/source/tools/combine/iosdiag/latency/main.c b/source/tools/combine/iosdiag/latency/main.c index 608ac34e53ccaba55e4af9be14460380c4a36323..91583c2b2aacde6883944d837b2276d59b22b0c2 100644 --- a/source/tools/combine/iosdiag/latency/main.c +++ b/source/tools/combine/iosdiag/latency/main.c @@ -13,6 +13,7 @@ #include #include #include + #include "iosdiag.h" static void usage(void) @@ -22,12 +23,16 
@@ static void usage(void) "latency [OPTION] disk_devname Detect IO latency in specified disk\n" "latency -t ms disk_devname Set IO latency threshold(default 1000ms)\n" "latency -T sec disk_devname How long to detect IO latency(default 10s)\n" + "latency -a sec disk_devname The period of time in the detach state(default 0s)\n" "latency -f log disk_devname Specify the output file log\n" + "latency -m Monitor mode, sending high-latency IO events to unity\n" "latency -v Display debug log during load bpf\n" "\ne.g.\n" "latency vda Detect IO latency in disk \"vda\"\n" "latency -t 10 vda Set IO latency threshold 10ms and detect IO latency in disk \"vda\"\n" - "latency -t 10 -T 30 vda Detect IO latency in disk \"vda\" 30 secs\n"); + "latency -t 10 -T 30 vda Detect IO latency in disk \"vda\" 30 secs\n" + "latency -t 10 -T 30 -m Detect IO latency in all disks 30 secs and send high-latency IO events to unity\n"); + exit(-1); } @@ -47,12 +52,23 @@ int main(int argc, char *argv[]) { int ch; int timeout_s = 10, threshold_ms = 1000; + unsigned int attach_s = 0; + int operating_mode = DIAGNOSTIC_MODE; char *result_dir = "/var/log/sysak/iosdiag/latency"; + char *tool_path = "/usr/local/sysak/.sysak_components/tools"; char *devname; char resultfile_path[256]; - while ((ch = getopt(argc, argv, "T:t:f:hv")) != -1) { + while ((ch = getopt(argc, argv, "a:mT:t:f:hv")) != -1) { switch (ch) { + case 'a': + attach_s = (unsigned int)strtoul(optarg, NULL, 0); + if (attach_s < 0) + attach_s = 0; + break; + case 'm': + operating_mode = MONITOR_MODE; + break; case 'T': timeout_s = (unsigned int)strtoul(optarg, NULL, 0); if (timeout_s <= 0) @@ -72,14 +88,16 @@ int main(int argc, char *argv[]) usage(); } } + devname = argv[argc - 1]; g_threshold_us = threshold_ms * 1000; - if (iosdiag_init(devname)) { + sprintf(resultfile_path, "%s/result.log.seq", result_dir); + + if (iosdiag_init(devname, attach_s)) { fprintf(stderr, "iosdiag_init fail\n"); return -1; } - sprintf(resultfile_path, 
"%s/result.log.seq", result_dir); - iosdiag_run(timeout_s, resultfile_path); + iosdiag_run(timeout_s, operating_mode, resultfile_path); iosdiag_exit(devname); return 0; } diff --git a/source/tools/combine/sar/Makefile b/source/tools/combine/sar/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..31865c7ac192ef8fa2faf4baf3e0e63567aa98f7 --- /dev/null +++ b/source/tools/combine/sar/Makefile @@ -0,0 +1,14 @@ +target := sar +ifeq ($(KERNEL_DEPEND), Y) +TARGET_PATH := $(OBJ_TOOLS_PATH) +else +TARGET_PATH := $(OBJ_TOOLS_ROOT) +endif + +all: $(target) target_rule + +$(target): $@ + sh venv.sh + cp dist/sar $(TARGET_PATH)/ + +include $(SRC)/mk/target.inc diff --git a/source/tools/combine/sar/README.md b/source/tools/combine/sar/README.md new file mode 100644 index 0000000000000000000000000000000000000000..e1ecb15ed003e3b10cc17e25af09e9f0768a8a32 --- /dev/null +++ b/source/tools/combine/sar/README.md @@ -0,0 +1,94 @@ +# sar功能说明 + +收集服务器的系统信息(如cpu,io,mem,tcp等)进行数据统计展示 + + +### 1、cpu +##### 字段含义 +* user:表示CPU执行用户进程的时间,通常期望用户空间CPU越高越好 +* sys:表示CPU在内核运行时间,系统CPU占用率高,表明系统某部分存在瓶颈.通常值越低越好. +* wait:CPU在等待I/O操作完成所花费的时间. 
+* hirq: 系统处理硬中断所花费的时间百分比 +* sirq: 系统处理软中断所花费的时间百分比 +* util: CPU总使用的时间百分比 +*** + +### 2、mem +##### 字段含义 +* free: 空闲的物理内存的大小 +* used: 已经使用的内存大小 +* buff: buff使用的内存大小 +* cach: 缓存大小 +* total: 系统总的内存大小 +* util: 内存使用率 +*** + +### 3、load +##### 字段含义 +* load1: 一分钟的系统平均负载 +* load5: 五分钟的系统平均负载 +* load15:十五分钟的系统平均负载 +* runq: 在采样时刻,运行队列的任务的数目,与/proc/stat的procs_running表示相同意思 +* plit: 在采样时刻,系统中活跃的任务的个数(不包括运行已经结束的任务) +*** + +### 4、traffic +##### 字段含义 +* bytin: 入口流量byte/s +* bytout: 出口流量byte/s +* pktin: 入口pkt/s +* pktout: 出口pkt/s +* pkterr:发送及接收的错误总数 +* pktdrp:设备驱动程序丢弃的数据包总数 +*** + +### 5、tcp +##### 字段含义 +* active:主动打开的tcp连接数目 +* pasive:被动打开的tcp连接数目 +* iseg: 收到的tcp报文数目 +* outseg:发出的tcp报文数目 +* CurrEs:当前状态为ESTABLISHED的tcp连接数 +* retran:系统的重传率 +*** +### 6、udp +##### 字段含义 +* InEr:入口错误数 +* SndEr:发送的错误数 +* In:接收数量 +* RcvEr:接收的错误数 +* Out:发送数量 +* NoPort:udp协议层接收到目的地址或目的端口不存在的数据包 +*** +### 7、io +##### 字段含义 +* disk_name:设备名称 +* inflight: +* backlog +* rmsec +* util +* wkb +* xfers +* bsize +* wmsec +* rkb +* writes +* wmerge +* rmerge +* reads +*** +### 8、partition +##### 字段含义 +* path:分区目录 +* bfree:分区空闲的字节 +* bused: 分区使用中的字节 +* btotl: 分区总的大小 +* util: 分区使用率 +*** +### 9、pcsw +##### 字段含义 +* block:D状态任务数量 +* ctxt:上下文切换次数 +* run:并行任务数量 + + diff --git a/source/tools/combine/sar/config.yaml b/source/tools/combine/sar/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..10470f70c91199dfafac6efdb194efe35b92bc75 --- /dev/null +++ b/source/tools/combine/sar/config.yaml @@ -0,0 +1 @@ +host_config: http://127.0.0.1:8400 \ No newline at end of file diff --git a/source/tools/combine/sar/cpu.py b/source/tools/combine/sar/cpu.py new file mode 100644 index 0000000000000000000000000000000000000000..98bffa7f8b2a1564725e220485809f51fea7bfac --- /dev/null +++ b/source/tools/combine/sar/cpu.py @@ -0,0 +1,188 @@ +# -*- coding: utf-8 -*- +#!/root/anaconda3/envs/python310 +import statistics +import datetime + +from db import get_sql_resp +from utils import get_print_title_distance + +def 
cpu_data_show(distance_max=5, minutes=50, date=1): + try: + if not distance_max: + distance_max = 5 + ret = get_sql_resp(minutes=minutes, table=["cpu_total"],date=date) + distance_num = 0 + time_minute_flag = None + minute_cpu_dict = { + "user":[], + "sys":[], + "iowait":[], + "hardirq":[], + "softirq":[], + "steal":[], + "idle":[] + } + cpu_dict_all={ + "user":[], + "sys":[], + "iowait":[], + "hardirq":[], + "softirq":[], + "util":[] + } + print("Time -----------------------cpu----------------------") + print("Time user sys wait hirq sirq util ") + title_cpu = "Time user sys wait hirq sirq util " + title_print_distance_str = get_print_title_distance(title_cpu) + endtime = datetime.datetime.fromtimestamp(int(ret[-1].get("time"))/1000000).strftime("%m/%d/%y-%H:%M") + endtime_strp = datetime.datetime.strptime(endtime, "%m/%d/%y-%H:%M") + for i in ret: + time = i.get("time") + time = datetime.datetime.fromtimestamp(int(time)/1000000).strftime("%m/%d/%y-%H:%M") + time_strp = datetime.datetime.strptime(time, "%m/%d/%y-%H:%M") + if (time_strp.minute+time_strp.hour*60)%distance_max != 0: + continue + if time_strp+datetime.timedelta(minutes=distance_max) >= endtime_strp: #末条数据判断 + if time == time_minute_flag: + distance_num+=1 + if distance_num >=19: + print("Time -----------------------cpu----------------------") + print("Time user sys wait hirq sirq util") + distance_num = 0 + minute_cpu_dict.get("user").append(i.get("values").get("user")) + minute_cpu_dict.get("sys").append(i.get("values").get("sys")) + minute_cpu_dict.get("iowait").append(i.get("values").get("iowait")) + minute_cpu_dict.get("hardirq").append(i.get("values").get("hardirq")) + minute_cpu_dict.get("softirq").append(i.get("values").get("softirq")) + minute_cpu_dict.get("steal").append(i.get("values").get("steal")) + minute_cpu_dict.get("idle").append(i.get("values").get("idle")) + user = (sum(minute_cpu_dict.get("user"))/len(minute_cpu_dict.get("user"))) + sys = 
(sum(minute_cpu_dict.get("sys"))/len(minute_cpu_dict.get("sys"))) + iowait = (sum(minute_cpu_dict.get("iowait"))/len(minute_cpu_dict.get("iowait"))) + hardirq = (sum(minute_cpu_dict.get("hardirq"))/len(minute_cpu_dict.get("hardirq"))) + softirq = (sum(minute_cpu_dict.get("softirq"))/len(minute_cpu_dict.get("softirq"))) + steal = (sum(minute_cpu_dict.get("steal"))/len(minute_cpu_dict.get("steal"))) + idle = (sum(minute_cpu_dict.get("idle"))/len(minute_cpu_dict.get("idle"))) + util = 100-idle-steal-iowait + cpu_dict_all.get("user").append(user) + cpu_dict_all.get("sys").append(sys) + cpu_dict_all.get("iowait").append(iowait) + cpu_dict_all.get("hardirq").append(hardirq) + cpu_dict_all.get("softirq").append(softirq) + cpu_dict_all.get("util").append(util) + print(("%s\n"%title_print_distance_str).format(time,"%.2f"%user,"%.2f"%sys, "%.2f"%iowait,"%.2f"%hardirq, "%.2f"%softirq,"%.2f"%util)) + print(("%s"%title_print_distance_str).format("MAX","%.2f"%max(cpu_dict_all.get("user")),"%.2f"%max(cpu_dict_all.get("sys")), + "%.2f"%max(cpu_dict_all.get("iowait")),"%.2f"%max(cpu_dict_all.get("hardirq")), "%.2f"%max(cpu_dict_all.get("softirq")), + "%.2f"%max(cpu_dict_all.get("util")))) + print(("%s"%title_print_distance_str).format("MEAN","%.2f"%statistics.mean(cpu_dict_all.get("user")),"%.2f"%statistics.mean(cpu_dict_all.get("sys")), + "%.2f"%statistics.mean(cpu_dict_all.get("iowait")),"%.2f"%statistics.mean(cpu_dict_all.get("hardirq")), "%.2f"%statistics.mean(cpu_dict_all.get("softirq")), + "%.2f"%statistics.mean(cpu_dict_all.get("util")))) + + print(("%s"%title_print_distance_str).format("MIN","%.2f"%min(cpu_dict_all.get("user")),"%.2f"%min(cpu_dict_all.get("sys")), + "%.2f"%min(cpu_dict_all.get("iowait")),"%.2f"%min(cpu_dict_all.get("hardirq")), "%.2f"%min(cpu_dict_all.get("softirq")), + "%.2f"%min(cpu_dict_all.get("util")))) + break + else: + user = (sum(minute_cpu_dict.get("user"))/len(minute_cpu_dict.get("user"))) + sys = 
(sum(minute_cpu_dict.get("sys"))/len(minute_cpu_dict.get("sys"))) + iowait = (sum(minute_cpu_dict.get("iowait"))/len(minute_cpu_dict.get("iowait"))) + hardirq = (sum(minute_cpu_dict.get("hardirq"))/len(minute_cpu_dict.get("hardirq"))) + softirq = (sum(minute_cpu_dict.get("softirq"))/len(minute_cpu_dict.get("softirq"))) + steal = (sum(minute_cpu_dict.get("steal"))/len(minute_cpu_dict.get("steal"))) + idle = (sum(minute_cpu_dict.get("idle"))/len(minute_cpu_dict.get("idle"))) + util = 100-idle-steal-iowait + cpu_dict_all.get("user").append(user) + cpu_dict_all.get("sys").append(sys) + cpu_dict_all.get("iowait").append(iowait) + cpu_dict_all.get("hardirq").append(hardirq) + cpu_dict_all.get("softirq").append(softirq) + cpu_dict_all.get("util").append(util) + distance_num+=1 + if distance_num >=19: + print("Time -----------------------cpu----------------------") + print("Time user sys wait hirq sirq util") + distance_num = 0 + print(("%s"%title_print_distance_str).format(time_minute_flag,"%.2f"%user,"%.2f"%sys, "%.2f"%iowait,"%.2f"%hardirq, "%.2f"%softirq,"%.2f"%util)) + user = i.get("values").get("user") + sys = i.get("values").get("sys") + iowait = i.get("values").get("iowait") + hardirq = i.get("values").get("hardirq") + softirq = i.get("values").get("softirq") + steal = (sum(minute_cpu_dict.get("steal"))/len(minute_cpu_dict.get("steal"))) + idle = (sum(minute_cpu_dict.get("idle"))/len(minute_cpu_dict.get("idle"))) + util = 100-idle-steal-iowait + cpu_dict_all.get("user").append(user) + cpu_dict_all.get("sys").append(sys) + cpu_dict_all.get("iowait").append(iowait) + cpu_dict_all.get("hardirq").append(hardirq) + cpu_dict_all.get("softirq").append(softirq) + cpu_dict_all.get("util").append(util) + distance_num+=1 + if distance_num >=19: + print("Time -----------------------cpu----------------------") + print("Time user sys wait hirq sirq util") + distance_num = 0 + print(("%s\n"%title_print_distance_str).format(time,"%.2f"%user,"%.2f"%sys, 
"%.2f"%iowait,"%.2f"%hardirq, "%.2f"%softirq,"%.2f"%util)) + print(("%s"%title_print_distance_str).format("MAX","%.2f"%max(cpu_dict_all.get("user")),"%.2f"%max(cpu_dict_all.get("sys")), + "%.2f"%max(cpu_dict_all.get("iowait")),"%.2f"%max(cpu_dict_all.get("hardirq")), "%.2f"%max(cpu_dict_all.get("softirq")), + "%.2f"%max(cpu_dict_all.get("util")))) + print(("%s"%title_print_distance_str).format("MEAN","%.2f"%statistics.mean(cpu_dict_all.get("user")),"%.2f"%statistics.mean(cpu_dict_all.get("sys")), + "%.2f"%statistics.mean(cpu_dict_all.get("iowait")),"%.2f"%statistics.mean(cpu_dict_all.get("hardirq")), "%.2f"%statistics.mean(cpu_dict_all.get("softirq")), + "%.2f"%statistics.mean(cpu_dict_all.get("util")))) + print(("%s"%title_print_distance_str).format("MIN","%.2f"%min(cpu_dict_all.get("user")),"%.2f"%min(cpu_dict_all.get("sys")), + "%.2f"%min(cpu_dict_all.get("iowait")),"%.2f"%min(cpu_dict_all.get("hardirq")), "%.2f"%min(cpu_dict_all.get("softirq")), + "%.2f"%min(cpu_dict_all.get("util")))) + break + if not time_minute_flag: + minute_cpu_dict = { + "user":[i.get("values").get("user")], + "sys":[i.get("values").get("sys")], + "iowait":[i.get("values").get("iowait")], + "hardirq":[i.get("values").get("hardirq")], + "softirq":[i.get("values").get("softirq")], + "steal":[i.get("values").get("steal")], + "idle":[i.get("values").get("idle")] + } + time_minute_flag = time + elif time == time_minute_flag: + minute_cpu_dict.get("user").append(i.get("values").get("user")) + minute_cpu_dict.get("sys").append(i.get("values").get("sys")) + minute_cpu_dict.get("iowait").append(i.get("values").get("iowait")) + minute_cpu_dict.get("hardirq").append(i.get("values").get("hardirq")) + minute_cpu_dict.get("softirq").append(i.get("values").get("softirq")) + minute_cpu_dict.get("steal").append(i.get("values").get("steal")) + minute_cpu_dict.get("idle").append(i.get("values").get("idle")) + else: + distance_num+=1 + if distance_num >=19: + print("Time 
-----------------------cpu----------------------") + print("Time user sys wait hirq sirq util") + distance_num = 0 + user = (sum(minute_cpu_dict.get("user"))/len(minute_cpu_dict.get("user"))) + sys = (sum(minute_cpu_dict.get("sys"))/len(minute_cpu_dict.get("sys"))) + iowait = (sum(minute_cpu_dict.get("iowait"))/len(minute_cpu_dict.get("iowait"))) + hardirq = (sum(minute_cpu_dict.get("hardirq"))/len(minute_cpu_dict.get("hardirq"))) + softirq = (sum(minute_cpu_dict.get("softirq"))/len(minute_cpu_dict.get("softirq"))) + steal = (sum(minute_cpu_dict.get("steal"))/len(minute_cpu_dict.get("steal"))) + idle = (sum(minute_cpu_dict.get("idle"))/len(minute_cpu_dict.get("idle"))) + util = 100-idle-steal-iowait + cpu_dict_all.get("user").append(user) + cpu_dict_all.get("sys").append(sys) + cpu_dict_all.get("iowait").append(iowait) + cpu_dict_all.get("hardirq").append(hardirq) + cpu_dict_all.get("softirq").append(softirq) + cpu_dict_all.get("util").append(util) + print(("%s"%title_print_distance_str).format(time_minute_flag,"%.2f"%user,"%.2f"%sys, "%.2f"%iowait,"%.2f"%hardirq, "%.2f"%softirq,"%.2f"%util)) + minute_cpu_dict = { + "user":[i.get("values").get("user")], + "sys":[i.get("values").get("sys")], + "iowait":[i.get("values").get("iowait")], + "hardirq":[i.get("values").get("hardirq")], + "softirq":[i.get("values").get("softirq")], + "steal":[i.get("values").get("steal")], + "idle":[i.get("values").get("idle")] + } + time_minute_flag = time + except Exception as e: + print(e) + return \ No newline at end of file diff --git a/source/tools/combine/sar/db.py b/source/tools/combine/sar/db.py new file mode 100644 index 0000000000000000000000000000000000000000..44790dd81dadae3e5752a3d8bb6f9f45c8481c3b --- /dev/null +++ b/source/tools/combine/sar/db.py @@ -0,0 +1,54 @@ +# -*- coding: utf-8 -*- +#!/root/anaconda3/envs/python310 +import json +import requests +import datetime +import time +from yaml_instance import sar_config +tz = int(time.strftime('%z')) / 100 + +config = 
sar_config() +host_config = config["host_config"] + +def get_sql_resp(minutes, table, date): + try: + url = host_config +"/api/query" + if minutes: + d = {"mode": "last", "time": "%sm"%minutes, "table": table} + res_last = requests.post(url, json=d) + ret_last = res_last.content.decode() + ret_last = json.loads(ret_last) + return ret_last + ret_list = [] + now = datetime.datetime.now() + if date: + date_distance = datetime.timedelta(hours=date) + else: + date_distance = datetime.timedelta(days=1) + start_date = now - date_distance #开始时间 + while start_date < now: + start_date_strf = start_date.strftime("%Y-%m-%d %H:%M:%S") + end_time = start_date + datetime.timedelta(minutes=30) + end_time = end_time.strftime("%Y-%m-%d %H:%M:%S") + # eq: d = {'mode': 'date', 'start': '2023-08-30 06:27:48', 'stop': '2023-08-30 06:57:48', 'tz': -8, 'table': ['cpu_total']} + d = {"mode": "date", "start": start_date_strf, "stop": end_time, "tz": -tz, "table": table} + retrys=0 + max_retry=3 + res = requests.post(url, json=d) + if res.status_code!=requests.codes.ok: + while retrys= endtime_strp: #末条数据判断 + if time == time_minute_flag: + distance_num+=1 + if distance_num >=19: + print("Time -------------------load-----------------") + print("Time load1 load5 load15 runq plit") + distance_num = 0 + minute_cpu_dict.get("load1").append(i.get("values").get("load1")) + minute_cpu_dict.get("load5").append(i.get("values").get("load5")) + minute_cpu_dict.get("load15").append(i.get("values").get("load15")) + minute_cpu_dict.get("runq").append(i.get("values").get("runq")) + minute_cpu_dict.get("plit").append(i.get("values").get("plit")) + load1 = (sum(minute_cpu_dict.get("load1"))/len(minute_cpu_dict.get("load1"))) + load5 = (sum(minute_cpu_dict.get("load5"))/len(minute_cpu_dict.get("load5"))) + load15 = (sum(minute_cpu_dict.get("load15"))/len(minute_cpu_dict.get("load15"))) + runq = (sum(minute_cpu_dict.get("runq"))/len(minute_cpu_dict.get("runq"))) + plit = 
(sum(minute_cpu_dict.get("plit"))/len(minute_cpu_dict.get("plit"))) + cpu_dict_all.get("load1").append(load1) + cpu_dict_all.get("load5").append(load5) + cpu_dict_all.get("load15").append(load15) + cpu_dict_all.get("runq").append(runq) + cpu_dict_all.get("plit").append(plit) + print(("%s\n" %title_print_distance_str).format(time,"%.2f"%load1,"%.2f"%load5, "%.2f"%load15,"%.2f"%runq, "%.2f"%plit)) + print(("%s"%title_print_distance_str).format("MAX","%.2f"%max(cpu_dict_all.get("load1")),"%.2f"%max(cpu_dict_all.get("load5")), + "%.2f"%max(cpu_dict_all.get("load15")),"%.2f"%max(cpu_dict_all.get("runq")), "%.2f"%max(cpu_dict_all.get("plit")))) + print(("%s"%title_print_distance_str).format("MEAN","%.2f"%statistics.mean(cpu_dict_all.get("load1")),"%.2f"%statistics.mean(cpu_dict_all.get("load5")), + "%.2f"%statistics.mean(cpu_dict_all.get("load15")),"%.2f"%statistics.mean(cpu_dict_all.get("runq")), + "%.2f"%statistics.mean(cpu_dict_all.get("plit")))) + + print(("%s"%title_print_distance_str).format("MIN","%.2f"%min(cpu_dict_all.get("load1")),"%.2f"%min(cpu_dict_all.get("load5")), + "%.2f"%min(cpu_dict_all.get("load15")),"%.2f"%min(cpu_dict_all.get("runq")), + "%.2f"%min(cpu_dict_all.get("plit")))) + break + else: + load1 = (sum(minute_cpu_dict.get("load1"))/len(minute_cpu_dict.get("load1"))) + load5 = (sum(minute_cpu_dict.get("load5"))/len(minute_cpu_dict.get("load5"))) + load15 = (sum(minute_cpu_dict.get("load15"))/len(minute_cpu_dict.get("load15"))) + runq = (sum(minute_cpu_dict.get("runq"))/len(minute_cpu_dict.get("runq"))) + plit = (sum(minute_cpu_dict.get("plit"))/len(minute_cpu_dict.get("plit"))) + cpu_dict_all.get("load1").append(load1) + cpu_dict_all.get("load5").append(load5) + cpu_dict_all.get("load15").append(load15) + cpu_dict_all.get("runq").append(runq) + cpu_dict_all.get("plit").append(plit) + distance_num+=1 + if distance_num >=19: + print("Time -------------------load-----------------") + print("Time load1 load5 load15 runq plit") + distance_num = 0 + 
print(("%s"%title_print_distance_str).format(time_minute_flag,"%.2f"%load1,"%.2f"%load5, "%.2f"%load15,"%.2f"%runq, "%.2f"%plit)) + load1 = i.get("values").get("load1") + load5 = i.get("values").get("load5") + load15 = i.get("values").get("load15") + runq = i.get("values").get("runq") + plit = i.get("values").get("plit") + cpu_dict_all.get("load1").append(load1) + cpu_dict_all.get("load5").append(load5) + cpu_dict_all.get("load15").append(load15) + cpu_dict_all.get("runq").append(runq) + cpu_dict_all.get("plit").append(plit) + distance_num+=1 + if distance_num >=19: + print("Time -------------------load-----------------") + print("Time load1 load5 load15 runq plit") + distance_num = 0 + print(("%s\n"%title_print_distance_str).format(time,"%.2f"%load1,"%.2f"%load5, "%.2f"%load15,"%.2f"%runq, "%.2f"%plit)) + print(("%s"%title_print_distance_str).format("MAX","%.2f"%max(cpu_dict_all.get("load1")),"%.2f"%max(cpu_dict_all.get("load5")), + "%.2f"%max(cpu_dict_all.get("load15")),"%.2f"%max(cpu_dict_all.get("runq")), "%.2f"%max(cpu_dict_all.get("plit")))) + print(("%s"%title_print_distance_str).format("MEAN","%.2f"%statistics.mean(cpu_dict_all.get("load1")),"%.2f"%statistics.mean(cpu_dict_all.get("load5")), + "%.2f"%statistics.mean(cpu_dict_all.get("load15")),"%.2f"%statistics.mean(cpu_dict_all.get("runq")), + "%.2f"%statistics.mean(cpu_dict_all.get("plit")))) + + print(("%s"%title_print_distance_str).format("MIN","%.2f"%min(cpu_dict_all.get("load1")),"%.2f"%min(cpu_dict_all.get("load5")), + "%.2f"%min(cpu_dict_all.get("load15")),"%.2f"%min(cpu_dict_all.get("runq")), + "%.2f"%min(cpu_dict_all.get("plit")))) + break + if not time_minute_flag: + minute_cpu_dict = { + "load1":[i.get("values").get("load1")], + "load5":[i.get("values").get("load5")], + "load15":[i.get("values").get("load15")], + "runq":[i.get("values").get("runq")], + "plit":[i.get("values").get("plit")] + } + time_minute_flag = time + elif time == time_minute_flag: + 
minute_cpu_dict.get("load1").append(i.get("values").get("load1")) + minute_cpu_dict.get("load5").append(i.get("values").get("load5")) + minute_cpu_dict.get("load15").append(i.get("values").get("load15")) + minute_cpu_dict.get("runq").append(i.get("values").get("runq")) + minute_cpu_dict.get("plit").append(i.get("values").get("plit")) + else: + distance_num+=1 + if distance_num >=19: + print("Time -------------------load-----------------") + print("Time load1 load5 load15 runq plit") + distance_num = 0 + load1 = (sum(minute_cpu_dict.get("load1"))/len(minute_cpu_dict.get("load1"))) + load5 = (sum(minute_cpu_dict.get("load5"))/len(minute_cpu_dict.get("load5"))) + load15 = (sum(minute_cpu_dict.get("load15"))/len(minute_cpu_dict.get("load15"))) + runq = (sum(minute_cpu_dict.get("runq"))/len(minute_cpu_dict.get("runq"))) + plit = (sum(minute_cpu_dict.get("plit"))/len(minute_cpu_dict.get("plit"))) + cpu_dict_all.get("load1").append(load1) + cpu_dict_all.get("load5").append(load5) + cpu_dict_all.get("load15").append(load15) + cpu_dict_all.get("runq").append(runq) + cpu_dict_all.get("plit").append(plit) + print(("%s"%title_print_distance_str).format(time_minute_flag,"%.2f"%load1,"%.2f"%load5, "%.2f"%load15,"%.2f"%runq, "%.2f"%plit)) + minute_cpu_dict = { + "load1":[i.get("values").get("load1")], + "load5":[i.get("values").get("load5")], + "load15":[i.get("values").get("load15")], + "runq":[i.get("values").get("runq")], + "plit":[i.get("values").get("plit")] + } + time_minute_flag = time + except Exception as e: + print(e) + return \ No newline at end of file diff --git a/source/tools/combine/sar/mem.py b/source/tools/combine/sar/mem.py new file mode 100644 index 0000000000000000000000000000000000000000..949529ce03863283b4cf89f7c81abd830298a46b --- /dev/null +++ b/source/tools/combine/sar/mem.py @@ -0,0 +1,171 @@ +# -*- coding: utf-8 -*- +import statistics +import datetime + +from db import get_sql_resp +from hum_byte_convert import hum_convert +from utils import 
get_print_title_distance + +def mem_data_show(distance_max=5, minutes=50, date=1): + try: + if not distance_max: + distance_max = 5 + ret = get_sql_resp(minutes=minutes, table=["meminfo"], date=date) + distance_num = 0 + time_minute_flag = None + minute_cpu_dict = { + "free":[], + "used":[], + "buff":[], + "cach":[], + "total":[] + } + cpu_dict_all={ + "free":[], + "used":[], + "buff":[], + "cach":[], + "total":[], + "util":[] + } + print("Time -----------------------mem----------------------") + print("Time free used buff cach total util ") + title_mem = "Time free used buff cach total util " + title_print_distance_str = get_print_title_distance(title_mem) + endtime = datetime.datetime.fromtimestamp(int(ret[-1].get("time"))/1000000).strftime("%m/%d/%y-%H:%M") + endtime_strp = datetime.datetime.strptime(endtime, "%m/%d/%y-%H:%M") + for i in ret: + time = i.get("time") + time = datetime.datetime.fromtimestamp(int(time)/1000000).strftime("%m/%d/%y-%H:%M") + time_strp = datetime.datetime.strptime(time, "%m/%d/%y-%H:%M") + if (time_strp.minute+time_strp.hour*60)%distance_max != 0: + continue + if time_strp+datetime.timedelta(minutes=distance_max) >= endtime_strp: #末条数据判断 + if time == time_minute_flag: + distance_num+=1 + if distance_num >=19: + print("Time -----------------------mem----------------------") + print("Time free used buff cach total util ") + distance_num = 0 + minute_cpu_dict.get("free").append(i.get("values").get("MemFree")) + minute_cpu_dict.get("used").append(i.get("values").get("used")) + minute_cpu_dict.get("buff").append(i.get("values").get("user_buffers")) + minute_cpu_dict.get("cach").append(i.get("values").get("Cached")) + minute_cpu_dict.get("total").append(i.get("values").get("MemTotal")) + free = (sum(minute_cpu_dict.get("free"))/len(minute_cpu_dict.get("free"))) + used = (sum(minute_cpu_dict.get("used"))/len(minute_cpu_dict.get("used"))) + buff = (sum(minute_cpu_dict.get("buff"))/len(minute_cpu_dict.get("buff"))) + cache = 
(sum(minute_cpu_dict.get("cach"))/len(minute_cpu_dict.get("cach"))) + total = (sum(minute_cpu_dict.get("total"))/len(minute_cpu_dict.get("total"))) + util = (total - free - buff - cache) / total * 100 + cpu_dict_all.get("free").append(free) + cpu_dict_all.get("used").append(used) + cpu_dict_all.get("buff").append(buff) + cpu_dict_all.get("cach").append(cache) + cpu_dict_all.get("total").append(total) + cpu_dict_all.get("util").append(util) + print(("%s\n"%title_print_distance_str).format(time,hum_convert(free),hum_convert(used), hum_convert(buff),hum_convert(cache), hum_convert(total),hum_convert(util))) + print(("%s"%title_print_distance_str).format("MAX",hum_convert(max(cpu_dict_all.get("free"))),hum_convert(max(cpu_dict_all.get("used"))), + hum_convert(max(cpu_dict_all.get("buff"))),hum_convert(max(cpu_dict_all.get("cach"))), hum_convert(max(cpu_dict_all.get("total"))), + hum_convert(max(cpu_dict_all.get("util"))))) + print(("%s"%title_print_distance_str).format("MEAN",hum_convert(statistics.mean(cpu_dict_all.get("free"))),hum_convert(statistics.mean(cpu_dict_all.get("used"))), + hum_convert(statistics.mean(cpu_dict_all.get("buff"))),hum_convert(statistics.mean(cpu_dict_all.get("cach"))), + hum_convert(statistics.mean(cpu_dict_all.get("total"))), + hum_convert(statistics.mean(cpu_dict_all.get("util"))))) + print(("%s"%title_print_distance_str).format("MIN",hum_convert(min(cpu_dict_all.get("free"))),hum_convert(min(cpu_dict_all.get("used"))), + hum_convert(min(cpu_dict_all.get("buff"))),hum_convert(min(cpu_dict_all.get("cach"))), hum_convert(min(cpu_dict_all.get("total"))), + hum_convert(min(cpu_dict_all.get("util"))))) + break + else: + free = (sum(minute_cpu_dict.get("free"))/len(minute_cpu_dict.get("free"))) + used = (sum(minute_cpu_dict.get("used"))/len(minute_cpu_dict.get("used"))) + buff = (sum(minute_cpu_dict.get("buff"))/len(minute_cpu_dict.get("buff"))) + cache = (sum(minute_cpu_dict.get("cach"))/len(minute_cpu_dict.get("cach"))) + total = 
(sum(minute_cpu_dict.get("total"))/len(minute_cpu_dict.get("total"))) + util = (total - free - buff - cache) / total * 100 + cpu_dict_all.get("free").append(free) + cpu_dict_all.get("used").append(used) + cpu_dict_all.get("buff").append(buff) + cpu_dict_all.get("cach").append(cache) + cpu_dict_all.get("total").append(total) + cpu_dict_all.get("util").append(util) + distance_num+=1 + if distance_num >=19: + print("Time -----------------------mem----------------------") + print("Time free used buff cach total util") + distance_num = 0 + print(("%s"%title_print_distance_str).format(time_minute_flag,hum_convert(free),hum_convert(used), hum_convert(buff),hum_convert(cache), hum_convert(total),hum_convert(util))) + free = i.get("values").get("MemFree") + used = i.get("values").get("used") + buff = i.get("values").get("user_buffers") + cache = i.get("values").get("Cached") + total = i.get("values").get("MemTotal") + util = (total - free - buff - cache) / total * 100 + cpu_dict_all.get("free").append(free) + cpu_dict_all.get("used").append(used) + cpu_dict_all.get("buff").append(buff) + cpu_dict_all.get("cach").append(cache) + cpu_dict_all.get("total").append(total) + cpu_dict_all.get("util").append(util) + distance_num+=1 + if distance_num >=19: + print("Time -----------------------mem----------------------") + print("Time free used buff cach total util") + distance_num = 0 + print(("%s\n"%title_print_distance_str).format(time,hum_convert(free),hum_convert(used), hum_convert(buff),hum_convert(cache), hum_convert(total),hum_convert(util))) + print(("%s"%title_print_distance_str).format("MAX",hum_convert(max(cpu_dict_all.get("free"))),hum_convert(max(cpu_dict_all.get("used"))), + hum_convert(max(cpu_dict_all.get("buff"))),hum_convert(max(cpu_dict_all.get("cach"))), hum_convert(max(cpu_dict_all.get("total"))), + hum_convert(max(cpu_dict_all.get("util"))))) + 
print(("%s"%title_print_distance_str).format("MEAN",hum_convert(statistics.mean(cpu_dict_all.get("free"))),hum_convert(statistics.mean(cpu_dict_all.get("used"))), + hum_convert(statistics.mean(cpu_dict_all.get("buff"))),hum_convert(statistics.mean(cpu_dict_all.get("cach"))), + hum_convert(statistics.mean(cpu_dict_all.get("total"))), + hum_convert(statistics.mean(cpu_dict_all.get("util"))))) + print(("%s"%title_print_distance_str).format("MIN",hum_convert(min(cpu_dict_all.get("free"))),hum_convert(min(cpu_dict_all.get("used"))), + hum_convert(min(cpu_dict_all.get("buff"))),hum_convert(min(cpu_dict_all.get("cach"))), hum_convert(min(cpu_dict_all.get("total"))), + hum_convert(min(cpu_dict_all.get("util"))))) + break + if not time_minute_flag: + minute_cpu_dict = { + "free":[i.get("values").get("MemFree")], + "used":[i.get("values").get("used")], + "buff":[i.get("values").get("user_buffers")], + "cach":[i.get("values").get("Cached")], + "total":[i.get("values").get("MemTotal")] + } + time_minute_flag = time + elif time == time_minute_flag: + minute_cpu_dict.get("free").append(i.get("values").get("MemFree")) + minute_cpu_dict.get("used").append(i.get("values").get("used")) + minute_cpu_dict.get("buff").append(i.get("values").get("user_buffers")) + minute_cpu_dict.get("cach").append(i.get("values").get("Cached")) + minute_cpu_dict.get("total").append(i.get("values").get("MemTotal")) + else: + free = i.get("values").get("MemFree") + used = i.get("values").get("used") + buff = i.get("values").get("user_buffers") + cache = i.get("values").get("Cached") + total = i.get("values").get("MemTotal") + util = (total - free - buff - cache) / total * 100 + cpu_dict_all.get("free").append(free) + cpu_dict_all.get("used").append(used) + cpu_dict_all.get("buff").append(buff) + cpu_dict_all.get("cach").append(cache) + cpu_dict_all.get("total").append(total) + cpu_dict_all.get("util").append(util) + 
print(("%s"%title_print_distance_str).format(time_minute_flag,hum_convert(free),hum_convert(used), hum_convert(buff),hum_convert(cache), hum_convert(total),hum_convert(util))) + distance_num+=1 + if distance_num >=19: + print("Time -----------------------mem----------------------") + print("Time free used buff cach total util") + distance_num = 0 + minute_cpu_dict = { + "free":[i.get("values").get("MemFree")], + "used":[i.get("values").get("used")], + "buff":[i.get("values").get("user_buffers")], + "cach":[i.get("values").get("Cached")], + "total":[i.get("values").get("MemTotal")] + } + time_minute_flag = time + except Exception as e: + print(e) + return \ No newline at end of file diff --git a/source/tools/combine/sar/paritition.py b/source/tools/combine/sar/paritition.py new file mode 100644 index 0000000000000000000000000000000000000000..e10a8ab99d5030dd9025e579a488fa3e0a06e1f4 --- /dev/null +++ b/source/tools/combine/sar/paritition.py @@ -0,0 +1,155 @@ +# -*- coding: utf-8 -*- + +import statistics +import datetime +from db import get_sql_resp +from hum_byte_convert import hum_byte_convert +from utils import get_print_title_distance + +def partition_data_show(distance_max=5, minutes=50, date=1): + try: + if not distance_max: + distance_max = 5 + ret = get_sql_resp(minutes=minutes, table=["fs_stat"], date=date) + if not ret: + return + + print("Time --------------------paritition-----------------------------------") + print("Time path bfree bused btotal util") + # "Time path f_bfree f_bsize f_blocks util" + title_partition = "Time path f_bfree f_bsize f_blocks util" + title_print_distance_str = get_print_title_distance(title_partition) + title_list = title_partition.split(' ') + title_list = [val for val in title_list if val] + minute_dict, dict_all, distance_num, time_minute_flag = get_cpu_dict(ret, distance_max, title_list, title_print_distance_str) + if minute_dict: + distance_num+=1 + if distance_num >=10: + print("Time 
--------------------paritition-----------------------------------") + print("Time path bfree bused btotal util") + distance_num = 0 + for k in minute_dict.keys(): + try: + f_bfree = (sum(minute_dict.get(k).get("f_bfree", 0))/len(minute_dict.get(k).get("f_bfree"))) + f_bsize = (sum(minute_dict.get(k).get("f_bsize"))/len(minute_dict.get(k).get("f_bsize"))) + f_blocks = (sum(minute_dict.get(k).get("f_blocks"))/len(minute_dict.get(k).get("f_blocks"))) + except: + f_bfree = 0 + f_bsize = 0 + f_blocks = 0 + bfree = f_bfree*f_bsize + bused = (f_blocks-f_bfree)*f_bsize + btotal= f_blocks * f_bsize + if btotal == 0: + util = 0 + else: + util = bused/btotal + if k not in dict_all.keys(): + dict_all[k] = {} + if not dict_all.get(k).get("bfree"): + dict_all[k]["bfree"]= [bfree] + else: + dict_all.get(k).get("bfree").append(bfree) + if not dict_all.get(k).get("bused"): + dict_all[k]["bused"]= [bused] + else: + dict_all.get(k).get("bused").append(bused) + if not dict_all.get(k).get("btotal"): + dict_all[k]["btotal"]= [btotal] + else: + dict_all.get(k).get("btotal").append(btotal) + if not dict_all.get(k).get("util"): + dict_all[k]["util"]= [util] + else: + dict_all.get(k).get("util").append(util) + print(("%s"%title_print_distance_str).format(time_minute_flag, k , hum_byte_convert(bfree),hum_byte_convert(bused), hum_byte_convert(btotal),"%.2f"%util)) + if not dict_all: + return + for k in dict_all.keys(): + print(("\n%s"%title_print_distance_str).format("MAX",k ,hum_byte_convert(max(dict_all.get(k).get("bfree"))), + hum_byte_convert(max(dict_all.get(k).get("bused"))), + hum_byte_convert(max(dict_all.get(k).get("btotal"))), + "%.2f"%(max(dict_all.get(k).get("util"))))) + print(("%s"%title_print_distance_str).format("MEAN", k , hum_byte_convert(statistics.mean(dict_all.get(k).get("bfree"))), + hum_byte_convert(statistics.mean(dict_all.get(k).get("bused"))), + hum_byte_convert(statistics.mean(dict_all.get(k).get("btotal"))), + "%.2f"%(statistics.mean(dict_all.get(k).get("util"))))) 
+ print(("%s"%title_print_distance_str).format("MIN", k , hum_byte_convert(min(dict_all.get(k).get("bfree"))), + hum_byte_convert(min(dict_all.get(k).get("bused"))), + hum_byte_convert(min(dict_all.get(k).get("btotal"))), + "%.2f"%(min(dict_all.get(k).get("util"))))) + except Exception as e: + print(e) + return + +def get_cpu_dict(ret, distance_max, title_list, title_print_distance_str): + time_minute_flag = None + minute_dict = {} + dict_all = {} + distance_num = 0 + try: + for i in ret: + time = i.get("time") + time = datetime.datetime.fromtimestamp(int(time)/1000000).strftime("%m/%d/%y-%H:%M") + time_strp = datetime.datetime.strptime(time, "%m/%d/%y-%H:%M") + if (time_strp.minute+time_strp.hour*60)%distance_max != 0: + continue + if time == time_minute_flag or time_minute_flag == None: + disk_name = i.get("labels").get("mount") + disk_name = disk_name.split("/dev/")[-1] + for k in title_list[2:5]: + if disk_name not in minute_dict.keys(): + minute_dict[disk_name] = {} + elif k not in minute_dict.get(disk_name).keys(): + if i.get("values").get(k) != None: + minute_dict[disk_name][k] = [i.get("values").get(k)] + else: + minute_dict.get(disk_name).get(k).append(i.get("values").get(k)) + if disk_name not in dict_all.keys(): + dict_all[disk_name] = {} + time_minute_flag=time + else: + distance_num+=1 + if distance_num >=10: + print("Time --------------------paritition-----------------------------------") + print("Time path bfree bused btotal util") + distance_num = 0 + for k in minute_dict.keys(): + try: + f_bfree = (sum(minute_dict.get(k).get("f_bfree", 0))/len(minute_dict.get(k).get("f_bfree"))) + f_bsize = (sum(minute_dict.get(k).get("f_bsize"))/len(minute_dict.get(k).get("f_bsize"))) + f_blocks = (sum(minute_dict.get(k).get("f_blocks"))/len(minute_dict.get(k).get("f_blocks"))) + except: + f_bfree = 0 + f_bsize = 0 + f_blocks = 0 + bfree = f_bfree*f_bsize + bused = (f_blocks-f_bfree)*f_bsize + btotal= f_blocks * f_bsize + util = bused/btotal + if k not in 
dict_all.keys(): + dict_all[k] = {} + if not dict_all.get(k).get("bfree"): + dict_all[k]["bfree"]= [bfree] + else: + dict_all.get(k).get("bfree").append(bfree) + + if not dict_all.get(k).get("bused"): + dict_all[k]["bused"]= [bused] + else: + dict_all.get(k).get("bused").append(bused) + if not dict_all.get(k).get("btotal"): + dict_all[k]["btotal"]= [btotal] + else: + dict_all.get(k).get("btotal").append(btotal) + if not dict_all.get(k).get("util"): + dict_all[k]["util"]= [util] + else: + dict_all.get(k).get("util").append(util) + print(("%s"%title_print_distance_str).format(time_minute_flag, k , hum_byte_convert(bfree),hum_byte_convert(bused), hum_byte_convert(btotal),"%.2f"%util)) + minute_dict = {} + time_minute_flag = time + return minute_dict, dict_all, distance_num, time_minute_flag + except Exception as e: + print(e) + return None, None, None, None \ No newline at end of file diff --git a/source/tools/combine/sar/pcsw.py b/source/tools/combine/sar/pcsw.py new file mode 100644 index 0000000000000000000000000000000000000000..cd0165cc3a4dd40c41d9ee34ded8c94ed5387f00 --- /dev/null +++ b/source/tools/combine/sar/pcsw.py @@ -0,0 +1,142 @@ +# -*- coding: utf-8 -*- + + +import statistics +import datetime + +from db import get_sql_resp +from utils import get_print_title_distance + + +def pcsw_data_show(distance_max=5, minutes=50, date=1): + try: + if not distance_max: + distance_max = 5 + ret = get_sql_resp(minutes=minutes, table=["stat_counters"], date=date) + distance_num = 0 + time_minute_flag = None + minute_cpu_dict = { + "block":[], + "ctxt":[], + "run":[] + } + cpu_dict_all= { + "block":[], + "ctxt":[], + "run":[] + } + print("Time------------------pcsw----------------") + print("Time block ctxt run") + title_pcsw = "Time block ctxt run" + title_print_distance_str = get_print_title_distance(title_pcsw) + endtime = datetime.datetime.fromtimestamp(int(ret[-1].get("time"))/1000000).strftime("%m/%d/%y-%H:%M") + endtime_strp = datetime.datetime.strptime(endtime, 
"%m/%d/%y-%H:%M") + for i in ret: + time = i.get("time") + time = datetime.datetime.fromtimestamp(int(time)/1000000).strftime("%m/%d/%y-%H:%M") + time_strp = datetime.datetime.strptime(time, "%m/%d/%y-%H:%M") + if (time_strp.minute+time_strp.hour*60)%distance_max != 0: + continue + if time_strp+datetime.timedelta(minutes=distance_max) >= endtime_strp: #末条数据判断 + if time == time_minute_flag: + distance_num+=1 + if distance_num >=19: + print("Time------------------pcsw----------------") + print("Time block ctxt run") + distance_num = 0 + minute_cpu_dict.get("block").append(i.get("values").get("procs_blocked")) + minute_cpu_dict.get("ctxt").append(i.get("values").get("ctxt")) + minute_cpu_dict.get("run").append(i.get("values").get("procs_running")) + procs_blocked = (sum(minute_cpu_dict.get("block"))/len(minute_cpu_dict.get("block"))) + ctxt = (sum(minute_cpu_dict.get("ctxt"))/len(minute_cpu_dict.get("ctxt"))) + procs_running = (sum(minute_cpu_dict.get("run"))/len(minute_cpu_dict.get("run"))) + cpu_dict_all.get("block").append(procs_blocked) + cpu_dict_all.get("run").append(procs_running) + print(("%s\n"%title_print_distance_str).format(time,"%.2f"%procs_blocked,"%.2f"%ctxt,"%.2f"%procs_running)) + print(("%s"%title_print_distance_str).format("MAX","%.2f"%max(cpu_dict_all.get("block")), + "%.2f"%max(cpu_dict_all.get("ctxt")), + "%.2f"%max(cpu_dict_all.get("run")))) + print(("%s"%title_print_distance_str).format("MEAN","%.2f"%statistics.mean(cpu_dict_all.get("block")), + "%.2f"%statistics.mean(cpu_dict_all.get("ctxt")), + "%.2f"%statistics.mean(cpu_dict_all.get("run")))) + print(("%s"%title_print_distance_str).format("MIN","%.2f"%min(cpu_dict_all.get("block")), + "%.2f"%min(cpu_dict_all.get("ctxt")), + "%.2f"%min(cpu_dict_all.get("run")))) + break + else: + procs_blocked = (sum(minute_cpu_dict.get("block"))/len(minute_cpu_dict.get("block"))) + ctxt = (sum(minute_cpu_dict.get("ctxt"))/len(minute_cpu_dict.get("ctxt"))) + procs_running = 
(sum(minute_cpu_dict.get("run"))/len(minute_cpu_dict.get("run"))) + cpu_dict_all.get("block").append(procs_blocked) + cpu_dict_all.get("run").append(procs_running) + distance_num+=1 + if distance_num >=19: + print("Time------------------pcsw----------------") + print("Time block ctxt run") + distance_num = 0 + print(("%s"%title_print_distance_str).format(time_minute_flag,"%.2f"%procs_blocked,"%.2f"%ctxt,"%.2f"%procs_running)) + procs_blocked = i.get("values").get("procs_blocked") + ctxt = i.get("values").get("ctxt") + procs_running = i.get("values").get("procs_running") + cpu_dict_all.get("block").append(procs_blocked) + cpu_dict_all.get("ctxt").append(ctxt) + cpu_dict_all.get("run").append(procs_running) + distance_num+=1 + if distance_num >=distance_max: + print("Time------------------pcsw----------------") + print("Time block ctxt run") + distance_num = 0 + print(("%s\n"%title_print_distance_str).format(time,"%.2f"%procs_blocked,"%.2f"%ctxt,"%.2f"%procs_running)) + print(("%s"%title_print_distance_str).format("MAX","%.2f"%max(cpu_dict_all.get("block")), + "%.2f"%max(cpu_dict_all.get("ctxt")), + "%.2f"%max(cpu_dict_all.get("run")))) + print(("%s"%title_print_distance_str).format("MEAN","%.2f"%statistics.mean(cpu_dict_all.get("block")), + "%.2f"%statistics.mean(cpu_dict_all.get("ctxt")), + "%.2f"%statistics.mean(cpu_dict_all.get("run")))) + print(("%s"%title_print_distance_str).format("MIN","%.2f"%min(cpu_dict_all.get("block")), + "%.2f"%min(cpu_dict_all.get("ctxt")), + "%.2f"%min(cpu_dict_all.get("run")))) + break + if not time_minute_flag: + if i.get("values").get("procs_blocked") != None: + minute_cpu_dict.get("block").append(i.get("values").get("procs_blocked")) + if i.get("values").get("ctxt") != None: + minute_cpu_dict.get("ctxt").append(i.get("values").get("ctxt")) + if i.get("values").get("procs_running") != None: + minute_cpu_dict.get("run").append(i.get("values").get("procs_running")) + time_minute_flag = time + elif time == time_minute_flag: + if 
i.get("values").get("procs_blocked") != None: + minute_cpu_dict.get("block").append(i.get("values").get("procs_blocked")) + if i.get("values").get("ctxt") != None: + minute_cpu_dict.get("ctxt").append(i.get("values").get("ctxt")) + if i.get("values").get("procs_running") != None: + minute_cpu_dict.get("run").append(i.get("values").get("procs_running")) + else: + distance_num+=1 + if distance_num >=19: + print("Time------------------pcsw----------------") + print("Time block ctxt run") + distance_num = 0 + procs_blocked = (sum(minute_cpu_dict.get("block"))/len(minute_cpu_dict.get("block"))) + ctxt = (sum(minute_cpu_dict.get("ctxt"))/len(minute_cpu_dict.get("ctxt"))) + procs_running = (sum(minute_cpu_dict.get("run"))/len(minute_cpu_dict.get("run"))) + cpu_dict_all.get("block").append(procs_blocked) + cpu_dict_all.get("ctxt").append(ctxt) + cpu_dict_all.get("run").append(procs_running) + print(("%s"%title_print_distance_str).format(time_minute_flag,"%.2f"%procs_blocked,"%.2f"%ctxt,"%.2f"%procs_running)) + minute_cpu_dict = { + "block":[], + "ctxt":[], + "run":[] + } + if i.get("values").get("procs_blocked") != None: + minute_cpu_dict.get("block").append(i.get("values").get("procs_blocked")) + if i.get("values").get("ctxt") != None: + minute_cpu_dict.get("ctxt").append(i.get("values").get("ctxt")) + if i.get("values").get("procs_running") != None: + minute_cpu_dict.get("run").append(i.get("values").get("procs_running")) + time_minute_flag = time + except Exception as e: + print(e) + return \ No newline at end of file diff --git a/source/tools/combine/sar/requirements.txt b/source/tools/combine/sar/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..c83068e558a484738bc57897ddbc19fe2b03836e --- /dev/null +++ b/source/tools/combine/sar/requirements.txt @@ -0,0 +1,3 @@ +requests==2.27.1 +pyinstaller==3.5 +pyyaml==6.0.1 diff --git a/source/tools/combine/sar/sar.py b/source/tools/combine/sar/sar.py new file mode 100644 index 
0000000000000000000000000000000000000000..c51b9ab46e66023a618feee5899affc99e775364 --- /dev/null +++ b/source/tools/combine/sar/sar.py @@ -0,0 +1,52 @@ +# -*- coding: utf-8 -*- +#!/root/anaconda3/envs/python310 +import argparse +from cpu import cpu_data_show +from unity_total import unity_total +from unity_io import io_data_show +from tcp import tcp_data_show +from pcsw import pcsw_data_show +from traffic import traffic_data_show +from udp import udp_data_show +from paritition import partition_data_show +from mem import mem_data_show +from load import load_data_show + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + + parser.add_argument('-i','--instance',type=int, help='data spacing') + parser.add_argument('-t','--time',type=int, help='query mintes old data') + parser.add_argument('-d','--date',type=int, help='query hours old data') + parser.add_argument('--cpu', action='store_true', help='CPU share (user, system, interrupt, nice, & idle)') + parser.add_argument('--mem', action='store_true', help='Physical memory share (active, inactive, cached, free, wired)') + parser.add_argument('--load', action='store_true', help='System Run Queue and load average') + parser.add_argument('--traffic', action='store_true', help='Net traffic statistics') + parser.add_argument('--tcp', action='store_true', help='TCP traffic (v4)') + parser.add_argument('--udp', action='store_true', help='UDP traffic (v4)') + parser.add_argument('--io', action='store_true', help='Linux I/O performance') + parser.add_argument('--partition', action='store_true', help='Disk and partition usage') + parser.add_argument('--pcsw', action='store_true', help='Process (task) creation and context switch') + args = parser.parse_args() + minutes = args.time + if args.cpu: + cpu_data_show(distance_max=args.instance, minutes=args.time, date=args.date) + elif args.mem: + mem_data_show(distance_max=args.instance, minutes=args.time, date=args.date) + elif args.load: + 
load_data_show(distance_max=args.instance, minutes=args.time, date=args.date) + elif args.traffic: + traffic_data_show(distance_max=args.instance, minutes=args.time, date=args.date) + elif args.tcp: + tcp_data_show(distance_max=args.instance, minutes=args.time, date=args.date) + elif args.udp: + udp_data_show(distance_max=args.instance, minutes=args.time, date=args.date) + elif args.io: + io_data_show(distance_max=args.instance, minutes=args.time, date=args.date) + elif args.partition: + partition_data_show(distance_max=args.instance, minutes=args.time, date=args.date) + elif args.pcsw: + pcsw_data_show(distance_max=args.instance, minutes=args.time, date=args.date) + else: + unity_total(distance_max=args.instance, minutes=args.time, date=args.date) diff --git a/source/tools/combine/sar/tcp.py b/source/tools/combine/sar/tcp.py new file mode 100644 index 0000000000000000000000000000000000000000..e52366dde6f8cdb3031d99dc8d2b5e91af05ee97 --- /dev/null +++ b/source/tools/combine/sar/tcp.py @@ -0,0 +1,226 @@ +# -*- coding: utf-8 -*- +import statistics +import datetime +from db import get_sql_resp +from utils import get_print_title_distance + +def tcp_data_show(distance_max, minutes, date): + try: + if not distance_max: + distance_max = 5 + ret = get_sql_resp(minutes=minutes, table=["sock_stat"],date=date) + res_tcp = get_sql_resp(minutes=minutes, table=["net_tcp_count"],date=date) + distance_num = 0 + time_minute_flag = None + minute_cpu_dict = { + "active":[], + "pasive":[], + "iseg":[], + "outseg":[], + "curres":[], + "retranssegs":[] + } + cpu_dict_all={ + "active":[], + "pasive":[], + "iseg":[], + "outseg":[], + "curres":[], + "retran":[] + } + print("Time ---------------------tcp----------------------") + title_tcp = "Time active pasive iseg outseg CurrEs retran" + print(title_tcp) + title_print_distance_str = get_print_title_distance(title_tcp) + endtime = datetime.datetime.fromtimestamp(int(ret[-1].get("time"))/1000000).strftime("%m/%d/%y-%H:%M") + endtime_strp = 
datetime.datetime.strptime(endtime, "%m/%d/%y-%H:%M") + for i in ret: + time = i.get("time") + time = datetime.datetime.fromtimestamp(int(time)/1000000).strftime("%m/%d/%y-%H:%M") + time_strp = datetime.datetime.strptime(time, "%m/%d/%y-%H:%M") + if (time_strp.minute+time_strp.hour*60)%distance_max != 0: + continue + index_i = ret.index(i) + if time_strp+datetime.timedelta(minutes=distance_max) >= endtime_strp: #末条数据判断 + if time == time_minute_flag: + distance_num+=1 + if distance_num >=19: + print("Time ---------------------tcp----------------------") + print("Time active pasive iseg outseg CurrEs retran") + distance_num = 0 + minute_cpu_dict.get("active").append(i.get("values").get("tcp_inuse")) + minute_cpu_dict.get("pasive").append(i.get("values").get("tcp_tw")) + minute_cpu_dict.get("iseg").append(res_tcp[index_i].get("values").get("InSegs")) + minute_cpu_dict.get("outseg").append(res_tcp[index_i].get("values").get("OutSegs")) + minute_cpu_dict.get("curres").append(res_tcp[index_i].get("values").get("CurrEstab")) + minute_cpu_dict.get("retranssegs").append(res_tcp[index_i].get("values").get("RetransSegs")) + active = (sum(minute_cpu_dict.get("active"))/len(minute_cpu_dict.get("active"))) + pasive = (sum(minute_cpu_dict.get("pasive"))/len(minute_cpu_dict.get("pasive"))) + iseg = (sum(minute_cpu_dict.get("iseg"))/len(minute_cpu_dict.get("iseg"))) + outseg = (sum(minute_cpu_dict.get("outseg"))/len(minute_cpu_dict.get("outseg"))) + curres = (sum(minute_cpu_dict.get("curres"))/len(minute_cpu_dict.get("curres"))) + retran = (sum(minute_cpu_dict.get("retranssegs"))/sum(minute_cpu_dict.get("outseg"))) + cpu_dict_all.get("active").append(active) + cpu_dict_all.get("pasive").append(pasive) + cpu_dict_all.get("iseg").append(iseg) + cpu_dict_all.get("outseg").append(outseg) + cpu_dict_all.get("curres").append(curres) + cpu_dict_all.get("retran").append(retran) + print(("%s\n"%title_print_distance_str).format(time, "%.2f"%active, "%.2f"%pasive, "%.2f"%iseg,"%.2f"%outseg, 
"%.2f"%curres, "%.2f"%retran)) + print(("%s"%title_print_distance_str).format("MAX", "%.2f"%max(cpu_dict_all.get("active")), + "%.2f"%max(cpu_dict_all.get("pasive")), + "%.2f"%max(cpu_dict_all.get("iseg")), + "%.2f"%max(cpu_dict_all.get("outseg")), + "%.2f"%max(cpu_dict_all.get("curres")), + "%.2f"%max(cpu_dict_all.get("retran")) + )) + print(("%s"%title_print_distance_str).format("MEAN","%.2f"%(statistics.mean(cpu_dict_all.get("active"))), + "%.2f"%(statistics.mean(cpu_dict_all.get("pasive"))), + "%.2f"%statistics.mean(cpu_dict_all.get("iseg")), + "%.2f"%statistics.mean(cpu_dict_all.get("outseg")), + "%.2f"%statistics.mean(cpu_dict_all.get("curres")), + "%.2f"%statistics.mean(cpu_dict_all.get("retran")) + )) + print(("%s"%title_print_distance_str).format("MIN","%.2f"%min(cpu_dict_all.get("active")), + "%.2f"%min(cpu_dict_all.get("pasive")), + "%.2f"%min(cpu_dict_all.get("iseg")), + "%.2f"%min(cpu_dict_all.get("outseg")), + "%.2f"%min(cpu_dict_all.get("curres")), + "%.2f"%min(cpu_dict_all.get("retran")) + )) + break + else: + active = (sum(minute_cpu_dict.get("active"))/len(minute_cpu_dict.get("active"))) + pasive = (sum(minute_cpu_dict.get("pasive"))/len(minute_cpu_dict.get("pasive"))) + iseg = (sum(minute_cpu_dict.get("iseg"))/len(minute_cpu_dict.get("iseg"))) + outseg = sum(minute_cpu_dict.get("outseg")) + curres = (sum(minute_cpu_dict.get("curres"))/len(minute_cpu_dict.get("curres"))) + retran = (sum(minute_cpu_dict.get("retranssegs"))/sum(minute_cpu_dict.get("outseg"))) + cpu_dict_all.get("active").append(active) + cpu_dict_all.get("pasive").append(pasive) + cpu_dict_all.get("iseg").append(iseg) + cpu_dict_all.get("outseg").append(outseg) + cpu_dict_all.get("curres").append(curres) + cpu_dict_all.get("retran").append(retran) + distance_num+=1 + if distance_num >=19: + print("Time ---------------------tcp----------------------") + print("Time active pasive iseg outseg CurrEs retran") + distance_num = 0 + 
print(("%s"%title_print_distance_str).format(time_minute_flag, "%.2f"%active, "%.2f"%pasive, "%.2f"%iseg,"%.2f"%outseg, "%.2f"%curres, "%.2f"%retran)) + active = i.get("values").get("tcp_inuse") + pasive = i.get("values").get("tcp_tw") + iseg = res_tcp[index_i].get("values").get("InSegs") + outseg = res_tcp[index_i].get("values").get("OutSegs") + curres = res_tcp[index_i].get("values").get("CurrEstab") + retran = (res_tcp[index_i].get("values").get("RetransSegs"))/(res_tcp[index_i].get("values").get("OutSegs")) + cpu_dict_all.get("active").append(active) + cpu_dict_all.get("pasive").append(pasive) + cpu_dict_all.get("iseg").append(iseg) + cpu_dict_all.get("outseg").append(outseg) + cpu_dict_all.get("curres").append(curres) + cpu_dict_all.get("retran").append(retran) + distance_num+=1 + if distance_num >=19: + print("Time ---------------------tcp----------------------") + print("Time active pasive iseg outseg CurrEs retran") + distance_num = 0 + print(("%s\n"%title_print_distance_str).format(time, "%.2f"%active, "%.2f"%pasive, "%.2f"%iseg,"%.2f"%outseg, "%.2f"%curres, "%.2f"%retran)) + print(("%s"%title_print_distance_str).format("MAX", "%.2f"%max(cpu_dict_all.get("active")), + "%.2f"%max(cpu_dict_all.get("pasive")), + "%.2f"%max(cpu_dict_all.get("iseg")), + "%.2f"%max(cpu_dict_all.get("outseg")), + "%.2f"%max(cpu_dict_all.get("curres")), + "%.2f"%max(cpu_dict_all.get("retran")) + )) + print(("%s"%title_print_distance_str).format("MEAN","%.2f"%(statistics.mean(cpu_dict_all.get("active"))), + "%.2f"%(statistics.mean(cpu_dict_all.get("pasive"))), + "%.2f"%statistics.mean(cpu_dict_all.get("iseg")), + "%.2f"%statistics.mean(cpu_dict_all.get("outseg")), + "%.2f"%statistics.mean(cpu_dict_all.get("curres")), + "%.2f"%statistics.mean(cpu_dict_all.get("retran")) + )) + print(("%s"%title_print_distance_str).format("MIN","%.2f"%min(cpu_dict_all.get("active")), + "%.2f"%min(cpu_dict_all.get("pasive")), + "%.2f"%min(cpu_dict_all.get("iseg")), + 
"%.2f"%min(cpu_dict_all.get("outseg")), + "%.2f"%min(cpu_dict_all.get("curres")), + "%.2f"%min(cpu_dict_all.get("retran")) + )) + break + if not time_minute_flag: + if i.get("values").get("tcp_inuse") != None: + minute_cpu_dict.get("active").append(i.get("values").get("tcp_inuse")) + if i.get("values").get("tcp_tw") != None: + minute_cpu_dict.get("pasive").append(i.get("values").get("tcp_tw")) + if res_tcp[index_i].get("values").get("InSegs") != None: + minute_cpu_dict.get("iseg").append(res_tcp[index_i].get("values").get("InSegs")) + if res_tcp[index_i].get("values").get("OutSegs") != None: + minute_cpu_dict.get("outseg").append(res_tcp[index_i].get("values").get("OutSegs")) + if res_tcp[index_i].get("values").get("CurrEstab") != None: + minute_cpu_dict.get("curres").append(res_tcp[index_i].get("values").get("CurrEstab")) + if res_tcp[index_i].get("values").get("RetransSegs") != None: + minute_cpu_dict.get("retranssegs").append(res_tcp[index_i].get("values").get("RetransSegs")) + time_minute_flag = time + elif time == time_minute_flag: + if i.get("values").get("tcp_inuse") != None: + minute_cpu_dict.get("active").append(i.get("values").get("tcp_inuse")) + if i.get("values").get("tcp_tw") != None: + minute_cpu_dict.get("pasive").append(i.get("values").get("tcp_tw")) + if res_tcp[index_i].get("values").get("InSegs") != None: + minute_cpu_dict.get("iseg").append(res_tcp[index_i].get("values").get("InSegs")) + if res_tcp[index_i].get("values").get("OutSegs") != None: + minute_cpu_dict.get("outseg").append(res_tcp[index_i].get("values").get("OutSegs")) + if res_tcp[index_i].get("values").get("CurrEstab") != None: + minute_cpu_dict.get("curres").append(res_tcp[index_i].get("values").get("CurrEstab")) + if res_tcp[index_i].get("values").get("RetransSegs") != None: + minute_cpu_dict.get("retranssegs").append(res_tcp[index_i].get("values").get("RetransSegs")) + else: + distance_num+=1 + if distance_num >=19: + print("Time ---------------------tcp----------------------") + 
print("Time active pasive iseg outseg CurrEs retran") + distance_num = 0 + if i.get("values").get("tcp_inuse") != None: + minute_cpu_dict.get("active").append(i.get("values").get("tcp_inuse")) + if i.get("values").get("tcp_tw") != None: + minute_cpu_dict.get("pasive").append(i.get("values").get("tcp_tw")) + if res_tcp[index_i].get("values").get("InSegs") != None: + minute_cpu_dict.get("iseg").append(res_tcp[index_i].get("values").get("InSegs")) + if res_tcp[index_i].get("values").get("OutSegs") != None: + minute_cpu_dict.get("outseg").append(res_tcp[index_i].get("values").get("OutSegs")) + if res_tcp[index_i].get("values").get("CurrEstab") != None: + minute_cpu_dict.get("curres").append(res_tcp[index_i].get("values").get("CurrEstab")) + if res_tcp[index_i].get("values").get("RetransSegs") != None: + minute_cpu_dict.get("retranssegs").append(res_tcp[index_i].get("values").get("RetransSegs")) + active = (sum(minute_cpu_dict.get("active"))/len(minute_cpu_dict.get("active"))) + pasive = (sum(minute_cpu_dict.get("pasive"))/len(minute_cpu_dict.get("pasive"))) + iseg = (sum(minute_cpu_dict.get("iseg"))/len(minute_cpu_dict.get("iseg"))) + outseg = (sum(minute_cpu_dict.get("outseg"))/len(minute_cpu_dict.get("outseg"))) + curres = (sum(minute_cpu_dict.get("curres"))/len(minute_cpu_dict.get("curres"))) + retran = (sum(minute_cpu_dict.get("retranssegs"))/len(minute_cpu_dict.get("retranssegs"))) + print(("%s"%title_print_distance_str).format(time_minute_flag, "%.2f"%active, "%.2f"%pasive, "%.2f"%iseg,"%.2f"%outseg, "%.2f"%curres, "%.2f"%retran)) + minute_cpu_dict = { + "active":[], + "pasive":[], + "iseg":[], + "outseg":[], + "curres":[], + "retranssegs":[] + } + if i.get("values").get("tcp_inuse") != None: + minute_cpu_dict.get("active").append(i.get("values").get("tcp_inuse")) + if i.get("values").get("tcp_tw") != None: + minute_cpu_dict.get("pasive").append(i.get("values").get("tcp_tw")) + if res_tcp[index_i].get("values").get("InSegs") != None: + 
minute_cpu_dict.get("iseg").append(res_tcp[index_i].get("values").get("InSegs")) + if res_tcp[index_i].get("values").get("OutSegs") != None: + minute_cpu_dict.get("outseg").append(res_tcp[index_i].get("values").get("OutSegs")) + if res_tcp[index_i].get("values").get("CurrEstab") != None: + minute_cpu_dict.get("curres").append(res_tcp[index_i].get("values").get("CurrEstab")) + if res_tcp[index_i].get("values").get("RetransSegs") != None: + minute_cpu_dict.get("retranssegs").append(res_tcp[index_i].get("values").get("RetransSegs")) + time_minute_flag = time + except Exception as e: + print(e) + return \ No newline at end of file diff --git a/source/tools/combine/sar/traffic.py b/source/tools/combine/sar/traffic.py new file mode 100644 index 0000000000000000000000000000000000000000..6f41a80df5e735a167f0ddd07a2c67d7ab10eabc --- /dev/null +++ b/source/tools/combine/sar/traffic.py @@ -0,0 +1,132 @@ +# -*- coding: utf-8 -*- +#!/root/anaconda3/envs/python310 +import argparse +import json +import requests +import statistics +import datetime +import time +from db import get_sql_resp +from hum_byte_convert import hum_byte_convert, hum_convert +from utils import get_print_title_distance +from yaml_instance import load_resp_second_dist + + +config = load_resp_second_dist() +second_dist = config["config"]["freq"] + +def traffic_data_show(distance_max=5, minutes=50, date=1): + try: + if not distance_max: + distance_max = 5 + ret = get_sql_resp(minutes=minutes, table=["networks"],date=date) + distance_num = 0 + time_minute_flag = None + minute_cpu_dict = { + "bytin":[], + "bytout":[], + "pktin":[], + "pktout":[], + "pkterr":[], + "pktdrp":[] + } + cpu_dict_all={ + "bytin":[], + "bytout":[], + "pktin":[], + "pktout":[], + "pkterr":[], + "pktdrp":[] + } + title_traffic = "Time bytin bytout pktin pktout pkterr pktdrp" + print("Time ------------------------------traffic----------------------") + print(title_traffic) + title_print_distance_str = get_print_title_distance(title_traffic) 
+ endtime = datetime.datetime.fromtimestamp(int(ret[-1].get("time"))/1000000).strftime("%m/%d/%y-%H:%M") + endtime_strp = datetime.datetime.strptime(endtime, "%m/%d/%y-%H:%M") + for i in ret: + time = i.get("time") + time = datetime.datetime.fromtimestamp(int(time)/1000000).strftime("%m/%d/%y-%H:%M") + time_strp = datetime.datetime.strptime(time, "%m/%d/%y-%H:%M") + if (time_strp.minute+time_strp.hour*60)%distance_max == 1 and time_minute_flag != time and time_minute_flag != None: + bytin = (sum(minute_cpu_dict.get("bytin")))*second_dist + bytout = (sum(minute_cpu_dict.get("bytout")))*second_dist + pktin = (sum(minute_cpu_dict.get("pktin")))*second_dist + pktout = (sum(minute_cpu_dict.get("pktout")))*second_dist + pkterr = (sum(minute_cpu_dict.get("pkterr")))*second_dist + pktdrp = (sum(minute_cpu_dict.get("pktdrp")))*second_dist + distance_num+=1 + if distance_num >=19: + print("Time ------------------------------traffic----------------------") + print(title_traffic) + distance_num = 0 + if time_strp+datetime.timedelta(minutes=distance_max) > endtime_strp: + print(("%s\n"%title_print_distance_str).format(time_minute_flag,hum_byte_convert(bytin),hum_byte_convert(bytout), "%.2f"%pktin,"%.2f"%pktout, "%.2f"%pkterr, "%.2f"%pktdrp)) + print(("%s"%title_print_distance_str).format("MAX",hum_byte_convert(max(cpu_dict_all.get("bytin"))), + hum_byte_convert(max(cpu_dict_all.get("bytout"))), + "%.2f"%max(cpu_dict_all.get("pktin")), + "%.2f"%max(cpu_dict_all.get("pktout")), + "%.2f"%max(cpu_dict_all.get("pkterr")), + "%.2f"%max(cpu_dict_all.get("pktdrp")) + )) + print(("%s"%title_print_distance_str).format("MEAN",hum_byte_convert(statistics.mean(cpu_dict_all.get("bytin"))), + hum_byte_convert(statistics.mean(cpu_dict_all.get("bytout"))), + "%.2f"%statistics.mean(cpu_dict_all.get("pktin")), + "%.2f"%statistics.mean(cpu_dict_all.get("pktout")), + "%.2f"%statistics.mean(cpu_dict_all.get("pkterr")), + "%.2f"%statistics.mean(cpu_dict_all.get("pktdrp")) + )) + 
print(("%s"%title_print_distance_str).format("MIN",hum_byte_convert(min(cpu_dict_all.get("bytin"))), + hum_byte_convert(min(cpu_dict_all.get("bytout"))), + "%.2f"%min(cpu_dict_all.get("pktin")), + "%.2f"%min(cpu_dict_all.get("pktout")), + "%.2f"%min(cpu_dict_all.get("pkterr")), + "%.2f"%min(cpu_dict_all.get("pktdrp")) + )) + break + else: + cpu_dict_all.get("bytin").append(bytin) + cpu_dict_all.get("bytout").append(bytout) + cpu_dict_all.get("pktin").append(pktin) + cpu_dict_all.get("pktout").append(pktout) + cpu_dict_all.get("pkterr").append(pkterr) + cpu_dict_all.get("pktdrp").append(pktdrp) + print(("%s"%title_print_distance_str).format(time_minute_flag,hum_byte_convert(bytin),hum_byte_convert(bytout), "%.2f"%pktin,"%.2f"%pktout, "%.2f"%pkterr, "%.2f"%pktdrp)) + minute_cpu_dict = { + "bytin":[], + "bytout":[], + "pktin":[], + "pktout":[], + "pkterr":[], + "pktdrp":[] + } + if i.get("values").get("if_ibytes") !=None: + minute_cpu_dict.get("bytin").append(i.get("values").get("if_ibytes")) + if i.get("values").get("if_obytes") != None: + minute_cpu_dict.get("bytout").append(i.get("values").get("if_obytes")) + if i.get("values").get("if_ipackets") != None: + minute_cpu_dict.get("pktin").append(i.get("values").get("if_ipackets")) + if i.get("values").get("if_opackets") != None: + minute_cpu_dict.get("pktout").append(i.get("values").get("if_opackets")) + if i.get("values").get("if_oerrs") != None and i.get("values").get("if_ierrs")!= None: + minute_cpu_dict.get("pkterr").append((i.get("values").get("if_oerrs"))+(i.get("values").get("if_ierrs"))) + if i.get("values").get("if_odrop") != None and i.get("values").get("if_idrop") != None: + minute_cpu_dict.get("pktdrp").append((i.get("values").get("if_odrop"))+(i.get("values").get("if_idrop"))) + time_minute_flag = time + else: + if i.get("values").get("if_ibytes") !=None: + minute_cpu_dict.get("bytin").append(i.get("values").get("if_ibytes")) + if i.get("values").get("if_obytes") != None: + 
def udp_data_show(distance_max=5, minutes=50, date=1):
    """Print sar-style per-interval UDP counters from the `net_udp_count` table.

    distance_max: interval in minutes between printed rows (falsy -> 5).
    minutes:      how many minutes of history to query via get_sql_resp.
    date:         day selector, passed through to get_sql_resp.

    Samples falling in the same wall-clock minute are averaged into one row;
    MAX/MEAN/MIN summary rows are printed once the last interval is reached.
    Any exception is printed and swallowed (best-effort reporting tool).
    """
    try:
        if not distance_max:
            distance_max = 5
        ret = get_sql_resp(minutes=minutes, table=["net_udp_count"],date=date)
        distance_num = 0          # rows printed since the header was last shown
        time_minute_flag = None   # wall-clock minute currently being accumulated
        # samples for the minute currently being accumulated
        minute_cpu_dict = {
            "InErrors":[],
            "SndbufErrors":[],
            "InDatagrams":[],
            "RcvbufErrors":[],
            "OutDatagrams":[],
            "NoPorts":[]
        }
        # per-interval averages over the whole run, feeds MAX/MEAN/MIN
        cpu_dict_all= {
            "InErrors":[],
            "SndbufErrors":[],
            "InDatagrams":[],
            "RcvbufErrors":[],
            "OutDatagrams":[],
            "NoPorts":[]
        }
        print("Time -----------------------udp-----------------------------------------")
        print("Time InEr SndEr In RcvEr Out NoPort")
        title_udp = ("Time InEr SndEr InDa RcvEr OutDa NoPort")
        title_print_distance_str = get_print_title_distance(title_udp)
        # record timestamps are microseconds since the epoch; truncate to minute
        endtime = datetime.datetime.fromtimestamp(int(ret[-1].get("time"))/1000000).strftime("%m/%d/%y-%H:%M")
        endtime_strp = datetime.datetime.strptime(endtime, "%m/%d/%y-%H:%M")
        for i in ret:
            time = i.get("time")
            time = datetime.datetime.fromtimestamp(int(time)/1000000).strftime("%m/%d/%y-%H:%M")
            time_strp = datetime.datetime.strptime(time, "%m/%d/%y-%H:%M")
            # only keep samples on a distance_max-minute boundary
            if (time_strp.minute+time_strp.hour*60)%distance_max != 0:
                continue
            if time_strp+datetime.timedelta(minutes=distance_max) >= endtime_strp: # last interval: flush, print summary, stop
                if time == time_minute_flag:
                    distance_num+=1
                    if distance_num >=19:  # re-print header periodically
                        print("Time -----------------------udp-----------------------------------------")
                        print("Time InEr SndEr In RcvEr Out NoPort")
                        distance_num = 0
                    minute_cpu_dict.get("InErrors").append(i.get("values").get("InErrors"))
                    minute_cpu_dict.get("SndbufErrors").append(i.get("values").get("SndbufErrors"))
                    minute_cpu_dict.get("InDatagrams").append(i.get("values").get("InDatagrams"))
                    minute_cpu_dict.get("RcvbufErrors").append(i.get("values").get("RcvbufErrors"))
                    minute_cpu_dict.get("OutDatagrams").append(i.get("values").get("OutDatagrams"))
                    minute_cpu_dict.get("NoPorts").append(i.get("values").get("NoPorts"))
                    # average the samples collected for this minute
                    InErrors = (sum(minute_cpu_dict.get("InErrors"))/len(minute_cpu_dict.get("InErrors")))
                    SndbufErrors = (sum(minute_cpu_dict.get("SndbufErrors"))/len(minute_cpu_dict.get("SndbufErrors")))
                    InDatagrams = (sum(minute_cpu_dict.get("InDatagrams"))/len(minute_cpu_dict.get("InDatagrams")))
                    RcvbufErrors = (sum(minute_cpu_dict.get("RcvbufErrors"))/len(minute_cpu_dict.get("RcvbufErrors")))
                    OutDatagrams = (sum(minute_cpu_dict.get("OutDatagrams"))/len(minute_cpu_dict.get("OutDatagrams")))
                    NoPorts = (sum(minute_cpu_dict.get("NoPorts"))/len(minute_cpu_dict.get("NoPorts")))
                    cpu_dict_all.get("InErrors").append(InErrors)
                    cpu_dict_all.get("SndbufErrors").append(SndbufErrors)
                    cpu_dict_all.get("InDatagrams").append(InDatagrams)
                    cpu_dict_all.get("RcvbufErrors").append(RcvbufErrors)
                    cpu_dict_all.get("OutDatagrams").append(OutDatagrams)
                    cpu_dict_all.get("NoPorts").append(NoPorts)
                    print(("%s\n"%title_print_distance_str).format(time,"%.2f"%InErrors,"%.2f"%SndbufErrors, "%.2f"%InDatagrams,"%.2f"%RcvbufErrors, "%.2f"%OutDatagrams,"%.2f"%NoPorts))
                    # summary over every printed interval
                    print(("%s\n"%title_print_distance_str).format("MAX","%.2f"%max(cpu_dict_all.get("InErrors")),
                                                                   "%.2f"%max(cpu_dict_all.get("SndbufErrors")),
                                                                   "%.2f"%max(cpu_dict_all.get("InDatagrams")),
                                                                   "%.2f"%max(cpu_dict_all.get("RcvbufErrors")),
                                                                   "%.2f"%max(cpu_dict_all.get("OutDatagrams")),
                                                                   "%.2f"%max(cpu_dict_all.get("NoPorts"))))
                    print(("%s\n"%title_print_distance_str).format("MEAN","%.2f"%statistics.mean(cpu_dict_all.get("InErrors")),
                                                                   "%.2f"%statistics.mean(cpu_dict_all.get("SndbufErrors")),
                                                                   "%.2f"%statistics.mean(cpu_dict_all.get("InDatagrams")),
                                                                   "%.2f"%statistics.mean(cpu_dict_all.get("RcvbufErrors")),
                                                                   "%.2f"%statistics.mean(cpu_dict_all.get("OutDatagrams")),
                                                                   "%.2f"%statistics.mean(cpu_dict_all.get("NoPorts"))))

                    print(("%s\n"%title_print_distance_str).format("MIN","%.2f"%min(cpu_dict_all.get("InErrors")),
                                                                   "%.2f"%min(cpu_dict_all.get("SndbufErrors")),
                                                                   "%.2f"%min(cpu_dict_all.get("InDatagrams")),
                                                                   "%.2f"%min(cpu_dict_all.get("RcvbufErrors")),
                                                                   "%.2f"%min(cpu_dict_all.get("OutDatagrams")),
                                                                   "%.2f"%min(cpu_dict_all.get("NoPorts"))))
                    break
                else:
                    # the final record opens a new minute: flush the pending
                    # minute's averages first ...
                    InErrors = (sum(minute_cpu_dict.get("InErrors"))/len(minute_cpu_dict.get("InErrors")))
                    SndbufErrors = (sum(minute_cpu_dict.get("SndbufErrors"))/len(minute_cpu_dict.get("SndbufErrors")))
                    InDatagrams = (sum(minute_cpu_dict.get("InDatagrams"))/len(minute_cpu_dict.get("InDatagrams")))
                    RcvbufErrors = (sum(minute_cpu_dict.get("RcvbufErrors"))/len(minute_cpu_dict.get("RcvbufErrors")))
                    OutDatagrams = (sum(minute_cpu_dict.get("OutDatagrams"))/len(minute_cpu_dict.get("OutDatagrams")))
                    NoPorts = (sum(minute_cpu_dict.get("NoPorts"))/len(minute_cpu_dict.get("NoPorts")))
                    cpu_dict_all.get("InErrors").append(InErrors)
                    cpu_dict_all.get("SndbufErrors").append(SndbufErrors)
                    cpu_dict_all.get("InDatagrams").append(InDatagrams)
                    cpu_dict_all.get("RcvbufErrors").append(RcvbufErrors)
                    cpu_dict_all.get("OutDatagrams").append(OutDatagrams)
                    cpu_dict_all.get("NoPorts").append(NoPorts)
                    distance_num+=1
                    if distance_num >=19:
                        print("Time -----------------------udp-----------------------------------------")
                        print("Time InEr SndEr In RcvEr Out NoPort")
                        distance_num = 0
                    print(("%s"%title_print_distance_str).format(time_minute_flag,"%.2f"%InErrors,"%.2f"%SndbufErrors, "%.2f"%InDatagrams,"%.2f"%RcvbufErrors, "%.2f"%OutDatagrams,"%.2f"%NoPorts))
                    # ... then print the final record's raw values as their own row
                    InErrors = i.get("values").get("InErrors")
                    SndbufErrors = i.get("values").get("SndbufErrors")
                    InDatagrams = i.get("values").get("InDatagrams")
                    RcvbufErrors = i.get("values").get("RcvbufErrors")
                    OutDatagrams = i.get("values").get("OutDatagrams")
                    NoPorts = i.get("values").get("NoPorts")
                    cpu_dict_all.get("InErrors").append(InErrors)
                    cpu_dict_all.get("SndbufErrors").append(SndbufErrors)
                    cpu_dict_all.get("InDatagrams").append(InDatagrams)
                    cpu_dict_all.get("RcvbufErrors").append(RcvbufErrors)
                    cpu_dict_all.get("OutDatagrams").append(OutDatagrams)
                    cpu_dict_all.get("NoPorts").append(NoPorts)
                    distance_num+=1
                    if distance_num >=19:
                        print("Time -----------------------udp-----------------------------------------")
                        print("Time InEr SndEr In RcvEr Out NoPort")
                        distance_num = 0
                    print(("%s\n"%title_print_distance_str).format(time,"%.2f"%InErrors,"%.2f"%SndbufErrors, "%.2f"%InDatagrams,"%.2f"%RcvbufErrors, "%.2f"%OutDatagrams,"%.2f"%NoPorts))
                    print(("%s"%title_print_distance_str).format("MAX","%.2f"%max(cpu_dict_all.get("InErrors")),
                                                                 "%.2f"%max(cpu_dict_all.get("SndbufErrors")),
                                                                 "%.2f"%max(cpu_dict_all.get("InDatagrams")),
                                                                 "%.2f"%max(cpu_dict_all.get("RcvbufErrors")),
                                                                 "%.2f"%max(cpu_dict_all.get("OutDatagrams")),
                                                                 "%.2f"%max(cpu_dict_all.get("NoPorts"))))
                    print(("%s"%title_print_distance_str).format("MEAN","%.2f"%statistics.mean(cpu_dict_all.get("InErrors")),
                                                                 "%.2f"%statistics.mean(cpu_dict_all.get("SndbufErrors")),
                                                                 "%.2f"%statistics.mean(cpu_dict_all.get("InDatagrams")),
                                                                 "%.2f"%statistics.mean(cpu_dict_all.get("RcvbufErrors")),
                                                                 "%.2f"%statistics.mean(cpu_dict_all.get("OutDatagrams")),
                                                                 "%.2f"%statistics.mean(cpu_dict_all.get("NoPorts"))))

                    print(("%s"%title_print_distance_str).format("MIN","%.2f"%min(cpu_dict_all.get("InErrors")),
                                                                 "%.2f"%min(cpu_dict_all.get("SndbufErrors")),
                                                                 "%.2f"%min(cpu_dict_all.get("InDatagrams")),
                                                                 "%.2f"%min(cpu_dict_all.get("RcvbufErrors")),
                                                                 "%.2f"%min(cpu_dict_all.get("OutDatagrams")),
                                                                 "%.2f"%min(cpu_dict_all.get("NoPorts"))))
                    break
            if not time_minute_flag:
                # first accepted sample: open a new minute bucket
                minute_cpu_dict = {
                    "InErrors":[i.get("values").get("InErrors")],
                    "SndbufErrors":[i.get("values").get("SndbufErrors")],
                    "InDatagrams":[i.get("values").get("InDatagrams")],
                    "RcvbufErrors":[i.get("values").get("RcvbufErrors")],
                    "OutDatagrams":[i.get("values").get("OutDatagrams")],
                    "NoPorts":[i.get("values").get("NoPorts")]
                }
                time_minute_flag = time
            elif time == time_minute_flag:
                # still inside the same minute: keep accumulating
                minute_cpu_dict.get("InErrors").append(i.get("values").get("InErrors"))
                minute_cpu_dict.get("SndbufErrors").append(i.get("values").get("SndbufErrors"))
                minute_cpu_dict.get("InDatagrams").append(i.get("values").get("InDatagrams"))
                minute_cpu_dict.get("RcvbufErrors").append(i.get("values").get("RcvbufErrors"))
                minute_cpu_dict.get("OutDatagrams").append(i.get("values").get("OutDatagrams"))
                minute_cpu_dict.get("NoPorts").append(i.get("values").get("NoPorts"))
            else:
                # minute rolled over: print the finished minute, start a new one
                distance_num+=1
                if distance_num >=19:
                    print("Time -----------------------udp-----------------------------------------")
                    print("Time InEr SndEr In RcvEr Out NoPort")
                    distance_num = 0
                InErrors = (sum(minute_cpu_dict.get("InErrors"))/len(minute_cpu_dict.get("InErrors")))
                SndbufErrors = (sum(minute_cpu_dict.get("SndbufErrors"))/len(minute_cpu_dict.get("SndbufErrors")))
                InDatagrams = (sum(minute_cpu_dict.get("InDatagrams"))/len(minute_cpu_dict.get("InDatagrams")))
                RcvbufErrors = (sum(minute_cpu_dict.get("RcvbufErrors"))/len(minute_cpu_dict.get("RcvbufErrors")))
                OutDatagrams = (sum(minute_cpu_dict.get("OutDatagrams"))/len(minute_cpu_dict.get("OutDatagrams")))
                NoPorts = (sum(minute_cpu_dict.get("NoPorts"))/len(minute_cpu_dict.get("NoPorts")))
                cpu_dict_all.get("InErrors").append(InErrors)
                cpu_dict_all.get("SndbufErrors").append(SndbufErrors)
                cpu_dict_all.get("InDatagrams").append(InDatagrams)
                cpu_dict_all.get("RcvbufErrors").append(RcvbufErrors)
                cpu_dict_all.get("OutDatagrams").append(OutDatagrams)
                cpu_dict_all.get("NoPorts").append(NoPorts)
                print(("%s"%title_print_distance_str).format(time_minute_flag,"%.2f"%InErrors,"%.2f"%SndbufErrors, "%.2f"%InDatagrams,"%.2f"%RcvbufErrors, "%.2f"%OutDatagrams,"%.2f"%NoPorts))
                minute_cpu_dict = {
                    "InErrors":[i.get("values").get("InErrors")],
                    "SndbufErrors":[i.get("values").get("SndbufErrors")],
                    "InDatagrams":[i.get("values").get("InDatagrams")],
                    "RcvbufErrors":[i.get("values").get("RcvbufErrors")],
                    "OutDatagrams":[i.get("values").get("OutDatagrams")],
                    "NoPorts":[i.get("values").get("NoPorts")]
                }
                time_minute_flag = time
    except Exception as e:
        print(e)
        return
def io_data_show(distance_max=5, minutes=50, date=1):
    """Print sar-style per-disk IO statistics from the `disks` table.

    distance_max: interval in minutes between printed rows (falsy -> 5).
    minutes:      how many minutes of history to query via get_sql_resp.
    date:         day selector, passed through to get_sql_resp.

    Rows are keyed by disk name; samples in the same wall-clock minute are
    averaged per metric.  NOTE(review): the printed column header says
    "util" where the stored metric is "busy" — presumably the same quantity;
    confirm against the collector.  Errors are printed and swallowed.
    """
    try:
        if not distance_max:
            distance_max = 5
        rets = get_sql_resp(minutes=minutes, table=["disks"],date=date)
        distance_num = 0          # rows printed since the header was last shown
        time_minute_flag = None   # wall-clock minute currently being accumulated
        minute_cpu_dict = {}      # {disk_name: {metric: [samples]}} for current minute
        cpu_dict_all= {}          # {disk_name: {metric: [per-minute averages]}}
        print("Time -------------------------------------------------------------IO---------------------------------------------------------------")
        print("Time disk_name inflight backlog rmsec util wkb xfers bsize wmsec rkb writes wmerge rmerge reads")
        title_io = "Time disk_name inflight backlog rmsec busy wkb xfers bsize wmsec rkb writes wmerge rmerge reads"
        title_print_distance_str = get_print_title_distance(title_io)
        # timestamps are microseconds since the epoch; truncate to the minute
        endtime = datetime.datetime.fromtimestamp(int(rets[-1].get("time"))/1000000).strftime("%m/%d/%y-%H:%M")
        endtime_strp = datetime.datetime.strptime(endtime, "%m/%d/%y-%H:%M")
        title_list = title_io.split(' ')
        title_list = [val for val in title_list if val]  # metric names are title_list[2:]
        cpu_dict_all = {}
        for i in rets:
            time = i.get("time")
            time = datetime.datetime.fromtimestamp(int(time)/1000000).strftime("%m/%d/%y-%H:%M")
            time_strp = datetime.datetime.strptime(time, "%m/%d/%y-%H:%M")
            # only keep samples on a distance_max-minute boundary
            if (time_strp.minute+time_strp.hour*60)%distance_max != 0:
                continue
            if time == time_minute_flag or time_minute_flag == None:
                # same minute (or very first sample): accumulate per-disk metrics
                disk_name = i.get("labels").get("disk_name")
                for k in title_list[2:]:
                    if i.get("labels").get("disk_name") not in minute_cpu_dict.keys():
                        minute_cpu_dict[disk_name] = {}
                    elif k not in minute_cpu_dict.get(disk_name).keys():
                        if i.get("values").get(k) != None:
                            minute_cpu_dict[disk_name][k] = [i.get("values").get(k)]
                    else:
                        minute_cpu_dict.get(disk_name).get(k).append(i.get("values").get(k))
                    if i.get("labels").get("disk_name") not in cpu_dict_all.keys():
                        cpu_dict_all[disk_name] = {}
                    if k not in cpu_dict_all.get(disk_name).keys():
                        cpu_dict_all[disk_name][k] = []
                time_minute_flag = time
            else:
                distance_num+=1
                if distance_num >=5:  # re-print the header periodically
                    print("Time -------------------------------------------------------------IO---------------------------------------------------------------")
                    print("Time disk_name inflight backlog rmsec util wkb xfers bsize wmsec rkb writes wmerge rmerge reads")
                    distance_num = 0
                if time_strp+datetime.timedelta(minutes=distance_max) > endtime_strp:
                    # last interval reached: flush pending minute, print the
                    # current record's minute, then MAX/MEAN/MIN per disk
                    for k in minute_cpu_dict.keys():
                        inflight = (sum(minute_cpu_dict.get(k).get("inflight"))/len(minute_cpu_dict.get(k).get("inflight")))
                        backlog = (sum(minute_cpu_dict.get(k).get("backlog"))/len(minute_cpu_dict.get(k).get("backlog")))
                        rmsec = (sum(minute_cpu_dict.get(k).get("rmsec"))/len(minute_cpu_dict.get(k).get("rmsec")))
                        busy = (sum(minute_cpu_dict.get(k).get("busy"))/len(minute_cpu_dict.get(k).get("busy")))
                        wkb = (sum(minute_cpu_dict.get(k).get("wkb"))/len(minute_cpu_dict.get(k).get("wkb")))
                        xfers = (sum(minute_cpu_dict.get(k).get("xfers"))/len(minute_cpu_dict.get(k).get("xfers")))
                        bsize = (sum(minute_cpu_dict.get(k).get("bsize"))/len(minute_cpu_dict.get(k).get("bsize")))
                        wmsec = (sum(minute_cpu_dict.get(k).get("wmsec"))/len(minute_cpu_dict.get(k).get("wmsec")))
                        rkb = (sum(minute_cpu_dict.get(k).get("rkb"))/len(minute_cpu_dict.get(k).get("rkb")))
                        writes = (sum(minute_cpu_dict.get(k).get("writes"))/len(minute_cpu_dict.get(k).get("writes")))
                        wmerge = (sum(minute_cpu_dict.get(k).get("wmerge"))/len(minute_cpu_dict.get(k).get("wmerge")))
                        rmerge = (sum(minute_cpu_dict.get(k).get("rmerge"))/len(minute_cpu_dict.get(k).get("rmerge")))
                        reads = (sum(minute_cpu_dict.get(k).get("reads"))/len(minute_cpu_dict.get(k).get("reads")))
                        cpu_dict_all.get(k).get("inflight").append(inflight)
                        cpu_dict_all.get(k).get("backlog").append(backlog)
                        cpu_dict_all.get(k).get("rmsec").append(rmsec)
                        cpu_dict_all.get(k).get("busy").append(busy)
                        cpu_dict_all.get(k).get("wkb").append(wkb)
                        cpu_dict_all.get(k).get("xfers").append(xfers)
                        cpu_dict_all.get(k).get("bsize").append(bsize)
                        cpu_dict_all.get(k).get("wmsec").append(wmsec)
                        cpu_dict_all.get(k).get("rkb").append(rkb)
                        cpu_dict_all.get(k).get("writes").append(writes)
                        cpu_dict_all.get(k).get("wmerge").append(wmerge)
                        cpu_dict_all.get(k).get("rmerge").append(rmerge)
                        cpu_dict_all.get(k).get("reads").append(reads)
                        print(("%s"%title_print_distance_str).format(time_minute_flag, k, "%.2f"%inflight,"%.2f"%backlog, "%.2f"%rmsec,"%.2f"%busy, "%.2f"%wkb,"%.2f"%xfers,
                                                                     "%.2f"%bsize,"%.2f"%wmsec,"%.2f"%rkb,"%.2f"%writes,"%.2f"%wmerge,"%.2f"%rmerge,"%.2f"%reads
                                                                     ))
                    for k in cpu_dict_all.keys():
                        inflight = (sum(minute_cpu_dict.get(k).get("inflight"))/len(minute_cpu_dict.get(k).get("inflight")))
                        backlog = (sum(minute_cpu_dict.get(k).get("backlog"))/len(minute_cpu_dict.get(k).get("backlog")))
                        rmsec = (sum(minute_cpu_dict.get(k).get("rmsec"))/len(minute_cpu_dict.get(k).get("rmsec")))
                        busy = (sum(minute_cpu_dict.get(k).get("busy"))/len(minute_cpu_dict.get(k).get("busy")))
                        wkb = (sum(minute_cpu_dict.get(k).get("wkb"))/len(minute_cpu_dict.get(k).get("wkb")))
                        xfers = (sum(minute_cpu_dict.get(k).get("xfers"))/len(minute_cpu_dict.get(k).get("xfers")))
                        bsize = (sum(minute_cpu_dict.get(k).get("bsize"))/len(minute_cpu_dict.get(k).get("bsize")))
                        wmsec = (sum(minute_cpu_dict.get(k).get("wmsec"))/len(minute_cpu_dict.get(k).get("wmsec")))
                        rkb = (sum(minute_cpu_dict.get(k).get("rkb"))/len(minute_cpu_dict.get(k).get("rkb")))
                        writes = (sum(minute_cpu_dict.get(k).get("writes"))/len(minute_cpu_dict.get(k).get("writes")))
                        wmerge = (sum(minute_cpu_dict.get(k).get("wmerge"))/len(minute_cpu_dict.get(k).get("wmerge")))
                        rmerge = (sum(minute_cpu_dict.get(k).get("rmerge"))/len(minute_cpu_dict.get(k).get("rmerge")))
                        reads = (sum(minute_cpu_dict.get(k).get("reads"))/len(minute_cpu_dict.get(k).get("reads")))
                        print(("%s"%title_print_distance_str).format(time, k ,"%.2f"%inflight,"%.2f"%backlog, "%.2f"%rmsec,"%.2f"%busy, "%.2f"%wkb,"%.2f"%xfers,
                                                                     "%.2f"%bsize,"%.2f"%wmsec,"%.2f"%rkb,"%.2f"%writes,"%.2f"%wmerge,"%.2f"%rmerge,"%.2f"%reads
                                                                     ))
                    for k in cpu_dict_all.keys():
                        # summary rows per disk over every printed interval
                        print(("\n%s"%title_print_distance_str).format("MAX", k, "%.2f"%max(cpu_dict_all.get(k).get("inflight")),
                                                                      "%.2f"%max(cpu_dict_all.get(k).get("backlog")),
                                                                      "%.2f"%max(cpu_dict_all.get(k).get("rmsec")),
                                                                      "%.2f"%max(cpu_dict_all.get(k).get("busy")),
                                                                      "%.2f"%max(cpu_dict_all.get(k).get("wkb")),
                                                                      "%.2f"%max(cpu_dict_all.get(k).get("xfers")),
                                                                      "%.2f"%max(cpu_dict_all.get(k).get("bsize")),
                                                                      "%.2f"%max(cpu_dict_all.get(k).get("wmsec")) ,
                                                                      "%.2f"%max(cpu_dict_all.get(k).get("rkb")),
                                                                      "%.2f"%max(cpu_dict_all.get(k).get("writes")),
                                                                      "%.2f"%max(cpu_dict_all.get(k).get("wmerge")),
                                                                      "%.2f"%max(cpu_dict_all.get(k).get("rmerge")),
                                                                      "%.2f"%max(cpu_dict_all.get(k).get("reads"))))
                        print(("%s"%title_print_distance_str).format("MEAN", k, "%.2f"%statistics.mean(cpu_dict_all.get(k).get("inflight")),
                                                                     "%.2f"%statistics.mean(cpu_dict_all.get(k).get("backlog")),
                                                                     "%.2f"%statistics.mean(cpu_dict_all.get(k).get("rmsec")),
                                                                     "%.2f"%statistics.mean(cpu_dict_all.get(k).get("busy")),
                                                                     "%.2f"%statistics.mean(cpu_dict_all.get(k).get("wkb")),
                                                                     "%.2f"%statistics.mean(cpu_dict_all.get(k).get("xfers")),
                                                                     "%.2f"%statistics.mean(cpu_dict_all.get(k).get("bsize")),
                                                                     "%.2f"%statistics.mean(cpu_dict_all.get(k).get("wmsec")),
                                                                     "%.2f"%statistics.mean(cpu_dict_all.get(k).get("rkb")),
                                                                     "%.2f"%statistics.mean(cpu_dict_all.get(k).get("writes")),
                                                                     "%.2f"%statistics.mean(cpu_dict_all.get(k).get("wmerge")),
                                                                     "%.2f"%statistics.mean(cpu_dict_all.get(k).get("rmerge")),
                                                                     "%.2f"%statistics.mean(cpu_dict_all.get(k).get("reads"))))

                        print(("%s"%title_print_distance_str).format("MIN", k, "%.2f"%min(cpu_dict_all.get(k).get("inflight")),
                                                                     "%.2f"%min(cpu_dict_all.get(k).get("backlog")),
                                                                     "%.2f"%min(cpu_dict_all.get(k).get("rmsec")),
                                                                     "%.2f"%min(cpu_dict_all.get(k).get("busy")),
                                                                     "%.2f"%min(cpu_dict_all.get(k).get("wkb")),
                                                                     "%.2f"%min(cpu_dict_all.get(k).get("xfers")),
                                                                     "%.2f"%min(cpu_dict_all.get(k).get("bsize")),
                                                                     "%.2f"%min(cpu_dict_all.get(k).get("wmsec")),
                                                                     "%.2f"%min(cpu_dict_all.get(k).get("rkb")),
                                                                     "%.2f"%min(cpu_dict_all.get(k).get("writes")),
                                                                     "%.2f"%min(cpu_dict_all.get(k).get("wmerge")),
                                                                     "%.2f"%min(cpu_dict_all.get(k).get("rmerge")),
                                                                     "%.2f"%min(cpu_dict_all.get(k).get("reads"))))
                    break
                else:
                    # minute rolled over: print the finished minute per disk,
                    # then start a new bucket from the current record
                    for k in minute_cpu_dict.keys():
                        inflight = (sum(minute_cpu_dict.get(k).get("inflight"))/len(minute_cpu_dict.get(k).get("inflight")))
                        backlog = (sum(minute_cpu_dict.get(k).get("backlog"))/len(minute_cpu_dict.get(k).get("backlog")))
                        rmsec = (sum(minute_cpu_dict.get(k).get("rmsec"))/len(minute_cpu_dict.get(k).get("rmsec")))
                        busy = (sum(minute_cpu_dict.get(k).get("busy"))/len(minute_cpu_dict.get(k).get("busy")))
                        wkb = (sum(minute_cpu_dict.get(k).get("wkb"))/len(minute_cpu_dict.get(k).get("wkb")))
                        xfers = (sum(minute_cpu_dict.get(k).get("xfers"))/len(minute_cpu_dict.get(k).get("xfers")))
                        bsize = (sum(minute_cpu_dict.get(k).get("bsize"))/len(minute_cpu_dict.get(k).get("bsize")))
                        wmsec = (sum(minute_cpu_dict.get(k).get("wmsec"))/len(minute_cpu_dict.get(k).get("wmsec")))
                        rkb = (sum(minute_cpu_dict.get(k).get("rkb"))/len(minute_cpu_dict.get(k).get("rkb")))
                        writes = (sum(minute_cpu_dict.get(k).get("writes"))/len(minute_cpu_dict.get(k).get("writes")))
                        wmerge = (sum(minute_cpu_dict.get(k).get("wmerge"))/len(minute_cpu_dict.get(k).get("wmerge")))
                        rmerge = (sum(minute_cpu_dict.get(k).get("rmerge"))/len(minute_cpu_dict.get(k).get("rmerge")))
                        reads = (sum(minute_cpu_dict.get(k).get("reads"))/len(minute_cpu_dict.get(k).get("reads")))
                        cpu_dict_all.get(k).get("inflight").append(inflight)
                        cpu_dict_all.get(k).get("backlog").append(backlog)
                        cpu_dict_all.get(k).get("rmsec").append(rmsec)
                        cpu_dict_all.get(k).get("busy").append(busy)
                        cpu_dict_all.get(k).get("wkb").append(wkb)
                        cpu_dict_all.get(k).get("xfers").append(xfers)
                        cpu_dict_all.get(k).get("bsize").append(bsize)
                        cpu_dict_all.get(k).get("wmsec").append(wmsec)
                        cpu_dict_all.get(k).get("rkb").append(rkb)
                        cpu_dict_all.get(k).get("writes").append(writes)
                        cpu_dict_all.get(k).get("wmerge").append(wmerge)
                        cpu_dict_all.get(k).get("rmerge").append(rmerge)
                        cpu_dict_all.get(k).get("reads").append(reads)
                        print(("%s"%title_print_distance_str).format(time_minute_flag, k, "%.2f"%inflight,"%.2f"%backlog, "%.2f"%rmsec,"%.2f"%busy, "%.2f"%wkb,"%.2f"%xfers,
                                                                     "%.2f"%bsize,"%.2f"%wmsec,"%.2f"%rkb,"%.2f"%writes,"%.2f"%wmerge,"%.2f"%rmerge,"%.2f"%reads
                                                                     ))

                    disk_name = i.get("labels").get("disk_name")
                    for title in title_list[2:]:
                        if i.get("values").get(title) != None:
                            minute_cpu_dict[disk_name][title] = [i.get("values").get(title)]
                    time_minute_flag = time
    except Exception as e:
        print(e)
        return
def unity_total(distance_max=5, minutes=50, date=1):
    """Print a combined sar-style overview: CPU util, memory util, TCP
    retransmission ratio, network traffic, per-disk busy%, and load1.

    distance_max: interval in minutes between printed rows (falsy -> 5).
    minutes:      how many minutes of history to query via get_sql_resp.
    date:         day selector, passed through to get_sql_resp.

    The disk column set is discovered from the most recent minute of the
    `disks` table, so the header width adapts to the host's disks.
    `second_dist` (sampling frequency from the config) scales summed byte
    counters into per-interval traffic.  Errors are printed and swallowed.
    """
    try:
        if not distance_max:
            distance_max = 5
        rets = get_sql_resp(minutes=minutes, table=["cpu_total","meminfo","net_tcp_count","networks","disks","proc_loadavg"], date=date)
        if not rets:
            return
        # discover the set of disk names from the latest minute of data
        disks_rets = get_sql_resp(minutes=1, table=["disks"], date=date)
        disks_name_list = []
        for disks_ret in disks_rets:
            if disks_ret.get("labels").get("disk_name") not in disks_name_list:
                disks_name_list.append(disks_ret.get("labels").get("disk_name"))
        # build the two header lines plus the width-template line dynamically
        print_title = "Time --cpu-- ---mem-- ---tcp-- -----traffic------"
        title_unity = "Time cpu_util mem_util retran bytin bytout "
        print_unity = "Time util util retran bytin bytout "
        for disk_name in disks_name_list:
            print_title = print_title + (" --%s-- "% disk_name).ljust(20)
            print_unity = print_unity + (" util ").ljust(20)
            title_unity = title_unity + (" %s"%disk_name + "util ").ljust(20)
        print_title = print_title + " --load---"
        print_unity = print_unity + " load1 "
        title_unity = title_unity + " load1 "
        print(print_title)
        print(print_unity)
        distance_num = 0          # rows printed since the header was last shown
        time_minute_flag = None   # wall-clock minute currently being accumulated
        minute_dict = {}          # {table: {metric: [samples]}} for current minute
        dict_all= {}              # per-interval values over the run, for MAX/MEAN/MIN
        # derive column widths from token positions in title_unity
        # (same algorithm as utils.get_print_title_distance, inlined here)
        title_list = title_unity.split(' ')
        title_index_list = []
        title_old_index = 0
        for title in title_list:
            if title:
                y = title_unity.index(title)
                if title_index_list:
                    title_index_list.append(y-title_old_index)
                else:
                    title_index_list.append(y)
                title_old_index = y
        title_list = [val for val in title_list if val]
        title_print_distance_str='{:<' + '}{:<'.join([str(i) for i in title_index_list[1:]]) + '}{:<'+(str(title_index_list[-1])) + '}'
        for ret in rets:
            time = ret.get("time")
            # timestamps are microseconds since the epoch; truncate to minute
            time = datetime.datetime.fromtimestamp(int(time)/1000000).strftime("%m/%d/%y-%H:%M")
            time_strp = datetime.datetime.strptime(time, "%m/%d/%y-%H:%M")
            title = ret.get("title")  # source table name of this record
            if time == time_minute_flag or time_minute_flag == None:
                if title not in minute_dict.keys():
                    minute_dict[title] = {}
                if title == "networks":
                    # traffic is summed across records, so accumulate every sample
                    if ret.get("values").get("if_ibytes") != None:
                        if not minute_dict.get(title).get("bytin"):
                            minute_dict[title]["bytin"] = [ret.get("values").get("if_ibytes")]
                        else:
                            minute_dict.get(title).get("bytin").append(ret.get("values").get("if_ibytes"))
                    if ret.get("values").get("if_obytes") != None:
                        if not minute_dict.get(title).get("bytout"):
                            minute_dict[title]["bytout"] = [ret.get("values").get("if_obytes")]
                        else:
                            minute_dict.get(title).get("bytout").append(ret.get("values").get("if_obytes"))
                if (time_strp.minute+time_strp.hour*60)%distance_max == 0:
                    # only boundary minutes feed the other tables' accumulator
                    minute_dict = append_minute_dict(title, minute_dict, ret)
                time_minute_flag = time
            else:
                if (time_strp.minute+time_strp.hour*60)%distance_max == 0:
                    # boundary minute rolled over: compute and print one row
                    # meminfo -> memory utilisation (%)
                    free = (sum(minute_dict.get("meminfo").get("free"))/len(minute_dict.get("meminfo").get("free")))
                    total = (sum(minute_dict.get("meminfo").get("total"))/len(minute_dict.get("meminfo").get("total")))
                    buff = (sum(minute_dict.get("meminfo").get("buff"))/len(minute_dict.get("meminfo").get("buff")))
                    cache = (sum(minute_dict.get("meminfo").get("cach"))/len(minute_dict.get("meminfo").get("cach")))
                    mem_util = (total - free - buff - cache) / total * 100
                    if "meminfo" not in dict_all.keys():
                        dict_all["meminfo"] = [mem_util]
                    else:
                        dict_all.get("meminfo").append(mem_util)
                    # cpu_total -> cpu utilisation (%)
                    iowait = (sum(minute_dict.get("cpu_total").get("iowait"))/len(minute_dict.get("cpu_total").get("iowait")))
                    steal = (sum(minute_dict.get("cpu_total").get("steal"))/len(minute_dict.get("cpu_total").get("steal")))
                    idle = (sum(minute_dict.get("cpu_total").get("idle"))/len(minute_dict.get("cpu_total").get("idle")))
                    cpu_util = 100-idle-steal-iowait
                    if "cpu_total" not in dict_all.keys():
                        dict_all["cpu_total"] = [cpu_util]
                    else:
                        dict_all.get("cpu_total").append(cpu_util)
                    # net_tcp_count -> retransmitted/output segment ratio
                    tcp_retran = (sum(minute_dict.get("net_tcp_count").get("retranssegs"))/sum(minute_dict.get("net_tcp_count").get("outseg")))
                    if "net_tcp_count" not in dict_all.keys():
                        dict_all["net_tcp_count"] = [tcp_retran]
                    else:
                        dict_all.get("net_tcp_count").append(tcp_retran)
                    # networks -> bytes in/out scaled by the sampling frequency
                    bytin = (sum(minute_dict.get("networks").get("bytin")))*second_dist
                    bytout = (sum(minute_dict.get("networks").get("bytout")))*second_dist
                    if "networks" not in dict_all.keys():
                        dict_all["networks"]={}
                        dict_all["networks"]["bytin"] = [bytin]
                        dict_all["networks"]["bytout"] = [bytout]
                    else:
                        dict_all.get("networks").get("bytin").append(bytin)
                        dict_all.get("networks").get("bytout").append(bytout)
                    # proc_loadavg -> 1-minute load average
                    load1 = (sum(minute_dict.get("proc_loadavg").get("load1"))/len(minute_dict.get("proc_loadavg").get("load1")))
                    if "proc_loadavg" not in dict_all.keys():
                        dict_all["proc_loadavg"] = [load1]
                    else:
                        dict_all.get("proc_loadavg").append(load1)
                    # disks -> per-disk average busy%
                    for disk in minute_dict.get("disks").keys():
                        util = sum(minute_dict.get("disks").get(disk))/len(minute_dict.get("disks").get(disk))
                        if "disks" not in dict_all.keys():
                            dict_all["disks"]={}
                            dict_all["disks"][disk] = [util]
                        elif disk not in dict_all.get("disks").keys():
                            dict_all["disks"][disk]=[util]
                        else:
                            dict_all.get("disks").get(disk).append(util)
                    print(("%s"%title_print_distance_str).format(time_minute_flag, "%.2f"%cpu_util , "%.2f"%mem_util, "%.2f"%tcp_retran, hum_byte_convert(bytin), hum_byte_convert(bytout),
                                                                 *("%.2f"%(sum(minute_dict.get("disks").get(i))/len(minute_dict.get("disks").get(i))) for i in disks_name_list),"%.2f"%load1
                                                                 ))
                    distance_num+=1
                    if distance_num >=19:  # re-print header periodically
                        print(print_title)
                        print(print_unity)
                        distance_num = 0
                    minute_dict = {}
                    minute_dict = append_minute_dict(title, minute_dict, ret)
                    time_minute_flag = time
                elif title == "networks":
                    # non-boundary minute: only traffic keeps accumulating
                    if ret.get("values").get("if_ibytes") != None:
                        if not minute_dict.get(title).get("bytin"):
                            minute_dict[title]["bytin"] = [ret.get("values").get("if_ibytes")]
                        else:
                            minute_dict.get(title).get("bytin").append(ret.get("values").get("if_ibytes"))
                    if ret.get("values").get("if_obytes") != None:
                        if not minute_dict.get(title).get("bytout"):
                            minute_dict[title]["bytout"] = [ret.get("values").get("if_obytes")]
                        else:
                            minute_dict.get(title).get("bytout").append(ret.get("values").get("if_obytes"))
                else:
                    continue

        if minute_dict:
            # flush the last (still pending) minute, then print the summary
            # meminfo -> memory utilisation (%)
            free = (sum(minute_dict.get("meminfo").get("free"))/len(minute_dict.get("meminfo").get("free")))
            total = (sum(minute_dict.get("meminfo").get("total"))/len(minute_dict.get("meminfo").get("total")))
            buff = (sum(minute_dict.get("meminfo").get("buff"))/len(minute_dict.get("meminfo").get("buff")))
            cache = (sum(minute_dict.get("meminfo").get("cach"))/len(minute_dict.get("meminfo").get("cach")))
            mem_util = (total - free - buff - cache) / total * 100
            if "meminfo" not in dict_all.keys():
                dict_all["meminfo"] = [mem_util]
            else:
                dict_all.get("meminfo").append(mem_util)
            # cpu_total -> cpu utilisation (%)
            iowait = (sum(minute_dict.get("cpu_total").get("iowait"))/len(minute_dict.get("cpu_total").get("iowait")))
            steal = (sum(minute_dict.get("cpu_total").get("steal"))/len(minute_dict.get("cpu_total").get("steal")))
            idle = (sum(minute_dict.get("cpu_total").get("idle"))/len(minute_dict.get("cpu_total").get("idle")))
            cpu_util = 100-idle-steal-iowait
            if "cpu_total" not in dict_all.keys():
                dict_all["cpu_total"] = [cpu_util]
            else:
                dict_all.get("cpu_total").append(cpu_util)
            # net_tcp_count -> retransmitted/output segment ratio
            tcp_retran = (sum(minute_dict.get("net_tcp_count").get("retranssegs"))/sum(minute_dict.get("net_tcp_count").get("outseg")))
            if "net_tcp_count" not in dict_all.keys():
                dict_all["net_tcp_count"] = [tcp_retran]
            else:
                dict_all.get("net_tcp_count").append(tcp_retran)
            # networks -> bytes in/out scaled by the sampling frequency
            bytin = (sum(minute_dict.get("networks").get("bytin")))*second_dist
            bytout = (sum(minute_dict.get("networks").get("bytout")))*second_dist
            if "networks" not in dict_all.keys():
                dict_all["networks"]={}
                dict_all["networks"]["bytin"] = [bytin]
                dict_all["networks"]["bytout"] = [bytout]
            else:
                dict_all.get("networks").get("bytin").append(bytin)
                dict_all.get("networks").get("bytout").append(bytout)
            # proc_loadavg -> 1-minute load average
            load1 = (sum(minute_dict.get("proc_loadavg").get("load1"))/len(minute_dict.get("proc_loadavg").get("load1")))
            if "proc_loadavg" not in dict_all.keys():
                dict_all["proc_loadavg"] = [load1]
            else:
                dict_all.get("proc_loadavg").append(load1)
            # disks -> per-disk average busy%
            for disk in minute_dict.get("disks").keys():
                util = sum(minute_dict.get("disks").get(disk))/len(minute_dict.get("disks").get(disk))
                if "disks" not in dict_all.keys():
                    dict_all["disks"]={}
                    dict_all["disks"][disk] = [util]
                elif disk not in dict_all.get("disks").keys():
                    dict_all["disks"][disk]=[util]
                else:
                    dict_all.get("disks").get(disk).append(util)
            print(("%s"%title_print_distance_str).format(time_minute_flag, "%.2f"%cpu_util , "%.2f"%mem_util, "%.2f"%tcp_retran, hum_byte_convert(bytin), hum_byte_convert(bytout),
                                                         *("%.2f"%(sum(minute_dict.get("disks").get(i))/len(minute_dict.get("disks").get(i))) for i in disks_name_list),"%.2f"%load1
                                                         ))
            distance_num+=1
            if distance_num >=19:  # re-print header periodically
                print(print_title)
                print(print_unity)
                distance_num =0
            # MAX/MEAN/MIN summary rows over every printed interval
            print(("\n%s"%title_print_distance_str).format("MAX" , "%.2f"%max(dict_all.get("cpu_total")),
                                                           "%.2f"%max(dict_all.get("meminfo")),
                                                           "%.2f"%max(dict_all.get("net_tcp_count")),
                                                           hum_byte_convert(max(dict_all.get("networks").get("bytin"))),
                                                           hum_byte_convert(max(dict_all.get("networks").get("bytout"))),
                                                           *("%.2f"%max(dict_all.get("disks").get(i)) for i in disks_name_list),
                                                           "%.2f"%max(dict_all.get("proc_loadavg"))
                                                           ))
            print(("%s"%title_print_distance_str).format("MEAN" , "%.2f"%statistics.mean(dict_all.get("cpu_total")),
                                                         "%.2f"%statistics.mean(dict_all.get("meminfo")),
                                                         "%.2f"%statistics.mean(dict_all.get("net_tcp_count")),
                                                         hum_byte_convert(statistics.mean(dict_all.get("networks").get("bytin"))),
                                                         hum_byte_convert(statistics.mean(dict_all.get("networks").get("bytout"))),
                                                         *("%.2f"%statistics.mean(dict_all.get("disks").get(i)) for i in disks_name_list),
                                                         "%.2f"%statistics.mean(dict_all.get("proc_loadavg"))
                                                         ))
            print(("%s"%title_print_distance_str).format("MIN", "%.2f"%min(dict_all.get("cpu_total")),
                                                         "%.2f"%min(dict_all.get("meminfo")),
                                                         "%.2f"%min(dict_all.get("net_tcp_count")),
                                                         hum_byte_convert(min(dict_all.get("networks").get("bytin"))),
                                                         hum_byte_convert(min(dict_all.get("networks").get("bytout"))),
                                                         *("%.2f"%min(dict_all.get("disks").get(i)) for i in disks_name_list),
                                                         "%.2f"%min(dict_all.get("proc_loadavg"))
                                                         ))
    except Exception as e:
        print(e)
        return
ret.get("values").get("iowait") != None: + if not minute_dict.get(title).get("iowait"): + minute_dict[title]["iowait"] = [ret.get("values").get("iowait")] + else: + minute_dict.get(title).get("iowait").append(ret.get("values").get("iowait")) + if ret.get("values").get("steal") != None: + if not minute_dict.get(title).get("steal"): + minute_dict[title]["steal"] = [ret.get("values").get("steal")] + else: + minute_dict.get(title).get("steal").append(ret.get("values").get("steal")) + if ret.get("values").get("idle") != None: + if not minute_dict.get(title).get("idle"): + minute_dict[title]["idle"] = [ret.get("values").get("idle")] + else: + minute_dict.get(title).get("idle").append(ret.get("values").get("idle")) + if title == "net_tcp_count": #tcp + if ret.get("values").get("OutSegs") != None: + if not minute_dict.get(title).get("outseg"): + minute_dict[title]["outseg"] = [ret.get("values").get("OutSegs")] + else: + minute_dict.get(title).get("outseg").append(ret.get("values").get("OutSegs")) + if ret.get("values").get("RetransSegs") != None: + if not minute_dict.get(title).get("retranssegs"): + minute_dict[title]["retranssegs"] = [ret.get("values").get("RetransSegs")] + else: + minute_dict.get(title).get("retranssegs").append(ret.get("values").get("RetransSegs")) + if title == "proc_loadavg": #load + if ret.get("values").get("load1") != None: + if not minute_dict.get(title).get("load1"): + minute_dict[title]["load1"] = [ret.get("values").get("load1")] + else: + minute_dict.get(title).get("load1").append(ret.get("values").get("load1")) + return minute_dict + except Exception as e: + print(e) + return minute_dict + + \ No newline at end of file diff --git a/source/tools/combine/sar/utils.py b/source/tools/combine/sar/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..66e8ffe53d99cee185a5a5f639ad32a266b8b533 --- /dev/null +++ b/source/tools/combine/sar/utils.py @@ -0,0 +1,17 @@ + + +def get_print_title_distance(title_unity): + title_list = 
title_unity.split(' ') + title_index_list = [] + title_old_index = 0 + for title in title_list: + if title: + y = title_unity.index(title) + if title_index_list: + title_index_list.append(y-title_old_index) + else: + title_index_list.append(y) + title_old_index = y + title_list = [val for val in title_list if val] + title_print_distance_str='{:<' + '}{:<'.join([str(i) for i in title_index_list[1:]]) + '}{:<'+(str(title_index_list[-1])) + '}' + return title_print_distance_str \ No newline at end of file diff --git a/source/tools/combine/sar/venv.sh b/source/tools/combine/sar/venv.sh new file mode 100644 index 0000000000000000000000000000000000000000..4bbdc481122106769f8f58d16dbd7693d86bfe60 --- /dev/null +++ b/source/tools/combine/sar/venv.sh @@ -0,0 +1,60 @@ +#!/bin/bash + +ALIYUN_MIRROR="https://mirrors.aliyun.com/pypi/simple/" +export SERVER_HOME=$(pwd) + +VIRTUALENV_HOME="${SERVER_HOME}/virtualenv" + +touch_virtualenv() { + pip3.9 uninstall virtualenv -y + pip3.6 install virtualenv -i "${ALIYUN_MIRROR}" + if [ -d ${VIRTUALENV_HOME} ]; then + echo "virtualenv exists, skip" + echo "INFO: activate virtualenv..." + source ${VIRTUALENV_HOME}/bin/activate || exit 1 + else + virtualenv ${VIRTUALENV_HOME} + if [ "$?" = 0 ]; then + echo "INFO: create virtualenv success" + else + echo "ERROR: create virtualenv failed" + exit 1 + fi + echo "INFO: activate virtualenv..." + source ${VIRTUALENV_HOME}/bin/activate || exit 1 + check_requirements + fi +} + + +check_requirements() { + echo "INFO: begin install requirements..." + if ! [ -d ${SERVER_HOME}/logs/ ]; then + mkdir -p ${SERVER_HOME}/logs/ || exit 1 + fi + + local requirements_log="${SERVER_HOME}/logs/requirements.log" + local requirements="requirements.txt" + touch "$requirements_log" || exit + pip3.6 install --upgrade pip + pip3.6 install -r ${requirements} -i "${ALIYUN_MIRROR}" |tee -a "${requirements_log}" || exit 1 + local pip_res=$? 
+ if [ $pip_res -ne 0 ]; then + echo "ERROR: requirements not satisfied and auto install failed, please check ${requirements_log}" + exit 1 + fi +} + +pyinstaller_sar() { + echo "INFO: begin pyinstaller sar..." + pyinstaller -F sar.py --add-data './config.yaml:./' -y + deactivate + rm -rf ${VIRTUALENV_HOME} +} + +deploy() { + touch_virtualenv + pyinstaller_sar +} + +deploy diff --git a/source/tools/combine/sar/yaml_instance.py b/source/tools/combine/sar/yaml_instance.py new file mode 100644 index 0000000000000000000000000000000000000000..cb78b1ec68f1bd5e2be11593293ebe6d1b61b067 --- /dev/null +++ b/source/tools/combine/sar/yaml_instance.py @@ -0,0 +1,22 @@ +# -*- coding: utf-8 -*- +#!/root/anaconda3/envs/python310 +import yaml +import os + +BASE_DIR = os.path.dirname(os.path.abspath(__file__)) + +def load_resp_second_dist(): + """load response code status""" + yaml_name = "/etc/sysak/base.yaml" + yaml_path = os.path.join(os.path.expanduser('~'), yaml_name) + with open(yaml_path, 'r') as f: + config = yaml.load(f, Loader=yaml.FullLoader) + return config + +def sar_config(): + """load response code status""" + yaml_name = "config.yaml" + yaml_path = os.path.join(BASE_DIR, yaml_name) + with open(yaml_path, 'r') as f: + config = yaml.load(f, Loader=yaml.FullLoader) + return config \ No newline at end of file diff --git a/source/tools/detect/generic/surftrace/Makefile b/source/tools/detect/generic/surftrace/Makefile index 6134cf0937b5fb56029abcbf2f1d041cfc615b95..359fa1d3f1ba6a1ca16d04dcf4317324454d96fd 100644 --- a/source/tools/detect/generic/surftrace/Makefile +++ b/source/tools/detect/generic/surftrace/Makefile @@ -1,3 +1,3 @@ target := surftrace -DEPEND := "prev{default};python-dep{all}" +DEPEND := "python-dep{all}" include $(SRC)/mk/py.mk diff --git a/source/tools/detect/generic/sysctl/Makefile b/source/tools/detect/generic/sysctl/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..1a63132fcaa043cf72d48678b2f178a53dd5f62b --- /dev/null +++ 
b/source/tools/detect/generic/sysctl/Makefile @@ -0,0 +1,5 @@ +target := sysctl + +mods := sysctl.o + +include $(SRC)/mk/csrc.mk diff --git a/source/tools/detect/generic/sysctl/README.md b/source/tools/detect/generic/sysctl/README.md new file mode 100644 index 0000000000000000000000000000000000000000..9287e4f6842c6b22610477738e6a7f2af056e35a --- /dev/null +++ b/source/tools/detect/generic/sysctl/README.md @@ -0,0 +1,41 @@ +# 功能说明 +sysctl 用于检查fd,pid,inode,rootfs等资源使用情况工具。 + +# 使用说明 +```bash +Usage: [fd|pid|root|inode] [threshold] +fd: fd使用量检查 +pid: pid使用量检查 +inode: inode 使用量检查 +threshold: 相对于max limit的百分比 +``` +# 使用举例 +## 运行说明 + +下面的例子使用sysctl 检查fd 的使用量 + +```bash +sudo sysak sysctl fd 0.6 +``` + +## 输出说明 +file-max: 系统file max +file-used: 系统当前fd使用量 +pid-max: 单个进程可打开最大fd +后面是系统fd 使用top 10 +pid:进程号 +comm: 进程明 +fd: fd数量 +```bash +file-max:1606698 file-used:1401664 pid-max:655350 +pid: 21308, comm: test, fd: 300004 +pid: 22039, comm: main, fd: 300004 +pid: 22070, comm: java, fd: 300004 +pid: 21171, comm: java, fd: 200004 +pid: 20595, comm: stress, fd: 200003 +pid: 21235, comm: stress, fd: 100004 +pid: 611, comm: java, fd: 88 +pid: 2383, comm: java, fd: 75 +pid: 1, comm: systemd, fd: 49 +pid: 1371, comm: java, fd: 42 +``` diff --git a/source/tools/detect/generic/sysctl/sysctl.c b/source/tools/detect/generic/sysctl/sysctl.c new file mode 100644 index 0000000000000000000000000000000000000000..91d2bf30ba27e5cf0c8a4f70319785e00f99c1c4 --- /dev/null +++ b/source/tools/detect/generic/sysctl/sysctl.c @@ -0,0 +1,303 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define MAX_COMM 16 // 最大路径长度 +#define MAX_PATH 256 // 最大路径长度 +#define MAX_LINE 128 // 最大行长度 +#define MAX_PROC 10 // 最大进程数 +#define FD_THRESHOLD 0.6 // fd利用率阈值 +#define PID_THRESHOLD 0.6 // pid利用率阈值 +#define ROOT_THRESHOLD 0.6 // 根分区利用率阈值 +#define INODE_THRESHOLD 0.6 // 根分区inode利用率阈值 + +struct process { + int pid; + int fd; + char name[MAX_COMM]; +}; + +int 
get_process_file_limit(void) +{ + struct rlimit rlim; + int file_max = 455350; + + if (getrlimit(RLIMIT_NOFILE, &rlim) == 0) { + file_max = rlim.rlim_cur; + } + return file_max; +} + +int check_fd_usage(double threshold) { + FILE *fp; + char line[MAX_LINE]; + int total, used, free; + double ratio; + int ret = 0; + fp = fopen("/proc/sys/fs/file-nr", "r"); + if (fp == NULL) { + perror("fopen"); + exit(1); + } + fgets(line, MAX_LINE, fp); + fclose(fp); + sscanf(line, "%d %*d %d", &used,&total); + ratio = (double)used / total; + ret = !!(ratio > threshold); + if (ret) + printf("file-max:%d file-used:%d pid-max:%d\n", total, used, get_process_file_limit()); + return ret; +} + +int check_pid_usage(double threshold) { + FILE *fp; + char line[MAX_LINE]; + int total, used, free; + double ratio; + + fp = fopen("/proc/sys/kernel/pid_max", "r"); + if (fp == NULL) { + perror("fopen"); + exit(1); + } + fgets(line, MAX_LINE, fp); + fclose(fp); + sscanf(line, "%d", &total); + fp = fopen("/proc/sys/kernel/pid_max", "r"); + if (fp == NULL) { + perror("fopen"); + exit(1); + } + fgets(line, MAX_LINE, fp); + fclose(fp); + sscanf(line, "%d", &used); + free = total - used; + ratio = (double)used / total; + if (ratio > threshold) { + return 1; + } else { + return 0; + } +} + +int check_root_usage(double threshold) { + struct statvfs buf; + unsigned long total, used, free; + double ratio; + + if (statvfs("/", &buf) != 0) { + perror("statvfs"); + exit(1); + } + total = buf.f_blocks * buf.f_frsize; + free = buf.f_bfree * buf.f_frsize; + used = total - free; + ratio = (double)used / total; + if (ratio > threshold) { + return 1; + } else { + return 0; + } +} + +int check_inode_usage(double threshold) { + struct statvfs buf; + unsigned long total, used, free; + double ratio; + + if (statvfs("/", &buf) != 0) { + perror("statvfs"); + exit(1); + } + total = buf.f_files; + free = buf.f_ffree; + used = total - free; + ratio = (double)used / total; + if (ratio > threshold) { + return 1; + } else { + 
return 0; + } +} + +int pid_comm(char *name, int pid) +{ + int len = 0; + FILE *fp; + char path[MAX_PATH]; + sprintf(path, "/proc/%d/comm", pid); + fp = fopen(path, "r"); + if (fp == NULL) + return -1; + fgets(name, MAX_COMM, fp); + len = strlen(name); + if (name[len -1] == '\n') { + name[len -1] = '\0'; + } + fclose(fp); + +} +void check_top_fd_processes(struct process *top) { + DIR *dpp; + DIR *dp; + struct dirent *entry; + char path[MAX_PATH]; + int pid, fd; + char name[MAX_COMM]; + FILE *fp; + int i, j; + int count; + struct process temp; + int len = 0; + dpp = opendir("/proc"); + if (dpp == NULL) { + perror("opendir"); + exit(1); + } + count = 0; + while ((entry = readdir(dpp)) != NULL) { + if (isdigit(entry->d_name[0])) { + pid = atoi(entry->d_name); + sprintf(path, "/proc/%d/fd", pid); + fd = 0; + dp = opendir(path); + if (dp == NULL) { + continue; + } + while ((entry = readdir(dp)) != NULL) { + if (isdigit(entry->d_name[0])) { + fd++; + } + } + closedir(dp); + if (count < MAX_PROC) { + pid_comm(name, pid); + top[count].pid = pid; + top[count].fd = fd; + strcpy(top[count].name, name); + count++; + } else { + if (fd > top[MAX_PROC - 1].fd) { + pid_comm(name, pid); + top[MAX_PROC - 1].pid = pid; + top[MAX_PROC - 1].fd = fd; + strcpy(top[MAX_PROC - 1].name, name); + for (i = MAX_PROC - 2; i >= 0; i--) { + // 如果当前进程的fd数大于前一个进程的fd数,交换位置 + if (top[i + 1].fd > top[i].fd) { + temp = top[i + 1]; + top[i + 1] = top[i]; + top[i] = temp; + } else { + break; + } + } + } + } + } + } + closedir(dpp); +} + +int main(int argc, char **argv) +{ + int i = 0; + double threshold = 0.5; + int result = 0; + struct process *top = NULL; + + if (argc == 1) { + printf("Usage: %s [fd|pid|root|inode] [threshold]\n", argv[0]); + exit(0); + } + for (i = 1; i < argc; i++) { + if (strcmp(argv[i], "fd") == 0) { + if (i + 1 < argc) { + threshold = atof(argv[i + 1]); + if (threshold > 0 && threshold < 1) { + i++; + } else { + threshold = FD_THRESHOLD; + } + } else { + threshold = FD_THRESHOLD; 
+ } + result = check_fd_usage(threshold); + if (result == 1) { + top = (struct process *)malloc(MAX_PROC * sizeof(struct process)); + if (!top) { + printf("alloc proccess top failed \n"); + continue; + } + check_top_fd_processes(top); + for (i = 0; i < MAX_PROC; i++) { + printf("pid: %d, comm: %s, fd: %d\n", top[i].pid, top[i].name, top[i].fd); + } + if(top) { + free(top); + top = NULL; + } + } else + printf("fd check ok\n"); + } + else if (strcmp(argv[i], "pid") == 0) { + if (i + 1 < argc) { + threshold = atof(argv[i + 1]); + if (threshold > 0 && threshold < 1) { + i++; + } else { + threshold = PID_THRESHOLD; + } + } else { + threshold = PID_THRESHOLD; + } + //result = check_pid_usage(threshold); + printf("pid usage: %d\n", result); + } + else if (strcmp(argv[i], "root") == 0) { + if (i + 1 < argc) { + threshold = atof(argv[i + 1]); + if (threshold > 0 && threshold < 1) { + i++; + } else { + threshold = ROOT_THRESHOLD; + } + } else { + threshold = ROOT_THRESHOLD; + } + //result = check_root_usage(threshold); + printf("root usage: %d\n", result); + } + else if (strcmp(argv[i], "inode") == 0) { + if (i + 1 < argc) { + threshold = atof(argv[i + 1]); + if (threshold > 0 && threshold < 1) { + i++; + } else { + threshold = INODE_THRESHOLD; + } + } else { + threshold = INODE_THRESHOLD; + } + //result = check_inode_usage(threshold); + printf("inode usage: %d\n", result); + } + else { + printf("Usage: [fd|pid|root|inode] [threshold]\n" + "fd: fd usage check \n" + "pid: pid usage check\n" + "inode: inode usage check\n" + "threshold: percent of max \n" + "example: sysak fd 0.5 \n"); + exit(1); + } + } + return 0; +} + diff --git a/source/tools/detect/generic/tracesig/tracesig.c b/source/tools/detect/generic/tracesig/tracesig.c index a9001a3226f48e2c2c2314813fc19f120409e456..12d2196c6e37a5cf84a19586b4f27c5c2636aed2 100644 --- a/source/tools/detect/generic/tracesig/tracesig.c +++ b/source/tools/detect/generic/tracesig/tracesig.c @@ -39,16 +39,16 @@ const char 
*argp_program_version = "tracesig 0.1"; const char argp_program_doc[] = "Catch the delay of a syscall more than threshold.\n" "\n" -"USAGE: tracesig [--help] <[-f a.log]> \n" +"USAGE: tracesig [--help] <[-f a.log]> duration\n" "\n" "EXAMPLES:\n" " tracesig # run forever, detect delay more than 10ms(default)\n" +" tracesig 10 # run for 10 seconds\n" " tracesig -c bash # check the victim who's name is bash\n" " tracesig -f a.log #save the log to a.log\n"; static const struct argp_option opts[] = { { "file", 'f', "FILE", 0, "log file"}, - { "span", 's', "SPAN", 0, "How long to run"}, { "comm", 'c', "COMM", 0, "Name of the victim"}, { "verbose", 'v', NULL, 0, "Verbose debug output" }, { NULL, 'h', NULL, OPTION_HIDDEN, "Show the full help" }, diff --git a/source/tools/detect/io/iowaitstat/iowaitstat.py b/source/tools/detect/io/iowaitstat/iowaitstat.py index 31f595653f0a6a73d9c8eefc357f2785b5b024e6..db640c59f4e9fbd2a46ae71eba4bfc40997e5b27 100755 --- a/source/tools/detect/io/iowaitstat/iowaitstat.py +++ b/source/tools/detect/io/iowaitstat/iowaitstat.py @@ -1,4 +1,3 @@ -#!/usr/bin/python3 # -*- coding: utf-8 -*- import os diff --git a/source/tools/detect/java/java_collect/Makefile b/source/tools/detect/java/java_collect/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..bdfc15538f83cdae1a0fff06e49b1bef7a67fe1e --- /dev/null +++ b/source/tools/detect/java/java_collect/Makefile @@ -0,0 +1,16 @@ +target := java_collect +ifeq ($(KERNEL_DEPEND), Y) +TARGET_PATH := $(OBJ_TOOLS_PATH) +else +TARGET_PATH := $(OBJ_TOOLS_ROOT) +endif + +all: $(target) target_rule + +$(target): $@ + cp -r continuous-profile-collector $(TARGET_PATH)/ + mv $(TARGET_PATH)/continuous-profile-collector $(TARGET_PATH)/jruntime + cd src && pwd && sh venv.sh && cd ../ + cp src/dist/java_collect $(TARGET_PATH)/ + +include $(SRC)/mk/target.inc diff --git a/source/tools/detect/java/java_collect/README.md b/source/tools/detect/java/java_collect/README.md new file mode 100644 index 
0000000000000000000000000000000000000000..11ef94a67a792355a7a31788b53e1595cf4fb123 --- /dev/null +++ b/source/tools/detect/java/java_collect/README.md @@ -0,0 +1,2 @@ +# 功能说明 +收集Java进程JFR diff --git a/source/tools/detect/java/java_collect/continuous-profile-collector/collector b/source/tools/detect/java/java_collect/continuous-profile-collector/collector new file mode 100755 index 0000000000000000000000000000000000000000..f1c61aeb0e4bc1a463785bea8c0acfdff1e49cbf --- /dev/null +++ b/source/tools/detect/java/java_collect/continuous-profile-collector/collector @@ -0,0 +1,169 @@ +#!/bin/sh + +die () { + echo + echo "$*" + echo + exit 1 +} >&2 + +error() { + echo + echo "ERROR: $*" +} >&2 + +checkJava() { + set +eu + if [ -n "$JAVA_HOME" ]; then + JAVA_CMD=$JAVA_HOME/bin/java + if [ ! -x "$JAVA_CMD" ]; then + die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME + + Please set the JAVA_HOME variable in your environment to match the + location of your Java installation." + fi + else + JAVA_CMD=java + which java > /dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. + + Please set the JAVA_HOME variable in your environment to match the + location of your Java installation." + fi + set -eu +} + +set -eu + +usage() { + echo + echo "Usage: $0 [options] run collecting for target process" + echo " or $0 new-config create a configuration from template" + echo + echo "Options:" + echo " -d duration run collecting for seconds" + echo " -c configuration_path config file" + echo + exit 1 +} >&2 + +jattach() { + set +e + "$JATTACH" "$PID" load instrument false "$COLLECTOR=$1" >/dev/null + RET=$? + + # Check if jattach failed + if [ $RET -ne 0 ]; then + if [ $RET -eq 255 ]; then + error "Failed to inject collector into $PID" + fi + exit $RET + fi + + set -e +} + +check_if_terminated() { + if ! 
kill -0 "$PID" 2>/dev/null; then + exit 0 + fi +} + +SCRIPT_BIN="$0" +while [ -h "$SCRIPT_BIN" ]; do + SCRIPT_BIN="$(readlink "$SCRIPT_BIN")" +done +SCRIPT_DIR="$( + cd "$(dirname "$SCRIPT_BIN")" >/dev/null 2>&1 + pwd -P +)" + +JATTACH=$SCRIPT_DIR/jattach +COLLECTOR=$SCRIPT_DIR/continuous-profile-collector-agent.jar +CONFIGURATION_PATH=$SCRIPT_DIR/configuration.toml +PID="" + +newConfig() { + set +e + checkJava + "$JAVA_CMD" -cp "$COLLECTOR" "com.alibaba.cpc.ConfigurationGenerator" "$SCRIPT_DIR" + RET=$? + if [ $RET -ne 0 ]; then + die "Failed to generate configuration" + fi + set -e +} + +if [ $# -eq 1 ]; then + if [ "$1" = "new-config" ]; then + newConfig + exit 0 + fi +fi + +genJattach() { + set +e + checkJava + "$JAVA_CMD" -cp "$COLLECTOR" "com.alibaba.cpc.JattachExtractor" "$SCRIPT_DIR" + RET=$? + if [ $RET -ne 0 ]; then + die "Failed to generate jattach" + fi + + chmod +x $JATTACH + set -e +} + +if [ ! -f "$JATTACH" ]; then + genJattach +fi + +DURATION=10 +while [ $# -gt 0 ]; do + case $1 in + -h | "-?") + usage + ;; + -d) + DURATION="$2" + shift + ;; + -c) + CONFIGURATION_PATH="$SCRIPT_DIR/$2" + shift + ;; + [0-9]*) + PID="$1" + ;; + -*) + error "Unrecognized option: $1" + usage + ;; + esac + shift +done + +if [ ! 
-f "$CONFIGURATION_PATH" ]; then + error "config file not found: $CONFIGURATION_PATH" + exit 1 +fi + +if [ "$PID" = "" ]; then + error "No process specified" + usage +fi + +jattach "configuration_path=$CONFIGURATION_PATH" +echo Collecting for "$DURATION" seconds >&2 +set +e +trap 'DURATION=0' INT + +while [ "$DURATION" -gt 0 ]; do + DURATION=$((DURATION - 1)) + check_if_terminated + sleep 1 +done + +set -e +trap - INT +echo Done >&2 +jattach "" \ No newline at end of file diff --git a/source/tools/detect/java/java_collect/continuous-profile-collector/configuration.toml b/source/tools/detect/java/java_collect/continuous-profile-collector/configuration.toml new file mode 100644 index 0000000000000000000000000000000000000000..7442bf6aa470b70fd0586cb259e2222379fc2a6b --- /dev/null +++ b/source/tools/detect/java/java_collect/continuous-profile-collector/configuration.toml @@ -0,0 +1,145 @@ +################################ Basic ################################ +# collection period +# unit: second(s) +# default: 60 +period = 60 + +# delay of the first period starting +# unit: second(s) +# default: 0 +delay = 0 + +# whether to start the first period at zero second, 'delay' will be ignored if true +# default: false +start_at_zero_second = false + +# compression mode of profile +# support values: 'gzip', 'none' +# default: 'none' +compression_mode = 'none' + +# trigger mode +# support values: 'periodic', 'api' +# when 'api', 'period', 'delay', 'start_at_zero_second' will be ignored, and +# agent will set this mode to 'api' when using collector script +# default: 'periodic' +trigger_mode = 'periodic' + +# Output file format: +# 'jfr' +# 'collapsed_with_samples': this format is only supported by Async Profiler, JFR will be disabled if is is specified +# 'collapsed_with_total': this format is only supported by Async Profiler, JFR will be disabled if is is specified +# +# when specified as 'collapsed_with_samples' and 'collapsed_with_total', only one of 'execution' +# and 
'allocation' will be enabled, if both are specified as enabled, only 'execution' will be finally enabled +# +# it is recommended to use 'jfr', this configuration item will be removed in the future, and only used by a special scene now +# +# default: 'jfr' +output_format = 'jfr' + +################################ Execution ################################ +[execution] +# implementation strategy for data collection: +# 'jfr': use JFR to collect profiling profile or disabled if unsupported +# 'async_profiler': use Async Profiler to profile or disabled if unsuppored +# 'auto': automatically decide which way to collect profile, or disabled if none of ways is supported +# 'off': distable the profiling type +# +# default: 'auto' +strategy = 'auto' + +# unit: ms +# default: 10 +interval = -1 + +################################ Wall Clock ################################ +[wall_clock] +# default: 'auto' +strategy = 'off' +# unit: ms +interval = 10 +thread_filter = '' +threads_per_tick = 16 + +################################ Allocation ################################ +[allocation] +# same meaning as 'execution.strategy' +strategy= 'off' + +# unit: kilobyte(s) +# default: 256 +# Only work for Async Profiler +interval = 256 + +# unit: kilobyte(s) +# default: 0 +# Only work for Async Profiler +# =0 means disabled, >0 means sampling obj exceeding such threshold +humongous_threshold = 0 + +################################ JFR ################################ +[JFR] +# how much data is kept in the disk repository +# unit: megabyte(s) +# default: 64 +max_size = 64 + +# how far back data is kept in the disk repository +# unit: minute(s) +# default: 10 +max_age = 10 + +# the path of custom JFR settings file, empty means use default settings +# file format: property +# default: null +# custom_settings_path = '' + +# override custom or default settings +# e.g. 
+# jdk.ExecutionSample#period=20 ms +# jdk.SafepointCleanup#enabled=true +# default: null +# override_settings = """ +# """ + +# the settings of JFR may be modified due to environmental problem (such as JDK version), +# if you want some settings not to be modified, please specify here +# +# default: null +# mandatory_settings = """ +# """ + +################################ Logging ################################ +[log] +# DEBUG, INFO, WARN, ERROR, OFF, default ERROR +# level = 'DEBUG' +# default /cpc.log +log_file = '/tmp/cpc_?.log' + +################################ Agent ################################ +[agent] +# application name, agent will set a value automatically if empty: +# a. - +# b. UNNAMED-APP +# default: null +# app_name = '' + +# upload server of profile, 'oss' and 'destination' will be ignored if not empty +# e.g. http://profile-visualizer-dev.alibaba.net +# default: null +# upload_server = '' + +# oss configuration +# default: null +# oss_endpoint = '' +# oss_key = '' +# oss_secret = '' +# oss_bucket = '' + +# the path(directory or file) to save profile, only take effect when 'upload_server' and 'oss' is empty. 
Agent will set +# to when effect but empty +# when specified as a directory, the final saved file name is 'profile-MM-dd-HH-mm-ss.jfr' +# overwrite if the target file already exists and is not a directory +# +# default: null diff --git a/source/tools/detect/java/java_collect/continuous-profile-collector/continuous-profile-collector-agent.jar b/source/tools/detect/java/java_collect/continuous-profile-collector/continuous-profile-collector-agent.jar new file mode 100644 index 0000000000000000000000000000000000000000..70042ed84c0119fe2cb28d7e47263eb363b5b7ed Binary files /dev/null and b/source/tools/detect/java/java_collect/continuous-profile-collector/continuous-profile-collector-agent.jar differ diff --git a/source/tools/detect/java/java_collect/continuous-profile-collector/jattach b/source/tools/detect/java/java_collect/continuous-profile-collector/jattach new file mode 100755 index 0000000000000000000000000000000000000000..94db0544ca4a5292c3397e513e71084582b81fac Binary files /dev/null and b/source/tools/detect/java/java_collect/continuous-profile-collector/jattach differ diff --git a/source/tools/detect/java/java_collect/continuous-profile-collector/raptor b/source/tools/detect/java/java_collect/continuous-profile-collector/raptor new file mode 100755 index 0000000000000000000000000000000000000000..0f0bdf35eac9ce55bcd0ef27051c185cd0abe4c5 Binary files /dev/null and b/source/tools/detect/java/java_collect/continuous-profile-collector/raptor differ diff --git a/source/tools/detect/java/java_collect/src/conApi.py b/source/tools/detect/java/java_collect/src/conApi.py new file mode 100644 index 0000000000000000000000000000000000000000..bb53ee5f0af0ed9ccca098c153eb210790831485 --- /dev/null +++ b/source/tools/detect/java/java_collect/src/conApi.py @@ -0,0 +1,109 @@ +# -*- coding: utf-8 -*- +""" +------------------------------------------------- + File Name: conApi + Description : + Author : liaozhaoyan + date: 2023/6/12 
+------------------------------------------------- + Change Activity: + 2023/6/12: +------------------------------------------------- +""" +__author__ = 'liaozhaoyan' + +import os +import re +from dockerCli import CdockerCli +from pouchCli import CpouchCli +from crictlCli import CcrictlCli + + +class CconApi(object): + def __init__(self): + super(CconApi, self).__init__() + self._reNs = re.compile(r"(?<=\[)(.+?)(?=\])") + self._reDocker = re.compile(r"([0-9a-f]){16,}") + self._rePod = re.compile(r"\/.*\/([0-9a-f]){16,}") + self._ns1 = self._ns_mnt(1) + + def _ns_mnt(self, pid): + try: + link = os.readlink("/proc/%d/ns/mnt" % pid) + return self._reNs.findall(link)[0] + except FileNotFoundError: + return None + except IndexError: + return None + + def isHost(self, pid): + return self._ns_mnt(pid) == self._ns1 + + def _cgroup(self, pid): + d = {} + with open("/proc/%d/cgroup" % pid, 'r') as f: + for i, line in enumerate(f): + n, t, path = line.strip().split(":") + d[t] = path + return d + + def conCli(self, pid): + cgroups = self._cgroup(pid) + cname = cgroups['name=systemd'] + if "/docker" in cname: + ret = self._reDocker.search(cname)[0] + if ret: + return CdockerCli(ret, pid) + elif self._rePod.search(cname): + podId = cname.split("/")[-1] + return CpouchCli(podId, pid) + elif "/cri-containerd" in cname: + s = cname.split("-")[-1] + podId = s.split(".")[0] + return CcrictlCli(podId, pid) + return None + + def dCopyTo(self, cli, dst, src): + cli.copyTo(dst, src) + + def dCopyFrom(self, cli, dst, src): + cli.copyFrom(dst, src) + + @staticmethod + def _pollTime(tmo, ctmo): + first = True + def waitTime(): + nonlocal first + if first: + first = False + return tmo + else: + return ctmo + return waitTime + + def exec(self, cli, cmd, tmo=5, ctmo=0.1): + return cli.exec(cmd, tmo, ctmo) + + def checkDir(self, cli, path, mode): + ret = cli.exec("ls -l %s" % path) + if "cannot access" in ret: + cli.exec("mkdir -p %s && chmod %s %s" % (path, mode, path)) + + def 
checkFile(self, cli, path): + ret = cli.exec("ls -l %s" % path) + if ret == "" or "cannot access" in ret: + return False + else: + return True + + def conList(self, cli): + return cli.list() + + +if __name__ == "__main__": + api = CconApi() + cli = api.conCli(355) + res = api.exec(cli, "ls -l /var/sysak/jruntime") + if "cannot access" in res: + api.exec(cli, "mkdir -p /var/sysak/jruntime") + pass diff --git a/source/tools/detect/java/java_collect/src/cpuRate.py b/source/tools/detect/java/java_collect/src/cpuRate.py new file mode 100644 index 0000000000000000000000000000000000000000..93fd835df553530aa6e16518f4f837b013a1d62e --- /dev/null +++ b/source/tools/detect/java/java_collect/src/cpuRate.py @@ -0,0 +1,26 @@ +# -*- coding: utf-8 -*- +""" +------------------------------------------------- + File Name: cpuRate + Description : + Author : liaozhaoyan + date: 2023/6/8 +------------------------------------------------- + Change Activity: + 2023/6/8: +------------------------------------------------- +""" +__author__ = 'liaozhaoyan' + +import psutil + +for proc in psutil.process_iter(['pid', 'name', 'cpu_percent']): + try: + pinfo = proc.as_dict(attrs=['pid', 'name', 'cpu_percent']) + except psutil.NoSuchProcess: + pass + else: + print(pinfo) + +if __name__ == "__main__": + pass diff --git a/source/tools/detect/java/java_collect/src/crictlCli.py b/source/tools/detect/java/java_collect/src/crictlCli.py new file mode 100644 index 0000000000000000000000000000000000000000..7776c1d6ffedf1333d9f8fac3acad0aad87d847d --- /dev/null +++ b/source/tools/detect/java/java_collect/src/crictlCli.py @@ -0,0 +1,31 @@ +# -*- coding: utf-8 -*- +""" +------------------------------------------------- + File Name: crictlCli + Description : + Author : liaozhaoyan + date: 2023/7/28 +------------------------------------------------- + Change Activity: + 2023/7/28: +------------------------------------------------- +""" +__author__ = 'liaozhaoyan' + +from prun import Cprun +from dockerCli import 
CdockerCli + + +class CcrictlCli(CdockerCli): + def __init__(self, cid, pid): + super(CcrictlCli, self).__init__(cid, pid) + self._engine = "crictl" + + def exec(self, cmd, tmo=1, ctmo=0.1): + cmd = '%s exec %s sh -c \'%s\'' % (self._engine, self._cid, cmd) + r = Cprun() + return r.exec(cmd, tmo, ctmo) + + +if __name__ == "__main__": + pass diff --git a/source/tools/detect/java/java_collect/src/dockerCli.py b/source/tools/detect/java/java_collect/src/dockerCli.py new file mode 100644 index 0000000000000000000000000000000000000000..9dc373338bb9f5359c2cf19351f3886a52fb8dba --- /dev/null +++ b/source/tools/detect/java/java_collect/src/dockerCli.py @@ -0,0 +1,94 @@ +# -*- coding: utf-8 -*- +""" +------------------------------------------------- + File Name: dockerCli + Description : + Author : liaozhaoyan + date: 2023/6/27 +------------------------------------------------- + Change Activity: + 2023/6/27: +------------------------------------------------- +""" +__author__ = 'liaozhaoyan' + +import os +import random +import logging +from nsenter import Namespace +from prun import Cprun +from multiprocessing import Process + + +class CprocCopyTo(Process): + def __init__(self, pid, dst, src): + super(CprocCopyTo, self).__init__() + self._pid = pid + self._dst = dst + self._src = src + self.start() + self.join() + + def run(self): + with open(self._src, 'rb') as f: + stream = f.read() + with Namespace(self._pid, "mnt"): + with open(self._dst, 'wb') as f: + f.write(stream) + + +class CprocCopyFrom(Process): + def __init__(self, pid, dst, src): + super(CprocCopyFrom, self).__init__() + self._pid = pid + self._dst = dst + self._src = src + self.start() + self.join() + + def run(self): + cwd = os.getcwd() + with Namespace(self._pid, "mnt"): + try: + with open(self._src, 'rb') as f: + stream = f.read() + except FileNotFoundError: + os.chdir(cwd) + logging.warning("con cp from, no: %s" % self._src) + return + + os.chdir(cwd) + with open(self._dst, 'wb') as f: + f.write(stream) + 
+ +class CdockerCli(object): + def __init__(self, cid, pid): + self._cid = cid + self._pid = pid + self._engine = "docker" + + def cid(self): + return self._cid + + def exec(self, cmd, tmo=1, ctmo=0.1): + cmd = '%s exec --user=root %s sh -c \'%s\'' % (self._engine, self._cid, cmd) + r = Cprun() + return r.exec(cmd, tmo, ctmo) + + def copyTo(self, dst, src): + CprocCopyTo(self._pid, dst, src) + + def copyFrom(self, dst, src): + CprocCopyFrom(self._pid, dst, src) + + def list(self): + cmd = "%s ps" % self._engine + r = Cprun() + return r.exec(cmd) + + +if __name__ == "__main__": + cli = CdockerCli("43818bdc1883") + print(cli.exec("ps")) + pass diff --git a/source/tools/detect/java/java_collect/src/entry.py b/source/tools/detect/java/java_collect/src/entry.py new file mode 100644 index 0000000000000000000000000000000000000000..9193d9f786d513ed4fb37d692597df23b222e28d --- /dev/null +++ b/source/tools/detect/java/java_collect/src/entry.py @@ -0,0 +1,55 @@ +# -*- coding: utf-8 -*- +""" +------------------------------------------------- + File Name: entry + Description : + Author : liaozhaoyan + date: 2023/7/23 +------------------------------------------------- + Change Activity: + 2023/7/23: +------------------------------------------------- +""" +__author__ = 'liaozhaoyan' + +import argparse +from jspy import Cjspy + + +def setup_args(): + examples = """e.g. + java_collect -d 10 + java_collect -d 10 -p 1346 + java_collect -d 10 -t 3 + java_collect -d 5 -p 47184,1234 -b + """ + parser = argparse.ArgumentParser( + description="collect cpu flamegraph for java processes", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=examples) + parser.add_argument('-d', '--dur', dest='duration', type=int, default=5, help='duration, default: 5 seconds') + parser.add_argument('-t', '--top', dest='top', type=int, default=-1, + help='Flame graphs for the top java processes. 
def setup_args():
    """Build and parse the java_collect command line.

    --oss / --zip share dest='oss' so they act as an on/off pair
    (oss defaults to True; --zip flips it off).
    """
    examples = """e.g.
    java_collect -d 10
    java_collect -d 10 -p 1346
    java_collect -d 10 -t 3
    java_collect -d 5 -p 47184,1234 -b
    """
    parser = argparse.ArgumentParser(
        description="collect cpu flamegraph for java processes",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=examples)
    parser.add_argument('-d', '--dur', dest='duration', type=int, default=5,
                        help='duration, default: 5 seconds')
    # help fixed: the real default is -1 (disabled), not 1
    parser.add_argument('-t', '--top', dest='top', type=int, default=-1,
                        help='Flame graphs for the top N java processes. Default: disabled(-1), Max: 20')
    parser.add_argument('-p', '--pid', dest='pid', type=str, default="",
                        help='specified pid')
    # help fixed: store_true with default=False means the flag is off by default
    parser.add_argument('-b', '--bpf', dest='bpf', action='store_true', default=False,
                        help='eBPF oncpu sampling profiler. Default: False')
    parser.add_argument('--oss', dest='oss', action='store_true', default=True,
                        help='put on oss.')
    parser.add_argument('--zip', dest='oss', action='store_false', default=True,
                        help='pack to zip.')
    return parser.parse_args()


def check():
    """Parse and validate args.

    Exactly one of top/pid must be given; top is capped at the
    documented maximum of 20 (previously advertised but unenforced).
    Raises ValueError on violation.
    """
    args = setup_args()
    if args.top == -1 and args.pid == "":
        raise ValueError("should set top N or pid.")
    if args.top != -1 and args.pid != "":
        raise ValueError("should not both set top N and pid.")
    if args.top > 20:
        raise ValueError("top N is limited to 20.")
    return args


if __name__ == "__main__":
    s = Cjspy(check())
    s.diag()
    pass
def calcTotal(cpu_times):
    """Sum user+system+iowait from a psutil cpu_times record.

    NOTE(review): the iowait field is Linux-only — confirm if this
    module must run elsewhere.
    """
    return cpu_times.user + cpu_times.system + cpu_times.iowait


def getTops(pids):
    """Map pid -> total CPU time for every pid that still exists.

    Guards against the listing/inspection race (a pid can exit between
    psutil.pids() and Process()), matching ptop.py's handling.
    """
    dTop = {}
    for pid in pids:
        try:
            p = psutil.Process(pid)
        except psutil.NoSuchProcess:
            continue  # pid exited meanwhile; skip it
        if p:
            dTop[pid] = calcTotal(p.cpu_times())
    return dTop


def diffTop(t1, t2):
    """Per-pid delta t2-t1, restricted to pids present in both samples."""
    return {k: v - t1[k] for k, v in t2.items() if k in t1}


if __name__ == "__main__":
    # moved under __main__: the sampling loop used to run on import
    pids = psutil.pids()
    t1 = getTops(pids)
    time.sleep(1)
    t2 = getTops(pids)
    res = diffTop(t1, t2)
    print(sorted(res.items(), key=lambda x: x[1], reverse=True))
class CjConOp(object):
    """Drive the continuous-profile-collector inside the container owning `pid`."""

    def __init__(self, pid):
        self._api = CconApi()
        self._cid = self._api.conId(pid)      # container id that owns pid
        self._cpid = int(self._con_pid(pid))  # pid as seen inside the container

    def _con_pid(self, pid):
        """Return the innermost-namespace pid for host `pid`, or None.

        /proc/<pid>/status NSpid lists one pid per nested pid namespace;
        the last field is the deepest one. The old 3-field unpack raised
        ValueError for processes with fewer (or more) namespace levels.
        """
        with open("/proc/%d/status" % pid, 'r') as f:
            for line in f:
                if line.startswith("NSpid:"):
                    return line.split()[-1]
        return None

    def exec(self, cmd, tmo=5, ctmo=0.1):
        """Run `cmd` inside the container via the container API."""
        return self._api.exec(self._cid, cmd, tmo, ctmo)

    def checkDir(self, path):
        """mkdir -p `path` inside the container if ls reports it missing."""
        ret = self.exec("ls -l %s" % path)
        if "cannot access" in ret:
            self.exec("mkdir -p %s" % path)

    def checkFlag(self, path):
        """Classify the install flag file: 'None' / 'exist' (ok) / 'locked'."""
        ret = self.exec("ls -l %s" % path)
        if "cannot access" in ret:
            return "None"
        ret = self.exec("cat %s" % path)
        if "ok" in ret:
            return "exist"
        return "locked"

    def _checkTool(self):
        """Install the collector into the container once, using an ok/lock flag."""
        dstExec = "/var/sysak/continuous-profile-collector/"
        self.checkDir(dstExec)
        ret = self.checkFlag(dstExec + "ok")
        if ret == "None":
            # we install: write lock flag, copy the tool, then flip to ok
            with open("ok", 'w') as f:
                f.write("lock")
            self._api.dCopyTo(self._cid, dstExec, "ok")
            self._api.dCopyTos(self._cid, dstExec, "../continuous-profile-collector/")
            with open("ok", 'w') as f:
                f.write("ok")
            self._api.dCopyTo(self._cid, dstExec, "ok")
        elif ret == "locked":
            # someone else is installing: poll up to 50 times.
            # sleep added — the loop used to busy-spin with no delay.
            cnt = 0
            while cnt < 50:
                ret = self.checkFlag(dstExec + "ok")
                if ret == "exist":
                    break
                time.sleep(0.1)
                cnt += 1
        self.checkDir("/var/sysak/jruntime")

    def jDiag(self):
        """Run one collection pass for the container-local pid."""
        self._checkTool()
        # NOTE(review): exec() formats `cmd` with %s, so passing a list
        # yields "sh -c '['sh', ...]'" — confirm CconApi.exec accepts lists.
        cmds = ['sh', '-c', "cd /var/sysak/continuous-profile-collector/ && ls"]
        print(self.exec(cmds))
        conf = CtomlWrite("configuration.toml")
        confFile = conf.confPid(self._cpid)
        self._api.dCopyTo(self._cid, "/var/sysak/continuous-profile-collector/", confFile)
        cmd = "./collector -d %d %d -c %s" % (10, self._cpid, confFile)
        cmds = ['sh', '-c', "cd /var/sysak/continuous-profile-collector/ && %s" % cmd]
        print(self.exec(cmds))


if __name__ == "__main__":
    os.chdir("../continuous-profile-collector/")
    op = CjConOp(27966)
    op.jDiag()
SYSAK_BASE = os.getenv("SYSAK_WORK_PATH")
if SYSAK_BASE is None:
    SYSAK_BASE = "/usr/local/sysak/.sysak_components/"
SYSAK_PATH = os.path.join(SYSAK_BASE, "tools/jruntime/")
JRUNT_OUT = "/var/sysak/jruntime/"
JRUNT_TOOL = "continuous-profile-collector-agent.jar"

depDirCheck = """
if [ ! -d "/var/sysak/jruntime" ]; then
    mkdir -p /var/sysak/jruntime
    chmod 777 /var/sysak/jruntime
fi
rm -rf /var/sysak/jruntime/*
"""


class Cjspy(object):
    """Orchestrate one java CPU-profiling run (host and container pids)."""

    def __init__(self, args):
        """args: namespace with duration, top, pid, bpf, oss (see entry.py)."""
        self._args = args
        os.system(depDirCheck)   # ensure an empty /var/sysak/jruntime
        self._cwd = os.getcwd()
        self._cli = None         # CunityCli created lazily in diag()
        self._capi = CconApi()
        logging.basicConfig(filename=os.path.join(JRUNT_OUT, "jspy.log"),
                            format='%(asctime)s %(levelname)s <%(module)s> [%(funcName)s] %(message)s',
                            datefmt='%Y-%m-%d,%H:%M:%S',
                            level=logging.DEBUG)
        os.chdir(SYSAK_PATH)

    def __del__(self):
        # undo the chdir from __init__
        os.chdir(self._cwd)

    def _raptor(self, sample):
        """Start the eBPF oncpu sampler for `sample` seconds; returns CforkRun."""
        overtime = sample + 1
        cmd = "./raptor oncpu --server local --exit-time %d --sample-rate 100 --upload-rate %ds" % (overtime, sample)
        print(cmd)
        logging.info("raptor cmd: %s" % cmd)
        oFile = os.path.join(JRUNT_OUT, "raptor.fold")
        return CforkRun(cmd, overtime + 5, oFile)

    def _pre_con(self, pid, confFile, outFile, cDict):
        """Stage tool + config for pid's container; record files to pull/remove.

        Returns the container cli (None when pid runs on the host).
        """
        if self._capi.isHost(pid):
            cli = None
        else:
            cli = self._capi.conCli(pid)
        self._capi.checkDir(cli, SYSAK_PATH, '755')
        self._capi.checkDir(cli, JRUNT_OUT, '777')
        jar_tar = os.path.join(SYSAK_PATH, JRUNT_TOOL)
        if not self._capi.checkFile(cli, jar_tar):
            self._capi.dCopyTo(cli, os.path.join(SYSAK_PATH, JRUNT_TOOL), JRUNT_TOOL)
        self._capi.dCopyTo(cli, os.path.join(SYSAK_PATH, confFile), confFile)

        if pid not in cDict:
            cDict[pid] = {"cp": [], "rm": []}
        oFile = os.path.join(JRUNT_OUT, outFile)
        cpcLog = os.path.join(JRUNT_OUT, "cpc_%d.log" % pid)
        cDict[pid]['cp'].append(oFile)
        cDict[pid]['cp'].append(cpcLog)
        cDict[pid]['rm'].append(os.path.join(SYSAK_PATH, confFile))
        cDict[pid]['rm'].append(oFile)
        cDict[pid]['rm'].append(cpcLog)
        cDict[pid]['rm'].append("/tmp/cpc-async-profiler-*.jfr")
        return cli

    def _pullConDatas(self, cDict):
        """Pull collected files out of each container, then clean them up."""
        for pid, cell in cDict.items():
            cli = self._capi.conCli(pid)
            for cp in cell['cp']:
                # NOTE(review): cp is already an absolute JRUNT_OUT path, so the
                # join is a no-op — confirm intended dCopyFrom arguments.
                self._capi.dCopyFrom(cli, os.path.join(JRUNT_OUT, cp), cp)
            for rm in cell['rm']:
                self._capi.exec(cli, "rm %s" % rm)

    def mon_pid(self, pid, conf, cDict, runs, q):
        """Launch the java collector for one pid (async; appended to runs)."""
        confFile, outFile = conf.confPid(pid, self._args.duration)
        self._pre_con(pid, confFile, outFile, cDict)
        cmd = "/bin/sh collector -d %d %s -c %s" % (self._args.duration, pid, confFile)
        print(cmd)
        logging.info("java cmd: %s", cmd)
        runs.append(CprunThread(cmd, confFile, q, self._args.duration + 1))

    def mon_top(self, conf, cDict, runs, q):
        """Launch collectors for the top-N CPU-consuming java processes."""
        t = Cptop()
        for top in t.jtop(self._args.top):
            self.mon_pid(top.pid, conf, cDict, runs, q)

    def z_dir(self, path, dDir):
        """Zip dDir recursively into `path`; cwd is restored even on error."""
        pwd = os.getcwd()
        os.chdir(dDir)
        try:
            with zipfile.ZipFile(path, mode='w', compression=zipfile.ZIP_DEFLATED) as zf:
                for root, dirs, files in os.walk("./"):
                    for f in files:
                        zf.write(os.path.join(root, f))
        finally:
            # previously skipped when ZipFile raised, leaking the chdir
            os.chdir(pwd)

    def diag(self):
        """Run the whole collection: start collectors, wait, pull, publish."""
        cDict = {}
        runs = []
        q = Queue()
        conf = CtomlWrite(JRUNT_OUT, "./configuration.toml")

        if self._args.top > 0:
            self.mon_top(conf, cDict, runs, q)
        else:
            for pid in self._args.pid.split(","):
                self.mon_pid(int(pid), conf, cDict, runs, q)

        if self._args.bpf:
            runs.append(self._raptor(self._args.duration))
        for r in runs:
            r.join()

        self._pullConDatas(cDict)
        if self._args.oss:
            if self._cli is None:
                self._cli = CunityCli()
            self._cli.ossDir(str(uuid.uuid4()), JRUNT_OUT)
        else:
            path = os.path.join(JRUNT_OUT, "../j_out.zip")
            self.z_dir(path, JRUNT_OUT)


if __name__ == "__main__":
    import argparse
    num = 5
    if len(sys.argv) >= 2:
        num = int(sys.argv[1])
    # fixed: Cjspy needs an args namespace — the old demo passed a bare int,
    # which crashed diag() on self._args.top
    args = argparse.Namespace(duration=num, top=3, pid="", bpf=False, oss=False)
    s = Cjspy(args)
    s.diag()
    pass
ON_POSIX = 'posix' in sys.builtin_module_names


class Cprun(object):
    """Run a command and harvest stdout+stderr with an epoll-based timeout.

    The first poll waits up to `tmo` seconds; once any output arrived,
    subsequent polls wait only `ctmo` (continuation timeout).
    Linux-only: relies on select.epoll.
    """

    def __init__(self):
        super(Cprun, self).__init__()

    @staticmethod
    def _pollTime(tmo, ctmo):
        """Return a callable yielding tmo on the first call, ctmo after."""
        first = True

        def waitTime():
            nonlocal first
            if first:
                first = False
                return tmo
            return ctmo

        return waitTime

    def _pollRead(self, p, tmo, ctmo=0.1):
        """Drain p.stdout/p.stderr until timeout or hangup; return the text."""
        wait = self._pollTime(tmo, ctmo)
        s = ""
        with select.epoll() as poll:
            poll.register(p.stdout.fileno(), select.EPOLLIN)
            poll.register(p.stderr.fileno(), select.EPOLLIN)
            while True:
                events = poll.poll(wait())
                if len(events) == 0:  # poll timed out
                    return s
                for fd, event in events:
                    if event & select.EPOLLIN:
                        # errors="replace": a 1 MiB read can split a
                        # multi-byte sequence; strict decode used to raise.
                        s += os.read(fd, 1024 * 1024).decode(errors="replace")
                    if event & (select.EPOLLHUP | select.EPOLLERR):
                        return s

    def exec(self, cmd, tmo=5, ctmo=0.1):
        """Run `cmd` (shell-style string) and return its combined output."""
        p = Popen(shlex.split(cmd),
                  stdout=PIPE,
                  stderr=PIPE,
                  stdin=PIPE,
                  close_fds=ON_POSIX)
        ret = self._pollRead(p, tmo, ctmo)
        p.terminate()
        p.wait()  # reap the child — terminate() alone left a zombie
        return ret


class CprunThread(Thread):
    """Daemon thread running `cmd` via Cprun; output is put on queue `q`.

    toDel: a file to delete once the run finishes ("" to skip).
    """

    def __init__(self, cmd, toDel, q, tmo=5):
        self._cmd = cmd
        self._toDel = toDel
        self._tmo = tmo
        self._q = q
        super(CprunThread, self).__init__()
        self.daemon = True
        self.start()

    def run(self):
        r = Cprun()
        self._q.put(r.exec(self._cmd, self._tmo, self._tmo))
        logging.info("exec: %s done" % self._cmd)
        if len(self._toDel):
            os.remove(self._toDel)


if __name__ == "__main__":
    q = queue.Queue()
    # fixed: the demo passed (cmd, q, "") — toDel and q were swapped,
    # which made run() call put() on a string
    t = CprunThread("ps", "", q)
    t.join()
    print(q.get(block=False))
    pass
class Cptop(object):
    """Sample per-pid CPU usage over a short window and rank processes."""

    def __init__(self, interval=0.5):
        super(Cptop, self).__init__()
        self._interval = interval  # sampling window in seconds (was misspelled _initerval)

    @staticmethod
    def _calcTotal(cpu_times):
        # user + system + iowait; iowait is Linux-only — TODO confirm
        # portability if this ever runs off-Linux
        return cpu_times.user + cpu_times.system + cpu_times.iowait

    @staticmethod
    def _diffTop(t1, t2):
        """Per-pid delta t2-t1, restricted to pids present in both samples."""
        return {k: v - t1[k] for k, v in t2.items() if k in t1}

    def _getTops(self, pids):
        """Map pid -> total CPU time, skipping pids that vanish mid-walk."""
        dTop = {}
        for pid in pids:
            try:
                p = psutil.Process(pid)
            except psutil.Error:  # NoSuchProcess / AccessDenied / Zombie
                continue
            if p:
                dTop[pid] = self._calcTotal(p.cpu_times())
        return dTop

    def _top(self):
        """Two samples `interval` apart; return (pid, delta) sorted desc."""
        pids = psutil.pids()
        t1 = self._getTops(pids)
        time.sleep(self._interval)
        t2 = self._getTops(pids)
        res = self._diffTop(t1, t2)
        return sorted(res.items(), key=lambda x: x[1], reverse=True)

    def top(self, N=10):
        """Top-N busiest pids as (pid, cpu-delta) pairs."""
        return self._top()[:N]

    def jtop(self, N=3):
        """Top-N busiest java processes (by executable name) as psutil.Process.

        exe() is now guarded too — it can raise AccessDenied/ZombieProcess,
        which previously crashed the scan.
        """
        ret = []
        for pid, _ in self._top():
            try:
                p = psutil.Process(pid)
                exe = p.exe()
            except psutil.Error:
                continue
            if exe.endswith("java"):
                ret.append(p)
        return ret[:N]


if __name__ == "__main__":
    t = Cptop()
    tops = t.jtop(4)
    for a in tops:
        print('\t', a.exe())
    pass
class CtomlOperate(object):
    """Render TOML config files from a template parsed once at construction."""

    def __init__(self, model):
        super(CtomlOperate, self).__init__()
        # parse the template a single time; out() always works on a copy
        with open(model, 'r') as fp:
            self._model = toml.load(fp)

    def out(self, d, fName):
        """Write the template merged with the overrides in `d` to `fName`."""
        merged = copy.deepcopy(self._model)
        merged.update(d)
        with open(fName, 'w') as fp:
            toml.dump(merged, fp)

    def confPid(self, pid, dua=5):
        """Emit conf_<pid>.toml with a per-pid fold destination.

        Returns (file name, None).
        NOTE(review): `dua` is accepted but unused — confirm whether the
        duration was meant to be written into the config as well.
        """
        fName = "conf_%d.toml" % pid
        self.out({"destination": "/tmp/output_%d.fold" % pid}, fName)
        return fName, None


if __name__ == "__main__":
    t = CtomlOperate("../continuous-profile-collector/configuration.toml")
    t.confPid(123)
    pass
class CtomlWrite(object):
    """Generate per-pid collector config files from a template.

    jout:  output directory (with trailing slash) baked into the config.
    model: path of the template configuration.toml.
    """

    def __init__(self, jout, model):
        super(CtomlWrite, self).__init__()
        self._out = jout
        self._model = model
        # container ids are long hex strings (16+ chars)
        self._reId = re.compile(r"([0-9a-f]){16,}")

    def _cgroup(self, pid):
        """Map cgroup subsystem -> path for `pid`.

        maxsplit=2 keeps paths containing ':' intact — the line format is
        hierarchy-id:subsystems:path and the old bare split broke on such
        paths.
        """
        d = {}
        with open("/proc/%d/cgroup" % pid, 'r') as f:
            for line in f:
                _, subsys, path = line.strip().split(":", 2)
                d[subsys] = path
        return d

    def outName(self, pid):
        """Build the .jfr output name; embed a short container id if found.

        cgroup v2 has no 'name=systemd' controller — fall back to the
        plain per-pid name instead of raising KeyError.
        """
        cname = self._cgroup(pid).get('name=systemd', '')
        ret = self._reId.search(cname)
        if ret:
            return "out_%d_%s.jfr" % (pid, ret.group(0)[:8])
        return "out_%d.jfr" % pid

    def confPid(self, pid, dua=5):
        """Write conf_<pid>.toml (into cwd) and return (confName, outName).

        dua: sampling interval written into the config.
        """
        fName = "conf_%d.toml" % pid
        outName = self.outName(pid)
        with open(self._model, 'r') as f:
            s = f.read()
        # plain string replacement: the targets are literal, and re.sub
        # would mis-handle backslashes appearing in the replacement paths
        s = s.replace("log_file = '/tmp/cpc_?.log'",
                      "log_file = '%scpc_%d.log'" % (self._out, pid))
        s = s.replace("interval = -1", "interval = %d" % dua)
        with open(fName, 'w') as f:
            f.write("\n".join([s, "destination = '%s%s'" % (self._out, outName)]))
        return fName, outName


if __name__ == "__main__":
    t = CtomlWrite("/var/sysak/jruntime/", "../continuous-profile-collector/configuration.toml")
    t.confPid(123)
    pass
class CunityCli(object):
    """HTTP client for the local unity monitor API.

    The API port comes from /etc/sysak/base.yaml (config.port); all
    endpoints hang off http://127.0.0.1:<port>/api/.
    """

    def __init__(self, fYaml="/etc/sysak/base.yaml"):
        super(CunityCli, self).__init__()
        with open(fYaml, 'r') as conf:
            cfg = yaml.load(conf, Loader=yaml.FullLoader)
        self._url = "http://127.0.0.1:%d/api/" % cfg["config"]["port"]

    def query(self, tab, ts="1m"):
        """POST a last-window query for table `tab` over window `ts`."""
        body = {"mode": "last", "time": ts, "table": [tab]}
        resp = requests.post(self._url + "query", json=body)
        return resp.content.decode()

    def line(self, lines):
        """POST raw line-protocol data to the line endpoint."""
        resp = requests.post(self._url + "line", data=lines)
        return resp.content.decode()

    def pushFile(self, title, fileName):
        """Zip one file, base64-encode it, and push it as a line record."""
        with tempfile.TemporaryDirectory() as tmpdir:
            packed = tmpdir + '/push.zip'
            with zipfile.ZipFile(packed, 'w') as z:
                z.write(fileName)
            with open(packed, 'rb') as z:
                encoded = base64.b64encode(z.read())
            payload = '%s content="%s"' % (title, encoded.decode())
            return self.line(payload)

    def _ossPut(self, uid, stream):
        """POST an octet-stream blob tagged with `uid` to the oss endpoint."""
        hdrs = {'Content-Type': 'application/octet-stream', 'uuid': uid}
        resp = requests.post(self._url + 'oss', headers=hdrs, data=stream)
        return resp.content.decode()

    def ossFile(self, uid, fName):
        """Upload a single file's bytes under `uid`."""
        with open(fName, 'rb') as f:
            return self._ossPut(uid, f.read())

    @staticmethod
    def zipDir(dDir):
        """Zip directory `dDir` recursively in memory; return the zip bytes."""
        with tempfile.TemporaryFile() as tmp:
            with zipfile.ZipFile(tmp, mode='w', compression=zipfile.ZIP_DEFLATED) as zf:
                for root, dirs, files in os.walk(dDir):
                    for name in files:
                        zf.write(os.path.join(root, name))

            tmp.seek(0)
            return tmp.read()

    def ossDir(self, uid, dDir):
        """Zip `dDir` and upload the archive under `uid`."""
        payload = self.zipDir(dDir)
        print(len(payload))
        return self._ossPut(uid, payload)


if __name__ == "__main__":
    q = CunityCli()
    print(q.ossDir(str(uuid.uuid4()), '../test'))
    pass
check_requirements() {
    echo "INFO: begin install requirements..."
    # make sure the log directory exists before tee writes into it
    if ! [ -d ${SERVER_HOME}/logs/ ]; then
        mkdir -p ${SERVER_HOME}/logs/ || exit 1
    fi

    local requirements_log="${SERVER_HOME}/logs/requirements.log"
    local requirements="requirements.txt"
    touch "$requirements_log" || exit 1
    pip3.6 install --upgrade pip
    # PIPESTATUS[0] carries pip's exit code. The previous
    # "pip | tee ... || exit 1" only checked tee's status, and the later
    # $? read the status of the || chain, so pip failures were ignored.
    pip3.6 install -r ${requirements} -i "${ALIYUN_MIRROR}" | tee -a "${requirements_log}"
    local pip_res=${PIPESTATUS[0]}
    if [ $pip_res -ne 0 ]; then
        echo "ERROR: requirements not satisfied and auto install failed, please check ${requirements_log}"
        exit 1
    fi
}

# build the single-binary java_collect, then drop the virtualenv
pyinstaller_sar() {
    echo "INFO: begin pyinstaller java_collect..."
    pyinstaller java_collect.spec
    deactivate
    rm -rf ${VIRTUALENV_HOME}
}

deploy() {
    touch_virtualenv
    pyinstaller_sar
}

deploy
_mem_cgroup_iter(unsigned long root, unsigned long prev, diff --git a/source/tools/detect/mem/memcgoffline/memcg_iter.c b/source/tools/detect/mem/memcgoffline/memcg_iter.c index 541d0367ffd805d37336b440cc367a4f1f57d582..93090616fe450f98c54b37fa9280cfdd0570de8a 100644 --- a/source/tools/detect/mem/memcgoffline/memcg_iter.c +++ b/source/tools/detect/mem/memcgoffline/memcg_iter.c @@ -9,6 +9,7 @@ #include "kcore_utils.h" static unsigned long root_mem_cgroup; +extern enum KERNEL_VERSION kernel_version; struct member_attribute *get_offset_no_cache(char *struct_name, char *member_name, struct btf *handle) @@ -34,6 +35,34 @@ int get_member_offset(char *struct_name, char *member_name, struct btf *handle) return btf_get_member_offset(handle, prefix, member_name)/8; } +static unsigned long _cg_next_child(unsigned long pos, unsigned long parent, + struct btf *btf_handle) +{ + struct member_attribute *att, *att2; + unsigned long next; + + att = get_offset_no_cache("cgroup", "sibling", btf_handle); + if (!att) + return 0; + + att2 = get_offset_no_cache("cgroup", "children", btf_handle); + if (!att2) + return 0; + + if(!pos) { + kcore_readmem(parent + att2->offset, &next, sizeof(next)); + next = next - att->offset; + } else { + kcore_readmem(pos + att->offset, &next, sizeof(next)); + next = next - att->offset; + } + + if(next + att->offset != parent + att2->offset) + return next; + + return 0; +} + static unsigned long _css_next_child(unsigned long pos, unsigned long parent, struct btf *btf_handle) { @@ -62,6 +91,28 @@ static unsigned long _css_next_child(unsigned long pos, unsigned long parent, return 0; } +static unsigned long cg_to_memcg(unsigned long cgroup, struct btf *btf_handle) +{ + struct member_attribute *cg_subsys_att, *memcg_css_att; + unsigned long css_offset, css; + // normally, mem_cgroup_subsys_id = 3 (without cgroup debug subsys) + const int mem_cgroup_subsys_id = 3; + + cg_subsys_att = get_offset_no_cache("cgroup", "subsys", btf_handle); + if (!cg_subsys_att) + 
return 0; + + css_offset = cgroup + cg_subsys_att->offset + (mem_cgroup_subsys_id * sizeof(unsigned long)); + kcore_readmem(css_offset, &css, sizeof(css)); + + memcg_css_att = get_offset_no_cache("mem_cgroup", "css", btf_handle); + if (!memcg_css_att) + return 0; + + // equal to mem_cgroup_from_css() + return css - memcg_css_att->offset; +} + unsigned long _mem_cgroup_iter(unsigned long root, unsigned long prev, struct btf *btf_handle) { @@ -77,40 +128,71 @@ unsigned long _mem_cgroup_iter(unsigned long root, unsigned long prev, if(!prev) return root; - //printf("root:%lx, prev:%lx\n", root, prev); - att = get_offset_no_cache("mem_cgroup", "css", btf_handle); if (!att) return 0; - att2 = get_offset_no_cache("cgroup_subsys_state", "parent", btf_handle); - if (!att2) - return 0; - pos = prev; //kcore_readmem(pos + att->offset, &css, sizeof(css)); css = pos + att->offset; - //kcore_readmem(root+att->offset, &root_css, sizeof(root_css)); root_css = root + att->offset; - next = _css_next_child(0, css, btf_handle); - if(!next) - { - tmp1 = css; - while(tmp1 != root_css) + + if (kernel_version == LINUX_3_10) { + struct member_attribute *css_cg_att, *cg_parent_att; + unsigned long cg, root_cg; + unsigned long cg_tmp1, cg_tmp2; + + css_cg_att = get_offset_no_cache("cgroup_subsys_state", "cgroup", btf_handle); + if (!css_cg_att) + return 0; + + cg_parent_att = get_offset_no_cache("cgroup", "parent", btf_handle); + if (!cg_parent_att) + return 0; + + kcore_readmem(css + css_cg_att->offset, &cg, sizeof(cg)); + kcore_readmem(root_css + css_cg_att->offset, &root_cg, sizeof(root_cg)); + + next = _cg_next_child(0, cg, btf_handle); + if (!next) { + cg_tmp1 = cg; + while (cg_tmp1 != root_cg) { + kcore_readmem(cg_tmp1 + cg_parent_att->offset, &cg_tmp2, sizeof(cg_tmp2)); + next = _cg_next_child(cg_tmp1, cg_tmp2, btf_handle); + if (next) + break; + cg_tmp1 = cg_tmp2; + } + } + } else { + att2 = get_offset_no_cache("cgroup_subsys_state", "parent", btf_handle); + if (!att2) + return 0; + 
+ next = _css_next_child(0, css, btf_handle); + if(!next) { - kcore_readmem(tmp1 + att2->offset, &tmp2, sizeof(tmp2)); - next = _css_next_child(tmp1, tmp2, btf_handle); - if(next) - break; - tmp1 = tmp2; + tmp1 = css; + while(tmp1 != root_css) + { + kcore_readmem(tmp1 + att2->offset, &tmp2, sizeof(tmp2)); + next = _css_next_child(tmp1, tmp2, btf_handle); + if(next) + break; + tmp1 = tmp2; + } } } if(!next) return 0; - memcg = next - att->offset; + if (kernel_version == LINUX_3_10) { + memcg = cg_to_memcg(next, btf_handle); + } else { + memcg = next - att->offset; + } return memcg; } @@ -234,58 +316,58 @@ void memcg_get_name(unsigned long memcg, char *name, kcore_readmem(cg + att->offset, &cg, sizeof(cg)); -#ifdef LINUX_310 - if (!cg) - return; - cgroup_path(cg, name, PATH_MAX); - end = name+strlen("/sys/fs/cgroup/memory/"); - memmove(end, name, strlen(name)+1); - prepend(&end, &len, "/sys/fs/cgroup/memory", strlen("/sys/fs/cgroup/memory"), 0); -#else - unsigned long kn; - unsigned long pkn; - int kn_name_offset, kn_pa_offset; - - att = get_offset_no_cache("cgroup", "kn", btf_handle); - if (!att) - return; + if (kernel_version == LINUX_3_10) { + if (!cg) + return; + cgroup_path(cg, name, PATH_MAX, btf_handle); + end = name + strlen("sys/fs/cgroup/memory/"); + memmove(end, name, strlen(name) + 1); + prepend(&end, &len, "sys/fs/cgroup/memory", strlen("sys/fs/cgroup/memory"), 0); + } else { + unsigned long kn; + unsigned long pkn; + int kn_name_offset, kn_pa_offset; - kcore_readmem(cg + att->offset, &kn, sizeof(kn)); + att = get_offset_no_cache("cgroup", "kn", btf_handle); + if (!att) + return; - if (!cg || !kn) - return; + kcore_readmem(cg + att->offset, &kn, sizeof(kn)); - end = name + len - 1; - prepend(&end, &len, "\0", 1, 0); - pkn = kn; + if (!cg || !kn) + return; - kn_name_offset = get_member_offset("kernfs_node", "name", btf_handle); - if (kn_name_offset < 0) - return; - - kn_pa_offset = get_member_offset("kernfs_node", "parent", btf_handle); - if (kn_pa_offset 
< 0) - return; + end = name + len - 1; + prepend(&end, &len, "\0", 1, 0); + pkn = kn; + + kn_name_offset = get_member_offset("kernfs_node", "name", btf_handle); + if (kn_name_offset < 0) + return; + + kn_pa_offset = get_member_offset("kernfs_node", "parent", btf_handle); + if (kn_pa_offset < 0) + return; - while (pkn) { - kcore_readmem(pkn + kn_name_offset, &knname, sizeof(knname)); - kcore_readmem(knname, subname, sizeof(subname)); + while (pkn) { + kcore_readmem(pkn + kn_name_offset, &knname, sizeof(knname)); + kcore_readmem(knname, subname, sizeof(subname)); - pos = prepend(&end, &len, subname, strlen(subname), 0); - if (pos) - break; + pos = prepend(&end, &len, subname, strlen(subname), 0); + if (pos) + break; - kcore_readmem(pkn + kn_pa_offset, &kn, sizeof(kn)); - if ((pkn == kn) || !kn) - break; - pos = prepend(&end, &len, "/", 1, 0); - if (pos) - break; - pkn = kn; - } + kcore_readmem(pkn + kn_pa_offset, &kn, sizeof(kn)); + if ((pkn == kn) || !kn) + break; + pos = prepend(&end, &len, "/", 1, 0); + if (pos) + break; + pkn = kn; + } - prepend(&end, &len, "/sys/fs/cgroup/memory", strlen("/sys/fs/cgroup/memory"), 0); + prepend(&end, &len, "/sys/fs/cgroup/memory", strlen("/sys/fs/cgroup/memory"), 0); - memmove(name, end, strlen(end) + 1); -#endif + memmove(name, end, strlen(end) + 1); + } } \ No newline at end of file diff --git a/source/tools/detect/mem/memcgoffline/memcgoffline.c b/source/tools/detect/mem/memcgoffline/memcgoffline.c index 16fe17db6a9a1a07ba9605ade1e676fabb2f28fd..b100b6290d5c7b9b481cc89dbcb6a53598493b7c 100644 --- a/source/tools/detect/mem/memcgoffline/memcgoffline.c +++ b/source/tools/detect/mem/memcgoffline/memcgoffline.c @@ -6,12 +6,14 @@ #include #include #include +#include #include "kcore_utils.h" #include "memcg_iter.h" static struct btf *btf_handle = NULL; int total_memcg_num = 0; +enum KERNEL_VERSION kernel_version = LINUX_4_19; struct environment { int print_cg_num; /* unused */ @@ -19,15 +21,48 @@ struct environment { .print_cg_num = 
10000, }; +static int get_kernel_version() +{ + struct utsname kernel_info; + char *release; + long ver[16]; + int i = 0; + + if (uname(&kernel_info) < 0) { + LOG_ERROR("uname error: %s\n", strerror(errno)); + return -1; + } + + release = kernel_info.release; + while (*release) { + if (isdigit(*release)) { + ver[i] = strtol(release, &release, 10); + i++; + } else { + release++; + } + } + + if (ver[0] == 3) { + kernel_version = LINUX_3_10; + } else if (ver[0] == 4) { + kernel_version = LINUX_4_19; + } else { + kernel_version = LINUX_5_10; + } + + return 0; +} + static int caculate_offline(unsigned long start_memcg) { int offline_num = 0; unsigned long css, css_flags, cnt, iter = 0; long refcnt_value; - unsigned int flags_value; + unsigned int flags_value, offline_flag; char fileName[PATH_MAX]; struct member_attribute *css_attr, *css_flag_attr, *refcnt_attr; - struct member_attribute *cnt_attr; + struct member_attribute *cnt_attr, *ref_data_attr, *data_cnt_attr; css_attr = get_offset_no_cache("mem_cgroup", "css", btf_handle); if (!css_attr) { @@ -49,24 +84,57 @@ static int caculate_offline(unsigned long start_memcg) return -1; } - cnt_attr = get_offset_no_cache("percpu_ref", "count", btf_handle); - if (!cnt_attr) { - LOG_ERROR("get cnt offset of percpu_ref failed!\n"); - return -1; + if (kernel_version == LINUX_5_10) { + ref_data_attr = get_offset_no_cache("percpu_ref", "data", btf_handle); + if (!ref_data_attr) { + LOG_ERROR("get data offset of percpu_ref failed!\n"); + return -1; + } + data_cnt_attr = get_offset_no_cache("percpu_ref_data", "count", btf_handle); + if (!data_cnt_attr) { + LOG_ERROR("get cnt offset of percpu_ref_data failed!\n"); + return -1; + } + } else { + cnt_attr = get_offset_no_cache("percpu_ref", "count", btf_handle); + if (!cnt_attr) { + LOG_ERROR("get cnt offset of percpu_ref failed!\n"); + return -1; + } } - + for_each_mem_cgroup(iter, start_memcg, btf_handle) { css = iter + css_attr->offset; css_flags = css + css_flag_attr->offset; 
kcore_readmem(css_flags, &flags_value, sizeof(flags_value)); - - if (flags_value & CSS_DYING) { - cnt = css + refcnt_attr->offset + cnt_attr->offset; - + if (kernel_version == LINUX_3_10) { + offline_flag = !(flags_value & CSS_ONLINE); + } else { + offline_flag = flags_value & CSS_DYING; + } + + if (offline_flag) { offline_num++; - kcore_readmem(cnt, &refcnt_value, sizeof(refcnt_value)); - + + // in kernel 5.10, refcnt = css->refcnt->data->count + // in other, refcnt = css->refcnt->count + if (kernel_version == LINUX_5_10) { + unsigned long ref_data, ref_data_val; + + ref_data = css + refcnt_attr->offset + ref_data_attr->offset; + kcore_readmem(ref_data, &ref_data_val, sizeof(ref_data_val)); + + cnt = ref_data_val + data_cnt_attr->offset; + kcore_readmem(cnt, &refcnt_value, sizeof(refcnt_value)); + } else if (kernel_version == LINUX_4_19) { + cnt = css + refcnt_attr->offset + cnt_attr->offset; + kcore_readmem(cnt, &refcnt_value, sizeof(refcnt_value)); + } else { + cnt = css + refcnt_attr->offset; + kcore_readmem(cnt, &refcnt_value, sizeof(refcnt_value)); + } + if (env.print_cg_num > 0) { memcg_get_name(iter, fileName, PATH_MAX, btf_handle); printf("cgroup path:%s\trefcount=%ld\n", fileName, refcnt_value); @@ -151,6 +219,12 @@ int main(int argc, char *argp[]) return -1; } + ret = get_kernel_version(); + if (ret) { + LOG_ERROR("get kernel version failed!"); + return -1; + } + btf_handle = btf_init(); if (!btf_handle) { LOG_ERROR("btf init failed!\n"); diff --git a/source/tools/detect/mem/memgraph/memgraph.py b/source/tools/detect/mem/memgraph/memgraph.py index 262e2d41772392f26ef3e3867a387687c8d28667..f88c85714c0332ac7f187978d6e9f63008bbffd4 100644 --- a/source/tools/detect/mem/memgraph/memgraph.py +++ b/source/tools/detect/mem/memgraph/memgraph.py @@ -263,6 +263,15 @@ def memgraph_free(meminfo): used = int(line.strip().split()[2]) return used +def format_unit(value): + value = int(value) + if value > 5 * 1024 * 1024: + return '%s KB (%s GB)' % (value, 
round(value/(1024*1024),2)) + elif value > 10 * 1024: + return '%s KB (%s MB)' % (value, round(value/(1024),2)) + else: + return '%s KB' % value + def memgraph_graph(meminfo): res = {} res["total"] = meminfo["MemTotal"] + meminfo['res'] @@ -291,11 +300,35 @@ def memgraph_graph(meminfo): kernel["VmallocUsed"] = meminfo["VmallocUsed"] kernel["allocPage"] = meminfo["allocPage"] res["kernel"] = kernel + + format_res = {} + for i in res: + if type(res[i]) == dict: + format_res[i] = {} + for j in res[i]: + format_res[i][j] = format_unit(res[i][j]) + else: + format_res[i] = format_unit(res[i]) + meminfo["graph"] = res global jsonFormat if jsonFormat != None: return meminfo + memgraph_check_event(meminfo) + res_str = ("\nmemgraph result:\ntotal memory: %s\tused: %s\tfree: %s\tavailable: %s\n\nuser: total used: %s\n\tuser-anon: %s\n\tuser-filecache: %s\n\tuser-buffers: %s\n\tuser-mlock: %s\n\nkernel: total used: %s\n\tkernel-reserved: %s\n\tkernel-SReclaimable: %s\n\tkernel-SUnreclaim: %s\n\tkernel-PageTables: %s\n\tkernel-VmallocUsed: %s\n\tkernel-KernelStack: %s\n\tkernel-allocPage: %s\n") % (format_res['total'],format_res['used'],format_res['free'],format_res['available'],format_res['userUsed'],format_res['user']['anon'],format_res['user']['filecache'],format_res['user']['buffers'],format_res['user']['mlock'],format_res['kernelUsed'],format_res['kernel']['reserved'],format_res['kernel']['SReclaimable'],format_res['kernel']['SUnreclaim'],format_res['kernel']['PageTables'],format_res['kernel']['VmallocUsed'],format_res['kernel']['KernelStack'],format_res['kernel']['allocPage']) + for item in meminfo['event']: + if item == "util": + continue + if meminfo['event'][item] == True: + if item == 'memcg': + res_str = '%sNotice: memcg leak\n' % res_str + elif item == 'leak': + res_str = '%sNotice: memory leak\n' % res_str + elif item == 'memfrag': + res_str = '%sNotice: memory fragment\n' % res_str print(res) + print(res_str) + return res def kmemleak_check(meminfo, memType): kmem 
= meminfo[memType]/1024 @@ -665,15 +698,6 @@ def memgraph_check_event(meminfo): def memgraph_handler_cmd(meminfo, argv): global jsonFormat filepath = "memgraph.json" - meminfo["taskAnon"] = {} - meminfo["taskMem"] = {} - meminfo["filepid"] = {} - meminfo["filecache"] = {} - meminfo["graph"] = {} - meminfo["memleak"] = {} - meminfo["taskInfo"] = {} - meminfo["cgroupTop"] = {} - meminfo["cgroupInfo"] = {} try: opts, args = getopt.getopt(argv,"j:hgfaklc:") except getopt.GetoptError: @@ -716,6 +740,15 @@ def memgraph_handler_cmd(meminfo, argv): if __name__ == "__main__": meminfo = {} + meminfo["taskAnon"] = {} + meminfo["taskMem"] = {} + meminfo["filepid"] = {} + meminfo["filecache"] = {} + meminfo["graph"] = {} + meminfo["memleak"] = {} + meminfo["taskInfo"] = {} + meminfo["cgroupTop"] = {} + meminfo["cgroupInfo"] = {} memgraph_get_meminfo(meminfo) if len(sys.argv) == 1: memgraph_graph(meminfo) diff --git a/source/tools/detect/mem/oomcheck/oomcheck.py b/source/tools/detect/mem/oomcheck/oomcheck.py index 945ec4577d6709c670740e5efe2582ad5fa0e2be..d50e4a3c241bd572a65b5c987711a7b8496b675d 100644 --- a/source/tools/detect/mem/oomcheck/oomcheck.py +++ b/source/tools/detect/mem/oomcheck/oomcheck.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python3 # -*- coding: utf-8 -*- # @Author: changjun @@ -1080,6 +1079,7 @@ def oom_diagnose(sn, data, mode): oom_result['max_total'] = {'rss':0,'task':"",'score':0,'cnt':0} dmesgs = data['dmesg'] if OOM_BEGIN_KEYWORD in dmesgs or pid_pattern.search(dmesgs) is not None: + submsg = '' oom_dmesg_analyze(dmesgs, oom_result) oom_result['summary'] += "total oom: %s\n"%oom_result['oom_total_num'] diff --git a/source/tools/detect/mem/podmem/Makefile b/source/tools/detect/mem/podmem/Makefile index 07294fb656d6bb839a92f1a587b203b062494903..7c0b2f6d5045d6cf9db31043bbe29b891ec31a69 100644 --- a/source/tools/detect/mem/podmem/Makefile +++ b/source/tools/detect/mem/podmem/Makefile @@ -1,2 +1,3 @@ target := podmem +DEPEND := "python-dep{all}" include 
$(SRC)/mk/sub.mk diff --git a/source/tools/detect/mem/podmem/entry/pagealloc.py b/source/tools/detect/mem/podmem/entry/pagealloc.py index 5e0ef2b16a4258fc80f5f245a82e28aadd907742..d7d80fb099aec110dd2eaab8d75061089e2929ad 100644 --- a/source/tools/detect/mem/podmem/entry/pagealloc.py +++ b/source/tools/detect/mem/podmem/entry/pagealloc.py @@ -1,5 +1,3 @@ -#!/usr/bin/python2 - import os import sys import getopt diff --git a/source/tools/detect/mem/podmem/entry/podmem.py b/source/tools/detect/mem/podmem/entry/podmem.py index 8b875a2053f59fa97192c8a33d951d7124072b6f..a1a200722ac45aaaa63450011758e74a7b7a7e9c 100644 --- a/source/tools/detect/mem/podmem/entry/podmem.py +++ b/source/tools/detect/mem/podmem/entry/podmem.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python3 import os import sys import json diff --git a/source/tools/detect/mem/podmem/memcache/memcg.cpp b/source/tools/detect/mem/podmem/memcache/memcg.cpp index 01e66a431db0a3ff35b02544d586b461cac5adfd..15df480e06ced07807a0827dedf6f787113dce1c 100644 --- a/source/tools/detect/mem/podmem/memcache/memcg.cpp +++ b/source/tools/detect/mem/podmem/memcache/memcg.cpp @@ -272,8 +272,8 @@ static int get_dentry(unsigned long pfn, unsigned long cinode, int active, int s info->cinode = cinode; info->size = i_size>>10; - strncpy(info->filename, end, sizeof(info->filename) - 2); - info->filename[sizeof(info->filename) -1] = '0'; + strncpy(info->filename, end, sizeof(info->filename) - 1); + info->filename[sizeof(info->filename) -1] = 0; files[i_ino] = info; return 0; } diff --git a/source/tools/detect/mem/podmem/memcache/offset.cpp b/source/tools/detect/mem/podmem/memcache/offset.cpp index 3595c0c7c591638112c73b0dafbfded8ce978bf3..8f525d6da5d1c448609eac606f8455de2390890e 100644 --- a/source/tools/detect/mem/podmem/memcache/offset.cpp +++ b/source/tools/detect/mem/podmem/memcache/offset.cpp @@ -151,7 +151,7 @@ static int download_btf(void) { sysak_path = getenv("SYSAK_WORK_PATH") ; sysak_path += "/tools/"; - sysak_path += kernel; + 
//sysak_path += kernel; } snprintf(dw, LEN + LEN + LEN, "wget -T 5 -t 2 -q -O %s/vmlinux-%s https://sysom-cn-%s.oss-cn-%s%s.aliyuncs.com/home/hive/btf/%s/vmlinux-%s",sysak_path.c_str(), kernel, ®ion[3],®ion[3],timeout.c_str(),arch, kernel); @@ -184,7 +184,7 @@ int offset_init(void) cmd = string("uname -r").c_str(); do_cmd(cmd, ver, LEN); if(getenv("SYSAK_WORK_PATH") != NULL) - sprintf(btf,"%s/tools/%s/vmlinux-%s", getenv("SYSAK_WORK_PATH"), ver, ver); + sprintf(btf,"%s/tools/vmlinux-%s", getenv("SYSAK_WORK_PATH"), ver); else sprintf(btf,"/boot/vmlinux-%s", ver); diff --git a/source/tools/detect/net/mon_connect/Makefile b/source/tools/detect/net/mon_connect/Makefile index 2f267f4ebbb5e320f6be99b47965d98cbdb4d94f..d932e57819aade31408d3173f067d9ebebca0379 100644 --- a/source/tools/detect/net/mon_connect/Makefile +++ b/source/tools/detect/net/mon_connect/Makefile @@ -1,4 +1,4 @@ mods := mon_connect target := mon_connect -DEPEND := "python-dep{python3}" +DEPEND := "python-dep{all}" include $(SRC)/mk/py.mk diff --git a/source/tools/detect/net/pingEdge/Makefile b/source/tools/detect/net/pingEdge/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..2931f275a270488094a8b51332dab0cef1a7fba6 --- /dev/null +++ b/source/tools/detect/net/pingEdge/Makefile @@ -0,0 +1,14 @@ +target := pingEdge +ifeq ($(KERNEL_DEPEND), Y) +TARGET_PATH := $(OBJ_TOOLS_PATH) +else +TARGET_PATH := $(OBJ_TOOLS_ROOT) +endif + +all: $(target) target_rule + +$(target): $@ + sh venv.sh + cp dist/pingEdge $(TARGET_PATH)/ + +include $(SRC)/mk/target.inc \ No newline at end of file diff --git a/source/tools/detect/net/pingEdge/ReadMe.md b/source/tools/detect/net/pingEdge/ReadMe.md new file mode 100644 index 0000000000000000000000000000000000000000..e7a3b023ea5edf97b578ea0a6ebc03b978517377 --- /dev/null +++ b/source/tools/detect/net/pingEdge/ReadMe.md @@ -0,0 +1,14 @@ +# 功能说明 + + 通过批量发起ping 探测,用于分析ping 抖动边沿位置 + +## 参数说明 + + pingEdge [dist IP] [pid of container process] [minutes], minute 
is optional ,default is 5. + 例如,本地容器pid 为3551 进程 访问 192.168.0.131 存在抖动,需要探测3分钟的网络质量,可以按照以下命令执行探测: + +``` +ping 192.168.0.131 3551 3 +``` + + 将会报告出容器访问目标IP,容器访问容器网关、host 访问目标IP,host 访问网关的网络质量,用于定界网络抖动边界。 \ No newline at end of file diff --git a/source/tools/detect/net/pingEdge/pingEdge.py b/source/tools/detect/net/pingEdge/pingEdge.py new file mode 100644 index 0000000000000000000000000000000000000000..0305b0fd053384112487fbda899440b5935a07ed --- /dev/null +++ b/source/tools/detect/net/pingEdge/pingEdge.py @@ -0,0 +1,101 @@ +# -*- coding: utf-8 -*- +""" +------------------------------------------------- + File Name: pingEdge + Description : + Author : liaozhaoyan + date: 2023/9/20 +------------------------------------------------- + Change Activity: + 2023/9/20: +------------------------------------------------- +""" +__author__ = 'liaozhaoyan' + +import os +import re +import sys +import json +import netifaces +from multiprocessing import Process +from nsenter import Namespace +from icmplib import multiping + +OUT_PATH = "/var/sysak/pingEdge" + +def pings(ip, ts, interval=0.1): + gw = netifaces.gateways()['default'][netifaces.AF_INET][0] + rets = multiping([gw, ip], count=int(ts/interval), interval=interval, timeout=1) + ret = {} + for r in rets: + ret[r.address] = { + "max": r.max_rtt, + "avg": r.avg_rtt, + "send": r.packets_sent, + "loss": r.packet_loss, + } + return ret + +class CpingProc(Process): + def __init__(self, dip, tpid=0, ts=5*60): + super(CpingProc, self).__init__() + self._dip = dip + self._ts = ts + self._tpid = tpid + + def save(self, res): + fName = os.path.join(OUT_PATH, "%d.json" % self._tpid) + with open(fName, "w") as f: + json.dump(res, f) + + def run(self): + if self._tpid > 0: + with Namespace(self._tpid, 'net'): + res = pings(self._dip, self._ts) + else: + res = pings(self._dip, self._ts) + self.save(res) + +def work(dip, pid, ts=5*60): + ps = [] + ps.append(CpingProc(dip, 0, int(ts/2))) + if pid > 0: + ps.append(CpingProc(dip, pid, 
int(ts/2))) + for p in ps: + p.start() + for p in ps: + p.join() + +def pre(distDir): + if not os.path.exists(distDir): + os.makedirs(distDir) + fNames = os.listdir(distDir) + for fName in fNames: + if fName.endswith(".json"): + os.remove(os.path.join(distDir, fName)) + +def post(distDir): + res = {} + fNames = os.listdir(distDir) + for fName in fNames: + if fName.endswith(".json"): + with open(os.path.join(distDir, fName), 'r') as f: + res[fName] = json.load(f) + print(res) + + +if __name__ == "__main__": + pre(OUT_PATH) + dip = sys.argv[1] + if dip == "-h" or dip == "--help": + print("pingEdge [dist IP] [pid of container process] [minutes], minute is optional ,default is 5.") + sys.exit(0) + pid = 0 + if len(sys.argv) > 2: + pid = int(sys.argv[2]) + ts = 5 * 60 + if len(sys.argv) > 3: + ts = int(sys.argv[3]) * 60 + work(dip, pid, ts) + post(OUT_PATH) + pass diff --git a/source/tools/detect/net/pingEdge/requirements.txt b/source/tools/detect/net/pingEdge/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..82b5ce43d21a4c4704a24fc55b26d873c14846cf --- /dev/null +++ b/source/tools/detect/net/pingEdge/requirements.txt @@ -0,0 +1,5 @@ +requests==2.27.1 +pyinstaller==3.5 +netifaces==0.11.0 +nsenter==0.2 +icmplib==2.1.1 \ No newline at end of file diff --git a/source/tools/detect/net/pingEdge/venv.sh b/source/tools/detect/net/pingEdge/venv.sh new file mode 100644 index 0000000000000000000000000000000000000000..d2aec49a9794f1354bc391d964cf884b86fe0ffb --- /dev/null +++ b/source/tools/detect/net/pingEdge/venv.sh @@ -0,0 +1,60 @@ +#!/bin/bash + +ALIYUN_MIRROR="https://mirrors.aliyun.com/pypi/simple/" +export SERVER_HOME=$(pwd) + +VIRTUALENV_HOME="${SERVER_HOME}/virtualenv" + +touch_virtualenv() { + pip3.9 uninstall virtualenv -y + pip3.6 install virtualenv -i "${ALIYUN_MIRROR}" + if [ -d ${VIRTUALENV_HOME} ]; then + echo "virtualenv exists, skip" + echo "INFO: activate virtualenv..." 
+ source ${VIRTUALENV_HOME}/bin/activate || exit 1 + else + virtualenv ${VIRTUALENV_HOME} + if [ "$?" = 0 ]; then + echo "INFO: create virtualenv success" + else + echo "ERROR: create virtualenv failed" + exit 1 + fi + echo "INFO: activate virtualenv..." + source ${VIRTUALENV_HOME}/bin/activate || exit 1 + check_requirements + fi +} + + +check_requirements() { + echo "INFO: begin install requirements..." + if ! [ -d ${SERVER_HOME}/logs/ ]; then + mkdir -p ${SERVER_HOME}/logs/ || exit 1 + fi + + local requirements_log="${SERVER_HOME}/logs/requirements.log" + local requirements="requirements.txt" + touch "$requirements_log" || exit + pip3.6 install --upgrade pip + pip3.6 install -r ${requirements} -i "${ALIYUN_MIRROR}" |tee -a "${requirements_log}" || exit 1 + local pip_res=$? + if [ $pip_res -ne 0 ]; then + echo "ERROR: requirements not satisfied and auto install failed, please check ${requirements_log}" + exit 1 + fi +} + +pyinstaller_sar() { + echo "INFO: begin pyinstaller sar..." + pyinstaller -F pingEdge.py -y + deactivate + rm -rf ${VIRTUALENV_HOME} +} + +deploy() { + touch_virtualenv + pyinstaller_sar +} + +deploy \ No newline at end of file diff --git a/source/tools/detect/net/rtrace/.gitignore b/source/tools/detect/net/rtrace/.gitignore deleted file mode 100644 index 47e87a2f69dd8a376b52970457b930010f95876d..0000000000000000000000000000000000000000 --- a/source/tools/detect/net/rtrace/.gitignore +++ /dev/null @@ -1,7 +0,0 @@ -/target -.output/ -.vscode/ -src/bindings/ -backup/ -rtrace -rtrace.tar.gz \ No newline at end of file diff --git a/source/tools/detect/net/rtrace/CHANGELOG b/source/tools/detect/net/rtrace/CHANGELOG deleted file mode 100644 index b01fd38cc8ab586ed492e46ccdbbf924fd870407..0000000000000000000000000000000000000000 --- a/source/tools/detect/net/rtrace/CHANGELOG +++ /dev/null @@ -1,11 +0,0 @@ - - -v0.2.0 ---- -1. support drop diagnosing -2. update libbpf-rs version and support to pass btf_custom_path parameter - -v0.1.0 ---- -1. 
latency diagnosing: support tcp and icmp. - diff --git a/source/tools/detect/net/rtrace/Cargo.toml b/source/tools/detect/net/rtrace/Cargo.toml index b513c77784bcb243fe1fd4e6c0fd813cb893bbe4..eac7ace42753f29ae5c5213e69a01261d0c685dc 100644 --- a/source/tools/detect/net/rtrace/Cargo.toml +++ b/source/tools/detect/net/rtrace/Cargo.toml @@ -1,57 +1,41 @@ [package] name = "rtrace" -version = "0.2.0" +version = "0.1.0" edition = "2021" -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html +[features] +embed-rtrace = [] [dependencies] -libbpf-rs = "0.19" -libbpf-sys = { version = "1.0.3" } -log = "0.4.14" -libc = "0.2.125" -anyhow = "1.0.57" -once_cell = "1.8.0" -crossbeam-channel = "0.5" +anyhow = "1.0.71" +chrono = "0.4.26" +libbpf-rs = "0.21.2" +once_cell = "1.17.1" +plain = "0.2.3" +libc = "0.2" structopt = "0.3.26" -nix = "0.22" -env_logger = "0.9.0" -clearscreen = "1.0.9" -strum = "0.24" -strum_macros = "0.24" -chrono = "0.4.19" +log4rs = "1.2.0" +log = "0.4.19" +local-ip-address = "0.5.5" +crossbeam-channel = "0.5.8" byteorder = "1.4.3" -eutils-rs = { git = "https://github.com/chengshuyi/eutils-rs.git" } -procfs = "0.14.0" -paste = "1.0" -utils = { path = "utils" } -icmp = { path = "latency/icmp" } -retran = { path = "retran" } -ctrlc = { version = "3.0", features = ["termination"] } - -drop = { path = "drop" } -serde_json = "1.0" -serde = {version = "1.0", features = ["derive"]} +num_cpus = "1.16.0" +sysctl = "0.5.4" uname = "0.1.1" +serde = "1.0.190" +serde_json = "1.0.107" +sqlite = "0.32.0" +pnet = "0.34.0" +rand = "0.8.5" +cached = "0.46.1" +ndarray = "0.15.6" +ndarray-stats = "0.5.1" +num-traits = "0.2.17" +procfs = "0.16.0" +rust-embed = { version = "8.0.0", features = ["include-exclude"] } +tempfile = "3.8.1" +noisy_float = "0.2.0" [build-dependencies] -libbpf-cargo = "0.13" -bindgen = "0.59.2" -bpfskel = { path = "./bpfskel" } - - -[workspace] -members = [ - "utils", - "builder", - "builder/buildertest", - 
"bpfskel", - - "drop", - - "latency/icmp", - - "retran", - - "cenum-rs", -] +libbpf-cargo = "0.21.2" +bindgen = "0.68.1" diff --git a/source/tools/detect/net/rtrace/Makefile b/source/tools/detect/net/rtrace/Makefile index 97693132f332fad6dcb9238ebfb97848d6635913..324e51cb0609eebff810d66d002178e3a506c777 100644 --- a/source/tools/detect/net/rtrace/Makefile +++ b/source/tools/detect/net/rtrace/Makefile @@ -5,7 +5,7 @@ TARGET_PATH := $(OBJ_TOOLS_ROOT) rtrace: bin target_rule bin: - cargo build --release + cargo build --release --features embed-rtrace cp target/release/rtrace $(TARGET_PATH)/ target := rtrace diff --git a/source/tools/detect/net/rtrace/README.md b/source/tools/detect/net/rtrace/README.md deleted file mode 100644 index 95d523fbc96a876eacd5fe72ae926d0d96a8a229..0000000000000000000000000000000000000000 --- a/source/tools/detect/net/rtrace/README.md +++ /dev/null @@ -1,150 +0,0 @@ -# 功能说明 -基于 ebpf 实现的实时网络丢包、延迟、异常分析工具 - -# rtrace - -rtrace 具有四个子功能,分别是:异常诊断、丢包诊断、延迟诊断及网络 SLI。具体可以通过命令 `sysak rtrace --help` 查看。 - -```shell -rtrace 0.1.0 -Diagnosing tools of kernel network - -USAGE: - rtrace - -FLAGS: - -h, --help Prints help information - -V, --version Prints version information - -SUBCOMMANDS: - abnormal Abnormal connection diagnosing - drop Packet drop diagnosing - help Prints this message or the help of the given subcommand (s) - latency Packet latency diagnosing - sli Collection machine sli - -``` - -## rtrace abnormal: 异常链接诊断 - -rtrace abnormal 通过遍历机器上的 tcp 链接,并检查 tcp 链接的相关参数来判断哪些链接可能存在异常。 - - -### 命令行参数解析 - -```shell -rtrace-abnormal 0.2.0 -Abnormal connection diagnosing - -USAGE: - rtrace abnormal [OPTIONS] - -FLAGS: - -h, --help Prints help information - -V, --version Prints version information - -OPTIONS: - --btf Custom btf path - --dst Remote network address of traced sock - --pid Process identifier - --proto Network protocol type, now only support tcp [default: tcp] - --sort Sorting key, including: synq, acceptq, rcvm, sndm, drop, retran, ooo 
[default: acceptq] - --src Local network address of traced sock - --top Show top N connections [default: 10] -``` - -1. `--sort` 对参数进行排序,目前包括: - -synq: 半连接队列长度 -acceptq: 全连接队列长度 -rcvm: 接收端内存 -sndm: 发送端内存 -drop: 丢包数 -retran: 重传数 -ooo: 乱序报文数,注不支持 centos 3.10 - - -### 常用命令行示例 - -1. `rtrace abnormal`: 根据全连接队列长度(默认为 acceptq)进行排序,显示长度最长的前 10(默认为 Top10)条链接信息; - -2. `rtrace abnormal --sort rcvm --top 5`: 根据接收端内存大小进行排序,显示使用内存最多的前 5 条链接; - -## rtrace drop: packet drop diagnosing - -rtrace drop 进行丢包诊断分析。 - -### 命令行参数解析 - -``` -rtrace-drop 0.2.0 -Packet drop diagnosing - -USAGE: - rtrace drop [FLAGS] [OPTIONS] - -FLAGS: - --conntrack Enable conntrack modules - -h, --help Prints help information - --iptables Enable iptables modules - -V, --version Prints version information - -OPTIONS: - --btf Custom btf path - --period Period of display in seconds. 0 mean display immediately when event triggers [default: 3] - --proto Network protocol type [default: tcp] -``` - -1. `--conntrack`: 跟踪 conntrack 是否存在丢包; - -2. `--iptables`: 跟踪 iptables 是否存在丢包; - -### 常用命令行示例 - -1. `rtrace drop`: 跟踪丢包,默认每三秒种打印丢包信息,丢包信息包括:丢包堆栈、snmp、netstat、dev 等; - -2. `rtrace drop --conntrack --iptables`: 除了命令 1 的功能外,还跟踪 conntrack 和 iptables 是否存在丢包; - -## rtrace sli: data collections about sli - -rtrace sli 采集网络相关的 sli 指标。 - -``` -rtrace-sli 0.2.0 -Collection machine sli - -USAGE: - rtrace sli [FLAGS] [OPTIONS] - -FLAGS: - -a, --applat Collect latency between kernel and application in receiving side - --drop Collect drop metrics - -h, --help Prints help information - --latency Collect latency metrics - --retran Collect retransmission metrics - --shell Output every sli to shell - -V, --version Prints version information - -OPTIONS: - --period Data collection cycle, in seconds [default: 3] - --threshold Max latency to trace, default is 1000ms [default: 1000] -``` - -### 命令行参数解析 - -1. `--applat`: 检测网络包从内核态到用户态的耗时; - -2. `--latency`: 检测 tcp 链接 rtt,打印 rtt 分布直方图; - -3. `--shell`: 打印输出到终端; - -4. 
`--threshold`: 延迟阈值,超过该阈值会打印超时的链接具体信息; - - -### 常用命令行示例 - -1. `rtrace sli --latency --shell`: 打印 rtt 分布直方图,并打印 rtt 超过 1000ms 的 tcp 链接五元组信息; - -2. `rtrace sli --latency --threshold 200 --shell`: 打印 rtt 分布直方图,并打印 rtt 超过 200ms 的 tcp 链接五元组信息; - -3. `rtrace sli --latency --applat --shell`: 打印 rtt 分布直方图,并打印 rtt 或应用延迟超过 1000ms 的 tcp 链接五元组信息; diff --git a/source/tools/detect/net/rtrace/bpf_core.h b/source/tools/detect/net/rtrace/bpf_core.h deleted file mode 100644 index 6b77bd5621df6a969748b715a552809fd17a8066..0000000000000000000000000000000000000000 --- a/source/tools/detect/net/rtrace/bpf_core.h +++ /dev/null @@ -1,186 +0,0 @@ -#ifndef __BPF_CORE_H -#define __BPF_CORE_H - -#ifdef __VMLINUX_H__ - -union ktime___310 -{ - s64 tv64; -}; -typedef union ktime___310 ktime_t___310; -struct sk_buff___310 -{ - ktime_t___310 tstamp; -}; - -static __always_inline u64 bpf_core_skb_tstamp(struct sk_buff *skb) -{ - u64 ts; - ktime_t___310 ktime310; - if (bpf_core_field_exists(ktime310.tv64)) - { - struct sk_buff___310 *skb310 = (void *)skb; - bpf_core_read(&ktime310, sizeof(ktime310), &skb310->tstamp); - ts = ktime310.tv64; - } - else - { - bpf_probe_read(&ts, sizeof(u64), &skb->tstamp); - } - return ts; -} - -struct msghdr___310 -{ - struct iovec *msg_iov; -}; - -// libbpf: prog 'kprobe__raw_sendmsg': relo #3: kind (0), spec is [346] struct msghdr.msg_iter.iov (0:2:4:0 @ offset 40) -static __always_inline void *fix_msghdr_base(struct msghdr *msg) -{ - void *ptr; - if (bpf_core_field_exists(msg->msg_iter)) - { - BPF_CORE_READ_INTO(&ptr, msg, msg_iter.iov, iov_base); - } - else - { - struct msghdr___310 *msg310 = (void *)msg; - ; - BPF_CORE_READ_INTO(&ptr, msg310, msg_iov, iov_base); - } - return ptr; -} - -static __always_inline u16 bpf_core_sock_sk_protocol(struct sock *sk) -{ - return (u16)BPF_CORE_READ_BITFIELD_PROBED(sk, sk_protocol); -} - -struct netns_ipv4___310_419 -{ - struct xt_table *iptable_filter; - struct xt_table *iptable_mangle; - struct xt_table *iptable_raw; - struct 
xt_table *arptable_filter; - struct xt_table *iptable_security; - struct xt_table *nat_table; -}; - -static __always_inline u64 bpf_core_netns_ipv4_iptable_filter(void *ptr) -{ - struct netns_ipv4___310_419 *ns = ptr; - u64 addr = 0; - if (bpf_core_field_exists(ns->iptable_filter)) - bpf_probe_read(&addr, sizeof(addr), &ns->iptable_filter); - - return addr; -} - -static __always_inline u64 bpf_core_netns_ipv4_iptable_mangle(void *ptr) -{ - struct netns_ipv4___310_419 *ns = ptr; - u64 addr = 0; - if (bpf_core_field_exists(ns->iptable_mangle)) - bpf_probe_read(&addr, sizeof(addr), &ns->iptable_mangle); - - return addr; -} - -static __always_inline u64 bpf_core_netns_ipv4_iptable_raw(void *ptr) -{ - struct netns_ipv4___310_419 *ns = ptr; - u64 addr = 0; - if (bpf_core_field_exists(ns->iptable_raw)) - bpf_probe_read(&addr, sizeof(addr), &ns->iptable_raw); - - return addr; -} - -static __always_inline u64 bpf_core_netns_ipv4_arptable_filter(void *ptr) -{ - struct netns_ipv4___310_419 *ns = ptr; - u64 addr = 0; - if (bpf_core_field_exists(ns->arptable_filter)) - bpf_probe_read(&addr, sizeof(addr), &ns->arptable_filter); - - return addr; -} - -static __always_inline u64 bpf_core_netns_ipv4_iptable_security(void *ptr) -{ - struct netns_ipv4___310_419 *ns = ptr; - u64 addr = 0; - if (bpf_core_field_exists(ns->iptable_security)) - bpf_probe_read(&addr, sizeof(addr), &ns->iptable_security); - - return addr; -} - -static __always_inline u64 bpf_core_netns_ipv4_nat_table(void *ptr) -{ - struct netns_ipv4___310_419 *ns = ptr; - u64 addr = 0; - if (bpf_core_field_exists(ns->nat_table)) - bpf_probe_read(&addr, sizeof(addr), &ns->nat_table); - - return addr; -} - -#define XT_TABLE_MAXNAMELEN 32 -struct xt_table___419 -{ - char name[XT_TABLE_MAXNAMELEN]; -}; - -static __always_inline u64 bpf_core_xt_table_name(void *ptr) -{ - struct xt_table___419 *table = ptr; - if (bpf_core_field_exists(table->name)) - return (u64)(&table->name[0]); - return 0; -} - -struct listen_sock___310 -{ - 
int qlen; -}; - -struct request_sock_queue___310 -{ - struct listen_sock___310 *listen_opt; -}; - -static __always_inline u32 bpf_core_reqsk_synqueue_len(struct sock *sk) -{ - u32 synqueue_len = 0; - struct request_sock_queue___310 *reqsk310 = &((struct inet_connection_sock *)sk)->icsk_accept_queue; - if (bpf_core_field_exists(reqsk310->listen_opt)) - BPF_CORE_READ_INTO(&synqueue_len, reqsk310, listen_opt, qlen); - else - { - struct request_sock_queue *reqsk = reqsk310; - bpf_probe_read(&synqueue_len, sizeof(synqueue_len), &reqsk->qlen.counter); - } - - return synqueue_len; -} - -static __always_inline u32 bpf_core_sock_ack_backlog(struct sock *sk) -{ - if (bpf_core_field_size(sk->sk_ack_backlog) == 2) - { - u16 sk_ack_backlog; - bpf_probe_read(&sk_ack_backlog, 2, &sk->sk_ack_backlog); - return sk_ack_backlog; - }else { - u32 sk_ack_backlog; - bpf_probe_read(&sk_ack_backlog, 4, &sk->sk_ack_backlog); - return sk_ack_backlog; - } - return ~0; -} - -#endif - -#endif diff --git a/source/tools/detect/net/rtrace/bpfskel/src/btf/btf.rs b/source/tools/detect/net/rtrace/bpfskel/src/btf/btf.rs deleted file mode 100644 index 2ec02b51b2488dedb273666d0452f618e7446a90..0000000000000000000000000000000000000000 --- a/source/tools/detect/net/rtrace/bpfskel/src/btf/btf.rs +++ /dev/null @@ -1,1011 +0,0 @@ -use std::cmp::{max, min}; -use std::collections::BTreeSet; -use std::convert::TryFrom; -use std::ffi::{c_void, CStr, CString}; -use std::fmt::Write; -use std::mem::size_of; -use std::os::raw::{c_char, c_ulong}; -use std::slice; - -use anyhow::{bail, ensure, Result}; -use scroll::Pread; - -use crate::btf::c_types::*; -use crate::btf::*; - -const ANON_PREFIX: &str = "__anon_"; - -pub struct Btf<'a> { - types: Vec>, - ptr_size: u32, - string_table: &'a [u8], - bpf_obj: *mut libbpf_sys::bpf_object, - anon_count: u32, -} - -impl<'a> Btf<'a> { - pub fn new(name: &str, object_file: &[u8]) -> Result> { - let cname = CString::new(name)?; - let obj_opts = libbpf_sys::bpf_object_open_opts { - 
sz: std::mem::size_of::() as libbpf_sys::size_t, - object_name: cname.as_ptr(), - ..Default::default() - }; - let bpf_obj = unsafe { - libbpf_sys::bpf_object__open_mem( - object_file.as_ptr() as *const c_void, - object_file.len() as c_ulong, - &obj_opts, - ) - }; - let err = unsafe { libbpf_sys::libbpf_get_error(bpf_obj as *const _) }; - ensure!(err == 0, "Failed to bpf_object__open_mem: errno {}", err); - - let bpf_obj_btf = unsafe { libbpf_sys::bpf_object__btf(bpf_obj) }; - if bpf_obj_btf.is_null() { - return Ok(None); - } - - let num: u32 = 0x1234; - let endianness = if num.to_le_bytes() == num.to_ne_bytes() { - libbpf_sys::BTF_LITTLE_ENDIAN - } else { - libbpf_sys::BTF_BIG_ENDIAN - }; - ensure!( - unsafe { libbpf_sys::btf__set_endianness(bpf_obj_btf, endianness) } == 0, - "Failed to set BTF endianness" - ); - - let ptr_size = unsafe { libbpf_sys::btf__pointer_size(bpf_obj_btf) }; - ensure!(ptr_size != 0, "Could not determine BTF pointer size"); - - let mut raw_data_size = 0; - let raw_data = unsafe { libbpf_sys::btf__raw_data(bpf_obj_btf, &mut raw_data_size) }; - ensure!( - !raw_data.is_null() && raw_data_size > 0, - "Could not get raw BTF data" - ); - // `data` is valid as long as `bpf_obj` ptr is valid, so we're safe to conjure up this - // `'a` lifetime - let data: &'a [u8] = - unsafe { slice::from_raw_parts(raw_data as *const u8, raw_data_size as usize) }; - - // Read section header - let hdr = data.pread::(0)?; - ensure!(hdr.magic == BTF_MAGIC, "Invalid BTF magic"); - ensure!( - hdr.version == BTF_VERSION, - "Unsupported BTF version: {}", - hdr.version - ); - - // String table - let str_off = (hdr.hdr_len + hdr.str_off) as usize; - let str_end = str_off + (hdr.str_len as usize); - ensure!(str_end <= data.len(), "String table out of bounds"); - let str_data = &data[str_off..str_end]; - - // Type table - let type_off = (hdr.hdr_len + hdr.type_off) as usize; - let type_end = type_off + (hdr.type_len as usize); - ensure!(type_end <= data.len(), "Type table out 
of bounds"); - let type_data = &data[type_off..type_end]; - - let mut btf = Btf::<'a> { - // Type ID 0 is reserved for Void - types: vec![BtfType::Void], - ptr_size: ptr_size as u32, - string_table: str_data, - bpf_obj, - anon_count: 0u32, - }; - - // Load all types - let mut off: usize = 0; - while off < hdr.type_len as usize { - let t = btf.load_type(&type_data[off..])?; - off += Btf::type_size(&t); - btf.types.push(t); - } - - Ok(Some(btf)) - } - - pub fn types(&self) -> &[BtfType<'a>] { - &self.types - } - - pub fn type_by_id(&self, type_id: u32) -> Result<&BtfType> { - if (type_id as usize) < self.types.len() { - Ok(&self.types[type_id as usize]) - } else { - bail!("Invalid type_id: {}", type_id); - } - } - - pub fn size_of(&self, type_id: u32) -> Result { - let skipped_type_id = self.skip_mods_and_typedefs(type_id)?; - - Ok(match self.type_by_id(skipped_type_id)? { - BtfType::Int(t) => ((t.bits + 7) / 8).into(), - BtfType::Ptr(_) => self.ptr_size, - BtfType::Array(t) => t.nelems * self.size_of(t.val_type_id)?, - BtfType::Struct(t) => t.size, - BtfType::Union(t) => t.size, - BtfType::Enum(t) => t.size, - BtfType::Var(t) => self.size_of(t.type_id)?, - BtfType::Datasec(t) => t.size, - BtfType::Float(t) => t.size, - BtfType::Void - | BtfType::Volatile(_) - | BtfType::Const(_) - | BtfType::Restrict(_) - | BtfType::Typedef(_) - | BtfType::FuncProto(_) - | BtfType::Fwd(_) - | BtfType::Func(_) - | BtfType::DeclTag(_) - | BtfType::TypeTag(_) => bail!("Cannot get size of type_id: {}", skipped_type_id), - }) - } - - pub fn align_of(&self, type_id: u32) -> Result { - let skipped_type_id = self.skip_mods_and_typedefs(type_id)?; - - Ok(match self.type_by_id(skipped_type_id)? 
{ - BtfType::Int(t) => min(self.ptr_size, ((t.bits + 7) / 8).into()), - BtfType::Ptr(_) => self.ptr_size, - BtfType::Array(t) => self.align_of(t.val_type_id)?, - BtfType::Struct(t) | BtfType::Union(t) => { - let mut align = 1; - for m in &t.members { - align = max(align, self.align_of(m.type_id)?); - } - - align - } - BtfType::Enum(t) => min(self.ptr_size, t.size), - BtfType::Var(t) => self.align_of(t.type_id)?, - BtfType::Datasec(t) => t.size, - BtfType::Float(t) => min(self.ptr_size, t.size), - BtfType::Void - | BtfType::Volatile(_) - | BtfType::Const(_) - | BtfType::Restrict(_) - | BtfType::Typedef(_) - | BtfType::FuncProto(_) - | BtfType::Fwd(_) - | BtfType::Func(_) - | BtfType::DeclTag(_) - | BtfType::TypeTag(_) => bail!("Cannot get alignment of type_id: {}", skipped_type_id), - }) - } - - /// Returns the rust-ified type declaration of `ty` in string format. - /// - /// Rule of thumb is `ty` must be a type a variable can have. - /// - /// Type qualifiers are discarded (eg `const`, `volatile`, etc). 
- pub fn type_declaration(&self, type_id: u32) -> Result { - let stripped_type_id = self.skip_mods_and_typedefs(type_id)?; - let ty = self.type_by_id(stripped_type_id)?; - - Ok(match ty { - BtfType::Void => "std::ffi::c_void".to_string(), - BtfType::Int(t) => { - let width = match (t.bits + 7) / 8 { - 1 => "8", - 2 => "16", - 4 => "32", - 8 => "64", - 16 => "128", - _ => bail!("Invalid integer width"), - }; - - match t.encoding { - btf::BtfIntEncoding::Signed => format!("i{}", width), - btf::BtfIntEncoding::Bool => { - assert!(t.bits as usize == (std::mem::size_of::() * 8)); - "bool".to_string() - } - btf::BtfIntEncoding::Char | btf::BtfIntEncoding::None => format!("u{}", width), - } - } - BtfType::Float(t) => { - let width = match t.size { - 2 => bail!("Unsupported float width"), - 4 => "32", - 8 => "64", - 12 => bail!("Unsupported float width"), - 16 => bail!("Unsupported float width"), - _ => bail!("Invalid float width"), - }; - - format!("f{}", width) - } - BtfType::Ptr(t) => { - let pointee_ty = self.type_declaration(t.pointee_type)?; - - format!("*mut {}", pointee_ty) - } - BtfType::Array(t) => { - let val_ty = self.type_declaration(t.val_type_id)?; - - format!("[{}; {}]", val_ty, t.nelems) - } - BtfType::Struct(t) | BtfType::Union(t) => t.name.to_string(), - BtfType::Enum(t) => t.name.to_string(), - // The only way a variable references a function is through a function pointer. - // Return c_void here so the final def will look like `*mut c_void`. - // - // It's not like rust code can call a function inside a bpf prog either so we don't - // really need a full definition. `void *` is totally sufficient for sharing a pointer. 
- BtfType::Func(_) => "std::ffi::c_void".to_string(), - BtfType::Var(t) => self.type_declaration(t.type_id)?, - BtfType::Fwd(_) - | BtfType::FuncProto(_) - | BtfType::Datasec(_) - | BtfType::Typedef(_) - | BtfType::Volatile(_) - | BtfType::Const(_) - | BtfType::Restrict(_) - | BtfType::DeclTag(_) - | BtfType::TypeTag(_) => { - bail!("Invalid type: {}", ty) - } - }) - } - - /// Returns an expression that evaluates to the Default value - /// of a type(typeid) in string form. - /// - /// To be used when creating a impl Default for a structure - /// - /// Rule of thumb is `ty` must be a type a variable can have. - /// - /// Type qualifiers are discarded (eg `const`, `volatile`, etc). - pub fn type_default(&self, type_id: u32) -> Result { - let stripped_type_id = self.skip_mods_and_typedefs(type_id)?; - let ty = self.type_by_id(stripped_type_id)?; - - Ok(match ty { - BtfType::Void => "std::ffi::c_void::default()".to_string(), - BtfType::Int(_) => format!("{}::default()", self.type_declaration(stripped_type_id)?), - BtfType::Float(_) => format!("{}::default()", self.type_declaration(stripped_type_id)?), - BtfType::Ptr(_) => "std::ptr::null_mut()".to_string(), - BtfType::Array(t) => { - let val_ty = self.type_declaration(t.val_type_id)?; - - format!("[{}::default(); {}]", val_ty, t.nelems) - } - BtfType::Struct(t) | BtfType::Union(t) => format!("{}::default()", t.name), - BtfType::Enum(t) => format!("{}::default()", t.name), - BtfType::Var(t) => format!("{}::default()", self.type_declaration(t.type_id)?), - BtfType::Func(_) - | BtfType::Fwd(_) - | BtfType::FuncProto(_) - | BtfType::Datasec(_) - | BtfType::Typedef(_) - | BtfType::Volatile(_) - | BtfType::Const(_) - | BtfType::Restrict(_) - | BtfType::DeclTag(_) - | BtfType::TypeTag(_) => { - bail!("Invalid type: {}", ty) - } - }) - } - - fn is_struct_packed(&self, struct_type_id: u32, t: &BtfComposite) -> Result { - if !t.is_struct { - return Ok(false); - } - - let align = self.align_of(struct_type_id)?; - ensure!( - align 
!= 0, - "Failed to get alignment of struct_type_id: {}", - struct_type_id - ); - - // Size of a struct has to be a multiple of its alignment - if t.size % align != 0 { - return Ok(true); - } - - // All the non-bitfield fields have to be naturally aligned - for m in &t.members { - let align = self.align_of(m.type_id)?; - ensure!( - align != 0, - "Failed to get alignment of m.type_id: {}", - m.type_id - ); - - if m.bit_size == 0 && m.bit_offset % (align * 8) != 0 { - return Ok(true); - } - } - - // Even if original struct was marked as packed, we haven't detected any misalignment, so - // there is no effect of packedness for given struct - Ok(false) - } - - /// Given a `current_offset` (in bytes) into a struct and a `required_offset` (in bytes) that - /// type `type_id` needs to be placed at, returns how much padding must be inserted before - /// `type_id`. - fn required_padding( - &self, - current_offset: usize, - required_offset: usize, - type_id: u32, - packed: bool, - ) -> Result { - ensure!( - current_offset <= required_offset, - "Current offset ahead of required offset" - ); - - let align = if packed { - 1 - } else { - // Assume 32-bit alignment in case we're generating code for 32-bit - // arch. Worst case is on a 64-bit arch the compiler will generate - // extra padding. The final layout will still be identical to what is - // described by BTF. - let a = self.align_of(type_id)? as usize; - ensure!(a != 0, "Failed to get alignment of type_id: {}", type_id); - - if a > 4 { - 4 - } else { - a - } - }; - - // If we aren't aligning to the natural offset, padding needs to be inserted - let aligned_offset = (current_offset + align - 1) / align * align; - if aligned_offset == required_offset { - Ok(0) - } else { - Ok(required_offset - current_offset) - } - } - - /// Returns rust type definition of `ty` in string format, including dependent types. - /// - /// `ty` must be a struct, union, enum, or datasec type. 
- pub fn type_definition(&self, type_id: u32) -> Result { - let next_type = |mut id| -> Result> { - loop { - match self.type_by_id(id)? { - BtfType::Struct(_) - | BtfType::Union(_) - | BtfType::Enum(_) - | BtfType::Datasec(_) => return Ok(Some(id)), - BtfType::Ptr(t) => id = t.pointee_type, - BtfType::Array(t) => id = t.val_type_id, - BtfType::Volatile(t) => id = t.type_id, - BtfType::Const(t) => id = t.type_id, - BtfType::Restrict(t) => id = t.type_id, - BtfType::Typedef(t) => id = t.type_id, - BtfType::DeclTag(t) => id = t.type_id, - BtfType::TypeTag(t) => id = t.type_id, - BtfType::Void - | BtfType::Int(_) - | BtfType::Float(_) - | BtfType::Fwd(_) - | BtfType::Func(_) - | BtfType::FuncProto(_) - | BtfType::Var(_) => return Ok(None), - } - } - }; - - let is_terminal = |id| -> Result { - match self.type_by_id(id)?.kind() { - BtfKind::Struct | BtfKind::Union | BtfKind::Enum | BtfKind::Datasec => Ok(false), - BtfKind::Void - | BtfKind::Int - | BtfKind::Float - | BtfKind::Ptr - | BtfKind::Array - | BtfKind::Fwd - | BtfKind::Typedef - | BtfKind::Volatile - | BtfKind::Const - | BtfKind::Restrict - | BtfKind::Func - | BtfKind::FuncProto - | BtfKind::Var - | BtfKind::DeclTag - | BtfKind::TypeTag => Ok(true), - } - }; - - ensure!( - !is_terminal(type_id)?, - "Tried to print type definition for terminal type" - ); - - // Process dependent types until there are none left. - // - // When we hit a terminal, we write out some stuff. A non-terminal adds more types to - // the queue. 
- let mut def = String::new(); - let mut dependent_types = vec![type_id]; - let mut processed = BTreeSet::new(); - while !dependent_types.is_empty() { - let type_id = dependent_types.remove(0); - if processed.contains(&type_id) { - continue; - } else { - processed.insert(type_id); - } - - let ty = self.type_by_id(type_id)?; - - match ty { - BtfType::Struct(t) | BtfType::Union(t) => { - let packed = self.is_struct_packed(type_id, t)?; - - // fields in the aggregate - let mut agg_content: Vec = Vec::new(); - - // structs with arrays > 32 length need to impl Default - // rather than #[derive(Default)] - let mut impl_default: Vec = Vec::new(); // output for impl Default - let mut gen_impl_default = false; // whether to output impl Default or use #[derive] - - let mut offset = 0; // In bytes - for member in &t.members { - ensure!( - member.bit_size == 0 && member.bit_offset % 8 == 0, - "Struct bitfields not supported" - ); - - let field_ty_id = self.skip_mods_and_typedefs(member.type_id)?; - if let Some(next_ty_id) = next_type(field_ty_id)? { - dependent_types.push(next_ty_id); - } - - // Add padding as necessary - if t.is_struct { - let padding = self.required_padding( - offset, - member.bit_offset as usize / 8, - member.type_id, - packed, - )?; - - if padding != 0 { - agg_content.push(format!( - r#" __pad_{offset}: [u8; {padding}],"#, - offset = offset, - padding = padding, - )); - - impl_default.push(format!( - r#" __pad_{offset}: [u8::default(); {padding}]"#, - offset = offset, - padding = padding, - )); - } - - if let BtfType::Array(ft) = self.type_by_id(field_ty_id)? 
{ - if ft.nelems > 32 { - gen_impl_default = true - } - } - } - - match self.type_default(field_ty_id) { - Ok(def) => { - impl_default.push(format!( - r#" {field_name}: {field_ty_str}"#, - field_name = member.name, - field_ty_str = def - )); - } - Err(e) => { - if gen_impl_default || !t.is_struct { - bail!("Could not construct a necessary Default Impl: {}", e); - } - } - }; - - // Set `offset` to end of current var - offset = ((member.bit_offset / 8) + self.size_of(field_ty_id)?) as usize; - - let field_ty_str = self.type_declaration(field_ty_id)?; - let field_name = if !member.name.is_empty() { - member.name.to_string() - } else { - field_ty_str.clone() - }; - - agg_content.push(format!(r#" pub {}: {},"#, field_name, field_ty_str)); - } - - if !gen_impl_default && t.is_struct { - writeln!(def, r#"#[derive(Debug, Default, Copy, Clone)]"#)?; - } else if t.is_struct { - writeln!(def, r#"#[derive(Debug, Copy, Clone)]"#)?; - } else { - writeln!(def, r#"#[derive(Copy, Clone)]"#)?; - } - - let aggregate_type = if t.is_struct { "struct" } else { "union" }; - let packed_repr = if packed { ", packed" } else { "" }; - - writeln!(def, r#"#[repr(C{})]"#, packed_repr)?; - writeln!( - def, - r#"pub {agg_type} {name} {{"#, - agg_type = aggregate_type, - name = t.name, - )?; - - for field in agg_content { - writeln!(def, "{}", field)?; - } - writeln!(def, "}}")?; - - // if required write a Default implementation for this struct - if gen_impl_default { - writeln!(def, r#"impl Default for {} {{"#, t.name)?; - writeln!(def, r#" fn default() -> Self {{"#)?; - writeln!(def, r#" {} {{"#, t.name)?; - for impl_def in impl_default { - writeln!(def, r#"{},"#, impl_def)?; - } - writeln!(def, r#" }}"#)?; - writeln!(def, r#" }}"#)?; - writeln!(def, r#"}}"#)?; - } else if !t.is_struct { - // write a Debug implementation for a union - writeln!(def, r#"impl std::fmt::Debug for {} {{"#, t.name)?; - writeln!( - def, - r#" fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {{"# - )?; 
- writeln!(def, r#" write!(f, "(???)")"#)?; - writeln!(def, r#" }}"#)?; - writeln!(def, r#"}}"#)?; - - // write a Default implementation for a union - writeln!(def, r#"impl Default for {} {{"#, t.name)?; - writeln!(def, r#" fn default() -> Self {{"#)?; - writeln!(def, r#" {} {{"#, t.name)?; - writeln!(def, r#"{},"#, impl_default[0])?; - writeln!(def, r#" }}"#)?; - writeln!(def, r#" }}"#)?; - writeln!(def, r#"}}"#)?; - } - } - BtfType::Enum(t) => { - let repr_size = match t.size { - 1 => "8", - 2 => "16", - 4 => "32", - 8 => "64", - 16 => "128", - _ => bail!("Invalid enum size: {}", t.size), - }; - - let mut signed = "u"; - for value in &t.values { - if value.value < 0 { - signed = "i"; - break; - } - } - - writeln!(def, r#"#[derive(Debug, Copy, Clone, PartialEq)]"#)?; - writeln!( - def, - r#"#[repr({signed}{repr_size})]"#, - signed = signed, - repr_size = repr_size, - )?; - writeln!(def, r#"pub enum {name} {{"#, name = t.name,)?; - - for value in &t.values { - writeln!( - def, - r#" {name} = {value},"#, - name = value.name, - value = value.value, - )?; - } - - writeln!(def, "}}")?; - - // write an impl Default for this enum - if !t.values.is_empty() { - writeln!(def, r#"impl Default for {name} {{"#, name = t.name)?; - writeln!(def, r#" fn default() -> Self {{"#)?; - writeln!( - def, - r#" {name}::{value}"#, - name = t.name, - value = t.values[0].name - )?; - writeln!(def, r#" }}"#)?; - writeln!(def, r#"}}"#)?; - } - } - BtfType::Datasec(t) => { - let mut sec_name = t.name.to_string(); - if sec_name.is_empty() || !sec_name.starts_with('.') { - bail!("Datasec name is invalid: {}", sec_name); - } - sec_name.remove(0); - - writeln!(def, r#"#[derive(Debug, Copy, Clone)]"#)?; - writeln!(def, r#"#[repr(C)]"#)?; - writeln!(def, r#"pub struct {} {{"#, sec_name,)?; - - let mut offset: u32 = 0; - for datasec_var in &t.vars { - let var = match self.type_by_id(datasec_var.type_id)? 
{ - BtfType::Var(v) => { - if v.linkage == BtfVarLinkage::Static { - // do not output Static Var - continue; - } - - if let Some(next_ty_id) = next_type(v.type_id)? { - dependent_types.push(next_ty_id); - } - - v - } - _ => bail!("BTF is invalid! Datasec var does not point to a var"), - }; - - let padding = self.required_padding( - offset as usize, - datasec_var.offset as usize, - var.type_id, - false, - )?; - if padding != 0 { - writeln!(def, r#" __pad_{}: [u8; {}],"#, offset, padding,)?; - } - - // Set `offset` to end of current var - offset = datasec_var.offset + datasec_var.size; - - writeln!( - def, - r#" pub {var_name}: {var_type},"#, - var_name = var.name, - var_type = self.type_declaration(var.type_id)? - )?; - } - - writeln!(def, "}}")?; - } - BtfType::Void - | BtfType::Ptr(_) - | BtfType::Func(_) - | BtfType::Int(_) - | BtfType::Float(_) - | BtfType::Array(_) - | BtfType::Fwd(_) - | BtfType::Typedef(_) - | BtfType::FuncProto(_) - | BtfType::Var(_) - | BtfType::Volatile(_) - | BtfType::Const(_) - | BtfType::Restrict(_) - | BtfType::DeclTag(_) - | BtfType::TypeTag(_) => bail!("Invalid type: {}", ty), - } - } - - Ok(def) - } - - pub fn skip_mods_and_typedefs(&self, mut type_id: u32) -> Result { - loop { - match self.type_by_id(type_id)? 
{ - BtfType::Volatile(t) => type_id = t.type_id, - BtfType::Const(t) => type_id = t.type_id, - BtfType::Restrict(t) => type_id = t.type_id, - BtfType::Typedef(t) => type_id = t.type_id, - BtfType::TypeTag(t) => type_id = t.type_id, - BtfType::Void - | BtfType::Int(_) - | BtfType::Float(_) - | BtfType::Ptr(_) - | BtfType::Array(_) - | BtfType::Struct(_) - | BtfType::Union(_) - | BtfType::Enum(_) - | BtfType::Fwd(_) - | BtfType::Func(_) - | BtfType::FuncProto(_) - | BtfType::Var(_) - | BtfType::Datasec(_) - | BtfType::DeclTag(_) => return Ok(type_id), - }; - } - } - - fn load_type(&mut self, data: &'a [u8]) -> Result> { - let t = data.pread::(0)?; - let extra = &data[size_of::()..]; - let kind = Self::get_kind(t.info); - - match BtfKind::try_from(kind)? { - BtfKind::Void => { - let _ = BtfType::Void; // Silence unused variant warning - bail!("Cannot load Void type"); - } - BtfKind::Int => self.load_int(&t, extra), - BtfKind::Float => self.load_float(&t), - BtfKind::Ptr => Ok(BtfType::Ptr(BtfPtr { - pointee_type: t.type_id, - })), - BtfKind::Array => self.load_array(extra), - BtfKind::Struct => self.load_struct(&t, extra), - BtfKind::Union => self.load_union(&t, extra), - BtfKind::Enum => self.load_enum(&t, extra), - BtfKind::Fwd => self.load_fwd(&t), - BtfKind::Typedef => Ok(BtfType::Typedef(BtfTypedef { - name: self.get_btf_str(t.name_off as usize)?, - type_id: t.type_id, - })), - BtfKind::Volatile => Ok(BtfType::Volatile(BtfVolatile { type_id: t.type_id })), - BtfKind::Const => Ok(BtfType::Const(BtfConst { type_id: t.type_id })), - BtfKind::Restrict => Ok(BtfType::Restrict(BtfRestrict { type_id: t.type_id })), - BtfKind::Func => Ok(BtfType::Func(BtfFunc { - name: self.get_btf_str(t.name_off as usize)?, - type_id: t.type_id, - })), - BtfKind::FuncProto => self.load_func_proto(&t, extra), - BtfKind::Var => self.load_var(&t, extra), - BtfKind::Datasec => self.load_datasec(&t, extra), - BtfKind::DeclTag => self.load_decl_tag(&t, extra), - BtfKind::TypeTag => 
Ok(BtfType::TypeTag(BtfTypeTag { - name: self.get_btf_str(t.name_off as usize)?, - type_id: t.type_id, - })), - } - } - - fn load_int(&self, t: &btf_type, extra: &'a [u8]) -> Result> { - let info = extra.pread::(0)?; - let enc: u8 = ((info >> 24) & 0xf) as u8; - let off: u8 = ((info >> 16) & 0xff) as u8; - let bits: u8 = (info & 0xff) as u8; - Ok(BtfType::Int(BtfInt { - name: self.get_btf_str(t.name_off as usize)?, - bits, - offset: off, - encoding: BtfIntEncoding::try_from(enc)?, - })) - } - - fn load_float(&self, t: &btf_type) -> Result> { - Ok(BtfType::Float(BtfFloat { - name: self.get_btf_str(t.name_off as usize)?, - size: t.type_id, - })) - } - - fn load_array(&self, extra: &'a [u8]) -> Result> { - let info = extra.pread::(0)?; - Ok(BtfType::Array(BtfArray { - nelems: info.nelems, - index_type_id: info.idx_type_id, - val_type_id: info.val_type_id, - })) - } - - fn load_struct(&mut self, t: &btf_type, extra: &'a [u8]) -> Result> { - let name = match self.get_btf_str(t.name_off as usize)? { - "" => { - self.anon_count += 1; - format!("{}{}", ANON_PREFIX, self.anon_count) - } - n => n.to_string(), - }; - Ok(BtfType::Struct(BtfComposite { - name, - is_struct: true, - size: t.type_id, - members: self.load_members(t, extra)?, - })) - } - - fn load_union(&mut self, t: &btf_type, extra: &'a [u8]) -> Result> { - let name = match self.get_btf_str(t.name_off as usize)? 
{ - "" => { - self.anon_count += 1; - format!("{}{}", ANON_PREFIX, self.anon_count) - } - n => n.to_string(), - }; - Ok(BtfType::Union(BtfComposite { - name, - is_struct: false, - size: t.type_id, - members: self.load_members(t, extra)?, - })) - } - - fn load_members(&self, t: &btf_type, extra: &'a [u8]) -> Result>> { - let mut res = Vec::new(); - let mut off: usize = 0; - let bits = Self::get_kind_flag(t.info); - - for _ in 0..Btf::get_vlen(t.info) { - let m = extra.pread::(off)?; - res.push(BtfMember { - name: self.get_btf_str(m.name_off as usize)?, - type_id: m.type_id, - bit_size: if bits { (m.offset >> 24) as u8 } else { 0 }, - bit_offset: if bits { m.offset & 0xffffff } else { m.offset }, - }); - - off += size_of::(); - } - - Ok(res) - } - - fn load_enum(&mut self, t: &btf_type, extra: &'a [u8]) -> Result> { - let name = match self.get_btf_str(t.name_off as usize)? { - "" => { - self.anon_count += 1; - format!("{}{}", ANON_PREFIX, self.anon_count) - } - n => n.to_string(), - }; - - let mut vals = Vec::new(); - let mut off: usize = 0; - for _ in 0..Btf::get_vlen(t.info) { - let v = extra.pread::(off)?; - vals.push(BtfEnumValue { - name: self.get_btf_str(v.name_off as usize)?, - value: v.val, - }); - - off += size_of::(); - } - - Ok(BtfType::Enum(BtfEnum { - name, - size: t.type_id, - values: vals, - })) - } - - fn load_fwd(&self, t: &btf_type) -> Result> { - Ok(BtfType::Fwd(BtfFwd { - name: self.get_btf_str(t.name_off as usize)?, - kind: if Self::get_kind_flag(t.info) { - BtfFwdKind::Union - } else { - BtfFwdKind::Struct - }, - })) - } - - fn load_func_proto(&self, t: &btf_type, extra: &'a [u8]) -> Result> { - let mut params = Vec::new(); - let mut off: usize = 0; - - for _ in 0..Btf::get_vlen(t.info) { - let p = extra.pread::(off)?; - params.push(BtfFuncParam { - name: self.get_btf_str(p.name_off as usize)?, - type_id: p.type_id, - }); - - off += size_of::(); - } - - Ok(BtfType::FuncProto(BtfFuncProto { - ret_type_id: t.type_id, - params, - })) - } - - fn 
load_var(&self, t: &btf_type, extra: &'a [u8]) -> Result> { - let kind = extra.pread::(0)?; - Ok(BtfType::Var(BtfVar { - name: self.get_btf_str(t.name_off as usize)?, - type_id: t.type_id, - linkage: BtfVarLinkage::try_from(kind)?, - })) - } - - fn load_datasec(&self, t: &btf_type, extra: &'a [u8]) -> Result> { - let mut vars = Vec::new(); - let mut off: usize = 0; - - for _ in 0..Btf::get_vlen(t.info) { - let v = extra.pread::(off)?; - vars.push(BtfDatasecVar { - type_id: v.type_id, - offset: v.offset, - size: v.size, - }); - - off += size_of::(); - } - - Ok(BtfType::Datasec(BtfDatasec { - name: self.get_btf_str(t.name_off as usize)?, - size: t.type_id, - vars, - })) - } - - fn load_decl_tag(&self, t: &btf_type, extra: &'a [u8]) -> Result> { - let decl_tag = extra.pread::(0)?; - Ok(BtfType::DeclTag(BtfDeclTag { - name: self.get_btf_str(t.name_off as usize)?, - type_id: t.type_id, - component_idx: decl_tag.component_idx, - })) - } - - /// Returns size of type on disk in .BTF section - fn type_size(t: &BtfType) -> usize { - let common = size_of::(); - match t { - BtfType::Void => 0, - BtfType::Ptr(_) - | BtfType::Fwd(_) - | BtfType::Typedef(_) - | BtfType::Volatile(_) - | BtfType::Const(_) - | BtfType::Restrict(_) - | BtfType::Func(_) - | BtfType::Float(_) - | BtfType::TypeTag(_) => common, - BtfType::Int(_) | BtfType::Var(_) => common + size_of::(), - BtfType::Array(_) => common + size_of::(), - BtfType::Struct(t) => common + t.members.len() * size_of::(), - BtfType::Union(t) => common + t.members.len() * size_of::(), - BtfType::Enum(t) => common + t.values.len() * size_of::(), - BtfType::FuncProto(t) => common + t.params.len() * size_of::(), - BtfType::Datasec(t) => common + t.vars.len() * size_of::(), - BtfType::DeclTag(_) => common + size_of::(), - } - } - - fn get_vlen(info: u32) -> u32 { - info & 0xffff - } - - fn get_kind_flag(info: u32) -> bool { - (info >> 31) == 1 - } - - fn get_kind(info: u32) -> u32 { - (info >> 24) & 0x1f - } - - fn get_btf_str(&self, 
offset: usize) -> Result<&'a str> { - let c_str = - unsafe { CStr::from_ptr(&self.string_table[offset] as *const u8 as *const c_char) }; - Ok(c_str.to_str()?) - } -} - -impl<'a> Drop for Btf<'a> { - fn drop(&mut self) { - unsafe { - libbpf_sys::bpf_object__close(self.bpf_obj); - } - } -} diff --git a/source/tools/detect/net/rtrace/bpfskel/src/btf/c_types.rs b/source/tools/detect/net/rtrace/bpfskel/src/btf/c_types.rs deleted file mode 100644 index 0ca27021bdf73c6426a33f3d50a31c3e5227cac5..0000000000000000000000000000000000000000 --- a/source/tools/detect/net/rtrace/bpfskel/src/btf/c_types.rs +++ /dev/null @@ -1,70 +0,0 @@ -use scroll_derive::{IOread, Pread as DerivePread, Pwrite, SizeWith}; - -pub const BTF_MAGIC: u16 = 0xEB9F; -pub const BTF_VERSION: u8 = 1; - -/// All offsets are in bytes relative to the end of this header -#[repr(C)] -#[derive(Debug, Clone, DerivePread, Pwrite, IOread, SizeWith)] -pub struct btf_header { - pub magic: u16, - pub version: u8, - pub flags: u8, - pub hdr_len: u32, - pub type_off: u32, - pub type_len: u32, - pub str_off: u32, - pub str_len: u32, -} - -#[repr(C)] -#[derive(Debug, Clone, DerivePread, Pwrite, IOread, SizeWith)] -pub struct btf_type { - pub name_off: u32, - pub info: u32, - pub type_id: u32, -} - -#[repr(C)] -#[derive(Debug, Clone, DerivePread, Pwrite, IOread, SizeWith)] -pub struct btf_enum { - pub name_off: u32, - pub val: i32, -} - -#[repr(C)] -#[derive(Debug, Clone, DerivePread, Pwrite, IOread, SizeWith)] -pub struct btf_array { - pub val_type_id: u32, - pub idx_type_id: u32, - pub nelems: u32, -} - -#[repr(C)] -#[derive(Debug, Clone, DerivePread, Pwrite, IOread, SizeWith)] -pub struct btf_member { - pub name_off: u32, - pub type_id: u32, - pub offset: u32, -} - -#[repr(C)] -#[derive(Debug, Clone, DerivePread, Pwrite, IOread, SizeWith)] -pub struct btf_param { - pub name_off: u32, - pub type_id: u32, -} - -#[repr(C)] -#[derive(Debug, Clone, DerivePread, Pwrite, IOread, SizeWith)] -pub struct btf_datasec_var { - pub 
type_id: u32, - pub offset: u32, - pub size: u32, -} - -#[repr(C)] -#[derive(Debug, Clone, DerivePread, Pwrite, IOread, SizeWith)] -pub struct btf_decl_tag { - pub component_idx: i32, -} diff --git a/source/tools/detect/net/rtrace/bpfskel/src/btf/mod.rs b/source/tools/detect/net/rtrace/bpfskel/src/btf/mod.rs deleted file mode 100644 index a1adc9aa4d04521720ee7a8eceec96a894d49a07..0000000000000000000000000000000000000000 --- a/source/tools/detect/net/rtrace/bpfskel/src/btf/mod.rs +++ /dev/null @@ -1,7 +0,0 @@ -#[allow(clippy::module_inception)] -pub mod btf; -pub mod c_types; -pub mod types; - -pub use btf::*; -pub use types::*; diff --git a/source/tools/detect/net/rtrace/bpfskel/src/btf/types.rs b/source/tools/detect/net/rtrace/bpfskel/src/btf/types.rs deleted file mode 100644 index 6a9f08e5be4452b843de09ab265b0f80c6c02b9e..0000000000000000000000000000000000000000 --- a/source/tools/detect/net/rtrace/bpfskel/src/btf/types.rs +++ /dev/null @@ -1,258 +0,0 @@ -use std::fmt; - -use num_enum::TryFromPrimitive; - -#[derive(Debug, Copy, Clone, TryFromPrimitive, PartialEq)] -#[repr(u32)] -pub enum BtfKind { - Void = 0, - Int = 1, - Ptr = 2, - Array = 3, - Struct = 4, - Union = 5, - Enum = 6, - Fwd = 7, - Typedef = 8, - Volatile = 9, - Const = 10, - Restrict = 11, - Func = 12, - FuncProto = 13, - Var = 14, - Datasec = 15, - Float = 16, - DeclTag = 17, - TypeTag = 18, -} - -#[derive(Debug, Copy, Clone, TryFromPrimitive, PartialEq)] -#[repr(u8)] -pub enum BtfIntEncoding { - None = 0, - Signed = 1 << 0, - Char = 1 << 1, - Bool = 1 << 2, -} - -#[derive(Debug)] -pub struct BtfInt<'a> { - pub name: &'a str, - pub bits: u8, - pub offset: u8, - pub encoding: BtfIntEncoding, -} - -#[derive(Debug)] -pub struct BtfPtr { - pub pointee_type: u32, -} - -#[derive(Debug)] -pub struct BtfArray { - pub nelems: u32, - pub index_type_id: u32, - pub val_type_id: u32, -} - -#[derive(Debug)] -pub struct BtfMember<'a> { - pub name: &'a str, - pub type_id: u32, - pub bit_offset: u32, - pub 
bit_size: u8, -} - -#[derive(Debug)] -pub struct BtfComposite<'a> { - pub name: String, - pub is_struct: bool, - pub size: u32, - pub members: Vec>, -} - -#[derive(Debug)] -pub struct BtfEnumValue<'a> { - pub name: &'a str, - pub value: i32, -} - -#[derive(Debug)] -pub struct BtfEnum<'a> { - pub name: String, - pub size: u32, - pub values: Vec>, -} - -#[derive(Debug, Copy, Clone, PartialEq)] -pub enum BtfFwdKind { - Struct, - Union, -} - -#[derive(Debug)] -pub struct BtfFwd<'a> { - pub name: &'a str, - pub kind: BtfFwdKind, -} - -#[derive(Debug)] -pub struct BtfTypedef<'a> { - pub name: &'a str, - pub type_id: u32, -} - -#[derive(Debug)] -pub struct BtfVolatile { - pub type_id: u32, -} - -#[derive(Debug)] -pub struct BtfConst { - pub type_id: u32, -} - -#[derive(Debug)] -pub struct BtfRestrict { - pub type_id: u32, -} - -#[derive(Debug)] -pub struct BtfFunc<'a> { - pub name: &'a str, - pub type_id: u32, -} - -#[derive(Debug)] -pub struct BtfFuncParam<'a> { - pub name: &'a str, - pub type_id: u32, -} - -#[derive(Debug)] -pub struct BtfFuncProto<'a> { - pub ret_type_id: u32, - pub params: Vec>, -} - -#[derive(Debug, Copy, Clone, TryFromPrimitive, PartialEq)] -#[repr(u32)] -pub enum BtfVarLinkage { - Static = 0, - GlobalAlloc = 1, - GlobalExtern = 2, -} - -#[derive(Debug)] -pub struct BtfVar<'a> { - pub name: &'a str, - pub type_id: u32, - pub linkage: BtfVarLinkage, -} - -#[derive(Debug)] -pub struct BtfDatasecVar { - pub type_id: u32, - pub offset: u32, - pub size: u32, -} - -#[derive(Debug)] -pub struct BtfDatasec<'a> { - pub name: &'a str, - pub size: u32, - pub vars: Vec, -} - -#[derive(Debug)] -pub struct BtfFloat<'a> { - pub name: &'a str, - pub size: u32, -} - -#[derive(Debug)] -pub struct BtfDeclTag<'a> { - pub name: &'a str, - pub type_id: u32, - pub component_idx: i32, -} - -#[derive(Debug)] -pub struct BtfTypeTag<'a> { - pub name: &'a str, - pub type_id: u32, -} - -pub enum BtfType<'a> { - Void, - Int(BtfInt<'a>), - Ptr(BtfPtr), - Array(BtfArray), - 
Struct(BtfComposite<'a>), - Union(BtfComposite<'a>), - Enum(BtfEnum<'a>), - Fwd(BtfFwd<'a>), - Typedef(BtfTypedef<'a>), - Volatile(BtfVolatile), - Const(BtfConst), - Restrict(BtfRestrict), - Func(BtfFunc<'a>), - FuncProto(BtfFuncProto<'a>), - Var(BtfVar<'a>), - Datasec(BtfDatasec<'a>), - Float(BtfFloat<'a>), - DeclTag(BtfDeclTag<'a>), - TypeTag(BtfTypeTag<'a>), -} - -impl<'a> BtfType<'a> { - pub fn kind(&self) -> BtfKind { - match self { - BtfType::Void => BtfKind::Void, - BtfType::Ptr(_) => BtfKind::Ptr, - BtfType::Fwd(_) => BtfKind::Fwd, - BtfType::Typedef(_) => BtfKind::Typedef, - BtfType::Volatile(_) => BtfKind::Volatile, - BtfType::Const(_) => BtfKind::Const, - BtfType::Restrict(_) => BtfKind::Restrict, - BtfType::Func(_) => BtfKind::Func, - BtfType::Int(_) => BtfKind::Int, - BtfType::Var(_) => BtfKind::Var, - BtfType::Array(_) => BtfKind::Array, - BtfType::Struct(_) => BtfKind::Struct, - BtfType::Union(_) => BtfKind::Union, - BtfType::Enum(_) => BtfKind::Enum, - BtfType::FuncProto(_) => BtfKind::FuncProto, - BtfType::Datasec(_) => BtfKind::Datasec, - BtfType::Float(_) => BtfKind::Float, - BtfType::DeclTag(_) => BtfKind::DeclTag, - BtfType::TypeTag(_) => BtfKind::TypeTag, - } - } -} - -impl<'a> fmt::Display for BtfType<'a> { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match self { - BtfType::Void => write!(f, "void"), - BtfType::Ptr(_) => write!(f, "ptr"), - BtfType::Fwd(_) => write!(f, "fwd"), - BtfType::Typedef(_) => write!(f, "typedef"), - BtfType::Volatile(_) => write!(f, "volatile"), - BtfType::Const(_) => write!(f, "const"), - BtfType::Restrict(_) => write!(f, "restrict"), - BtfType::Func(_) => write!(f, "func"), - BtfType::Int(_) => write!(f, "int"), - BtfType::Var(_) => write!(f, "var"), - BtfType::Array(_) => write!(f, "array"), - BtfType::Struct(_) => write!(f, "struct"), - BtfType::Union(_) => write!(f, "union"), - BtfType::Enum(_) => write!(f, "enum"), - BtfType::FuncProto(_) => write!(f, "funcproto"), - BtfType::Datasec(_) => 
write!(f, "datasec"), - BtfType::Float(_) => write!(f, "float"), - BtfType::DeclTag(_) => write!(f, "decltag"), - BtfType::TypeTag(_) => write!(f, "typetag"), - } - } -} diff --git a/source/tools/detect/net/rtrace/bpfskel/src/gen.rs b/source/tools/detect/net/rtrace/bpfskel/src/gen.rs deleted file mode 100644 index 8f222553dc943181f0810eae54d3287cf6bfb3f3..0000000000000000000000000000000000000000 --- a/source/tools/detect/net/rtrace/bpfskel/src/gen.rs +++ /dev/null @@ -1,501 +0,0 @@ -use crate::btf; -use memmap2::Mmap; -use std::collections::BTreeMap; -use std::convert::TryInto; -use std::ffi::{c_void, CStr, CString}; -use std::fmt::Write as fmt_write; -use std::fs::File; -use std::io::Write; -use std::os::raw::c_ulong; -use std::path::{Path, PathBuf}; -use std::process::{Command, Stdio}; -use std::ptr; - -use anyhow::{bail, ensure, Context, Result}; - -macro_rules! gen_bpf_object_iter { - ($name:ident, $iter_ty:ty, $next_fn:expr) => { - struct $name { - obj: *mut libbpf_sys::bpf_object, - last: *mut $iter_ty, - } - - impl $name { - fn new(obj: *mut libbpf_sys::bpf_object) -> $name { - $name { - obj, - last: ptr::null_mut(), - } - } - } - - impl Iterator for $name { - type Item = *mut $iter_ty; - - fn next(&mut self) -> Option { - self.last = unsafe { $next_fn(self.obj, self.last) }; - - if self.last.is_null() { - None - } else { - Some(self.last) - } - } - } - }; -} - -gen_bpf_object_iter!( - MapIter, - libbpf_sys::bpf_map, - libbpf_sys::bpf_object__next_map -); -gen_bpf_object_iter!( - ProgIter, - libbpf_sys::bpf_program, - libbpf_sys::bpf_object__next_program -); - -/// Run `rustfmt` over `s` and return result -fn rustfmt(s: &str, rustfmt_path: &Path) -> Result { - let mut cmd = Command::new(rustfmt_path) - .stdin(Stdio::piped()) - .stdout(Stdio::piped()) - .spawn() - .context("Failed to spawn rustfmt")?; - - // Send input in via stdin - write!(cmd.stdin.take().unwrap(), "{}", s)?; - - // Extract output - let output = cmd - .wait_with_output() - .context("Failed 
to execute rustfmt")?; - ensure!( - output.status.success(), - "Failed to rustfmt: {}", - output.status - ); - - Ok(String::from_utf8(output.stdout)?) -} - -fn capitalize_first_letter(s: &str) -> String { - if s.is_empty() { - return "".to_string(); - } - - s.split('_').fold(String::new(), |mut acc, ts| { - acc += &ts.chars().next().unwrap().to_uppercase().to_string(); - if ts.len() > 1 { - acc += &ts[1..]; - } - acc - }) -} - -fn get_raw_map_name(map: *const libbpf_sys::bpf_map) -> Result { - let name_ptr = unsafe { libbpf_sys::bpf_map__name(map) }; - if name_ptr.is_null() { - bail!("Map name unknown"); - } - - Ok(unsafe { CStr::from_ptr(name_ptr) }.to_str()?.to_string()) -} - -fn canonicalize_internal_map_name(s: &str) -> Option { - if s.ends_with(".data") { - Some("data".to_string()) - } else if s.ends_with(".rodata") { - Some("rodata".to_string()) - } else if s.ends_with(".bss") { - Some("bss".to_string()) - } else if s.ends_with(".kconfig") { - Some("kconfig".to_string()) - } else { - eprintln!("Warning: unrecognized map: {}", s); - None - } -} - -/// Same as `get_raw_map_name` except the name is canonicalized -fn get_map_name(map: *const libbpf_sys::bpf_map) -> Result> { - let name = get_raw_map_name(map)?; - - if unsafe { !libbpf_sys::bpf_map__is_internal(map) } { - Ok(Some(name)) - } else { - Ok(canonicalize_internal_map_name(&name)) - } -} - -fn get_prog_name(prog: *const libbpf_sys::bpf_program) -> Result { - let name_ptr = unsafe { libbpf_sys::bpf_program__name(prog) }; - - if name_ptr.is_null() { - bail!("Prog name unknown"); - } - - Ok(unsafe { CStr::from_ptr(name_ptr) }.to_str()?.to_string()) -} - -fn map_is_mmapable(map: *const libbpf_sys::bpf_map) -> bool { - let def = unsafe { libbpf_sys::bpf_map__def(map) }; - (unsafe { (*def).map_flags } & libbpf_sys::BPF_F_MMAPABLE) > 0 -} - -fn map_is_datasec(map: *const libbpf_sys::bpf_map) -> bool { - let internal = unsafe { libbpf_sys::bpf_map__is_internal(map) }; - let mmapable = map_is_mmapable(map); - - 
internal && mmapable -} - -fn map_is_readonly(map: *const libbpf_sys::bpf_map) -> bool { - assert!(map_is_mmapable(map)); - let def = unsafe { libbpf_sys::bpf_map__def(map) }; - - // BPF_F_RDONLY_PROG means readonly from prog side - (unsafe { (*def).map_flags } & libbpf_sys::BPF_F_RDONLY_PROG) > 0 -} - -fn open_bpf_object(name: &str, data: &[u8]) -> Result<*mut libbpf_sys::bpf_object> { - let cname = CString::new(name)?; - let obj_opts = libbpf_sys::bpf_object_open_opts { - sz: std::mem::size_of::() as libbpf_sys::size_t, - object_name: cname.as_ptr(), - ..Default::default() - }; - let object = unsafe { - libbpf_sys::bpf_object__open_mem( - data.as_ptr() as *const c_void, - data.len() as c_ulong, - &obj_opts, - ) - }; - if object.is_null() { - bail!("Failed to bpf_object__open_mem()"); - } - - Ok(object) -} - -/// Generate contents of a single skeleton -fn gen_skel_contents(raw_obj_name: &str, obj_file_path: &Path) -> Result { - let mut skel = String::new(); - - let libbpf_obj_name = format!("{}_bpf", raw_obj_name); - let obj_name = capitalize_first_letter(raw_obj_name); - let file = File::open(obj_file_path)?; - let mmap = unsafe { Mmap::map(&file)? }; - let object = open_bpf_object(&libbpf_obj_name, &*mmap)?; - let btf = match btf::Btf::new(&obj_name, &*mmap)? { - Some(b) => b, - None => bail!("failed to get btf"), - }; - - write!( - skel, - r#"// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) - // - // THIS FILE IS AUTOGENERATED BY BPFSKEL! 
- - #[path = "bindings.rs"] - mod bindings; - #[path = "{name}.skel.rs"] - mod {name}_skel; - - pub use bindings::*; - pub use {name}_skel::*; - - #[allow(dead_code)] - #[allow(non_snake_case)] - #[allow(non_camel_case_types)] - #[allow(clippy::transmute_ptr_to_ref)] - #[allow(clippy::upper_case_acronyms)] - use libbpf_rs::libbpf_sys; - - - use anyhow::{{bail, Result}}; - use once_cell::sync::Lazy; - use std::sync::Mutex; - use std::time::Duration; - use crate::perf::*; - "#, - name = raw_obj_name, - )?; - - write!( - skel, - r#" - static GLOBAL_TX: Lazy)>>>> = - Lazy::new(|| Mutex::new(None)); - - pub fn handle_event(_cpu: i32, data: &[u8]) {{ - let event = Vec::from(data); - GLOBAL_TX - .lock() - .unwrap() - .as_ref() - .unwrap() - .send((_cpu as usize, event)) - .unwrap(); - }} - - pub fn handle_lost_events(cpu: i32, count: u64) {{ - eprintln!("Lost {{}} events on CPU {{}}", count, cpu); - }} - - pub struct Skel<'a> {{ - pub skel: {name}Skel<'a>, - rx: Option)>>, - }} - - impl <'a> Default for Skel<'a> {{ - fn default() -> Self {{ - Skel {{ - skel: unsafe {{ std::mem::MaybeUninit::zeroed().assume_init() }}, - rx: None, - }} - }} - }} - - impl<'a> Skel <'a> {{ - pub fn open(&mut self, debug: bool, btf: &Option) -> Result> {{ - let btf_cstring; - let mut btf_cstring_ptr = std::ptr::null(); - if let Some(x) = btf {{ - btf_cstring = std::ffi::CString::new(x.clone())?; - btf_cstring_ptr = btf_cstring.as_ptr(); - }} - - let mut skel_builder = {name}SkelBuilder::default(); - skel_builder.obj_builder.debug(debug); - let mut open_opts = skel_builder.obj_builder.opts(std::ptr::null()); - open_opts.btf_custom_path = btf_cstring_ptr; - let mut open_skel = skel_builder.open_opts(open_opts)?; - Ok(open_skel) - }} - - pub fn load( - &mut self, - mut openskel: Open{name}Skel<'a>, - enabled: Vec<&str>, - disabled: Vec<&str>, - ) -> Result<()> {{ - - if enabled.len() != 0 && disabled.len() == 0 {{ - for x in openskel.obj.progs_iter_mut() {{ - x.set_autoload(false)?; - }} - }} - - 
for x in enabled {{ - if let Some(y) = openskel.obj.prog_mut(x) {{ - y.set_autoload(true)?; - }} else {{ - bail!("failed to find program: {{}}", x) - }} - }} - - for x in disabled {{ - if let Some(y) = openskel.obj.prog_mut(x) {{ - y.set_autoload(false)?; - }} else {{ - bail!("failed to find program: {{}}", x) - }} - }} - self.skel = openskel.load()?; - Ok(()) - }} - - pub fn open_load( - &mut self, - debug: bool, - btf: &Option, - enabled: Vec<&str>, - disabled: Vec<&str>, - ) -> Result<()> {{ - let openskel = self.open(debug, btf)?; - self.load(openskel, enabled, disabled) - }} - - pub fn attach(&mut self) -> Result<()> {{ - Ok(self.skel.attach()?) - }} - - pub fn load_enabled(&mut self, openskel: Open{name}Skel<'a>, enabled: Vec<&str>) -> Result<()> {{ - self.load(openskel, enabled, vec![]) - }} - - pub fn load_disabled(&mut self, openskel: Open{name}Skel<'a>, disabled: Vec<&str>) -> Result<()> {{ - self.load(openskel, vec![], disabled) - }} - - pub fn open_load_enabled( - &mut self, - debug: bool, - btf: &Option, - enabled: Vec<&str>, - ) -> Result<()> {{ - self.open_load(debug, btf, enabled, vec![]) - }} - - pub fn open_load_disabled( - &mut self, - debug: bool, - btf: &Option, - disabled: Vec<&str>, - ) -> Result<()> {{ - self.open_load(debug, btf, vec![], disabled) - }} - "#, - name = obj_name - )?; - - for map in MapIter::new(object) { - let map_type = unsafe { libbpf_sys::bpf_map__type(map) }; - // BPF_MAP_TYPE_PERF_EVENT_ARRAY is 4 - if map_type == 4 { - write!( - skel, - r#" - pub fn poll(&mut self, timeout: Duration) -> Result)>> {{ - if let Some(rx) = &self.rx {{ - match rx.recv_timeout(timeout) {{ - Ok(data) => {{ - return Ok(Some(data)); - }} - Err(e) => {{ - return Ok(None); - }} - }} - }} - let (tx, rx) = crossbeam_channel::unbounded(); - self.rx = Some(rx); - *GLOBAL_TX.lock().unwrap() = Some(tx); - let perf = PerfBufferBuilder::new(self.skel.maps_mut().perf_map()) - .sample_cb(handle_event) - .lost_cb(handle_lost_events) - .build()?; - 
std::thread::spawn(move || loop {{ - perf.poll(timeout).unwrap(); - }}); - log::debug!("start successfully perf thread to receive event"); - Ok(None) - }} - "#, - )?; - continue; - } - - let map_name = match get_map_name(map)? { - Some(n) => n, - None => continue, - }; - - if map_name.starts_with("inner") { - // skip inner map - continue; - } - - let key = btf.type_declaration(unsafe { libbpf_sys::bpf_map__btf_key_type_id(map) })?; - let value = btf.type_declaration(unsafe { libbpf_sys::bpf_map__btf_value_type_id(map) })?; - - println!("key {}, value {}", key, value); - - write!( - skel, - r#" - pub fn {map_name}_update(&mut self, key: {key}, value: {value}) -> Result<()> {{ - - let mapkey = unsafe {{ std::mem::transmute::<{key}, [u8; std::mem::size_of::<{key}>()]>(key) }}; - let mapvalue = unsafe {{ std::mem::transmute::<{value}, [u8; std::mem::size_of::<{value}>()]>(value)}}; - - self.skel.maps_mut().{map_name}().update( - &mapkey, - &mapvalue, - libbpf_rs::MapFlags::ANY, - )?; - Ok(()) - }} - - pub fn {map_name}_delete(&mut self, key: {key}) -> Result<()> {{ - - let mapkey = unsafe {{ std::mem::transmute::<{key}, [u8; std::mem::size_of::<{key}>()]>(key) }}; - - self.skel.maps_mut().{map_name}().delete( - &mapkey, - )?; - Ok(()) - }} - - pub fn {map_name}_lookup(&mut self, key: {key}) -> Result> {{ - let mapkey = unsafe {{ std::mem::transmute::<{key}, [u8; std::mem::size_of::<{key}>()]>(key) }}; - - if let Some(x) = self.skel.maps_mut().{map_name}().lookup( - &mapkey, - libbpf_rs::MapFlags::ANY, - )? 
{{ - return Ok(Some(unsafe {{ std::mem::transmute_copy::, {value}>(&x) }})); - }} - - Ok(None) - }} - "#, - map_name = map_name, - key = key, - value = value, - )?; - } - - writeln!(skel, "}}")?; - - Ok(skel) -} - -/// Generate a single skeleton -fn gen_skel(name: &str, obj: &Path, out: &Path, rustfmt_path: &Path) -> Result<()> { - if name.is_empty() { - bail!("Object file has no name"); - } - let skel = rustfmt(&gen_skel_contents(name, obj)?, rustfmt_path)?; - let mut file = File::create(out)?; - file.write_all(skel.as_bytes())?; - - Ok(()) -} - -pub fn gen_single(obj_file: &Path, output: &Path, rustfmt_path: &Path) -> Result<()> { - let filename = match obj_file.file_name() { - Some(n) => n, - None => bail!( - "Could not determine file name for object file: {}", - obj_file.to_string_lossy() - ), - }; - - let name = match filename.to_str() { - Some(n) => { - if !n.ends_with(".o") { - bail!("Object file does not have `.o` suffix: {}", n); - } - - n.split('.').next().unwrap() - } - None => bail!( - "Object file name is not valid unicode: {}", - filename.to_string_lossy() - ), - }; - - if let Err(e) = gen_skel(name, obj_file, output, rustfmt_path) { - bail!( - "Failed to generate skeleton for {}: {}", - obj_file.to_string_lossy(), - e - ); - } - - Ok(()) -} diff --git a/source/tools/detect/net/rtrace/bpfskel/src/lib.rs b/source/tools/detect/net/rtrace/bpfskel/src/lib.rs deleted file mode 100644 index 607f11ddd6905d40acc1a8c29b2cafad60d81be8..0000000000000000000000000000000000000000 --- a/source/tools/detect/net/rtrace/bpfskel/src/lib.rs +++ /dev/null @@ -1,33 +0,0 @@ -use anyhow::{bail, Result}; -use std::path::Path; -use std::path::PathBuf; -mod gen; -mod btf; - -pub struct BpfSkel { - obj: Option, - rustfmt: PathBuf, -} - -impl BpfSkel { - pub fn new() -> Self { - BpfSkel { - obj: None, - rustfmt: "rustfmt".into(), - } - } - - pub fn obj>(&mut self, obj: P) -> &mut BpfSkel { - self.obj = Some(obj.as_ref().to_path_buf()); - self - } - - pub fn generate>(&mut self, 
output: P) -> Result<()> { - if let Some(obj) = &self.obj { - let out = output.as_ref().to_path_buf(); - gen::gen_single(obj, &out, &self.rustfmt)?; - return Ok(()); - } - bail!("no obj file") - } -} diff --git a/source/tools/detect/net/rtrace/bpfskel/src/main.rs b/source/tools/detect/net/rtrace/bpfskel/src/main.rs deleted file mode 100644 index c7d0609d34a3350143775caac3694326fe7cff5e..0000000000000000000000000000000000000000 --- a/source/tools/detect/net/rtrace/bpfskel/src/main.rs +++ /dev/null @@ -1,48 +0,0 @@ -use anyhow::{bail, Result}; -use bpfskel::BpfSkel; -use libbpf_cargo::SkeletonBuilder; -use std::path::{Path, PathBuf}; -use structopt::StructOpt; -use tempfile::tempdir; - -#[derive(Debug, StructOpt)] -pub struct BpfSkelCli { - #[structopt(short, long, help = "path of bpf source, such as *.bpf.c")] - bpfsrc: String, - #[structopt(short, long, default_value = "./skel.rs", help = "skel.rs location")] - skel: String, -} - -fn main() -> Result<()> { - let opts = BpfSkelCli::from_args(); - - let path = Path::new(&opts.bpfsrc); - if !path.exists() { - bail!("{} file not exist", opts.bpfsrc) - } - - let filename = path.file_name().unwrap().to_str().unwrap(); - if !opts.bpfsrc.ends_with(".bpf.c") { - bail!("file not end with .bpf.c") - } - let basename = filename[0..filename.len() - ".bpf.c".len()].to_owned(); - - let dir = tempdir()?; - let obj_path = dir.path().join(format!("{}.bpf.o", basename)); - let skel_path = dir.path().join(format!("{}.skel.rs", basename)); - - match SkeletonBuilder::new() - .source(&opts.bpfsrc) - .obj(&obj_path) - .build_and_generate(&skel_path) - { - Ok(()) => {} - Err(e) => { - println!("{}", e); - panic!() - } - } - - BpfSkel::new().obj(&obj_path).generate(&opts.skel).unwrap(); - Ok(()) -} diff --git a/source/tools/detect/net/rtrace/build.rs b/source/tools/detect/net/rtrace/build.rs new file mode 100644 index 0000000000000000000000000000000000000000..6aee264e2d9cd8fc394efe2dfd61dd3775f99a98 --- /dev/null +++ 
b/source/tools/detect/net/rtrace/build.rs @@ -0,0 +1,63 @@ +use libbpf_cargo::SkeletonBuilder; +use std::env; +use std::path::PathBuf; + +fn generate_skeleton(out: &mut PathBuf, name: &str) { + let c_path = format!("src/bpf/{}.bpf.c", name); + let rs_name = format!("{}.skel.rs", name); + out.push(&rs_name); + SkeletonBuilder::new() + .source(&c_path) + .clang_args("-Wno-compare-distinct-pointer-types") + .build_and_generate(&out) + .unwrap(); + + out.pop(); + println!("cargo:rerun-if-changed={c_path}"); +} + +fn generate_header(out: &mut PathBuf, name: &str) { + let header_path = format!("src/bpf/{}.h", name); + let rs_name = format!("{}.rs", name); + + out.push(&rs_name); + let bindings = bindgen::Builder::default() + .header(&header_path) + .parse_callbacks(Box::new(bindgen::CargoCallbacks)) + .generate() + .unwrap(); + bindings.write_to_file(&out).unwrap(); + out.pop(); + + println!("cargo:rerun-if-changed={header_path}"); +} + +fn main() { + let mut out = + PathBuf::from(env::var_os("OUT_DIR").expect("OUT_DIR must be set in build script")); + + generate_skeleton(&mut out, "ping_sender"); + generate_header(&mut out, "pingtrace"); + + generate_skeleton(&mut out, "userslow"); + generate_header(&mut out, "userslow"); + + generate_skeleton(&mut out, "queueslow"); + generate_header(&mut out, "queueslow"); + + generate_skeleton(&mut out, "drop"); + generate_header(&mut out, "drop"); + + generate_skeleton(&mut out, "retran"); + generate_header(&mut out, "retran"); + + generate_skeleton(&mut out, "virtio"); + generate_header(&mut out, "virtio"); + + generate_skeleton(&mut out, "socket"); + + generate_skeleton(&mut out, "tcpping"); + generate_header(&mut out, "tcpping"); + + generate_header(&mut out, "filter"); +} diff --git a/source/tools/detect/net/rtrace/builder/Cargo.toml b/source/tools/detect/net/rtrace/builder/Cargo.toml deleted file mode 100644 index 679cd87dbd7a4def7f6b79d757bc08a97ded1151..0000000000000000000000000000000000000000 --- 
a/source/tools/detect/net/rtrace/builder/Cargo.toml +++ /dev/null @@ -1,16 +0,0 @@ -[package] -name = "builder" -version = "0.1.0" -edition = "2021" - -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html - -[lib] -proc-macro = true - -[dependencies] -proc-macro2 = "1.0" -quote = "1.0" -syn = { version = "1.0", features = ["extra-traits"] } -libc = "0.2" - diff --git a/source/tools/detect/net/rtrace/builder/buildertest/Cargo.toml b/source/tools/detect/net/rtrace/builder/buildertest/Cargo.toml deleted file mode 100644 index 906b927fafede614d890138054472804433a7eea..0000000000000000000000000000000000000000 --- a/source/tools/detect/net/rtrace/builder/buildertest/Cargo.toml +++ /dev/null @@ -1,15 +0,0 @@ -[package] -name = "buildertest" -version = "0.1.0" -edition = "2021" - -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html - -[dependencies] -libbpf-rs = "0.19" -libbpf-sys = { version = "1.0.3" } -log = "0.4.14" -anyhow = "1.0.57" -once_cell = "1.8.0" -crossbeam-channel = "0.5" -builder = { path = "../../builder" } \ No newline at end of file diff --git a/source/tools/detect/net/rtrace/builder/buildertest/src/main.rs b/source/tools/detect/net/rtrace/builder/buildertest/src/main.rs deleted file mode 100644 index 5cc7b404bae581c562bbac6774f2c962004280b8..0000000000000000000000000000000000000000 --- a/source/tools/detect/net/rtrace/builder/buildertest/src/main.rs +++ /dev/null @@ -1,28 +0,0 @@ - -use builder::SkelBuilder; - -use anyhow::{bail, Result}; -use libbpf_rs::libbpf_sys; -use once_cell::sync::Lazy; -use std::collections::HashMap; -use std::sync::Mutex; -use std::time::Duration; - -struct IcmpSkel<'a> { - a: &'a HashMap, -} - -#[derive(SkelBuilder)] -pub struct Test<'a> { - pub skel: IcmpSkel<'a>, - rx: Option)>>, - delta: u64, - events: HashMap, -} - - - -fn main() { - - -} \ No newline at end of file diff --git a/source/tools/detect/net/rtrace/builder/src/lib.rs 
b/source/tools/detect/net/rtrace/builder/src/lib.rs deleted file mode 100644 index fe22ebffbbcbb2a5306ea57c512db60133081ecc..0000000000000000000000000000000000000000 --- a/source/tools/detect/net/rtrace/builder/src/lib.rs +++ /dev/null @@ -1,303 +0,0 @@ -use proc_macro::TokenStream; -use quote::{format_ident, quote, ToTokens}; -use syn::{ - parse_macro_input, punctuated::Punctuated, spanned::Spanned, token::Comma, Data, DataEnum, - DataStruct, DeriveInput, Field, Fields, FieldsNamed, Ident, Meta, MetaList, NestedMeta, Path, - Result, Variant, -}; - -macro_rules! fail { - ($msg:literal) => { - syn::Error::new(proc_macro2::Span::call_site(), $msg) - .to_compile_error() - .into() - }; - ($tkns:ident, $msg:literal) => { - syn::Error::new_spanned($tkns, $msg) - .to_compile_error() - .into() - }; -} - -/// SkelBuilder: generate builder for bpf skel -/// your original structure should have below fields: -/// 1. MUST HAVE: pub skel: [NAME]Skel<'a>, -/// 2. OPTIONAL : rx: crossbeam_channel::Receiver<(usize, Vec)>, -#[proc_macro_derive(SkelBuilder)] -pub fn skelbuilder_derive(input: TokenStream) -> TokenStream { - let syn = parse_macro_input!(input as DeriveInput); - let gen = impl_skelbuilder(&syn); - gen.into() -} - -fn impl_skelbuilder(syn: &DeriveInput) -> proc_macro2::TokenStream { - let (struct_ident, fields) = match syn.data { - Data::Struct(ref datastruct) => (&syn.ident, &datastruct.fields), - _ => { - return syn::Error::new(syn.span(), "expected struct type") - .into_compile_error() - .into(); - } - }; - - // skel name default is struct name - let mut skel_name = struct_ident.to_string(); - - let named_fields = if let Data::Struct(DataStruct { - fields: Fields::Named(FieldsNamed { ref named, .. }), - .. 
- }) = syn.data - { - named - } else { - return fail!("derive builder only supports structs with named fields"); - }; - - let builder_name = format_ident!("{}Builder", skel_name.to_string()); - - let build_method_token = impl_skelbuiler_build(struct_ident, &builder_name); - let builder_token = impl_skelbuilder_builder(struct_ident, &skel_name, named_fields); - - quote! { - - static GLOBAL_TX: Lazy)>>>> = - Lazy::new(|| Mutex::new(None)); - - fn handle_event(_cpu: i32, data: &[u8]) { - let event = Vec::from(data); - GLOBAL_TX - .lock() - .unwrap() - .as_ref() - .unwrap() - .send((_cpu as usize, event)) - .unwrap(); - } - - fn handle_lost_events(cpu: i32, count: u64) { - eprintln!("Lost {} events on CPU {}", count, cpu); - } - - #builder_token - #build_method_token - } -} - -fn impl_skelbuiler_build( - struct_ident: &syn::Ident, - builder_name: &Ident, -) -> proc_macro2::TokenStream { - quote! { - impl<'a> #struct_ident <'a>{ - pub fn builder() -> #builder_name <'a> { - #builder_name { - openskel: None, - skel: None, - rx: None, - } - } - } - } -} - -fn impl_skelbuilder_builder( - struct_ident: &Ident, - skel_name: &String, - named_fields: &Punctuated, -) -> proc_macro2::TokenStream { - let builder_name = format_ident!("{}Builder", skel_name.to_string()); - let openskel_name = format_ident!("Open{}Skel", skel_name); - let skelskel_name = format_ident!("{}Skel", skel_name); - - let open_method = impl_skelbuilder_builder_methods_open(skel_name); - let load_method = impl_skelbuilder_builder_methods_load(); - let open_perf_method = impl_skelbuilder_builder_methods_open_perf(); - let attach_method = impl_skelbuilder_builder_methods_attach(); - let build_method = impl_skelbuilder_builder_methods_build(struct_ident, named_fields); - - quote! 
{ - pub struct #builder_name<'a> { - pub openskel: Option<#openskel_name<'a>>, - pub skel: Option<#skelskel_name<'a>>, - rx: Option)>>, - } - - impl <'a> #builder_name<'a> { - - #open_method - #load_method - #open_perf_method - #attach_method - #build_method - - } - } -} - -fn impl_skelbuilder_builder_methods_open(skel_name: &String) -> proc_macro2::TokenStream { - let skel_builder_ident = format_ident!("{}SkelBuilder", skel_name); - quote! { - pub fn open(&mut self, debug: bool, btf: &Option) -> &mut Self { - if let Some(openskel) = &self.openskel { - panic!("don't try to open skeleton object twice") - } - let btf_cstring; - let mut btf_cstring_ptr = std::ptr::null(); - if let Some(x) = btf { - btf_cstring = std::ffi::CString::new(x.clone()) - .expect(&format!("failed to create CString from :{}", x)); - btf_cstring_ptr = btf_cstring.as_ptr(); - } - - let mut skel_builder = #skel_builder_ident::default(); - skel_builder.obj_builder.debug(debug); - let mut open_opts = skel_builder.obj_builder.opts(std::ptr::null()); - open_opts.btf_custom_path = btf_cstring_ptr; - self.openskel = Some( - skel_builder - .open_opts(open_opts) - .expect("failed to open target skeleton object"), - ); - log::debug!( - "open skeleton object sucessfully, btf: {:?}, debug: {}", - btf, - debug - ); - self - } - } -} - -fn impl_skelbuilder_builder_methods_load() -> proc_macro2::TokenStream { - quote! 
{ - // load with some bpf program disabled or enabled - pub fn load_enabled(&mut self, enabled: Vec<(&str, bool)>) -> &mut Self { - let mut has_enabled = false; - let mut has_disabled = false; - for enable in &enabled { - if enable.1 { - has_enabled = true; - } else { - has_disabled = true; - } - } - - if let Some(openskel) = &mut self.openskel { - for x in &enabled { - if has_enabled && !has_disabled { - log::debug!("disable autoload of all bpf program"); - for x in openskel.obj.progs_iter_mut() { - x.set_autoload(false).expect("failed to set autoload"); - } - } - - if let Some(y) = openskel.obj.prog_mut(x.0) { - log::debug!("enabled({}) autoload of {} bpf program", x.1, x.0); - y.set_autoload(x.1).expect("failed to set autoload"); - continue; - } - log::error!("failed to find bpf program: {}", x.0); - } - } - self.load() - } - - // disabled autoload of all bpf program - pub fn load_nothing(&mut self) -> &mut Self { - log::debug!("disable autoload of all bpf program"); - if let Some(openskel) = &mut self.openskel { - for x in openskel.obj.progs_iter_mut() { - x.set_autoload(false).expect("failed to set autoload"); - } - } - self.load() - } - - // enabled autoload of all bpf program - pub fn load(&mut self) -> &mut Self { - if let Some(mut openskel) = self.openskel.take() { - log::debug!("start loading bpf program"); - self.skel = Some(openskel.load().expect("failed to load bpf program")); - return self; - } - panic!("open skeleton object first") - } - } -} - -fn impl_skelbuilder_builder_methods_attach() -> proc_macro2::TokenStream { - quote! { - pub fn attach(&mut self) -> &mut Self { - if let Some(skel) = &mut self.skel { - log::debug!("start attaching bpf program"); - skel.attach().expect("failed to attach bpf program"); - return self; - } - panic!("Before attach, you should open skeleton object and load bpf program first") - } - } -} - -fn impl_skelbuilder_builder_methods_open_perf() -> proc_macro2::TokenStream { - quote! 
{ - // open perf buffer and inital an perf poll thread with 200ms timeout - pub fn open_perf(&mut self) -> &mut Self { - if self.skel.is_none() { - panic!("Before open perf buffer, you should load bpf program first") - } - - let mut tmp_rx = None; - if let Some(skel) = &mut self.skel { - let (tx, rx) = crossbeam_channel::unbounded(); - *GLOBAL_TX.lock().unwrap() = Some(tx); - tmp_rx = Some(rx); - let perf = utils::perf::PerfBufferBuilder::new(skel.maps_mut().perf_map()) - .sample_cb(handle_event) - .lost_cb(handle_lost_events) - .build() - .expect("failed to open perf buffer"); - - log::debug!("start perf thread"); - std::thread::spawn(move || loop { - perf.poll(std::time::Duration::from_millis(200)).unwrap(); - }); - } - - self.rx = tmp_rx; - self - } - } -} - -// implement builder methods: build -fn impl_skelbuilder_builder_methods_build( - struct_ident: &Ident, - named_fields: &Punctuated, -) -> proc_macro2::TokenStream { - let init_fields = named_fields.iter().map(|field| { - let name = &field.ident; - - let mut skip = false; - if let Some(ident) = &field.ident { - let field_name = ident.to_string(); - if field_name.contains("skel") || field_name.contains("rx") { - skip = true; - } - } - - if !skip { - quote! { #name: Default::default(), } - } else { - quote! {} - } - }); - quote! 
{ - pub fn build(&mut self) -> #struct_ident<'a> { - #struct_ident { - skel: self.skel.take().unwrap(), - rx: self.rx.take(), - #(#init_fields)* - } - } - } -} diff --git a/source/tools/detect/net/rtrace/cenum-rs/.gitignore b/source/tools/detect/net/rtrace/cenum-rs/.gitignore deleted file mode 100644 index ddc83bed8348fb6e01271f8a8266bfb1a4c1f3b8..0000000000000000000000000000000000000000 --- a/source/tools/detect/net/rtrace/cenum-rs/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -/target -src/main.rs \ No newline at end of file diff --git a/source/tools/detect/net/rtrace/cenum-rs/Cargo.toml b/source/tools/detect/net/rtrace/cenum-rs/Cargo.toml deleted file mode 100644 index 0c2dadabb1ee598bedd4d31f72ad8f32d320dddc..0000000000000000000000000000000000000000 --- a/source/tools/detect/net/rtrace/cenum-rs/Cargo.toml +++ /dev/null @@ -1,15 +0,0 @@ -[package] -name = "cenum-rs" -version = "0.1.0" -edition = "2021" - -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html - -[lib] -proc-macro = true - -[dependencies] -proc-macro2 = "1.0" -quote = "1.0" -syn = { version = "1.0", features = ["extra-traits"] } -libc = "0.2" \ No newline at end of file diff --git a/source/tools/detect/net/rtrace/cenum-rs/cargo-expand.sh b/source/tools/detect/net/rtrace/cenum-rs/cargo-expand.sh deleted file mode 100755 index c14a6ac93402b6405304bcbc80706e8f0c45644a..0000000000000000000000000000000000000000 --- a/source/tools/detect/net/rtrace/cenum-rs/cargo-expand.sh +++ /dev/null @@ -1,3 +0,0 @@ - -# to see generated code -cargo expand --bin cenum-rs \ No newline at end of file diff --git a/source/tools/detect/net/rtrace/cenum-rs/rust-toolchain b/source/tools/detect/net/rtrace/cenum-rs/rust-toolchain deleted file mode 100644 index 07ade694b1a3cc9fb96d96d47b3c28719673fe23..0000000000000000000000000000000000000000 --- a/source/tools/detect/net/rtrace/cenum-rs/rust-toolchain +++ /dev/null @@ -1 +0,0 @@ -nightly \ No newline at end of file diff --git 
a/source/tools/detect/net/rtrace/cenum-rs/src/lib.rs b/source/tools/detect/net/rtrace/cenum-rs/src/lib.rs deleted file mode 100644 index f195f50ee39fe7f1291e3de5cda101ef6820f41c..0000000000000000000000000000000000000000 --- a/source/tools/detect/net/rtrace/cenum-rs/src/lib.rs +++ /dev/null @@ -1,156 +0,0 @@ -use proc_macro::TokenStream; -use quote::quote; -use syn::{ - parse_macro_input, punctuated::Punctuated, spanned::Spanned, Data, DataEnum, DeriveInput, Meta, - MetaList, NestedMeta, Path, Result, Variant, -}; - -// Generates the TryFrom, From and Display impl -#[proc_macro_derive(CEnum, attributes(cenum))] -pub fn cenum_derive(input: TokenStream) -> TokenStream { - let syn = parse_macro_input!(input as DeriveInput); - let gen = impl_cenum(&syn); - gen.into() -} - -fn impl_cenum(syn: &DeriveInput) -> proc_macro2::TokenStream { - // default enum type is u32 - let default_type_token = "u32".parse().unwrap(); - let mut enum_type_token = &default_type_token; - if let Some(attr) = syn.attrs.first() { - enum_type_token = &attr.tokens; - } - - let (enum_ident, variants) = match syn.data { - Data::Enum(ref dataenum) => (&syn.ident, &dataenum.variants), - _ => { - return syn::Error::new(syn.span(), "expected enum type") - .into_compile_error() - .into(); - } - }; - - let mut tryfrom_rust2c_token = Vec::new(); - let mut tryfrom_c2rust_token = Vec::new(); - let mut from_rust2c_token = Vec::new(); - let mut from_c2rust_token = Vec::new(); - let mut display_token = Vec::new(); - - for variant in variants { - let ident = &variant.ident; - match cenum_attr_value_display(variant) { - Ok((Some(value_token), Some(display_str))) => { - tryfrom_rust2c_token.push(quote! { - #enum_ident::#ident => Ok(#value_token), - }); - - from_rust2c_token.push(quote! { - #enum_ident::#ident => #value_token, - }); - - tryfrom_c2rust_token.push(quote! { - #value_token=> Ok( #enum_ident::#ident), - }); - - from_c2rust_token.push(quote! 
{ - #value_token=> #enum_ident::#ident, - }); - - display_token.push(quote! { - #enum_ident::#ident => #display_str, - }); - } - _ => { - return syn::Error::new(syn.span(), "expected value or display attribute") - .into_compile_error() - .into(); - } - } - } - - let output = quote! { - impl std::convert::TryFrom<#enum_type_token> for #enum_ident { - type Error = String; - fn try_from(val: #enum_type_token) -> Result { - match val { - #(#tryfrom_c2rust_token)* - _ => Err(format!("convert {} failurely", val)) - } - } - } - - // impl std::convert::From<#enum_type_token> for #enum_ident { - // fn from(val: #enum_type_token) -> Self { - // match val { - // #(#from_c2rust_token)* - // _ => panic!("failed to convert") - // } - // } - // } - - impl std::convert::TryFrom<#enum_ident> for #enum_type_token { - type Error = &'static str; - fn try_from(ident: #enum_ident) -> Result { - match ident { - #(#tryfrom_rust2c_token)* - _ => Err("failed to convert") - } - } - } - - // impl std::convert::From<#enum_ident> for #enum_type_token { - // fn from(ident: #enum_ident) -> Self { - // match ident { - // #(#from_rust2c_token)* - // _ => panic!("failed to convert") - // } - // } - // } - - impl std::fmt::Display for #enum_ident { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - let out = match self { - #(#display_token)* - }; - write!(f, "{}", out) - } - } - }; - output -} - -// Punctuated -fn cenum_attr_value_display( - variant: &Variant, -) -> Result<(Option, Option)> { - let mut value_token = None; - let mut display_str = None; - let attr = variant.attrs.first().unwrap().parse_meta()?; - - if let Meta::List(MetaList { - ref path, - ref nested, - .. 
- }) = attr - { - if let Some(p) = path.segments.first() { - if p.ident == "cenum" { - for nest in nested { - if let NestedMeta::Meta(syn::Meta::NameValue(kv)) = nest { - if kv.path.is_ident("value") { - if let syn::Lit::Str(ref ident_str) = kv.lit { - value_token = Some(ident_str.value().parse().unwrap()); - } - } else if kv.path.is_ident("display") { - if let syn::Lit::Str(ref ident_str) = kv.lit { - display_str = Some(ident_str.value()); - } - } - } - } - } - } - } - Ok((value_token, display_str)) -} - diff --git a/source/tools/detect/net/rtrace/cenum-rs/tests/cenum_test.rs b/source/tools/detect/net/rtrace/cenum-rs/tests/cenum_test.rs deleted file mode 100644 index 7e14dfd7680102ebe1ee3d6a06f29b93361ea71b..0000000000000000000000000000000000000000 --- a/source/tools/detect/net/rtrace/cenum-rs/tests/cenum_test.rs +++ /dev/null @@ -1,28 +0,0 @@ -use cenum_rs::CEnum; - -#[test] -fn test_basic() { - #[derive(Debug, PartialEq, CEnum)] - #[cenum(i32)] - enum ProtocolType { - #[cenum(value = "libc::IPPROTO_ICMP", display = "Icmp")] - Icmp, - #[cenum(value = "libc::IPPROTO_TCP", display = "Tcp")] - Tcp, - #[cenum(value = "libc::IPPROTO_UDP", display = "Udp")] - Udp, - } - - assert_eq!( - ProtocolType::Icmp, - ProtocolType::try_from(libc::IPPROTO_ICMP).unwrap() - ); - assert_eq!( - ProtocolType::Tcp, - ProtocolType::try_from(libc::IPPROTO_TCP).unwrap() - ); - assert_eq!( - ProtocolType::Udp, - ProtocolType::try_from(libc::IPPROTO_UDP).unwrap() - ); -} diff --git a/source/tools/detect/net/rtrace/common.h b/source/tools/detect/net/rtrace/common.h deleted file mode 100644 index 82d4cd8adc24444f0937621fe09c09d07d60925e..0000000000000000000000000000000000000000 --- a/source/tools/detect/net/rtrace/common.h +++ /dev/null @@ -1,255 +0,0 @@ -#ifndef __COMMON_H -#define __COMMON_H - -#ifndef u8 -typedef unsigned char u8; -#endif - -#ifndef u16 -typedef unsigned short int u16; -#endif - -#ifndef u32 -typedef unsigned int u32; -#endif - -#ifndef u64 -typedef long long unsigned int 
u64; -#endif - -#define MAX_STACK_DEPTH 20 // max depth of each stack trace to track - -enum -{ - DROP_KFREE_SKB = 0, - DROP_TCP_DROP, - DROP_IPTABLES_DROP, - DROP_NFCONNTRACK_DROP, - LATENCY_EVENT, - CONNECT_LATENCY_EVENT, - EVENT_UNKNOWN, -}; - -enum -{ - ERR_OK = 0, - ERR_PROTOCOL_NOT_DETERMINED, - ERR_PROTOCOL_NOT_SUPPORT, -}; - -struct addr_pair -{ - u32 saddr; - u32 daddr; - u16 sport; - u16 dport; -}; - -struct event -{ - // Event Type - u8 type; - // Sock state - u8 state; - u8 protocol; - u8 error; - // process command - u8 comm[16]; - // stack id - u32 stackid; - u32 pid; - u64 ts; - struct addr_pair ap; - - // Don't move anonymous structs before and after - union - { - // 1. for latency module - struct - { - u32 pidtime_array_idx; - u32 socktime_array_idx; - u64 queue_ts; - u64 rcv_ts; - }; - - // 2. for drop module - struct - { - // iptables table name - u8 name[32]; - // iptables hook chain name - u32 hook; - u8 sk_protocol; - struct addr_pair skap; - } drop_params; - - // 3. for abnormal module - struct - { - u32 i_ino; - // queue - // length of accept queue - u32 sk_ack_backlog; - // length of syn queue - u32 icsk_accept_queue; - u32 sk_max_ack_backlog; - - // memory - u32 sk_wmem_queued; - u32 sndbuf; - u32 rmem_alloc; - u32 sk_rcvbuf; - - u32 drop; - u32 retran; - u32 ooo; - } abnormal; - - // 4. for connect latency - struct - { - u64 sock; - } connectlatency; - }; - // rcvnxt -}; - -struct filter -{ - u32 pid; - u16 protocol; - /* get latency distribution or not? 
*/ - u8 distribution; - u64 threshold; - struct addr_pair ap; -}; - -struct onesecond -{ - u64 ts; // initial timestamp - u32 clear; - u32 bitmap[32]; // 1 second bitmap -}; - -struct seconds4_ring -{ - struct onesecond os[4]; - u32 idx; -}; - -#ifdef __VMLINUX_H__ - -// return u64 -#define ns() bpf_ktime_get_ns() -// return u32 -#define pid() (bpf_get_current_pid_tgid() >> 32) -// return u32 -#define tid() ((u32)bpf_get_current_pid_tgid()) -#define COMM(comm) bpf_get_current_comm(comm, sizeof(comm)) -#define comm(c) COMM(c) -// return u32 -#define cpu() bpf_get_smp_processor_id() - -// https://github.com/aquasecurity/tracee/blob/main/pkg/ebpf/c/tracee.bpf.c -#define BPF_MAP(_name, _type, _key_type, _value_type, _max_entries) \ - struct \ - { \ - __uint(type, _type); \ - __uint(max_entries, _max_entries); \ - __type(key, _key_type); \ - __type(value, _value_type); \ - } _name SEC(".maps"); - -#define BPF_HASH(_name, _key_type, _value_type, _max_entries) \ - BPF_MAP(_name, BPF_MAP_TYPE_HASH, _key_type, _value_type, _max_entries) - -#define BPF_LRU_HASH(_name, _key_type, _value_type, _max_entries) \ - BPF_MAP(_name, BPF_MAP_TYPE_LRU_HASH, _key_type, _value_type, _max_entries) - -#define BPF_ARRAY(_name, _value_type, _max_entries) \ - BPF_MAP(_name, BPF_MAP_TYPE_ARRAY, u32, _value_type, _max_entries) - -#define BPF_PERCPU_ARRAY(_name, _value_type, _max_entries) \ - BPF_MAP(_name, BPF_MAP_TYPE_PERCPU_ARRAY, u32, _value_type, _max_entries) - -#define BPF_PROG_ARRAY(_name, _max_entries) \ - BPF_MAP(_name, BPF_MAP_TYPE_PROG_ARRAY, u32, u32, _max_entries) - -#define BPF_PERF_OUTPUT(_name, _max_entries) \ - BPF_MAP(_name, BPF_MAP_TYPE_PERF_EVENT_ARRAY, int, __u32, _max_entries) - -// stack traces: the value is 1 big byte array of the stack addresses -typedef __u64 stack_trace_t[MAX_STACK_DEPTH]; -#define BPF_STACK_TRACE(_name, _max_entries) \ - BPF_MAP(_name, BPF_MAP_TYPE_STACK_TRACE, u32, stack_trace_t, _max_entries) - - -#define READ_KERN(ptr) \ - ({ \ - typeof(ptr) _val; 
\ - __builtin_memset((void *)&_val, 0, sizeof(_val)); \ - bpf_core_read((void *)&_val, sizeof(_val), &ptr); \ - _val; \ - }) - -#define READ_USER(ptr) \ - ({ \ - typeof(ptr) _val; \ - __builtin_memset((void *)&_val, 0, sizeof(_val)); \ - bpf_core_read_user((void *)&_val, sizeof(_val), &ptr); \ - _val; \ - }) - -struct -{ - __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); - __uint(key_size, sizeof(u32)); - __uint(value_size, sizeof(u32)); -} perf_map SEC(".maps"); - -static __always_inline void set_addr_pair_by_sock(struct sock *sk, struct addr_pair *ap) -{ - bpf_probe_read(&ap->daddr, sizeof(ap->daddr), &sk->__sk_common.skc_daddr); - bpf_probe_read(&ap->dport, sizeof(ap->dport), &sk->__sk_common.skc_dport); - bpf_probe_read(&ap->saddr, sizeof(ap->saddr), &sk->__sk_common.skc_rcv_saddr); - bpf_probe_read(&ap->sport, sizeof(ap->sport), &sk->__sk_common.skc_num); - ap->dport = bpf_ntohs(ap->dport); -} - -static __always_inline void onesecond_insert(volatile struct onesecond *os, u64 ns) -{ - u32 msec = ns / (1000 * 1000); - volatile u32 idx = msec / 32; - - if (os->clear & (1 << idx)) - { - os->bitmap[idx & 0x1f] |= (1 << (msec & 0x1f)); - } - else - { - os->clear |= (1 << idx); - os->bitmap[idx & 0x1f] = (1 << (msec & 0x1f)); - } -} - -static __always_inline void seconds4_ring_insert(struct seconds4_ring *sr, u64 ts) -{ - u32 idx = sr->idx; - u32 prets = sr->os[idx & 0x3].ts; - u64 delta = ts - prets; - if (delta >= 1000 * 1000 * 1000) - { - idx++; - sr->idx = idx; - sr->os[idx & 0x3].ts = ts; - sr->os[idx & 0x3].clear = 0; - sr->os[idx & 0x3].bitmap[0] = 0; - } - - onesecond_insert(&sr->os[idx & 0x3], delta); -} - -#endif - -#endif \ No newline at end of file diff --git a/source/tools/detect/net/rtrace/drop/Cargo.toml b/source/tools/detect/net/rtrace/drop/Cargo.toml deleted file mode 100644 index c94cc6a481300150f402ea3397317029f5be5f31..0000000000000000000000000000000000000000 --- a/source/tools/detect/net/rtrace/drop/Cargo.toml +++ /dev/null @@ -1,27 +0,0 @@ 
-[package] -name = "drop" -version = "0.1.0" -edition = "2021" - -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html - -[dependencies] -libbpf-rs = "0.19" -libbpf-sys = { version = "1.0.3" } -log = "0.4.14" -libc = "0.2.125" -anyhow = "1.0.57" -once_cell = "1.8.0" -crossbeam-channel = "0.5" -cenum-rs = { git = "https://github.com/chengshuyi/cenum-rs.git" } -plain = "0.2" -utils = { path = "../utils" } -eutils-rs = { git = "https://github.com/chengshuyi/eutils-rs.git" } -builder = { path = "../builder" } -chrono = "0.4.19" -serde_json = "1.0" -serde = {version = "1.0", features = ["derive"]} - -[build-dependencies] -libbpf-cargo = "0.13" -bindgen = "0.60.1" \ No newline at end of file diff --git a/source/tools/detect/net/rtrace/drop/build.rs b/source/tools/detect/net/rtrace/drop/build.rs deleted file mode 100644 index 3c217787399dc1ab655f10195716e19226c7bf5c..0000000000000000000000000000000000000000 --- a/source/tools/detect/net/rtrace/drop/build.rs +++ /dev/null @@ -1,28 +0,0 @@ -use std::{env, path::PathBuf}; -use libbpf_cargo::SkeletonBuilder; -const SRC: &str = "src/bpf/drop.bpf.c"; -const HDR: &str = "src/bpf/drop.h"; - -fn main() { - let mut out = PathBuf::from(env::var_os("OUT_DIR").expect("OUT_DIR must be set in build script")); - out.push("drop.skel.rs"); - SkeletonBuilder::new() - .source(SRC) - .build_and_generate(&out) - .unwrap(); - - // let bindings = bindgen::Builder::default() - // .header(HDR) - // .parse_callbacks(Box::new(bindgen::CargoCallbacks)) - // .generate() - // .expect("Unable to generate bindings for drop crate"); - - // out.pop(); - // out.push("bindings.rs"); - // bindings - // .write_to_file(&out) - // .expect("Couldn't write bindings for drop crate!"); - - println!("cargo:rerun-if-changed={}", SRC); - println!("cargo:rerun-if-changed={}", HDR); -} diff --git a/source/tools/detect/net/rtrace/drop/src/bpf/bindings.rs b/source/tools/detect/net/rtrace/drop/src/bpf/bindings.rs deleted file mode 
100644 index 00119a3e0bc05b5ade28a5c41e4592230b4def81..0000000000000000000000000000000000000000 --- a/source/tools/detect/net/rtrace/drop/src/bpf/bindings.rs +++ /dev/null @@ -1,1157 +0,0 @@ -/* automatically generated by rust-bindgen 0.60.1 */ - -pub const MAX_STACK_DEPTH: u32 = 20; -pub const IPTABLE_FILTER: u32 = 1; -pub const IPTABLE_MANGLE: u32 = 2; -pub const IPTABLE_RAW: u32 = 3; -pub const ARPTABLE_FILTER: u32 = 4; -pub const IPTABLE_SECURITY: u32 = 5; -pub const IPTABLE_NAT: u32 = 6; -pub type u8_ = ::std::os::raw::c_uchar; -pub type u16_ = ::std::os::raw::c_ushort; -pub type u32_ = ::std::os::raw::c_uint; -pub type u64_ = ::std::os::raw::c_ulonglong; -pub const DROP_KFREE_SKB: _bindgen_ty_1 = 0; -pub const DROP_TCP_DROP: _bindgen_ty_1 = 1; -pub const DROP_IPTABLES_DROP: _bindgen_ty_1 = 2; -pub const DROP_NFCONNTRACK_DROP: _bindgen_ty_1 = 3; -pub const LATENCY_EVENT: _bindgen_ty_1 = 4; -pub const CONNECT_LATENCY_EVENT: _bindgen_ty_1 = 5; -pub const EVENT_UNKNOWN: _bindgen_ty_1 = 6; -pub type _bindgen_ty_1 = ::std::os::raw::c_uint; -pub const ERR_OK: _bindgen_ty_2 = 0; -pub const ERR_PROTOCOL_NOT_DETERMINED: _bindgen_ty_2 = 1; -pub const ERR_PROTOCOL_NOT_SUPPORT: _bindgen_ty_2 = 2; -pub type _bindgen_ty_2 = ::std::os::raw::c_uint; -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct addr_pair { - pub saddr: u32_, - pub daddr: u32_, - pub sport: u16_, - pub dport: u16_, -} -#[test] -fn bindgen_test_layout_addr_pair() { - assert_eq!( - ::std::mem::size_of::(), - 12usize, - concat!("Size of: ", stringify!(addr_pair)) - ); - assert_eq!( - ::std::mem::align_of::(), - 4usize, - concat!("Alignment of ", stringify!(addr_pair)) - ); - fn test_field_saddr() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).saddr) as usize - ptr as usize - }, - 0usize, - concat!( - "Offset of field: ", - stringify!(addr_pair), - "::", - stringify!(saddr) - ) - ); - } - test_field_saddr(); - fn 
test_field_daddr() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).daddr) as usize - ptr as usize - }, - 4usize, - concat!( - "Offset of field: ", - stringify!(addr_pair), - "::", - stringify!(daddr) - ) - ); - } - test_field_daddr(); - fn test_field_sport() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).sport) as usize - ptr as usize - }, - 8usize, - concat!( - "Offset of field: ", - stringify!(addr_pair), - "::", - stringify!(sport) - ) - ); - } - test_field_sport(); - fn test_field_dport() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).dport) as usize - ptr as usize - }, - 10usize, - concat!( - "Offset of field: ", - stringify!(addr_pair), - "::", - stringify!(dport) - ) - ); - } - test_field_dport(); -} -#[repr(C)] -#[derive(Copy, Clone)] -pub struct event { - pub type_: u8_, - pub state: u8_, - pub protocol: u8_, - pub error: u8_, - pub comm: [u8_; 16usize], - pub stackid: u32_, - pub pid: u32_, - pub ts: u64_, - pub ap: addr_pair, - pub __bindgen_anon_1: event__bindgen_ty_1, -} -#[repr(C)] -#[derive(Copy, Clone)] -pub union event__bindgen_ty_1 { - pub __bindgen_anon_1: event__bindgen_ty_1__bindgen_ty_1, - pub drop_params: event__bindgen_ty_1__bindgen_ty_2, - pub abnormal: event__bindgen_ty_1__bindgen_ty_3, - pub connectlatency: event__bindgen_ty_1__bindgen_ty_4, -} -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct event__bindgen_ty_1__bindgen_ty_1 { - pub pidtime_array_idx: u32_, - pub socktime_array_idx: u32_, - pub queue_ts: u64_, - pub rcv_ts: u64_, -} -#[test] -fn bindgen_test_layout_event__bindgen_ty_1__bindgen_ty_1() { - assert_eq!( - ::std::mem::size_of::(), - 24usize, - concat!("Size of: ", stringify!(event__bindgen_ty_1__bindgen_ty_1)) - ); - assert_eq!( - 
::std::mem::align_of::(), - 8usize, - concat!( - "Alignment of ", - stringify!(event__bindgen_ty_1__bindgen_ty_1) - ) - ); - fn test_field_pidtime_array_idx() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).pidtime_array_idx) as usize - ptr as usize - }, - 0usize, - concat!( - "Offset of field: ", - stringify!(event__bindgen_ty_1__bindgen_ty_1), - "::", - stringify!(pidtime_array_idx) - ) - ); - } - test_field_pidtime_array_idx(); - fn test_field_socktime_array_idx() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).socktime_array_idx) as usize - ptr as usize - }, - 4usize, - concat!( - "Offset of field: ", - stringify!(event__bindgen_ty_1__bindgen_ty_1), - "::", - stringify!(socktime_array_idx) - ) - ); - } - test_field_socktime_array_idx(); - fn test_field_queue_ts() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).queue_ts) as usize - ptr as usize - }, - 8usize, - concat!( - "Offset of field: ", - stringify!(event__bindgen_ty_1__bindgen_ty_1), - "::", - stringify!(queue_ts) - ) - ); - } - test_field_queue_ts(); - fn test_field_rcv_ts() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).rcv_ts) as usize - ptr as usize - }, - 16usize, - concat!( - "Offset of field: ", - stringify!(event__bindgen_ty_1__bindgen_ty_1), - "::", - stringify!(rcv_ts) - ) - ); - } - test_field_rcv_ts(); -} -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct event__bindgen_ty_1__bindgen_ty_2 { - pub name: [u8_; 32usize], - pub hook: u32_, - pub sk_protocol: u8_, - pub skap: addr_pair, -} -#[test] -fn bindgen_test_layout_event__bindgen_ty_1__bindgen_ty_2() { - assert_eq!( - ::std::mem::size_of::(), - 52usize, - concat!("Size of: ", 
stringify!(event__bindgen_ty_1__bindgen_ty_2)) - ); - assert_eq!( - ::std::mem::align_of::(), - 4usize, - concat!( - "Alignment of ", - stringify!(event__bindgen_ty_1__bindgen_ty_2) - ) - ); - fn test_field_name() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).name) as usize - ptr as usize - }, - 0usize, - concat!( - "Offset of field: ", - stringify!(event__bindgen_ty_1__bindgen_ty_2), - "::", - stringify!(name) - ) - ); - } - test_field_name(); - fn test_field_hook() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).hook) as usize - ptr as usize - }, - 32usize, - concat!( - "Offset of field: ", - stringify!(event__bindgen_ty_1__bindgen_ty_2), - "::", - stringify!(hook) - ) - ); - } - test_field_hook(); - fn test_field_sk_protocol() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).sk_protocol) as usize - ptr as usize - }, - 36usize, - concat!( - "Offset of field: ", - stringify!(event__bindgen_ty_1__bindgen_ty_2), - "::", - stringify!(sk_protocol) - ) - ); - } - test_field_sk_protocol(); - fn test_field_skap() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).skap) as usize - ptr as usize - }, - 40usize, - concat!( - "Offset of field: ", - stringify!(event__bindgen_ty_1__bindgen_ty_2), - "::", - stringify!(skap) - ) - ); - } - test_field_skap(); -} -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct event__bindgen_ty_1__bindgen_ty_3 { - pub i_ino: u32_, - pub sk_ack_backlog: u32_, - pub icsk_accept_queue: u32_, - pub sk_max_ack_backlog: u32_, - pub sk_wmem_queued: u32_, - pub sndbuf: u32_, - pub rmem_alloc: u32_, - pub sk_rcvbuf: u32_, - pub drop: u32_, - pub retran: u32_, - pub ooo: u32_, -} -#[test] -fn 
bindgen_test_layout_event__bindgen_ty_1__bindgen_ty_3() { - assert_eq!( - ::std::mem::size_of::(), - 44usize, - concat!("Size of: ", stringify!(event__bindgen_ty_1__bindgen_ty_3)) - ); - assert_eq!( - ::std::mem::align_of::(), - 4usize, - concat!( - "Alignment of ", - stringify!(event__bindgen_ty_1__bindgen_ty_3) - ) - ); - fn test_field_i_ino() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).i_ino) as usize - ptr as usize - }, - 0usize, - concat!( - "Offset of field: ", - stringify!(event__bindgen_ty_1__bindgen_ty_3), - "::", - stringify!(i_ino) - ) - ); - } - test_field_i_ino(); - fn test_field_sk_ack_backlog() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).sk_ack_backlog) as usize - ptr as usize - }, - 4usize, - concat!( - "Offset of field: ", - stringify!(event__bindgen_ty_1__bindgen_ty_3), - "::", - stringify!(sk_ack_backlog) - ) - ); - } - test_field_sk_ack_backlog(); - fn test_field_icsk_accept_queue() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).icsk_accept_queue) as usize - ptr as usize - }, - 8usize, - concat!( - "Offset of field: ", - stringify!(event__bindgen_ty_1__bindgen_ty_3), - "::", - stringify!(icsk_accept_queue) - ) - ); - } - test_field_icsk_accept_queue(); - fn test_field_sk_max_ack_backlog() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).sk_max_ack_backlog) as usize - ptr as usize - }, - 12usize, - concat!( - "Offset of field: ", - stringify!(event__bindgen_ty_1__bindgen_ty_3), - "::", - stringify!(sk_max_ack_backlog) - ) - ); - } - test_field_sk_max_ack_backlog(); - fn test_field_sk_wmem_queued() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr 
= uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).sk_wmem_queued) as usize - ptr as usize - }, - 16usize, - concat!( - "Offset of field: ", - stringify!(event__bindgen_ty_1__bindgen_ty_3), - "::", - stringify!(sk_wmem_queued) - ) - ); - } - test_field_sk_wmem_queued(); - fn test_field_sndbuf() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).sndbuf) as usize - ptr as usize - }, - 20usize, - concat!( - "Offset of field: ", - stringify!(event__bindgen_ty_1__bindgen_ty_3), - "::", - stringify!(sndbuf) - ) - ); - } - test_field_sndbuf(); - fn test_field_rmem_alloc() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).rmem_alloc) as usize - ptr as usize - }, - 24usize, - concat!( - "Offset of field: ", - stringify!(event__bindgen_ty_1__bindgen_ty_3), - "::", - stringify!(rmem_alloc) - ) - ); - } - test_field_rmem_alloc(); - fn test_field_sk_rcvbuf() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).sk_rcvbuf) as usize - ptr as usize - }, - 28usize, - concat!( - "Offset of field: ", - stringify!(event__bindgen_ty_1__bindgen_ty_3), - "::", - stringify!(sk_rcvbuf) - ) - ); - } - test_field_sk_rcvbuf(); - fn test_field_drop() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).drop) as usize - ptr as usize - }, - 32usize, - concat!( - "Offset of field: ", - stringify!(event__bindgen_ty_1__bindgen_ty_3), - "::", - stringify!(drop) - ) - ); - } - test_field_drop(); - fn test_field_retran() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).retran) as usize - ptr as usize - }, - 36usize, - concat!( - "Offset of field: ", - 
stringify!(event__bindgen_ty_1__bindgen_ty_3), - "::", - stringify!(retran) - ) - ); - } - test_field_retran(); - fn test_field_ooo() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).ooo) as usize - ptr as usize - }, - 40usize, - concat!( - "Offset of field: ", - stringify!(event__bindgen_ty_1__bindgen_ty_3), - "::", - stringify!(ooo) - ) - ); - } - test_field_ooo(); -} -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct event__bindgen_ty_1__bindgen_ty_4 { - pub sock: u64_, -} -#[test] -fn bindgen_test_layout_event__bindgen_ty_1__bindgen_ty_4() { - assert_eq!( - ::std::mem::size_of::(), - 8usize, - concat!("Size of: ", stringify!(event__bindgen_ty_1__bindgen_ty_4)) - ); - assert_eq!( - ::std::mem::align_of::(), - 8usize, - concat!( - "Alignment of ", - stringify!(event__bindgen_ty_1__bindgen_ty_4) - ) - ); - fn test_field_sock() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).sock) as usize - ptr as usize - }, - 0usize, - concat!( - "Offset of field: ", - stringify!(event__bindgen_ty_1__bindgen_ty_4), - "::", - stringify!(sock) - ) - ); - } - test_field_sock(); -} -#[test] -fn bindgen_test_layout_event__bindgen_ty_1() { - assert_eq!( - ::std::mem::size_of::(), - 56usize, - concat!("Size of: ", stringify!(event__bindgen_ty_1)) - ); - assert_eq!( - ::std::mem::align_of::(), - 8usize, - concat!("Alignment of ", stringify!(event__bindgen_ty_1)) - ); - fn test_field_drop_params() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).drop_params) as usize - ptr as usize - }, - 0usize, - concat!( - "Offset of field: ", - stringify!(event__bindgen_ty_1), - "::", - stringify!(drop_params) - ) - ); - } - test_field_drop_params(); - fn test_field_abnormal() { - assert_eq!( - unsafe { - let uninit = 
::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).abnormal) as usize - ptr as usize - }, - 0usize, - concat!( - "Offset of field: ", - stringify!(event__bindgen_ty_1), - "::", - stringify!(abnormal) - ) - ); - } - test_field_abnormal(); - fn test_field_connectlatency() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).connectlatency) as usize - ptr as usize - }, - 0usize, - concat!( - "Offset of field: ", - stringify!(event__bindgen_ty_1), - "::", - stringify!(connectlatency) - ) - ); - } - test_field_connectlatency(); -} -#[test] -fn bindgen_test_layout_event() { - assert_eq!( - ::std::mem::size_of::(), - 112usize, - concat!("Size of: ", stringify!(event)) - ); - assert_eq!( - ::std::mem::align_of::(), - 8usize, - concat!("Alignment of ", stringify!(event)) - ); - fn test_field_type() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).type_) as usize - ptr as usize - }, - 0usize, - concat!( - "Offset of field: ", - stringify!(event), - "::", - stringify!(type_) - ) - ); - } - test_field_type(); - fn test_field_state() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).state) as usize - ptr as usize - }, - 1usize, - concat!( - "Offset of field: ", - stringify!(event), - "::", - stringify!(state) - ) - ); - } - test_field_state(); - fn test_field_protocol() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).protocol) as usize - ptr as usize - }, - 2usize, - concat!( - "Offset of field: ", - stringify!(event), - "::", - stringify!(protocol) - ) - ); - } - test_field_protocol(); - fn test_field_error() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); 
- let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).error) as usize - ptr as usize - }, - 3usize, - concat!( - "Offset of field: ", - stringify!(event), - "::", - stringify!(error) - ) - ); - } - test_field_error(); - fn test_field_comm() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).comm) as usize - ptr as usize - }, - 4usize, - concat!( - "Offset of field: ", - stringify!(event), - "::", - stringify!(comm) - ) - ); - } - test_field_comm(); - fn test_field_stackid() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).stackid) as usize - ptr as usize - }, - 20usize, - concat!( - "Offset of field: ", - stringify!(event), - "::", - stringify!(stackid) - ) - ); - } - test_field_stackid(); - fn test_field_pid() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).pid) as usize - ptr as usize - }, - 24usize, - concat!( - "Offset of field: ", - stringify!(event), - "::", - stringify!(pid) - ) - ); - } - test_field_pid(); - fn test_field_ts() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).ts) as usize - ptr as usize - }, - 32usize, - concat!("Offset of field: ", stringify!(event), "::", stringify!(ts)) - ); - } - test_field_ts(); - fn test_field_ap() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).ap) as usize - ptr as usize - }, - 40usize, - concat!("Offset of field: ", stringify!(event), "::", stringify!(ap)) - ); - } - test_field_ap(); -} -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct filter { - pub pid: u32_, - pub protocol: u16_, - pub distribution: u8_, - pub threshold: u64_, - pub ap: addr_pair, -} -#[test] -fn 
bindgen_test_layout_filter() { - assert_eq!( - ::std::mem::size_of::(), - 32usize, - concat!("Size of: ", stringify!(filter)) - ); - assert_eq!( - ::std::mem::align_of::(), - 8usize, - concat!("Alignment of ", stringify!(filter)) - ); - fn test_field_pid() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).pid) as usize - ptr as usize - }, - 0usize, - concat!( - "Offset of field: ", - stringify!(filter), - "::", - stringify!(pid) - ) - ); - } - test_field_pid(); - fn test_field_protocol() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).protocol) as usize - ptr as usize - }, - 4usize, - concat!( - "Offset of field: ", - stringify!(filter), - "::", - stringify!(protocol) - ) - ); - } - test_field_protocol(); - fn test_field_distribution() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).distribution) as usize - ptr as usize - }, - 6usize, - concat!( - "Offset of field: ", - stringify!(filter), - "::", - stringify!(distribution) - ) - ); - } - test_field_distribution(); - fn test_field_threshold() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).threshold) as usize - ptr as usize - }, - 8usize, - concat!( - "Offset of field: ", - stringify!(filter), - "::", - stringify!(threshold) - ) - ); - } - test_field_threshold(); - fn test_field_ap() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).ap) as usize - ptr as usize - }, - 16usize, - concat!( - "Offset of field: ", - stringify!(filter), - "::", - stringify!(ap) - ) - ); - } - test_field_ap(); -} -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct onesecond { - pub ts: u64_, - pub 
clear: u32_, - pub bitmap: [u32_; 32usize], -} -#[test] -fn bindgen_test_layout_onesecond() { - assert_eq!( - ::std::mem::size_of::(), - 144usize, - concat!("Size of: ", stringify!(onesecond)) - ); - assert_eq!( - ::std::mem::align_of::(), - 8usize, - concat!("Alignment of ", stringify!(onesecond)) - ); - fn test_field_ts() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).ts) as usize - ptr as usize - }, - 0usize, - concat!( - "Offset of field: ", - stringify!(onesecond), - "::", - stringify!(ts) - ) - ); - } - test_field_ts(); - fn test_field_clear() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).clear) as usize - ptr as usize - }, - 8usize, - concat!( - "Offset of field: ", - stringify!(onesecond), - "::", - stringify!(clear) - ) - ); - } - test_field_clear(); - fn test_field_bitmap() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).bitmap) as usize - ptr as usize - }, - 12usize, - concat!( - "Offset of field: ", - stringify!(onesecond), - "::", - stringify!(bitmap) - ) - ); - } - test_field_bitmap(); -} -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct seconds4_ring { - pub os: [onesecond; 4usize], - pub idx: u32_, -} -#[test] -fn bindgen_test_layout_seconds4_ring() { - assert_eq!( - ::std::mem::size_of::(), - 584usize, - concat!("Size of: ", stringify!(seconds4_ring)) - ); - assert_eq!( - ::std::mem::align_of::(), - 8usize, - concat!("Alignment of ", stringify!(seconds4_ring)) - ); - fn test_field_os() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).os) as usize - ptr as usize - }, - 0usize, - concat!( - "Offset of field: ", - stringify!(seconds4_ring), - "::", - stringify!(os) - ) - ); - } - 
test_field_os(); - fn test_field_idx() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).idx) as usize - ptr as usize - }, - 576usize, - concat!( - "Offset of field: ", - stringify!(seconds4_ring), - "::", - stringify!(idx) - ) - ); - } - test_field_idx(); -} -pub const NF_CONNTRACK: _bindgen_ty_3 = 0; -pub const IPTABLES: _bindgen_ty_3 = 1; -pub const KFREE_SKB: _bindgen_ty_3 = 2; -pub const TCP_DROP: _bindgen_ty_3 = 3; -pub const TP_KFREE_SKB: _bindgen_ty_3 = 4; -pub type _bindgen_ty_3 = ::std::os::raw::c_uint; -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct drop_filter { - pub protocol: u16_, - pub ap: addr_pair, -} -#[test] -fn bindgen_test_layout_drop_filter() { - assert_eq!( - ::std::mem::size_of::(), - 16usize, - concat!("Size of: ", stringify!(drop_filter)) - ); - assert_eq!( - ::std::mem::align_of::(), - 4usize, - concat!("Alignment of ", stringify!(drop_filter)) - ); - fn test_field_protocol() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).protocol) as usize - ptr as usize - }, - 0usize, - concat!( - "Offset of field: ", - stringify!(drop_filter), - "::", - stringify!(protocol) - ) - ); - } - test_field_protocol(); - fn test_field_ap() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).ap) as usize - ptr as usize - }, - 4usize, - concat!( - "Offset of field: ", - stringify!(drop_filter), - "::", - stringify!(ap) - ) - ); - } - test_field_ap(); -} -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct drop_event { - pub location: u64_, - pub proto: u16_, - pub ap: addr_pair, -} -#[test] -fn bindgen_test_layout_drop_event() { - assert_eq!( - ::std::mem::size_of::(), - 24usize, - concat!("Size of: ", stringify!(drop_event)) - ); - assert_eq!( - ::std::mem::align_of::(), - 8usize, - 
concat!("Alignment of ", stringify!(drop_event)) - ); - fn test_field_location() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).location) as usize - ptr as usize - }, - 0usize, - concat!( - "Offset of field: ", - stringify!(drop_event), - "::", - stringify!(location) - ) - ); - } - test_field_location(); - fn test_field_proto() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).proto) as usize - ptr as usize - }, - 8usize, - concat!( - "Offset of field: ", - stringify!(drop_event), - "::", - stringify!(proto) - ) - ); - } - test_field_proto(); - fn test_field_ap() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).ap) as usize - ptr as usize - }, - 12usize, - concat!( - "Offset of field: ", - stringify!(drop_event), - "::", - stringify!(ap) - ) - ); - } - test_field_ap(); -} diff --git a/source/tools/detect/net/rtrace/drop/src/bpf/bpf_core.h b/source/tools/detect/net/rtrace/drop/src/bpf/bpf_core.h deleted file mode 120000 index a95235716a58e33b85e3602ede588a52d61162cf..0000000000000000000000000000000000000000 --- a/source/tools/detect/net/rtrace/drop/src/bpf/bpf_core.h +++ /dev/null @@ -1 +0,0 @@ -../../../bpf_core.h \ No newline at end of file diff --git a/source/tools/detect/net/rtrace/drop/src/bpf/common.h b/source/tools/detect/net/rtrace/drop/src/bpf/common.h deleted file mode 120000 index 372cd74878559d6d745e6a121a91ff585fbb2d34..0000000000000000000000000000000000000000 --- a/source/tools/detect/net/rtrace/drop/src/bpf/common.h +++ /dev/null @@ -1 +0,0 @@ -../../../common.h \ No newline at end of file diff --git a/source/tools/detect/net/rtrace/drop/src/bpf/drop.bpf.c b/source/tools/detect/net/rtrace/drop/src/bpf/drop.bpf.c deleted file mode 100644 index 
34686e4714ba6c50a078b61472f6d333d745b6fb..0000000000000000000000000000000000000000 --- a/source/tools/detect/net/rtrace/drop/src/bpf/drop.bpf.c +++ /dev/null @@ -1,387 +0,0 @@ -#include "vmlinux.h" -#include -#include -#include -#include - -#include "drop.h" -#include "bpf_core.h" - -struct kfree_skb_tp_args -{ - u32 pad[2]; - struct sk_buff *skbaddr; - u64 location; - u16 protocol; -}; - - -__always_inline int fill_event(void *ctx, struct drop_event *event, struct sk_buff *skb) -{ - struct iphdr ih = {}; - struct tcphdr th = {}; - struct udphdr uh = {}; - u16 network_header, transport_header; - char *head; - - bpf_probe_read(&head, sizeof(head), &skb->head); - bpf_probe_read(&network_header, sizeof(network_header), &skb->network_header); - if (network_header != 0) - { - bpf_probe_read(&ih, sizeof(ih), head + network_header); - event->ap.saddr = ih.saddr; - event->ap.daddr = ih.daddr; - event->proto = ih.protocol; - transport_header = network_header + (ih.ihl << 2); - } - else - { - bpf_probe_read(&transport_header, sizeof(transport_header), &skb->transport_header); - } - switch (event->proto) - { - case IPPROTO_UDP: - if (transport_header != 0 && transport_header != 0xffff) - { - bpf_probe_read(&uh, sizeof(uh), head + transport_header); - event->ap.sport = bpf_ntohs(uh.source); - event->ap.dport = bpf_ntohs(uh.dest); - } - break; - case IPPROTO_TCP: - bpf_probe_read(&th, sizeof(th), head + transport_header); - event->ap.sport = bpf_ntohs(th.source); - event->ap.dport = bpf_ntohs(th.dest); - break; - default: - break; - } - - bpf_perf_event_output(ctx, &perf_map, BPF_F_CURRENT_CPU, event, sizeof(struct drop_event)); - return 0; -} - - - -SEC("tracepoint/skb/kfree_skb") -int tp_kfree_skb(struct kfree_skb_tp_args *ctx) -{ - struct drop_event event = {}; - - event.proto = ctx->protocol; - event.location = ctx->location; - fill_event(ctx, &event, ctx->skbaddr); - return 0; -} - - -SEC("kprobe/tcp_drop") -int BPF_KPROBE(tcp_drop, struct sock *sk, struct sk_buff *skb) -{ 
- - struct drop_event event = {}; - u64 bp; - bpf_probe_read(&event.proto, sizeof(event.proto), &skb->protocol); - event.proto = bpf_ntohs(event.proto); - bp = PT_REGS_FP(ctx); - - bpf_probe_read(&event.location, sizeof(event.location), (void *)(bp+8)); - fill_event(ctx, &event, skb); - return 0; -} - - -#if 0 -struct tid_map_value -{ - struct sk_buff *skb; - struct nf_hook_state *state; - struct xt_table *table; - u32 hook; -}; - - -BPF_ARRAY(filter_map, struct drop_filter, 1); -BPF_HASH(inner_tid_map, u32, struct tid_map_value, 1024); -BPF_STACK_TRACE(stackmap, 1024); - -#define NF_DROP 0 -#define NF_ACCEPT 1 - -__always_inline void fill_stack(void *ctx, struct drop_event *event) -{ - event->stackid = bpf_get_stackid(ctx, &stackmap, 0); -} - -__always_inline void fill_pid(struct drop_event *event) -{ - // pid info - event->pid = pid(); - comm(event->comm); -} - -__always_inline int fill_sk_skb(struct drop_event *event, struct sock *sk, struct sk_buff *skb) -{ - struct net *net = NULL; - struct iphdr ih = {}; - struct tcphdr th = {}; - struct udphdr uh = {}; - u16 protocol = 0; - bool has_netheader = false; - u16 network_header, transport_header; - char *head; - event->has_sk = false; - if (sk) - { - event->has_sk = true; - // address pair - bpf_probe_read(&event->skap.daddr, sizeof(event->skap.daddr), &sk->__sk_common.skc_daddr); - bpf_probe_read(&event->skap.dport, sizeof(event->skap.dport), &sk->__sk_common.skc_dport); - bpf_probe_read(&event->skap.saddr, sizeof(event->skap.saddr), &sk->__sk_common.skc_rcv_saddr); - bpf_probe_read(&event->skap.sport, sizeof(event->skap.sport), &sk->__sk_common.skc_num); - event->skap.dport = bpf_ntohs(event->skap.dport); - - protocol = bpf_core_sock_sk_protocol(sk); - event->sk_protocol = protocol; - bpf_probe_read(&event->sk_state, sizeof(event->sk_state), &sk->__sk_common.skc_state); - } - - bpf_probe_read(&head, sizeof(head), &skb->head); - bpf_probe_read(&network_header, sizeof(network_header), &skb->network_header); - if 
(network_header != 0) - { - bpf_probe_read(&ih, sizeof(ih), head + network_header); - has_netheader = true; - event->skbap.saddr = ih.saddr; - event->skbap.daddr = ih.daddr; - event->skb_protocol = ih.protocol; - - protocol = ih.protocol; - transport_header = network_header + (ih.ihl << 2); - } - else - { - bpf_probe_read(&transport_header, sizeof(transport_header), &skb->transport_header); - } - switch (protocol) - { - case IPPROTO_ICMP: - break; - case IPPROTO_UDP: - if (transport_header != 0 && transport_header != 0xffff) - { - bpf_probe_read(&uh, sizeof(uh), head + transport_header); - event->skbap.sport = bpf_ntohs(uh.source); - event->skbap.dport = bpf_ntohs(uh.dest); - } - break; - case IPPROTO_TCP: - bpf_probe_read(&th, sizeof(th), head + transport_header); - event->skbap.sport = bpf_ntohs(th.source); - event->skbap.dport = bpf_ntohs(th.dest); - break; - default: - return -1; - break; - } - return 0; -} - -__always_inline void handle(void *ctx, struct sock *sk, struct sk_buff *skb, u32 type) -{ - u32 key = 0; - struct drop_filter *filter = NULL; - struct drop_event event = {}; - - event.type = type; - if (fill_sk_skb(&event, sk, skb) < 0) - return; - filter = bpf_map_lookup_elem(&filter_map, &key); - if (filter) - { - if (!event.sk_protocol && !event.skb_protocol) - return; - - if (filter->protocol) - { - if (filter->protocol != event.skb_protocol && filter->protocol != event.sk_protocol) - return; - } - - if (sk) - { - // skip Close state - if (event.sk_state == 7) - return; - // sock addrpair - if (filter->ap.daddr && event.skap.daddr != filter->ap.daddr) - return; - if (filter->ap.saddr && event.skap.saddr != filter->ap.saddr) - return; - if (filter->ap.dport && event.skap.dport != filter->ap.dport) - return; - if (filter->ap.sport && event.skap.sport != filter->ap.sport) - return; - } - - // skb - if (filter->ap.daddr && event.skbap.saddr != filter->ap.daddr) - return; - if (filter->ap.saddr && event.skbap.daddr != filter->ap.saddr) - return; - if 
(filter->ap.dport && event.skbap.sport != filter->ap.dport) - return; - if (filter->ap.sport && event.skbap.dport != filter->ap.sport) - return; - - } - - fill_stack(ctx, &event); - // pid info - fill_pid(&event); - - bpf_perf_event_output(ctx, &perf_map, BPF_F_CURRENT_CPU, &event, sizeof(event)); -} - -SEC("kprobe/tcp_drop") -int BPF_KPROBE(tcp_drop, struct sock *sk, struct sk_buff *skb) -{ - handle(ctx, sk, skb, TCP_DROP); - return 0; -} - -SEC("tracepoint/skb/kfree_skb") -int tp_kfree_skb(struct kfree_skb_tp_args *ctx) -{ - u32 key = 0; - struct drop_filter *filter = NULL; - struct drop_event event = {}; - - event.type = TP_KFREE_SKB; - filter = bpf_map_lookup_elem(&filter_map, &key); - if (filter) { - if ( filter->protocol !=0 && filter->protocol != ctx->protocol ) - return 0; - } - - event.skb_protocol = ctx->protocol; - event.location = ctx->location; - - return 0; -} - -SEC("kprobe/kfree_skb") -int BPF_KPROBE(kfree_skb, struct sk_buff *skb) -{ - struct sock *sk; - bpf_probe_read(&sk, sizeof(sk), &skb->sk); - handle(ctx, sk, skb, KFREE_SKB); - return 0; -} - -#define NF_DROP 0 - -__always_inline void ipt_do_table_entry(struct sk_buff *skb, struct nf_hook_state *state, struct xt_table *table, u32 hook) -{ - u32 tid = bpf_get_current_pid_tgid(); - struct tid_map_value value = {}; - value.skb = skb; - value.state = state; - value.table = table; - value.hook = hook; - bpf_map_update_elem(&inner_tid_map, &tid, &value, BPF_ANY); -} - -// for kernel 4.19 and 5.10 -// unsigned int -// ipt_do_table(struct sk_buff *skb, -// const struct nf_hook_state *state, -// struct xt_table *table) -SEC("kprobe/ipt_do_table") -int BPF_KPROBE(ipt_do_table, void *priv, struct sk_buff *skb, struct nf_hook_state *state) -{ - u32 hook; - bpf_probe_read(&hook, sizeof(hook), &state->hook); - ipt_do_table_entry(skb, state, priv, hook); - return 0; -} - -SEC("kprobe/ipt_do_table") -int BPF_KPROBE(ipt_do_table310, struct sk_buff *skb, u32 hook, struct nf_hook_state *state) -{ - 
ipt_do_table_entry(skb, state, PT_REGS_PARM4(ctx), hook); - return 0; -} - -SEC("kretprobe/ipt_do_table") -int BPF_KRETPROBE(ipt_do_table_ret, int ret) -{ - struct sock *sk; - struct tid_map_value *value; - struct drop_event event = {}; - u32 tid = bpf_get_current_pid_tgid(); - u64 addr; - - if (ret == NF_DROP) - { - value = bpf_map_lookup_elem(&inner_tid_map, &tid); - if (value == NULL) - return 0; - - struct nf_hook_state *state = value->state; - struct xt_table *table = value->table; - struct sk_buff *skb = value->skb; - - event.type = DROP_IPTABLES_DROP; - addr = bpf_core_xt_table_name(table); - if (addr) - bpf_probe_read(event.name, sizeof(event.name), (void *)addr); - - event.hook = value->hook; - bpf_probe_read(&sk, sizeof(sk), &skb->sk); - fill_stack(ctx, &event); - fill_pid(&event); - fill_sk_skb(&event, sk, skb); - bpf_perf_event_output(ctx, &perf_map, BPF_F_CURRENT_CPU, &event, sizeof(event)); - } - bpf_map_delete_elem(&inner_tid_map, &tid); - return 0; -} - -SEC("kprobe/__nf_conntrack_confirm") -int BPF_KPROBE(__nf_conntrack_confirm, struct sk_buff *skb) -{ - ipt_do_table_entry(skb, NULL, NULL, 0); - return 0; -} - -SEC("kretprobe/__nf_conntrack_confirm") -int BPF_KRETPROBE(__nf_conntrack_confirm_ret, int ret) -{ - struct drop_event event = {}; - struct tid_map_value *value; - struct sk_buff *skb; - struct sock *sk; - u32 tid = bpf_get_current_pid_tgid(); - if (ret == NF_DROP) - { - value = bpf_map_lookup_elem(&inner_tid_map, &tid); - if (value == NULL) - return 0; - - event.type = NF_CONNTRACK; - skb = value->skb; - bpf_probe_read(&sk, sizeof(sk), &skb->sk); - fill_stack(ctx, &event); - fill_pid(&event); - fill_sk_skb(&event, sk, skb); - bpf_perf_event_output(ctx, &perf_map, BPF_F_CURRENT_CPU, &event, sizeof(event)); - } - - bpf_map_delete_elem(&inner_tid_map, &tid); - return 0; -} - -#endif - -char _license[] SEC("license") = "GPL"; \ No newline at end of file diff --git a/source/tools/detect/net/rtrace/drop/src/bpf/drop.h 
b/source/tools/detect/net/rtrace/drop/src/bpf/drop.h deleted file mode 100644 index a94fc87ee9e8fc1ab5c03dfaa577b492596f68f7..0000000000000000000000000000000000000000 --- a/source/tools/detect/net/rtrace/drop/src/bpf/drop.h +++ /dev/null @@ -1,66 +0,0 @@ -#ifndef __DROP_H -#define __DROP_H -#include "common.h" - -#define IPTABLE_FILTER 1 -#define IPTABLE_MANGLE 2 -#define IPTABLE_RAW 3 -#define ARPTABLE_FILTER 4 -#define IPTABLE_SECURITY 5 -#define IPTABLE_NAT 6 - - -enum { - NF_CONNTRACK, - IPTABLES, - KFREE_SKB, - TCP_DROP, - TP_KFREE_SKB, -}; - -struct drop_filter -{ - u16 protocol; - struct addr_pair ap; -}; - -#if 0 -struct drop_event -{ - // Event Type - u8 type; - u8 has_sk; - // Sock state - u8 sk_state; - u8 sk_protocol; - u16 cpu; - struct addr_pair skap; - - // skb state - u8 skb_protocol; - struct addr_pair skbap; - - u32 pid; - // stack id - u32 stackid; - // process command - u8 comm[16]; - u64 ts; - // iptables table name - u8 name[32]; - // iptables hook chain name - u32 hook; - - u64 location; -}; - -#endif - -struct drop_event -{ - u64 location; - u16 proto; - struct addr_pair ap; -}; - -#endif diff --git a/source/tools/detect/net/rtrace/drop/src/drop.rs b/source/tools/detect/net/rtrace/drop/src/drop.rs deleted file mode 100644 index bcd57220e998b5c6bfbaf0137d58c0221fb7cacd..0000000000000000000000000000000000000000 --- a/source/tools/detect/net/rtrace/drop/src/drop.rs +++ /dev/null @@ -1,60 +0,0 @@ -use crate::{bindings::*, skel::*, DropEvent}; -use anyhow::{bail, Result}; -use builder::SkelBuilder; -use crossbeam_channel; -use once_cell::sync::Lazy; -use std::sync::Mutex; -use std::thread; -use std::time; -use std::time::Duration; -use utils::macros::*; -use utils::{init_zeroed, to_vec, kernel_stack::KernelStack}; - -#[derive(SkelBuilder)] -pub struct Drop<'a> { - pub skel: DropSkel<'a>, - rx: Option)>>, -} - -impl<'a> Drop<'a> { - pub fn new(debug: bool, btf: &Option) -> Result { - let mut drop = Drop::builder().open(debug, 
btf).load().open_perf().build(); - drop.skel.attach()?; - Ok(drop) - } - - pub fn poll(&mut self, timeout: Duration) -> Result> { - if let Some(rx) = &self.rx { - match rx.recv_timeout(timeout) { - Ok(mut data) => { - // https://stackoverflow.com/questions/42499049/transmuting-u8-buffer-to-struct-in-rust - let (head, body, tail) = unsafe { data.1.align_to_mut::() }; - assert!(head.is_empty(), "Data was not aligned"); - let mut se = body[0]; - log::debug!("{:?}", se); - let mut de = DropEvent::from_drop_event(se); - - return Ok(Some(de)); - } - Err(_) => return Ok(None), - } - } - bail!("perf channel receiver is none") - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_events_sock_basic() { - utils::bump_memlock_rlimit().unwrap(); - let mut sock = Sock::new(true, &None); - loop { - if let Some(event) = sock.poll(Duration::from_millis(200)).unwrap() { - println!("{}", event); - } - } - } -} diff --git a/source/tools/detect/net/rtrace/drop/src/lib.rs b/source/tools/detect/net/rtrace/drop/src/lib.rs deleted file mode 100644 index 971ee9df80019813f1b4e1cc0dbadd8ced327afb..0000000000000000000000000000000000000000 --- a/source/tools/detect/net/rtrace/drop/src/lib.rs +++ /dev/null @@ -1,46 +0,0 @@ -mod bindings { - include!("bpf/bindings.rs"); -} - -mod skel { - include!(concat!(env!("OUT_DIR"), "/drop.skel.rs")); -} - -mod drop; -// pub mod json; - -pub use drop::Drop; -use eutils_rs::net::{ProtocolType, TcpState}; -use std::fmt; -use std::net::SocketAddr; -use cenum_rs::CEnum; -use crate::bindings::*; -use anyhow::Result; -use std::net::SocketAddrV4; -use utils::net::Addrpair; - - -use utils::kernel_stack::{KernelStack, GLOBAL_KALLSYMS}; - -use serde::{Deserialize, Serialize}; - - -#[derive(Serialize, Deserialize)] -pub struct DropEvent { - proto: u16, - src: String, - dst: String, - symbol: String, -} - -impl DropEvent { - pub fn from_drop_event(de: drop_event) -> Self{ - let ap = utils::addr_pair_2_Addrpair!(de.ap); - DropEvent { - src: 
ap.local.to_string(), - dst: ap.remote.to_string(), - proto: de.proto, - symbol: GLOBAL_KALLSYMS.lock().unwrap().addr_to_sym(de.location), - } - } -} diff --git a/source/tools/detect/net/rtrace/latency/icmp/Cargo.toml b/source/tools/detect/net/rtrace/latency/icmp/Cargo.toml deleted file mode 100644 index b213978f1a06f3fe2c8eb52572f0a4c0a790dc4c..0000000000000000000000000000000000000000 --- a/source/tools/detect/net/rtrace/latency/icmp/Cargo.toml +++ /dev/null @@ -1,24 +0,0 @@ -[package] -name = "icmp" -version = "0.1.0" -edition = "2021" - -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html - -[dependencies] -libbpf-rs = "0.19" -libbpf-sys = { version = "1.0.3" } -log = "0.4.14" -libc = "0.2.125" -anyhow = "1.0.57" -once_cell = "1.8.0" -crossbeam-channel = "0.5" -cenum-rs = { path = "../../cenum-rs" } -plain = "0.2" -utils = { path = "../../utils" } -builder = { path = "../../builder" } -serde = {version = "1.0", features = ["derive"]} - -[build-dependencies] -libbpf-cargo = "0.13" -bindgen = "0.60.1" \ No newline at end of file diff --git a/source/tools/detect/net/rtrace/latency/icmp/build.rs b/source/tools/detect/net/rtrace/latency/icmp/build.rs deleted file mode 100644 index 1f425a18b2773ec031cfb152dab45ded4ae90eb4..0000000000000000000000000000000000000000 --- a/source/tools/detect/net/rtrace/latency/icmp/build.rs +++ /dev/null @@ -1,28 +0,0 @@ -use std::{env, path::PathBuf}; -use libbpf_cargo::SkeletonBuilder; -const SRC: &str = "src/bpf/icmp.bpf.c"; -const HDR: &str = "src/bpf/icmp.h"; - -fn main() { - let mut out = PathBuf::from(env::var_os("OUT_DIR").expect("OUT_DIR must be set in build script")); - out.push("icmp.skel.rs"); - SkeletonBuilder::new() - .source(SRC) - .build_and_generate(&out) - .unwrap(); - - // let bindings = bindgen::Builder::default() - // .header(HDR) - // .parse_callbacks(Box::new(bindgen::CargoCallbacks)) - // .generate() - // .expect("Unable to generate bindings for icmp crate"); - - // 
out.pop(); - // out.push("bindings.rs"); - // bindings - // .write_to_file(&out) - // .expect("Couldn't write bindings for icmp crate!"); - - println!("cargo:rerun-if-changed={}", SRC); - println!("cargo:rerun-if-changed={}", HDR); -} diff --git a/source/tools/detect/net/rtrace/latency/icmp/src/bpf/bindings.rs b/source/tools/detect/net/rtrace/latency/icmp/src/bpf/bindings.rs deleted file mode 100644 index dc96e71c9c6864834360fba98d756d787ad87c43..0000000000000000000000000000000000000000 --- a/source/tools/detect/net/rtrace/latency/icmp/src/bpf/bindings.rs +++ /dev/null @@ -1,1208 +0,0 @@ -/* automatically generated by rust-bindgen 0.60.1 */ - -pub const MAX_STACK_DEPTH: u32 = 20; -pub type u8_ = ::std::os::raw::c_uchar; -pub type u16_ = ::std::os::raw::c_ushort; -pub type u32_ = ::std::os::raw::c_uint; -pub type u64_ = ::std::os::raw::c_ulonglong; -pub const DROP_KFREE_SKB: _bindgen_ty_1 = 0; -pub const DROP_TCP_DROP: _bindgen_ty_1 = 1; -pub const DROP_IPTABLES_DROP: _bindgen_ty_1 = 2; -pub const DROP_NFCONNTRACK_DROP: _bindgen_ty_1 = 3; -pub const LATENCY_EVENT: _bindgen_ty_1 = 4; -pub const CONNECT_LATENCY_EVENT: _bindgen_ty_1 = 5; -pub const EVENT_UNKNOWN: _bindgen_ty_1 = 6; -pub type _bindgen_ty_1 = ::std::os::raw::c_uint; -pub const ERR_OK: _bindgen_ty_2 = 0; -pub const ERR_PROTOCOL_NOT_DETERMINED: _bindgen_ty_2 = 1; -pub const ERR_PROTOCOL_NOT_SUPPORT: _bindgen_ty_2 = 2; -pub type _bindgen_ty_2 = ::std::os::raw::c_uint; -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct addr_pair { - pub saddr: u32_, - pub daddr: u32_, - pub sport: u16_, - pub dport: u16_, -} -#[test] -fn bindgen_test_layout_addr_pair() { - assert_eq!( - ::std::mem::size_of::(), - 12usize, - concat!("Size of: ", stringify!(addr_pair)) - ); - assert_eq!( - ::std::mem::align_of::(), - 4usize, - concat!("Alignment of ", stringify!(addr_pair)) - ); - fn test_field_saddr() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - 
::std::ptr::addr_of!((*ptr).saddr) as usize - ptr as usize - }, - 0usize, - concat!( - "Offset of field: ", - stringify!(addr_pair), - "::", - stringify!(saddr) - ) - ); - } - test_field_saddr(); - fn test_field_daddr() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).daddr) as usize - ptr as usize - }, - 4usize, - concat!( - "Offset of field: ", - stringify!(addr_pair), - "::", - stringify!(daddr) - ) - ); - } - test_field_daddr(); - fn test_field_sport() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).sport) as usize - ptr as usize - }, - 8usize, - concat!( - "Offset of field: ", - stringify!(addr_pair), - "::", - stringify!(sport) - ) - ); - } - test_field_sport(); - fn test_field_dport() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).dport) as usize - ptr as usize - }, - 10usize, - concat!( - "Offset of field: ", - stringify!(addr_pair), - "::", - stringify!(dport) - ) - ); - } - test_field_dport(); -} -#[repr(C)] -#[derive(Copy, Clone)] -pub struct event { - pub type_: u8_, - pub state: u8_, - pub protocol: u8_, - pub error: u8_, - pub comm: [u8_; 16usize], - pub stackid: u32_, - pub pid: u32_, - pub ts: u64_, - pub ap: addr_pair, - pub __bindgen_anon_1: event__bindgen_ty_1, -} -#[repr(C)] -#[derive(Copy, Clone)] -pub union event__bindgen_ty_1 { - pub __bindgen_anon_1: event__bindgen_ty_1__bindgen_ty_1, - pub drop_params: event__bindgen_ty_1__bindgen_ty_2, - pub abnormal: event__bindgen_ty_1__bindgen_ty_3, - pub connectlatency: event__bindgen_ty_1__bindgen_ty_4, -} -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct event__bindgen_ty_1__bindgen_ty_1 { - pub pidtime_array_idx: u32_, - pub socktime_array_idx: u32_, - pub queue_ts: u64_, - pub rcv_ts: u64_, -} -#[test] -fn 
bindgen_test_layout_event__bindgen_ty_1__bindgen_ty_1() { - assert_eq!( - ::std::mem::size_of::(), - 24usize, - concat!("Size of: ", stringify!(event__bindgen_ty_1__bindgen_ty_1)) - ); - assert_eq!( - ::std::mem::align_of::(), - 8usize, - concat!( - "Alignment of ", - stringify!(event__bindgen_ty_1__bindgen_ty_1) - ) - ); - fn test_field_pidtime_array_idx() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).pidtime_array_idx) as usize - ptr as usize - }, - 0usize, - concat!( - "Offset of field: ", - stringify!(event__bindgen_ty_1__bindgen_ty_1), - "::", - stringify!(pidtime_array_idx) - ) - ); - } - test_field_pidtime_array_idx(); - fn test_field_socktime_array_idx() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).socktime_array_idx) as usize - ptr as usize - }, - 4usize, - concat!( - "Offset of field: ", - stringify!(event__bindgen_ty_1__bindgen_ty_1), - "::", - stringify!(socktime_array_idx) - ) - ); - } - test_field_socktime_array_idx(); - fn test_field_queue_ts() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).queue_ts) as usize - ptr as usize - }, - 8usize, - concat!( - "Offset of field: ", - stringify!(event__bindgen_ty_1__bindgen_ty_1), - "::", - stringify!(queue_ts) - ) - ); - } - test_field_queue_ts(); - fn test_field_rcv_ts() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).rcv_ts) as usize - ptr as usize - }, - 16usize, - concat!( - "Offset of field: ", - stringify!(event__bindgen_ty_1__bindgen_ty_1), - "::", - stringify!(rcv_ts) - ) - ); - } - test_field_rcv_ts(); -} -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct event__bindgen_ty_1__bindgen_ty_2 { - pub name: [u8_; 32usize], - pub hook: u32_, - 
pub sk_protocol: u8_, - pub skap: addr_pair, -} -#[test] -fn bindgen_test_layout_event__bindgen_ty_1__bindgen_ty_2() { - assert_eq!( - ::std::mem::size_of::(), - 52usize, - concat!("Size of: ", stringify!(event__bindgen_ty_1__bindgen_ty_2)) - ); - assert_eq!( - ::std::mem::align_of::(), - 4usize, - concat!( - "Alignment of ", - stringify!(event__bindgen_ty_1__bindgen_ty_2) - ) - ); - fn test_field_name() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).name) as usize - ptr as usize - }, - 0usize, - concat!( - "Offset of field: ", - stringify!(event__bindgen_ty_1__bindgen_ty_2), - "::", - stringify!(name) - ) - ); - } - test_field_name(); - fn test_field_hook() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).hook) as usize - ptr as usize - }, - 32usize, - concat!( - "Offset of field: ", - stringify!(event__bindgen_ty_1__bindgen_ty_2), - "::", - stringify!(hook) - ) - ); - } - test_field_hook(); - fn test_field_sk_protocol() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).sk_protocol) as usize - ptr as usize - }, - 36usize, - concat!( - "Offset of field: ", - stringify!(event__bindgen_ty_1__bindgen_ty_2), - "::", - stringify!(sk_protocol) - ) - ); - } - test_field_sk_protocol(); - fn test_field_skap() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).skap) as usize - ptr as usize - }, - 40usize, - concat!( - "Offset of field: ", - stringify!(event__bindgen_ty_1__bindgen_ty_2), - "::", - stringify!(skap) - ) - ); - } - test_field_skap(); -} -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct event__bindgen_ty_1__bindgen_ty_3 { - pub i_ino: u32_, - pub sk_ack_backlog: u32_, - pub icsk_accept_queue: u32_, - pub 
sk_max_ack_backlog: u32_, - pub sk_wmem_queued: u32_, - pub sndbuf: u32_, - pub rmem_alloc: u32_, - pub sk_rcvbuf: u32_, - pub drop: u32_, - pub retran: u32_, - pub ooo: u32_, -} -#[test] -fn bindgen_test_layout_event__bindgen_ty_1__bindgen_ty_3() { - assert_eq!( - ::std::mem::size_of::(), - 44usize, - concat!("Size of: ", stringify!(event__bindgen_ty_1__bindgen_ty_3)) - ); - assert_eq!( - ::std::mem::align_of::(), - 4usize, - concat!( - "Alignment of ", - stringify!(event__bindgen_ty_1__bindgen_ty_3) - ) - ); - fn test_field_i_ino() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).i_ino) as usize - ptr as usize - }, - 0usize, - concat!( - "Offset of field: ", - stringify!(event__bindgen_ty_1__bindgen_ty_3), - "::", - stringify!(i_ino) - ) - ); - } - test_field_i_ino(); - fn test_field_sk_ack_backlog() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).sk_ack_backlog) as usize - ptr as usize - }, - 4usize, - concat!( - "Offset of field: ", - stringify!(event__bindgen_ty_1__bindgen_ty_3), - "::", - stringify!(sk_ack_backlog) - ) - ); - } - test_field_sk_ack_backlog(); - fn test_field_icsk_accept_queue() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).icsk_accept_queue) as usize - ptr as usize - }, - 8usize, - concat!( - "Offset of field: ", - stringify!(event__bindgen_ty_1__bindgen_ty_3), - "::", - stringify!(icsk_accept_queue) - ) - ); - } - test_field_icsk_accept_queue(); - fn test_field_sk_max_ack_backlog() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).sk_max_ack_backlog) as usize - ptr as usize - }, - 12usize, - concat!( - "Offset of field: ", - stringify!(event__bindgen_ty_1__bindgen_ty_3), - "::", - 
stringify!(sk_max_ack_backlog) - ) - ); - } - test_field_sk_max_ack_backlog(); - fn test_field_sk_wmem_queued() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).sk_wmem_queued) as usize - ptr as usize - }, - 16usize, - concat!( - "Offset of field: ", - stringify!(event__bindgen_ty_1__bindgen_ty_3), - "::", - stringify!(sk_wmem_queued) - ) - ); - } - test_field_sk_wmem_queued(); - fn test_field_sndbuf() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).sndbuf) as usize - ptr as usize - }, - 20usize, - concat!( - "Offset of field: ", - stringify!(event__bindgen_ty_1__bindgen_ty_3), - "::", - stringify!(sndbuf) - ) - ); - } - test_field_sndbuf(); - fn test_field_rmem_alloc() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).rmem_alloc) as usize - ptr as usize - }, - 24usize, - concat!( - "Offset of field: ", - stringify!(event__bindgen_ty_1__bindgen_ty_3), - "::", - stringify!(rmem_alloc) - ) - ); - } - test_field_rmem_alloc(); - fn test_field_sk_rcvbuf() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).sk_rcvbuf) as usize - ptr as usize - }, - 28usize, - concat!( - "Offset of field: ", - stringify!(event__bindgen_ty_1__bindgen_ty_3), - "::", - stringify!(sk_rcvbuf) - ) - ); - } - test_field_sk_rcvbuf(); - fn test_field_drop() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).drop) as usize - ptr as usize - }, - 32usize, - concat!( - "Offset of field: ", - stringify!(event__bindgen_ty_1__bindgen_ty_3), - "::", - stringify!(drop) - ) - ); - } - test_field_drop(); - fn test_field_retran() { - assert_eq!( - unsafe { - let uninit = 
::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).retran) as usize - ptr as usize - }, - 36usize, - concat!( - "Offset of field: ", - stringify!(event__bindgen_ty_1__bindgen_ty_3), - "::", - stringify!(retran) - ) - ); - } - test_field_retran(); - fn test_field_ooo() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).ooo) as usize - ptr as usize - }, - 40usize, - concat!( - "Offset of field: ", - stringify!(event__bindgen_ty_1__bindgen_ty_3), - "::", - stringify!(ooo) - ) - ); - } - test_field_ooo(); -} -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct event__bindgen_ty_1__bindgen_ty_4 { - pub sock: u64_, -} -#[test] -fn bindgen_test_layout_event__bindgen_ty_1__bindgen_ty_4() { - assert_eq!( - ::std::mem::size_of::(), - 8usize, - concat!("Size of: ", stringify!(event__bindgen_ty_1__bindgen_ty_4)) - ); - assert_eq!( - ::std::mem::align_of::(), - 8usize, - concat!( - "Alignment of ", - stringify!(event__bindgen_ty_1__bindgen_ty_4) - ) - ); - fn test_field_sock() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).sock) as usize - ptr as usize - }, - 0usize, - concat!( - "Offset of field: ", - stringify!(event__bindgen_ty_1__bindgen_ty_4), - "::", - stringify!(sock) - ) - ); - } - test_field_sock(); -} -#[test] -fn bindgen_test_layout_event__bindgen_ty_1() { - assert_eq!( - ::std::mem::size_of::(), - 56usize, - concat!("Size of: ", stringify!(event__bindgen_ty_1)) - ); - assert_eq!( - ::std::mem::align_of::(), - 8usize, - concat!("Alignment of ", stringify!(event__bindgen_ty_1)) - ); - fn test_field_drop_params() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).drop_params) as usize - ptr as usize - }, - 0usize, - concat!( - "Offset of field: ", - 
stringify!(event__bindgen_ty_1), - "::", - stringify!(drop_params) - ) - ); - } - test_field_drop_params(); - fn test_field_abnormal() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).abnormal) as usize - ptr as usize - }, - 0usize, - concat!( - "Offset of field: ", - stringify!(event__bindgen_ty_1), - "::", - stringify!(abnormal) - ) - ); - } - test_field_abnormal(); - fn test_field_connectlatency() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).connectlatency) as usize - ptr as usize - }, - 0usize, - concat!( - "Offset of field: ", - stringify!(event__bindgen_ty_1), - "::", - stringify!(connectlatency) - ) - ); - } - test_field_connectlatency(); -} -#[test] -fn bindgen_test_layout_event() { - assert_eq!( - ::std::mem::size_of::(), - 112usize, - concat!("Size of: ", stringify!(event)) - ); - assert_eq!( - ::std::mem::align_of::(), - 8usize, - concat!("Alignment of ", stringify!(event)) - ); - fn test_field_type() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).type_) as usize - ptr as usize - }, - 0usize, - concat!( - "Offset of field: ", - stringify!(event), - "::", - stringify!(type_) - ) - ); - } - test_field_type(); - fn test_field_state() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).state) as usize - ptr as usize - }, - 1usize, - concat!( - "Offset of field: ", - stringify!(event), - "::", - stringify!(state) - ) - ); - } - test_field_state(); - fn test_field_protocol() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).protocol) as usize - ptr as usize - }, - 2usize, - concat!( - "Offset of field: ", - 
stringify!(event), - "::", - stringify!(protocol) - ) - ); - } - test_field_protocol(); - fn test_field_error() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).error) as usize - ptr as usize - }, - 3usize, - concat!( - "Offset of field: ", - stringify!(event), - "::", - stringify!(error) - ) - ); - } - test_field_error(); - fn test_field_comm() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).comm) as usize - ptr as usize - }, - 4usize, - concat!( - "Offset of field: ", - stringify!(event), - "::", - stringify!(comm) - ) - ); - } - test_field_comm(); - fn test_field_stackid() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).stackid) as usize - ptr as usize - }, - 20usize, - concat!( - "Offset of field: ", - stringify!(event), - "::", - stringify!(stackid) - ) - ); - } - test_field_stackid(); - fn test_field_pid() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).pid) as usize - ptr as usize - }, - 24usize, - concat!( - "Offset of field: ", - stringify!(event), - "::", - stringify!(pid) - ) - ); - } - test_field_pid(); - fn test_field_ts() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).ts) as usize - ptr as usize - }, - 32usize, - concat!("Offset of field: ", stringify!(event), "::", stringify!(ts)) - ); - } - test_field_ts(); - fn test_field_ap() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).ap) as usize - ptr as usize - }, - 40usize, - concat!("Offset of field: ", stringify!(event), "::", stringify!(ap)) - ); - } - test_field_ap(); -} 
-#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct filter { - pub pid: u32_, - pub protocol: u16_, - pub distribution: u8_, - pub threshold: u64_, - pub ap: addr_pair, -} -#[test] -fn bindgen_test_layout_filter() { - assert_eq!( - ::std::mem::size_of::(), - 32usize, - concat!("Size of: ", stringify!(filter)) - ); - assert_eq!( - ::std::mem::align_of::(), - 8usize, - concat!("Alignment of ", stringify!(filter)) - ); - fn test_field_pid() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).pid) as usize - ptr as usize - }, - 0usize, - concat!( - "Offset of field: ", - stringify!(filter), - "::", - stringify!(pid) - ) - ); - } - test_field_pid(); - fn test_field_protocol() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).protocol) as usize - ptr as usize - }, - 4usize, - concat!( - "Offset of field: ", - stringify!(filter), - "::", - stringify!(protocol) - ) - ); - } - test_field_protocol(); - fn test_field_distribution() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).distribution) as usize - ptr as usize - }, - 6usize, - concat!( - "Offset of field: ", - stringify!(filter), - "::", - stringify!(distribution) - ) - ); - } - test_field_distribution(); - fn test_field_threshold() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).threshold) as usize - ptr as usize - }, - 8usize, - concat!( - "Offset of field: ", - stringify!(filter), - "::", - stringify!(threshold) - ) - ); - } - test_field_threshold(); - fn test_field_ap() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).ap) as usize - ptr as usize - }, - 16usize, - concat!( - 
"Offset of field: ", - stringify!(filter), - "::", - stringify!(ap) - ) - ); - } - test_field_ap(); -} -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct onesecond { - pub ts: u64_, - pub clear: u32_, - pub bitmap: [u32_; 32usize], -} -#[test] -fn bindgen_test_layout_onesecond() { - assert_eq!( - ::std::mem::size_of::(), - 144usize, - concat!("Size of: ", stringify!(onesecond)) - ); - assert_eq!( - ::std::mem::align_of::(), - 8usize, - concat!("Alignment of ", stringify!(onesecond)) - ); - fn test_field_ts() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).ts) as usize - ptr as usize - }, - 0usize, - concat!( - "Offset of field: ", - stringify!(onesecond), - "::", - stringify!(ts) - ) - ); - } - test_field_ts(); - fn test_field_clear() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).clear) as usize - ptr as usize - }, - 8usize, - concat!( - "Offset of field: ", - stringify!(onesecond), - "::", - stringify!(clear) - ) - ); - } - test_field_clear(); - fn test_field_bitmap() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).bitmap) as usize - ptr as usize - }, - 12usize, - concat!( - "Offset of field: ", - stringify!(onesecond), - "::", - stringify!(bitmap) - ) - ); - } - test_field_bitmap(); -} -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct seconds4_ring { - pub os: [onesecond; 4usize], - pub idx: u32_, -} -#[test] -fn bindgen_test_layout_seconds4_ring() { - assert_eq!( - ::std::mem::size_of::(), - 584usize, - concat!("Size of: ", stringify!(seconds4_ring)) - ); - assert_eq!( - ::std::mem::align_of::(), - 8usize, - concat!("Alignment of ", stringify!(seconds4_ring)) - ); - fn test_field_os() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = 
uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).os) as usize - ptr as usize - }, - 0usize, - concat!( - "Offset of field: ", - stringify!(seconds4_ring), - "::", - stringify!(os) - ) - ); - } - test_field_os(); - fn test_field_idx() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).idx) as usize - ptr as usize - }, - 576usize, - concat!( - "Offset of field: ", - stringify!(seconds4_ring), - "::", - stringify!(idx) - ) - ); - } - test_field_idx(); -} -pub const PING_SND: _bindgen_ty_3 = 0; -pub const PING_NET_DEV_QUEUE: _bindgen_ty_3 = 1; -pub const PING_NET_DEV_XMIT: _bindgen_ty_3 = 2; -pub const PING_DEV_RCV: _bindgen_ty_3 = 3; -pub const PING_NETIF_RCV: _bindgen_ty_3 = 4; -pub const PING_ICMP_RCV: _bindgen_ty_3 = 5; -pub const PING_RCV: _bindgen_ty_3 = 6; -pub type _bindgen_ty_3 = ::std::os::raw::c_uint; -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct icmp_event { - pub type_: u8_, - pub icmp_type: u8_, - pub cpu: u16_, - pub seq: u16_, - pub id: u16_, - pub ts: u64_, - pub skb_ts: u64_, - pub pid: u32_, - pub comm: [u8_; 16usize], -} -#[test] -fn bindgen_test_layout_icmp_event() { - assert_eq!( - ::std::mem::size_of::(), - 48usize, - concat!("Size of: ", stringify!(icmp_event)) - ); - assert_eq!( - ::std::mem::align_of::(), - 8usize, - concat!("Alignment of ", stringify!(icmp_event)) - ); - fn test_field_type() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).type_) as usize - ptr as usize - }, - 0usize, - concat!( - "Offset of field: ", - stringify!(icmp_event), - "::", - stringify!(type_) - ) - ); - } - test_field_type(); - fn test_field_icmp_type() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).icmp_type) as usize - ptr as usize - }, - 1usize, - concat!( - "Offset of field: ", - 
stringify!(icmp_event), - "::", - stringify!(icmp_type) - ) - ); - } - test_field_icmp_type(); - fn test_field_cpu() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).cpu) as usize - ptr as usize - }, - 2usize, - concat!( - "Offset of field: ", - stringify!(icmp_event), - "::", - stringify!(cpu) - ) - ); - } - test_field_cpu(); - fn test_field_seq() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).seq) as usize - ptr as usize - }, - 4usize, - concat!( - "Offset of field: ", - stringify!(icmp_event), - "::", - stringify!(seq) - ) - ); - } - test_field_seq(); - fn test_field_id() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).id) as usize - ptr as usize - }, - 6usize, - concat!( - "Offset of field: ", - stringify!(icmp_event), - "::", - stringify!(id) - ) - ); - } - test_field_id(); - fn test_field_ts() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).ts) as usize - ptr as usize - }, - 8usize, - concat!( - "Offset of field: ", - stringify!(icmp_event), - "::", - stringify!(ts) - ) - ); - } - test_field_ts(); - fn test_field_skb_ts() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).skb_ts) as usize - ptr as usize - }, - 16usize, - concat!( - "Offset of field: ", - stringify!(icmp_event), - "::", - stringify!(skb_ts) - ) - ); - } - test_field_skb_ts(); - fn test_field_pid() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).pid) as usize - ptr as usize - }, - 24usize, - concat!( - "Offset of field: ", - stringify!(icmp_event), - "::", - 
stringify!(pid) - ) - ); - } - test_field_pid(); - fn test_field_comm() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).comm) as usize - ptr as usize - }, - 28usize, - concat!( - "Offset of field: ", - stringify!(icmp_event), - "::", - stringify!(comm) - ) - ); - } - test_field_comm(); -} diff --git a/source/tools/detect/net/rtrace/latency/icmp/src/bpf/bpf_core.h b/source/tools/detect/net/rtrace/latency/icmp/src/bpf/bpf_core.h deleted file mode 120000 index c697f2eea8872b3020c841ceeacc8cd71357936b..0000000000000000000000000000000000000000 --- a/source/tools/detect/net/rtrace/latency/icmp/src/bpf/bpf_core.h +++ /dev/null @@ -1 +0,0 @@ -../../../../bpf_core.h \ No newline at end of file diff --git a/source/tools/detect/net/rtrace/latency/icmp/src/bpf/common.h b/source/tools/detect/net/rtrace/latency/icmp/src/bpf/common.h deleted file mode 120000 index 9575095a7a49abb4d22bac8e91681c560ad96e3c..0000000000000000000000000000000000000000 --- a/source/tools/detect/net/rtrace/latency/icmp/src/bpf/common.h +++ /dev/null @@ -1 +0,0 @@ -../../../../common.h \ No newline at end of file diff --git a/source/tools/detect/net/rtrace/latency/icmp/src/bpf/icmp.h b/source/tools/detect/net/rtrace/latency/icmp/src/bpf/icmp.h deleted file mode 100644 index bf9681d66c99efda759ac479084d67277172984f..0000000000000000000000000000000000000000 --- a/source/tools/detect/net/rtrace/latency/icmp/src/bpf/icmp.h +++ /dev/null @@ -1,30 +0,0 @@ -#ifndef __ICMP_H -#define __ICMP_H -#include "common.h" - -enum -{ - PING_SND = 0, - PING_NET_DEV_QUEUE, - PING_NET_DEV_XMIT, - PING_DEV_RCV, - PING_NETIF_RCV, - PING_ICMP_RCV, - PING_RCV, -}; - -struct icmp_event -{ - u8 type; - // echo or reply - u8 icmp_type; - u16 cpu; - u16 seq; - u16 id; - u64 ts; - u64 skb_ts; - u32 pid; - u8 comm[16]; -}; - -#endif diff --git a/source/tools/detect/net/rtrace/latency/icmp/src/icmp.rs 
b/source/tools/detect/net/rtrace/latency/icmp/src/icmp.rs deleted file mode 100644 index 3ada83a702ed810befece01e59dfca6eaab1d7d8..0000000000000000000000000000000000000000 --- a/source/tools/detect/net/rtrace/latency/icmp/src/icmp.rs +++ /dev/null @@ -1,166 +0,0 @@ -use crate::bindings::*; -use crate::skel::*; -use crate::IcmpEvent; -use crate::IcmpEventType; -use crate::IcmpEvents; -use anyhow::{bail, Result}; -use builder::SkelBuilder; -use once_cell::sync::Lazy; -use std::collections::HashMap; -use std::sync::Mutex; -use std::time::Duration; - -#[derive(SkelBuilder)] -pub struct Icmp<'a> { - pub skel: IcmpSkel<'a>, - rx: Option)>>, - events: HashMap<(u16, u16), Vec>, - delta: u64, -} - -impl<'a> Icmp<'a> { - // open, load and attach - pub fn new(debug: bool, btf: &Option) -> Icmp<'a> { - let mut icmp = Icmp::builder() - .open(debug, btf) - .load() - .open_perf() - .attach() - .build(); - icmp.delta = utils::timestamp::delta_of_mono_real_time(); - icmp - } - - fn process_event(&mut self, ie: icmp_event) -> Option<(IcmpEvents, IcmpEvents)> { - let key = (ie.id, ie.seq); - self.events.entry(key).or_insert(vec![]).push(ie); - - if key.1 == 0 { - return None; - } - - let prevkey = (key.0, key.1 - 1); - if let Some(events) = self.events.remove(&prevkey) { - let mut send = IcmpEvents::new(true, prevkey.0, prevkey.1); - let mut recv = IcmpEvents::new(false, prevkey.0, prevkey.1); - - for event in &events { - let is_echo = event.icmp_type == 8; // ICMP_ECHO:8 ICMP_ECHOREPLY:0 - let ty = IcmpEventType::try_from(event.type_ as u32).expect("wrong icmp type"); - let ts = event.ts; - let cpu = event.cpu; - let pid = event.pid; - - let mut idx = 0; - for i in &event.comm { - if *i == '\0' as u8 { - break; - } - idx += 1; - } - let comm = unsafe { String::from_utf8_unchecked(event.comm[..idx].to_vec()) }; - - let mut ie = IcmpEvent::new(cpu, ty, ts); - ie.set_pid(pid); - ie.set_comm(comm); - - match ty { - IcmpEventType::PingSnd => { - send.push(ie); - } - - 
IcmpEventType::PingNetDevQueue | IcmpEventType::PingNetDevXmit => { - if is_echo { - send.push(ie); - } else { - recv.push(ie); - } - } - - IcmpEventType::PingDevRcv => {} - - IcmpEventType::PingNetifRcv => { - if is_echo { - recv.push(ie); - } else { - send.push(ie); - } - } - - IcmpEventType::PingIcmpRcv => { - let mut skbts = event.skb_ts; - - if skbts > self.delta { - skbts -= self.delta; - } - - if is_echo { - recv.push(ie); - // if skbts != 0 { - // recv.push(IcmpEvent::new(cpu, IcmpEventType::PingDevRcv, skbts)); - // } - } else { - send.push(ie); - // if skbts != 0 { - // send.push(IcmpEvent::new(cpu, IcmpEventType::PingDevRcv, skbts)); - // } - } - } - - IcmpEventType::PingRcv => { - send.push(ie); - } - } - } - - send.sort(); - recv.sort(); - return Some((send, recv)); - } - - None - } - - // return two events: sender and receiver - pub fn poll(&mut self, timeout: Duration) -> Result> { - if let Some(rx) = &self.rx { - match rx.recv_timeout(timeout) { - Ok(mut data) => { - // https://stackoverflow.com/questions/42499049/transmuting-u8-buffer-to-struct-in-rust - let (head, body, tail) = unsafe { data.1.align_to_mut::() }; - assert!(head.is_empty(), "Data was not aligned"); - - let mut ie = body[0]; - ie.cpu = data.0 as u16; - return Ok(self.process_event(ie)); - } - Err(_) => return Ok(None), - } - } - bail!("perf channel receiver is none") - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_latency_icmp_basic() { - utils::bump_memlock_rlimit().unwrap(); - let mut icmp = Icmp::new(true, &None); - loop { - if let Some(event) = icmp.poll(std::time::Duration::from_millis(2000)).unwrap() { - println!("{} {}", event.0, event.1); - return; - } - } - } - - #[test] - fn test_latency_icmp_icmp_event_type() { - - let ty = IcmpEventType::PingDevRcv; - } -} - diff --git a/source/tools/detect/net/rtrace/latency/icmp/src/lib.rs b/source/tools/detect/net/rtrace/latency/icmp/src/lib.rs deleted file mode 100644 index 
b2a3c429be824cdd5292f0856bae520c588acc5f..0000000000000000000000000000000000000000 --- a/source/tools/detect/net/rtrace/latency/icmp/src/lib.rs +++ /dev/null @@ -1,155 +0,0 @@ -mod bindings { - include!("bpf/bindings.rs"); -} - -mod skel { - include!(concat!(env!("OUT_DIR"), "/icmp.skel.rs")); -} - -mod icmp; - -pub use {icmp::Icmp, skel::OpenIcmpSkel}; - -use bindings::*; -use cenum_rs::CEnum; -use std::fmt; -use serde::{Deserialize, Serialize}; - -#[derive(Clone, Copy, CEnum, Serialize, Deserialize)] -pub enum IcmpEventType { - #[cenum(value = "PING_SND", display = "PingSend")] - PingSnd, - #[cenum(value = "PING_NET_DEV_QUEUE", display = "DevQueue")] - PingNetDevQueue, - #[cenum(value = "PING_NET_DEV_XMIT", display = "DevXmit")] - PingNetDevXmit, - #[cenum(value = "PING_DEV_RCV", display = "DevRcv")] - PingDevRcv, - #[cenum(value = "PING_NETIF_RCV", display = "NetifRcv")] - PingNetifRcv, - #[cenum(value = "PING_ICMP_RCV", display = "IcmpRcv")] - PingIcmpRcv, - #[cenum(value = "PING_RCV", display = "PingRcv")] - PingRcv, -} - - -#[derive(Serialize, Deserialize)] -pub struct IcmpEvent { - pub cpu: u16, - pub ty: IcmpEventType, - pub ts: u64, - pub pid: u32, - pub comm: String, -} - -impl IcmpEvent { - pub(crate) fn new(cpu: u16, ty: IcmpEventType, ts: u64) -> Self { - IcmpEvent { - cpu, - ty, - ts, - pid: 0, - comm: "0".to_owned(), - } - } - - pub fn set_pid(&mut self, pid: u32) { - self.pid = pid; - } - - pub fn set_comm(&mut self, comm: String) { - self.comm = comm; - } - - // pub fn set_comm(&mut self, comm: ) { - - // } - - pub fn ts(&self) -> u64 { - self.ts - } -} - -#[derive(Serialize, Deserialize)] -pub struct IcmpEvents { - pub sender: bool, - pub id: u16, - pub seq: u16, - pub events: Vec, -} - -impl IcmpEvents { - pub(crate) fn new(sender: bool, id: u16, seq: u16) -> Self { - IcmpEvents { - sender, - id, - seq, - events: vec![], - } - } - - pub fn sender_ts(&self) -> u64 { - todo!() - } - - pub fn receiver_ts(&self) -> u64 { - todo!() - } - - pub fn 
start_ts(&self) -> u64 { - self.events[0].ts - } - - pub fn end_ts(&self) -> u64 { - self.events.last().unwrap().ts - } - - pub fn duration(&self) -> u64 { - self.end_ts() - self.start_ts() - } - - pub(crate) fn push(&mut self, event: IcmpEvent) { - self.events.push(event); - } - - pub(crate) fn sort(&mut self) { - self.events.sort_by(|a, b| a.ts().cmp(&b.ts())); - } - - pub fn valid(&self) -> bool { - !self.events.is_empty() - } -} - -impl fmt::Display for IcmpEvents { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - if self.events.len() == 0 { - return write!(f, ""); - } - - if self.sender { - write!(f, "sender: ")?; - } else { - write!(f, "receiver: ")?; - } - - write!(f, "id: {} seq: {}, ", self.id, self.seq)?; - let mut pre_ts = self.events[0].ts; - write!(f, "{}({},{}/{})", self.events[0].ty, self.events[0].cpu, self.events[0].pid, self.events[0].comm)?; - for i in 1..self.events.len() { - let delta = (self.events[i].ts - pre_ts) / 1000; - pre_ts = self.events[i].ts; - write!( - f, - "-> {}us ->{}({},{}/{})", - delta, - self.events[i].ty, - self.events[i].cpu, - self.events[i].pid, - self.events[i].comm - )?; - } - write!(f, "") - } -} diff --git a/source/tools/detect/net/rtrace/resources/rtrace b/source/tools/detect/net/rtrace/resources/rtrace new file mode 100755 index 0000000000000000000000000000000000000000..f0c2aa8b1dbc90eb372f69c956a4073525bc9667 Binary files /dev/null and b/source/tools/detect/net/rtrace/resources/rtrace differ diff --git a/source/tools/detect/net/rtrace/resources/rtrace.db b/source/tools/detect/net/rtrace/resources/rtrace.db new file mode 100644 index 0000000000000000000000000000000000000000..dc938cb2c0ec6667a1a662318f8e7194cd7bfe73 Binary files /dev/null and b/source/tools/detect/net/rtrace/resources/rtrace.db differ diff --git a/source/tools/detect/net/rtrace/retran/Cargo.toml b/source/tools/detect/net/rtrace/retran/Cargo.toml deleted file mode 100644 index 
f2c006d53213f6f4efa621b895782c5780d90219..0000000000000000000000000000000000000000 --- a/source/tools/detect/net/rtrace/retran/Cargo.toml +++ /dev/null @@ -1,29 +0,0 @@ -[package] -name = "retran" -version = "0.1.0" -edition = "2021" - -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html - -[dependencies] -libbpf-rs = "0.19" -libbpf-sys = { version = "1.0.3" } -log = "0.4.14" -libc = "0.2.125" -anyhow = "1.0.57" -once_cell = "1.8.0" -crossbeam-channel = "0.5" -cenum-rs = { git = "https://github.com/chengshuyi/cenum-rs.git" } -plain = "0.2" -utils = { path = "../utils" } -eutils-rs = { git = "https://github.com/chengshuyi/eutils-rs.git" } -gettid = "0.1.2" -builder = { path = "../builder" } -num_cpus = "1.0" -perf-event-open-sys = "1.0" -serde_json = "1.0" -serde = {version = "1.0", features = ["derive"]} - -[build-dependencies] -libbpf-cargo = "0.13" -bindgen = "0.60.1" diff --git a/source/tools/detect/net/rtrace/retran/build.rs b/source/tools/detect/net/rtrace/retran/build.rs deleted file mode 100644 index cbc36300e26e586aabc59ea9b01e599989ee6c64..0000000000000000000000000000000000000000 --- a/source/tools/detect/net/rtrace/retran/build.rs +++ /dev/null @@ -1,28 +0,0 @@ -use std::{env, path::PathBuf}; -use libbpf_cargo::SkeletonBuilder; -const SRC: &str = "src/bpf/retran.bpf.c"; -const HDR: &str = "src/bpf/retran.h"; - -fn main() { - let mut out = PathBuf::from(env::var_os("OUT_DIR").expect("OUT_DIR must be set in build script")); - out.push("retran.skel.rs"); - SkeletonBuilder::new() - .source(SRC) - .build_and_generate(&out) - .unwrap(); - - // let bindings = bindgen::Builder::default() - // .header(HDR) - // .parse_callbacks(Box::new(bindgen::CargoCallbacks)) - // .generate() - // .expect("Unable to generate bindings for retran crate"); - - // out.pop(); - // out.push("bindings.rs"); - // bindings - // .write_to_file(&out) - // .expect("Couldn't write bindings for retran crate!"); - - 
println!("cargo:rerun-if-changed={}", SRC); - println!("cargo:rerun-if-changed={}", HDR); -} diff --git a/source/tools/detect/net/rtrace/retran/src/bpf/bindings.rs b/source/tools/detect/net/rtrace/retran/src/bpf/bindings.rs deleted file mode 100644 index 5982157bf668847a4022f7d858d599553d6affa0..0000000000000000000000000000000000000000 --- a/source/tools/detect/net/rtrace/retran/src/bpf/bindings.rs +++ /dev/null @@ -1,1134 +0,0 @@ -/* automatically generated by rust-bindgen 0.60.1 */ - -pub const MAX_STACK_DEPTH: u32 = 20; -pub type u8_ = ::std::os::raw::c_uchar; -pub type u16_ = ::std::os::raw::c_ushort; -pub type u32_ = ::std::os::raw::c_uint; -pub type u64_ = ::std::os::raw::c_ulonglong; -pub const DROP_KFREE_SKB: _bindgen_ty_1 = 0; -pub const DROP_TCP_DROP: _bindgen_ty_1 = 1; -pub const DROP_IPTABLES_DROP: _bindgen_ty_1 = 2; -pub const DROP_NFCONNTRACK_DROP: _bindgen_ty_1 = 3; -pub const LATENCY_EVENT: _bindgen_ty_1 = 4; -pub const CONNECT_LATENCY_EVENT: _bindgen_ty_1 = 5; -pub const EVENT_UNKNOWN: _bindgen_ty_1 = 6; -pub type _bindgen_ty_1 = ::std::os::raw::c_uint; -pub const ERR_OK: _bindgen_ty_2 = 0; -pub const ERR_PROTOCOL_NOT_DETERMINED: _bindgen_ty_2 = 1; -pub const ERR_PROTOCOL_NOT_SUPPORT: _bindgen_ty_2 = 2; -pub type _bindgen_ty_2 = ::std::os::raw::c_uint; -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct addr_pair { - pub saddr: u32_, - pub daddr: u32_, - pub sport: u16_, - pub dport: u16_, -} -#[test] -fn bindgen_test_layout_addr_pair() { - assert_eq!( - ::std::mem::size_of::(), - 12usize, - concat!("Size of: ", stringify!(addr_pair)) - ); - assert_eq!( - ::std::mem::align_of::(), - 4usize, - concat!("Alignment of ", stringify!(addr_pair)) - ); - fn test_field_saddr() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).saddr) as usize - ptr as usize - }, - 0usize, - concat!( - "Offset of field: ", - stringify!(addr_pair), - "::", - stringify!(saddr) - ) - ); - 
} - test_field_saddr(); - fn test_field_daddr() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).daddr) as usize - ptr as usize - }, - 4usize, - concat!( - "Offset of field: ", - stringify!(addr_pair), - "::", - stringify!(daddr) - ) - ); - } - test_field_daddr(); - fn test_field_sport() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).sport) as usize - ptr as usize - }, - 8usize, - concat!( - "Offset of field: ", - stringify!(addr_pair), - "::", - stringify!(sport) - ) - ); - } - test_field_sport(); - fn test_field_dport() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).dport) as usize - ptr as usize - }, - 10usize, - concat!( - "Offset of field: ", - stringify!(addr_pair), - "::", - stringify!(dport) - ) - ); - } - test_field_dport(); -} -#[repr(C)] -#[derive(Copy, Clone)] -pub struct event { - pub type_: u8_, - pub state: u8_, - pub protocol: u8_, - pub error: u8_, - pub comm: [u8_; 16usize], - pub stackid: u32_, - pub pid: u32_, - pub ts: u64_, - pub ap: addr_pair, - pub __bindgen_anon_1: event__bindgen_ty_1, -} -#[repr(C)] -#[derive(Copy, Clone)] -pub union event__bindgen_ty_1 { - pub __bindgen_anon_1: event__bindgen_ty_1__bindgen_ty_1, - pub drop_params: event__bindgen_ty_1__bindgen_ty_2, - pub abnormal: event__bindgen_ty_1__bindgen_ty_3, - pub connectlatency: event__bindgen_ty_1__bindgen_ty_4, -} -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct event__bindgen_ty_1__bindgen_ty_1 { - pub pidtime_array_idx: u32_, - pub socktime_array_idx: u32_, - pub queue_ts: u64_, - pub rcv_ts: u64_, -} -#[test] -fn bindgen_test_layout_event__bindgen_ty_1__bindgen_ty_1() { - assert_eq!( - ::std::mem::size_of::(), - 24usize, - concat!("Size of: ", stringify!(event__bindgen_ty_1__bindgen_ty_1)) - ); - 
assert_eq!( - ::std::mem::align_of::(), - 8usize, - concat!( - "Alignment of ", - stringify!(event__bindgen_ty_1__bindgen_ty_1) - ) - ); - fn test_field_pidtime_array_idx() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).pidtime_array_idx) as usize - ptr as usize - }, - 0usize, - concat!( - "Offset of field: ", - stringify!(event__bindgen_ty_1__bindgen_ty_1), - "::", - stringify!(pidtime_array_idx) - ) - ); - } - test_field_pidtime_array_idx(); - fn test_field_socktime_array_idx() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).socktime_array_idx) as usize - ptr as usize - }, - 4usize, - concat!( - "Offset of field: ", - stringify!(event__bindgen_ty_1__bindgen_ty_1), - "::", - stringify!(socktime_array_idx) - ) - ); - } - test_field_socktime_array_idx(); - fn test_field_queue_ts() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).queue_ts) as usize - ptr as usize - }, - 8usize, - concat!( - "Offset of field: ", - stringify!(event__bindgen_ty_1__bindgen_ty_1), - "::", - stringify!(queue_ts) - ) - ); - } - test_field_queue_ts(); - fn test_field_rcv_ts() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).rcv_ts) as usize - ptr as usize - }, - 16usize, - concat!( - "Offset of field: ", - stringify!(event__bindgen_ty_1__bindgen_ty_1), - "::", - stringify!(rcv_ts) - ) - ); - } - test_field_rcv_ts(); -} -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct event__bindgen_ty_1__bindgen_ty_2 { - pub name: [u8_; 32usize], - pub hook: u32_, - pub sk_protocol: u8_, - pub skap: addr_pair, -} -#[test] -fn bindgen_test_layout_event__bindgen_ty_1__bindgen_ty_2() { - assert_eq!( - ::std::mem::size_of::(), - 52usize, - concat!("Size 
of: ", stringify!(event__bindgen_ty_1__bindgen_ty_2)) - ); - assert_eq!( - ::std::mem::align_of::(), - 4usize, - concat!( - "Alignment of ", - stringify!(event__bindgen_ty_1__bindgen_ty_2) - ) - ); - fn test_field_name() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).name) as usize - ptr as usize - }, - 0usize, - concat!( - "Offset of field: ", - stringify!(event__bindgen_ty_1__bindgen_ty_2), - "::", - stringify!(name) - ) - ); - } - test_field_name(); - fn test_field_hook() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).hook) as usize - ptr as usize - }, - 32usize, - concat!( - "Offset of field: ", - stringify!(event__bindgen_ty_1__bindgen_ty_2), - "::", - stringify!(hook) - ) - ); - } - test_field_hook(); - fn test_field_sk_protocol() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).sk_protocol) as usize - ptr as usize - }, - 36usize, - concat!( - "Offset of field: ", - stringify!(event__bindgen_ty_1__bindgen_ty_2), - "::", - stringify!(sk_protocol) - ) - ); - } - test_field_sk_protocol(); - fn test_field_skap() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).skap) as usize - ptr as usize - }, - 40usize, - concat!( - "Offset of field: ", - stringify!(event__bindgen_ty_1__bindgen_ty_2), - "::", - stringify!(skap) - ) - ); - } - test_field_skap(); -} -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct event__bindgen_ty_1__bindgen_ty_3 { - pub i_ino: u32_, - pub sk_ack_backlog: u32_, - pub icsk_accept_queue: u32_, - pub sk_max_ack_backlog: u32_, - pub sk_wmem_queued: u32_, - pub sndbuf: u32_, - pub rmem_alloc: u32_, - pub sk_rcvbuf: u32_, - pub drop: u32_, - pub retran: u32_, - pub ooo: u32_, -} -#[test] -fn 
bindgen_test_layout_event__bindgen_ty_1__bindgen_ty_3() { - assert_eq!( - ::std::mem::size_of::(), - 44usize, - concat!("Size of: ", stringify!(event__bindgen_ty_1__bindgen_ty_3)) - ); - assert_eq!( - ::std::mem::align_of::(), - 4usize, - concat!( - "Alignment of ", - stringify!(event__bindgen_ty_1__bindgen_ty_3) - ) - ); - fn test_field_i_ino() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).i_ino) as usize - ptr as usize - }, - 0usize, - concat!( - "Offset of field: ", - stringify!(event__bindgen_ty_1__bindgen_ty_3), - "::", - stringify!(i_ino) - ) - ); - } - test_field_i_ino(); - fn test_field_sk_ack_backlog() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).sk_ack_backlog) as usize - ptr as usize - }, - 4usize, - concat!( - "Offset of field: ", - stringify!(event__bindgen_ty_1__bindgen_ty_3), - "::", - stringify!(sk_ack_backlog) - ) - ); - } - test_field_sk_ack_backlog(); - fn test_field_icsk_accept_queue() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).icsk_accept_queue) as usize - ptr as usize - }, - 8usize, - concat!( - "Offset of field: ", - stringify!(event__bindgen_ty_1__bindgen_ty_3), - "::", - stringify!(icsk_accept_queue) - ) - ); - } - test_field_icsk_accept_queue(); - fn test_field_sk_max_ack_backlog() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).sk_max_ack_backlog) as usize - ptr as usize - }, - 12usize, - concat!( - "Offset of field: ", - stringify!(event__bindgen_ty_1__bindgen_ty_3), - "::", - stringify!(sk_max_ack_backlog) - ) - ); - } - test_field_sk_max_ack_backlog(); - fn test_field_sk_wmem_queued() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr 
= uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).sk_wmem_queued) as usize - ptr as usize - }, - 16usize, - concat!( - "Offset of field: ", - stringify!(event__bindgen_ty_1__bindgen_ty_3), - "::", - stringify!(sk_wmem_queued) - ) - ); - } - test_field_sk_wmem_queued(); - fn test_field_sndbuf() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).sndbuf) as usize - ptr as usize - }, - 20usize, - concat!( - "Offset of field: ", - stringify!(event__bindgen_ty_1__bindgen_ty_3), - "::", - stringify!(sndbuf) - ) - ); - } - test_field_sndbuf(); - fn test_field_rmem_alloc() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).rmem_alloc) as usize - ptr as usize - }, - 24usize, - concat!( - "Offset of field: ", - stringify!(event__bindgen_ty_1__bindgen_ty_3), - "::", - stringify!(rmem_alloc) - ) - ); - } - test_field_rmem_alloc(); - fn test_field_sk_rcvbuf() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).sk_rcvbuf) as usize - ptr as usize - }, - 28usize, - concat!( - "Offset of field: ", - stringify!(event__bindgen_ty_1__bindgen_ty_3), - "::", - stringify!(sk_rcvbuf) - ) - ); - } - test_field_sk_rcvbuf(); - fn test_field_drop() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).drop) as usize - ptr as usize - }, - 32usize, - concat!( - "Offset of field: ", - stringify!(event__bindgen_ty_1__bindgen_ty_3), - "::", - stringify!(drop) - ) - ); - } - test_field_drop(); - fn test_field_retran() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).retran) as usize - ptr as usize - }, - 36usize, - concat!( - "Offset of field: ", - 
stringify!(event__bindgen_ty_1__bindgen_ty_3), - "::", - stringify!(retran) - ) - ); - } - test_field_retran(); - fn test_field_ooo() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).ooo) as usize - ptr as usize - }, - 40usize, - concat!( - "Offset of field: ", - stringify!(event__bindgen_ty_1__bindgen_ty_3), - "::", - stringify!(ooo) - ) - ); - } - test_field_ooo(); -} -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct event__bindgen_ty_1__bindgen_ty_4 { - pub sock: u64_, -} -#[test] -fn bindgen_test_layout_event__bindgen_ty_1__bindgen_ty_4() { - assert_eq!( - ::std::mem::size_of::(), - 8usize, - concat!("Size of: ", stringify!(event__bindgen_ty_1__bindgen_ty_4)) - ); - assert_eq!( - ::std::mem::align_of::(), - 8usize, - concat!( - "Alignment of ", - stringify!(event__bindgen_ty_1__bindgen_ty_4) - ) - ); - fn test_field_sock() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).sock) as usize - ptr as usize - }, - 0usize, - concat!( - "Offset of field: ", - stringify!(event__bindgen_ty_1__bindgen_ty_4), - "::", - stringify!(sock) - ) - ); - } - test_field_sock(); -} -#[test] -fn bindgen_test_layout_event__bindgen_ty_1() { - assert_eq!( - ::std::mem::size_of::(), - 56usize, - concat!("Size of: ", stringify!(event__bindgen_ty_1)) - ); - assert_eq!( - ::std::mem::align_of::(), - 8usize, - concat!("Alignment of ", stringify!(event__bindgen_ty_1)) - ); - fn test_field_drop_params() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).drop_params) as usize - ptr as usize - }, - 0usize, - concat!( - "Offset of field: ", - stringify!(event__bindgen_ty_1), - "::", - stringify!(drop_params) - ) - ); - } - test_field_drop_params(); - fn test_field_abnormal() { - assert_eq!( - unsafe { - let uninit = 
::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).abnormal) as usize - ptr as usize - }, - 0usize, - concat!( - "Offset of field: ", - stringify!(event__bindgen_ty_1), - "::", - stringify!(abnormal) - ) - ); - } - test_field_abnormal(); - fn test_field_connectlatency() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).connectlatency) as usize - ptr as usize - }, - 0usize, - concat!( - "Offset of field: ", - stringify!(event__bindgen_ty_1), - "::", - stringify!(connectlatency) - ) - ); - } - test_field_connectlatency(); -} -#[test] -fn bindgen_test_layout_event() { - assert_eq!( - ::std::mem::size_of::(), - 112usize, - concat!("Size of: ", stringify!(event)) - ); - assert_eq!( - ::std::mem::align_of::(), - 8usize, - concat!("Alignment of ", stringify!(event)) - ); - fn test_field_type() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).type_) as usize - ptr as usize - }, - 0usize, - concat!( - "Offset of field: ", - stringify!(event), - "::", - stringify!(type_) - ) - ); - } - test_field_type(); - fn test_field_state() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).state) as usize - ptr as usize - }, - 1usize, - concat!( - "Offset of field: ", - stringify!(event), - "::", - stringify!(state) - ) - ); - } - test_field_state(); - fn test_field_protocol() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).protocol) as usize - ptr as usize - }, - 2usize, - concat!( - "Offset of field: ", - stringify!(event), - "::", - stringify!(protocol) - ) - ); - } - test_field_protocol(); - fn test_field_error() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); 
- let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).error) as usize - ptr as usize - }, - 3usize, - concat!( - "Offset of field: ", - stringify!(event), - "::", - stringify!(error) - ) - ); - } - test_field_error(); - fn test_field_comm() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).comm) as usize - ptr as usize - }, - 4usize, - concat!( - "Offset of field: ", - stringify!(event), - "::", - stringify!(comm) - ) - ); - } - test_field_comm(); - fn test_field_stackid() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).stackid) as usize - ptr as usize - }, - 20usize, - concat!( - "Offset of field: ", - stringify!(event), - "::", - stringify!(stackid) - ) - ); - } - test_field_stackid(); - fn test_field_pid() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).pid) as usize - ptr as usize - }, - 24usize, - concat!( - "Offset of field: ", - stringify!(event), - "::", - stringify!(pid) - ) - ); - } - test_field_pid(); - fn test_field_ts() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).ts) as usize - ptr as usize - }, - 32usize, - concat!("Offset of field: ", stringify!(event), "::", stringify!(ts)) - ); - } - test_field_ts(); - fn test_field_ap() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).ap) as usize - ptr as usize - }, - 40usize, - concat!("Offset of field: ", stringify!(event), "::", stringify!(ap)) - ); - } - test_field_ap(); -} -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct filter { - pub pid: u32_, - pub protocol: u16_, - pub distribution: u8_, - pub threshold: u64_, - pub ap: addr_pair, -} -#[test] -fn 
bindgen_test_layout_filter() { - assert_eq!( - ::std::mem::size_of::(), - 32usize, - concat!("Size of: ", stringify!(filter)) - ); - assert_eq!( - ::std::mem::align_of::(), - 8usize, - concat!("Alignment of ", stringify!(filter)) - ); - fn test_field_pid() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).pid) as usize - ptr as usize - }, - 0usize, - concat!( - "Offset of field: ", - stringify!(filter), - "::", - stringify!(pid) - ) - ); - } - test_field_pid(); - fn test_field_protocol() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).protocol) as usize - ptr as usize - }, - 4usize, - concat!( - "Offset of field: ", - stringify!(filter), - "::", - stringify!(protocol) - ) - ); - } - test_field_protocol(); - fn test_field_distribution() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).distribution) as usize - ptr as usize - }, - 6usize, - concat!( - "Offset of field: ", - stringify!(filter), - "::", - stringify!(distribution) - ) - ); - } - test_field_distribution(); - fn test_field_threshold() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).threshold) as usize - ptr as usize - }, - 8usize, - concat!( - "Offset of field: ", - stringify!(filter), - "::", - stringify!(threshold) - ) - ); - } - test_field_threshold(); - fn test_field_ap() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).ap) as usize - ptr as usize - }, - 16usize, - concat!( - "Offset of field: ", - stringify!(filter), - "::", - stringify!(ap) - ) - ); - } - test_field_ap(); -} -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct onesecond { - pub ts: u64_, - pub 
clear: u32_, - pub bitmap: [u32_; 32usize], -} -#[test] -fn bindgen_test_layout_onesecond() { - assert_eq!( - ::std::mem::size_of::(), - 144usize, - concat!("Size of: ", stringify!(onesecond)) - ); - assert_eq!( - ::std::mem::align_of::(), - 8usize, - concat!("Alignment of ", stringify!(onesecond)) - ); - fn test_field_ts() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).ts) as usize - ptr as usize - }, - 0usize, - concat!( - "Offset of field: ", - stringify!(onesecond), - "::", - stringify!(ts) - ) - ); - } - test_field_ts(); - fn test_field_clear() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).clear) as usize - ptr as usize - }, - 8usize, - concat!( - "Offset of field: ", - stringify!(onesecond), - "::", - stringify!(clear) - ) - ); - } - test_field_clear(); - fn test_field_bitmap() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).bitmap) as usize - ptr as usize - }, - 12usize, - concat!( - "Offset of field: ", - stringify!(onesecond), - "::", - stringify!(bitmap) - ) - ); - } - test_field_bitmap(); -} -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct seconds4_ring { - pub os: [onesecond; 4usize], - pub idx: u32_, -} -#[test] -fn bindgen_test_layout_seconds4_ring() { - assert_eq!( - ::std::mem::size_of::(), - 584usize, - concat!("Size of: ", stringify!(seconds4_ring)) - ); - assert_eq!( - ::std::mem::align_of::(), - 8usize, - concat!("Alignment of ", stringify!(seconds4_ring)) - ); - fn test_field_os() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).os) as usize - ptr as usize - }, - 0usize, - concat!( - "Offset of field: ", - stringify!(seconds4_ring), - "::", - stringify!(os) - ) - ); - } - 
test_field_os(); - fn test_field_idx() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).idx) as usize - ptr as usize - }, - 576usize, - concat!( - "Offset of field: ", - stringify!(seconds4_ring), - "::", - stringify!(idx) - ) - ); - } - test_field_idx(); -} -pub const SYN_RETRAN: _bindgen_ty_3 = 0; -pub const SLOW_START_RETRAN: _bindgen_ty_3 = 1; -pub const RTO_RETRAN: _bindgen_ty_3 = 2; -pub const FAST_RETRAN: _bindgen_ty_3 = 3; -pub const TLP: _bindgen_ty_3 = 4; -pub type _bindgen_ty_3 = ::std::os::raw::c_uint; -#[repr(C)] -#[derive(Debug, Copy, Clone)] -pub struct retran_event { - pub tcp_state: u8_, - pub ca_state: u8_, - pub retran_type: u8_, - pub ts: u64_, - pub ap: addr_pair, -} -#[test] -fn bindgen_test_layout_retran_event() { - assert_eq!( - ::std::mem::size_of::(), - 32usize, - concat!("Size of: ", stringify!(retran_event)) - ); - assert_eq!( - ::std::mem::align_of::(), - 8usize, - concat!("Alignment of ", stringify!(retran_event)) - ); - fn test_field_tcp_state() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).tcp_state) as usize - ptr as usize - }, - 0usize, - concat!( - "Offset of field: ", - stringify!(retran_event), - "::", - stringify!(tcp_state) - ) - ); - } - test_field_tcp_state(); - fn test_field_ca_state() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).ca_state) as usize - ptr as usize - }, - 1usize, - concat!( - "Offset of field: ", - stringify!(retran_event), - "::", - stringify!(ca_state) - ) - ); - } - test_field_ca_state(); - fn test_field_retran_type() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).retran_type) as usize - ptr as usize - }, - 2usize, - concat!( - "Offset of field: ", 
- stringify!(retran_event), - "::", - stringify!(retran_type) - ) - ); - } - test_field_retran_type(); - fn test_field_ts() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).ts) as usize - ptr as usize - }, - 8usize, - concat!( - "Offset of field: ", - stringify!(retran_event), - "::", - stringify!(ts) - ) - ); - } - test_field_ts(); - fn test_field_ap() { - assert_eq!( - unsafe { - let uninit = ::std::mem::MaybeUninit::::uninit(); - let ptr = uninit.as_ptr(); - ::std::ptr::addr_of!((*ptr).ap) as usize - ptr as usize - }, - 16usize, - concat!( - "Offset of field: ", - stringify!(retran_event), - "::", - stringify!(ap) - ) - ); - } - test_field_ap(); -} diff --git a/source/tools/detect/net/rtrace/retran/src/bpf/bpf_core.h b/source/tools/detect/net/rtrace/retran/src/bpf/bpf_core.h deleted file mode 120000 index a95235716a58e33b85e3602ede588a52d61162cf..0000000000000000000000000000000000000000 --- a/source/tools/detect/net/rtrace/retran/src/bpf/bpf_core.h +++ /dev/null @@ -1 +0,0 @@ -../../../bpf_core.h \ No newline at end of file diff --git a/source/tools/detect/net/rtrace/retran/src/bpf/common.h b/source/tools/detect/net/rtrace/retran/src/bpf/common.h deleted file mode 120000 index 372cd74878559d6d745e6a121a91ff585fbb2d34..0000000000000000000000000000000000000000 --- a/source/tools/detect/net/rtrace/retran/src/bpf/common.h +++ /dev/null @@ -1 +0,0 @@ -../../../common.h \ No newline at end of file diff --git a/source/tools/detect/net/rtrace/retran/src/lib.rs b/source/tools/detect/net/rtrace/retran/src/lib.rs deleted file mode 100644 index 8b78a1c3dd71b23cc2952de3d2fe9fcbb98370ca..0000000000000000000000000000000000000000 --- a/source/tools/detect/net/rtrace/retran/src/lib.rs +++ /dev/null @@ -1,80 +0,0 @@ -mod bindings { - include!("bpf/bindings.rs"); -} - -mod skel { - include!(concat!(env!("OUT_DIR"), "/retran.skel.rs")); -} -mod retran; - -use bindings::retran_event; -use 
eutils_rs::net::TcpState; -use std::net::{IpAddr, Ipv4Addr, SocketAddr}; -pub use retran::Retran; - -pub struct RetranFilter { - -} - -use serde::{Deserialize, Serialize}; - - -#[derive(Serialize, Deserialize)] -pub struct RetranEvent { - pub tcp_state: String, - pub ca_state: String, - pub retran_type: String, - pub sport: u16, - pub dport: u16, - pub sip: String, - pub dip: String, - pub ts: u64, -} - - -impl RetranEvent { - pub fn from_event(event: &retran_event) -> Self { - let tcp_state = TcpState::from(event.tcp_state as i32).to_string(); - let mut ca_state = "".to_owned(); - match event.ca_state { - 0 => { - ca_state = "open".to_owned(); - } - 1 => { - ca_state = "disorder".to_owned(); - } - 2 => { - ca_state = "cwr".to_owned(); - } - 3 => { - ca_state = "recovery".to_owned(); - } - 4 => { - ca_state = "loss".to_owned(); - } - _ => { - ca_state = "none".to_owned(); - } - } - let retran_type = match event.retran_type { - 0 => "SynRetran", - 1 => "SlowStartRetran", - 2 => "RtoRetran", - 3 => "FastRetran", - 4 => "TLP", - _ => "Other", - }; - RetranEvent { - tcp_state, - ca_state, - ts: event.ts, - retran_type: retran_type.to_owned(), - - sport: event.ap.sport, - dport: event.ap.dport, - sip: IpAddr::V4(Ipv4Addr::from(u32::from_be(event.ap.saddr))).to_string(), - dip: IpAddr::V4(Ipv4Addr::from(u32::from_be(event.ap.daddr))).to_string(), - } - } -} - diff --git a/source/tools/detect/net/rtrace/retran/src/retran.rs b/source/tools/detect/net/rtrace/retran/src/retran.rs deleted file mode 100644 index 77ca732ef404105609f1ae611460b7c2be858776..0000000000000000000000000000000000000000 --- a/source/tools/detect/net/rtrace/retran/src/retran.rs +++ /dev/null @@ -1,61 +0,0 @@ -use anyhow::{bail, Result}; -use crossbeam_channel; -use std::thread; -use std::time; -use once_cell::sync::Lazy; -use builder::SkelBuilder; -use utils::macros::*; -use crate::{bindings::*, skel::*, RetranFilter, RetranEvent}; -use utils::{init_zeroed, to_vecu8}; -use std::sync::Mutex; -use 
std::time::Duration; -use utils::*; - -#[derive(SkelBuilder)] -pub struct Retran<'a> { - pub skel:RetranSkel<'a>, - rx: Option)>>, -} - -impl<'a> Retran<'a> { - pub fn new(debug: bool, btf: &Option) -> Result { - let mut retran = Retran::builder().open(debug, btf).load().open_perf().build(); - - // sock.skel.maps_mut().filter_map().update( - // &to_vec::(0), - // &to_vec::(filter), - // libbpf_rs::MapFlags::ANY, - // )?; - - retran.skel.attach()?; - Ok(retran) - } - - pub fn poll(&mut self, timeout: Duration) -> Result> { - if let Some(rx) = &self.rx { - match rx.recv_timeout(timeout) { - Ok(mut data) => { - // https://stackoverflow.com/questions/42499049/transmuting-u8-buffer-to-struct-in-rust - let (head, body, _) = unsafe { data.1.align_to_mut::() }; - assert!(head.is_empty(), "Data was not aligned"); - return Ok(Some(RetranEvent::from_event(&body[0]))); - } - Err(_) => return Ok(None), - } - } - bail!("perf channel receiver is none") - } - -} - - - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_basic() { - } -} - diff --git a/source/tools/detect/net/rtrace/rustfmt.toml b/source/tools/detect/net/rtrace/rustfmt.toml new file mode 100644 index 0000000000000000000000000000000000000000..a0bb61aaea4efe96cf5ec5fac4ec3bd31f12388e --- /dev/null +++ b/source/tools/detect/net/rtrace/rustfmt.toml @@ -0,0 +1 @@ +imports_granularity = "item" \ No newline at end of file diff --git a/source/tools/detect/net/rtrace/src/analyzer/delta_netdev.rs b/source/tools/detect/net/rtrace/src/analyzer/delta_netdev.rs new file mode 100644 index 0000000000000000000000000000000000000000..8b137891791fe96927ad78e64b0aad7bded08bdc --- /dev/null +++ b/source/tools/detect/net/rtrace/src/analyzer/delta_netdev.rs @@ -0,0 +1 @@ + diff --git a/source/tools/detect/net/rtrace/src/analyzer/delta_netstat.rs b/source/tools/detect/net/rtrace/src/analyzer/delta_netstat.rs new file mode 100644 index 0000000000000000000000000000000000000000..8b137891791fe96927ad78e64b0aad7bded08bdc --- 
/dev/null +++ b/source/tools/detect/net/rtrace/src/analyzer/delta_netstat.rs @@ -0,0 +1 @@ + diff --git a/source/tools/detect/net/rtrace/src/analyzer/delta_snmp.rs b/source/tools/detect/net/rtrace/src/analyzer/delta_snmp.rs new file mode 100644 index 0000000000000000000000000000000000000000..8b137891791fe96927ad78e64b0aad7bded08bdc --- /dev/null +++ b/source/tools/detect/net/rtrace/src/analyzer/delta_snmp.rs @@ -0,0 +1 @@ + diff --git a/source/tools/detect/net/rtrace/utils/src/drop/mod.rs b/source/tools/detect/net/rtrace/src/analyzer/mod.rs similarity index 35% rename from source/tools/detect/net/rtrace/utils/src/drop/mod.rs rename to source/tools/detect/net/rtrace/src/analyzer/mod.rs index 701cbd31442753dc4de1ecccf6e805370c79ad51..5600d1367a13726a753d72dc92df536945540e96 100644 --- a/source/tools/detect/net/rtrace/utils/src/drop/mod.rs +++ b/source/tools/detect/net/rtrace/src/analyzer/mod.rs @@ -1,5 +1,6 @@ - - -pub mod delta_dev; +pub mod delta_netdev; pub mod delta_netstat; pub mod delta_snmp; +pub mod tcpping; +pub mod tcpping_stat; +pub mod virtio; diff --git a/source/tools/detect/net/rtrace/src/analyzer/tcpping.rs b/source/tools/detect/net/rtrace/src/analyzer/tcpping.rs new file mode 100644 index 0000000000000000000000000000000000000000..a17789a9037155381f914abed45c3ba51016504a --- /dev/null +++ b/source/tools/detect/net/rtrace/src/analyzer/tcpping.rs @@ -0,0 +1,302 @@ +use crate::collector::tcpping::Tcpping; +use crate::collector::tcpping::TcppingStage; +use crate::common::sched::Process; +use crate::common::sched::Sched; +use crate::common::stats::Stats; +use crate::common::utils::ns2ms; +use std::net::Ipv4Addr; + +#[derive(Debug, Default)] +struct TcppingStat { + seq: u32, + timeout: bool, + // analysis + time_tot: u64, + time_user_tx: u64, + time_user_rx: u64, + time_kernel_tx: u64, + time_kernel_rx: u64, + time_external_link: u64, + + // ksoftirqd + in_ksoftirqd: bool, + xmit_irq: u64, + sched_irq: u64, + process: Process, +} + +impl From<&Tcpping> for 
TcppingStat { + fn from(tp: &Tcpping) -> Self { + let mut tstat = TcppingStat::default(); + tstat.seq = tp.seq; + + if tp.is_timeout() { + tstat.timeout = true; + return tstat; + } + + let xmit = tp.stage_ts(TcppingStage::TxKernelOut); + let recv = tp.stage_ts(TcppingStage::RxKernelIn); + + tstat.time_tot = tp.delta(TcppingStage::RxUser, TcppingStage::TxUser); + tstat.time_user_tx = tp.delta(TcppingStage::TxKernelIn, TcppingStage::TxUser); + tstat.time_user_rx = tp.delta(TcppingStage::RxUser, TcppingStage::RxKernelOut); + tstat.time_kernel_tx = tp.delta(TcppingStage::TxKernelOut, TcppingStage::TxKernelIn); + tstat.time_kernel_rx = tp.delta(TcppingStage::RxKernelOut, TcppingStage::RxKernelIn); + tstat.time_external_link = recv - xmit; + + let in_range = |left, right, cur| { + if cur >= left && cur <= right { + true + } else { + false + } + }; + + let scheds: Vec<(&u64, &Sched)> = tp.scheds().collect(); + if let Some(sched) = scheds.last() { + if in_range(xmit, recv, *sched.0) && sched.1.next.comm.starts_with("ksoftirqd") { + tstat.in_ksoftirqd = true; + } + } + + if tstat.in_ksoftirqd { + for (i, (&ts, sched)) in scheds.iter().enumerate() { + if in_range(xmit, recv, ts) {} + } + } + + let irqs_iter = tp.irqs(); + if let Some(&irq) = irqs_iter.last() { + if irq > xmit { + tstat.xmit_irq = irq - xmit; + + for (&ts, sched) in scheds.iter().rev() { + if ts < irq { + tstat.process = sched.next.clone(); + tstat.sched_irq = irq - ts; + break; + } + } + } + } + + tstat + } +} + +#[derive(Debug, Default)] +pub struct TcppingAnalyzer { + // raw + tpstats: Vec, + // base + packet_loss: u32, + packet_done: u32, + base_stats: Stats, + // enhanced + tx_user_stats: Stats, + tx_kernel_stats: Stats, + external_link_stats: Stats, + rx_kernel_stats: Stats, + rx_user_stats: Stats, + // diagnose + diags: Vec, +} + +macro_rules! 
collect_field { + ($st: expr, $field: ident) => { + $st.iter() + .filter(|x| !x.timeout) + .map(|x| x.$field) + .collect() + }; +} + +impl TcppingAnalyzer { + pub fn new(tps: Vec, virtio: String) -> Self { + let mut ta = TcppingAnalyzer::default(); + let tpstats: Vec = tps.iter().map(|tp| TcppingStat::from(tp)).collect(); + ta.tpstats = tpstats; + if !virtio.is_empty() { + ta.diags.push(virtio); + } + ta + } + + fn packet_count(&mut self) { + let mut packet_loss = 0; + let mut packet_done = 0; + + self.tpstats.iter().for_each(|x| { + if x.timeout { + packet_loss += 1; + } else { + packet_done += 1; + } + }); + self.packet_done = packet_done; + self.packet_loss = packet_loss; + } + + pub fn anaylysis(&mut self) { + // base + self.packet_count(); + if self.packet_done == 0 { + return; + } + let tot: Vec = collect_field!(self.tpstats, time_tot); + self.base_stats = Stats::new(tot); + + // enhanced + let tx_user = collect_field!(self.tpstats, time_user_tx); + self.tx_user_stats = Stats::new(tx_user); + + let tx_kernel = collect_field!(self.tpstats, time_kernel_tx); + self.tx_kernel_stats = Stats::new(tx_kernel); + + let external_link = collect_field!(self.tpstats, time_external_link); + self.external_link_stats = Stats::new(external_link); + + let rx_kernel = collect_field!(self.tpstats, time_kernel_rx); + self.rx_kernel_stats = Stats::new(rx_kernel); + + let rx_user = collect_field!(self.tpstats, time_user_rx); + self.rx_user_stats = Stats::new(rx_user); + + // diagnose + let is_overlimit = |avg: u64, cur: u64| { + if cur < avg { + return false; + } + if cur - avg > 500_000 && (cur - avg) * 100 / avg >= 50 { + return true; + } + false + }; + + let mut diags = vec![]; + for tp in &self.tpstats { + if tp.timeout { + continue; + } + let seq_string = format!("tcp_seq={} time={:.3}ms", tp.seq, ns2ms(tp.time_tot)); + if is_overlimit(self.tx_user_stats.avg, tp.time_user_tx) { + diags.push(format!( + "{seq_string}, reason: Sending packet in user mode is too slow" + )); + } + 
+ if is_overlimit(self.tx_kernel_stats.avg, tp.time_kernel_tx) { + diags.push(format!( + "{seq_string}, reason: Sending packet in kernel mode is too slow" + )); + } + + // if is_overlimit(out.avg, tp.time_external_link) { + // println!("seq: {}, reason: Sending packet in kernel mode is too slow", tp.seq); + // } + // detect irq off + if is_overlimit(self.external_link_stats.avg, tp.xmit_irq) { + if !tp.process.comm.contains("swapper") + && (tp.xmit_irq < tp.sched_irq * 2 && tp.sched_irq > 1_000_000) + { + diags.push(format!("{seq_string}, reason: may be that process {} has turned off interrupts. So far, this process has occupied {}ms of cpu.", tp.process.to_string(), ns2ms(tp.sched_irq))); + } else { + diags.push(format!("{seq_string}, reason: external link is too slow")); + } + } + + if is_overlimit(self.rx_kernel_stats.avg, tp.time_kernel_rx) { + diags.push(format!( + "{seq_string}, reason: Receiving packet in kernel mode is too slow", + )); + } + + if is_overlimit(self.rx_user_stats.avg, tp.time_user_rx) { + diags.push(format!( + "{seq_string}, reason: Receiving packet in user mode is too slow", + )); + } + } + self.diags.extend_from_slice(&diags); + } + + fn stats_string(&self, s: &Stats) -> String { + format!( + "min/avg/max/mdev = {:.3}/{:.3}/{:.3}/{:.3} ms", + ns2ms(s.min), + ns2ms(s.avg), + ns2ms(s.max), + ns2ms(s.mdev) + ) + } + + pub fn base_string(&self, dst: &Ipv4Addr, dport: u16) -> String { + let mut lines = vec![]; + lines.push(format!("--- {}.{} tcpping statistics ---", dst, dport)); + lines.push(format!( + "{} packets transmitted, {} received, {:.2}% packet loss", + self.packet_done + self.packet_loss, + self.packet_done, + (self.packet_loss * 100) as f32 / (self.packet_done + self.packet_loss) as f32 + )); + lines.push(self.stats_string(&self.base_stats)); + lines.join("\n") + } + + pub fn enhanced_string(&self, dst: &Ipv4Addr, dport: u16) -> String { + let mut lines = vec![]; + lines.push(format!( + "--- {}.{} tcpping enhanced statistics ---", + 
dst, dport + )); + + lines.push(format!( + "userspace transmitted time consuming {}", + self.stats_string(&self.tx_user_stats) + )); + lines.push(format!( + "kernel transmitted time consuming {}", + self.stats_string(&self.tx_kernel_stats) + )); + lines.push(format!( + "link+irq+softirq time consuming {}", + self.stats_string(&self.external_link_stats) + )); + lines.push(format!( + "kernel received time consuming {}", + self.stats_string(&self.rx_kernel_stats) + )); + lines.push(format!( + "userspace received time consuming {}", + self.stats_string(&self.rx_user_stats) + )); + lines.join("\n") + } + + pub fn diagnose_string(&self, dst: &Ipv4Addr, dport: u16) -> String { + let mut lines = vec![]; + lines.push(format!( + "--- {}.{} tcpping diagnosing report ---", + dst, dport + )); + if self.diags.len() == 0 { + lines.push("everything is ok!".to_owned()); + } else { + lines.extend_from_slice(&self.diags); + } + lines.join("\n") + } + + pub fn analysis_result(&self, dst: &Ipv4Addr, dport: u16) -> String { + let base = self.base_string(dst, dport); + let en = self.enhanced_string(dst, dport); + let diag = self.diagnose_string(dst, dport); + + format!("\n{}\n\n{}\n\n{}", base, en, diag) + } + + pub fn print(&self, dst: &Ipv4Addr, dport: u16) { + println!("{}", self.analysis_result(dst, dport)); + } +} diff --git a/source/tools/detect/net/rtrace/src/analyzer/tcpping_stat.rs b/source/tools/detect/net/rtrace/src/analyzer/tcpping_stat.rs new file mode 100644 index 0000000000000000000000000000000000000000..0b3deb9963c7fbf1d13d52e69985ecfa50f8c51e --- /dev/null +++ b/source/tools/detect/net/rtrace/src/analyzer/tcpping_stat.rs @@ -0,0 +1,324 @@ +use crate::collector::tcpping::Tcpping; +use crate::collector::tcpping::TcppingStage; +use crate::common::iqr::iqr_upper_outliers; +use crate::common::sched::Process; +use crate::common::sched::Sched; +use crate::common::stats::Stats; +use crate::common::utils::ns2ms; +use std::net::Ipv4Addr; + +#[derive(Debug, Default)] +struct 
TcppingStat { + seq: u32, + timeout: bool, + // analysis + time_tot: u64, + time_user_tx: u64, + time_user_rx: u64, + time_kernel_tx: u64, + time_kernel_rx: u64, + time_external_link: u64, + + // ksoftirqd + in_ksoftirqd: bool, + xmit_irq: u64, + sched_irq: u64, + process: Process, +} + +impl From<&Tcpping> for TcppingStat { + fn from(tp: &Tcpping) -> Self { + let mut tstat = TcppingStat::default(); + tstat.seq = tp.seq; + + if tp.is_timeout() { + tstat.timeout = true; + return tstat; + } + + let xmit = tp.stage_ts(TcppingStage::TxKernelOut); + let recv = tp.stage_ts(TcppingStage::RxKernelIn); + + tstat.time_tot = tp.delta(TcppingStage::RxUser, TcppingStage::TxUser); + tstat.time_user_tx = tp.delta(TcppingStage::TxKernelIn, TcppingStage::TxUser); + tstat.time_user_rx = tp.delta(TcppingStage::RxUser, TcppingStage::RxKernelOut); + tstat.time_kernel_tx = tp.delta(TcppingStage::TxKernelOut, TcppingStage::TxKernelIn); + tstat.time_kernel_rx = tp.delta(TcppingStage::RxKernelOut, TcppingStage::RxKernelIn); + tstat.time_external_link = recv - xmit; + + let in_range = |left, right, cur| { + if cur >= left && cur <= right { + true + } else { + false + } + }; + + let scheds: Vec<(&u64, &Sched)> = tp.scheds().collect(); + if let Some(sched) = scheds.last() { + if in_range(xmit, recv, *sched.0) && sched.1.next.comm.starts_with("ksoftirqd") { + tstat.in_ksoftirqd = true; + } + } + + if tstat.in_ksoftirqd { + for (i, (&ts, sched)) in scheds.iter().enumerate() { + if in_range(xmit, recv, ts) {} + } + } + + let irqs_iter = tp.irqs(); + if let Some(&irq) = irqs_iter.last() { + if irq > xmit { + tstat.xmit_irq = irq - xmit; + + for (&ts, sched) in scheds.iter().rev() { + if ts < irq { + tstat.process = sched.next.clone(); + tstat.sched_irq = irq - ts; + break; + } + } + } + } + + tstat + } +} + +#[derive(Debug, Default)] +pub struct TcppingStatAnalyzer { + // raw + tpstats: Vec, + // base + packet_loss: u32, + packet_done: u32, + base_stats: Stats, + // enhanced + tx_user_stats: 
Stats, + tx_kernel_stats: Stats, + external_link_stats: Stats, + rx_kernel_stats: Stats, + rx_user_stats: Stats, + // diagnose + diags: Vec, +} + +macro_rules! collect_field { + ($st: expr, $field: ident) => { + $st.iter() + .filter(|x| !x.timeout) + .map(|x| x.$field) + .collect() + }; +} + +impl TcppingStatAnalyzer { + pub fn new(tps: Vec, virtio: String) -> Self { + let mut ta = TcppingStatAnalyzer::default(); + let mut tpstats = vec![]; + for tp in tps { + if tp.is_timeout() { + continue; + } + tpstats.push(TcppingStat::from(&tp)); + } + ta.tpstats = tpstats; + if !virtio.is_empty() { + ta.diags.push(virtio); + } + ta + } + + fn packet_count(&mut self) { + let mut packet_loss = 0; + let mut packet_done = 0; + + self.tpstats.iter().for_each(|x| { + if x.timeout { + packet_loss += 1; + } else { + packet_done += 1; + } + }); + self.packet_done = packet_done; + self.packet_loss = packet_loss; + } + + pub fn anaylysis(&mut self) { + // base + self.packet_count(); + if self.packet_done == 0 { + return; + } + let tot: Vec = collect_field!(self.tpstats, time_tot); + let outliers = iqr_upper_outliers(tot.clone()); + self.base_stats = Stats::new(tot); + + // enhanced + let tx_user: Vec = collect_field!(self.tpstats, time_user_tx); + self.tx_user_stats = Stats::new(tx_user.clone()); + + let tx_kernel: Vec = collect_field!(self.tpstats, time_kernel_tx); + self.tx_kernel_stats = Stats::new(tx_kernel.clone()); + + let external_link: Vec = collect_field!(self.tpstats, time_external_link); + self.external_link_stats = Stats::new(external_link.clone()); + + let rx_kernel: Vec = collect_field!(self.tpstats, time_kernel_rx); + self.rx_kernel_stats = Stats::new(rx_kernel.clone()); + + let rx_user: Vec = collect_field!(self.tpstats, time_user_rx); + self.rx_user_stats = Stats::new(rx_user.clone()); + + // diagnose + let is_overlimit = |avg: u64, cur: u64| { + if cur < avg { + return false; + } + if cur - avg > 500_000 && (cur - avg) * 100 / avg >= 50 { + return true; + } + false + 
}; + + let mut diags = vec![]; + for ol in outliers { + let tp = &self.tpstats[ol]; + let seq_string = format!( + "tcp_seq={} time={:.3}ms", + self.tpstats[ol].seq, + ns2ms(self.tpstats[ol].time_tot) + ); + + let z1 = self.tx_user_stats.zscore(tx_user[ol]); + let z2 = self.tx_kernel_stats.zscore(tx_kernel[ol]); + let z3 = self.external_link_stats.zscore(external_link[ol]); + let z4 = self.rx_kernel_stats.zscore(rx_kernel[ol]); + let z5 = self.rx_user_stats.zscore(rx_user[ol]); + let zs = vec![z1, z2, z3, z4, z5]; + + let zmax = zs + .iter() + .enumerate() + .max_by_key(|&(_, item)| item) + .unwrap() + .0; + match zmax { + 0 => { + diags.push(format!( + "{seq_string}, reason: Sending packet in user mode is too slow" + )); + } + 1 => { + diags.push(format!( + "{seq_string}, reason: Sending packet in kernel mode is too slow" + )); + } + 2 => { + if is_overlimit(self.external_link_stats.avg, self.tpstats[ol].xmit_irq) { + if !self.tpstats[ol].process.comm.contains("swapper") + && (self.tpstats[ol].xmit_irq < self.tpstats[ol].sched_irq * 2 + && self.tpstats[ol].sched_irq > 1_000_000) + { + diags.push(format!("{seq_string}, reason: may be that process {} has turned off interrupts. 
So far, this process has occupied {}ms of cpu.", tp.process.to_string(), ns2ms(tp.sched_irq))); + } else { + diags.push(format!("{seq_string}, reason: external link is too slow")); + } + } + } + 3 => { + diags.push(format!( + "{seq_string}, reason: Receiving packet in kernel mode is too slow", + )); + } + 4 => { + diags.push(format!( + "{seq_string}, reason: Receiving packet in user mode is too slow", + )); + } + _ => unreachable!(), + } + } + self.diags.extend_from_slice(&diags); + } + + fn stats_string(&self, s: &Stats) -> String { + format!( + "min/avg/max/mdev = {:.3}/{:.3}/{:.3}/{:.3} ms", + ns2ms(s.min), + ns2ms(s.avg), + ns2ms(s.max), + ns2ms(s.mdev) + ) + } + + pub fn base_string(&self, dst: &Ipv4Addr, dport: u16) -> String { + let mut lines = vec![]; + lines.push(format!("--- {}.{} tcpping statistics ---", dst, dport)); + lines.push(format!( + "{} packets transmitted, {} received, {:.2}% packet loss", + self.packet_done + self.packet_loss, + self.packet_done, + (self.packet_loss * 100) as f32 / (self.packet_done + self.packet_loss) as f32 + )); + lines.push(self.stats_string(&self.base_stats)); + lines.join("\n") + } + + pub fn enhanced_string(&self, dst: &Ipv4Addr, dport: u16) -> String { + let mut lines = vec![]; + lines.push(format!( + "--- {}.{} tcpping enhanced statistics ---", + dst, dport + )); + + lines.push(format!( + "userspace transmitted time consuming {}", + self.stats_string(&self.tx_user_stats) + )); + lines.push(format!( + "kernel transmitted time consuming {}", + self.stats_string(&self.tx_kernel_stats) + )); + lines.push(format!( + "link+irq+softirq time consuming {}", + self.stats_string(&self.external_link_stats) + )); + lines.push(format!( + "kernel received time consuming {}", + self.stats_string(&self.rx_kernel_stats) + )); + lines.push(format!( + "userspace received time consuming {}", + self.stats_string(&self.rx_user_stats) + )); + lines.join("\n") + } + + pub fn diagnose_string(&self, dst: &Ipv4Addr, dport: u16) -> String { + let 
mut lines = vec![]; + lines.push(format!( + "--- {}.{} tcpping diagnosing report ---", + dst, dport + )); + if self.diags.len() == 0 { + lines.push("everything is ok!".to_owned()); + } else { + lines.extend_from_slice(&self.diags); + } + lines.join("\n") + } + + pub fn analysis_result(&self, dst: &Ipv4Addr, dport: u16) -> String { + let base = self.base_string(dst, dport); + let en = self.enhanced_string(dst, dport); + let diag = self.diagnose_string(dst, dport); + + format!("\n{}\n\n{}\n\n{}", base, en, diag) + } + + pub fn print(&self, dst: &Ipv4Addr, dport: u16) { + println!("{}", self.analysis_result(dst, dport)); + } +} diff --git a/source/tools/detect/net/rtrace/src/analyzer/virtio.rs b/source/tools/detect/net/rtrace/src/analyzer/virtio.rs new file mode 100644 index 0000000000000000000000000000000000000000..752816de6e6abee08abd0ec04dcc9a9eb4c3142e --- /dev/null +++ b/source/tools/detect/net/rtrace/src/analyzer/virtio.rs @@ -0,0 +1,105 @@ +use crate::collector::virtio::Queue; +use crate::collector::virtio::Virtio; + +#[derive(Default)] +pub struct VirtioAnalyzer { + virtios: Vec, + ftx: Vec, + frx: Vec, +} + +impl VirtioAnalyzer { + pub fn new(virtios: Vec) -> Self { + let mut v = VirtioAnalyzer::default(); + v.virtios = virtios; + v + } + pub fn analysis(&mut self) { + let mut prev = self.virtios.iter(); + let mut next = self.virtios.iter(); + next.next(); + + let mut ftx = vec![]; + let mut frx = vec![]; + loop { + if let Some(v1) = prev.next() { + if let Some(v2) = next.next() { + let (tx, rx) = analysis_virtios(v1, v2); + if tx.is_empty() && rx.is_empty() { + continue; + } + + ftx = tx; + frx = rx; + + break; + } + } + break; + } + + self.ftx = ftx; + self.frx = frx; + } + + pub fn analysis_result(&self) -> String { + let mut res = String::new(); + if !self.ftx.is_empty() { + res += format!( + "faulty send queue: {}", + self.ftx + .iter() + .map(|x| x.to_string()) + .collect::>() + .join(",") + ) + .as_str(); + } + + if !self.frx.is_empty() { + res += 
format!( + "faulty recv queue: {}", + self.frx + .iter() + .map(|x| x.to_string()) + .collect::>() + .join(",") + ) + .as_str(); + } + res + } + + pub fn print(&self) { + let res = self.analysis_result(); + if !res.is_empty() { + println!("{res}"); + } + } +} + +pub fn analysis_virtios(v1: &Virtio, v2: &Virtio) -> (Vec, Vec) { + let mut rxs = vec![]; + let mut txs = vec![]; + for (idx, (q1, q2)) in v1.tx.iter().zip(v2.tx.iter()).enumerate() { + if tpackets(q1) != 0 && tpackets(q2) != 0 && q1.last_used == q2.last_used { + txs.push(idx); + } + } + + for (idx, (q1, q2)) in v1.rx.iter().zip(v2.rx.iter()).enumerate() { + if rpackets(q1) != 0 && rpackets(q2) != 0 && q1.last_used == q2.last_used { + rxs.push(idx); + } + } + + (txs, rxs) +} + +fn tpackets(q: &Queue) -> u16 { + q.avail - q.used +} + +fn rpackets(q: &Queue) -> u16 { + q.used - q.last_used +} diff --git a/source/tools/detect/net/rtrace/src/application/drop.rs b/source/tools/detect/net/rtrace/src/application/drop.rs new file mode 100644 index 0000000000000000000000000000000000000000..4c7aaa6a9d08262396bf3ff289b068a7eed078b4 --- /dev/null +++ b/source/tools/detect/net/rtrace/src/application/drop.rs @@ -0,0 +1,43 @@ +use crate::collector::launcher::initial_collector_netdev; +use crate::collector::launcher::initial_collector_netstat; +use crate::collector::launcher::initial_collector_snmp; +use crate::collector::launcher::initial_collector_thread_drop; +use crate::common::config::Config; +use crate::event::get_event_channel; +use crate::event::Event; + +pub struct DropApplication {} + +impl DropApplication { + pub fn run(config: Config) { + let (tx, rx) = get_event_channel(); + drop_counter(); + + initial_collector_thread_drop(&config, tx); + + loop { + match rx.recv() { + Ok(event) => match event { + Event::Drop(d) => { + println!("{}", serde_json::to_string(&d).unwrap()); + } + Event::Stop => break, + _ => panic!("unexpected event type"), + }, + Err(e) => panic!("unexpected channel error: {}", e), + } + } + + 
drop_counter(); + } +} + +fn drop_counter() { + let netstat = initial_collector_netstat().unwrap(); + let snmp = initial_collector_snmp().unwrap(); + let netdev = initial_collector_netdev().unwrap(); + + println!("{}", serde_json::to_string(&netstat).unwrap()); + println!("{}", serde_json::to_string(&snmp).unwrap()); + println!("{}", serde_json::to_string(&netdev).unwrap()); +} diff --git a/source/tools/detect/net/rtrace/src/application/jitter.rs b/source/tools/detect/net/rtrace/src/application/jitter.rs new file mode 100644 index 0000000000000000000000000000000000000000..2246e09e599571de1f9869933ce78fe1b38f646b --- /dev/null +++ b/source/tools/detect/net/rtrace/src/application/jitter.rs @@ -0,0 +1,40 @@ +use crate::collector::launcher::initial_collector_thread_queueslow; +use crate::collector::launcher::initial_collector_thread_userslow; +use crate::common::config::Config; +use crate::event::get_event_channel; +use crate::event::Event; + +pub struct JitterApplication {} + +impl JitterApplication { + pub fn run(config: Config) { + let (tx, rx) = get_event_channel(); + + initial_collector_thread_queueslow(&config, tx.clone()); + initial_collector_thread_userslow(&config, tx); + + loop { + match rx.recv() { + Ok(event) => match event { + Event::QueueSlow(q) => { + if config.output_json { + println!("{}", serde_json::to_string(&q).unwrap()); + } else { + println!("{}", q); + } + } + Event::UserSlow(u) => { + if config.output_json { + println!("{}", serde_json::to_string(&u).unwrap()); + } else { + println!("{}", u); + } + } + Event::Stop => break, + _ => panic!("unexpected event type"), + }, + Err(e) => panic!("unexpected channel error: {}", e), + } + } + } +} diff --git a/source/tools/detect/net/rtrace/src/application/mod.rs b/source/tools/detect/net/rtrace/src/application/mod.rs new file mode 100644 index 0000000000000000000000000000000000000000..353e434623c85808ffc53b235e61fdde619d9767 --- /dev/null +++ b/source/tools/detect/net/rtrace/src/application/mod.rs @@ 
-0,0 +1,5 @@ +pub mod drop; +pub mod jitter; +pub mod ping; +pub mod retran; +pub mod tcpping; diff --git a/source/tools/detect/net/rtrace/src/application/ping.rs b/source/tools/detect/net/rtrace/src/application/ping.rs new file mode 100644 index 0000000000000000000000000000000000000000..5912b73832a283aac93b65fa5ca9f5c518c06369 --- /dev/null +++ b/source/tools/detect/net/rtrace/src/application/ping.rs @@ -0,0 +1,26 @@ +use crate::collector::launcher::initial_collector_thread_ping; +use crate::common::config::Config; +use crate::event::get_event_channel; +use crate::event::Event; + +pub struct PingApplication {} + +impl PingApplication { + pub fn run(config: Config) { + let (tx, rx) = get_event_channel(); + + initial_collector_thread_ping(&config, tx); + loop { + match rx.recv() { + Ok(event) => match event { + Event::Ping(p) => { + println!("{}", p); + } + Event::Stop => break, + _ => panic!("unexpected event type"), + }, + Err(e) => panic!("unexpected channel error: {}", e), + } + } + } +} diff --git a/source/tools/detect/net/rtrace/src/application/retran.rs b/source/tools/detect/net/rtrace/src/application/retran.rs new file mode 100644 index 0000000000000000000000000000000000000000..377535ce49fc63dd4b915bf0189a3940d1b67404 --- /dev/null +++ b/source/tools/detect/net/rtrace/src/application/retran.rs @@ -0,0 +1,27 @@ +use crate::collector::launcher::initial_collector_thread_retran; +use crate::common::config::Config; +use crate::event::get_event_channel; +use crate::event::Event; + +pub struct RetranApplication {} + +impl RetranApplication { + pub fn run(config: Config) { + let (tx, rx) = get_event_channel(); + + initial_collector_thread_retran(&config, tx); + + loop { + match rx.recv() { + Ok(event) => match event { + Event::Retran(r) => { + println!("{}", serde_json::to_string(&r).unwrap()); + } + Event::Stop => break, + _ => panic!("unexpected event type"), + }, + Err(e) => panic!("unexpected channel error: {}", e), + } + } + } +} diff --git 
a/source/tools/detect/net/rtrace/src/application/tcpping.rs b/source/tools/detect/net/rtrace/src/application/tcpping.rs new file mode 100644 index 0000000000000000000000000000000000000000..27ce78cd94cdc2b8bb9a0160b8bedd059943ecb6 --- /dev/null +++ b/source/tools/detect/net/rtrace/src/application/tcpping.rs @@ -0,0 +1,79 @@ +use crate::analyzer::tcpping::TcppingAnalyzer; +use crate::analyzer::tcpping_stat::TcppingStatAnalyzer; +use crate::analyzer::virtio::VirtioAnalyzer; +use crate::collector::launcher::initial_collector_thread_tcpping; +use crate::collector::launcher::initial_collector_thread_virtio; +use crate::common::config::Config; +use crate::common::utils::ns2ms; +use crate::event::get_event_channel; +use crate::event::Event; +use std::net::Ipv4Addr; + +pub struct TcppingApplication {} + +impl TcppingApplication { + pub fn run(config: Config) { + let (tx, rx) = get_event_channel(); + let dport = config.dst.1; + let dst = Ipv4Addr::from(u32::from_be(config.dst.0)); + println!("TCPPING {}.{}, powered by rtrace", dst, dport); + + initial_collector_thread_tcpping(&config, tx.clone()); + if config.virtio { + initial_collector_thread_virtio(&config, tx.clone()); + } + + let mut count = 0; + let mut tcppings = vec![]; + let mut virtios = vec![]; + loop { + match rx.recv() { + Ok(event) => match event { + Event::Tcpping(t) => { + log::info!("{}", t); + if t.is_timeout() { + println!("ack from {}.{}: tcp_seq={} timeout(3s)", dst, dport, t.seq,); + } else { + println!( + "ack from {}.{}: tcp_seq={} time={:.3}ms", + dst, + dport, + t.seq, + ns2ms(t.time()) + ); + } + tcppings.push(t); + count += 1; + if count == config.count { + break; + } + } + Event::Virtio(v) => { + log::info!("{}", v); + virtios.push(v); + } + Event::Stop => break, + _ => panic!("unexpected event type"), + }, + Err(e) => panic!("unexpected channel error: {}", e), + } + } + + let mut virtio_diag = String::new(); + if config.virtio { + let mut va = VirtioAnalyzer::new(virtios); + va.analysis(); + 
virtio_diag = va.analysis_result(); + } + + if config.iqr { + let mut tsa = TcppingStatAnalyzer::new(tcppings, virtio_diag); + tsa.anaylysis(); + tsa.print(&dst, dport); + } else { + let mut ta = TcppingAnalyzer::new(tcppings, virtio_diag); + ta.anaylysis(); + ta.print(&dst, dport); + } + } +} diff --git a/source/tools/detect/net/rtrace/src/bpf/drop.bpf.c b/source/tools/detect/net/rtrace/src/bpf/drop.bpf.c new file mode 100644 index 0000000000000000000000000000000000000000..9a673bb6d2c933f7f03f92475e159415281eb9d1 --- /dev/null +++ b/source/tools/detect/net/rtrace/src/bpf/drop.bpf.c @@ -0,0 +1,121 @@ +#include "vmlinux.h" +#include +#include +#include +#include + +#include "drop.h" + +struct +{ + __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); + __uint(key_size, sizeof(u32)); + __uint(value_size, sizeof(u32)); +} perf_events SEC(".maps"); + +struct +{ + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, u32); + __type(value, struct drop_filter); +} filters SEC(".maps"); + +struct kfree_skb_tp_args +{ + u32 pad[2]; + struct sk_buff *skbaddr; + u64 location; + u16 protocol; +}; + +__always_inline int fill_event(void *ctx, struct drop_event *event, struct sk_buff *skb) +{ + struct iphdr ih = {}; + struct tcphdr th = {}; + struct udphdr uh = {}; + u16 network_header, transport_header; + char *head; + + bpf_probe_read(&head, sizeof(head), &skb->head); + bpf_probe_read(&network_header, sizeof(network_header), &skb->network_header); + if (network_header != 0) + { + bpf_probe_read(&ih, sizeof(ih), head + network_header); + event->saddr = ih.saddr; + event->daddr = ih.daddr; + event->proto = ih.protocol; + transport_header = network_header + (ih.ihl << 2); + } + else + { + bpf_probe_read(&transport_header, sizeof(transport_header), &skb->transport_header); + } + switch (event->proto) + { + case IPPROTO_UDP: + if (transport_header != 0 && transport_header != 0xffff) + { + bpf_probe_read(&uh, sizeof(uh), head + transport_header); + event->sport = 
bpf_ntohs(uh.source); + event->dport = bpf_ntohs(uh.dest); + } + break; + case IPPROTO_TCP: + bpf_probe_read(&th, sizeof(th), head + transport_header); + event->sport = bpf_ntohs(th.source); + event->dport = bpf_ntohs(th.dest); + break; + default: + break; + } + int key = 0; + struct drop_filter *filter = bpf_map_lookup_elem(&filters, &key); + if (!filter) + return 0; + if (filter->protocol != event->proto) + return 0; + + if (filter->saddr && (filter->saddr != event->saddr || filter->daddr != event->daddr)) + return 0; + + if (filter->daddr && (filter->daddr != event->daddr || filter->saddr != event->saddr)) + return 0; + + if (filter->sport && (filter->sport != event->sport || filter->dport != event->dport)) + return 0; + + if (filter->dport && (filter->dport != event->dport || filter->sport != event->sport)) + return 0; + + bpf_perf_event_output(ctx, &perf_events, BPF_F_CURRENT_CPU, event, sizeof(struct drop_event)); + return 0; +} + +SEC("tracepoint/skb/kfree_skb") +int tp_kfree_skb(struct kfree_skb_tp_args *ctx) +{ + struct drop_event event = {}; + + event.proto = ctx->protocol; + event.location = ctx->location; + fill_event(ctx, &event, ctx->skbaddr); + return 0; +} + +SEC("kprobe/tcp_drop") +int BPF_KPROBE(tcp_drop, struct sock *sk, struct sk_buff *skb) +{ + + struct drop_event event = {}; + u64 bp; + bpf_probe_read(&event.proto, sizeof(event.proto), &skb->protocol); + event.proto = bpf_ntohs(event.proto); + BPF_KPROBE_READ_RET_IP(bp, ctx); + + bpf_probe_read(&event.location, sizeof(event.location), (void *)(bp + 8)); + fill_event(ctx, &event, skb); + return 0; +} + +char _license[] SEC("license") = "GPL"; \ No newline at end of file diff --git a/source/tools/detect/net/rtrace/src/bpf/drop.h b/source/tools/detect/net/rtrace/src/bpf/drop.h new file mode 100644 index 0000000000000000000000000000000000000000..dabb39c635d42fa8a2cdbb1acac9cbcbafc068f7 --- /dev/null +++ b/source/tools/detect/net/rtrace/src/bpf/drop.h @@ -0,0 +1,24 @@ +#ifndef __DROP_H +#define 
__DROP_H + +struct drop_filter +{ + unsigned short protocol; + unsigned int saddr; + unsigned int daddr; + unsigned short sport; + unsigned short dport; +}; + + +struct drop_event +{ + unsigned long long location; + unsigned short proto; + unsigned int saddr; + unsigned int daddr; + unsigned short sport; + unsigned short dport; +}; + +#endif \ No newline at end of file diff --git a/source/tools/detect/net/rtrace/src/bpf/filter.h b/source/tools/detect/net/rtrace/src/bpf/filter.h new file mode 100644 index 0000000000000000000000000000000000000000..92855b7560367e0773df8f7b8ea24c453f26f248 --- /dev/null +++ b/source/tools/detect/net/rtrace/src/bpf/filter.h @@ -0,0 +1,34 @@ +#ifndef __FILTER_H +#define __FILTER_H + +struct filter +{ + int pid; + unsigned long long threshold; + unsigned int protocol; + unsigned short be_lport; + unsigned short be_rport; + unsigned short lport; + unsigned short rport; + unsigned long long sock; +}; + +#ifdef __VMLINUX_H__ + +struct +{ + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, u32); + __type(value, struct filter); +} filters SEC(".maps"); + +static __always_inline struct filter *get_filter() +{ + int key = 0; + return bpf_map_lookup_elem(&filters, &key); +} + +#endif + +#endif \ No newline at end of file diff --git a/source/tools/detect/net/rtrace/latency/icmp/src/bpf/icmp.bpf.c b/source/tools/detect/net/rtrace/src/bpf/ping_sender.bpf.c similarity index 38% rename from source/tools/detect/net/rtrace/latency/icmp/src/bpf/icmp.bpf.c rename to source/tools/detect/net/rtrace/src/bpf/ping_sender.bpf.c index 8d8a6046f984e088b2c3c8b016d8a192d4899520..9003ccf949e7215ca0342403f884ce101d55591d 100644 --- a/source/tools/detect/net/rtrace/latency/icmp/src/bpf/icmp.bpf.c +++ b/source/tools/detect/net/rtrace/src/bpf/ping_sender.bpf.c @@ -5,10 +5,23 @@ #include #include -#include "common.h" -#include "bpf_core.h" -#include "icmp.h" +#include "pingtrace.h" +struct +{ + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, struct 
ping_key); + __type(value, struct ping_sender); + __uint(max_entries, 1024); +} ping_events SEC(".maps"); + +struct +{ + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, u64); + __type(value, void *); + __uint(max_entries, 1024); +} tid_msghdr SEC(".maps"); __always_inline void get_icmphdr_with_l4(struct sk_buff *skb, struct icmphdr *ih) { @@ -37,108 +50,68 @@ __always_inline int get_icmphdr_with_l3(struct sk_buff *skb, struct icmphdr *ich return -1; } -#if 0 -#define MAC_HEADER_SIZE 14 -__always_inline int get_icmphdr_with_l2(struct sk_buff *skb, struct icmphdr *ich) +__always_inline bool l4_set_ping_key(struct sk_buff *skb, struct ping_key *key, int ty) { - struct iphdr ih = {0}; - u16 mac_header, network_header; - char *head; - bpf_probe_read(&mac_header, sizeof(mac_header), &skb->mac_header); - network_header = mac_header + MAC_HEADER_SIZE; - bpf_probe_read(&head, sizeof(head), &skb->head); - bpf_probe_read(&ih, sizeof(ih), head + network_header); - - if (ih.protocol == IPPROTO_ICMP) - { - bpf_probe_read(ich, sizeof(*ich), head + network_header + (ih.ihl << 2)); - return 0; - } -} -#endif + struct icmphdr icmph = {0}; -__always_inline void trace_icmp_skb_with_l4(void *ctx, struct sock *sk, struct sk_buff *skb, int type) -{ - struct icmphdr ih = {0}; - struct icmp_event ie = {0}; + get_icmphdr_with_l4(skb, &icmph); + if (ty != icmph.type) + return false; - if (sk) - { - u16 protocol; - protocol = bpf_core_sock_sk_protocol(sk); - if (protocol != IPPROTO_ICMP) - return; - } - - get_icmphdr_with_l4(skb, &ih); - - ie.skb_ts = bpf_core_skb_tstamp(skb); - ie.seq = bpf_ntohs(ih.un.echo.sequence); - ie.id = bpf_ntohs(ih.un.echo.id); - ie.icmp_type = ih.type; - ie.ts = bpf_ktime_get_ns(); - ie.type = type; - ie.pid = pid(); - COMM(ie.comm); - - bpf_perf_event_output(ctx, &perf_map, BPF_F_CURRENT_CPU, &ie, sizeof(ie)); + key->seq = bpf_ntohs(icmph.un.echo.sequence); + key->id = bpf_ntohs(icmph.un.echo.id); + return true; } -// have network header -__always_inline void 
trace_icmp_skb_with_l3(void *ctx, struct sock *sk, struct sk_buff *skb, int type) +__always_inline bool l3_set_ping_key(struct sk_buff *skb, struct ping_key *key, int ty) { - u16 protocol; struct icmphdr icmph = {0}; - struct icmp_event ie = {0}; - if (sk) - { - protocol = bpf_core_sock_sk_protocol(sk); - if (protocol != IPPROTO_ICMP) - return; - } + if (get_icmphdr_with_l3(skb, &icmph) != 0) + return false; - if (get_icmphdr_with_l3(skb, &icmph)) - return; + if (ty != icmph.type) + return false; - ie.skb_ts = bpf_core_skb_tstamp(skb); - ie.seq = bpf_ntohs(icmph.un.echo.sequence); - ie.id = bpf_ntohs(icmph.un.echo.id); - ie.icmp_type = icmph.type; - ie.ts = bpf_ktime_get_ns(); - ie.type = type; - ie.pid = pid(); - COMM(ie.comm); - bpf_perf_event_output(ctx, &perf_map, BPF_F_CURRENT_CPU, &ie, sizeof(ie)); + key->seq = bpf_ntohs(icmph.un.echo.sequence); + key->id = bpf_ntohs(icmph.un.echo.id); + return true; } -#if 0 -// no need this function, see verfication/skb_network_header.bt file for more details -__always_inline void trace_icmp_skb_with_l2(void *ctx, struct sock *sk, struct sk_buff *skb, int type) +struct msghdr___310 { - struct icmphdr icmph = {}; - struct icmp_event ie = {}; - - if (get_icmphdr_with_l2(skb, &icmph)) - return; + struct iovec *msg_iov; +}; - ie.skb_ts = fix_get_skb_tstamp(skb); - ie.seq = bpf_ntohs(icmph.un.echo.sequence); - ie.id = bpf_ntohs(icmph.un.echo.id); - ie.icmp_type = icmph.type; - ie.ts = bpf_ktime_get_ns(); - ie.type = type; +static __always_inline u16 bpf_core_sock_sk_protocol(struct sock *sk) +{ + return (u16)BPF_CORE_READ_BITFIELD_PROBED(sk, sk_protocol); +} - bpf_perf_event_output(ctx, &perf_map, BPF_F_CURRENT_CPU, &ie, sizeof(ie)); +// libbpf: prog 'kprobe__raw_sendmsg': relo #3: kind (0), spec is [346] struct msghdr.msg_iter.iov (0:2:4:0 @ offset 40) +static __always_inline void *fix_msghdr_base(struct msghdr *msg) +{ + void *ptr; + if (bpf_core_field_exists(msg->msg_iter)) + { + BPF_CORE_READ_INTO(&ptr, msg, msg_iter.iov, 
iov_base); + } + else + { + struct msghdr___310 *msg310 = (void *)msg; + ; + BPF_CORE_READ_INTO(&ptr, msg310, msg_iov, iov_base); + } + return ptr; } -#endif __always_inline void raw_and_dgram_entry(void *ctx, struct sock *sk, struct msghdr *msg, bool inet) { struct icmphdr ih = {}; - struct icmp_event ie = {}; + struct ping_key key = {0}; char *ptr; u16 protocol; + struct ping_sender sender = {0}; protocol = bpf_core_sock_sk_protocol(sk); if (protocol != IPPROTO_ICMP) @@ -150,21 +123,18 @@ __always_inline void raw_and_dgram_entry(void *ctx, struct sock *sk, struct msgh if (ih.code == 0) { if (!inet) - ie.id = ih.un.echo.id; + key.id = ih.un.echo.id; else { struct inet_sock *inetsk = sk; - bpf_probe_read(&ie.id, sizeof(ie.id), &inetsk->inet_sport); + bpf_probe_read(&key.id, sizeof(key.id), &inetsk->inet_sport); } - ie.id = bpf_ntohs(ie.id); - ie.seq = bpf_ntohs(ih.un.echo.sequence); - - ie.icmp_type = ih.type; - ie.ts = bpf_ktime_get_ns(); - ie.type = PING_SND; - ie.pid = pid(); - COMM(ie.comm); - bpf_perf_event_output(ctx, &perf_map, BPF_F_CURRENT_CPU, &ie, sizeof(ie)); + key.id = bpf_ntohs(key.id); + key.seq = bpf_ntohs(ih.un.echo.sequence); + sender.ty = PING; + sender.stages[PING_SND].ts = bpf_ktime_get_ns(); + sender.stages[PING_SND].cpu = bpf_get_smp_processor_id(); + bpf_map_update_elem(&ping_events, &key, &sender, BPF_ANY); } } @@ -196,72 +166,123 @@ int BPF_KPROBE(kprobe__ping_v4_sendmsg, struct sock *sk, struct msghdr *msg) raw_and_dgram_entry(ctx, sk, msg, true); return 0; } -#if 0 -SEC("kprobe/ping_sendmsg") -int BPF_KPROBE(kprobe__ping_sendmsg, u64 arg1, struct sock *sk, struct msghdr *msg) -{ - raw_and_dgram_entry(ctx, sk, msg, true); - return 0; -} -#endif -struct tracepoint_args +struct tp_net_arg { u32 pad[2]; struct sk_buff *skbaddr; }; SEC("tracepoint/net/net_dev_queue") -int tp_net_dev_queue(struct tracepoint_args *args) +int tp_net_dev_queue(struct tp_net_arg *args) { struct sk_buff *skb = args->skbaddr; - struct sock *sk; - bpf_probe_read(&sk, 
sizeof(sk), &skb->sk); - trace_icmp_skb_with_l3(args, sk, skb, PING_NET_DEV_QUEUE); + struct ping_key key = {0}; + if (!l3_set_ping_key(skb, &key, ICMP_ECHO)) + return 0; + + struct ping_sender *sender = bpf_map_lookup_elem(&ping_events, &key); + if (!sender) + return 0; + + sender->stages[PING_DEV_QUEUE].ts = bpf_ktime_get_ns(); + sender->stages[PING_DEV_QUEUE].cpu = bpf_get_smp_processor_id(); return 0; } SEC("tracepoint/net/net_dev_xmit") -int tp_net_dev_xmit(struct tracepoint_args *args) +int tp_net_dev_xmit(struct tp_net_arg *args) { struct sk_buff *skb = args->skbaddr; - struct sock *sk; - bpf_probe_read(&sk, sizeof(sk), &skb->sk); - trace_icmp_skb_with_l3(args, sk, skb, PING_NET_DEV_XMIT); + struct ping_key key = {0}; + if (!l3_set_ping_key(skb, &key, ICMP_ECHO)) + return 0; + + struct ping_sender *sender = bpf_map_lookup_elem(&ping_events, &key); + if (!sender) + return 0; + + sender->stages[PING_DEV_XMIT].ts = bpf_ktime_get_ns(); + sender->stages[PING_DEV_XMIT].cpu = bpf_get_smp_processor_id(); return 0; } -SEC("kprobe/ping_rcv") -int BPF_KPROBE(kprobe__ping_rcv, struct sk_buff *skb) +SEC("tracepoint/net/netif_receive_skb") +int tp_netif_receive_skb(struct tp_net_arg *args) { - trace_icmp_skb_with_l4(ctx, NULL, skb, PING_RCV); + struct sk_buff *skb = args->skbaddr; + struct ping_key key = {0}; + if (!l3_set_ping_key(skb, &key, ICMP_ECHOREPLY)) + return 0; + + struct ping_sender *sender = bpf_map_lookup_elem(&ping_events, &key); + if (!sender) + return 0; + + int cpu = bpf_get_smp_processor_id(); + output_all_events(args, cpu); + sender->stages[PING_NETIF_RCV].ts = bpf_ktime_get_ns(); + sender->stages[PING_NETIF_RCV].cpu = cpu; return 0; } SEC("kprobe/icmp_rcv") int BPF_KPROBE(kprobe__icmp_rcv, struct sk_buff *skb) { - trace_icmp_skb_with_l4(ctx, NULL, skb, PING_ICMP_RCV); + struct ping_key key = {0}; + if (!l4_set_ping_key(skb, &key, ICMP_ECHOREPLY)) + return 0; + + struct ping_sender *sender = bpf_map_lookup_elem(&ping_events, &key); + if (!sender) + 
return 0; + + sender->stages[PING_ICMP_RCV].ts = bpf_ktime_get_ns(); + sender->stages[PING_ICMP_RCV].cpu = bpf_get_smp_processor_id(); return 0; } -SEC("tracepoint/net/netif_receive_skb") -int tp_netif_receive_skb(struct tracepoint_args *args) +#if 0 +SEC("kprobe/ping_rcv") +int BPF_KPROBE(kprobe__ping_rcv, struct sk_buff *skb) { - trace_icmp_skb_with_l3(args, NULL, args->skbaddr, PING_NETIF_RCV); + struct ping_key key = {0}; + int cpu = bpf_get_smp_processor_id(); + if (!l4_set_ping_key(skb, &key, ICMP_ECHOREPLY)) + return 0; + + struct ping_sender *sender = bpf_map_lookup_elem(&ping_events, &key); + if (!sender) + return 0; + + sender->key = key; + sender->stages[PING_RCV].ts = bpf_ktime_get_ns(); + sender->stages[PING_RCV].cpu = cpu; + bpf_perf_event_output(ctx, &perf_events, BPF_F_CURRENT_CPU, sender, sizeof(*sender)); return 0; } +#endif -#if 0 -SEC("kprobe/__kfree_skb") -int BPF_KPROBE(__kfree_skb, struct sk_buff *skb) +SEC("kprobe/skb_free_datagram") +int BPF_KPROBE(kprobe__skb_free_datagram, struct sock *sk, struct sk_buff *skb) { - struct sock *sk; - bpf_probe_read(&sk, sizeof(sk), &skb->sk); - if (sk) - trace_icmp_skb_with_l4(ctx, sk, skb, PING_KFREE_SKB); + struct ping_key key = {0}; + int cpu = bpf_get_smp_processor_id(); + u16 protocol = bpf_core_sock_sk_protocol(sk); + if (protocol != IPPROTO_ICMP) + return 0; + if (!l4_set_ping_key(skb, &key, ICMP_ECHOREPLY)) + return 0; + + struct ping_sender *sender = bpf_map_lookup_elem(&ping_events, &key); + if (!sender) + return 0; + + sender->key = key; + sender->stages[PING_RCV].ts = bpf_ktime_get_ns(); + sender->stages[PING_RCV].cpu = cpu; + bpf_perf_event_output(ctx, &perf_events, BPF_F_CURRENT_CPU, sender, sizeof(*sender)); return 0; } -#endif char _license[] SEC("license") = "GPL"; diff --git a/source/tools/detect/net/rtrace/src/bpf/pingtrace.h b/source/tools/detect/net/rtrace/src/bpf/pingtrace.h new file mode 100644 index 0000000000000000000000000000000000000000..a34334a07f283c4bbaaabeabdfa47c7255510356 
--- /dev/null +++ b/source/tools/detect/net/rtrace/src/bpf/pingtrace.h @@ -0,0 +1,257 @@ +#ifndef __ICMP_H +#define __ICMP_H + +#define ICMP_ECHO 8 +#define ICMP_ECHOREPLY 0 + +#define IRQ_RING_SIZE 8 +#define SOFTIRQ_RING_SIZE 8 +#define WAKEUP_RING_SIZE 8 +#define SCHEDSWITCH_RING_SIZE 8 + +struct ping_key +{ + unsigned short seq; + unsigned short id; +}; + +struct ping_stage +{ + unsigned long long ts; + unsigned int cpu; +}; + +enum PING_SENDER_STAGE +{ + PING_SND = 0, + PING_DEV_QUEUE, + PING_DEV_XMIT, + PING_NETIF_RCV, + PING_ICMP_RCV, + PING_RCV, + PING_MAX, +}; + +enum EVENT_TYPE +{ + PING, + IRQ, + SOFTIRQ, + WAKEUP, + SCHED, +}; + +struct ping_sender +{ + unsigned long long ty; + struct ping_key key; + struct ping_stage stages[PING_MAX]; +}; + +struct irq +{ + unsigned long long ty; + unsigned long long tss[IRQ_RING_SIZE]; + unsigned long long cnt; +}; + +struct softirq +{ + unsigned long long ty; + unsigned long long tss[SOFTIRQ_RING_SIZE]; + unsigned long long cnt; +}; + +struct wakeup +{ + unsigned long long ty; + unsigned long long tss[SOFTIRQ_RING_SIZE]; + unsigned long long cnt; +}; + +struct sched +{ + unsigned long long ty; + struct + { + int prev_pid; + int next_pid; + unsigned char prev_comm[16]; + unsigned char next_comm[16]; + unsigned long long ts; + } ss[SCHEDSWITCH_RING_SIZE]; + unsigned long long cnt; +}; + +#ifdef __VMLINUX_H__ + +struct +{ + __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); + __uint(key_size, sizeof(u32)); + __uint(value_size, sizeof(u32)); +} perf_events SEC(".maps"); + +struct +{ + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, int); + __type(value, struct irq); + __uint(max_entries, 1024); +} irq_events SEC(".maps"); + +SEC("kprobe/skb_recv_done") +int BPF_KPROBE(kprobe__skb_recv_done) +{ + int cpu = bpf_get_smp_processor_id(); + struct irq *hi = bpf_map_lookup_elem(&irq_events, &cpu); + if (!hi) + return 0; + + unsigned long long cnt = hi->cnt & (IRQ_RING_SIZE - 1); + hi->cnt++; + hi->tss[cnt] = bpf_ktime_get_ns(); + 
return 0; +} + +SEC("kprobe/mlx5e_completion_event") +int BPF_KPROBE(kprobe__mlx5e_completion_event) +{ + int cpu = bpf_get_smp_processor_id(); + struct irq *hi = bpf_map_lookup_elem(&irq_events, &cpu); + if (!hi) + return 0; + + unsigned long long cnt = hi->cnt & (IRQ_RING_SIZE - 1); + hi->cnt++; + hi->tss[cnt] = bpf_ktime_get_ns(); + return 0; +} + +struct +{ + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, int); + __type(value, struct softirq); + __uint(max_entries, 1024); +} softirq_events SEC(".maps"); + +SEC("kprobe/__do_softirq") +int BPF_KPROBE(kprobe____do_softirq) +{ + int cpu = bpf_get_smp_processor_id(); + struct softirq *si = bpf_map_lookup_elem(&softirq_events, &cpu); + if (!si) + return 0; + + unsigned long long cnt = si->cnt & (SOFTIRQ_RING_SIZE - 1); + si->cnt++; + si->tss[cnt] = bpf_ktime_get_ns(); + return 0; +} + +struct +{ + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, int); + __type(value, struct wakeup); + __uint(max_entries, 1024); +} wakeup_events SEC(".maps"); + +// int wake_up_process(struct task_struct *p) +SEC("kprobe/wake_up_process") +int BPF_KPROBE(kprobe__wake_up_process, struct task_struct *p) +{ + unsigned char comm[16]; + bpf_probe_read(comm, sizeof(comm), &p->comm); + + if (comm[0] != 'k' || comm[1] != 's' || comm[2] != 'o' || comm[3] != 'f' || comm[4] != 't' || comm[5] != 'i' || comm[6] != 'r') + return 0; + + int cpu = bpf_get_smp_processor_id(); + struct wakeup *wu = bpf_map_lookup_elem(&wakeup_events, &cpu); + if (!wu) + return 0; + + unsigned long long cnt = wu->cnt & (WAKEUP_RING_SIZE - 1); + wu->cnt++; + wu->tss[cnt] = bpf_ktime_get_ns(); + return 0; +} + +struct +{ + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, int); + __type(value, struct sched); + __uint(max_entries, 1024); +} sched_events SEC(".maps"); + +struct tp_sched_switch_arg +{ + u64 pad; + char prev_comm[16]; + pid_t prev_pid; + int prev_prio; + long prev_state; + char next_comm[16]; + pid_t next_pid; +}; + +SEC("tracepoint/sched/sched_switch") +int 
tp_sched_switch(struct tp_sched_switch_arg *arg) +{ + int cpu = bpf_get_smp_processor_id(); + struct sched *ring = NULL; + + ring = bpf_map_lookup_elem(&sched_events, &cpu); + if (!ring) + return 0; + + u64 cnt = ring->cnt & (SCHEDSWITCH_RING_SIZE - 1); + ring->cnt++; + + ring->ss[cnt].ts = bpf_ktime_get_ns(); + ring->ss[cnt].next_pid = arg->next_pid; + ring->ss[cnt].prev_pid = arg->prev_pid; + + __builtin_memcpy(ring->ss[cnt].next_comm, arg->next_comm, 16); + __builtin_memcpy(ring->ss[cnt].prev_comm, arg->prev_comm, 16); + + return 0; +} + +static __always_inline void output_all_events(void *ctx, int cpu) +{ + struct irq *hi = bpf_map_lookup_elem(&irq_events, &cpu); + if (hi) + { + hi->ty = IRQ; + bpf_perf_event_output(ctx, &perf_events, BPF_F_CURRENT_CPU, hi, sizeof(*hi)); + } + + struct softirq *si = bpf_map_lookup_elem(&softirq_events, &cpu); + if (si) + { + si->ty = SOFTIRQ; + bpf_perf_event_output(ctx, &perf_events, BPF_F_CURRENT_CPU, si, sizeof(*si)); + } + + struct wakeup *wu = bpf_map_lookup_elem(&wakeup_events, &cpu); + if (wu) + { + wu->ty = WAKEUP; + bpf_perf_event_output(ctx, &perf_events, BPF_F_CURRENT_CPU, wu, sizeof(*wu)); + } + + struct sched *ss = bpf_map_lookup_elem(&sched_events, &cpu); + if (ss) + { + ss->ty = SCHED; + bpf_perf_event_output(ctx, &perf_events, BPF_F_CURRENT_CPU, ss, sizeof(*ss)); + } +} + +#endif + +#endif diff --git a/source/tools/detect/net/rtrace/src/bpf/queueslow.bpf.c b/source/tools/detect/net/rtrace/src/bpf/queueslow.bpf.c new file mode 100644 index 0000000000000000000000000000000000000000..a3ed25ffa22de9cd2981495efa79d2d4eb69dd37 --- /dev/null +++ b/source/tools/detect/net/rtrace/src/bpf/queueslow.bpf.c @@ -0,0 +1,115 @@ +#define BPF_NO_GLOBAL_DATA +#include "vmlinux.h" +#include +#include +#include +#include + +#include "queueslow.h" + +struct +{ + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 1024000); + __type(key, u64); + __type(value, u64); +} skbs SEC(".maps"); + +struct +{ + __uint(type, 
BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, u32); + __type(value, struct filter); +} filters SEC(".maps"); + +struct +{ + __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); + __uint(key_size, sizeof(u32)); + __uint(value_size, sizeof(u32)); +} perf_events SEC(".maps"); + +struct net_dev_queue_arg +{ + u64 pad; + void *skb; +}; + +__always_inline void set_addrpair(struct queue_slow *qs, struct sk_buff *skb) +{ + + struct iphdr ih = {}; + struct tcphdr th = {}; + struct udphdr uh = {}; + u16 network_header, transport_header; + char *head; + + bpf_probe_read(&head, sizeof(head), &skb->head); + bpf_probe_read(&network_header, sizeof(network_header), &skb->network_header); + if (network_header != 0) + { + bpf_probe_read(&ih, sizeof(ih), head + network_header); + qs->saddr = ih.saddr; + qs->daddr = ih.daddr; + qs->protocol = ih.protocol; + transport_header = network_header + (ih.ihl << 2); + } + switch (qs->protocol) + { + case IPPROTO_UDP: + if (transport_header != 0 && transport_header != 0xffff) + { + bpf_probe_read(&uh, sizeof(uh), head + transport_header); + qs->sport = bpf_ntohs(uh.source); + qs->dport = bpf_ntohs(uh.dest); + } + break; + case IPPROTO_TCP: + bpf_probe_read(&th, sizeof(th), head + transport_header); + qs->sport = bpf_ntohs(th.source); + qs->dport = bpf_ntohs(th.dest); + break; + default: + break; + } +} + +SEC("tracepoint/net/net_dev_queue") +int tp_net_dev_queue(struct net_dev_queue_arg *arg) +{ + u64 ts = bpf_ktime_get_ns(); + void *skb = arg->skb; + bpf_map_update_elem(&skbs, &skb, &ts, BPF_ANY); + return 0; +} + +SEC("tracepoint/net/net_dev_xmit") +int tp_net_dev_xmit(struct net_dev_queue_arg *arg) +{ + void *skb = arg->skb; + u64 *prev_ts = bpf_map_lookup_elem(&skbs, &skb); + if (!prev_ts) + return 0; + int key = 0; + struct filter *filter = bpf_map_lookup_elem(&filters, &key); + if (!filter) + return 0; + + u64 ts = bpf_ktime_get_ns(); + u64 delta = ts - *prev_ts; + if (delta > filter->threshold) + { + struct queue_slow qs = {0}; + 
set_addrpair(&qs, skb); + + if (qs.protocol != filter->protocol) + return 0; + + bpf_perf_event_output(arg, &perf_events, BPF_F_CURRENT_CPU, &qs, sizeof(qs)); + } + + return 0; +} + +char _license[] SEC("license") = "GPL"; \ No newline at end of file diff --git a/source/tools/detect/net/rtrace/src/bpf/queueslow.h b/source/tools/detect/net/rtrace/src/bpf/queueslow.h new file mode 100644 index 0000000000000000000000000000000000000000..e1497b8bb5aa31370f6a924d0b6628472c6dcfc6 --- /dev/null +++ b/source/tools/detect/net/rtrace/src/bpf/queueslow.h @@ -0,0 +1,21 @@ +#ifndef __QUEUE_SLOW_H +#define __QUEUE_SLOW_H + +struct filter +{ + unsigned long long threshold; + unsigned int protocol; +}; + + +struct queue_slow +{ + unsigned int saddr; + unsigned int daddr; + unsigned short sport; + unsigned short dport; + unsigned int protocol; + unsigned long long latency; +}; + +#endif \ No newline at end of file diff --git a/source/tools/detect/net/rtrace/retran/src/bpf/retran.bpf.c b/source/tools/detect/net/rtrace/src/bpf/retran.bpf.c similarity index 77% rename from source/tools/detect/net/rtrace/retran/src/bpf/retran.bpf.c rename to source/tools/detect/net/rtrace/src/bpf/retran.bpf.c index c582dfb56bacd3903caadb08c810f5deb000fc3b..500fcb29826d5f081ecfbf55c7f23cc93aadebfb 100644 --- a/source/tools/detect/net/rtrace/retran/src/bpf/retran.bpf.c +++ b/source/tools/detect/net/rtrace/src/bpf/retran.bpf.c @@ -5,10 +5,24 @@ #include #include -#include "common.h" -#include "bpf_core.h" #include "retran.h" +struct +{ + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 102400); + __type(key, u64); + __type(value, u8); +} sockmap SEC(".maps"); + +struct +{ + __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); + __uint(key_size, sizeof(u32)); + __uint(value_size, sizeof(u32)); +} perf_events SEC(".maps"); + + struct tracepoint_args { u32 pad[2]; @@ -16,8 +30,6 @@ struct tracepoint_args struct sock *sk; }; -BPF_HASH(sockmap, u64, u8, 102400); - __always_inline void trace_retransmit(void *ctx, 
struct sock *sk, struct sk_buff *skb) { struct inet_connection_sock *icsk = sk; @@ -53,9 +65,13 @@ __always_inline void trace_retransmit(void *ctx, struct sock *sk, struct sk_buff } } - set_addr_pair_by_sock(sk, &re.ap); - re.ts = ns(); - bpf_perf_event_output(ctx, &perf_map, BPF_F_CURRENT_CPU, &re, sizeof(re)); + bpf_probe_read(&re.daddr, sizeof(re.daddr), &sk->__sk_common.skc_daddr); + bpf_probe_read(&re.dport, sizeof(re.dport), &sk->__sk_common.skc_dport); + bpf_probe_read(&re.saddr, sizeof(re.saddr), &sk->__sk_common.skc_rcv_saddr); + bpf_probe_read(&re.sport, sizeof(re.sport), &sk->__sk_common.skc_num); + re.dport = bpf_ntohs(re.dport); + + bpf_perf_event_output(ctx, &perf_events, BPF_F_CURRENT_CPU, &re, sizeof(re)); } #if 0 diff --git a/source/tools/detect/net/rtrace/retran/src/bpf/retran.h b/source/tools/detect/net/rtrace/src/bpf/retran.h similarity index 38% rename from source/tools/detect/net/rtrace/retran/src/bpf/retran.h rename to source/tools/detect/net/rtrace/src/bpf/retran.h index 75eb53c4b1968054d6a06de6b95615d2703e6709..ee3e1da4d3d71134355e08c75c8993f1cc3fbb13 100644 --- a/source/tools/detect/net/rtrace/retran/src/bpf/retran.h +++ b/source/tools/detect/net/rtrace/src/bpf/retran.h @@ -1,11 +1,8 @@ #ifndef __RETRAN_H #define __RETRAN_H -#include "common.h" - - - -enum { +enum +{ SYN_RETRAN, SLOW_START_RETRAN, RTO_RETRAN, @@ -13,15 +10,16 @@ enum { TLP, }; +struct retran_event +{ + unsigned char tcp_state; + unsigned char ca_state; + unsigned char retran_type; -struct retran_event { - u8 tcp_state; - u8 ca_state; - u8 retran_type; - u64 ts; - - struct addr_pair ap; - + unsigned int saddr; + unsigned int daddr; + unsigned short sport; + unsigned short dport; }; #endif diff --git a/source/tools/detect/net/rtrace/src/bpf/socket.bpf.c b/source/tools/detect/net/rtrace/src/bpf/socket.bpf.c new file mode 100644 index 0000000000000000000000000000000000000000..781d82024f1b34725e5926d991d2902a09491ec8 --- /dev/null +++ 
b/source/tools/detect/net/rtrace/src/bpf/socket.bpf.c @@ -0,0 +1,42 @@ +#include "vmlinux.h" + +#include +#include +#include +#include + +unsigned long long load_byte(void *skb, + unsigned long long off) asm("llvm.bpf.load.byte"); +unsigned long long load_half(void *skb, + unsigned long long off) asm("llvm.bpf.load.half"); +unsigned long long load_word(void *skb, + unsigned long long off) asm("llvm.bpf.load.word"); + +SEC("socket") +int socket_tcp(struct __sk_buff *skb) +{ + __u64 nhoff = 0; + __u64 ip_proto; + __u64 verlen; + u32 ports; + u16 dport; + + ip_proto = load_byte(skb, nhoff + offsetof(struct iphdr, protocol)); + if (ip_proto != 6 ) + return 0; + + verlen = load_byte(skb, nhoff + 0); + if (verlen == 0x45) + nhoff += 20; + else + nhoff += (verlen & 0xF) << 2; + + ports = load_word(skb, nhoff); + dport = (u16)ports; + if (dport == 40330) + return -1; + return 0; +} + + +char _license[] SEC("license") = "GPL"; \ No newline at end of file diff --git a/source/tools/detect/net/rtrace/src/bpf/tcpping.bpf.c b/source/tools/detect/net/rtrace/src/bpf/tcpping.bpf.c new file mode 100644 index 0000000000000000000000000000000000000000..485fdfe9e6e99283dcf20139f134cf6a1645f639 --- /dev/null +++ b/source/tools/detect/net/rtrace/src/bpf/tcpping.bpf.c @@ -0,0 +1,333 @@ +#define BPF_NO_GLOBAL_DATA +#include "vmlinux.h" +#include +#include +#include +#include + +#include "tcpping.h" + +// copy fron pingtrace.h +struct +{ + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, int); + __type(value, struct irq); + __uint(max_entries, 1024); +} irq_events SEC(".maps"); + +SEC("kprobe/skb_recv_done") +int BPF_KPROBE(kprobe__skb_recv_done) +{ + int cpu = bpf_get_smp_processor_id(); + struct irq *hi = bpf_map_lookup_elem(&irq_events, &cpu); + if (!hi) + return 0; + + unsigned long long cnt = hi->cnt & (IRQ_RING_SIZE - 1); + hi->cnt++; + hi->tss[cnt] = bpf_ktime_get_ns(); + return 0; +} + +SEC("kprobe/mlx5e_completion_event") +int BPF_KPROBE(kprobe__mlx5e_completion_event) +{ + int cpu = 
bpf_get_smp_processor_id(); + struct irq *hi = bpf_map_lookup_elem(&irq_events, &cpu); + if (!hi) + return 0; + + unsigned long long cnt = hi->cnt & (IRQ_RING_SIZE - 1); + hi->cnt++; + hi->tss[cnt] = bpf_ktime_get_ns(); + return 0; +} + +struct +{ + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, int); + __type(value, struct softirq); + __uint(max_entries, 1024); +} softirq_events SEC(".maps"); + +SEC("kprobe/__do_softirq") +int BPF_KPROBE(kprobe____do_softirq) +{ + int cpu = bpf_get_smp_processor_id(); + struct softirq *si = bpf_map_lookup_elem(&softirq_events, &cpu); + if (!si) + return 0; + + unsigned long long cnt = si->cnt & (SOFTIRQ_RING_SIZE - 1); + si->cnt++; + si->tss[cnt] = bpf_ktime_get_ns(); + return 0; +} + +struct +{ + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, int); + __type(value, struct wakeup); + __uint(max_entries, 1024); +} wakeup_events SEC(".maps"); + +// int wake_up_process(struct task_struct *p) +SEC("kprobe/wake_up_process") +int BPF_KPROBE(kprobe__wake_up_process, struct task_struct *p) +{ + unsigned char comm[16]; + bpf_probe_read(comm, sizeof(comm), &p->comm); + + if (comm[0] != 'k' || comm[1] != 's' || comm[2] != 'o' || comm[3] != 'f' || comm[4] != 't' || comm[5] != 'i' || comm[6] != 'r') + return 0; + + int cpu = bpf_get_smp_processor_id(); + struct wakeup *wu = bpf_map_lookup_elem(&wakeup_events, &cpu); + if (!wu) + return 0; + + unsigned long long cnt = wu->cnt & (WAKEUP_RING_SIZE - 1); + wu->cnt++; + wu->tss[cnt] = bpf_ktime_get_ns(); + return 0; +} + +struct +{ + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, int); + __type(value, struct sched); + __uint(max_entries, 1024); +} sched_events SEC(".maps"); + +struct tp_sched_switch_arg +{ + u64 pad; + char prev_comm[16]; + pid_t prev_pid; + int prev_prio; + long prev_state; + char next_comm[16]; + pid_t next_pid; +}; + +SEC("tracepoint/sched/sched_switch") +int tp_sched_switch(struct tp_sched_switch_arg *arg) +{ + int cpu = bpf_get_smp_processor_id(); + struct sched *ring = 
NULL; + + ring = bpf_map_lookup_elem(&sched_events, &cpu); + if (!ring) + return 0; + + u64 cnt = ring->cnt & (SCHEDSWITCH_RING_SIZE - 1); + ring->cnt++; + + ring->ss[cnt].ts = bpf_ktime_get_ns(); + ring->ss[cnt].next_pid = arg->next_pid; + ring->ss[cnt].prev_pid = arg->prev_pid; + + __builtin_memcpy(ring->ss[cnt].next_comm, arg->next_comm, 16); + __builtin_memcpy(ring->ss[cnt].prev_comm, arg->prev_comm, 16); + + return 0; +} + +static __always_inline void save_all_events(struct tcpping *tp, int cpu) +{ + struct irq *hi = bpf_map_lookup_elem(&irq_events, &cpu); + if (hi) + tp->irq = *hi; + + struct softirq *si = bpf_map_lookup_elem(&softirq_events, &cpu); + if (si) + tp->sirq = *si; + + struct wakeup *wu = bpf_map_lookup_elem(&wakeup_events, &cpu); + if (wu) + tp->wu = *wu; + + struct sched *ss = bpf_map_lookup_elem(&sched_events, &cpu); + if (ss) + tp->sched = *ss; +} + +struct +{ + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, u32); + __type(value, struct filter); +} filters SEC(".maps"); + +struct +{ + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, u32); + __type(value, struct tcpping); +} latency SEC(".maps"); + +struct msghdr___310 +{ + struct iovec *msg_iov; +}; + +__always_inline void get_icmphdr_with_l4(struct sk_buff *skb, struct tcphdr *th) +{ + char *head; + u16 transport_header; + + bpf_probe_read(&transport_header, sizeof(transport_header), &skb->transport_header); + bpf_probe_read(&head, sizeof(head), &skb->head); + bpf_probe_read(th, sizeof(*th), head + transport_header); +} + +__always_inline bool get_tcphdr_with_l3(struct sk_buff *skb, struct tcphdr *th) +{ + struct iphdr ih = {0}; + u16 network_header; + char *head; + bpf_probe_read(&network_header, sizeof(network_header), &skb->network_header); + bpf_probe_read(&head, sizeof(head), &skb->head); + bpf_probe_read(&ih, sizeof(ih), head + network_header); + + if (ih.protocol == IPPROTO_TCP) + { + bpf_probe_read(th, sizeof(*th), head + 
network_header + (ih.ihl << 2)); + return true; + } + return false; +} + +SEC("kprobe/raw_sendmsg") +int BPF_KPROBE(kprobe__raw_sendmsg, u64 arg1, u64 arg2, u64 arg3) +{ + struct sock *sk = NULL; + struct msghdr___310 msg310 = {}; + int key = 0; + + int pid = bpf_get_current_pid_tgid() >> 32; + struct filter *filter = bpf_map_lookup_elem(&filters, &key); + if (!filter || filter->pid != pid) + return 0; + + if (!bpf_core_field_exists(msg310.msg_iov)) // alinux2 & 3 + sk = (struct sk_buff *)arg1; + else // centos 310 + sk = (struct sk_buff *)arg2; + + filter->sock = (u64)sk; + struct tcpping *tp = bpf_map_lookup_elem(&latency, &key); + if (!tp) + return 0; + + tp->stages[TCPPING_TX_ENTRY].ts = bpf_ktime_get_ns(); + return 0; +} + +struct tp_net_arg +{ + u32 pad[2]; + struct sk_buff *skbaddr; +}; + + +SEC("tracepoint/net/net_dev_xmit") +int tp_net_dev_xmit(struct tp_net_arg *args) +{ + struct sk_buff *skb = args->skbaddr; + struct sock *sk; + int key = 0; + struct filter *filter = bpf_map_lookup_elem(&filters, &key); + if (!filter) + return 0; + + bpf_probe_read(&sk, sizeof(sk), &skb->sk); + if (filter->sock != sk) + return 0; + + struct tcpping *tp = bpf_map_lookup_elem(&latency, &key); + if (!tp) + return 0; + tp->stages[TCPPING_TX_EXIT].ts = bpf_ktime_get_ns(); + return 0; +} + +unsigned long long load_byte(void *skb, + unsigned long long off) asm("llvm.bpf.load.byte"); +unsigned long long load_half(void *skb, + unsigned long long off) asm("llvm.bpf.load.half"); +unsigned long long load_word(void *skb, + unsigned long long off) asm("llvm.bpf.load.word"); + +SEC("socket") +int socket_tcp(struct __sk_buff *skb) +{ + __u64 nhoff = 0; + __u64 ip_proto; + __u64 verlen; + u32 ports; + u16 dport; + int key = 0; + + ip_proto = load_byte(skb, nhoff + offsetof(struct iphdr, protocol)); + if (ip_proto != 6) + return 0; + + verlen = load_byte(skb, nhoff + 0); + if (verlen == 0x45) + nhoff += 20; + else + nhoff += (verlen & 0xF) << 2; + + ports = load_word(skb, nhoff); + dport 
= (u16)ports; + struct filter *filter = bpf_map_lookup_elem(&filters, &key); + if (!filter) + return 0; + + if (dport == filter->lport) + { + int key = 0; + struct tcpping *tp = bpf_map_lookup_elem(&latency, &key); + if (!tp) + return 0; + tp->stages[TCPPING_RX_EXIT].ts = bpf_ktime_get_ns(); + return -1; + } + return 0; +} + + +SEC("tracepoint/net/netif_receive_skb") +int tp_netif_receive_skb(struct tp_net_arg *args) +{ + struct sk_buff *skb = args->skbaddr; + struct tcphdr th = {0}; + int key = 0; + if (!get_tcphdr_with_l3(skb, &th)) + return 0; + + struct filter *filter = bpf_map_lookup_elem(&filters, &key); + if (!filter) + return 0; + + if (filter->be_lport != th.dest || filter->be_rport != th.source) + return 0; + + int cpu = bpf_get_smp_processor_id(); + struct tcpping *tp = bpf_map_lookup_elem(&latency, &key); + if (!tp) + return 0; + + tp->stages[TCPPING_RX_ENTRY].ts = bpf_ktime_get_ns(); + save_all_events(tp, cpu); + return 0; +} + +char _license[] SEC("license") = "GPL"; \ No newline at end of file diff --git a/source/tools/detect/net/rtrace/src/bpf/tcpping.h b/source/tools/detect/net/rtrace/src/bpf/tcpping.h new file mode 100644 index 0000000000000000000000000000000000000000..1354c2c1a94ef952e50c5f806e9019ec04ab1369 --- /dev/null +++ b/source/tools/detect/net/rtrace/src/bpf/tcpping.h @@ -0,0 +1,75 @@ +#ifndef TCPPROBE_H +#define TCPPROBE_H + +#define IRQ_RING_SIZE 4 +#define SOFTIRQ_RING_SIZE 4 +#define WAKEUP_RING_SIZE 4 +#define SCHEDSWITCH_RING_SIZE 8 + +enum TCPPING_STAGE +{ + TCPPING_TX_USR = 0, + TCPPING_TX_ENTRY, + TCPPING_TX_EXIT, + TCPPING_RX_ENTRY, + TCPPING_RX_EXIT, + TCPPING_RX_USR, + TCPPROBE_STAGE_MAX, +}; + +struct filter +{ + int pid; + unsigned short be_lport; + unsigned short be_rport; + unsigned short lport; + unsigned short rport; + unsigned long long sock; +}; + +struct tcpping_stage +{ + unsigned long long ts; +}; + +struct irq +{ + unsigned long long tss[IRQ_RING_SIZE]; + unsigned long long cnt; +}; + +struct softirq +{ + unsigned 
long long tss[SOFTIRQ_RING_SIZE]; + unsigned long long cnt; +}; + +struct wakeup +{ + unsigned long long tss[SOFTIRQ_RING_SIZE]; + unsigned long long cnt; +}; + +struct sched +{ + struct + { + int prev_pid; + int next_pid; + unsigned char prev_comm[16]; + unsigned char next_comm[16]; + unsigned long long ts; + } ss[SCHEDSWITCH_RING_SIZE]; + unsigned long long cnt; +}; + +struct tcpping +{ + struct tcpping_stage stages[TCPPROBE_STAGE_MAX]; + struct irq irq; + struct softirq sirq; + struct wakeup wu; + struct sched sched; +}; + +#endif diff --git a/source/tools/detect/net/rtrace/src/bpf/thread.h b/source/tools/detect/net/rtrace/src/bpf/thread.h new file mode 100644 index 0000000000000000000000000000000000000000..c79c66bb756797a8e63209c37ae77748ff4ff423 --- /dev/null +++ b/source/tools/detect/net/rtrace/src/bpf/thread.h @@ -0,0 +1,82 @@ + + +#ifndef __THREAD_H +#define __THREAD_H + +#define MAX_ITEM_NUM 16 + +enum THREAD_ITEM_TYPE +{ + SCHED_IN, + SCHED_OUT, + WAKE_UP, + MIGRATION, +}; + +struct thread_item +{ + unsigned short ty; + unsigned short cpu; + unsigned long long ts; +}; + +struct thread_event +{ + unsigned int tid; + unsigned int cnt; + struct thread_item items[MAX_ITEM_NUM]; +}; + +#ifdef __VMLINUX_H__ + +struct +{ + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 102400); + __type(key, u32); + __type(value, struct thread_event); +} threads SEC(".maps"); + +static __always_inline void __add_thread_item(struct thread_event *event, enum THREAD_ITEM_TYPE ty, u64 ts, u16 cpu) +{ + u64 cnt = event->cnt & (MAX_ITEM_NUM - 1); + event->cnt++; + event->items[cnt].ty = ty; + event->items[cnt].ts = ts; + event->items[cnt].cpu = cpu; +} + +static __always_inline void add_thread_item(u32 tid, enum THREAD_ITEM_TYPE ty, u64 ts, u16 cpu) +{ + struct thread_event *eventp; + eventp = bpf_map_lookup_elem(&threads, &tid); + if (!eventp) + { + struct thread_event event = {0}; + __add_thread_item(&event, ty, ts, cpu); + bpf_map_update_elem(&threads, &tid, &event, 
BPF_ANY); + } + else + { + __add_thread_item(eventp, ty, ts, cpu); + } +} + +static __always_inline void add_sched_in(u32 tid, u64 ts, u16 cpu) +{ + add_thread_item(tid, SCHED_IN, ts, cpu); +} + +static __always_inline void add_sched_out(u32 tid, u64 ts, u16 cpu) +{ + add_thread_item(tid, SCHED_OUT, ts, cpu); +} + +static __always_inline void add_sched_wakeup(u32 tid, u64 ts, u16 cpu) +{ + add_thread_item(tid, WAKE_UP, ts, cpu); +} + +#endif + +#endif \ No newline at end of file diff --git a/source/tools/detect/net/rtrace/src/bpf/userslow.bpf.c b/source/tools/detect/net/rtrace/src/bpf/userslow.bpf.c new file mode 100644 index 0000000000000000000000000000000000000000..cf9ba52fad9a9e125f583491062b032e40179f22 --- /dev/null +++ b/source/tools/detect/net/rtrace/src/bpf/userslow.bpf.c @@ -0,0 +1,168 @@ +#define BPF_NO_GLOBAL_DATA +#include "vmlinux.h" +#include +#include +#include +#include + +#include "userslow.h" + +struct +{ + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 10240); + __type(key, u64); + __type(value, u64); +} sock_cookies SEC(".maps"); + +struct +{ + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1024); + __type(key, u32); + __type(value, struct sched_event); +} scheds SEC(".maps"); + +struct +{ + __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); + __uint(key_size, sizeof(u32)); + __uint(value_size, sizeof(u32)); +} perf_events SEC(".maps"); + +struct tcp_probe_arg +{ + u64 pad; + u8 saddr[28]; + u8 daddr[28]; + u16 sport; + u16 dport; + u32 mark; + u16 data_len; + u32 snd_nxt; + u32 snd_una; + u32 snd_cwnd; + u32 sshtresh; + u32 snd_wnd; + u32 srtt; + u32 rcv_wnd; + u64 sock_cookie; +}; + +SEC("tracepoint/tcp/tcp_probe") +int tp_tcp_probe(struct tcp_probe_arg *arg) +{ + u64 cookie = arg->sock_cookie; + u64 ts = bpf_ktime_get_ns(); + bpf_map_update_elem(&sock_cookies, &cookie, &ts, BPF_ANY); + return 0; +} + +struct tcp_rcv_space_adjust_arg +{ + u64 pad; + void *skaddr; + u16 sport; + u16 dport; + u32 saddr; + u32 daddr; + u64 saddr_v6[2]; 
+ u64 daddr_v6[2]; + u64 sock_cookie; +}; + +SEC("tracepoint/tcp/tcp_rcv_space_adjust") +int tp_tcp_rcv_space_adjust(struct tcp_rcv_space_adjust_arg *arg) +{ + u64 cookie = arg->sock_cookie; + u64 *prev_ts = bpf_map_lookup_elem(&sock_cookies, &cookie); + + if (!prev_ts) + return 0; + + u64 ts = bpf_ktime_get_ns(); + u64 delta = ts - *prev_ts; + + int key = 0; + + struct filter *filter = get_filter(); + if (filter && delta > filter->threshold) + { + struct slow_event event = {0}; + + event.sport = arg->sport; + event.dport = arg->dport; + event.saddr = arg->saddr; + event.daddr = arg->daddr; + event.krcv_ts = *prev_ts; + event.urcv_ts = ts; + int cpu = bpf_get_smp_processor_id(); + struct sched_event *sched = bpf_map_lookup_elem(&scheds, &cpu); + if (sched) + event.sched = *sched; + + int tid = bpf_get_current_pid_tgid(); + struct thread_event *thread = bpf_map_lookup_elem(&threads, &tid); + if (thread) + event.thread = *thread; + + bpf_perf_event_output(arg, &perf_events, BPF_F_CURRENT_CPU, &event, sizeof(event)); + } + + return 0; +} + +struct tp_sched_switch_arg +{ + u64 pad; + char prev_comm[16]; + pid_t prev_pid; + int prev_prio; + long prev_state; + char next_comm[16]; + pid_t next_pid; +}; + +SEC("tracepoint/sched/sched_switch") +int tp_sched_switch(struct tp_sched_switch_arg *arg) +{ + int cpu = bpf_get_smp_processor_id(); + struct sched_event *event = NULL; + + event = bpf_map_lookup_elem(&scheds, &cpu); + if (!event) + return 0; + + u64 ts = bpf_ktime_get_ns(); + event->ts = ts; + event->next_pid = arg->next_pid; + event->prev_pid = arg->prev_pid; + + add_sched_in(arg->next_pid, ts, cpu); + add_sched_out(arg->prev_pid, ts, cpu); + + __builtin_memcpy(event->next_comm, arg->next_comm, 16); + __builtin_memcpy(event->prev_comm, arg->prev_comm, 16); + + return 0; +} + +struct tp_sched_wakeup_arg +{ + u64 pad; + char comm[16]; + pid_t pid; + int prio; +}; + +SEC("tracepoint/sched/sched_wakeup") +int tp_sched_wakeup(struct tp_sched_wakeup_arg *arg) +{ + u32 tid = 
arg->pid; + u64 ts = bpf_ktime_get_ns(); + int cpu = bpf_get_smp_processor_id(); + add_sched_wakeup(tid, ts, cpu); + return 0; +} + +char _license[] SEC("license") = "GPL"; \ No newline at end of file diff --git a/source/tools/detect/net/rtrace/src/bpf/userslow.h b/source/tools/detect/net/rtrace/src/bpf/userslow.h new file mode 100644 index 0000000000000000000000000000000000000000..ffc0062292339859c2502b4d057d09bce98fb535 --- /dev/null +++ b/source/tools/detect/net/rtrace/src/bpf/userslow.h @@ -0,0 +1,30 @@ +#ifndef __USER_SLOW_H +#define __USER_SLOW_H + +#include "filter.h" +#include "thread.h" + +struct sched_event +{ + int prev_pid; + int next_pid; + unsigned char prev_comm[16]; + unsigned char next_comm[16]; + unsigned long long ts; +}; + +struct slow_event +{ + unsigned long long krcv_ts; + unsigned long long urcv_ts; + + unsigned int saddr; + unsigned int daddr; + unsigned short sport; + unsigned short dport; + + struct sched_event sched; + struct thread_event thread; +}; + +#endif \ No newline at end of file diff --git a/source/tools/detect/net/rtrace/src/bpf/virtio.bpf.c b/source/tools/detect/net/rtrace/src/bpf/virtio.bpf.c new file mode 100644 index 0000000000000000000000000000000000000000..37cdc8be4539025ec7c0150050a04ccacf170319 --- /dev/null +++ b/source/tools/detect/net/rtrace/src/bpf/virtio.bpf.c @@ -0,0 +1,118 @@ +#include "vmlinux.h" +#include +#include +#include +#include + +#include "virtio.h" + +struct +{ + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, u32); + __type(value, struct virito_queue); +} imap SEC(".maps"); + +#define ALIGN(x, a) __ALIGN(x, (typeof(x))(a)-1) +#define __ALIGN(x, mask) (((x) + (mask)) & ~(mask)) + +struct virtnet_info { + struct virtio_device *vdev; + struct virtqueue *cvq; + struct net_device *dev; + struct send_queue *sq; + struct receive_queue *rq; +}; + +struct send_queue { + struct virtqueue *vq; +}; + +struct receive_queue { + struct virtqueue *vq; +}; + +__always_inline struct 
virtnet_info *get_virtnet_info(struct net_device *dev) +{ + return (struct virtnet_info *)((char *)dev + ALIGN(bpf_core_type_size(struct net_device), NETDEV_ALIGN)); +} + + +// for tx queue +SEC("kprobe/dev_id_show") +int BPF_KPROBE(kprobe_dev_id_show, struct device *device) +{ + struct net_device *dev = container_of(device, struct net_device, dev); + struct virtnet_info *vi = get_virtnet_info(dev); + int key = 0; + + struct virito_queue *qs = bpf_map_lookup_elem(&imap, &key); + if (!qs) + return 0; + + int pid = bpf_get_current_pid_tgid() >> 32; + if (qs->pid != pid) + return 0; + + int tx = qs->tx_idx; + qs->tx_idx++; + + struct send_queue *sq; + bpf_probe_read(&sq, sizeof(sq), &vi->sq); + sq = (char *)sq + tx * qs->sq_size; + struct virtqueue *vq; + bpf_probe_read(&vq, sizeof(vq), &sq->vq); + struct vring_virtqueue *vvq = container_of(vq, struct vring_virtqueue, vq); + struct vring vring; + bpf_probe_read(&vring, sizeof(vring), &vvq->split.vring); + struct virtio_ring *ring = &qs->txs[tx & (MAX_QUEUE_NUM - 1)]; + + bpf_probe_read(&ring->avail_idx, sizeof(u16), &vring.avail->idx); + bpf_probe_read(&ring->used_idx, sizeof(u16), &vring.used->idx); + bpf_probe_read(&ring->last_used_idx, sizeof(u16), &vvq->last_used_idx); + ring->len = vring.num; + + return 0; +} + +// for rx queue +SEC("kprobe/dev_port_show") +int BPF_KPROBE(kprobe_dev_port_show, struct device *device) +{ + struct net_device *dev = container_of(device, struct net_device, dev); + struct virtnet_info *vi = get_virtnet_info(dev); + int key = 0; + + struct virito_queue *qs = bpf_map_lookup_elem(&imap, &key); + if (!qs) + return 0; + + int pid = bpf_get_current_pid_tgid() >> 32; + if (qs->pid != pid) + return 0; + + u64 rx = qs->rx_idx; + qs->rx_idx++; + if (rx >= MAX_QUEUE_NUM) + return 0; + + struct receive_queue *rq; + bpf_probe_read(&rq, sizeof(rq), &vi->rq); + rq = (char *)rq + rx * qs->rq_size; + struct virtqueue *vq; + bpf_probe_read(&vq, sizeof(vq), &rq->vq); + struct vring_virtqueue *vvq = 
container_of(vq, struct vring_virtqueue, vq); + struct vring vring; + bpf_probe_read(&vring, sizeof(vring), &vvq->split.vring); + + struct virtio_ring *ring = &qs->rxs[rx & (MAX_QUEUE_NUM - 1)]; + + bpf_probe_read(&ring->avail_idx, sizeof(u16), &vring.avail->idx); + bpf_probe_read(&ring->used_idx, sizeof(u16), &vring.used->idx); + bpf_probe_read(&ring->last_used_idx, sizeof(u16), &vvq->last_used_idx); + ring->len = vring.num; + return 0; +} + +char _license[] SEC("license") = "GPL"; \ No newline at end of file diff --git a/source/tools/detect/net/rtrace/src/bpf/virtio.h b/source/tools/detect/net/rtrace/src/bpf/virtio.h new file mode 100644 index 0000000000000000000000000000000000000000..bcbdf219fcc2e1e240ae9f22eb8c65f4e6b97554 --- /dev/null +++ b/source/tools/detect/net/rtrace/src/bpf/virtio.h @@ -0,0 +1,32 @@ + +#ifndef __PERF_H +#define __PERF_H + +#define MAX_QUEUE_NUM 32 +#define NETDEV_ALIGN 32 + + +struct virtio_ring +{ + unsigned short len; + unsigned short last_used_idx; + unsigned short avail_idx; + unsigned short used_idx; +}; + +struct virito_queue +{ + int pid; + + int sq_size; + int rq_size; + + struct virtio_ring rxs[MAX_QUEUE_NUM]; + struct virtio_ring txs[MAX_QUEUE_NUM]; + + unsigned int tx_idx; + unsigned int rx_idx; +}; + + +#endif diff --git a/source/tools/detect/net/rtrace/drop/src/bpf/vmlinux.h b/source/tools/detect/net/rtrace/src/bpf/vmlinux.h similarity index 44% rename from source/tools/detect/net/rtrace/drop/src/bpf/vmlinux.h rename to source/tools/detect/net/rtrace/src/bpf/vmlinux.h index 8ef5cc3b7aa30651c7830d79afcf99edcc28ee1b..1add98ab8b0b6a7c2fafc6dec1b5df0af309f56f 100644 --- a/source/tools/detect/net/rtrace/drop/src/bpf/vmlinux.h +++ b/source/tools/detect/net/rtrace/src/bpf/vmlinux.h @@ -1,10 +1,9 @@ - #ifndef __VMLINUX_ARCH_H__ #define __VMLINUX_ARCH_H__ #if defined(__TARGET_ARCH_x86) - #include "../../../../../../../lib/internal/ebpf/coolbpf/arch/x86_64/vmlinux.h" + #include 
"../../../../../../lib/internal/ebpf/coolbpf/arch/x86_64/vmlinux.h" #elif defined(__TARGET_ARCH_arm64) - #include "../../../../../../../lib/internal/ebpf/coolbpf/arch/aarch64/vmlinux.h" + #include "../../../../../../lib/internal/ebpf/coolbpf/arch/aarch64/vmlinux.h" #endif #endif \ No newline at end of file diff --git a/source/tools/detect/net/rtrace/src/collector/drop.rs b/source/tools/detect/net/rtrace/src/collector/drop.rs new file mode 100644 index 0000000000000000000000000000000000000000..ea643f0488ca77bc490e2818c1c1da085baca937 --- /dev/null +++ b/source/tools/detect/net/rtrace/src/collector/drop.rs @@ -0,0 +1,141 @@ +mod bpf { + include!(concat!(env!("OUT_DIR"), "/drop.skel.rs")); + include!(concat!(env!("OUT_DIR"), "/drop.rs")); +} +use crate::common::ksyms::get_symbol_with_offset; +use crate::common::protocol::Protocol; +use crate::common::utils::any_as_u8_slice; +use crate::common::utils::btf_path_ptr; +use crate::common::utils::handle_lost_events; +use crate::event::Event; +use crossbeam_channel::Sender; +use libbpf_rs::skel::*; +use libbpf_rs::MapFlags; +use libbpf_rs::PerfBufferBuilder; +use serde::Deserialize; +use serde::Serialize; +use std::fmt; +use std::net::IpAddr; +use std::net::Ipv4Addr; +use std::net::SocketAddr; +use std::sync::atomic::AtomicBool; +use std::sync::atomic::Ordering; + +static DISABLE_TP_KFEE_SKB: AtomicBool = AtomicBool::new(false); + +#[derive(Serialize, Deserialize, Debug)] +pub struct Drop { + pub src: SocketAddr, + pub dst: SocketAddr, + pub sym: String, + pub protocol: Protocol, +} + +impl fmt::Display for Drop { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!( + f, + "{} {} -> {} {}", + self.protocol, self.src, self.dst, self.sym + ) + } +} + +pub struct DropCollector<'a> { + skel: bpf::DropSkel<'a>, +} + +impl<'a> DropCollector<'a> { + /// attach ping sender eBPF program + pub fn new( + verbose: bool, + protocol: Protocol, + saddr: u32, + daddr: u32, + sport: u16, + dport: u16, + ) -> Self { + let mut 
builder = bpf::DropSkelBuilder::default(); + builder.obj_builder.debug(verbose); + let mut opts = builder.obj_builder.opts(std::ptr::null()); + opts.btf_custom_path = btf_path_ptr(); + let mut open_skel = builder.open_opts(opts).unwrap(); + + if !has_tp_kfree_skb() { + open_skel + .progs_mut() + .tp_kfree_skb() + .set_autoload(false) + .unwrap(); + } + + let mut skel = open_skel + .load() + .expect("failed to load pingtrace sender program"); + + // set filter map + let filter = bpf::drop_filter { + protocol: protocol as u16, + saddr, + daddr, + sport, + dport, + }; + skel.maps_mut() + .filters() + .update( + &0_i32.to_ne_bytes(), + unsafe { any_as_u8_slice(&filter) }, + MapFlags::ANY, + ) + .expect("failed to update userslow filter map"); + skel.attach() + .expect("failed to attach pingtrace sender program"); + DropCollector { skel } + } + + pub fn poll(&mut self, mut tx: Sender) { + let handle_event = move |cpu: i32, data: &[u8]| { + __handle_event(&mut tx, cpu, data); + }; + + let perf = PerfBufferBuilder::new(&self.skel.maps_mut().perf_events()) + .sample_cb(handle_event) + .lost_cb(handle_lost_events) + .build() + .unwrap(); + + loop { + perf.poll(std::time::Duration::from_millis(200)).unwrap(); + } + } +} + +fn __handle_event(tx: &mut Sender, _cpu: i32, data: &[u8]) { + let data_vec = data.to_vec(); + let (head, body, _tail) = unsafe { data_vec.align_to::() }; + debug_assert!(head.is_empty(), "Data was not aligned"); + let event = body[0]; + + let drop = Drop { + src: SocketAddr::new( + IpAddr::V4(Ipv4Addr::from(u32::from_be(event.saddr))), + event.sport, + ), + dst: SocketAddr::new( + IpAddr::V4(Ipv4Addr::from(u32::from_be(event.daddr))), + event.dport, + ), + protocol: Protocol::try_from(event.proto as i32).unwrap(), + sym: get_symbol_with_offset(&event.location), + }; + tx.send(Event::Drop(drop)).expect("failed to send events"); +} + +pub fn disable_tp_kfree_skb() { + DISABLE_TP_KFEE_SKB.store(true, Ordering::SeqCst); +} + +pub fn has_tp_kfree_skb() -> bool 
{ + !DISABLE_TP_KFEE_SKB.load(Ordering::SeqCst) +} diff --git a/source/tools/detect/net/rtrace/src/collector/launcher.rs b/source/tools/detect/net/rtrace/src/collector/launcher.rs new file mode 100644 index 0000000000000000000000000000000000000000..8c97237c2463f2283b18142d386d9747c2029ac2 --- /dev/null +++ b/source/tools/detect/net/rtrace/src/collector/launcher.rs @@ -0,0 +1,123 @@ +use crate::collector::drop::DropCollector; +use crate::collector::netdev::NetDev; +use crate::collector::netstat::Netstat; +use crate::collector::ping::PingSender; +use crate::collector::queueslow::QueueSlowCollector; +use crate::collector::snmp::Snmp; +use crate::collector::tcpping::TcppingCollector; +use crate::collector::userslow::UserSlowCollector; +use crate::collector::virtio::VirtioCollector; +use crate::common::config::Config; +use crate::common::utils::detect_rps; +use crate::common::utils::get_host_ipv4; +use crate::common::utils::is_virtio_net; +use crate::event::Event; +use anyhow::Result; +use crossbeam_channel::Sender; +use std::net::Ipv4Addr; + +use super::retran::RetranCollector; + +pub fn initial_collector_thread_drop(config: &Config, tx: Sender) { + log::debug!("inital drop collector thread"); + let mut dp = DropCollector::new( + config.verbose, + config.protocol, + config.src.0, + config.dst.0, + config.src.1, + config.dst.1, + ); + + std::thread::spawn(move || { + dp.poll(tx); + }); +} + +pub fn initial_collector_thread_tcpping(config: &Config, tx: Sender) { + let (dsti, dport) = config.dst; + let src = if config.src.0 == 0 { + get_host_ipv4() + } else { + Ipv4Addr::from(u32::from_be(config.src.0)) + }; + let dst = Ipv4Addr::from(u32::from_be(dsti)); + let interval = config.period; + let count = config.count; + let sport = config.src.1; + let verbose = config.verbose; + + log::debug!("inital tcpping collector thread"); + std::thread::spawn(move || { + let mut tp = TcppingCollector::new(verbose); + tp.ping(tx, interval, count, sport, dport, src, dst); + }); +} + +pub 
fn initial_collector_thread_virtio(config: &Config, tx: Sender) { + if !is_virtio_net(&config.interface) { + panic!("unsupport non-virtio net: {}", config.interface); + } + + let interface = config.interface.clone(); + let period = config.period; + let verbose = config.verbose; + + log::debug!("inital virtio collector thread"); + std::thread::spawn(move || { + let mut vp = VirtioCollector::new(verbose, interface); + let _ = vp.refresh(); + loop { + std::thread::sleep(period); + let v = vp.refresh(); + tx.send(Event::Virtio(v)).unwrap(); + } + }); +} + +pub fn initial_collector_thread_ping(config: &Config, tx: Sender) { + if detect_rps() { + log::warn!("It is strongly recommended to turn off rps before using this function"); + } + + log::debug!("inital ping collector thread"); + let mut sender = PingSender::new(config.verbose); + std::thread::spawn(move || { + sender.poll(tx); + }); +} + +pub fn initial_collector_thread_retran(config: &Config, tx: Sender) { + let mut rt = RetranCollector::new(config.verbose); + std::thread::spawn(move || { + rt.poll(tx); + }); +} + +pub fn initial_collector_thread_userslow(config: &Config, tx: Sender) { + log::debug!("inital userslow collector thread"); + let mut us = UserSlowCollector::new(config.verbose, config.threshold); + std::thread::spawn(move || { + us.poll(tx); + }); +} + +pub fn initial_collector_thread_queueslow(config: &Config, tx: Sender) { + log::debug!("inital queueslow collector thread"); + let mut qs = QueueSlowCollector::new(config.verbose, config.protocol, config.threshold); + std::thread::spawn(move || { + qs.poll(tx); + }); +} + +pub fn initial_collector_netstat() -> Result { + Netstat::new() +} + +pub fn initial_collector_snmp() -> Result { + Snmp::new() +} + +pub fn initial_collector_netdev() -> Result { + NetDev::new() +} diff --git a/source/tools/detect/net/rtrace/src/collector/mod.rs b/source/tools/detect/net/rtrace/src/collector/mod.rs new file mode 100644 index 
0000000000000000000000000000000000000000..bb100cc4ca088a48cc359de757745d8ef5bcd61e --- /dev/null +++ b/source/tools/detect/net/rtrace/src/collector/mod.rs @@ -0,0 +1,11 @@ +pub mod drop; +pub mod launcher; +pub mod netdev; +pub mod netstat; +pub mod ping; +pub mod queueslow; +pub mod retran; +pub mod snmp; +pub mod tcpping; +pub mod userslow; +pub mod virtio; diff --git a/source/tools/detect/net/rtrace/src/collector/netdev.rs b/source/tools/detect/net/rtrace/src/collector/netdev.rs new file mode 100644 index 0000000000000000000000000000000000000000..6eed59100f88989d1cfb226bbcb0c1a91b6f8758 --- /dev/null +++ b/source/tools/detect/net/rtrace/src/collector/netdev.rs @@ -0,0 +1,72 @@ +use anyhow::Result; +use procfs::net::DeviceStatus; +use serde::Deserialize; +use serde::Serialize; +use std::collections::HashMap; +use std::ops::Sub; + +#[derive(Default, Debug, Clone, Deserialize, Serialize)] +pub struct NetDev { + dev: HashMap>, +} + +fn device_status_to_hashmap(ds: &DeviceStatus) -> HashMap { + let mut hm = HashMap::default(); + + hm.insert("recv_bytes".to_owned(), ds.recv_bytes as isize); + hm.insert("recv_packets".to_owned(), ds.recv_packets as isize); + hm.insert("recv_errs".to_owned(), ds.recv_errs as isize); + hm.insert("recv_drop".to_owned(), ds.recv_drop as isize); + hm.insert("recv_fifo".to_owned(), ds.recv_fifo as isize); + hm.insert("recv_frame".to_owned(), ds.recv_frame as isize); + hm.insert("recv_compressed".to_owned(), ds.recv_compressed as isize); + hm.insert("recv_multicast".to_owned(), ds.recv_multicast as isize); + hm.insert("sent_bytes".to_owned(), ds.sent_bytes as isize); + hm.insert("sent_packets".to_owned(), ds.sent_packets as isize); + hm.insert("sent_errs".to_owned(), ds.sent_errs as isize); + hm.insert("sent_drop".to_owned(), ds.sent_drop as isize); + hm.insert("sent_fifo".to_owned(), ds.sent_fifo as isize); + hm.insert("sent_colls".to_owned(), ds.sent_colls as isize); + hm.insert("sent_carrier".to_owned(), ds.sent_carrier as isize); + 
hm.insert("sent_compressed".to_owned(), ds.sent_compressed as isize); + + hm +} + +impl NetDev { + pub fn new() -> Result { + let devs = procfs::net::dev_status()?; + let mut hm = HashMap::default(); + for (name, dev) in devs { + hm.insert(name, device_status_to_hashmap(&dev)); + } + + Ok(Self { dev: hm }) + } +} + +impl<'a> Sub<&'a Self> for NetDev { + type Output = Self; + + fn sub(self, rhs: &'a Self) -> Self::Output { + let mut result = NetDev::default(); + + for (dev_name, metrics) in self.dev { + let rhs_metrics = rhs.dev.get(&dev_name); + + let mut result_metrics = HashMap::default(); + for (metric_name, value) in metrics { + let rhs_value = rhs_metrics + .and_then(|rhs_metrics| rhs_metrics.get(&metric_name)) + .unwrap_or(&0); + + // Subtract rhs_value from value + result_metrics.insert(metric_name.clone(), value - rhs_value); + } + + result.dev.insert(dev_name, result_metrics); + } + + result + } +} diff --git a/source/tools/detect/net/rtrace/utils/src/drop/delta_netstat.rs b/source/tools/detect/net/rtrace/src/collector/netstat.rs similarity index 42% rename from source/tools/detect/net/rtrace/utils/src/drop/delta_netstat.rs rename to source/tools/detect/net/rtrace/src/collector/netstat.rs index 7bf34c924cb290241b6e46892838bed143eb94fd..7cc589110f91f7d9ce2a488c18d80382326113b4 100644 --- a/source/tools/detect/net/rtrace/utils/src/drop/delta_netstat.rs +++ b/source/tools/detect/net/rtrace/src/collector/netstat.rs @@ -1,9 +1,9 @@ -use anyhow::{bail, Result}; -use serde::{Deserialize, Serialize}; +use anyhow::bail; +use anyhow::Result; +use serde::Deserialize; +use serde::Serialize; use std::collections::HashMap; -use std::fmt; -use std::fs::{read_to_string, File}; -use std::io::{self, BufRead}; +use std::fs::read_to_string; use std::ops::Sub; use std::path::Path; use std::str::FromStr; @@ -58,6 +58,10 @@ impl FromStr for Netstat { } impl Netstat { + pub fn new() -> Result { + Self::from_file("/proc/net/netstat") + } + pub fn from_file

(path: P) -> Result where P: AsRef, @@ -71,85 +75,17 @@ impl Netstat { } } -#[derive(Default, Debug, Clone)] -pub struct DeltaNetstat { - path: String, - prenetstat: Netstat, - curnetstat: Netstat, -} - -pub struct NetstatDropStatus { - pub key: String, - pub count: isize, - pub reason: String, -} - -impl DeltaNetstat { - pub fn new(path: &str) -> Result { - let curnetstat = Netstat::from_file(path)?; - Ok(DeltaNetstat { - path: path.clone().to_owned(), - prenetstat: curnetstat.clone(), - curnetstat, - }) - } - - pub fn update(&mut self) -> Result<()> { - std::mem::swap(&mut self.prenetstat, &mut self.curnetstat); - self.curnetstat = Netstat::from_file(&self.path)?; - Ok(()) - } - - fn delta(&self, key: &(String, String)) -> Option { - // if let Some(x) = self.curnetstat.hm.get(&key) { - // if let Some(y) = self.prenetstat.hm.get(&key) { - // return Some(*x - *y); +impl<'a> Sub<&'a Self> for Netstat { + type Output = Self; - // } - // } - None - } - - pub fn drop_reason(&self) -> Vec { - let mut ret = vec![]; - - let mut key = ("TcpExt:".into(), "TCPWqueueTooBig".into()); - - if let Some(x) = self.delta(&key) { - ret.push(NetstatDropStatus { - key: key.1.clone(), - count: x, - reason: "内核bug, 重传包切片失败导致包重传失败".into(), - }); - } + fn sub(self, rhs: &'a Self) -> Self::Output { + let mut result = Netstat::default(); - key.1 = "PAWSPassiveRejected".into(); - if let Some(x) = self.delta(&key) { - ret.push(NetstatDropStatus { - key: key.1.clone(), - count: x, - reason: "客户端发送的syn包所携带的时间戳未通过PAWS检查".into(), - }); + for (stat_name, value) in self.netstat { + let rhs_value = rhs.netstat.get(&stat_name).unwrap_or(&0); + result.netstat.insert(stat_name, value - rhs_value); } - ret + result } } - -impl fmt::Display for DeltaNetstat { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - // for (k, v) in &self.curnetstat.hm { - // let pre_v = self.prenetstat.hm.get(k).unwrap(); - // if v - pre_v != 0 { - // write!(f, "{}:{} {} ", k.0, k.1, v - pre_v)?; - // } - // } - Ok(()) 
- } -} - -pub fn show_netstat_json() -> Result<()> { - let netstat = Netstat::from_file("/proc/net/netstat")?; - println!("{}", serde_json::to_string(&netstat)?); - Ok(()) -} diff --git a/source/tools/detect/net/rtrace/src/collector/ping.rs b/source/tools/detect/net/rtrace/src/collector/ping.rs new file mode 100644 index 0000000000000000000000000000000000000000..b04c6358939c8e4eb25a04b8476ee6ddb020cb62 --- /dev/null +++ b/source/tools/detect/net/rtrace/src/collector/ping.rs @@ -0,0 +1,384 @@ +mod bpf { + include!(concat!(env!("OUT_DIR"), "/ping_sender.skel.rs")); + include!(concat!(env!("OUT_DIR"), "/pingtrace.rs")); +} + +use crate::common::btree::CpuBTreeMap; +use crate::common::btree::CpuBTreeSet; +use crate::common::ksyms::has_kernel_symbol; +use crate::common::raw_event::RawEvent; +use crate::common::sched::Process; +use crate::common::sched::Sched; +use crate::common::utils::btf_path_ptr; +use crate::common::utils::handle_lost_events; +use crate::event::Event; +use bpf::PING_SENDER_STAGE_PING_DEV_QUEUE; +use bpf::PING_SENDER_STAGE_PING_DEV_XMIT; +use bpf::PING_SENDER_STAGE_PING_ICMP_RCV; +use bpf::PING_SENDER_STAGE_PING_NETIF_RCV; +use bpf::PING_SENDER_STAGE_PING_RCV; +use bpf::PING_SENDER_STAGE_PING_SND; +use byteorder::ByteOrder; +use crossbeam_channel::Receiver; +use crossbeam_channel::Sender; +use libbpf_rs::skel::*; +use libbpf_rs::PerfBufferBuilder; +use serde::Deserialize; +use serde::Serialize; +use std::collections::BTreeMap; +use std::fmt; + +/// a life of ping packet +#[derive(Deserialize, Serialize, Debug)] +pub enum PingStage { + PingSnd, + PingDevQueue, + PingDevXmit, + PingNetifRcv, + PingIcmpRcv, + PingRcv, + Irq, + SoftIrq, + Wakeup, + Sched(Sched), +} + +impl fmt::Display for PingStage { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + PingStage::PingSnd => write!(f, "Send"), + PingStage::PingDevQueue => write!(f, "DevQueue"), + PingStage::PingDevXmit => write!(f, "DevXmit"), + PingStage::PingNetifRcv => write!(f, 
"NetifRcv"), + PingStage::PingIcmpRcv => write!(f, "IcmpRcv"), + PingStage::PingRcv => write!(f, "Recv"), + PingStage::Irq => write!(f, "Irq"), + PingStage::SoftIrq => write!(f, "SoftIrq"), + PingStage::Wakeup => write!(f, "WakeupKosfitrqd"), + PingStage::Sched(s) => write!(f, "SchedSwitch({})", s), + } + } +} + +/// The actual path information of ping packets +#[derive(Deserialize, Serialize, Debug)] +pub struct Ping { + id: u16, + seq: u16, + stages: BTreeMap, +} + +impl Ping { + pub(crate) fn new(id: u16, seq: u16) -> Self { + Ping { + id, + seq, + stages: Default::default(), + } + } + + pub(crate) fn add_stage(&mut self, ts: u64, stage: PingStage) { + self.stages.insert(ts, stage); + } + + pub(crate) fn add_irq(&mut self, ts: u64) { + self.add_stage(ts, PingStage::Irq) + } + + pub(crate) fn add_softirq(&mut self, ts: u64) { + self.add_stage(ts, PingStage::SoftIrq) + } + pub(crate) fn add_wakeup(&mut self, ts: u64) { + self.add_stage(ts, PingStage::Wakeup) + } + pub(crate) fn add_sched(&mut self, ts: u64, sd: Sched) { + self.add_stage(ts, PingStage::Sched(sd)) + } +} + +impl fmt::Display for Ping { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let mut string_vec = vec![]; + let mut prev_ts = 0; + for (k, v) in self.stages.iter() { + if prev_ts == 0 { + string_vec.push(format!("{}", v)); + prev_ts = *k; + continue; + } + + string_vec.push(format!("{}us", (*k - prev_ts) / 1000)); + string_vec.push(format!("{}", v)); + prev_ts = *k; + } + + write!( + f, + "Id:{} Seq:{} {}", + self.id, + self.seq, + string_vec.join(" -> ") + ) + } +} + +fn __handle_event(tx: &mut Sender, cpu: i32, data: &[u8]) { + log::debug!("receive perf buffer event"); + let ty = byteorder::NativeEndian::read_u64(data) as u32; + let data_vec = data.to_vec(); + + let raw_event = RawEvent { + cpu, + ty, + data: data_vec, + }; + + tx.send(raw_event).expect("failed to send events"); +} + +/// wrapper of ping sender skeleton +pub struct PingSender<'a> { + skel: bpf::PingSenderSkel<'a>, +} 
+ +impl<'a> PingSender<'a> { + /// attach ping sender eBPF program + pub fn new(verbose: bool) -> Self { + let mut builder = bpf::PingSenderSkelBuilder::default(); + builder.obj_builder.debug(verbose); + let mut opts = builder.obj_builder.opts(std::ptr::null()); + opts.btf_custom_path = btf_path_ptr(); + let mut open_skel = builder.open_opts(opts).unwrap(); + + // check network card type + if has_kernel_symbol("skb_recv_done") { + log::debug!("detect network card: virtio"); + open_skel + .progs_mut() + .kprobe__mlx5e_completion_event() + .set_autoload(false) + .unwrap(); + } else if has_kernel_symbol("mlx5e_completion_event") { + log::debug!("detect network card: mlx5"); + open_skel + .progs_mut() + .kprobe__skb_recv_done() + .set_autoload(false) + .unwrap(); + } else { + log::error!("detect network card: unknown, only support virtio and mlx5"); + panic!(); + } + + let mut skel = open_skel + .load() + .expect("failed to load pingtrace sender program"); + skel.attach() + .expect("failed to attach pingtrace sender program"); + PingSender { skel } + } + + // Poll the raw event of perf for ping sender + fn inernal_poll_thread(&mut self, mut tx: Sender) { + let handle_event = move |cpu: i32, data: &[u8]| { + __handle_event(&mut tx, cpu, data); + }; + let perf = PerfBufferBuilder::new(&self.skel.maps_mut().perf_events()) + .sample_cb(handle_event) + .lost_cb(handle_lost_events) + .build() + .unwrap(); + log::debug!("start pingtrace perf buffer polling thread"); + std::thread::spawn(move || loop { + perf.poll(std::time::Duration::from_millis(200)).unwrap(); + }); + } + + pub fn poll(&mut self, tx: Sender) { + log::debug!("start pingtrace polling thread"); + let (itx, irx) = crossbeam_channel::unbounded(); + self.inernal_poll_thread(itx); + // do polling + do_poll_thread(tx, irx); + } +} + +fn do_poll_thread(tx: Sender, irx: Receiver) { + let cpus = num_cpus::get(); + let mut irqs = CpuBTreeSet::::new(cpus); + let mut softirqs = CpuBTreeSet::::new(cpus); + let mut wakeups 
= CpuBTreeSet::::new(cpus); + let mut scheds = CpuBTreeMap::::new(cpus); + + loop { + match irx.recv_timeout(std::time::Duration::from_millis(200)) { + Ok(raw_event) => { + if let Some(e) = handle_raw_event( + raw_event, + &mut irqs, + &mut softirqs, + &mut wakeups, + &mut scheds, + ) { + tx.send(Event::Ping(e)).expect("Faild to send event"); + } + } + Err(_) => {} + } + } +} + +fn handle_raw_event( + raw_event: RawEvent, + irqs: &mut CpuBTreeSet, + softirqs: &mut CpuBTreeSet, + wakeups: &mut CpuBTreeSet, + scheds: &mut CpuBTreeMap, +) -> Option { + macro_rules! record_raw_event { + ($raw_event: ident, $ty: ident, $tree: ident) => { + let cpu = $raw_event.cpu as usize; + let (head, body, _tail) = unsafe { $raw_event.data.align_to::() }; + debug_assert!(head.is_empty(), "Data was not aligned"); + let event_raw = body[0]; + + for ts in event_raw.tss { + $tree.insert(cpu, ts); + } + }; + } + + match raw_event.ty { + bpf::EVENT_TYPE_PING => { + let (head, body, _tail) = unsafe { raw_event.data.align_to::() }; + debug_assert!(head.is_empty(), "Data was not aligned"); + let ping = body[0]; + let pt = handle_ping_event(ping, irqs, softirqs, wakeups, scheds); + return Some(pt); + } + bpf::EVENT_TYPE_IRQ => { + record_raw_event!(raw_event, irq, irqs); + } + bpf::EVENT_TYPE_SOFTIRQ => { + record_raw_event!(raw_event, softirq, softirqs); + } + bpf::EVENT_TYPE_WAKEUP => { + record_raw_event!(raw_event, wakeup, wakeups); + } + bpf::EVENT_TYPE_SCHED => handle_sched_event(scheds, &raw_event), + _ => panic!( + "Unknown event type: {}, data: {:?}", + raw_event.ty, raw_event.data + ), + } + + None +} + +fn handle_ping_event( + ping: bpf::ping_sender, + irqs: &mut CpuBTreeSet, + softirqs: &mut CpuBTreeSet, + wakeups: &mut CpuBTreeSet, + scheds: &mut CpuBTreeMap, +) -> Ping { + let mut pt = Ping::new(ping.key.id, ping.key.seq); + + let send_ts = ping.stages[PING_SENDER_STAGE_PING_SND as usize].ts; + let dev_queue_ts = ping.stages[PING_SENDER_STAGE_PING_DEV_QUEUE as usize].ts; + let 
dev_xmit_ts = ping.stages[PING_SENDER_STAGE_PING_DEV_XMIT as usize].ts; + let netif_rcv_ts = ping.stages[PING_SENDER_STAGE_PING_NETIF_RCV as usize].ts; + let netif_rcv_cpu = ping.stages[PING_SENDER_STAGE_PING_NETIF_RCV as usize].cpu as usize; + let icmp_rcv_ts = ping.stages[PING_SENDER_STAGE_PING_ICMP_RCV as usize].ts; + let recv_ts = ping.stages[PING_SENDER_STAGE_PING_RCV as usize].ts; + + assert_ne!(send_ts, 0); + assert_ne!(dev_queue_ts, 0); + assert_ne!(dev_xmit_ts, 0); + assert_ne!(netif_rcv_ts, 0); + assert_ne!(icmp_rcv_ts, 0); + assert_ne!(recv_ts, 0); + + pt.add_stage(send_ts, PingStage::PingSnd); + pt.add_stage(dev_queue_ts, PingStage::PingDevQueue); + pt.add_stage(dev_xmit_ts, PingStage::PingDevXmit); + pt.add_stage(netif_rcv_ts, PingStage::PingNetifRcv); + pt.add_stage(icmp_rcv_ts, PingStage::PingIcmpRcv); + pt.add_stage(recv_ts, PingStage::PingRcv); + + let mut irq_ts = 0; + if let Some(irq) = irqs.lower_bound(netif_rcv_cpu, netif_rcv_ts) { + pt.add_irq(*irq); + irq_ts = *irq; + } else { + log::warn!("Hardware irq lost"); + } + + if irq_ts != 0 { + for si in softirqs.in_range(netif_rcv_cpu, irq_ts, netif_rcv_ts) { + pt.add_softirq(si); + } + } + + let mut sched_start_ts = 0; + if irq_ts != 0 { + if let Some(sched) = scheds.lower_bound(netif_rcv_cpu, irq_ts) { + sched_start_ts = *sched.0; + pt.add_stage(sched_start_ts, PingStage::Sched(sched.1.clone())); + } + } + + for (ts, sched) in scheds.range(netif_rcv_cpu, sched_start_ts, netif_rcv_ts) { + pt.add_sched(*ts, sched.clone()); + } + + if let Some((ts, sched)) = scheds.lower_bound(netif_rcv_cpu, netif_rcv_ts) { + if sched.next.comm.starts_with("ksoftirqd") { + // find wakeup + if let Some(wakeup_ts) = wakeups.lower_bound(netif_rcv_cpu, *ts) { + pt.add_wakeup(*wakeup_ts); + } + } + } + + let split_ts = netif_rcv_ts - 3_000_000_000; + irqs.flush(netif_rcv_cpu, split_ts); + softirqs.flush(netif_rcv_cpu, split_ts); + wakeups.flush(netif_rcv_cpu, split_ts); + scheds.flush(netif_rcv_cpu, split_ts); + + pt +} 
+ +fn handle_sched_event(scheds: &mut CpuBTreeMap, raw_event: &RawEvent) { + let cpu = raw_event.cpu as usize; + let (head, body, _tail) = unsafe { raw_event.data.align_to::() }; + debug_assert!(head.is_empty(), "Data was not aligned"); + let event_raw = body[0]; + + let cnt = event_raw.cnt as usize; + for i in 0..(bpf::SCHEDSWITCH_RING_SIZE as usize) { + if cnt <= i { + break; + } + + let idx = (cnt - i - 1) & ((bpf::SCHEDSWITCH_RING_SIZE as usize) - 1); + let ts = event_raw.ss[idx].ts; + if scheds.contains_key(cpu, &ts) { + continue; + } + + let prev = Process::new( + event_raw.ss[idx].prev_pid as u32, + event_raw.ss[idx].prev_comm.to_vec(), + ); + let next = Process::new( + event_raw.ss[idx].next_pid as u32, + event_raw.ss[idx].next_comm.to_vec(), + ); + + let sched = Sched::new(prev, next); + + scheds.insert(cpu, ts, sched); + } +} diff --git a/source/tools/detect/net/rtrace/src/collector/queueslow.rs b/source/tools/detect/net/rtrace/src/collector/queueslow.rs new file mode 100644 index 0000000000000000000000000000000000000000..b625d236de946e009c325f6a7036037fe70ca4f3 --- /dev/null +++ b/source/tools/detect/net/rtrace/src/collector/queueslow.rs @@ -0,0 +1,114 @@ +mod bpf { + include!(concat!(env!("OUT_DIR"), "/queueslow.skel.rs")); + include!(concat!(env!("OUT_DIR"), "/queueslow.rs")); +} +use crate::common::protocol::Protocol; +use crate::common::utils::any_as_u8_slice; +use crate::common::utils::btf_path_ptr; +use crate::common::utils::handle_lost_events; +use crate::event::Event; +use crossbeam_channel::Sender; +use libbpf_rs::skel::*; +use libbpf_rs::MapFlags; +use libbpf_rs::PerfBufferBuilder; +use serde::Deserialize; +use serde::Serialize; +use std::fmt; +use std::net::IpAddr; +use std::net::Ipv4Addr; +use std::net::SocketAddr; + +#[derive(Deserialize, Serialize, Debug)] +pub struct QueueSlow { + saddr: SocketAddr, + daddr: SocketAddr, + protocol: Protocol, + delay: u64, +} + +impl fmt::Display for QueueSlow { + fn fmt(&self, f: &mut fmt::Formatter) -> 
fmt::Result { + write!( + f, + "{} {} -> {} {}us", + self.protocol, + self.saddr, + self.daddr, + self.delay / 1000 + ) + } +} + +pub struct QueueSlowCollector<'a> { + skel: bpf::QueueslowSkel<'a>, +} + +impl<'a> QueueSlowCollector<'a> { + /// attach ping sender eBPF program + pub fn new(verbose: bool, protocol: Protocol, threshold: u64) -> Self { + let mut builder = bpf::QueueslowSkelBuilder::default(); + builder.obj_builder.debug(verbose); + let mut opts = builder.obj_builder.opts(std::ptr::null()); + opts.btf_custom_path = btf_path_ptr(); + let mut open_skel = builder.open_opts(opts).unwrap(); + + let mut skel = open_skel + .load() + .expect("failed to load pingtrace sender program"); + // set filter map + let filter = bpf::filter { + protocol: protocol as u32, + threshold, + }; + skel.maps_mut() + .filters() + .update( + &0_i32.to_ne_bytes(), + unsafe { any_as_u8_slice(&filter) }, + MapFlags::ANY, + ) + .expect("failed to update userslow filter map"); + skel.attach() + .expect("failed to attach pingtrace sender program"); + QueueSlowCollector { skel } + } + + pub fn poll(&mut self, mut tx: Sender) { + log::debug!("start queueslow polling thread"); + let handle_event = move |cpu: i32, data: &[u8]| { + __handle_event(&mut tx, cpu, data); + }; + + let perf = PerfBufferBuilder::new(&self.skel.maps_mut().perf_events()) + .sample_cb(handle_event) + .lost_cb(handle_lost_events) + .build() + .unwrap(); + + loop { + perf.poll(std::time::Duration::from_millis(200)).unwrap(); + } + } +} + +fn __handle_event(tx: &mut Sender, _cpu: i32, data: &[u8]) { + let data_vec = data.to_vec(); + let (head, body, _tail) = unsafe { data_vec.align_to::() }; + debug_assert!(head.is_empty(), "Data was not aligned"); + let event = body[0]; + + let qs = QueueSlow { + saddr: SocketAddr::new( + IpAddr::V4(Ipv4Addr::from(u32::from_be(event.saddr))), + event.sport, + ), + daddr: SocketAddr::new( + IpAddr::V4(Ipv4Addr::from(u32::from_be(event.daddr))), + event.dport, + ), + protocol: 
Protocol::try_from(event.protocol as i32).unwrap(), + delay: event.latency, + }; + tx.send(Event::QueueSlow(qs)) + .expect("failed to send events"); +} diff --git a/source/tools/detect/net/rtrace/src/collector/retran.rs b/source/tools/detect/net/rtrace/src/collector/retran.rs new file mode 100644 index 0000000000000000000000000000000000000000..1466933e3f437256219bcccad306a860046398f1 --- /dev/null +++ b/source/tools/detect/net/rtrace/src/collector/retran.rs @@ -0,0 +1,111 @@ +mod bpf { + include!(concat!(env!("OUT_DIR"), "/retran.skel.rs")); + include!(concat!(env!("OUT_DIR"), "/retran.rs")); +} +use crate::common::utils::btf_path_ptr; +use crate::common::utils::handle_lost_events; +use crate::event::Event; +use crossbeam_channel::Sender; +use libbpf_rs::skel::*; +use libbpf_rs::PerfBufferBuilder; +use serde::Deserialize; +use serde::Serialize; +use std::fmt; +use std::net::IpAddr; +use std::net::Ipv4Addr; +use std::net::SocketAddr; + +#[derive(Serialize, Deserialize, Debug)] +pub struct Retran { + pub tcp_state: String, + pub ca_state: String, + pub retran_type: String, + pub src: SocketAddr, + pub dst: SocketAddr, +} + +impl fmt::Display for Retran { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!( + f, + "{} -> {} retran_type:{}", + self.src, self.dst, self.retran_type + ) + } +} + +pub struct RetranCollector<'a> { + skel: bpf::RetranSkel<'a>, +} + +impl<'a> RetranCollector<'a> { + /// attach ping sender eBPF program + pub fn new(verbose: bool) -> Self { + let mut builder = bpf::RetranSkelBuilder::default(); + builder.obj_builder.debug(verbose); + let mut opts = builder.obj_builder.opts(std::ptr::null()); + opts.btf_custom_path = btf_path_ptr(); + let open_skel = builder.open_opts(opts).unwrap(); + let mut skel = open_skel.load().expect("failed to load retran program"); + skel.attach().expect("failed to attach retran program"); + RetranCollector { skel } + } + + pub fn poll(&mut self, mut tx: Sender) { + log::debug!("start retran polling 
thread"); + let handle_event = move |cpu: i32, data: &[u8]| { + __handle_event(&mut tx, cpu, data); + }; + + let perf = PerfBufferBuilder::new(&self.skel.maps_mut().perf_events()) + .sample_cb(handle_event) + .lost_cb(handle_lost_events) + .build() + .unwrap(); + + loop { + perf.poll(std::time::Duration::from_millis(200)).unwrap(); + } + } +} + +fn __handle_event(tx: &mut Sender, _cpu: i32, data: &[u8]) { + let data_vec = data.to_vec(); + let (head, body, _tail) = unsafe { data_vec.align_to::() }; + debug_assert!(head.is_empty(), "Data was not aligned"); + let event = body[0]; + + let ca_state = match event.ca_state { + 0 => "open", + 1 => "disorder", + 2 => "cwr", + 3 => "recovery", + 4 => "loss", + _ => "none", + }; + + let retran_type = match event.retran_type { + 0 => "SynRetran", + 1 => "SlowStartRetran", + 2 => "RtoRetran", + 3 => "FastRetran", + 4 => "TLP", + _ => "Other", + }; + + let retran = Retran { + tcp_state: "".to_owned(), + ca_state: ca_state.to_string(), + retran_type: retran_type.to_owned(), + src: SocketAddr::new( + IpAddr::V4(Ipv4Addr::from(u32::from_be(event.saddr))), + event.sport, + ), + dst: SocketAddr::new( + IpAddr::V4(Ipv4Addr::from(u32::from_be(event.daddr))), + event.dport, + ), + }; + tx.send(Event::Retran(retran)) + .expect("failed to send events"); +} diff --git a/source/tools/detect/net/rtrace/utils/src/drop/delta_snmp.rs b/source/tools/detect/net/rtrace/src/collector/snmp.rs similarity index 48% rename from source/tools/detect/net/rtrace/utils/src/drop/delta_snmp.rs rename to source/tools/detect/net/rtrace/src/collector/snmp.rs index 952c93f9c7870257358bf561b371550df4e024a4..41c75abc7db919f581ac3b7ba97e498c07854a2e 100644 --- a/source/tools/detect/net/rtrace/utils/src/drop/delta_snmp.rs +++ b/source/tools/detect/net/rtrace/src/collector/snmp.rs @@ -1,14 +1,13 @@ -use anyhow::{bail, Result}; +use anyhow::bail; +use anyhow::Result; +use serde::Deserialize; +use serde::Serialize; use std::collections::HashMap; -use std::fmt; -use 
std::fs::{read_to_string, File}; -use std::io::{self, BufRead}; +use std::fs::read_to_string; use std::ops::Sub; use std::path::Path; use std::str::FromStr; -use serde::{Deserialize, Serialize}; - #[derive(Default, Debug, Clone, Serialize, Deserialize)] pub struct Snmp { snmp: HashMap, @@ -59,6 +58,10 @@ impl FromStr for Snmp { } impl Snmp { + pub fn new() -> Result { + Self::from_file("/proc/net/snmp") + } + pub fn from_file

(path: P) -> Result where P: AsRef, @@ -72,69 +75,17 @@ impl Snmp { } } -#[derive(Default, Debug, Clone)] -pub struct DeltaSnmp { - path: String, - presnmp: Snmp, - cursnmp: Snmp, -} - -pub struct SnmpDropStatus { - pub key: String, - pub count: isize, - pub reason: String, -} - -impl DeltaSnmp { - pub fn new(path: &str) -> Result { - let cursnmp = Snmp::from_file(path)?; - Ok(DeltaSnmp { - path: path.clone().to_owned(), - presnmp: cursnmp.clone(), - cursnmp, - }) - } - - pub fn update(&mut self) -> Result<()> { - std::mem::swap(&mut self.presnmp, &mut self.cursnmp); - self.cursnmp = Snmp::from_file(&self.path)?; - Ok(()) - } - - fn delta(&self, key: &(String, String)) -> Option { - // if let Some(x) = self.cursnmp.hm.get(&key) { - // if let Some(y) = self.presnmp.hm.get(&key) { - // return Some(*x - *y); - // } - // } - None - } - - pub fn drop_reason(&self) -> Vec { - let mut ret = vec![]; +impl<'a> Sub<&'a Self> for Snmp { + type Output = Self; - let mut key = ("Tcp:".into(), "InCsumErrors".into()); + fn sub(self, rhs: &'a Self) -> Self::Output { + let mut result = Snmp::default(); - if let Some(x) = self.delta(&key) { - ret.push(SnmpDropStatus { - key: key.1.clone(), - count: x, - reason: "报文的checksum值不对,报文数据可能被硬件篡改".into(), - }); + for (stat_name, value) in self.snmp { + let rhs_value = rhs.snmp.get(&stat_name).unwrap_or(&0); + result.snmp.insert(stat_name, value - rhs_value); } - ret - } -} - -impl fmt::Display for DeltaSnmp { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - // for (k, v) in &self.cursnmp.hm { - // let pre_v = self.presnmp.hm.get(k).unwrap(); - // if v - pre_v != 0 { - // write!(f, "{}:{} {} ", k.0, k.1, v - pre_v)?; - // } - // } - Ok(()) + result } } diff --git a/source/tools/detect/net/rtrace/src/collector/tcpping.rs b/source/tools/detect/net/rtrace/src/collector/tcpping.rs new file mode 100644 index 0000000000000000000000000000000000000000..4753090c7dc4a0e728ae44de62551b91ce793e1e --- /dev/null +++ 
b/source/tools/detect/net/rtrace/src/collector/tcpping.rs @@ -0,0 +1,420 @@ +use crate::common::ksyms::has_kernel_symbol; +use crate::common::sched::Process; +use crate::common::sched::Sched; +use crate::common::utils::allocate_port; +use crate::common::utils::any_as_u8_slice; +use crate::common::utils::current_monotime; +use crate::event::Event; +use crossbeam_channel::Sender; +use libbpf_rs::skel::*; +use pnet::packet::ip::IpNextHeaderProtocols; +use pnet::packet::tcp::MutableTcpPacket; +use pnet::packet::tcp::TcpFlags; +use pnet::transport::tcp_packet_iter; +use pnet::transport::transport_channel; +use pnet::transport::TransportChannelType::Layer4; +use pnet::transport::TransportProtocol::Ipv4; +use pnet::transport::TransportReceiver; +use pnet::transport::TransportSender; +use serde::Deserialize; +use serde::Serialize; +use std::collections::BTreeMap; +use std::fmt; +use std::net::Ipv4Addr; +use std::os::unix::io::AsFd; +use std::os::unix::io::AsRawFd; +use std::time::Duration; + +include!(concat!(env!("OUT_DIR"), "/tcpping.skel.rs")); +include!(concat!(env!("OUT_DIR"), "/tcpping.rs")); + +/// a life of tcpping packet +#[derive(Deserialize, Serialize, Debug, Clone)] +pub enum TcppingStage { + TxUser, + TxKernelIn, + TxKernelOut, + RxKernelIn, + RxKernelOut, + RxUser, + Irq, + SoftIrq, + Sched(Sched), +} + +impl TcppingStage { + pub fn is_stage(&self, stage: &TcppingStage) -> bool { + match (self, stage) { + (TcppingStage::TxUser, TcppingStage::TxUser) => true, + (TcppingStage::TxKernelIn, TcppingStage::TxKernelIn) => true, + (TcppingStage::TxKernelOut, TcppingStage::TxKernelOut) => true, + (TcppingStage::RxKernelIn, TcppingStage::RxKernelIn) => true, + (TcppingStage::RxKernelOut, TcppingStage::RxKernelOut) => true, + (TcppingStage::RxUser, TcppingStage::RxUser) => true, + (TcppingStage::Irq, TcppingStage::Irq) => true, + (TcppingStage::SoftIrq, TcppingStage::SoftIrq) => true, + (TcppingStage::Sched(_), TcppingStage::Sched(_)) => true, + _ => false, + } + } +} + 
+impl fmt::Display for TcppingStage { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + TcppingStage::TxUser => write!(f, "TxUser"), + TcppingStage::TxKernelIn => write!(f, "TxKernelIn"), + TcppingStage::TxKernelOut => write!(f, "TxKernelOut"), + TcppingStage::RxKernelIn => write!(f, "RxKernelIn"), + TcppingStage::RxKernelOut => write!(f, "RxKernelOut"), + TcppingStage::RxUser => write!(f, "RxUser"), + TcppingStage::Irq => write!(f, "Irq"), + TcppingStage::SoftIrq => write!(f, "SoftIrq"), + TcppingStage::Sched(sched) => write!(f, "Sched({})", sched), + } + } +} + +#[derive(Deserialize, Serialize, Debug, Default)] +pub struct Tcpping { + pub seq: u32, + pub stages: BTreeMap, +} + +impl Tcpping { + pub fn stage_ts(&self, s: TcppingStage) -> u64 { + for (ts, stage) in &self.stages { + if stage.is_stage(&s) { + return *ts; + } + } + panic!("internal error"); + } + + pub fn scheds(&self) -> impl Iterator { + self.stages.iter().filter_map(|(ts, stage)| { + if let TcppingStage::Sched(ref sched) = stage { + Some((ts, sched)) + } else { + None + } + }) + } + + pub fn irqs(&self) -> impl Iterator { + self.stages.iter().filter_map(|(ts, stage)| { + if let TcppingStage::Irq = stage { + Some(ts) + } else { + None + } + }) + } + + pub fn delta(&self, left: TcppingStage, right: TcppingStage) -> u64 { + self.stage_ts(left) - self.stage_ts(right) + } + + pub fn time(&self) -> u64 { + self.delta(TcppingStage::RxUser, TcppingStage::TxUser) + } + + pub fn is_timeout(&self) -> bool { + self.stages.len() == 0 + } + + pub(crate) fn add_stage(&mut self, ts: u64, stage: TcppingStage) { + assert_ne!(ts, 0, "error:{}", stage); + self.stages.insert(ts, stage); + } + + pub(crate) fn add_irq(&mut self, ts: u64) { + self.add_stage(ts, TcppingStage::Irq) + } + + pub(crate) fn add_softirq(&mut self, ts: u64) { + self.add_stage(ts, TcppingStage::SoftIrq) + } + + pub(crate) fn add_sched(&mut self, ts: u64, sd: Sched) { + self.add_stage(ts, TcppingStage::Sched(sd)) + } +} 
+ +impl fmt::Display for Tcpping { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let mut string_vec = vec![]; + let mut prev_ts = 0; + for (ts, name) in &self.stages { + if prev_ts == 0 { + string_vec.push(name.to_string()); + prev_ts = *ts; + continue; + } + + string_vec.push(format!("{}us", (*ts - prev_ts) / 1000)); + string_vec.push(name.to_string()); + prev_ts = *ts; + } + + write!(f, "seq:{} {}", self.seq, string_vec.join(" -> ")) + } +} + +pub struct TcppingCollector<'a> { + skel: TcppingSkel<'a>, + tx: TransportSender, + rx: TransportReceiver, +} + +impl<'a> TcppingCollector<'a> { + pub fn new(verbose: bool) -> Self { + // let interfaces = datalink::interfaces(); + let protocol = Layer4(Ipv4(IpNextHeaderProtocols::Tcp)); + let (tx, rx) = match transport_channel(4096, protocol) { + Ok((tx, rx)) => (tx, rx), + Err(e) => panic!( + "An error occurred when creating the transport channel: {}", + e + ), + }; + + let mut skel_builder = TcppingSkelBuilder::default(); + skel_builder.obj_builder.debug(verbose); + let mut open_skel = skel_builder.open().unwrap(); + + // check network card type + if has_kernel_symbol("skb_recv_done") { + log::debug!("detect network card: virtio"); + open_skel + .progs_mut() + .kprobe__mlx5e_completion_event() + .set_autoload(false) + .unwrap(); + } else if has_kernel_symbol("mlx5e_completion_event") { + log::debug!("detect network card: mlx5"); + open_skel + .progs_mut() + .kprobe__skb_recv_done() + .set_autoload(false) + .unwrap(); + } else { + log::error!("detect network card: unknown, only support virtio and mlx5"); + panic!(); + } + + let mut skel = open_skel.load().unwrap(); + + skel.attach().unwrap(); + let prog = skel.progs().socket_tcp().as_fd().as_raw_fd(); + set_bpf_filter(rx.socket.fd, prog); + + TcppingCollector { skel, tx, rx } + } + + fn set_filter(&mut self, sport: u16, dport: u16) { + let mut filter = unsafe { std::mem::zeroed::() }; + filter.pid = unsafe { libc::getpid() }; + filter.be_lport = 
sport.to_be(); + filter.be_rport = dport.to_be(); + filter.lport = sport; + filter.rport = dport; + self.skel + .maps_mut() + .filters() + .update( + &0_u32.to_ne_bytes(), + unsafe { any_as_u8_slice(&filter) }, + libbpf_rs::MapFlags::ANY, + ) + .unwrap(); + } + + pub fn ping( + &mut self, + tx: Sender, + interval: Duration, + count: u32, + sport: u16, + dport: u16, + src: Ipv4Addr, + dst: Ipv4Addr, + ) { + for seq in 0..count { + let new_sport = allocate_port(sport); + assert_ne!(new_sport, 0, "failed to allocate local port"); + self.set_filter(new_sport, dport); + + let seq = seq + 1; + let send_ts = current_monotime(); + match self.ping_once(seq, new_sport, dport, src, dst) { + Some(_) => { + let mut tp = self.do_collect(seq); + tp.add_stage(send_ts, TcppingStage::TxUser); + tx.send(Event::Tcpping(tp)).unwrap(); + std::thread::sleep(interval); + } + None => { + let mut tp = Tcpping::default(); + tp.seq = seq; + tx.send(Event::Tcpping(tp)).unwrap(); + } + } + } + } + + fn do_collect(&mut self, seq: u32) -> Tcpping { + let ts = current_monotime(); + match self + .skel + .maps_mut() + .latency() + .lookup(&0_u32.to_ne_bytes(), libbpf_rs::MapFlags::ANY) + { + Ok(Some(data)) => { + let (head, body, _tail) = unsafe { data.align_to::() }; + debug_assert!(head.is_empty(), "Data was not aligned"); + let tp = body[0]; + let mut new_tp = Tcpping::default(); + + let mut cnt = tp.irq.cnt as usize; + for i in 0..(IRQ_RING_SIZE as usize) { + if cnt <= i { + break; + } + let idx = (cnt - i - 1) & ((IRQ_RING_SIZE as usize) - 1); + new_tp.add_irq(tp.irq.tss[idx]); + } + + cnt = tp.sirq.cnt as usize; + for i in 0..(SOFTIRQ_RING_SIZE as usize) { + if cnt <= i { + break; + } + let idx = (cnt - i - 1) & ((SOFTIRQ_RING_SIZE as usize) - 1); + new_tp.add_softirq(tp.sirq.tss[idx]); + } + + cnt = tp.sched.cnt as usize; + for i in 0..(SCHEDSWITCH_RING_SIZE as usize) { + if cnt <= i { + break; + } + let idx = (cnt - i - 1) & ((SCHEDSWITCH_RING_SIZE as usize) - 1); + let ss = 
&tp.sched.ss[idx]; + let prev = Process::new(ss.prev_pid as u32, ss.prev_comm.to_vec()); + let next = Process::new(ss.next_pid as u32, ss.next_comm.to_vec()); + new_tp.add_sched(ss.ts, Sched::new(prev, next)); + } + + new_tp.add_stage( + tp.stages[TCPPING_STAGE_TCPPING_TX_ENTRY as usize].ts, + TcppingStage::TxKernelIn, + ); + new_tp.add_stage( + tp.stages[TCPPING_STAGE_TCPPING_TX_EXIT as usize].ts, + TcppingStage::TxKernelOut, + ); + new_tp.add_stage( + tp.stages[TCPPING_STAGE_TCPPING_RX_ENTRY as usize].ts, + TcppingStage::RxKernelIn, + ); + new_tp.add_stage( + tp.stages[TCPPING_STAGE_TCPPING_RX_EXIT as usize].ts, + TcppingStage::RxKernelOut, + ); + + new_tp.add_stage(ts, TcppingStage::RxUser); + new_tp.seq = seq; + + let zero_tp = unsafe { std::mem::zeroed::() }; + self.skel + .maps_mut() + .latency() + .update( + &0_u32.to_ne_bytes(), + unsafe { any_as_u8_slice(&zero_tp) }, + libbpf_rs::MapFlags::ANY, + ) + .unwrap(); + + return new_tp; + } + Ok(None) => { + panic!("unexpected"); + } + Err(e) => { + panic!("error: {}", e); + } + } + } + + fn ping_once( + &mut self, + seq: u32, + sport: u16, + dport: u16, + src: Ipv4Addr, + dst: Ipv4Addr, + ) -> Option<()> { + let tcp = build_tcphdr(seq, sport, dport, src, dst); + self.tx.send_to(tcp, std::net::IpAddr::V4(dst)).unwrap(); + let mut iter = tcp_packet_iter(&mut self.rx); + loop { + match iter.next_with_timeout(std::time::Duration::from_secs(1)) { + Ok(None) => { + log::debug!("timeout: failed to receive syn+ack packet"); + return None; + } + Ok(Some((p, _))) => { + if p.get_destination() == sport && p.get_acknowledgement() == seq + 1 { + log::debug!("receive response, seq: {}", seq); + return Some(()); + } else { + log::debug!("unexpected packet: {:?}", p); + } + } + Err(e) => { + panic!("error: {}", e); + } + } + } + } +} + +fn build_tcphdr( + seq: u32, + sport: u16, + dport: u16, + src: Ipv4Addr, + dst: Ipv4Addr, +) -> MutableTcpPacket<'static> { + let mut tcp_header = MutableTcpPacket::owned(vec![0u8; 
20]).unwrap(); + tcp_header.set_source(sport); + tcp_header.set_destination(dport); + + tcp_header.set_flags(TcpFlags::SYN); + tcp_header.set_window(64240); + tcp_header.set_data_offset(5); + tcp_header.set_urgent_ptr(0); + tcp_header.set_sequence(seq); + let checksum = pnet::packet::tcp::ipv4_checksum(&tcp_header.to_immutable(), &src, &dst); + tcp_header.set_checksum(checksum); + tcp_header +} + +fn set_bpf_filter(sock: i32, prog: i32) { + unsafe { + assert_eq!( + libc::setsockopt( + sock, + libc::SOL_SOCKET, + libc::SO_ATTACH_BPF, + &prog as *const i32 as *const libc::c_void, + 4, + ), + 0, + "failed to bind eBPF program into socket" + ); + }; +} diff --git a/source/tools/detect/net/rtrace/src/collector/userslow.rs b/source/tools/detect/net/rtrace/src/collector/userslow.rs new file mode 100644 index 0000000000000000000000000000000000000000..cb3e6012f9363cfdf18321256bc0414cb3357f10 --- /dev/null +++ b/source/tools/detect/net/rtrace/src/collector/userslow.rs @@ -0,0 +1,169 @@ +mod bpf { + include!(concat!(env!("OUT_DIR"), "/userslow.skel.rs")); + include!(concat!(env!("OUT_DIR"), "/userslow.rs")); +} +use crate::common::sched::Process; +use crate::common::sched::Sched; +use crate::common::utils::btf_path_ptr; +use crate::common::utils::handle_lost_events; +use crate::event::Event; +use crate::filter::Filter; +use crossbeam_channel::Sender; +use libbpf_rs::skel::*; +use libbpf_rs::MapFlags; +use libbpf_rs::PerfBufferBuilder; +use serde::Deserialize; +use serde::Serialize; +use std::collections::BTreeMap; +use std::fmt; +use std::net::IpAddr; +use std::net::Ipv4Addr; +use std::net::SocketAddr; + +#[derive(Deserialize, Serialize, Debug)] +pub enum UserslowStage { + KernelRcv, + Wakeup(u16), + SchedIn(u16), + SchedOut(u16), + UserRcv, +} + +impl fmt::Display for UserslowStage { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match *self { + UserslowStage::KernelRcv => write!(f, "KernelRcv"), + UserslowStage::Wakeup(ref value) => write!(f, "Wakeup({})", 
value), + UserslowStage::SchedIn(ref value) => write!(f, "SchedIn({})", value), + UserslowStage::SchedOut(ref value) => write!(f, "SchedOut({})", value), + UserslowStage::UserRcv => write!(f, "UserRcv"), + } + } +} + +#[derive(Deserialize, Serialize, Debug)] +pub struct Userslow { + saddr: SocketAddr, + daddr: SocketAddr, + proc: Process, + stages: BTreeMap, +} + +impl Userslow { + pub fn new(event: &bpf::slow_event) -> Self { + let mut stages = BTreeMap::::default(); + stages.insert(event.krcv_ts, UserslowStage::KernelRcv); + stages.insert(event.urcv_ts, UserslowStage::UserRcv); + + let saddr = SocketAddr::new( + IpAddr::V4(Ipv4Addr::from(u32::from_be(event.saddr))), + event.sport, + ); + let daddr = SocketAddr::new( + IpAddr::V4(Ipv4Addr::from(u32::from_be(event.daddr))), + event.dport, + ); + let proc = Process::new(event.sched.next_pid as u32, event.sched.next_comm.to_vec()); + + let cnt = event.thread.cnt as usize; + for i in 0..(bpf::MAX_ITEM_NUM as usize) { + if cnt <= i { + break; + } + let idx = (cnt - i - 1) & ((bpf::MAX_ITEM_NUM as usize) - 1); + let item = &event.thread.items[idx]; + let stage = match item.ty as u32 { + bpf::THREAD_ITEM_TYPE_SCHED_IN => UserslowStage::SchedIn(item.cpu), + bpf::THREAD_ITEM_TYPE_SCHED_OUT => UserslowStage::SchedOut(item.cpu), + bpf::THREAD_ITEM_TYPE_WAKE_UP => UserslowStage::Wakeup(item.cpu), + _ => panic!("BUG: internal"), + }; + stages.insert(item.ts, stage); + } + Userslow { + saddr, + daddr, + proc, + stages, + } + } +} + +impl fmt::Display for Userslow { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let mut string_vec = vec![]; + let mut prev_ts = 0; + for (ts, name) in &self.stages { + if prev_ts == 0 { + string_vec.push(name.to_string()); + prev_ts = *ts; + continue; + } + + string_vec.push(format!("{}us", (*ts - prev_ts) / 1000)); + string_vec.push(name.to_string()); + prev_ts = *ts; + } + + write!( + f, + "process:{} {}->{} {}", + self.proc, + self.saddr, + self.daddr, + string_vec.join(" -> ") + ) + 
} +} + +pub struct UserSlowCollector<'a> { + skel: bpf::UserslowSkel<'a>, +} + +impl<'a> UserSlowCollector<'a> { + /// attach ping sender eBPF program + pub fn new(verbose: bool, threshold: u64) -> Self { + let mut builder = bpf::UserslowSkelBuilder::default(); + builder.obj_builder.debug(verbose); + let mut opts = builder.obj_builder.opts(std::ptr::null()); + opts.btf_custom_path = btf_path_ptr(); + let open_skel = builder.open_opts(opts).unwrap(); + let mut skel = open_skel + .load() + .expect("failed to load pingtrace sender program"); + + let mut filter = Filter::new(skel.maps().filters()); + filter.set_threshold(threshold); + filter.update(); + + skel.attach() + .expect("failed to attach pingtrace sender program"); + UserSlowCollector { skel } + } + + pub fn poll(&mut self, mut tx: Sender) { + log::debug!("start userslow polling thread"); + let handle_event = move |cpu: i32, data: &[u8]| { + __handle_event(&mut tx, cpu, data); + }; + + let perf = PerfBufferBuilder::new(&self.skel.maps_mut().perf_events()) + .sample_cb(handle_event) + .lost_cb(handle_lost_events) + .build() + .unwrap(); + + loop { + perf.poll(std::time::Duration::from_millis(200)).unwrap(); + } + } +} + +fn __handle_event(tx: &mut Sender, _cpu: i32, data: &[u8]) { + let data_vec = data.to_vec(); + let (head, body, _tail) = unsafe { data_vec.align_to::() }; + debug_assert!(head.is_empty(), "Data was not aligned"); + let event = body[0]; + let us = Userslow::new(&event); + tx.send(Event::UserSlow(us)).expect("failed to send events"); +} diff --git a/source/tools/detect/net/rtrace/src/collector/virtio.rs b/source/tools/detect/net/rtrace/src/collector/virtio.rs new file mode 100644 index 0000000000000000000000000000000000000000..88005ffd5f306a384037abefd698f15a67d98009 --- /dev/null +++ b/source/tools/detect/net/rtrace/src/collector/virtio.rs @@ -0,0 +1,177 @@ +mod bpf { + include!(concat!(env!("OUT_DIR"), "/virtio.skel.rs")); + include!(concat!(env!("OUT_DIR"), "/virtio.rs")); +} +use 
crate::common::utils::any_as_u8_slice; +use crate::common::utils::btf_path_ptr; +use crate::common::utils::get_queue_count; +use crate::common::utils::get_send_receive_queue; +use libbpf_rs::skel::*; +use libbpf_rs::MapFlags; +use std::fs; + +use serde::Deserialize; +use serde::Serialize; +use std::fmt; + +#[derive(Debug, Clone, Deserialize, Serialize)] +pub struct Queue { + pub avail: u16, + pub used: u16, + pub last_used: u16, + pub len: u16, +} + +impl fmt::Display for Queue { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!( + f, + "avail-{}, used-{}, last_used-{}, len-{}", + self.avail, self.used, self.last_used, self.len + ) + } +} + +#[derive(Debug, Clone, Deserialize, Serialize)] +pub struct Virtio { + pub rx: Vec, + pub tx: Vec, +} + +impl fmt::Display for Virtio { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let mut rxs = "RecvQueue ".to_owned(); + let mut txs = "SendQueue ".to_owned(); + for (_, rx) in self.rx.iter().enumerate() { + rxs.push_str(&format!( + "{:>03.2}%/{:<5} ", + ((rx.used - rx.last_used) as f32 / rx.len as f32) * 100.0, + rx.last_used + )); + } + + for (_, tx) in self.tx.iter().enumerate() { + txs.push_str(&format!( + "{:>03.2}%/{:<5} ", + ((tx.avail - tx.used) as f32 / tx.len as f32) * 100.0, + tx.last_used + )); + } + + write!(f, "{}\n{}", txs, rxs) + } +} + +pub struct VirtioCollector<'a> { + skel: bpf::VirtioSkel<'a>, + interface: String, + tx_path: String, + rx_path: String, + rq_size: i32, + sq_size: i32, +} + +impl<'a> VirtioCollector<'a> { + pub fn new(verbose: bool, interface: String) -> Self { + let mut builder = bpf::VirtioSkelBuilder::default(); + builder.obj_builder.debug(verbose); + let mut opts = builder.obj_builder.opts(std::ptr::null()); + opts.btf_custom_path = btf_path_ptr(); + let open_skel = builder.open_opts(opts).unwrap(); + + let mut skel = open_skel.load().expect("failed to load virtio program"); + + let mut mapval = unsafe { std::mem::zeroed::() }; + mapval.pid = unsafe { 
libc::getpid() }; + let (sq, rq) = get_send_receive_queue().unwrap(); + mapval.sq_size = sq as i32; + mapval.rq_size = rq as i32; + skel.maps_mut() + .imap() + .update( + &0_i32.to_ne_bytes(), + unsafe { any_as_u8_slice(&mapval) }, + MapFlags::ANY, + ) + .expect("failed to update userslow filter map"); + skel.attach().expect("failed to attach virtio program"); + VirtioCollector { + skel, + tx_path: format!("/sys/class/net/{}/dev_id", interface), + rx_path: format!("/sys/class/net/{}/dev_port", interface), + interface, + sq_size: sq as i32, + rq_size: rq as i32, + } + } + + fn trigger(&self) { + let qc = get_queue_count(&self.interface).unwrap() / 2; + for _ in 0..qc { + fs::read_to_string(&self.tx_path).unwrap(); + fs::read_to_string(&self.rx_path).unwrap(); + } + } + + fn reset_map(&mut self) { + let mut mapval = unsafe { std::mem::zeroed::() }; + mapval.pid = unsafe { libc::getpid() }; + mapval.sq_size = self.sq_size; + mapval.rq_size = self.rq_size; + self.skel + .maps_mut() + .imap() + .update( + &0_i32.to_ne_bytes(), + unsafe { any_as_u8_slice(&mapval) }, + MapFlags::ANY, + ) + .expect("failed to update userslow filter map"); + } + + fn read_map(&mut self) -> bpf::virito_queue { + let ret = self + .skel + .maps_mut() + .imap() + .lookup(&0_i32.to_ne_bytes(), MapFlags::ANY) + .expect("failed to read virtio map") + .unwrap(); + let (head, body, _tail) = unsafe { ret.align_to::() }; + debug_assert!(head.is_empty(), "Data was not aligned"); + let vq = body[0]; + vq + } + + pub fn refresh(&mut self) -> Virtio { + self.reset_map(); + self.trigger(); + let vq = self.read_map(); + + let mut tx_queues = vec![]; + let mut rx_queues = vec![]; + for tx in 0..(vq.tx_idx as usize) { + tx_queues.push(Queue { + avail: vq.txs[tx].avail_idx, + used: vq.txs[tx].used_idx, + last_used: vq.txs[tx].last_used_idx, + len: vq.txs[tx].len, + }); + } + + for rx in 0..(vq.rx_idx as usize) { + rx_queues.push(Queue { + avail: vq.rxs[rx].avail_idx, + used: vq.rxs[rx].used_idx, + last_used: 
vq.rxs[rx].last_used_idx, + len: vq.rxs[rx].len, + }); + } + + let v = Virtio { + tx: tx_queues, + rx: rx_queues, + }; + v + } +} diff --git a/source/tools/detect/net/rtrace/src/common.rs b/source/tools/detect/net/rtrace/src/common.rs deleted file mode 100644 index 73802c72ee71584ef7243f5a21c2e87ddbc1b22b..0000000000000000000000000000000000000000 --- a/source/tools/detect/net/rtrace/src/common.rs +++ /dev/null @@ -1,302 +0,0 @@ -#[path = "bindings/commonbinding.rs"] -pub mod commonbinding; - -use anyhow::{bail, Result}; -use commonbinding::*; -use eutils_rs::net::ProtocolType; -use std::net::{IpAddr, Ipv4Addr, SocketAddr, SocketAddrV4}; - -pub enum EventType { - DropKfreeSkb, - DropTcpDrop, - DropIptables, - DropNfconntrackDrop, - - LatencyEvent, - ConnectLatencyEvent, - Unknown, -} - -impl From for EventType { - fn from(value: u32) -> Self { - match value { - DROP_KFREE_SKB => EventType::DropKfreeSkb, - DROP_TCP_DROP => EventType::DropTcpDrop, - DROP_IPTABLES_DROP => EventType::DropIptables, - DROP_NFCONNTRACK_DROP => EventType::DropNfconntrackDrop, - LATENCY_EVENT => EventType::LatencyEvent, - CONNECT_LATENCY_EVENT => EventType::ConnectLatencyEvent, - _ => EventType::Unknown, - } - } -} - -/// Filter -pub struct Filter { - pub filter: filter, -} - -impl Filter { - pub fn new() -> Filter { - Filter { - filter: unsafe { std::mem::MaybeUninit::zeroed().assume_init() }, - } - } - - pub fn set_pid(&mut self, pid: u32) { - self.filter.pid = pid; - } - - pub fn set_ap(&mut self, src: &Option, dst: &Option) -> Result<()> { - self.set_src(src)?; - self.set_dst(dst) - } - - pub fn set_src(&mut self, src: &Option) -> Result<()> { - if let Some(x) = src { - let s: SocketAddrV4 = x.parse()?; - self.filter.ap.saddr = u32::from_le_bytes(s.ip().octets()); - self.filter.ap.sport = s.port(); - } - Ok(()) - } - - pub fn set_dst(&mut self, dst: &Option) -> Result<()> { - if let Some(x) = dst { - let sock: SocketAddrV4 = x.parse()?; - self.filter.ap.saddr = 
u32::from_le_bytes(sock.ip().octets()); - self.filter.ap.sport = sock.port(); - } - Ok(()) - } - - pub fn set_protocol(&mut self, protocol: u16) { - self.filter.protocol = protocol; - } - - pub fn set_threshold(&mut self, threshold: u64) { - self.filter.threshold = threshold; - } - - pub fn to_vec(&self) -> Vec { - unsafe { - std::slice::from_raw_parts( - &self.filter as *const filter as *const u8, - std::mem::size_of::(), - ) - .to_vec() - } - } - - pub fn filter(&self) -> filter { - self.filter.clone() - } -} - -pub struct Event { - // event: event, - data: (usize, Vec), - pub ptr: *const event, - score: Option -} - -impl Event { - pub fn new(data: (usize, Vec)) -> Event { - Event { - ptr: &data.1[0] as *const u8 as *const event, - data, - score: None, - } - } - - pub fn event_type(&self) -> EventType { - EventType::from(unsafe { (*self.ptr).type_ as u32 }) - } - - pub fn pid(&self) -> u32 { - unsafe { (*self.ptr).pid } - } - - pub fn comm(&self) -> String { - unsafe { String::from_utf8_unchecked((*self.ptr).comm.to_vec()) } - } - - pub fn local(&self) -> SocketAddr { - let saddr = unsafe { (*self.ptr).ap.saddr }; - let sport = unsafe { (*self.ptr).ap.sport }; - SocketAddr::new(IpAddr::V4(Ipv4Addr::from(u32::from_be(saddr))), sport) - } - - pub fn remote(&self) -> SocketAddr { - let daddr = unsafe { (*self.ptr).ap.daddr }; - let dport = unsafe { (*self.ptr).ap.dport }; - SocketAddr::new(IpAddr::V4(Ipv4Addr::from(u32::from_be(daddr))), dport) - } - - pub fn addr_pair(&self) -> (SocketAddr, SocketAddr) { - (self.local(), self.remote()) - } - - pub fn protocol(&self) -> ProtocolType { - ProtocolType::from(unsafe { *self.ptr }.protocol as i32) - } - - pub fn state(&self) -> u8 { - unsafe { (*self.ptr).state } - } - // latency module parameters - pub fn queue_ts(&self) -> u64 { - unsafe { (*self.ptr).__bindgen_anon_1.__bindgen_anon_1.queue_ts } - } - pub fn rcv_ts(&self) -> u64 { - unsafe { (*self.ptr).__bindgen_anon_1.__bindgen_anon_1.rcv_ts } - } - pub fn 
pidtime_array_idx(&self) -> u32 { - unsafe { - (*self.ptr) - .__bindgen_anon_1 - .__bindgen_anon_1 - .pidtime_array_idx - } - } - pub fn socktime_array_idx(&self) -> u32 { - unsafe { - (*self.ptr) - .__bindgen_anon_1 - .__bindgen_anon_1 - .socktime_array_idx - } - } - // drop module parameters - pub fn sk_protocol(&self) -> ProtocolType { - ProtocolType::from(unsafe { (*self.ptr).__bindgen_anon_1.drop_params.sk_protocol as i32 }) - } - - pub fn skap(&self) -> (SocketAddr, SocketAddr) { - let saddr = unsafe { (*self.ptr).__bindgen_anon_1.drop_params.skap.saddr }; - let sport = unsafe { (*self.ptr).__bindgen_anon_1.drop_params.skap.sport }; - let src = SocketAddr::new(IpAddr::V4(Ipv4Addr::from(u32::from_be(saddr))), sport); - let daddr = unsafe { (*self.ptr).__bindgen_anon_1.drop_params.skap.daddr }; - let dport = unsafe { (*self.ptr).__bindgen_anon_1.drop_params.skap.dport }; - let dst = SocketAddr::new(IpAddr::V4(Ipv4Addr::from(u32::from_be(daddr))), dport); - (src, dst) - } - - pub fn stackid(&self) -> u32 { - unsafe { (*self.ptr).stackid } - } - - pub fn iptables_name(&self) -> String { - unsafe { - String::from_utf8_unchecked((*self.ptr).__bindgen_anon_1.drop_params.name.to_vec()) - } - } - - pub fn iptables_chain_name(&self) -> String { - let hooknum = unsafe { (*self.ptr).__bindgen_anon_1.drop_params.hook }; - match hooknum { - 0 => "PREROUTING".to_owned(), - 1 => "LOCAL_IN".to_owned(), - 2 => "FORWARD".to_owned(), - 3 => "LOCAL_OUT".to_owned(), - 4 => "POSTROUTING".to_owned(), - _ => "unknown".to_owned(), - } - } - // abnormal module params - pub fn accept_queue(&self) -> u32 { - unsafe { (*self.ptr).__bindgen_anon_1.abnormal.sk_ack_backlog } - } - - pub fn syn_queue(&self) -> u32 { - unsafe { (*self.ptr).__bindgen_anon_1.abnormal.icsk_accept_queue } - } - - pub fn rcv_mem(&self) -> u32 { - unsafe { (*self.ptr).__bindgen_anon_1.abnormal.rmem_alloc } - } - - pub fn snd_mem(&self) -> u32 { - unsafe { (*self.ptr).__bindgen_anon_1.abnormal.sk_wmem_queued } - } - 
- pub fn drop(&self) -> u32 { - unsafe { (*self.ptr).__bindgen_anon_1.abnormal.drop } - } - - pub fn retran(&self) -> u32 { - unsafe { (*self.ptr).__bindgen_anon_1.abnormal.retran } - } - - pub fn ooo(&self) -> u32 { - unsafe { (*self.ptr).__bindgen_anon_1.abnormal.ooo } - } - - pub fn inum(&self) -> u32 { - unsafe { (*self.ptr).__bindgen_anon_1.abnormal.i_ino } - } - - pub fn set_abnormal_score(&mut self, score: u32) { - self.score = Some(score); - } - - pub fn abnormal_score(&self) -> u32 { - self.score.unwrap() - } - -} - -pub struct FourSecondsRing { - fsr: seconds4_ring, -} - -fn onesecond_bit_exist(os: &onesecond, pos: u64) -> bool { - let idx = pos / 32; - let bit = pos & 0x1f; - - if (os.clear & (1 << idx)) != 0 { - return (os.bitmap[idx as usize] & (1 << bit)) != 0; - } - - false -} - -pub fn tss_in_range(fsr: *const seconds4_ring, left: u64, right: u64) -> Vec { - let mut tss = Vec::new(); - unsafe { - for os in (*fsr).os { - let mut startpos = 0; - let mut endpos = 0; - if os.ts <= left { - startpos = (left - os.ts) / 1_000_000; - } - - if os.ts <= right { - endpos = std::cmp::min(1000, (right - os.ts) / 1_000_000 + 1); - } - - if startpos >= endpos { - continue; - } - - for pos in startpos..endpos { - if onesecond_bit_exist(&os, pos) { - tss.push(pos * 1_000_000 + os.ts); - } - } - } - } - tss.sort(); - tss -} - -pub fn string_to_addr_pair(src: &String, dst: &String) -> Result { - let s: SocketAddrV4 = src.parse()?; - let d: SocketAddrV4 = dst.parse()?; - Ok(addr_pair { - saddr: u32::from_le_bytes(s.ip().octets()), - daddr: u32::from_le_bytes(d.ip().octets()), - sport: s.port(), - dport: d.port(), - }) -} diff --git a/source/tools/detect/net/rtrace/src/common/asset.rs b/source/tools/detect/net/rtrace/src/common/asset.rs new file mode 100644 index 0000000000000000000000000000000000000000..04f994ad3560768f190766043b8ecf28a3b2ff5c --- /dev/null +++ b/source/tools/detect/net/rtrace/src/common/asset.rs @@ -0,0 +1,12 @@ +use rust_embed::RustEmbed; + 
+#[cfg(feature = "embed-rtrace")] +#[derive(RustEmbed)] +#[folder = "resources"] +pub struct Asset; + +#[cfg(not(feature = "embed-rtrace"))] +#[derive(RustEmbed)] +#[folder = "resources"] +#[include = "*.db"] +pub struct Asset; diff --git a/source/tools/detect/net/rtrace/src/common/btree.rs b/source/tools/detect/net/rtrace/src/common/btree.rs new file mode 100644 index 0000000000000000000000000000000000000000..74080811cad355c0e289bc2623b057854a87912e --- /dev/null +++ b/source/tools/detect/net/rtrace/src/common/btree.rs @@ -0,0 +1,162 @@ +use std::collections::BTreeMap; +use std::collections::BTreeSet; +use std::ops::Bound::Excluded; +use std::ops::Bound::Included; +use std::ops::Bound::Unbounded; + +/// wrapper for Vec> +pub struct CpuBTreeSet(Vec>); + +impl CpuBTreeSet +where + T: Clone, +{ + pub fn new(cpu: usize) -> Self { + CpuBTreeSet(vec![BTreeSet::::new(); cpu]) + } + + pub fn insert(&mut self, cpu: usize, value: T) + where + T: Ord, + { + self.0[cpu].insert(value); + } + + pub fn in_range(&mut self, cpu: usize, left: T, right: T) -> Vec + where + T: Ord, + { + let l = Included(left); + let r = Included(right); + let mut res = vec![]; + for elem in self.0[cpu].range((l, r)) { + res.push(elem.clone()); + } + res + } + + pub fn lower_bound(&self, cpu: usize, val: T) -> Option<&T> + where + T: Ord, + { + let mut res = self.0[cpu].range((Unbounded, Excluded(val))); + res.next_back() + } + + pub fn upper_bound(&self, cpu: usize, val: T) -> Option<&T> + where + T: Ord, + { + let mut res = self.0[cpu].range((Excluded(val), Unbounded)); + res.next() + } + + /// Delete old data + pub fn flush(&mut self, cpu: usize, val: T) + where + T: Ord, + { + let new = self.0[cpu].split_off(&val); + self.0[cpu] = new; + } +} + +/// wrapper for Vec> +pub struct CpuBTreeMap(Vec>); + +impl CpuBTreeMap +where + K: Clone, + V: Clone, +{ + pub fn new(cpu: usize) -> Self { + CpuBTreeMap(vec![BTreeMap::::new(); cpu]) + } + + pub fn contains_key(&self, cpu: usize, key: &K) -> bool + 
where + K: Ord, + { + self.0[cpu].contains_key(key) + } + + pub fn insert(&mut self, cpu: usize, key: K, value: V) + where + K: Ord, + { + self.0[cpu].insert(key, value); + } + + pub fn range(&mut self, cpu: usize, left: K, right: K) -> Vec<(&K, &V)> + where + K: Ord, + { + let l = Excluded(left); + let r = Excluded(right); + let mut res = vec![]; + for elem in self.0[cpu].range((l, r)) { + res.push(elem); + } + res + } + + pub fn lower_bound(&self, cpu: usize, val: K) -> Option<(&K, &V)> + where + K: Ord, + { + let mut res = self.0[cpu].range((Unbounded, Excluded(val))); + res.next_back() + } + + pub fn upper_bound(&self, cpu: usize, val: K) -> Option<(&K, &V)> + where + K: Ord, + { + let mut res = self.0[cpu].range((Excluded(val), Unbounded)); + res.next() + } + + /// Delete old data + pub fn flush(&mut self, cpu: usize, val: K) + where + K: Ord, + { + let new = self.0[cpu].split_off(&val); + self.0[cpu] = new; + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn lower_upper_bound() { + let mut tree = CpuBTreeSet::new(1); + tree.insert(0, 0); + tree.insert(0, 1); + tree.insert(0, 2); + tree.insert(0, 3); + tree.insert(0, 4); + + assert_eq!(tree.lower_bound(0, 0), None); + assert_eq!(tree.lower_bound(0, 1), Some(&0)); + + assert_eq!(tree.upper_bound(0, 3), Some(&4)); + assert_eq!(tree.upper_bound(0, 4), None); + } + + #[test] + fn flush() { + let mut tree = CpuBTreeSet::new(1); + tree.insert(0, 0); + tree.insert(0, 1); + tree.insert(0, 2); + tree.insert(0, 3); + tree.insert(0, 4); + + tree.flush(0, 3); + assert_eq!(tree.0[0].first(), Some(&3)); + assert_eq!(tree.0[0].last(), Some(&4)); + } +} diff --git a/source/tools/detect/net/rtrace/src/common/config.rs b/source/tools/detect/net/rtrace/src/common/config.rs new file mode 100644 index 0000000000000000000000000000000000000000..40162ea85b7bf8e325037c0214b10f0c2552ed76 --- /dev/null +++ b/source/tools/detect/net/rtrace/src/common/config.rs @@ -0,0 +1,52 @@ +use crate::common::protocol::Protocol; +use 
std::time::Duration; + +#[derive(Debug, Clone, Default)] +pub struct Config { + pub threshold: u64, + pub src: (u32, u16), + pub dst: (u32, u16), + pub protocol: Protocol, + pub jitter: bool, + pub drop: bool, + pub retran: bool, + pub verbose: bool, + pub ping: bool, + + pub output_raw: bool, + pub output_json: bool, + pub interface: String, + pub period: Duration, + pub virtio: bool, + + pub disable_kfree_skb: bool, + pub tcpping: bool, + pub count: u32, + pub iqr: bool, +} + +impl Config { + pub fn set_protocol_icmp(&mut self) { + self.protocol = Protocol::Icmp; + } + + pub fn set_protocol_tcp(&mut self) { + self.protocol = Protocol::Tcp; + } + + pub fn enable_drop(&mut self) { + self.drop = true; + } + + pub fn enable_retran(&mut self) { + self.retran = true; + } + + pub fn disable_drop_kfree_skb(&mut self) { + self.disable_kfree_skb = true; + } + + pub fn enable_virtio(&self) -> bool { + self.virtio + } +} diff --git a/source/tools/detect/net/rtrace/src/common/file_logger.rs b/source/tools/detect/net/rtrace/src/common/file_logger.rs new file mode 100644 index 0000000000000000000000000000000000000000..a0341c9dc266ad0caac919b513bb4980b42ef985 --- /dev/null +++ b/source/tools/detect/net/rtrace/src/common/file_logger.rs @@ -0,0 +1,67 @@ +use chrono::Local; +use log::LevelFilter; +use log::Log; +use log::Metadata; +use log::Record; +use log::SetLoggerError; +use std::fs::File; +use std::fs::OpenOptions; +use std::io::Write; +use std::io::{self}; +use std::path::Path; +use std::sync::Mutex; + +struct FileLogger { + file: Mutex, +} + +impl FileLogger { + fn new(log_path: &str) -> io::Result { + let path = Path::new(log_path); + if let Some(dir_path) = path.parent() { + std::fs::create_dir_all(dir_path)?; + } + + let file = OpenOptions::new() + .create(true) + .write(true) + .append(true) + .open(log_path)?; + Ok(FileLogger { + file: Mutex::new(file), + }) + } +} + +impl Log for FileLogger { + fn enabled(&self, metadata: &Metadata) -> bool { + metadata.level() <= 
log::Level::Info + } + + fn log(&self, record: &Record) { + if self.enabled(record.metadata()) { + let now = Local::now(); + let log_entry = format!("{} - {} - {}\n", now, record.level(), record.args()); + if let Ok(mut file) = self.file.lock() { + let _ = writeln!(file, "{}", log_entry); + } + } + } + + fn flush(&self) {} +} + +pub fn setup_file_logger(verbose: bool) -> Result<(), SetLoggerError> { + let start_time = Local::now().format("%Y-%m-%dT%H:%M:%S").to_string(); + let log_file_path = format!("/var/log/rtrace/{}.log", start_time); + + let logger = FileLogger::new(&log_file_path).expect("Unable to create logger"); + + log::set_boxed_logger(Box::new(logger)).map(|()| { + log::set_max_level(if verbose { + LevelFilter::Debug + } else { + LevelFilter::Info + }) + }) +} diff --git a/source/tools/detect/net/rtrace/src/common/iqr.rs b/source/tools/detect/net/rtrace/src/common/iqr.rs new file mode 100644 index 0000000000000000000000000000000000000000..97b0948eade1eb20b4209d6bcd844b106e3a7d7d --- /dev/null +++ b/source/tools/detect/net/rtrace/src/common/iqr.rs @@ -0,0 +1,42 @@ +use ndarray::Array; +use ndarray::Array1; +use ndarray_stats::interpolate::Nearest; +use ndarray_stats::Quantile1dExt; +use noisy_float::types::n64; + +pub struct IQR { + q1: u64, + q3: u64, + upper: u64, +} + +impl IQR { + pub fn new(mut array: Array>) -> Self { + let q1 = array.quantile_mut(n64(0.25), &Nearest).unwrap(); + let q3 = array.quantile_mut(n64(0.75), &Nearest).unwrap(); + let iqr = q3 - q1; + let upper = (iqr as f64 * 1.5) as u64 + q3; + IQR { q1, q3, upper } + } + + pub fn is_upper_outlier(&self, val: u64) -> bool { + if val > self.upper { + true + } else { + false + } + } +} + +pub fn iqr_upper_outliers(data: Vec) -> Vec { + let arr = Array1::from_vec(data.clone()); + let iqr = IQR::new(arr); + + let mut res = vec![]; + for (i, &d) in data.iter().enumerate() { + if iqr.is_upper_outlier(d) { + res.push(i); + } + } + res +} diff --git 
a/source/tools/detect/net/rtrace/utils/src/kernel_stack.rs b/source/tools/detect/net/rtrace/src/common/ksyms.rs similarity index 31% rename from source/tools/detect/net/rtrace/utils/src/kernel_stack.rs rename to source/tools/detect/net/rtrace/src/common/ksyms.rs index edb90c4f77fda6656213159ffb40214fb857bc0e..864db9427b02aef05b3b62416b7d46fd9715d97d 100644 --- a/source/tools/detect/net/rtrace/utils/src/kernel_stack.rs +++ b/source/tools/detect/net/rtrace/src/common/ksyms.rs @@ -1,27 +1,27 @@ use anyhow::Result; -use byteorder::{NativeEndian, ReadBytesExt}; use once_cell::sync::Lazy; -use std::collections::HashSet; +use std::collections::BTreeMap; +use std::collections::HashMap; use std::fs::File; -use std::io::Cursor; -use std::io::{self, BufRead}; -use std::sync::Mutex; +use std::io::BufRead; +use std::io::{self}; +use std::ops::Bound; -pub static GLOBAL_KALLSYMS: Lazy> = Lazy::new(|| { - let ksyms = Kallsyms::try_from("/proc/kallsyms").unwrap(); - Mutex::new(ksyms) +pub static GLOBAL_KALLSYMS: Lazy = Lazy::new(|| { + let ksyms = KSyms::try_from("/proc/kallsyms").unwrap(); + ksyms }); #[derive(Debug, Default)] -pub struct Kallsyms { - syms: Vec<(String, u64)>, - hs: HashSet, +pub struct KSyms { + hash: HashMap, + syms: BTreeMap, } -impl TryFrom<&str> for Kallsyms { +impl TryFrom<&str> for KSyms { type Error = anyhow::Error; fn try_from(path: &str) -> Result { - let mut ksyms = Kallsyms::new(); + let mut ksyms = KSyms::new(); let file = File::open(path)?; let lines = io::BufReader::new(file).lines(); for line in lines { @@ -35,97 +35,81 @@ impl TryFrom<&str> for Kallsyms { } } } - ksyms.sort(); log::debug!( "Load ksyms done from {:?}, symbols length: {}", path, - ksyms.get_ksyms_num() + ksyms.ksyms_size() ); Ok(ksyms) } } -impl Kallsyms { - pub fn new() -> Self { - Kallsyms { - syms: Vec::new(), - hs: HashSet::default(), +impl KSyms { + pub(crate) fn new() -> Self { + KSyms { + syms: BTreeMap::new(), + hash: HashMap::default(), } } fn insert(&mut self, sym_name: 
String, sym_addr: u64) { - self.syms.push((sym_name.clone(), sym_addr)); - self.hs.insert(sym_name); + self.syms.insert(sym_addr, sym_name.clone()); + self.hash.insert(sym_name, sym_addr); } - fn get_ksyms_num(&self) -> usize { + fn ksyms_size(&self) -> usize { self.syms.len() } - fn sort(&mut self) { - self.syms.sort_by(|a, b| a.1.cmp(&b.1)); - } - pub fn has_sym(&self, sym_name: &str) -> bool { - self.hs.contains(sym_name) + self.hash.contains_key(sym_name) } +} - pub fn addr_to_sym(&self, addr: u64) -> String { - let mut start = 0; - let mut end = self.syms.len() - 1; - let mut mid; - let mut sym_addr; +pub fn has_kernel_symbol(name: &str) -> bool { + GLOBAL_KALLSYMS.has_sym(name) +} - while start < end { - mid = start + (end - start + 1) / 2; - sym_addr = self.syms[mid].1; +pub fn get_symbol(addr: &u64) -> &String { + let res = GLOBAL_KALLSYMS + .syms + .range((Bound::Unbounded, Bound::Included(addr))); - if sym_addr <= addr { - start = mid; - } else { - end = mid - 1; - } - } + res.last().unwrap().1 +} - if start == end && self.syms[start].1 <= addr { - let mut name = self.syms[start].0.clone(); - name.push_str(&format!("+{}", addr - self.syms[start].1)); - return name; - } +pub fn get_symbol_with_offset(addr: &u64) -> String { + let res = GLOBAL_KALLSYMS + .syms + .range((Bound::Unbounded, Bound::Included(addr))); - return String::from("Not Found"); - } + let last = res.last().unwrap(); + format!("{}+{}", last.1, addr - last.0) } -pub struct KernelStack { - stack: Vec, +pub fn get_addr(sym: &str) -> Option<&u64> { + GLOBAL_KALLSYMS.hash.get(sym) } -impl KernelStack { - pub fn new(stack: &Vec) -> KernelStack { - let depth = stack.len() / 8; - let mut rdr = Cursor::new(stack.clone()); - let mut stack_str = Vec::new(); - let kallsyms = GLOBAL_KALLSYMS.lock().unwrap(); +#[cfg(test)] +mod tests { + use super::*; - for _ in 0..depth { - let addr = rdr.read_u64::().unwrap(); - stack_str.push(kallsyms.addr_to_sym(addr)); - } - KernelStack { stack: stack_str } + 
#[test] + fn ksyms() { + let ksyms = KSyms::try_from("/proc/kallsyms").unwrap(); + assert_ne!(ksyms.ksyms_size(), 0); + assert!(ksyms.has_sym("tcp_sendmsg")); } -} -impl std::fmt::Display for KernelStack { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - for s in &self.stack { - writeln!(f, "\t{}", s)?; - } - write!(f, "") - } -} + #[test] + fn global_ksyms() { + assert!(has_kernel_symbol("tcp_sendmsg")); + let addr = get_addr("tcp_sendmsg").unwrap(); + assert_eq!(get_symbol(addr), "tcp_sendmsg"); -pub fn has_kernel_symbol(name: &str) -> bool { - GLOBAL_KALLSYMS.lock().unwrap().has_sym(name) -} \ No newline at end of file + let new_addr = *addr + 53; + assert_eq!(get_symbol_with_offset(&new_addr), "tcp_sendmsg+53"); + } +} diff --git a/source/tools/detect/net/rtrace/src/common/mod.rs b/source/tools/detect/net/rtrace/src/common/mod.rs new file mode 100644 index 0000000000000000000000000000000000000000..cebcdb8a2590f16351131bd13e02a4598fc1b430 --- /dev/null +++ b/source/tools/detect/net/rtrace/src/common/mod.rs @@ -0,0 +1,11 @@ +pub mod asset; +pub(crate) mod btree; +pub mod config; +pub mod file_logger; +pub mod iqr; +pub mod ksyms; +pub mod protocol; +pub mod raw_event; +pub mod sched; +pub mod stats; +pub mod utils; diff --git a/source/tools/detect/net/rtrace/src/common/protocol.rs b/source/tools/detect/net/rtrace/src/common/protocol.rs new file mode 100644 index 0000000000000000000000000000000000000000..6bb8ea651107e132e4000d6a95b77faa32c98240 --- /dev/null +++ b/source/tools/detect/net/rtrace/src/common/protocol.rs @@ -0,0 +1,51 @@ +use serde::Deserialize; +use serde::Serialize; +use std::fmt; + +#[derive(Debug, Clone, Copy, Eq, PartialEq, PartialOrd, Ord, Deserialize, Serialize)] +#[repr(i32)] +pub enum Protocol { + Icmp = libc::IPPROTO_ICMP, + Tcp = libc::IPPROTO_TCP, + Udp = libc::IPPROTO_UDP, +} + +impl Default for Protocol { + fn default() -> Self { + Protocol::Tcp + } +} + +impl TryFrom<&str> for Protocol { + type Error = &'static 
str; + fn try_from(value: &str) -> Result { + match value { + "icmp" => Ok(Protocol::Icmp), + "tcp" => Ok(Protocol::Tcp), + "udp" => Ok(Protocol::Udp), + _ => Err("Unknown protocol string"), + } + } +} + +impl TryFrom for Protocol { + type Error = &'static str; + fn try_from(value: i32) -> Result { + match value { + libc::IPPROTO_ICMP => Ok(Protocol::Icmp), + libc::IPPROTO_TCP => Ok(Protocol::Tcp), + libc::IPPROTO_UDP => Ok(Protocol::Udp), + _ => Err("unsupport protocol type"), + } + } +} + +impl fmt::Display for Protocol { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Protocol::Icmp => write!(f, "icmp"), + Protocol::Tcp => write!(f, "tcp"), + Protocol::Udp => write!(f, "udp"), + } + } +} diff --git a/source/tools/detect/net/rtrace/src/common/raw_event.rs b/source/tools/detect/net/rtrace/src/common/raw_event.rs new file mode 100644 index 0000000000000000000000000000000000000000..5f615b420c494c0a693620213275b866d95fee5c --- /dev/null +++ b/source/tools/detect/net/rtrace/src/common/raw_event.rs @@ -0,0 +1,6 @@ +/// raw information of perf event +pub struct RawEvent { + pub cpu: i32, + pub ty: u32, + pub data: Vec, +} diff --git a/source/tools/detect/net/rtrace/src/common/sched.rs b/source/tools/detect/net/rtrace/src/common/sched.rs new file mode 100644 index 0000000000000000000000000000000000000000..8cda3f6c4db745ddc8b479f662b128eae61a86ab --- /dev/null +++ b/source/tools/detect/net/rtrace/src/common/sched.rs @@ -0,0 +1,49 @@ +use serde::Deserialize; +use serde::Serialize; +use std::fmt; +/// process information +#[derive(Clone, Serialize, Deserialize, Debug, Default)] +pub struct Process { + pub pid: u32, + pub comm: String, +} + +impl Process { + pub fn new(pid: u32, mut comm: Vec) -> Self { + let len = comm.iter().position(|&x| x == 0).unwrap_or(comm.len()); + comm.truncate(len); + Process { + pid, + comm: unsafe { + String::from_utf8_unchecked(comm) + .trim_matches(char::from(0)) + .to_owned() + }, + } + } +} + +impl fmt::Display for 
Process { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{}/{}", self.pid, self.comm) + } +} + +/// kernel schedule switch information +#[derive(Clone, Serialize, Deserialize, Debug)] +pub struct Sched { + pub prev: Process, + pub next: Process, +} + +impl Sched { + pub fn new(prev: Process, next: Process) -> Self { + Sched { prev, next } + } +} + +impl fmt::Display for Sched { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{} -> {}", self.prev, self.next) + } +} diff --git a/source/tools/detect/net/rtrace/src/common/stats.rs b/source/tools/detect/net/rtrace/src/common/stats.rs new file mode 100644 index 0000000000000000000000000000000000000000..9f9685fcaa5e9a91882b2eab44cdb9dab91c79d6 --- /dev/null +++ b/source/tools/detect/net/rtrace/src/common/stats.rs @@ -0,0 +1,36 @@ +use ndarray::Array1; +use ndarray_stats::QuantileExt; +use noisy_float::prelude::*; + +#[derive(Debug, Default)] +pub struct Stats { + pub min: u64, + pub avg: u64, + pub max: u64, + pub mdev: u64, + pub stddev: u64, +} + +impl Stats { + pub fn new(a: Vec) -> Self { + let arr = Array1::from_vec(a); + let avg = arr.mean().unwrap(); + let mdev = arr.mapv(|x| x.abs_diff(avg)).mean().unwrap() as u64; + let variance = arr.mapv(|x| x.abs_diff(avg).pow(2)).sum() / (arr.len() as u64); + + Stats { + min: *arr.min().unwrap(), + avg, + max: *arr.max().unwrap(), + mdev, + stddev: f64::sqrt(variance as f64) as u64, + } + } + + pub fn zscore(&self, v: u64) -> u64 { + if v < self.avg { + return 0; + } + (v - self.avg) / self.stddev + } +} diff --git a/source/tools/detect/net/rtrace/src/common/utils.rs b/source/tools/detect/net/rtrace/src/common/utils.rs new file mode 100644 index 0000000000000000000000000000000000000000..e517314919851acec52f17c19edf3006f3c4803f --- /dev/null +++ b/source/tools/detect/net/rtrace/src/common/utils.rs @@ -0,0 +1,253 @@ +use crate::common::asset::Asset; +use anyhow::bail; +use anyhow::Result; +use cached::proc_macro::once; +use 
/// Parse an `"ip"`, `"port"` or `"ip:port"` string into a
/// `(big-endian-ipv4, port)` pair; absent components default to 0.
///
/// Any `:`-separated piece containing a dot is treated as an IPv4
/// address, anything else as a port number. Panics (via `unwrap`) on
/// malformed input — callers feed CLI arguments here.
pub fn parse_ip_str(ip: &str) -> (u32, u16) {
    let mut addr: u32 = 0;
    let mut port: u16 = 0;
    for piece in ip.split(':') {
        if piece.contains('.') {
            let v4: Ipv4Addr = piece.parse().unwrap();
            // Kernel-side filters compare against network byte order.
            addr = u32::from(v4).to_be();
        } else {
            port = piece.parse().unwrap();
        }
    }
    (addr, port)
}
/// Number of tx/rx queue directories under
/// `/sys/class/net/<interface>/queues`, or `None` when the sysfs
/// directory cannot be read (e.g. unknown interface).
pub fn get_queue_count(interface: &str) -> Option<usize> {
    let queues_dir = format!("/sys/class/net/{}/queues", interface);
    fs::read_dir(queues_dir).ok().map(|entries| entries.count())
}
as usize; + return Ok((sq, rq)); + } + } + + bail!("not found send_queue and receive_queue in rtrace.db") +} + +pub fn run_old_rtrace(args: Vec) { + if let Some(file_data) = Asset::get("rtrace") { + let dir = tempdir().unwrap(); + let file_path = dir.path().join("rtrace"); + { + let mut temp_file = File::create(&file_path).unwrap(); + temp_file.write_all(&file_data.data).unwrap(); + let metadata = temp_file.metadata().unwrap(); + let mut permissions = metadata.permissions(); + permissions.set_mode(0o777); + temp_file.set_permissions(permissions).unwrap(); + } + let mut child = Command::new(file_path) + .args(args) + .stdout(Stdio::inherit()) + .stderr(Stdio::inherit()) + .spawn() + .unwrap(); + let _ = child.wait().unwrap(); + } +} + +/// Monotonically increasing timestamp, incremented by 1 when the clock interrupt +/// is triggered. This clock source is used by the bpf_ktime_get_ns function. +pub fn current_monotime() -> u64 { + let mut ts = libc::timespec { + tv_sec: 0, + tv_nsec: 0, + }; + unsafe { libc::clock_gettime(libc::CLOCK_MONOTONIC, &mut ts) }; + + (ts.tv_sec as u64) * 1000_000_000 + (ts.tv_nsec as u64) +} + +/// System-wide realtime clock. It is generally synchronized with the clock of +/// the master server through the ntp protocol. 
/// Ask the kernel for a free local TCP port, preferring `sport`.
///
/// Binding to port 0 lets the OS pick an ephemeral port. Returns the
/// bound port number, or 0 when the bind fails (e.g. `sport` already in
/// use). The listener is closed before returning, so the port is only
/// *likely* free afterwards — a racing process could grab it.
pub fn allocate_port(sport: u16) -> u16 {
    let addr: SocketAddr = format!("127.0.0.1:{}", sport)
        .parse()
        .expect("Invalid address");
    match TcpListener::bind(addr) {
        Ok(listener) => listener
            .local_addr()
            .expect("Failed to get local address")
            .port(),
        Err(_) => 0,
    }
}
- net::SocketAddrV4, - path::PathBuf, -}; - -use anyhow::{bail, Result}; -use structopt::StructOpt; -use utils::{delta_netstat::*, delta_snmp::*, delta_dev::*, timestamp::current_monotime}; -use serde::{Deserialize, Serialize}; - -#[derive(Debug, Clone, StructOpt)] -pub struct DeltaCommand { - #[structopt(long, help = "File to store data")] - output: Option, - #[structopt(long, help = "Tracing file /proc/net/netstat")] - netstat: bool, - #[structopt(long, help = "Tracing file /proc/net/snmp")] - snmp: bool, - #[structopt(long, default_value = "1", help = "Sample period")] - period: u64, - - #[structopt(long, help = "json format")] - json: bool, -} - -use crate::message::MessageType; - - -pub fn delta_snmp_thread(tx: crossbeam_channel::Sender, period: u64) { - std::thread::sleep(std::time::Duration::from_millis(period)); - -} - -#[derive(Serialize, Deserialize, Default)] -pub struct DropReasons { - reasons: Vec<(String, isize, String)>, -} - -impl DropReasons { - - pub fn add_netstat(&mut self, reason: &NetstatDropStatus) { - self.reasons.push( - (reason.key.clone(), reason.count, reason.reason.clone()) - ); - } - - pub fn add_snmp(&mut self, reason: &SnmpDropStatus) { - self.reasons.push( - (reason.key.clone(), reason.count, reason.reason.clone()) - ); - } - - pub fn add_dev(&mut self, reason: &DeviceDropStatus) { - self.reasons.push( - (format!("{}-{}", reason.dev, reason.key), reason.count, reason.reason.clone()) - ); - } -} - -fn for_sysom(cmd: &DeltaCommand) { - let mut netstat = DeltaNetstat::new("/proc/net/netstat").unwrap(); - let mut snmp = DeltaSnmp::new("/proc/net/snmp").unwrap(); - let mut dev = DeltaDev::new().unwrap(); - std::thread::sleep(std::time::Duration::from_secs(cmd.period)); - - netstat.update().unwrap(); - snmp.update().unwrap(); - dev.update().unwrap(); - - let mut reasons = DropReasons::default(); - - for reason in netstat.drop_reason() { - reasons.add_netstat(&reason); - } - - for reason in snmp.drop_reason() { - reasons.add_snmp(&reason); 
- } - - for reason in dev.drop_reason() { - reasons.add_dev(&reason); - } - - println!("{}", serde_json::to_string(&reasons).unwrap()); - -} - -pub fn run_delta(cmd: &DeltaCommand) { - - let mut netstat = None; - let mut snmp = None; - - if cmd.json { - for_sysom(cmd); - return; - } - - if cmd.netstat { - netstat = Some(DeltaNetstat::new("/proc/net/netstat").unwrap()); - } - - if cmd.snmp { - snmp = Some(DeltaSnmp::new("/proc/net/snmp").unwrap()); - } - - loop { - std::thread::sleep(std::time::Duration::from_secs(cmd.period)); - if let Some(x) = &mut netstat { - x.update().unwrap(); - println!("{} Netstat: {}", eutils_rs::timestamp::current_monotime(), x); - } - if let Some(x) = &mut snmp { - x.update().unwrap(); - println!("{} Snmp: {}",eutils_rs::timestamp::current_monotime(), x); - } - } -} diff --git a/source/tools/detect/net/rtrace/src/delta/mod.rs b/source/tools/detect/net/rtrace/src/delta/mod.rs deleted file mode 100644 index 0e3e20563453763c3a74ad1dc354ac21fc344e22..0000000000000000000000000000000000000000 --- a/source/tools/detect/net/rtrace/src/delta/mod.rs +++ /dev/null @@ -1,4 +0,0 @@ - - -pub mod delta; - diff --git a/source/tools/detect/net/rtrace/src/drop/drop.rs b/source/tools/detect/net/rtrace/src/drop/drop.rs deleted file mode 100644 index 57693040b4c5d77286f18719aa91ffa0141242b9..0000000000000000000000000000000000000000 --- a/source/tools/detect/net/rtrace/src/drop/drop.rs +++ /dev/null @@ -1,59 +0,0 @@ -use anyhow::{bail, Result}; -use drop::{Drop, DropEvent}; -use serde::{Deserialize, Serialize}; -use structopt::StructOpt; -use utils::{timestamp::{current_monotime, current_realtime}, delta_netstat::{DeltaNetstat, Netstat}, delta_snmp::{DeltaSnmp, Snmp}, kernel_stack::has_kernel_symbol, delta_dev::NetDev}; - -#[derive(Debug, StructOpt)] -pub struct DropCommand { - #[structopt(long, default_value = "600", help = "program running time in seconds")] - duration: usize, -} - -fn get_enabled_points(opts: &DropCommand) -> Result> { - let mut enabled = 
vec![]; - if !has_kernel_symbol("tcp_drop") { - enabled.push(("tcp_drop", false)); - } - Ok(enabled) -} - -fn show_counter() { - let netstat = Netstat::from_file("/proc/net/netstat").unwrap(); - let snmp = Snmp::from_file("/proc/net/snmp").unwrap(); - let dev = NetDev::new().unwrap(); - - println!("{}", serde_json::to_string(&netstat).unwrap()); - println!("{}", serde_json::to_string(&snmp).unwrap()); - println!("{}", serde_json::to_string(&dev).unwrap()); -} - -pub fn run_drop(cmd: &DropCommand, debug: bool, btf: &Option) { - let mut drop = Drop::builder() - .open(debug, btf) - .load_enabled(get_enabled_points(cmd).expect("failed to get enabled points")) - .open_perf() - .build(); - - drop.skel.attach().expect("failed to attach bpf program"); - - let duration = (cmd.duration * 1_000_000_000) as u64; - let start_ns = current_monotime(); - - show_counter(); - - loop { - if let Some(event) = drop - .poll(std::time::Duration::from_millis(100)) - .expect("failed to poll drop event") - { - println!("{}", serde_json::to_string(&event).unwrap()); - } - - if current_monotime() - start_ns >= duration { - break; - } - } - - show_counter(); -} diff --git a/source/tools/detect/net/rtrace/src/drop/mod.rs b/source/tools/detect/net/rtrace/src/drop/mod.rs deleted file mode 100644 index c5c7fbb739d05a35f2f57b9a20d997b510ab88ff..0000000000000000000000000000000000000000 --- a/source/tools/detect/net/rtrace/src/drop/mod.rs +++ /dev/null @@ -1 +0,0 @@ -pub mod drop; \ No newline at end of file diff --git a/source/tools/detect/net/rtrace/src/event.rs b/source/tools/detect/net/rtrace/src/event.rs new file mode 100644 index 0000000000000000000000000000000000000000..6f319c4e0b87ff9f01159939b6167d0bfd597229 --- /dev/null +++ b/source/tools/detect/net/rtrace/src/event.rs @@ -0,0 +1,43 @@ +use crate::collector::drop::Drop; +use crate::collector::ping::Ping; +use crate::collector::queueslow::QueueSlow; +use crate::collector::retran::Retran; +use crate::collector::tcpping::Tcpping; +use 
crate::collector::userslow::Userslow; +use crate::collector::virtio::Virtio; +use crossbeam_channel::Receiver; +use crossbeam_channel::Sender; +use once_cell::sync::Lazy; +use std::time::Duration; + +pub static GLOBAL_CHANNEL: Lazy<(Sender, Receiver)> = + Lazy::new(|| crossbeam_channel::unbounded()); + +#[derive(Debug)] +pub enum Event { + Ping(Ping), + UserSlow(Userslow), + QueueSlow(QueueSlow), + Drop(Drop), + Retran(Retran), + Virtio(Virtio), + Tcpping(Tcpping), + Stop, +} + +pub fn get_event_channel() -> (Sender, Receiver) { + (GLOBAL_CHANNEL.0.clone(), GLOBAL_CHANNEL.1.clone()) +} + +pub fn send_stop_event() { + if let Err(error) = GLOBAL_CHANNEL.0.send(Event::Stop) { + log::error!("Failed to send stop event: {}", error); + } +} + +pub fn initial_stop_event_thread(t: Duration) { + std::thread::spawn(move || { + std::thread::sleep(t); + send_stop_event(); + }); +} diff --git a/source/tools/detect/net/rtrace/src/filter.rs b/source/tools/detect/net/rtrace/src/filter.rs new file mode 100644 index 0000000000000000000000000000000000000000..c36a4f04539704b01c17ded37a25fee5b4ee2338 --- /dev/null +++ b/source/tools/detect/net/rtrace/src/filter.rs @@ -0,0 +1,39 @@ +use libbpf_rs::Map; +use libbpf_rs::MapHandle; + +use crate::common::utils::any_as_u8_slice; +mod bpf { + include!(concat!(env!("OUT_DIR"), "/filter.rs")); +} + +pub struct Filter { + map: MapHandle, + raw: bpf::filter, +} + +impl Filter { + pub fn new(map: &Map) -> Self { + Filter { + map: libbpf_rs::MapHandle::try_clone(map).unwrap(), + raw: unsafe { std::mem::zeroed::() }, + } + } + + pub fn set_threshold(&mut self, t: u64) { + self.raw.threshold = t; + } + + pub fn clear(&mut self) { + self.raw = unsafe { std::mem::zeroed::() } + } + + pub fn update(&mut self) { + self.map + .update( + &0_u32.to_ne_bytes(), + unsafe { any_as_u8_slice(&self.raw) }, + libbpf_rs::MapFlags::ANY, + ) + .expect("failed to update filter map"); + } +} diff --git a/source/tools/detect/net/rtrace/src/latency/event.rs 
b/source/tools/detect/net/rtrace/src/latency/event.rs deleted file mode 100644 index ca1dd90f8f90accd43e3c567edccd6bffbdf871c..0000000000000000000000000000000000000000 --- a/source/tools/detect/net/rtrace/src/latency/event.rs +++ /dev/null @@ -1,39 +0,0 @@ - - -use serde::{Deserialize, Serialize}; - -#[derive(Serialize, Deserialize, Default)] -pub struct LatencyEvent { - sender: bool, - id: u16, - seq: u16, - - send_ms: u8, - out_ms: u8, - recv_ms: u8, -} - - -impl LatencyEvent { - - - pub fn new() -> Self { - Self::default() - } - - pub fn set_sender(&mut self) { - self.sender = true - } - - pub fn clr_sender(&mut self) { - self.sender = false - } - - pub fn clr_sender(&mut self) { - self.sender = false - } - - pub fn clr_sender(&mut self) { - self.sender = false - } -} \ No newline at end of file diff --git a/source/tools/detect/net/rtrace/src/latency/latency.rs b/source/tools/detect/net/rtrace/src/latency/latency.rs deleted file mode 100644 index a2fc3a3f54e932d1bab1e7623abc3ad806f5f273..0000000000000000000000000000000000000000 --- a/source/tools/detect/net/rtrace/src/latency/latency.rs +++ /dev/null @@ -1,120 +0,0 @@ -use crate::message::MessageType; -use anyhow::{bail, Result}; -use crossbeam_channel::{Receiver, Sender}; -use eutils_rs::timestamp::current_monotime; -use icmp::{IcmpEventType, IcmpEvents}; -use serde::{Deserialize, Serialize}; -use structopt::StructOpt; -use utils::*; - -#[derive(Debug, Clone, StructOpt)] -pub struct LatencyCommand { - #[structopt(long, default_value = "tcp", help = "Network protocol type")] - proto: String, - #[structopt(long, help = "enable json output format")] - json: bool, - #[structopt(long, default_value = "60", help = "program running time in seconds")] - duration: usize, -} - -struct Latency { - cmd: LatencyCommand, - debug: bool, - btf: Option, - timeout: std::time::Duration, - rx: Receiver, - tx: Sender, -} - -impl Latency { - pub fn new(cmd: &LatencyCommand, debug: bool, btf: &Option) -> Self { - let (tx, rx) = 
crossbeam_channel::unbounded(); - Latency { - cmd: cmd.clone(), - debug: debug, - btf: btf.clone(), - timeout: std::time::Duration::from_millis(200), - rx, - tx, - } - } - - fn start_icmp_thread(&self) { - let mut icmp = icmp::Icmp::new(self.debug, &self.btf); - let tx = self.tx.clone(); - let timeout = self.timeout; - std::thread::spawn(move || loop { - if let Some(event) = icmp.poll(timeout).unwrap() { - tx.send(MessageType::MessageIcmpEvent(event.0)).unwrap(); - tx.send(MessageType::MessageIcmpEvent(event.1)).unwrap(); - } - }); - } - -} - -fn run_latency_icmp(cmd: &LatencyCommand, debug: bool, btf: &Option) { - let mut latency = Latency::new(cmd, debug, btf); - latency.start_icmp_thread(); - - let mut boot_ts = 0; - let mut start_ts = 0; - let mut end_ts = 0; - let mut show_message = "".to_owned(); - - let start = current_monotime(); - let duration = (cmd.duration as u64) * 1_000_000_000; - - loop { - match latency.rx.recv_timeout(std::time::Duration::from_millis(200)) { - Ok(event) => match event { - MessageType::MessageIcmpEvent(icmpe) => { - if icmpe.events.len() == 0 { - continue; - } - - if cmd.json { - println!("{}", serde_json::to_string(&icmpe).unwrap()); - } else { - show_message = icmpe.to_string(); - start_ts = icmpe.start_ts(); - end_ts = icmpe.end_ts(); - } - } - _ => {} - }, - Err(_) => { - if !cmd.json { - continue; - } - } - } - if boot_ts == 0 { - boot_ts = start_ts; - } - - if current_monotime() - start > duration { - break; - } - if !cmd.json { - println!( - "SinceBootTimeDuration: {}ms -> {}ms\n{}", - (start_ts - boot_ts) / 1_000_000, - (end_ts - boot_ts) / 1_000_000, - show_message - ); - } - } - -} - -pub fn run_latency(cmd: &LatencyCommand, debug: bool, btf: &Option) { - let mut latency = Latency::new(cmd, debug, btf); - - match cmd.proto.as_str() { - "icmp" => { - run_latency_icmp(cmd, debug, btf); - } - _ => {} - } -} diff --git a/source/tools/detect/net/rtrace/src/latency/mod.rs b/source/tools/detect/net/rtrace/src/latency/mod.rs 
/// Parse a CLI jitter threshold like `"10ms"` or `"250us"` into
/// nanoseconds.
///
/// # Panics
/// Panics when the suffix is neither `ms` nor `us`, or when the numeric
/// part is not an unsigned integer — structopt surfaces the panic as a
/// CLI error. Using `strip_suffix` (instead of popping two chars blindly)
/// keeps number and unit from ever being mis-split.
fn parse_threshold(threshold: &str) -> u64 {
    let (number, base) = if let Some(n) = threshold.strip_suffix("ms") {
        (n, 1_000_000)
    } else if let Some(n) = threshold.strip_suffix("us") {
        (n, 1000)
    } else {
        panic!("Please end with ms or us")
    };

    let ns: u64 = number.parse().expect("not a number");
    ns * base
}
+ #[structopt(long, help = "Enable IQR analysis module")] + iqr: bool, #[structopt(short, long, help = "Verbose debug output")] verbose: bool, } -#[derive(Debug, StructOpt)] -enum SubCommand { - #[structopt(name = "drop", about = "Packet drop diagnosing")] - Drop(DropCommand), - #[structopt(name = "latency", about = "Packet latency tracing")] - Latency(LatencyCommand), - #[structopt(name = "delta", about = "Store or display delta information")] - Delta(DeltaCommand), - #[structopt(name = "retran", about = "Tracing retransmit")] - Retran(RetranCommand), +fn compatible_args() -> Vec { + let mut args: Vec = std::env::args().collect(); + let mut old_rtrace = false; + for arg in &mut args { + if arg.as_str() == "retran" { + *arg = "--retran".to_owned(); + } else if arg.as_str() == "drop" { + *arg = "--drop".to_owned(); + } else if arg.as_str() == "latency" { + old_rtrace = true; + } + } + if old_rtrace { + args.remove(0); + run_old_rtrace(args); + return vec![]; + } + args } -fn main() -> Result<()> { - env_logger::init(); - let opts = Command::from_args(); +fn main() { + let args = compatible_args(); + if args.is_empty() { + return; + } + let opts = Command::from_iter(args.iter()); + setup_file_logger(opts.verbose).expect("failed to setup file logger"); - let mut btf = opts.btf.clone(); - if btf.is_none() { - if let Ok(sysak) = std::env::var("SYSAK_WORK_PATH") { - if let Ok(info) = uname::uname() { - btf = Some(format!("{}/tools/vmlinux-{}", sysak, info.release)); - log::debug!("{:?}", btf); - } - } + let config = Config { + threshold: opts.threshold, + src: if let Some(x) = opts.src { x } else { (0, 0) }, + dst: if let Some(x) = opts.dst { x } else { (0, 0) }, + protocol: opts.protocol, + jitter: opts.jitter, + drop: opts.drop, + retran: opts.retran, + verbose: opts.verbose, + ping: opts.ping, + output_raw: !opts.json, + output_json: opts.json, + interface: opts.interface.clone(), + period: std::time::Duration::from_millis((opts.period * 1000.0) as u64), + virtio: 
opts.virtio, + disable_kfree_skb: false, + tcpping: opts.tcpping, + count: opts.count, + iqr: opts.iqr, + }; + + if let Some(d) = opts.duration { + let dt = std::time::Duration::from_secs(d); + initial_stop_event_thread(dt); } - eutils_rs::helpers::bump_memlock_rlimit()?; - match opts.subcommand { - SubCommand::Drop(cmd) => { - run_drop(&cmd, opts.verbose, &btf); - } - SubCommand::Latency(cmd) => { - run_latency(&cmd, opts.verbose, &btf); - } - SubCommand::Delta(cmd) => { - run_delta(&cmd); - } - SubCommand::Retran(cmd) => { - run_retran(&cmd, opts.verbose, &btf); - } + if config.drop { + DropApplication::run(config); + return; + } + + if config.tcpping { + TcppingApplication::run(config); + return; + } + + if config.ping { + PingApplication::run(config); + return; } - Ok(()) + if config.retran { + RetranApplication::run(config); + return; + } + + if config.jitter { + JitterApplication::run(config); + return; + } } diff --git a/source/tools/detect/net/rtrace/src/message.rs b/source/tools/detect/net/rtrace/src/message.rs deleted file mode 100644 index 0176bf7467f8628b8f07f8a284c5620786a5a87e..0000000000000000000000000000000000000000 --- a/source/tools/detect/net/rtrace/src/message.rs +++ /dev/null @@ -1,56 +0,0 @@ -pub enum MessageType { - MessageIcmpEvent(icmp::IcmpEvents), -} - -use std::collections::BTreeMap; - -#[derive(Default)] -pub struct MessageOrderedQueue { - queue: BTreeMap>, -} - -impl MessageOrderedQueue { - pub fn new() -> Self { - Self::default() - } - - pub fn push(&mut self, ts: u64, msg: MessageType) { - self.queue.entry(ts).or_insert(vec![]).push(msg); - } - - pub fn pop(&mut self) -> Option { - let mut ret = None; - let mut ts = None; - - if let Some(val) = self.queue.iter_mut().next() { - ret = val.1.pop(); - if val.1.is_empty() { - ts = Some(*val.0); - } - } - - if let Some(x) = ts { - self.queue.remove(&x); - } - ret - } - - pub fn poll_enqueue(&mut self, rx: &mut crossbeam_channel::Receiver) { - loop { - match rx.recv() { - Ok(msg) => { - let 
mut ts = 0; - match &msg { - MessageType::MessageIcmpEvent(icmp) => { - ts = icmp.start_ts(); - } - _ => {} - } - - self.push(ts, msg); - } - Err(_) => {} - } - } - } -} diff --git a/source/tools/detect/net/rtrace/src/retran/mod.rs b/source/tools/detect/net/rtrace/src/retran/mod.rs deleted file mode 100644 index 6a79d81cbf758ba25002ee5bec2d8983eb472056..0000000000000000000000000000000000000000 --- a/source/tools/detect/net/rtrace/src/retran/mod.rs +++ /dev/null @@ -1,4 +0,0 @@ - -mod retran; -pub use self::retran::RetranCommand; -pub use self::retran::run_retran; \ No newline at end of file diff --git a/source/tools/detect/net/rtrace/src/retran/retran.rs b/source/tools/detect/net/rtrace/src/retran/retran.rs deleted file mode 100644 index 2fd3d3ffe3d0a0c95fcd84d06e6d5eb9491fefb5..0000000000000000000000000000000000000000 --- a/source/tools/detect/net/rtrace/src/retran/retran.rs +++ /dev/null @@ -1,55 +0,0 @@ -use anyhow::{bail, Result}; -use eutils_rs::proc::Netstat; -use retran::{RetranEvent, Retran}; -use structopt::StructOpt; -use utils::{timestamp::{current_monotime, current_realtime}, delta_netstat::show_netstat_json}; -use serde::{Deserialize, Serialize}; - -#[derive(Debug, StructOpt)] -pub struct RetranCommand { - #[structopt(long, help = "Process identifier of container")] - pid: Option, - #[structopt(long, default_value = "600", help = "program running time in seconds")] - duration: usize, -} - - -// fn get_enabled_points() -> Result> { -// let mut enabled = vec![]; -// if eutils_rs::KernelVersion::current()? >= eutils_rs::KernelVersion::try_from("4.10.0")? 
{ -// enabled.push(("tp_tcp_retransmit_skb", true)); -// } else { -// enabled.push(("__tcp_retransmit_skb", true)); -// } -// Ok(enabled) -// } - -pub fn run_retran(cmd: &RetranCommand, debug: bool, btf: &Option) { - let mut retran = Retran::builder() - .open(debug, btf) - .load() - .open_perf() - .build(); - - retran.skel.attach().expect("failed to attach bpf program"); - - let duration = (cmd.duration * 1_000_000_000) as u64; - let start_ns = current_monotime(); - - show_netstat_json().unwrap(); - - loop { - if let Some(event) = retran - .poll(std::time::Duration::from_millis(100)) - .expect("failed to poll drop event") - { - println!("{}",serde_json::to_string(&event).unwrap()); - } - - if current_monotime() - start_ns >= duration { - break; - } - } - - show_netstat_json().unwrap(); -} diff --git a/source/tools/detect/net/rtrace/src/stack.rs b/source/tools/detect/net/rtrace/src/stack.rs deleted file mode 100644 index 53ec2a86e8d2498807209c51d58e8f5852c95b72..0000000000000000000000000000000000000000 --- a/source/tools/detect/net/rtrace/src/stack.rs +++ /dev/null @@ -1,44 +0,0 @@ - -use std::fmt; -use std::io::Cursor; - -pub struct Stack { - stack: Vec, -} - -impl Stack { - - // fn stack_string(kallsyms: &Kallsyms, stack: Vec) -> Result> { - // let stack_depth = stack.len() / 8; - // let mut rdr = Cursor::new(stack); - // let mut stackstring = Vec::new(); - - // for i in 0..stack_depth { - // let addr = rdr.read_u64::()?; - // if addr == 0 { - // break; - // } - // stackstring.push(kallsyms.addr_to_sym(addr)); - // } - // Ok(stackstring) - // } - - pub fn new(stack: Vec) -> Stack { - - - Stack { - stack: Vec::new() - } - } - -} - - -impl fmt::Display for Stack { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - for stack in &self.stack { - writeln!(f, "\t{}", stack)?; - } - write!(f, "") - } -} \ No newline at end of file diff --git a/source/tools/detect/net/rtrace/src/tcpdump.rs b/source/tools/detect/net/rtrace/src/tcpdump.rs deleted file mode 100644 
index c325e32f8c48506a76d2d144d0e1de40a1cf9304..0000000000000000000000000000000000000000 --- a/source/tools/detect/net/rtrace/src/tcpdump.rs +++ /dev/null @@ -1,141 +0,0 @@ -use std::{ - fs::File, - io::{BufReader, BufWriter, Read, Write}, - net::SocketAddrV4, - path::PathBuf, -}; - -use anyhow::{bail, Result}; -use std::sync::Mutex; -use structopt::StructOpt; -use utils::*; - -#[derive(Debug, Clone, StructOpt)] -pub struct TcpdumpCommand { - #[structopt(short, long, help = "File to store data")] - output: Option, - #[structopt(short, long, help = "File to parse data")] - input: Option, - #[structopt(long, help = "Local network address of tracing skb")] - src: Option, - #[structopt(long, help = "Remote network address of tracing skb")] - dst: Option, - #[structopt(long, help = "Eanble timer event")] - timerevent: bool, -} - -use sock::*; - -static mut EVENTS_NUM: usize = 0; - -fn parse_tcpdump_file(cmd: &TcpdumpCommand) -> bool { - if let Some(i) = &cmd.input { - let mut saddr = 0; - let mut daddr = 0; - let mut sport = 0; - let mut dport = 0; - - if let Some(ip) = &cmd.src { - let s: SocketAddrV4 = ip.parse().unwrap(); - saddr = u32::from_le_bytes(s.ip().octets()); - sport = s.port(); - } - - if let Some(ip) = &cmd.dst { - let s: SocketAddrV4 = ip.parse().unwrap(); - daddr = u32::from_le_bytes(s.ip().octets()); - dport = s.port(); - } - - let mut f = File::open(i).expect("faile to open file"); - let meta = std::fs::metadata(&i).expect("unable to read metadata"); - let mut buffer = vec![0; meta.len() as usize]; - f.read(&mut buffer).expect("buffer overflow"); - - // if buffer.len() % std::mem::size_of::() != 0 { - // println!("data not aligned"); - // } - - // let sock_events_sz = std::mem::size_of::(); - // for i in (0..buffer.len()).step_by(sock_events_sz) { - // let (h, b, t) = unsafe { buffer[i..i + sock_events_sz].align_to::() }; - // let event = b[0]; - - // if saddr != 0 && event.ap.saddr != saddr { - // continue; - // } - - // if daddr != 0 && 
event.ap.daddr != daddr { - // continue; - // } - - // if sport != 0 && event.ap.sport != sport { - // continue; - // } - - // if dport != 0 && event.ap.dport != dport { - // continue; - // } - - // println!("{}", event); - // } - - return true; - } - false -} - -pub fn run_tcpdump(cmd: &TcpdumpCommand, debug: bool, btf: &Option) { - if parse_tcpdump_file(cmd) { - return; - } - - let filter = inner_sock_filter::new(&cmd.src, &cmd.dst).unwrap(); - let mut sock = Sock::new(debug, btf, filter).unwrap(); - let mut writer = None; - if let Some(x) = &cmd.output { - writer = Some(BufWriter::new(File::create(x).unwrap())); - } - - let (tx, rx) = crossbeam_channel::unbounded(); - - ctrlc::set_handler(move || tx.send(()).expect("Could not send signal on channel.")) - .expect("Error setting Ctrl-C handler"); - - println!("Waiting for Ctrl-C..."); - - if writer.is_some() { - std::thread::spawn(|| loop { - std::thread::sleep(std::time::Duration::from_secs(1)); - unsafe { - println!( - "{} events,{} KB", - EVENTS_NUM, - EVENTS_NUM / 1024 * std::mem::size_of::() - ); - } - }); - } - - loop { - if let Some(mut event) = sock.poll(std::time::Duration::from_millis(200)).unwrap() { - if let Some(x) = &mut writer { - unsafe { EVENTS_NUM += 1 }; - let slice = unsafe { - std::slice::from_raw_parts_mut( - &mut event as *mut sock_event as *mut u8, - std::mem::size_of::(), - ) - }; - x.write(slice).expect("failed to write event to file"); - } else { - println!("{}", event); - } - } - - if !rx.is_empty() { - println!("Exiting..."); - break; - } - } -} diff --git a/source/tools/detect/net/rtrace/src/utils/logdistribution.rs b/source/tools/detect/net/rtrace/src/utils/logdistribution.rs deleted file mode 100644 index 3fec5010f4d5744cb2815b5f6bd8eac206023ef6..0000000000000000000000000000000000000000 --- a/source/tools/detect/net/rtrace/src/utils/logdistribution.rs +++ /dev/null @@ -1,76 +0,0 @@ -#[derive(Clone, Debug)] -pub struct LogDistribution { - /// distribution - pub dis: [usize; 32], -} - 
-impl Default for LogDistribution { - fn default() -> Self { - LogDistribution { dis: [0; 32] } - } -} - -impl std::fmt::Display for LogDistribution { - fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - let mut cnt = 0; - let mut idx = 0; - let mut max_idx = 0; - - for i in self.dis { - cnt += i; - if i != 0 { - max_idx = idx; - } - idx += 1; - } - - if cnt == 0 { - cnt = 1; - max_idx = 0; - } - - println!( - "{:^24}: {:<10}: {:<50}", - "LATENCY", "FREQUENCY", "DISTRIBUTION" - ); - - idx = 0; - for i in self.dis { - if idx > max_idx { - break; - } - - let starnum = i * 50 / cnt; - writeln!( - f, - "{:>10} -> {:<10}: {:<10} |{:<50}|", - ((1 as usize) << idx) - 1, - ((1 as usize) << (idx + 1)) - 1, - i, - "*".repeat(starnum) - )?; - - idx += 1; - } - - Ok(()) - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_logdis_display() { - let logdis = LogDistribution::default(); - println!("{}", logdis); - } - - #[test] - fn test_logdis_display2() { - let mut logdis = LogDistribution::default(); - logdis.dis[1] = 50; - println!("{}", logdis); - } -} diff --git a/source/tools/detect/net/rtrace/src/utils/macros.rs b/source/tools/detect/net/rtrace/src/utils/macros.rs deleted file mode 100644 index c359efdbc255fa43416338fd2d863acdcb4e83b1..0000000000000000000000000000000000000000 --- a/source/tools/detect/net/rtrace/src/utils/macros.rs +++ /dev/null @@ -1,55 +0,0 @@ -macro_rules! struct_member_sub_assign { - ($res: ident, $first: ident, $second: ident, $($mem: ident), +) => { - - $( $res.$mem = $first.$mem - $second.$mem; )+ - }; -} - -/// first minuend -/// second subtrahend -macro_rules! same_struct_member_sub { - ($first: ident, $second: ident, $mem: ident) => { - $first.$mem - $second.$mem - }; -} - -/// $first -macro_rules! struct_members_max_assign { - ($assign: ident, $first: ident, $second: ident, $($mem: ident), +) => { - - $( $assign.$mem = $first.$mem.max($second.$mem); ) + - }; -} - -/// $first -macro_rules! 
struct_members_min_assign { - ($assign: ident, $first: ident, $second: ident, $($mem: ident), +) => { - - $( $assign.$mem = $first.$mem.min($second.$mem);) + - }; -} - - -/// -macro_rules! struct_members_normalization_assign { - ($assign: ident, $target: ident, $min: ident, $max: ident, $precision: ident, $($mem: ident), +) => { - - $( $assign.$mem = $target.$mem * $precision / 1.max($max.$mem - $min.$mem);) + - }; -} - -/// -macro_rules! ebpf_common_use { - ($name: ident) => { - use crate::$name::skel::*; - use anyhow::{bail, Result}; - use structopt::StructOpt; - use crate::common::*; - use std::fmt; - } -} - -pub(crate) use { - same_struct_member_sub, struct_members_max_assign, struct_members_min_assign, - struct_members_normalization_assign, ebpf_common_use -}; diff --git a/source/tools/detect/net/rtrace/src/utils/mod.rs b/source/tools/detect/net/rtrace/src/utils/mod.rs deleted file mode 100644 index fe0d62202dff53c8fdc0c18800b8ceac79d623f8..0000000000000000000000000000000000000000 --- a/source/tools/detect/net/rtrace/src/utils/mod.rs +++ /dev/null @@ -1,18 +0,0 @@ - - - -mod logdistribution; -mod distribution; - -mod delta_dev; -mod delta_netstat; -mod delta_snmp; - -pub mod macros; - -pub use { - self::logdistribution::LogDistribution, - self::delta_dev::DeltaDev, - self::delta_netstat::DeltaNetstat, - self::delta_snmp::DeltaSnmp, -}; \ No newline at end of file diff --git a/source/tools/detect/net/rtrace/utils/Cargo.toml b/source/tools/detect/net/rtrace/utils/Cargo.toml deleted file mode 100644 index 71e77658646b5f963f2e60bb669ad5d68e39359e..0000000000000000000000000000000000000000 --- a/source/tools/detect/net/rtrace/utils/Cargo.toml +++ /dev/null @@ -1,22 +0,0 @@ -[package] -name = "utils" -version = "0.1.0" -edition = "2021" - -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html - -[dependencies] -libbpf-rs = "0.19" -anyhow = "1.0.57" -libc = "0.2" -libbpf-sys = { version = "1.0.3" } -paste = "1.0.9" -procfs = 
"0.14.0" -eutils-rs = { git = "https://github.com/chengshuyi/eutils-rs.git" } -cenum-rs = { path = "../cenum-rs" } -num_cpus = "1.0" -once_cell = "1.8.0" -log = "0.4.17" -byteorder = "1.4.3" -serde_json = "1.0" -serde = {version = "1.0", features = ["derive"]} \ No newline at end of file diff --git a/source/tools/detect/net/rtrace/utils/src/drop/delta_dev.rs b/source/tools/detect/net/rtrace/utils/src/drop/delta_dev.rs deleted file mode 100644 index bc8a50b79968e86703c2151d2d8641a10e6727e4..0000000000000000000000000000000000000000 --- a/source/tools/detect/net/rtrace/utils/src/drop/delta_dev.rs +++ /dev/null @@ -1,228 +0,0 @@ -use crate::macros::*; -use anyhow::Result; -use procfs::net::DeviceStatus; -use std::collections::HashMap; -use std::fmt; - -use serde::{Deserialize, Serialize}; - -#[derive(Default, Debug, Clone, Deserialize, Serialize)] -pub struct NetDev { - dev : HashMap>, -} - -fn device_status_to_hashmap(ds: &DeviceStatus) -> HashMap { - let mut hm = HashMap::default(); - - hm.insert("recv_bytes".to_owned(), ds.recv_bytes as isize); - hm.insert("recv_packets".to_owned(), ds.recv_packets as isize); - hm.insert("recv_errs".to_owned(), ds.recv_errs as isize); - hm.insert("recv_drop".to_owned(), ds.recv_drop as isize); - hm.insert("recv_fifo".to_owned(), ds.recv_fifo as isize); - hm.insert("recv_frame".to_owned(), ds.recv_frame as isize); - hm.insert("recv_compressed".to_owned(), ds.recv_compressed as isize); - hm.insert("recv_multicast".to_owned(), ds.recv_multicast as isize); - hm.insert("sent_bytes".to_owned(), ds.sent_bytes as isize); - hm.insert("sent_packets".to_owned(), ds.sent_packets as isize); - hm.insert("sent_errs".to_owned(), ds.sent_errs as isize); - hm.insert("sent_drop".to_owned(), ds.sent_drop as isize); - hm.insert("sent_fifo".to_owned(), ds.sent_fifo as isize); - hm.insert("sent_colls".to_owned(), ds.sent_colls as isize); - hm.insert("sent_carrier".to_owned(), ds.sent_carrier as isize); - hm.insert("sent_compressed".to_owned(), 
ds.sent_compressed as isize); - - hm -} - -impl NetDev { - pub fn new() -> Result { - let devs = procfs::net::dev_status()?; - let mut hm = HashMap::default(); - for (name, dev) in devs { - hm.insert(name, device_status_to_hashmap(&dev)); - } - - Ok(Self { dev: hm }) - } -} - -#[derive(Default, Debug, Clone)] -pub struct DeltaDev { - predev: HashMap, - curdev: HashMap, -} - -pub struct DeviceDropStatus { - pub dev: String, - pub key: String, - pub count: isize, - pub reason: String, -} - -impl DeltaDev { - pub fn new() -> Result { - let curdev = procfs::net::dev_status()?; - Ok(DeltaDev { - predev: curdev.clone(), - curdev, - }) - } - - pub fn update(&mut self) -> Result<()> { - std::mem::swap(&mut self.predev, &mut self.curdev); - self.curdev = procfs::net::dev_status()?; - Ok(()) - } - - pub fn drop_reason(&self) -> Vec { - let mut ret = vec![]; - - let mut stats: Vec<_> = self.curdev.values().collect(); - stats.sort_by_key(|s| &s.name); - for stat in &stats { - let subtrahend = self.predev.get(&stat.name).unwrap(); - - let mut cnt = same_struct_member_sub!(stat, subtrahend, sent_errs); - if cnt > 0 { - ret.push(DeviceDropStatus { - dev: stat.name.clone(), - key: "sent_errs".to_owned(), - count: cnt as isize, - reason: "硬件发包出错".into(), - }); - } - - cnt = same_struct_member_sub!(stat, subtrahend, sent_drop); - ret.push(DeviceDropStatus { - dev: stat.name.clone(), - key: "sent_drop".into(), - count: cnt as isize, - reason: "硬件发包出错".into(), - }); - - cnt = same_struct_member_sub!(stat, subtrahend, sent_fifo); - ret.push(DeviceDropStatus { - dev: stat.name.clone(), - key: "sent_fifo".into(), - count: cnt as isize, - reason: "硬件发包出错, fifo缓冲区不足".into(), - }); - - cnt = same_struct_member_sub!(stat, subtrahend, sent_colls); - ret.push(DeviceDropStatus { - dev: stat.name.clone(), - key: "sent_colls".into(), - count: cnt as isize, - reason: "硬件发包出错,出现冲突".into(), - }); - - cnt = same_struct_member_sub!(stat, subtrahend, sent_carrier); - ret.push(DeviceDropStatus { - dev: 
stat.name.clone(), - key: "sent_carrier".into(), - count: cnt as isize, - reason: "硬件发包出错,出现冲突".into(), - }); - - cnt = same_struct_member_sub!(stat, subtrahend, recv_errs); - ret.push(DeviceDropStatus { - dev: stat.name.clone(), - key: "recv_errs".into(), - count: cnt as isize, - reason: "硬件收包出错,可能是包解析出错".into(), - }); - - cnt = same_struct_member_sub!(stat, subtrahend, recv_drop); - ret.push(DeviceDropStatus { - dev: stat.name.clone(), - key: "recv_drop".into(), - count: cnt as isize, - reason: "硬件收包出错".into(), - }); - - cnt = same_struct_member_sub!(stat, subtrahend, recv_fifo); - ret.push(DeviceDropStatus { - dev: stat.name.clone(), - key: "recv_fifo".into(), - count: cnt as isize, - reason: "硬件收包出错,fifo缓冲区不足,可能是流量过大".into(), - }); - - cnt = same_struct_member_sub!(stat, subtrahend, recv_frame); - ret.push(DeviceDropStatus { - dev: stat.name.clone(), - key: "recv_frame".into(), - count: cnt as isize, - reason: "硬件收包出错".into(), - }); - } - - ret - } -} - -impl fmt::Display for DeltaDev { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - let mut stats: Vec<_> = self.curdev.values().collect(); - - stats.sort_by_key(|s| &s.name); - writeln!( - f, - "{:<10} {:<10} {:<10} {:<10} {:<10} {:<10}", - "Interface", "SendErrs", "SendDrop", "SendFifo", "SendColls", "SendCarrier" - )?; - - for stat in &stats { - let subtrahend = self.predev.get(&stat.name).unwrap(); - writeln!( - f, - "{:<10} {:<10} {:<10} {:<10} {:<10} {:<10}", - stat.name, - same_struct_member_sub!(stat, subtrahend, sent_errs), - same_struct_member_sub!(stat, subtrahend, sent_drop), - same_struct_member_sub!(stat, subtrahend, sent_fifo), - same_struct_member_sub!(stat, subtrahend, sent_colls), - same_struct_member_sub!(stat, subtrahend, sent_carrier), - )?; - } - - writeln!( - f, - "{:<10} {:<10} {:<10} {:<10} {:<10}", - "Interface", "RecvErrs", "RecvDrop", "RecvFifo", "RecvFrameErr" - )?; - - for stat in &stats { - let subtrahend = self.predev.get(&stat.name).unwrap(); - writeln!( - f, - "{:<10} 
{:<10} {:<10} {:<10} {:<10}", - stat.name, - same_struct_member_sub!(stat, subtrahend, recv_errs), - same_struct_member_sub!(stat, subtrahend, recv_drop), - same_struct_member_sub!(stat, subtrahend, recv_fifo), - same_struct_member_sub!(stat, subtrahend, recv_frame), - )?; - } - - write!(f, "") - } -} - -#[cfg(test)] -mod tests { - use super::*; - #[test] - fn test_delta_dev() { - let delta = DeltaDev::new(); - assert!(delta.is_ok()); - } - - #[test] - fn test_delta_dev_update_display() { - let mut delta = DeltaDev::new().unwrap(); - std::thread::sleep(std::time::Duration::from_millis(100)); - delta.update().unwrap(); - println!("{}", delta); - } -} diff --git a/source/tools/detect/net/rtrace/utils/src/lib.rs b/source/tools/detect/net/rtrace/utils/src/lib.rs deleted file mode 100644 index 2efe53a7960c7b90afb555194658196d47d7d1f8..0000000000000000000000000000000000000000 --- a/source/tools/detect/net/rtrace/utils/src/lib.rs +++ /dev/null @@ -1,63 +0,0 @@ -pub mod macros; -pub mod perf; -pub mod timestamp; - -mod drop; -pub use {drop::delta_dev, drop::delta_netstat, drop::delta_snmp}; - -pub mod pstree; - -pub mod net; -use anyhow::{bail, Result}; - -pub mod kernel_stack; -pub mod percpu_queue; -// pub mod process; - -pub fn to_vec(t: T) -> Vec { - unsafe { - std::slice::from_raw_parts(&t as *const T as *const u8, std::mem::size_of::()).to_vec() - } -} - -pub fn bump_memlock_rlimit() -> Result<()> { - let rlimit = libc::rlimit { - rlim_cur: 128 << 20, - rlim_max: 128 << 20, - }; - - if unsafe { libc::setrlimit(libc::RLIMIT_MEMLOCK, &rlimit) } != 0 { - bail!("Failed to increase rlimit"); - } - - Ok(()) -} - -pub fn cpus_number() -> usize { - num_cpus::get() -} - -pub fn alloc_percpu_variable() -> Vec -where - T: Default, -{ - let mut ret = vec![]; - for _ in 0..cpus_number() { - ret.push(T::default()) - } - ret -} - -pub fn kernel_version() -> Result { - let mut info = unsafe { std::mem::MaybeUninit::::zeroed().assume_init() }; - let mut release_version = 
Vec::with_capacity(info.release.len()); - let ret = unsafe { libc::uname(&mut info as *mut libc::utsname) }; - if ret < 0 { - bail!("failed to call function: libc::uname, error code: {}", ret) - } - - for i in info.release { - release_version.push(i as u8); - } - Ok(String::from_utf8(release_version)?) -} diff --git a/source/tools/detect/net/rtrace/utils/src/macros.rs b/source/tools/detect/net/rtrace/utils/src/macros.rs deleted file mode 100644 index 12426000991de745fbd83d312f3bb3d52383e579..0000000000000000000000000000000000000000 --- a/source/tools/detect/net/rtrace/utils/src/macros.rs +++ /dev/null @@ -1,66 +0,0 @@ -#[macro_export] -macro_rules! init_zeroed { - () => { - unsafe { std::mem::MaybeUninit::zeroed().assume_init() } - } -} - -#[macro_export] -macro_rules! to_vecu8 { - ($item: expr, $type: ty) => { - unsafe { - std::slice::from_raw_parts( - $item as *const $type as *const u8, - std::mem::size_of::<$type>(), - ) - .to_vec() - } - } -} - - -macro_rules! struct_member_sub_assign { - ($res: ident, $first: ident, $second: ident, $($mem: ident), +) => { - - $( $res.$mem = $first.$mem - $second.$mem; )+ - }; -} - -/// first minuend -/// second subtrahend -macro_rules! same_struct_member_sub { - ($first: ident, $second: ident, $mem: ident) => { - $first.$mem - $second.$mem - }; -} - -/// $first -macro_rules! struct_members_max_assign { - ($assign: ident, $first: ident, $second: ident, $($mem: ident), +) => { - - $( $assign.$mem = $first.$mem.max($second.$mem); ) + - }; -} - -/// $first -macro_rules! struct_members_min_assign { - ($assign: ident, $first: ident, $second: ident, $($mem: ident), +) => { - - $( $assign.$mem = $first.$mem.min($second.$mem);) + - }; -} - - -/// -macro_rules! 
struct_members_normalization_assign { - ($assign: ident, $target: ident, $min: ident, $max: ident, $precision: ident, $($mem: ident), +) => { - - $( $assign.$mem = $target.$mem * $precision / 1.max($max.$mem - $min.$mem);) + - }; -} - - -pub(crate) use { - same_struct_member_sub, struct_members_max_assign, struct_members_min_assign, - struct_members_normalization_assign -}; diff --git a/source/tools/detect/net/rtrace/utils/src/net.rs b/source/tools/detect/net/rtrace/utils/src/net.rs deleted file mode 100644 index 0310300c40e175fe2ef57ec837a3adc655e6840a..0000000000000000000000000000000000000000 --- a/source/tools/detect/net/rtrace/utils/src/net.rs +++ /dev/null @@ -1,153 +0,0 @@ -use cenum_rs::CEnum; - -#[derive(Debug, CEnum)] -#[cenum(i32)] -pub enum ProtocolType { - #[cenum(value = "libc::IPPROTO_ICMP", display = "icmp")] - Icmp, - #[cenum(value = "libc::IPPROTO_TCP", display = "tcp")] - Tcp, - #[cenum(value = "libc::IPPROTO_UDP", display = "udp")] - Udp, -} - -#[derive(Debug, CEnum)] -#[cenum(i32)] -pub enum CongestionState { - #[cenum(value = "0", display = "Open")] - Open, - #[cenum(value = "1", display = "Disorder")] - Disorder, - #[cenum(value = "2", display = "CWR")] - Cwr, - #[cenum(value = "3", display = "Recovery")] - Recovery, - #[cenum(value = "4", display = "Loss")] - Loss, -} - -use anyhow::{bail, Result}; -use std::fmt::Display; -use std::fs::read_to_string; -use std::path::Path; -use std::str::FromStr; -use std::{collections::HashMap, ops::Add}; - -#[derive(Default, Debug, Clone, PartialEq)] -struct Netstat { - hm: HashMap<(String, String), isize>, -} - -impl FromStr for Netstat { - type Err = anyhow::Error; - fn from_str(content: &str) -> Result { - let mut netstat = Netstat::default(); - - let lines = content.split('\n').collect::>(); - - for i in 0..lines.len() / 2 { - let line1 = lines[i * 2]; - let line2 = lines[i * 2 + 1]; - - let mut iter1 = line1.split_whitespace(); - let mut iter2 = line2.split_whitespace(); - - let prefix; - if let 
Some(x) = iter1.next() { - prefix = x.to_string(); - } else { - bail!("failed to parse: prefix not found") - } - iter2.next(); - loop { - let k; - let v: isize; - if let Some(x) = iter1.next() { - k = x; - } else { - break; - } - - if let Some(x) = iter2.next() { - v = x.parse()?; - } else { - bail!("failed to parse: number of item is not match.") - } - - netstat.insert((prefix.clone(), k.to_string()), v); - } - } - - Ok(netstat) - } -} - -impl Netstat { - pub fn from_file

(path: P) -> Result - where - P: AsRef, - { - let string = read_to_string(path)?; - Netstat::from_str(&string) - } - - pub(crate) fn insert(&mut self, k: (String, String), v: isize) { - self.hm.insert(k, v); - } -} - -use std::net::IpAddr; -use std::net::Ipv4Addr; -use std::net::SocketAddr; - -pub struct Addrpair { - pub local: SocketAddr, - pub remote: SocketAddr, -} - -impl Default for Addrpair { - fn default() -> Self { - Addrpair { - local: SocketAddr::new(IpAddr::V4(Ipv4Addr::new(0, 0, 0, 0)), 0), - remote: SocketAddr::new(IpAddr::V4(Ipv4Addr::new(0, 0, 0, 0)), 0), - } - } -} - -impl Display for Addrpair { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{} -> {}", self.local, self.remote) - } -} - -#[macro_export] -macro_rules! addr_pair_2_Addrpair { - ($ap: expr) => { - Addrpair::new($ap.saddr, $ap.sport, $ap.daddr, $ap.dport) - }; -} - -impl Addrpair { - pub fn new(saddr: u32, sport: u16, daddr: u32, dport: u16) -> Self { - Addrpair { - local: SocketAddr::new(IpAddr::V4(Ipv4Addr::from(u32::from_be(saddr))), sport), - remote: SocketAddr::new(IpAddr::V4(Ipv4Addr::from(u32::from_be(daddr))), dport), - } - } - - // pub fn from_string() -> Self { - - // } -} - -pub fn parse_protocol(proto: &str) -> Result { - let mut protocol = 0; - match proto { - "all" => protocol = 0, - "tcp" => protocol = libc::IPPROTO_TCP, - "udp" => protocol = libc::IPPROTO_UDP, - "icmp" => protocol = libc::IPPROTO_ICMP, - _ => bail!("failed to parse protocol: {}", protocol), - } - Ok(protocol as u16) -} diff --git a/source/tools/detect/net/rtrace/utils/src/percpu_queue.rs b/source/tools/detect/net/rtrace/utils/src/percpu_queue.rs deleted file mode 100644 index 616e87583abcc2377e484a9f2420ab9a6df46e78..0000000000000000000000000000000000000000 --- a/source/tools/detect/net/rtrace/utils/src/percpu_queue.rs +++ /dev/null @@ -1,24 +0,0 @@ -use std::collections::{BTreeMap, VecDeque}; - -use crate::alloc_percpu_variable; - -pub struct PercpuVecDeque { - lls: 
Vec>, -} - -impl PercpuVecDeque -where - T: Default, -{ - pub fn new() -> PercpuVecDeque { - PercpuVecDeque { - lls: alloc_percpu_variable::>(), - } - } - - pub fn push(&mut self, cpu: usize, val: T) { - self.lls[cpu].push_back(val) - } - - // pub fn retain -} diff --git a/source/tools/detect/net/rtrace/utils/src/perf.rs b/source/tools/detect/net/rtrace/utils/src/perf.rs deleted file mode 100644 index ba35daf74e2a7d1817f15fde61de7d90d76baad2..0000000000000000000000000000000000000000 --- a/source/tools/detect/net/rtrace/utils/src/perf.rs +++ /dev/null @@ -1,210 +0,0 @@ -use anyhow::{bail, Result}; -use core::ffi::c_void; -use libbpf_rs::Map; -use libbpf_rs::MapType; -use std::boxed::Box; -use std::slice; -use std::time::Duration; - -fn is_power_of_two(i: usize) -> bool { - i > 0 && (i & (i - 1)) == 0 -} - -// Workaround for `trait_alias` -// (https://doc.rust-lang.org/unstable-book/language-features/trait-alias.html) -// not being available yet. This is just a custom trait plus a blanket implementation. -pub trait SampleCb: FnMut(i32, &[u8]) {} -impl SampleCb for T where T: FnMut(i32, &[u8]) {} - -pub trait LostCb: FnMut(i32, u64) {} -impl LostCb for T where T: FnMut(i32, u64) {} - -struct CbStruct<'b> { - sample_cb: Option>, - lost_cb: Option>, -} - -/// Builds [`PerfBuffer`] instances. -pub struct PerfBufferBuilder<'a, 'b> { - map: &'a Map, - pages: usize, - sample_cb: Option>, - lost_cb: Option>, -} - -impl<'a, 'b> PerfBufferBuilder<'a, 'b> { - pub fn new(map: &'a Map) -> Self { - Self { - map, - pages: 64, - sample_cb: None, - lost_cb: None, - } - } -} - -impl<'a, 'b> PerfBufferBuilder<'a, 'b> { - /// Callback to run when a sample is received. - /// - /// This callback provides a raw byte slice. You may find libraries such as - /// [`plain`](https://crates.io/crates/plain) helpful. - /// - /// Callback arguments are: `(cpu, data)`. 
- pub fn sample_cb(self, cb: NewCb) -> PerfBufferBuilder<'a, 'b> { - PerfBufferBuilder { - map: self.map, - pages: self.pages, - sample_cb: Some(Box::new(cb)), - lost_cb: self.lost_cb, - } - } - - /// Callback to run when a sample is received. - /// - /// Callback arguments are: `(cpu, lost_count)`. - pub fn lost_cb(self, cb: NewCb) -> PerfBufferBuilder<'a, 'b> { - PerfBufferBuilder { - map: self.map, - pages: self.pages, - sample_cb: self.sample_cb, - lost_cb: Some(Box::new(cb)), - } - } - - /// The number of pages to size the ring buffer. - pub fn pages(self, pages: usize) -> PerfBufferBuilder<'a, 'b> { - PerfBufferBuilder { - map: self.map, - pages, - sample_cb: self.sample_cb, - lost_cb: self.lost_cb, - } - } - - pub fn build(self) -> Result> { - if self.map.map_type() != MapType::PerfEventArray { - bail!("Must use a PerfEventArray map".to_string(),) - } - - if !is_power_of_two(self.pages) { - bail!("Page count must be power of two".to_string(),) - } - - let c_sample_cb: libbpf_sys::perf_buffer_sample_fn = if self.sample_cb.is_some() { - Some(Self::call_sample_cb) - } else { - None - }; - - let c_lost_cb: libbpf_sys::perf_buffer_lost_fn = if self.lost_cb.is_some() { - Some(Self::call_lost_cb) - } else { - None - }; - - let callback_struct_ptr = Box::into_raw(Box::new(CbStruct { - sample_cb: self.sample_cb, - lost_cb: self.lost_cb, - })); - - let ptr = unsafe { - libbpf_sys::perf_buffer__new( - self.map.fd(), - self.pages as libbpf_sys::size_t, - c_sample_cb, - c_lost_cb, - callback_struct_ptr as *mut _, - std::ptr::null(), - ) - }; - let err = unsafe { libbpf_sys::libbpf_get_error(ptr as *const _) }; - if err != 0 { - bail!("Unable to create perf buffer: {}", err) - } else { - Ok(PerfBuffer { - ptr, - _cb_struct: unsafe { Box::from_raw(callback_struct_ptr) }, - }) - } - } - - unsafe extern "C" fn call_sample_cb(ctx: *mut c_void, cpu: i32, data: *mut c_void, size: u32) { - let callback_struct = ctx as *mut CbStruct; - - if let Some(cb) = &mut 
(*callback_struct).sample_cb { - cb(cpu, slice::from_raw_parts(data as *const u8, size as usize)); - } - } - - unsafe extern "C" fn call_lost_cb(ctx: *mut c_void, cpu: i32, count: u64) { - let callback_struct = ctx as *mut CbStruct; - - if let Some(cb) = &mut (*callback_struct).lost_cb { - cb(cpu, count); - } - } -} - -/// Represents a special kind of [`Map`]. Typically used to transfer data between -/// [`Program`]s and userspace. -pub struct PerfBuffer<'b> { - ptr: *mut libbpf_sys::perf_buffer, - // Hold onto the box so it'll get dropped when PerfBuffer is dropped - _cb_struct: Box>, -} - -impl<'b> PerfBuffer<'b> { - pub fn epoll_fd(&self) -> i32 { - unsafe { libbpf_sys::perf_buffer__epoll_fd(self.ptr) } - } - - pub fn poll(&self, timeout: Duration) -> Result<()> { - let ret = unsafe { libbpf_sys::perf_buffer__poll(self.ptr, timeout.as_millis() as i32) }; - if ret < 0 { - bail!("Err({}) occurs on perf poll", ret) - } else { - Ok(()) - } - } - - pub fn buffer_cnt(&self) -> usize { - unsafe { libbpf_sys::perf_buffer__buffer_cnt(self.ptr) as usize } - } -} - -impl<'b> Drop for PerfBuffer<'b> { - fn drop(&mut self) { - unsafe { - libbpf_sys::perf_buffer__free(self.ptr); - } - } -} - -unsafe impl<'b> Send for PerfBuffer<'b> {} - -#[cfg(test)] -mod tests { - use super::*; - - fn is_power_of_two_slow(i: usize) -> bool { - if i == 0 { - return false; - } - - let mut n = i; - while n > 1 { - if n & 0x01 as usize == 1 { - return false; - } - n >>= 1; - } - true - } - - #[test] - fn test_is_power_of_two() { - for i in 0..=256 { - assert_eq!(is_power_of_two(i), is_power_of_two_slow(i)); - } - } -} diff --git a/source/tools/detect/net/rtrace/utils/src/process.rs b/source/tools/detect/net/rtrace/utils/src/process.rs deleted file mode 100644 index 0784b8c5dfe38091626d6bee2d3648ef35633a02..0000000000000000000000000000000000000000 --- a/source/tools/detect/net/rtrace/utils/src/process.rs +++ /dev/null @@ -1,43 +0,0 @@ -use std::fs::File; - -use anyhow::Result; -use 
procfs::process; - -pub struct Stat { - pid: u32, - stat: process::Stat, - now_stat: process::Stat, -} - -fn get_process_stat(pid: u32) -> Result { - Ok(process::Stat::from_reader(File::open(format!("/proc/{}/stat", pid))?)?) -} - -impl Stat { - pub fn new(pid: u32) -> Result { - let stat = get_process_stat(pid)?; - Ok(Stat { - pid, - stat: stat.clone(), - now_stat: stat, - }) - } - - pub fn update(&mut self) -> Result<()>{ - std::mem::swap(&mut self.stat, &mut self.now_stat); - self.now_stat = get_process_stat(self.pid)?; - Ok(()) - } - - pub fn ucpu_usage(&self, delta: usize) { - // self.now_stat.utime - } - - pub fn scpu_usage(&self) { - - } - - pub fn mem_usage(&self) { - - } -} diff --git a/source/tools/detect/net/rtrace/utils/src/pstree.rs b/source/tools/detect/net/rtrace/utils/src/pstree.rs deleted file mode 100644 index a2528c8cda63a4e0663476e863bc96e99975e94f..0000000000000000000000000000000000000000 --- a/source/tools/detect/net/rtrace/utils/src/pstree.rs +++ /dev/null @@ -1,66 +0,0 @@ -use anyhow::{bail, Result}; -use eutils_rs::proc::FdType; -use eutils_rs::proc::Pid; -use std::collections::HashMap; -use std::fs; - -pub struct Pstree { - pids: HashMap, - // map of socket inode number and pid - inum_map: HashMap, -} - -impl Pstree { - pub fn new() -> Pstree { - Pstree { - pids: HashMap::default(), - inum_map: HashMap::default(), - } - } - - pub fn update(&mut self) -> Result<()> { - for entry in fs::read_dir("/proc")? 
{ - let entry_instance = entry?; - match entry_instance.file_name().to_string_lossy().parse::() { - Ok(pid) => { - self.pids.insert(pid, Pid::from_file(entry_instance.path())); - } - Err(_) => {} - } - } - - for (i, j) in &self.pids { - for fd in &j.fds { - match fd.fdtype() { - FdType::SocketFd(inum) => { - self.inum_map.entry(inum).or_insert(*i); - } - _ => {} - } - } - } - - Ok(()) - } - - /// find pid who own this inode number - pub fn inum_pid(&self, inum: u32) -> Result { - if let Some(pid) = self.inum_map.get(&inum) { - return Ok(*pid); - } - bail!("failed to find pid of {}", inum) - } - - pub fn pid_comm(&self, pid: i32) -> Result { - // /proc//comm - let path = format!("/proc/{}/comm", pid); - match fs::read_to_string(&path) { - Ok(comm) => { - return Ok(String::from(comm.trim())); - } - Err(e) => { - bail!("failed to open {}, error: {}", path, e) - } - } - } -} diff --git a/source/tools/detect/net/rtrace/utils/src/timestamp.rs b/source/tools/detect/net/rtrace/utils/src/timestamp.rs deleted file mode 100644 index 22cfa10554b5dd00c0fd7557d0b647b4c2a07d6e..0000000000000000000000000000000000000000 --- a/source/tools/detect/net/rtrace/utils/src/timestamp.rs +++ /dev/null @@ -1,52 +0,0 @@ -/// https://linux.die.net/man/3/clock_gettime - -/// Monotonically increasing timestamp, incremented by 1 when the clock interrupt -/// is triggered. This clock source is used by the bpf_ktime_get_ns function. -pub fn current_monotime() -> u64 { - let mut ts = libc::timespec { - tv_sec: 0, - tv_nsec: 0, - }; - unsafe { libc::clock_gettime(libc::CLOCK_MONOTONIC, &mut ts) }; - - (ts.tv_sec as u64) * 1000_000_000 + (ts.tv_nsec as u64) -} - -/// System-wide realtime clock. It is generally synchronized with the clock of -/// the master server through the ntp protocol. 
-pub fn current_realtime() -> u64 { - let mut ts = libc::timespec { - tv_sec: 0, - tv_nsec: 0, - }; - unsafe { libc::clock_gettime(libc::CLOCK_REALTIME, &mut ts) }; - - (ts.tv_sec as u64) * 1000_000_000 + (ts.tv_nsec as u64) -} - -pub fn delta_of_mono_real_time() -> u64 { - let x1 = current_monotime(); - let y1 = current_realtime(); - let y2 = current_realtime(); - let x2 = current_monotime(); - (y2 - x2 + y1 - x1) / 2 -} - -#[cfg(test)] -mod tests { - use super::*; - #[test] - fn test_timestamp_current_monotime() { - assert_ne!(current_monotime(), 0); - } - - #[test] - fn test_timestamp_current_realtime() { - assert_ne!(current_realtime(), 0); - } - - #[test] - fn test_timestamp_delta_of_mono_real_time() { - assert_ne!(delta_of_mono_real_time(), 0); - } -} \ No newline at end of file diff --git a/source/tools/detect/sched/rtdelay/Makefile b/source/tools/detect/sched/rtdelay/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..7eef6024abe0f5e9dea86bd1286158062b794d7e --- /dev/null +++ b/source/tools/detect/sched/rtdelay/Makefile @@ -0,0 +1,10 @@ + +newdirs := $(shell find ./ -type d) + +bpfsrcs := $(wildcard bpf/*.bpf.c) +csrcs := $(wildcard *.c) +target := rtdelay +EXTRA_LDFLAGS += -lrt -lsysak +EXTRA_CFLAGS += -static + +include $(SRC)/mk/bpf.mk diff --git a/source/tools/detect/sched/rtdelay/README.md b/source/tools/detect/sched/rtdelay/README.md new file mode 100644 index 0000000000000000000000000000000000000000..896dee58a888b77dea9d8c7821bb82994dc1d71c --- /dev/null +++ b/source/tools/detect/sched/rtdelay/README.md @@ -0,0 +1,29 @@ +# 功能说明 +用于分析统计应用oncpu时间、offcpu原因及时间的工具,主要针对于java应用,以及在RT过程中进程号不发生改变的应用。 +# 使用说明 +``` +USAGE: sysak rtdelay [--help] [-p PID] [-d DURATION] +EXAMPLES: + rtdelay # trace RT time until Ctrl-C + rtdelay -p 185 # only trace threads for PID 185 + rtdelay -d 10 # trace for 10 seconds only + + -d, --duration=DURATION Total duration of trace in seconds + -p, --pid=PID Trace this PID only + -?, --help Give this help 
list + --usage Give a short usage message + -V, --version Print program version +``` +# 使用举例 +## 运行说明 +下面的例子使用java应用进行请求处理,并在请求处理过程中java应用发送请求获取服务器数据,服务器返回数据,java应用再进行请求返回。 +``` +$sudo sysak rtdelay -d 10 -p 91279 +``` +## 日志输出说明 +上面结果输出说明如下: +``` + java应用收到请求时间戳 oncpu时间 runqueue时间 futex时间 lock时间 存储时间 等待网络时间 等待服务器返回时间 其他未分类时间 + | \ | | | / / / / +read_ts:104881254092291, on:1162, runqueue:7, futex:20, lock:0, io:0, net:0, server:3000329, other:0 +``` \ No newline at end of file diff --git a/source/tools/detect/sched/rtdelay/bpf/rtdelay.bpf.c b/source/tools/detect/sched/rtdelay/bpf/rtdelay.bpf.c new file mode 100644 index 0000000000000000000000000000000000000000..df4f0af6db489276a721b4f91ecf9152792b3583 --- /dev/null +++ b/source/tools/detect/sched/rtdelay/bpf/rtdelay.bpf.c @@ -0,0 +1,970 @@ +#include +#include +#include +#include +#include "../rtdelay.h" + + +#define PERF_MAX_STACK_DEPTH 127 +#define MAX_ENTRIES 10240 +#define KERN_STACKID_FLAGS (0 | BPF_F_FAST_STACK_CMP) +#define MAX_PARAM_ENTRIES 8192 + +char LICENSE[] SEC("license") = "Dual BSD/GPL"; + +const uint64_t NSEC_PER_SEC = 1000000000L; +const uint64_t USER_HZ = 100; +const int ConnStatsBytesThreshold = 131072; +const int ConnStatsPacketsThreshold = 128; + +struct psockaddr { + struct sockaddr *addr; + int sockfd; +}; + +struct send_server_param { + struct sockaddr *addr; //dest + int sockfd; //source +}; + +struct union_addr{ + // struct sockaddr saddr; + struct sockaddr daddr; +}; + +struct { + __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); + __uint(key_size, sizeof(int)); + __uint(value_size, sizeof(int)); +} test_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); + __uint(key_size, sizeof(int)); + __uint(value_size, sizeof(int)); +} fd_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); + __uint(key_size, sizeof(int)); + __uint(value_size, sizeof(int)); +} oncpu_poll_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, u64); 
+ __type(value, struct data_param_t); + __uint(max_entries, MAX_PARAM_ENTRIES); + __uint(map_flags, BPF_F_NO_PREALLOC); +} write_param_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, u16); + __type(value, u64); + __uint(max_entries, MAX_PARAM_ENTRIES); + __uint(map_flags, BPF_F_NO_PREALLOC); +} port_readts_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, int); + __type(value, u64); + __uint(max_entries, MAX_PARAM_ENTRIES); + __uint(map_flags, BPF_F_NO_PREALLOC); +} server_fd_time SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, u64); + __type(value, struct data_param_t); + __uint(max_entries, MAX_PARAM_ENTRIES); + __uint(map_flags, BPF_F_NO_PREALLOC); +} read_param_map SEC(".maps"); + + +struct internal_key_on { + u64 start_ts; + struct key_on key; +}; + +struct internal_key_off { + u64 start_ts; + struct key_t key; +}; + +struct internal_key_server { + u64 read_ts; + u64 start_ts; +}; + +struct r_fd { + int fd; +}; + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, u32); + __type(value, struct r_fd); + __uint(max_entries, MAX_ENTRIES); +} request_fd SEC(".maps"); + + +struct bpf_map_def SEC("maps") stackmap = { + .type = BPF_MAP_TYPE_STACK_TRACE, + .key_size = sizeof(u32), + .value_size = PERF_MAX_STACK_DEPTH * sizeof(u64), + .max_entries = 10000, +}; + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, u32); + __type(value, enum OFFCPU_REASON); + __uint(max_entries, MAX_ENTRIES); +} reason_map SEC(".maps"); + + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, u32); + __type(value, struct bpfarg); +} argmap SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, struct read_key); + __type(value, struct val_t_on); + __uint(max_entries, MAX_ENTRIES); +} info_on SEC(".maps"); + + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, struct key_t); + __type(value, struct val_t); + __uint(max_entries, 
MAX_ENTRIES); +} info_off SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, u64); + __type(value, struct stacks_q); + __uint(max_entries, MAX_ENTRIES); +} request_head SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, struct stacks_q); + __type(value, struct stacks_q); + __uint(max_entries, MAX_ENTRIES); +} request_stacks SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, u32); + __type(value, u64); + __uint(max_entries, MAX_ENTRIES); +} start_on SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, u32); + __type(value, int); + __uint(max_entries, MAX_ENTRIES); +} send_sockfd SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, u32); + __type(value, struct internal_key_off); + __uint(max_entries, MAX_ENTRIES); +} start_off SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, u32); + __type(value, u64); + __uint(max_entries, MAX_ENTRIES); +} start_runqueue SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, u32); + __type(value, u64); + __uint(max_entries, MAX_ENTRIES); +} read_time_map SEC(".maps"); + + +#define _(P) ({typeof(P) val = 0; bpf_probe_read(&val, sizeof(val), &P); val;}) +#define GETARG_FROM_ARRYMAP(map,argp,type,member,oldv)({ \ + type retval = (type)oldv; \ + int i = 0; \ + argp = bpf_map_lookup_elem(&map, &i); \ + if (argp) { \ + retval = _(argp->member); \ + } \ + retval; \ + }) + +static bool allow_record(uint64_t id) +{ + pid_t targ_tgid; + uint32_t tgid = id >> 32; + struct bpfarg *argp; + targ_tgid = GETARG_FROM_ARRYMAP(argmap, argp, pid_t, targ_tgid,-1); + if (targ_tgid != -1 && targ_tgid != tgid) + return false; + return true; +} + +static bool is_server_pid(uint64_t id) +{ + pid_t server_pid; + uint32_t tgid = id >> 32; + struct bpfarg *argp; + server_pid = GETARG_FROM_ARRYMAP(argmap, argp, pid_t, server_pid,-1); + if (server_pid != -1 && server_pid == tgid) + return true; + return 
false; +} + +static bool allow_record_tgid(uint32_t tgid) +{ + /*filter with tgid*/ + pid_t targ_tgid; + struct bpfarg *argp; + targ_tgid = GETARG_FROM_ARRYMAP(argmap, argp, pid_t, targ_tgid,-1); + if (targ_tgid != -1 && targ_tgid != tgid) + return false; + return true; +} + +static void delete_unwrite_pid(uint32_t pid){ + /*delete information without write process*/ + u64 *read_ts; + read_ts = bpf_map_lookup_elem(&read_time_map, &pid); + if (!read_ts){ + return; + } + struct read_key r_k={}; + struct val_t_on *valp; + r_k.read_ts = *read_ts; + valp = bpf_map_lookup_elem(&info_on, &r_k); + if (!valp) + { + return; + } + if (valp->flag==0){ + bpf_map_delete_elem(&info_on, &r_k); + } +} + +static u64 get_readts(uint32_t pid){ + u64 *read_ts,ts; + read_ts = bpf_map_lookup_elem(&read_time_map, &pid); + if (!read_ts){ + return 0; + } + ts = *read_ts; + return ts; +} + +static int get_org_fd(uint32_t pid){ + u64 read_ts = get_readts(pid); + if (read_ts){ + struct r_fd *orig_fd; + orig_fd = bpf_map_lookup_elem(&request_fd, &pid); + if (orig_fd) + return orig_fd->fd; + } + return 0; +} + +static void handle_read(uint64_t id,int fd, void *ctx){ + // 新建请求,会将该pid之前的未write的请求抹去 + u64 start_ts=bpf_ktime_get_ns(); + u64 read_ts = start_ts; + + uint32_t pid = id; + delete_unwrite_pid(pid); + + bpf_map_update_elem(&start_on, &pid, &start_ts, 0); + bpf_map_update_elem(&read_time_map, &pid, &read_ts,0); + + struct read_key r_k={}; + r_k.read_ts = read_ts; + struct val_t_on val; + __builtin_memset(&val, 0, sizeof(val)); + val.delta = 0; + val.flag = 0; + bpf_map_update_elem(&info_on,&r_k,&val,BPF_NOEXIST); + + struct stacks_q new_stack={}; + new_stack.kern_stack_id = 0; + new_stack.read_ts = read_ts; + bpf_map_update_elem(&request_head, &read_ts, &new_stack,BPF_NOEXIST); + + struct r_fd orig_fd={}; + orig_fd.fd = fd; + bpf_map_update_elem(&request_fd, &pid, &orig_fd,0); + +} + + +static int write_record(uint64_t id, void *ctx){ + // 记录请求响应 + uint32_t pid = id; + u64 
*start_ts,*read_ts; + start_ts = bpf_map_lookup_elem(&start_on, &pid); + if (!start_ts) + goto cleanup; + read_ts = bpf_map_lookup_elem(&read_time_map, &pid); + if (!read_ts) + goto cleanup; + s64 delta, rtlatency; + u64 now_ts = bpf_ktime_get_ns(); + delta = (s64)(now_ts - *start_ts); + struct read_key r_k={}; + struct val_t_on *valp; + r_k.read_ts = *read_ts; + delta = (u64)delta/1000U; + rtlatency = (s64)(now_ts-*read_ts); + rtlatency = (u64)rtlatency/1000U; + valp = bpf_map_lookup_elem(&info_on, &r_k); + if (!valp) + { + goto cleanup; + } + __sync_fetch_and_add(&valp->delta, delta); + __sync_fetch_and_add(&valp->flag, 1); + __sync_fetch_and_add(&valp->rtlatency, rtlatency); + +cleanup: + bpf_map_delete_elem(&read_time_map, &pid); + bpf_map_delete_elem(&start_on, &pid); + return 0; +} + +static void handle_write(uint64_t id, int fd, void *ctx){ + uint32_t pid = id; + struct r_fd *orig_fd; + orig_fd = bpf_map_lookup_elem(&request_fd,&pid); + if (!orig_fd){ + return; + } + if (orig_fd->fd == fd){ + write_record(id,ctx); + bpf_map_delete_elem(&request_fd,&pid); + bpf_map_delete_elem(&send_sockfd,&pid); + } +} + +/* server端收到请求 */ +static void handle_server_read(u64 read_ts, void *ctx){ + + u64 now_ts = bpf_ktime_get_ns(); + bpf_map_update_elem(&server_fd_time, &read_ts, &now_ts, 0); + +} + +/* server端处理请求 */ +static void handle_server_write(u64 read_ts, void *ctx){ + + u64 *start_ts; + start_ts = bpf_map_lookup_elem(&server_fd_time, &read_ts); + + if (start_ts){ + u64 delta; + delta = (s64)(bpf_ktime_get_ns() - *start_ts); + delta = (u64)delta/1000U; + struct read_key r_k = {}; + r_k.read_ts = read_ts; + struct val_t_on *valp_on; + valp_on = bpf_map_lookup_elem(&info_on, &r_k); + if (!valp_on){ + goto cleanup; + } + __sync_fetch_and_add(&valp_on->server_delta, delta); + + } +cleanup: + bpf_map_delete_elem(&server_fd_time, &read_ts); + +} + +struct sys_enter_read_args { + struct trace_entry ent; + long __syscall_nr; + long fd; + long buf; + long count; + char 
__data[0]; +}; + + +SEC("tp/syscalls/sys_enter_read") +// ssize_t read(int fd, void *buf, size_t count); +int tp__sys_enter_read(struct sys_enter_read_args *ctx) +{ + uint64_t id = bpf_get_current_pid_tgid(); + if (!allow_record(id)) { + goto server_process; + } + struct data_param_t read_param = {}; + read_param.syscall_func = FuncRead; + read_param.fd = (int)ctx->fd; + // bpf_probe_read(&read_param.fd, sizeof(read_param.fd), &ctx->fd); + read_param.buf = (const char*)ctx->buf; + bpf_map_update_elem(&read_param_map, &id, &read_param, BPF_ANY); + + return 0; + +server_process: + /* server端是否能匹配到从connaddr_map来的请求 */ + if (is_server_pid(id)){ + int sockfd; + sockfd = (int)ctx->fd; + handle_server_read(sockfd, ctx); + } + return 0; +} + + +SEC("tracepoint/syscalls/sys_exit_read") +int tp_sys_exit_read(struct trace_event_raw_sys_exit *ctx) { + uint64_t id = bpf_get_current_pid_tgid(); + if (!allow_record(id) && !is_server_pid(id)) { + return 0; + } + uint32_t pid = id; + + struct data_param_t *read_param = bpf_map_lookup_elem(&read_param_map, &id); + if (read_param != NULL && read_param->real_conn && read_param->buf ) { + if (allow_record(id)){ + int *sock_fd; + sock_fd = bpf_map_lookup_elem(&send_sockfd,&pid); + if (sock_fd && *sock_fd==read_param->fd){ + // bpf_map_delete_elem(&send_sockfd,&pid); + }else{ + handle_read(id,read_param->fd,ctx); + } + } + + } + bpf_map_delete_elem(&read_param_map, &id); + return 0; + + return 0; +} + +struct sys_enter_write_args { + struct trace_entry ent; + long __syscall_nr; + long fd; + long *buf; + long count; + char __data[0]; +}; + +SEC("tp/syscalls/sys_enter_write") +int tp_sys_enter_write(struct sys_enter_write_args *ctx) +{ + uint64_t id = bpf_get_current_pid_tgid(); + + if (!allow_record(id)) + return 0; + + struct data_param_t write_param = {}; + write_param.syscall_func = FuncWrite; + write_param.fd = (int)ctx->fd; + write_param.iovlen = (size_t)ctx->count; + bpf_map_update_elem(&write_param_map, &id, &write_param, BPF_ANY); 
+ return 0; +} + + +SEC("tracepoint/syscalls/sys_exit_write") +int tp_sys_exit_write(struct trace_event_raw_sys_exit *ctx) { + uint64_t id = bpf_get_current_pid_tgid(); + if (!allow_record(id)) { + return 0; + } + + struct data_param_t *write_param = bpf_map_lookup_elem(&write_param_map, &id); + if (write_param != NULL && write_param->real_conn && write_param->iovlen) { + handle_write(id,write_param->fd,ctx); + } + bpf_map_delete_elem(&write_param_map, &id); + + return 0; +} + + +/*从socket读数据*/ +SEC("kprobe/security_socket_recvmsg") +// int security_socket_recvmsg(struct socket *sock, struct msghdr *msg, int size) +int BPF_KPROBE(kprobe_security_socket_recvmsg, struct socket *sock, void *msg, int size) +{ + uint64_t id = bpf_get_current_pid_tgid(); + uint32_t pid = id; + if (!allow_record(id)) { + goto server_process; + } + + struct data_param_t *read_param = bpf_map_lookup_elem(&read_param_map, &id); + if (read_param != NULL) { + read_param->real_conn = true; + } + +server_process: + if (!is_server_pid(id)) + return 0; + struct sock *psk= (struct sock *)_(sock->sk); + struct sock_common sk_c; + bpf_probe_read(&sk_c, sizeof(struct sock_common), &psk->__sk_common); + u16 dport = sk_c.skc_dport; //远端 + int port = __builtin_bswap16(dport); + u64 *read_ts; + read_ts = bpf_map_lookup_elem(&port_readts_map, &port); + if (!read_ts) + return 0; + handle_server_read(*read_ts,ctx); + + return 0; +} + +SEC("kprobe/security_socket_sendmsg") +int BPF_KPROBE(kprobe_security_socket_sendmsg, struct socket *sock, void *msg, int size) +{ + uint64_t id = bpf_get_current_pid_tgid(); + if (!allow_record(id) && !is_server_pid(id)) + return 0; + + uint32_t pid = id; + struct data_param_t *write_param = bpf_map_lookup_elem(&write_param_map, &id); + if (write_param != NULL) { + write_param->real_conn = true; + } + u64 read_ts; + + struct sock *psk= (struct sock *)_(sock->sk); + struct sock_common sk_c; + bpf_probe_read(&sk_c, sizeof(struct sock_common), &psk->__sk_common); + u16 sport = 
sk_c.skc_num; + u16 dport = sk_c.skc_dport; + + int fd = get_org_fd(pid); + read_ts = get_readts(pid); + if (fd && write_param && fd != write_param->fd){ + bpf_map_update_elem(&port_readts_map, &sport, &read_ts, 0); + bpf_map_update_elem(&send_sockfd, &pid, &write_param->fd,0); + } + +server_process: + if (!is_server_pid(id)) + return 0; + u64 *pread_ts; + dport = __builtin_bswap16(dport); + pread_ts = bpf_map_lookup_elem(&port_readts_map, &dport); + if (pread_ts){ + handle_server_write(*pread_ts, ctx); + bpf_map_delete_elem(&port_readts_map, &dport); + } + + return 0; +} + +struct sys_enter_connect_args { + struct trace_entry ent; + long __syscall_nr; + long fd; + struct sockaddr *uservaddr; + long addrlen; + char __data[0]; +}; + +SEC("tp/syscalls/sys_enter_connect") +int tp_sys_enter_connect(struct sys_enter_connect_args *ctx) +{ + uint64_t id = bpf_get_current_pid_tgid(); + uint32_t pid = id; + if (allow_record(id)) { + int fd = (int)ctx->fd; + bpf_map_update_elem(&send_sockfd, &pid, &fd,0); + } + return 0; +} + +struct sys_enter_sendmsg_args { + struct trace_entry ent; + long __syscall_nr; + long fd; + struct user_msghdr * msg; + long flags; + char __data[0]; +}; + +SEC("tp/syscalls/sys_enter_sendmsg") +// ssize_t sendmsg(int sockfd, const struct msghdr *msg, int flags); +int tp_sys_enter_sendmsg(struct sys_enter_sendmsg_args *ctx) +{ + uint64_t id = bpf_get_current_pid_tgid(); + uint32_t pid = id; + int sockfd = (int)ctx->fd; + if (allow_record(id)) { + struct data_param_t write_param = {}; + write_param.syscall_func = FuncSendMsg; + write_param.fd = sockfd; + bpf_map_update_elem(&write_param_map, &id, &write_param, BPF_ANY); + } + + return 0; +} + +SEC("tracepoint/syscalls/sys_exit_sendmsg") +int tp_sys_exit_sendmsg(struct trace_event_raw_sys_exit *ctx) +{ + uint64_t id = bpf_get_current_pid_tgid(); + uint32_t pid = id; + if (allow_record(id)) { + struct data_param_t *write_param = bpf_map_lookup_elem(&write_param_map, &id); + if (write_param != NULL) { + 
struct r_fd *orig_fd; + orig_fd = bpf_map_lookup_elem(&request_fd,&pid); + if (orig_fd && write_param->fd == orig_fd->fd){ + handle_write(id,write_param->fd,ctx); + } + } + bpf_map_delete_elem(&write_param_map, &id); + } + return 0; +} + +struct sys_enter_recvmsg_args { + struct trace_entry ent; + long __syscall_nr; + long fd; + struct user_msghdr * msg; + long flags; + char __data[0]; +}; + +SEC("tp/syscalls/sys_enter_recvmsg") +// ssize_t recvmsg(int sockfd, struct msghdr *msg, int flags); +int tp_sys_enter_recvmsg(struct sys_enter_recvmsg_args *ctx) +{ + uint64_t id = bpf_get_current_pid_tgid(); + if (allow_record(id)) { + struct data_param_t read_param = {}; + read_param.syscall_func = FuncRecvMsg; + read_param.fd = (int)ctx->fd; + bpf_map_update_elem(&read_param_map, &id, &read_param, BPF_ANY); + } + if (is_server_pid(id)){ + int sockfd = (int)ctx->fd; + handle_server_read(sockfd, ctx); + } + return 0; +} + +SEC("tracepoint/syscalls/sys_exit_recvmsg") +int tp_sys_exit_recvmsg(struct trace_event_raw_sys_exit *ctx) +{ + uint64_t id = bpf_get_current_pid_tgid(); + uint32_t pid = id; + if (allow_record(id)) { + struct data_param_t *read_param = bpf_map_lookup_elem(&read_param_map, &id); + if (read_param != NULL) { + int *sock_fd; + sock_fd = bpf_map_lookup_elem(&send_sockfd,&pid); + if (sock_fd && *sock_fd==read_param->fd){ + // bpf_map_delete_elem(&send_sockfd,&pid); + }else{ + handle_read(id, read_param->fd, ctx); + } + } + bpf_map_delete_elem(&read_param_map, &id); + } + return 0; +} + +struct sys_enter_sendto_args { + struct trace_entry ent; + long __syscall_nr; + long fd; + long buff; + long len; + long flags; + struct sockaddr *addr; + long addr_len; + char __data[0]; +}; + +SEC("tp/syscalls/sys_enter_sendto") +// ssize_t sendto(int sockfd, const void *buf, size_t len, int flags, +// const struct sockaddr *dest_addr, socklen_t addrlen); +int tp_sys_enter_sendto(struct sys_enter_sendto_args *ctx) +{ + uint64_t id = bpf_get_current_pid_tgid(); + uint32_t pid = 
id; + int sockfd = (int)ctx->fd; + + if (allow_record(id)) { + struct data_param_t write_param = {}; + write_param.syscall_func = FuncSendTo; + write_param.fd = sockfd; + // write_param.buf = (const char *)_(ctx->buff); + bpf_map_update_elem(&write_param_map, &id, &write_param, BPF_ANY); + } + + return 0; +} + +SEC("tracepoint/syscalls/sys_exit_sendto") +int tp_sys_exit_sendto(struct trace_event_raw_sys_exit *ctx) +{ + uint64_t id = bpf_get_current_pid_tgid(); + uint32_t pid = id; + if (allow_record(id)) { + struct data_param_t *write_param = bpf_map_lookup_elem(&write_param_map, &id); + if (write_param != NULL) { + + struct r_fd *orig_fd; + orig_fd = bpf_map_lookup_elem(&request_fd,&pid); + if (orig_fd && write_param->fd == orig_fd->fd){ + handle_write(id,write_param->fd,ctx); + } + } + bpf_map_delete_elem(&write_param_map, &id); + } + return 0; +} + +struct sys_enter_recvfrom_args { + struct trace_entry ent; + long __syscall_nr; + long fd; + long ubuf; + long size; + long flags; + long addr; + long addr_len; + char __data[0]; +}; + +SEC("tp/syscalls/sys_enter_recvfrom") +// ssize_t recvfrom(int sockfd, void *buf, size_t len, int flags, +// struct sockaddr *src_addr, socklen_td *addrlen); +int tp_sys_enter_recvfrom(struct sys_enter_recvfrom_args *ctx) +{ + uint64_t id = bpf_get_current_pid_tgid(); + + if (allow_record(id)) { + struct data_param_t read_param = {}; + read_param.syscall_func = FuncRecvFrom; + read_param.fd = (int)ctx->fd; + read_param.buf = (const char *)ctx->ubuf; + bpf_map_update_elem(&read_param_map, &id, &read_param, BPF_ANY); + } + if (is_server_pid(id)){ + int sockfd = (int)ctx->fd; + handle_server_read(sockfd, ctx); + } + + return 0; +} + +SEC("tracepoint/syscalls/sys_exit_recvfrom") +int tp_sys_exit_recvfrom(struct trace_event_raw_sys_exit *ctx) +{ + uint64_t id = bpf_get_current_pid_tgid(); + uint32_t pid = id; + struct data_param_t *read_param = bpf_map_lookup_elem(&read_param_map, &id); + if (read_param != NULL && read_param->real_conn && 
read_param->buf) { + if (allow_record(id)){ + int *sock_fd; + sock_fd = bpf_map_lookup_elem(&send_sockfd,&pid); + if (sock_fd && *sock_fd==read_param->fd){ + // bpf_map_delete_elem(&send_sockfd,&pid); + }else{ + handle_read(id,read_param->fd,ctx); + } + + } + + } + bpf_map_delete_elem(&read_param_map, &id); + return 0; + +} + +struct sched_switch_tp_args { + struct trace_entry ent; + char prev_comm[16]; + pid_t prev_pid; + int prev_prio; + long int prev_state; + char next_comm[16]; + pid_t next_pid; + int next_prio; + char __data[0]; +}; + +SEC("tp/sched/sched_switch") +// int tp__sched_switch(struct sched_switch_tp_args *ctx) +int tp__sched_switch(struct sched_switch_tp_args *ctx) +{ + struct task_struct *prev = (void *)bpf_get_current_task(); + uint32_t prev_pid = _(prev->pid), prev_tgid = _(prev->tgid); + uint32_t next_pid; + + struct perf_oncpu p_o={}; + u64 *start_ts, *read_ts, *read_ts_next, *start_rq; + u64 now_ts = bpf_ktime_get_ns(); + struct val_t val,*valp; + struct val_t_on *valp_on; + s64 delta; + struct read_key r_k={}; + + if (allow_record_tgid(prev_tgid)) + { + + if (prev_pid == 0) + prev_pid = bpf_get_smp_processor_id(); + + // record oncpu time + start_ts = bpf_map_lookup_elem(&start_on, &prev_pid); + if (!start_ts) + goto next_pid_process; + + delta = (s64)(now_ts - *start_ts); + + read_ts = bpf_map_lookup_elem(&read_time_map, &prev_pid); + if (!read_ts) + { + bpf_map_delete_elem(&start_on, &prev_pid); + goto next_pid_process; + } + + r_k.read_ts = *read_ts; + delta = (u64)delta/1000U; + valp_on = bpf_map_lookup_elem(&info_on, &r_k); + if (!valp_on){ + bpf_map_delete_elem(&start_on, &prev_pid); + goto next_pid_process; + } + __sync_fetch_and_add(&valp_on->delta, delta); + + bpf_map_delete_elem(&start_on, &prev_pid); + + // record offcpu stacks + struct internal_key_off i_key={}; + i_key.start_ts = now_ts; + i_key.key.read_ts = *read_ts; + /* There have be a bug in linux-4.19 for bpf_get_stackid in raw_tracepoint */ + i_key.key.kern_stack_id = 
bpf_get_stackid(ctx, &stackmap, KERN_STACKID_FLAGS); + + if (!bpf_map_lookup_elem(&info_off, &i_key.key)) + { + __builtin_memset(&val, 0, sizeof(val)); + // bpf_probe_read_str(&val.comm, sizeof(prev->comm), prev->comm); + val.delta = 0; + bpf_map_update_elem(&info_off, &i_key.key, &val, BPF_NOEXIST); + struct stacks_q new_head={}, *old_head; + old_head = bpf_map_lookup_elem(&request_head, &r_k.read_ts); + if (!old_head) + goto next_pid_process; + new_head.kern_stack_id = i_key.key.kern_stack_id; + new_head.read_ts = *read_ts; + bpf_map_update_elem(&request_stacks, &new_head, old_head, BPF_NOEXIST); + bpf_map_update_elem(&request_head, read_ts, &new_head, 0); + } + bpf_map_update_elem(&start_off, &prev_pid, &i_key, 0); + bpf_map_update_elem(&start_runqueue,&prev_pid,&now_ts,0); + } + +next_pid_process: + next_pid = ctx->next_pid; + if (1) + { + // 只有进入了read的thread才需要记录 + read_ts_next = bpf_map_lookup_elem(&read_time_map, &next_pid); + if (!read_ts_next) + return 0; + + bpf_map_update_elem(&start_on, &next_pid, &now_ts, 0); + + start_rq = bpf_map_lookup_elem(&start_runqueue, &next_pid); + if (!start_rq) + return 0; + delta = (s64)(now_ts - *start_rq); + delta = (u64)delta/1000U; + r_k.read_ts = *read_ts_next; + valp_on = bpf_map_lookup_elem(&info_on, &r_k); + if (!valp_on) + goto cleanup; + + __sync_fetch_and_add(&valp_on->runqueue, delta); + + } +cleanup: + bpf_map_delete_elem(&start_runqueue, &next_pid); + return 0; +} + +struct sched_wakeup_tp_args { + struct trace_entry ent; + char comm[16]; + pid_t pid; + int prio; + int success; + int target_cpu; + char __data[0]; +}; + +static int wakeup(void *ctx, pid_t pid) +{ + u64 now_ts = bpf_ktime_get_ns(); + + // 记录off时间 + struct internal_key_off *i_keyp; + i_keyp = bpf_map_lookup_elem(&start_off, &pid); + if (!i_keyp){ + goto cleanup; + } + s64 delta; + struct val_t *valp; + + delta = (s64)(now_ts- i_keyp->start_ts); + if (delta < 0) + { + goto cleanup; + } + delta = (u64)delta/1000U; + valp = 
bpf_map_lookup_elem(&info_off, &i_keyp->key); + if (!valp) + { + goto cleanup; + } + __sync_fetch_and_add(&valp->delta, delta); + + // 记录进入runqueue的时间戳 + bpf_map_update_elem(&start_runqueue,&pid,&now_ts,0); + +cleanup: + bpf_map_delete_elem(&start_off, &pid); + + return 0; +} + + +SEC("tp/sched/sched_wakeup") +int tp__sched_wakeup(struct sched_wakeup_tp_args *ctx) +{ + pid_t pid = 0; + bpf_probe_read(&pid, sizeof(pid), &(ctx->pid)); + wakeup(ctx, pid); + return 0; +} diff --git a/source/tools/detect/sched/rtdelay/main.c b/source/tools/detect/sched/rtdelay/main.c new file mode 100644 index 0000000000000000000000000000000000000000..7de7f018a566a8f56cd607777243a2d8dd0dd962 --- /dev/null +++ b/source/tools/detect/sched/rtdelay/main.c @@ -0,0 +1,107 @@ +#include "rtdelay_api.h" +#include +#include +#include + +const char *argp_program_version = "rtdelay 0.1"; +const char argp_program_doc[] = + "Summarize off-CPU time(in us) by stack trace.\n" + "\n" + "USAGE: rtdelay [--help] [-p PID] [-d DURATION] [-s ServerPID] [-u]\n" + "EXAMPLES:\n" + " rtdelay # trace RT time until Ctrl-C\n" + " rtdelay -p 185 # only trace threads for PID 185\n" + " rtdelay -p 185 -s 827732 # only trace threads for PID 185 and server PID is 827732\n" + " rtdelay -u # show result in user-friendly interface \n" + " rtdelay -d 10 # trace for 10 seconds only\n"; + +static const struct argp_option opts[] = { + {"pid", 'p', "PID", 0, "Trace this PID only"}, + {"server", 's', "SeverPID", 0, "Trace Server PID"}, + {"duration", 'd', "DURATION", 0, "Total duration of trace in seconds"}, + {"user", 'u', NULL, 0, "Show result in user-friendly interface"}, + {NULL, 'h', NULL, OPTION_HIDDEN, "Show the full help"}, + {}, +}; + +static struct env +{ + pid_t pid; + time_t duration; + pid_t server_pid; + int json_out; +} env = { + .pid = -1, + .duration = 99999999, + .server_pid = -1, + .json_out = 1, +}; + +static error_t parse_arg(int key, char *arg, struct argp_state *state) +{ + time_t duration; + + switch (key) 
+ { + case ARGP_KEY_ARG: + argp_usage(state); + break; + case 'u': + env.json_out = 0; + break; + case 'h': + argp_state_help(state, stderr, ARGP_HELP_STD_HELP); + break; + case 'p': + errno = 0; + env.pid = strtol(arg, NULL, 10); + if (errno) + { + fprintf(stderr, "invalid PID: %s\n", arg); + argp_usage(state); + } + break; + case 's': + errno = 0; + env.server_pid = strtol(arg, NULL, 10); + if (errno) + { + fprintf(stderr, "invalid PID: %s\n", arg); + argp_usage(state); + } + break; + case 'd': + errno = 0; + duration = strtol(arg, NULL, 10); + if (errno || duration <= 0) + { + fprintf(stderr, "invalid DURATION: %s\n", arg); + argp_usage(state); + } + env.duration = duration; + break; + default: + return ARGP_ERR_UNKNOWN; + } + return 0; +} + +int main(int argc, char **argv) +{ + + int err; + static const struct argp argp = { + .options = opts, + .parser = parse_arg, + .doc = argp_program_doc, + }; + + /* Parse command line arguments */ + err = argp_parse(&argp, argc, argv, 0, NULL, NULL); + if (err) + return err; + + err = rtdelay(env.pid, env.server_pid, env.duration, env.json_out); + + return err < 0 ? 
-err : 0; +} diff --git a/source/tools/detect/sched/rtdelay/rtdelay.c b/source/tools/detect/sched/rtdelay/rtdelay.c new file mode 100644 index 0000000000000000000000000000000000000000..d77794e5bc80f14bb9b75684cb3bfcedb106625f --- /dev/null +++ b/source/tools/detect/sched/rtdelay/rtdelay.c @@ -0,0 +1,433 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include "bpf/rtdelay.skel.h" +#include "trace_helpers.h" +#include +#include +#include + +#include "rtdelay.h" +#include "rtdelay_api.h" + +static volatile bool exiting = false; +#define TASK_COMM_LEN 16 +#define MAX_FILENAME_LEN 512 + +#define debug_print 0 + +static volatile int output_sysom = 1; + +static struct env +{ + pid_t pid; + bool verbose; + int perf_max_stack_depth; + int stack_storage_size; + time_t duration; +} env = { + .pid = -1, + .verbose = false, + .perf_max_stack_depth = 127, + .stack_storage_size = 1024, +}; + +struct StackReason +{ + char *stack; + enum OFFCPU_REASON reason; +}; + +struct FindList +{ + struct StackReason data; + struct FindList *next; +}; +static struct FindList FL; + +struct Request +{ + __u64 read_ts; + __u64 oncpu_t; + __u64 runqueue_t; + __u64 rtlatency_t; + __u64 offcpu_FUTEX_t; + __u64 offcpu_IO_t; + __u64 offcpu_NET_t; + __u64 offcpu_OTHER_t; + __u64 offcpu_SERVER_t; + __u64 offcpu_LOCK_t; + struct Request *next; +}; + +time_t get_boot_time() +{ + struct sysinfo info; + time_t cur_time = 0; + time_t boot_time = 0; + if (sysinfo(&info)) + { + printf("Failed to get sysinfo, reason"); + return 0; + } + time(&cur_time); + if (cur_time > info.uptime) + { + boot_time = cur_time - info.uptime; + } + return boot_time; +} + +void initData(struct FindList *head) +{ + char *stacks[14] = {"futex_wait", "net_write", "net_read", "mutex_lock", "sys_recvfrom", "ep_poll", "do_sys_poll", "do_sys_openat", "tcp_recvmsg", "vfs_", "ext4_", "block_", "blk_"}; + enum OFFCPU_REASON reasons[14] = {FUTEX_R, NET_R, NET_R, LOCK_R, NET_R, FUTEX_R, FUTEX_R, IO_R, 
NET_R, IO_R, IO_R, IO_R, IO_R}; + struct FindList *p = head; + int i; + for (i = 0; i < sizeof(stacks); i++) + { + if (!stacks[i]) + { + break; + } + struct FindList *temp = (struct FindList *)malloc(sizeof(struct FindList)); + (temp->data).stack = stacks[i]; + // strcpy((temp->data).stack,stacks[i]); + (temp->data).reason = reasons[i]; + p->next = temp; + p = p->next; + } + p->next = NULL; +} + +enum OFFCPU_REASON search_by_stack(struct FindList *head, const char *stack) +{ + enum OFFCPU_REASON o_r = UNKNOWN_R; + struct FindList *p = head; + p = p->next; + while (p != NULL) + { + if (sizeof((p->data).stack) <= 1) + { + p = p->next; + continue; + } + if (strcasestr(stack, (p->data).stack)) + { + o_r = (p->data).reason; + break; + } + else + { + p = p->next; + } + } + return o_r; +} + +static void sig_handler(int sig) +{ + exiting = true; +} + +static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args) +{ + if (level == LIBBPF_DEBUG && !env.verbose) + return 0; + return vfprintf(stderr, format, args); +} + +static void bump_memlock_rlimit(void) +{ + struct rlimit rlim_new = { + .rlim_cur = RLIM_INFINITY, + .rlim_max = RLIM_INFINITY, + }; + + if (setrlimit(RLIMIT_MEMLOCK, &rlim_new)) + { + fprintf(stderr, "Failed to increase RLIMIT_MEMLOCK limit!\n"); + exit(1); + } +} + +enum OFFCPU_REASON find_offcpu_reason(int stack_id, struct rtdelay_bpf *obj) +{ + int rmap, err; + enum OFFCPU_REASON off_reason; + rmap = bpf_map__fd(obj->maps.reason_map); + err = bpf_map_lookup_elem(rmap, &stack_id, &off_reason); + if (err < 0 || err == ENOENT) + { + return UNKNOWN_R; + } + return off_reason; +} + +enum OFFCPU_REASON add_offcpu_reason(struct ksyms *ksyms, struct rtdelay_bpf *obj, int stack_id) +{ + __u64 *ip_k; + const struct ksym *ksym; + int sfd, rmp; + sfd = bpf_map__fd(obj->maps.stackmap); + rmp = bpf_map__fd(obj->maps.reason_map); + enum OFFCPU_REASON offcpu_reason; + + ip_k = calloc(env.perf_max_stack_depth, sizeof(*ip_k)); + if (!ip_k) + { 
+ fprintf(stderr, "failed to alloc ip_k\n"); + goto cleanup; + } + + if (bpf_map_lookup_elem(sfd, &stack_id, ip_k) != 0) + { +#if debug_print + printf("[Missed Kernel Stack];"); +#endif + goto cleanup; + } + int i = 0; + + for (i = 0; i < env.perf_max_stack_depth && ip_k[i]; i++) + { + ksym = ksyms__map_addr(ksyms, ip_k[i]); + + if (!ksym) + { + continue; + } + // print stacks +#if debug_print + printf("%s;", ksym ? ksym->name : "Unknown"); +#endif + offcpu_reason = search_by_stack(&FL, ksym->name); + if (offcpu_reason != UNKNOWN_R) + { + bpf_map_update_elem(rmp, &stack_id, &offcpu_reason, BPF_NOEXIST); + return offcpu_reason; + } + } +cleanup: + free(ip_k); + offcpu_reason = OTHER_R; + bpf_map_update_elem(rmp, &stack_id, &offcpu_reason, BPF_NOEXIST); + return OTHER_R; +} + +static void add_offcpu_delta(struct Request *request, enum OFFCPU_REASON reason, int delta) +{ + switch (reason) + { + case FUTEX_R: + __sync_fetch_and_add(&request->offcpu_FUTEX_t, delta); + break; + case IO_R: + __sync_fetch_and_add(&request->offcpu_IO_t, delta); + break; + case NET_R: + __sync_fetch_and_add(&request->offcpu_NET_t, delta); + break; + case LOCK_R: + __sync_fetch_and_add(&request->offcpu_LOCK_t, delta); + break; + case SERVER_R: + __sync_fetch_and_add(&request->offcpu_SERVER_t, delta); + break; + case UNKNOWN_R: + case OTHER_R: + __sync_fetch_and_add(&request->offcpu_OTHER_t, delta); + break; + } +} + +static void offcpu_analyse(struct ksyms *ksyms, + struct rtdelay_bpf *obj, const struct read_key *key, struct Request *request) +{ + int err, ifd_on, rfd, rsfd, ifd; + struct val_t_on val_on; + struct stacks_q head, stack_next; + struct key_t k_t = {}; + struct val_t val_off; + time_t boot_time = get_boot_time(); + + ifd = bpf_map__fd(obj->maps.info_off); + ifd_on = bpf_map__fd(obj->maps.info_on); + rfd = bpf_map__fd(obj->maps.request_head); + rsfd = bpf_map__fd(obj->maps.request_stacks); + + // oncpu time + err = bpf_map_lookup_elem(ifd_on, key, &val_on); + if (err < 0) + { + 
fprintf(stderr, "failed to lookup info: %d\n", err); + return; + } +#if debug_print + printf("%ld;%ld\n", val_on.delta, key->read_ts); +#endif + request->read_ts = key->read_ts / 1e9 + boot_time; + request->oncpu_t = val_on.delta; + request->runqueue_t = val_on.runqueue; + request->rtlatency_t = val_on.rtlatency; + request->offcpu_SERVER_t = val_on.server_delta; + k_t.read_ts = key->read_ts; + + // find reason for offcpu + err = bpf_map_lookup_elem(rfd, &key->read_ts, &head); + if (err < 0) + { + fprintf(stderr, "failed to lookup info: %d\n", err); + return; + } + while (head.kern_stack_id != 0) + { + k_t.kern_stack_id = head.kern_stack_id; + // find existing reason + enum OFFCPU_REASON off_reason; + off_reason = find_offcpu_reason(head.kern_stack_id, obj); + if (off_reason == UNKNOWN_R) + { + // add reason + off_reason = add_offcpu_reason(ksyms, obj, head.kern_stack_id); + } + + // add delta + err = bpf_map_lookup_elem(ifd, &k_t, &val_off); + if (err < 0) + { + fprintf(stderr, "failed to lookup info: %d\n", err); + return; + } + add_offcpu_delta(request, off_reason, val_off.delta); + // next stacks + err = bpf_map_lookup_elem(rsfd, &head, &stack_next); + if (err == ENOENT) + { + return; + } + head.kern_stack_id = stack_next.kern_stack_id; + } + if (request->offcpu_NET_t > request->offcpu_SERVER_t) + { + request->offcpu_NET_t -= request->offcpu_SERVER_t; + } + else + { + request->offcpu_SERVER_t = request->offcpu_NET_t; + request->offcpu_NET_t = 0; + } +} + +static void analyse(struct ksyms *ksyms, + struct rtdelay_bpf *obj) +{ + struct read_key lookup_key = {}, next_key; + int err, ifd_on; + struct tm *beijing_time; + char buffer[80]; + + ifd_on = bpf_map__fd(obj->maps.info_on); + if (output_sysom) + printf("["); + else + printf("RT时延分析如下:\n"); + + while (!bpf_map_get_next_key(ifd_on, &lookup_key, &next_key)) + { + struct Request r = {}; + struct val_t_on val_on; + err = bpf_map_lookup_elem(ifd_on, &next_key, &val_on); + if (val_on.flag == 0) + { + lookup_key = 
next_key; + continue; + } + offcpu_analyse(ksyms, obj, &next_key, &r); + lookup_key = next_key; + + if (!output_sysom) + { + beijing_time = localtime(&r.read_ts); + strftime(buffer, 80, "%Y-%m-%d %H:%M:%S", beijing_time); + printf("%s %8s", buffer, " "); + printf("RT时延:%lld\n cpu时间:%lld, 运行队列:%lld, futex:%lld, mutex:%lld, 存储:%lld, 网络:%lld, server端:%lld, 其他:%lld\n", r.rtlatency_t, r.oncpu_t, r.runqueue_t, r.offcpu_FUTEX_t, r.offcpu_LOCK_t, r.offcpu_IO_t, r.offcpu_NET_t, r.offcpu_SERVER_t, r.offcpu_OTHER_t); + } + else + printf("{\"read_ts\":%lld, \"on\":%lld, \"runqueue\":%lld, \"rt_latency\":%lld, \"futex\":%lld, \"lock\":%lld, \"io\":%lld, \"net\":%lld, \"server\":%lld, \"other\":%lld},\n", r.read_ts, r.oncpu_t, r.runqueue_t, r.rtlatency_t, r.offcpu_FUTEX_t, r.offcpu_LOCK_t, r.offcpu_IO_t, r.offcpu_NET_t, r.offcpu_SERVER_t, r.offcpu_OTHER_t); + } + + if (output_sysom) + printf("{}]"); +} + +int rtdelay(pid_t pid, pid_t server_pid, int duration, int json_out) +{ + initData(&FL); + + struct rtdelay_bpf *skel; + int err, argfd, key = 0; + struct ksyms *ksyms = NULL; + // struct syms_cache *syms_cache = NULL; + struct bpfarg bpfarg; + + /* Set up libbpf errors and debug info callback */ + libbpf_set_print(libbpf_print_fn); + bump_memlock_rlimit(); + /* Cleaner handling of Ctrl-C */ + signal(SIGINT, sig_handler); + signal(SIGTERM, sig_handler); + + /* Load and verify BPF application */ + skel = rtdelay_bpf__open(); + if (!skel) + { + fprintf(stderr, "Failed to open and load BPF skeleton\n"); + return 1; + } + + /* Load & verify BPF programs */ + err = rtdelay_bpf__load(skel); + if (err) + { + fprintf(stderr, "Failed to load and verify BPF skeleton\n"); + goto cleanup; + } + + argfd = bpf_map__fd(skel->maps.argmap); + bpfarg.targ_tgid = pid; + bpfarg.server_pid = server_pid; + output_sysom = json_out; + bpf_map_update_elem(argfd, &key, &bpfarg, 0); + + ksyms = ksyms__load(); + if (!ksyms) + { + fprintf(stderr, "failed to load kallsyms\n"); + goto cleanup; + } + + /* 
Attach tracepoints */ + err = rtdelay_bpf__attach(skel); + if (err) + { + fprintf(stderr, "Failed to attach BPF skeleton\n"); + goto cleanup; + } + + sleep(duration); + analyse(ksyms, skel); + +cleanup: + /* Clean up */ + + rtdelay_bpf__destroy(skel); + ksyms__free(ksyms); + + return err < 0 ? -err : 0; +} diff --git a/source/tools/detect/sched/rtdelay/rtdelay.h b/source/tools/detect/sched/rtdelay/rtdelay.h new file mode 100644 index 0000000000000000000000000000000000000000..2726a22d57372d567bb0a20a3a1bf223620b9d20 --- /dev/null +++ b/source/tools/detect/sched/rtdelay/rtdelay.h @@ -0,0 +1,120 @@ +#ifndef _RTDELAY_H +#define _RTDELAY_H + +#define TASK_COMM_LEN 16 + +enum support_syscall_e { + FuncUnknown, + FuncWrite, + FuncRead, + FuncSend, + FuncRecv, + FuncSendTo, + FuncRecvFrom, + FuncSendMsg, + FuncRecvMsg, + FuncMmap, + FuncSockAlloc, + FuncAccept, + FuncAccept4, + FuncSecuritySendMsg, + FuncSecurityRecvMsg, +}; + +struct conn_param_t { + const struct sockaddr *addr; + int32_t fd; +}; + + +struct data_param_t { + enum support_syscall_e syscall_func; + bool real_conn; + int32_t fd; + const char *buf; + const struct iovec *iov; + size_t iovlen; + unsigned int *msg_len; +}; + +struct bpfarg { + // bool kernel_threads_only; + // bool user_threads_only; + // __u64 max_block_ns; + // __u64 min_block_ns; + pid_t targ_tgid; + pid_t server_pid; + // pid_t targ_pid; + // long state; +}; + +struct config_info_t { + int32_t port; + int32_t self_pid; + int32_t data_sample; + int32_t threhold_ms; +}; + +struct perf_test { + enum support_syscall_e syscall_func; + int32_t fd; + uint64_t start_ts; + uint64_t id; +}; + +struct key_on { + uint32_t pid; +}; + +struct val_t_on { + uint64_t delta; + int flag; + uint64_t runqueue; + uint64_t rtlatency; + uint64_t server_delta; +}; + + +struct val_t { + uint64_t delta; + // char comm[TASK_COMM_LEN]; +}; + +struct perf_oncpu { + uint64_t delta; + uint64_t read_ts; + uint64_t now_ts; + // int flag; +}; + +struct key_t { + // uint32_t 
pid; + // uint32_t tgid; + // int user_stack_id; + uint64_t read_ts; + int kern_stack_id; +}; + + +struct read_key { + uint64_t read_ts; +}; + + +struct stacks_q{ + int kern_stack_id; + uint64_t read_ts; +}; + + +enum OFFCPU_REASON{ + FUTEX_R, + LOCK_R, + IO_R, + NET_R, + SERVER_R, + OTHER_R, + UNKNOWN_R, +}; + +#endif diff --git a/source/tools/detect/sched/rtdelay/rtdelay_api.h b/source/tools/detect/sched/rtdelay/rtdelay_api.h new file mode 100644 index 0000000000000000000000000000000000000000..1062ff32cde8533558bc9a29407264affa2732bf --- /dev/null +++ b/source/tools/detect/sched/rtdelay/rtdelay_api.h @@ -0,0 +1 @@ +int rtdelay(int pid, int server_pid, int duration, int json_out); diff --git a/source/tools/detect/sched/rtdelay/trace_helpers.c b/source/tools/detect/sched/rtdelay/trace_helpers.c new file mode 100644 index 0000000000000000000000000000000000000000..70e5dae30f327485d522477582971234e8015919 --- /dev/null +++ b/source/tools/detect/sched/rtdelay/trace_helpers.c @@ -0,0 +1,172 @@ +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "trace_helpers.h" + +struct ksyms { + struct ksym *syms; + int ksyms_p; + int ksym_capacity; + char *strs; + int name_sz; + int name_capacity; +}; + +static int ksyms__add_symbol(struct ksyms *ksyms, const char *name, unsigned long addr) +{ + size_t new_cap, name_len = strlen(name) + 1; + struct ksym *ksym; + void *tmp; + + if (ksyms->name_sz + name_len > ksyms->name_capacity) { + new_cap = ksyms->name_capacity * 4 / 3; + if (new_cap < ksyms->name_sz + name_len) + new_cap = ksyms->name_sz + name_len; + if (new_cap < 1024) + new_cap = 1024; + tmp = realloc(ksyms->strs, new_cap); + if (!tmp) + return -1; + ksyms->strs = tmp; + ksyms->name_capacity = new_cap; + } + if (ksyms->ksyms_p + 1 > ksyms->ksym_capacity) { + new_cap = ksyms->ksym_capacity * 4 / 3; + if (new_cap < 1024) + new_cap = 
1024; + tmp = realloc(ksyms->syms, sizeof(*ksyms->syms) * new_cap); + if (!tmp) + return -1; + ksyms->syms = tmp; + ksyms->ksym_capacity = new_cap; + } + + ksym = &ksyms->syms[ksyms->ksyms_p]; + /* while constructing, re-use pointer as just a plain offset */ + ksym->name = (void *)(unsigned long)ksyms->name_sz; + ksym->addr = addr; + + memcpy(ksyms->strs + ksyms->name_sz, name, name_len); + ksyms->name_sz += name_len; + ksyms->ksyms_p++; + + return 0; +} + +static int ksym_cmp(const void *p1, const void *p2) +{ + const struct ksym *s1 = p1, *s2 = p2; + + if (s1->addr == s2->addr) + return strcmp(s1->name, s2->name); + return s1->addr < s2->addr ? -1 : 1; +} + +struct ksyms *ksyms__load(void) +{ + char sym_type, sym_name[256]; + struct ksyms *ksyms; + unsigned long sym_addr; + int i, ret; + FILE *f; + + f = fopen("/proc/kallsyms", "r"); + if (!f) + return NULL; + + ksyms = calloc(1, sizeof(*ksyms)); + if (!ksyms) + goto err_out; + + while (true) { + ret = fscanf(f, "%lx %c %s%*[^\n]\n", + &sym_addr, &sym_type, sym_name); + if (ret == EOF && feof(f)) + break; + if (ret != 3) + goto err_out; + if (ksyms__add_symbol(ksyms, sym_name, sym_addr)) + goto err_out; + } + + /* now when strings are finalized, adjust pointers properly */ + for (i = 0; i < ksyms->ksyms_p; i++) + ksyms->syms[i].name += (unsigned long)ksyms->strs; + + qsort(ksyms->syms, ksyms->ksyms_p, sizeof(*ksyms->syms), ksym_cmp); + + fclose(f); + return ksyms; + +err_out: + ksyms__free(ksyms); + fclose(f); + return NULL; +} + +void ksyms__free(struct ksyms *ksyms) +{ + if (!ksyms) + return; + + free(ksyms->syms); + free(ksyms->strs); + free(ksyms); +} + +const struct ksym *ksyms__map_addr(const struct ksyms *ksyms, + unsigned long addr) +{ + int start = 0, end = ksyms->ksyms_p - 1, mid; + unsigned long sym_addr; + + /* find largest sym_addr <= addr using binary search */ + while (start < end) { + mid = start + (end - start + 1) / 2; + sym_addr = ksyms->syms[mid].addr; + + if (sym_addr <= addr) + start = mid; 
+ else + end = mid - 1; + } + + if (start == end && ksyms->syms[start].addr <= addr) + return &ksyms->syms[start]; + return NULL; +} + +const struct ksym *ksyms__get_symbol(const struct ksyms *ksyms, + const char *name) +{ + int i; + + for (i = 0; i < ksyms->ksyms_p; i++) { + if (strcmp(ksyms->syms[i].name, name) == 0) + return &ksyms->syms[i]; + } + + return NULL; +} + +unsigned long long get_ktime_ns(void) +{ + struct timespec ts; + + clock_gettime(CLOCK_MONOTONIC, &ts); + return ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec; +} diff --git a/source/tools/detect/sched/rtdelay/trace_helpers.h b/source/tools/detect/sched/rtdelay/trace_helpers.h new file mode 100644 index 0000000000000000000000000000000000000000..268fc371a660b6a0bc009643dce6e66659b5ff38 --- /dev/null +++ b/source/tools/detect/sched/rtdelay/trace_helpers.h @@ -0,0 +1,24 @@ +#ifndef __TRACE_HELPERS_H +#define __TRACE_HELPERS_H + +#include + +#define NSEC_PER_SEC 1000000000ULL + +struct ksym { + const char *name; + unsigned long addr; +}; + +struct ksyms; + +struct ksyms *ksyms__load(void); +void ksyms__free(struct ksyms *ksyms); +const struct ksym *ksyms__map_addr(const struct ksyms *ksyms, + unsigned long addr); +const struct ksym *ksyms__get_symbol(const struct ksyms *ksyms, + const char *name); + +unsigned long long get_ktime_ns(void); + +#endif /* __TRACE_HELPERS_H */ diff --git a/source/tools/detect/sched/tasktop/README.md b/source/tools/detect/sched/tasktop/README.md index 558e8a26d36fb0e76d198897b57e251a913c8341..bbca34fa69bf33a58b2bb6944682bd5235fe1d0d 100644 --- a/source/tools/detect/sched/tasktop/README.md +++ b/source/tools/detect/sched/tasktop/README.md @@ -1,17 +1,21 @@ -# Tasktop - +# 功能说明 针对于负载问题的分析工具,相比loadtask对于细分场景进行了覆盖。 -## Usgae +# 使用说明 + +## 基础用法 ```bash Usage: tasktop [OPTION...] -A light top, display the process/thread cpu utilization in peroid. +Load analyze & D stack catch. 
-USAGE: tasktop [--help] [-t] [-p TID] [-d DELAY] [-i ITERATION] [-s SORT] [-f -LOGFILE] [-l LIMIT] [-H] [-e D-LIMIT] +USAGE: +load analyze: tasktop [--help] [-t] [-p TID] [-d DELAY] [-i ITERATION] [-s +SORT] [-f LOGFILE] [-l LIMIT] [-H] [-e D-LIMIT] +catch D task stack: tasktop [--mode blocked] [--threshold TIME] [--run TIME] EXAMPLES: +1. Load analyze examples: tasktop # run forever, display the cpu utilization. tasktop -t # display all thread. tasktop -p 1100 # only display task with pid 1100. @@ -20,26 +24,102 @@ EXAMPLES: tasktop -s user # top tasks sorted by user time. tasktop -l 20 # limit the records number no more than 20. tasktop -e 10 # limit the d-stack no more than 10, default is 20. - tasktop -H # output time string, not timestamp. + tasktop -H # output time string, not timestamp. tasktop -f a.log # log to a.log. tasktop -e 10 # most record 10 d-task stack. +2. blocked analyze examples: + tasktop --mode blocked --threshold 1000 --run 120 # tasktop run (120s) +catch the task that blocked in D more than (1000 ms) + + + -b, --threshold=TIME(ms) dtask blocked threshold, default is 3000 ms -d, --delay=DELAY Sample peroid, default is 3 seconds -e, --d-limit=D-LIMIT Specify the D-LIMIT D tasks's stack to display -f, --logfile=LOGFILE Logfile for result, default /var/log/sysak/tasktop/tasktop.log -H, --human Output human-readable time info. 
-i, --iter=ITERATION Output times, default run forever + -k, --kthread blocked-analyze output kernel-thread D stack + information -l, --r-limit=LIMIT Specify the top R-LIMIT tasks to display + -m, --mode=MODE MODE is load or blocked, default is load -p, --pid=TID Specify thread TID + -r, --run=TIME(s) run time in secnonds -s, --sort=SORT Sort the result, available options are user, sys and cpu, default is cpu -t, --thread Thread mode, default process + -v, --verbose ebpf program output verbose message -?, --help Give this help list --usage Give a short usage message -V, --version Print program version + +Mandatory or optional arguments to long options are also mandatory or optional +for any corresponding short options. ``` -## 使用说明 +## 典型使用场景 + +### 负载问题分析 详细的使用案例,请查看`tasktopSelftest`目录下的`test.md`。 + +### 抓取进程D状态超时栈 + +如果是负载高由D进程导致,可以抓取超时的D进程栈实现进一步地定位和分析。 + +#### 具体使用方法 +```bash +sysak tasktop --mode blocked --threshold 1000 -f /dev/stdout --kthread --run 60 + +--mode: 抓取D进程超时栈 +--threshold: 设定超时门限,单位毫秒。默认为3000ms。 +-f: 设定日志输出位置 +--kthread: 抓取内核线程超时栈,默认只抓取用户进程。 +--run: 运行持续时间,单位秒,默认永久运行。 +``` + +#### 输出数据格式 + 输出数据包含两类事件,Timeout为超时事件,当task的D状态持续时间达到超时门限时会输出该事件以及对应的内核栈;Stop-D为task的D状态结束事件,此时会输出D进程具体的持续时长。`Start(ns)`为该次D状态发生时的时间戳,该值为系统自启动以来的运行总时长。可以通过`Start(ns)`和`pid`属性确认该次超时发生在哪个进程的哪一次D状态睡眠。同时通过`Start(ns)`可以分析不同进程进入D状态的事件顺序,以及分析同一进程多次进入D进程的事件顺序。 + +* Time: 日志时间戳 +* Event: 事件类型,分为超时事件(Timeout)以及D状态结束事件(Stop-D)。 +* Comm: 进程命令 +* Pid: 进程ID +* Start(ns): 进入D状态时的时间戳,为系统启动以来运行总时长。 +* Stack|Delya(ms): 事件不同时输出不同的信息,Timeout事件输出超时的内核栈,Stop-D事件输出D状态持续时长,单位为ms。 + +```bash + Time Event Comm Pid Start(ns) Stack|Delya(ms) + 2023-10-31 07:08:03 Timeout cat 45722 13817098190487947 [<0>] open_proc+0x53/0x7e [mutex_block] + [<0>] proc_reg_open+0x72/0x130 + [<0>] do_dentry_open+0x23a/0x3a0 + [<0>] path_openat+0x768/0x13e0 + [<0>] do_filp_open+0x99/0x110 + [<0>] do_sys_open+0x12e/0x210 + [<0>] do_syscall_64+0x55/0x1a0 + [<0>] entry_SYSCALL_64_after_hwframe+0x44/0xa9 + [<0>] 
0xffffffffffffffff + 2023-10-31 07:08:06 Stop-D cat 45722 13817098190487947 3052 + 2023-10-31 07:08:08 Timeout cat 45885 13817103246221434 [<0>] open_proc+0x53/0x7e [mutex_block] + [<0>] proc_reg_open+0x72/0x130 + [<0>] do_dentry_open+0x23a/0x3a0 + [<0>] path_openat+0x768/0x13e0 + [<0>] do_filp_open+0x99/0x110 + [<0>] do_sys_open+0x12e/0x210 + [<0>] do_syscall_64+0x55/0x1a0 + [<0>] entry_SYSCALL_64_after_hwframe+0x44/0xa9 + [<0>] 0xffffffffffffffff + 2023-10-31 07:08:11 Stop-D cat 45885 13817103246221434 3052 + 2023-10-31 07:08:13 Timeout cat 45996 13817108302333045 [<0>] open_proc+0x53/0x7e [mutex_block] + [<0>] proc_reg_open+0x72/0x130 + [<0>] do_dentry_open+0x23a/0x3a0 + [<0>] path_openat+0x768/0x13e0 + [<0>] do_filp_open+0x99/0x110 + [<0>] do_sys_open+0x12e/0x210 + [<0>] do_syscall_64+0x55/0x1a0 + [<0>] entry_SYSCALL_64_after_hwframe+0x44/0xa9 + [<0>] 0xffffffffffffffff + 2023-10-31 07:08:16 Stop-D cat 45996 13817108302333045 3052 +``` + diff --git a/source/tools/detect/sched/tasktop/bpf/tasktop.bpf.c b/source/tools/detect/sched/tasktop/bpf/tasktop.bpf.c index 60f86d456d21ea923e85013d0200e4c658a72b6b..f659c27427cebd9a27c704eda402d146ba2bd72b 100644 --- a/source/tools/detect/sched/tasktop/bpf/tasktop.bpf.c +++ b/source/tools/detect/sched/tasktop/bpf/tasktop.bpf.c @@ -1,12 +1,13 @@ #include +#include #include #include -#include #include #include "../common.h" -#define MAX_PID +#define TASK_UNINTERRUPTIBLE 2 +#define PF_KTHREAD 0x00200000 #define _(P) \ ({ \ @@ -15,6 +16,13 @@ val; \ }) +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, u32); + __type(value, struct arg_to_bpf); +} arg_map SEC(".maps"); + struct { __uint(type, BPF_MAP_TYPE_HASH); __uint(max_entries, 4096); @@ -29,6 +37,26 @@ struct { __type(value, u64); } cnt_map SEC(".maps"); +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 10240); + __type(key, struct d_task_key_t); + __type(value, struct d_task_info_t); +} d_task_map SEC(".maps"); + +struct { 
+ __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 10240); + __type(key, u32); + __type(value, u64); +} start_query_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); + __uint(key_size, sizeof(int)); + __uint(value_size, sizeof(u32)); +} d_task_notify_map SEC(".maps"); + struct trace_event_sched_process_fork_args { struct trace_entry ent; char parent_comm[16]; @@ -37,6 +65,34 @@ struct trace_event_sched_process_fork_args { pid_t child_pid; }; +struct trace_event_sched_switch_args { + struct trace_entry ent; + char prev_comm[16]; + pid_t prev_pid; + int prev_prio; + long prev_state; + char next_comm[16]; + pid_t next_pid; + int next_prio; +}; + +struct trace_event_sched_stat_blocked_args { + struct trace_entry ent; + char comm[16]; + pid_t pid; + u64 delay; +}; + +/* for eBPF-CORE process different kernel version */ +struct task_struct___old { + long state; +} __attribute__((preserve_access_index)); + +struct task_struct_mock { + struct thread_info info; + long state; +}; + static __always_inline void update_cnt_map() { u32 zero = 0; u64 *value = 0; @@ -70,11 +126,186 @@ static __always_inline void update_fork_map() { } } +static __always_inline int fork_enable() { + u32 key = 0; + struct arg_to_bpf *args = bpf_map_lookup_elem(&arg_map, &key); + if (!args) { + return 0; + } + return args->fork_enable; +} + SEC("tp/sched/sched_process_fork") -int handle__sched_process_fork(struct trace_event_sched_process_fork_args *ctx) { +int handle__sched_process_fork( + struct trace_event_sched_process_fork_args *ctx) { + if (fork_enable() == 0) { + return 0; + } + update_cnt_map(); update_fork_map(); return 0; } -char LICENSE[] SEC("license") = "GPL"; +static __always_inline void insert_one_task(u32 pid, char *comm, int verbose) { + struct d_task_key_t key; + struct d_task_info_t val; + __builtin_memset(&key, 0, sizeof(struct d_task_key_t)); + __builtin_memset(&val, 0, sizeof(struct d_task_info_t)); + int err = 0; + u64 now = 0; + + now = 
bpf_ktime_get_ns(); + if (!comm) { + return; + } + + err = bpf_map_update_elem(&start_query_map, &pid, &now, BPF_NOEXIST); + if (err) { + char fmt[] = + "error[insert_one_task]: update start_query_map error. err=(%d) " + "pid=(%d)\n"; + bpf_trace_printk(fmt, sizeof(fmt), err, pid); + } + + key.pid = pid; + key.start_time_ns = now; + val.is_recorded = 0; + bpf_probe_read(val.comm, 16, comm); + + err = bpf_map_update_elem(&d_task_map, &key, &val, BPF_NOEXIST); + if (err) { + char fmt[] = + "error[insert_one_task]: update d_task_map error. err=(%d) " + "pid=(%d)\n"; + bpf_trace_printk(fmt, sizeof(fmt), err, pid); + } + + if (verbose) { + char fmt[] = + "debug[insert_one_task]: success record start time " + "pid=(%d)\n"; + bpf_trace_printk(fmt, sizeof(fmt), pid); + } +} + +static void __always_inline delete_one_task(u32 pid, u64 threshold, void *ctx, + int verbose) { + struct d_task_key_t key; + struct d_task_blocked_event_t ev; + struct d_task_info_t *val = 0; + u64 *start_ns = 0; + u64 now = bpf_ktime_get_ns(); + u64 duration_ns = 0; + int err = 0; + + __builtin_memset(&key, 0, sizeof(struct d_task_key_t)); + __builtin_memset(&ev, 0, sizeof(struct d_task_blocked_event_t)); + + key.pid = pid; + start_ns = (u64 *)bpf_map_lookup_elem(&start_query_map, &pid); + if (!start_ns) { + return; + } + key.start_time_ns = *start_ns; + + val = bpf_map_lookup_elem(&d_task_map, &key); + if (!val) { + char fmt[] = + "error[delete_one_task]: query bpf_map_lookup_elem error. " + "err=(%d) " + "pid=(%d)\n"; + bpf_trace_printk(fmt, sizeof(fmt), err, pid); + return; + } + + duration_ns = now - key.start_time_ns; + if (duration_ns >= threshold || val->is_recorded != 0) { + ev.duration_ns = duration_ns; + ev.pid = pid; + ev.start_time_ns = key.start_time_ns; + bpf_probe_read(ev.info.comm, 16, val->comm); + + err = bpf_perf_event_output(ctx, &d_task_notify_map, BPF_F_CURRENT_CPU, + &ev, sizeof(ev)); + if (err) { + char fmt[] = + "error[send_event]: bpf_perf_event_output error. 
err=(%d) " + "pid=(%lu)\n"; + bpf_trace_printk(fmt, sizeof(fmt), err, pid); + return; + } + } + + err = bpf_map_delete_elem(&start_query_map, &pid); + if (err) { + char fmt[] = + "error[delete_one_task]: delete start_query_map error. err=(%d) " + "pid=(%d)\n"; + bpf_trace_printk(fmt, sizeof(fmt), err, pid); + return; + } + + err = bpf_map_delete_elem(&d_task_map, &key); + if (err) { + char fmt[] = + "error[delete_one_task]: delete d_task_map error. err=(%d) " + "pid=(%d)\n"; + bpf_trace_printk(fmt, sizeof(fmt), err, pid); + return; + } + + if (verbose) { + char fmt[] = + "debug[delete_one_task]: success delete record " + "pid=(%d)\n"; + bpf_trace_printk(fmt, sizeof(fmt), pid); + } +} + +SEC("tp/sched/sched_switch") +int handle__sched_switch(struct trace_event_sched_switch_args *ctx) { + u32 key = 0, flags = 0; + long state = 0; + struct task_struct *tsk = 0; + + struct arg_to_bpf *args = bpf_map_lookup_elem(&arg_map, &key); + if (!args || args->blocked_enable == 0) { + return 0; + } + + tsk = (void *)bpf_get_current_task(); + if (bpf_core_field_exists(tsk->__state)) { + state = BPF_CORE_READ(tsk, __state); + } else { + struct task_struct___old *t_old = (void *)tsk; + state = BPF_CORE_READ(t_old, state); + } + // state = _(tsk_mock->state); + + flags = BPF_CORE_READ(tsk, flags); + /* if not enable kernel-thread, only record user-thread*/ + if (state & TASK_UNINTERRUPTIBLE && + (args->kthread_enable != 0 || ~flags & PF_KTHREAD)) { + insert_one_task(ctx->prev_pid, ctx->prev_comm, args->verbose_enable); + } + + delete_one_task(ctx->next_pid, args->threshold_ns, ctx, + args->verbose_enable); + return 0; +} + +SEC("tp/sched/sched_stat_blocked") +int handle__sched_stat_blocked( + struct trace_event_sched_stat_blocked_args *ctx) { + u32 key = 0; + struct arg_to_bpf *args = bpf_map_lookup_elem(&arg_map, &key); + if (!args || args->blocked_enable == 0) { + return 0; + } + + delete_one_task(ctx->pid, args->threshold_ns, ctx, args->verbose_enable); + return 0; +} + +char 
LICENSE[] SEC("license") = "GPL"; \ No newline at end of file diff --git a/source/tools/detect/sched/tasktop/common.h b/source/tools/detect/sched/tasktop/common.h index 78f250a4cdc00e4fdf2e5d16afda9819a56868fe..b4fa3646db3350030ffd79ea8726f2efe06ae521 100644 --- a/source/tools/detect/sched/tasktop/common.h +++ b/source/tools/detect/sched/tasktop/common.h @@ -1,10 +1,41 @@ #ifndef TASKTOP_COMMON_H #define TASKTOP_COMMON_H +typedef u_int64_t u64; +typedef u_int32_t u32; +typedef int32_t s32; +typedef int64_t s64; + struct proc_fork_info_t { - pid_t pid; - pid_t ppid; - u_int64_t fork; - char comm[16]; + u32 pid; + u32 ppid; + u64 fork; + char comm[32]; +}; + +struct d_task_info_t { + char comm[32]; + s32 is_recorded; +}; + +struct d_task_blocked_event_t { + struct d_task_info_t info; + u32 pid; + u64 start_time_ns; + u64 duration_ns; }; + +struct arg_to_bpf { + u64 threshold_ns; + s32 fork_enable; + s32 blocked_enable; + s32 verbose_enable; + s32 kthread_enable; +}; + +struct d_task_key_t { + u32 pid; + u64 start_time_ns; +}; + #endif \ No newline at end of file diff --git a/source/tools/detect/sched/tasktop/tasktop.c b/source/tools/detect/sched/tasktop/tasktop.c index 5835394ac13776fadc370a68f573e521c2942ef0..cadd9f5de03771f398a84be56a549fc1953665fa 100644 --- a/source/tools/detect/sched/tasktop/tasktop.c +++ b/source/tools/detect/sched/tasktop/tasktop.c @@ -1,50 +1,58 @@ + +#include "tasktop.h" + #include -#include -#include -#include +#include +#include #include #include -#include -#include -#include -#include -#include -#include -#include -#include +#include #include +#include #include -#include -#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + #include "bpf/tasktop.skel.h" -#include "procstate.h" -#include "tasktop.h" #include "common.h" +#include "procstate.h" -// #define DEBUG -// #define LOG_DEBUG -// #define ONLY_THREAD -// #define STRESS_TEST +#define NSEC_PER_SECOND (1000 * 1000 * 1000) +#define 
NSEC_PER_MILLSECOND (1000 * 1000) char log_dir[FILE_PATH_LEN] = "/var/log/sysak/tasktop"; char default_log_path[FILE_PATH_LEN] = "/var/log/sysak/tasktop/tasktop.log"; time_t btime = 0; -u_int64_t pidmax = 0; +u64 pidmax = 0; char* log_path = 0; int nr_cpu; -u_int64_t* prev_delay; static volatile sig_atomic_t exiting; +enum Mode { LOAD, BLOCKED }; + struct env { - bool thread_mode; time_t delay; pid_t tid; - int64_t nr_iter; + s64 nr_iter; + s64 stack_limit; + s64 cgroup_limit; + s64 limit; + int run; enum sort_type rec_sort; - int64_t limit; + enum Mode mode; + u64 blocked_ms; + FILE* dest; + bool thread_mode; bool human; - int64_t stack_limit; - int64_t cgroup_limit; + bool verbose; + bool kthread; } env = {.thread_mode = false, .delay = 3, .tid = -1, @@ -53,18 +61,29 @@ struct env { .nr_iter = LONG_MAX - 1, .limit = INT_MAX, .stack_limit = 20, - .cgroup_limit = 20}; + .cgroup_limit = 20, + .mode = LOAD, + .blocked_ms = 3000, + .verbose = false, + .dest = 0, + .kthread = false, + .run = -1}; const char* argp_program_version = "tasktop 0.1"; const char argp_program_doc[] = - "A light top, display the process/thread cpu utilization in peroid.\n" + "Load analyze & D stack catch.\n" "\n" - "USAGE: tasktop [--help] [-t] [-p TID] [-d DELAY] [-i ITERATION] [-s SORT] " + "USAGE: \n" + "load analyze: tasktop [--help] [-t] [-p TID] [-d DELAY] [-i ITERATION] " + "[-s SORT] " "[-f LOGFILE] [-l LIMIT] [-H] [-e D-LIMIT]\n" + "catch D task stack: tasktop [--mode blocked] [--threshold TIME] [--run " + "TIME]\n" "\n" "EXAMPLES:\n" + "1. Load analyze examples:\n" " tasktop # run forever, display the cpu utilization.\n" " tasktop -t # display all thread.\n" " tasktop -p 1100 # only display task with pid 1100.\n" @@ -76,7 +95,12 @@ const char argp_program_doc[] = "20.\n" " tasktop -H # output time string, not timestamp.\n" " tasktop -f a.log # log to a.log.\n" - " tasktop -e 10 # most record 10 d-task stack.\n"; + " tasktop -e 10 # most record 10 d-task stack.\n" + "\n" + "2. 
blocked analyze examples:\n" + " tasktop --mode blocked --threshold 1000 --run 120 # tasktop run " + "(120s) catch the task that blocked in D more than (1000 ms)\n" + "\n"; static const struct argp_option opts[] = { {"human", 'H', 0, 0, "Output human-readable time info."}, @@ -92,19 +116,19 @@ static const struct argp_option opts[] = { {"r-limit", 'l', "LIMIT", 0, "Specify the top R-LIMIT tasks to display"}, {"d-limit", 'e', "D-LIMIT", 0, "Specify the D-LIMIT D tasks's stack to display"}, - + {"mode", 'm', "MODE", 0, "MODE is load or blocked, default is load"}, + {"threshold", 'b', "TIME(ms)", 0, + "dtask blocked threshold, default is 3000 ms"}, + {"verbose", 'v', 0, 0, "ebpf program output verbose message"}, + {"run", 'r', "TIME(s)", 0, "run time in secnonds"}, + {"kthread", 'k', 0, 0, + "blocked-analyze output kernel-thread D stack information"}, {NULL, 'h', NULL, OPTION_HIDDEN, "Show the full help"}, {}, }; -/* PROCESS MODE - /proc/pid/stat -- calculate process cpu util - /proc/pid/task/tid/stat -- check task state, if d read the stack */ +u64 get_now_ns(); -/* THREAD MODE -/proc/pid/task/tid/stat -- calcualte thread cpu util -/proc/pid/task/tid/sat -- read task state, if d read stack -*/ static int prepare_directory(char* path) { int ret; @@ -142,6 +166,9 @@ static error_t parse_arg(int key, char* arg, struct argp_state* state) { case 'h': argp_state_help(state, stderr, ARGP_HELP_STD_HELP); break; + case 'v': + env.verbose = true; + break; case 't': env.thread_mode = true; break; @@ -205,6 +232,34 @@ static error_t parse_arg(int key, char* arg, struct argp_state* state) { case 'H': env.human = true; break; + case 'm': + if (!strcmp(arg, "load")) { + env.mode = LOAD; + } else if (!strcmp(arg, "blocked")) { + env.mode = BLOCKED; + } else { + argp_usage(state); + } + break; + case 'b': + err = parse_long(arg, &val); + if (err || val <= 0) { + fprintf(stderr, "Failed parse blocked threshold.\n"); + argp_usage(state); + } + env.blocked_ms = val; + break; + case 'r': 
+ err = parse_long(arg, &val); + if (err || val <= 0) { + fprintf(stderr, "Failed parse run time.\n"); + argp_usage(state); + } + env.run = val; + break; + case 'k': + env.kthread = true; + break; case ARGP_KEY_ARG: break; default: @@ -308,19 +363,11 @@ cleanup: static int read_d_task(struct id_pair_t* pids, int nr_thread, int* stack_num, struct D_task_record_t* d_tasks) { -#ifdef DEBUG - fprintf(stderr, "DEBUG: read_d_task\n"); -#endif - int i = 0; - int err = 0; - -#ifdef DEBUG - struct timeval start, end; - err = gettimeofday(&start, 0); - if (err) fprintf(stderr, "read start time error.\n"); -#endif + if (env.verbose) { + fprintf(stderr, "DEBUG: read_d_task\n"); + } - int d_num = 0; + int i = 0, err = 0, d_num = 0; for (i = 0; i < nr_thread; i++) { if (d_num >= env.stack_limit) break; int pid = pids[i].pid; @@ -332,18 +379,10 @@ static int read_d_task(struct id_pair_t* pids, int nr_thread, int* stack_num, } } *stack_num = d_num; - -#ifdef DEBUG - err = gettimeofday(&end, 0); - if (err) fprintf(stderr, "read end time error.\n"); - fprintf(stderr, "read %d thread user %lds %ldus.\n", nr_thread, - end.tv_sec - start.tv_sec, end.tv_usec - start.tv_usec); -#endif - return err; } -static int read_sched_delay(struct sys_record_t* sys_rec) { +static int read_sched_delay(struct sys_record_t* sys_rec, u64* prev_delay) { FILE* fp = fopen(SCHEDSTAT_PATH, "r"); int err = 0; if (!fp) { @@ -561,7 +600,7 @@ cleanup: return err; }; -static u_int64_t read_pid_max() { +static u64 read_pid_max() { int err = 0; FILE* fp = fopen(PIDMAX_PATH, "r"); if (!fp) { @@ -576,12 +615,12 @@ static u_int64_t read_pid_max() { return err; } -static int read_all_pids(struct id_pair_t* pids, u_int64_t* num) { +static int read_all_pids(struct id_pair_t* pids, u64* num) { int err = 0; DIR* dir = NULL; DIR* task_dir = NULL; - u_int64_t nr_thread = 0; + u64 nr_thread = 0; struct dirent* proc_de = NULL; struct dirent* task_de = NULL; long val; @@ -606,7 +645,10 @@ static int read_all_pids(struct 
id_pair_t* pids, u_int64_t* num) { snprintf(taskpath, FILE_PATH_LEN, "/proc/%d/task", pid); task_dir = opendir(taskpath); if (!task_dir) { - // fprintf(stderr, "Failed opendir %s\n", taskpath); + if (env.verbose) { + fprintf(stderr, "Failed opendir %s\n", taskpath); + } + continue; } @@ -689,7 +731,7 @@ static int read_proc(pid_t pid, pid_t tid, struct task_cputime_t** prev, fscanf(fp, " %c %d %d %d %d %d %u %lu %lu %lu %lu %lu %lu %ld %ld %ld " - "%ld %ld %ld %llu", + "%ld %ld %ld %lu", &proc_info.state, &proc_info.ppid, &proc_info.pgrp, &proc_info.session, &proc_info.tty_nr, &proc_info.tpgid, &proc_info.flags, &proc_info.minflt, &proc_info.cminflt, @@ -703,6 +745,7 @@ static int read_proc(pid_t pid, pid_t tid, struct task_cputime_t** prev, data->ppid = proc_info.ppid; data->starttime = proc_info.starttime; data->pid = proc_info.pid; + data->ts_ns = get_now_ns(); strcpy(data->comm, proc_info.comm); @@ -712,7 +755,10 @@ static int read_proc(pid_t pid, pid_t tid, struct task_cputime_t** prev, if (prev[pid] && !strcmp(prev[pid]->comm, now[pid]->comm)) { long udelta = now[pid]->utime - prev[pid]->utime; long sdelta = now[pid]->stime - prev[pid]->stime; - long base = env.delay * sysconf(_SC_CLK_TCK); + /* if want more accurate, should calculate with clock */ + // long base = env.delay * sysconf(_SC_CLK_TCK); + long base = (now[pid]->ts_ns - prev[pid]->ts_ns) / NSEC_PER_SECOND * + sysconf(_SC_CLK_TCK); if (base != 0) { /* only process cpu utilization > 0 */ @@ -789,7 +835,6 @@ static void build_str(int day, int hour, int min, int sec, char* buf) { snprintf(tmp, 32, "%dd,", day); strcat(buf, tmp); } - if (hour > 0) { snprintf(tmp, 32, "%dh,", hour); strcat(buf, tmp); @@ -853,7 +898,7 @@ static void output_per_cpu(struct record_t* rec, FILE* dest) { char cpu_name[16]; snprintf(cpu_name, 16, "cpu-%d", i - 1); fprintf(dest, - "%7s %6.1f %6.1f %6.1f %6.1f %6.1f %6.1f %6.1f %6.1f %10llu\n", + "%7s %6.1f %6.1f %6.1f %6.1f %6.1f %6.1f %6.1f %6.1f %10lu\n", cpu_name, 
sys->cpu[i].usr, sys->cpu[i].sys, sys->cpu[i].nice, sys->cpu[i].idle, sys->cpu[i].iowait, sys->cpu[i].irq, sys->cpu[i].softirq, sys->cpu[i].steal, @@ -873,7 +918,7 @@ static void output_cgroup(struct record_t* rec, int cgroup_num, FILE* dest) { "nr_periods", "nr_throttled", "throttled_time", "nr_burst", "burst_time"); } - fprintf(dest, "%20s %15d %15d %15llu %15d %15llu\n", + fprintf(dest, "%20s %15d %15d %15lu %15d %15lu\n", cgroups[i].cgroup_name, cgroups[i].nr_periods, cgroups[i].nr_throttled, cgroups[i].throttled_time, cgroups[i].nr_burst, cgroups[i].burst_time); @@ -922,6 +967,25 @@ static void output_tasktop(struct record_t* rec, int rec_num, FILE* dest) { } } +static void output_stack_with_offset(int off, char* str, FILE* dest) { + const char delim[2] = "\n"; + char* token; + + token = strtok(str, delim); + fprintf(dest, "%s\n", token); + + while (true) { + token = strtok(NULL, delim); + if (!token) break; + int cnt = 0; + while (cnt < off) { + fprintf(dest, " "); + cnt++; + } + fprintf(dest, "%s\n", token); + } +} + static void output_d_stack(struct record_t* rec, int d_num, FILE* dest) { int i; struct D_task_record_t* d_tasks = rec->d_tasks; @@ -937,17 +1001,7 @@ static void output_d_stack(struct record_t* rec, int d_num, FILE* dest) { strncpy(str, d_tasks[i].stack, STACK_CONTENT_LEN - 1); - const char delim[2] = "\n"; - char* token; - - token = strtok(str, delim); - fprintf(dest, "%s\n", token); - - while (true) { - token = strtok(NULL, delim); - if (!token) break; - fprintf(dest, "%18s %6s %6s %s\n", "", "", "", token); - } + output_stack_with_offset(18 + 6 + 6 + 3, str, dest); } free(str); @@ -968,11 +1022,6 @@ static bool inline is_high_D(struct record_t* rec) { return rec->sys.nr_D >= THRESHOLD_D; } -// static bool fork_detect(struct record_t* rec, FILE* dest) { -// #define THRESHOLD_FORK_PS 2000 -// return rec->sys.nr_fork >= THRESHOLD_FORK_PS; -// } - double inline calculate_sys(cpu_util_t* cpu) { double sys_util = cpu->iowait + cpu->sys + 
cpu->softirq + cpu->irq; return sys_util; @@ -1156,7 +1205,7 @@ static int make_records(struct id_pair_t* pids, int nr_thread, struct task_cputime_t** now_task, int* rec_num) { struct R_task_record_t** records = rec->r_tasks; int err = 0; - u_int64_t i; + u64 i; int nr_rec = 0; for (i = 0; i < nr_thread; i++) { @@ -1214,6 +1263,7 @@ static FILE* open_logfile() { static int libbpf_print_fn(enum libbpf_print_level level, const char* format, va_list args) { + if (level == LIBBPF_DEBUG && !env.verbose) return 0; return vfprintf(stderr, format, args); } @@ -1229,8 +1279,8 @@ static int bump_memlock_rlimit(void) { static int check_fork(int fork_map_fd, struct sys_record_t* sys_rec) { int fd; int err; - u_int64_t total = 0; - u_int64_t lookup_key = -1, next_key; + u64 total = 0; + u64 lookup_key = -1, next_key; struct proc_fork_info_t info; fd = fork_map_fd; @@ -1260,18 +1310,204 @@ static int check_fork(int fork_map_fd, struct sys_record_t* sys_rec) { static void sigint_handler(int signo) { exiting = 1; } -// #define SEG_TRAP +void handle_lost_events(void* ctx, int cpu, __u64 lost_cnt) { + fprintf(stderr, "Lost %llu events on CPU #%d!\n", lost_cnt, cpu); +} + +void handle_event(void* ctx, int cpu, void* data, __u32 data_sz) { + const struct d_task_blocked_event_t* ev = data; + char stime_str[BUF_SIZE] = {0}; + char* tstr = ts2str(time(0), stime_str, BUF_SIZE); + + fprintf(env.dest, "%20s %10s %16s %10d %20lu %lu\n", tstr, "Stop-D", + ev->info.comm, ev->pid, ev->start_time_ns, + ev->duration_ns / NSEC_PER_MILLSECOND); +} + +u64 get_now_ns() { + struct timespec ts; + clock_gettime(CLOCK_MONOTONIC, &ts); + return ts.tv_sec * NSEC_PER_SECOND + ts.tv_nsec; +} + +u64 check_d_task_timeout(int d_task_map_fd, u64 threshold_ns, FILE* dest) { + struct d_task_info_t info; + u64 now_ns = 0, min_start_time_ns = UINT64_MAX; + int err = 0; + struct d_task_key_t lookup_key = {.pid = 0, .start_time_ns = 0}; + struct d_task_key_t next_key; + char stime_str[BUF_SIZE] = {0}; + char* tstr = 
ts2str(time(0), stime_str, BUF_SIZE); + + char* str = calloc(STACK_CONTENT_LEN, sizeof(char)); + + /* check all d task */ + while (!bpf_map_get_next_key(d_task_map_fd, &lookup_key, &next_key)) { + err = bpf_map_lookup_elem(d_task_map_fd, &next_key, &info); + if (err < 0) { + // this d task maybe deleted by eBPF + goto check_next_key; + } + + if (info.is_recorded == 0) { + /* must get now again, avoid time resolution problem*/ + now_ns = get_now_ns(); + if ((now_ns - next_key.start_time_ns) >= threshold_ns) { + D_task_record_t st; + err = read_stack(next_key.pid, next_key.pid, &st); + if (err || strlen(st.stack) == 0) { + goto check_next_key; + } + + /* if stack get caught the mark the task first */ + info.is_recorded = 1; + err = bpf_map_update_elem(d_task_map_fd, &next_key, &info, + BPF_EXIST); + if (err) { + goto check_next_key; + } + + strncpy(str, st.stack, STACK_CONTENT_LEN - 1); + fprintf(dest, "%20s %10s %16s %10d %20lu ", tstr, "Timeout", + info.comm, next_key.pid, next_key.start_time_ns); + + output_stack_with_offset(20 + 10 + 16 + 10 + 20 + 5, str, dest); + } else { + min_start_time_ns = next_key.start_time_ns < min_start_time_ns + ? next_key.start_time_ns + : min_start_time_ns; + } + } + check_next_key: + lookup_key = next_key; + } + free(str); + return min_start_time_ns; +} + +void wait_d_task_timeout(u64 threshold_ns, u64 min_start_time_ns) { + /* default sleep threshold_ns, if there is a task will timeout, no need + * sleep threshold_ns*/ + u64 now_ns = get_now_ns(); + + /* reset err to return 0 if exiting */ + u64 sleep_ns = threshold_ns; + struct timespec ts, rem; + int err = 0; + /* sleep some time */ + if (min_start_time_ns != UINT64_MAX) { + u64 wait_time_ns = min_start_time_ns + threshold_ns > now_ns + ? 
min_start_time_ns + threshold_ns - now_ns + : 0; + sleep_ns = wait_time_ns; + } + + ts.tv_sec = sleep_ns / NSEC_PER_SECOND; + ts.tv_nsec = sleep_ns % NSEC_PER_SECOND; + +wait_sleep: + err = nanosleep(&ts, &rem); + /* if interupt by signal, but not SIGINT contine sleep */ + if (err != 0 && !exiting) { + ts = rem; + goto wait_sleep; + } + return; +} + +struct tasktop_state { + struct id_pair_t* pids; + struct task_cputime_t** prev_task; + struct task_cputime_t** now_task; + struct cgroup_cpu_stat_t** prev_cgroup; + struct sys_cputime_t** prev_sys; + struct sys_cputime_t** now_sys; + struct record_t* rec; + u64* prev_delay; +} tasktop_state = {.pids = 0, + .prev_task = 0, + .now_task = 0, + .prev_cgroup = 0, + .prev_sys = 0, + .now_sys = 0, + .rec = 0, + .prev_delay = 0}; + +int init_state() { + int i = 0; + tasktop_state.rec = calloc(1, sizeof(struct record_t)); + tasktop_state.rec->sys.cpu = calloc(nr_cpu + 1, sizeof(struct cpu_util_t)); + tasktop_state.rec->sys.percpu_sched_delay = calloc(nr_cpu, sizeof(u64)); + tasktop_state.rec->d_tasks = + calloc(env.stack_limit, sizeof(struct D_task_record_t)); + tasktop_state.rec->cgroups = + calloc(env.cgroup_limit, sizeof(cgroup_cpu_stat_t)); + + if (!tasktop_state.rec || !tasktop_state.rec->sys.cpu || + !tasktop_state.rec->sys.percpu_sched_delay || + !tasktop_state.rec->d_tasks || !tasktop_state.rec->cgroups) { + fprintf(stderr, "Failed calloc memory\n"); + return -1; + } + + tasktop_state.prev_cgroup = + calloc(env.cgroup_limit, sizeof(struct cgroup_cpu_stat_t*)); + tasktop_state.prev_delay = calloc(nr_cpu, sizeof(u64)); + tasktop_state.pids = calloc(pidmax + 1, sizeof(struct id_pair_t)); + tasktop_state.prev_task = + calloc(pidmax + 1, sizeof(struct task_cputime_t*)); + tasktop_state.now_task = calloc(pidmax + 1, sizeof(struct task_cputime_t*)); + tasktop_state.prev_sys = calloc(1 + nr_cpu, sizeof(struct sys_cputime_t*)); + tasktop_state.now_sys = calloc(1 + nr_cpu, sizeof(struct sys_cputime_t*)); + + for (i = 0; i <= 
nr_cpu; i++) { + tasktop_state.prev_sys[i] = calloc(1, sizeof(struct sys_cputime_t)); + tasktop_state.now_sys[i] = calloc(1, sizeof(struct sys_cputime_t)); + } + tasktop_state.prev_delay = calloc(nr_cpu, sizeof(u64)); + if (!tasktop_state.prev_task || !tasktop_state.now_task || + !tasktop_state.prev_delay || !tasktop_state.pids || + !tasktop_state.prev_sys || !tasktop_state.now_sys || + !tasktop_state.prev_cgroup) { + fprintf(stderr, "Failed calloc memory.\n"); + return -1; + } + return 0; +} + +void destory_state() { + int i = 0; + + if (tasktop_state.rec) { + free(tasktop_state.rec->cgroups); + free(tasktop_state.rec->d_tasks); + free(tasktop_state.rec->sys.percpu_sched_delay); + free(tasktop_state.rec->sys.cpu); + free(tasktop_state.rec); + } + + if (tasktop_state.now_sys && tasktop_state.prev_sys) { + for (i = 0; i <= nr_cpu; i++) { + free(tasktop_state.prev_sys[i]); + free(tasktop_state.now_sys[i]); + } + + free(tasktop_state.now_sys); + free(tasktop_state.prev_sys); + } + + if (tasktop_state.now_task) free(tasktop_state.now_task); + if (tasktop_state.prev_task) free(tasktop_state.prev_task); + if (tasktop_state.pids) free(tasktop_state.pids); + if (tasktop_state.prev_delay) free(tasktop_state.prev_delay); + if (tasktop_state.prev_cgroup) free(tasktop_state.prev_cgroup); +} + int main(int argc, char** argv) { - int err = 0, fork_map_fd = -1, i = 0; + int err = 0, fork_map_fd = -1, d_task_map_fd = -1, + d_task_notify_map_fd = -1, arg_map_fd = -1; FILE* stat_log = 0; struct tasktop_bpf* skel = 0; - struct id_pair_t* pids = 0; - struct task_cputime_t **prev_task = 0, **now_task = 0; - struct cgroup_cpu_stat_t** prev_cgroup = 0; - struct sys_cputime_t **prev_sys = 0, **now_sys = 0; - struct record_t* rec = 0; - - nr_cpu = sysconf(_SC_NPROCESSORS_ONLN); if (signal(SIGINT, sigint_handler) == SIG_ERR) { fprintf(stderr, "Failed set signal handler.\n"); @@ -1294,55 +1530,11 @@ int main(int argc, char** argv) { libbpf_set_print(libbpf_print_fn); bump_memlock_rlimit(); - 
/* init pid_max and btime */ - err = read_pid_max(); - if (err) { - fprintf(stderr, "Failed read pid max.\n"); - goto cleanup; - } - - err = read_btime(); - if (err) { - fprintf(stderr, "Failed read btime.\n"); - goto cleanup; - } - - rec = calloc(1, sizeof(struct record_t)); - rec->sys.cpu = calloc(nr_cpu + 1, sizeof(struct cpu_util_t)); - rec->sys.percpu_sched_delay = calloc(nr_cpu, sizeof(int)); - rec->d_tasks = calloc(env.stack_limit, sizeof(struct D_task_record_t)); - rec->cgroups = calloc(env.cgroup_limit, sizeof(cgroup_cpu_stat_t)); - - if (!rec || !rec->sys.cpu || !rec->sys.percpu_sched_delay || - !rec->d_tasks || !rec->cgroups) { - err = 1; - fprintf(stderr, "Failed calloc memory\n"); - goto cleanup; - } - - prev_cgroup = calloc(env.cgroup_limit, sizeof(struct cgroup_cpu_stat_t*)); - prev_delay = calloc(nr_cpu, sizeof(unsigned long long)); - pids = calloc(pidmax + 1, sizeof(struct id_pair_t)); - prev_task = calloc(pidmax + 1, sizeof(struct task_cputime_t*)); - now_task = calloc(pidmax + 1, sizeof(struct task_cputime_t*)); - prev_sys = calloc(1 + nr_cpu, sizeof(struct sys_cputime_t*)); - now_sys = calloc(1 + nr_cpu, sizeof(struct sys_cputime_t*)); - - for (i = 0; i <= nr_cpu; i++) { - prev_sys[i] = calloc(1, sizeof(struct sys_cputime_t)); - now_sys[i] = calloc(1, sizeof(struct sys_cputime_t)); - } - - if (!prev_task || !now_task || !prev_delay || !pids || !prev_sys || - !now_sys) { - err = 1; - fprintf(stderr, "Failed calloc memory.\n"); - goto cleanup; - } - + /* prepare the logfile */ prepare_directory(log_dir); - stat_log = open_logfile(); - if (!stat_log) { + /* prepare ebpf */ + env.dest = open_logfile(); + if (!env.dest) { fprintf(stderr, "Failed open stat log file.\n"); goto cleanup; } @@ -1361,6 +1553,9 @@ int main(int argc, char** argv) { } fork_map_fd = bpf_map__fd(skel->maps.fork_map); + d_task_map_fd = bpf_map__fd(skel->maps.d_task_map); + d_task_notify_map_fd = bpf_map__fd(skel->maps.d_task_notify_map); + arg_map_fd = 
bpf_map__fd(skel->maps.arg_map); err = tasktop_bpf__attach(skel); if (err) { @@ -1368,79 +1563,126 @@ int main(int argc, char** argv) { goto cleanup; } - bool first = true; - while (env.nr_iter-- && !exiting) { - u_int64_t nr_thread = 0; - int rec_num = 0; - int d_num = 0; - int cgroup_num = 0; - -#ifndef ONLY_THREAD - read_cgroup_throttle(rec->cgroups, &cgroup_num, prev_cgroup); - read_sched_delay(&rec->sys); - check_fork(fork_map_fd, &rec->sys); - runnable_proc(&rec->sys); - unint_proc(&rec->sys); - read_stat(prev_sys, now_sys, &rec->sys); -#endif - - /* get all process now */ - read_all_pids(pids, &nr_thread); - - read_d_task(pids, nr_thread, &d_num, rec->d_tasks); - -#ifndef ONLY_THREAD - rec->r_tasks = calloc(nr_thread, sizeof(struct R_task_record_t*)); + /* send argument to kernel space */ + struct arg_to_bpf arg = { + .fork_enable = env.mode == LOAD ? 1 : 0, + .blocked_enable = env.mode == BLOCKED ? 1 : 0, + .verbose_enable = env.verbose ? 1 : 0, + .kthread_enable = env.kthread ? 
1 : 0, + .threshold_ns = env.blocked_ms * NSEC_PER_MILLSECOND}; + int key = 0; + bpf_map_update_elem(arg_map_fd, &key, &arg, BPF_ANY); + + if (env.mode == LOAD) { + /* prepare load analyse */ + nr_cpu = sysconf(_SC_NPROCESSORS_ONLN); + /* init pid_max and btime */ + err = read_pid_max(); + if (err) { + fprintf(stderr, "Failed read pid max.\n"); + goto cleanup; + } - /* if prev process info exist produce record*/ - err = make_records(pids, nr_thread, rec, prev_task, now_task, &rec_num); + err = read_btime(); if (err) { - fprintf(stderr, "Failed make records.\n"); + fprintf(stderr, "Failed read btime.\n"); goto cleanup; } - /* sort record by sort type */ - sort_records(rec, rec_num, env.rec_sort); + err = init_state(); + if (err) { + fprintf(stderr, "Failed init state.\n"); + goto cleanup; + } - /* output record */ - if (!first) - output(rec, rec_num, stat_log, d_num, cgroup_num); - else - first = false; + bool first = true; + while (env.nr_iter-- && !exiting) { + u64 nr_thread = 0; + int rec_num = 0; + int d_num = 0; + int cgroup_num = 0; + + read_cgroup_throttle(tasktop_state.rec->cgroups, &cgroup_num, + tasktop_state.prev_cgroup); + read_sched_delay(&tasktop_state.rec->sys, tasktop_state.prev_delay); + check_fork(fork_map_fd, &tasktop_state.rec->sys); + runnable_proc(&tasktop_state.rec->sys); + unint_proc(&tasktop_state.rec->sys); + read_stat(tasktop_state.prev_sys, tasktop_state.now_sys, + &tasktop_state.rec->sys); + /* get all process now */ + read_all_pids(tasktop_state.pids, &nr_thread); + read_d_task(tasktop_state.pids, nr_thread, &d_num, + tasktop_state.rec->d_tasks); + /* onlu alloc a array, the taskinfo allco in make records */ + tasktop_state.rec->r_tasks = + calloc(nr_thread, sizeof(struct R_task_record_t*)); + /* if prev process info exist produce record*/ + err = make_records(tasktop_state.pids, nr_thread, tasktop_state.rec, + tasktop_state.prev_task, tasktop_state.now_task, + &rec_num); + if (err) { + fprintf(stderr, "Failed make records.\n"); + goto 
cleanup; + } - free_records(rec, nr_thread); + /* sort record by sort type */ + sort_records(tasktop_state.rec, rec_num, env.rec_sort); + /* output record */ + if (!first) + output(tasktop_state.rec, rec_num, env.dest, d_num, cgroup_num); + else + first = false; + free_records(tasktop_state.rec, nr_thread); + /* update old info and free nonexist process info */ + now_to_prev(tasktop_state.pids, nr_thread, pidmax, + tasktop_state.prev_task, tasktop_state.now_task, + tasktop_state.prev_sys, tasktop_state.now_sys); + + if (env.nr_iter) { + sleep(env.delay); + } + } + } else if (env.mode == BLOCKED) { + int err = 0; + uint64_t min_start_time_ns = UINT64_MAX, + threshold_ns = env.blocked_ms * NSEC_PER_MILLSECOND; + struct perf_buffer_opts pb_opts = {.sample_cb = handle_event, + .lost_cb = handle_lost_events}; + struct perf_buffer* pb = + perf_buffer__new(d_task_notify_map_fd, 8, &pb_opts); + if (libbpf_get_error(pb)) { + fprintf(stderr, "Failed to create perf buffer.\n"); + goto cleanup; + } - /* update old info and free nonexist process info */ - now_to_prev(pids, nr_thread, pidmax, prev_task, now_task, prev_sys, - now_sys); -#ifdef STRESS_TEST - usleep(10000); -#else - if (env.nr_iter) sleep(env.delay); -#endif -#endif - } + fprintf(env.dest, "%20s %10s %16s %10s %20s %s\n", "Time", "Event", + "Comm", "Pid", "Start(ns)", "Stack|Delya(ms)"); + time_t run_start_s = time(0); -cleanup: + while (!exiting) { + if (env.run != -1 && time(0) - run_start_s > env.run) { + break; + } - if (pids) free(pids); + min_start_time_ns = + check_d_task_timeout(d_task_map_fd, threshold_ns, env.dest); - if (prev_task) { - for (i = 0; i < pidmax; i++) { - if (prev_task[i]) free(prev_task[i]); - } - free(prev_task); - } + /* process d task become running*/ + err = perf_buffer__poll(pb, 0); + if (err < 0 && err != -EINTR) { + fprintf(stderr, "error polling perf buffer: %s\n", + strerror(-err)); + goto cleanup; + } - if (now_task) { - for (i = 0; i < pidmax; i++) { - if (now_task[i]) 
free(now_task[i]); + wait_d_task_timeout(threshold_ns, min_start_time_ns); + fflush(env.dest); } - free(now_task); } - +cleanup: + destory_state(); if (stat_log) fclose(stat_log); - tasktop_bpf__destroy(skel); return err; } diff --git a/source/tools/detect/sched/tasktop/tasktop.h b/source/tools/detect/sched/tasktop/tasktop.h index 3fba6c6d1261dad2806cbdfaab7934e928f71b5e..da790daf4bae5a473d29caa4b48ce86a9022a75e 100644 --- a/source/tools/detect/sched/tasktop/tasktop.h +++ b/source/tools/detect/sched/tasktop/tasktop.h @@ -23,80 +23,82 @@ struct id_pair_t { }; struct proc_stat_t { - int pid; + s32 pid; char comm[MAX_COMM_LEN]; char state; - int ppid; - int pgrp; - int session; - int tty_nr; - int tpgid; - unsigned int flags; - u_int64_t minflt; - u_int64_t cminflt; - u_int64_t majflt; - u_int64_t cmajflt; - u_int64_t utime; - u_int64_t stime; - int64_t cutime; - int64_t cstime; - int64_t priority; - int64_t nice; - int64_t num_threads; - int64_t itrealvalue; - unsigned long long starttime; + s32 ppid; + s32 pgrp; + s32 session; + s32 tty_nr; + s32 tpgid; + u32 flags; + u64 minflt; + u64 cminflt; + u64 majflt; + u64 cmajflt; + u64 utime; + u64 stime; + s64 cutime; + s64 cstime; + s64 priority; + s64 nice; + s64 num_threads; + s64 itrealvalue; + u64 starttime; }; struct task_cputime_t { - int pid; - int ppid; + s32 pid; + s32 ppid; char comm[MAX_COMM_LEN]; - u_int64_t stime; - u_int64_t utime; - u_int64_t starttime; + u64 stime; + u64 utime; + u64 starttime; + u64 ts_ns; }; struct sys_cputime_t { char cpu[CPU_NAME_LEN]; - long usr; - long nice; - long sys; - long idle; - long iowait; - long irq; - long softirq; - long steal; - long guest; - long guest_nice; + s64 usr; + s64 nice; + s64 sys; + s64 idle; + s64 iowait; + s64 irq; + s64 softirq; + s64 steal; + s64 guest; + s64 guest_nice; }; typedef struct R_task_record_t { - int pid; - int ppid; + s32 pid; + s32 ppid; char comm[MAX_COMM_LEN]; time_t runtime; time_t begin_ts; double system_cpu_rate; double user_cpu_rate; 
double all_cpu_rate; + u64 ts_ns; } R_task_record_t; typedef struct D_task_record_t { - int pid; - int tid; + s32 pid; + s32 tid; char comm[MAX_COMM_LEN]; char stack[STACK_CONTENT_LEN]; } D_task_record_t; typedef struct cgroup_cpu_stat_t { char cgroup_name[MAX_CGROUP_NAME_LEN]; - int nr_periods; - int nr_throttled; - unsigned long long throttled_time; - unsigned long long wait_sum; - unsigned long long current_bw; - int nr_burst; - unsigned long long burst_time; + s32 nr_periods; + s32 nr_throttled; + u64 throttled_time; + u64 wait_sum; + u64 current_bw; + s32 nr_burst; + u64 burst_time; time_t last_update; } cgroup_cpu_stat_t; @@ -119,12 +121,12 @@ typedef struct sys_record_t { /* load */ float load1; - int nr_R; - int nr_D; - int nr_fork; + s32 nr_R; + s32 nr_D; + s32 nr_fork; struct proc_fork_info_t most_fork_info; - unsigned long long *percpu_sched_delay; + u64 *percpu_sched_delay; } sys_record_t; struct record_t { diff --git a/source/tools/detect/sched/tasktop/tasktopSelftest/mod/mutex_block.c b/source/tools/detect/sched/tasktop/tasktopSelftest/mod/mutex_block.c index f78f023cd414c744fd966ed5e9ede4f385333183..8fc48fd7f0aeca4ebc11ec8c44babb6fa01cd262 100644 --- a/source/tools/detect/sched/tasktop/tasktopSelftest/mod/mutex_block.c +++ b/source/tools/detect/sched/tasktop/tasktopSelftest/mod/mutex_block.c @@ -53,8 +53,11 @@ static struct file_operations proc_fops = {.open = open_proc, .write = write_proc, .release = release_proc}; -// struct proc_dir_entry *proc_create(const char *name, umode_t mode, struct -// proc_dir_entry *parent, const struct file_operations *proc_fops); +// if no definition of file_operations, use below +// static struct proc_ops proc_fops = {.proc_open = open_proc, +// .proc_write = write_proc, +// .proc_read = read_proc, +// .proc_release = release_proc}; static struct proc_dir_entry *ent = 0; static int __init mod_init(void) { diff --git a/source/tools/inject/goom/Makefile b/source/tools/inject/goom/Makefile new file mode 100644 index 
0000000000000000000000000000000000000000..92fb5271d39fddb0f4023c25f108a5833d1b64d9 --- /dev/null +++ b/source/tools/inject/goom/Makefile @@ -0,0 +1,5 @@ +target := goom +mods := main.o + +EXTRA_LDFLAGS = -lpthread +include $(SRC)/mk/cc.mk diff --git a/source/tools/inject/goom/main.cpp b/source/tools/inject/goom/main.cpp new file mode 100644 index 0000000000000000000000000000000000000000..110d765f2db9c67b441c83de8d189bc99ab2fb1d --- /dev/null +++ b/source/tools/inject/goom/main.cpp @@ -0,0 +1,41 @@ +#include +#include +#include +#include +#include +#include +#include + +using namespace std; + +#define PAGESIZE 4096 +#define MALLOC_SIZE (20 * 1024 * 1024) + +void* foo(void* arg) +{ + while(1) { + char *p=(char*)malloc(MALLOC_SIZE); + int i; + + if (!p) + exit(1); + for(i = 0; i < MALLOC_SIZE/PAGESIZE; i++) { + *p = 'a'; + p += PAGESIZE; + } + } + + return NULL; +} + +int main(int argc,char** argv) +{ + int i, thread_nr, cpu_nr; + pthread_t thread; + + if(pthread_create(&thread, NULL, foo, NULL)) + return -1; + + pthread_join(thread,NULL); + return 0; +} diff --git a/source/tools/inject/high_sys/Makefile b/source/tools/inject/high_sys/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..0fd618ce92e01a87d52994748ba898c2f848100f --- /dev/null +++ b/source/tools/inject/high_sys/Makefile @@ -0,0 +1,5 @@ +target := high_sys +mods := main.o + +EXTRA_LDFLAGS = -lpthread +include $(SRC)/mk/cc.mk diff --git a/source/tools/inject/high_sys/main.cpp b/source/tools/inject/high_sys/main.cpp new file mode 100644 index 0000000000000000000000000000000000000000..001cbc1279e6e373382ad4f0a683965ed3c2dc56 --- /dev/null +++ b/source/tools/inject/high_sys/main.cpp @@ -0,0 +1,64 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +extern "C" int gettid(void); + +using namespace std; + +static pthread_mutex_t foo_mutex; + +void* foo(void* arg) +{ + char buff[128]; + int idx = (long)arg + 1; + + printf("thread start..\n"); +// 
sprintf(buff, "echo %d > /sys/fs/cgroup/cpu/test%i/tasks", gettid(), idx); +// system(buff); + + while(1){ + pthread_mutex_lock(&foo_mutex); + int size=rand()/(1024*150); + char *p=(char*)malloc(size); + memset(p,0x00,size); + string xx; + xx.assign(p,size); + pthread_mutex_unlock(&foo_mutex); + free(p); + } + + printf("thread end..\n"); + + return NULL; +} + +int main(int argc,char** argv) +{ + int i, thread_nr, cpu_nr; + pthread_t *thread; + + cpu_nr = sysconf(_SC_NPROCESSORS_CONF); + if (cpu_nr <= 0) + exit(1); + + thread_nr = cpu_nr * 20; + pthread_mutex_init(&foo_mutex, NULL); + + thread = (pthread_t *)malloc(thread_nr * sizeof(pthread_t)); + for(i = 0; i < thread_nr; i++){ + if(pthread_create(&thread[i],NULL,foo,(void *)i)) + exit(1); + } + + for(i = 0; i < thread_nr; i++) { + pthread_join(thread[i],NULL); + } + + return 0; +} diff --git a/source/tools/inject/process_limit/Makefile b/source/tools/inject/process_limit/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..bf6176e23d84a79334e3f3f3e0ca2ca3a55af44e --- /dev/null +++ b/source/tools/inject/process_limit/Makefile @@ -0,0 +1,6 @@ +target := process_limit +mods := main.o + +LDFLAGS += -lpthread + +include $(SRC)/mk/csrc.mk diff --git a/source/tools/inject/process_limit/main.c b/source/tools/inject/process_limit/main.c new file mode 100644 index 0000000000000000000000000000000000000000..12e5c2b540e35aa750a8b5da49883772f81c694e --- /dev/null +++ b/source/tools/inject/process_limit/main.c @@ -0,0 +1,179 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#define BUF_LEN 128 + +#define PID_MAX "/proc/sys/kernel/pid_max" +#define THREADS_MAX "/proc/sys/kernel/threads-max" + +int g_shmid; +unsigned long int *task_created; +pthread_mutex_t *g_mutex; + +int get_system_process_limit(void) +{ + int pid_max = -1; + int threads_max; + FILE *fp; + char buf[BUF_LEN]; + + fp = fopen(PID_MAX, "r"); + if (!fp) { + printf("open %s failed\n", PID_MAX); + return 
-1; + } + + memset(buf, 0, BUF_LEN); + if (fgets(buf, BUF_LEN, fp)) { + pid_max = atoi(buf); + } + + fclose(fp); + if (pid_max < 0) + return -1; + + fp = fopen(THREADS_MAX, "r"); + if (!fp) { + printf("open %s failed\n", THREADS_MAX); + return -1; + } + + memset(buf, 0, BUF_LEN); + if (fgets(buf, BUF_LEN, fp)) { + threads_max = atoi(buf); + } + + fclose(fp); + if (threads_max < 0) + return -1; + + return pid_max < threads_max ? pid_max: threads_max; +} + +int check_rlimit(int max) +{ + struct rlimit limit; + + if (getrlimit(RLIMIT_NPROC, &limit)) + return -1; + + if (limit.rlim_cur < max) { + limit.rlim_cur = max; + if (limit.rlim_max < max) + limit.rlim_max = max; + + return setrlimit(RLIMIT_NPROC, &limit); + } + + return 0; +} + +void *thread_func(void *arg) +{ + while(1) { + /*just sleep*/ + sleep(5); + } +} + +int create_threads(int nr) +{ + int i, err; + pthread_attr_t attr; + pthread_t *thread; + void *ret; + + pthread_attr_init(&attr); + thread = (pthread_t *)malloc(nr * sizeof(pthread_t)); + + for (i = 0; i < nr; i++) { + err = pthread_create(&thread[i], &attr, &thread_func, NULL); + if (err) + break; + } + + *task_created = i; + pthread_mutex_unlock(g_mutex); + nr = i; + printf("created %d threads in child\n", nr); + for (i = 0; i < nr; i++) { + pthread_join(thread[i], &ret); + } + + exit(-1); +} + +int create_tasks(unsigned long int nr) +{ + int ret, pid; + unsigned long int created = 0; + + pthread_mutex_lock(g_mutex); + while(nr > 0) { + pid = fork(); + if (pid < 0) { + break; + } else if (pid == 0) { + void *shaddr = shmat(g_shmid, NULL, 0); + + if (!shaddr) { + perror("shmat"); + exit(-1); + } + + g_mutex = (pthread_mutex_t *)shaddr; + task_created = (unsigned long *)(shaddr + sizeof(pthread_mutex_t)); + create_threads(nr); + } else { + pthread_mutex_lock(g_mutex); + created += *task_created; + nr -= *task_created; + } + } + + return created > 0 ? 
0 : -1; +} + +int main(int argc ,char *argv[]) +{ + int ret, nr = 0; + void *shaddr; + pthread_mutexattr_t attr; + + if (argc == 2) + nr = atoi(argv[1]); + + if (nr <= 0) + nr = get_system_process_limit(); + + if (nr <= 0) + return -1; + + if (check_rlimit(nr)) + return -1; + + g_shmid = shmget(IPC_PRIVATE, 4096, IPC_CREAT); + if (g_shmid == -1) { + perror("shmget"); + return -1; + } + shaddr = shmat(g_shmid, NULL, 0); + if (!shaddr) + return -1; + + g_mutex = (pthread_mutex_t *)shaddr; + task_created = (unsigned long *)(shaddr + sizeof(pthread_mutex_t)); + + pthread_mutexattr_init(&attr); + pthread_mutexattr_setpshared(&attr, 1); + pthread_mutex_init(g_mutex, &attr); + printf("Will create %d tasks to limit\n", nr); + + return create_tasks(nr); +} diff --git a/source/tools/monitor/ioMonitor/displayClass.py b/source/tools/monitor/ioMonitor/displayClass.py index cc51352ecf36ffa2b488a22b5e1c94961e544d00..8913cc23fe57bba13731ce0a47d4f194b5bcbb31 100755 --- a/source/tools/monitor/ioMonitor/displayClass.py +++ b/source/tools/monitor/ioMonitor/displayClass.py @@ -37,6 +37,8 @@ def iolatencyResultReport(*argvs): nf = argvs[1] nfPutPrefix = str(argvs[2]) statusReportDicts = argvs[3] + mode = argvs[4] + analysisFile = argvs[5] ioburst = False nfPrefix = [] iolatStartT = statusReportDicts['iolatency']['startT'] @@ -154,10 +156,21 @@ def iolatencyResultReport(*argvs): result.append(diagret+','+reason+','+suggest) for e, p in zip(result, nfPrefix): - # print(e+'\n') - #nf.put(nfPutPrefix, p+' '+e) - nf.puts(nfPutPrefix+p+' '+e) - statusReportDicts['iolatency']['valid'] = True + if mode == "diagnose": + dic = dict(item.split('=') for item in p[1:].split(',')) + matches = re.findall(r'(\w+)="(.*?)"', e) + info_dict = {key: value for key, value in matches} + dic.update(info_dict) + json_str = json.dumps(dic) + directory = os.path.dirname(analysisFile) + if not os.path.exists(directory): + os.makedirs(directory) + with open(analysisFile, 'a+') as f: + f.write(json_str+"\n") + 
statusReportDicts['iolatency']['valid'] = True + else: + nf.puts(nfPutPrefix+p+' '+e) + statusReportDicts['iolatency']['valid'] = True def iohangResultReport(*argvs): @@ -168,6 +181,8 @@ def iohangResultReport(*argvs): nfPutPrefix=str(argvs[2]) statusReportDicts = argvs[3] nfPrefix=[] + mode = argvs[4] + analysisFile = argvs[5] os.system('ls -rtd '+argvs[0]+'/../* | head -n -5 |'\ ' xargs --no-run-if-empty rm {} -rf') @@ -246,9 +261,21 @@ def iohangResultReport(*argvs): result.append(diagret+','+reason+','+suggest) for e, p in zip(result, nfPrefix): - nf.puts(nfPutPrefix+p+' '+e) - #nf.put(nfPutPrefix, p+' '+e) - statusReportDicts['iohang']['valid'] = True + if mode == "diagnose": + dic = dict(item.split('=') for item in p[1:].split(',')) + matches = re.findall(r'(\w+)="(.*?)"', e) + info_dict = {key: value for key, value in matches} + dic.update(info_dict) + json_str = json.dumps(dic) + directory = os.path.dirname(analysisFile) + if not os.path.exists(directory): + os.makedirs(directory) + with open(analysisFile, 'a+') as f: + f.write(json_str+"\n") + statusReportDicts['iohang']['valid'] = True + else: + nf.puts(nfPutPrefix+p+' '+e) + statusReportDicts['iohang']['valid'] = True def ioutilDataParse(data, resultInfo): @@ -279,7 +306,7 @@ def ioutilDataParse(data, resultInfo): return totalIops,totalBw,tUnit -def ioutilReport(nf, nfPutPrefix, resultInfo, tUnit, diagret): +def ioutilReport(nf, nfPutPrefix, resultInfo, tUnit, diagret, mode, analysisFile): top = 1 suggestPS = reason = '' resultInfo = \ @@ -318,8 +345,17 @@ def ioutilReport(nf, nfPutPrefix, resultInfo, tUnit, diagret): diagret, reason, suggest) #nf.put(nfPutPrefix, if reason != '': - nf.puts(nfPutPrefix+putIdx+putField) - # print(prefix+reason+suggest+'\n') + if mode == "diagnose": + dic = dict({'diag_type': 'IO-Burst', 'devname': "——", + 'diagret': diagret,'reason': reason,'solution': suggest}) + json_str = json.dumps(dic) + directory = os.path.dirname(analysisFile) + if not os.path.exists(directory): + 
os.makedirs(directory) + with open(analysisFile, 'a+') as f: + f.write(json_str+"\n") + else: + nf.puts(nfPutPrefix+putIdx+putField) def ioutilResultReport(*argvs): @@ -331,6 +367,8 @@ def ioutilResultReport(*argvs): maxIops = maxBw = 0 minIops = minBw = sys.maxsize tUnit = None + mode = argvs[4] + analysisFile = argvs[5] os.system('ls -rtd '+os.path.dirname(argvs[0])+'/../* | head -n -5 |'\ ' xargs --no-run-if-empty rm {} -rf') @@ -358,7 +396,7 @@ def ioutilResultReport(*argvs): ', Bps:'+humConvert(minBw).replace('s', tUnit)+\ '~'+humConvert(maxBw).replace('s', tUnit) diagret = 'IO-Burst('+content+') detected' - ioutilReport(nf, nfPutPrefix, resultInfo, tUnit, diagret) + ioutilReport(nf, nfPutPrefix, resultInfo, tUnit, diagret, mode, analysisFile) statusReportDicts['ioutil']['valid'] = True @@ -380,7 +418,7 @@ def iowaitDataParse(data, resultInfo): return data['global iowait'],unkownDisable -def iowaitReport(nf, nfPutPrefix, unkownDisable, resultInfo, diagret): +def iowaitReport(nf, nfPutPrefix, unkownDisable, resultInfo, diagret, mode, analysisFile): top = 0 reason = '' resDicts = { @@ -427,11 +465,20 @@ def iowaitReport(nf, nfPutPrefix, unkownDisable, resultInfo, diagret): else: suggest = 'Report stacktrace to OS kernel specialist' - putIdx = ',diag_type=IOwait-high ' - putField = 'diagret=\"%s\",reason=\"%s\",solution=\"%s\"' %( - diagret, reason, suggest) - #nf.put(nfPutPrefix, - nf.puts(nfPutPrefix+putIdx+putField) + if mode == "diagnose": + dic = dict({'diag_type': 'IOwait-high', 'devname': "——", + 'diagret': diagret,'reason': reason,'solution': suggest}) + json_str = json.dumps(dic) + directory = os.path.dirname(analysisFile) + if not os.path.exists(directory): + os.makedirs(directory) + with open(analysisFile, 'a+') as f: + f.write(json_str+"\n") + else: + putIdx = ',diag_type=IOwait-high ' + putField = 'diagret=\"%s\",reason=\"%s\",solution=\"%s\"' %( + diagret, reason, suggest) + nf.puts(nfPutPrefix+putIdx+putField) def iowaitResultReport(*argvs): @@ 
-442,6 +489,8 @@ def iowaitResultReport(*argvs): maxGiowait = 0 minGiowait = sys.maxsize unkownDisable = None + mode = argvs[4] + analysisFile = argvs[5] os.system('ls -rtd '+os.path.dirname(argvs[0])+'/../* | head -n -5 |'\ ' xargs --no-run-if-empty rm {} -rf') @@ -465,13 +514,12 @@ def iowaitResultReport(*argvs): if resultInfo: content = str(minGiowait)+'%~'+str(maxGiowait)+'%' diagret = 'IOwait high('+content+') detected' - iowaitReport(nf, nfPutPrefix, unkownDisable, resultInfo, diagret) + iowaitReport(nf, nfPutPrefix, unkownDisable, resultInfo, diagret, mode, analysisFile) statusReportDicts['iowait']['valid'] = True - # print(diagret+reason+solution+'\n') class displayClass(object): - def __init__(self, sender): + def __init__(self, sender, mode, resultPath): self.funcResultReportDicts = { 'iohang': iohangResultReport, 'ioutil': ioutilResultReport, @@ -487,6 +535,8 @@ class displayClass(object): } self._sender = sender self._nfPutPrefix = 'IOMonDiagLog' + self._mode = mode + self._resultpath = resultPath def markIoburst(self, now): self.statusReportDicts['iolatency']['lastIOburstT'] = now @@ -503,7 +553,7 @@ class displayClass(object): self.statusReportDicts[diagType]['endT'] = endTime self.statusReportDicts[diagType]['valid'] = False argvs = [ - filepath, self._sender, self._nfPutPrefix, self.statusReportDicts] + filepath, self._sender, self._nfPutPrefix, self.statusReportDicts, self._mode, self._resultpath] timer = threading.Timer(timeout, self.funcResultReportDicts[diagType], argvs) diff --git a/source/tools/monitor/ioMonitor/exceptDiagnoseClass.py b/source/tools/monitor/ioMonitor/exceptDiagnoseClass.py index 7a0d5f74c6873a18c2b489c6086ed3c4b76a1bc8..c38e3324861ebe2a0913ba590e0c6d63e68c0d5f 100755 --- a/source/tools/monitor/ioMonitor/exceptDiagnoseClass.py +++ b/source/tools/monitor/ioMonitor/exceptDiagnoseClass.py @@ -11,7 +11,7 @@ from displayClass import displayClass class runDiag(object): - def __init__(self, logRootPath, sender): + def __init__(self, 
logRootPath, sender, mode, resultPath): self.funcDicts = { 'iohang': self.startIohangDiagnose, 'ioutil': self.startIoutilDiagnose, @@ -19,7 +19,7 @@ class runDiag(object): 'iowait': self.startIowaitDiagnose} self.lastDiagTimeDicts = \ {'iohang': 0, 'ioutil': 0, 'iolatency': 0, 'iowait': 0} - self.display = displayClass(sender) + self.display = displayClass(sender, mode, resultPath) self.sysakPath = 'sysak' self.logRootPath = logRootPath @@ -55,6 +55,7 @@ class runDiag(object): devname = argv[0] thresh = argv[1] ioburst = argv[2] + mode = argv[3] now = time.time() if now - self.lastDiagTimeDicts['iolatency'] <= 60: return @@ -67,15 +68,24 @@ class runDiag(object): except Exception: return self.lastDiagTimeDicts['iolatency'] = now - if devname is not None: - os.system(self.sysakPath+' -g iosdiag latency -t '+str(thresh) + - ' -T 45 -f '+logdir+' '+devname+' > '+outlog+' &') + if mode == "monitor": + # str(thresh) + if devname is not None: + os.system(self.sysakPath+' -g iosdiag latency -t ' + str(thresh) + + ' -T 20 -m -f '+logdir+' '+devname+' > '+outlog+' &') + else: + os.system(self.sysakPath+' -g iosdiag latency -t ' + str(thresh) + + ' -T 20 -m -f '+logdir+' > '+outlog+' &') else: - os.system(self.sysakPath+' -g iosdiag latency -t '+str(thresh) + - ' -T 45 -f '+logdir+' > '+outlog+' &') - if ioburst: - self.display.markIoburst(now) - self.display.start(60, 'iolatency', logdir, now, now+60) + if devname is not None: + os.system(self.sysakPath+' -g iosdiag latency -t '+ str(thresh) + + ' -T 20 -f '+logdir+' '+devname+' > '+outlog+' &') + else: + os.system(self.sysakPath+' -g iosdiag latency -t '+ str(thresh) + + ' -T 20 -f '+logdir+' > '+outlog+' &') + if ioburst: + self.display.markIoburst(now) + self.display.start(60, 'iolatency', logdir, now, now+60) def startIoutilDiagnose(self, *argv): @@ -94,7 +104,7 @@ class runDiag(object): except Exception: return self.lastDiagTimeDicts['ioutil'] = now - #self.display.setIoburstThresh(iopsThresh, bwThresh) + 
self.display.setIoburstThresh(iopsThresh, bwThresh) argvs = ['-j',outlog,'-n','-m','-c','1','-t','5','-T','40', '-i',str(iopsThresh),'-b',str(bwThresh)] threading.Thread(target=iofsstatStart, args=(argvs,)).start() @@ -125,8 +135,9 @@ class runDiag(object): class diagnoseClass(runDiag): - def __init__(self, window, logRootPath, sender): - super(diagnoseClass, self).__init__(logRootPath, sender) + def __init__(self, window, logRootPath, sender, mode, resultPath): + super(diagnoseClass, self).__init__(logRootPath, sender, mode, resultPath) + self.mode = mode self.window = window self.diagnoseDicts = OrderedDict() self._diagStat = OrderedDict( @@ -232,8 +243,14 @@ class diagnoseClass(runDiag): diagStat[diagType]['run'] = True if len(value) > 1: diagStat[diagType]['argv'][0] = None + # max_threshold = diagnoseDicts[value[0]]['iolatency']['diagArgs'][0] + # for dev in value: + # if max_threshold <= diagnoseDicts[dev]['iolatency']['diagArgs'][0]: + # diagStat[diagType]['argv'][0] = dev + # max_threshold = diagnoseDicts[dev]['iolatency']['diagArgs'][0] elif len(value) == 1: - diagStat[diagType]['argv'][0] = value[0] + diagStat[diagType]['argv'][0] = None + # diagStat[diagType]['argv'][0] = value[0] else: diagStat[diagType]['run'] = False @@ -251,6 +268,7 @@ class diagnoseClass(runDiag): for dev in diagInfo['iolatency']], reverse=True)[-1] diagStat['iolatency']['argv'][2] = ioburst + diagStat['iolatency']['argv'][3] = self.mode for diagType, stat in diagStat.items(): if stat['run'] == True: @@ -263,3 +281,4 @@ class diagnoseClass(runDiag): # clear the valid mark before each diagnosis def recentDiagnoseValid(self, diagType): return self._recentDiagnoseValid(diagType) + diff --git a/source/tools/monitor/ioMonitor/ioMonCfgClass.py b/source/tools/monitor/ioMonitor/ioMonCfgClass.py index f1282279600a1ef8c0e8f58e349b65499265efc0..5cf7e38318b145e70bf385bc9be7044adfab5766 100755 --- a/source/tools/monitor/ioMonitor/ioMonCfgClass.py +++ 
b/source/tools/monitor/ioMonitor/ioMonCfgClass.py @@ -31,7 +31,7 @@ class ioMonCfgClass(object): hasArgs = any(list(cfg.values())) if not os.path.exists(self.cfgPath) or resetCfg: cfg['iowait'] = int(cfg['iowait']) if cfg['iowait'] else 5 - cfg['await'] = int(cfg['await']) if cfg['await'] else 10 + cfg['await'] = int(cfg['await']) if cfg['await'] else 2 cfg['util'] = int(cfg['util']) if cfg['util'] else 20 cfg['iops'] = int(cfg['iops']) if cfg['iops'] else 150 cfg['bps'] = int(cfg['bps']) if cfg['bps'] else 31457280 @@ -39,6 +39,7 @@ class ioMonCfgClass(object): cfg['diagIowait'] = cfg['diagIowait'] if cfg['diagIowait'] else 'off' cfg['diagIoburst'] = cfg['diagIoburst'] if cfg['diagIoburst'] else 'off' cfg['diagIolat'] = cfg['diagIolat'] if cfg['diagIolat'] else 'off' + # cfg['diagIolat'] = 'on' cfg['diagIohang'] = cfg['diagIohang'] if cfg['diagIohang'] else 'off' self._updateCfg(cfg) return @@ -135,3 +136,4 @@ class ioMonCfgClass(object): if val.isdigit(): val = int(val) return val + diff --git a/source/tools/monitor/ioMonitor/ioMonitor.py b/source/tools/monitor/ioMonitor/ioMonitor.py index 921a343550f23126ec91c5ea91147adbb4ac6ce9..12e8c94ae33f339d2deb4044242a8a0375651528 100755 --- a/source/tools/monitor/ioMonitor/ioMonitor.py +++ b/source/tools/monitor/ioMonitor/ioMonitor.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- import argparse import signal +import os from ioMonCfgClass import ioMonCfgClass from ioMonitorClass import ioMonitorClass @@ -57,6 +58,9 @@ def main(): help='Reset cfg to default') parser.add_argument('-o','--only_set_cfg', action='store_true', help='Only set cfg') + parser.add_argument('-t','--timeout', type=int, default=0, help='Monitoring duration') + parser.add_argument('-a','--analysis_file', type=str, default=\ + '/var/log/sysak/iosdiag/iodiagnose/iodiagnose.log', help='Store diagnosis result') args = parser.parse_args() signal.signal(signal.SIGCHLD, signal.SIG_IGN) @@ -71,10 +75,19 @@ def main(): ioMonCfg = ioMonCfgClass(args.set_cfg, 
args.reset_cfg, logRootPath) ioMonCfg.notifyIoMon() return + + mode = "monitor" + if args.timeout < 0: + args.timeout = 0 + elif args.timeout > 0: + mode = "diagnose" + + if os.path.exists(args.analysis_file): + os.remove(args.analysis_file) ioMonCfg = ioMonCfgClass(args.set_cfg, args.reset_cfg, logRootPath) - ioMon = ioMonitorClass(logRootPath, ioMonCfg, pipeFile) - ioMon.monitor() + ioMon = ioMonitorClass(logRootPath, ioMonCfg, pipeFile, mode, args.analysis_file) + ioMon.monitor(args.timeout) if __name__ == "__main__": main() diff --git a/source/tools/monitor/ioMonitor/ioMonitorClass.py b/source/tools/monitor/ioMonitor/ioMonitorClass.py index 2a9dec0cd56e16e4bb2243e96f171e17c15b821b..ee12436b5bafa5d158321055eb06e0b7b03ab2c9 100755 --- a/source/tools/monitor/ioMonitor/ioMonitorClass.py +++ b/source/tools/monitor/ioMonitor/ioMonitorClass.py @@ -13,8 +13,8 @@ from collections import OrderedDict from nfPut import CnfPut class ioMonitorClass(object): - def __init__(self, logRootPath, cfg, pipeFile): - self.window = 60 + def __init__(self, logRootPath, cfg, pipeFile, mode, resultPath): + self.window = 20 self.cfg = cfg self.cfg.createCfgFlagFile() self.diagSwitch = { @@ -28,12 +28,13 @@ class ioMonitorClass(object): 'esi':'IO-Hang'} } self._sender = CnfPut(pipeFile) + self._mode = mode self._nfPutTlb = 'IOMonIndForDisksIO' self._nfPutTlb4System = 'IOMonIndForSystemIO' self.fieldDicts = OrderedDict() self.exceptChkDicts = {'system': exceptCheckClass(self.window)} self.exceptChkDicts['system'].addItem('iowait') - self.diagnose = diagnoseClass(self.window, logRootPath, self._sender) + self.diagnose = diagnoseClass(self.window, logRootPath, self._sender, self._mode, resultPath) self.diagnose.addItem('system', 'iowait', 0, 60) self.fDiskStats = open("/proc/diskstats") self.cpuStatIowait = {'sum': 0, 'iowait': 0} @@ -298,7 +299,7 @@ class ioMonitorClass(object): self.fDiskStats.seek(0) for stat in self.fDiskStats.readlines(): stat = stat.split() - if 
os.path.exists('/sys/block/'+stat[2]) == False: + if os.path.exists('/sys/block/'+stat[2]) == False or stat[2].startswith("loop"): if stat[2] in fieldDicts.keys(): self._removeDiskMonitor(stat[2]) continue @@ -318,7 +319,7 @@ class ioMonitorClass(object): value[0] = long(stat[int(idx) + 2]) - def _collectEnd(self, secs): + def _collectEnd(self, secs, timeout): fieldDicts = self.fieldDicts exceptChkDicts = self.exceptChkDicts uploadInter = self.uploadInter @@ -335,7 +336,7 @@ class ioMonitorClass(object): self.fDiskStats.seek(0) for stat in self.fDiskStats.readlines(): stat = stat.split() - if os.path.exists('/sys/block/'+stat[2]) == False: + if os.path.exists('/sys/block/'+stat[2]) == False or stat[2].startswith("loop"): if stat[2] in fieldDicts.keys(): self._removeDiskMonitor(stat[2]) continue @@ -356,17 +357,24 @@ class ioMonitorClass(object): # Detect await exception self._checkAwaitException(devname, io['wait'], ioburst) - if ((self.uploadInter * secs) % 60) == 0: + if ((self.uploadInter * secs) % 60 ) == 0 and not timeout: self._reportDataToRemote(fieldDicts.keys()) - def monitor(self): + def monitor(self, timeout): + startTime = time.time() while True: secs = self.cfg.getCfgItem('cycle') / 1000.0 + if timeout: + currentTime = time.time() + diagnoseTime = currentTime - startTime + if diagnoseTime >= timeout: + break self._collectBegin() time.sleep(secs) - self._collectEnd(secs) + self._collectEnd(secs, timeout) # Check if it is necessary to start the diagnosis self.diagnose.checkDiagnose() self.fDiskStats.close() + diff --git a/source/tools/monitor/logMonitor/Makefile b/source/tools/monitor/logMonitor/Makefile new file mode 100755 index 0000000000000000000000000000000000000000..da579a8a12553b184f66b2900ad8f8940409650c --- /dev/null +++ b/source/tools/monitor/logMonitor/Makefile @@ -0,0 +1,6 @@ +target := logMonitor +mods := logMonitor +pymods += loglist +DEPEND := "python-dep{all}" +#include $(SRC)/mk/pyinstaller.mk +include $(SRC)/mk/py.mk diff --git 
a/source/tools/monitor/logMonitor/logMonitor.py b/source/tools/monitor/logMonitor/logMonitor.py new file mode 100644 index 0000000000000000000000000000000000000000..163e77c54e046dddd372ed9145aea2b62c766859 --- /dev/null +++ b/source/tools/monitor/logMonitor/logMonitor.py @@ -0,0 +1,57 @@ +# -*- coding: utf-8 -*- +import os,sys,re +import traceback +from loglist import kmsg_log_list +from datetime import date, datetime, timedelta +import requests +import json + +KMSG_FILE_PATH = "/proc/kmsg" +WARNING_REPEAT_INTERVAL = 1 # mins + +def push_logwarn(text,pattern): + try: + url = "http://127.0.0.1:8400/api/alert" + data = {"alert_item": "kmsg", + "alert_category": "MONITOR", + "alert_source_type": "kmsg", + "alert_level": "WARNING", + "status": "FIRING", + "labels": {"pattern":pattern + }, + "annotations": { + "summary": text.replace("\"","").replace("\'","") + }, + } + res = requests.post(url, data=json.dumps(data)) + print(res.content, res) + except: + traceback.print_exc() + +def main(): + fd = open(KMSG_FILE_PATH, 'r') + try: + warn_end_time = datetime.now() + warnlog = {} + while True: + warn_ignore = 0 + text_line = fd.readline() + if text_line: + for p in kmsg_log_list: + match = re.search(p,text_line) + if match: + warn_end_time = datetime.now() + if p in warnlog: + if (warn_end_time - warnlog[p]).seconds < WARNING_REPEAT_INTERVAL*60: + warn_ignore = 1 + if warn_ignore == 0: + warnlog[p] = warn_end_time + push_logwarn(text_line,p) + else: + break + except: + traceback.print_exc() + fd.close() + +if __name__ == "__main__": + main() diff --git a/source/tools/monitor/logMonitor/loglist.py b/source/tools/monitor/logMonitor/loglist.py new file mode 100644 index 0000000000000000000000000000000000000000..9712aa0c05c6aa3ebcf3156b8175ec079dee0220 --- /dev/null +++ b/source/tools/monitor/logMonitor/loglist.py @@ -0,0 +1,30 @@ +kmsg_log_list = [ + "Task in /(.+) killed as a result of limit of .*", + "kernel has no deadlock", + "unregister_netdevice: waiting for \\w+ to 
become free. Usage count = \\d+", + "task umount\\.aufs:\\w+ blocked for more than \\w+ seconds\\.", + "task docker:\\w+ blocked for more than \\w+ seconds\\.", + "TCP: out of memory -- consider tuning tcp_mem", + "TCP: too many orphaned sockets", + "nf_conntrack: table full, dropping packet", + "\\w+: neighbor table overflow!", + "Buffer I/O error on device (.+), logical block \\d+", + "EXT4-fs error .*", + "blk_update_request: I/O error, dev \\w+, sector \\d+", + "VFS: file-max limit \\d+ reached", + "cache_from_obj: Wrong slab cache. (.+) but object is from (.+)", + "page allocation failure(.) order:[3-5],(.+)", + "BUG: soft lockup - CPU#\\d+ stuck for (.+)", + "BUG: scheduling while atomic:(.+)", + "INFO: \\w+ self-detected stall on CPU (.+)", + "Card not present on Slot(.+)", + "invoked oom-killer", + "rcu_sched detected stalls", + "BUG: scheduling while atomic", + "EXT4-fs error", + "EXT4-fs warning", + "Buffer I/O error on device", + "Remounting filesystem read-only", + "blocked for more than" + +] diff --git a/source/tools/monitor/monctl/monctl.c b/source/tools/monitor/monctl/monctl.c index a8d0d24de68539f42b7ac4829919f0f295665fc5..df4b1ec2d1e4b3631cb32fbc971f324e5326cca1 100644 --- a/source/tools/monitor/monctl/monctl.c +++ b/source/tools/monitor/monctl/monctl.c @@ -4,6 +4,8 @@ #include #include #include +#include +#include #define DEFAULT_CONF_FILE_PATH "/usr/local/sysak/monctl.conf" #define LEN_1024 1024 @@ -11,6 +13,7 @@ #define LEN_32 32 #define W_SPACE " \t\r\n" #define MAX_MOD_NUM 32 +pid_t child_pid; struct module { char name[LEN_32]; @@ -24,6 +27,7 @@ struct command { struct module *mod_list[MAX_MOD_NUM] = {0}; struct command *cmd_store[MAX_MOD_NUM] = {0}; +pid_t child_pids[MAX_MOD_NUM] = {0}; int total_mod_num = 0; static void usage(void) @@ -49,21 +53,16 @@ static void parse_mod(char *mod_name) tool_name = strstr(mod_name,"_"); if (tool_name == '\0') return; - printf("tool_name is %s\n",tool_name); - token = strtok(NULL, W_SPACE); - printf("token 
is %s\n",token); if (token && strcasecmp(token, "on") && strcasecmp(token, "enable")) { return; } - printf("total_mod_num is %d\n",total_mod_num); /* check if the mod load already */ for ( i = 0; i < total_mod_num; i++ ) { mod = mod_list[i]; - printf("name is %s,i is %d\n",mod->name,i); if (!strcmp(mod->name, tool_name)) { return; } @@ -95,7 +94,6 @@ static void parse_cmd(const char *cmd_name) for ( i = 0; i < total_mod_num; i++ ) { mod = mod_list[i]; - printf("mod->name is %s,tool_name is %s\n",mod->name, tool_name); if (!strcmp(mod->name, tool_name)) { cmd = cmd_store[i] = malloc(sizeof(struct command)); if (cmd == NULL){ @@ -106,7 +104,6 @@ static void parse_cmd(const char *cmd_name) strncpy(cmd->name, tool_name, LEN_32); strncpy(cmd->cmd, cmd_name + strlen(cmd_name) + 1, LEN_512 - 1); cmd->cmd[LEN_512 - 1] = 0; - printf("cmd is %s\n",cmd->cmd); } } } @@ -180,16 +177,65 @@ static void parse_config_file(const char *file_name) } } +static void sig_handler(int sig, siginfo_t *info, void *act) +{ + int i; + + for ( i = 0; i < total_mod_num; i++ ) + { + if (child_pids[i] > 0 ) + kill(-child_pids[i], sig); + } + exit(0); +} + +static int register_sig_handler(void) +{ + struct sigaction act; + int ret = -1; + + sigemptyset(&act.sa_mask); + act.sa_flags=SA_SIGINFO; + act.sa_sigaction=sig_handler; + printf("register_sig_handler\n"); + + if ( sigaction(SIGINT, &act, NULL) == 0 || sigaction(SIGQUIT, &act, NULL) == 0 || + sigaction(SIGTERM, &act, NULL) == 0 ) + ret = 0; + + return ret; +} + static void exec_command(void) { int i; + int status; struct command *cmd; + pid_t pid, wpid; for ( i = 0; i < total_mod_num; i++ ) { cmd = cmd_store[i]; - system(cmd->cmd); + pid = fork(); + if (pid < 0) { + continue; + } else if (pid == 0) { + //execl("/usr/bin/sh", "sh", "-c", cmd->cmd, NULL); + //system(cmd->cmd); + execl(cmd->cmd, NULL); + exit(0); + } + child_pids[i] = pid; + } + + if (register_sig_handler()){ + printf("register sig failed!"); + while ((wpid = wait(&status)) > 0); 
+ return; } + while ((wpid = wait(&status)) > 0) + while (1) + sleep(60); } int main(int argc, char **argv) diff --git a/source/tools/monitor/observ/Makefile b/source/tools/monitor/observ/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..6a58b3a5ece546dfb4ae837da8a53425d6927491 --- /dev/null +++ b/source/tools/monitor/observ/Makefile @@ -0,0 +1 @@ +include $(SRC)/mk/sub.mk diff --git a/source/tools/detect/net/rtrace/bpfskel/Cargo.toml b/source/tools/monitor/observ/ntopo/Cargo.toml similarity index 40% rename from source/tools/detect/net/rtrace/bpfskel/Cargo.toml rename to source/tools/monitor/observ/ntopo/Cargo.toml index b2f7e841fde7a53bca7822615fa854356d58fc97..6d2d22d1fbac62db5d7543171bf0e70085caabab 100644 --- a/source/tools/detect/net/rtrace/bpfskel/Cargo.toml +++ b/source/tools/monitor/observ/ntopo/Cargo.toml @@ -1,21 +1,23 @@ [package] -name = "bpfskel" +name = "ntopo" version = "0.1.0" edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -syn = "1.0" -quote = "1.0" -anyhow = "1.0.57" -paste = "1.0" +anyhow = "1.0.71" +chrono = "0.4.26" +libbpf-rs = "0.21.2" +once_cell = "1.17.1" +plain = "0.2.3" +libc = "0.2" structopt = "0.3.26" -libbpf-cargo = "0.13" -libbpf-rs = "0.19" -libbpf-sys = { version = "1.0.3" } -memmap2 = "0.5" -scroll = "0.11" -scroll_derive = "0.11" -num_enum = "0.5" -tempfile = "3.3.0" +log4rs = "1.2.0" +log = "0.4.19" +local-ip-address = "0.5.5" +regex = "1.10.2" + +[build-dependencies] +libbpf-cargo = "0.21.2" +bindgen = "0.60.1" diff --git a/source/tools/monitor/observ/ntopo/Makefile b/source/tools/monitor/observ/ntopo/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..e07f2022f18a977216d492d1166ed4f18b64fb35 --- /dev/null +++ b/source/tools/monitor/observ/ntopo/Makefile @@ -0,0 +1,13 @@ +TARGET_PATH := $(OBJ_TOOLS_ROOT) + +.PHONY: ntopo + +ntopo: bin target_rule + +bin: + cargo build --release + cp 
target/release/ntopo $(TARGET_PATH)/ + +target := ntopo + +include $(SRC)/mk/target.inc \ No newline at end of file diff --git a/source/tools/monitor/observ/ntopo/build.rs b/source/tools/monitor/observ/ntopo/build.rs new file mode 100644 index 0000000000000000000000000000000000000000..8fd6fdf64512b03562f03f5778b241805bacab44 --- /dev/null +++ b/source/tools/monitor/observ/ntopo/build.rs @@ -0,0 +1,28 @@ +use libbpf_cargo::SkeletonBuilder; +use std::env; +use std::path::PathBuf; + +const SRC: &str = "src/bpf/ntopo.bpf.c"; +const HDR: &str = "src/bpf/ntopo.h"; + +fn main() { + let mut out = + PathBuf::from(env::var_os("OUT_DIR").expect("OUT_DIR must be set in build script")); + out.push("ntopo.skel.rs"); + SkeletonBuilder::new() + .source(SRC) + .clang_args("-Wno-compare-distinct-pointer-types") + .build_and_generate(&out) + .unwrap(); + + out.pop(); + out.push("ntopo.rs"); + let bindings = bindgen::Builder::default() + .header(HDR) + .parse_callbacks(Box::new(bindgen::CargoCallbacks)) + .generate() + .unwrap(); + bindings.write_to_file(&out).unwrap(); + println!("cargo:rerun-if-changed={SRC}"); + println!("cargo:rerun-if-changed={HDR}"); +} diff --git a/source/tools/monitor/observ/ntopo/src/bpf/ntopo.bpf.c b/source/tools/monitor/observ/ntopo/src/bpf/ntopo.bpf.c new file mode 100644 index 0000000000000000000000000000000000000000..ccee24f03c54965f37a3d0d043b99ba6c66964cb --- /dev/null +++ b/source/tools/monitor/observ/ntopo/src/bpf/ntopo.bpf.c @@ -0,0 +1,350 @@ + +#define NTOPO_BPF_DEBUG +#define BPF_NO_GLOBAL_DATA + +#include "vmlinux.h" +#include +#include +#include +#include +#include "ntopo.h" + +struct +{ + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, struct sock *); + __type(value, struct sock_info); + __uint(max_entries, 1024000); +} socks SEC(".maps"); + +struct +{ + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, u32); + __type(value, struct pid_info); + __uint(max_entries, 1024); +} pids SEC(".maps"); + +struct +{ + __uint(type, BPF_MAP_TYPE_HASH); + 
__type(key, struct edge_info_key); + __type(value, struct edge_info); + __uint(max_entries, 1024); +} edges SEC(".maps"); + +struct +{ + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, struct node_info_key); + __type(value, struct node_info); + __uint(max_entries, 1024); +} nodes SEC(".maps"); + +static inline void debug_sock_role(struct sock_info *info) +{ + // __bpf_printk("role: %d\n", info->role); +} + +static inline void debug_pid_info(struct pid_info *info) +{ + // __bpf_printk("container id: %s\n", info->container_id); +} + +static inline void set_addr_pair_by_sock(struct sock *sk, struct addrpair *ap) +{ + bpf_probe_read(&ap->daddr, sizeof(ap->daddr), &sk->__sk_common.skc_daddr); + bpf_probe_read(&ap->dport, sizeof(ap->dport), &sk->__sk_common.skc_dport); + bpf_probe_read(&ap->saddr, sizeof(ap->saddr), &sk->__sk_common.skc_rcv_saddr); + bpf_probe_read(&ap->sport, sizeof(ap->sport), &sk->__sk_common.skc_num); + ap->dport = bpf_ntohs(ap->dport); +} + +static inline void update_pid_info(struct pid_info *info) +{ + struct task_struct *curr_task; + struct kernfs_node *knode, *pknode; + + info->valid = 1; + bpf_get_current_comm(info->comm, sizeof(info->comm)); + curr_task = (struct task_struct *)bpf_get_current_task(); + knode = BPF_CORE_READ(curr_task, cgroups, subsys[0], cgroup, kn); + pknode = BPF_CORE_READ(knode, parent); + if (pknode != NULL) + { + char *aus; + bpf_core_read(&aus, sizeof(void *), &knode->name); + bpf_core_read_str(info->container_id, 128, aus); +#ifdef NTOPO_BPF_DEBUG + debug_pid_info(info); +#endif + } + else + info->container_id[0] = '\0'; +} + +static inline enum role get_sock_role(struct sock_info *info, struct sock *sk) +{ + if (info->role == ROLE_UNKNOWN) + { + int max_ack_backlog = 0; + bpf_probe_read(&max_ack_backlog, sizeof(max_ack_backlog), &sk->sk_max_ack_backlog); + + info->role = max_ack_backlog == 0 ? 
ROLE_CLIENT : ROLE_SERVER; + } + return info->role; +} + +static inline void update_edges(struct sock_info *info, int role) +{ + struct edge_info_key key = {0}; + struct edge_info val = {0}; + + key.saddr = info->ap.saddr; + key.daddr = info->ap.daddr; + if (role == ROLE_SERVER) + { + int tmp = key.saddr; + key.saddr = key.daddr; + key.daddr = tmp; + } + bpf_map_update_elem(&edges, &key, &val, BPF_ANY); +} + +static inline void update_nodes(struct sock_info *info, int role, int rt) +{ + struct node_info_key key = {0}; + key.addr = info->ap.saddr; + + struct node_info *ninfo = bpf_map_lookup_elem(&nodes, &key); + if (!ninfo) + { + struct node_info val = {0}; + bpf_map_update_elem(&nodes, &key, &val, BPF_ANY); + } + + ninfo = bpf_map_lookup_elem(&nodes, &key); + if (!ninfo) + return; + + ninfo->in_bytes += info->in_bytes; + ninfo->out_bytes += info->out_bytes; + info->in_bytes = 0; + info->out_bytes = 0; + ninfo->pid = info->pid; + ninfo->requests++; + if (role == ROLE_CLIENT) + { + ninfo->client_tot_rt_hz += 1; + ninfo->client_tot_rt_us += rt; + if (rt > ninfo->client_max_rt_us) + { + ninfo->client_addr = info->ap.saddr; + ninfo->server_addr = info->ap.daddr; + ninfo->sport = info->ap.sport; + ninfo->dport = info->ap.dport; + ninfo->client_max_rt_us = rt; + } + } + else + { + ninfo->server_tot_rt_hz += 1; + ninfo->server_tot_rt_us += rt; + if (rt > ninfo->server_max_rt_us) + { + ninfo->server_addr = info->ap.saddr; + ninfo->client_addr = info->ap.daddr; + ninfo->sport = info->ap.dport; + ninfo->dport = info->ap.sport; + ninfo->server_max_rt_us = rt; + } + } + +} + +static inline bool try_add_sock(struct sock *sk) +{ + u64 tgid = bpf_get_current_pid_tgid(); + u32 pid = tgid >> 32; + struct pid_info *pinfop; + pinfop = bpf_map_lookup_elem(&pids, &pid); + if (!pinfop) + return false; + if (!pinfop->valid) + update_pid_info(pinfop); + + struct sock_info info = {0}; + info.pid = pid; + set_addr_pair_by_sock(sk, &info.ap); + if (info.ap.saddr == info.ap.daddr) + return 
false; + + info.role = get_sock_role(&info, sk); + // if (info.role == ROLE_SERVER) { + // int tmp; + // tmp = info.ap.saddr; + // info.ap.saddr = info.ap.daddr; + // info.ap.daddr = tmp; + // } + bpf_map_update_elem(&socks, &sk, &info, BPF_ANY); +#ifdef NTOPO_BPF_DEBUG + debug_sock_role(&info); +#endif + return true; +} + +static inline void handle_client_send_request(struct sock_info *info) +{ + if (info->egress_min != 0 && info->ingress_min != 0) + { + u32 rt_us = (info->ingress_max - info->egress_min) / 1000; + info->ingress_min = 0; + info->egress_min = 0; + update_nodes(info, ROLE_CLIENT, rt_us); + update_edges(info, ROLE_CLIENT); + } +} + +static inline void handle_client_recv_response(struct sock_info *info) +{ + // do nothing +} + +static inline void handle_server_recv_request(struct sock_info *info) +{ + if (info->egress_min != 0 && info->ingress_min != 0) + { + u32 rt_us = (info->egress_max - info->ingress_min) / 1000; + info->ingress_min = 0; + info->egress_min = 0; + update_nodes(info, ROLE_SERVER, rt_us); + update_edges(info, ROLE_SERVER); + } +} + +static inline void handle_server_send_response(struct sock_info *info) +{ +} + +static inline void handle_client_close(struct sock_info *info) +{ + handle_client_send_request(info); +} + +static inline void handle_server_close(struct sock_info *info) +{ + handle_server_recv_request(info); +} + +// int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) +SEC("kprobe/tcp_sendmsg_locked") +int BPF_KPROBE(kprobe_tcp_sendmsg_locked, struct sock *sk, struct msghdr *msg, size_t size) +{ + if (size == 0) + return 0; + struct sock_info *info = bpf_map_lookup_elem(&socks, &sk); + if (!info) + if (try_add_sock(sk)) + info = bpf_map_lookup_elem(&socks, &sk); + if (!info) + return 0; + enum role role = get_sock_role(info, sk); + if (role == ROLE_CLIENT) + handle_client_send_request(info); + else + handle_server_send_response(info); + + u64 ts = bpf_ktime_get_ns(); + info->out_bytes += size; + if 
(info->egress_min == 0) + info->egress_min = ts; + info->egress_max = ts; + return 0; +} + +struct tcp_rcv_space_adjust_args +{ + u32 pad[2]; + struct sock *sk; +}; + +SEC("tracepoint/tcp/tcp_rcv_space_adjust") +int tracepoint_tcp_rcv_space_adjust(struct tcp_rcv_space_adjust_args *ctx) +{ + struct sock *sk = ctx->sk; + struct sock_info *info = bpf_map_lookup_elem(&socks, &sk); + if (!info) + if (try_add_sock(sk)) + info = bpf_map_lookup_elem(&socks, &sk); + if (!info) + return 0; + + struct tcp_sock *tp = sk; + u32 copied_seq, seq; + bpf_probe_read(&copied_seq, sizeof(copied_seq), &tp->copied_seq); + bpf_probe_read(&seq, sizeof(copied_seq), &tp->rcvq_space.seq); + + enum role role = get_sock_role(info, sk); + if (role == ROLE_CLIENT) + handle_client_recv_response(info); + else + handle_server_recv_request(info); + + u64 ts = bpf_ktime_get_ns(); + info->in_bytes += (copied_seq - seq); + if (info->ingress_min == 0) + info->ingress_min = ts; + info->ingress_max = ts; + return 0; +} + +#if 0 + +SEC("kprobe/tcp_cleanup_rbuf") +int BPF_KPROBE(kprobe_tcp_cleanup_rbuf, struct sock *sk, int copied) +{ + if (copied == 0) + return 0; + struct sock_info *info = bpf_map_lookup_elem(&socks, &sk); + if (!info) + if (try_add_sock(sk)) + info = bpf_map_lookup_elem(&socks, &sk); + if (!info) + return 0; + + enum role role = get_sock_role(info, sk); + if (role == ROLE_CLIENT) + handle_client_recv_response(info); + else + handle_server_recv_request(info); + + u64 ts = bpf_ktime_get_ns(); + info->in_bytes += copied; + if (info->ingress_min == 0) + info->ingress_min = ts; + info->ingress_max = ts; + return 0; +} + +#endif + +// void tcp_close(struct sock *sk, long timeout); +SEC("kprobe/tcp_close") +int BPF_KPROBE(kprobe_tcp_close, struct sock *sk) +{ + struct sock_info *info = bpf_map_lookup_elem(&socks, &sk); + if (!info) + return 0; + + u64 ts = bpf_ktime_get_ns(); + enum role role = get_sock_role(info, sk); + if (role == ROLE_CLIENT) + handle_client_close(info); + else + 
handle_server_close(info); + + bpf_map_delete_elem(&socks, &sk); + return 0; +} + +char _license[] SEC("license") = "GPL"; \ No newline at end of file diff --git a/source/tools/monitor/observ/ntopo/src/bpf/ntopo.h b/source/tools/monitor/observ/ntopo/src/bpf/ntopo.h new file mode 100644 index 0000000000000000000000000000000000000000..230bfc860bb8ad5d6cc31bdac81b72a82e459687 --- /dev/null +++ b/source/tools/monitor/observ/ntopo/src/bpf/ntopo.h @@ -0,0 +1,84 @@ +#ifndef NTOPO_H +#define NTOPO_H + +enum role +{ + ROLE_UNKNOWN, + ROLE_CLIENT, + ROLE_SERVER, +}; + +struct addrpair +{ + unsigned int saddr; + unsigned int daddr; + unsigned short sport; + unsigned short dport; +}; + +struct sock_info +{ + struct addrpair ap; + enum role role; + + unsigned int pid; + unsigned long long ingress_min; + unsigned long long ingress_max; + unsigned long long egress_min; + unsigned long long egress_max; + unsigned long long in_bytes; + unsigned long long out_bytes; +}; + +struct edge_info_key +{ + unsigned int saddr; + unsigned int daddr; +}; + +struct edge_info +{ + int empty; +}; + +struct node_info_key +{ + unsigned int addr; +}; + +struct node_info +{ + unsigned long long in_bytes; + unsigned long long out_bytes; + + unsigned int pid; + unsigned int client_addr; + unsigned int server_addr; + unsigned short sport; + unsigned short dport; + unsigned int client_max_rt_us; + unsigned int client_min_rt_us; + unsigned int client_tot_rt_us; + unsigned int client_tot_rt_hz; + + unsigned int server_max_rt_us; + unsigned int server_min_rt_us; + unsigned int server_tot_rt_us; + unsigned int server_tot_rt_hz; + + unsigned int requests; +}; + +struct pid_info +{ + unsigned int valid; + unsigned char comm[16]; + unsigned char container_id[128]; +}; + +struct config +{ + int latency_threshold; // ms +}; + +#endif \ No newline at end of file diff --git a/source/tools/detect/net/rtrace/retran/src/bpf/vmlinux.h b/source/tools/monitor/observ/ntopo/src/bpf/vmlinux.h similarity index 44% rename 
from source/tools/detect/net/rtrace/retran/src/bpf/vmlinux.h rename to source/tools/monitor/observ/ntopo/src/bpf/vmlinux.h index 8ef5cc3b7aa30651c7830d79afcf99edcc28ee1b..53ad51e4f8454829043cd2cdde7dfa5a721207a1 100644 --- a/source/tools/detect/net/rtrace/retran/src/bpf/vmlinux.h +++ b/source/tools/monitor/observ/ntopo/src/bpf/vmlinux.h @@ -2,9 +2,9 @@ #ifndef __VMLINUX_ARCH_H__ #define __VMLINUX_ARCH_H__ #if defined(__TARGET_ARCH_x86) - #include "../../../../../../../lib/internal/ebpf/coolbpf/arch/x86_64/vmlinux.h" + #include "../../../../../../lib/internal/ebpf/coolbpf/arch/x86_64/vmlinux.h" #elif defined(__TARGET_ARCH_arm64) - #include "../../../../../../../lib/internal/ebpf/coolbpf/arch/aarch64/vmlinux.h" + #include "../../../../../../lib/internal/ebpf/coolbpf/arch/aarch64/vmlinux.h" #endif #endif \ No newline at end of file diff --git a/source/tools/monitor/observ/ntopo/src/edge.rs b/source/tools/monitor/observ/ntopo/src/edge.rs new file mode 100644 index 0000000000000000000000000000000000000000..75a2349b56a41bb2ec6bf381da02469718ce7069 --- /dev/null +++ b/source/tools/monitor/observ/ntopo/src/edge.rs @@ -0,0 +1,62 @@ +use std::net::Ipv4Addr; + +use crate::edge_info_key; +use libbpf_rs::MapHandle; + +use crate::node::AppKind; + +pub struct Edge { + pub client_ip: String, + pub server_ip: String, + pub app: AppKind, + pub request_bytes: u64, + pub response_bytes: u64, +} + +impl Edge { + pub fn to_line_protocol(&self) -> String { + format!( + "sysom_metrics_ntopo_edge,ClientIp={},ServerIp={},LinkId={}{},ReqBytes={},RespBytes={},APP={} Value=1", + self.client_ip, + self.server_ip, + self.client_ip, + self.server_ip, + self.request_bytes, + self.response_bytes, + self.app, + ) + } + + pub fn from_key_and_value(_: &String, key_bytes: &Vec, _: &Vec) -> Self { + let (head, body, _tail) = unsafe { key_bytes.align_to::() }; + assert!(head.is_empty(), "Data was not aligned"); + let key = &body[0]; + + let client_ip = Ipv4Addr::from(u32::from_be(key.saddr)).to_string(); 
+ let server_ip = Ipv4Addr::from(u32::from_be(key.daddr)).to_string(); + + Edge { + client_ip, + server_ip, + app: AppKind::Mysql, + request_bytes: 0, + response_bytes: 0, + } + } +} + +pub fn get_edges(local_ip: &String, map: &MapHandle) -> Vec { + let mut res = vec![]; + for key in map.keys() { + if let Some(val) = map + .lookup(&key, libbpf_rs::MapFlags::ANY) + .expect("failed to lookup edges map") + { + res.push(Edge::from_key_and_value(local_ip, &key, &val)); + + map.delete(&key).expect("failed to delete edges map"); + } + } + + res +} diff --git a/source/tools/monitor/observ/ntopo/src/main.rs b/source/tools/monitor/observ/ntopo/src/main.rs new file mode 100644 index 0000000000000000000000000000000000000000..ea3749141b44a33b7740323061b115cb4ffe4577 --- /dev/null +++ b/source/tools/monitor/observ/ntopo/src/main.rs @@ -0,0 +1,121 @@ +include!(concat!(env!("OUT_DIR"), "/ntopo.skel.rs")); +include!(concat!(env!("OUT_DIR"), "/ntopo.rs")); + +use anyhow::{bail, Result}; +use libbpf_rs::skel::*; +use structopt::StructOpt; +mod edge; +mod node; +mod pid; +use local_ip_address::local_ip; +mod sock; + +use crate::{ + edge::get_edges, + node::get_nodes, + pid::Pids, + sock::{unix_sock_recv, unix_sock_send}, +}; + +#[derive(Debug, StructOpt)] +#[structopt(name = "ntopo", about = "network topology and response time")] +pub struct Command { + #[structopt(long, help = "tracing pid list")] + pids: Vec, + #[structopt(long, help = "Enable tracing mysql")] + enable_mysql: bool, + #[structopt(long, default_value = "16", help = "size of ntopo node table")] + nodes: u32, + #[structopt(long, default_value = "32", help = "size of ntopo edge table")] + edges: u32, + #[structopt(short, long, help = "Verbose debug output")] + verbose: bool, +} + +fn update_pids_map(map: &mut libbpf_rs::Map, pid: u32) { + let val = [0; std::mem::size_of::()]; + map.update(&pid.to_ne_bytes(), &val, libbpf_rs::MapFlags::ANY) + .expect("Failed to update pids map"); +} + +fn bump_memlock_rlimit() -> 
Result<()> { + let rlimit = libc::rlimit { + rlim_cur: libc::RLIM_INFINITY, + rlim_max: libc::RLIM_INFINITY, + }; + + if unsafe { libc::setrlimit(libc::RLIMIT_MEMLOCK, &rlimit) } != 0 { + bail!("Failed to increase rlimit"); + } + + Ok(()) +} + +fn main() { + bump_memlock_rlimit().unwrap(); + let opts = Command::from_args(); + + let mut builder = NtopoSkelBuilder::default(); + builder.obj_builder.debug(opts.verbose); + + let mut open_skel = builder.open().unwrap(); + open_skel + .maps_mut() + .edges() + .set_max_entries(opts.edges) + .expect("failed to set edge table size"); + open_skel + .maps_mut() + .nodes() + .set_max_entries(opts.nodes) + .expect("failed to set node table size"); + let mut skel = open_skel.load().expect("failed to load ntopo"); + + for pid in &opts.pids { + update_pids_map(skel.maps_mut().pids(), *pid); + } + + let pidsmap = libbpf_rs::MapHandle::try_clone(skel.maps().pids()).unwrap(); + let nodesmap = libbpf_rs::MapHandle::try_clone(skel.maps().nodes()).unwrap(); + let edgesmap = libbpf_rs::MapHandle::try_clone(skel.maps().edges()).unwrap(); + + skel.attach().unwrap(); + + let pidsmap_for_thread = libbpf_rs::MapHandle::try_clone(skel.maps().pids()).unwrap(); + + let _ = std::thread::spawn(move || { + unix_sock_recv(&pidsmap_for_thread, "/var/ntopo"); + }); + + let local_ip = local_ip().unwrap().to_string(); + + println!("This is my local IP address: {:?}", local_ip); + + let mut pids = Pids::default(); + + loop { + std::thread::sleep(std::time::Duration::from_secs(30)); + + pids.update(&pidsmap); + + let mut nodes = get_nodes(&local_ip, &nodesmap); + let edges = get_edges(&local_ip, &edgesmap); + + let mut data = vec![]; + + for n in &mut nodes { + n.set_pid_info(&pids); + } + + for n in &nodes { + data.push(n.to_line_protocol()); + } + + for e in &edges { + data.push(e.to_line_protocol()); + } + + let data_string = data.join("\n"); + unix_sock_send("/var/sysom/outline", &data_string); + } +} diff --git 
a/source/tools/monitor/observ/ntopo/src/node.rs b/source/tools/monitor/observ/ntopo/src/node.rs new file mode 100644 index 0000000000000000000000000000000000000000..b4b1d68c8e6140e2c82dc088144096281ef28884 --- /dev/null +++ b/source/tools/monitor/observ/ntopo/src/node.rs @@ -0,0 +1,191 @@ +use libbpf_rs::MapHandle; + +use crate::node_info; +use crate::node_info_key; +use crate::pid::Pids; +use std::cmp::Ordering; +use std::fmt; +use std::net::Ipv4Addr; + +pub enum NodeKind { + Pod, + Node, +} + +impl fmt::Display for NodeKind { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let s = match self { + NodeKind::Pod => "Pod", + NodeKind::Node => "Node", + }; + write!(f, "{s}") + } +} + +pub enum AppKind { + Other, + Mysql, +} + +impl fmt::Display for AppKind { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let s = match self { + AppKind::Other => "OTHER", + AppKind::Mysql => "MYSQL", + }; + write!(f, "{s}") + } +} + +pub enum IconKind { + Node, + Mysql, +} + +impl fmt::Display for IconKind { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let s = match self { + IconKind::Node => "NODE", + IconKind::Mysql => "MYSQL", + }; + write!(f, "{s}") + } +} + +pub struct Node { + ip: String, + kind: NodeKind, + pod: String, + pid: u32, + comm: String, + poduuid: String, + container_id: String, + namespace: String, + app: AppKind, + title: String, + icon: IconKind, + + in_bytes: i64, + out_bytes: i64, + + server_ip: String, + client_max_rt: u32, + client_avg_rt: u32, + client_min_rt: u32, + + client_ip: String, + server_max_rt: u32, + server_avg_rt: u32, + server_min_rt: u32, + sport: u16, + dport: u16, + requests: u32, +} + +impl Node { + pub fn to_line_protocol(&self) -> String { + format!( + "sysom_metrics_ntopo_node,Ip={},Kind={},Pod={},Pid={},Comm={},Title={},Icon={},PodUUID={},ContainerID={},NameSpace={},APP={},InBytes={},OutBytes={},MaxRT={},Connection={},AvgRT={},Requests={} Value=1", + self.ip, + self.kind, + self.pod, + 
self.pid, + self.comm, + if let NodeKind::Pod = self.kind { + self.pod.clone() + } else { + format!("Node({})", self.ip) + }, + self.icon, + if self.poduuid.is_empty() { "NULL".to_owned() } else { self.poduuid.clone() }, + if self.container_id.is_empty() { "NULL".to_owned() } else { self.container_id.clone() }, + self.namespace, + self.app, + self.in_bytes, + self.out_bytes, + std::cmp::max(self.client_max_rt, self.server_max_rt), + format!("{}:{}->{}:{}", self.client_ip, self.sport, self.server_ip, self.dport), + std::cmp::max(self.client_avg_rt, self.server_avg_rt), + self.requests, + ) + } + + pub fn set_pid_info(&mut self, pids: &Pids) { + if let Some(pi) = pids.pids.get(&self.pid) { + self.container_id = pi.container_id.clone(); + self.comm = pi.comm.clone(); + self.poduuid = pi.podid.clone(); + } + } + + pub fn from_key_and_value(local_ip: &String, key_bytes: &Vec, val_bytes: &Vec) -> Self { + let (head, body, _tail) = unsafe { key_bytes.align_to::() }; + assert!(head.is_empty(), "Data was not aligned"); + let key = &body[0]; + + let (head, body, _tail) = unsafe { val_bytes.align_to::() }; + assert!(head.is_empty(), "Data was not aligned"); + let info = &body[0]; + + let ip = Ipv4Addr::from(u32::from_be(key.addr)).to_string(); + let kind; + if ip.cmp(local_ip) == Ordering::Equal { + kind = NodeKind::Node; + } else { + kind = NodeKind::Pod; + } + + Node { + ip, + kind, + pid: info.pid, + comm: Default::default(), + pod: Default::default(), + poduuid: Default::default(), + container_id: Default::default(), + namespace: Default::default(), + app: AppKind::Mysql, + title: Default::default(), + icon: IconKind::Mysql, + + in_bytes: info.in_bytes as i64, + out_bytes: info.out_bytes as i64, + + server_ip: Ipv4Addr::from(u32::from_be(info.server_addr)).to_string(), + client_max_rt: info.client_max_rt_us, + client_avg_rt: if info.client_tot_rt_hz == 0 { + 0 + } else { + info.client_tot_rt_us / info.client_tot_rt_hz + }, + client_min_rt: info.client_min_rt_us, + + 
client_ip: Ipv4Addr::from(u32::from_be(info.client_addr)).to_string(), + server_max_rt: info.server_max_rt_us, + server_avg_rt: if info.server_tot_rt_hz == 0 { + 0 + } else { + info.server_tot_rt_us / info.server_tot_rt_hz + }, + server_min_rt: info.server_min_rt_us, + sport: info.sport, + dport: info.dport, + requests: info.requests, + } + } +} + +pub fn get_nodes(local_ip: &String, map: &MapHandle) -> Vec { + let mut res = vec![]; + for key in map.keys() { + if let Some(val) = map + .lookup(&key, libbpf_rs::MapFlags::ANY) + .expect("failed to lookup pid map") + { + res.push(Node::from_key_and_value(local_ip, &key, &val)); + map.delete(&key).expect("failed to delete nodes map"); + } + } + + res +} diff --git a/source/tools/monitor/observ/ntopo/src/pid.rs b/source/tools/monitor/observ/ntopo/src/pid.rs new file mode 100644 index 0000000000000000000000000000000000000000..6781c45fd59a92f6390334702f4ec0385e45f90f --- /dev/null +++ b/source/tools/monitor/observ/ntopo/src/pid.rs @@ -0,0 +1,238 @@ +use crate::pid_info; +use libbpf_rs::MapHandle; +use std::collections::HashMap; + +use regex::Regex; +use std::fs; +use std::io::Error; +use std::path::Path; +use std::process::Command; + +const CONTAINER_TYPE_DOCKER: usize = 0; +const CONTAINER_TYPE_CRI_CONTAINERD: usize = 1; +const CONTAINER_TYPE_CRIO: usize = 2; +const CONTAINER_TYPE_K8S_OTHER: usize = 3; +const CONTAINER_TYPE_UNKNOWN: usize = 4; + +const CONTAINER_TYPE_STR: [&str; 5] = ["docker", "cri-containerd", "crio", "kubepods", "unknown"]; + +const CONTAINER_TYPE_REGEX_STR: [&str; 5] = ["docker", "cri-containerd", "crio", "\\S+", "unknown"]; + +struct RegMatch { + burstable_path_regex: Regex, + besteffort_path_regex: Regex, + guaranteed_path_regex: Regex, +} + +impl RegMatch { + fn is_match(&self, path: &str) -> bool { + if path.contains("burstable") { + self.burstable_path_regex.is_match(path) + } else if path.contains("besteffort") { + self.besteffort_path_regex.is_match(path) + } else { + 
self.guaranteed_path_regex.is_match(path) + } + } +} + +fn get_container_type(path: &str) -> &str { + if path.contains(CONTAINER_TYPE_STR[CONTAINER_TYPE_DOCKER]) { + return CONTAINER_TYPE_REGEX_STR[CONTAINER_TYPE_DOCKER]; + } else if path.contains(CONTAINER_TYPE_STR[CONTAINER_TYPE_CRI_CONTAINERD]) { + return CONTAINER_TYPE_REGEX_STR[CONTAINER_TYPE_CRI_CONTAINERD]; + } else if path.contains(CONTAINER_TYPE_STR[CONTAINER_TYPE_CRIO]) { + return CONTAINER_TYPE_REGEX_STR[CONTAINER_TYPE_CRIO]; + } else if path.contains(CONTAINER_TYPE_STR[CONTAINER_TYPE_K8S_OTHER]) { + return CONTAINER_TYPE_REGEX_STR[CONTAINER_TYPE_K8S_OTHER]; + } + return CONTAINER_TYPE_REGEX_STR[CONTAINER_TYPE_UNKNOWN]; +} + +fn get_cgroup_matcher(path: &str) -> Option { + let pod_regex = "[0-9a-f]{8}[-_][0-9a-f]{4}[-_][0-9a-f]{4}[-_][0-9a-f]{4}[-_][0-9a-f]{12}"; + let container_regex = "[0-9a-f]{64}"; + + let container_type = get_container_type(path); + if container_type == CONTAINER_TYPE_REGEX_STR[CONTAINER_TYPE_UNKNOWN] { + return None; + } + + let mut matcher = RegMatch { + burstable_path_regex: Regex::new(&format!( + "^.*kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod{}.slice/{}-{}.scope/cgroup.procs$", + pod_regex, container_type, container_regex + )) + .unwrap(), + besteffort_path_regex: Regex::new(&format!( + "^.*kubepods.slice/kubepods-besteffort.slice/kubepods-besteffort-pod{}.slice/{}-{}.scope/cgroup.procs$", + pod_regex, container_type, container_regex + )) + .unwrap(), + guaranteed_path_regex: Regex::new(&format!( + "^.*kubepods.slice/kubepods-pod{}.slice/{}-{}.scope/cgroup.procs$", + pod_regex, container_type, container_regex + )) + .unwrap(), + }; + + if matcher.is_match(path) { + return Some(matcher); + } else { + matcher.guaranteed_path_regex = Regex::new(&format!( + "^.*kubepods/pod{}/{}-{}\\.+/cgroup\\.procs$", + pod_regex, container_type, container_regex + )) + .unwrap(); + matcher.besteffort_path_regex = Regex::new(&format!( + 
"^.*kubepods/besteffort/pod{}/{}-{}\\.+/cgroup\\.procs$", + pod_regex, container_type, container_regex + )) + .unwrap(); + matcher.burstable_path_regex = Regex::new(&format!( + "^.*kubepods/burstable/pod{}/{}-{}\\.+/cgroup\\.procs$", + pod_regex, container_type, container_regex + )) + .unwrap(); + if matcher.is_match(path) { + return Some(matcher); + } else { + matcher.guaranteed_path_regex = Regex::new(&format!( + "^.*docker/{}-{}/cgroup\\.procs$", + container_type, container_regex + )) + .unwrap(); + if matcher.is_match(path) { + return Some(matcher); + } + } + } + None +} + +fn get_cgroup_path(pid: i32, t: &str) -> Result { + let data = match fs::read_to_string(format!("/proc/{}/cgroup", pid)) { + Ok(content) => content, + Err(err) => return Err(err.to_string()), + }; + let lines: Vec<&str> = data.split('\n').collect(); + for line in lines { + let parts: Vec<&str> = line.split(':').collect(); + if parts.len() < 3 { + continue; + } + if parts[1] == t { + return Ok(parts[2].to_string()); + } + } + Err(format!("cgroup not found for pid {}", pid)) +} + +fn get_container_id_by_pid(pid: i32) -> Option> { + let mut container_id = String::new(); + let mut pod_id = String::new(); + let path = match get_cgroup_path(pid, "cpu,cpuacct") { + Ok(path) => path, + Err(_) => return None, + }; + let matcher = get_cgroup_matcher(&format!("{}/cgroup.procs", path)); + if matcher.is_none() { + return None; + } + let container_regex = "[0-9a-f]{64}"; + let re_container = Regex::new(container_regex).unwrap(); + container_id = re_container.find(&path)?.as_str().to_string(); + let pod_regex = "[0-9a-f]{8}[-_][0-9a-f]{4}[-_][0-9a-f]{4}[-_][0-9a-f]{4}[-_][0-9a-f]{12}"; + let re_pod = Regex::new(pod_regex).unwrap(); + pod_id = re_pod.find(&path)?.as_str().to_string(); + Some(vec![container_id, pod_id]) +} + +#[derive(Debug, Default)] +pub struct Pid { + pub pid: u32, + pub comm: String, + pub container_id: String, + pub podid: String, +} + +impl Pid { + pub fn from_key_and_value(key_bytes: 
&Vec, val_bytes: &Vec) -> Self { + let (head, body, _tail) = unsafe { val_bytes.align_to::() }; + assert!(head.is_empty(), "Data was not aligned"); + let info = &body[0]; + + let pid_arr = [key_bytes[0], key_bytes[1], key_bytes[2],key_bytes[3]]; + let pid = u32::from_ne_bytes(pid_arr); + + // 0-9a-f + let mut container_id = unsafe { + String::from_utf8_unchecked(info.container_id.to_vec()) + .trim_matches(char::from(0)) + .to_owned() + }; + + let mut start = 0; + loop { + let mut matches = 0; + for c in container_id[start..].chars() { + if matches == 12 { + break; + } + if (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') { + matches += 1; + continue; + } + break; + } + + if matches != 12 { + if start >= container_id.len() { + break; + } + } else { + container_id = container_id[start..(start + 12)].to_owned(); + break; + } + start += 1; + } + + let mut podid = String::default(); + if let Some(ids) = get_container_id_by_pid(pid as i32) { + podid = ids[1].clone(); + } + + Pid { + pid, + comm: unsafe { + String::from_utf8_unchecked(info.comm.to_vec()) + .trim_matches(char::from(0)) + .to_owned() + }, + container_id, + podid, + } + } +} + +#[derive(Debug, Default)] +pub struct Pids { + pub pids: HashMap, +} + +impl Pids { + pub fn update(&mut self, map: &MapHandle) { + for key in map.keys() { + if let Some(val) = map + .lookup(&key, libbpf_rs::MapFlags::ANY) + .expect("failed to lookup pid map") + { + let pid = Pid::from_key_and_value(&key, &val); + + let pid_arr = [key[0], key[1], key[2], key[3]]; + let pid_num = u32::from_ne_bytes(pid_arr); + self.pids.insert(pid_num, pid); + } + } + } +} diff --git a/source/tools/monitor/observ/ntopo/src/sock.rs b/source/tools/monitor/observ/ntopo/src/sock.rs new file mode 100644 index 0000000000000000000000000000000000000000..501b8c09cdd5896335544aaa7f6c00b63b39f243 --- /dev/null +++ b/source/tools/monitor/observ/ntopo/src/sock.rs @@ -0,0 +1,67 @@ +use std::cmp::Ordering; +use std::io::Read; +use std::os::unix::net::UnixDatagram; 
+use std::os::unix::net::UnixListener; + +use libbpf_rs::MapHandle; + +pub fn unix_sock_send(addr: &str, data: &String) { + if data.len() == 0 { + return; + } + let sock = UnixDatagram::unbound().expect("failed to create unix sock"); + match sock.connect(addr) { + Ok(()) => { + if let Err(e) = sock.send(&data.as_bytes()) { + println!("failed to send data to sock: {addr}, error: {e}, data: {data}"); + } + } + Err(e) => { + println!("failed to connnect to sock: {addr}, error: {e}, data: {data}"); + } + } +} + +pub fn unix_sock_recv(pidsmap: &MapHandle, addr: &str) { + let _ = std::fs::remove_file(addr); + let listen = UnixListener::bind(addr).expect("failed to listen unix sock"); + + for stream in listen.incoming() { + let mut stream = stream.unwrap(); + + let mut recvstring = String::new(); + stream.read_to_string(&mut recvstring).unwrap(); + + let slices: Vec<&str> = recvstring.split(",").collect(); + + if slices.len() == 0 { + continue; + } + + let mut pid_array = vec![]; + + if slices[0].cmp("mysql") == Ordering::Equal { + for pid in &slices[1..] { + let pid_num: u32 = pid.parse::().unwrap(); + pid_array.push(pid_num); + } + + let len = pid_array[0] as usize; + if len != pid_array.len() - 1 { + println!("data format is not right"); + } else { + for key in pidsmap.keys() { + pidsmap.delete(&key).expect("failed to delete pidsmap key"); + } + + let val = vec![0; std::mem::size_of::()]; + + for pid in &pid_array[1..] 
{ + pidsmap + .update(&pid.to_ne_bytes(), &val, libbpf_rs::MapFlags::NO_EXIST) + .expect("failed to update pidsmap"); + } + } + } + } +} diff --git a/source/tools/monitor/observ/sql-obs/Makefile b/source/tools/monitor/observ/sql-obs/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..26e2062941ba0d00a6334165971dd5ce30c02805 --- /dev/null +++ b/source/tools/monitor/observ/sql-obs/Makefile @@ -0,0 +1,3 @@ +target := sql-obs + +include $(SRC)/mk/go.mk \ No newline at end of file diff --git a/source/tools/monitor/observ/sql-obs/README.MD b/source/tools/monitor/observ/sql-obs/README.MD new file mode 100644 index 0000000000000000000000000000000000000000..0a87ee292c01297774fdc922d74a6de6657ac3b0 --- /dev/null +++ b/source/tools/monitor/observ/sql-obs/README.MD @@ -0,0 +1,77 @@ +# 功能说明 + +mysql应用观测服务agent,以应用为视角,集指标、追踪、事件分析、日志分析的Mysql可观测程序,针对mysql服务端场景,具备收集mysql应用、系统侧监控指标,异常事件、以及打通SysOM事件根因诊断分析的能力 + +# 功能依赖 + +sql-obs依赖sysak unity监控组件,在机器上部署之后,可直接通过systemctrl start sysak的方式启动 + +- 此程序为mysql应用观测agent,应用观测的最终UI界面呈现,安装SysOM使用 + + +# 使用说明 + +在unity的yaml配置文件中,路径:source/tools/monitor/unity/etc/base.yaml中,forkRun标签中添加sql-obs的启动配置,此外由于mysql应用侧指标需要事先配置mysql数据库监控账户信息,因此默认情况下不会采集mysql应用侧指标 +配置启动sql-obs如下: + +``` +forkRun: +... + - cmd: "../../../sql-obs" + args: + ["-y", "/etc/sysak/base.yaml"] +... 
+``` + +# 功能详情 + +## 指标说明 + +### 异常告警统计指标 + +- Alarm_Process_Mysql_Error_Type, mysql错误告警次数统计 +- Alarm_Process_Mysql_Slow_Sql_Type, mysql慢查询次数统计 +- Alarm_Process_OOM_Type, mysql被系统OOM杀掉次数统计 +- Alarm_Process_RT_Type, mysql RT高次数统计 +- Alarm_Process_Sched_Delay_Type, mysql调度延迟高次数统计 +- Alarm_Direct_Reclaim_Type, mysql内存申请被阻塞次数统计 +- Alarm_Process_CPU_HIGH_Type, mysql CPU高占用次数统计 + +### Mysql资源消耗指标统计 + +- Mysql连接池使用详情 +- Mysql线程池使用详情 +- Mysql CPU占用率 +- Mysql CPU让出率 +- Mysql Undolog链表长度&长事务统计 +- Mysql buffer pool使用详情 +- Mysql内存使用布局 +- Mysql redolog使用详情 + +### Mysql延迟相关指标 + +- Mysql RT指标 +- Mysql请求详情表 +- Mysql等待IO资源延迟 +- Mysql申请OS内存延迟 +- Mysql OS调度延迟 +- Mysql IO处理延迟分布 + +### Mysql吞吐带宽相关指标 + +- Mysql网络吞吐 +- Mysql请求数 +- Mysql IO吞吐 +- Mysql OS脏页统计 + +## 异常事件说明 + +### 应用侧事件 + +- 慢查询事件检测、原因分析、SysOM根因诊断联动 + +### 系统侧事件 + +- Mysql IO高事件、IO延迟高事件检测、原因分析、SysOM根因诊断联动 +- Mysql RT高事件、调度延迟高、CPU占用高事件检测、原因分析、SysOM根因诊断联动 +- Mysql OOM事件、申请内存受阻事件检测、原因分析、SysOM根因诊断联动 diff --git a/source/tools/monitor/observ/sql-obs/common/bucketPercentile.go b/source/tools/monitor/observ/sql-obs/common/bucketPercentile.go new file mode 100644 index 0000000000000000000000000000000000000000..d45227833b74745bb60420e779d89eae639034e6 --- /dev/null +++ b/source/tools/monitor/observ/sql-obs/common/bucketPercentile.go @@ -0,0 +1,38 @@ +package common + +var threshold = []uint64{ + 1000000, 2000000, 3000000, 4000000, 6000000, 10000000, + 15000000, 22000000, 34000000, 51000000, 100000000, + 170000000, 250000000, 410000000, 750000000, 1000000000, + 1300000000, 1700000000, 2500000000, 3500000000, 5000000000, + 7000000000, 10000000000, +} + +func BuildBucket() *[]uint64 { + bucket := make([]uint64, len(threshold)) + for index := 0; index < len(threshold); index++ { + bucket[index] = 0 + } + return &bucket +} + +func InsertBucket(value uint64, bucket *[]uint64) { + for index, v := range threshold { + if value <= v { + (*bucket)[index] += 1 + break + } + } +} + +func GetPercentile(bucket *[]uint64, percent uint64, count 
uint64) uint64 { + count *= percent + var sum uint64 = 0 + for index, value := range *bucket { + sum += value * 100 + if sum >= count { + return threshold[index] + } + } + return threshold[len(threshold)-1] +} diff --git a/source/tools/monitor/observ/sql-obs/common/contaier.go b/source/tools/monitor/observ/sql-obs/common/contaier.go new file mode 100644 index 0000000000000000000000000000000000000000..6bfdea72c85f1245a99868695cbc633efa28c7da --- /dev/null +++ b/source/tools/monitor/observ/sql-obs/common/contaier.go @@ -0,0 +1,458 @@ +package common + +import ( + "fmt" + "io/ioutil" + "regexp" + "strings" + "os" + "strconv" + "sort" +) + +const ( + Container_Type_Docker = iota + Container_Type_CRI_Containerd + Container_Type_CRIO + Container_Type_K8s_Other + Container_Type_Unknown +) + +var containerTypeStr = []string{ + "docker", + "cri-containerd", + "crio", + "kubepods", + "unknow", +} +var containerTypeRegexStr = []string{ + "docker", + "cri-containerd", + "crio", + "\\S+", + "unknow", +} + +type regMatch struct { + burstablePathRegex *regexp.Regexp + besteffortPathRegex *regexp.Regexp + guaranteedPathRegex *regexp.Regexp +} + +func getContainerType(path string) string { + if strings.Contains(path, containerTypeStr[Container_Type_Docker]) { + return containerTypeRegexStr[Container_Type_Docker] + } else if strings.Contains(path, containerTypeStr[Container_Type_CRI_Containerd]) { + return containerTypeRegexStr[Container_Type_CRI_Containerd] + } else if strings.Contains(path, containerTypeStr[Container_Type_CRIO]) { + return containerTypeRegexStr[Container_Type_CRIO] + } else if strings.Contains(path, containerTypeStr[Container_Type_K8s_Other]) { + // "/sys/fs/cgroup/cpu,cpuacct/kubepods.slice/kubepods-burstable.slice/" + return containerTypeRegexStr[Container_Type_K8s_Other] + } + return containerTypeRegexStr[Container_Type_Unknown] +} + +func getContainerTypeByPid(pid int) string { + cgPath, err := GetCgroupPath(pid, "cpu,cpuacct") + if err != nil { + return 
containerTypeStr[Container_Type_Unknown] + } + i := Container_Type_Docker; + for ; i < Container_Type_Unknown; i++ { + if strings.Contains(cgPath, containerTypeStr[i]) { + break + } + } + return containerTypeStr[i] +} + +func regexSearch(buffer string, reg *regexp.Regexp) []string { + return reg.FindAllString(buffer, -1) +} + +func regexMatch(buffer string, reg *regexp.Regexp) bool { + return reg.MatchString(buffer) +} + +func (r *regMatch) isMatch(path string) bool { + if strings.Contains(path, "burstable") { + return regexMatch(path, r.burstablePathRegex) + } else if strings.Contains(path, "besteffort") { + return regexMatch(path, r.besteffortPathRegex) + } else { + return regexMatch(path, r.guaranteedPathRegex) + } +} + +func getCGroupMatcher(path string) *regMatch { + podRegex := "[0-9a-f]{8}[-_][0-9a-f]{4}[-_][0-9a-f]{4}" + + "[-_][0-9a-f]{4}[-_][0-9a-f]{12}" + containerRegex := "[0-9a-f]{64}" + matcher := regMatch{} + + containerType := getContainerType(path) + if containerType == containerTypeRegexStr[Container_Type_Unknown] { + return nil + } + /* + STANDARD: kubepods.slice/kubepods-pod2b801b7a_5266_4386_864e_45ed71136371.slice + /cri-containerd-20e061fc708d3b66dfe257b19552b34a1307a7347ed6b5bd0d8c5e76afb1a870 + .scope/cgroup.procs + */ + matcher.guaranteedPathRegex = + regexp.MustCompile("^.*kubepods.slice/kubepods-pod" + podRegex + + ".slice/" + containerType + "-" + containerRegex + + ".scope/cgroup.procs$") + + /* + STANDARD: kubepods.slice/kubepods-besteffort.slice/kubepods-besteffort-pod0d206349 + _0faf_445c_8c3f_2d2153784f15.slice/cri-containerd-efd08a78ad94af4408bcdb097fbcb603 + a31a40e4d74907f72ff14c3264ee7e85.scope/cgroup.procs + */ + matcher.besteffortPathRegex = + regexp.MustCompile("^.*kubepods.slice/kubepods-besteffort.slice/" + + "kubepods-besteffort-pod" + podRegex + ".slice/" + + containerType + "-" + containerRegex + + ".scope/cgroup.procs$") + + /* + STANDARD: kubepods.slice/kubepods-burstable.slice/kubepods-burstable-podee10fb7d + 
_d989_47b3_bc2a_e9ffbe767849.slice/cri-containerd-4591321a5d841ce6a60a777223cf7f + e872d1af0ca721e76a5cf20985056771f7.scope/cgroup.procs + */ + matcher.burstablePathRegex = + regexp.MustCompile("^.*kubepods.slice/kubepods-burstable.slice/" + + "kubepods-burstable-pod" + podRegex + ".slice/" + containerType + + "-" + containerRegex + ".scope/cgroup.procs$") + + if matcher.isMatch(path) { + return &matcher + } + + /* + GKE: kubepods/pod8dbc5577-d0e2-4706-8787-57d52c03ddf2/14011c7d92a9e513dfd6 + 9211da0413dbf319a5e45a02b354ba6e98e10272542d/cgroup.procs + */ + matcher.guaranteedPathRegex = + regexp.MustCompile("^.*kubepods/pod" + podRegex + "/" + + containerRegex + "/cgroup.procs$") + + /* + GKE: kubepods/besteffort/pod8dbc5577-d0e2-4706-8787-57d52c03ddf2/14011c7d9 + 2a9e513dfd69211da0413dbf319a5e45a02b354ba6e98e10272542d/cgroup.procs + */ + matcher.besteffortPathRegex = + regexp.MustCompile("^.*kubepods/besteffort/pod" + podRegex + "/" + + containerRegex + "/cgroup.procs$") + + /* + GKE: kubepods/burstable/pod8dbc5577-d0e2-4706-8787-57d52c03ddf2/14011c7d92 + a9e513dfd69211da0413dbf319a5e45a02b354ba6e98e10272542d/cgroup.procs + */ + matcher.burstablePathRegex = + regexp.MustCompile("^.*kubepods/burstable/pod" + podRegex + "/" + + containerRegex + "/cgroup.procs$") + + if matcher.isMatch(path) { + return &matcher + } + + /* + pure docker: /sys/fs/cgroup/cpu/docker/ + 1ad2ce5889acb209e1576339741b1e504480db77d + */ + matcher.guaranteedPathRegex = + regexp.MustCompile("^.*docker/" + containerRegex + "/cgroup\\.procs$") + if matcher.isMatch(path) { + return &matcher + } + return nil +} + +func GetCgroupPath(pid int, t string) (string, error) { + data, err := ioutil.ReadFile(fmt.Sprintf("/proc/%d/cgroup", pid)) + if err != nil { + return "", err + } + lines := strings.Split(string(data), "\n") + for _, line := range lines { + parts := strings.Split(line, ":") + if len(parts) < 3 { + continue + } + if parts[1] == t { + return parts[2], nil + } + } + return "", 
PrintOnlyErrMsg("cgroup not found for pid %d", pid) +} + +func GetContainerIdByPid(pid int) []string { + var containerID, podID string + + path, err := GetCgroupPath(pid, "cpu,cpuacct") + if err != nil { + return nil + } + matcher := getCGroupMatcher(path + "/cgroup.procs") + if matcher == nil { + return nil + } + + containerRegex := `[0-9a-f]{64}` + reContainer := regexp.MustCompile(containerRegex) + containerID = reContainer.FindString(path) + + podRegex := `[0-9a-f]{8}[-_][0-9a-f]{4}[-_][0-9a-f]{4}[-_][0-9a-f]{4}[-_][0-9a-f]{12}` + rePod := regexp.MustCompile(podRegex) + podID = rePod.FindString(path) + + return []string{containerID, podID} +} + +func getMountPathByMatch(match ...string) ([]string, error) { + var mnt []string + filter := "" + for _, m := range match { + if len(m) > 0 { + filter += fmt.Sprintf("| grep -E \"%s\"", m) + } + } + s, err := ExecShell(fmt.Sprintf("mount %s", filter)) + if err != nil { + PrintOnlyErrMsg("get match mount info fail") + return nil, err + } + for _, entry := range s { + if len(entry) > 0 { + mnt = append(mnt, strings.Fields(entry)[2]) + } + } + if len(mnt) == 0 { + err = PrintOnlyErrMsg( + "not found mount path for match %s", match) + } + return mnt, err +} + +func getOverlayFSIdByContainerId(containerId string) string { + ret, err := ExecShell(fmt.Sprintf( + "docker inspect --format='{{.GraphDriver.Data.MergedDir}}' %s", + containerId)) + if err != nil { + return "" + } + for _, r := range ret { + overlayfsId := regexSearch(r, regexp.MustCompile("[0-9a-f]{64}")) + if len(overlayfsId) > 0 { + return overlayfsId[0] + } + } + return "" +} + +func getMntFromPidMountInfo(containerFile string, pid int) ([]string) { + fsRoot := "" + mountPoint := "" + device := "" + data, err := ioutil.ReadFile(fmt.Sprintf("/proc/%d/mountinfo", pid)) + if err != nil { + PrintSysError(err) + return nil + } + lines := strings.Split(string(data), "\n") + if len(lines[len(lines)-1]) == 0 { + lines = lines[:len(lines)-1] + } + for _, line := range 
lines { + f := strings.Fields(line) + // fields[4] is mount point + if len(f[4]) < len(containerFile) { + if strings.HasPrefix(containerFile, f[4]) { + if len(f[4]) > len(fsRoot) { + fsRoot = f[3] + mountPoint = f[4] + device = f[len(f) - 2] + } + } + } + } + if len(fsRoot) == 0 { + return nil + } + return []string{fsRoot, mountPoint, device} +} + +func filterPort(ns string, pid int) (int, error) { + cmdStr := fmt.Sprintf("ip netns exec %s netstat -tanp", ns) + lines, err := ExecShell(cmdStr) + if err != nil { + return 0, err + } + for _, line := range lines { + parts := strings.Fields(line) + if strings.Split(parts[len(parts)-1], "/")[0] == fmt.Sprintf("%d", pid) { + arr := strings.Split(parts[3], ":") + port, err := strconv.Atoi(arr[len(arr)-1]) + if err != nil { + return 0, err + } + return port, nil + } + } + return 0, PrintOnlyErrMsg("failed to find port for pid %d\n", pid) +} + +func filterIP(ns string, pid int) (string, error) { + cmdStr := fmt.Sprintf("ip netns exec %s ip addr show", ns) + lines, err := ExecShell(cmdStr) + if err != nil { + return "", err + } + for _, line := range lines { + if strings.Contains(line, "scope global") { + parts := strings.Fields(line) + return strings.Split(parts[1], "/")[0], nil + } + } + return "", PrintOnlyErrMsg("failed to find ip for pid %d\n", pid) +} + +func getPodIpAndPort(pid int) (string, int, error) { + cmdStr := fmt.Sprintf("ip netns identify %d", pid) + lines, err := ExecShell(cmdStr, "origin") + if err != nil { + return "", 0, err + } + ns := lines[0] + port, err := filterPort(ns[:len(ns)-1], pid) + if err != nil { + PrintSysError(err) + return "", 0, err + } + ip, err := filterIP(ns[:len(ns)-1], pid) + if err != nil { + PrintSysError(err) + return "", 0, err + } + return ip, port, nil +} + +func FindPodIpAndPort(pid int, containerId string) (string, int, error) { + ip, port, retErr := getPodIpAndPort(pid) + if retErr != nil { + if getContainerTypeByPid(pid) == containerTypeStr[Container_Type_Docker] { + retErr 
= nil + cmdStr := fmt.Sprintf("docker port %s", containerId) + lines, err := ExecShell(cmdStr, "origin") + if err != nil { + PrintSysError(err) + return "", 0, err + } + var ports []int + for _, line := range lines { + if len(line) > 0 { + p, err := strconv.Atoi(strings.Split(line, "/")[0]) + if err != nil { + continue + } + ports = append(ports, p) + } + } + if len(ports) > 0 { + sort.Ints(ports) + port = ports[0] + } + if port == 0 { + return "", 0, PrintOnlyErrMsg("failed to find port for pid %d\n", pid) + } + cmdStr = fmt.Sprintf("docker inspect %s | grep IPAddress", containerId) + lines, err = ExecShell(cmdStr, "origin") + if err != nil { + PrintSysError(err) + return "", 0, err + } + for _, line := range lines { + if len(line) > 0 { + if strings.Contains(line, `"IPAddress":`) { + reg := regexp.MustCompile( + `"IPAddress":\s*"(\d+\.\d+\.\d+\.\d+)"`) + match := reg.FindStringSubmatch(line) + if len(match) >= 2 { + ip = match[1] + break + } + } + } + } + if ip == "" { + return "", 0, PrintOnlyErrMsg("failed to find ip for pid %d\n", pid) + } + } + } + return ip, port, retErr +} + +func GetDeviceByFile(pid int, containerFile string) string { + mntinfo := getMntFromPidMountInfo(containerFile, pid) + if mntinfo != nil { + return mntinfo[2] + } + return "" +} + +func GetHostFilePathByContainerPath(containerId string, + podId string, pid int, containerFile string) (string, error) { + if len(containerId) == 0 && len(podId) == 0 { + return containerFile, nil + } + cgPath, err := GetCgroupPath(pid, "cpu,cpuacct") + if err != nil { + return "", err + } + containerType := getContainerType(cgPath) + if containerType == containerTypeRegexStr[Container_Type_Unknown] { + return "", PrintOnlyErrMsg( + "unknow container type, cgroup path: %s", cgPath) + } else if containerType == containerTypeRegexStr[Container_Type_Docker] { + podId = getOverlayFSIdByContainerId(containerId) + } + mntinfo := getMntFromPidMountInfo(containerFile, pid) + if mntinfo != nil { + match := 
containerId + if len(podId) > 0 { + match += ("|" + podId) + if strings.Contains(podId, "_") { + tmp := strings.ReplaceAll(podId, "_", `-`) + match += ("|" + tmp) + } else if strings.Contains(podId, "-") { + tmp := strings.ReplaceAll(podId, "-", `_`) + match += ("|" + tmp) + } + } + fsRoot := mntinfo[0] + mountPoint := mntinfo[1] + device := mntinfo[2] + mntPath, err := getMountPathByMatch(device, match) + if err != nil { + mntPath, err = getMountPathByMatch(device) + if err != nil { + return "", err + } + } + for _, mnt := range mntPath { + path := mnt + fsRoot + containerFile + if mountPoint != "/" { + path = mnt + fsRoot + strings.Replace( + containerFile, mountPoint, "", -1) + } + if _, err := os.Stat(path); !os.IsNotExist(err) { + return path, nil + } + } + } + return "", PrintOnlyErrMsg( + "not found host path for %s(from container %s)", + containerFile, containerId) +} diff --git a/source/tools/monitor/observ/sql-obs/common/dbUserPaser.go b/source/tools/monitor/observ/sql-obs/common/dbUserPaser.go new file mode 100644 index 0000000000000000000000000000000000000000..8554ca0901f15049b877537dc7e7dd813c387e84 --- /dev/null +++ b/source/tools/monitor/observ/sql-obs/common/dbUserPaser.go @@ -0,0 +1,69 @@ +package common + +import ( + // "crypto/rand" + // "crypto/rsa" + // "crypto/x509" + // "encoding/pem" + "encoding/base64" + // "flag" +) + +func GetUsersInfo() (string, string) { + username, password, secretType := GetRawUsersInfo() + if secretType != "" { + username = decryptData(username, secretType) + password = decryptData(password, secretType) + } + return username, password +} + +func decryptData(data string, secretType string) string { + if secretType == "RSA" { + return decryptRsa([]byte(data)) + } else if secretType == "BASE" { + return decryptBase64(data) + } + return "" +} + +func decryptBase64(data string) string { + decodedBytes, err := base64.StdEncoding.DecodeString(data) + if err != nil { + PrintOnlyErrMsg("Failed to decode Base64 string: %s", 
data) + return "" + } + return string(decodedBytes) +} + +func decryptRsa(data []byte) string { + // decryptedData, err := rsa.DecryptPKCS1v15(rand.Reader, decodeSecretKey(), data) + // if err != nil { + // PrintOnlyErrMsg("Failed to decrypt data: %s", data) + // return "" + // } + // return string(decryptedData) + return string(data) +} + +// func decodeSecretKey() *rsa.PrivateKey { +// privateKeyBytes := "" +// privateKeyBlock, _ := pem.Decode([]byte(privateKeyBytes)) +// if privateKeyBlock == nil || privateKeyBlock.Type != "PRIVATE KEY" { +// PrintOnlyErrMsg("Failed to decode public key") +// return nil +// } + +// publicKeyInterface, err := x509.ParsePKIXPublicKey(privateKeyBlock.Bytes) +// if err != nil { +// PrintOnlyErrMsg("Failed to parse key") +// return nil +// } + +// privateKey, ok := publicKeyInterface.(*rsa.PrivateKey) +// if !ok { +// PrintOnlyErrMsg("Failed to convert key") +// return nil +// } +// return privateKey +// } diff --git a/source/tools/monitor/observ/sql-obs/common/error.go b/source/tools/monitor/observ/sql-obs/common/error.go new file mode 100644 index 0000000000000000000000000000000000000000..7398686719e4f1f4144b9e2c1559cf1489c0716b --- /dev/null +++ b/source/tools/monitor/observ/sql-obs/common/error.go @@ -0,0 +1,122 @@ +package common + +import ( + "errors" + "fmt" + "runtime" + "strings" +) + +type ErrorCode int + +type DefinedErr struct { + err ErrorCode + errMsg string +} + +const ( + Success = 0 + Fail_Create_DB_Connect = iota - 258 + Fail_Get_DB_Variables + Fail_Slow_Log_Is_OFF + Fail_File_Watcher_Exit + Fail_Slow_Sql_Analyzer_Exit + Fail_Unrecognized_Slow_Log_Format + Fail_Analyzer_Data_Formatting_JSON + Fail_Get_MySQLD_Proc + Fail_Upload_Data + Fail_Notify_Not_Register + Fail_Init_Data_Export + Fail_Init_Mysqld_Instances_Info +) + +var dErrTlb = []DefinedErr { + {err: Fail_Create_DB_Connect, errMsg: "create mysql connection fail"}, + {err: Fail_Get_DB_Variables, errMsg: "get mysql variable fail"}, + {err: Fail_Slow_Log_Is_OFF, 
errMsg: "slow_query_log is OFF, please turn on"}, + {err: Fail_File_Watcher_Exit, errMsg: "file watcher is exited"}, + {err: Fail_Unrecognized_Slow_Log_Format, errMsg: "unrecognized slow log format"}, + {err: Fail_Analyzer_Data_Formatting_JSON, errMsg: "analyzer data formatting JSON failed"}, + {err: Fail_Get_MySQLD_Proc, errMsg: "mysqld process doesn't exist"}, + {err: Fail_Upload_Data, errMsg: "fail to send data by unix.sock"}, + {err: Fail_Notify_Not_Register, errMsg: "notify not register"}, + {err: Fail_Init_Data_Export, errMsg: "fail to init data export"}, + {err: Fail_Init_Mysqld_Instances_Info, errMsg: "fail to get Mysqld Instances Info"}, +} + +type MyError struct { + err error + fun string + file string + line int +} + +const Max_Print_Error_Cnt = 50 +var printErrorLimit int = 0 + +func (e *MyError) Error() string { + return fmt.Sprintf("[%s:%d] %s: %s", e.file, e.line, e.fun, e.err.Error()) +} + +func checkPrintLimit() bool { + if printErrorLimit >= (Max_Print_Error_Cnt + 1) { + if printErrorLimit == (Max_Print_Error_Cnt + 1) { + PrintOnlyErrMsg("start limit error log output") + } + return true + } + return false +} + +func newMyError(err error, skip ...int) error { + printErrorLimit += 1 + skipTrace := 2 + if len(skip) > 0 { + skipTrace = skip[0] + } + funPc, file, line, _ := runtime.Caller(skipTrace) + moduleName := "sql-obs" + s := strings.Split(file, moduleName) + relativePath := moduleName + s[1] + return &MyError{ + err: err, + fun: runtime.FuncForPC(funPc).Name(), + file: relativePath, + line: line, + } +} + +func PrintOnlyErrMsg(errMsg string, a ...interface{}) error { + err := newMyError(errors.New("error: " + fmt.Sprintf(errMsg, a...)), 2) + if !checkPrintLimit() { + fmt.Println(err) + } + return err +} + +func PrintDefinedErr(err ErrorCode, msg ...string) { + errMsg := "unkown error" + if checkPrintLimit() { + return + } + for i := 0; i < len(dErrTlb); i++ { + if err == dErrTlb[i].err { + errMsg = dErrTlb[i].errMsg + break + } + } + if len(msg) 
> 0 { + errMsg += "\n" + for _, m := range msg { + errMsg += m + } + } + fmt.Println(newMyError(errors.New("error: "+errMsg))) +} + +func PrintSysError(err error) { + if checkPrintLimit() { + return + } + fmt.Println(newMyError(err)) +} diff --git a/source/tools/monitor/observ/sql-obs/common/fileWatcher.go b/source/tools/monitor/observ/sql-obs/common/fileWatcher.go new file mode 100644 index 0000000000000000000000000000000000000000..1d9fec018f404d730d6704f33dccb3199039f7bc --- /dev/null +++ b/source/tools/monitor/observ/sql-obs/common/fileWatcher.go @@ -0,0 +1,271 @@ +package common + +import ( + "bufio" + //"fmt" + // "unsafe" + "github.com/fsnotify/fsnotify" + "os" + // "strconv" + "time" + "sync" +) + +type fileOpWatcherHandler func(event *fsnotify.Event, priData *interface{}) int +const ( + Handle_Done = 1 + Handle_Exited = 2 +) + +const ( + Has_data = 1 + Watcher_Exited = 2 +) + +type fileWriteWatcher struct { + fileName string + data []string + changeLines int + status chan int +} + +func getFileTailPos(file *os.File) (int64, error) { + readPos, err := file.Seek(0, os.SEEK_END) + if err != nil { + return -1, err + } + return readPos, nil +} + +func isRegularFile(file *os.File) bool { + fileInfo, _ := file.Stat() + if fileInfo.Mode().IsRegular() { + return true + } else { + return false + } +} + +func readLastLines(buf *[]string, scanner *bufio.Scanner, + file *os.File, startPos *int64) (int, error) { + lines := 0 + endPos, err := getFileTailPos(file) + if err != nil { + return -1, err + } + + if endPos > 0 && endPos < *startPos { + *startPos = 0 + } + + if *startPos < endPos { + _, err = file.Seek(*startPos, 0) + if err != nil { + return -1, err + } + } + + for scanner.Scan() { + line := scanner.Text() + (*buf)[lines] = line + *startPos += int64(len(line) + 1) + lines += 1 + if lines >= cap(*buf) || (endPos > 0 && *startPos >= endPos) { + break + } + } + + if err := scanner.Err(); err != nil { + return -1, err + } + return lines, nil +} + +func (fw 
*fileWriteWatcher)readNotRegularFile(scanner *bufio.Scanner, + file *os.File, startPos *int64) { + lines := 0 + var dataNotifyLock sync.Mutex + lineChan := make(chan string) + errChan := make(chan error) + go func() { + for scanner.Scan() { + line := scanner.Text() + lineChan <- line + } + errChan <- scanner.Err() + }() + timeout := 2 * time.Second + timer := time.NewTimer(timeout) + defer timer.Stop() + for { + select { + case line := <-lineChan: + timer.Reset(timeout) + dataNotifyLock.Lock() + fw.data[lines] = line + lines += 1 + if lines >= cap(fw.data) { + fw.changeLines = lines + fw.status <- Has_data + lines = 0 + } + dataNotifyLock.Unlock() + case err := <-errChan: + if err != nil { + return + } + case <-timer.C: + dataNotifyLock.Lock() + if lines > 0 { + fw.changeLines = lines + fw.status <- Has_data + lines = 0 + } + dataNotifyLock.Unlock() + } + } +} + +func (fw *fileWriteWatcher) readChangedFile() { + file, err := os.Open(fw.fileName) + if err != nil { + PrintSysError(err) + return + } + defer file.Close() + + startPos, err := getFileTailPos(file) + if err != nil { + PrintSysError(err) + return + } + + watcher, err := fsnotify.NewWatcher() + if err != nil { + PrintSysError(err) + return + } + defer watcher.Close() + + err = watcher.Add(fw.fileName) + if err != nil { + PrintSysError(err) + return + } + + //Scanner for reading recent changes in files + scanner := bufio.NewScanner(file) + scanner.Split(bufio.ScanLines) + + //Default read up to 5000 lines at a time + defaultCapacity := cap(fw.data) + if defaultCapacity == 0 { + defaultCapacity = 5000 + } + if cap(fw.data) == 0 || len(fw.data) != cap(fw.data) { + for i := len(fw.data); i < defaultCapacity; i++ { + fw.data = append(fw.data, "") + } + } + + //Start watching... 
+ if isRegularFile(file) { + for { + select { + case event := <-watcher.Events: + //File write event occurs + if event.Op&fsnotify.Write == fsnotify.Write { + lines, err := readLastLines( + &fw.data, scanner, file, &startPos) + if err != nil { + PrintSysError(err) + return + } + fw.changeLines = lines + fw.status <- Has_data + } + case err := <-watcher.Errors: + PrintSysError(err) + return + } + } + } else { + fw.readNotRegularFile(scanner, file, &startPos) + } +} + +func (fw *fileWriteWatcher) StartWatch() { + //done := make(chan bool) + go fw.readChangedFile() + //<-done +} + +func (fw *fileWriteWatcher) Data() ([]string) { + return fw.data +} + +func (fw *fileWriteWatcher) ChangeLines() (int) { + return fw.changeLines +} + +func (fw *fileWriteWatcher) Status() (chan int) { + return fw.status +} + +func StartFilesOpWatcher(files []string, Op fsnotify.Op, + handle fileOpWatcherHandler, closeSource func(*interface{}), + priData *interface{}) error { + go func() { + watcher, err := fsnotify.NewWatcher() + if err != nil { + PrintSysError(err) + return + } + defer watcher.Close() + + for _, f := range files { + err = watcher.Add(f) + if err != nil { + PrintSysError(err) + return + } + } + if closeSource != nil { + defer closeSource(priData) + } + for { + select { + case event := <-watcher.Events: + if event.Op&Op == Op { + ret := handle(&event, priData) + if ret != Handle_Done { + return + } + } + case err := <-watcher.Errors: + PrintSysError(err) + return + } + } + }() + return nil +} + +/** + * new_file_write_watcher - Create a monitor to monitor the specified file + * and obtain the latest updated data + * fileName: Target file for monitoring + * readlines: After the file is updated, read the last 'readlines' lines + * if readlines is 0, Read all newly added data + * + * If the function is successful, a fileWriteWatcher object will return + */ + func NewFileWriteWatcher(fileName string, readlines int) (fileWriteWatcher) { + fw := fileWriteWatcher{ + fileName: 
fileName, + data: make([]string, readlines), + changeLines: 0, + status: make(chan int), + } + return fw +} \ No newline at end of file diff --git a/source/tools/monitor/observ/sql-obs/common/nfPut.go b/source/tools/monitor/observ/sql-obs/common/nfPut.go new file mode 100644 index 0000000000000000000000000000000000000000..7992440fc23aea1981a38c22c68ef3a4be3a0cdd --- /dev/null +++ b/source/tools/monitor/observ/sql-obs/common/nfPut.go @@ -0,0 +1,89 @@ +package common + +import ( + "encoding/json" + //"fmt" + "net" + "os" + "strings" +) + +const ( + //PIPE_PATH string = "/var/sysom/outline" // 参考 YAML 中的配置 + MAX_BUFF int = 64 * 1024 // 最大消息长度 + TIME_FORMAT = "2006-01-02 15:04:05" +) + +type CnfPut struct { + sock *net.UnixConn + // sock net.Conn + path string +} + +var gCnfPut CnfPut + +func newCnfPut(path string) error { + gCnfPut.path = path + if _, err := os.Stat(gCnfPut.path); os.IsNotExist(err) { + PrintSysError(err) + return err + } + addr, err := net.ResolveUnixAddr("unixgram", gCnfPut.path) + if err != nil { + PrintSysError(err) + return err + } + gCnfPut.sock, err = net.DialUnix("unixgram", nil, addr) + if err != nil { + PrintSysError(err) + return err + } + return nil +} + +func (c *CnfPut) puts(s string) error { + if len(s) > MAX_BUFF { + return PrintOnlyErrMsg( + "message len %d, is too long, should less than %d, data :\n%s", + len(s), MAX_BUFF, s) + } + + if _, err := c.sock.Write([]byte(s)); err != nil { + PrintSysError(err) + return err + } + return nil +} + +func ExportData(s string) error { + if err := gCnfPut.puts(s); err != nil { + return err + } + return nil +} + +func Struct2String(s any) string { + b, err := json.Marshal(s) + if err != nil { + return "" + } + t_s := string(b) + t_s = strings.ReplaceAll(t_s, ":", "=") + t_s = strings.ReplaceAll(t_s, `"`, "") + //fmt.Println(t_s) + return t_s[1 : len(t_s)-1] +} + +func InitDataExport(path string) error { + err := newCnfPut(path) + if err != nil { + return err + } + return nil +} + +func 
UninitDataExport() { + if gCnfPut.sock != nil { + gCnfPut.sock.Close() + } +} diff --git a/source/tools/monitor/observ/sql-obs/common/optPaser.go b/source/tools/monitor/observ/sql-obs/common/optPaser.go new file mode 100644 index 0000000000000000000000000000000000000000..31f92795653d5c9acc9b6d05d24e8345ffd93c51 --- /dev/null +++ b/source/tools/monitor/observ/sql-obs/common/optPaser.go @@ -0,0 +1,63 @@ +package common + +import ( + // "crypto/rand" + // "crypto/rsa" + // "crypto/x509" + // "encoding/pem" + "unsafe" + // "flag" + "fmt" + "os" +) + +func usage() { + fmt.Printf("Usage: %s [options]\n", os.Args[0]) + fmt.Printf("Options:\n") + fmt.Printf(" u The DB monitor username\n") + fmt.Printf(" p The DB monitor password\n") + fmt.Printf(" s Encryption type of account information(BASE,RSA)\n") + fmt.Printf(" y Specify the yaml file(default is /etc/sysak/base.yaml)\n") + os.Exit(0) +} + +func GetRawUsersInfo() (string, string, string) { + usersInfo := getArgsFromCmdline([]string{"-u", "-p", "-s"}, true) + return usersInfo[0], usersInfo[1], usersInfo[2] +} + +func GetYamlFile() (string) { + yaml := getArgsFromCmdline([]string{"-y"}, false)[0] + if yaml == "" { + return "/etc/sysak/base.yaml" + } + return yaml +} + +func getArgsFromCmdline(opts []string, hide bool) []string { + args := os.Args[1:] + retArgs := make([]string, len(opts)) + for idx, val := range opts { + if val == "-h" || val == "--help" { + usage() + } + retArgs[idx] = "" + for i := 0; i < len(args); i++ { + if args[i] == val && len(args) > (i + 1) { + retArgs[idx] = string([]byte(args[i + 1])) + if hide { + hideParam(i + 2) + } + break + } + } + } + return retArgs +} + +func hideParam(index int) { + p := *(*unsafe.Pointer)(unsafe.Pointer(&os.Args[index])) + for i := 0; i < len(os.Args[index]); i++ { + *(*uint8)(unsafe.Pointer(uintptr(p) + uintptr(i))) = 'x' + } +} \ No newline at end of file diff --git a/source/tools/monitor/observ/sql-obs/common/process.go 
b/source/tools/monitor/observ/sql-obs/common/process.go new file mode 100644 index 0000000000000000000000000000000000000000..a5b706e23baa28b008cd43f997d10f48ad95bab9 --- /dev/null +++ b/source/tools/monitor/observ/sql-obs/common/process.go @@ -0,0 +1,415 @@ +package common + +import ( + "fmt" + "os" + "path/filepath" + "reflect" + "regexp" + "sort" + "strconv" + "strings" + "io/ioutil" + //"unsafe" + "github.com/fsnotify/fsnotify" +) + +type processInfo struct { + Pid int + Port int + Ip string + Comm string + PodId string + ContainerId string + Slowlog string + Errlog string + CnfPath string + PvDevice []string +} + +var appInfoList = make(map[string]*processInfo) + +func findPort(pid int) (int, error) { + out, err := ExecShell("netstat -tanp", "origin") + if err != nil { + return 0, err + } + re := regexp.MustCompile(fmt.Sprintf(".*LISTEN\\s+%d/mysqld", pid)) + matches := re.FindAllString(out[0], -1) + if len(matches) > 0 { + var ports []int + for _, match := range matches { + s := strings.Fields(match) + arr := strings.Split(s[3], ":") + port, err := strconv.Atoi(arr[len(arr)-1]) + if err != nil { + continue + } + ports = append(ports, port) + } + if len(ports) > 0 { + sort.Ints(ports) + return ports[0], nil + } + } + return 0, PrintOnlyErrMsg("failed to find port for pid %d\n", pid) +} + +func findLogFile(pid int, logType string, moreMatch ...string) (string, error) { + basePath := fmt.Sprintf("/proc/%d/", pid) + logPath := "" + + if logType == "err" { + logPath = filepath.Join(basePath, "fd", "2") + realpath, err := os.Readlink(logPath) + if err != nil { + PrintSysError(err) + return "", err + } + logPath = realpath + for _, m := range moreMatch { + if !strings.Contains(logPath, m) { + logPath = "" + } + } + } else { + logPath = filepath.Join(basePath, "fd") + cmdStr := fmt.Sprintf("ls -l %s | grep %q", logPath, + strings.ReplaceAll(logType, ".", `\.`)) + for _, m := range moreMatch { + cmdStr += fmt.Sprintf( + "| grep %q", strings.ReplaceAll(m, ".", `\.`)) + 
} + matches, err := ExecShell(cmdStr) + if err != nil { + return "", PrintOnlyErrMsg("not found the path of %s log", logType) + } + for _, match := range matches { + s := strings.Fields(match) + if len(s) > 0 { + logPath = s[len(s)-1] + for _, m := range moreMatch { + if !strings.Contains(logPath, m) { + logPath = "" + continue + } + } + break + } + } + } + if len(logPath) == 0 { + return "", PrintOnlyErrMsg("not found the path of %s log", logType) + } + return logPath, nil +} + +func getKeyValueFromString(str string, key string) string { + re := regexp.MustCompile(key + `=([^ \t\n\r\f\v]+)`) + match := re.FindStringSubmatch(str) + if len(match) >= 2 { + return match[1] + } + return "" +} + +func getDeviceList(pid int) []string { + var deviceList []string + dirScanRecordList := make(map[string]bool) + addString := func (s *[]string, substr string) { + for _, str := range *s { + if str == substr { + return + } + } + *s = append(*s, substr) + } + + fds, err := filepath.Glob(fmt.Sprintf("/proc/%d/fd/*", pid)) + if err != nil { + return deviceList + } + for _, fd := range fds { + realPath, err := os.Readlink(fd) + if err != nil { + continue + } + if len(realPath) > 0 && filepath.IsAbs(realPath){ + dir := filepath.Dir(realPath) + if _, ok := dirScanRecordList[dir]; !ok { + dirScanRecordList[dir] = true + device := GetDeviceByFile(pid, realPath) + if len(device) > 0 && strings.Contains(device, "/dev/") { + addString(&deviceList, device) + } + } + } + } + return deviceList +} + +func getProcessInfo(psEntry string) (processInfo, error) { + port := 0 + ip := "localhost" + errLogPath := "" + slowLogPath := "" + cnfPath := "" + podId := "NULL" + containerId := "NULL" + pid, err := strconv.Atoi(strings.Fields(psEntry)[1]) + if err != nil { + return processInfo{}, err + } + + id := GetContainerIdByPid(pid) + if id != nil { + if len(id[0]) > 0 { + containerId = id[0] + } + if len(id[1]) > 0 { + podId = id[1] + } + } + //fmt.Printf("conrainer id: %s, pod id: %s\n", containerId, 
podId) + p := getKeyValueFromString(psEntry, "--port") + if len(p) > 1 { + port, err = strconv.Atoi(p) + if err != nil { + port = 0 + } + } + + if port == 0 { + if containerId == "NULL" { + port, err = findPort(pid) + if err != nil { + return processInfo{}, err + } + } else { + ip, port, err = FindPodIpAndPort(pid, containerId) + if err != nil { + return processInfo{}, err + } + } + } + + errLogPath = getKeyValueFromString(psEntry, "--log-error") + slowLogPath = getKeyValueFromString(psEntry, "--slow-query-log") + cnfPath = getKeyValueFromString(psEntry, "--defaults-file") + + if len(slowLogPath) == 0 { + slowLogPath, _ = findLogFile(pid, "slow", ".log") + } + if len(errLogPath) == 0 { + errLogPath, _ = findLogFile(pid, "err", ".log") + } + if id != nil { + if len(slowLogPath) > 0 { + slowLogPath, _ = GetHostFilePathByContainerPath(containerId, + podId, pid, slowLogPath) + } + if len(errLogPath) > 0 { + errLogPath, _ = GetHostFilePathByContainerPath(containerId, + podId, pid, errLogPath) + } + } + + if len(cnfPath) == 0 { + cnfPath, err = findLogFile(pid, "cnf", ".cnf") + } + if containerId != "NULL" { + containerId = containerId[:12] + } + return processInfo{ + pid, port, ip, "", podId, containerId, slowLogPath, errLogPath, + cnfPath, getDeviceList(pid)}, nil +} + +func initAppInfoList(comm string) error { + cmdStr := fmt.Sprintf("ps -ef | grep %s | grep -v grep", comm) + matches, err := ExecShell(cmdStr) + if err != nil { + return err + } + for _, match := range matches { + if len(match) > 0 { + commBytes, err := ioutil.ReadFile( + "/proc/" + strings.Fields(match)[1] + "/comm") + if err != nil { + continue + } + commStr := strings.TrimSpace(string(commBytes)) + if commStr != comm { + continue + } + pInfo, err := getProcessInfo(match) + if err != nil { + //PrintSysError(err) + continue + } + key := string(pInfo.ContainerId) + ":" + comm + if pInfo.ContainerId == "NULL" { + key = comm + ":" + strconv.Itoa(pInfo.Port) + } + if _, ok := appInfoList[key]; ok { + 
appInfoList[key].Pid = pInfo.Pid + } else { + pInfo.Comm = comm + appInfoList[key] = &pInfo + } + } + } + if len(appInfoList) == 0 { + return PrintOnlyErrMsg("not found app %s", comm) + } + return nil +} + +func getKeyByPidFromAppInfoList(pid int) string { + for key, app := range appInfoList { + if pid == app.Pid { + return key + } + } + return "" +} + +func deleteAppInstancesInfo(event *fsnotify.Event, priData *interface{}) int { + re := regexp.MustCompile(`(\d+)`) + m := re.FindStringSubmatch(event.Name) + if len(m) > 1 { + pid, err := strconv.Atoi(m[1]) + if err != nil { + return Handle_Done + } + key := getKeyByPidFromAppInfoList(pid) + if len(key) > 0 { + delete(appInfoList, key) + } + } + return Handle_Done +} + +func foundAppInstances(value reflect.Value, + match map[string]interface{}) bool { + matchCnt := len(match) + for m, v := range match { + field := value.FieldByName(m) + if field.IsValid() { + val := reflect.ValueOf(v) + vType := reflect.TypeOf(v) + if field.Type() == vType && + field.Interface() == val.Interface() { + matchCnt-- + continue + } + } + } + if matchCnt == 0 { + return true + } + return false +} + +func ForeachAppInstances(comm string, inArgs []string, + f func(values []interface{})) { + for _, app := range appInfoList { + if comm == app.Comm { + var argsVal []interface{} + value := reflect.ValueOf(app).Elem() + for _, a := range inArgs { + field := value.FieldByName(a) + if field.IsValid() { + argsVal = append(argsVal, field.Interface()) + } + } + f(argsVal) + } + } +} + +func GetAppInstanceMemberByPid(pid int, member string) interface{} { + key := getKeyByPidFromAppInfoList(pid) + if _, ok := appInfoList[key]; ok { + app := appInfoList[key] + value := reflect.ValueOf(app).Elem() + field := value.FieldByName(member) + if field.IsValid() { + return field.Interface() + } + } + return "" +} + +func GetAppInstanceInfo(match map[string]interface{}, + member string) interface{} { + for _, app := range appInfoList { + value := 
reflect.ValueOf(app).Elem() + found := foundAppInstances(value, match) + if found { + field := value.FieldByName(member) + if field.IsValid() { + return field.Interface() + } + } + } + return nil +} + +func AppInstancesAjustMember(match map[string]interface{}, + ajust map[string]interface{}) { + for _, app := range appInfoList { + value := reflect.ValueOf(app).Elem() + found := foundAppInstances(value, match) + if found { + for m, v := range ajust { + field := value.FieldByName(m) + if field.IsValid() { + if reflect.TypeOf(v) == reflect.TypeOf("") && + filepath.IsAbs(v.(string)) { + if len(app.ContainerId) > 0 { + val, err := GetHostFilePathByContainerPath( + app.ContainerId, app.PodId, app.Pid, + v.(string)) + if err == nil { + v = val + } + } + _, err := os.Stat(v.(string)) + if !os.IsNotExist(err) { + field.Set(reflect.ValueOf(v)) + } + continue + } + field.Set(reflect.ValueOf(v)) + } + } + break + } + } +} + +func InitAppInstancesInfo(comm string) error { + err := initAppInfoList(comm) + if err != nil { + return err + } + + var files []string + for _, app := range appInfoList { + files = append(files, + "/proc/"+strconv.Itoa(app.Pid)) + } + return StartFilesOpWatcher(files, fsnotify.Remove, + deleteAppInstancesInfo, nil, nil) +} + +func GetAppInstanceCnt() int { + cnt := 0 + for _, app := range appInfoList { + if app != nil { + cnt++ + } + } + return cnt +} diff --git a/source/tools/monitor/observ/sql-obs/common/shellCmd.go b/source/tools/monitor/observ/sql-obs/common/shellCmd.go new file mode 100644 index 0000000000000000000000000000000000000000..99e494786f36e6b1a9c53975e7598c6cc726378f --- /dev/null +++ b/source/tools/monitor/observ/sql-obs/common/shellCmd.go @@ -0,0 +1,31 @@ +package common + +import ( + "os/exec" + "strings" +) + +func ExecShell(cmdStr ...string) ([]string, error) { + cmd := exec.Command("sh", "-c", cmdStr[0]) + out, err := cmd.Output() + if err != nil { + PrintSysError(err) + return nil, err + } + if len(cmdStr) > 1 && cmdStr[1] == 
"origin" { + return []string{string(out)}, nil + } + s := strings.Split(string(out), "\n") + if len(s[len(s)-1]) == 0 { + s = s[:len(s)-1] + } + return s, nil +} + +func DetectSqlObs() error { + strs, _ := ExecShell("ps -ef | grep sql-obs | grep -v grep") + if len(strs) > 1 { + return PrintOnlyErrMsg("Please exit the running sql-obs first, and then try again.") + } + return nil +} diff --git a/source/tools/monitor/observ/sql-obs/common/slideWindow.go b/source/tools/monitor/observ/sql-obs/common/slideWindow.go new file mode 100644 index 0000000000000000000000000000000000000000..c0ae7daa0aeae7b44a15c823d145958e9bd4086d --- /dev/null +++ b/source/tools/monitor/observ/sql-obs/common/slideWindow.go @@ -0,0 +1,209 @@ +package common + +import ( + "bufio" + "fmt" + "math" + "os" +) + +type DataItem struct { + window uint32 + usedWin uint32 + nrSample uint32 + movWinData []float64 + curWinMinVal float64 + curWinMaxVal float64 + moveAvg float64 + baseThresh float64 + + comThresh float64 + shouldUpdThreshComp bool + decRangeThreshAvg float64 + decRangeCnt uint32 + minStableThresh float64 + maxStableThresh float64 + stableThreshAvg float64 + nrStableThreshSample uint32 + + dynThresh float64 +} + +// type DataAggregator struct { +// DataDic map[string]*DataItem +// } + +func AddItem(dataAggregator map[string]*DataItem, name string, _window uint32) { + dataAggregator[name] = new(DataItem) + dataAggregator[name].window = _window + dataAggregator[name].usedWin = 0 + dataAggregator[name].nrSample = 0 + dataAggregator[name].movWinData = []float64{} + dataAggregator[name].curWinMinVal = math.MaxFloat64 + dataAggregator[name].curWinMaxVal = 0 + dataAggregator[name].moveAvg = 0 + dataAggregator[name].baseThresh = 0 + dataAggregator[name].comThresh = 0 + dataAggregator[name].shouldUpdThreshComp = true + dataAggregator[name].decRangeThreshAvg = 0 + dataAggregator[name].decRangeCnt = 0 + dataAggregator[name].minStableThresh = math.MaxFloat64 + dataAggregator[name].maxStableThresh = 0 + 
	// Tail of AddItem(): remaining field initialisation for the new item.
	dataAggregator[name].stableThreshAvg = 0
	dataAggregator[name].nrStableThreshSample = 0
	dataAggregator[name].dynThresh = math.MaxFloat64
}

// calBaseThresh feeds one sample into the item's moving window and updates
// the base threshold.  It returns the per-sample threshold, or -1.0 while
// fewer than `window` samples have been seen (warm-up phase).
func calBaseThresh(dataAggregator map[string]*DataItem, key string, value float64) float64 {
	data := dataAggregator[key]
	thresh := -1.0
	data.nrSample += 1
	// Track the extremes of the current (not yet closed) window.
	data.curWinMaxVal = math.Max(data.curWinMaxVal, value)
	data.curWinMinVal = math.Min(data.curWinMinVal, value)
	// fmt.Println(key, data.nrSample, data.window)
	if data.nrSample >= data.window {
		// Window is full: overwrite the oldest slot (ring-buffer style via
		// usedWin % window) once movWinData has reached its capacity.
		if len(data.movWinData) < int(data.window) {
			data.movWinData = append(data.movWinData, value)
		} else {
			data.movWinData[data.usedWin%data.window] = value
		}
		moveAvg := avg(data.movWinData, data.window)
		// Weight of the running average is capped at 3*window samples so the
		// base threshold keeps adapting instead of freezing over time.
		nrThreshSample := math.Min(float64(data.nrSample + 1 - data.window), float64(3 * data.window))
		// Per-sample threshold: the larger one-sided deviation of the current
		// window extremes from the moving average.
		thresh = math.Max(data.curWinMaxVal-moveAvg, moveAvg-data.curWinMinVal)
		threshAvg := (data.baseThresh*float64(nrThreshSample-1) + thresh) / float64(nrThreshSample)
		data.baseThresh = threshAvg
		data.moveAvg = moveAvg
	} else {
		// Warm-up: just accumulate samples.
		data.movWinData = append(data.movWinData, value)
	}
	data.usedWin += 1
	// A window boundary was crossed: reset the per-window extremes.
	if data.usedWin >= data.window {
		data.curWinMaxVal = 0
		data.curWinMinVal = math.MaxFloat64
		data.usedWin = 0
	}
	return thresh
}

// calStableThresh accumulates "stable" per-sample thresholds (those close to
// the decreasing-range average) and, once 1.5*window of them are collected,
// derives the compensation threshold from their spread and resets the
// stable/decreasing-range bookkeeping.
func calStableThresh(dataAggregator map[string]*DataItem, key string, curBaseThresh float64, curThresh float64) {
	data := dataAggregator[key]
	avg := data.decRangeThreshAvg
	// Sample counts as "stable" when its distance from the decreasing-range
	// average is below 1/10 of the base threshold's distance.
	if curThresh-avg < ((curBaseThresh - avg) / 10.0) {
		tSum := data.stableThreshAvg*float64(data.nrStableThreshSample) + curThresh
		data.nrStableThreshSample += 1
		data.stableThreshAvg = tSum / float64(data.nrStableThreshSample)
		data.minStableThresh = math.Min(data.minStableThresh, curThresh)
		data.maxStableThresh = math.Max(data.maxStableThresh, curThresh)
		// nrStableThreshSample >= 1.5 * window: enough evidence to fix the
		// compensation threshold.
		if data.nrStableThreshSample*2 >= data.window*3 {
			data.comThresh = math.Max(data.stableThreshAvg-data.minStableThresh, data.maxStableThresh-data.stableThreshAvg)
			data.shouldUpdThreshComp = false
			data.minStableThresh = math.MaxFloat64
			data.maxStableThresh = 0.0
			data.stableThreshAvg = 0
			data.decRangeThreshAvg = 0
			data.nrStableThreshSample = 0
			data.decRangeCnt = 0
		}
	}
}

// CalCompThresh updates the compensation threshold.  While updates are
// enabled it tracks the base threshold; when the base threshold is falling
// it accumulates the decreasing-range average and, after 1.5*window falling
// samples, hands off to calStableThresh.  Any non-decreasing sample resets
// the decreasing-range bookkeeping.
func CalCompThresh(dataAggregator map[string]*DataItem, key string, lastBaseThresh float64, curThresh float64) {
	data := dataAggregator[key]
	curBaseThresh := data.baseThresh
	if data.shouldUpdThreshComp && (data.comThresh < curBaseThresh || data.usedWin == 0) {
		data.comThresh = curBaseThresh
	}
	if curBaseThresh < lastBaseThresh {
		tSum := data.decRangeThreshAvg*float64(data.decRangeCnt) + curThresh
		data.decRangeCnt += 1
		data.decRangeThreshAvg = tSum / float64(data.decRangeCnt)
		if data.decRangeCnt*2 >= data.window*3 {
			calStableThresh(dataAggregator, key, curBaseThresh, curThresh)
		}
	} else {
		data.minStableThresh = math.MaxFloat64
		data.maxStableThresh = 0
		data.stableThreshAvg = 0
		data.decRangeThreshAvg = 0
		data.nrStableThreshSample = 0
		data.decRangeCnt = 0
	}
}

// UpdateDynThresh feeds one sample into the item's statistics and, once past
// warm-up, recomputes the dynamic threshold as
// baseThresh + moveAvg + comThresh.
func UpdateDynThresh(dataAggregator map[string]*DataItem, key string, value float64) {
	// fmt.Println(key, len(dataAggregator), len(dataAggregator[key].movWinData))
	data := dataAggregator[key]
	lastBaseThresh := data.baseThresh

	curThresh := calBaseThresh(dataAggregator, key, value)
	// -1.0 means calBaseThresh is still warming up; keep the old threshold.
	if curThresh != -1.0 {
		CalCompThresh(dataAggregator, key, lastBaseThresh, curThresh)
		data.dynThresh = data.baseThresh + data.moveAvg + data.comThresh
	}
}

// DisableThreshComp turns off compensation-threshold updates for the item
// and removes the compensation term from the dynamic threshold (comThresh is
// set to a tiny non-zero value rather than 0 — presumably to avoid a
// zero-compare elsewhere; TODO confirm against callers).
func DisableThreshComp(dataAggregator map[string]*DataItem, key string) {
	data := dataAggregator[key]
	if data.shouldUpdThreshComp == true {
		data.shouldUpdThreshComp = false
		data.dynThresh = data.baseThresh + data.moveAvg
		data.comThresh = 0.000001
	}
}

// GetNrDataSample returns how many samples the item has absorbed so far.
func GetNrDataSample(dataAggregator map[string]*DataItem, key string) uint32 {
	return dataAggregator[key].nrSample
}

// GetDynThresh returns the item's current dynamic threshold.
func GetDynThresh(dataAggregator map[string]*DataItem, key string) float64 {
	return 
dataAggregator[key].dynThresh +} + +func GetBaseThresh(dataAggregator map[string]*DataItem, key string) float64 { + return dataAggregator[key].baseThresh +} + +func GetMoveAvg(dataAggregator map[string]*DataItem, key string) float64 { + return dataAggregator[key].moveAvg +} + +func GetComThresh(dataAggregator map[string]*DataItem, key string) float64 { + return dataAggregator[key].comThresh +} + +func avg(list []float64, count uint32) float64 { + sum := 0.0 + for _, v := range list { + sum += v + } + return sum / float64(count) +} + +func Sum(list []float64) float64 { + sum := 0.0 + for _, v := range list { + sum += v + } + return sum +} + +func ReadFileToList(path string) []string { + f, err := os.Open(path) + if err != nil { + fmt.Printf("Open file %s error.", path) + return nil + } + defer f.Close() + + var result []string + scanner := bufio.NewScanner(f) + for scanner.Scan() { + result = append(result, scanner.Text()) + } + + if err := scanner.Err(); err != nil { + fmt.Printf("Open file %s error.", path) + return nil + } + return result +} diff --git a/source/tools/monitor/observ/sql-obs/common/sqlCmd.go b/source/tools/monitor/observ/sql-obs/common/sqlCmd.go new file mode 100644 index 0000000000000000000000000000000000000000..1d9eaccd9b375d0b37cf618a2b9424b67d888cfe --- /dev/null +++ b/source/tools/monitor/observ/sql-obs/common/sqlCmd.go @@ -0,0 +1,232 @@ +package common + +import ( + //"fmt" + "database/sql" + _ "github.com/go-sql-driver/mysql" + "strconv" + "strings" +) + +type ConnDesc struct { + dbDriverName string + user string + passwd string + host string + port int + containerID string + podID string +} + +type DBConnect struct { + db *sql.DB + desc *ConnDesc +} + +type DataHandle func(rows *sql.Rows, data *[]string) error + +func (dbConn *DBConnect) getDBDataByQueryCmd( + query_cmd string, handler DataHandle, data *[]string) error { + rows, err := dbConn.db.Query(query_cmd) + if err != nil { + return err + } + return handler(rows, data) +} + +func 
getQueryRowInfo(rows *sql.Rows, data *[]string) error { + columns, err := rows.Columns() + if err != nil { + return err + } + + values := make([]interface{}, len(columns)) + for i := range values { + values[i] = new(sql.RawBytes) + } + + for rows.Next() { + if err = rows.Scan(values...); err != nil { + return err + } + rowData := "" + for i, value := range values { + //convert data to "key1=value1,key2=value1..." + val := string(*value.(*sql.RawBytes)) + if len(val) == 0 { + val = "NULL" + } + rowData += columns[i] + "=" + val + "," + } + if len(rowData) > 0 { + (*data) = append(*data, rowData[0:(len(rowData)-1)]) + } + } + return rows.Err() +} + +func getDataMapFromSliceData(data *[]string) []map[string]interface{} { + if len(*data) <= 0 { + return nil + } + mCnt := 0 + keyPos := 0 + mData := make([]map[string]interface{}, len(*data)) + s := strings.FieldsFunc((*data)[0], func(r rune) bool { + return r == '=' || r == ',' + }) + //get index of real variable_name + if strings.ToLower(s[0]) == "variable_name" && + strings.ToLower(s[2]) == "value" { + keyPos = 1 + } + //convert data "key1=value1,key2=value1..." 
to map['']='' + for _, entry := range *data { + if len(entry) <= 0 { + continue + } + s := strings.FieldsFunc(entry, func(r rune) bool { + return r == '=' || r == ',' + }) + sLen := len(s) + m := make(map[string]interface{}, sLen/(2+2*keyPos)) + for i := keyPos; i < sLen; i += (2 + 2*keyPos) { + m[s[i]] = s[i+1+keyPos] + } + mData[mCnt] = m + mCnt++ + } + return mData +} + +/** + * GetRowsByQueryCmd - get databese Row data by query cmd + * + * sql_cmd: query command + * + * if function successfully, a []map will return, and + * The format of each element is 'key=value', the key is the + * variable or columns name + */ +func (dbConn *DBConnect) GetRowsByQueryCmd( + sql_cmd string) ([]map[string]interface{}, error) { + var data []string + err := dbConn.getDBDataByQueryCmd( + sql_cmd, getQueryRowInfo, &data) + if err != nil { + PrintSysError(err) + return nil, err + } + return getDataMapFromSliceData(&data), nil +} + +func (dbConn *DBConnect) GetRowsListByQueryCmd( + sql_cmd string) (*[]string, error) { + var data []string + err := dbConn.getDBDataByQueryCmd( + sql_cmd, getQueryRowInfo, &data) + if err != nil { + PrintSysError(err) + return nil, err + } + return &data, nil +} + +func (dbConn *DBConnect) ParseSQL(sqlStr string) (string) { + complexity := "simple" + stmt, err := dbConn.db.Prepare(sqlStr) + if err != nil { + if strings.Contains(sqlStr, "JOIN") || strings.Contains(sqlStr, "GROUP") || + strings.Contains(sqlStr, "ORDER") || strings.Contains(sqlStr, "DISTINCT") || + strings.Contains(sqlStr, "UNION") { + complexity = "complex" + } + return complexity + } + defer stmt.Close() + + keywords := make(map[string]bool) + for _, word := range strings.Fields(strings.ToUpper(sqlStr)) { + if _, ok := keywords[word]; !ok { + keywords[word] = true + } + } + + if _, ok := keywords["JOIN"]; ok { + complexity = "complex" + } + if _, ok := keywords["GROUP"]; ok { + complexity = "complex" + } + if _, ok := keywords["ORDER"]; ok { + complexity = "complex" + } + if _, ok := 
keywords["DISTINCT"]; ok { + complexity = "complex" + } + if _, ok := keywords["LIMIT"]; ok { + complexity = "complex" + } + if _, ok := keywords["UNION"]; ok { + complexity = "complex" + } + return complexity +} + +func (dbConn *DBConnect) GetPort() int { + return dbConn.desc.port +} + +func (dbConn *DBConnect) GetIp() string { + return dbConn.desc.host +} + +func (dbConn *DBConnect) GetContainerID() string { + return dbConn.desc.containerID +} + +func (dbConn *DBConnect) GetPodID() string { + return dbConn.desc.podID +} + +func (dbConn *DBConnect) DBConnIsVaild() bool { + if dbConn.db != nil { + return true + } + return false +} + +func ConnectToDB(dbConn *DBConnect, user string, passwd string) { + dataSourceName := user + ":" + passwd + "@tcp(" + + dbConn.desc.host + ":" + strconv.Itoa(dbConn.desc.port) + ")/" + db, err := sql.Open(dbConn.desc.dbDriverName, dataSourceName) + if err != nil { + PrintSysError(err) + return + } + dbConn.db = db +} + +func NewDBConnection(dbDriverName string, + host string, port int, containerID string, podID string) (*DBConnect, error) { + desc := ConnDesc{ + dbDriverName: dbDriverName, + user: "", + passwd: "", + host: host, + port: port, + containerID: containerID, + podID: podID, + } + dbConn := &DBConnect{ + db: nil, + desc: &desc, + } + return dbConn, nil +} + +func (dbConn *DBConnect) CloseDBConnection() { + if dbConn.DBConnIsVaild() { + dbConn.db.Close() + } +} diff --git a/source/tools/monitor/observ/sql-obs/common/unityQuery.go b/source/tools/monitor/observ/sql-obs/common/unityQuery.go new file mode 100644 index 0000000000000000000000000000000000000000..3f57821c845f922a141a79594648a95d4256e556 --- /dev/null +++ b/source/tools/monitor/observ/sql-obs/common/unityQuery.go @@ -0,0 +1,226 @@ +package common + +import ( + "context" + "time" + "encoding/json" + "fmt" + "io/ioutil" + "net/http" + "strconv" + "strings" + "unsafe" + //"reflect" +) + +var queryUrl string = "http://127.0.0.1:8400/api/query" +var queryPeriod = 30 + 
+func PostReqToUnity(url string, data string) ([]byte, error) { + timeout := 3 * time.Second + ctx, cancel := context.WithTimeout(context.Background(), timeout) + defer cancel() + + client := &http.Client{ + Timeout: timeout, + } + payload := strings.NewReader(data) + req, err := http.NewRequestWithContext( + ctx, "POST", "http://127.0.0.1:8400" + url, payload) + if err != nil { + return nil, err + } + req.Header.Add("content-type", "application/json") + res, err := client.Do(req) + if err != nil { + return nil, err + } + defer res.Body.Close() + bodyBytes, err := ioutil.ReadAll(res.Body) + if err != nil { + return nil, err + } + return bodyBytes, nil +} + +func qByTable(table string, timeSecs int) ([]map[string]interface{}, error) { + // timeout := 3 * time.Second + // ctx, cancel := context.WithTimeout(context.Background(), timeout) + // defer cancel() + + // var m []map[string]interface{} + // client := &http.Client{ + // Timeout: timeout, + // } + // payload := strings.NewReader(fmt.Sprintf( + // "{\"mode\": \"last\", \"time\": \"%ds\", \"table\": [\"%s\"]}", + // timeSecs, table)) + // req, err := http.NewRequestWithContext(ctx, "POST", queryUrl, payload) + // if err != nil { + // return nil, err + // } + // req.Header.Add("content-type", "application/json") + // res, err := client.Do(req) + // if err != nil { + // return nil, err + // } + // defer res.Body.Close() + // bodyBytes, err := ioutil.ReadAll(res.Body) + var m []map[string]interface{} + data := fmt.Sprintf( + "{\"mode\": \"last\", \"time\": \"%ds\", \"table\": [\"%s\"]}", + timeSecs, table) + bodyBytes, err := PostReqToUnity("/api/query", data) + if err != nil { + return nil, err + } + err = json.Unmarshal(bodyBytes, &m) + if err != nil { + return nil, err + } + return m, nil +} + +func getMetricsFromUnity(table string, metrics []string, + lastNums ...int) []interface{} { + if len(lastNums) > 0 { + queryPeriod = lastNums[0] * 30 + } + m, err := qByTable(table, queryPeriod) + if err != nil { + 
PrintSysError(err) + return nil + } + result := make([]interface{}, len(m)) + for index, value := range m { + result[index] = make([]interface{}, len(metrics)) + for index2, value2 := range metrics { + result[index].([]interface{})[index2] = + value["values"].(map[string]interface{})[value2] + } + } + return result +} + +func GetSingleMetricsMulti[T interface{}](table string, metrics string, + lastNums ...int) []T { + met := getMetricsFromUnity(table, []string{metrics}, lastNums...) + if met == nil { + return nil + } + return *(*[]T)(unsafe.Pointer(&met)) +} + +func GetSingleMetrics[T interface{}]( + table string, metrics string) (T, error) { + m := GetSingleMetricsMulti[T](table, metrics) + if m == nil { + var zero T + return zero, PrintOnlyErrMsg("get metrics fail!") + } + return m[0], nil +} + +func GetMultiMetricsMulti[T interface{}](table string, metrics []string, + lastNums ...int) [][]T { + met := getMetricsFromUnity(table, metrics, lastNums...) + if met == nil { + return nil + } + return *(*[][]T)(unsafe.Pointer(&met)) +} + +func GetMultiMetrics[T interface{}](table string, metrics []string) []T { + m := GetMultiMetricsMulti[T](table, metrics) + if m == nil { + return nil + } + return m[0] +} + +func GetIOMetrics(table string, metrics []string, + lastNums ...int) []interface{} { + if len(lastNums) > 0 { + queryPeriod = lastNums[0] * 30 + } + m, err := qByTable(table, queryPeriod) + if err != nil { + PrintSysError(err) + return nil + } + result := make([]interface{}, 0) + for _, value := range m { + name := value["labels"].(map[string]interface{})["disk_name"].(string) + if _, err := strconv.Atoi(name[len(name)-1:]); err == nil { + continue + } + result = append(result, make([]interface{}, len(metrics)+1)) + result[len(result)-1].([]interface{})[0] = name + for index2, value2 := range metrics { + result[len(result)-1].([]interface{})[index2+1] = + value["values"].(map[string]interface{})[value2] + } + } + return result +} + +func GetAppMetrics(table string, 
appName string, + labels []string, metrics []string) []interface{} { + m, err := qByTable(table, queryPeriod) + if err != nil { + PrintSysError(err) + return nil + } + result := make([]interface{}, 0) + for _, value := range m { + name := value["labels"].(map[string]interface{})["comm"].(string) + if name != appName { + continue + } + result = append(result, make(map[string]interface{})) + for _, label := range labels { + result[len(result)-1].(map[string]interface{})[label] = + value["labels"].(map[string]interface{})[label] + } + for _, metric := range metrics { + result[len(result)-1].(map[string]interface{})[metric] = + value["values"].(map[string]interface{})[metric] + } + } + return result +} + +func GetAppLatency(table string, labels []string) map[string]map[string]string { + m, err := qByTable(table, queryPeriod) + if err != nil { + PrintSysError(err) + return nil + } + + result := make(map[string]map[string]string) + for _, line := range m { + if line["labels"].(map[string]interface{})["APP"].(string) != "MYSQL" { + continue + } + containerID := "NULL" + if val, ok := line["labels"].( + map[string]interface{})["ContainerID"]; ok && (val != nil) { + containerID = val.(string) + if len(containerID) >= 12 { + containerID = containerID[:12] + } else if len(containerID) == 0 { + containerID = "NULL" + } + } + pid := line["labels"].(map[string]interface{})["Pid"].(string) + key := containerID + ":" + pid + if _, ok := result[key]; !ok { + result[key] = map[string]string{} + } + for _, label := range labels { + result[key][label] = + line["labels"].(map[string]interface{})[label].(string) + } + } + return result +} diff --git a/source/tools/monitor/observ/sql-obs/events/analyzer/alarm.go b/source/tools/monitor/observ/sql-obs/events/analyzer/alarm.go new file mode 100644 index 0000000000000000000000000000000000000000..11a80e665468344b6d45c72a4af4c01f8b79a791 --- /dev/null +++ b/source/tools/monitor/observ/sql-obs/events/analyzer/alarm.go @@ -0,0 +1,464 @@ +package 
analyzer + +import ( + "encoding/json" + "fmt" + "time" + // "math" + "sql-obs/common" + // "os" + "strconv" + // "regexp" + // "unsafe" + "reflect" + "strings" + "sync" +) + +type eventsDesc struct { + ts int64 + desc string + extra string //some other desc...json format + alarm bool + descLock sync.Mutex +} + +type alarmManage struct { + agingTime int + desc []eventsDesc + cacheDescExtra []map[string]string +} + +type alarmStatics struct { + alarmStr string + count int +} + +const Default_Aging_Time = 120 //120 seconds +var appAlarmEventsStaticsTlbName = "sysom_obser_mysqld_alarm" +var osAlarmEventsStaticsTlbName = "sysom_obser_os_alarm" +var alarmManageTlb = make([]*alarmManage, Notify_Type_Max) +var alarmStatMap = make(map[string][]*alarmStatics) + +type diagnoseApiDsec struct { + defaultApiName string + titleDesc string + allowAnotherDiag bool +} + +// According to the Notify_Type table order correspondence +// The order must be consistent with the Notify_Type table in notify.go +var generalDiagnoseApiTable = []diagnoseApiDsec { + {"diagnose/storage/iofsstat?disk=$disk$&", "IO burst", false}, //Notify_IO_Burst_Type is the first + {"diagnose/storage/iolatency?disk=$disk$&threshold=$threshold$&", "IO delay", false}, + {"diagnose/storage/iowait?", "IO wait", false}, + {"null", "null", false}, + {"diagnose/cpu/loadtask?", "UN-status", false}, + {"diagnose/memory/memgraph?", "direct reclaim", false}, + {"diagnose/memory/memgraph?", "memleak", false}, + {"journal/node?", "null", false}, + {"journal/node?", "null", false}, + {"journal/node?", "null", false}, + {"diagnose/memory/oomcheck?time=$ts$&", "null", false}, + {"diagnose/storage/iohang?disk=$disk$&threshold=3000&", "IO hang", false}, + {"diagnose/cpu/cpuhigh?time=$ts$&", "lockup", false}, + {"diagnose/link/rtdelay?pid=$pid$&", "RT", true}, + {"diagnose/cpu/schedmoni?threshold=100&", "sched delay", false}, + {"diagnose/net/packetdrop?", "pkg drops", false}, + {"journal/node?", "slow SQL", true}, + 
{"journal/node?", "null", false}, + {"diagnose/cpu/cpuhigh?time=$ts$&", "CPU High", false}, //Notify_Process_CPU_HIGH_Type is the last +} + +func getApiNameFromDesc(desc string) (string, int) { + apiName := "null" + tIdx := Notify_Type_Max + for typeIdx, diag := range generalDiagnoseApiTable { + if strings.Contains(desc, diag.titleDesc) { + apiName = diag.defaultApiName + tIdx = typeIdx + break + } + } + return apiName, tIdx +} + +func getDiagnoseApiName(alarmType int, desc string) (name string) { + if (alarmType < len(generalDiagnoseApiTable)) { + data := fromStringToMapInterface(desc) + apiName := generalDiagnoseApiTable[alarmType].defaultApiName + if (generalDiagnoseApiTable[alarmType].allowAnotherDiag) { + description := data["value"].(string) + if _, exist := data["os_log"]; exist { + _, ok := data["os_log"].(map[string]interface{})["value"] + if ok { + description = data["os_log"].( + map[string]interface{})["value"].(string) + } else { + description = "default diagnose" + } + } + newApiName, aType := getApiNameFromDesc(description) + if newApiName != "null" && aType != alarmType { + apiName = newApiName + } + } + if strings.Count(apiName, "$") > 0 { + d := data + if _, exist := data["os_log"]; exist { + _, ok := data["os_log"].(map[string]interface{})["value"] + if ok { + d = data["os_log"].(map[string]interface{}) + } + } + for key, value := range d { + apiName = strings.ReplaceAll(apiName, + fmt.Sprintf("$%s$", key), fmt.Sprintf("%v", value)) + } + } + return apiName + } else { + return "null" + } +} + +func addAlarmStat(alarmType int, extraVal map[string]string, add int) { + getAlarmStat := func(alarmType int, extraVal map[string]string) *alarmStatics { + key := "os" + if _, ok := extraVal["containerId"]; ok { + key = extraVal["podId"] + ":" + extraVal["containerId"] + ":" + + extraVal["tag_set"] + ":" + extraVal["port"] + } + if _, ok := alarmStatMap[key]; !ok { + alarmStatMap[key] = make([]*alarmStatics, Notify_Type_Max) + } + if 
alarmStatMap[key][alarmType] == nil { + alarmStatMap[key][alarmType] = &alarmStatics{ + alarmStr: strings.ReplaceAll( + mTypeStrTlb[alarmType], "Notify", "Alarm"), + count: 0, + } + } + return alarmStatMap[key][alarmType] + } + getAlarmStat(alarmType, extraVal).count += (add) +} + +func alarmEventAging(alarmType int, descIdx int) (bool, bool) { + alarm := alarmManageTlb[alarmType] + desc := &alarm.desc[descIdx] + agingTime := alarm.agingTime + hasAlarm := false + now := time.Now().Unix() + desc.descLock.Lock() + if desc.alarm { + hasAlarm = true + start := desc.ts + if int(now-start) >= agingTime { + desc.alarm = false + desc.desc = "" + desc.extra = "" + addAlarmStat(alarmType, alarm.cacheDescExtra[descIdx], (-1)) + alarm.cacheDescExtra[descIdx] = map[string]string{} + desc.descLock.Unlock() + return true, hasAlarm + } + } + desc.descLock.Unlock() + return false, hasAlarm +} + +func getAlarmEventIdx(alarmType int) int { + lenDesc := len(alarmManageTlb[alarmType].desc) + descIdx := lenDesc + for idx := 0; idx < lenDesc; idx++ { + aging, alarm := alarmEventAging(alarmType, idx) + if aging || !alarm { + if descIdx >= lenDesc { + descIdx = idx + } + } + } + return descIdx +} + +func fromStringToMapInterface(extra string) map[string]interface{} { + var extraVal map[string]interface{} + err := json.Unmarshal([]byte(extra), &extraVal) + if err != nil { + return nil + } + return extraVal +} + +func fromStringToMapString(extra string) map[string]string { + var extraValString, extraVal map[string]string + var extraValInterface map[string]interface{} + err := json.Unmarshal([]byte(extra), &extraValString) + if err != nil { + err = json.Unmarshal([]byte(extra), &extraValInterface) + if err != nil { + return nil + } + extraVal = make(map[string]string) + for k, v := range extraValInterface { + if reflect.TypeOf(v).Kind() != reflect.String { + v = fmt.Sprintf("%v", v) + } + extraVal[k] = v.(string) + } + } else { + extraVal = extraValString + } + return extraVal +} + +func 
alarmEvent(alarmType int, ts int64, desc string, extra string) { + extraVal := fromStringToMapString(extra) + if extraVal == nil { + return + } + addAlarmStat(alarmType, extraVal, 1) + descIdx := getAlarmEventIdx(alarmType) + if descIdx >= len(alarmManageTlb[alarmType].desc) { + alarmManageTlb[alarmType].desc = append( + alarmManageTlb[alarmType].desc, eventsDesc{}) + alarmManageTlb[alarmType].cacheDescExtra = append( + alarmManageTlb[alarmType].cacheDescExtra, + map[string]string{}) + } + eDsec := &alarmManageTlb[alarmType].desc[descIdx] + eDsec.alarm = true + eDsec.ts = ts + eDsec.desc = desc + eDsec.extra = extra + alarmManageTlb[alarmType].cacheDescExtra[descIdx] = extraVal +} + +func GetAlarmDescs(alarmType int) []map[string]string { + now := time.Now().Unix() + lenDesc := len(alarmManageTlb[alarmType].desc) + for idx := 0; idx < lenDesc; idx++ { + alarmInvalid := false + start := alarmManageTlb[alarmType].desc[idx].ts + if int(now-start) > Default_Aging_Time { + alarmInvalid = true + } + if alarmManageTlb[alarmType].agingTime > 0 || alarmInvalid { + alarmEventAging(alarmType, idx) + } + } + isAllElementsEmpty := func(s []map[string]string) bool { + for _, v := range s { + if len(v) != 0 { + return false + } + } + return true + } + if isAllElementsEmpty(alarmManageTlb[alarmType].cacheDescExtra) { + return nil + } + return alarmManageTlb[alarmType].cacheDescExtra +} + +func addFieldToExtra(extra string, field string) string { + str := strings.TrimSuffix(extra, "}") + str += ("," + field + "}") + return str +} + +func makeAlarmBody(alarmType int, desc string, descExtra string) string { + ts := time.Now().Unix() + time := time.Unix(ts, 0).Format(common.TIME_FORMAT) + + alarmEvent(alarmType, ts, desc, descExtra) + // return fmt.Sprintf(`node_event event_type="log_exception",`+ + // `description="%s",extra=%s,ts="%s"`, + // desc, strconv.Quote(addFieldToExtra(descExtra, + // "\"root_analyz_flag\":\"" + getDiagnoseApiName(alarmType, + // descExtra) + "\"")), now) + 
alarmItem := strings.ReplaceAll(mTypeStrTlb[alarmType], "Type", "Alarm") + alarmItem = strings.ReplaceAll(alarmItem, "Notify", "Sqlobs") + diag_url := getDiagnoseApiName(alarmType, descExtra) + labels := fmt.Sprintf( + `{"desc":"%s","diag_url":"%s","ts":"%s"}`, + desc, diag_url, time) + return fmt.Sprintf( + `{"alert_item":"%s",`+ + `"alert_category":"APPLICATION",`+ + `"status":"FIRING",`+ + `"alert_source_type":"sysak",`+ + `"labels":%s,`+ + `"annotations":%s}`, + alarmItem, + labels, + addFieldToExtra( + descExtra, "\"root_analyz_flag\":\"" + diag_url + "\"")) +} + +func GetLogEventsDesc(alarmType int, level string, tag_set string, + desc string, extra ...string) string { + extraVal := desc + ts := time.Now().Unix() + nowFormat := time.Unix(ts, 0).Format(common.TIME_FORMAT) + if len(extra) > 0 { + extraVal = extra[0] + } + descExtra := extraVal + if !json.Valid([]byte(descExtra)) { + descExtra = fmt.Sprintf(`{"level":"%s"`+ + `,"value":"%s"`+ + `,"ts":"%s"`+ + `,"tag_set":"%s"}`, + level, extraVal, nowFormat, tag_set) + } + return makeAlarmBody(alarmType, desc, descExtra) +} + +func GetMetricsEventsDesc(alarmType int, comm string, pid string, + extraPodId string, extraContainerId string, metricsName string, + desc string, extra ...string) string { + extraVal := desc + ts := time.Now().Unix() + nowFormat := time.Unix(ts, 0).Format(common.TIME_FORMAT) + if len(extra) > 0 { + extraVal = extra[0] + } + descExtra := extraVal + if !json.Valid([]byte(descExtra)) { + pidInt, _ := strconv.Atoi(pid) + portStr := common.GetAppInstanceMemberByPid(pidInt, "Port") + if portStr != "" { + portStr = strconv.Itoa(portStr.(int)) + } + descExtra = fmt.Sprintf(`{"metrics":"%s"`+ + `,"value":"%s"`+ + `,"ts":"%s"`+ + `,"tag_set":"%s"`+ + `,"pid":"%s"`+ + `,"port":"%s"`+ + `,"podId":"%s"`+ + `,"containerId":"%s"}`, + metricsName, extraVal, nowFormat, + comm, pid, portStr, extraPodId, extraContainerId) + } + return makeAlarmBody(alarmType, desc, descExtra) +} + +func SubmitAlarm(data 
string) error { + //fmt.Println(data) + _, err := common.PostReqToUnity("/api/alert", data) + if err != nil { + //fmt.Println(bodyBytes) + common.PrintSysError(err) + return err + } + //fmt.Println(string(bodyBytes)) + return nil +} + +func ExportAlarmStatics() { + data := "" + for key, value := range alarmStatMap { + prefix := osAlarmEventsStaticsTlbName + ",tag_set=os" + if key != "os" { + prefix = appAlarmEventsStaticsTlbName + ",tag_set=app" + s := strings.Split(key, ":") + podIp := common.GetAppInstanceInfo(map[string]interface{}{ + "Comm": s[2], "PodId": s[0], "ContainerId": s[1]}, "Ip").(string) + prefix += ",podId=" + s[0] + ",containerId=" + s[1] + + ",comm=" + s[2] + ",podIp=" + podIp + ",port=" + s[3] + } + fields := "" + for t, stat := range value { + field := "" + if stat == nil { + f := strings.ReplaceAll(mTypeStrTlb[t], "Notify", "Alarm") + field = fmt.Sprintf("%s=0,", f) + } else { + field = fmt.Sprintf("%s=%d,", stat.alarmStr, stat.count) + } + fields += field + } + if len(fields) > 0 { + if len(data) > 0 { + data += "\n" + } + fields = fields[:len(fields)-1] + data += (prefix + " " + fields) + } + } + if len(data) > 0 { + common.ExportData(data) + } +} + +func SetAlarmAgingTime(eventType int, time int) { + if eventType < Notify_Type_Max { + alarmManageTlb[eventType].agingTime = time + } +} + +func initAppAlarm(values []interface{}) { + podId := values[0].(string) + containerId := values[1].(string) + comm := values[2].(string) + port := strconv.Itoa(values[3].(int)) + for i := 0; i < Notify_Type_Max; i++ { + addAlarmStat(i, map[string]string{ + "podId" : podId, "containerId": containerId, + "tag_set": comm, "port": port}, 0) + } +} + +var agingAlarmTicker *time.Ticker +func agingAlarmTimer() { + ticker := time.NewTicker(time.Second * Default_Aging_Time) + agingAlarmTicker = ticker + for { + <-ticker.C + now := time.Now().Unix() + for i := 0; i < Notify_Type_Max; i++ { + lenDesc := len(alarmManageTlb[i].desc) + for idx := 0; idx < lenDesc; idx++ { 
+ alarmInvalid := false + start := alarmManageTlb[i].desc[idx].ts + if int(now-start) > Default_Aging_Time { + alarmInvalid = true + } + if alarmManageTlb[i].agingTime > 0 || alarmInvalid { + alarmEventAging(i, idx) + } + } + } + } +} + +func DestroyAlarmResource() { + if agingAlarmTicker != nil { + agingAlarmTicker.Stop() + } +} + +func InitAlarmManage() { + for i := 0; i < Notify_Type_Max; i++ { + alarmManageTlb[i] = &alarmManage{ + agingTime: Default_Aging_Time, + desc: make([]eventsDesc, 32), + cacheDescExtra: make([]map[string]string, 32), + } + addAlarmStat(i, map[string]string{"tag_set" : "os"}, 0) + } + + common.ForeachAppInstances("mysqld", []string{ + "PodId", "ContainerId", "Comm", "Port"}, initAppAlarm) + go agingAlarmTimer() + SetAlarmAgingTime(Notify_IO_Wait_Type, 0) + SetAlarmAgingTime(Notify_Direct_Reclaim_Type, 0) + SetAlarmAgingTime(Notify_Memleak_Type, 0) + SetAlarmAgingTime(Notify_IO_Error_Type, 0) + SetAlarmAgingTime(Notify_FS_Error_Type, 0) + SetAlarmAgingTime(Notify_Net_Link_Down_Type, 0) + SetAlarmAgingTime(Notify_OS_Lockup_Type, 0) +} diff --git a/source/tools/monitor/observ/sql-obs/events/analyzer/analyzer.go b/source/tools/monitor/observ/sql-obs/events/analyzer/analyzer.go new file mode 100644 index 0000000000000000000000000000000000000000..d5d15d18ab99a41faf38dec461df77ab303c0e25 --- /dev/null +++ b/source/tools/monitor/observ/sql-obs/events/analyzer/analyzer.go @@ -0,0 +1,71 @@ +package analyzer + +import ( + //"fmt" + "sql-obs/common" + //"strings" + //"os" +) + +type DBConnect = common.DBConnect +type ErrorCode = common.ErrorCode + +type handler func(data *[]string, dataLen int, pri *interface{}) +type Analyzer struct { + status chan int + data []string + dataLen int + handleData handler + private *interface{} +} + +const ( + Analyzer_WakeUp = 1 + Analyzer_Exited +) + +func (A *Analyzer) defaultAnalyzerHandler() { + for { + status := <-A.status + if status == Analyzer_WakeUp { + A.handleData(&A.data, A.dataLen, A.private) + } else if 
status == Analyzer_Exited { + common.PrintDefinedErr( + ErrorCode(common.Fail_Slow_Sql_Analyzer_Exit)) + return + } + } +} + +func (A *Analyzer) wakeUpAnalyzer() { + A.status <- Analyzer_WakeUp +} + +func (A *Analyzer) ExitAnalyzer() { + A.status <- Analyzer_Exited +} + +/* +func (A *Analyzer) MatchDBConnectTOAnalyzer(dbConn *DBConnect) { + A.dbConn = dbConn +}*/ + +func (A *Analyzer) CopyDataToAnalyzer(data []string, dataLen int) { + if A.data == nil { + A.data = make([]string, len(data)) + } + copy(A.data, data[:dataLen]) + A.dataLen = dataLen + A.wakeUpAnalyzer() +} + +func NewAnalyzer(h handler) (*Analyzer) { + A := Analyzer{ + status: make(chan int), + data: nil, + dataLen: 0, + handleData: h, + } + go A.defaultAnalyzerHandler() + return &A +} diff --git a/source/tools/monitor/observ/sql-obs/events/analyzer/notify.go b/source/tools/monitor/observ/sql-obs/events/analyzer/notify.go new file mode 100644 index 0000000000000000000000000000000000000000..ad52f2de5d4d9b02a9f5dfa26de477061a922d97 --- /dev/null +++ b/source/tools/monitor/observ/sql-obs/events/analyzer/notify.go @@ -0,0 +1,109 @@ +package analyzer + +import ( + "fmt" + "sql-obs/common" +) + +type MetricsType int + +const ( + Notify_IO_Burst_Type = iota + Notify_IO_Delay_Type + Notify_IO_Wait_Type + Notify_IO_Except_Type + Notify_Long_Time_D_Type + Notify_Direct_Reclaim_Type + Notify_Memleak_Type + Notify_IO_Error_Type + Notify_FS_Error_Type + Notify_Net_Link_Down_Type + Notify_Process_OOM_Type + Notify_IO_Hang_Type + Notify_OS_Lockup_Type + Notify_Process_RT_Type + Notify_Process_Sched_Delay_Type + Notify_Process_Net_Drops_Type + Notify_Process_Mysql_Slow_Sql_Type + Notify_Process_Mysql_Error_Type + Notify_Process_CPU_HIGH_Type + + Notify_Type_Max +) + +var mTypeStrTlb = []string{ + "Notify_IO_Burst_Type", + "Notify_IO_Delay_Type", + "Notify_IO_Wait_Type", + "Notify_IO_Except_Type", + "Notify_Long_Time_D_Type", + "Notify_Direct_Reclaim_Type", + "Notify_Memleak_Type", + "Notify_IO_Error_Type", + 
"Notify_FS_Error_Type", + "Notify_Net_Link_Down_Type", + "Notify_Process_OOM_Type", + "Notify_IO_Hang_Type", + "Notify_OS_Lockup_Type", + "Notify_Process_RT_Type", + "Notify_Process_Sched_Delay_Type", + "Notify_Process_Net_Drops_Type", + "Notify_Process_Mysql_Slow_Sql_Type", + "Notify_Process_Mysql_Error_Type", + "Notify_Process_CPU_HIGH_Type", +} + +type notifyHandler func(data []interface{}) +type eventsNotify struct { + trigger chan bool + hList []notifyHandler + data []interface{} + notify []bool +} + +var eNotfiy *eventsNotify + +func MarkEventsNotify(mType MetricsType, data ...interface{}) { + eNotfiy.notify[mType] = true + eNotfiy.data[mType] = data +} + +func TriggerNotify() { + eNotfiy.trigger <- true +} + +func RegisterNotify(mType MetricsType, nh notifyHandler) { + eNotfiy.hList[mType] = nh +} + +func (e *eventsNotify) runNotifyHandler() { + fmt.Println("run events notify handler") + for { + trigger := <-e.trigger + if trigger { + for mType, notify := range e.notify { + if notify { + e.notify[mType] = false + if e.hList[mType] == nil { + common.PrintDefinedErr( + ErrorCode(common.Fail_Notify_Not_Register), + "events notify type:"+mTypeStrTlb[int(mType)]) + continue + } + e.hList[mType](e.data[mType].([]interface{})) + } + } + } + } +} + +func StartEventNotify() { + eNotfiy = &eventsNotify{ + trigger: make(chan bool), + hList: make([]notifyHandler, Notify_Type_Max), + data: make([]interface{}, Notify_Type_Max), + notify: make([]bool, Notify_Type_Max), + } + fmt.Println("start events notify") + go eNotfiy.runNotifyHandler() +} diff --git a/source/tools/monitor/observ/sql-obs/events/analyzer/osAnalyzer.go b/source/tools/monitor/observ/sql-obs/events/analyzer/osAnalyzer.go new file mode 100644 index 0000000000000000000000000000000000000000..66fb84c6b606240ec404b6248c4500db3a5bd054 --- /dev/null +++ b/source/tools/monitor/observ/sql-obs/events/analyzer/osAnalyzer.go @@ -0,0 +1,103 @@ +package analyzer + +import ( + "fmt" + //"unsafe" + //"encoding/json" + 
"sql-obs/common" + //"os" + "strings" + //"strconv" +) + +func AnalyzOSEvents(podId string, containerId string, + pid int) map[string]string { + getAlarmMetrics := func(alarmType int, podId string, containerId string, + pid int) (map[string]string, bool) { + alarm := GetAlarmDescs(alarmType) + if alarm != nil { + for _, a := range alarm { + if podId == a["podId"] && + containerId == a["containerId"] && + strings.Contains(a["tag_set"], "mysqld") { + return a, true + } + } + } + return nil, false + } + getAlarmLogIO := func(alarmType int, podId string, containerId string, + pid int) (map[string]string, bool) { + alarm := GetAlarmDescs(alarmType) + if alarm != nil { + diskList := common.GetAppInstanceMemberByPid( + pid, "PvDevice") + if diskList != "" { + for _, disk := range diskList.([]string) { + for _, a := range alarm { + if _, ok := a["disk"]; ok && strings.Contains(disk, a["disk"]) { + return a, true + } + } + } + } + } + return nil, false + } + getAlarmLog := func(alarmType int, podId string, containerId string, + pid int) (map[string]string, bool) { + alarm := GetAlarmDescs(alarmType) + if alarm != nil { + for _, a := range alarm { + return a, true + } + } + return nil, false + } + + //rtDesc, rtLarge := getAlarmMetrics( + // Notify_Process_RT_Type, podId, containerId, pid) + sdDesc, sdLarge := getAlarmMetrics( + Notify_Process_Sched_Delay_Type, podId, containerId, pid) + dtDesc, dtLarge := getAlarmMetrics( + Notify_Long_Time_D_Type, podId, containerId, pid) + dropDesc, dropLarge := getAlarmMetrics( + Notify_Process_Net_Drops_Type, podId, containerId, pid) + ioHDesc, ioHLarge := getAlarmLogIO( + Notify_IO_Hang_Type, podId, containerId, pid) + ioBDesc, ioBLarge := getAlarmLogIO( + Notify_IO_Burst_Type, podId, containerId, pid) + ioDDesc, ioDLarge := getAlarmLogIO( + Notify_IO_Delay_Type, podId, containerId, pid) + ioWDesc, ioWLarge := getAlarmLog( + Notify_IO_Wait_Type, podId, containerId, pid) + drDesc, drDLarge := getAlarmLog( + Notify_Direct_Reclaim_Type, 
podId, containerId, pid) + + if ioHLarge { + return ioHDesc + } else if ioDLarge { + if ioBLarge { + return ioBDesc + } + return ioDDesc + } else if ioBLarge { + return ioBDesc + } else if drDLarge { + return drDesc + } else if ioWLarge { + return ioWDesc + } else if sdLarge { + return sdDesc + } else if dtLarge { + return dtDesc + } else if dropLarge { + return dropDesc + } else { + return nil + } +} + +func NewOSExceptAnalyzer() { + fmt.Println("new OS except analyzer...") +} diff --git a/source/tools/monitor/observ/sql-obs/events/analyzer/osChk.go b/source/tools/monitor/observ/sql-obs/events/analyzer/osChk.go new file mode 100644 index 0000000000000000000000000000000000000000..a0fcd4e7bf394ddd5fc5ce79da1b6a8437edb3c9 --- /dev/null +++ b/source/tools/monitor/observ/sql-obs/events/analyzer/osChk.go @@ -0,0 +1,508 @@ +package analyzer + +import ( + "fmt" + "math" + "sql-obs/common" + "time" + + //"os" + //"time" + "regexp" + "strconv" + "strings" + "encoding/json" +) + +var osEventsTable string = "mysql_observ_osEvents" +var aggregator map[string]*common.DataItem = map[string]*common.DataItem{} +var config OsCheckConfig + +type defineDataExport func(args ...interface{}) + +func initSlideWindow(configPath string) { + setOsCheckConfig(&config, configPath) + common.AddItem(aggregator, "iowait", config.Window) +} + +// data is [containerId, comm, pid, data1, data2, ...] 
+// if process not in container, containerId should be "" +func chkProcessMetricsGeneral(alarmType int, metricsName string, + msgFormat string, thresh float32, data []interface{}, + exportHandler ...defineDataExport) { + cidIdx := 0 + commIdx := 1 + pidIdx := 2 + dataStartIndex := 3 + if len(data) < (dataStartIndex + 1) { + return + } + metrics := data[dataStartIndex].(float64) + if float32(metrics) > thresh { + comm := data[commIdx].(string) + pid := data[pidIdx].(string) + format := "%s(%s) " + msgFormat + desc := fmt.Sprintf(format, comm, pid, metrics) + extraContainerId := "NULL" + extraPodId := "NULL" + if len(data[cidIdx].(string)) > 0 { + extraContainerId = data[cidIdx].(string) + pidInt, _ := strconv.Atoi(pid) + extraPodId = common.GetAppInstanceMemberByPid( + pidInt, "PodId").(string) + if len(extraPodId) == 0 { + extraPodId = "NULL" + } + } + // add container ID + if len(exportHandler) > 0 { + for _, handler := range exportHandler { + handler(alarmType, comm, pid, extraPodId, extraContainerId, + metricsName, desc) + } + } else { + SubmitAlarm(GetMetricsEventsDesc(alarmType, comm, + pid, extraPodId, extraContainerId, metricsName, + desc)) + } + } +} + +// IO Events: +// IO Wait High, data should be [global iowait] +func osChkIOWaitEvents(data []interface{}) { + if len(data) > 0 { + iowait := data[0].(float64) + if iowait >= float64(config.Iowait) { + common.DisableThreshComp(aggregator, "iowait") + } + // detect iowait exception + minThresh := float64(config.Iowait) + iowaitThresh := math.Max( + common.GetDynThresh(aggregator, "iowait"), minThresh) + if iowait > iowaitThresh { + SubmitAlarm(GetLogEventsDesc(Notify_IO_Wait_Type, + "warning", "os", "IO wait high")) + } + common.UpdateDynThresh(aggregator, "iowait", iowait) + } +} + +// IO Burst & IO Hang & IO Delay +// data should be [dev, await, util, iops, bps, qusize] +// [devname, busy, reads, writes, rmsec, wmsec, rkb, wkb, backlog] +func osChkIOExceptEvents(data []interface{}) { + data = 
data[0].([]interface{}) + if len(data) >= 1 { + for _, v := range data { + value := v.([]interface{}) + devname := value[0].(string) + util := value[1].(float64) + iops := value[2].(float64) + value[3].(float64) + await := (value[4].(float64) + value[5].(float64)) + if iops > 0 { + await /= iops + } + bps := value[6].(float64) + value[7].(float64) + qusize := value[8].(float64) / 1000.0 + fieldSlice := []string{"util", "await", "iops", "bps"} + for _, item := range fieldSlice { + if _, ok := aggregator[devname+item]; !ok { + common.AddItem(aggregator, devname+item, config.Window) + } + } + osChkIOUtilEvents(devname, util, iops, bps, qusize) + osChkIODelayEvents(devname, await) + } + } +} + +// func debugIOburstDyn(devname string, iops float64, bps float64) { +// data := "" +// LowW := map[string]uint32{"iops": config.Iops, "bps": config.Bps} +// for str, val := range map[string]float64{"iops": iops, "bps": bps} { +// HighW := math.Max( +// common.GetDynThresh(aggregator, devname+str), float64(LowW[str])) +// fieldCurr := "curr" + str +// currValue := uint64(val) +// fieldThresh := str + "Thresh" +// currThresh := uint64(HighW) +// Basethresh := uint64(common.GetBaseThresh(aggregator, devname+str)) +// fieldBasethresh := str + "BaseThresh" +// MoveAvg := uint64(common.GetMoveAvg(aggregator, devname+str)) +// fieldMoveAvg := str + "MoveAvg" +// ComThresh := uint64(common.GetComThresh(aggregator, devname+str)) +// fieldComThresh := str + "ComThresh" +// data += fmt.Sprintf(`%s=%d,%s=%d,%s=%d,%s=%d,%s=%d,`, +// fieldCurr, currValue, fieldThresh, currThresh, fieldBasethresh, +// Basethresh, fieldMoveAvg, MoveAvg, fieldComThresh, ComThresh) +// } +// data = strings.TrimRight(data, ",") +// d := (`debugDynThreshTlb,disk=` + devname + ` ` + data) +// common.ExportData(d) +// } + +// IO Burst & IO Hang +// data should be [dev, util, iops, bps, qusize] +func osChkIOUtilEvents( + devname string, util float64, iops float64, bps float64, qusize float64) { + ioburst := false + 
utilMinThresh := config.Util + + utilThresh := math.Max( + common.GetDynThresh(aggregator, devname+"util"), float64(utilMinThresh)) + if util > utilThresh { + ioburst = osChkIOBurstEvents(devname, bps, iops) + if !ioburst { + osChkIOHangEvents(devname, util, iops, qusize) + } + } + // debugIOburstDyn(devname, iops, bps) + common.UpdateDynThresh(aggregator, devname+"util", util) + common.UpdateDynThresh(aggregator, devname+"iops", iops) + common.UpdateDynThresh(aggregator, devname+"bps", bps) +} + +// IO Burst, data should be [dev, util, iops, bps] +func osChkIOBurstEvents(devname string, bps float64, iops float64) bool { + bpsLowW := config.Bps + bpsHighW := math.Max(common.GetDynThresh(aggregator, devname+"bps"), float64(bpsLowW)) + bpsMidW := math.Max(float64(bpsLowW), bpsHighW/2) + + iopsLowW := config.Iops + iopsHighW := math.Max(common.GetDynThresh(aggregator, devname+"iops"), float64(iopsLowW)) + iopsMidW := math.Max(float64(iopsLowW), iopsHighW/2) + + // fmt.Println(devname, bpsHighW, bpsMidW, iopsLowW, iopsHighW, iopsMidW) + ioburst := false + if iops >= iopsMidW || bps >= bpsMidW { + ioburst = true + } + + iopsOver := false + if iops >= iopsHighW { + iopsOver = true + } + + bpsOver := false + if bps >= bpsHighW { + bpsOver = true + } + + if iopsOver || bpsOver { + fieldCurr := "currIops" + currValue := iops + if bpsOver { + fieldCurr = "currBps" + currValue = bps + } + // ioburst, put data: dev + desc := fmt.Sprintf("IO burst in disk %s", devname) + extra := fmt.Sprintf(`{"level":"warning"`+ + `,"value":"%s"`+ + `,"ts":"%s"`+ + `,"disk":"%s"`+ + `,"%s":"%d"`+ + `,"tag_set":"os"}`, + desc, time.Unix(time.Now().Unix(), 0).Format(common.TIME_FORMAT), devname, + fieldCurr, uint64(currValue)) + SubmitAlarm(GetLogEventsDesc( + Notify_IO_Burst_Type, "", "", desc, extra)) + } + return ioburst +} + +// IO Hang, data should be [dev, util, iops, bps, qusize] +func osChkIOHangEvents(devname string, util float64, iops float64, qusize float64) { + if util >= 99 && 
qusize >= 1 && iops < 50 { + desc := fmt.Sprintf("IO hang in disk %s", devname) + extra := fmt.Sprintf(`{"level":"fatal"`+ + `,"value":"%s"`+ + `,"ts":"%s"`+ + `,"disk":"%s"`+ + `,"tag_set":"os"}`, + desc, time.Unix(time.Now().Unix(), 0).Format(common.TIME_FORMAT), devname) + SubmitAlarm(GetLogEventsDesc( + Notify_IO_Hang_Type, "", "", desc, extra)) + } +} + +// IO Delay High, data should be [dev, await] +func osChkIODelayEvents(devname string, await float64) { + awaitMinThresh := config.Await + awaitThresh := math.Max( + common.GetDynThresh(aggregator, devname+"await"), float64(awaitMinThresh)) + if await >= awaitThresh { + desc := fmt.Sprintf("IO delay high in disk %s", devname) + extra := fmt.Sprintf(`{"level":"warning"`+ + `,"value":"%s"`+ + `,"ts":"%s"`+ + `,"disk":"%s"`+ + `,"curr":"%d"`+ + `,"threshold":"%d"`+ + `,"tag_set":"os"}`, + desc, time.Unix(time.Now().Unix(), 0).Format(common.TIME_FORMAT), devname, + int32(await), int32(awaitThresh)) + SubmitAlarm(GetLogEventsDesc( + Notify_IO_Delay_Type, "", "", desc, extra)) + } + common.UpdateDynThresh(aggregator, devname+"await", await) +} + +func chkKmsgErrorLogGenaral(alarmType int, data []interface{}) { + if len(data) > 1 { + SubmitAlarm(GetLogEventsDesc(alarmType, + "warning", "os", data[0].(string), data[1].(string))) + } +} + +// IO error, data should be dmesg errlog about io +// contains "timeout error","I/O error" +// contains +func osChkIOErrEvents(data []interface{}) { + chkKmsgErrorLogGenaral(Notify_IO_Error_Type, data) +} + +// FS error, data should be dmesg errlog about FS +func osChkFilesystemErrEvents(data []interface{}) { + chkKmsgErrorLogGenaral(Notify_FS_Error_Type, data) +} + +// Net Events: +// pkg droped, data should be[containerId, comm, pid, process drops/retran] +func osChkNetDropEvents(data []interface{}) { + chkProcessMetricsGeneral(Notify_Process_Net_Drops_Type, "pkgDrops", + "pkg drops %.1f packet loss", 0, data) +} + +// net link down, data should be dmesg for net +func 
osChkNetLinkDownEvents(data []interface{}) { + if len(data) > 1 { + re := regexp.MustCompile("ethd") + ethx := re.FindString(data[1].(string)) + if strings.Contains(ethx, "eth") { + SubmitAlarm(GetLogEventsDesc(Notify_Net_Link_Down_Type, + "warning", "os", data[0].(string), ethx+` link down`)) + } + } +} + +// process RT check, data should be[containerId, comm, pid, process RT95, process RT99] +// if process not in container, containerId should be "" +func osChkNetProcessRTEvents(data []interface{}) { + rtEventExportHandler := func(argvs ...interface{}) { + appLog := "{}" + jOSEve := "{}" + reason := "-" + nowFormat := time.Unix(time.Now().Unix(), 0).Format(common.TIME_FORMAT) + pid,_ := strconv.Atoi(argvs[2].(string)) + osEve := AnalyzOSEvents(argvs[3].(string), argvs[4].(string), pid) + if osEve != nil { + if _, ok := osEve["value"]; ok { + reason = osEve["value"] + } + j, err := json.Marshal(osEve) + if err == nil { + jOSEve = string(j) + } else { + common.PrintSysError(err) + } + } + pidInt, _ := strconv.Atoi(argvs[2].(string)) + port := common.GetAppInstanceMemberByPid(pidInt, "Port") + if port == "" { + return + } + portStr := strconv.Itoa(port.(int)) + extra := fmt.Sprintf(`{"metrics":"%s"`+ + `,"value":"%s"`+ + `,"ts":"%s"`+ + `,"app_log":%s`+ + `,"reason":"%s"`+ + `,"os_log":%s`+ + `,"tag_set":"%s"`+ + `,"pid":"%s"`+ + `,"podId":"%s"`+ + `,"port":"%s"`+ + `,"containerId":"%s"}`, + argvs[5].(string), argvs[6].(string), nowFormat, appLog, reason, + jOSEve, argvs[1].(string), argvs[2].(string), argvs[3].(string), + portStr, argvs[4].(string)) + SubmitAlarm(GetMetricsEventsDesc(argvs[0].(int), "", + "", "", "", "", argvs[6].(string), extra)) + } + data[3] = (data[3].(float64) / 1000.0) + chkProcessMetricsGeneral(Notify_Process_RT_Type, "responseTimeAvg", + " server RT over 100ms(%.2fms)", 100, data, rtEventExportHandler) +} + +// Mem Events: +// direct reclaim, data should be +// +// [containerId, comm, pid, process pgfault, +// pgscan_direct, 
direct_reclaim_delay] +// +// if process not in container, containerId should be "" +func osChkMemDirectReclaimEvents(data []interface{}) { + commIdx := 1 + pidIdx := 2 + dataStartIndex := 3 + if len(data) >= (2 + dataStartIndex) { + pgScanDirect := data[dataStartIndex+1].(float64) + if pgScanDirect > 0 { + events := "direct reclaim occurs" + pgFault := data[dataStartIndex].(float64) + var reclaimDelay float32 = -1.0 + if len(data) >= (3 + dataStartIndex) { + reclaimDelay = float32(data[dataStartIndex+2].(float64)) + } + if reclaimDelay >= 10 { + events = fmt.Sprintf( + "%s and delay %.2fms", events, reclaimDelay) + } + + containerId := data[0].(string) + containerRegex := `[0-9a-f]{12}` + reContainer := regexp.MustCompile(containerRegex) + if pgFault > 0 || reContainer.MatchString(containerId) { + comm := data[commIdx].(string) + pid := data[pidIdx].(string) + events = fmt.Sprintf( + "%s when %s(%s) alloc mem", events, comm, pid) + } else if reclaimDelay < 10 { + events = "" + } + + if len(events) > 0 { + SubmitAlarm(GetLogEventsDesc(Notify_Direct_Reclaim_Type, + "warning", "os", events)) + } + } + } +} + +// (mysqld)OOM killer, data should be dmesg errlog about oom +func osChkMemProcessOOMEvents(data []interface{}) { + if len(data) > 1 { + if strings.Contains(data[1].(string), "mysqld") { + extraContainerId := "NULL" + extraPodId := "NULL" + process := "mysqld" + pid := "-1" + nowFormat := time.Unix( + time.Now().Unix(), 0).Format(common.TIME_FORMAT) + re := regexp.MustCompile(`Killed process (\d+) \(([^)]+)\)`) + result := re.FindStringSubmatch(data[1].(string)) + if len(result) > 0 { + pidInt, _ := strconv.Atoi(result[1]) + extraPodId = common.GetAppInstanceMemberByPid( + pidInt, "PodId").(string) + if len(extraPodId) == 0 { + extraPodId = "NULL" + } + extraContainerId = common.GetAppInstanceMemberByPid( + pidInt, "ContainerId").(string) + if len(extraContainerId) == 0 { + extraPodId = "NULL" + } + pid = result[1] + process = result[2] + } + pidInt, _ := 
strconv.Atoi(pid) + port := common.GetAppInstanceMemberByPid(pidInt, "Port") + if port == "" { + return + } + portStr := strconv.Itoa(port.(int)) + extra := fmt.Sprintf(`{"level":"fatal"`+ + `,"value":"mysqld exited by OOM killer"`+ + `,"details":"%s"`+ + `,"ts":"%s"`+ + `,"tag_set":"%s"`+ + `,"pid":"%s"`+ + `,"podId":"%s"`+ + `,"port":"%s"`+ + `,"containerId":"%s"}`, + data[1].(string), nowFormat, process, pid, + extraPodId, portStr, extraContainerId) + SubmitAlarm(GetLogEventsDesc(Notify_Process_OOM_Type, + "", "", "mysqld exited by OOM killer", extra)) + } + } +} + +// memleak check, data should be[unreclaim, ] +func osChkMemleakEvents(data []interface{}) { + fmt.Println("Check mem events") +} + +// Sched Events: +// sched delay, data should be [containerId, comm, pid, process sched delay] +// if process not in container, containerId should be "" +func osChkSchedProcessDelayEvents(data []interface{}) { + chkProcessMetricsGeneral(Notify_Process_Sched_Delay_Type, "schedDelay", + "sched delay over 100ms(%.2fms)", 50, data) +} + +// (mysqld)long time D-status check, data should be +// [containerId, comm, pid, D-status time] +// if process not in container, containerId should be "" +func osChkSchedProcessDStatusEvents(data []interface{}) { + chkProcessMetricsGeneral(Notify_Long_Time_D_Type, "UNStatusTime", + "Time of UN-status over 300ms(%.2fms)", 300, data) +} + +// Assess os lockup(soft/hard) risks, data should be +// [kernel nosched, irq off] +func osChkSchedOSLockupEvents(data []interface{}) { + fmt.Println("check schedule events") +} + +// process CPU check, data should be[containerId, comm, pid, cpu_total, cpu_user, cpu_sys] +// if process not in container, containerId should be "" +func osChkCpuHighEvents(data []interface{}) { + if data[3].(float64) > 60 { + desc := " CPU High Total over 60%%(%.2f%%) " + details := fmt.Sprintf("due to sys(%.2f%%) user(%.2f%% is high)", + data[5].(float64), data[4].(float64)) + if data[5].(float64) > 15 { + details = 
fmt.Sprintf("due to sys(%.2f%% is high) user(%.2f%%)", + data[5].(float64), data[4].(float64)) + } + desc += strings.Replace(details, "%", "%%", -1) + chkProcessMetricsGeneral(Notify_Process_CPU_HIGH_Type, "cpuTotal", + desc, 60, data) + } else if data[5].(float64) > 15 { + chkProcessMetricsGeneral(Notify_Process_CPU_HIGH_Type, "cpuSys", + " CPU High SYS over 15%%(%.2f%%)", 15, data) + } +} + +func OsChkStart() { + fmt.Println("start OS events check") + initSlideWindow("") + //register some check handler for IO events + RegisterNotify(Notify_IO_Wait_Type, osChkIOWaitEvents) + RegisterNotify(Notify_IO_Except_Type, osChkIOExceptEvents) + // RegisterNotify(Notify_IO_Burst_Type, osChkIOBurstEvents) + // RegisterNotify(Notify_IO_Delay_Type, osChkIODelayEvents) + // RegisterNotify(Notify_IO_Hang_Type, osChkIOHangEvents) + RegisterNotify(Notify_IO_Error_Type, osChkIOErrEvents) + RegisterNotify(Notify_FS_Error_Type, osChkFilesystemErrEvents) + + //register some check handler for net events + RegisterNotify(Notify_Process_Net_Drops_Type, osChkNetDropEvents) + RegisterNotify(Notify_Process_RT_Type, osChkNetProcessRTEvents) + RegisterNotify(Notify_Net_Link_Down_Type, osChkNetLinkDownEvents) + + //register some check handler for mem events + RegisterNotify(Notify_Direct_Reclaim_Type, osChkMemDirectReclaimEvents) + RegisterNotify(Notify_Process_OOM_Type, osChkMemProcessOOMEvents) + RegisterNotify(Notify_Memleak_Type, osChkMemleakEvents) + + //register some check handler for sched events + RegisterNotify(Notify_Process_Sched_Delay_Type, osChkSchedProcessDelayEvents) + RegisterNotify(Notify_OS_Lockup_Type, osChkSchedOSLockupEvents) + RegisterNotify(Notify_Long_Time_D_Type, osChkSchedProcessDStatusEvents) + RegisterNotify(Notify_Process_CPU_HIGH_Type, osChkCpuHighEvents) +} diff --git a/source/tools/monitor/observ/sql-obs/events/analyzer/osIOChkConfig.go b/source/tools/monitor/observ/sql-obs/events/analyzer/osIOChkConfig.go new file mode 100644 index 
// OsCheckConfig holds the OS anomaly-check thresholds.  Zero-valued fields
// are filled from the `default` struct tags by setOsCheckConfig.
type OsCheckConfig struct {
	Window uint32 `json:"window" default:"6"`  // sliding-window length (samples)
	Iowait uint32 `json:"iowait" default:"5"`  // global iowait floor, percent
	Await  uint32 `json:"await" default:"10"`  // per-disk await floor, ms
	Util   uint32 `json:"util" default:"20"`   // per-disk util floor, percent
	Iops   uint32 `json:"iops" default:"150"`  // per-disk IOPS floor
	Bps    uint32 `json:"bps" default:"30720"` // per-disk bandwidth floor, KB/s
}

// setOsCheckConfig fills every zero-valued field of config from its `default`
// tag.  The path argument is currently unused: loading a JSON config file from
// it is a TODO (the original kept the loader commented out).
func setOsCheckConfig(config *OsCheckConfig, path string) {
	val := reflect.ValueOf(config).Elem()
	for i := 0; i < val.NumField(); i++ {
		parseField(val.Field(i), val.Type().Field(i))
	}
}

// parseField fills one struct field from its `default` tag when the field
// still holds its zero value; preset fields are left untouched.
// Generalized from the original (which handled only reflect.Uint32) to every
// unsigned integer kind, sized by the field's own bit width.
// NOTE: the caller currently ignores the returned error.
func parseField(value reflect.Value, field reflect.StructField) error {
	tagVal := field.Tag.Get("default")

	if !value.IsZero() {
		// A value is set on this field so there's no need to set a default value.
		return nil
	}

	switch value.Kind() {
	case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
		i, err := strconv.ParseUint(tagVal, 10, value.Type().Bits())
		if err != nil {
			return err
		}
		value.SetUint(i)
	}
	return nil
}
append(sTmp, str1) + } + } + } else { + sTmp = append(sTmp, str) + } + } + if len(sTmp)%2 != 0 { + common.PrintDefinedErr( + ErrorCode(common.Fail_Unrecognized_Slow_Log_Format), + "log info: "+log[i]) + return retInfoCnt, -1 + } + s = sTmp + } + for n, str := range s { + if n%2 == 0 { + (*mSlowData)[retInfoCnt][str] = s[n+1] + } + } + } else { + //get sql statement and start_timestamp + if strings.HasPrefix(log[i], "SET timestamp=") { + s := strings.Split(log[i], "timestamp=") + (*mSlowData)[retInfoCnt]["timestamp"] = + strings.ReplaceAll(s[1], ";", "") + } else { + if _, ok := (*mSlowData)[retInfoCnt]["timestamp"]; ok { + (*mSlowData)[retInfoCnt]["statement"] = log[i] + retInfoCnt++ + if retInfoCnt == budget { + break + } + } + } + continue + } + } + return retInfoCnt, leftLen +} + +func (ssA *ssAnalyzer) getQueryExplain(sqlCmd string) []map[string]interface{} { + cmd := "EXPLAIN " + sqlCmd + if ssA.dbConn.DBConnIsVaild() { + data, err := ssA.dbConn.GetRowsByQueryCmd(cmd) + if err != nil { + common.PrintDefinedErr(ErrorCode(common.Fail_Get_DB_Variables)) + return nil + } + return data + } else { + return nil + } +} + +func selectTypeAnalyze(sType string) string { + // select_type: simple to complex: + // SIMPLE + // PRIMARY + // DERIVED + // SUBQUERY + // UNION + // DEPENDENT UNION + // DEPENDENT SUBQUERY + // UNION RESULT (UNQ.E) + // DEPENDENT UNION RESULT + if sType == "SIMPLE" { + return "A simple select query" + } else if sType == "PRIMARY" { + return "Include subqueries in query" + } else if sType == "SUBQUERY" { + return "Include subqueries in the SELECT or WHERE" + } else if sType == "DERIVED" { + return "A subquery included in the From" + } else if sType == "UNION" { + return "A union query" + } else if sType == "UNION RESULT" { + return "SELECT the result from the UNION table" + } else if sType == "DEPENDENT SUBQUERY" { + return "Dependent subquery in the SELECT or WHERE" + } + return "" +} + +func extraInfoAnalyze(extra string) string { + // Extra: good 
to bad: + // Using index + // Using where + // Using temporary + // Using filesort + // Using join buffer (Block Nested Loop) + // Range checked for each record + // Full scan on NULL key + // Impossible WHERE + // No matching min/max row + if strings.Contains(extra, "Using index") { + return "Using index, efficient query" + } else if strings.Contains(extra, "Using where") { + return "Where filter condition is not an index" + } else if strings.Contains(extra, "Using temporary") { + return "Using temporary tables" + } else if strings.Contains(extra, "Using filesort") { + return "Using non indexed fields for the 'order by'" + } + return "" +} + +/** + * TODO: independent thread processing or blocking processing is required? + * analyzing the root cause + */ +func analyzeSlowLog(data map[string]interface{}, ssA *ssAnalyzer) string { + _, ok := data["explain0"] + rowsSend, _ := strconv.Atoi(data["Rows_sent"].(string)) + rowsExamined, _ := strconv.Atoi(data["Rows_examined"].(string)) + //can not get explain + if rowsSend > 0 && (rowsSend*100 < rowsExamined) { + if !ok { + return fmt.Sprintf( + "Index Invalidation, too many examined rows, examined(%s) sent(%s)", + data["Rows_examined"].(string), data["Rows_sent"].(string)) + } + } else { + lockTime, _ := strconv.ParseFloat(data["Lock_time"].(string), 64) + queryTime, _ := strconv.ParseFloat(data["Query_time"].(string), 64) + if lockTime >= queryTime*0.2 { + return fmt.Sprintf( + "Wait lock too long, LockTime(%vs) exceeds 20%% of queryTime(%vs)", + lockTime, + queryTime) + } + if rowsSend == 0 && rowsExamined > 100000 && !ok { + return fmt.Sprintf("%d examined rows maybe Optimize query statements(%s), %s", + rowsExamined, data["statement"], osChkStrFlag) + } + } + if !ok { + ret := ssA.dbConn.ParseSQL(data["statement"].(string)) + if ret == "complex" { + return fmt.Sprintf("Complex SQL statement, "+ + "please Optimize query statements(%s), %s", + data["statement"], osChkStrFlag) + } + return "unkown, please chk explain 
for more information, " + osChkStrFlag + } + + // possible_keys: indexes that can use to accelerate queries + // key: The actual index used, + // if not in possible_keys, maybe poor index used + // rows: scan rows + var result []string + for k, m := range data { + if !strings.Contains(k, "explain") { + continue + } + explain := m.(map[string]interface{}) + + r := selectTypeAnalyze(explain["select_type"].(string)) + if r != "" { + r += ", " + } + if explain["key"].(string) == "NULL" { + //no use index + r += "index not used" + } else if explain["possible_keys"].(string) != "NULL" && + !strings.Contains(explain["possible_keys"].(string), + explain["key"].(string)) { + //Unexpected index used + r += fmt.Sprintf( + "unexpected index used, expected(%s) but (%s)", + explain["possible_keys"].(string), + explain["key"].(string)) + } + //Extra info + extraInfo := extraInfoAnalyze(explain["Extra"].(string)) + // type: good to bad: + // system + // const + // eq_ref + // ref + // range + // index + // all + //full table scan + if explain["type"].(string) == "ALL" { + if explain["key"].(string) != "NULL" { + if len(r) > 0 { + r += ", " + } + r += fmt.Sprintf( + "Index %s Invalidation", explain["key"].(string)) + } + if len(r) > 0 { + r += ", " + } + r += "full table scan" + if rowsExamined < 100000 { + r += fmt.Sprintf(", %d examined rows, %s", + rowsExamined, osChkStrFlag) + } + } else { + indexStr := explain["key"].(string) + indexOptimal := false + optimalIdxReason := "" + //the field after where is index,and in effect + if strings.Contains(extraInfo, "Using index") { + indexOptimal = true + } + if rowsSend > 0 && (rowsSend*100 < rowsExamined) { + optimalIdxReason = fmt.Sprintf( + "Index %s Invalidation, too many examined rows," + + " examined(%s) sent(%s)", indexStr, + data["Rows_examined"].(string), + data["Rows_sent"].(string)) + } else { + if rowsSend == 0 { + optimalIdxReason = ", " + osChkStrFlag + } else { + r += ", " + osChkStrFlag + } + } + if !strings.Contains(r, 
osChkStrFlag) { + if indexOptimal { + if len(optimalIdxReason) > 0 { + r += optimalIdxReason + } + } else { + if len(extraInfo) > 0 { + r += (", " + extraInfo + ", ") + } + r += fmt.Sprintf("Not optimal index(%s) used, %s", + indexStr, osChkStrFlag) + } + } + } + result = append(result, r) + } + return strings.Join(result, "\n") +} + +func analyzAndReportEvent(ssA *ssAnalyzer, + joinMap map[string]interface{}) error { + //joinMap["explain"] = "" + podId := ssA.dbConn.GetPodID() + containerId := ssA.dbConn.GetContainerID() + for k := range joinMap { + if strings.HasPrefix(k, "explain") { + delete(joinMap, k) + } + } + mExplainData := ssA.getQueryExplain(joinMap["statement"].(string)) + for idx, m := range mExplainData { + joinMap["explain"+strconv.Itoa(idx)] = m + } + joinMap["app_analyz"] = analyzeSlowLog(joinMap, ssA) + reason := joinMap["app_analyz"] + j, err := json.Marshal(joinMap) + if err != nil { + common.PrintDefinedErr( + ErrorCode(common.Fail_Analyzer_Data_Formatting_JSON)) + return err + } + jApp := string(j) + jOSEve := "{}" + pid := common.GetAppInstanceInfo( + map[string]interface{}{ + "ContainerId": containerId, + "Port": ssA.dbConn.GetPort(), + "Comm": "mysqld"}, + "Pid").(int) + if strings.Contains(joinMap["app_analyz"].(string), osChkStrFlag) { + //reason = "unknow" + osEve := AnalyzOSEvents(podId, containerId, pid) + if osEve != nil { + reason = osEve["value"] + j, err = json.Marshal(osEve) + if err != nil { + common.PrintDefinedErr( + ErrorCode(common.Fail_Analyzer_Data_Formatting_JSON)) + return err + } + jOSEve = string(j) + } else { + reason = strings.ReplaceAll( + reason.(string), ", " + osChkStrFlag, "") + } + } + portStr := strconv.Itoa(ssA.dbConn.GetPort()) + desc := "slow SQL occurs" + extra := fmt.Sprintf(`{"level":"warning"`+ + `,"value":"%s"`+ + `,"ts":"%s"`+ + `,"app_log":%s`+ + `,"reason":"%s"`+ + `,"os_log":%s`+ + `,"pid":"%s"`+ + `,"port":"%s"`+ + `,"podId":"%s"`+ + `,"containerId":"%s"`+ + `,"tag_set":"mysqld"}`, + desc, 
time.Unix(time.Now().Unix(), 0).Format(common.TIME_FORMAT), jApp, reason, + jOSEve, strconv.Itoa(pid), portStr, podId, containerId) + SubmitAlarm(GetLogEventsDesc( + Notify_Process_Mysql_Slow_Sql_Type, "", "", desc, extra)) + return nil +} + +func handleData(data *[]string, dataLen int, p *interface{}) { + ssA := (*ssAnalyzer)(unsafe.Pointer(p)) + budget := cap(ssA.mSlowData) + leftLen := dataLen + retInfoCnt := budget + for { + startPos := dataLen - leftLen + endPos := startPos + leftLen + retInfoCnt, leftLen = convertLogToSlowData( + (*data)[startPos:endPos], leftLen, &ssA.mSlowData, budget) + for i := 0; i < retInfoCnt; i++ { + if err := analyzAndReportEvent(ssA, ssA.mSlowData[i]); err != nil { + break + } + } + if leftLen <= 0 { + break + } + } +} + +func (ssA *ssAnalyzer) ExitAnalyzer() { + ssA.A.ExitAnalyzer() +} + +func (ssA *ssAnalyzer) CopyDataToAnalyzer(data []string, dataLen int) { + ssA.A.CopyDataToAnalyzer(data, dataLen) +} + +func NewSlowSqlAnalyzer(dbConn *DBConnect) *ssAnalyzer { + budget := 10 + ssA := &ssAnalyzer{ + A: NewAnalyzer(handleData), + mSlowData: make([]map[string]interface{}, budget), + dbConn: dbConn, + } + for i := 0; i < budget; i++ { + ssA.mSlowData[i] = make(map[string]interface{}) + } + ssA.A.private = (*interface{})(unsafe.Pointer(ssA)) + return ssA +} diff --git a/source/tools/monitor/observ/sql-obs/events/analyzer/slowSqlChk.go b/source/tools/monitor/observ/sql-obs/events/analyzer/slowSqlChk.go new file mode 100644 index 0000000000000000000000000000000000000000..1d5b9233e7204b6933e3fba425f25881c1bbd878 --- /dev/null +++ b/source/tools/monitor/observ/sql-obs/events/analyzer/slowSqlChk.go @@ -0,0 +1,125 @@ +package analyzer + +import ( + "fmt" + "sql-obs/common" + //"os" + "time" + "strconv" +) + +func startMonitorSlowLog(slowQueryLogFile string, A *ssAnalyzer) { + fw := common.NewFileWriteWatcher(slowQueryLogFile, 0) + fw.StartWatch() + for { + status := <-fw.Status() + if status == common.Has_data { + 
A.CopyDataToAnalyzer(fw.Data(), fw.ChangeLines())
        } else if status == common.Watcher_Exited {
            // The file watcher is gone for good: stop the analyzer worker
            // as well, otherwise it would wait forever for new log data.
            A.ExitAnalyzer()
            common.PrintDefinedErr(ErrorCode(common.Fail_File_Watcher_Exit))
            return
        }
    }
}

// getSlowQueryLogFile resolves the slow-query log path of the mysqld
// instance behind dbConn.
//
// With a usable DB connection it runs
// "SHOW VARIABLES LIKE '%slow_query_log%'"; if the log is enabled and a
// file path is reported, the path is cached into the app-instance table
// via common.AppInstancesAjustMember.  The returned value is always read
// back from that table, so a previously cached path may still be
// returned when the connection is down.  An empty string means no usable
// slow log could be resolved.
func getSlowQueryLogFile(dbConn *DBConnect) string {
    var slowQueryLog string
    var slowQueryLogFile string
    // Keys identifying this mysqld instance in the app-instance table.
    match := map[string]interface{} {
        "Comm": "mysqld",
        "Ip": dbConn.GetIp(),
        "Port": dbConn.GetPort(),
    }

    if dbConn.DBConnIsVaild() {
        sqlCmd := "SHOW VARIABLES LIKE '%slow_query_log%'"
        data, err := dbConn.GetRowsByQueryCmd(sqlCmd)
        if err != nil {
            common.PrintDefinedErr(ErrorCode(common.Fail_Get_DB_Variables))
            return ""
        }

        // One row per server variable: slow_query_log (ON/OFF switch)
        // and slow_query_log_file (log path).
        for _, m := range data {
            if value, ok := m["slow_query_log"].(string); ok {
                slowQueryLog = value
            } else if value, ok := m["slow_query_log_file"].(string); ok {
                slowQueryLogFile = value
            }
        }

        if slowQueryLog == "OFF" {
            // Slow log disabled: drop the path so it is not cached below.
            common.PrintDefinedErr(ErrorCode(common.Fail_Slow_Log_Is_OFF))
            slowQueryLogFile = ""
        }
        if len(slowQueryLogFile) > 0 {
            ajust := map[string]interface{} {
                "Slowlog": slowQueryLogFile,
            }
            common.AppInstancesAjustMember(match, ajust)
        }
    }
    return common.GetAppInstanceInfo(match, "Slowlog").(string)
}

/**
 * Slow-SQL detection is achieved by listening to the slow log file.
 */
func detectSlowSqlBySlowLog(dbConn *DBConnect) {
    slowQueryLogFile := getSlowQueryLogFile(dbConn)
    if len(slowQueryLogFile) > 0 {
        startMonitorSlowLog(slowQueryLogFile, NewSlowSqlAnalyzer(dbConn))
    }
}

// detectSlowSqlBySlowQuerys periodically polls the global 'Slow_queries'
// counter (one query per round, followed by the sleep at the loop tail)
// and prints the value.  It stops on the first query error, or after 5
// consecutive rounds in which the counter is missing from the result set.
func detectSlowSqlBySlowQuerys(dbConn *DBConnect) {
    var slowQueries int
    retry := 0
    sqlCmd := "SHOW GLOBAL STATUS LIKE 'Slow_queries'"
    for {
        data, err := dbConn.GetRowsByQueryCmd(sqlCmd)
        if err != nil {
            common.PrintDefinedErr(ErrorCode(common.Fail_Get_DB_Variables))
            break
        }
        if value, ok := data[0]["Slow_queries"].(string); ok {
            slowQueries, _ = strconv.Atoi(value)
            fmt.Printf("Slow_queries is %v\n", slowQueries)
            // Counter read successfully: reset the failure budget and
            // jump straight to the sleep at the loop tail.
            retry = 0
            goto loop
        }
        retry++
        if retry >= 5 {
common.PrintDefinedErr(ErrorCode(common.Fail_Get_DB_Variables)) + break + } + loop: + time.Sleep(600 * time.Second) + } +} + +func detectSlowSql(dbConn *DBConnect) { + go detectSlowSqlBySlowLog(dbConn) + if dbConn.DBConnIsVaild() { + go detectSlowSqlBySlowQuerys(dbConn) + } +} + +/** + * Obtaining SQL latency by tracking SQL requests + */ +/* +func detectBadSql(data ...interface{}) { + fmt.Println("check bad sql events") + for _, d := range data { + fmt.Println(d) + } +}*/ + +func DetectSlowOrBadSql(dbConn *DBConnect) { + detectSlowSql(dbConn) +} + +func RegisterSqlExceptChkNotify() { + //RegisterNotify(Notify_Process_RT_Type, detectBadSql) +} diff --git a/source/tools/monitor/observ/sql-obs/events/eventsChk.go b/source/tools/monitor/observ/sql-obs/events/eventsChk.go new file mode 100644 index 0000000000000000000000000000000000000000..aa73327d8989fd7d2b7e3502a57e34e9cb1115c2 --- /dev/null +++ b/source/tools/monitor/observ/sql-obs/events/eventsChk.go @@ -0,0 +1,71 @@ +package events + +import ( + "fmt" + "sql-obs/common" + "sql-obs/events/analyzer" +) + +type DBConnect = common.DBConnect +type ErrorCode = common.ErrorCode + +var dbConnList []*DBConnect + +func ForeachDBConnect(f func(*DBConnect)) { + for _, dbConn := range dbConnList { + f(dbConn) + } +} + +func startMysqlExceptCheck() { + ForeachDBConnect(analyzer.DetectSlowOrBadSql) + analyzer.RegisterSqlExceptChkNotify() +} + +func startOSExceptCheck() { + analyzer.OsChkStart() +} + +func StartEventsCheck() { + fmt.Println("start events check") + analyzer.StartEventNotify() + analyzer.InitAlarmManage() + startOSExceptCheck() + startMysqlExceptCheck() +} + +func CreateDBConnections() error { + userInvaild := false + user, passwd := common.GetUsersInfo() + if user == "" && passwd == "" { + userInvaild = true + } + common.ForeachAppInstances("mysqld", []string{"Ip", "Port", "ContainerId", "PodId"}, + func(values []interface{}) { + dbConn, err := common.NewDBConnection("mysql", + values[0].(string), + 
values[1].(int), + values[2].(string), + values[3].(string)) + if err != nil { + common.PrintDefinedErr(ErrorCode(common.Fail_Create_DB_Connect)) + return + } + if !userInvaild { + common.ConnectToDB(dbConn, user, passwd) + } + dbConnList = append(dbConnList, dbConn) + }) + return nil +} + +func destroyDBConnection() { + for _, dbConn := range dbConnList { + dbConn.CloseDBConnection() + } +} + +func DestroyResource() { + destroyDBConnection() + analyzer.DestroyAlarmResource() +} diff --git a/source/tools/monitor/observ/sql-obs/go.mod b/source/tools/monitor/observ/sql-obs/go.mod new file mode 100644 index 0000000000000000000000000000000000000000..0649e2823ef919284b2f6b653c087752844af8da --- /dev/null +++ b/source/tools/monitor/observ/sql-obs/go.mod @@ -0,0 +1,10 @@ +module sql-obs + +go 1.19 + +require ( + github.com/fsnotify/fsnotify v1.6.0 + github.com/go-sql-driver/mysql v1.7.1 +) + +require golang.org/x/sys v0.7.0 // indirect diff --git a/source/tools/monitor/observ/sql-obs/go.sum b/source/tools/monitor/observ/sql-obs/go.sum new file mode 100644 index 0000000000000000000000000000000000000000..90be24df0fb24b7de767bc6a3add0cfb254913ab --- /dev/null +++ b/source/tools/monitor/observ/sql-obs/go.sum @@ -0,0 +1,7 @@ +github.com/fsnotify/fsnotify v1.6.0 h1:n+5WquG0fcWoWp6xPWfHdbskMCQaFnG6PfBrh1Ky4HY= +github.com/fsnotify/fsnotify v1.6.0/go.mod h1:sl3t1tCWJFWoRz9R8WJCbQihKKwmorjAbSClcnxKAGw= +github.com/go-sql-driver/mysql v1.7.1 h1:lUIinVbN1DY0xBg0eMOzmmtGoHwWBbvnWubQUrtU8EI= +github.com/go-sql-driver/mysql v1.7.1/go.mod h1:OXbVy3sEdcQ2Doequ6Z5BW6fXNQTmx+9S1MCJN5yJMI= +golang.org/x/sys v0.0.0-20220908164124-27713097b956/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.7.0 h1:3jlCCIQZPdOYu1h8BkNvLz8Kgwtae2cagcG/VamtZRU= +golang.org/x/sys v0.7.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= diff --git a/source/tools/monitor/observ/sql-obs/logs/log.go b/source/tools/monitor/observ/sql-obs/logs/log.go new file mode 100644 index 
0000000000000000000000000000000000000000..8e2d79d8b7b311d2c8b8c7fb0e2776918c07b2aa --- /dev/null +++ b/source/tools/monitor/observ/sql-obs/logs/log.go @@ -0,0 +1,19 @@ +package logs + +import ( + "fmt" + //"os" + //"time" + //"strconv" + "sql-obs/common" + "sql-obs/events" +) + +type DBConnect = common.DBConnect +type ErrorCode = common.ErrorCode + +func StartLogCollect() { + fmt.Println("start log check") + go OsKernelLogChkStart() + events.ForeachDBConnect(DetectMysqlErrorLog) +} diff --git a/source/tools/monitor/observ/sql-obs/logs/osKernelErrLogChk.go b/source/tools/monitor/observ/sql-obs/logs/osKernelErrLogChk.go new file mode 100644 index 0000000000000000000000000000000000000000..7caf89d75d9b25393a9bdf45ece31acf12755b9d --- /dev/null +++ b/source/tools/monitor/observ/sql-obs/logs/osKernelErrLogChk.go @@ -0,0 +1,76 @@ +package logs + +import ( + "sql-obs/common" + "sql-obs/events/analyzer" + // "fmt" + // "os" + // "time" + // "strconv" + "strings" +) + +type MetricsType = analyzer.MetricsType + +//kernel log err Events: +// OOM log +// IO error log: IO timeout、Scsi/nvme error +// Net error log: network link up/down、syn overflow +// Filesystem readonly/error log +func osChkKernelLogErrEvents() { + //fmt.Println("start OS kernel error log events check...") + fw := common.NewFileWriteWatcher("/dev/kmsg", 0) + fw.StartWatch() + for { + status := <-fw.Status() + if status == common.Has_data { + lines := fw.ChangeLines() + for i := 0; i < lines; i++ { + d := fw.Data()[i] + errType := "" + mType := 0 + // Usually, when these kmsg errors occur, + // it requires people to fix them + if (strings.Contains(d, "blk_update_request") && + strings.Contains(d, "error")){ + errType = "block I/O error" + mType = analyzer.Notify_IO_Error_Type + } else if ( + (strings.Contains(d, "reset controller") || + strings.Contains(d, "disable controller")) && + strings.Contains(d, "nvme")) || + ((strings.Contains(d, "exception Emask") || + strings.Contains(d, "failed command") || + 
strings.Contains(d, "limiting SATA link")) && + strings.Contains(d, "ata")) { + errType = "hardwaer I/O error" + mType = analyzer.Notify_IO_Error_Type + } else if strings.Contains(d, "read-only") && + (strings.Contains(d, "filesystem") || + strings.Contains(d, "mode")) { + errType = "FS read-only error" + mType = analyzer.Notify_FS_Error_Type + } else if strings.Contains(d, "Link") && + strings.Contains(d, "Down") && + strings.Contains(d, "eth") { + errType = "Net hardware error" + mType = analyzer.Notify_Net_Link_Down_Type + } else if strings.Contains(d, "Killed process") { + errType = "OOM" + mType = analyzer.Notify_Process_OOM_Type + } + if len(errType) > 0 { + analyzer.MarkEventsNotify(MetricsType(mType), errType, d) + analyzer.TriggerNotify() + } + } + } else if status == common.Watcher_Exited { + common.PrintDefinedErr(ErrorCode(common.Fail_File_Watcher_Exit)) + return + } + } +} + +func OsKernelLogChkStart() { + go osChkKernelLogErrEvents() +} diff --git a/source/tools/monitor/observ/sql-obs/logs/sqlErrLogChk.go b/source/tools/monitor/observ/sql-obs/logs/sqlErrLogChk.go new file mode 100644 index 0000000000000000000000000000000000000000..5d30d5b84726730372335982484bf0a865fb20c1 --- /dev/null +++ b/source/tools/monitor/observ/sql-obs/logs/sqlErrLogChk.go @@ -0,0 +1,94 @@ +package logs + +import ( + "fmt" + "sql-obs/common" + "sql-obs/events/analyzer" + "strings" + //"os" + "time" +) + +/** + * detect mysql err_log file + */ + func detectErrorLog(dbConn *DBConnect) { + var logErrorFileName string + match := map[string]interface{} { + "Comm": "mysqld", + "Ip": dbConn.GetIp(), + "Port": dbConn.GetPort(), + } + + if dbConn.DBConnIsVaild() { + sqlCmd := "SHOW VARIABLES LIKE 'log_error'" + data, err := dbConn.GetRowsByQueryCmd(sqlCmd) + if err != nil { + common.PrintDefinedErr(ErrorCode(common.Fail_Get_DB_Variables)) + return + } + if value, ok := data[0]["log_error"].(string); ok { + logErrorFileName = value + ajust := map[string]interface{} { + "Errlog": 
logErrorFileName, + } + common.AppInstancesAjustMember(match, ajust) + } + } + logErrorFileName = + common.GetAppInstanceInfo(match, "Errlog").(string) + if len(logErrorFileName) > 0 { + fw := common.NewFileWriteWatcher(logErrorFileName, 0) + fw.StartWatch() + for { + status := <-fw.Status() + if status == common.Has_data { + lines := fw.ChangeLines() + for i := 0; i < lines; i++ { + data := fw.Data()[i] + if len(data) > 0 { + level := "" + if strings.Contains(data, "ERROR") { + level = "error" + } else if strings.Contains(data, "CRITICAL") { + level = "fatal" + } + if len(level) > 0 { + nowFormat := time.Unix( + time.Now().Unix(), 0).Format(common.TIME_FORMAT) + pid := common.GetAppInstanceInfo( + map[string]interface{}{ + "ContainerId": dbConn.GetContainerID(), + "Port": dbConn.GetPort(), + "Comm": "mysqld"}, + "Pid").(int) + extra := fmt.Sprintf(`{"level":"%s"` + + `,"value":"%s"` + + `,"ts":"%s"` + + `,"tag_set":"%s"` + + `,"pid":"%d"` + + `,"podId":"%s"` + + `,"containerId":"%s"}`, + level, data, nowFormat, "mysqld", pid, + dbConn.GetPodID(), dbConn.GetContainerID()) + analyzer.SubmitAlarm(analyzer.GetLogEventsDesc( + analyzer.Notify_Process_Mysql_Error_Type, + level, "", "Error occurred in mysqld", extra)) + } + } + } + } else if status == common.Watcher_Exited { + common.PrintDefinedErr(ErrorCode(common.Fail_File_Watcher_Exit)) + return + } + } + } else { + common.PrintOnlyErrMsg("not found logErrorFile for %s in %s", + match["Comm"], match["Ip"]) + return + } +} + +func DetectMysqlErrorLog(dbConn *DBConnect) { + go detectErrorLog(dbConn) +} diff --git a/source/tools/monitor/observ/sql-obs/main.go b/source/tools/monitor/observ/sql-obs/main.go new file mode 100644 index 0000000000000000000000000000000000000000..4fc4c381fa5f62f5cac90e314e3e805ec987346e --- /dev/null +++ b/source/tools/monitor/observ/sql-obs/main.go @@ -0,0 +1,82 @@ +package main + +import ( + "fmt" + "os" + "os/signal" + "sql-obs/common" + "sql-obs/events" + "sql-obs/logs" + "sql-obs/metrics" 
    "sql-obs/tracing"
    "syscall"
    "io/ioutil"
    "strings"
)

type ErrorCode = common.ErrorCode

// main boots the sql-obs agent: it verifies the runtime environment,
// resolves the unity "outline" pipe path, discovers mysqld instances,
// opens DB connections, initializes data export, then starts the event,
// log, tracing and metrics subsystems and blocks until SIGINT/SIGTERM.
func main() {
    if err := common.DetectSqlObs(); err != nil {
        return
    }
    outlineFilePath, err:= getSysomOutline()
    if err != nil {
        common.PrintOnlyErrMsg("Not get path of unity outline")
        return
    }
    if err := common.InitAppInstancesInfo("mysqld"); err != nil {
        common.PrintDefinedErr(ErrorCode(common.Fail_Init_Mysqld_Instances_Info))
        return
    }
    fmt.Println("start create a mysql connection")
    if err := events.CreateDBConnections(); err != nil {
        common.PrintDefinedErr(ErrorCode(common.Fail_Create_DB_Connect))
        return
    }
    sigCh := make(chan os.Signal, 1)
    signal.Notify(sigCh, syscall.SIGINT, syscall.SIGTERM)
    fmt.Println("start mysql observability")
    if err := common.InitDataExport(outlineFilePath); err != nil {
        // Export needs the unity pipe; tear down the DB connections
        // opened above before bailing out.
        common.PrintDefinedErr(
            ErrorCode(common.Fail_Init_Data_Export),
            "Please confirm if the 'unity' is activated")
        events.DestroyResource()
        return
    }
    defer events.DestroyResource()
    defer common.UninitDataExport()
    events.StartEventsCheck()
    logs.StartLogCollect()
    tracing.StartTracing()
    metrics.StartMonitor()
    // Block until a termination signal arrives.
    e := <-sigCh
    fmt.Printf("exit mysql observability, signal: %v\n", e)
}

// getSysomOutline extracts the unity outline pipe path from the sysom
// YAML config by hand (no YAML parser): it finds the first non-comment
// line containing "outline:" and takes the last space-separated token of
// the NEXT line as the path.
// NOTE(review): this assumes the outline value always sits on the line
// following the "outline:" key — confirm against the unity config format.
func getSysomOutline() (string, error) {
    pipeFile := ""
    yamlF := common.GetYamlFile()
    yamlContent, err := ioutil.ReadFile(yamlF)
    if err != nil {
        return "", err
    }
    lines := strings.Split(string(yamlContent), "\n")
    for i, line := range lines {
        if strings.HasPrefix(line, "#") {
            continue
        }
        if strings.Contains(line, "outline:") {
            if len(lines) > i+1 {
                outline := strings.Split(lines[i+1], " ")
                pipeFile = strings.TrimRight(outline[len(outline)-1], "\n")
                break
            }
        }
    }
    if pipeFile == "" {
        return "", common.PrintOnlyErrMsg(
            "Unable to get label \"outline\" in %s", yamlF)
    }
    return pipeFile, nil
}
diff --git a/source/tools/monitor/observ/sql-obs/metrics/innodbMetrics.go
b/source/tools/monitor/observ/sql-obs/metrics/innodbMetrics.go new file mode 100644 index 0000000000000000000000000000000000000000..cb0d13b9a78572c44df9cf1f950eba4f7627434d --- /dev/null +++ b/source/tools/monitor/observ/sql-obs/metrics/innodbMetrics.go @@ -0,0 +1,211 @@ +package metrics + +import ( + "fmt" + "sql-obs/common" + "strconv" + "strings" + "time" + "sync" +) + +var mysqldInlineMetricsTlbName string = "sysom_obser_metrics_mysqld_innodb" +var dataUploadLock sync.Mutex +var dataExport string +var accCount int + +type InnodbMetrics struct { + ThreadCached uint64 `json:"threadCached"` + ThreadCreated uint64 `json:"threadCreated"` + ThreadConnected uint64 `json:"threadConnected"` + ThreadRunning uint64 `json:"threadRunning"` + MaxConnection uint64 `json:"maxConnection"` + BufferPoolTotal uint64 `json:"bufferPoolTotal"` + BufferPoolFree uint64 `json:"bufferPoolFree"` + LongTransactionCnt uint64 `json:"longTransactionCnt"` + HisListLen uint64 `json:"hisListLen"` + HisListMaxLen uint64 `json:"hisListMaxLen"` + HisListMinLen uint64 `json:"hisListMinLen"` + ChkPointUsage uint64 `json:"chkPointUsage"` + RedologCapacity uint64 `json:"redologCapacity"` +} + +func getConnectionInfo(dbConn *DBConnect, metric *InnodbMetrics) { + sqlCmd := "show status like 'Threads_%'" + data, err := dbConn.GetRowsByQueryCmd(sqlCmd) + if err != nil { + common.PrintDefinedErr(ErrorCode(common.Fail_Get_DB_Variables)) + return + } + + for _, m := range data { + if value, ok := m["Threads_cached"].(string); ok { + metric.ThreadCached, _ = strconv.ParseUint(value, 10, 64) + } else if value, ok := m["Threads_connected"].(string); ok { + metric.ThreadConnected, _ = strconv.ParseUint(value, 10, 64) + } else if value, ok := m["Threads_created"].(string); ok { + metric.ThreadCreated, _ = strconv.ParseUint(value, 10, 64) + } else if value, ok := m["Threads_running"].(string); ok { + metric.ThreadRunning, _ = strconv.ParseUint(value, 10, 64) + } + } +} + +func getConnectionLimit(dbConn *DBConnect, 
metric *InnodbMetrics) { + sqlCmd := "show VARIABLES like 'max_connections'" + data, err := dbConn.GetRowsByQueryCmd(sqlCmd) + if err != nil { + common.PrintDefinedErr(ErrorCode(common.Fail_Get_DB_Variables)) + return + } + for _, m := range data { + if value, ok := m["max_connections"].(string); ok { + metric.MaxConnection, _ = strconv.ParseUint(value, 10, 64) + } + } +} + +func getBufferPoolInfo(dbConn *DBConnect, metric *InnodbMetrics) { + sqlCmd := "show global status like '%Innodb_buffer_pool%'" + data, err := dbConn.GetRowsByQueryCmd(sqlCmd) + if err != nil { + common.PrintDefinedErr(ErrorCode(common.Fail_Get_DB_Variables)) + return + } + var pageData uint64 + var byteData uint64 + var pageFree uint64 + var pageTotal uint64 + for _, m := range data { + if value, ok := m["Innodb_buffer_pool_pages_data"].(string); ok { + pageData, _ = strconv.ParseUint(value, 10, 64) + } else if value, ok := m["Innodb_buffer_pool_bytes_data"].(string); ok { + byteData, _ = strconv.ParseUint(value, 10, 64) + } else if value, ok := m["Innodb_buffer_pool_pages_free"].(string); ok { + pageFree, _ = strconv.ParseUint(value, 10, 64) + } else if value, ok := m["Innodb_buffer_pool_pages_total"].(string); ok { + pageTotal, _ = strconv.ParseUint(value, 10, 64) + } + } + pageSize := byteData / pageData + metric.BufferPoolTotal = pageTotal * pageSize + metric.BufferPoolFree = pageFree * pageSize +} + +func getLongTransactionCount(dbConn *DBConnect, shreshold uint32, metric *InnodbMetrics) { + sqlCmd := fmt.Sprintf("select trx_id, TRX_MYSQL_THREAD_ID as thread_id, TIME_TO_SEC(timediff(now(),trx_started)) as time from information_schema.innodb_trx where TIME_TO_SEC(timediff(now(),trx_started))>%d ORDER BY time ASC", shreshold) + data, err := dbConn.GetRowsByQueryCmd(sqlCmd) + if err != nil { + common.PrintDefinedErr(ErrorCode(common.Fail_Get_DB_Variables)) + return + } + var thread_id []string + for _, m := range data { + if value, ok := m["thread_id"].(string); ok { + thread_id = 
append(thread_id, value) + } + } + metric.LongTransactionCnt = uint64(len(thread_id)) +} + +func getHistoryListLength(dbConn *DBConnect, metric *InnodbMetrics) { + sqlCmd := "SELECT count, max_count, min_count FROM information_schema.innodb_metrics WHERE name='trx_rseg_history_len'" + data, err := dbConn.GetRowsByQueryCmd(sqlCmd) + if err != nil { + common.PrintDefinedErr(ErrorCode(common.Fail_Get_DB_Variables)) + return + } + for _, m := range data { + if value, ok := m["count"].(string); ok { + metric.HisListLen, _ = strconv.ParseUint(value, 10, 64) + } + if value, ok := m["max_count"].(string); ok { + metric.HisListMaxLen, _ = strconv.ParseUint(value, 10, 64) + } + if value, ok := m["min_count"].(string); ok { + metric.HisListMinLen, _ = strconv.ParseUint(value, 10, 64) + } + } +} + +func getCheckPointAge(dbConn *DBConnect, metric *InnodbMetrics) { + // get checkpoint age + var ftd_lsn uint64 + var pg_lsn uint64 + sqlCmd := "show engine innodb status" + data, err := dbConn.GetRowsListByQueryCmd(sqlCmd) + if err != nil { + common.PrintDefinedErr(ErrorCode(common.Fail_Get_DB_Variables)) + return + } + if len(*data) > 0 { + lines := strings.Split((*data)[0], "\n") + for _, line := range lines { + if strings.HasPrefix(line, "Log flushed up to") { + fields := strings.Fields(line) + ftd_lsn, _ = strconv.ParseUint(fields[len(fields)-1], 10, 64) + } else if strings.HasPrefix(line, "Pages flushed up to") { + fields := strings.Fields(line) + pg_lsn, _ = strconv.ParseUint(fields[len(fields)-1], 10, 64) + } + } + } + metric.ChkPointUsage = ftd_lsn - pg_lsn + + // get redo log capacity + sqlCmd = `show variables like '%innodb_log_file%'` + data2, err := dbConn.GetRowsByQueryCmd(sqlCmd) + if err != nil { + common.PrintDefinedErr(ErrorCode(common.Fail_Get_DB_Variables)) + return + } + var fileSize uint64 + var fileCount uint64 + for _, m := range data2 { + if value, ok := m["innodb_log_file_size"].(string); ok { + fileSize, _ = strconv.ParseUint(value, 10, 64) + } else if 
value, ok := m["innodb_log_files_in_group"].(string); ok { + fileCount, _ = strconv.ParseUint(value, 10, 64) + } + } + metric.RedologCapacity = fileSize * fileCount +} + +func addDataToExport(data string, exportThresh int) { + dataUploadLock.Lock() + if len(dataExport) > 0 { + dataExport += "\n" + } + dataExport += data + accCount++ + if accCount >= exportThresh { + common.ExportData(dataExport) + accCount = 0 + dataExport = "" + } + dataUploadLock.Unlock() +} + +func startEngineMonitor(dbConn *DBConnect) { + var innodbMetrics InnodbMetrics + for { + getConnectionInfo(dbConn, &innodbMetrics) + getConnectionLimit(dbConn, &innodbMetrics) + getBufferPoolInfo(dbConn, &innodbMetrics) + getLongTransactionCount(dbConn, 1, &innodbMetrics) + getHistoryListLength(dbConn, &innodbMetrics) + getCheckPointAge(dbConn, &innodbMetrics) + addDataToExport(mysqldInlineMetricsTlbName + `,podID=` + + dbConn.GetPodID() + `,containerID=` + dbConn.GetContainerID() + + `,port=` + strconv.Itoa(dbConn.GetPort()) + `,comm=mysqld ` + + common.Struct2String(innodbMetrics), + common.GetAppInstanceCnt()) + time.Sleep(60 * time.Second) + } +} + +func StartSqlMonitor(dbConn *DBConnect) { + if dbConn.DBConnIsVaild() { + go startEngineMonitor(dbConn) + } +} diff --git a/source/tools/monitor/observ/sql-obs/metrics/metrics.go b/source/tools/monitor/observ/sql-obs/metrics/metrics.go new file mode 100644 index 0000000000000000000000000000000000000000..01b501b2ee3db120d0bbe6c40e0a0af14550dd66 --- /dev/null +++ b/source/tools/monitor/observ/sql-obs/metrics/metrics.go @@ -0,0 +1,15 @@ +package metrics + +import ( + "fmt" + "sql-obs/common" + "sql-obs/events" +) + +type DBConnect = common.DBConnect + +func StartMonitor() { + fmt.Println("start metrics monitor") + go StartOsBaseMonitor() + events.ForeachDBConnect(StartSqlMonitor) +} diff --git a/source/tools/monitor/observ/sql-obs/metrics/osBaseMetrics.go b/source/tools/monitor/observ/sql-obs/metrics/osBaseMetrics.go new file mode 100644 index 
0000000000000000000000000000000000000000..8ef9ad77e38c726672191749ea447ebf1e976ab3 --- /dev/null +++ b/source/tools/monitor/observ/sql-obs/metrics/osBaseMetrics.go @@ -0,0 +1,297 @@ +package metrics + +import ( + //"os" + // "fmt" + "sql-obs/common" + "sql-obs/events/analyzer" + "strconv" + "time" + "strings" + //"math" +) + +type ErrorCode = common.ErrorCode +type MetricsType = analyzer.MetricsType + +type MetricsFuncFloat32 func(table string, metrics string) (float32, error) +type MultiMetricsFuncFloat32 func(table string, metrics []string) []float32 +type MetricsFuncFloat64 func(table string, metrics string) (float64, error) +type MultiMetricsFuncFloat64 func(table string, metrics []string) []float64 + +var GetMetricsFloat32 MetricsFuncFloat32 = common.GetSingleMetrics[float32] +var GetMultiMetricsFloat32 MultiMetricsFuncFloat32 = common.GetMultiMetrics[float32] +var GetMetricsFloat64 MetricsFuncFloat64 = common.GetSingleMetrics[float64] +var GetMultiMetricsFloat64 MultiMetricsFuncFloat64 = common.GetMultiMetrics[float64] + +var ( + osGlobalMetricsTlbName string = "sysom_obser_metrics_mysqld_os" + osMysqldMetricsTlbName string = "sysom_obser_metrics_mysqld_process" +) + +type appOsMetrics struct { + CpuTotal float64 `json:"cpuTotal"` + CpuUser float64 `json:"cpuUser"` + CpuSys float64 `json:"cpuSys"` + CpuGiveup float64 `json:"cpuGiveup"` + SchedDelay float64 `json:"schedDelay"` + UNStatusTime float64 `json:"UNStatusTime"` + IoWriteBps float64 `json:"ioWriteBps"` + IoReadBps float64 `json:"ioReadBps"` + Iowait float64 `json:"iowait"` + IoDelay float64 `json:"ioDelay"` + CgroupDirtyBlockThresh float64 `json:"cgroupDirtyBlockThresh"` + CgroupDirtyPages float64 `json:"cgroupDirtyPages"` + CgroupFlushPages float64 `json:"cgroupFlushPages"` + // MemUsedLayout float64 `json:"memUsedLayout"` + CgroupMemUsedAnon float64 `json:"cgroupMemUsedAnon"` + CgroupMemUsedCache float64 `json:"cgroupMemUsedCache"` + CgroupMemUsedSh float64 `json:"cgroupMemUsedSh"` + 
MemReclaimLatency float64 `json:"memReclaimLatency"` + PkgDrops float64 `json:"pkgDrops"` + RequestCount float64 `json:"requestCount"` + NetSendTraffic float64 `json:"netSendTraffic"` + NetRecTraffic float64 `json:"netRecTraffic"` + ResponseTimeMax uint64 `json:"responseTimeMax"` + ResponseTimeAvg uint64 `json:"responseTimeAvg"` +} + +type globalOsMetrics struct { + // cpuPercent float32 + //loadThresh float64 //0.7, 1.0, 5.0 * cpu core + LoadD float64 `json:"loadD"` + LoadR float64 `json:"loadR"` + SoftLockup float64 `json:"softLockup"` + HardLockup float64 `json:"hardLockup"` + //memUsedLayout float64 //use memgraph + //memReclaimLayout float64 + MemFrag float64 `json:"memFrag"` + NetAccepetCnt float64 `json:"netAccepetCnt"` + NetAccepetThresh float64 `json:"netAccepetThresh"` + NetSYNCnt float64 `json:"netSYNCnt"` + NetSYNCntThresh float64 `json:"netSYNCntThresh"` + ResponseTime float64 `json:"responseTime"` +} + +var labelsRT = []string{ + "Requests", "InBytes", "OutBytes", "AvgRT", "MaxRT"} +var metricsName = []string{ + "cpu_total", "cpu_user", "cpu_sys", "nr_voluntary_switches", + "nr_involuntary_switches", "delay", "write_bytes", "read_bytes", "IOwait", + "majflt", + } + +func updateAppMetrics(mList *[]*appOsMetrics) ([][]string, error) { + info := make([][]string, 0) + set := make(map[string]struct{}) + labelName := []string{ + "cgroup", "pid", + } + rtMap := common.GetAppLatency("sysom_metrics_ntopo_node", labelsRT) + for _, line := range common.GetAppMetrics( + "observe", "mysqld", labelName, metricsName) { + metric := line.(map[string]interface{}) + pid, _ := strconv.Atoi(metric["pid"].(string)) + containerId := "NULL" + if val, ok := metric["cgroup"]; ok && (val != nil) { + containerId = metric["cgroup"].(string) + if len(containerId) < 1 { + containerId = "NULL" + } + } + app := []string{ + metric["pid"].(string), + common.GetAppInstanceMemberByPid(pid, "PodId").(string), + containerId, + } + _, ok := set[strings.Join(app, ",")] + if !ok { + info = 
append(info, app) + set[strings.Join(app, ",")] = struct{}{} + } else { + /* repeated data for same app instance */ + continue + } + var m appOsMetrics + m.CpuTotal, _ = metric["cpu_total"].(float64) + m.CpuUser, _ = metric["cpu_user"].(float64) + m.CpuSys, _ = metric["cpu_sys"].(float64) + + vswitches, _ := metric["nr_voluntary_switches"].(float64) + invswitches, _ := metric["nr_involuntary_switches"].(float64) + if (vswitches + invswitches) > 0 { + m.CpuGiveup = vswitches / (vswitches + invswitches) + } + m.SchedDelay, _ = metric["delay"].(float64) + m.IoWriteBps, _ = metric["write_bytes"].(float64) + m.IoReadBps, _ = metric["read_bytes"].(float64) + m.Iowait, _ = metric["IOwait"].(float64) + // m.iodelay, _ = metric("obIO", "iowait") // null + // m.UNStatusTime, _ := metric["UNStatusTime"].(float64) + memInfo := getCgroupInfo(pid, containerId) + m.MemReclaimLatency = float64(memInfo.DRMemLatencyPS) + m.CgroupMemUsedAnon = float64(memInfo.AnonMem) + m.CgroupMemUsedCache = float64(memInfo.CacheMem) + m.CgroupMemUsedSh = float64(memInfo.ShMem) + m.CgroupDirtyPages = float64(memInfo.Dirty) + m.CgroupDirtyBlockThresh = + (float64(memInfo.MemLimit) - float64(memInfo.MemUsage)) * 0.4 + // dirty_ratio, _ := metric["mem_available"] // null + // m.CgroupFlushPages, _ = metric["cg_wb"] // null + // m.MemUsedLayout, _ = metric["mem_used"] // null + // m.PkgDrops, _ := metric["drops"] // null + // retrans, _ = metric["retran"] // null + if rtMap != nil { + key := containerId + ":" + app[0] + if _, ok := rtMap[key]; ok { + num, _ := strconv.ParseFloat(rtMap[key]["Requests"], 64) + m.RequestCount = num / 30.0 + num, _ = strconv.ParseFloat(rtMap[key]["InBytes"], 64) + m.NetRecTraffic = num / 30.0 + num, _ = strconv.ParseFloat(rtMap[key]["OutBytes"], 64) + m.NetSendTraffic = num / 30.0 + m.ResponseTimeAvg, _ = + strconv.ParseUint(rtMap[key]["AvgRT"], 10, 64) + m.ResponseTimeMax, _ = + strconv.ParseUint(rtMap[key]["MaxRT"], 10, 64) + } + } + if m.CpuTotal > 0 { + 
analyzer.MarkEventsNotify( + MetricsType(analyzer.Notify_Process_CPU_HIGH_Type), + app[2], "mysqld", app[0], + m.CpuTotal, m.CpuUser, m.CpuSys) + } + if m.SchedDelay > 0 { + analyzer.MarkEventsNotify( + MetricsType(analyzer.Notify_Process_Sched_Delay_Type), + app[2], "mysqld", app[0], m.SchedDelay/1000.0) + } + if m.ResponseTimeAvg > 0 { + analyzer.MarkEventsNotify( + MetricsType(analyzer.Notify_Process_RT_Type), + app[2], "mysqld", app[0], + float64(m.ResponseTimeAvg)) + } + if m.UNStatusTime > 0 { + analyzer.MarkEventsNotify(MetricsType(analyzer.Notify_Long_Time_D_Type), + app[2], "mysqld", app[0], m.UNStatusTime) + } + pgmajfault, _ := metric["majflt"].(float64) + if m.MemReclaimLatency > 0 { + analyzer.MarkEventsNotify( + MetricsType(analyzer.Notify_Direct_Reclaim_Type), + app[2], "mysqld", app[0], + pgmajfault, m.MemReclaimLatency, m.MemReclaimLatency) + } + if m.PkgDrops > 0 { + analyzer.MarkEventsNotify( + MetricsType(analyzer.Notify_Process_Net_Drops_Type), + app[2], "mysqld", app[0], m.PkgDrops) + } + analyzer.TriggerNotify() + *mList = append(*mList, &m) + } + return info, nil +} + +func updateOsMetrics(m *globalOsMetrics) error { + // loadinfo := GetMultiMetricsFloat64("sysak_proc_loadavg", + // []string{"loadD", "loadR"}) + // m.LoadD = loadinfo[0] + // m.LoadR = loadinfo[1] + + // schedinfo := GetMultiMetricsFloat64("sysak_proc_sched", + // []string{"nosched", "irq_off"}) + // m.SoftLockup = schedinfo[0] + // m.HardLockup = schedinfo[1] + // if m.SoftLockup > 0 || m.HardLockup > 0 { + // analyzer.MarkEventsNotify( + // MetricsType(analyzer.Notify_OS_Lockup_Type), + // m.SoftLockup, + // m.HardLockup) + // } + + // // [total_mem, alloc_page, SUnreclaim] + // meminfo := GetMultiMetricsFloat64("meminfo", + // []string{"total", "alloc_page", "SUnreclaim"}) + // if meminfo != nil { + // analyzer.MarkEventsNotify( + // MetricsType(analyzer.Notify_Memleak_Type), + // meminfo[0], meminfo[1], meminfo[2]) + // } + // m.MemFrag, _ = 
GetMetricsFloat64("sysak_proc_mem", "memFrag") + + // netinfo := GetMultiMetricsFloat64("sysak_proc_net", + // []string{"acc_cnt", "acc_thresh", "syn_cnt", "syn_thresh"}) + // m.NetAccepetCnt = netinfo[0] + // m.NetAccepetThresh = netinfo[1] + // m.NetSYNCnt = netinfo[2] + // m.NetSYNCntThresh = netinfo[3] + + iowait, err := GetMetricsFloat64("cpu_total", "iowait") + if err == nil { + analyzer.MarkEventsNotify( + MetricsType(analyzer.Notify_IO_Wait_Type), iowait) + } + + ioData := common.GetIOMetrics("disks", []string{ + "busy", "reads", "writes", "rmsec", "wmsec", "rkb", "wkb", "backlog"}) + if ioData != nil { + analyzer.MarkEventsNotify( + MetricsType(analyzer.Notify_IO_Except_Type), ioData) + } + + analyzer.TriggerNotify() + return nil +} + +func exportAppMetrics(appMetrics []*appOsMetrics, info [][]string) { + data := "" + for index, m := range appMetrics { + if len(data) > 0 { + data += "\n" + } + pid, _ := strconv.Atoi(info[index][0]) + port := common.GetAppInstanceMemberByPid(pid, "Port") + if port == "" { + continue + } + portStr := strconv.Itoa(port.(int)) + data += (osMysqldMetricsTlbName + `,pid=` + info[index][0] + + `,podID=` + info[index][1] + `,containerID=` + info[index][2] + + `,port=` + portStr + `,comm=mysqld ` + common.Struct2String(m)) + } + if len(data) > 0 { + common.ExportData(data) + } +} + +func exportOSMetrics(m *globalOsMetrics) { + common.ExportData(osGlobalMetricsTlbName + + `,comm=mysqld ` + common.Struct2String(m)) +} + +func exportAlarmStatics() { + analyzer.ExportAlarmStatics() +} + +// Monitoring resource availability +func StartOsBaseMonitor() { + var osMetrics globalOsMetrics + for { + duration := 30 * time.Second + startTime := time.Now() + var appMetrics []*appOsMetrics + info, _ := updateAppMetrics(&appMetrics) + updateOsMetrics(&osMetrics) + exportAppMetrics(appMetrics, info) + exportOSMetrics(&osMetrics) + exportAlarmStatics() + costTime := time.Now().Sub(startTime) + if costTime < duration { + duration -= costTime + } + 
time.Sleep(duration) + } +} diff --git a/source/tools/monitor/observ/sql-obs/metrics/osCgroupInfo.go b/source/tools/monitor/observ/sql-obs/metrics/osCgroupInfo.go new file mode 100644 index 0000000000000000000000000000000000000000..1afdcd668f9b8612f7ed05cea5f5672878910f3e --- /dev/null +++ b/source/tools/monitor/observ/sql-obs/metrics/osCgroupInfo.go @@ -0,0 +1,163 @@ +package metrics + +import ( + // "fmt" + "io/ioutil" + "strconv" + "strings" + "sql-obs/common" +) + +type CgroupInfo struct { + path string + // direct reclaim memory + DRMemLayout [7]uint64 + DRMemLatencyTotal uint64 + DRMemLatencyPS float64 + // cgroup mem layout + AnonMem uint64 + CacheMem uint64 + ShMem uint64 + // dirty page + Dirty uint64 + MemLimit uint64 + MemUsage uint64 +} + +var CgroupInfoMap map[string]*CgroupInfo = make(map[string]*CgroupInfo) + +func getCgroupInfo(pid int, containerID string) *CgroupInfo { + getMemCgroupPath(pid, containerID) + getMemUsedLayout(CgroupInfoMap[containerID]) + getDRLatency(CgroupInfoMap[containerID]) + getDirtyShreshold(CgroupInfoMap[containerID]) + // fmt.Println(*CgroupInfoMap[containerID]) + return CgroupInfoMap[containerID] +} + +func getDirtyShreshold(info *CgroupInfo) { + path := info.path + for { + if path == "/sys/fs/cgroup" { + getMemLimitByProc(info) + return + } + limit := getCgroupValue(path + "/memory.limit_in_bytes") + if limit != 0x7ffffffffffff000 { + info.MemLimit = limit + break + } + sep := strings.LastIndex(path, "/") + path = path[0:sep] + } + info.MemUsage = getCgroupValue(path + "/memory.usage_in_bytes") +} + +func getCgroupValue(path string) uint64 { + data, err := ioutil.ReadFile(path) + if err != nil { + common.PrintOnlyErrMsg("read cgroup memory layout fail.") + return 0 + } + lines := strings.Split(string(data), "\n") + memLimit, _ := strconv.ParseUint(lines[0], 10, 64) + return memLimit +} + +func getMemLimitByProc(info *CgroupInfo) { + data, err := ioutil.ReadFile("/proc/meminfo") + if err != nil { + 
common.PrintOnlyErrMsg("read meminfo fail.") + return + } + lines := strings.Split(string(data), "\n") + for _, line := range(lines) { + if (strings.HasPrefix(line, "MemAvailable")) { + parts := strings.Fields(line) + memAva, _ := strconv.ParseUint(parts[1], 10, 64) + info.MemLimit = memAva * 1024 + info.MemUsage = 0 + return + } + } +} + +func getMemUsedLayout(info *CgroupInfo) { + data, err := ioutil.ReadFile(info.path + + "/memory.stat") + if err != nil { + common.PrintOnlyErrMsg("read cgroup memory layout fail.") + return + } + + var anon uint64 + var cache uint64 + lines := strings.Split(string(data), "\n") + for _, line := range lines { + parts := strings.Fields(line) + if len(parts) != 2 { + continue + } + if parts[0] == "total_inactive_anon" || + parts[0] == "total_active_anon" { + value, _ := strconv.ParseUint(parts[1], 10, 64) + anon += value + } else if parts[0] == "total_inactive_file" || + parts[0] == "total_active_file" { + value, _ := strconv.ParseUint(parts[1], 10, 64) + cache += value + } else if parts[0] == "total_shmem" { + info.ShMem, _ = strconv.ParseUint(parts[1], 10, 64) + } else if parts[0] == "total_dirty" { + info.Dirty, _ = strconv.ParseUint(parts[1], 10, 64) + } + } + info.AnonMem = anon + info.CacheMem = cache +} + +func getDRLatency(info *CgroupInfo) { + data, err := ioutil.ReadFile(info.path + + "/memory.direct_reclaim_global_latency") + if err != nil { + common.PrintOnlyErrMsg("read cgroup memory layout fail.") + return + } + + var count uint64 + lines := strings.Split(string(data), "\n") + for index, line := range lines { + parts := strings.Fields(line) + if len(parts) != 2 { + continue + } + value, _ := strconv.ParseUint(parts[1], 10, 64) + if strings.Contains(parts[0], "ms:") { + count += (value - info.DRMemLayout[index]) + info.DRMemLayout[index] = value + } else if strings.Contains(parts[0], "total") { + if count == 0 || info.DRMemLatencyPS < 0 { + info.DRMemLatencyPS = 0 + } else { + info.DRMemLatencyPS = + 
float64(value-info.DRMemLatencyTotal) / float64(count) + } + info.DRMemLatencyTotal = value + break + } + } +} + +func getMemCgroupPath(pid int, containerID string) { + if _, ok := CgroupInfoMap[containerID]; ok { + return + } + path, err := common.GetCgroupPath(pid, "memory") + if err != nil { + common.PrintOnlyErrMsg("get proc's cgroup file error.") + return + } + CgroupInfoMap[containerID] = new(CgroupInfo) + CgroupInfoMap[containerID].path = "/sys/fs/cgroup/memory" + path + CgroupInfoMap[containerID].DRMemLatencyPS = -1 +} diff --git a/source/tools/monitor/observ/sql-obs/tracing/sqlTracing.go b/source/tools/monitor/observ/sql-obs/tracing/sqlTracing.go new file mode 100644 index 0000000000000000000000000000000000000000..5feda79730e54598443ae65fbefbbe9bdd04d295 --- /dev/null +++ b/source/tools/monitor/observ/sql-obs/tracing/sqlTracing.go @@ -0,0 +1,73 @@ +package tracing + +import ( + "net" + "os" + "sql-obs/common" + "strconv" + "strings" + "time" +) + +const ( + //PIPE_PATH string = "/var/sysom/outline" // 参考 YAML 中的配置 + MAX_BUFF int = 64 * 1024 // 最大消息长度 +) + + +func StartTracingSql() { + go traceSqlRequestRT("mysqld") +} + +func traceSqlRequestRT(comm string) { + var pidList []string + common.ForeachAppInstances(comm, []string{"Pid"}, + func(values []interface{}) { + pidList = append(pidList, strconv.Itoa(values[0].(int))) + }) + + if len(pidList) == 0 { + common.PrintOnlyErrMsg("not found app %s", comm) + return + } + pidList = append([]string{"mysql", strconv.Itoa(len(pidList))}, pidList...) 
+ + pidListStr := strings.Join(pidList, ",") + for retry := 0; retry < 10; retry++ { + if _, err := os.Stat("/var/ntopo"); os.IsNotExist(err) { + common.PrintOnlyErrMsg("ntopo not startup, retry after 3secs") + time.Sleep(3 * time.Second) + continue + } + + var sock *net.UnixConn = nil + addr, err := net.ResolveUnixAddr("unix", "/var/ntopo") + if err == nil { + sock, err = net.DialUnix("unix", nil, addr) + } + if sock == nil || err != nil { + common.PrintSysError(err) + common.PrintOnlyErrMsg("Can't connect ntopo, retry after 1secs") + time.Sleep(1 * time.Second) + continue + } + + if len(pidListStr) > MAX_BUFF { + common.PrintOnlyErrMsg("pidList too long") + sock.Close() + break + } + for i := 0; i < 5; i++ { + if _, err := sock.Write([]byte(pidListStr)); err != nil { + common.PrintOnlyErrMsg( + "send pidlist failed, pidlist: %s, retry after 1secs", + pidListStr) + time.Sleep(1 * time.Second) + continue + } + break + } + sock.Close() + break + } +} diff --git a/source/tools/monitor/observ/sql-obs/tracing/tracing.go b/source/tools/monitor/observ/sql-obs/tracing/tracing.go new file mode 100644 index 0000000000000000000000000000000000000000..cc1c06a82faaa135ed36ea1569214ddcaf3c944b --- /dev/null +++ b/source/tools/monitor/observ/sql-obs/tracing/tracing.go @@ -0,0 +1,10 @@ +package tracing + +import ( + "fmt" +) + +func StartTracing() { + fmt.Println("start tracing") + StartTracingSql() +} \ No newline at end of file diff --git a/source/tools/monitor/observ/xwatcher/Cargo.toml b/source/tools/monitor/observ/xwatcher/Cargo.toml new file mode 100644 index 0000000000000000000000000000000000000000..233d476d62f75e68841573e4b66d22897a6f4a6a --- /dev/null +++ b/source/tools/monitor/observ/xwatcher/Cargo.toml @@ -0,0 +1,37 @@ +[package] +name = "xwatcher" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +anyhow = "1.0.75" +cached = "0.46.0" +chrono = "0.4.31" +curl = 
"0.4.44" +env_logger = "0.10.0" +libc = "0.2.150" +linemux = "0.3.0" +log = "0.4.20" +metrics-exporter-prometheus = "0.12.1" +procfs = "0.16.0" +psutil = "3.2.2" +regex = "1.10.2" +reqwest = { version = "0.11", features = ["blocking", "json"] } +structopt = "0.3.26" +libbpf-rs = "0.21.2" +serde = {version = "1.0.190", features = ["derive"]} +serde_json = "1.0.107" +uuid = { version = "1.5.0", features = ["v4", "fast-rng"] } +local-ip-address = "0.5.6" +get_if_addrs = "0.5.3" +rtrace = {path = "../../../detect/net/rtrace"} +crossbeam = "0.8.2" +once_cell = "1.18.0" +tokio = { version = "1.34.0", features = ["rt"] } + + +[build-dependencies] +libbpf-cargo = "0.21.2" +bindgen = "0.69.0" diff --git a/source/tools/monitor/observ/xwatcher/Makefile b/source/tools/monitor/observ/xwatcher/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..25a5c83d91aa8619c74167b25a39bdd96e3f3322 --- /dev/null +++ b/source/tools/monitor/observ/xwatcher/Makefile @@ -0,0 +1,13 @@ +TARGET_PATH := $(OBJ_TOOLS_ROOT) + +.PHONY: xwatcher + +ntopo: bin target_rule + +bin: + cargo build --release + cp target/release/xwatcher $(TARGET_PATH)/ + +target := xwatcher + +include $(SRC)/mk/target.inc \ No newline at end of file diff --git a/source/tools/monitor/observ/xwatcher/build.rs b/source/tools/monitor/observ/xwatcher/build.rs new file mode 100644 index 0000000000000000000000000000000000000000..8fd6fdf64512b03562f03f5778b241805bacab44 --- /dev/null +++ b/source/tools/monitor/observ/xwatcher/build.rs @@ -0,0 +1,28 @@ +use libbpf_cargo::SkeletonBuilder; +use std::env; +use std::path::PathBuf; + +const SRC: &str = "src/bpf/ntopo.bpf.c"; +const HDR: &str = "src/bpf/ntopo.h"; + +fn main() { + let mut out = + PathBuf::from(env::var_os("OUT_DIR").expect("OUT_DIR must be set in build script")); + out.push("ntopo.skel.rs"); + SkeletonBuilder::new() + .source(SRC) + .clang_args("-Wno-compare-distinct-pointer-types") + .build_and_generate(&out) + .unwrap(); + + out.pop(); + 
out.push("ntopo.rs"); + let bindings = bindgen::Builder::default() + .header(HDR) + .parse_callbacks(Box::new(bindgen::CargoCallbacks)) + .generate() + .unwrap(); + bindings.write_to_file(&out).unwrap(); + println!("cargo:rerun-if-changed={SRC}"); + println!("cargo:rerun-if-changed={HDR}"); +} diff --git a/source/tools/monitor/observ/xwatcher/src/bpf/ntopo.bpf.c b/source/tools/monitor/observ/xwatcher/src/bpf/ntopo.bpf.c new file mode 100644 index 0000000000000000000000000000000000000000..1a1ab94e3e838a4468dbd2f4f3d85217a75e9229 --- /dev/null +++ b/source/tools/monitor/observ/xwatcher/src/bpf/ntopo.bpf.c @@ -0,0 +1,203 @@ + +#define NTOPO_BPF_DEBUG +#define BPF_NO_GLOBAL_DATA + +#include "vmlinux.h" +#include +#include +#include +#include +#include "ntopo.h" + +struct +{ + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, struct sock *); + __type(value, struct sock_info); + __uint(max_entries, 1024000); +} socks SEC(".maps"); + +struct +{ + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, u32); + __type(value, struct pid_info); + __uint(max_entries, 1024); +} pids SEC(".maps"); + +struct +{ + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, struct edge_info_key); + __type(value, struct edge_info); + __uint(max_entries, 1024); +} edges SEC(".maps"); + +struct +{ + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, struct node_info_key); + __type(value, struct node_info); + __uint(max_entries, 1024); +} nodes SEC(".maps"); + +static inline void set_addr_pair_by_sock(struct sock *sk, struct addrpair *ap) +{ + bpf_probe_read(&ap->daddr, sizeof(ap->daddr), &sk->__sk_common.skc_daddr); + bpf_probe_read(&ap->dport, sizeof(ap->dport), &sk->__sk_common.skc_dport); + bpf_probe_read(&ap->saddr, sizeof(ap->saddr), &sk->__sk_common.skc_rcv_saddr); + bpf_probe_read(&ap->sport, sizeof(ap->sport), &sk->__sk_common.skc_num); + ap->dport = bpf_ntohs(ap->dport); +} + +static inline void update_pid_info(struct pid_info *info, int in_bytes, int out_bytes) +{ + struct task_struct *curr_task; + 
struct kernfs_node *knode, *pknode; + + info->in_bytes += in_bytes; + info->out_bytes += out_bytes; +} + +static inline enum role get_sock_role(struct sock_info *info, struct sock *sk) +{ + if (info->role == ROLE_UNKNOWN) + { + int max_ack_backlog = 0; + bpf_probe_read(&max_ack_backlog, sizeof(max_ack_backlog), &sk->sk_max_ack_backlog); + + info->role = max_ack_backlog == 0 ? ROLE_CLIENT : ROLE_SERVER; + } + return info->role; +} + +static inline void update_edges(struct sock_info *info, int role) +{ + struct edge_info_key key = {0}; + struct edge_info val = {0}; + + key.saddr = info->ap.saddr; + key.daddr = info->ap.daddr; + if (role == ROLE_SERVER) + { + int tmp = key.saddr; + key.saddr = key.daddr; + key.daddr = tmp; + } + bpf_map_update_elem(&edges, &key, &val, BPF_ANY); +} + +static inline void update_nodes(struct sock_info *info) +{ + struct node_info_key key = {0}; + key.addr = info->ap.saddr; + + struct node_info *ninfo = bpf_map_lookup_elem(&nodes, &key); + if (!ninfo) + { + struct node_info val = {0}; + bpf_map_update_elem(&nodes, &key, &val, BPF_ANY); + } +} + + +// int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) +SEC("kprobe/tcp_sendmsg_locked") +int BPF_KPROBE(kprobe_tcp_sendmsg_locked, struct sock *sk, struct msghdr *msg, size_t size) +{ + if (size == 0) + return 0; + + u64 tgid = bpf_get_current_pid_tgid(); + u32 pid = tgid >> 32; + struct pid_info *pinfop; + pinfop = bpf_map_lookup_elem(&pids, &pid); + if (!pinfop) + return 0; + + update_pid_info(pinfop, 0, size); + + struct sock_info *infop = bpf_map_lookup_elem(&socks, &sk); + if (!infop) + { + struct sock_info info = {0}; + info.pid = pid; + set_addr_pair_by_sock(sk, &info.ap); + if (info.ap.saddr == info.ap.daddr) + return 0; + + info.role = get_sock_role(&info, sk); + bpf_map_update_elem(&socks, &sk, &info, BPF_ANY); + } + infop = bpf_map_lookup_elem(&socks, &sk); + if (!infop) + return 0; + + enum role role = get_sock_role(infop, sk); + + update_nodes(infop); + 
update_edges(infop, role); + return 0; +} + +struct tcp_rcv_space_adjust_args +{ + u32 pad[2]; + struct sock *sk; +}; + +SEC("tracepoint/tcp/tcp_rcv_space_adjust") +int tracepoint_tcp_rcv_space_adjust(struct tcp_rcv_space_adjust_args *ctx) +{ + struct sock *sk = ctx->sk; + u64 tgid = bpf_get_current_pid_tgid(); + u32 pid = tgid >> 32; + struct pid_info *pinfop; + pinfop = bpf_map_lookup_elem(&pids, &pid); + if (!pinfop) + return 0; + + struct tcp_sock *tp = sk; + u32 copied_seq, seq, in_bytes; + bpf_probe_read(&copied_seq, sizeof(copied_seq), &tp->copied_seq); + bpf_probe_read(&seq, sizeof(copied_seq), &tp->rcvq_space.seq); + in_bytes = copied_seq - seq; + update_pid_info(pinfop, in_bytes, 0); + + struct sock_info *infop = bpf_map_lookup_elem(&socks, &sk); + if (!infop) + { + struct sock_info info = {0}; + info.pid = pid; + set_addr_pair_by_sock(sk, &info.ap); + if (info.ap.saddr == info.ap.daddr) + return 0; + + info.role = get_sock_role(&info, sk); + bpf_map_update_elem(&socks, &sk, &info, BPF_ANY); + } + infop = bpf_map_lookup_elem(&socks, &sk); + if (!infop) + return 0; + + enum role role = get_sock_role(infop, sk); + update_nodes(infop); + update_edges(infop, role); + return 0; +} + +// void tcp_close(struct sock *sk, long timeout); +SEC("kprobe/tcp_close") +int BPF_KPROBE(kprobe_tcp_close, struct sock *sk) +{ + struct sock_info *info = bpf_map_lookup_elem(&socks, &sk); + if (!info) + return 0; + + u64 ts = bpf_ktime_get_ns(); + enum role role = get_sock_role(info, sk); + bpf_map_delete_elem(&socks, &sk); + return 0; +} + +char _license[] SEC("license") = "GPL"; \ No newline at end of file diff --git a/source/tools/monitor/observ/xwatcher/src/bpf/ntopo.h b/source/tools/monitor/observ/xwatcher/src/bpf/ntopo.h new file mode 100644 index 0000000000000000000000000000000000000000..5c88f648fb606577dd91f4d0aa321b8bd677ce8f --- /dev/null +++ b/source/tools/monitor/observ/xwatcher/src/bpf/ntopo.h @@ -0,0 +1,56 @@ +#ifndef XWATCHER_H +#define XWATCHER_H + +enum role +{ 
+ ROLE_UNKNOWN, + ROLE_CLIENT, + ROLE_SERVER, +}; + +struct addrpair +{ + unsigned int saddr; + unsigned int daddr; + unsigned short sport; + unsigned short dport; +}; + +struct sock_info +{ + struct addrpair ap; + enum role role; + + unsigned int pid; + unsigned long long in_bytes; + unsigned long long out_bytes; +}; + +struct edge_info_key +{ + unsigned int saddr; + unsigned int daddr; +}; + +struct edge_info +{ + int empty; +}; + +struct node_info_key +{ + unsigned int addr; +}; + +struct node_info +{ + int empty; +}; + +struct pid_info +{ + unsigned long long in_bytes; + unsigned long long out_bytes; +}; + +#endif \ No newline at end of file diff --git a/source/tools/detect/net/rtrace/latency/icmp/src/bpf/vmlinux.h b/source/tools/monitor/observ/xwatcher/src/bpf/vmlinux.h similarity index 43% rename from source/tools/detect/net/rtrace/latency/icmp/src/bpf/vmlinux.h rename to source/tools/monitor/observ/xwatcher/src/bpf/vmlinux.h index bbdac43f8fcc70ff6789874cf282bc184cca13e1..1add98ab8b0b6a7c2fafc6dec1b5df0af309f56f 100644 --- a/source/tools/detect/net/rtrace/latency/icmp/src/bpf/vmlinux.h +++ b/source/tools/monitor/observ/xwatcher/src/bpf/vmlinux.h @@ -1,10 +1,9 @@ - #ifndef __VMLINUX_ARCH_H__ #define __VMLINUX_ARCH_H__ #if defined(__TARGET_ARCH_x86) - #include "../../../../../../../../lib/internal/ebpf/coolbpf/arch/x86_64/vmlinux.h" + #include "../../../../../../lib/internal/ebpf/coolbpf/arch/x86_64/vmlinux.h" #elif defined(__TARGET_ARCH_arm64) - #include "../../../../../../../../lib/internal/ebpf/coolbpf/arch/aarch64/vmlinux.h" + #include "../../../../../../lib/internal/ebpf/coolbpf/arch/aarch64/vmlinux.h" #endif #endif \ No newline at end of file diff --git a/source/tools/monitor/observ/xwatcher/src/cpu.rs b/source/tools/monitor/observ/xwatcher/src/cpu.rs new file mode 100644 index 0000000000000000000000000000000000000000..0480496c8888f409cb185ebaa90e64d48b10e699 --- /dev/null +++ b/source/tools/monitor/observ/xwatcher/src/cpu.rs @@ -0,0 +1,33 @@ +use 
crate::utils::{cached_clock_tick, METRICS_PERIOD}; +use procfs::process::Stat; + +#[derive(Default, Debug)] +pub struct Cpu { + pub usr: f32, + pub sys: f32, + pub tot: f32, +} + +impl Cpu { + pub fn new(last: &Stat, now: &Stat) -> Self { + let udelta = (now.utime - last.utime) * 100; + let sdelta = (now.stime - last.stime) * 100; + + let total = ((METRICS_PERIOD as i64) * cached_clock_tick()) as f32; + + Cpu { + usr: udelta as f32 / total, + sys: sdelta as f32 / total, + tot: (udelta + sdelta) as f32 / total, + } + } +} + +impl ToString for Cpu { + fn to_string(&self) -> String { + format!( + "cpuUsr={:.2},cpuSys={:.2},cpuTot={:.2}", + self.usr, self.sys, self.tot + ) + } +} diff --git a/source/tools/monitor/observ/xwatcher/src/main.rs b/source/tools/monitor/observ/xwatcher/src/main.rs new file mode 100644 index 0000000000000000000000000000000000000000..6063e9b65fa8e7e3bde0f26c9c0177517e3974ea --- /dev/null +++ b/source/tools/monitor/observ/xwatcher/src/main.rs @@ -0,0 +1,64 @@ +use nginx::nginx::{find_nginx_instances, Nginx}; +use structopt::StructOpt; +use unity::unity_sock_send; + +use crate::{ntopo::NTopo, rtrace::run_xrtrace}; + +mod cpu; +mod memory; +mod metrics; +mod network; +mod nginx; +mod ntopo; +mod unity; +mod utils; +mod xwatcher; +use libbpf_rs::skel::*; +mod rtrace; + +#[derive(Debug, StructOpt)] +#[structopt(name = "xwatcher", about = "keep nginx healthy")] +pub struct Command { + #[structopt( + long, + default_value = "30", + help = "Set the collection period in seconds" + )] + duration: u32, + #[structopt(short, long, help = "Verbose debug output")] + verbose: bool, +} + +fn main() { + let opts = Command::from_args(); + env_logger::init(); + + run_xrtrace(); + + let mut nginxes = find_nginx_instances(); + assert!(nginxes.len() == 1); + + let mut builder = ntopo::NtopoSkelBuilder::default(); + builder.obj_builder.debug(opts.verbose); + + let mut open_skel = builder.open().unwrap(); + let mut skel = open_skel.load().expect("failed to load 
ntopo"); + skel.attach().unwrap(); + + let pidsmap = libbpf_rs::MapHandle::try_clone(skel.maps().pids()).unwrap(); + let nodesmap = libbpf_rs::MapHandle::try_clone(skel.maps().nodes()).unwrap(); + let edgesmap = libbpf_rs::MapHandle::try_clone(skel.maps().edges()).unwrap(); + + let mut ntopo = NTopo::new(pidsmap, nodesmap, edgesmap, &nginxes[0]); + + loop { + std::thread::sleep(std::time::Duration::from_secs(opts.duration as u64)); + let mut metrics = vec![]; + for nginx in &mut nginxes { + metrics.push(nginx.metrics()); + } + metrics.push(ntopo.metrics(&nginxes[0])); + let metrics_string = metrics.join("\n"); + unity_sock_send("/var/sysom/outline", &metrics_string); + } +} diff --git a/source/tools/monitor/observ/xwatcher/src/memory.rs b/source/tools/monitor/observ/xwatcher/src/memory.rs new file mode 100644 index 0000000000000000000000000000000000000000..ee012edbec3d5c2cac43b0f8220021368fb84f89 --- /dev/null +++ b/source/tools/monitor/observ/xwatcher/src/memory.rs @@ -0,0 +1,30 @@ +use procfs::{process::Stat, Meminfo}; + +#[derive(Default, Debug)] +pub struct Memory { + pub rss: u64, + pub vms: u64, + pub pct: f32, // rss / total +} + +impl Memory { + pub fn new(stat: &Stat, minfo: &Meminfo) -> Self { + let rss = stat.rss; + let vms = stat.vsize; + + Memory { + rss, + vms, + pct: (rss * 100) as f32 / minfo.mem_total as f32, + } + } +} + +impl ToString for Memory { + fn to_string(&self) -> String { + format!( + "memRss={},memVms={},memPct={:.2}", + self.rss, self.vms, self.pct + ) + } +} diff --git a/source/tools/monitor/observ/xwatcher/src/metrics.rs b/source/tools/monitor/observ/xwatcher/src/metrics.rs new file mode 100644 index 0000000000000000000000000000000000000000..d099fbaacff9f4a50984966630d518769f5c0ca6 --- /dev/null +++ b/source/tools/monitor/observ/xwatcher/src/metrics.rs @@ -0,0 +1 @@ +use crate::{cpu::Cpu, memory::Memory}; diff --git a/source/tools/monitor/observ/xwatcher/src/network.rs b/source/tools/monitor/observ/xwatcher/src/network.rs new file 
mode 100644 index 0000000000000000000000000000000000000000..0fcdfc1fcd008afabcfdc0a311749d318fe1005a --- /dev/null +++ b/source/tools/monitor/observ/xwatcher/src/network.rs @@ -0,0 +1,6 @@ +pub struct Network { + pub in_bytes: usize, + pub out_bytes: usize, + pub drops: usize, + pub retrans: usize, +} diff --git a/source/tools/monitor/observ/xwatcher/src/nginx/access_log.rs b/source/tools/monitor/observ/xwatcher/src/nginx/access_log.rs new file mode 100644 index 0000000000000000000000000000000000000000..2f3d88058f8ad4867f1abfe1425020567dda895f --- /dev/null +++ b/source/tools/monitor/observ/xwatcher/src/nginx/access_log.rs @@ -0,0 +1,298 @@ +use crate::rtrace::xtrace_collect; +use crate::utils::send_alarm; +use anyhow::{bail, Result}; +use chrono::prelude::*; +use once_cell::sync::Lazy; +use rtrace::common::utils::current_monotime; +use rtrace::common::utils::parse_ip_str; +use serde::ser::SerializeStruct; +use serde::Serialize; +use serde::Serializer; +use std::net::Ipv4Addr; +use std::sync::Mutex; +use tokio::runtime::Runtime; + +static GLOBAL_METRICS: Lazy> = Lazy::new(|| { + let metrics = AccessLogMetrics::default(); + Mutex::new(metrics) +}); + +#[derive(Debug)] +pub struct AccessLog { + master: i32, + pub metrics: AccessLogMetrics, +} + +impl AccessLog { + pub fn new(master: i32) -> Self { + std::thread::spawn(move || { + let rt = Runtime::new().unwrap(); + rt.block_on(async { + let mut lines = linemux::MuxedLines::new().unwrap(); + lines.add_file("/var/log/nginx/access.log").await.unwrap(); + while let Ok(Some(line)) = lines.next_line().await { + match parse_accesslog_entry(line.line()) { + Ok(entry) => { + send_accesslog_alarm(&entry, master); + GLOBAL_METRICS.lock().unwrap().add_entry(entry); + } + Err(_) => log::error!("failed to parse access log"), + } + } + }); + }); + + AccessLog { + master, + metrics: Default::default(), + } + } + + pub fn metrics(&mut self) -> String { + self.metrics = GLOBAL_METRICS.lock().unwrap().clone(); + 
GLOBAL_METRICS.lock().unwrap().reset(); + self.metrics.to_string() + } +} + +fn send_accesslog_alarm(entry: &AccessLogEntry, master: i32) { + let mut valid = false; + if entry.status.eq("499") || entry.status.eq("504") { + valid = true; + } + if entry.request_time > 10 { + valid = true; + } + + if valid { + let raddr: Ipv4Addr = entry.remote_addr.parse().unwrap(); + let (addr, uport) = parse_ip_str(&entry.upstream_addr); + let uaddr = Ipv4Addr::from(u32::from_be(addr)); + let r = current_monotime(); + let l = r - (entry.request_time as u64) * 1_000_000; + let mut reason = xtrace_collect(raddr, uaddr, uport, (l, r)); + if reason.len() < 3 { + reason = "未发现网络丢包和重传".to_owned(); + } + + let me = MetricEvent::new(master.to_string(), entry, reason); + send_alarm(me); + } +} + +#[derive(Debug)] +pub struct AccessLogEntry { + request_time: usize, + upstream_response_time: usize, + status: String, + remote_addr: String, + upstream_addr: String, + request: String, +} + +fn parse_until_space(input: &str) -> Result<(&str, &str)> { + let res = input.split_once(" "); + match res { + Some(r) => Ok(r), + None => bail!("failed to parse"), + } +} + +fn parse_inside_quotes(input: &str) -> Result<(&str, &str)> { + if let Some(start) = input.find('"') { + if let Some(end) = input[start + 1..].find('"') { + return Ok(( + &input[start + 1..start + end + 1], + &input[start + end + 1..], + )); + } + } + bail!("failed to find quotes") +} + +fn parse_time_ms(time: &str) -> Result { + let f: f32 = time.parse()?; + Ok((f * 1000.0) as usize) +} + +fn parse_accesslog_entry(input: &str) -> Result { + let (tmp, input) = parse_until_space(input)?; + let request_time = parse_time_ms(tmp)?; + + let (tmp, input) = parse_until_space(input)?; + let upstream_response_time = parse_time_ms(tmp)?; + + let (status, input) = parse_until_space(input)?; + let (remote_addr, input) = parse_until_space(input)?; + let (upstream_addr, input) = parse_until_space(input)?; + + let (request, _) = 
parse_inside_quotes(input)?; + + Ok(AccessLogEntry { + request_time, + upstream_response_time, + status: status.to_owned(), + remote_addr: remote_addr.to_owned(), + upstream_addr: upstream_addr.to_owned(), + request: request.to_owned(), + }) +} + +#[derive(Debug, Default, Clone, Copy)] +pub struct AccessLogMetrics { + pub status: [usize; 5], + + pub request_time_count: usize, + pub request_time_total: usize, + + pub upstream_response_time_count: usize, + pub upstream_response_time_total: usize, + + pub max_request_time: usize, + pub max_upstream_reponse_time: usize, + + pub request_jitter: usize, +} + +impl ToString for AccessLogMetrics { + fn to_string(&self) -> String { + format!( + "status_1xx={},status_2xx={},status_3xx={},status_4xx={},status_5xx={},requestTimeAvg={},upstreamTimeAvg={},maxRequestTime={},maxUpstreamTime={},requestJitter={}", + self.status[0], + self.status[1], + self.status[2], + self.status[3], + self.status[4], + if self.request_time_count == 0 { 0} else { self.request_time_total/self.request_time_count}, + if self.upstream_response_time_count==0 {0} else {self.upstream_response_time_total/self.upstream_response_time_count}, + self.max_request_time, + self.max_upstream_reponse_time, + self.request_jitter + ) + } +} + +impl AccessLogMetrics { + fn reset(&mut self) { + self.status[0] = 0; + self.status[1] = 0; + self.status[2] = 0; + self.status[3] = 0; + self.status[4] = 0; + self.request_time_count = 0; + self.request_time_total = 0; + + self.upstream_response_time_count = 0; + self.upstream_response_time_total = 0; + + self.max_request_time = 0; + self.max_upstream_reponse_time = 0; + self.request_jitter = 0; + } + + pub fn add_request_time(&mut self, time_ms: usize) { + self.request_time_total += time_ms; + self.request_time_count += 1; + self.max_request_time = std::cmp::max(time_ms, self.max_request_time); + } + + pub fn add_upstream_response_time(&mut self, time_ms: usize) { + self.request_time_total += time_ms; + self.request_time_count 
+= 1; + self.max_upstream_reponse_time = std::cmp::max(time_ms, self.max_upstream_reponse_time); + } + + pub fn add_http_status(&mut self, status: &str) { + if let Some(first_char) = status.chars().next() { + let index = first_char as usize - '0' as usize; + self.status[index - 1] += 1; + } else { + log::error!("wrong format status code: {}", status); + } + } + + pub fn add_entry(&mut self, entry: AccessLogEntry) { + if !entry.status.eq("499") && !entry.status.eq("504") && entry.request_time > 10{ + self.request_jitter += 1; + } + + self.add_request_time(entry.request_time); + self.add_upstream_response_time(entry.upstream_response_time); + self.add_http_status(&entry.status); + } +} + +#[derive(Debug, Clone)] +struct MetricEvent { + ts: String, + remote_addr: String, + upstream_addr: String, + request: String, + request_time: String, + upstream_response_time: String, + master_pid: String, + diag_id: String, + status: String, + reason: String, + diag: bool, +} + +impl Serialize for MetricEvent { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + let mut s = serializer.serialize_struct("MetricEvent", 7)?; + s.serialize_field("ts", &self.ts)?; + s.serialize_field("remoteAddr", &self.remote_addr)?; + s.serialize_field("upstreamAddr", &self.upstream_addr)?; + s.serialize_field("request", &self.request)?; + s.serialize_field("requestTime", &self.request_time)?; + s.serialize_field("upstreamResponseTime", &self.upstream_response_time)?; + s.serialize_field("masterPid", &self.master_pid)?; + s.serialize_field("diagId", &self.diag_id)?; + s.serialize_field("status", &self.status)?; + s.serialize_field("reason", &self.reason)?; + s.serialize_field("diag", &self.diag)?; + s.end() + } +} + +impl MetricEvent { + pub fn new(pid: String, entry: &AccessLogEntry, reason: String) -> Self { + let local: DateTime = Local::now(); + MetricEvent { + ts: local.format("%d/%m/%Y %H:%M:%S").to_string(), + remote_addr: entry.remote_addr.clone(), + upstream_addr: 
entry.upstream_addr.clone(), + request: entry.request.clone(), + request_time: entry.request_time.to_string(), + upstream_response_time: entry.upstream_response_time.to_string(), + master_pid: pid, + diag_id: "none".to_string(), + status: entry.status.clone(), + reason, + diag: true + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn parse_accesslog() { + let text = r#"0.002 0.002 301 140.205.118.62 192.168.0.76:8000 "GET /polls HTTP/1.1""#; + println!("{:?}", parse_accesslog_entry(text)); + } + + #[test] + fn show_metric_event() { + let text = r#"0.002 0.002 301 140.205.118.62 192.168.0.76:8000 "GET /polls HTTP/1.1""#; + let entry = parse_accesslog_entry(text).unwrap(); + let me = MetricEvent::new(12.to_string(), &entry); + + println!("{}", serde_json::to_string(&me).unwrap()); + } +} diff --git a/source/tools/monitor/observ/xwatcher/src/nginx/error_log.rs b/source/tools/monitor/observ/xwatcher/src/nginx/error_log.rs new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/source/tools/monitor/observ/xwatcher/src/nginx/mod.rs b/source/tools/monitor/observ/xwatcher/src/nginx/mod.rs new file mode 100644 index 0000000000000000000000000000000000000000..656f2b65aecbec37c573d112941ce41a58b02623 --- /dev/null +++ b/source/tools/monitor/observ/xwatcher/src/nginx/mod.rs @@ -0,0 +1,4 @@ +pub mod access_log; +pub mod error_log; +pub mod nginx; +pub mod status; diff --git a/source/tools/monitor/observ/xwatcher/src/nginx/nginx.rs b/source/tools/monitor/observ/xwatcher/src/nginx/nginx.rs new file mode 100644 index 0000000000000000000000000000000000000000..b9557b35fffe5dc5b0ec09d333be13d669ea65e6 --- /dev/null +++ b/source/tools/monitor/observ/xwatcher/src/nginx/nginx.rs @@ -0,0 +1,275 @@ +use std::{ + collections::{HashMap, HashSet}, + process::Command, +}; + +use anyhow::Result; +use procfs::{ + process::{Process, Stat}, + Current, Meminfo, +}; + +use crate::nginx::status::Status; +use 
crate::{cpu::Cpu, memory::Memory}; + +use super::access_log::AccessLog; + +pub struct Nginx { + pub prcs: HashMap, + pub stats: HashMap, + pub ppids: HashMap, + + pub master: i32, + workers: HashSet, + + meminfo: Meminfo, + info: NginxInfo, + + status: Status, + pub access_log: AccessLog, +} + +impl Nginx { + pub fn new(master: i32, workers: HashSet, prcs: HashMap) -> Self { + let mut nginx = Nginx { + prcs, + stats: Default::default(), + ppids: Default::default(), + meminfo: Meminfo::current().unwrap(), + info: NginxInfo::new(master), + master, + workers, + status: Status::new("http://127.0.0.1/api".to_owned()), + access_log: AccessLog::new(master), + }; + nginx.refresh(); + + nginx + } + + fn refresh(&mut self) { + self.ppids.clear(); + for (pid, prc) in &self.prcs { + match prc.stat() { + Ok(stat) => { + let ppid = stat.ppid; + self.stats + .entry(*pid) + .and_modify(|(x, y)| { + std::mem::swap(x, y); + *y = stat.clone(); + }) + .or_insert((stat.clone(), stat)); + self.ppids.entry(ppid).and_modify(|c| *c += 1).or_insert(1); + } + Err(e) => { + log::error!("failed to read process stat, error messsage: {}", e); + } + } + } + } + + pub fn metrics(&mut self) -> String { + self.refresh(); + let mut metrics = vec![]; + + metrics.push(self.nginx_main_metrics()); + metrics.push(self.nginx_process_metrics()); + + metrics.join("\n") + } + + fn nginx_main_metrics(&mut self) -> String { + format!( + "sysom_nginx_main_metrics,masterPid={} errorLog=0,workersCount={},{},{}", + self.master, + self.workers.len(), + if let Some(x) = self.status.metrics() { + x + } else { + "noStatus=1".to_owned() + }, + self.access_log.metrics(), + ) + } + + fn nginx_process_metrics(&mut self) -> String { + let mut metrics = vec![]; + + for (pid, (last, now)) in &self.stats { + let metric = format!( + "sysom_nginx_worker_metrics,masterPid={},pid={} {},{}", + self.master, + pid, + Cpu::new(last, now).to_string(), + Memory::new(now, &self.meminfo).to_string() + ); + metrics.push(metric); + } + 
metrics.join("\n") + } +} + +pub struct NginxMetrics { + pid: i32, + master_pid: i32, + cpu: Cpu, + mem: Memory, +} + +impl NginxMetrics { + pub fn to_line_protocol(&self) -> String { + format!( + "sysom_nginx_metrics,masterPid={},pid={} {},{}", + self.master_pid, + self.pid, + self.cpu.to_string(), + self.mem.to_string() + ) + } + + pub fn is_master(&self) -> bool { + self.pid == self.master_pid + } +} + +pub fn find_nginx_instances() -> Vec { + let mut res = vec![]; + let mut prcs = find_nginx_processes(); + assert_ne!(prcs.len(), 0, "No running nginx instance"); + + let nginxes = classify_processes(&prcs); + for (master, workers) in nginxes { + let mut nginx_prcs = HashMap::from([(master, prcs.remove(&master).unwrap())]); + for worker in &workers { + nginx_prcs.insert(*worker, prcs.remove(worker).unwrap()); + } + res.push(Nginx::new(master, workers, nginx_prcs)); + } + res +} + +fn find_nginx_processes() -> HashMap { + let mut prcs = HashMap::new(); + for prc in procfs::process::all_processes().unwrap() { + let prc = prc.unwrap(); + if let Ok(stat) = prc.stat() { + if stat.comm.starts_with("nginx") { + log::debug!("find nginx process: {:?}", prc); + prcs.insert(prc.pid, prc); + } + } + } + prcs +} + +// master workers +fn classify_processes(prcs: &HashMap) -> HashMap> { + let mut ptree: HashMap> = HashMap::new(); + for (pid, prc) in prcs { + if let Ok(stat) = prc.stat() { + let ppid = stat.ppid; + + if !prcs.contains_key(&ppid) { + continue; + } + + let mut entry = ptree.entry(ppid).or_insert(HashSet::new()); + entry.insert(*pid); + } + } + ptree +} + +#[derive(Debug, Default)] +struct NginxInfo { + args: HashMap, +} + +impl From for NginxInfo { + fn from(value: String) -> Self { + let lines = value.lines().map(str::trim); + let mut args = Default::default(); + for line in lines { + match line { + line if line.starts_with("configure arguments") => { + args = NginxInfo::parse_configure_arguments(line); + } + _ => {} + } + } + + NginxInfo { args } + } +} + +impl 
NginxInfo { + pub fn new(pid: i32) -> Self { + let exe = format!("/proc/{}/exe", pid); + let output = Command::new(&exe) + .arg("-V") + .output() + .map_err(|e| log::error!("nginx -V failed: {}", e)) + .ok() + .unwrap(); + let outbuf = String::from_utf8_lossy(&output.stdout).to_string(); + NginxInfo::from(outbuf) + } + + pub fn conf_path(&self) -> Option<&String> { + self.args.get("conf-path") + } + + pub fn access_log_path(&self) -> Option<&String> { + self.args.get("http-log-path") + } + + pub fn error_log_path(&self) -> Option<&String> { + self.args.get("error-log-path") + } + + fn parse_configure_arguments(line: &str) -> HashMap { + let line = line.trim_start_matches("configure arguments:"); + let flags: Vec<&str> = line.split(" --").collect(); + let mut result: HashMap = HashMap::new(); + for flag in flags { + let vals: Vec<&str> = flag.split("=").collect(); + match vals.len() { + 1 => { + if !vals[0].is_empty() { + result.insert(vals[0].to_string(), String::from("true")); + } + } + 2 => { + result.insert(vals[0].to_string(), vals[1].to_string()); + } + _ => {} + } + } + result + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn nginx_info() { + let nginx_v = r#"nginx version: nginx/1.20.1 + built by gcc 10.2.1 20200825 (Alibaba 10.2.1-3 2.32) (GCC) + built with OpenSSL 1.1.1k FIPS 25 Mar 2021 + TLS SNI support enabled + configure arguments: --prefix=/usr/share/nginx --sbin-path=/usr/sbin/nginx --modules-path=/usr/lib64/nginx/modules --conf-path=/etc/nginx/nginx.conf --error-log-path=/var/log/nginx/error.log --http-log-path=/var/log/nginx/access.log --http-client-body-temp-path=/var/lib/nginx/tmp/client_body --http-proxy-temp-path=/var/lib/nginx/tmp/proxy --http-fastcgi-temp-path=/var/lib/nginx/tmp/fastcgi --http-uwsgi-temp-path=/var/lib/nginx/tmp/uwsgi --http-scgi-temp-path=/var/lib/nginx/tmp/scgi --pid-path=/run/nginx.pid --lock-path=/run/lock/subsys/nginx --user=nginx --group=nginx --with-file-aio --with-ipv6 --with-http_ssl_module 
--with-http_v2_module --with-http_realip_module --with-stream_ssl_preread_module --with-http_addition_module --with-http_xslt_module=dynamic --with-http_image_filter_module=dynamic --with-http_sub_module --with-http_dav_module --with-http_flv_module --with-http_mp4_module --with-http_gunzip_module --with-http_gzip_static_module --with-http_random_index_module --with-http_secure_link_module --with-http_degradation_module --with-http_slice_module --with-http_stub_status_module --with-http_perl_module=dynamic --with-http_auth_request_module --with-mail=dynamic --with-mail_ssl_module --with-pcre --with-pcre-jit --with-stream=dynamic --with-stream_ssl_module --with-debug --with-cc-opt='-O2 -g -pipe -Wall -Werror=format-security -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fexceptions -fstack-protector-strong -grecord-gcc-switches -specs=/usr/lib/rpm/redhat/redhat-hardened-cc1 -specs=/usr/lib/rpm/redhat/redhat-annobin-cc1 -m64 -mtune=generic -fasynchronous-unwind-tables -fstack-clash-protection -fcf-protection -floop-unroll-and-jam -ftree-loop-distribution --param early-inlining-insns=160 --param inline-heuristics-hint-percent=800 --param inline-min-speedup=50 --param inline-unit-growth=256 --param max-average-unrolled-insns=500 --param max-completely-peel-times=32 --param max-completely-peeled-insns=800 --param max-inline-insns-auto=128 --param max-inline-insns-small=128 --param max-unroll-times=16 --param max-unrolled-insns=16 -O3' --with-compat --with-ld-opt='-Wl,-z,relro -Wl,-z,now -specs=/usr/lib/rpm/redhat/redhat-hardened-ld -Wl,-E'"#; + let info = NginxInfo::from(nginx_v.to_owned()); + assert_eq!(info.conf_path(), Some(&"/etc/nginx/nginx.conf".to_owned())); + assert_eq!( + info.access_log_path(), + Some(&"/var/log/nginx/access.log".to_owned()) + ); + assert_eq!( + info.error_log_path(), + Some(&"/var/log/nginx/error.log".to_owned()) + ); + } +} diff --git a/source/tools/monitor/observ/xwatcher/src/nginx/status.rs 
b/source/tools/monitor/observ/xwatcher/src/nginx/status.rs new file mode 100644 index 0000000000000000000000000000000000000000..ac0087ff2a7c121f8ba9410c578a11330794375e --- /dev/null +++ b/source/tools/monitor/observ/xwatcher/src/nginx/status.rs @@ -0,0 +1,103 @@ +use anyhow::bail; +use anyhow::Result; +use regex::Regex; + +#[derive(Debug, Default)] +pub struct Status { + active_connections: u32, + accepts: u32, + handled: u32, + requests: u32, + reading: u32, + writing: u32, + waiting: u32, + + url: String, + last_accepts: usize, + last_handled: usize, + last_requests: usize, +} + +impl Status { + pub fn new(url: String) -> Self { + let mut status = Status { + url, + ..Default::default() + }; + + let _ = status.refresh(); + status + } + + pub fn metrics(&mut self) -> Option { + match self.refresh() { + Ok(()) => Some(self.to_string()), + Err(e) => { + log::error!("{}", e); + None + } + } + } + + fn refresh(&mut self) -> Result<()> { + let status_string = reqwest::blocking::get(&self.url)?.text()?; + let numbers = parse_text(&status_string); + if numbers.len() != 7 { + bail!("wrong status format: {}", status_string); + } + + self.active_connections = numbers[0] as u32; + self.accepts = (numbers[1] - self.last_accepts) as u32; + self.handled = (numbers[2] - self.last_handled) as u32; + self.requests = (numbers[3] - self.last_requests) as u32; + self.reading = numbers[4] as u32; + self.writing = numbers[5] as u32; + self.waiting = numbers[6] as u32; + + self.last_accepts = numbers[1]; + self.last_handled = numbers[2]; + self.last_requests = numbers[3]; + Ok(()) + } +} + +impl ToString for Status { + fn to_string(&self) -> String { + format!("activeConnections={},accepts={},handled={},requests={},reading={},writing={},waiting={}", self.active_connections, self.accepts, self.handled, self.requests, self.reading, self.writing, self.waiting) + } +} + +fn parse_text(status_string: &str) -> Vec { + let re = Regex::new(r"\d+").unwrap(); + let mut numbers = Vec::new(); + + 
for capture in re.captures_iter(status_string) { + if let Ok(number) = capture[0].parse::() { + numbers.push(number); + } + } + + numbers +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn resolve_status_string() { + let text = r#"Active connections: 1 + server accepts handled requests + 136859 136859 137052 + Reading: 0 Writing: 1 Waiting: 0 "#; + let nums = parse_text(text); + assert_eq!(nums.len(), 7); + assert_eq!(nums[0], 1); + assert_eq!(nums[1], 136859); + assert_eq!(nums[2], 136859); + assert_eq!(nums[3], 137052); + assert_eq!(nums[4], 0); + assert_eq!(nums[5], 1); + assert_eq!(nums[6], 0); + } +} diff --git a/source/tools/monitor/observ/xwatcher/src/ntopo.rs b/source/tools/monitor/observ/xwatcher/src/ntopo.rs new file mode 100644 index 0000000000000000000000000000000000000000..d2cbef5b49eac892bd3f3712a83df54ed970150d --- /dev/null +++ b/source/tools/monitor/observ/xwatcher/src/ntopo.rs @@ -0,0 +1,212 @@ +include!(concat!(env!("OUT_DIR"), "/ntopo.skel.rs")); +include!(concat!(env!("OUT_DIR"), "/ntopo.rs")); + +use libbpf_rs::MapHandle; +use std::net::Ipv4Addr; + +pub use NtopoSkelBuilder; + +use crate::nginx::nginx::Nginx; + +#[derive(Debug, Default)] +pub struct Node { + ip: String, + pid: u32, + + in_bytes: usize, + out_bytes: usize, + + max_rt: u32, +} + +impl Node { + pub fn set_max_rt(&mut self, max_rt: u32) { + self.max_rt = max_rt; + } + + pub fn set_pid(&mut self, pid: u32) { + self.pid = pid; + } +} + +impl ToString for Node { + fn to_string(&self) -> String { + format!( + "sysom_metrics_ntopo_node,Ip={},Kind=Node,Pid={},Comm=Nginx,Title=Node({}),Icon=NGINX,APP=NGINX,InBytes={},OutBytes={},MaxRT={} Value=1", + self.ip, self.pid, self.ip, self.in_bytes, self.out_bytes, self.max_rt, + ) + } +} + +pub fn get_nodes(map: &MapHandle) -> Vec { + let mut res = vec![]; + for key_bytes in map.keys() { + if let Some(val) = map + .lookup(&key_bytes, libbpf_rs::MapFlags::ANY) + .expect("failed to lookup pid map") + { + let (head, body, _tail) = 
unsafe { key_bytes.align_to::() }; + assert!(head.is_empty(), "Data was not aligned"); + let key = &body[0]; + let mut node = Node::default(); + node.ip = Ipv4Addr::from(u32::from_be(key.addr)).to_string(); + res.push(node); + map.delete(&key_bytes).expect("failed to delete nodes map"); + } + } + + res +} + +pub struct Edge { + pub client_ip: String, + pub server_ip: String, +} + +impl ToString for Edge { + fn to_string(&self) -> String { + format!( + "sysom_metrics_ntopo_edge,ClientIp={},ServerIp={},LinkId={}{},APP=NGINX Value=1", + self.client_ip, self.server_ip, self.client_ip, self.server_ip, + ) + } +} + +pub fn get_edges(map: &MapHandle) -> Vec { + let mut res = vec![]; + for key_bytes in map.keys() { + if let Some(val) = map + .lookup(&key_bytes, libbpf_rs::MapFlags::ANY) + .expect("failed to lookup edges map") + { + let (head, body, _tail) = unsafe { key_bytes.align_to::() }; + assert!(head.is_empty(), "Data was not aligned"); + let key = &body[0]; + res.push(Edge { + client_ip: Ipv4Addr::from(u32::from_be(key.saddr)).to_string(), + server_ip: Ipv4Addr::from(u32::from_be(key.daddr)).to_string(), + }); + + map.delete(&key_bytes).expect("failed to delete edges map"); + } + } + + res +} + +#[derive(Debug, Default)] +pub struct Pid { + pub pid: u32, + pub in_bytes: usize, + pub out_bytes: usize, +} + +fn get_pids(map: &MapHandle) -> Vec { + let mut res = vec![]; + let zero_pid_info = [0_u8; std::mem::size_of::()]; + for key_bytes in map.keys() { + if let Some(val_bytes) = map + .lookup(&key_bytes, libbpf_rs::MapFlags::ANY) + .expect("failed to lookup pid map") + { + let (head, body, _tail) = unsafe { val_bytes.align_to::() }; + assert!(head.is_empty(), "Data was not aligned"); + let info = &body[0]; + + let pid_arr = [key_bytes[0], key_bytes[1], key_bytes[2], key_bytes[3]]; + let pid_num = u32::from_ne_bytes(pid_arr); + let pid = Pid { + pid: pid_num, + in_bytes: info.in_bytes as usize, + out_bytes: info.out_bytes as usize, + }; + + res.push(pid); + + 
map.update(&key_bytes, &zero_pid_info, libbpf_rs::MapFlags::EXIST) + .expect("failed to update pid map"); + } + } + res +} + +pub struct NTopo { + nodes: Vec, + edges: Vec, + pids: Vec, + + pid: MapHandle, + node: MapHandle, + edge: MapHandle, +} + +impl NTopo { + pub fn new(pid: MapHandle, node: MapHandle, edge: MapHandle, nginx: &Nginx) -> Self { + let mut ntopo = NTopo { + nodes: Default::default(), + edges: Default::default(), + pids: Default::default(), + + pid, + node, + edge, + }; + + let zero_pid_info = [0_u8; std::mem::size_of::()]; + for (pid, _) in &nginx.prcs { + ntopo + .pid + .update(&pid.to_ne_bytes(), &zero_pid_info, libbpf_rs::MapFlags::ANY) + .unwrap(); + } + + ntopo + } + + fn refresh(&mut self, nginx: &Nginx) { + let mut edges = get_edges(&self.edge); + let mut nodes = get_nodes(&self.node); + let mut pids = get_pids(&self.pid); + assert!(nodes.len() <= 1); + + let mut total_in_bytes = 0; + let mut total_out_bytes = 0; + for pid in &pids { + total_in_bytes += pid.in_bytes; + total_out_bytes += pid.out_bytes; + } + + if nodes.len() == 1 { + nodes[0].set_max_rt(nginx.access_log.metrics.max_request_time as u32); + nodes[0].set_pid(nginx.master as u32); + nodes[0].in_bytes = total_in_bytes; + nodes[0].out_bytes = total_out_bytes; + } + + self.nodes = nodes; + self.edges = edges; + self.pids = pids; + } + + pub fn metrics(&mut self, nginx: &Nginx) -> String { + let mut res = vec![]; + self.refresh(nginx); + for n in &self.nodes { + res.push(n.to_string()); + } + + for e in &self.edges { + res.push(e.to_string()); + } + + let master = nginx.master; + for p in &self.pids { + res.push(format!( + "sysom_nginx_worker_metrics,masterPid={},pid={} inBytes={},outBytes={}", + master, p.pid, p.in_bytes, p.out_bytes + )); + } + + res.join("\n") + } +} diff --git a/source/tools/monitor/observ/xwatcher/src/rtrace.rs b/source/tools/monitor/observ/xwatcher/src/rtrace.rs new file mode 100644 index 
0000000000000000000000000000000000000000..d7aca61ceb82cd18c0d2f05e98ab22c4be1f138c --- /dev/null +++ b/source/tools/monitor/observ/xwatcher/src/rtrace.rs @@ -0,0 +1,158 @@ +use once_cell::sync::Lazy; +use rtrace::collector::drop::disable_tp_kfree_skb; +use rtrace::collector::drop::Drop; +use rtrace::collector::launcher::initial_collector_thread_drop; +use rtrace::collector::launcher::initial_collector_thread_retran; +use rtrace::collector::retran::Retran; +use rtrace::common::config::Config; +use rtrace::common::utils::current_monotime; +use rtrace::event::get_event_channel; +use rtrace::event::Event; +use std::cmp::Ordering; +use std::collections::{BTreeMap, HashMap}; +use std::net::Ipv4Addr; +use std::net::SocketAddr; +use std::ops::Bound::Included; +use std::sync::Mutex; + +static GLOBAL_XRTRACE: Lazy> = Lazy::new(|| { + let rtrace = XRtrace::new(); + Mutex::new(rtrace) +}); + +fn get_config() -> Config { + let mut config = Config::default(); + config.set_protocol_tcp(); + config.enable_drop(); + config.enable_retran(); + config.disable_drop_kfree_skb(); + config +} + +pub struct XRtrace { + drops: BTreeMap, + retrans: BTreeMap, +} + +impl XRtrace { + pub fn new() -> Self { + disable_tp_kfree_skb(); + let config = get_config(); + let (tx, rx) = get_event_channel(); + initial_collector_thread_drop(&config, tx.clone()); + initial_collector_thread_retran(&config, tx); + std::thread::spawn(move || loop { + match rx.recv() { + Ok(Event::Drop(drop)) => { + let ts = current_monotime(); + GLOBAL_XRTRACE.lock().unwrap().drops.insert(ts, drop); + } + Ok(Event::Retran(r)) => { + let ts = current_monotime(); + GLOBAL_XRTRACE.lock().unwrap().retrans.insert(ts, r); + } + _ => { + panic!("unexpected event") + } + } + }); + + XRtrace { + drops: Default::default(), + retrans: Default::default(), + } + } + + fn clear(&mut self) { + self.drops.clear(); + self.retrans.clear(); + } + + fn collect( + &mut self, + raddr: Ipv4Addr, + uaddr: Ipv4Addr, + uport: u16, + times: (u64, u64), 
+ ) -> String { + let mut drop_count = 0; + let mut retran_count = HashMap::new(); + let l = Included(times.0); + let r = Included(times.1); + for (_, d) in self.drops.range((l, r)) { + match d.src { + SocketAddr::V4(i) => { + if i.ip().cmp(&raddr) == Ordering::Equal { + drop_count += 1; + } + } + _ => panic!("ipv6 not support"), + } + + match d.dst { + SocketAddr::V4(i) => { + if i.ip().cmp(&uaddr) == Ordering::Equal && i.port() == uport { + drop_count += 1; + } + } + _ => panic!("ipv6 not support"), + } + } + + for (_, r) in self.retrans.range((l, r)) { + match r.src { + SocketAddr::V4(i) => { + if i.ip().cmp(&raddr) == Ordering::Equal { + retran_count + .entry(r.retran_type.clone()) + .and_modify(|x| *x += 1) + .or_insert(1); + } + } + _ => panic!("ipv6 not support"), + } + + match r.dst { + SocketAddr::V4(i) => { + if i.ip().cmp(&uaddr) == Ordering::Equal && i.port() == uport { + retran_count + .entry(r.retran_type.clone()) + .and_modify(|x| *x += 1) + .or_insert(1); + } + } + _ => panic!("ipv6 not support"), + } + } + + let del = times.0 - 1_000_000_000; + self.drops = self.drops.split_off(&del); + self.retrans = self.retrans.split_off(&del); + + let mut lines = vec![]; + if drop_count != 0 { + lines.push(format!("该请求丢包数为:{}", drop_count)); + } + + for (k, v) in retran_count { + lines.push(format!("{}重传{}次", k, v)); + } + + lines.join(",") + } +} + +pub fn clear_xrtrace() { + GLOBAL_XRTRACE.lock().unwrap().clear(); +} + +pub fn run_xrtrace() { + clear_xrtrace() +} + +pub fn xtrace_collect(raddr: Ipv4Addr, uaddr: Ipv4Addr, uport: u16, times: (u64, u64)) -> String { + GLOBAL_XRTRACE + .lock() + .unwrap() + .collect(raddr, uaddr, uport, times) +} diff --git a/source/tools/monitor/observ/xwatcher/src/unity.rs b/source/tools/monitor/observ/xwatcher/src/unity.rs new file mode 100644 index 0000000000000000000000000000000000000000..7fe3e88a58aebc9af69732a8a9a50ec1136b5b41 --- /dev/null +++ b/source/tools/monitor/observ/xwatcher/src/unity.rs @@ -0,0 +1,30 @@ +use 
chrono::prelude::*; +use std::os::unix::net::UnixDatagram; + +pub fn unity_sock_send(addr: &str, data: &String) { + if data.len() == 0 { + return; + } + let sock = UnixDatagram::unbound().expect("failed to create unix sock"); + log::debug!("send message to unity: {}", data); + match sock.connect(addr) { + Ok(()) => { + if let Err(e) = sock.send(&data.as_bytes()) { + println!("failed to send data to sock: {addr}, error: {e}, data: {data}"); + } + } + Err(e) => { + println!("failed to connnect to sock: {addr}, error: {e}, data: {data}"); + } + } +} + +pub fn unity_sock_send_event(addr: &str, extra: &String) { + let ts = Utc::now().timestamp(); + let data = format!( + "node_event event_type=\"metric_exception\",description=\"nginx\",extra={:?},ts={:?}", + extra, ts + ); + + unity_sock_send(addr, &data); +} diff --git a/source/tools/monitor/observ/xwatcher/src/utils.rs b/source/tools/monitor/observ/xwatcher/src/utils.rs new file mode 100644 index 0000000000000000000000000000000000000000..fa868d0a3d0ea063027ca79618289e24d9697f29 --- /dev/null +++ b/source/tools/monitor/observ/xwatcher/src/utils.rs @@ -0,0 +1,103 @@ +use std::{os::unix, net::Ipv4Addr}; + +use cached::proc_macro::once; +use chrono::prelude::*; +use procfs::process::Process; +use serde::Serialize; + +pub const METRICS_PERIOD: i32 = 30; + +#[once] +pub fn cached_clock_tick() -> i64 { + unsafe { libc::sysconf(libc::_SC_CLK_TCK) } +} +#[derive(Debug, Clone, Serialize)] +struct Alarm +where + T: Serialize, +{ + alert_id: String, + instance: String, + alert_item: String, + alert_category: String, + alert_source_type: String, + alert_time: i64, + status: String, + labels: T, +} + +#[derive(Debug, Clone, Serialize)] +struct AlarmTopic { + topic: String, + data: Alarm, +} + +impl Alarm +where + T: Serialize, +{ + pub fn new(labels: T) -> Self { + Alarm { + alert_id: uuid::Uuid::new_v4().to_string(), + instance: get_host_ip(), + alert_item: "nginx".to_owned(), + alert_category: "MONITOR".to_owned(), + 
alert_source_type: "sysak".to_owned(), + alert_time: (Utc::now().timestamp() as i64) * 1000, + status: "FIRING".to_owned(), + labels, + } + } +} + +pub fn send_alarm(labels: T) +where + T: Serialize, +{ + let alarm = Alarm::new(labels); + let alarm_topic = AlarmTopic { + topic: "SYSOM_SAD_ALERT".to_owned(), + data: alarm, + }; + log::debug!("{}", serde_json::to_string(&alarm_topic).unwrap()); + let client = reqwest::blocking::Client::new(); + match client + .post("http://192.168.0.127/api/v1/cec_proxy/proxy/dispatch") + .json(&alarm_topic) + .send() + { + Ok(resp) => { + log::debug!("{}", resp.text().unwrap()); + } + Err(e) => { + log::error!("failed to send alarm event: {}", e); + } + } +} + +#[once] +fn get_host_ip() -> String { + local_ip_address::local_ip().unwrap().to_string() +} + +pub fn get_ipv4(s: &str) -> Ipv4Addr { + let vs: Vec<&str> = s.split(':').collect(); + for v in vs { + if v.contains(".") { + let ip: Ipv4Addr = v.parse().unwrap(); + return ip; + } + } + panic!("no ip addr") +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn show_hostip() { + println!("{:?}", get_ipv4("127.0.0.1:600")); + } + +} diff --git a/source/tools/monitor/observ/xwatcher/src/xwatcher.rs b/source/tools/monitor/observ/xwatcher/src/xwatcher.rs new file mode 100644 index 0000000000000000000000000000000000000000..ce6bb3f4110d53d005fe86f4cc5e33f781d59320 --- /dev/null +++ b/source/tools/monitor/observ/xwatcher/src/xwatcher.rs @@ -0,0 +1 @@ +pub struct XWatcher {} diff --git a/source/tools/monitor/unity/Dockerfile b/source/tools/monitor/unity/Dockerfile index 49e867c282f16434ceb911992f9899df582e47f0..e9187933319456bd94d4f0f47794ecddcb76a79f 100644 --- a/source/tools/monitor/unity/Dockerfile +++ b/source/tools/monitor/unity/Dockerfile @@ -3,6 +3,7 @@ MAINTAINER "liaozhaoyan " WORKDIR /root/ RUN source /opt/rh/devtoolset-9/enable && \ yum install -y make wget lua-devel unzip git numactl-devel m4 && \ + yum install -y golang && \ mkdir /root/build && \ cd /root/build && 
\ git clone https://gitee.com/chuyansz/sysak.git && \ @@ -35,6 +36,7 @@ RUN source /opt/rh/devtoolset-9/enable && \ luarocks install luaposix 35.1-1 && \ luarocks install http && \ luarocks install inotify && \ + luarocks install luafilesystem && \ export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/lib/ && \ cd ../beeQ/ && \ make \ No newline at end of file diff --git a/source/tools/monitor/unity/Makefile b/source/tools/monitor/unity/Makefile index 2071a6ab0b051df5f53d909d140d61a9b821f542..3e3c7b39aa52508674de13a1730d6c9a5cfc5e1f 100644 --- a/source/tools/monitor/unity/Makefile +++ b/source/tools/monitor/unity/Makefile @@ -6,3 +6,5 @@ all: echo "build for beeQ" make dist -C beeQ +clean: + make -C collector/plugin clean diff --git a/source/tools/monitor/unity/beaver/beaver.lua b/source/tools/monitor/unity/beaver/beaver.lua index 15518b92dd2fdbce7f0e5bd01c823976edf10792..d3e4869d247b8221b1f55a4cc150708e347885d3 100644 --- a/source/tools/monitor/unity/beaver/beaver.lua +++ b/source/tools/monitor/unity/beaver/beaver.lua @@ -16,22 +16,30 @@ local CurlExportHtml = require("beaver.url_export_html") local CurlExportRaw = require("beaver.url_export_raw") local CLocalBeaver = require("beaver.localBeaver") local CbaseQuery = require("beaver.query.baseQuery") +local system = require("common.system") g_lb = nil function init(que, fYaml) fYaml = fYaml or "../collector/plugin.yaml" + local Cidentity = require("beaver.identity") + local inst = Cidentity.new(fYaml) + local instance = inst:id() + local web = Cframe.new() + local res = system:parseYaml(fYaml) - CurlIndex.new(web) - CurlApi.new(web, que, fYaml) - CurlRpc.new(web) - CurlGuide.new(web) CbaseQuery.new(web, fYaml) + CurlApi.new(web, que, fYaml, instance) - local Cidentity = require("beaver.identity") - local inst = Cidentity.new(fYaml) - local export = Cexport.new(inst:id(), fYaml) + if res.config.url_safe ~= "close" then + CurlIndex.new(web) + CurlRpc.new(web) + CurlGuide.new(web) + end + + + local export = 
Cexport.new(instance, fYaml) CurlExportHtml.new(web, export) CurlExportRaw.new(web, export) diff --git a/source/tools/monitor/unity/beaver/export.lua b/source/tools/monitor/unity/beaver/export.lua index 5a97c6f3f968bdc1964fa9ac42aaae39ad066cb5..6ad668098c4a774555f54dc4c04426f9b8207297 100644 --- a/source/tools/monitor/unity/beaver/export.lua +++ b/source/tools/monitor/unity/beaver/export.lua @@ -7,109 +7,29 @@ local system = require("common.system") local pystring = require("common.pystring") local CfoxTSDB = require("tsdb.foxTSDB") +local CtransPro = require("common.transPro") + require("common.class") local Cexport = class("Cexport") -local function qFormData(from, tData) - local res = {} - local len = #tData - local last = 0 - local c = 0 - for i = len, 1, -1 do - local line = tData[i] - if from == line.title then - if last == 0 or last == line.time then - c = c + 1 - res[c] = line - last = line.time - else - break - end - end - end - return res -end - -local function packLine_us(title, ls, v, time) - local tLs = {} - for k, v in pairs(ls) do - table.insert(tLs, string.format("%s=\"%s\"", k , v)) - end - local label = "" - if #tLs then - label = pystring:join(",", tLs) - label = "{" .. label .. "}" - end - return string.format("%s%s %.1f %d", title, label, v, time/1000) -end - -local function packLine(title, ls, v) - local tLs = {} - local c = 0 - for k, v in pairs(ls) do - c = c + 1 - tLs[c] = string.format("%s=\"%s\"", k , v) - end - local label = "" - if #tLs then - label = pystring:join(",", tLs) - label = "{" .. label .. 
"}" - end - return string.format("%s%s %.1f", title, label, v) -end - function Cexport:_init_(instance, fYaml) self._instance = instance local ms = system:parseYaml(fYaml) self._freq = ms.config.freq self._timestamps = ms.config.real_timestamps - if self._timestamps == true then - self.pack_line = packLine_us - else - self.pack_line = packLine - end + self._tDescr = ms.metrics + self._transPro = CtransPro.new(instance, fYaml, true, self._timestamps) self._fox = CfoxTSDB.new(fYaml) - self._fox:_setupRead() end function Cexport:export() local qs = {} - self._fox:resize() - self._fox:qlast(self._freq, qs) - local res = {} - local c = 0 - for _, line in ipairs(self._tDescr) do - local from = line.from - local tFroms = qFormData(from, qs) - if #tFroms then - local title = line.title - local help = string.format("# HELP %s %s", title, line.help) - c = c + 1 - res[c] = help - local sType = string.format("# TYPE %s %s", title, line.type) - c = c + 1 - res[c] = sType + self._fox:qLastMetric(self._freq + 1, qs) - for _, tFrom in ipairs(tFroms) do - local labels = system:deepcopy(tFrom.labels) - if not labels then - labels = {} - end - labels.instance = self._instance - for k, v in pairs(tFrom.values) do - labels[line.head] = k - c = c + 1 - res[c] = self.pack_line(title, labels, v, tFrom.time) - end - end - end - end - c = c + 1 - res[c] = "" - local lines = pystring:join("\n", res) + local lines = self._transPro:export(qs) return lines end diff --git a/source/tools/monitor/unity/beaver/frame.lua b/source/tools/monitor/unity/beaver/frame.lua index ea90a6eeacc7532bd1299f31bbccbf7439997c7c..f99784b11fc0c72095751ee56845c4b855873c8e 100644 --- a/source/tools/monitor/unity/beaver/frame.lua +++ b/source/tools/monitor/unity/beaver/frame.lua @@ -121,6 +121,7 @@ function Cframe:echo404() local body = "Oops! 
The page may have flown to Mars!!!\n" local headers = self:packServerHeaders(tHead, #body) local tHttp = {stat, headers, body} + error("404") return pystring:join("\r\n", tHttp) end @@ -152,7 +153,7 @@ function Cframe:proc(fread, session) local res, keep = obj:calls(tReq) return res, keep, tReq.session end - + print(self._objs[tReq.path]) return self:echo404(), false, {} end end diff --git a/source/tools/monitor/unity/beaver/guide/baseyaml.md b/source/tools/monitor/unity/beaver/guide/baseyaml.md new file mode 100644 index 0000000000000000000000000000000000000000..7f876d01a41f009fe265cb2b1e1bb3c986601282 --- /dev/null +++ b/source/tools/monitor/unity/beaver/guide/baseyaml.md @@ -0,0 +1,98 @@ +# site +/etc/sysak/base.yaml +# 各字段含义 +```yaml +config: + freq: 20 # 采集间隔 + port: 8400 # 监听端口 + bind_addr: 0.0.0.0 # 监听IP + backlog: 32 # 服务监听队列长度 + url_safe: close # 只开放必要的url, + identity: # 实例id配置模式,当前支持以下五种模式 + # hostip: 获取主机IP + # curl: 通过网络请求获取,需要指定url 参数,适合ECS场景 + # file: 从文件读取,需要指定path 参数 + # specify: 指定id,需要指定name参数 + mode: hostip + name: test_specify + real_timestamps: true # 上报监测数据的真实时间,默认关闭 + # unix_socket: "/tmp/sysom_unity.sock" # 通过unix_socket方式进行数据传输,默认关闭 + proc_path: /mnt/host/ # proc 文件路径,在host侧,为/。在容器侧,如配置 -v /:/mnt/host 则为 /mnt/host + limit: # 资源限制 + cpu: 30 + mem: 60 + cellLimit: -1 # guardSched插件执行时长,值为-1表示不限制时长,若不设置,则默认为50ms + +outline: # 外部数据入口,适合接入外部数据场景 + - /tmp/sysom # 外部unix socket 路径,可以指定多个 + +luaPlugins: ["proc_buddyinfo", "proc_diskstats", "proc_meminfo", "proc_mounts", "proc_netdev", + "proc_snmp_stat", "proc_sockstat", "proc_stat", "proc_statm", "proc_vmstat"] # 控制lua 插件加载 + +pushTo: # 向指定数据库推送数据 + to: "AutoMetrics" # 推送数据的模式,当前支持以下三种模式 + # "AutoMetrics",推送到metricstore,自动识别project,endpoint和metricstore,需要配置addition + # "Metrics",推送到metricstore,需要设置project,endpoint,metricstore和addition + # "Influx",推送到Influxdb,需设置host,port和url + addition: "***" # 加密后的账户信息 + project: "sysom-metrics-cn-somewhere" # metricstore的project名 + endpoint: 
"cn-somewhere-intranet.log.aliyuncs.com" # metricstore的域名 + metricstore: "auto" # metricstore的时序数据库名 + host: "xxx.aliyuncs.com" + port: 8242 + url: "/api/v2/write?db=sysom" + +plugins: # 插件列表 对应 /collector/plugin 路径下编译出来的c库文件 + - so: kmsg # 库名 + description: "collect dmesg info." # 描述符 + +metrics: # export 导出的 metrics 列表 + - + title: sysak_proc_cpu_total # 显示的表名 + from: cpu_total # 数据源头,对应collector生成的数据表 + head: mode # 字段名,在prometheus 中以label 方式呈现 + help: "cpu usage info for total." # help 说明 + type: "gauge" # 数据类型 + discrete: true # 数据是否为离散的,不定义则默认为false + blacklist: 设置数据上传的黑名单,黑名单和白名单不可同时设置 + cpu: "cpu1$" # 按照lua的正则表达式进行设置 https://www.cnblogs.com/meamin9/p/4502461.html + mode: "softirq" + whitelist: 设置数据上传的白名单,黑名单和白名单不可同时设置 + cpu: "cpu1$" # 行协议中tag的限制,对应: + mode: "softirq" # 行协议中field的限制的写法,对应 字段名: + +observe: # 数据观测设置 + comms: # 需要观测的进程名 + java: "cgroup xxx" #需要获取的参数,双引号内的参数名用空格隔开 + mysqld: "cgroup" + ··· + period: 60 # 监测进程的刷新间隔 + +diagnose: # 诊断功能设置 + host: # sysom中心端ip,example http://111.111.111.111 + token: "" 访问sysom中心端的加密后的token + jobs: # 具体诊断项目的设置,不设置会将对应的block和time设置为默认值 + memgraph: # 诊断的service_name + block: 60 # 阻塞时间,单位秒 + time: 30 # 执行时间,单位秒 + +resctrl: + path: "sys/fs/resctrl" # resctrl文件系统的挂载路径 + + monLuaPlugin: ["rdt_llc_occupancy", "rdt_local_mem_bw", "rdt_total_mem_bw"] # 监控事件的采集插件名 + + resLuaPlugin: ["rdt_size"] # RDT资源分配策略的采集插件 + + auto: true # 如果存在atuo并且auto为true表示自动递归搜索所有RDT资源组和监控组,否则只采集group字段指定的相关路径 + + group: # 显式指定采集路径,当auto为false或者不存在auto时有效 + - name: "" #resctrl根目录下的资源组名 "" --> /sys/fs/resctrl/ + - name: "LS" #resctrl根目录下的资源组名 "" --> /sys/fs/resctrl/LS + monitor: ["mon1", "mon2"] # 资源组mon_groups目录下手动创建的监控组 "mon1" --> /sys/fs/resctrl/LS/mon_groups/mon1 + +# cgroup-v2指标采集 +cgroupv2: + directPaths: ["kubepods/besteffort", "kubepods.slice/kubepods-besteffort.slices"] # 配置cgroup的相对路径 + luaPlugin: ["cg_sched_cfs_stat_v2","cg_cpu_stat_v2"] # 采集插件 +```` + diff --git a/source/tools/monitor/unity/beaver/guide/cec.md 
b/source/tools/monitor/unity/beaver/guide/cec.md new file mode 100644 index 0000000000000000000000000000000000000000..2d1f8b61ad6d5ba5af194501b0a313e52b84a25b --- /dev/null +++ b/source/tools/monitor/unity/beaver/guide/cec.md @@ -0,0 +1,64 @@ +# 告警中心接口 + +告警中心需要在yaml 里面配置告警中心server 位置,如 + +```yaml +cec: "http://192.168.0.127" +``` + +前面的http:// 不能省略 + +## api + +往untiy 推送一个json 数据, + +* /api/cec 投递一个事件,含topic data 字段 +* /api/alert 投递一个告警,只含 data 内容 会自动添加 SYSOM_SAD_ALERT topic + +事件示例: + +```json +{ + "topic": "SYSOM_SAD_ALERT", # 投递主题,对于告警固定使用 SYSOM_SAD_ALERT + "data": { # SAD 格式的告警数据 + "alert_item": "sysload_high", + "alert_category": "MONITOR", + "alert_source_type": "grafana", + "alert_time": 1694672798174, # 可选,单位ms,未写会自动添加 + "alert_level": "WARNING", + "status": "FIRING", + "labels": { + "grafana_folder": "rules", + "alertname": "test" + }, + "annotations": { + "summary": "192.168.23.6 实例的系统负载长时间高于100,建议使用系统负载诊断分析sys高的原因" + }, + "origin_alert_data": {...} + } +} +``` + +## 示例代码 + +以python为例 + +```python +import json +import requests + +url = "http://127.0.0.1:8400/api/alert" +data = {"alert_item": "sysload_high", + "alert_category": "MONITOR", + "alert_source_type": "grafana", + "alert_level": "WARNING", + "status": "FIRING", + "labels": { + "instance": "192.168.23.6", + "grafana_folder": "rules", + "alertname": "test" + } +} +res = requests.post(url, data=json.dumps(data)) +print(res.content, res) +``` diff --git a/source/tools/monitor/unity/beaver/guide/dataquery.md b/source/tools/monitor/unity/beaver/guide/dataquery.md new file mode 100644 index 0000000000000000000000000000000000000000..41993868b50e87d290b125255289b37fa5bef67e --- /dev/null +++ b/source/tools/monitor/unity/beaver/guide/dataquery.md @@ -0,0 +1,161 @@ +# 数据查询接口 + +## 常规查询 + +[常规查询入口](http://127.0.0.1:8400/api/query) + +查询的mode可以为last table date,mode不可缺省。 + +一次最多查询200条数据。 + +数据最多保存7天。 + +### mode == last +mode为last时,需要的参数为table、time。 + +查找table表从当前时间开始time时长的数据。 + 
+time格式为%dh%dm%ds,其中h、m、s分别为时、分、秒的单位,若单位缺省,则默认为秒。 + +若table缺省,则查询所有表格的数据。 + +python脚本查询示例: + +```python +def q_by_table(): + url = "http://127.0.0.1:8400/api/query" + d = {"mode": "last", "time": "5m", "table": ["per_sirqs"]} + res = requests.post(url, json=d) + ret = res.content.decode() + print(ret) +``` + +curl查询示例: + +```bash +curl --header "Content-Type: application/json" \ + --request POST \ + --data "{\"mode\":\"last\",\"time\":\"5m\",\"table\":[\"cpus\"]}" \ + http://100.83.167.10:3350/api/query +``` + +### mode == table + +mode为table时,需要的参数为duration。 + +查询从当前时间开始前duration小时的所有数据。 + +duration单位为小时,缺省则默认为2小时。不能超过24小时。 + +python脚本查询示例: + +```python +def q_table(): + url = "http://127.0.0.1:8400/api/query" + d = {"mode": "table", "duration": "1"} + res = requests.post(url, json=d) + ret = res.content.decode() + print(ret) +``` + +curl查询示例: + +```bash +curl --header "Content-Type: application/json" \ + --request POST \ + --data "{\"mode\":\"table\",\"duration\":\"1\"}" \ + http://100.83.167.10:3350/api/query +``` + +### mode == date + +mode为date时,需要的参数为start、stop、tz、table。 + +查询从start到stop之间,table的数据,其中start和stop为tz时区的时间。 + +start和stop的格式为%Y-%m-%d %H:%M:%S,start和stop之间不能超过24小时。 + +tz缺省则为0。 + +若table缺省,则查询所有表格的数据。 + +python脚本查询示例: + +```python +def q_by_date(): + now = datetime.datetime.now() + delta1 = datetime.timedelta(days=1, hours=1) + delta2 = datetime.timedelta(minutes=5) + d1 = now - delta1 + d2 = d1 - delta2 + s1 = d1.strftime("%Y-%m-%d %H:%M:%S") + s2 = d2.strftime("%Y-%m-%d %H:%M:%S") + + print(s1, s2) + url = "http://127.0.0.1:8400/api/query" + d = {"mode": "date", "start": s2, "stop": s1, "tz": 8, "table": ["cpu_total", "cpus"]} + res = requests.post(url, json=d) + ret = res.content.decode() + print(ret) +``` + +curl查询示例: + +```bash +curl --header "Content-Type: application/json" \ + --request POST \ + --data "{\"mode\":\"date\",\"start\":\"2023-07-18 17:25:00\",\"stop\":\"2023-07-18 17:30:00\",\"tz\":8,\"table\":[\"cpu_total\", \"cpus\"]}" \ + 
http://100.83.167.10:3350/api/query +``` + +## sql语句查询 +[sql语句查询入口](http://127.0.0.1:8400/api/sql) + +### 查询语句要求 +根据sql语句进行查询,查询的语句基本遵循sql语法,但有如下规定 : + +1. where中必须包含对time的限制 + + 可以使用两种方式对time进行限制 + * between time1 and time2 + + time1 和time2的格式为%Y-%m-%d %H:%M:%S + * time>NOW(-secs) + + 如: + ```bash + SELECT * FROM tbl_a WHERE time BETWEEN time1 and time2 + SELECT * FROM tbl_b WHERE time > NOW(-10) + ``` + + +2. where中仅可使用“=”对tag进行筛选 + * 正确写法 + ```SELECT * FROM tbl_a WHERE time > NOW(-10) and cpu = cpu1``` + * 错误写法 + ```SELECT * FROM tbl_a WHERE time > NOW(-10) and cpu > cpu1``` + +3. 仅支持筛选出fields的值 + 关于field的定义可以参考[行协议说明](/guide/outline.md) + +### 查询示例 + +```python +def post_test(d): + url = "http://127.0.0.1:8400/api/sql" + res = requests.post(url, json=d) + ret = res.content.decode() + print(ret) + +def q_by_sql(): + post_test("SELECT net_rx, rcu FROM per_sirqs WHERE time > NOW(-10) and cpu = cpu1") + now = datetime.datetime.now() + delta1 = datetime.timedelta(hours=8) + delta2 = datetime.timedelta(minutes=5) + d1 = now + delta1 + d2 = d1 - delta2 + s1 = d1.strftime("%Y-%m-%d %H:%M:%S") + s2 = d2.strftime("%Y-%m-%d %H:%M:%S") + sqlclause = "SELECT net_rx, rcu FROM per_sirqs WHERE time BETWEEN '" + s2 + "' and '" + s1 + "' and cpu = cpu1" + post_test(sqlclause) +``` \ No newline at end of file diff --git a/source/tools/monitor/unity/beaver/guide/develop.md b/source/tools/monitor/unity/beaver/guide/develop.md index 612affaf0d8c767bb5f1156866e3a5b1eb2d3494..453e7a2292a5effe8ef10214a44ada3fee220477 100644 --- a/source/tools/monitor/unity/beaver/guide/develop.md +++ b/source/tools/monitor/unity/beaver/guide/develop.md @@ -17,7 +17,7 @@ git clone -b unity https://gitee.com/anolis/sysak.git ## 2.2 拉起容器 ``` -docker run -v /root/1ext/code/:/root/code -v /:/mnt/host:ro --net=host --pid=host --name unity --privileged -itd registry.cn-hangzhou.aliyuncs.com/sysom/sysom:v3.0 /bin/sh +docker run -v /root/1ext/code/:/root/code -v /:/mnt/host:ro --net=host --pid=host --name unity 
--privileged -itd registry.cn-hangzhou.aliyuncs.com/sysom/sysom:v5.0 /bin/sh ``` docker 参数说明: diff --git a/source/tools/monitor/unity/beaver/guide/diagnose.md b/source/tools/monitor/unity/beaver/guide/diagnose.md new file mode 100644 index 0000000000000000000000000000000000000000..e0173d1f223f2f42c6727bb42769d639bdd4545b --- /dev/null +++ b/source/tools/monitor/unity/beaver/guide/diagnose.md @@ -0,0 +1,302 @@ +# 诊断接口 +[url](http://127.0.0.1:8400/api/diag) + +上传诊断命令时需要带上对应的参数 + +其中,instance如果不传,默认为"127.0.0.1" + +具体如下 +## iohang + +### 需要的参数 + +instance:需要诊断的实例IP + +timeout:诊断时长,单位秒 + +threshold:保留IO HANG住时间超过阈值的IO,单位毫秒 + +disk:需要诊断的目标磁盘,缺省为所有磁盘 + + +### 诊断示例 +```python +import json +import requests + +url = "http://127.0.0.1:8400/api/diag" +params = {"instance" : "127.0.0.1", "timeout" : 5, "threshold" : 10} +body = {"service_name": "iohang", "params": params} +data = {"body": body} +res = requests.post(url, data=json.dumps(data)) +print(res.content, res) +``` +## iofsstat +### 需要的参数 +instance:需要诊断的实例IP + +timeout:诊断时长,也是IO流量统计周期,单位秒,建议不超过60秒 + +disk:需要诊断的目标磁盘,缺省为所有磁盘 + +### 诊断示例 +```python +import json +import requests + +url = "http://127.0.0.1:8400/api/diag" +params = {"instance" : "127.0.0.1", "timeout" : 5} +body = {"service_name": "iofsstat", "params": params} +data = {"body": body} +res = requests.post(url, data=json.dumps(data)) +print(res.content, res) +``` + +## iolatency +### 需要的参数 +instance:需要诊断的实例IP + +timeout:诊断时长,单位秒 + +threshold:保留IO延迟大于设定时间阈值的IO,单位毫秒 + +disk:需要诊断的目标磁盘,缺省为所有磁盘 + +### 诊断示例 +```python +import json +import requests + +url = "http://127.0.0.1:8400/api/diag" +params = {"instance" : "127.0.0.1", "timeout" : 5, "threshold" : 10} +body = {"service_name": "iolatency", "params": params} +data = {"body": body} +res = requests.post(url, data=json.dumps(data)) +print(res.content, res) +``` + + +## jitter +### 需要的参数 +instance:需要诊断的实例IP + +time:诊断时长,单位秒 + +### 诊断示例 +```python +import json +import requests + +url = "http://127.0.0.1:8400/api/diag" 
+params = {"instance" : "127.0.0.1", "time" : 5} +body = {"service_name": "jitter", "params": params} +data = {"body": body} +res = requests.post(url, data=json.dumps(data)) +print(res.content, res) +``` + +## loadtask + +### 需要的参数 +instance:诊断的实例IP + +### 诊断示例 +```python +import json +import requests + +url = "http://127.0.0.1:8400/api/diag" +params = {"instance" : "127.0.0.1"} +body = {"service_name": "loadtask", "params": params} +data = {"body": body} +res = requests.post(url, data=json.dumps(data)) +print(res.content, res) +``` + +## memgraph + +### 需要的参数 +instance:诊断的实例IP + +### 诊断示例 +```python +import json +import requests + +url = "http://127.0.0.1:8400/api/diag" +params = {"instance" : "127.0.0.1"} +body = {"service_name": "memgraph", "params": params} +data = {"body": body} +res = requests.post(url, data=json.dumps(data)) +print(res.content, res) +``` + +## filecache + +### 需要的参数 +instance:诊断的实例IP + +value:需要诊断的容器ID,Pod名,cgroup + +type:需要诊断的类型(容器,POD,cgroup, host, all(所有容器)) + +### 诊断示例 +```python +import json +import requests + +url = "http://127.0.0.1:8400/api/diag" +params = {"instance" : "127.0.0.1", "value" : "30001a90d0ff", "type" : "container"} +body = {"service_name": "filecache", "params": params} +data = {"body": body} +res = requests.post(url, data=json.dumps(data)) +print(res.content, res) +``` + +## oomcheck +### 需要的参数 +instance:诊断的实例IP + +time:需要诊断OOM的时间点,默认为最近一次 + +### 诊断示例 +```python +import json +import requests + +url = "http://127.0.0.1:8400/api/diag" +params = {"instance" : "127.0.0.1"} +body = {"service_name": "oomcheck", "params": params} +data = {"body": body} +res = requests.post(url, data=json.dumps(data)) +print(res.content, res) +``` +## ossre + +## packetdrop +### 需要的参数 +instance:需要诊断的实例IP + +time:诊断时长,单位秒 + +### 诊断示例 +```python +import json +import requests + +url = "http://127.0.0.1:8400/api/diag" +params = {"instance" : "127.0.0.1", "time" : 5} +body = {"service_name": "packetdrop", "params": params} +data = {"body": body} 
+res = requests.post(url, data=json.dumps(data)) +print(res.content, res) +``` + +## pingtrace +### 需要的参数 +origin_instance:源实例IP + +target_instance:目标实例IP + +pkg_num:追踪包数 + +time_gap:间隔毫秒数 + +type:报文协议(icmp,tcp,udp) + +### 诊断示例 +```python +import json +import requests + +url = "http://127.0.0.1:8400/api/diag" +params = {"origin_instance" : "127.0.0.1", "target_instance" : "192.168.0.1", "pkg_num" : 5, "time_gap" : 10, "type" : "icmp"} +body = {"service_name": "pingtrace", "params": params} +data = {"body": body} +res = requests.post(url, data=json.dumps(data)) +print(res.content, res) +``` + + + +## retran +### 需要的参数 +instance:需要诊断的实例IP + +time:诊断时长,单位秒 + +### 诊断示例 +```python +import json +import requests + +url = "http://127.0.0.1:8400/api/diag" +params = {"instance" : "127.0.0.1", "time" : 5} +body = {"service_name": "retran", "params": params} +data = {"body": body} +res = requests.post(url, data=json.dumps(data)) +print(res.content, res) +``` + +## schedmoni + +### 需要的参数 +instance:需要诊断的实例IP + +time:本次的期望的诊断时间,默认20秒 + +### 诊断示例 +```python +import json +import requests + +url = "http://127.0.0.1:8400/api/diag" +params = {"instance" : "127.0.0.1", "time" : 5} +body = {"service_name": "schedmoni", "params": params} +data = {"body": body} +res = requests.post(url, data=json.dumps(data)) +print(res.content, res) +``` + +## taskprofile + +### 需要的参数 +instance:需要诊断的实例IP + +timeout:诊断时长,也是各应用占用cpu统计周期,单位分钟,建议不超过10分钟 + +### 诊断示例 +```python +import json +import requests + +url = "http://127.0.0.1:8400/api/diag" +params = {"instance" : "127.0.0.1", "timeout" : 1} +body = {"service_name": "taskprofile", "params": params} +data = {"body": body} +res = requests.post(url, data=json.dumps(data)) +print(res.content, res) +``` +## jruntime + +### 需要的参数 +instance:需要诊断的实例IP + +nums:显示top nums 的进程诊断结果,格式为字符串,设置了nums就不需要设置pids + +pids:进程pid列表,格式为字符串,设置了nums的话pids不会生效 + +global:全局热点是否开启,"on"为开启,"off"为关闭,不设置则默认开启。 + +### 诊断示例 +```python +import json +import requests + +url = 
"http://127.0.0.1:8400/api/diag" +params = {"instance" : "127.0.0.1", "pids" : "12267", "global" : "on"} +body = {"service_name": "jruntime", "params": params} +data = {"body": body} +res = requests.post(url, data=json.dumps(data)) +print(res.content, res) +``` \ No newline at end of file diff --git a/source/tools/monitor/unity/beaver/guide/guide.md b/source/tools/monitor/unity/beaver/guide/guide.md index a377b616f0ff63163f31bed87582f5d936bf362d..d1d5d633ce607ad859ec9c03a80561be7624efa7 100644 --- a/source/tools/monitor/unity/beaver/guide/guide.md +++ b/source/tools/monitor/unity/beaver/guide/guide.md @@ -5,4 +5,6 @@ 4. [在lua 中使用pystring](/guide/pystring.md) 5. [bpf\ map 开发](/guide/bpf.md) 6. [bpf perf 开发](/guide/bpf_perf.md) -7. [外部行协议写入](/guide/outLine.md) \ No newline at end of file +7. [外部行协议写入](/guide/outLine.md) +8. [数据查询接口](/guide/dataquery.md) +9. [诊断接口](/guide/diagnose.md) \ No newline at end of file diff --git a/source/tools/monitor/unity/beaver/guide/metrics.md b/source/tools/monitor/unity/beaver/guide/metrics.md index f8984bef0f903ce571b8713c024d1c702caf63d6..349d5b3df5676dbd18ac10c6cb0de96252329123 100644 --- a/source/tools/monitor/unity/beaver/guide/metrics.md +++ b/source/tools/monitor/unity/beaver/guide/metrics.md @@ -46,27 +46,6 @@ | iowait | % | iowait百分比 | | collector/proc\_stat.lua | -### cpus 表 - -* 对应export 指标 sysak\_proc\_cpus -* 属性标签: mode - - -| 指标名 | 单位 | 标签说明 | 备注 | 源码路径 | -| :--- | ---: | :---- | :---- | :--- | -| cpu_name | | CPU 名称 | | collector/proc\_stat.lua | -| softirq | % | 软中断百分比 | | collector/proc\_stat.lua | -| user | % | 用户态占用率百分比 | | collector/proc\_stat.lua | -| guestnice | % | guestnice百分比 | | collector/proc\_stat.lua | -| guest | % |guest百分比 | | collector/proc\_stat.lua | -| steal | % |steal百分比 | | collector/proc\_stat.lua | -| hardirq | % | 硬中断百分比 | | collector/proc\_stat.lua | -| nice | % | nice百分比 | | collector/proc\_stat.lua | -| idle | % | idle百分比 | | collector/proc\_stat.lua | -| sys | % | sys百分比 | | 
collector/proc\_stat.lua | -| iowait | % | iowait百分比 | | collector/proc\_stat.lua | - - ### stat\_counters表 * 对应export 指标 sysak\_proc\_stat\_counters @@ -94,21 +73,6 @@ | load10 | - | load10 | | collector/proc\_load.lua | | plit | - | plit | | collector/proc\_load.lua | - -### proc\_loadavg 表 - -* 对应 export 指标: sysak\_proc\_loadavg -* 属性标签:value - -| 指标名 | 单位 | 标签说明 | 备注 | 源码路径 | -|:---------| ---: | :---- | :---- | :--- | -| runq | - | rq队列长度 | | collector/proc\_load.lua | -| load1 | - | load1 | | collector/proc\_load.lua | -| load5 | - | load5 | | collector/proc\_load.lua | -| load10 | - | load10 | | collector/proc\_load.lua | -| plit | - | plit | | collector/proc\_load.lua | - - ### meminfo 表 * 对应 export 指标: sysak\_proc\_meminfo @@ -297,6 +261,19 @@ | time\_squeeze | 个 | cpu,对应CPU号 | 函数 net\_rx\_action 调用次数. | collector/proc\_softnet\_stat.lua | | flow\_limit\_count | 个 | cpu,对应CPU号 | 达到 flow limit 的次数. | collector/proc\_softnet\_stat.lua | + +### con_net_stat 表 + +| 指标名 | 单位 | 标签说明 | 备注 | 源码路径 | +| :----------- | ---- | :--------------------------- | :--- | :----------------------------------------- | +| net_rx_bytes | byte | 容器对应网口接收字节数 | interface对应容器中的网口(如eth0)| collector/container/con_net_stat.lua | +| net_rx_packets | packet | 容器对应网口接收数据包数 | | collector/container/con_net_stat.lua | +| net_rx_dropped | packet | 容器对应网口接收端丢包数 | | collector/container/con_net_stat.lua | +| net_tx_bytes | byte | 容器对应网口发送字节数 | | collector/container/con_net_stat.lua | +| net_tx_packets | packet | 容器对应网口发送数据包数 | | collector/container/con_net_stat.lua | +| net_tx_dropped | packet | 容器对应网口发送端丢包数 | | collector/container/con_net_stat.lua | + + ### cgroups 表 * 对应 export 指标: sysak\_cgroups @@ -337,6 +314,27 @@ | util | 占比 | IO繁忙度 | - | ../ioMonitor/ioMon/ioMonitorClass.py | | await | ms | 平均每个IO的延迟 | - | ../ioMonitor/ioMon/ioMonitorClass.py | +### disks表 +* 对应 export 指标: sysom\_proc\_disks +* 属性标签:counter + +| 指标名 | 单位 | 标签说明 | 备注 | 源码路径 | +| :--- | --- | :---- | :---- | :--- | +| 
disk_name | - | 磁盘或分区名 | - | collector/proc\_diskstats.lua | +| busy | % | 该磁盘非空闲的时间比例 | - | collector/proc\_diskstats.lua | +| inflight | 个 | 正在进行中的IO请求个数 | - | collector/proc\_diskstats.lua | +| xfers | 次 | 读写总次数 | - | collector/proc\_diskstats.lua | +| bsize | B | 平均每次IO的字节数 | - | collector/proc\_diskstats.lua | +| reads | 次 | 读请求次数 | - | collector/proc\_diskstats.lua | +| writes | 次 | 写请求次数 | - | collector/proc\_diskstats.lua | +| rmerge | 次 | 合并后的读请求数 | - | collector/proc\_diskstats.lua | +| wmerge | 次 | 合并后的写请求数 | - | collector/proc\_diskstats.lua | +| rkb | KB | 写的总流量大小 | - | collector/proc\_diskstats.lua | +| wkb | KB | 写的总流量大小 | - | collector/proc\_diskstats.lua | +| rmsec | ms | 所有读请求总耗时 | - | collector/proc\_diskstats.lua | +| wmsec | ms | 所有写请求总耗时 | - | collector/proc\_diskstats.lua | + + ### IOMonIndForSystemIO 表 统计系统IO异常 @@ -352,17 +350,18 @@ ### cg_cpu_stat 表 -* 对应 export 指标: sysak\_ +* 对应 export 指标: sysom\_container\_cpu\_stat * 属性标签:value | 指标名 | 单位 | 标签说明 | 备注 | 源码路径 | | :--- | --- | :---- | :---- | :--- | | nr_throttled | - | total throttled number | | collector/container/cg\_cpu\_stat.lua | | throttled_time | ms | total throttled time | | collector/container/cg\_cpu\_stat.lua | +| nr_periods | - | total period number | | collector/container/cg\_cpu\_stat.lua | ### cg_proc_stat 表 -* 对应 export 指标: sysak\_ +* 对应 export 指标: sysom\_container\_proc\_stat * 属性标签:value | 指标名 | 单位 | 标签说明 | 备注 | 源码路径 | @@ -387,7 +386,7 @@ ### cg_memfail_cnt 表 -* 对应 export 指标: sysak\_ +* 对应 export 指标: sysom\_container\_memfail\_cnt * 属性标签:value | 指标名 | 单位 | 标签说明 | 备注 | 源码路径 | @@ -396,10 +395,11 @@ ### cg_memdrcm_latency 表 -* 对应 export 指标: sysak\_ +* 对应 export 指标: sysom\_container\_memdrcm\_latency * 属性标签:value This table show the hist of the latency of direct memory reclamation + | 指标名 | 单位 | 标签说明 | 备注 | 源码路径 | | :--- | --- | :---- | :---- | :--- | | memDrcm_lat_1to5ms | - | times 1to5ms | | collector/container/cg\_memory\_drcm\_latency.lua | @@ -408,13 +408,37 @@ This 
table show the hist of the latency of direct memory reclamation | memDrcm_lat_100to500ms | - | times 100to500ms | | collector/container/cg\_memory\_drcm\_latency.lua | | memDrcm_lat_500to1000ms | - | times 500msto1s | | collector/container/cg\_memory\_drcm\_latency.lua | | memDrcm_lat_1000ms | - | times more than 1s | | collector/container/cg\_memory\_drcm\_latency.lua | - + +### cg_blkio_stat 表 + +* 对应 export 指标: sysom\_container\_blkio\_stat +* 属性标签:value + +| 指标名 | 单位 | 标签说明 | 备注 | 源码路径 | +| :--- | --- | :---- | :---- | :--- | +| writes_service_bytes | byte | 容器中写字节数 | device对应容器进行IO的块设备名 | collector/container/cg\_blkio\_stat.lua | +| reads_service_bytes | byte | 容器中读字节数 | | collector/container/cg\_blkio\_stat.lua | +| total_service_bytes | byte | 容器所有IO操作(read, write, sync, async)的字节数 | | collector/container/cg\_blkio\_stat.lua | +| writes_serviced | I/Os | 容器中写IO个数 | | collector/container/cg\_blkio\_stat.lua | +| reads_serviced | I/Os| 容器中读IO个数| | collector/container/cg\_blkio\_stat.lua | +| total_serviced | I/Os | 容器中所有IO操作(read, write, sync, async)的个数 | | collector/container/cg\_blkio\_stat.lua | +| writes_bytes_queued | byte | 容器中写排队的IO字节数 | | collector/container/cg\_blkio\_stat.lua | +| reads_bytes_queued | byte | 容器中读排队的IO字节数| | collector/container/cg\_blkio\_stat.lua | +| total_bytes_queued | byte | 容器所有IO操作(read, write, sync, async)的排队字节数 | | collector/container/cg\_blkio\_stat.lua | +| writes_io_queued | I/Os | 容器中写排队IO个数 || collector/container/cg\_blkio\_stat.lua | +| reads_io_queued | I/Os | 容器中读排队IO个数| | collector/container/cg\_blkio\_stat.lua | +| total_io_queued | I/Os | 容器中对所有IO操作(read, write, sync, async)排队个数 | | collector/container/cg\_blkio\_stat.lua | +| writes_wait_time | ns | 容器中写I/O在调度队列中等待的时间 || collector/container/cg\_blkio\_stat.lua | +| reads_wait_time | ns | 容器中读I/O在调度队列中等待的时间| | collector/container/cg\_blkio\_stat.lua | +| total_wait_time | ns | 容器中对所有IO操作(read, write, sync, async)在调度队列中等待的时间 | | 
collector/container/cg\_blkio\_stat.lua | + ### cg_memmcmp_latency 表 -* 对应 export 指标: sysak\_ +* 对应 export 指标: sysom\_container\_memmcmp\_latency * 属性标签:value This table show the hist of the latency of direct memory compaction + | 指标名 | 单位 | 标签说明 | 备注 | 源码路径 | | :--- | --- | :---- | :---- | :--- | | memDcmp_lat_1to5ms | - | times 1to5ms | | collector/container/cg\_memory\_dcmp\_latency.lua | @@ -426,34 +450,116 @@ This table show the hist of the latency of direct memory compaction ### pmu_events 表 -* 对应 export 指标: sysak\_ +* 对应 export 指标: sysom\_pmu\_events * 属性标签:value | 指标名 | 单位 | 标签说明 | 备注 | 源码路径 | | :--- | --- | :---- | :---- | :--- | -| cpu_cycles | - | cycles | | collector/plugin/pmu_events/pmu\_events.c | -| instructions | - | instructions | | collector/plugin/pmu_events/pmu\_events.c | -| ipc | - | instructions per cycles | | collector/plugin/pmu_events/pmu\_events.c | -| cpi | - | cycles per instructions | | collector/plugin/pmu_events/pmu\_events.c | -| llc_store_ref | - | llc stroe hits counts | | collector/plugin/pmu_events/pmu\_events.c | -| llc_store_miss | - | llc stroe miss counts | | collector/plugin/pmu_events/pmu\_events.c | -| llc_load_ref | - | llc load hits counts | | collector/plugin/pmu_events/pmu\_events.c | -| llc_load_miss | - | llc load miss counts | | collector/plugin/pmu_events/pmu\_events.c | -| llc_rmiss_rate | - | llc load miss rate | | collector/plugin/pmu_events/pmu\_events.c | -| llc_wmiss_rate | - | llc store miss rate | | collector/plugin/pmu_events/pmu\_events.c | -| llc_miss_rate | - | llc miss rate | | collector/plugin/pmu_events/pmu\_events.c | -| llc_cache_mpi | - | llc miss per instructions | | collector/plugin/pmu_events/pmu\_events.c | - -### imc_socket_latency 表 - -| 指标名 | 单位 | 标签说明 | 备注 | 源码路径 | -| :----- | ---- | :-------------------- | :--- | :------------------------------------------ | -| rlat | ns | imc read ddr latency | socket级 | collector/plugin/imc_latency/imc\_latency.c | -| wlat | ns | imc write ddr latency 
| socket级 | collector/plugin/imc_latency/imc\_latency.c | - -### imc_channel_latency 表 - -| 指标名 | 单位 | 标签说明 | 备注 | 源码路径 | -| :----- | ---- | :-------------------- | :--- | :------------------------------------------ | -| rlat | ns | imc read ddr latency | channel级 | collector/plugin/imc_latency/imc\_latency.c | -| wlat | ns | imc write ddr latency | channel级 | collector/plugin/imc_latency/imc\_latency.c | \ No newline at end of file +| cycles | - | cycles | | collector/plugin/pmu_events/pmu\_events.c | +| ins | - | instructions | | collector/plugin/pmu_events/pmu\_events.c | +| IPC | - | instructions per cycles | | collector/plugin/pmu_events/pmu\_events.c | +| CPI | - | cycles per instructions | | collector/plugin/pmu_events/pmu\_events.c | +| llcStore | - | llc stroe hits counts | | collector/plugin/pmu_events/pmu\_events.c | +| llcStoreMis | - | llc stroe miss counts | | collector/plugin/pmu_events/pmu\_events.c | +| llcLoad | - | llc load hits counts | | collector/plugin/pmu_events/pmu\_events.c | +| llcLoadMis | - | llc load miss counts | | collector/plugin/pmu_events/pmu\_events.c | +| l3LoadMisRate | - | llc load miss rate | | collector/plugin/pmu_events/pmu\_events.c | +| l3StoreMisRate | - | llc store miss rate | | collector/plugin/pmu_events/pmu\_events.c | +| l3MisRate | - | llc miss rate | | collector/plugin/pmu_events/pmu\_events.c | +| MPI | - | llc miss per kilo-instruction | | collector/plugin/pmu_events/pmu\_events.c | + + +### imc_node_event 表 + +* 对应 export 指标: sysom\_imc\_event\_node +* 属性标签:value + +| 指标名 | 单位 | 标签说明 | 备注 | 源码路径 | +| :----- | ---- | :-------------------- | :----- | :--------------------------------------- | +| rlat | ns | imc read ddr latency | node级 | collector/plugin/uncore_imc/uncore_imc.c | +| wlat | ns | imc write ddr latency | node级 | collector/plugin/uncore_imc/uncore_imc.c | +| avglat | ns | imc avg ddr latency | node级 | collector/plugin/uncore_imc/uncore_imc.c | + +### imc_socket_event 表 + +* 对应 export 指标: 
sysom\_imc\_event\_socket +* 属性标签:value + +| 指标名 | 单位 | 标签说明 | 备注 | 源码路径 | +| :----- | ---- | :-------------------- | :------- | :--------------------------------------- | +| rlat | ns | imc read ddr latency | socket级 | collector/plugin/uncore_imc/uncore_imc.c | +| wlat | ns | imc write ddr latency | socket级 | collector/plugin/uncore_imc/uncore_imc.c | +| avglat | ns | imc avg ddr latency | socket级 | collector/plugin/uncore_imc/uncore_imc.c | +| bw_rd | byte | imc read bandwidth | socket级 | collector/plugin/uncore_imc/uncore_imc.c | +| bw_wr | byte | imc write bandwidth | socket级 | collector/plugin/uncore_imc/uncore_imc.c | + +### imc_channel_event 表 + +* 对应 export 指标: sysom\_imc\_channel\_event +* 属性标签:value + +| 指标名 | 单位 | 标签说明 | 备注 | 源码路径 | +| :----- | ---- | :-------------------- | :-------- | :--------------------------------------- | +| rlat | ns | imc read ddr latency | channel级 | collector/plugin/uncore_imc/uncore_imc.c | +| wlat | ns | imc write ddr latency | channel级 | collector/plugin/uncore_imc/uncore_imc.c | +| avglat | ns | imc avg ddr latency | channel级 | collector/plugin/uncore_imc/uncore_imc.c | +| bw_rd | byte | imc read bandwidth | channel级 | collector/plugin/uncore_imc/uncore_imc.c | +| bw_wr | byte | imc write bandwidth | channel级 | collector/plugin/uncore_imc/uncore_imc.c | + + +### rdt_usage 表 + +* 对应 export 指标: sysom\_rdt\_usage +* 属性标签:value + +| 指标名 | 单位 | 标签说明 | 备注 | 源码路径 | +| :----------- | ---- | :--------------------------- | :--- | :----------------------------------------- | +| llc_occ | byte | llc occupancy | | collector/rdt/plugin/rdt_llc_occupancy.lua | +| local_mem_bw | MB | 本地内存带宽 | | collector/rdt/plugin/rdt_local_mem_bw.lua | +| total_mem_bw | MB | Remote内存带宽+Local内存带宽 | | collector/rdt/plugin/rdt_total_mem_bw.lua | + +### rdt_alloc_policy 表 + +* 对应 export 指标: sysom\_rdt\_alloc\_policy +* 属性标签:value + +| 指标名 | 单位 | 标签说明 | 备注 | 源码路径 | +| :----- | ---- | :--------------- | :--------------------- | 
:-------------------------------- | +| MB | | 内存带宽分配比例 | MB(Memory Bandwidth) | collector/rdt/plugin/rdt_size.lua | +| L3 | byte | llc分配size | | collector/rdt/plugin/rdt_size.lua | + + +### cg_sched_cfs_stat_v2 + +- 对应 export 指标: sysom_cg_sched_cfs_stat_v2 +- 属性标签:value + +| 指标名 | 单位 | 标签说明 | 备注 | 源码路径 | +| :------------ | ---- | :------------------------------- | :--- | :---------------------------------------------------- | +| serve | ns | 从开始运行到睡眠的时间 | | collector/container/cgroupv2/cg_sched_cfs_stat_v2.lua | +| oncpu | ns | oncpu时间 | | collector/container/cgroupv2/cg_sched_cfs_stat_v2.lua | +| queue_other | ns | 被非兄弟层级打断的时长 | | collector/container/cgroupv2/cg_sched_cfs_stat_v2.lua | +| queue_sibling | ns | 被兄弟层级打断的时长 | | collector/container/cgroupv2/cg_sched_cfs_stat_v2.lua | +| queue_max | ns | 在调度队列上的最大等待时间 | | collector/container/cgroupv2/cg_sched_cfs_stat_v2.lua | +| force_idle | ns | 被core sched造成force idle的时间 | | collector/container/cgroupv2/cg_sched_cfs_stat_v2.lua | + + +### cg_cpu_stat_v2 + +- 对应 export 指标: sysom_cg_cpu_stat_v2 +- 属性标签:value + +| 指标名 | 单位 | 标签说明 | 备注 | 源码路径 | +| :------------------------- | ---- | :-------------------- | :--- | :---------------------------------------------- | +| usage_usec | usec | 运行时长 | | collector/container/cgroupv2/cg_cpu_stat_v2.lua | +| user_usec | usec | 用户态时间 | | collector/container/cgroupv2/cg_cpu_stat_v2.lua | +| system_usec | usec | 内核态时间 | | collector/container/cgroupv2/cg_cpu_stat_v2.lua | +| core_sched.force_idle_usec | usec | 对端HT force idle时间 | | collector/container/cgroupv2/cg_cpu_stat_v2.lua | +| sibidle_usec | usec | 对端HT idle时间 | | collector/container/cgroupv2/cg_cpu_stat_v2.lua | +| nr_periods | 次 | 带宽周期数 | | collector/container/cgroupv2/cg_cpu_stat_v2.lua | +| nr_throttled | 次 | 限流次数 | | collector/container/cgroupv2/cg_cpu_stat_v2.lua | +| throttled_usec | usec | 限流时长 | | collector/container/cgroupv2/cg_cpu_stat_v2.lua | +| current_bw | | | | collector/container/cgroupv2/cg_cpu_stat_v2.lua | +| 
nr_bursts | 次 | burst次数 | | collector/container/cgroupv2/cg_cpu_stat_v2.lua | +| burst_usec | usec | busrt时长 | | collector/container/cgroupv2/cg_cpu_stat_v2.lua | + diff --git a/source/tools/monitor/unity/beaver/guide/outLine.md b/source/tools/monitor/unity/beaver/guide/outLine.md index d1788e81a59a0ddf18f1454bf11aec293fb0b627..19e279d11a61705ad76f5e4fe7b8ba6c86c1495a 100644 --- a/source/tools/monitor/unity/beaver/guide/outLine.md +++ b/source/tools/monitor/unity/beaver/guide/outLine.md @@ -1,6 +1,17 @@ # 外部数据写入支持 unity-mon可以作为一个独立的TSDB 数据库进行使用,支持[行协议](https://jasper-zhang1.gitbooks.io/influxdb/content/Write_protocols/line_protocol.html)写入数据,并按需完成对外数据吐出,如exporter等接口。 +## 行协议格式说明 +行协议使用换行符\n分隔每一行,每一行表示一个数据点,可以类比为关系型数据库中的一行。行协议是对空格敏感的。 + +``` +[,=[,=]] =[,=] +``` +从语法中可以看出,行协议分为如下四个内容:measurement、tag set、field set +* measurement是必需的,可以类比为关系型数据库的表名。measurement类型是字符串。如一个描述气温的时序型数据库,measurement为"气温"。 +* tag set不是必需的,用于描述一些不随时间变化的信息。tag_key和tag_value的类型均为字符串,如描述具体某地气温时,"城市"="深圳"。 +* field set是必需的,一个数据点中必须有至少一个field。field用于描述随时间变化的信息。field_key的类型是字符串,field_value只能是浮点型或字符串。field_value为浮点型时表示数值,field_value为字符串时表示日志。 + ## 行协议格式支持情况 unity-mon 当前除了不支持时间戳,支持行协议其它所有的数据类型,包含数值和日志。写行数据时,有以下注意事项: diff --git a/source/tools/monitor/unity/beaver/guide/pushto.md b/source/tools/monitor/unity/beaver/guide/pushto.md new file mode 100644 index 0000000000000000000000000000000000000000..5673b1bd3c467f5b6187bfa1bbd55af68af5adea --- /dev/null +++ b/source/tools/monitor/unity/beaver/guide/pushto.md @@ -0,0 +1,72 @@ +# 推送配置 + +unity 支持通过yaml 配置将指标、日志等数据向多目标推送。需要确认你当前使用的版本已经更到了最新,早期版本只支持单目标。 + +支持以下几种推送方式: + +* sls (含指标、日志、或两者兼得) +* sls metricstore +* influxdb,含阿里云自研lindorm +* prometheus remote write(预留) + +# 配置示例 +推送目标配置在yaml 的 pushTo 字段,类型为list,示例如下: + +``` + pushTo: + - to: "Influx" + host: "www.influx.com" + port: 8086 + url: "/write?db=db" + - to: "Sls" + endpoint: xxxxxx + project: xxxxxxx + logstore: xxxxx + addition: xxxxxx +``` + +配置说明: +## Sls + +将数据以行协议的方式推送到sls logstore,logstore 
需要手动创建。参数列表 + +* endpoint 参考sls endpoint 配置 +* project 参考sls project 配置 +* logstore 参考sls logstore 配置 +* addition + +## SlsLog + +参数与Sls 一致,仅推送log + +## SlsMetric + +参数与Sls 一致,仅推送metric + +## Metricstore(推荐使用) + +将数据按照 prometheus remote write 方式写入 sls metricstore,参数: +* host "[project].[endpoint]" +* url "/prometheus/[project]/[metricstore]/api/v1/write" +* addtion + +## Metrics + +功能与Metricstore 类似,后面将逐步退出 + +* endpoint 参考sls endpoint 配置 +* project 参考sls project 配置 +* metricstore 参考sls metricstore 配置 +* addition + +## Influx + +将数据以行协议方式写入influxDB或者Lindorm,参数 + +* host: 目标host +* port: 数据库端口 +* url: 写入url + + + + diff --git a/source/tools/monitor/unity/beaver/localBeaver.lua b/source/tools/monitor/unity/beaver/localBeaver.lua index d0c2c5dea9dbafc1eeed5e4c0f23384d562090cc..721bd9c7a23c984d852e8129779e620578b98f53 100644 --- a/source/tools/monitor/unity/beaver/localBeaver.lua +++ b/source/tools/monitor/unity/beaver/localBeaver.lua @@ -166,6 +166,101 @@ function CLocalBeaver:_install_fd(port, ip, backlog) end end +function CLocalBeaver:ssl_read(handle, maxLen) + maxLen = maxLen or 2 * 1024 * 1024 -- signal conversation accept 2M stream max + local function readHandle() + local e = coroutine.yield() + if e.ev_close > 0 then + return nil + elseif e.ev_in > 0 then + local size = 16 * 1024 + local data = self._ffi.new("char[?]", size) + local ret = self._cffi.ssl_read(handle, data, size) + if ret > 0 then + local s = self._ffi.string(data, ret) + return s + elseif ret == 0 then + return "" + else + return nil + end + else + print(system:dump(e)) + end + return nil + end + return readHandle +end + +function CLocalBeaver:ssl_write(fd, handle, stream) + local ret + local len = #stream + ret = self._cffi.ssl_write(handle, stream, len) + assert(ret >= 0, "ssl_write return " .. 
ret) + + if ret < len then + ret = self._cffi.mod_fd(self._efd, fd, 1) -- epoll write ev + assert(ret == 0) + + while ret < len do + local e = coroutine.yield() + if e.ev_close > 0 then + return nil + elseif e.ev_out then + stream = string.sub(stream, len + 1) + if stream == nil then + return 1 + end + ret = self._cffi.ssl_write(handle, stream, len) + assert(ret >= 0) + end + end + + ret = self._cffi.mod_fd(self._efd, fd, 0) + assert(ret == 0) + end + return 1 +end + +local function handshakeYield() + local e = coroutine.yield() + if e.ev_close > 0 then + return true + end + return false +end + +function CLocalBeaver:ssl_handshake(fd) + local handle = self._cffi.ssl_connect_pre(fd) + + local ret + repeat + ret = self._cffi.ssl_connect(handle) + if ret == 1 then -- 1 means neet to write + self._cffi.mod_fd(self._efd, fd, 1) + if handshakeYield() then + ret = -1 + end + elseif ret == 2 then -- 2 means neet to read + self._cffi.mod_fd(self._efd, fd, 0) + if handshakeYield() then + ret = -1 + end + end + until (ret <= 0) + self._cffi.mod_fd(self._efd, fd, 0) -- set back to 0 + + if ret < 0 then + self._cffi.ssl_del(handle) + handle = nil + end + return handle +end + +function CLocalBeaver:ssl_del(handle) + self._cffi.ssl_del(handle) +end + function CLocalBeaver:read(fd, maxLen) maxLen = maxLen or 2 * 1024 * 1024 -- signal conversation accept 2M stream max local function readFd() @@ -215,6 +310,7 @@ function CLocalBeaver:write(fd, stream) sent, err, errno = socket.send(fd, stream) if sent == nil then if errno == 11 then -- EAGAIN ? 
+ sent = 0 goto continue end system:posixError("socket send error.", err, errno) diff --git a/source/tools/monitor/unity/beaver/native/Makefile b/source/tools/monitor/unity/beaver/native/Makefile index 48c42266ca239e4809518431915bc0fc23ec32d5..3b696d0319b72cef832f5355353fffa33c6b7d35 100644 --- a/source/tools/monitor/unity/beaver/native/Makefile +++ b/source/tools/monitor/unity/beaver/native/Makefile @@ -1,7 +1,7 @@ CC := gcc CFLAG := -g -fpic -LDFLAG := -g -fpic -shared -OBJS := local_beaver.o +LDFLAG := -g -fpic -shared -lssl -lcrypto +OBJS := local_beaver.o async_ssl.o SO := liblbeaver.so all: $(SO) install diff --git a/source/tools/monitor/unity/beaver/native/async_ssl.c b/source/tools/monitor/unity/beaver/native/async_ssl.c new file mode 100644 index 0000000000000000000000000000000000000000..b4ec249bdaa8bd73362317187a82997d7d4430ce --- /dev/null +++ b/source/tools/monitor/unity/beaver/native/async_ssl.c @@ -0,0 +1,123 @@ +// +// Created by 廖肇燕 on 2023/8/21. +// + +#include "async_ssl.h" +#include +#include +#include +#include +#include +#include +#include + +#define BUFF_MAX 16384 +static SSL_CTX *sslContext = NULL; + +int ssl_read(void *handle, const char *buff, int len) +{ + int ret = 0; + int size = BUFF_MAX < len ? BUFF_MAX : len; + + ret = SSL_read((SSL *)handle, buff, size); + if (ret < 0) { + int err = SSL_get_error((SSL *)handle, ret); + if (err == SSL_ERROR_WANT_READ) { + ret = 0; + goto needContinue; + } + goto readFailed; + } + + return ret; + needContinue: // for to continue read. + return ret; + readFailed: + return ret; +} + +int ssl_write(void *handle, const char *buff, int len) { + int ret = 0; + ret = SSL_write((SSL *)handle, buff, len); + + if (ret < 0) { + int err = SSL_get_error((SSL *)handle, ret); + if (err == SSL_ERROR_WANT_WRITE) { //just need to write. 
+ ret = 0; + goto needContinue; + } + } + return ret; + needContinue: + return ret; +} + +void *ssl_connect_pre(int fd) { + int ret; + SSL *handle = SSL_new(sslContext); + if (handle == NULL) { + fprintf(stderr, "ssl_connect_pre new ssl failed. %d, %s\n", errno, strerror(errno)); + return NULL; + } + + ret = SSL_set_fd(handle, fd); + if (ret < 0) { + fprintf(stderr, "ssl_connect_pre bind fd failed. %d, %s\n", errno, strerror(errno)); + SSL_shutdown(handle); + SSL_free(handle); + return NULL; + } + SSL_set_connect_state(handle); + return handle; +} + +int ssl_connect(void * handle) { + int ret = 0, err = 0; + SSL *h = (SSL *)handle; + + ret = SSL_do_handshake(h); + if (ret == 1) { + return 0; // means handshake success. + } + + err = SSL_get_error(h, ret); + switch (err) { + case 0: + return 0; + case SSL_ERROR_WANT_WRITE: //waite write. + return 1; + case SSL_ERROR_WANT_READ: + return 2; + default: + fprintf(stderr, "ssl_connect handshake failed. %d, %s\n", errno, strerror(errno)); + return -1; + } +} + +void ssl_del(void *handle) { + SSL *h = (SSL *)handle; + SSL_shutdown(handle); + SSL_free(handle); +} + +int async_ssl_init(void) { + int ret = 0; + + SSL_load_error_strings(); + SSL_library_init(); + sslContext = SSL_CTX_new(SSLv23_client_method()); + if (sslContext == NULL) { + fprintf(stderr, "set up sslContext failed. %d, %s\n", errno, strerror(errno)); + ret = -errno; + goto sslFailed; + } + return ret; + + sslFailed: + return ret; +} + +void async_ssl_deinit(void) { + SSL_CTX_free(sslContext); + sslContext = NULL; +} diff --git a/source/tools/monitor/unity/beaver/native/async_ssl.h b/source/tools/monitor/unity/beaver/native/async_ssl.h new file mode 100644 index 0000000000000000000000000000000000000000..7aca6882bf88f1cf0093eea8ecaa3d9d9bb53c74 --- /dev/null +++ b/source/tools/monitor/unity/beaver/native/async_ssl.h @@ -0,0 +1,11 @@ +// +// Created by 廖肇燕 on 2023/8/21. 
+// + +#ifndef UNITY_ASYNC_SSL_H +#define UNITY_ASYNC_SSL_H + +int async_ssl_init(void); +void async_ssl_deinit(void); + +#endif //UNITY_ASYNC_SSL_H diff --git a/source/tools/monitor/unity/beaver/native/beavercffi.lua b/source/tools/monitor/unity/beaver/native/beavercffi.lua index 9aa9cef9289cba53c4b7cc66be400eca94f88051..2d30e48eb22476735c22a8594585f21cebd1ca99 100644 --- a/source/tools/monitor/unity/beaver/native/beavercffi.lua +++ b/source/tools/monitor/unity/beaver/native/beavercffi.lua @@ -26,6 +26,12 @@ int del_fd(int efd, int fd); int poll_fds(int efd, int tmo, native_events_t* nes); int setsockopt_AP(int fd); void deinit(int efd); + +int ssl_read(void *handle, const char *buff, int len); +int ssl_write(void *handle, const char *buff, int len); +void *ssl_connect_pre(int fd); +int ssl_connect(void * handle); +void ssl_del(void *handle); ]] return {ffi = ffi, cffi=cffi} diff --git a/source/tools/monitor/unity/beaver/native/local_beaver.c b/source/tools/monitor/unity/beaver/native/local_beaver.c index 634907813f204e4027c2d695bfde37047835fc9b..e5a11c5a486fe057527920b0099a82471b23fa24 100644 --- a/source/tools/monitor/unity/beaver/native/local_beaver.c +++ b/source/tools/monitor/unity/beaver/native/local_beaver.c @@ -11,12 +11,13 @@ #include #include #include +#include "async_ssl.h" int setsockopt_AP(int fd){ int opt =1; - int r = setsockopt(fd, SOL_SOCKET,SO_REUSEPORT,(char*)&opt,sizeof(int)); + int r = setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, (char*)&opt, sizeof(int)); // SO_REUSEPORT - if(r<0){ + if (r < 0) { perror("set sock opt"); } return r; @@ -24,7 +25,7 @@ int setsockopt_AP(int fd){ static int socket_non_blocking(int sfd) { - int flags, res; + int flags = 0, ret = 0; flags = fcntl(sfd, F_GETFL); if (flags < 0) { @@ -33,44 +34,52 @@ static int socket_non_blocking(int sfd) } flags |= O_NONBLOCK; - res = fcntl(sfd, F_SETFL, flags); - if (res < 0) { + ret = fcntl(sfd, F_SETFL, flags); + if (ret < 0) { perror("error : cannot set socket flags!\n"); - return 
-errno; + ret = -errno; + goto fcntlFailed; } return 0; + fcntlFailed: + return ret; } static int epoll_add(int efd, int fd) { struct epoll_event event; - int res; + int ret = 0; event.events = EPOLLIN; event.data.fd = fd; - res = epoll_ctl(efd, EPOLL_CTL_ADD, fd, &event); - if (res < 0) { + ret = epoll_ctl(efd, EPOLL_CTL_ADD, fd, &event); + if (ret < 0) { perror("error : can not add event to epoll!\n"); return -errno; } - return res; + return ret; } static int epoll_del(int efd, int fd) { - int res; + int ret; - res = epoll_ctl(efd, EPOLL_CTL_DEL, fd, NULL); - if (res < 0) { + ret = epoll_ctl(efd, EPOLL_CTL_DEL, fd, NULL); + if (ret < 0) { perror("error : can not del event to epoll!\n"); return -errno; } - return res; + return ret; } int init(int listen_fd) { - int efd; - int res; + int efd = 0; + int ret = 0; + + ret = async_ssl_init(); + if (ret < 0) { + goto end_ssl_init; + } efd = epoll_create(NATIVE_EVENT_MAX); if (efd < 0) { @@ -78,69 +87,71 @@ int init(int listen_fd) { exit(1); } - res = epoll_add(efd, listen_fd); - if (res < 0) { + ret = epoll_add(efd, listen_fd); + if (ret < 0) { + ret = -errno; goto end_epoll_add; } return efd; + end_ssl_init: end_epoll_add: - return -errno; + return ret; } int add_fd(int efd, int fd) { - int res; + int ret; - res = socket_non_blocking(fd); - if (res < 0) { + ret = socket_non_blocking(fd); + if (ret < 0) { goto end_socket_non_blocking; } - res = epoll_add(efd, fd); - if (res < 0) { + ret = epoll_add(efd, fd); + if (ret < 0) { goto end_epoll_add; } - return res; + return ret; end_socket_non_blocking: end_epoll_add: - return res; + return ret; } int mod_fd(int efd, int fd, int wr) { struct epoll_event event; - int res; + int ret; - event.events = wr ? EPOLLIN | EPOLLOUT : EPOLLIN; + event.events = wr ? 
EPOLLOUT | EPOLLERR : EPOLLIN | EPOLLERR; event.data.fd = fd; - res = epoll_ctl(efd, EPOLL_CTL_MOD, fd, &event); - if (res < 0) { + ret = epoll_ctl(efd, EPOLL_CTL_MOD, fd, &event); + if (ret < 0) { perror("error : can not add event to epoll!\n"); return -errno; } - return res; + return ret; } int del_fd(int efd, int fd) { - int res; - res = epoll_del(efd, fd); + int ret; + ret = epoll_del(efd, fd); close(fd); - return res; + return ret; } int poll_fds(int efd, int tmo, native_events_t* nes) { struct epoll_event events[NATIVE_EVENT_MAX]; - int i, res; + int i, ret = 0; - res = epoll_wait(efd, events, NATIVE_EVENT_MAX, tmo * 1000); - if (res < 0) { + ret = epoll_wait(efd, events, NATIVE_EVENT_MAX, tmo * 1000); + if (ret < 0) { perror("error : epoll failed!\n"); return -errno; } - nes->num = res; - for (i = 0; i < res; i ++) { + nes->num = ret; + for (i = 0; i < ret; i ++) { nes->evs[i].fd = events[i].data.fd; if ( (events[i].events & EPOLLERR) || @@ -159,4 +170,5 @@ int poll_fds(int efd, int tmo, native_events_t* nes) { void deinit(int efd) { close(efd); + async_ssl_deinit(); } diff --git a/source/tools/monitor/unity/beaver/query/baseQuery.lua b/source/tools/monitor/unity/beaver/query/baseQuery.lua index 109f67c1d07d5ca3758210ff27336d2495de4881..0d13f1d10f76ef172b21c46f54f17c64ce95a4dc 100644 --- a/source/tools/monitor/unity/beaver/query/baseQuery.lua +++ b/source/tools/monitor/unity/beaver/query/baseQuery.lua @@ -88,13 +88,15 @@ local function packTimeLen(forms, session) table.insert(forms, formTLEnd) end -local function packForm(session, tables) +local function packForm(session, tables, us) local forms = {} packForm1(forms) packTimeFormat(forms, session) packTables(forms, session, tables) packTimeLen(forms, session) packForm2(forms) + + table.insert(forms, string.format("

列表耗时:%d us

", us)) return table.concat(forms, "\n") end @@ -108,8 +110,10 @@ end function CbaseQuery:base(tReq) local res = {title="Beaver Query"} + local t1 = self._fox:get_us() self:qTables(tReq.session) - res.content = packForm(tReq.session, tReq.session.tables) + local t2 = self._fox:get_us() + res.content = packForm(tReq.session, tReq.session.tables, tonumber(t2 - t1)) return res end @@ -218,14 +222,16 @@ function CbaseQuery:baseQ(tReq) end if session.selTable == nil then - contents[1] = "查询表未设置,将跳转会设置页面." + contents[1] = "查询表未设置,将跳转回设置页面." contents[2] = '' res.content = table.concat(contents, "\n") return res end + local t1 = self._fox:get_us() local ms = self._fox:qNow(tonumber(session.timeLen) * 60, {session.selTable}) + local t2 = self._fox:get_us() table.insert(contents, "# 反馈输入\n") table.insert(contents, "* 表名: " .. system:escMd(session.selTable)) table.insert(contents, "* 时间戳: " .. session.gmt) @@ -236,6 +242,7 @@ function CbaseQuery:baseQ(tReq) packDataTabel(contents, ms, session.gmt) + table.insert(contents, string.format("查表耗时 %dus", tonumber(t2 - t1))) table.insert(contents, "[返回](/query/base)") table.insert(contents, "[刷新](/query/baseQ)") diff --git a/source/tools/monitor/unity/beaver/url_api.lua b/source/tools/monitor/unity/beaver/url_api.lua index 47389f6b62f50270bd5694a1fdf27054caaa33cc..c51bf31d837055c2e6ad0bb4825a6d4342fd1311 100644 --- a/source/tools/monitor/unity/beaver/url_api.lua +++ b/source/tools/monitor/unity/beaver/url_api.lua @@ -8,28 +8,49 @@ require("common.class") local system = require("common.system") local ChttpApp = require("httplib.httpApp") local CfoxTSDB = require("tsdb.foxTSDB") +local CfoxSQL = require("tsdb.foxSQL") local postQue = require("beeQ.postQue.postQue") local CpushLine = require("beaver.pushLine") local CasyncDns = require("httplib.asyncDns") local CasyncHttp = require("httplib.asyncHttp") +local CasyncHttps = require("httplib.asyncHttps") local CasyncOSS = require("httplib.asyncOSS") local CurlApi = class("urlApi", 
ChttpApp) -function CurlApi:_init_(frame, que, fYaml) +function CurlApi:_init_(frame, que, fYaml, instance) ChttpApp._init_(self) self._pushLine = CpushLine.new(que) - self._urlCb["/api/sum"] = function(tReq) return self:sum(tReq) end - self._urlCb["/api/sub"] = function(tReq) return self:sub(tReq) end + local res = system:parseYaml(fYaml) + + self._instance = instance + if res.config.url_safe==nil or res.config.url_safe ~= "close" then + self._urlCb["/api/sum"] = function(tReq) return self:sum(tReq) end + self._urlCb["/api/sub"] = function(tReq) return self:sub(tReq) end + self._urlCb["/api/trig"] = function(tReq) return self:trig(tReq) end + self._urlCb["/api/line"] = function(tReq) return self:line(tReq) end + self._urlCb["/api/dns"] = function(tReq) return self:dns(tReq) end + self._urlCb["/api/proxy"] = function(tReq) return self:proxy(tReq) end + self._urlCb["/api/ssl"] = function(tReq) return self:ssl(tReq) end + if res.diagnose then + self._diagAuth = res.diagnose.token + self._diagHost = res.diagnose.host + self._urlCb["/api/diag"] = function(tReq) return self:diag(tReq) end + end + end + self._urlCb["/api/query"] = function(tReq) return self:query(tReq) end - self._urlCb["/api/trig"] = function(tReq) return self:trig(tReq) end - self._urlCb["/api/line"] = function(tReq) return self:line(tReq) end - self._urlCb["/api/dns"] = function(tReq) return self:dns(tReq) end - self._urlCb["/api/proxy"] = function(tReq) return self:proxy(tReq) end - self:_ossIntall(fYaml) + self._urlCb["/api/sql"] = function(tReq) return self:qsql(tReq) end + if res.cec then + self._cec = res.cec + self._urlCb["/api/cec"] = function(tReq) return self:cec(tReq) end + self._urlCb["/api/alert"] = function(tReq) return self:alert(tReq) end + end + self:_ossIntall(fYaml) self:_install(frame) self:_setupQs(fYaml) - self._proxy = CasyncHttp.new() + self._proxyhttp = CasyncHttp.new() + self._proxyhttps = CasyncHttps.new() end function CurlApi:_ossIntall(fYaml) @@ -64,10 +85,171 @@ function 
CurlApi:oss(tReq) end end +local function reqSSL(https, host, uri, port) + port = port or 443 + return https:get(host, uri, port) +end + +function CurlApi:ssl(tReq) + local stat, tJson = pcall(self.getJson, self, tReq) + if stat and tJson then + local host = tJson.host + local uri = tJson.uri + if host and uri then + local https = CasyncHttps.new() + local stat, body = pcall(reqSSL, https, host, uri) + if stat then + return {body = body} + else + return "bad req dns " .. body, 400 + end + else + return "need domain arg.", 400 + end + else + return "bad dns " .. tReq.data, 400 + end +end + local function reqProxy(proxy, host, uri) return proxy:get(host, uri) end +local function proxyPost(proxy, host, uri, headers, body) + return proxy:post(host, uri, headers, body) +end + +function CurlApi:packCEC(topic, data) + local host = self._cec + local uri = "/api/v1/cec_proxy/proxy/dispatch" + + data.alert_id = system:guid() + data.instance = self._instance + if not data.alert_time then + data.alert_time = os.time() * 1000 + end + + local req = { + topic = topic, + data = data, + } + local headers = { + accept = "application/json", + ["Content-Type"] = "application/json", + } + + if host and uri then + local stat + local body + if string.sub(host,1,5) == "https" then + host = string.sub(host,9) + stat, body = pcall(proxyPost, self._proxyhttps, host, uri, headers, self:jencode(req)) + else + host = string.sub(host,8) + stat, body = pcall(proxyPost, self._proxyhttp, host, uri, headers, self:jencode(req)) + end + + if stat then + body = self:jdecode(body) + if body.code == 0 then + return string.format("cec process %s.", data.alert_id), 200 + else + return {code = body.code, message = body.message} + end + end + end +end + +-- refer to cec design +function CurlApi:cec(tReq) + local stat, tJson = pcall(self.getJson, self, tReq) + if stat and tJson then + local topic, data = tJson.topic, tJson.data + if type(topic) ~= "string" then + return "no data segment." .. 
tReq.data, 400 + end + if type(data) ~= "table" then + return "no data segment." .. tReq.data, 400 + end + + return self:packCEC(topic, data) + else + return "bad data" .. tReq.data, 400 + end +end + +function CurlApi:alert(tReq) + local stat, tJson = pcall(self.getJson, self, tReq) + if stat and tJson then + if type(tJson) ~= "table" then + return "no data segment." .. tReq.data, 400 + end + + return self:packCEC("SYSOM_SAD_ALERT", tJson) + else + return "bad data" .. tReq.data, 400 + end +end + +function CurlApi:diag(tReq) + local stat, tJson = pcall(self.getJson, self, tReq) + if stat and tJson then + local host = self._diagHost + local uri = "/api/v1/tasks/sbs_task_create/" + if not host then + host = tJson.host + end + --local headers = tJson.headers + local reqbody = tJson.body + + if system:keyIsIn(reqbody, "params") == false then + reqbody["params"] = { + instance = "127.0.0.1" + } + elseif system:keyIsIn(reqbody.params, "instance") == false then + reqbody.params["instance"] = "127.0.0.1" + end + + local headers = { + accept = "application/json", + ["Content-Type"] = "application/json", + authorization = self._diagAuth + } + local service_name = reqbody.service_name + + if host and uri then + local stat + local body + if string.sub(host,1,5) == "https" then + host = string.sub(host,9) + stat, body = pcall(proxyPost, self._proxyhttps, host, uri, headers, self:jencode(reqbody)) + else + host = string.sub(host,8) + stat, body = pcall(proxyPost, self._proxyhttp, host, uri, headers, self:jencode(reqbody)) + end + if stat then + body = self:jdecode(body) + if body.code == 200 then + local data = body.data + data["service_name"] = service_name + local s = self:jencode(data) + postQue.post(s) + else + print(body.message) + return {code = body.code, message = body.message} + end + return {task_id = body.data.task_id} + else + return "bad req dns " .. body, 400 + end + else + return "need domain arg.", 400 + end + else + return "bad dns " .. 
tReq.data, 400 + end +end + function CurlApi:proxy(tReq) local stat, tJson = pcall(self.getJson, self, tReq) if stat and tJson then @@ -204,12 +386,30 @@ function CurlApi:qtable(tJson) return self.fox:qTabelNow(secs) end +function CurlApi:qsql(tReq) + local stat, tJson = pcall(self.getJson, self, tReq) + if stat then + local res = self.foxSQL:parse(tJson) + if res.error ~= nil or res.cursorpos ~= nil then + print("sql parse error") + return {} + end + return self.foxSQL:sql(res) + else + return {} + end + + +end + function CurlApi:_setupQs(fYaml) self.fox = CfoxTSDB.new(fYaml) + self.foxSQL = CfoxSQL.new(fYaml) self._q = {} self._q["last"] = function(tJson) return self:qlast(tJson) end self._q["table"] = function(tJson) return self:qtable(tJson) end self._q["date"] = function(tJson) return self:qdate(tJson) end + --self._q["sql"] = function(tJson) return self:qsql(tJson) end end function CurlApi:lquery(tJson) @@ -227,6 +427,7 @@ function CurlApi:query(tReq) if cStat then return ms else + print("query return :", ms) return {} end else diff --git a/source/tools/monitor/unity/beeQ/Makefile b/source/tools/monitor/unity/beeQ/Makefile index 93737366f72774ee2c20eefcaaebf98f5aca634d..cc82f0db712e83235f58943d9d557fab53707e65 100644 --- a/source/tools/monitor/unity/beeQ/Makefile +++ b/source/tools/monitor/unity/beeQ/Makefile @@ -6,7 +6,7 @@ LDFLAG := -g -lm -ldl -lrt -lpthread -lluajit-5.1 -L./lib/ -lbeeQ -L../beaver -l PRG=unity-mon OBJ=apps.o bees.o daemon.o pushTo.o -DEPMOD=lib rbtree clock postQue ../beaver ../collector/native ../collector/interface ../collector/outline ../collector/plugin ../tsdb/native ../collector/container/cg_pmu_events_ffi ../common/protobuf/metricstore +DEPMOD=lib rbtree clock postQue ../beaver ../collector/native ../collector/interface ../collector/outline ../collector/plugin ../tsdb/native ../collector/container/cg_pmu_events_ffi ../common/protobuf/metricstore ../collector/rdt/rdt_helper ../collector/container/podmem 
../collector/podMan/runtime/cri $(PRG): $(DEPMOD) $(OBJ) $(CC) $(LIB) -o $@ $(OBJ) $(LDFLAG) diff --git a/source/tools/monitor/unity/beeQ/apps.c b/source/tools/monitor/unity/beeQ/apps.c index 40e37b57efc2196544691a1694c855a2e889bb76..763b3be789982452953ca475fa16908b382de458 100644 --- a/source/tools/monitor/unity/beeQ/apps.c +++ b/source/tools/monitor/unity/beeQ/apps.c @@ -294,21 +294,21 @@ static int app_collector_work(void* q, void* proto_q) { goto endLoad; } - // call init. + // call work. lua_getglobal(L, "work"); lua_pushlightuserdata(L, q); lua_pushlightuserdata(L, proto_q); lua_pushstring(L, g_yaml_file); lua_pushinteger(L, (int)gettidv1()); ret = lua_pcall(L, 4, 1, err_func); - if (ret < 0) { + if (ret) { lua_check_ret(ret); goto endCall; } if (!lua_isnumber(L, -1)) { // check errno = -EINVAL; - perror("function collectors.lua init must return a number."); + perror("function collectors.lua work must return a number."); goto endReturn; } lret = lua_tonumber(L, -1); diff --git a/source/tools/monitor/unity/beeQ/collectors.lua b/source/tools/monitor/unity/beeQ/collectors.lua index b083d7a67df59b4d32f5251b33b07fa0dd22bbac..4d5f6dfb4998ab56e8a632028776b0a7316992dc 100644 --- a/source/tools/monitor/unity/beeQ/collectors.lua +++ b/source/tools/monitor/unity/beeQ/collectors.lua @@ -121,6 +121,10 @@ local function setupPostEngine(que, proto_q, fYaml, tid) return w, 1 end +local function work_loop(e) + return e:proc() +end + function work(que, proto_q, yaml, tid) local fYaml = yaml or "../collector/plugin.yaml" checkSos() @@ -135,5 +139,11 @@ function work(que, proto_q, yaml, tid) e:addEvent("postEngine", engine, unit) engine:setMainloop(main) - return e:proc() + local res, msg = pcall(work_loop, e) + if res then + return res + else + print(msg) + return nil + end end diff --git a/source/tools/monitor/unity/beeQ/outline.lua b/source/tools/monitor/unity/beeQ/outline.lua index 7e02b80e8630082ea5815d7bd303969873660ed4..4b4cd9924f564d36bff108dc83b9ae189742fb0b 100644 --- 
a/source/tools/monitor/unity/beeQ/outline.lua +++ b/source/tools/monitor/unity/beeQ/outline.lua @@ -13,7 +13,6 @@ local pipe = nil function init(que, fYaml) local fYaml = fYaml or "../collector/plugin.yaml" pipe = CpipeMon.new(que, fYaml) - if pipe then return 0 end diff --git a/source/tools/monitor/unity/beeQ/pack.sh b/source/tools/monitor/unity/beeQ/pack.sh index 207b0ddda8e13947d63ad6984c36b8fd5b01b160..cf10e12a6ffe555515d0e3b2bba22a7c2dedbb33 100755 --- a/source/tools/monitor/unity/beeQ/pack.sh +++ b/source/tools/monitor/unity/beeQ/pack.sh @@ -49,6 +49,7 @@ cp beeQ/rbtree/*.lua ${APP}/beeQ/rbtree/ cp beeQ/unity-mon ${APP}/beeQ/ cp beeQ/run.sh ${APP}/beeQ/ +# for collector mkdir ${APP}/collector mkdir ${APP}/collector/native mkdir ${APP}/collector/guard @@ -58,7 +59,15 @@ mkdir ${APP}/collector/postEngine mkdir ${APP}/collector/execEngine mkdir ${APP}/collector/podMan mkdir ${APP}/collector/container +mkdir ${APP}/collector/podMan/runtime +mkdir ${APP}/collector/podMan/runtime/cri mkdir ${APP}/collector/io +mkdir ${APP}/collector/rdt +mkdir ${APP}/collector/rdt/plugin +mkdir ${APP}/collector/observe +mkdir ${APP}/collector/perfRun +mkdir ${APP}/collector/cgroupv2 +mkdir ${APP}/collector/container/cgroupv2 cp collector/native/*.so* ${APP}/collector/native/ cp collector/native/*.lua ${APP}/collector/native/ cp collector/*.lua ${APP}/collector/ @@ -70,8 +79,16 @@ cp collector/execEngine/*.lua ${APP}/collector/execEngine cp collector/container/*.lua ${APP}/collector/container/ cp collector/postPlugin/*.lua ${APP}/collector/postPlugin cp collector/podMan/*.lua ${APP}/collector/podMan +cp collector/podMan/runtime/*.lua ${APP}/collector/podMan/runtime +cp collector/podMan/runtime/cri/*.lua ${APP}/collector/podMan/runtime/cri cp collector/io/*.lua ${APP}/collector/io +cp collector/rdt/*.lua ${APP}/collector/rdt +cp collector/rdt/plugin/*.lua ${APP}/collector/rdt/plugin +cp collector/observe/*.lua ${APP}/collector/observe cp collector/plugin.yaml ${APP}/collector/ +cp 
collector/perfRun/perfRun.sh ${APP}/collector/perfRun/perfRun.sh +cp collector/cgroupv2/*.lua ${APP}/collector/cgroupv2 +cp collector/container/cgroupv2/*.lua ${APP}/collector/container/cgroupv2 mkdir ${APP}/common diff --git a/source/tools/monitor/unity/beeQ/postQue/postQue.c b/source/tools/monitor/unity/beeQ/postQue/postQue.c index b9c0c7d2d297e714255856cb5b8521e84145cf77..e9ef208071e70ae62c75dcefc0666bda539b9f4e 100644 --- a/source/tools/monitor/unity/beeQ/postQue/postQue.c +++ b/source/tools/monitor/unity/beeQ/postQue/postQue.c @@ -13,20 +13,21 @@ struct unity_postQue { int num; pthread_mutex_t mtx; - char msgs[UNITY_POSTQUE_NUM][UNITY_POSTQUE_MSG_SIZE]; + char* msgs[UNITY_POSTQUE_NUM]; }; static struct unity_postQue que; -int postQue_pull(char *msg) { +int postQue_pull(char *msg, int size) { int ret; int i; pthread_mutex_lock(&que.mtx); ret = que.num; for (i = 0; i < ret; i ++) { - strcat(msg, que.msgs[i]); - strcat(msg, "\n"); + strncat(msg, que.msgs[i], size); + free(que.msgs[i]); + strncat(msg, "\n", size); } que.num = 0; pthread_mutex_unlock(&que.mtx); @@ -41,12 +42,12 @@ int postQue_pull(char *msg) { int postQue_post(const char *msg) { int ret = 0; int len = strlen(msg); - if (len >= UNITY_POSTQUE_MSG_SIZE) { - return -EINVAL; - } - +// if (len >= UNITY_POSTQUE_MSG_SIZE) { +// return -EINVAL; +// } pthread_mutex_lock(&que.mtx); if (que.num < UNITY_POSTQUE_NUM) { + que.msgs[que.num] = (char*) malloc(len+1); strcpy(que.msgs[que.num], msg); que.num ++; } else { diff --git a/source/tools/monitor/unity/beeQ/postQue/postQue.h b/source/tools/monitor/unity/beeQ/postQue/postQue.h index 070325e54fec6711f41163a24b394b7aba85615d..3d7f34e99b4a5655d14b073a9bf8b65581ebd6b0 100644 --- a/source/tools/monitor/unity/beeQ/postQue/postQue.h +++ b/source/tools/monitor/unity/beeQ/postQue/postQue.h @@ -9,7 +9,7 @@ #include #include -int postQue_pull(char *msg); +int postQue_pull(char *msg, int size); int postQue_post(const char *msg); int postQue_init(); diff --git 
a/source/tools/monitor/unity/beeQ/postQue/postQue.lua b/source/tools/monitor/unity/beeQ/postQue/postQue.lua index becbf24d0055dcd24762ea44e074141f919b3583..76274465e578f8344514a88c8cba3d3ab636affa 100644 --- a/source/tools/monitor/unity/beeQ/postQue/postQue.lua +++ b/source/tools/monitor/unity/beeQ/postQue/postQue.lua @@ -8,15 +8,16 @@ local mod = {} local ffi = require("ffi") ffi.cdef [[ -int postQue_pull(char *msg); +int postQue_pull(char *msg, int size); int postQue_post(const char *msg); ]] local cffi = ffi.load("postQue") function mod.pull() - local s = ffi.new("char[?]", 1024) - local ret = cffi.postQue_pull(s) + local len = 16*1024 + local s = ffi.new("char[?]", len) + local ret = cffi.postQue_pull(s,len) if ret > 0 then return ffi.string(s) end diff --git a/source/tools/monitor/unity/beeQ/pushTo.lua b/source/tools/monitor/unity/beeQ/pushTo.lua index c4d68eff493db881fd85b998e74c2fb2905de1ee..dd3074d2d2a7efb9a4f30d09477058bce8fc0f1c 100644 --- a/source/tools/monitor/unity/beeQ/pushTo.lua +++ b/source/tools/monitor/unity/beeQ/pushTo.lua @@ -10,21 +10,36 @@ local system = require("common.system") local coCli = require("httplib.coCli") local coInflux = require("httplib.coInflux") local coMetrics = require("httplib.coMetrics") -local coAutoMetrics =require("httplib.coAutoMetrics") +local coAutoMetrics = require("httplib.coAutoMetrics") +local coMetricstore = require("httplib.coMetricstore") +local coSls = require("httplib.coSls") +local coSlsLog = require("httplib.coSlsLog") +local coSlsMetric = require("httplib.coSlsMetric") function work(fd, fYaml) local conf = system:parseYaml(fYaml) - local to = conf.pushTo.to - print(to) + local tos = conf.pushTo local frame = coCli.new(fd) + + local Cidentity = require("beaver.identity") + local inst = Cidentity.new(fYaml) + local instance = inst:id() + local _funcs = { - Influx = function(fYaml) return coInflux.new(fYaml) end, - Metrics = function(fYaml) return coMetrics.new(fYaml) end, - AutoMetrics = function(fYaml) 
return coAutoMetrics.new(fYaml) end + Influx = function(fYaml, config, instance) return coInflux.new(fYaml, config, instance) end, + Metrics = function(fYaml, config, instance) return coMetrics.new(fYaml, config, instance) end, + AutoMetrics = function(fYaml, config, instance) return coAutoMetrics.new(fYaml, config, instance) end, + Metricstore = function(fYaml, config, instance) return coMetricstore.new(fYaml, config, instance) end, + Sls = function(fYaml, config, instance) return coSls.new(fYaml, config, instance) end, + SlsLog = function(fYaml, config, instance) return coSlsLog.new(fYaml, config, instance) end, + SlsMetric = function(fYaml, config, instance) return coSlsMetric.new(fYaml, config, instance) end } - local c = _funcs[to](fYaml) - --local c = _funcs[to]("/etc/sysak/base.yaml") - frame:poll(c) + + local clis = {} + for _, push in ipairs(tos) do + table.insert(clis, _funcs[push.to](fYaml, push, instance)) + end + frame:poll(clis) print("end push.") return 0 diff --git a/source/tools/monitor/unity/beeQ/rbtree/rbEvent.lua b/source/tools/monitor/unity/beeQ/rbtree/rbEvent.lua index c9b45317037d83002ad251fdc545cf29d1021f8d..11a17718fef67cea11f983aae8636bfea55e3a20 100644 --- a/source/tools/monitor/unity/beeQ/rbtree/rbEvent.lua +++ b/source/tools/monitor/unity/beeQ/rbtree/rbEvent.lua @@ -36,7 +36,7 @@ function CrbEvent:_init_() self._nsec = timeNsec() end -function CrbEvent:addEvent(name, obj, period, delay, loop) +function CrbEvent:addEvent(name, obj, period, delay, loop)--delay 要不要等一等再执行 loop = loop or -1 -- -1: 会永远增加下去,大于1 则会递减,减少0 不再使用 if loop == 0 then diff --git a/source/tools/monitor/unity/beeQ/run.sh b/source/tools/monitor/unity/beeQ/run.sh index 97b5be763dc2807fbf432511db096a36bc45a9db..4b4ccc1abc76ef9d6331671ae61d766bffb47dd8 100755 --- a/source/tools/monitor/unity/beeQ/run.sh +++ b/source/tools/monitor/unity/beeQ/run.sh @@ -14,6 +14,8 @@ export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:../../install/ export LUA_PATH="../../lua/?.lua;../../lua/?/init.lua;" 
export LUA_CPATH="./lib/?.so;../../lib/?.so;../../lib/loadall.so;" +export SYSAK_WORK_PATH="/usr/local/sysak/.sysak_components" + yaml_path=$1 [ ! $yaml_path ] && yaml_path="/etc/sysak/base.yaml" diff --git a/source/tools/monitor/unity/collector/cgroupv2/cgroupv2.lua b/source/tools/monitor/unity/collector/cgroupv2/cgroupv2.lua new file mode 100644 index 0000000000000000000000000000000000000000..76e44e7de0338aadcfed3cdac51fa0e9a7417d5f --- /dev/null +++ b/source/tools/monitor/unity/collector/cgroupv2/cgroupv2.lua @@ -0,0 +1,43 @@ +require("common.class") + +local dirent = require("posix.dirent") +local unistd = require("posix.unistd") +local pystring = require("common.pystring") +local Cinotifies = require("common.inotifies") + +local cgrpV2 = class("cgroupv2") + +function cgrpV2:_init_(resYaml, proto, pffi, mnt) + self._verbose = false + self._proto = proto + self._pffi = pffi + self._mnt = mnt + self._resYaml = resYaml + self._plugins = self:setupPlugins(proto, pffi, mnt) +end + +function cgrpV2:setupPlugins(proto, pffi, mnt) + local plugins = {} + + for _, path in ipairs(self._resYaml.cgroupv2.directPaths) do + local lables = { + { name = "path", index = path } + } + for _, plugin in ipairs(self._resYaml.cgroupv2.luaPlugin) do + local CProcs = require("collector.container.cgroupv2." .. 
plugin) + table.insert(plugins, CProcs.new(proto, pffi, mnt, path, lables)) + end + end + return plugins +end + +function cgrpV2:proc(elapsed, lines) + for _, plugin in ipairs(self._plugins) do + local stat, res = pcall(plugin.proc, plugin, elapsed, lines) + if not stat or res == -1 then + print("cgroupv2: pcall plugin error.") + end + end +end + +return cgrpV2 diff --git a/source/tools/monitor/unity/collector/container/cg_blkio_stat.lua b/source/tools/monitor/unity/collector/container/cg_blkio_stat.lua new file mode 100644 index 0000000000000000000000000000000000000000..8db137406bd48bb3ee17d72925ae905515b90464 --- /dev/null +++ b/source/tools/monitor/unity/collector/container/cg_blkio_stat.lua @@ -0,0 +1,140 @@ +require("common.class") +local pystring = require("common.pystring") +local unistd = require("posix.unistd") +local CvProc = require("collector.vproc") +local utsname = require("posix.sys.utsname") + +local root = "sys/fs/cgroup/blkio/" +local CgBlkIoStat = class("cg_blkio_stat", CvProc) + +function CgBlkIoStat:_init_(proto, pffi, mnt, path, ls) + CvProc._init_(self, proto, pffi, mnt, nil) + self.mnt = mnt + self.ls = ls + self.path = path +end + +-- get device name form (major, minor) +function CgBlkIoStat:devNoToName(devNo) + local blkFile = self.mnt .. "sys/dev/block/" .. devNo + + if unistd.access(blkFile) == 0 then + for line in io.lines(blkFile .. "/uevent" ) do + if pystring:startswith(line, "DEVNAME") then + local cell = pystring:split(line, "=") + return "/dev/" .. 
cell[2] + end + end + end + return devNo +end + +function CgBlkIoStat:copyTable(original) + local copy = {} + for key, value in pairs(original) do + copy[key] = value + end + return copy +end + +function CgBlkIoStat:_proc(line, metrics, tableName) + local vs = {} + local label = self:copyTable(self.ls) + local cells = pystring:split(line) + -- skip last line + if #cells == 2 then goto exit end + + local op = cells[2] + local devName = self:devNoToName(cells[1]) + table.insert(label, {name = "device", index = devName}) + + if op == "Read" then + vs= {{ + name = "reads_" .. metrics, + value = tonumber(cells[3]) + }} + elseif op == "Write" then + vs = {{ + name = "writes_" .. metrics, + value = tonumber(cells[3]) + }} + elseif op == "Total" then + vs= {{ + name = "total_" .. metrics, + value = tonumber(cells[3]) + }} + else + goto exit + end + + self:appendLine(self:_packProto(tableName, label , vs)) + ::exit:: +end + +function CgBlkIoStat:proc(elapsed, lines) + local tableName = "cg_blkio_stat" + CvProc.proc(self) + + local distro = utsname.uname() + if not distro then + print("cg_blk_stat: get distro error!") + return + end + + local blkMetrics = {} + local release = distro.release + if not string.match(release, "^3.10") then + blkMetrics = { + { + path = "/blkio.throttle.io_service_bytes", + metrics = "service_bytes" + }, + { + path = "/blkio.throttle.io_serviced", + metrics = "serviced" + }, + { + path = "//blkio.throttle.total_bytes_queued", + metrics = "bytes_queued" + }, + { + path = "/blkio.throttle.total_io_queued", + metrics = "io_queued" + }, + { + path = "/blkio.throttle.io_wait_time", + metrics = "wait_time" + } + } + else + blkMetrics = { + { + path = "/blkio.throttle.io_service_bytes", + metrics = "service_bytes" + }, + { + path = "/blkio.throttle.io_serviced", + metrics = "serviced" + }, + { + path = "/blkio.io_queued", + metrics = "io_queued" + }, + { + path = "/blkio.io_wait_time", + metrics = "wait_time" + } + } + end + + for _, blk_metrics in 
ipairs(blkMetrics) do + local cg_path = self.mnt .. root .. self.path .. blk_metrics.path + for line in io.lines(cg_path) do + self:_proc(line, blk_metrics.metrics, tableName) + end + end + + self:push(lines) +end + +return CgBlkIoStat \ No newline at end of file diff --git a/source/tools/monitor/unity/collector/container/cg_bvt_warp_ns.lua b/source/tools/monitor/unity/collector/container/cg_bvt_warp_ns.lua new file mode 100644 index 0000000000000000000000000000000000000000..0172d70a8799bd9d99be2ca8c7b8e761826c96aa --- /dev/null +++ b/source/tools/monitor/unity/collector/container/cg_bvt_warp_ns.lua @@ -0,0 +1,36 @@ +require("common.class") +local pystring = require("common.pystring") +local CvProc = require("collector.vproc") +local root = "sys/fs/cgroup/cpu/" +local dfile = "/cpu.bvt_warp_ns" + +local cgBvtWarpNs = class("cg_bvt_warp_ns", CvProc) + +function cgBvtWarpNs:_init_(proto, pffi, mnt, path, ls) + CvProc._init_(self, proto, pffi, mnt, root .. path .. dfile) + self.ls = ls +end + +function cgBvtWarpNs:proc(elapsed, lines) + CvProc.proc(self) + local values = {} + + local f = io.open(self.pFile, "r") + if f ~= nil then + io.close(f) + else + return + end + + for line in io.lines(self.pFile) do + values[1] = { + name = 'bvt_warp_ns', + value = tonumber(line) + } + break + end + self:appendLine(self:_packProto("cg_bvt_warp_ns", self.ls, values)) + self:push(lines) +end + +return cgBvtWarpNs diff --git a/source/tools/monitor/unity/collector/container/cg_cpuacct_stat.lua b/source/tools/monitor/unity/collector/container/cg_cpuacct_stat.lua index 1d6ec301b38a66989d4c2782d497bcfe5ebeff90..a5d5ab07ed4c03f5c30191557a7e1598634c76e8 100644 --- a/source/tools/monitor/unity/collector/container/cg_cpuacct_stat.lua +++ b/source/tools/monitor/unity/collector/container/cg_cpuacct_stat.lua @@ -52,10 +52,10 @@ function CgCpuacctStat:proc(elapsed, lines) name = cell[1] local prev = self.values[c] local now = tonumber(cell[num]) - local rate = tonumber(((now - prev)*100.0) / 
self.hostCpuSum) + local rate = (100.00*tonumber((now - prev)))/ tonumber(self.hostCpuSum) values[c] = { name = name, - value = tonumber(rate) + value = rate } self.values[c] = now self.conTotal = self.conTotal + (now - prev) diff --git a/source/tools/monitor/unity/collector/container/cg_memory_drcm_latency.lua b/source/tools/monitor/unity/collector/container/cg_memory_drcm_latency.lua index 2c10f61ed3a9cd278d37abb98c619a5d02efdf65..cac5789dea5434f1a7e3114a7e5d9866342ae702 100644 --- a/source/tools/monitor/unity/collector/container/cg_memory_drcm_latency.lua +++ b/source/tools/monitor/unity/collector/container/cg_memory_drcm_latency.lua @@ -18,22 +18,36 @@ function CgMemDrcmLatency:_init_(proto, pffi, mnt, path, ls) self.ls = ls end +function CgMemDrcmLatency:global_proc(cPath) + local c = 1 + local values = {} + local gPath = cPath.."" + gPath = string.gsub(gPath, "direct_reclaim_memcg_latency", "direct_reclaim_global_latency", 1) + for line in io.lines(gPath) do + local cell = pystring:split(line) + values[c] = tonumber(cell[2]) + c = c + 1 + end + return values +end + function CgMemDrcmLatency:proc(elapsed, lines) - -- if pFile not valid ,return -1 + -- if pFile not valid ,return -1 local c = 1 CvProc.proc(self) local values = {} - + local gvalues = {} + gvalues = self:global_proc(self.pFile) for line in io.lines(self.pFile) do local cell = pystring:split(line) - local tmp = cell[1] - tmp = string.gsub(tmp, ":", "", 1) - tmp = string.gsub(tmp, ">=", "", 1) - tmp = string.gsub(tmp, "-", "to", 1) - tmp = string.gsub(tmp, "%(.*%)", "", 1) + local tmp = cell[1] + tmp = string.gsub(tmp, ":", "", 1) + tmp = string.gsub(tmp, ">=", "", 1) + tmp = string.gsub(tmp, "-", "to", 1) + tmp = string.gsub(tmp, "%(.*%)", "", 1) values[c] = { name = "memDrcm_lat_"..tmp, - value = tonumber(cell[2]) + value = tonumber(cell[2]) + gvalues[c] } c = c + 1 end diff --git a/source/tools/monitor/unity/collector/container/cg_memory_oom_cnt.lua 
b/source/tools/monitor/unity/collector/container/cg_memory_oom_cnt.lua new file mode 100644 index 0000000000000000000000000000000000000000..32c0fefa8fbccd8a21e558882cf894d8abc6ad60 --- /dev/null +++ b/source/tools/monitor/unity/collector/container/cg_memory_oom_cnt.lua @@ -0,0 +1,35 @@ +require("common.class") +local pystring = require("common.pystring") +local CvProc = require("collector.vproc") +local root = "sys/fs/cgroup/memory/" +local dfile = "/memory.oom_control" + +local CgMemOomCnt = class("cg_memoom_cnt", CvProc) + +--ls{}, (pod_name and docker_name +function CgMemOomCnt:_init_(proto, pffi, mnt, path, ls) + CvProc._init_(self, proto, pffi, mnt, root .. path .. dfile) + self.ls = ls +end + +function CgMemOomCnt:proc(elapsed, lines) + -- if pFile not valid ,return -1 + local c = 1 + CvProc.proc(self) + local values = {} + + for line in io.lines(self.pFile) do + if string.find(line, "oom_kill") then + local cell = pystring:split(line) + values[c] = { + name = "oom_kill", + value = tonumber(cell[2]) + } + c = c + 1 + end + end + self:appendLine(self:_packProto("cg_memoom_cnt", self.ls, values)) + self:push(lines) +end + +return CgMemOomCnt \ No newline at end of file diff --git a/source/tools/monitor/unity/collector/container/cg_memory_util.lua b/source/tools/monitor/unity/collector/container/cg_memory_util.lua index 54316714212fbe234af962f080fabc8d31997e4a..b59103a06b20f4da71a9bdcf51d0d6425e17730c 100644 --- a/source/tools/monitor/unity/collector/container/cg_memory_util.lua +++ b/source/tools/monitor/unity/collector/container/cg_memory_util.lua @@ -56,14 +56,6 @@ function CgMemUtil:proc(elapsed, lines) value = val } k = k + 1 - if ("total_cache" == cell[1]) or ("total_rss" == cell[1]) then - local ratio = (100.00*val) / tonumber(self.usage) - values[k] = { - name = name.."_ratio", - value = ratio - } - k = k + 1 - end end end values[k] = { @@ -71,8 +63,8 @@ function CgMemUtil:proc(elapsed, lines) value = self.usage } values[k+1] = { - name = "mem_util", - 
value = (tonumber(self.usage)*100.0)/ tonumber(self.limit) + name = "limit", + value = self.limit } self:appendLine(self:_packProto("cg_memory_util", self.ls, values)) self:push(lines) diff --git a/source/tools/monitor/unity/collector/container/cg_pmu_events.lua b/source/tools/monitor/unity/collector/container/cg_pmu_events.lua index a44e083d515ca422ccadb1baadeebb8413cff0ca..d127f64a2ffa9f3e12d4f05fcdf9cb94aac4dff6 100644 --- a/source/tools/monitor/unity/collector/container/cg_pmu_events.lua +++ b/source/tools/monitor/unity/collector/container/cg_pmu_events.lua @@ -36,7 +36,7 @@ function cgPmu:_drcName() end function cgPmu:_compName() - return {"CPI", "IPC", "MPI", "l3LoadMisRate", + return {"CPI", "MPI", "IPC", "l3LoadMisRate", "l3StoreMisRate", "l3MisRate"} end diff --git a/source/tools/monitor/unity/collector/container/cgroupv2/cg_cpu_stat_v2.lua b/source/tools/monitor/unity/collector/container/cgroupv2/cg_cpu_stat_v2.lua new file mode 100644 index 0000000000000000000000000000000000000000..357c5f352bf36c078b1d609467219efbd11a0575 --- /dev/null +++ b/source/tools/monitor/unity/collector/container/cgroupv2/cg_cpu_stat_v2.lua @@ -0,0 +1,37 @@ +require("common.class") + +local unistd = require("posix.unistd") +local pystring = require("common.pystring") +local CvProc = require("collector.vproc") +local root = "sys/fs/cgroup/" +local dfile = "/cpu.stat" + +local cgCpuStatV2 = class("cg_cpu_stat_v2", CvProc) + +function cgCpuStatV2:_init_(proto, pffi, mnt, path, ls) + CvProc._init_(self, proto, pffi, mnt, root .. path .. 
dfile) + self.ls = ls +end + +function cgCpuStatV2:proc(elapsed, lines) + if not unistd.access(self.pFile) then + return + end + + CvProc.proc(self) + local c = 1 + local values = {} + + for line in io.lines(self.pFile) do + local cell = pystring:split(line) + values[c] = { + name = cell[1], + value = tonumber(cell[2]) + } + c = c + 1 + end + self:appendLine(self:_packProto("cg_cpu_stat_v2", self.ls, values)) + self:push(lines) +end + +return cgCpuStatV2 diff --git a/source/tools/monitor/unity/collector/container/cgroupv2/cg_sched_cfs_stat_v2.lua b/source/tools/monitor/unity/collector/container/cgroupv2/cg_sched_cfs_stat_v2.lua new file mode 100644 index 0000000000000000000000000000000000000000..0d910bd431f1bdc2025d36734a2da597b4e66849 --- /dev/null +++ b/source/tools/monitor/unity/collector/container/cgroupv2/cg_sched_cfs_stat_v2.lua @@ -0,0 +1,40 @@ +require("common.class") +local unistd = require("posix.unistd") +local pystring = require("common.pystring") +local CvProc = require("collector.vproc") +local root = "sys/fs/cgroup/" +local dfile = "/cpu.sched_cfs_statistics" + +local cgCpuSchedCfsStatV2 = class("cg_sched_cfs_stat_v2", CvProc) + +function cgCpuSchedCfsStatV2:_init_(proto, pffi, mnt, path, ls) + CvProc._init_(self, proto, pffi, mnt, root .. path .. 
dfile) + self.ls = ls +end + +function cgCpuSchedCfsStatV2:getMetricNames() + return { "serve", "oncpu", "queue_other", "queue_sibling", "queue_max", "force_idle" }; +end + +function cgCpuSchedCfsStatV2:proc(elapsed, lines) + if not unistd.access(self.pFile) then + return + end + CvProc.proc(self) + local values = {} + local metrics = self:getMetricNames() + for line in io.lines(self.pFile) do + local cell = pystring:split(line) + for c, val in ipairs(cell) do + values[c] = { + name = metrics[c], + value = tonumber(val) + } + end + break + end + self:appendLine(self:_packProto("cg_sched_cfs_stat_v2", self.ls, values)) + self:push(lines) +end + +return cgCpuSchedCfsStatV2 diff --git a/source/tools/monitor/unity/collector/container/con_net_stat.lua b/source/tools/monitor/unity/collector/container/con_net_stat.lua new file mode 100644 index 0000000000000000000000000000000000000000..867ac244de8357ca81f459fac26e2b3168ae347e --- /dev/null +++ b/source/tools/monitor/unity/collector/container/con_net_stat.lua @@ -0,0 +1,121 @@ +require("common.class") +local pystring = require("common.pystring") +local system = require("common.system") +local CvProc = require("collector.vproc") + +local cpu_root = "sys/fs/cgroup/cpu/" +local procs_file = "/cgroup.procs" +local proc = "proc/" +local net_dev = "/net/dev" + +local ConNetStat = class("con_net_stat", CvProc) + +function ConNetStat:_init_(proto, pffi, mnt, path, ls) + CvProc._init_(self, proto, pffi, mnt, cpu_root..path..procs_file) + self.ls = ls + self.mnt = mnt + self.procnetpath = "" + self.init_pid = "" + +end + +--- open cgroup.procs to get the first pid +function ConNetStat:_getInitPid_() + local pfile = io.open(self.pFile, "r") + local firstline = pfile:read("*line") + self.init_pid = firstline + io.close(pfile) +end + +--- procnetpath = "/proc/self/net/dev" +function ConNetStat:_getProcNetPath_() + self:_getInitPid_() + self.procnetpath = self.mnt..proc..self.init_pid..net_dev +end + +local function 
isIgnoredDevice(devname) + local ignoredDevicePrefixes = {"lo", "veth", "docker"} + for _, value in ipairs(ignoredDevicePrefixes) do + if pystring:startswith(devname, value) then + return true + end + end + return false +end + +function ConNetStat:proc(elapsed, lines) + local c = 1 + local k = 1 + local devName + local RxBytes, RxPackets = 0, 0 + local TxBytes, TxPackets = 0, 0 + local RxPacketsDrop, TxPacketsDrop = 0, 0 + local values = {} + CvProc.proc(self) + self:_getProcNetPath_() + + for line in io.lines(self.procnetpath) do + local cell + local new_line = line + + --- skip first two lines + if c < 3 then + c = c + 1 + goto continue + end + + new_line = pystring:replace(new_line, ":", "") + --- "eth0" and "lo" may start with " " + new_line = pystring:lstrip(new_line) + cell = pystring:split(new_line) + if #cell ~= 17 then + print("invalid interface stats line " .. new_line) + goto continue + end + + devName = cell[1] + --- ignore lo and veth interface + if isIgnoredDevice(devName) then + goto continue + end + + RxBytes = RxBytes + tonumber(cell[2]) + RxPackets = RxPackets + tonumber(cell[3]) + RxPacketsDrop = RxPacketsDrop + tonumber(cell[5]) + TxBytes = TxBytes + tonumber(cell[10]) + TxPackets = TxPackets + tonumber(cell[11]) + TxPacketsDrop = TxPacketsDrop + tonumber(cell[13]) + + ::continue:: + end + + values[k] = { + name = "net_rx_bytes", + value = RxBytes + } + values[k + 1] = { + name = "net_rx_packets", + value = RxPackets + } + values[k + 2] = { + name = "net_rx_dropped", + value = RxPacketsDrop + } + values[k + 3] = { + name = "net_tx_bytes", + value = TxBytes + } + values[k + 4] = { + name = "net_tx_packets", + value = TxPackets + } + values[k + 5] = { + name = "net_tx_dropped", + value = TxPacketsDrop + } + + self:appendLine(self:_packProto("con_net_stat", self.ls, values)) + self:push(lines) +end + +return ConNetStat \ No newline at end of file diff --git a/source/tools/monitor/unity/collector/container/pod_storage_stat.lua 
b/source/tools/monitor/unity/collector/container/pod_storage_stat.lua new file mode 100644 index 0000000000000000000000000000000000000000..57de4c3e211d9c3d3d09e2f0fc265115d458ce99 --- /dev/null +++ b/source/tools/monitor/unity/collector/container/pod_storage_stat.lua @@ -0,0 +1,273 @@ +require("common.class") +local json = require("cjson") +local https = require("ssl.https") +local ltn12 = require("ltn12") +local CkvProc = require("collector.kvProc") +local CvProc = require("collector.vproc") + +local PodStorageStat = class("con_storage_stat", CkvProc) + +local default_token_path = "/var/run/secrets/kubernetes.io/serviceaccount/token" +local default_ca_path = "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt" +local summary_api = "/stats/summary" + +function PodStorageStat:_init_(resYaml, proto, pffi, mnt) + CkvProc._init_(self, proto, pffi, mnt, nil, "PodStorageStat") + self._token = "" + self.pods = {} + self.cons = {} + self._ns_blacklist = {} + self._con_capacity_bytes = math.maxinteger + + local ns_blacklist = resYaml.container.nsBlacklist + for _, ns in ipairs(ns_blacklist) do + self._ns_blacklist[ns] = 1 + end +end + +-- get container's resoure info and pod's volume info +function PodStorageStat:setup(cons) + -- reset the data struct first + self.pods= {} + self.cons = {} + self._con_capacity_bytes = math.maxinteger + + for _, con in ipairs(cons) do + local pod_key = con.pod.name .. "-" .. con.pod.namespace + local con_key = pod_key .. "-" .. con.name + self.cons[con_key] = con + self.pods[pod_key] = con.pod + end +end + +-- query 10255/stats/summary to get storage stats +function PodStorageStat:querySummaryStats(token_path, ca_path) + if self._token == "" then + local f = io.open(token_path, "r") + if not f then + print("service token not exist!") + return nil + end + + self._token = f:read("*a") + f:close() + end + + local header = { + ["Authorization"] = "Bearer ".. 
self._token + } + + local host_ip = os.getenv("HOST_IP") + if not host_ip then + host_ip = "127.0.0.1" + end + + local node_name = os.getenv("NODE_NAME") + if not node_name then + print("NODE_NAME not exist!") + return nil + end + + local url = "https://"..host_ip..":10250" .. summary_api + + local resp = {} + local params = { + url = url, + method = "GET", + verify = "none", + protocol = "any", + headers = header, + cafile = ca_path, + sink = ltn12.sink.table(resp) + } + + local worked, code, _, _ = https.request(params) + if not worked then + print("failed to query summary api: ", code) + return nil + end + + return table.concat(resp) +end + +local function appendMetric(self, table, con, pod, podns, protoType) + local label = { + {name="pod", index=pod}, + {name="nanmespace", index=podns}, + {name="container", index=con}, + } + + -- container empheral storage: rootfs and log + if protoType == "rootfs" or protoType == "logs" then + label[#label+1] = {name="storage", index=protoType} + protoType = "container_ephemeral_storage" + end + + if protoType == "pod_volume" and table.name then + label[#label+1] = {name="Volume", index=table.name} + end + + for type, value in pairs(table) do + if type == "time" or type == "device" or type == "name" then + goto continue + end + + -- all container's rootfs and log's capacityBytes are the + -- same (equal with nodes's rootfs capacity) + if type == "capacityBytes" and + protoType == "container_ephemeral_storage" then + if self._con_capacity_bytes == math.maxinteger then + self._con_capacity_bytes = tonumber(value) + end + end + + local cell = { + {name=type, value=value}, + } + self:appendLine(self:_packProto(protoType, label, cell)) + ::continue:: + end +end + +local function convertToBytes(limit, capacityBytes) + local suffixes = { + ["E"] = 10^18, + ["P"] = 10^15, + ["T"] = 10^12, + ["G"] = 10^9, + ["M"] = 10^6, + ["k"] = 10^3, + ["Ei"] = 2^60, + ["Pi"] = 2^50, + ["Ti"] = 2^40, + ["Gi"] = 2^30, + ["Mi"] = 2^20, + ["Ki"] = 
2^10 + } + + local value, suffix = limit:match("(%d+%.?%d*)(%a+)") + value = tonumber(value) + + if suffixes[suffix] then + return value * suffixes[suffix] + elseif suffixes[suffix:upper()] then + return value * suffixes[suffix:upper()] + else + return capacityBytes + end +end + +local function searchKey(table, key) + if type(table) ~= "table" then return nil end + + if table[key] ~= nil then + return table[key] + end + + for k, v in pairs(table) do + if type(v) ~= "table" then goto continue end + local result = searchKey(v, key) + if result ~= nil then + return result + end + ::continue:: + end + + return nil +end + +function PodStorageStat:packContainerLimit(con_name, pod_name, pod_ns) + local limit_bytes = self._con_capacity_bytes + local labels = { + {name="pod", index=pod_name}, + {name="nanmespace", index=pod_ns}, + {name="container", index=con_name}, + {name="storage", index=""} + } + local con_key = pod_name.."-"..pod_ns.."-"..con_name + local con = self.cons[con_key] + if con and con.resources and con.resources.limits then + local limits = con.resources.limits + local es_limit = limits["ephemeral-storage"] + if es_limit then + limit_bytes = convertToBytes(es_limit) + end + end + + local cell = { + {name="limit", value=limit_bytes} + } + self:appendLine(self:_packProto("container_ephemeral_storage", + labels, cell)) +end + +function PodStorageStat:packVolumeLimit(volume_name, pod_name, pod_ns) + local pod = self.pods[pod_name.."-"..pod_ns] + if not pod or not pod.volume then return end + + for _, vol in ipairs(pod.volume) do + if vol.name == volume_name then + local labels = { + {name="pod", index=pod_name}, + {name="nanmespace", index=pod_ns}, + {name="container", index=""}, + {name="Volume", index=volume_name} + } + + local volume_limit = searchKey(vol, "sizeLimit") + if volume_limit == nil then return end + + local limit_bytes = convertToBytes(volume_limit) + local cell = { + {name="limit", value=limit_bytes} + } + 
self:appendLine(self:_packProto("pod_volume", + labels, cell)) + break + end + end +end + +function PodStorageStat:proc(elapsed, lines) + CvProc.proc(self) + + local statres = self:querySummaryStats(default_token_path, default_ca_path) + if not statres then + print("Failed to get summary stats from summary api!") + return + end + + local statobj = json.decode(statres) + for _, pod in ipairs(statobj.pods) do + local podname = pod.podRef.name + local podns = pod.podRef.namespace + -- skip blacklist namespace + if self._ns_blacklist[podns] then + goto continue + end + + for _, con in ipairs(pod.containers) do + appendMetric(self, con.rootfs, con.name, + podname, podns, "rootfs") + appendMetric(self, con.logs, con.name, + podname, podns, "logs") + self:packContainerLimit(con.name, podname, podns) + end + + appendMetric(self, pod["ephemeral-storage"], "", + podname, podns, "pod_ephemeral_storage") + + --[[ dont't collect pod volume info for now + for _, vol in ipairs(pod.volume) do + appendMetric(self, vol, "", + podname, podns, "pod_volume") + self:packVolumeLimit(vol.name, podname, podns) + end + --]] + ::continue:: + end + + self:push(lines) +end + +return PodStorageStat \ No newline at end of file diff --git a/source/tools/monitor/unity/collector/podmem.lua b/source/tools/monitor/unity/collector/container/podmem.lua similarity index 32% rename from source/tools/monitor/unity/collector/podmem.lua rename to source/tools/monitor/unity/collector/container/podmem.lua index f36e8e8d15b99d99b73737bb549fd9d3bf6d42ca..d29e64982db527fcfea74b76d668cd44c0aa80e6 100644 --- a/source/tools/monitor/unity/collector/podmem.lua +++ b/source/tools/monitor/unity/collector/container/podmem.lua @@ -5,31 +5,23 @@ --- require("common.class") -local fcntl = require("posix.fcntl") -local unistd = require("posix.unistd") -local dirent = require("posix.dirent") -local stdlib = require("posix.stdlib") local stat = require("posix.sys.stat") local CkvProc = require("collector.kvProc") local CvProc 
= require("collector.vproc") local pystring = require("common.pystring") -local dockerinfo = require("common.dockerinfo") -local json = require("cjson") local CPodMem = class("podmem", CkvProc) -local ChttpCli = require("httplib.httpCli") -local podman = require("collector.podMan.podsAll") +--local podman = require("collector.podMan.podsAll") -function CPodMem:_init_(proto, pffi, mnt, pFile) - CkvProc._init_(self, proto, pffi, mnt, pFile , "podmem") +function CPodMem:_init_(resYaml, proto, pffi, mnt) + CkvProc._init_(self, proto, pffi, mnt, nil, "podmem") self._ffi = require("collector.native.plugincffi") self.cffi = self._ffi.load("podmem") self.root_fs = mnt self.proc_fs = mnt .. "/proc/" self.allcons = {} self.inodes = {} + self.is_pod = 0 self.podmemres = {} - self.blacklist = {["arms-prom"] = 1, ["kube-system"] = 1, ["kube-public"] = 1, ["kube-node-lease"] = 1} - -- self.blacklist= {} self.cffi.monitor_init(self.root_fs) end @@ -40,31 +32,41 @@ end function CPodMem:get_inode(file) local f=stat.lstat(file) if f ~= nil then - return f["st_ino"] + return f["st_ino"] else return -1 end end +function CPodMem:setup(cons) + -- reset the data struct first + self.allcons = {} + self.inodes = {} -function CPodMem:get_allcons() - local cri_cons = podman:getAllcons(self.root_fs) - for _,v in pairs(cri_cons) do - if not self.blacklist[v['pod']['namespace']] then - local path = self.root_fs .. "/sys/fs/cgroup/memory/" .. v['path'] - local inode = self:get_inode(path) + -- setup self.allcons and self.inodes + for _, v in pairs(cons) do + local path = self.root_fs .. "sys/fs/cgroup/memory" .. 
v['path'] + local inode = self:get_inode(path) + if inode == -1 then + print("Get cgroup path inode failed: ", path) + goto continue + end + if v.pod then -- is pod + self.is_pod = 1 self.allcons[v['name']..v['pod']['name']] = {["pod"]=v['pod']['name'], ["ns"]=v['pod']['namespace'], ["path"]=path, ["inode"] = inode, ["cname"]=v['name']} self.inodes[inode] = v['name']..v['pod']['name'] + else -- is container + self.allcons[v['name']] = {["path"]=path, ["inode"]=inode, ["cname"]=v['name']} + self.inodes[inode] = v['name'] end + ::continue:: end end function CPodMem:proc(elapsed, lines) CvProc.proc(self) - self.allcons = {} - self.inodes = {} self.podmemres = {} - self:get_allcons() + -- for k,v in pairs(self.inodes) do print(k,v) end -- for k,v in pairs(self.allcons) do for k1,v1 in pairs(v) do print(k,k1,v1) end end local fs = io.open("/tmp/.memcg.txt", "w") @@ -80,32 +82,51 @@ function CPodMem:proc(elapsed, lines) local res = self._ffi.string(resptr) local reslines = pystring:splitlines(res) - for _,line in pairs(reslines) do - local cinode, cache, size, filen = line:match("cinode=(%d+) cached=(%d+) size=(%d+) file=(%S+)") - if self.inodes[tonumber(cinode)] ~= nil then - if filen:find("overlayfs/snapshots/%d+/fs") ~= nil then - filen = pystring:split(pystring:split(filen,"overlayfs/snapshots/")[2],"/fs/")[2] - filen = "/" .. 
filen - end - if filen:find("diff") ~= nil then filen = pystring:split(filen,"diff")[2] end - local cname = self.inodes[tonumber(cinode)] - if not self.podmemres[cname] then self.podmemres[cname]= {} end - self.podmemres[cname][filen] = {["pod"]=self.allcons[cname]['pod'], ["ns"]=self.allcons[cname]['ns'], ["size"]=size, ["cache"]=cache, ["cname"]=self.allcons[cname]['cname']} + + if self.is_pod == 1 then -- is pod + for _,line in pairs(reslines) do + local cinode, cache, size, filen = line:match("cinode=(%d+) cached=(%d+) size=(%d+) file=(%S+)") + if self.inodes[tonumber(cinode)] ~= nil then + if filen:find("overlayfs/snapshots/%d+/fs") ~= nil then + filen = pystring:split(pystring:split(filen,"overlayfs/snapshots/")[2],"/fs/")[2] + filen = "/" .. filen + end + if filen:find("diff") ~= nil then filen = pystring:split(filen,"diff")[2] end + local cname = self.inodes[tonumber(cinode)] + if not self.podmemres[cname] then self.podmemres[cname]= {} end + self.podmemres[cname][filen] = {["pod"]=self.allcons[cname]['pod'], ["ns"]=self.allcons[cname]['ns'], ["size"]=size, ["cache"]=cache, ["cname"]=self.allcons[cname]['cname']} + end end - end - self._ffi.C.free(resptr) + self._ffi.C.free(resptr) - for k,v in pairs(self.podmemres) do - for k1,v1 in pairs(v) do - -- for k2,v2 in pairs(v1) do print(k,k1,k2,v2) end - local cell = {{name="size", value=tonumber(v1['size'])}} - local label = {{name="podname",index=v1['pod'],}, {name="podns",index = v1['ns'],},{name="file",index = k1,},{name="container", index=v1['cname']},} - self:appendLine(self:_packProto("podmem", label, cell)) - cell = {{name="cached", value=tonumber(v1['cache'])}} - label = {{name="podname",index=v1['pod'],}, {name="podns",index = v1['ns'],},{name="file",index = k1,},{name="container", index=v1['cname']},} - self:appendLine(self:_packProto("podmem", label, cell)) + for k,v in pairs(self.podmemres) do + for k1,v1 in pairs(v) do + -- for k2,v2 in pairs(v1) do print(k,k1,k2,v2) end + local cell = 
{{name="size", value=tonumber(v1['size'])}} + local label = {{name="pod",index=v1['pod'],}, {name="namespace",index = v1['ns'],},{name="file",index = k1,},{name="container", index=v1['cname']},} + self:appendLine(self:_packProto("podmem", label, cell)) + cell = {{name="cached", value=tonumber(v1['cache'])}} + label = {{name="pod",index=v1['pod'],}, {name="namespace",index = v1['ns'],},{name="file",index = k1,},{name="container", index=v1['cname']},} + self:appendLine(self:_packProto("podmem", label, cell)) + end + end + self:push(lines) + else -- is container + for _,line in pairs(reslines) do + local cinode, cache, size, filen = line:match("cinode=(%d+) cached=(%d+) size=(%d+) file=(%S+)") + -- todo: 显示容器内文件名 + if self.inodes[tonumber(cinode)] ~= nil then + local cname = self.inodes[tonumber(cinode)] + local cell = {{name = "size", value = tonumber(size)}} + local label = {{name="pod",index=cname}, {name="namespace",index = "None",}, + {name = "container", index = cname}, {name = "file", index = filen}} + self:appendLine(self:_packProto("podmem", label, cell)) + cell = {{name = "cached", value = tonumber(cache)}} + self:appendLine(self:_packProto("podmem", label, cell)) + end end + self._ffi.C.free(resptr) + self:push(lines) end - self:push(lines) end -return CPodMem +return CPodMem \ No newline at end of file diff --git a/source/tools/monitor/unity/collector/plugin/podmem/Makefile b/source/tools/monitor/unity/collector/container/podmem/Makefile similarity index 100% rename from source/tools/monitor/unity/collector/plugin/podmem/Makefile rename to source/tools/monitor/unity/collector/container/podmem/Makefile diff --git a/source/tools/monitor/unity/collector/plugin/podmem/btfparse.h b/source/tools/monitor/unity/collector/container/podmem/btfparse.h similarity index 94% rename from source/tools/monitor/unity/collector/plugin/podmem/btfparse.h rename to source/tools/monitor/unity/collector/container/podmem/btfparse.h index 
3a586d0f0bdec166b25804215f644cd3cfef8a52..5c4b85ee18826b6ef3ca0abb470dbe61ad904b00 100644 --- a/source/tools/monitor/unity/collector/plugin/podmem/btfparse.h +++ b/source/tools/monitor/unity/collector/container/podmem/btfparse.h @@ -31,6 +31,7 @@ struct member_attribute * Note: Remember to free pointer of struct member_attribute */ struct member_attribute *btf_find_struct_member(struct btf *btf, char *struct_name, char *member_name); +int btf_type_size(struct btf *btf, char *type_name); void btf__free(struct btf *btf); diff --git a/source/tools/monitor/unity/collector/plugin/podmem/cache.h b/source/tools/monitor/unity/collector/container/podmem/cache.h similarity index 43% rename from source/tools/monitor/unity/collector/plugin/podmem/cache.h rename to source/tools/monitor/unity/collector/container/podmem/cache.h index 8b8c14bc63d0518973648426c6771f1475f4a14f..4fbe9dae40a98f2125e51a4523eceacaa79430d2 100644 --- a/source/tools/monitor/unity/collector/plugin/podmem/cache.h +++ b/source/tools/monitor/unity/collector/container/podmem/cache.h @@ -43,47 +43,47 @@ extern unsigned long memstart_addr; #ifdef __aarch64__ /*arm arch*/ #if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 1, 0) /*kernel 510*/ -#define VA_BITS (48) -#define SZ_2M 0x00200000 -#define STRUCT_PAGE_MAX_SHIFT (6) -#define VA_BITS_MIN (48) - -#define _PAGE_END(va) (-((unsigned long )(1) << ((va) - 1))) -#define _PAGE_OFFSET(va) (-((unsigned long )(1) << (va))) -#define PAGE_OFFSET (_PAGE_OFFSET(VA_BITS)) -#define VMEMMAP_SIZE ((_PAGE_END(VA_BITS_MIN) - PAGE_OFFSET) \ +# define VA_BITS (48) +# define SZ_2M 0x00200000 +# define STRUCT_PAGE_MAX_SHIFT (6) +# define VA_BITS_MIN (48) +# define _PAGE_END(va) (-((unsigned long )(1) << ((va) - 1))) +# define _PAGE_OFFSET(va) (-((unsigned long )(1) << (va))) +# define PAGE_OFFSET (_PAGE_OFFSET(VA_BITS)) +# define VMEMMAP_SIZE ((_PAGE_END(VA_BITS_MIN) - PAGE_OFFSET) \ >> (page_shift - STRUCT_PAGE_MAX_SHIFT)) -#define PHYS_OFFSET (memstart_addr) -#define VMEMMAP_START 
(-VMEMMAP_SIZE - SZ_2M) -#define vmemmap (VMEMMAP_START - (memstart_addr >> page_shift)*PAGE_STRUCT_SIZE) - +# define PHYS_OFFSET (memstart_addr) +# define VMEMMAP_START (-VMEMMAP_SIZE - SZ_2M) +# define vmemmap (VMEMMAP_START - (memstart_addr >> page_shift)*PAGE_STRUCT_SIZE) #elif LINUX_VERSION_CODE >= KERNEL_VERSION(4, 1, 9)/*kernel 419*/ -#define VA_BITS (48) -#define VA_START ((unsigned long )(0xffffffffffffffff) - \ +# define VA_BITS (48) +# define VA_START ((unsigned long )(0xffffffffffffffff) - \ ((unsigned long )(1) << VA_BITS) + 1) -#define PAGE_OFFSET ((unsigned long )(0xffffffffffffffff) - \ +# define PAGE_OFFSET ((unsigned long )(0xffffffffffffffff) - \ ((unsigned long )(1) << (VA_BITS - 1)) + 1) -#define STRUCT_PAGE_MAX_SHIFT (6) -#define VMEMMAP_SIZE ((unsigned long )(1) << (VA_BITS - page_shift - 1 + STRUCT_PAGE_MAX_SHIFT)) -#define VMEMMAP_START (PAGE_OFFSET - VMEMMAP_SIZE) -#define vmemmap (VMEMMAP_START - (memstart_addr >> page_shift)*PAGE_STRUCT_SIZE) - +# define STRUCT_PAGE_MAX_SHIFT (6) +# define VMEMMAP_SIZE ((unsigned long )(1) << (VA_BITS - page_shift - 1 + STRUCT_PAGE_MAX_SHIFT)) +# define VMEMMAP_START (PAGE_OFFSET - VMEMMAP_SIZE) +# define vmemmap (VMEMMAP_START - (memstart_addr >> page_shift)*PAGE_STRUCT_SIZE) #else /*others*/ -#define SZ_64K 0x00010000 -#define PAGE_OFFSET (unsigned long )(0xffffffc000000000) -#define VMALLOC_END (PAGE_OFFSET - (unsigned long)(0x400000000) - SZ_64K) -#define vmemmap ((struct page *)(VMALLOC_END + SZ_64K)) - +# define SZ_64K 0x00010000 +# define PAGE_OFFSET (unsigned long )(0xffffffc000000000) +# define VMALLOC_END (PAGE_OFFSET - (unsigned long)(0x400000000) - SZ_64K) +# define vmemmap ((struct page *)(VMALLOC_END + SZ_64K)) #endif /*end to check ver, arm arch*/ -#define PFN_TO_VIRT(pfn) (((unsigned long)((pfn) - PHYS_OFFSET) | PAGE_OFFSET) + ((pfn) << page_shift)) -#define PFN_TO_PAGE(pfn) (vmemmap + (pfn) * PAGE_STRUCT_SIZE) -#define is_kvaddr(kvaddr) (!!(kvaddr >= PAGE_OFFSET)) +# define 
PFN_TO_VIRT(pfn) (((unsigned long)((pfn) - PHYS_OFFSET) | PAGE_OFFSET) + ((pfn) << page_shift)) +// assume CONFIG_SPARSEMEM_VMEMMAP is set +# define PFN_TO_PAGE(pfn) (vmemmap + (pfn) * PAGE_STRUCT_SIZE) +# define is_kvaddr(kvaddr) (!!(kvaddr >= PAGE_OFFSET)) +# define SECTION_SIZE_BITS 30 // arch/arm64/include/asm/memory.h #else /*x86 arch*/ - -#define PFN_TO_VIRT(pfn) (page_offset_base + ((pfn) << page_shift)) -#define PFN_TO_PAGE(pfn) (vmemmap_base + (pfn) * PAGE_STRUCT_SIZE) -#define is_kvaddr(kvaddr) (!!(kvaddr >= page_offset_base)) +# define SECTION_SIZE_BITS 27 +# define PFN_TO_VIRT(pfn) (page_offset_base + ((pfn) << page_shift)) +// assume CONFIG_SPARSEMEM_VMEMMAP is set +# define PFN_TO_PAGE(pfn) (vmemmap_base + (pfn) * PAGE_STRUCT_SIZE) +# define is_kvaddr(kvaddr) (!!(kvaddr >= page_offset_base)) #endif + #define max_pfn (g_max_phy_addr>>12) struct options { @@ -101,3 +101,20 @@ ssize_t kpagecgroup_read(void *buf, size_t count, off_t offset); uintptr_t lookup_kernel_symbol(const char *symbol_name); ssize_t kcore_readmem(unsigned long kvaddr, void *buf, ssize_t size); #endif /* _PAGESCAN_UTIL_H */ + +/* + * Marco used to acheive __pfn_to_sections. 
+ * we assume CONFIG_SPARSEMEM and CONFIG_SPARSEMEM_VMEMMAP is set, + * so NODE_NOT_IN_PAGE_FLAGS is define, use __pfn_to_section defined in mmzone.h + */ +extern unsigned long mem_section_base; +extern int mem_section_size; + +#define PFN_SECTION_SHIFT (SECTION_SIZE_BITS - page_shift) +#define pfn_to_section_nr(pfn) ((pfn) >> PFN_SECTION_SHIFT) +/* + * We assume CONFIG_SPARSEMEM_EXTREME is set, + */ +#define SECTIONS_PER_ROOT (PAGE_SIZE / mem_section_size) +#define SECTION_ROOT_MASK (SECTIONS_PER_ROOT - 1) +#define SECTION_NR_TO_ROOT(sec) ((sec) / SECTIONS_PER_ROOT) \ No newline at end of file diff --git a/source/tools/monitor/unity/collector/plugin/podmem/memcg.cpp b/source/tools/monitor/unity/collector/container/podmem/memcg.cpp similarity index 57% rename from source/tools/monitor/unity/collector/plugin/podmem/memcg.cpp rename to source/tools/monitor/unity/collector/container/podmem/memcg.cpp index 33ede0821dc8e0745f57d34d8b30248e5f42d313..d0cc5fff655d37590d67539931cf313a3d9dc487 100644 --- a/source/tools/monitor/unity/collector/plugin/podmem/memcg.cpp +++ b/source/tools/monitor/unity/collector/container/podmem/memcg.cpp @@ -31,6 +31,7 @@ using namespace std; static bool full_scan; static unsigned int scan_rate = 4; + struct file_info { char filename[256]; unsigned long inode; @@ -48,22 +49,29 @@ struct myComp2 { bool operator()(const pair &a,const pair &b) { - return a.second>b.second; + return a.second >= b.second; } }; -set ,myComp2> cachedset; +//set ,myComp2> cachedset; +// cachedset[cgroup_inode] = set> +map, myComp2>* > cinode_cached; map files; struct myComp { bool operator()(const unsigned long &a,const unsigned long &b) { - return files[a]->cached >= files[b]->cached; + return files[a]->cached > files[b]->cached || (files[a]->cached == files[b]->cached && a != b); } }; set fileset; -map inodes; +//map inodes; map history_inodes; +// memcg -> inode cache +map memcg_inodes; + +extern int kpagecgroup_fd; +extern int page_cgroup_size; extern struct 
member_attribute *get_offset(string struct_name, string member_name); static int prepend(char **buffer, int *buflen, const char *str, int namelen, int off) @@ -137,6 +145,8 @@ int get_top_dentry(unsigned long pfn, int top, unsigned long cinode) { unsigned long page = PFN_TO_PAGE(pfn); map::iterator iter2; + map, myComp2>* >::iterator it; + set, myComp2> *cachedset; struct member_attribute *att; struct member_attribute *att2; unsigned long map = 0; @@ -144,6 +154,14 @@ int get_top_dentry(unsigned long pfn, int top, unsigned long cinode) unsigned long inode = 0; unsigned long i_ino; char* tables; + + it = cinode_cached.find(cinode); + if (it != cinode_cached.end()) { + cachedset = it->second; + } else { + return 0; + } + att = get_offset("page", "mapping"); if (!att) { return 0; @@ -165,20 +183,31 @@ int get_top_dentry(unsigned long pfn, int top, unsigned long cinode) inode = *((unsigned long*) tables); cached = *((unsigned long*) (tables+att2->offset-att->offset)); free(tables); - /* skip file cache < 100K */ - //printf("top:%d, cached size:%d, cached:%d\n",top, cachedset.size(), cached*4); - if (history_inodes.find(inode) != history_inodes.end() or (cachedset.size() >= top and (cached*4 < (--cachedset.end())->second))) - return 0; - cachedset.insert(pair(inode,cached*4)); + /* skip file cache < 1M */ + if (cached*4 < 1024) + return 0; + + if (history_inodes.find(inode) != history_inodes.end() or (cachedset->size() >= top and (cached*4 < (--cachedset->end())->second))) + return 0; + + //printf("---after filter---: cached:%d, cinode:%ld, inode:%lu\n", cached*4, cinode, inode); + cachedset->insert(pair(inode, cached*4)); history_inodes[inode] = cinode; - if (cachedset.size() > top) - cachedset.erase(--cachedset.end()); + if (cachedset->size() > top) { + set, myComp2>::iterator iter; + iter = --cachedset->end(); + //printf("erase inode size: %d\n", iter->second); + cachedset->erase(--cachedset->end()); + } + + cinode_cached[cinode] = cachedset; } -static int 
get_dentry_top() +static int iterate_dentry_path() { set, myComp2>::iterator iter; + map, myComp2>* >::iterator it; unsigned long map = 0; unsigned long inode = 0; unsigned long i_ino; @@ -192,107 +221,109 @@ static int get_dentry_top() int del = 0; struct file_info *info; struct member_attribute *att; - for(iter=cachedset.begin();iter!=cachedset.end();iter++) - { - char tmp[4096] = {0}; - char *end = tmp + 4095; - int buflen = 4095; - char filename[1024] = {0}; - cached = iter->second; - inode = iter->first; - att = get_offset("inode", "i_ino"); - if (!att) { - return 0; - } - kcore_readmem(inode + att->offset, &i_ino, sizeof(i_ino)); - att = get_offset("inode", "i_size"); - if (!att) { - return 0; - } - kcore_readmem(inode + att->offset, &i_size, sizeof(i_size)); + for (it = cinode_cached.begin(); it != cinode_cached.end(); ++it) { + set, myComp2> *cachedset = it->second; + for (iter = cachedset->begin(); iter != cachedset->end(); iter++) { + char tmp[4096] = {0}; + char *end = tmp + 4095; + int buflen = 4095; + char filename[1024] = {0}; + cached = iter->second; + inode = iter->first; + att = get_offset("inode", "i_ino"); + if (!att) { + return 0; + } + kcore_readmem(inode + att->offset, &i_ino, sizeof(i_ino)); + att = get_offset("inode", "i_size"); + if (!att) { + return 0; + } + kcore_readmem(inode + att->offset, &i_size, sizeof(i_size)); - mount = inode2mount(inode); - att = get_offset("inode","i_dentry"); - if (!att) { - return 0; - } - kcore_readmem(inode + att->offset, &inode_dentry, sizeof(inode)); - if (!is_kvaddr(inode_dentry)) - continue; - att = get_offset("dentry", "d_alias"); - if (!att) { - att = get_offset("dentry", "d_u"); - if (!att) + mount = inode2mount(inode); + att = get_offset("inode","i_dentry"); + if (!att) { return 0; - } + } + kcore_readmem(inode + att->offset, &inode_dentry, sizeof(inode)); + if (!is_kvaddr(inode_dentry)) + continue; + att = get_offset("dentry", "d_alias"); + if (!att) { + att = get_offset("dentry", "d_u"); + if (!att) 
+ return 0; + } - dentry_first = inode_dentry - att->offset; - memset(filename, 0, 1024); - att = get_offset("dentry", "d_parent"); - if (!att) { - return 0; - } - kcore_readmem(dentry_first+att->offset, &pdentry, sizeof(pdentry)); - att = get_offset("dentry", "d_hash"); - if (!att) { - return 0; - } - kcore_readmem(dentry_first+att->offset + sizeof(void*), &hdentry, sizeof(hdentry)); - if ((dentry_first != pdentry) && !hdentry) - del = 1; - do { - unsigned long mount_parent = 0; - unsigned long mount_dentry = 0; - int len = 0; - int ret = 0; - - get_filename(dentry_first, filename, 1024); - len = strlen(filename); - if (len <=0 || ((len == 1) && (filename[0] == '/'))) - break; - - prepend(&end, &buflen, filename, strlen(filename), 0); - att = get_offset("mount", "mnt_parent"); + dentry_first = inode_dentry - att->offset; + memset(filename, 0, 1024); + att = get_offset("dentry", "d_parent"); if (!att) { return 0; } - ret = kcore_readmem(mount + att->offset, &mount_parent , sizeof(mount_parent)); - if (ret != sizeof(mount_parent)) - break; - att = get_offset("mount", "mnt_mountpoint"); + kcore_readmem(dentry_first+att->offset, &pdentry, sizeof(pdentry)); + att = get_offset("dentry", "d_hash"); if (!att) { return 0; } - kcore_readmem(mount+ att->offset, &mount_dentry, sizeof(mount_dentry)); - if (mount_parent == mount || mount_dentry==dentry_first) - break; - dentry_first = mount_dentry; - mount = mount_parent; - } while(true); - - if (buflen >= 4092) - continue; - info = (struct file_info *)malloc(sizeof(struct file_info)); - if (!info) { - printf("alloc file info error \n"); - continue; + kcore_readmem(dentry_first+att->offset + sizeof(void*), &hdentry, sizeof(hdentry)); + if ((dentry_first != pdentry) && !hdentry) + del = 1; + do { + unsigned long mount_parent = 0; + unsigned long mount_dentry = 0; + int len = 0; + int ret = 0; + + get_filename(dentry_first, filename, 1024); + len = strlen(filename); + if (len <=0 || ((len == 1) && (filename[0] == '/'))) + break; 
+ + prepend(&end, &buflen, filename, strlen(filename), 0); + att = get_offset("mount", "mnt_parent"); + if (!att) { + return 0; + } + ret = kcore_readmem(mount + att->offset, &mount_parent , sizeof(mount_parent)); + if (ret != sizeof(mount_parent)) + break; + att = get_offset("mount", "mnt_mountpoint"); + if (!att) { + return 0; + } + kcore_readmem(mount+ att->offset, &mount_dentry, sizeof(mount_dentry)); + if (mount_parent == mount || mount_dentry==dentry_first) + break; + dentry_first = mount_dentry; + mount = mount_parent; + } while(true); + + if (buflen >= 4092) + continue; + info = (struct file_info *)malloc(sizeof(struct file_info)); + if (!info) { + printf("alloc file info error \n"); + continue; + } + memset(info, 0, sizeof(struct file_info)); + info->inode = i_ino; + //info->shmem = shmem; + info->cached = cached; + info->cgcached = 1; + info->active = 0; + info->dirty = 0; + info->inactive = 0; + info->del = del; + info->cinode = history_inodes[inode]; + + info->size = i_size>>10; + strncpy(info->filename, end, sizeof(info->filename) - 2); + info->filename[sizeof(info->filename) -1] = '0'; + files[i_ino] = info; + fileset.insert(i_ino); } - memset(info, 0, sizeof(struct file_info)); - info->inode = i_ino; - //info->shmem = shmem; - info->cached = cached; - info->cgcached = 1; - info->active = 0; - info->dirty = 0; - info->inactive = 0; - info->del = del; - info->cinode = history_inodes[inode]; - - info->size = i_size>>10; - strncpy(info->filename, end, sizeof(info->filename) - 2); - info->filename[sizeof(info->filename) -1] = '0'; - files[i_ino] = info; - fileset.insert(i_ino); } return 0; } @@ -470,21 +501,159 @@ static int get_dentry(unsigned long pfn, unsigned long cinode, int active, int s return 0; } +/* + * struct mem_section *mem_section[NR_SECTION_ROOTS] + */ +static unsigned long __nr_to_section(unsigned long nr) +{ + unsigned long offset = nr & SECTION_ROOT_MASK; + unsigned long mem_sections; + + // mem_sections = mem_section[i] + 
kcore_readmem(mem_section_base + SECTION_NR_TO_ROOT(nr) * sizeof(unsigned long), + &mem_sections, sizeof(mem_sections)); + + // mem_section = mem_sections[i] + return mem_sections + offset * mem_section_size; +} + +static unsigned long __pfn_to_section(unsigned long pfn) +{ + return __nr_to_section(pfn_to_section_nr(pfn)); +} + +// pfn to page_cgroup +unsigned long lookup_page_cgroup(unsigned long pfn) +{ + unsigned long mem_section_addr = __pfn_to_section(pfn); + //LOG_WARN("mem_section_addr: %lx\n", mem_section_addr); + unsigned long mem_section; + unsigned long page_cgroup; + struct member_attribute *mem_sec_att; + + mem_sec_att = get_offset("mem_section", "page_cgroup"); + if (!mem_sec_att) { + return 0; + } + + kcore_readmem(mem_section_addr + mem_sec_att->offset, &page_cgroup, sizeof(page_cgroup)); + + return page_cgroup + pfn * page_cgroup_size; +} + +// page_cgroup to mem_cgroup +unsigned long mem_cgroup_from_page_group(unsigned long page_cgroup) +{ + unsigned long mem_cgroup; + struct member_attribute *memcg_att; + + memcg_att = get_offset("page_cgroup", "mem_cgroup"); + if (!memcg_att) { + return 0; + } + + kcore_readmem(page_cgroup + memcg_att->offset, + &mem_cgroup, sizeof(mem_cgroup)); + + return mem_cgroup; +} + +unsigned long page_cgroup_ino(unsigned long pfn) +{ + unsigned long page_group, mem_cgroup; + unsigned long css, cgroup, dentry, inode, i_ino; + struct member_attribute *memcg_css_att, *css_cg_att; + struct member_attribute *cg_dentry_att, *dentry_inode_att; + struct member_attribute *inode_i_ino_att; + map::iterator iter; + + page_group = lookup_page_cgroup(pfn); + if (!page_group) { + return 0; + } + + mem_cgroup = mem_cgroup_from_page_group(page_group); + if (!mem_cgroup) { + return 0; + } + + // check memcg -> inode cache + iter = memcg_inodes.find(mem_cgroup); + if (iter != memcg_inodes.end()) { + return iter->second; + } + + /* memcg->css.cgroup->dentry->d_inode->i_ino;*/ + memcg_css_att = get_offset("mem_cgroup", "css"); + if 
(!memcg_css_att) { + return 0; + } + + css_cg_att = get_offset("cgroup_subsys_state", "cgroup"); + if (!css_cg_att) { + return 0; + } + + // css = mem_cgroup(addr) + css_offset + css = mem_cgroup + memcg_css_att->offset; + + // cgroup = readmem(css + cgroup_offset) + kcore_readmem(css + css_cg_att->offset, + &cgroup, sizeof(cgroup)); + + cg_dentry_att = get_offset("cgroup", "dentry"); + if (!cg_dentry_att) { + return 0; + } + + // dentry = readmem(cgroup + dentry_offset) + kcore_readmem(cgroup + cg_dentry_att->offset, + &dentry, sizeof(dentry)); + + dentry_inode_att = get_offset("dentry", "d_inode"); + if (!dentry_inode_att) { + return 0; + } + + // inode = readmem(dentry + inode_offset) + kcore_readmem(dentry + dentry_inode_att->offset, + &inode, sizeof(inode)); + + inode_i_ino_att = get_offset("inode", "i_ino"); + if (!inode_i_ino_att) { + return 0; + } + + // i_ino = readmem(inode + i_ino_offset) + kcore_readmem(inode + inode_i_ino_att->offset, + &i_ino, sizeof(i_ino)); + + memcg_inodes[mem_cgroup] = i_ino; + + return i_ino; +} + unsigned long get_cgroup_inode(unsigned long pfn) { unsigned long ino; int ret = 0; - ret = kpagecgroup_read(&ino, sizeof(ino), pfn*sizeof(ino)); - if (ret != sizeof(ino)) { - return 0; + if (kpagecgroup_fd > 0) { + ret = kpagecgroup_read(&ino, sizeof(ino), pfn*sizeof(ino)); + if (ret != sizeof(ino)) { + return 0; + } } + else { + ino = page_cgroup_ino(pfn); + } + return ino; } int check_cgroup_inode(unsigned long inode) { - return (full_scan||(inodes.find(inode)!=inodes.end())); + return (full_scan||(cinode_cached.find(inode)!=cinode_cached.end())); } bool cached_cmp(const pair& a, const pair& b) { @@ -496,7 +665,8 @@ bool cached_cmp(const pair& a, const pair, myComp2>* >::iterator it; set::iterator iter2; struct file_info *info; int size = 0; @@ -508,11 +678,18 @@ static int output_file_cached_string(unsigned int top, char *res) } size += sprintf(res + size, "cinode=%lu cached=%lu size=%lu file=%s\n", info->cinode,info->cached, 
info->size,info->filename); free(info); + files[*iter2] = NULL; } + + for (it = cinode_cached.begin(); it != cinode_cached.end(); ++it) { + delete it->second; + } + files.clear(); fileset.clear(); - cachedset.clear(); + cinode_cached.clear(); history_inodes.clear(); + memcg_inodes.clear(); return 0; } @@ -561,7 +738,7 @@ int scan_pageflags_nooutput(struct options *opt, char *res) get_top_dentry(pfn, opt->top, inode); } } - get_dentry_top(); + iterate_dentry_path(); output_file_cached_string(opt->top, res); return 0; } @@ -569,12 +746,13 @@ int scan_pageflags_nooutput(struct options *opt, char *res) int memcg_cgroup_path(const char *cgrouppath) { struct stat st; + set, myComp2>* cachedset = new set, myComp2>; if (access(cgrouppath, F_OK)) { return 0; } stat(cgrouppath, &st); - inodes[st.st_ino] = 1; + cinode_cached[st.st_ino] = cachedset; return 0; } diff --git a/source/tools/monitor/unity/collector/plugin/podmem/memcg.h b/source/tools/monitor/unity/collector/container/podmem/memcg.h similarity index 100% rename from source/tools/monitor/unity/collector/plugin/podmem/memcg.h rename to source/tools/monitor/unity/collector/container/podmem/memcg.h diff --git a/source/tools/monitor/unity/collector/plugin/podmem/memread.cpp b/source/tools/monitor/unity/collector/container/podmem/memread.cpp similarity index 88% rename from source/tools/monitor/unity/collector/plugin/podmem/memread.cpp rename to source/tools/monitor/unity/collector/container/podmem/memread.cpp index d53189b27f8430dc208aebca601d1dff0d41bb42..8e707eff685d372dae39ff369db307eba60ac953 100644 --- a/source/tools/monitor/unity/collector/plugin/podmem/memread.cpp +++ b/source/tools/monitor/unity/collector/container/podmem/memread.cpp @@ -28,6 +28,7 @@ extern int memcg_cgroup_file(const char *cgroupfile); extern int memcg_cgroup_path(const char *cgrouppath); extern int offset_init(void); extern int sym_uninit(void); +extern int get_structsize(char *type_name); extern "C" { char* scanall(); @@ -55,7 +56,7 @@ 
static struct proc_kcore_data *pkd = &proc_kcore_data; static int kcore_fd = 0; static int kpageflags_fd = 0; -static int kpagecgroup_fd = 0; +int kpagecgroup_fd = 0; uint64_t g_max_phy_addr; @@ -74,6 +75,11 @@ unsigned long page_offset_base = 0xffff880000000000UL; unsigned long memstart_addr = 0x0; unsigned long page_shift = 0; +// for linux 3.10 which has no kpagecgroup and mem_cgroup in page struct +unsigned long mem_section_base = 0x0; +int mem_section_size = 0x0; +int page_cgroup_size = 0; + /* * Routines of kpageflags, i.e., /proc/kpageflags */ @@ -324,10 +330,40 @@ static uint64_t get_max_phy_addr(const char *proc) return max_phy_addr; } +static int setup_mem_section(const char *proc) +{ + mem_section_base = lookup_kernel_symbol("mem_section", proc); + if (mem_section_base == (unsigned long)-1) { + LOG_ERROR("failed to lookup symbol: mem_section\n"); + return -1; + } + + mem_section_size = get_structsize("mem_section"); + if (mem_section_size == 0) { + LOG_ERROR("failed to get struct size: mem_section\n"); + return -1; + } + + page_cgroup_size = get_structsize("page_cgroup"); + if (page_cgroup_size == 0) { + LOG_ERROR("failed to get struct size: page_cgroup\n"); + return -1; + } + + return 0; +} + static int setup(const char* proc) { - std::string filename(proc); - std::string tmp; + std::string filename(proc); + std::string tmp; + + // init btf first + if (offset_init() < 0) { + LOG_ERROR("failed to init btf\n"); + return -1; + } + g_max_phy_addr = get_max_phy_addr(proc); if (g_max_phy_addr == 0ULL) { g_max_phy_addr = 64 * 1024 * 1024 * 1024; @@ -335,23 +371,30 @@ static int setup(const char* proc) } LOG_DEBUG("max physical address = %#lx\n", g_max_phy_addr); - tmp = filename+"/proc/kpageflags"; + tmp = filename+"/proc/kpageflags"; kpageflags_fd = open(tmp.c_str(), O_RDONLY); if (kpageflags_fd < 0) { perror("open: /proc/kpageflags"); return -1; } - tmp = filename+"/proc/kpagecgroup"; + + tmp = filename+"/proc/kpagecgroup"; kpagecgroup_fd = open(tmp.c_str(), 
O_RDONLY); if (kpagecgroup_fd < 0) { - perror("open: /proc/kpagecgroup"); + LOG_WARN("no /proc/kpagecgroup, use pfn_to_cgroup instead," + "(only support kernel 3.10)\n"); + if (setup_mem_section(proc) < 0) { + LOG_ERROR("failed to setup mem_section\n"); + return -1; + } } + if (kcore_init(proc) < 0) { LOG_ERROR("failed to init kcore\n"); return -1; } - return offset_init(); + return 0; } static void cleanup(void) @@ -420,7 +463,7 @@ char* scanall() char *outputres = NULL; opt.rate = 100; - opt.top = 10; + opt.top = 5; int count = memcg_cgroup_file(filename); if (count < 0) return NULL; diff --git a/source/tools/monitor/unity/collector/plugin/podmem/offset.cpp b/source/tools/monitor/unity/collector/container/podmem/offset.cpp similarity index 94% rename from source/tools/monitor/unity/collector/plugin/podmem/offset.cpp rename to source/tools/monitor/unity/collector/container/podmem/offset.cpp index 882aae60c750156e5bd1d08386848fe923e73d35..f6bad7945d79f17c3f4cdbb0ffb21ff19de06542 100644 --- a/source/tools/monitor/unity/collector/plugin/podmem/offset.cpp +++ b/source/tools/monitor/unity/collector/container/podmem/offset.cpp @@ -1,9 +1,13 @@ #include #include #include +#include #include #include -#include +#include + +#include "cache.h" + extern "C" { #include "btfparse.h" } @@ -39,6 +43,13 @@ int sym_uninit(void) return 0; } +int get_structsize(char *type_name) +{ + string structname("struct "); + structname += type_name; + return btf_type_size(handle, (char*)structname.c_str()); +} + struct member_attribute *get_offset(string struct_name, string member_name) { string index; @@ -152,7 +163,7 @@ static int download_btf(void) { sysak_path = getenv("SYSAK_WORK_PATH") ; sysak_path += "/tools/"; - sysak_path += kernel; + //sysak_path += kernel; } snprintf(dw, LEN + LEN + LEN, "wget -T 5 -t 2 -q -O %s/vmlinux-%s https://sysom-cn-%s.oss-cn-%s%s.aliyuncs.com/home/hive/btf/%s/vmlinux-%s",sysak_path.c_str(), kernel, ®ion[3],®ion[3],timeout.c_str(),arch, kernel); @@ -185,7 
+196,7 @@ int offset_init(void) cmd = string("uname -r").c_str(); do_cmd(cmd, ver, LEN); if(getenv("SYSAK_WORK_PATH") != NULL) - sprintf(btf,"%s/tools/%s/vmlinux-%s", getenv("SYSAK_WORK_PATH"), ver, ver); + sprintf(btf,"%s/tools/vmlinux-%s", getenv("SYSAK_WORK_PATH"), ver); else sprintf(btf,"/boot/vmlinux-%s", ver); diff --git a/source/tools/monitor/unity/collector/execEngine/forkRun.lua b/source/tools/monitor/unity/collector/execEngine/forkRun.lua index 0e6c0f6a46f4afd75a72975061a7030c7ba0f9f5..09ed5ddc73ec47252eb5feb45b52895e040964eb 100644 --- a/source/tools/monitor/unity/collector/execEngine/forkRun.lua +++ b/source/tools/monitor/unity/collector/execEngine/forkRun.lua @@ -30,8 +30,8 @@ end function CforkRun:_del_() if self._pid then exec.kill(self._pid) + print("kill " .. self._pid) end - print("kill " .. self._pid) end function CforkRun:proc(elapsed, lines) diff --git a/source/tools/monitor/unity/collector/guard/guardDaemon.lua b/source/tools/monitor/unity/collector/guard/guardDaemon.lua index a637edae22121f1bafd2e1ab4211bb745891df61..f3cf8029724157e8b4667ce4f315da96e3be6c49 100644 --- a/source/tools/monitor/unity/collector/guard/guardDaemon.lua +++ b/source/tools/monitor/unity/collector/guard/guardDaemon.lua @@ -13,7 +13,7 @@ local CguardDaemon = class("guardDaemon") local function feedDaemon(fd) local ws, err, errno = unistd.write(fd, "feed.") if ws == nil then - system:posixError("feed daemo failed.", err, errno) + system:posixError("feed daemon failed.", err, errno) end end diff --git a/source/tools/monitor/unity/collector/guard/guardObserve.lua b/source/tools/monitor/unity/collector/guard/guardObserve.lua new file mode 100644 index 0000000000000000000000000000000000000000..90f08dd6cd8d2dff01293c13394c079534b66242 --- /dev/null +++ b/source/tools/monitor/unity/collector/guard/guardObserve.lua @@ -0,0 +1,149 @@ +----- +----- Generated by EmmyLua(https://github.com/EmmyLua) +----- Created by wrp. 
+----- DateTime: 2023/7/24 16:01 +----- +-- +require("common.class") +local CcollectorStat = require("collector.guard.collector_stat") +local system = require("common.system") +local pystring = require("common.pystring") +local CguardSched = require("collector.guard.guardSched") +local obHelper = require("collector.observe.obHelper") +local CprotoData = require("common.protoData") + +local CguardObserve = class("guardObserve") + +function CguardObserve:_init_(tid, jperiod, resYaml, que, proc_path, procffi) + self._proto = CprotoData.new(que) + self._proc_path = proc_path + self._procffi = procffi + + self._stat = CcollectorStat.new(tid) + self._jperiod = jperiod + + self._limit = resYaml.config.limit.cellLimit + if self._limit == nil then + self._limit = 1e4 * 5 -- 50ms + elseif self._limit ~= -1 then + self._limit = self._limit * 1e3 + end + + self._resYaml = resYaml + if resYaml.observe then + self._hasOb = true + self._obperiod = resYaml.observe.period + self._timerstart = -self._obperiod + else + self._hasOb = false + end + + self._pids = {} + self._obs = {} + +end + +function CguardObserve:getPids() + local pids = {} + + local comms = self._resYaml.observe.comms + for commk, commv in pairs(comms) do + local res = obHelper:getPidByComm(commk) + for _, pid in ipairs(res) do + table.insert(pids,pid) + + end + end + return pids +end + +function CguardObserve:proc(t,lines) + if self._hasOb then + local now = os.time() + if self._timerstart + self._obperiod < now then + self._pids = self:getPids() + self._obs = {} + + local cnt = 1 + local CobProcess = require("collector.observe.obProcess" ) + for _, pid in ipairs(self._pids) do + local comm + local fpid = io.open("/proc/"..pid.."/comm") + if fpid == nil then + goto continue + end + for line in fpid:lines() do + comm = line + end + fpid:close() + + local cgroup = "" + local conf = self._resYaml.observe.comms[comm] + local confs = pystring:split(conf," ") + for _, c in ipairs(confs) do + if c == "cgroup" then + 
cgroup = obHelper:getCgroupSystemd(pid) + end + end + local labels ={ + pid = pid, + comm = comm, + cgroup = cgroup + } + + self._obs[cnt] = CobProcess.new(self._jperiod, labels, self._proto, self._procffi, self._proc_path) + + cnt = cnt + 1 + ::continue:: + end + + print("observe pids reset") + self._timerstart = now + end + + local toRemove = {} + + local start = lua_local_clock() -- unit us + local stop = 0 + local j1 = self._stat:jiffies() + for i, obj in ipairs(self._obs) do + if i % 100 == 0 then -- need to update jiffies + j1 = self._stat:jiffies() + end + local ret, overTime = obj:proc(t, lines) + if ret == -1 then + table.insert(toRemove, i) + else + stop = lua_local_clock() + if ret ~= 1 then + overTime = 0 + end + if self._limit ~= -1 then + if stop - start - overTime >= self._limit then -- + local j2 = self._stat:jiffies() + if j2 - j1 >= self._limit / 1e6 * self._jperiod * 3 / 4 then -- 3/4 time used by plugin + table.insert(toRemove, i) + end + j1 = j2 + end + end + + end + start = stop + + ::continue:: + end + + if #toRemove > 0 then + system:reverseTable(toRemove) -- list should reverse at first. 
+ for _, i in ipairs(toRemove) do + + table.remove(self._obs, i) + + end + end + end + +end + +return CguardObserve \ No newline at end of file diff --git a/source/tools/monitor/unity/collector/guard/guardSched.lua b/source/tools/monitor/unity/collector/guard/guardSched.lua index 46c21d1461ddb63fb30f493952d354828ea5ee83..d4ca598a24b1432452338952ea8555f330c87305 100644 --- a/source/tools/monitor/unity/collector/guard/guardSched.lua +++ b/source/tools/monitor/unity/collector/guard/guardSched.lua @@ -10,12 +10,19 @@ local system = require("common.system") local CguardSched = class("guardSched") -function CguardSched:_init_(tid, procs, names, jperiod) +function CguardSched:_init_(tid, procs, names, jperiod, resYaml) self._stat = CcollectorStat.new(tid) self._jperiod = jperiod self._procs = procs self._names = names - self._limit = 1e5 * 5 -- 500 ms + + self._limit = resYaml.config.limit.cellLimit + if self._limit == nil then + self._limit = 1e4 * 5 -- 50ms + elseif self._limit ~= -1 then + self._limit = self._limit * 1e3 + end + end function CguardSched:proc(t, lines) @@ -24,24 +31,32 @@ function CguardSched:proc(t, lines) local start = lua_local_clock() -- unit us local stop = 0 local j1 = self._stat:jiffies() - local ret - for i, obj in ipairs(self._procs) do + if i % 100 == 0 then -- need to update jiffies + j1 = self._stat:jiffies() + end local ret, overTime = obj:proc(t, lines) + stop = lua_local_clock() if ret == -1 then + print(self._names[i].. "-1") table.insert(toRemove, i) else - stop = lua_local_clock() - if ret ~= 1 then + if ret ~= 1 then -- plugin must return 1, overTime will be available. 
overTime = 0 end - if stop - start - overTime >= self._limit then -- - print(stop - start) - local j2 = self._stat:jiffies() - if j2 - j1 >= self._limit / 1e6 * self._jperiod * 3 / 4 then -- 3/4 time used by plugin - table.insert(toRemove, i) + + local elapse = stop - start - overTime + if self._limit ~= -1 then + if stop - start - overTime >= self._limit then -- + local j2 = self._stat:jiffies() + if j2 - j1 >= elapse / 1e6 * self._jperiod * 3 / 4 then -- 3/4 time used by plugin + table.insert(toRemove, i) + end + j1 = j2 end end + + end start = stop diff --git a/source/tools/monitor/unity/collector/guard/guardSelfStat.lua b/source/tools/monitor/unity/collector/guard/guardSelfStat.lua index f02d72e48b844770300a2ae3e7d52a025a08fd80..105262d4e8146ffcafa02aad41d515af3841761f 100644 --- a/source/tools/monitor/unity/collector/guard/guardSelfStat.lua +++ b/source/tools/monitor/unity/collector/guard/guardSelfStat.lua @@ -46,7 +46,7 @@ end local function rssRssAnon() local anon = 0 - local f = io.open("/proc/self/status") + local f = assert(io.open("/proc/self/status")) for line in f:lines() do if pystring:startswith(line, "RssAnon:") then local res = pystring:split(line) @@ -71,7 +71,10 @@ function CguardSelfStat:proc(elapsed, lines) os.exit(1) end - local anon = rssRssAnon() + local ret, anon = pcall(rssRssAnon) + if not ret then + anon = 0 + end if self._memLimit and anon > self._memLimit then print("last mem usage overflow. rss bytes: " .. 
rss) os.exit(1) diff --git a/source/tools/monitor/unity/collector/interface/fastKsym.c b/source/tools/monitor/unity/collector/interface/fastKsym.c index 611bf87139a7cab0aafb01a5ae3a4be423aca76a..db1ce2614abaddc623475132000a6e4f3f2902d9 100644 --- a/source/tools/monitor/unity/collector/interface/fastKsym.c +++ b/source/tools/monitor/unity/collector/interface/fastKsym.c @@ -145,7 +145,6 @@ struct ksym_cell* ksym_search(addr_t key) { int mid; if (sym_cnt <= 0) { - printf("sym_cnt: %d", sym_cnt); return NULL; } diff --git a/source/tools/monitor/unity/collector/loop.lua b/source/tools/monitor/unity/collector/loop.lua index 68b19a557bd4eedd50ed18326bd38de9c4e89ba6..28a5ffff5f8d74f4411a289980c1424cebade603 100644 --- a/source/tools/monitor/unity/collector/loop.lua +++ b/source/tools/monitor/unity/collector/loop.lua @@ -12,12 +12,18 @@ local CbtfLoader = require("collector.btfLoader") local CpluginManager = require("collector.pluginManager") local calcJiffies = require("collector.guard.calcJiffies") local CguardSched = require("collector.guard.guardSched") +local CguardObserve = require("collector.guard.guardObserve") local CguardDaemon = require("collector.guard.guardDaemon") local CguardSelfStat = require("collector.guard.guardSelfStat") local CpostPlugin = require("collector.postPlugin.postPlugin") local CforkRun = require("collector.execEngine.forkRun") local CpodFilter = require("collector.podMan.podFilter") +local CrdtManager = require("collector.rdt.rdtManager") +local Ccgroupv2 = require("collector.cgroupv2.cgroupv2") +---local CpodFilter = require("collector.podMan.podFilter") + local CpodsAll = require("collector.podMan.podsAll") + local Cloop = class("loop") function Cloop:_init_(que, proto_q, fYaml, tid) @@ -31,10 +37,25 @@ function Cloop:_init_(que, proto_q, fYaml, tid) self:forkRun(res) local jperiod = calcJiffies.calc(res.config.proc_path, procffi) -- - self._guardSched = CguardSched.new(tid, self._procs, self._names, jperiod) + + self._guardSched = 
CguardSched.new(tid, self._procs, self._names, jperiod,res) + self._guardObserve = CguardObserve.new(tid, jperiod, res, que, res.config.proc_path, procffi) + self.soPlugins = CpluginManager.new(procffi, proto_q, res, tid, jperiod) self._guardStat = CguardSelfStat.new(self._proto, procffi, "/", res, jperiod) self.postPlugin = CpostPlugin.new(self._proto, procffi, res) + +end + +local function newProc(plugin, proto, procffi, proc_path) + print("add " .. plugin) + local CProcs = require("collector." .. plugin) + return CProcs.new(proto, procffi, proc_path) +end + +local function newPlugin(Cplugin, res, proto, procffi, proc_path) + print("add plugin") + return Cplugin.new(res, proto, procffi, proc_path) end function Cloop:loadLuaPlugin(res, proc_path, procffi) @@ -45,23 +66,67 @@ function Cloop:loadLuaPlugin(res, proc_path, procffi) local c = 1 if res.luaPlugins then for _, plugin in ipairs(luas) do - local CProcs = require("collector." .. plugin) - self._procs[c] = CProcs.new(self._proto, procffi, proc_path) - self._names[c] = plugin - c = c + 1 + --local CProcs = require("collector." .. plugin) + local status,msg = pcall(newProc, plugin, self._proto, procffi, proc_path) + if status then + self._procs[c] = msg + self._names[c] = plugin + c = c + 1 + else + print("add plugin " .. plugin .. " failed: " .. msg) + end + --self._procs[c] = CProcs.new(self._proto, procffi, proc_path) + --self._names[c] = plugin + --c = c + 1 end end + if res.container then if res.container.mode == "cgroup" then --print("mods1="..res.container.mode) - self._procs[c] = CpodFilter.new(res, self._proto, procffi, proc_path) - self._names[c] = "podFilter" + local status, msg = pcall(newPlugin, CpodFilter, res, self._proto, procffi, proc_path) + if status then + self._procs[c] = msg + self._names[c] = "podFilter" + c = c + 1 + else + print("add podFilter failed. " .. 
msg) + end else --print("mods2="..res.container.mode) - self._procs[c] = CpodsAll.new(res, self._proto, procffi, proc_path) - self._names[c] = "podMon" + local status, msg = pcall(newPlugin, CpodsAll, res, self._proto, procffi, proc_path) + if status then + self._procs[c] = msg + self._names[c] = "podMon" + c = c + 1 + else + print("add podMon failed. " .. msg) + end end end + + if res.resctrl then + local status, msg = pcall(newPlugin, CrdtManager, res, self._proto, procffi, proc_path) + if status then + self._procs[c] = msg + self._names[c] = "rdtManager" + c = c + 1 + else + print("add rdtManager failed. " .. msg) + end + end + + if res.cgroupv2 then + local status, msg = pcall(newPlugin, Ccgroupv2, res, self._proto, procffi, proc_path) + if status then + self._procs[c] = msg + self._names[c] = "cgroupv2" + c = c + 1 + else + print("add cgroupv2 failed. " .. msg) + end + end + print("add " .. system:keyCount(self._procs) .. " lua plugin.") end @@ -81,6 +146,7 @@ function Cloop:work(t) local lines = self._proto:protoTable() self._guardSched:proc(t, lines) + self._guardObserve:proc(t, lines) self.soPlugins:proc(t, lines) self._guardStat:proc(t, lines) self.postPlugin:proc(t, lines) diff --git a/source/tools/monitor/unity/collector/native/ffi_rdt_helper.lua b/source/tools/monitor/unity/collector/native/ffi_rdt_helper.lua new file mode 100644 index 0000000000000000000000000000000000000000..1b023d32124ddce2044741c69d144bc98a1bca9c --- /dev/null +++ b/source/tools/monitor/unity/collector/native/ffi_rdt_helper.lua @@ -0,0 +1,8 @@ +local rawffi = require "ffi" +local rdtffi = rawffi.load('rdt_helper') + +rawffi.cdef [[ + int calculate(const char* now,const char* prev); +]] + +return { rawffi = rawffi, rdtffi = rdtffi } diff --git a/source/tools/monitor/unity/collector/observe/obFault.lua b/source/tools/monitor/unity/collector/observe/obFault.lua new file mode 100644 index 0000000000000000000000000000000000000000..c54e89e8bc9b2e59a3481fcbaa7d80d9a2c50fc7 --- /dev/null 
+++ b/source/tools/monitor/unity/collector/observe/obFault.lua @@ -0,0 +1,45 @@ +--- +--- Generated by EmmyLua(https://github.com/EmmyLua) +--- Created by wrp. +--- DateTime: 2023/7/20 17:10 +--- 进程级主次缺页次数。主12,次10 +--- + +require("common.class") + +local CvProc = require("collector.vproc") +local system = require("common.system") +local pystring = require("common.pystring") + +local CobFault = class("obFault",CvProc) + + +function CobFault:_init_(pid, proto, pffi, mnt, pFile) + self._pid = pid + CvProc._init_(self, proto, pffi, mnt, pFile or "proc/"..self._pid.."/stat") + self._protoTable = { + line = "observe", + ls = { { name = "pid", index = tostring(pid) } }, + vs = {} + } + +end + +function CobFault:proc(elapsed, lines) + self._protoTable.vs = {} + CvProc.proc(self) + for line in io.lines(self.pFile) do + + local subline = pystring:split(line," ",13) + local maj = subline[12] + local min = subline[10] + local cell1 = {name = "majflt",value = tonumber(maj)} + table.insert(self._protoTable["vs"], cell1) + local cell2 = {name = "minflt",value = tonumber(min)} + table.insert(self._protoTable["vs"], cell2) + end + self:appendLine(self._protoTable) + self:push(lines) +end + +return CobFault \ No newline at end of file diff --git a/source/tools/monitor/unity/collector/observe/obHelper.lua b/source/tools/monitor/unity/collector/observe/obHelper.lua new file mode 100644 index 0000000000000000000000000000000000000000..da072f4ee0a3c705e6dfbc68291fc6cde01a4d41 --- /dev/null +++ b/source/tools/monitor/unity/collector/observe/obHelper.lua @@ -0,0 +1,70 @@ +--- +--- Generated by EmmyLua(https://github.com/EmmyLua) +--- Created by wrp. 
+--- DateTime: 2023/7/24 09:54 +--- + +local lfs = require("lfs") +local pystring = require("common.pystring") +local obHelper = {} + +function obHelper:getPidByComm(comm) + local pids={} + local c = 1 + local path = "/proc/" + for file in lfs.dir(path) do + -- just need file named by numbers + for i = 1, string.len(file) do + if string.byte(string.sub(file, i, i)) < 48 or string.byte(string.sub(file, i, i)) > 57 then + goto continue + end + end + + local fInfo = io.open(path..file.."/comm") + if fInfo == nil then + goto continue + end + + for line in fInfo:lines() do + if line == comm then + pids[c] = tonumber(file) + c = c + 1 + end + end + fInfo:close() + ::continue:: + end + return pids +end + +function obHelper:getCmdlinePort(pid) + local res = -1 + local path = "/proc/"..pid.."/cmdline" + for line in io.lines(path) do + local start, stop = string.find(line, "--port") + local substring = string.sub(line, stop+1, -1) + local port = string.match(substring, "%d+") + if port then + res = tonumber(port) + end + end + return res +end + +function obHelper:getCgroupSystemd(pid) + local path = "/proc/"..pid.."/cgroup" + local res = "NULL" + for line in io.lines(path) do + local sublines = pystring:split(line, ":") + if sublines[1] == "1" and sublines[2] == "name=systemd" then + for cname in string.gmatch(sublines[3], "%x%x%x%x%x%x%x%x%x%x%x%x%x%x%x%x%x*") do + res = cname + res = string.sub(res, 1, 12) + end + end + end + + return res +end + +return obHelper diff --git a/source/tools/monitor/unity/collector/observe/obIO.lua b/source/tools/monitor/unity/collector/observe/obIO.lua new file mode 100644 index 0000000000000000000000000000000000000000..f8dbdd178017eed08d99b3c92dc4a39b10aed531 --- /dev/null +++ b/source/tools/monitor/unity/collector/observe/obIO.lua @@ -0,0 +1,61 @@ +--- +--- Generated by EmmyLua(https://github.com/EmmyLua) +--- Created by wrp. 
+--- DateTime: 2023/7/4 19:39 +--- 进程级IO吞吐 +--- 进程级等待IO延迟,/proc//stat第42个字段 +--- + +require("common.class") + +local CkvProc = require("collector.kvProc") +local CvProc = require("collector.vproc") +local pystring = require("common.pystring") +local CobIO = class("obIO",CkvProc) + + +function CobIO:_init_(pid, proto, pffi, mnt, pFile) + self._pid = pid + CkvProc._init_(self, proto, pffi, mnt, pFile or "proc/" .. self._pid .. "/io", "obIO") + self._stat = mnt .. "proc/" .. self._pid .. "/stat" + self._protoTable = { + line = "observe", + ls = { { name = "pid", index = tostring(pid) }}, + vs = {} + } +end + +function CobIO:readKV(line) + local data = self._ffi.new("var_kvs_t") + assert(self._cffi.var_input_kvs(self._ffi.string(line), data) == 0) + assert(data.no >= 1) + + local name = self._ffi.string(data.s) + name = self:checkTitle(name) + local value = tonumber(data.value[0]) + + if name == "read_bytes" or name == "write_bytes" then + local cell = {name=name, value=value} + table.insert(self._protoTable["vs"], cell) + end + +end + +function CobIO:proc(elapsed, lines) + self._protoTable.vs = {} + CvProc.proc(self) + for line in io.lines(self.pFile) do + self:readKV(line) + end + for line in io.lines(self._stat) do + local subline = pystring:split(line," ",43) + local IOdelay = subline[42] + local cell = {name = "IOdelay",value = tonumber(IOdelay)} + table.insert(self._protoTable["vs"], cell) + + end + self:appendLine(self._protoTable) + self:push(lines) +end + +return CobIO \ No newline at end of file diff --git a/source/tools/monitor/unity/collector/observe/obProcSched.lua b/source/tools/monitor/unity/collector/observe/obProcSched.lua new file mode 100644 index 0000000000000000000000000000000000000000..24c5608f6c21030632f150f4c079e4dc145f23ca --- /dev/null +++ b/source/tools/monitor/unity/collector/observe/obProcSched.lua @@ -0,0 +1,94 @@ +--- +--- Generated by EmmyLua(https://github.com/EmmyLua) +--- Created by wrp. 
+--- DateTime: 2023/7/21 11:14 +--- 进程级调度延迟 +--- 进程级运行时间 +--- 进程级cpu total占用率 +--- 进程级cpu user占用率 +--- 进程级cpu sys占用率 +--- 进程自主切换 +--- 进程非自主切换 +--- + +require("common.class") +local CvProto = require("collector.vproto") +local CvProc = require("collector.vproc") + +local pystring = require("common.pystring") + +local CobProcSched = class("obScheDelay",CvProc) +--local pid = 1 + +function CobProcSched:_init_(pid, proto, pffi, mnt, pFile) + self._pid = pid + CvProc._init_(self, proto, pffi, mnt, pFile) + self._protoTable = { + line = "observe", + ls = { { name = "pid", index = tostring(pid) } }, + vs = {} + } + self._fschedstat = mnt .. "proc/" .. self._pid .. "/schedstat" + self._fstat = mnt .. "proc/" .. self._pid .. "/stat" + self._fsched = mnt .. "proc/" .. self._pid .. "/sched" +end + +function CobProcSched:proc(elapsed, lines) + CvProto.proc(self) + for line in io.lines(self._fschedstat) do + local data = self._ffi.new("var_long_t") + assert(self._cffi.var_input_long(self._ffi.string(line), data) == 0) + assert(data.no == 3) + local cell1 = { + name = "delay", + value = tonumber(data.value[1]),--value从0开始计数 + } + table.insert(self._protoTable["vs"], cell1) + local cell2 = { + name = "time", + value = tonumber(data.value[0]),--value从0开始计数 + } + table.insert(self._protoTable["vs"], cell2) + end + + for line in io.lines(self._fsched) do + if string.find(line,":") then + local subline = pystring:split(line,":",1) + local _name = subline[1] + local site = string.find(_name," ") + _name = string.sub(_name,1,site-1) + + if _name == "nr_involuntary_switches" then + local _value = subline[2] + local i = pystring:rfind(_value,' ') + _value = string.sub(_value,i+1,-1) + local cell = {name=_name, value=tonumber(_value)} + table.insert(self._protoTable["vs"], cell) + elseif _name == "nr_voluntary_switches" then + local _value = subline[2] + local i = pystring:rfind(_value,' ') + _value = string.sub(_value,i+1,-1) + local cell = {name=_name, value=tonumber(_value)} + 
table.insert(self._protoTable["vs"], cell) + end + + end + end + + for line in io.lines(self._fstat) do + local subline = pystring:split(line," ",16) + local user = tonumber(subline[14]) + local sys = tonumber(subline[15]) + local total = user + sys + local cell1 = {name = "cpu_user",value = user} + table.insert(self._protoTable["vs"], cell1) + local cell2 = {name = "cpu_sys",value = sys} + table.insert(self._protoTable["vs"], cell2) + local cell3 = {name = "cpu_total",value = total} + table.insert(self._protoTable["vs"], cell3) + end + self:appendLine(self._protoTable) + self:push(lines) +end + +return CobProcSched \ No newline at end of file diff --git a/source/tools/monitor/unity/collector/observe/obProcess.lua b/source/tools/monitor/unity/collector/observe/obProcess.lua new file mode 100644 index 0000000000000000000000000000000000000000..c126ae7d9fb41d64fcc9159fca9674b475d02359 --- /dev/null +++ b/source/tools/monitor/unity/collector/observe/obProcess.lua @@ -0,0 +1,249 @@ +--- +--- Generated by EmmyLua(https://github.com/EmmyLua) +--- Created by wrp. +--- DateTime: 2023/7/28 22:12 +--- + +require("common.class") + +local CvProc = require("collector.vproc") +local system = require("common.system") +local pystring = require("common.pystring") +local unistd = require("posix.unistd") +local CobProcess = class("obProcess",CvProc) + +function CobProcess:_init_(jperiod, labels, proto, pffi, mnt, pFile) + self._jperiod = jperiod + self._pid = labels["pid"] + CvProc._init_(self, proto, pffi, mnt, pFile or "proc/"..self._pid) + self._protoTable = { + line = "observe", + ls = {}, + vs = {} + } + for k, v in pairs(labels) do + local cell = {name = k, index = tostring(v)} + table.insert(self._protoTable["ls"], cell) + end + self._fschedstat = mnt .. "proc/" .. self._pid .. "/schedstat" + self._fstat = mnt .. "proc/" .. self._pid .. "/stat" + self._fsched = mnt .. "proc/" .. self._pid .. "/sched" + self._fio = mnt .. "proc/" .. self._pid .. 
"/io" + + local err, errno + self.nproc, err, errno = unistd.sysconf(84) + if err then + system:posixError("sysconf failed", err, errno) + end + + for line in io.lines(self._fstat) do + local subline = pystring:split(line," ",43) + + self.cpu_user_start = tonumber(subline[14]) + self.cpu_sys_start = tonumber(subline[15]) + self.IOwait_start = tonumber(subline[42]) + self.majflt_start = tonumber(subline[12]) + self.minflt_start = tonumber(subline[10]) + end + + for line in io.lines(self._fschedstat) do + local data = self._ffi.new("var_long_t") + assert(self._cffi.var_input_long(self._ffi.string(line), data) == 0) + assert(data.no == 3) + self.sched_time_start = tonumber(data.value[0]) + self.sched_delay_start = tonumber(data.value[1]) + end + + for line in io.lines(self._fsched) do + if string.find(line,":") then + local subline = pystring:split(line,":",1) + local _name = subline[1] + local site = string.find(_name," ") + _name = string.sub(_name,1,site-1) + + if _name == "nr_involuntary_switches" then + local _value = subline[2] + local i = pystring:rfind(_value,' ') + _value = string.sub(_value,i+1,-1) + self.nr_inv_swi_start=tonumber(_value) + + elseif _name == "nr_voluntary_switches" then + local _value = subline[2] + local i = pystring:rfind(_value,' ') + _value = string.sub(_value,i+1,-1) + self.nr_vol_swi_start=tonumber(_value) + + end + end + end + + for line in io.lines(self._fio) do + local data = self._ffi.new("var_kvs_t") + assert(self._cffi.var_input_kvs(self._ffi.string(line), data) == 0) + assert(data.no >= 1) + + local name = self._ffi.string(data.s) + name = self:checkTitle(name) + local value = tonumber(data.value[0]) + + if name == "read_bytes" then + self.read_bytes_start = value + end + if name == "write_bytes" then + self.write_bytes_start = value + end + end + +end + +function CobProcess:checkTitle(title) + local res = string.gsub(title, ":", "") + res = string.gsub(res, "%)", "") + res = string.gsub(res, "%(", "_") + return res +end + 
+function CobProcess:readKV(line,elapsed) + local data = self._ffi.new("var_kvs_t") + assert(self._cffi.var_input_kvs(self._ffi.string(line), data) == 0) + assert(data.no >= 1) + + local name = self._ffi.string(data.s) + name = self:checkTitle(name) + local value = tonumber(data.value[0]) + + if name == "read_bytes" then + local cell = {name=name, value=(value-self.read_bytes_start)/elapsed} + table.insert(self._protoTable["vs"], cell) + self.read_bytes_start = value + end + if name == "write_bytes" then + local cell = {name=name, value=(value-self.write_bytes_start)/elapsed} + table.insert(self._protoTable["vs"], cell) + self.write_bytes_start = value + end +end + +function CobProcess:getProcRunTime(starttime) + -- 进程运行时间 + local ptime + + local fprocstat = io.open("/proc/stat") + if fprocstat~=nil then + for l in fprocstat:lines() do + local ls = pystring:split(l, " ", 1)[2] + local cpusum = 0 + local data = self._ffi.new("var_long_t") + assert(self._cffi.var_input_long(self._ffi.string(ls), data) == 0) + for i=0,data.no-1 do + cpusum = cpusum + tonumber(data.value[i]) + end + ptime = ((cpusum/self.nproc)-starttime)/self._jperiod + + break + end + end + fprocstat:close() + return ptime +end + +function CobProcess:proc(elapsed, lines) + self._protoTable.vs = {} + self._protoTable.vs = {} + CvProc.proc(self) + + for line in io.lines(self._fio) do + self:readKV(line,elapsed) + end + + for line in io.lines(self._fstat) do + local subline = pystring:split(line," ",43) + + local IOwait = tonumber(subline[42]) + local cell = {name = "IOwait",value = 1e6*(IOwait-self.IOwait_start)/(elapsed*self._jperiod)} + self.IOwait_start = IOwait + table.insert(self._protoTable["vs"], cell) + + local maj = tonumber(subline[12]) + local min = tonumber(subline[10]) + local cell1 = {name = "majflt",value = maj-self.majflt_start} + self.majflt_start = maj + table.insert(self._protoTable["vs"], cell1) + local cell2 = {name = "minflt",value = min-self.minflt_start} + self.minflt_start = 
min + table.insert(self._protoTable["vs"], cell2) + + local user = tonumber(subline[14]) + local sys = tonumber(subline[15]) + local total = user + sys + local cell3 = {name = "cpu_user",value = 100*(user-self.cpu_user_start)/(elapsed*self._jperiod)} + table.insert(self._protoTable["vs"], cell3) + local cell4 = {name = "cpu_sys",value = 100*(sys-self.cpu_sys_start)/(elapsed*self._jperiod)} + table.insert(self._protoTable["vs"], cell4) + local cell5 = {name = "cpu_total",value = 100*(total-self.cpu_sys_start-self.cpu_user_start)/(elapsed*self._jperiod)} + table.insert(self._protoTable["vs"], cell5) + self.cpu_user_start = user + self.cpu_sys_start = sys + + local vsize = subline[23] + local rss = subline[24] + local cell6 = {name = "vsize", value = tonumber(vsize)} + table.insert(self._protoTable["vs"], cell6) + local cell7 = {name = "rss", value = tonumber(rss)} + table.insert(self._protoTable["vs"], cell7) + + local starttime = tonumber(subline[22]) + local procruntime = self:getProcRunTime(starttime) + local cell8 = {name = "proc_run_time", value = procruntime} + table.insert(self._protoTable["vs"], cell8) + + end + + for line in io.lines(self._fschedstat) do + local data = self._ffi.new("var_long_t") + assert(self._cffi.var_input_long(self._ffi.string(line), data) == 0) + assert(data.no == 3) + local cell1 = { + name = "delay", + value = (tonumber(data.value[1])-self.sched_delay_start)/(1e3*elapsed), + } + self.sched_delay_start = tonumber(data.value[1]) + table.insert(self._protoTable["vs"], cell1) + local cell2 = { + name = "time", + value = (tonumber(data.value[0])-self.sched_time_start)/(1e3*elapsed), + } + self.sched_time_start = tonumber(data.value[0]) + table.insert(self._protoTable["vs"], cell2) + end + + for line in io.lines(self._fsched) do + if string.find(line,":") then + local subline = pystring:split(line,":",1) + local _name = subline[1] + local site = string.find(_name," ") + _name = string.sub(_name,1,site-1) + + if _name == 
"nr_involuntary_switches" then + local _value = subline[2] + local i = pystring:rfind(_value,' ') + _value = string.sub(_value,i+1,-1) + local cell = {name=_name, value=tonumber(_value)-self.nr_inv_swi_start} + table.insert(self._protoTable["vs"], cell) + self.nr_inv_swi_start = tonumber(_value) + elseif _name == "nr_voluntary_switches" then + local _value = subline[2] + local i = pystring:rfind(_value,' ') + _value = string.sub(_value,i+1,-1) + local cell = {name=_name, value=tonumber(_value)-self.nr_vol_swi_start} + table.insert(self._protoTable["vs"], cell) + self.nr_vol_swi_start = tonumber(_value) + end + end + end + + self:appendLine(self._protoTable) + self:push(lines) +end + +return CobProcess \ No newline at end of file diff --git a/source/tools/monitor/unity/collector/outline/pipeMon.lua b/source/tools/monitor/unity/collector/outline/pipeMon.lua index 81f2069698ef18cb43d89eff15d38ae33415f5bf..0d6347d610de1fd291ceaf1c54052f54786fd35b 100644 --- a/source/tools/monitor/unity/collector/outline/pipeMon.lua +++ b/source/tools/monitor/unity/collector/outline/pipeMon.lua @@ -47,6 +47,7 @@ function CpipeMon:setupPipe(fYaml) s:bind(path) print("bind " .. path) table.insert(self._socks, s) + else error("create udp pipe failed.") end @@ -89,6 +90,7 @@ function CpipeMon:procLines(stream) end local bytes = self._proto:encode(lines) self._proto:que(bytes) + return 0 end function CpipeMon:poll() @@ -104,7 +106,10 @@ function CpipeMon:poll() end local stream, err = sock:receive() if stream then - self:procLines(stream) + local res, msg = pcall(self.procLines, self, stream) + if not res then + print("bad line:\n" .. stream .. msg) + end else print("recv line return: " .. 
err) return 0 @@ -114,4 +119,4 @@ function CpipeMon:poll() return 0 end -return CpipeMon +return CpipeMon \ No newline at end of file diff --git a/source/tools/monitor/unity/collector/perfRun/perfRun.sh b/source/tools/monitor/unity/collector/perfRun/perfRun.sh new file mode 100755 index 0000000000000000000000000000000000000000..1ff8a394dcbbda869b81f135b36c020134fb3d89 --- /dev/null +++ b/source/tools/monitor/unity/collector/perfRun/perfRun.sh @@ -0,0 +1,113 @@ +id=$1 +echo $pid + +EXPIRE=$((2*24*60*60)) +# EXPIRE=$((5*60)) +LOG_DIR="/var/log/sysak/cpuhigh/" +STOP_FILE="${LOG_DIR}/stop" +TIME_FORMAT_STR="" +i=0 + +collect_func() { + perf record -F 99 -a -g -o $1 -- sleep 30 + echo $? >> ${LOG_DIR}/shell.log + sleep 1 +} + +# /var/log/sysak/cpuhigh/2023-07-31/09/37_20.log +# 2 3 4 5 6 7 8 + +get_time_str() { + time_str=$1 + format_time="" + arr=`echo $time_str | awk -F '[./_]' '{print $6,$7,$8,$9}'` + # echo $arr + N=1 + for time in $arr + do + # echo "[$num]" + if [ $N -eq 1 ] + then + format_time="$time" + elif [ $N -eq 2 ] + then + format_time="$format_time $time" + else + format_time="$format_time:$time" + fi + + ((N++)) + done + # echo $format_time + TIME_FORMAT_STR=$format_time +} + +clean_expire_files() { + now=`date "+%s"` + for date_dir in ${LOG_DIR}* + do + if test -d $date_dir + then + for hour_dir in ${date_dir}/* + do + if test -d $hour_dir + then + for time_file in ${hour_dir}/* + do + if test -f $time_file + then + get_time_str "$time_file" + ts=`date -d "$TIME_FORMAT_STR" +%s` + delta=`expr $now - $ts` + if [ $delta -gt $EXPIRE ] + then + echo "delte file=${time_file}." + rm -f $time_file + #else + # echo "file is available=${time_file}." + fi + fi + done + fi + + done + fi + done +} + +main() { + mkdir -p $LOG_DIR + yum install perf -y + i=0 + while ((1)) + do + if [ -a $STOP_FILE ]; then + echo "stop profileing" + exit 0 + fi + # _day0=$(date "+%d") + _date=$(date "+%Y-%m-%d") + _hour=$(date "+%H") + _time=$(date "+%M_%S") + if [ ! 
-d ${LOG_DIR}/${_date} ]; then + mkdir -p ${LOG_DIR}/${_date} + fi + if [ -d ${LOG_DIR}/${_date}/${_hour} ]; then + collect_func ${LOG_DIR}/${_date}/${_hour}/${_time}.log + else + mkdir -p ${LOG_DIR}/${_date}/${_hour} + collect_func ${LOG_DIR}/${_date}/${_hour}/${_time}.log + fi + i=$((i+1)) + #3*60*12, 12hours + if [ $i -gt 2160 ]; then + clean_expire_files + i=0 + fi + done +} + +main + +# clean_expire_files +# get_ts "/var/log/sysak/cpuhigh/2023-07-31/09/37_20.log" diff --git a/source/tools/monitor/unity/collector/plugin.lua b/source/tools/monitor/unity/collector/plugin.lua index a37801f4fd6e1e9c730aefb03fdc6ee8c27ec2da..9d997851d77be0ace16fb937211906c1b1476985 100644 --- a/source/tools/monitor/unity/collector/plugin.lua +++ b/source/tools/monitor/unity/collector/plugin.lua @@ -13,7 +13,7 @@ local json = cjson.new() function Cplugin:_init_(resYaml, ffi, proto_q, so, loop) self._ffi = ffi self._cffi = self._ffi.load(so) - self._cffi.init(proto_q) + self.alive = self._cffi.init(proto_q) self._so = so self._loop = loop or -1 self.proc_fs = resYaml.config["proc_path"] or "/" @@ -21,8 +21,10 @@ function Cplugin:_init_(resYaml, ffi, proto_q, so, loop) end function Cplugin:_del_() - print("uninstall " .. self._so) - self._cffi.deinit() + print("del " .. 
self._so) + if self.alive >= 0 then + self._cffi.deinit() + end end function Cplugin:load_label(unity_line, line) diff --git a/source/tools/monitor/unity/collector/plugin/Makefile b/source/tools/monitor/unity/collector/plugin/Makefile index 4f4007be71f6c84c9ef32d40c5448ea9f930709b..dd8b917006c2e3e4369f2d586ca7d28f649c6cc5 100644 --- a/source/tools/monitor/unity/collector/plugin/Makefile +++ b/source/tools/monitor/unity/collector/plugin/Makefile @@ -4,7 +4,7 @@ LDFLAG := -g -fpic -shared OBJS := proto_sender.o LIB := libproto_sender.a -DEPMOD=sample threads kmsg proc_schedstat proc_loadavg unity_nosched unity_irqoff cpudist cpu_bled net_health net_retrans netlink cpufreq gpuinfo pmu_events virtout sum_retrans virtiostat podmem +DEPMOD=sample threads kmsg proc_schedstat proc_loadavg unity_nosched unity_irqoff cpudist cpu_bled net_health net_retrans netlink cpufreq gpuinfo pmu_events virtout sum_retrans virtiostat numainfo rto_accel uncore_imc all: $(LIB) $(DEPMOD) diff --git a/source/tools/monitor/unity/collector/plugin/gpuinfo/gpuinfo.c b/source/tools/monitor/unity/collector/plugin/gpuinfo/gpuinfo.c index 943f68fe0a482a9fe2941368b3091a27db23f926..dd0630994cd193e509e440bdbffa68bfa1888614 100644 --- a/source/tools/monitor/unity/collector/plugin/gpuinfo/gpuinfo.c +++ b/source/tools/monitor/unity/collector/plugin/gpuinfo/gpuinfo.c @@ -19,6 +19,7 @@ int call(int t, struct unity_lines* lines) { char buffer[256]; /* Temporary buffer for parsing */ float mm_total, mm_used, mm_free, temp, powerdraw, gpu_util, mem_util; struct unity_line* line; + int num_gpus = 0, num_gpus_index = 0; // make sure nvidia-smi installed @@ -31,34 +32,53 @@ int call(int t, struct unity_lines* lines) { fp = popen("nvidia-smi --query-gpu=\"memory.total,memory.used,memory.free,temperature.gpu,power.draw,utilization.gpu,utilization.memory\" --format=nounits,csv,noheader", "r"); memset(buffer, 0, sizeof(buffer)); - // // for test - // char command[128]; - // if (sprintf(command, "cat %s%s", 
get_unity_proc(), "/proc/gpuinfo") < 0) - // printf("sprintf error\n"); - // fp = popen(command, "r"); + if (fp != NULL) + { + while (fgets(buffer, sizeof(buffer), fp)) + { + if (strstr(buffer, "Failed") != NULL) { + // printf("Found the word 'Failed' in the buffer: %s", buffer); + break; + } + num_gpus++; + sscanf(buffer, "%f, %f, %f, %f, %f, %f, %f", &mm_total, &mm_used, &mm_free, &temp, &powerdraw, &gpu_util, &mem_util); + } + pclose(fp); + } + unity_alloc_lines(lines, num_gpus); + + fp = popen("nvidia-smi --query-gpu=\"memory.total,memory.used,memory.free,temperature.gpu,power.draw,utilization.gpu,utilization.memory\" --format=nounits,csv,noheader", "r"); + memset(buffer, 0, sizeof(buffer)); if (fp != NULL) { while (fgets(buffer, sizeof(buffer), fp)) { + if (strstr(buffer, "Failed") != NULL) { + // printf("Found the word 'Failed' in the buffer: %s", buffer); + break; + } + sscanf(buffer, "%f, %f, %f, %f, %f, %f, %f", &mm_total, &mm_used, &mm_free, &temp, &powerdraw, &gpu_util, &mem_util); + line = unity_get_line(lines, num_gpus_index); + unity_set_table(line, "gpuinfo"); + char gpu_name[10]; + snprintf(gpu_name, 10, "%s%d", "gpu", num_gpus_index); + unity_set_index(line, 0, "gpu_num", gpu_name); + unity_set_value(line, 0, "mm_total", mm_total); + unity_set_value(line, 1, "mm_used", mm_used); + unity_set_value(line, 2, "mm_free", mm_free); + unity_set_value(line, 3, "temp", temp); + unity_set_value(line, 4, "powerdraw", powerdraw); + unity_set_value(line, 5, "gpu_util", gpu_util); + unity_set_value(line, 6, "mem_util", mem_util); + + num_gpus_index++; } pclose(fp); } - unity_alloc_lines(lines, 1); // 预分配好 - line = unity_get_line(lines, 0); - unity_set_table(line, "gpuinfo"); - unity_set_index(line, 0, "gpu_num", "gpu0"); - unity_set_value(line, 0, "mm_total", mm_total); - unity_set_value(line, 1, "mm_used", mm_used); - unity_set_value(line, 2, "mm_free", mm_free); - unity_set_value(line, 3, "temp", temp); - unity_set_value(line, 4, "powerdraw", powerdraw); - 
unity_set_value(line, 5, "gpu_util", gpu_util); - unity_set_value(line, 6, "mem_util", mem_util); - return 0; } diff --git a/source/tools/monitor/unity/collector/plugin/net_retrans/net_retrans.bpf.c b/source/tools/monitor/unity/collector/plugin/net_retrans/net_retrans.bpf.c index 6be0e8ab8eb7b3cc81902fbeae962ab824523a4f..4b26114ce9f0ba9f27d05f5b37be871c5596824a 100644 --- a/source/tools/monitor/unity/collector/plugin/net_retrans/net_retrans.bpf.c +++ b/source/tools/monitor/unity/collector/plugin/net_retrans/net_retrans.bpf.c @@ -22,7 +22,7 @@ struct liphdr { #define MAX_ENTRY 128 #define BPF_F_FAST_STACK_CMP (1ULL << 9) #define KERN_STACKID_FLAGS (0 | BPF_F_FAST_STACK_CMP) -#define _(P) ({typeof(P) val = 0; bpf_probe_read(&val, sizeof(val), &P); val;}) +#define _(P) ({typeof(P) val = 0; bpf_probe_read_kernel(&val, sizeof(val), &P); val;}) BPF_PERF_OUTPUT(perf, 1024); BPF_STACK_TRACE(stack, MAX_ENTRY); @@ -90,7 +90,7 @@ static inline void get_list_task(struct list_head* phead, struct data_t* e) { struct task_struct* tsk = (struct task_struct*)BPF_CORE_READ(pwq, polling_task); if (tsk) { e->pid = BPF_CORE_READ(tsk, pid); - bpf_probe_read(&e->comm[0], TASK_COMM_LEN, &tsk->comm[0]); + bpf_probe_read_kernel(&e->comm[0], TASK_COMM_LEN, &tsk->comm[0]); } } } diff --git a/source/tools/monitor/unity/collector/plugin/numainfo/numainfo.c b/source/tools/monitor/unity/collector/plugin/numainfo/numainfo.c index 67017fdae592000b8cd5d1a42a267d1e67106d42..d5df80b123790b06bf1f45467b975bad990bbf45 100644 --- a/source/tools/monitor/unity/collector/plugin/numainfo/numainfo.c +++ b/source/tools/monitor/unity/collector/plugin/numainfo/numainfo.c @@ -4,32 +4,69 @@ #include "numainfo.h" #include -#include +#include +#include +#include -int init(void * arg) { +int init(void *arg) +{ printf("numainfo plugin install, proc: %s\n", get_unity_proc()); return 0; } -int call(int t, struct unity_lines* lines) { +#define NODE_DIR "/sys/devices/system/node/" + +int get_numa_node_count(char *path) +{ 
+ int count = 0; + DIR *dir; + struct dirent *entry; + char fname[128]; + sprintf(fname, "%s%s", path, NODE_DIR); + if ((dir = opendir(fname)) == NULL) + { + perror("打开/sys/devices/system/node/目录失败"); + exit(EXIT_FAILURE); + } + while ((entry = readdir(dir)) != NULL) + { + if (strncmp(entry->d_name, "node", 4) == 0) + { + count++; + } + } + closedir(dir); + return count; +} +// how to test +// sh-4.2# mkdir -p /tmp/sys/devices/system/node/node0/ +// sh-4.2# cp -r /sys/devices/system/node/node0/numastat /tmp/sys/devices/system/node/node0/ +// sh-4.2# cp -r /sys/devices/system/node/node0/meminfo /tmp/sys/devices/system/node/node0/ +// sh-4.2# mkdir -p /tmp/sys/devices/system/node/node1/ +// sh-4.2# cp -r /sys/devices/system/node/node0/numastat /tmp/sys/devices/system/node/node1/ +// sh-4.2# cp -r /sys/devices/system/node/node0/meminfo /tmp/sys/devices/system/node/node1/ +// + +int call(int t, struct unity_lines *lines) +{ // get numa node number - // yum install numactl-devel - int num_nodes = numa_max_node() + 1; + int num_nodes = get_numa_node_count(get_unity_proc()); // num_nodes = 2; // read from /sys/devices/system/node/node0/numastat // printf("numa %d\n", num_nodes); - struct unity_line* line; + struct unity_line *line; int i, j, ret; - FILE *fp; - char fname[128]; + FILE *fp; + char fname[128]; + + unity_alloc_lines(lines, num_nodes); // 预分配好 - unity_alloc_lines(lines, num_nodes); // 预分配好 - // unity_set_index(line, 0, "mode", "numa_num"); // unity_set_value(line, 0, "numa_num_sum", num_nodes); - for (i = 0; i < num_nodes; i++) { + for (i = 0; i < num_nodes; i++) + { char numa_name[10]; snprintf(numa_name, 10, "%s%d", "node", i); // printf("numa is %s\n", numa_name); @@ -41,12 +78,14 @@ int call(int t, struct unity_lines* lines) { if (sprintf(fname, "%s%s%d%s", get_unity_proc(), "/sys/devices/system/node/node", i, "/numastat") < 0) printf("sprintf error\n"); // printf("fname is %s\n", fname); - if ((fp = fopen(fname, "r")) == NULL) { + if ((fp = fopen(fname, 
"r")) == NULL) + { ret = errno; printf("WARN: numainfo install FAIL fopen\n"); return ret; } - for (j = 0; j < 6; j++) { + for (j = 0; j < 6; j++) + { char k[32]; unsigned long v; errno = fscanf(fp, "%s %ld\n", k, &v); @@ -56,11 +95,41 @@ int call(int t, struct unity_lines* lines) { unity_set_value(line, j, k, v); } if (fp) - fclose(fp); + fclose(fp); + + if (sprintf(fname, "%s%s%d%s", get_unity_proc(), "/sys/devices/system/node/node", i, "/meminfo") < 0) + printf("sprintf error\n"); + if ((fp = fopen(fname, "r")) == NULL) + { + ret = errno; + printf("WARN: numainfo install FAIL fopen\n"); + return ret; + } + char buf[1024]; + int mem_total = 0, mem_free = 0; + int node_id = 0; + while (fgets(buf, sizeof(buf), fp) != NULL) + { + if (sscanf(buf, "Node %d MemTotal: %d kB", &node_id, &mem_total) == 2) + { + // 读取到了 MemTotal 的值 + // printf("总内存:%d kB\n", mem_total); + unity_set_value(line, j, "node_mem_total", mem_total); + } + else if (sscanf(buf, "Node %d MemFree: %d kB", &node_id, &mem_free) == 2) + { + // 读取到了 MemFree 的值 + // printf("空闲内存:%d kB\n", mem_free); + unity_set_value(line, j + 1, "node_mem_free", mem_free); + } + } + if (fp) + fclose(fp); } return 0; } -void deinit(void) { - printf("sample plugin uninstall\n"); +void deinit(void) +{ + printf("numainfo plugin uninstall\n"); } diff --git a/source/tools/monitor/unity/collector/plugin/pmu_events/pmu_events.c b/source/tools/monitor/unity/collector/plugin/pmu_events/pmu_events.c index af6666cd4de92b1f886ca30d41b52042d18ce924..d8782af3d4c17bdcff73015bcaa3709946bd60a4 100644 --- a/source/tools/monitor/unity/collector/plugin/pmu_events/pmu_events.c +++ b/source/tools/monitor/unity/collector/plugin/pmu_events/pmu_events.c @@ -1,3 +1,5 @@ +#include +#include #include "pmu_events.h" struct pmu_events { @@ -11,14 +13,13 @@ struct pcpu_hw_info *gpcpu_hwi; struct pmu_events *glb_pme; char *events_str[] = {"cycles", "ins", "refCyc", "llcLoadMis", "llcStoreMis", - "llcLoad", "llcStore"}; + "llcLoad", "llcStore", 
"splitLock"}; char *value_str[] = {"cycles", "instructions", "CPI", "llc_load_ref", "llc_load_miss", "LLC_LMISS_RATE" - "llc_store_ref", "llc_store_miss", "LLC_SMIRSS_RATE"}; + "llc_store_ref", "llc_store_miss", "LLC_SMIRSS_RATE", "splitLock"}; /*char origpath[]="/mnt/host/sys/fs/cgroup/perf_event/system.slice/"; */ char *origpath = NULL; /* defalt to host events */ -static int init_fail; static long perf_event_open(struct perf_event_attr *hw_event, pid_t pid, int cpu, int group_fd, unsigned long flags) { @@ -29,6 +30,19 @@ static long perf_event_open(struct perf_event_attr *hw_event, pid_t pid, return ret; } +static void bump_nofile_rlimit(void) +{ + struct rlimit rlim_new = { + /* NOFILE is limited by sysctl_nr_file, which is 1024*1024 by default */ + .rlim_cur = 1024*1024, + .rlim_max = 1024*1024, + }; + + if (setrlimit(RLIMIT_NOFILE, &rlim_new)) { + fprintf(stderr, "Failed to increase RLIMIT_NOFILE limit!\n"); + } +} + int create_hw_events(struct pcpu_hw_info *pc_hwi) { int cpu, i, j, group_last, idx_fail; @@ -65,7 +79,7 @@ int create_hw_events(struct pcpu_hw_info *pc_hwi) #endif hwi[i].fd = perf_event_open(&attr, pid, cpu, group_leader, flags); if (hwi[i].fd <= 0) { - int ret = errno; + ret = errno; if (ret == ENODEV) { printf("cpu may OFF LINE\n"); } else { @@ -149,8 +163,7 @@ int init(void * arg) if (nr_cpus <= 0) { ret = errno; printf("WARN: pmu_events install FAIL sysconf\n"); - init_fail = ret; - return 0; + return -ret; } pmue = pme_new(nr_cpus); @@ -158,8 +171,7 @@ int init(void * arg) pcpu_hwi = pmue->pcpu_hwi; glb_pme = pmue; } else { - init_fail = -1; - return 0; + return -1; } #if 0 pmue = (struct pmu_events *)arg; @@ -169,26 +181,24 @@ int init(void * arg) cgroup_fd = open(origpath, O_RDONLY); if (cgroup_fd < 0) { printf(" open %s fail\n", origpath); - init_fail = cgroup_fd; - return 0; + return cgroup_fd; } flags = PERF_FLAG_PID_CGROUP; } else { cgroup_fd = -1; flags = 0; } + bump_nofile_rlimit(); for (i = 0; i < nr_cpus; i++) { pcpu_hwi[i].cpu = 
i; pcpu_hwi[i].pid = cgroup_fd; pcpu_hwi[i].flags = flags; ret = create_hw_events(&pcpu_hwi[i]); if (ret) { - init_fail = ret; - return 0; + return ret; } } printf("pmu_events plugin install.\n"); - init_fail = 0; return 0; } @@ -234,7 +244,8 @@ int fill_line(struct unity_line *line, double *summ, char *mode, char *index) summ[CYCLES]==0?0:summ[INSTRUCTIONS]/summ[CYCLES]); unity_set_value(line, i++, "MPI", summ[INSTRUCTIONS]==0?0: - (summ[LLC_LOAD_MISS]+summ[LLC_STORE_MISS])/summ[INSTRUCTIONS]); + ((float)summ[LLC_LOAD_MISS]+(float)summ[LLC_STORE_MISS])*1000.00/ + (float)summ[INSTRUCTIONS]); unity_set_value(line, i++, "l3LoadMisRate", summ[LLC_LOAD_REF]==0?0:summ[LLC_LOAD_MISS]/summ[LLC_LOAD_REF]); unity_set_value(line, i++, "l3StoreMisRate", @@ -253,9 +264,6 @@ int call(int t, struct unity_lines* lines) double summ[NR_EVENTS]; struct pcpu_hw_info *pcp_hw; - if (init_fail) { - return init_fail; - } pcp_hw = glb_pme->pcpu_hwi; for (i = 0; i < nr_cpus; i++) { collect(&pcp_hw[i], summ); diff --git a/source/tools/monitor/unity/collector/plugin/pmu_events/pmu_events.h b/source/tools/monitor/unity/collector/plugin/pmu_events/pmu_events.h index 89994a4063a76b69e2e492e1ba7178463a815995..1c1acda55817629902bd1dc1b820d807bde9e611 100644 --- a/source/tools/monitor/unity/collector/plugin/pmu_events/pmu_events.h +++ b/source/tools/monitor/unity/collector/plugin/pmu_events/pmu_events.h @@ -15,10 +15,8 @@ #include #include #include "../plugin_head.h" -#define NR_GROUP 2 -#define NR_EVENTS 7 -#define NR_CELL 2 - +#define NR_GROUP 3 +#define NR_EVENTS 8 #ifdef DEBUG /* for test */ @@ -51,6 +49,7 @@ __u32 hw_types[] = { PERF_TYPE_HW_CACHE, PERF_TYPE_HW_CACHE, PERF_TYPE_HW_CACHE, + PERF_TYPE_RAW, }; __u64 hw_configs[] = { @@ -69,11 +68,13 @@ __u64 hw_configs[] = { PERF_COUNT_HW_CACHE_LL << 0 | (PERF_COUNT_HW_CACHE_OP_WRITE << 8) | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16), + 0x10f4, }; int groupidx[NR_EVENTS] = { 0,0,0,0,0, 1,1, + 1, }; enum { @@ -84,6 +85,7 @@ enum { LLC_STORE_MISS, 
LLC_LOAD_REF, LLC_STORE_REF, + SPLIT_LOCK, }; struct hw_info { diff --git a/source/tools/monitor/unity/collector/plugin/rto_accel/Makefile b/source/tools/monitor/unity/collector/plugin/rto_accel/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..867450357feadcb08778892683ed94c4074de4ba --- /dev/null +++ b/source/tools/monitor/unity/collector/plugin/rto_accel/Makefile @@ -0,0 +1,8 @@ + +newdirs := $(shell find ./ -type d) + +bpfsrcs := rto_accel.bpf.c +csrcs := rto_accel.c +so := librto_accel.so + +include ../bpfso.mk \ No newline at end of file diff --git a/source/tools/monitor/unity/collector/plugin/rto_accel/rto_accel.bpf.c b/source/tools/monitor/unity/collector/plugin/rto_accel/rto_accel.bpf.c new file mode 100644 index 0000000000000000000000000000000000000000..45eda7a3d8f6c991e63c6e00f302483d5de55435 --- /dev/null +++ b/source/tools/monitor/unity/collector/plugin/rto_accel/rto_accel.bpf.c @@ -0,0 +1,25 @@ +// +// Created by 廖肇燕 on 2023/8/1. +// +#include +#include + +#ifndef __section +# define __section(NAME) \ + __attribute__((section(NAME), used)) +#endif + +#define SOL_TCP 6 + +__section("sockops") +int set_rto_min(struct bpf_sock_ops *skops) +{ + int op = (int)skops->op; + int rv = -1; + if (op == BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB) { + int rto_min_usecs = 8000; + rv = bpf_setsockopt(skops, SOL_TCP, TCP_BPF_RTO_MIN, &rto_min_usecs, sizeof(rto_min_usecs)); + } + skops->reply = rv; + return 1; +} diff --git a/source/tools/monitor/unity/collector/plugin/rto_accel/rto_accel.c b/source/tools/monitor/unity/collector/plugin/rto_accel/rto_accel.c new file mode 100644 index 0000000000000000000000000000000000000000..21602b5ea2225c85716f5df439455f15c86330d4 --- /dev/null +++ b/source/tools/monitor/unity/collector/plugin/rto_accel/rto_accel.c @@ -0,0 +1,70 @@ +// +// Created by 廖肇燕 on 2023/8/1. 
+// + +#include "rto_accel.h" +#include "../bpf_head.h" +#include "rto_accel.skel.h" + +#include +#include +#include +#include + +#define NS_FDS_NUM 256 + +static int ns_fds[NS_FDS_NUM] = {0}; + +static void close_ns_fds(void) { + int i; + for (i = 0; i < NS_FDS_NUM; i ++) { + if (ns_fds[i] > 0) { + close(ns_fds[i]); + ns_fds[i] = 0; + } + } +} + +DEFINE_SEKL_OBJECT(rto_accel); +static struct bpf_prog_skeleton *search_progs(const char* func) { + struct bpf_object_skeleton *s; + int i; + + s = rto_accel->skeleton; + for (i = 0; i < s->prog_cnt; i ++) { + if (strcmp(s->progs[i].name, func) == 0) { + return &(s->progs[i]); + } + } + return NULL; +} + +static int combine() { + return 0; +} + +int init(void *arg) +{ + int ret = 0; + printf("rto_accel plugin install.\n"); + + return ret; +} + +int call(int t, struct unity_lines *lines) +{ + struct unity_line* line; + + unity_alloc_lines(lines, 1); // 预分配好 + line = unity_get_line(lines, 0); + unity_set_table(line, "rto_accel"); + unity_set_value(line, 0, "value1", 1); + + return 0; +} + +void deinit(void) +{ + printf("rto_accel plugin uninstall.\n"); + DESTORY_SKEL_BOJECT(rto_accel); +} diff --git a/source/tools/monitor/unity/collector/plugin/rto_accel/rto_accel.h b/source/tools/monitor/unity/collector/plugin/rto_accel/rto_accel.h new file mode 100644 index 0000000000000000000000000000000000000000..32585a3343ab961392c3260d7b7f31f919c253f5 --- /dev/null +++ b/source/tools/monitor/unity/collector/plugin/rto_accel/rto_accel.h @@ -0,0 +1,8 @@ +// +// Created by 廖肇燕 on 2023/8/1. 
+// + +#ifndef UNITY_RTO_ACCEL_H +#define UNITY_RTO_ACCEL_H + +#endif //UNITY_RTO_ACCEL_H diff --git a/source/tools/monitor/unity/collector/plugin/imc_latency/Makefile b/source/tools/monitor/unity/collector/plugin/uncore_imc/Makefile similarity index 83% rename from source/tools/monitor/unity/collector/plugin/imc_latency/Makefile rename to source/tools/monitor/unity/collector/plugin/uncore_imc/Makefile index 4e8c20b686d2347fe27be717e0f999bd5ef546a8..ebb639c4b38bb1831fc00adb818cb0a6ae475a59 100644 --- a/source/tools/monitor/unity/collector/plugin/imc_latency/Makefile +++ b/source/tools/monitor/unity/collector/plugin/uncore_imc/Makefile @@ -1,8 +1,8 @@ CC := gcc CFLAG := -g -fpic LDFLAG := -g -fpic -shared -OBJS := imc_latency.o -SO := libimc_latency.so +OBJS := uncore_imc.o +SO := libuncore_imc.so all: $(SO) install diff --git a/source/tools/monitor/unity/collector/plugin/imc_latency/imc_latency.c b/source/tools/monitor/unity/collector/plugin/uncore_imc/uncore_imc.c similarity index 55% rename from source/tools/monitor/unity/collector/plugin/imc_latency/imc_latency.c rename to source/tools/monitor/unity/collector/plugin/uncore_imc/uncore_imc.c index 6dfd0f7574b3e98b7c0921b719e786866b32bfda..29597a605b2d686d301ecd81ba0175398321c7ec 100644 --- a/source/tools/monitor/unity/collector/plugin/imc_latency/imc_latency.c +++ b/source/tools/monitor/unity/collector/plugin/uncore_imc/uncore_imc.c @@ -8,10 +8,11 @@ #include #include #include +#include -#include "imc_latency.h" +#include "uncore_imc.h" -// #define DEBUG +#define NR_IMC 6 struct Env { uint32_t max_cpuid; @@ -23,50 +24,41 @@ struct Env { int64_t nr_socket; int64_t nr_core; int64_t nr_channel; - int64_t* socket_ref_core; + int64_t *socket_ref_core; bool init_succ; } env = {.vm = false, .init_succ = true}; -typedef struct event { - uint64_t rpq_occ; - uint64_t rpq_ins; - uint64_t wpq_occ; - uint64_t wpq_ins; - uint64_t dram_speed; -} event; - -typedef struct channel_record { - uint64_t rpq_occ; - uint64_t rpq_ins; - 
uint64_t wpq_occ; - uint64_t wpq_ins; - double read_latency; - double write_latency; -} channel_record; - -typedef struct socket_record { - channel_record* channel_record_arr; - uint64_t rpq_occ; - uint64_t rpq_ins; - uint64_t wpq_occ; - uint64_t wpq_ins; - double read_latency; - double write_latency; - uint64_t dram_clock; -} socket_record; - -typedef struct record { - socket_record* socket_record_arr; -} record; +// #define DEBUG +#ifdef DEBUG +void print_metric(metric *m) { + printf("rlat=%lf wlat=%lf avglat=%lf bw_rd=%ld bw_wr=%ld\n", m->rlat, + m->wlat, m->avglat, m->bw_rd, m->bw_wr); +} + +void print_result(result res) { + int i, j; + for (int i = 0; i < env.nr_socket; i++) { + // for (int j = 0; j < env.nr_channel; j++) { + // printf("socket=%d channel=%d\n", i, j); + // print_metric(&res.channel[i][j]); + // } + printf("socket=%d\n", i); + print_metric(&res.socket[i]); + } + printf("node:\n"); + print_metric(res.node); +} +#endif record before, after; +result res; time_t before_ts = 0, after_ts = 0; -imc_pmu* pmus = 0; +imc_pmu *pmus = 0; -int64_t read_sys_file(char* path, bool slient) { +int64_t read_sys_file(char *path, bool slient) { int64_t val; - FILE* fp = fopen(path, "r"); + FILE *fp = fopen(path, "r"); if (!fp) { if (!slient) fprintf(stderr, "Failed open sys-file: %s\n", path); return -1; @@ -80,7 +72,7 @@ int64_t read_sys_file(char* path, bool slient) { return val; } -static int write_reg(imc_event* ev, uint64_t val) { +static int write_reg(imc_event *ev, uint64_t val) { int err = 0; if (ev->fd >= 0) { close(ev->fd); @@ -102,7 +94,7 @@ static int write_reg(imc_event* ev, uint64_t val) { return err; } -static uint64_t read_reg(imc_event* ev) { +static uint64_t read_reg(imc_event *ev) { uint64_t result = 0; if (ev->fd >= 0) { int status = read(ev->fd, &result, sizeof(result)); @@ -123,7 +115,7 @@ static bool is_cpu_online(int cpu_id) { snprintf(path, BUF_SIZE, "/sys/devices/system/cpu/cpu%d/online", cpu_id); - FILE* fp = fopen(path, "r"); + FILE *fp 
= fopen(path, "r"); if (!fp) { fprintf(stderr, "Failed open %s.\n", path); goto cleanup; @@ -164,7 +156,7 @@ int64_t read_physical_package_id(int cpu_id) { return val; } -static int get_topology(int id, struct topology_ent* ent) { +static int get_topology(int id, struct topology_ent *ent) { int err = 0; ent->core_id = read_core_id(id); ent->socket_id = read_physical_package_id(id); @@ -181,7 +173,7 @@ static int get_topology(int id, struct topology_ent* ent) { static int discovery_topology() { int err = 0, i = 0; - struct topology_ent* topo = 0; + struct topology_ent *topo = 0; env.nr_cpu = sysconf(_SC_NPROCESSORS_CONF); @@ -235,14 +227,14 @@ cleanup: return err; } -static void cpuid_1(int leaf, CPUID_INFO* info) { +static void cpuid_1(int leaf, CPUID_INFO *info) { __asm__ __volatile__("cpuid" : "=a"(info->reg.eax), "=b"(info->reg.ebx), "=c"(info->reg.ecx), "=d"(info->reg.edx) : "a"(leaf)); } -void cpuid_2(const unsigned leaf, const unsigned subleaf, CPUID_INFO* info) { +void cpuid_2(const unsigned leaf, const unsigned subleaf, CPUID_INFO *info) { __asm__ __volatile__("cpuid" : "=a"(info->reg.eax), "=b"(info->reg.ebx), "=c"(info->reg.ecx), "=d"(info->reg.edx) @@ -337,8 +329,8 @@ bool is_model_support() { env.cpu_model == SKX); } -uint32_t* get_ddr_latency_metric_config() { - uint32_t* cfgs = 0; +uint32_t *get_ddr_latency_metric_config() { + uint32_t *cfgs = 0; cfgs = calloc(4, sizeof(uint32_t)); if (!cfgs) { fprintf(stderr, "Failed calloc cfgs memory.\n"); @@ -346,25 +338,51 @@ uint32_t* get_ddr_latency_metric_config() { } if (ICX == env.cpu_model || SPR == env.cpu_model) { - cfgs[0] = MC_CH_PCI_PMON_CTL_EVENT(0x80) + - MC_CH_PCI_PMON_CTL_UMASK(0); // DRAM RPQ occupancy pch 0 - cfgs[1] = MC_CH_PCI_PMON_CTL_EVENT(0x10) + - MC_CH_PCI_PMON_CTL_UMASK(1); // DRAM RPQ Insert.pch 0 - cfgs[2] = MC_CH_PCI_PMON_CTL_EVENT(0x82) + - MC_CH_PCI_PMON_CTL_UMASK(0); // DRAM WPQ Occupancy pch 0 - cfgs[3] = MC_CH_PCI_PMON_CTL_EVENT(0x20) + - MC_CH_PCI_PMON_CTL_UMASK(1); // DRAM WPQ 
Insert.pch 0 + cfgs[RPQ_OCC] = + MC_CH_PCI_PMON_CTL_EVENT(0x80) + + MC_CH_PCI_PMON_CTL_UMASK(0); // DRAM RPQ occupancy pch 0 + cfgs[RPQ_INS] = MC_CH_PCI_PMON_CTL_EVENT(0x10) + + MC_CH_PCI_PMON_CTL_UMASK(1); // DRAM RPQ Insert.pch 0 + cfgs[WPQ_OCC] = + MC_CH_PCI_PMON_CTL_EVENT(0x82) + + MC_CH_PCI_PMON_CTL_UMASK(0); // DRAM WPQ Occupancy pch 0 + cfgs[WPQ_INS] = MC_CH_PCI_PMON_CTL_EVENT(0x20) + + MC_CH_PCI_PMON_CTL_UMASK(1); // DRAM WPQ Insert.pch 0 } else { - cfgs[0] = MC_CH_PCI_PMON_CTL_EVENT(0x80) + - MC_CH_PCI_PMON_CTL_UMASK(0); // DRAM RPQ occupancy - cfgs[1] = MC_CH_PCI_PMON_CTL_EVENT(0x10) + - MC_CH_PCI_PMON_CTL_UMASK(0); // DRAM RPQ Insert - cfgs[2] = MC_CH_PCI_PMON_CTL_EVENT(0x81) + - MC_CH_PCI_PMON_CTL_UMASK(0); // DRAM WPQ Occupancy - cfgs[3] = MC_CH_PCI_PMON_CTL_EVENT(0x20) + - MC_CH_PCI_PMON_CTL_UMASK(0); // DRAM WPQ Insert + cfgs[RPQ_OCC] = MC_CH_PCI_PMON_CTL_EVENT(0x80) + + MC_CH_PCI_PMON_CTL_UMASK(0); // DRAM RPQ occupancy + cfgs[RPQ_INS] = MC_CH_PCI_PMON_CTL_EVENT(0x10) + + MC_CH_PCI_PMON_CTL_UMASK(0); // DRAM RPQ Insert + cfgs[WPQ_OCC] = MC_CH_PCI_PMON_CTL_EVENT(0x81) + + MC_CH_PCI_PMON_CTL_UMASK(0); // DRAM WPQ Occupancy + cfgs[WPQ_INS] = MC_CH_PCI_PMON_CTL_EVENT(0x20) + + MC_CH_PCI_PMON_CTL_UMASK(0); // DRAM WPQ Insert } + /* CAS_COUNT.RD and CAS_COUNT.WR */ + switch (env.cpu_model) { + case KNL: + cfgs[CAS_RD] = + MC_CH_PCI_PMON_CTL_EVENT(0x03) + MC_CH_PCI_PMON_CTL_UMASK(1); + cfgs[CAS_WR] = + MC_CH_PCI_PMON_CTL_EVENT(0x03) + MC_CH_PCI_PMON_CTL_UMASK(2); + case SNOWRIDGE: + case ICX: + cfgs[CAS_RD] = + MC_CH_PCI_PMON_CTL_EVENT(0x04) + MC_CH_PCI_PMON_CTL_UMASK(0x0f); + cfgs[CAS_WR] = + MC_CH_PCI_PMON_CTL_EVENT(0x04) + MC_CH_PCI_PMON_CTL_UMASK(0x30); + case SPR: + cfgs[CAS_RD] = + MC_CH_PCI_PMON_CTL_EVENT(0x05) + MC_CH_PCI_PMON_CTL_UMASK(0xcf); + cfgs[CAS_WR] = + MC_CH_PCI_PMON_CTL_EVENT(0x05) + MC_CH_PCI_PMON_CTL_UMASK(0xf0); + default: + cfgs[CAS_RD] = + MC_CH_PCI_PMON_CTL_EVENT(0x04) + MC_CH_PCI_PMON_CTL_UMASK(3); + cfgs[CAS_WR] = + 
MC_CH_PCI_PMON_CTL_EVENT(0x04) + MC_CH_PCI_PMON_CTL_UMASK(12); + } return cfgs; } @@ -380,8 +398,9 @@ struct perf_event_attr init_perf_event_attr(bool group) { return e; } -void init_imc_event(imc_event* event, int pmu_id, int core_id, bool fixed) { +void init_imc_event(imc_event *event, int pmu_id, int core_id, bool fixed) { struct perf_event_attr attr = init_perf_event_attr(false); + attr.type = pmu_id; event->attr = attr; event->fixed = fixed; @@ -390,7 +409,7 @@ void init_imc_event(imc_event* event, int pmu_id, int core_id, bool fixed) { event->fd = -1; } -void init_imc_reggrp(imc_reg_group* grp, int socket_id, int pmu_id) { +void init_imc_reggrp(imc_reg_group *grp, int socket_id, int pmu_id) { int i = 0; #ifdef DEBUG @@ -406,11 +425,11 @@ void init_imc_reggrp(imc_reg_group* grp, int socket_id, int pmu_id) { } } -imc_pmu* init_imc_pmus(int64_t* pmu_ids, int64_t size) { +imc_pmu *init_imc_pmus(int64_t *pmu_ids, int64_t size) { int skt_id = 0; int pmu_id = 0; - imc_pmu* pmus = calloc(env.nr_socket, sizeof(imc_pmu)); + imc_pmu *pmus = calloc(env.nr_socket, sizeof(imc_pmu)); for (skt_id = 0; skt_id < env.nr_socket; skt_id++) { pmus[skt_id].reg_groups = calloc(size, sizeof(imc_reg_group)); @@ -426,14 +445,14 @@ imc_pmu* init_imc_pmus(int64_t* pmu_ids, int64_t size) { return pmus; } -void program_imc(uint32_t* cfgs, imc_pmu* pmus) { +void program_imc(uint32_t *cfgs, imc_pmu *pmus) { int skt_id = 0; int pmu_id = 0; int idx = 0; for (skt_id = 0; skt_id < env.nr_socket; skt_id++) { - imc_pmu* pmu = pmus + skt_id; + imc_pmu *pmu = pmus + skt_id; for (pmu_id = 0; pmu_id < pmu->nr_grp; pmu_id++) { - imc_reg_group* grp = pmu->reg_groups + pmu_id; + imc_reg_group *grp = pmu->reg_groups + pmu_id; /* enabel and reset fixed counter(DRAM clock) */ write_reg(&grp->fixed_ev, MC_CH_PCI_PMON_FIXED_CTL_EN); write_reg(&grp->fixed_ev, MC_CH_PCI_PMON_FIXED_CTL_EN + @@ -452,32 +471,55 @@ void program_imc(uint32_t* cfgs, imc_pmu* pmus) { } } -socket_record* alloc_socket_record() { - int 
skt_id = 0; - socket_record* rec = calloc(env.nr_socket, sizeof(socket_record)); - for (skt_id = 0; skt_id < env.nr_socket; skt_id++) { - rec[skt_id].channel_record_arr = - calloc(env.nr_channel, sizeof(channel_record)); +reg_event **alloc_record() { + int i = 0; + reg_event **ret = calloc(env.nr_socket, sizeof(reg_event *)); + for (i = 0; i < env.nr_socket; i++) { + ret[i] = calloc(env.nr_channel, sizeof(reg_event)); } - return rec; + return ret; } -void free_socket_record(socket_record* rec) { - int skt_id = 0; - for (skt_id = 0; skt_id < env.nr_socket; skt_id++) { - free(rec[skt_id].channel_record_arr); +void free_record(reg_event **data) { + int i = 0; + for (i = 0; i < env.nr_socket; i++) { + free(data[i]); + } + free(data); +} + +void alloc_result() { + int i = 0; + res.channel = calloc(env.nr_socket, sizeof(metric *)); + for (i = 0; i < env.nr_socket; i++) { + res.channel[i] = calloc(env.nr_channel, sizeof(metric)); + } + + res.socket = calloc(env.nr_socket, sizeof(metric)); + res.node = calloc(1, sizeof(metric)); +} + +void free_result() { + int i = 0; + for (i = 0; i < env.nr_socket; i++) { + free(res.channel[i]); } - free(rec); + free(res.channel); + + if (res.socket) free(res.socket); + if (res.node) free(res.node); } void init_data() { - before.socket_record_arr = alloc_socket_record(); - after.socket_record_arr = alloc_socket_record(); + before.regs = alloc_record(); + after.regs = alloc_record(); + alloc_result(); } void free_data() { - if (before.socket_record_arr) free_socket_record(before.socket_record_arr); - if (after.socket_record_arr) free_socket_record(after.socket_record_arr); + free_record(before.regs); + free_record(after.regs); + free_result(); } int64_t get_perf_pmuid(int num) { @@ -497,8 +539,8 @@ int64_t get_perf_pmuid(int num) { return id; } -static int64_t* enumerate_imc_PMUs() { - int64_t* pmu_ids = 0; +static int64_t *enumerate_imc_PMUs() { + int64_t *pmu_ids = 0; int idx = 0, i = 0; pmu_ids = calloc(MAX_IMC_ID, sizeof(int64_t)); @@ 
-525,14 +567,13 @@ cleanup: return pmu_ids; } -int init(void* arg) { +int init(void *arg) { int err = 0; - int64_t* pmu_ids = 0; - uint32_t* cfgs = 0; + int64_t *pmu_ids = 0; + uint32_t *cfgs = 0; bzero(&before, sizeof(before)); bzero(&after, sizeof(after)); - // check model if (!detect_model()) { fprintf(stderr, "Failed detect model.\n"); @@ -588,7 +629,6 @@ int init(void* arg) { #endif cleanup: - if (pmu_ids) { free(pmu_ids); pmu_ids = 0; @@ -606,149 +646,145 @@ cleanup: } void read_imc() { - int skt_id = 0, pmu_id = 0, counter_id = 0; - after_ts = time(0); + int skt_id = 0, pmu_id = 0; for (skt_id = 0; skt_id < env.nr_socket; skt_id++) { - imc_pmu* pmu = pmus + skt_id; - socket_record* socket_ev = &after.socket_record_arr[skt_id]; + imc_pmu *pmu = pmus + skt_id; for (pmu_id = 0; pmu_id < pmu->nr_grp; pmu_id++) { - imc_reg_group* grp = pmu->reg_groups + pmu_id; - channel_record* channel_ev = - &after.socket_record_arr[skt_id].channel_record_arr[pmu_id]; + imc_reg_group *grp = pmu->reg_groups + pmu_id; + reg_event *reg_ev = after.regs[skt_id] + pmu_id; /* enabel and reset fixed counter(DRAM clock) */ - if (pmu_id == 0) { - socket_ev->dram_clock = read_reg(&grp->fixed_ev); - if (env.cpu_model == ICX || env.cpu_model == SNOWRIDGE) { - socket_ev->dram_clock = 2 * socket_ev->dram_clock; - } + reg_ev->dram_clock = read_reg(&grp->fixed_ev); + if (env.cpu_model == ICX || env.cpu_model == SNOWRIDGE) { + reg_ev->dram_clock = 2 * reg_ev->dram_clock; } - channel_ev->rpq_occ = read_reg(&grp->general_ev[RPQ_OCC]); - channel_ev->rpq_ins = read_reg(&grp->general_ev[RPQ_INS]); - channel_ev->wpq_occ = read_reg(&grp->general_ev[WPQ_OCC]); - channel_ev->wpq_ins = read_reg(&grp->general_ev[WPQ_INS]); - - socket_ev->rpq_occ += channel_ev->rpq_occ; - socket_ev->rpq_ins += channel_ev->rpq_ins; - socket_ev->wpq_occ += channel_ev->wpq_occ; - socket_ev->wpq_ins += channel_ev->wpq_ins; + reg_ev->rpq_occ = read_reg(&grp->general_ev[RPQ_OCC]); + reg_ev->rpq_ins = 
read_reg(&grp->general_ev[RPQ_INS]); + reg_ev->wpq_occ = read_reg(&grp->general_ev[WPQ_OCC]); + reg_ev->wpq_ins = read_reg(&grp->general_ev[WPQ_INS]); + reg_ev->cas_rd = read_reg(&grp->general_ev[CAS_RD]); + reg_ev->cas_wr = read_reg(&grp->general_ev[CAS_WR]); } } +} +void calculate_metric() { + int skt_id = 0, pmu_id = 0; + after_ts = time(0); if (before_ts) { +#define UINT48_MAX 281474976710655U /* (1 << 48) - 1 */ +#define LAT(dest, occ, ins, speed) \ + ({ \ + if ((ins != 0) && (speed != 0)) \ + dest = (occ) / (ins) / (speed); \ + else \ + dest = 0; \ + }) + +#define DELTA(val1, val2) \ + (val1) >= (val2) ? (val1) - (val2) : UINT48_MAX - (val2) + (val1); + double delta = after_ts - before_ts; - for (skt_id = 0; skt_id < env.nr_socket; skt_id++) { - socket_record* before_socket_ev = &before.socket_record_arr[skt_id]; - socket_record* after_socket_ev = &after.socket_record_arr[skt_id]; - imc_pmu* pmu = pmus + skt_id; - double dram_speed = - (after_socket_ev->dram_clock - before_socket_ev->dram_clock) / - (delta * (double)1e9); + double dram_speed; + reg_event node_reg_ev; + bzero(&node_reg_ev, sizeof(reg_event)); + for (skt_id = 0; skt_id < env.nr_socket; skt_id++) { + imc_pmu *pmu = pmus + skt_id; + reg_event skt_reg_ev; + bzero(&skt_reg_ev, sizeof(skt_reg_ev)); for (pmu_id = 0; pmu_id < pmu->nr_grp; pmu_id++) { - channel_record* before_channel_ev = - &before_socket_ev->channel_record_arr[pmu_id]; - channel_record* after_channel_ev = - &after_socket_ev->channel_record_arr[pmu_id]; - - if (after_channel_ev->rpq_ins - before_channel_ev->rpq_ins > - 0) { - after_channel_ev->read_latency = - (after_channel_ev->rpq_occ - - before_channel_ev->rpq_occ) / - (after_channel_ev->rpq_ins - - before_channel_ev->rpq_ins) / - dram_speed; + reg_event *before_reg_ev = before.regs[skt_id] + pmu_id; + reg_event *after_reg_ev = after.regs[skt_id] + pmu_id; + if (pmu_id == 0) { + uint64_t clock = DELTA(after_reg_ev->dram_clock, + before_reg_ev->dram_clock); + dram_speed = (clock) / 
(delta * (double)1e9); } - - if (after_channel_ev->wpq_ins - before_channel_ev->wpq_ins > - 0) { - after_channel_ev->write_latency = - (after_channel_ev->wpq_occ - - before_channel_ev->wpq_occ) / - (after_channel_ev->wpq_ins - - before_channel_ev->wpq_ins) / - dram_speed; - } - } - - if (after_socket_ev->rpq_ins - before_socket_ev->rpq_ins > 0) { - after_socket_ev->read_latency = - (after_socket_ev->rpq_occ - before_socket_ev->rpq_occ) / - (after_socket_ev->rpq_ins - before_socket_ev->rpq_ins) / - dram_speed; - } - - if (after_socket_ev->wpq_ins - before_socket_ev->wpq_ins > 0) { - after_socket_ev->write_latency = - (after_socket_ev->wpq_occ - before_socket_ev->wpq_occ) / - (after_socket_ev->wpq_ins - before_socket_ev->wpq_ins) / - dram_speed; + // calculate the channel delta value + + uint64_t delta_rpqocc = + DELTA(after_reg_ev->rpq_occ, before_reg_ev->rpq_occ); + uint64_t delta_rpqins = + DELTA(after_reg_ev->rpq_ins, before_reg_ev->rpq_ins); + uint64_t delta_wpqocc = + DELTA(after_reg_ev->wpq_occ, before_reg_ev->wpq_occ); + uint64_t delta_wpqins = + DELTA(after_reg_ev->wpq_ins, before_reg_ev->wpq_ins); + uint64_t delta_wr = + DELTA(after_reg_ev->cas_wr, before_reg_ev->cas_wr); + uint64_t delta_rd = + DELTA(after_reg_ev->cas_rd, before_reg_ev->cas_rd); + + // calculate the channel metric + res.channel[skt_id][pmu_id].bw_wr = delta_wr * 64; + res.channel[skt_id][pmu_id].bw_rd = delta_rd * 64; + + LAT(res.channel[skt_id][pmu_id].rlat, delta_rpqocc, + delta_rpqins, dram_speed); + LAT(res.channel[skt_id][pmu_id].wlat, delta_wpqocc, + delta_wpqins, dram_speed); + LAT(res.channel[skt_id][pmu_id].avglat, + delta_rpqocc + delta_wpqocc, delta_wpqins + delta_rpqins, + dram_speed); + + // accumulate the socket delta value + skt_reg_ev.rpq_occ += delta_rpqocc; + skt_reg_ev.rpq_ins += delta_rpqins; + skt_reg_ev.wpq_occ += delta_wpqocc; + skt_reg_ev.wpq_ins += delta_wpqins; + skt_reg_ev.cas_wr += delta_wr; + skt_reg_ev.cas_rd += delta_rd; } - } - } -} - -#ifdef DEBUG -void 
print_socket(socket_record* rec) { - fprintf(stderr, - "rpq_occ=%ld rpq_ins=%ld wpq_occ=%ld wpq_ins=%ld dram_clocks=%ld " - "r_latency=%lf w_latency=%lf\n", - rec->rpq_occ, rec->rpq_ins, rec->wpq_occ, rec->wpq_ins, - rec->dram_clock, rec->read_latency, rec->write_latency); -} -void print_channel(channel_record* rec) { - fprintf(stderr, - "rpq_occ=%ld rpq_ins=%ld wpq_occ=%ld wpq_ins=%ld r_latency = % lf " - "w_latency = % lf\n ", - rec->rpq_occ, rec->rpq_ins, rec->wpq_occ, rec->wpq_ins, - rec->read_latency, rec->write_latency); -} - -void print_record(record* rec) { - int i = 0; - int j = 0; - for (i = 0; i < env.nr_socket; i++) { - print_socket(&rec->socket_record_arr[i]); - for (j = 0; j < env.nr_channel; j++) { - print_channel(&rec->socket_record_arr[i].channel_record_arr[j]); + // calculate the socket metric + LAT(res.socket[skt_id].rlat, skt_reg_ev.rpq_occ, skt_reg_ev.rpq_ins, + dram_speed); + LAT(res.socket[skt_id].wlat, skt_reg_ev.wpq_occ, skt_reg_ev.wpq_ins, + dram_speed); + LAT(res.socket[skt_id].avglat, + skt_reg_ev.wpq_occ + skt_reg_ev.rpq_occ, + skt_reg_ev.wpq_ins + skt_reg_ev.rpq_ins, dram_speed); + res.socket[skt_id].bw_rd = skt_reg_ev.cas_rd * 64; + res.socket[skt_id].bw_wr = skt_reg_ev.cas_wr * 64; + // accumulate the node delta value + node_reg_ev.rpq_occ += skt_reg_ev.rpq_occ; + node_reg_ev.rpq_ins += skt_reg_ev.rpq_ins; + node_reg_ev.wpq_occ += skt_reg_ev.wpq_occ; + node_reg_ev.wpq_ins += skt_reg_ev.wpq_ins; + node_reg_ev.cas_wr += skt_reg_ev.cas_wr; + node_reg_ev.cas_rd += skt_reg_ev.cas_rd; } + // calculate the node metric + LAT(res.node->rlat, node_reg_ev.rpq_occ, node_reg_ev.rpq_ins, + dram_speed); + LAT(res.node->wlat, node_reg_ev.wpq_occ, node_reg_ev.wpq_ins, + dram_speed); + LAT(res.node->avglat, node_reg_ev.wpq_occ + node_reg_ev.rpq_occ, + node_reg_ev.wpq_ins + node_reg_ev.rpq_ins, dram_speed); + res.node->bw_rd = node_reg_ev.cas_rd * 64; + res.node->bw_wr = node_reg_ev.cas_wr * 64; } } -#endif -int call(int t, struct unity_lines* lines) { 
- if (!env.init_succ) { - return 0; - } - - struct unity_line* line; +void setup_table(int t, struct unity_lines *lines) { + struct unity_line *line; int32_t socket_id = 0, channel_id = 0, line_num = 0; - - read_imc(); -#ifdef DEBUG - fprintf(stderr, "before.\n"); - print_record(&before); - - fprintf(stderr, "after.\n"); - print_record(&after); -#endif - - line_num = env.nr_socket * (1 + env.nr_channel); + line_num = env.nr_socket * (1 + env.nr_channel) + 1; unity_alloc_lines(lines, line_num); for (socket_id = 0; socket_id < env.nr_socket; socket_id++) { char socket_name[32]; snprintf(socket_name, 32, "%d", socket_id); line = unity_get_line(lines, (1 + env.nr_channel) * socket_id); - unity_set_table(line, "imc_socket_latency"); + unity_set_table(line, "imc_socket_event"); unity_set_index(line, 0, "socket", socket_name); - unity_set_value(line, 0, "rlat", - after.socket_record_arr[socket_id].read_latency); - unity_set_value(line, 1, "wlat", - after.socket_record_arr[socket_id].write_latency); + unity_set_value(line, 0, "rlat", res.socket[socket_id].rlat); + unity_set_value(line, 1, "wlat", res.socket[socket_id].wlat); + unity_set_value(line, 2, "avglat", res.socket[socket_id].avglat); + unity_set_value(line, 3, "bw_rd", res.socket[socket_id].bw_rd); + unity_set_value(line, 4, "bw_wr", res.socket[socket_id].bw_wr); for (channel_id = 0; channel_id < env.nr_channel; channel_id++) { char channel_name[32]; @@ -756,32 +792,56 @@ int call(int t, struct unity_lines* lines) { line = unity_get_line( lines, (1 + env.nr_channel) * socket_id + 1 + channel_id); - unity_set_table(line, "imc_channel_latency"); + unity_set_table(line, "imc_channel_event"); unity_set_index(line, 0, "socket", socket_name); unity_set_index(line, 1, "channel", channel_name); unity_set_value(line, 0, "rlat", - after.socket_record_arr[socket_id] - .channel_record_arr[channel_id] - .read_latency); + res.channel[socket_id][channel_id].rlat); unity_set_value(line, 1, "wlat", - 
after.socket_record_arr[socket_id] - .channel_record_arr[channel_id] - .write_latency); + res.channel[socket_id][channel_id].wlat); + unity_set_value(line, 2, "avglat", + res.channel[socket_id][channel_id].avglat); + unity_set_value(line, 3, "bw_rd", + res.channel[socket_id][channel_id].bw_rd); + unity_set_value(line, 4, "bw_wr", + res.channel[socket_id][channel_id].bw_wr); } } + line = unity_get_line(lines, line_num - 1); + unity_set_table(line, "imc_node_event"); + unity_set_value(line, 0, "rlat", res.node->rlat); + unity_set_value(line, 1, "wlat", res.node->wlat); + unity_set_value(line, 2, "avglat", res.node->avglat); + unity_set_value(line, 3, "bw_rd", res.node->bw_rd); + unity_set_value(line, 4, "bw_wr", res.node->bw_wr); +} + +void swap_regs() { /* swap data */ - socket_record* tmp = before.socket_record_arr; - before.socket_record_arr = after.socket_record_arr; - after.socket_record_arr = tmp; + reg_event **tmp = before.regs; + before.regs = after.regs; + after.regs = tmp; /* clear after data */ - free_socket_record(after.socket_record_arr); - after.socket_record_arr = alloc_socket_record(); + free(after.regs); + after.regs = alloc_record(); +} +int call(int t, struct unity_lines *lines) { + if (!env.init_succ) { + return 0; + } + read_imc(); + calculate_metric(); +#ifdef DEBUG + print_result(res); +#endif + + setup_table(t, lines); + swap_regs(); /* reset before timestamp */ before_ts = after_ts; - return 0; } @@ -797,4 +857,4 @@ int main() { deinit(); } -#endif \ No newline at end of file +#endif diff --git a/source/tools/monitor/unity/collector/plugin/imc_latency/imc_latency.h b/source/tools/monitor/unity/collector/plugin/uncore_imc/uncore_imc.h similarity index 81% rename from source/tools/monitor/unity/collector/plugin/imc_latency/imc_latency.h rename to source/tools/monitor/unity/collector/plugin/uncore_imc/uncore_imc.h index e3cc818b1fcbfeaa908ec4549a5d1d213a731176..d1d8917c61e56642734eaf74e411b2816e980d2d 100644 --- 
a/source/tools/monitor/unity/collector/plugin/imc_latency/imc_latency.h +++ b/source/tools/monitor/unity/collector/plugin/uncore_imc/uncore_imc.h @@ -1,5 +1,5 @@ -#ifndef UNITY_IMC_LATENCY_H -#define UNITY_IMC_LATENCY_H +#ifndef UNITY_UNCORE_IMC_H +#define UNITY_UNCORE_IMC_H #include #include @@ -91,10 +91,13 @@ enum INTEL_CPU_MODEL { #define RPQ_INS 1 #define WPQ_OCC 2 #define WPQ_INS 3 +#define CAS_RD 4 +#define CAS_WR 5 + #define BUF_SIZE 1024 #define MAX_IMC_ID 100 -#define GENERAL_REG_NUM 4 +#define GENERAL_REG_NUM 6 #define FIXED_REG_NUM 1 typedef struct imc_event_t { @@ -123,4 +126,32 @@ struct topology_ent { int64_t socket_id; }; +typedef struct reg_event { + uint64_t rpq_occ; + uint64_t rpq_ins; + uint64_t wpq_occ; + uint64_t wpq_ins; + uint64_t cas_rd; + uint64_t cas_wr; + uint64_t dram_clock; +} reg_event; + +typedef struct record { + reg_event **regs; +} record; + +typedef struct metric { + double rlat; + double wlat; + double avglat; + uint64_t bw_rd; + uint64_t bw_wr; +} metric; + +typedef struct result { + metric *node; + metric *socket; + metric **channel; +} result; + #endif diff --git a/source/tools/monitor/unity/collector/plugin/unity_irqoff/unity_irqoff.bpf.c b/source/tools/monitor/unity/collector/plugin/unity_irqoff/unity_irqoff.bpf.c index 136445aae6cd5bd1c94918cee53c38c98cda2a1b..eebefe86feceb3c65b0767b89340aae5a8577315 100644 --- a/source/tools/monitor/unity/collector/plugin/unity_irqoff/unity_irqoff.bpf.c +++ b/source/tools/monitor/unity/collector/plugin/unity_irqoff/unity_irqoff.bpf.c @@ -44,7 +44,7 @@ struct { __type(value, struct info); } info_map SEC(".maps"); -#define _(P) ({typeof(P) val = 0; bpf_probe_read(&val, sizeof(val), &P); val;}) +#define _(P) ({typeof(P) val = 0; bpf_probe_read_kernel(&val, sizeof(val), &P); val;}) static inline u64 get_thresh(void) { diff --git a/source/tools/monitor/unity/collector/plugin/unity_irqoff/unity_irqoff.c b/source/tools/monitor/unity/collector/plugin/unity_irqoff/unity_irqoff.c index 
fd443057454819f5340aa8dd34223cc7104979d6..eabe6c9f56db3b5e951f2f265e5702cd76ccbf06 100644 --- a/source/tools/monitor/unity/collector/plugin/unity_irqoff/unity_irqoff.c +++ b/source/tools/monitor/unity/collector/plugin/unity_irqoff/unity_irqoff.c @@ -23,7 +23,7 @@ struct env { .threshold = 50*1000*1000, /* 10ms */ }; -static int nr_cpus; +static int nr_cpus, irqoff_live; struct sched_jit_summary summary, prev; struct bpf_link **sw_mlinks, **hw_mlinks= NULL; @@ -169,11 +169,12 @@ int init(void *arg) { int err; + irqoff_live = 0; nr_cpus = libbpf_num_possible_cpus(); if (nr_cpus < 0) { fprintf(stderr, "failed to get # of possible cpus: '%s'!\n", strerror(-nr_cpus)); - return nr_cpus; + return 0; } bump_memlock_rlimit1(); @@ -182,7 +183,7 @@ int init(void *arg) if (!sw_mlinks) { err = errno; fprintf(stderr, "failed to alloc sw_mlinks or rlinks\n"); - return err; + return 0; } hw_mlinks = calloc(nr_cpus, sizeof(*hw_mlinks)); @@ -190,18 +191,18 @@ int init(void *arg) err = errno; fprintf(stderr, "failed to alloc hw_mlinks or rlinks\n"); free(sw_mlinks); - return err; + return 0; } unity_irqoff = unity_irqoff_bpf__open_and_load(); if (!unity_irqoff) { err = errno; fprintf(stderr, "failed to open and/or load BPF object\n"); - return err; + return 0; } irqoff_handler(arg, unity_irqoff); - + irqoff_live = 1; return 0; } #define delta(sum, value) \ @@ -210,6 +211,8 @@ int call(int t, struct unity_lines *lines) { struct unity_line *line; + if (!irqoff_live) + return 0; unity_alloc_lines(lines, 1); line = unity_get_line(lines, 0); unity_set_table(line, "sched_moni_jitter"); diff --git a/source/tools/monitor/unity/collector/plugin/unity_nosched/unity_nosched.bpf.c b/source/tools/monitor/unity/collector/plugin/unity_nosched/unity_nosched.bpf.c index 21cc7331d573831ef7bbefd5d634e6494e1420a4..577e5763e76ed326d22375fdf80f899cdd365fab 100644 --- a/source/tools/monitor/unity/collector/plugin/unity_nosched/unity_nosched.bpf.c +++ 
b/source/tools/monitor/unity/collector/plugin/unity_nosched/unity_nosched.bpf.c @@ -10,7 +10,7 @@ BPF_PERF_OUTPUT(perf, 1024); #define BIT_WORD(nr) ((nr) / BITS_PER_LONG) #define BITS_PER_LONG 64 -#define _(P) ({typeof(P) val = 0; bpf_probe_read(&val, sizeof(val), &P); val;}) +#define _(P) ({typeof(P) val = 0; bpf_probe_read_kernel(&val, sizeof(val), &P); val;}) struct bpf_map_def SEC("maps") args_map = { .type = BPF_MAP_TYPE_HASH, diff --git a/source/tools/monitor/unity/collector/pluginManager.lua b/source/tools/monitor/unity/collector/pluginManager.lua index 501fea88b0d4bde067a4ec0bb86b55ee8ee94f20..98fe9f3b3513b46026f0956a3f8f6cd9ddb09eaa 100644 --- a/source/tools/monitor/unity/collector/pluginManager.lua +++ b/source/tools/monitor/unity/collector/pluginManager.lua @@ -20,7 +20,7 @@ function CpluginManager:_init_(procffi, proto_q, resYaml, tid, jperiod) self._plugins = {} self._names = {} self:setup(res, proto_q) - self._guardSched = CguardSched.new(tid, self._plugins, self._names, jperiod) + self._guardSched = CguardSched.new(tid, self._plugins, self._names, jperiod, res) self._resYaml = resYaml -- for add function self._proto_q = proto_q @@ -40,8 +40,13 @@ function CpluginManager:setup(resYaml, proto_q) for _, plugin in ipairs(plugins) do local so = plugin.so if so then - table.insert(self._plugins, Cplugin.new(resYaml, pluginFFI, proto_q, so)) - table.insert(self._names, so) + local plugin = Cplugin.new(resYaml, pluginFFI, proto_q, so) + if plugin.alive >= 0 then + table.insert(self._plugins, plugin) + table.insert(self._names, so) + else + plugin = nil + end end end end diff --git a/source/tools/monitor/unity/collector/podMan/podsAll.lua b/source/tools/monitor/unity/collector/podMan/podsAll.lua index 248a1a777ec8b908a2293f2eb911a5c35b30a192..910d8812f1696e2dbc98089495be1810a7fe96a0 100644 --- a/source/tools/monitor/unity/collector/podMan/podsAll.lua +++ b/source/tools/monitor/unity/collector/podMan/podsAll.lua @@ -1,230 +1,176 @@ ---- ---- Generated by 
EmmyLua(https://github.com/EmmyLua) ---- Created by liaozhaoyan. ---- DateTime: 2023/3/15 6:00 PM ---- - require("common.class") -local ChttpCli = require("httplib.httpCli") -local system = require("common.system") -local pystring = require("common.pystring") -local Cinotifies = require("common.inotifies") -local unistd = require("posix.unistd") -local json = require("cjson") - local CpodsAll = class("podsApi") -local function spiltConId(conId) - local res = pystring:split(conId, "//", 1) - return res[2] -end - -local function getRuntime(mnt) - if unistd.access(mnt .. "var/run/docker/runtime-runc/moby/") == 0 then - return "docker" - end - return "cri-containerd" -end - -local function joinNPath(cell, runtime) - -- "/sys/fs/cgroup/cpu/kubepods.slice/kubepods-${qos}.slice/kubepods-${qos}-pod${podid}.slice/${runtime}-${cid}.scope" - local paths = { - "/kubepods.slice/kubepods-", - cell.pod.qos, - ".slice/kubepods-", - cell.pod.qos, - "-pod", - cell.pod.uid, - ".slice/", - runtime, - "-", - cell.id, - ".scope" - } - return pystring:join("", paths) -end +local pod_plugins = { + podmem = 1, + pod_storage_stat = 1 +} -local function joinGPath(cell, runtime) - -- "/sys/fs/cgroup/cpu/kubepods.slice/kubepods-pod${podid}.slice/${runtime}-${cid}.scope" - local paths = { - "/kubepods.slice/kubepods-pod", - cell.pod.uid, - ".slice/", - runtime, - "-", - cell.id, - ".scope" - } - return pystring:join("", paths) -end - -local function joinPath(cell, runtime) - if cell.pod.qos == "guaranteed" then - return joinGPath(cell, runtime) - else - return joinNPath(cell, runtime) - end -end - -local function setupCons(res) - local mnt = res.config.proc_path - local runtime = getRuntime(mnt) - local cli = ChttpCli.new() - local cons = {} +function CpodsAll:setupPlugins() local c = 0 - local blacklist = {["arms-prom"] = 1, ["kube-system"] = 1, ["kube-public"] = 1, ["kube-node-lease"] = 1} - local content = cli:get("http://127.0.0.1:10255/pods") - local obj = cli:jdecode(content.body) - if 
not obj then - local cmd = ' curl -s -k -XGET https://127.0.0.1:10250/pods --cacert /var/run/secrets/kubernetes.io/serviceaccount/ca.crt --header "Authorization: Bearer $(cat /var/run/secrets/kubernetes.io/serviceaccount/token) "' - local f = io.popen(cmd,"r") - local podsinfo = f:read("*a") - f:close() - obj = json.decode(podsinfo) - end - - for _, pod in ipairs(obj.items) do - local metadata = pod.metadata - if blacklist[metadata.namespace] then - goto continue - end - local lpod = {name = metadata.name, - namespace = metadata.namespace, - uid = pystring:replace(metadata.uid, "-", "_"), - qos = pystring:lower(pod.status.qosClass), - } - - local containerStatuses = pod.status.containerStatuses - for _, con in ipairs(containerStatuses) do - local cell = { - pod = lpod, - name = con.name, - id = spiltConId(con.containerID) - } - cell.path = joinPath(cell, runtime) - if unistd.access(mnt .. "sys/fs/cgroup/cpu/" .. cell.path) == 0 then - c = c + 1 - cons[c] = cell - end - end - ::continue:: - end - - return cons -end + local plugins = {} -function CpodsAll:getAllcons(procfs) - local mnt = procfs - local runtime = getRuntime(mnt) - local cli = ChttpCli.new() - local cons = {} - local c = 0 - local content = cli:get("http://127.0.0.1:10255/pods") - local obj = cli:jdecode(content.body) - if not obj then - local cmd = ' curl -s -k -XGET https://127.0.0.1:10250/pods --cacert /var/run/secrets/kubernetes.io/serviceaccount/ca.crt --header "Authorization: Bearer $(cat /var/run/secrets/kubernetes.io/serviceaccount/token) "' - local f = io.popen(cmd,"r") - local podsinfo = f:read("*a") - f:close() - obj = json.decode(podsinfo) + local cons = self._runtime:setupCons() + if cons == nil then + return nil end - for _, pod in ipairs(obj.items) do - local metadata = pod.metadata - --print(string.format("podns :%s, pod:%s",metadata.namespace, metadata.name)) - local lpod = {name = metadata.name, - namespace = metadata.namespace, - uid = pystring:replace(metadata.uid, "-", "_"), - qos 
= pystring:lower(pod.status.qosClass), - } - local containerStatuses = pod.status.containerStatuses - for _, con in ipairs(containerStatuses) do - local cell = { - pod = lpod, - name = con.name, - id = spiltConId(con.containerID) + -- 对于除podmem外的容器插件(都是每一个容器实例化一个新的插件对象) + for _, con in ipairs(cons) do + local ls = {} + if con.pod then -- k8s environment + ls = { + { + name = "pod", + index = con.pod.name, + }, + { + name = "bvt", + index = con.bvt, + }, + { + name = "container", + --index = con.name.."-"..string.sub(con.id,0,4), + index = con.name, + }, + { + name = "namespace", + index = con.pod.namespace, + }, + } + else -- container environment, no pod info + ls = { + { + name = "pod", + index = con.name, + }, + { + name = "container", + index = con.name, + }, + { + name = "namespace", + index = "None", + } } - cell.path = joinPath(cell, runtime) - if unistd.access(mnt .. "/sys/fs/cgroup/memory/" .. cell.path) == 0 then - c = c + 1 - cons[c] = cell - end end - end - return cons -end -local function setupPlugins(res, proto, pffi, mnt, ino) - local c = 0 - local cons = setupCons(res) - local plugins = {} + for _, plugin in ipairs(self._resYaml.container.luaPlugin) do + if pod_plugins[plugin] then + goto continue + end - for _, con in ipairs(cons) do - local ls = { - { - name = "podname", - index = con.pod.name, - }, - { - name = "container", - index = con.name.."-"..string.sub(con.id,0,4), - }, - { - name = "podns", - index = con.pod.namespace, - }, - } - - for _, plugin in ipairs(res.container.luaPlugin) do local CProcs = require("collector.container." .. 
plugin) c = c + 1 - plugins[c] = CProcs.new(proto, pffi, mnt, con.path, ls) - ino:add(plugins[c].pFile) + plugins[c] = CProcs.new(self._proto, self._pffi, self._mnt, con.path, ls) + -- no need to watch pFile, too many pod/container will exhaust system's inofity watchs + --ino:add(plugins[c].pFile) + ::continue:: end end + -- update pod info for pod plugins + for _, plugin in ipairs(self._pod_plugins) do + plugin:setup(cons) + end + return plugins end function CpodsAll:_init_(resYaml, proto, pffi, mnt) - self._monDir = mnt .. "sys/fs/cgroup/" + self._runtime = nil + self._pod_plugins = {} self._resYaml = resYaml self._proto = proto self._pffi = pffi self._mnt = mnt - self._ino = Cinotifies.new() - self._plugins = setupPlugins(self._resYaml, self._proto, self._pffi, self._mnt, self._ino) - - self._ino:add(mnt .. "sys/fs/cgroup/memory/kubepods.slice") - self._ino:add(mnt .. "sys/fs/cgroup/memory/kubepods.slice/kubepods-besteffort.slice") - self._ino:add(mnt .. "sys/fs/cgroup/memory/kubepods.slice/kubepods-burstable.slice") - + --[[ + 遍历yaml中所有容器运行时,只要其中某一个可用,则使用该运行时 + cri-containerd: 通过cri接口从containerd.sock获取pod和容器信息 + k8sApi: K8s环境,通过10250端口获得pod信息,包括k8s+containerd, k8s+docker, k8s+CRI-O)), + docker: 单节点非k8s(ecs)容器环境 + Todo: cri-docker, cri-o,添加时只需要按模版编写lua调用go暴露的cri接口即可 + ]] + for _, runtime in ipairs(self._resYaml.container.runtime) do + local runtime_module = require("collector.podMan.runtime." .. runtime) + local Cruntime = runtime_module.new(self._resYaml, self._mnt) + if Cruntime:checkRuntime() == 1 then + self._runtime = Cruntime + self._runtime_name = runtime + break + end + print("Using "..runtime.." failed! 
Fall back to next runtime") + end + + if not self._runtime then + print("No supported runtime, container monitor is unavaliable!") + return + end + + -- 初始化inotify(cgroup变化) + self._runtime:initInotify() + + -- init pods plugins + for _, plugin in ipairs(self._resYaml.container.luaPlugin) do + if pod_plugins[plugin] then + local CPlugin = require("collector.container."..plugin) + local CPodPlugin = CPlugin.new(self._resYaml, self._proto, + self._pffi, self._mnt) + table.insert(self._pod_plugins, CPodPlugin) + end + end + + self._plugins = self:setupPlugins() + if not self._plugins then + return + end + print( "pods plugin add " .. #self._plugins) end function CpodsAll:proc(elapsed, lines) local rec = {} - if self._ino:isChange() or #self._plugins == 0 then - print("cgroup changed.") - self._ino = Cinotifies.new() - self._plugins = setupPlugins(self._resYaml, self._proto, self._pffi, self._mnt, self._ino) - self._ino:add(self._mnt .. "sys/fs/cgroup/memory/kubepods.slice") - self._ino:add(self._mnt .. "sys/fs/cgroup/memory/kubepods.slice/kubepods-besteffort.slice") - self._ino:add(self._mnt .. 
"sys/fs/cgroup/memory/kubepods.slice/kubepods-burstable.slice") + local is_change + + if not self._runtime then + return + end + + is_change = self._runtime:cgroupChanged() + if is_change or not self._plugins then + self._plugins = self:setupPlugins() + if not self._plugins then + return + end end + + for _, plugin in ipairs(self._pod_plugins) do + local stat, res = pcall(plugin.proc, plugin, elapsed, lines) + if not stat then + print("Pod Plugin Error: ", res) + end + end + + -- run other container plugins for i, plugin in ipairs(self._plugins) do --local res = plugin:proc(elapsed, lines) - local stat, res = pcall(plugin.proc, plugin, elapsed, lines) + stat, res = pcall(plugin.proc, plugin, elapsed, lines) if not stat or res == -1 then - table.insert(rec, i) + table.insert(rec, i) end end - for _, i in ipairs(rec) do -- del bad plugin - self._plugins[i] = nil + --[[ + 容器删除后,对应的plugin会执行失败,删除对应的plugins,inoity同时也会 + 识别到cgroup路径的变化,重新为现存的容器重新实例化对应的plugins。 + (倒序删除元素,确保删除元素不会影响后续元素的索引) + ]] + for i = #rec, 1, -1 do + table.remove(self._plugins, rec[i]) end + end return CpodsAll diff --git a/source/tools/monitor/unity/collector/podMan/runtime/cri/Makefile b/source/tools/monitor/unity/collector/podMan/runtime/cri/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..ea8a3faaf2ee3c3554eaae69b3a8b31e64e617e8 --- /dev/null +++ b/source/tools/monitor/unity/collector/podMan/runtime/cri/Makefile @@ -0,0 +1,11 @@ +SO := libCri.so + +all: + echo "1" + bash ./build_shared.sh + + +clean: + rm -f $(SO) + rm go.mod + rm go.sum \ No newline at end of file diff --git a/source/tools/monitor/unity/collector/podMan/runtime/cri/build_shared.sh b/source/tools/monitor/unity/collector/podMan/runtime/cri/build_shared.sh new file mode 100644 index 0000000000000000000000000000000000000000..a33c14a5782ce183c03aee446308505e103c4c63 --- /dev/null +++ b/source/tools/monitor/unity/collector/podMan/runtime/cri/build_shared.sh @@ -0,0 +1,2 @@ +go build -o libCri.so 
-buildmode=c-shared +cp libCri.so ../../../../beeQ/lib \ No newline at end of file diff --git a/source/tools/monitor/unity/collector/podMan/runtime/cri/cri.go b/source/tools/monitor/unity/collector/podMan/runtime/cri/cri.go new file mode 100644 index 0000000000000000000000000000000000000000..9c403cb22b063b6a447ce384e2a8022e7a200aba --- /dev/null +++ b/source/tools/monitor/unity/collector/podMan/runtime/cri/cri.go @@ -0,0 +1,189 @@ +package main + +import ( + "context" + "fmt" + "strings" + + pb "k8s.io/cri-api/pkg/apis/runtime/v1" + + "google.golang.org/grpc" + "k8s.io/klog/v2" +) + +type Cri struct { + enabled bool + ctx context.Context + runtimeClient pb.RuntimeServiceClient + runtimeConn *grpc.ClientConn +} + +/* +var containerdEndpoints = []string{ + "unix:///mnt/host/run/containerd/containerd.sock", + "unix:///mnt/host/var/run/containerd/containerd.sock", + "unix:///mnt/host/run/containerd.sock", +} +*/ + +func newCri(endpoints []string) (Cri, error) { + c := Cri{} + c.enabled = false + runtimeClient, runtimeConn, err := getRuntimeClient(endpoints) + if err != nil { + //klog.Warningf("failed to create runtime client, %v", err) + return c, err + } + c.enabled = true + c.runtimeClient = runtimeClient + c.runtimeConn = runtimeConn + c.ctx = context.Background() + return c, nil +} + +func (c *Cri) GetContainerInfos() ([]ContainerInfo, error) { + allContainerInfos := []ContainerInfo{} + + cs, err := c.ListContainers(c.ctx) + if err != nil { + klog.Errorf("failed to list container,%v", err) + return nil, err + } + + for _, con := range cs { + containerInfo, err := c.InspectContainer(con, c.ctx) + if err != nil { + klog.Errorf("failed to inspect container %v, %v", con.Id, err) + continue + } + klog.V(7).Infof("not found container in cache and succeed to inspect container %v", con.Id) + klog.V(7).Infof("get container info: %v", con.Id, containerInfo) + + podId := con.PodSandboxId + podInfo, err := c.InspectPod(podId, c.ctx) + if err != nil { + klog.Errorf("failed to 
inspect Pod%v, %v", podId, err) + } + cgroupParent := podInfo.CgroupParent + containerInfo.PodCgroup = cgroupParent + containerInfo.ContainerCgroup = cgroupParent + containerInfo.ContainerCgroup + allContainerInfos = append(allContainerInfos, containerInfo) + } + + return allContainerInfos, nil +} + +func (c *Cri) ListContainers(ctx context.Context) ([]Container, error) { + request := &pb.ListContainersRequest{} + r, err := c.runtimeClient.ListContainers(ctx, request) + if err != nil { + return nil, fmt.Errorf("failed to list containers, %v", err) + } + containers := []Container{} + for _, con := range r.Containers { + state := con.GetState() + klog.V(7).Infof("container name: %v, status: %v", con.Metadata.Name, state.String()) + // make sure we only need running containers + if state == pb.ContainerState_CONTAINER_CREATED || state == pb.ContainerState_CONTAINER_EXITED { + continue + } + containers = append(containers, Container{ + Id: con.Id, + Names: []string{con.Metadata.Name}, + State: con.State.String(), + PodSandboxId: con.PodSandboxId, + }) + } + return containers, nil +} + +func (c *Cri) InspectContainer(container Container, ctx context.Context) (ContainerInfo, error) { + var containerCgroup string + + containerInfo := ContainerInfo{} + request := &pb.ContainerStatusRequest{ + ContainerId: container.Id, + Verbose: true, + } + r, err := c.runtimeClient.ContainerStatus(context.Background(), request) + if err != nil { + return containerInfo, fmt.Errorf("failed to inspect container %v,reason: %v", container.Id, err) + } + klog.V(7).Infof("succeed to get container status response for container %v", container.Id) + klog.V(7).Infof("result of containerStatus.GetInfo()", r.GetInfo()) + containerStatus, err := ParseContainerStatusResponse(r) + if err != nil { + return containerInfo, err + } + klog.V(7).Infof("succeed to parse container status response for container %v", container.Id) + klog.V(7).Infof("container status: %v", containerStatus) + + containerName := 
containerStatus.Status.Metadata.Name + if containerStatus.Info.Config.Labels["io.kubernetes.container.name"] != "" { + containerName = containerStatus.Info.Config.Labels["io.kubernetes.container.name"] + } + containerName = strings.Trim(containerName, "/") + + rawCgroupPath := containerStatus.Info.RuntimeSpec.Linux.CgroupsPath + if containerStatus.Info.RuntimeOptions.SystemdCgroup { + /* + * if systemdCgroup is true, rawCgroupPath would be + * "kubepods-burstable-pod$(pod uid).slice:cri-containerd:$(container id)" + */ + paths := strings.Split(rawCgroupPath, ":") + if len(paths) != 3 { + return containerInfo, fmt.Errorf("failed to parse container cgroup path %v", rawCgroupPath) + } + // the final path would be "cri-containerd-$(container id).scope" + containerCgroup = "/" + paths[1] + "-" + paths[2] + ".scope" + } else { + /* + * cgroupfs driver, rawCgrouppath would be + * $(container id) + */ + containerCgroup = "/" + container.Id + } + + return ContainerInfo{ + Id: container.Id, + PodName: containerStatus.Info.Config.Labels["io.kubernetes.pod.name"], + PodId: containerStatus.Info.Config.Labels["io.kubernetes.pod.uid"], + ContainerName: containerName, + Namespace: containerStatus.Info.Config.Labels["io.kubernetes.pod.namespace"], + Pid: int(containerStatus.Info.Pid), + ContainerCgroup: containerCgroup, + }, nil +} + +func (c *Cri) InspectPod(podId string, ctx context.Context) (PodInfo, error) { + podInfo := PodInfo{} + request := &pb.PodSandboxStatusRequest{ + PodSandboxId: podId, + Verbose: true, + } + r, err := c.runtimeClient.PodSandboxStatus(context.Background(), request) + if err != nil { + return podInfo, fmt.Errorf("failed to inspect pod %v,reason: %v", podId, err) + } + klog.V(7).Infof("succeed to get pod status response for container %v", podId) + klog.V(7).Infof("result of podsandboxStatus.GetInfo()", r.GetInfo()) + + podStatus, err := ParsePodStatusResponse(r) + if err != nil { + return podInfo, err + } + + return PodInfo{ + CgroupParent: 
podStatus.Info.Config.Linux.CgroupParent, + }, nil +} + +func (c *Cri) Shutdown() error { + if !c.enabled { + return nil + } + if c.runtimeConn != nil { + return c.runtimeConn.Close() + } + return nil +} diff --git a/source/tools/monitor/unity/collector/podMan/runtime/cri/ffi_lua.lua b/source/tools/monitor/unity/collector/podMan/runtime/cri/ffi_lua.lua new file mode 100644 index 0000000000000000000000000000000000000000..2ccc2614494c31c6c9a90a7ef6c9186c52278f7e --- /dev/null +++ b/source/tools/monitor/unity/collector/podMan/runtime/cri/ffi_lua.lua @@ -0,0 +1,17 @@ +local ffi = require("ffi") +local awesome = ffi.load('Cri') + +ffi.cdef[[ + typedef signed char GoInt8; + typedef unsigned char GoUint8; + typedef long long GoInt64; + typedef GoInt64 GoInt; + typedef double GoFloat64; + typedef struct { const char *p; GoInt n; } GoString; + typedef struct { void *data; GoInt len; GoInt cap; } GoSlice; + extern GoInt CheckRuntime(GoString* endpoint_ptr); + extern char *CGetContainerInfosfunc(const char* endpoint); + void free(void *p); +]] + +return {ffi = ffi, awesome = awesome} \ No newline at end of file diff --git a/source/tools/monitor/unity/collector/podMan/runtime/cri/go.mod b/source/tools/monitor/unity/collector/podMan/runtime/cri/go.mod new file mode 100644 index 0000000000000000000000000000000000000000..f364fb300aabbec7f4969a52e4ba761e9dc3c687 --- /dev/null +++ b/source/tools/monitor/unity/collector/podMan/runtime/cri/go.mod @@ -0,0 +1,19 @@ +module cri + +go 1.20 + +require ( + github.com/go-logr/logr v1.2.4 // indirect + github.com/gogo/protobuf v1.3.2 // indirect + github.com/golang/protobuf v1.5.3 // indirect + github.com/pkg/errors v0.9.1 // indirect + golang.org/x/net v0.13.0 // indirect + golang.org/x/sys v0.10.0 // indirect + golang.org/x/text v0.11.0 // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20230711160842-782d3b101e98 // indirect + google.golang.org/grpc v1.58.0 // indirect + google.golang.org/protobuf v1.31.0 // indirect + 
k8s.io/client-go v0.28.1 // indirect + k8s.io/cri-api v0.28.1 // indirect + k8s.io/klog/v2 v2.100.1 // indirect +) diff --git a/source/tools/monitor/unity/collector/podMan/runtime/cri/go.sum b/source/tools/monitor/unity/collector/podMan/runtime/cri/go.sum new file mode 100644 index 0000000000000000000000000000000000000000..871d82654ae04459a06f442906b6c49cc50f7f78 --- /dev/null +++ b/source/tools/monitor/unity/collector/podMan/runtime/cri/go.sum @@ -0,0 +1,64 @@ +github.com/go-logr/logr v1.2.0 h1:QK40JKJyMdUDz+h+xvCsru/bJhvG0UxvePV0ufL/AcE= +github.com/go-logr/logr v1.2.0/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= +github.com/go-logr/logr v1.2.4 h1:g01GSCwiDw2xSZfjJ2/T9M+S6pFdcNtFYsp+Y43HYDQ= +github.com/go-logr/logr v1.2.4/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= +github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= +github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= +github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= +github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg= +github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= +github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= +github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= +github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= +github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto 
v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= +golang.org/x/net v0.12.0 h1:cfawfvKITfUsFCeJIHJrbSxpeu/E81khclypR0GVT50= +golang.org/x/net v0.12.0/go.mod h1:zEVYFnQC7m/vmpQFELhcD1EWkZlX69l4oqgmer6hfKA= +golang.org/x/net v0.13.0 h1:Nvo8UFsZ8X3BhAC9699Z1j7XQ3rsZnUUm7jfBEk1ueY= +golang.org/x/net v0.13.0/go.mod h1:zEVYFnQC7m/vmpQFELhcD1EWkZlX69l4oqgmer6hfKA= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.10.0 h1:SqMFp9UcQJZa+pmYuAKjd9xq1f0j5rLcDIk0mj4qAsA= +golang.org/x/sys v0.10.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= 
+golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.11.0 h1:LAntKIrcmeSKERyiOh0XMV39LXS8IE9UL2yP7+f5ij4= +golang.org/x/text v0.11.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= +golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +google.golang.org/genproto v0.0.0-20230711160842-782d3b101e98 h1:Z0hjGZePRE0ZBWotvtrwxFNrNE9CUAGtplaDK5NNI/g= +google.golang.org/genproto/googleapis/rpc v0.0.0-20230711160842-782d3b101e98 h1:bVf09lpb+OJbByTj913DRJioFFAjf/ZGxEz7MajTp2U= +google.golang.org/genproto/googleapis/rpc v0.0.0-20230711160842-782d3b101e98/go.mod h1:TUfxEVdsvPg18p6AslUXFoLdpED4oBnGwyqk3dV1XzM= +google.golang.org/grpc v1.58.0 h1:32JY8YpPMSR45K+c3o6b8VL73V+rR8k+DeMIr4vRH8o= +google.golang.org/grpc v1.58.0/go.mod h1:tgX3ZQDlNJGU96V6yHh1T/JeoBQ2TXdr43YbYSsCJk0= +google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= +google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= +google.golang.org/protobuf v1.31.0 h1:g0LDEJHgrBl9N9r17Ru3sqWhkIx2NB67okBHPwC7hs8= +google.golang.org/protobuf v1.31.0/go.mod 
h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= +k8s.io/client-go v0.28.1 h1:pRhMzB8HyLfVwpngWKE8hDcXRqifh1ga2Z/PU9SXVK8= +k8s.io/client-go v0.28.1/go.mod h1:pEZA3FqOsVkCc07pFVzK076R+P/eXqsgx5zuuRWukNE= +k8s.io/cri-api v0.28.1 h1:uNiDsUjYAFn4mvrtaa48qJK1MF5YEspfbUhQZLhz+gU= +k8s.io/cri-api v0.28.1/go.mod h1:xXygwvSOGcT/2KXg8sMYTHns2xFem3949kCQn5IS1k4= +k8s.io/klog/v2 v2.100.1 h1:7WCHKK6K8fNhTqfBhISHQ97KrnJNFZMcQvKp7gP/tmg= +k8s.io/klog/v2 v2.100.1/go.mod h1:y1WjHnz7Dj687irZUWR/WLkLc5N1YHtjLdmgWjndZn0= diff --git a/source/tools/monitor/unity/collector/podMan/runtime/cri/main.go b/source/tools/monitor/unity/collector/podMan/runtime/cri/main.go new file mode 100644 index 0000000000000000000000000000000000000000..5095e7c7d308009fd24b22374afa86c1f2e77e4d --- /dev/null +++ b/source/tools/monitor/unity/collector/podMan/runtime/cri/main.go @@ -0,0 +1,65 @@ +package main + +import ( + "C" + "encoding/json" + "fmt" +) + +func GetContainerInfos(endpoints []string) ([]ContainerInfo, error) { + c, err := newCri(endpoints) + if err != nil { + return nil, err + } + + defer c.Shutdown() + return c.GetContainerInfos() +} + +//export CGetContainerInfosfunc +func CGetContainerInfosfunc(endpoint *C.char) *C.char { + ep := C.GoString(endpoint) + endpoints := []string{ep} + //fmt.Println(endpoints) + infos, err := GetContainerInfos(endpoints) + if err != nil { + fmt.Printf("cri: get container info failed: %v", err) + return nil + } + marshal, _ := json.Marshal(infos) + //fmt.Println(string(marshal)) + //return string(marshal) + return C.CString(string(marshal)) + //return "hello" +} + +//export CheckRuntime +func CheckRuntime(endpoint_ptr *string) int { + endpoints := []string{*endpoint_ptr} + //fmt.Println(endpoints) + c, err := newCri(endpoints) + defer c.Shutdown() + if err != nil { + fmt.Printf("failed to connect to containerd: %v", err) + return 0 + } + return 1 +} + +func main() { + var containerdEndpoints = []string{ + "unix:///mnt/host/run/containerd/containerd.sock", + 
"unix:///mnt/host/var/run/containerd/containerd.sock", + "unix:///mnt/host/run/containerd.sock", + } + + for _, ep := range containerdEndpoints { + if CheckRuntime(&ep) == 0 { + return + } + } + + infos, _ := GetContainerInfos(containerdEndpoints) + marshal, _ := json.Marshal(infos) + fmt.Println(string(marshal)) +} diff --git a/source/tools/monitor/unity/collector/podMan/runtime/cri/runtime_client.go b/source/tools/monitor/unity/collector/podMan/runtime/cri/runtime_client.go new file mode 100644 index 0000000000000000000000000000000000000000..de98e4e1a6c30989d08985c65740111a00248068 --- /dev/null +++ b/source/tools/monitor/unity/collector/podMan/runtime/cri/runtime_client.go @@ -0,0 +1,144 @@ +package main + +import ( + "context" + "encoding/json" + "fmt" + "net" + "net/url" + "time" + + pb "k8s.io/cri-api/pkg/apis/runtime/v1" + + "github.com/pkg/errors" + "google.golang.org/grpc" + "k8s.io/klog/v2" +) + +const ( + // unixProtocol is the network protocol of unix socket. + unixProtocol = "unix" + Timeout = 1 * time.Second +) + +// GetAddressAndDialer returns the address parsed from the given endpoint and a context dialer. 
+func GetAddressAndDialer(endpoint string) (string, func(ctx context.Context, addr string) (net.Conn, error), error) { + protocol, addr, err := parseEndpointWithFallbackProtocol(endpoint, unixProtocol) + if err != nil { + return "", nil, err + } + if protocol != unixProtocol { + return "", nil, fmt.Errorf("only support unix socket endpoint") + } + + return addr, dial, nil +} + +func dial(ctx context.Context, addr string) (net.Conn, error) { + return (&net.Dialer{}).DialContext(ctx, unixProtocol, addr) +} + +func parseEndpointWithFallbackProtocol(endpoint string, fallbackProtocol string) (protocol string, addr string, err error) { + if protocol, addr, err = parseEndpoint(endpoint); err != nil && protocol == "" { + fallbackEndpoint := fallbackProtocol + "://" + endpoint + protocol, addr, err = parseEndpoint(fallbackEndpoint) + if err == nil { + klog.InfoS("Using this format as endpoint is deprecated, please consider using full url format.", "deprecatedFormat", endpoint, "fullURLFormat", fallbackEndpoint) + } + } + return +} + +func parseEndpoint(endpoint string) (string, string, error) { + u, err := url.Parse(endpoint) + if err != nil { + return "", "", err + } + + switch u.Scheme { + case "tcp": + return "tcp", u.Host, nil + + case "unix": + return "unix", u.Path, nil + + case "": + return "", "", fmt.Errorf("using %q as endpoint is deprecated, please consider using full url format", endpoint) + + default: + return u.Scheme, "", fmt.Errorf("protocol %q not supported", u.Scheme) + } +} + +func getRuntimeClient(endpoints []string) (pb.RuntimeServiceClient, *grpc.ClientConn, error) { + // Set up a connection to the server. 
+ conn, err := getConnection(endpoints) + if err != nil { + return nil, nil, errors.Wrap(err, "connect") + } + runtimeClient := pb.NewRuntimeServiceClient(conn) + return runtimeClient, conn, nil +} + +func getConnection(endPoints []string) (*grpc.ClientConn, error) { + if endPoints == nil || len(endPoints) == 0 { + return nil, fmt.Errorf("endpoint is not set") + } + endPointsLen := len(endPoints) + var conn *grpc.ClientConn + for indx, endPoint := range endPoints { + //klog.Infof("connect using endpoint '%s' with '%s' timeout", endPoint, Timeout) + addr, dialer, err := GetAddressAndDialer(endPoint) + if err != nil { + if indx == endPointsLen-1 { + return nil, err + } + klog.Warningf("%v", err) + continue + } + conn, err = grpc.Dial(addr, grpc.WithInsecure(), grpc.WithBlock(), grpc.WithTimeout(Timeout), grpc.WithContextDialer(dialer)) + if err != nil { + errMsg := errors.Wrapf(err, "connect endpoint '%s', make sure you are running as root and the endpoint has been started", endPoint) + if indx == endPointsLen-1 { + return nil, errMsg + } + klog.Warningf("%v", errMsg) + } else { + //klog.Infof("connected successfully using endpoint: %s", endPoint) + break + } + } + return conn, nil +} + +func ParseContainerStatusResponse(response *pb.ContainerStatusResponse) (CRIContainerStatus, error) { + containerStatus := CRIContainerStatus{ + Status: response.Status, + } + data, found := response.GetInfo()["info"] + if !found { + return containerStatus, fmt.Errorf("not found field 'info' in container status response") + } + info := &CRIContainerInfo{} + if err := json.Unmarshal([]byte(data), info); err != nil { + return containerStatus, err + } + containerStatus.Info = info + return containerStatus, nil +} + +func ParsePodStatusResponse(response *pb.PodSandboxStatusResponse) (CRIPodStatus, error) { + podStatus := CRIPodStatus{ + Status: response.Status, + } + data, found := response.GetInfo()["info"] + if !found { + return podStatus, fmt.Errorf("not found field 'info' in pod 
status response") + } + info := &CRIPodInfo{} + if err := json.Unmarshal([]byte(data), info); err != nil { + return podStatus, err + } + podStatus.Info = info + return podStatus, nil +} diff --git a/source/tools/monitor/unity/collector/podMan/runtime/cri/types.go b/source/tools/monitor/unity/collector/podMan/runtime/cri/types.go new file mode 100644 index 0000000000000000000000000000000000000000..f37eb2a538d77c9b65bb06ad130f06c6987ca23c --- /dev/null +++ b/source/tools/monitor/unity/collector/podMan/runtime/cri/types.go @@ -0,0 +1,55 @@ +package main + +import ( + pb "k8s.io/cri-api/pkg/apis/runtime/v1" +) + +type ContainerInfo struct { + Id string + PodName string + PodId string + Pid int + Namespace string + PodCgroup string + ContainerCgroup string + ContainerName string +} + +type PodInfo struct { + CgroupParent string +} + +// Container is an interface for get the container id and name +type Container struct { + Id string + Names []string + State string + PodSandboxId string +} + +type CRIContainerStatus struct { + Info *CRIContainerInfo `json:"info"` + Status *pb.ContainerStatus +} + +type CRIContainerInfo struct { + Pid int64 `json:"pid"` + Config *pb.ContainerConfig `json:"config"` + RuntimeOptions struct { + SystemdCgroup bool `json:"systemd_cgroup"` + } `json:"runtimeOptions"` + RuntimeSpec struct { + Linux struct { + CgroupsPath string `json:"cgroupsPath"` + } `json:"linux"` + } `json:"RuntimeSpec"` +} + +type CRIPodStatus struct { + Info *CRIPodInfo `json:"info"` + Status *pb.PodSandboxStatus +} + +type CRIPodInfo struct { + Config *pb.PodSandboxConfig `json:"config"` +} diff --git a/source/tools/monitor/unity/collector/podMan/runtime/cri_containerd.lua b/source/tools/monitor/unity/collector/podMan/runtime/cri_containerd.lua new file mode 100644 index 0000000000000000000000000000000000000000..6f89a21bd1687391a06beb25a308d8b7809e2a9c --- /dev/null +++ b/source/tools/monitor/unity/collector/podMan/runtime/cri_containerd.lua @@ -0,0 +1,124 @@ 
+require("common.class") + +local unistd = require("posix.unistd") +local CinotifyPod = require("common.inotifyPod") +local cjson = require("cjson") + +local CcriContainerd = class("criContainerd") + +local containerdEnpoint = {"/run/containerd/containerd.sock", + "/var/run/containerd/containerd.sock", + "/var/run/containerd/containerd.sock"} + +function CcriContainerd:_init_(resYaml, mnt) + self._name = "cri-containerd" + self._mnt = mnt + self._ino = {} + local ffi = require("collector.podMan.runtime.cri.ffi_lua") + self._ffi = ffi.ffi + self._awsome = ffi.awesome + self._valid_endpoint = "" + self._ns_blacklist = {} + + local ns_blacklist = resYaml.container.nsBlacklist + for _, ns in ipairs(ns_blacklist) do + self._ns_blacklist[ns] = 1 + end +end + +function CcriContainerd:checkRuntime() + for _, ep in ipairs(containerdEnpoint) do + local endpoint = "unix://"..self._mnt..ep + local endpoint_ffi = self._ffi.new("GoString") + endpoint_ffi.p = endpoint + endpoint_ffi.n = #endpoint + local endpoint_ptr = self._ffi.cast("GoString*", endpoint_ffi) + local is_enable = self._awsome.CheckRuntime(endpoint_ffi) + if is_enable == 1 then + self._valid_endpoint = endpoint + print("Using cri-containerd api.") + return 1 + end + end + return 0 +end + +function CcriContainerd:cgroupChanged() + local is_change, events = self._ino:isChange() + if is_change then + print("cri-containerd: cgroup changed.") + if events ~= nil then + -- reomeve inotify watch on deleted pod dirs + self._ino:RemoveDeletePodWatch(events) + end + end + return is_change +end + +function CcriContainerd:initInotify() + --[[ + in k8s, need to watch + "sys/fs/cgroup/cpu/kubepods.slice", + "sys/fs/cgroup/cpu/kubepods.slice/kubepods-besteffort.slice", + "sys/fs/cgroup/cpu/kubepods.slice/kubepods-burstable.slice" + --]] + self._ino = CinotifyPod.new() + self._ino:watchKubePod(self._mnt) +end + +function CcriContainerd:queryPodsInfo() + local resptr = self._awsome.CGetContainerInfosfunc(self._valid_endpoint) + if 
not resptr then return nil end + local infoString = self._ffi.string(resptr) + self._ffi.C.free(resptr) + return cjson.decode(infoString) +end + +function CcriContainerd:setupCons() + local mnt = self._mnt + local cons = {} + local c = 0 + + local consInfo = self:queryPodsInfo() + if not consInfo then + print("cri-containerd: Can't get pods info!") + return nil + end + + for _, cs in ipairs(consInfo) do + local namespace = cs.Namespace + if self._ns_blacklist[namespace] then + goto continue + end + + -- watch pod dirs(containers' changes in pod) + local pod_path = cs.PodCgroup + local full_pod_path = mnt .. "sys/fs/cgroup/cpu" .. pod_path + if unistd.access(full_pod_path) == 0 then + self._ino:addPodWatch(full_pod_path) + end + + local lpod = { + name = cs.PodName, + namespace = cs.Namespace, + } + + local cell = { + pod = lpod, + name = cs.ContainerName, + id = cs.Id + } + + cell.path = cs.ContainerCgroup + if unistd.access(mnt .. "sys/fs/cgroup/cpu/" .. cell.path) == 0 then + c = c + 1 + cons[c] = cell + end + + ::continue:: + end + + return cons +end + +return CcriContainerd \ No newline at end of file diff --git a/source/tools/monitor/unity/collector/podMan/runtime/docker.lua b/source/tools/monitor/unity/collector/podMan/runtime/docker.lua new file mode 100644 index 0000000000000000000000000000000000000000..c0740138e7850844df576ea78e4712d354fdb117 --- /dev/null +++ b/source/tools/monitor/unity/collector/podMan/runtime/docker.lua @@ -0,0 +1,123 @@ +require("common.class") + +local unistd = require("posix.unistd") +local Cinotifies = require("common.inotifies") +local docker_api = require("httplib.dockerApi") + +local Cdocker = class("dockerApi") +local defualt_endpoint = "/var/run/docker.sock" +local docker = "/docker/" + +function Cdocker:_init_(resYaml, mnt) + self._name = "Docker" + self._cgTopDir = mnt .. 
"sys/fs/cgroup/cpu" + self._endpoint = defualt_endpoint + self._mnt = mnt + self._api = docker_api.new('localhost', defualt_endpoint) + self._ino = {} + self._cgroupDriver = "cgroupfs" +end + +function Cdocker:getCgroupDriver() + local qresp, err = self._api:get_system_info() + if err or not qresp then + print("Get docker cgroup driver failed!", err) + return "cgroupfs" -- default is "cgroupfs" + end + + if not qresp["body"] then + return "cgroupfs" + end + + return qresp["body"]["CgroupDriver"] +end + +function Cdocker:initInotify() + local cgroupDriver = self:getCgroupDriver() + -- in docker, default watch /sys/fs/cgroup/cpu/docker (cgroupfs driver) + local cg_path = self._cgTopDir .. docker + if cgroupDriver == "systemd" then + -- systemd cgroup driver, watch /sys/fs/cgroup/system.slice/docker + cg_path = self._cgTopDir .. "/system.slice" + self._cgroupDriver = "systemd" + end + self._ino = Cinotifies.new() + self._ino:add(cg_path) +end + +function Cdocker:checkRuntime() + local qresp, err = self._api:get_version() + if err then + print("Get docker version failed!", err) + return 0 + end + if not qresp then + return 0 + end + print("Using docker runtime version:", qresp["body"].Version) + return 1 +end + +function Cdocker:queryPodInfo() + return self._api:list_containers() +end + +function Cdocker:setupCons() + local cons = {} + local c = 0 + --local url = "http://127.0.0.1/containers/json" + local obj, err = self:queryPodInfo() + if err then + print("Docker: query container info failed!", err) + return nil + end + + if not obj then + return nil + end + + local obj_body = obj["body"] + if not obj_body then + return nil + end + + for _, container in ipairs(obj_body) do + local path = "" + local cg_path = "" + local id = container.Id + -- 返回的容器名会多一个"/"前缀(i.e /con_name) + local name = string.sub(container.Names[1], 2) + + -- i.e /sys/fs/cgroup/cpu/docker/container_id/, path="docker/con_id" + if self._cgroupDriver == "systemd" then + path = "/system.slice" .. 
"/docker-" .. id .. ".scope" + cg_path = self._cgTopDir .. path + else + path = docker .. id + cg_path = self._cgTopDir .. path + end + + local cell = { + pod = nil, + name = name, + id = id, + path =path + } + if unistd.access(cg_path) == 0 then + c = c + 1 + cons[c] = cell + end + end + + return cons +end + +function Cdocker:cgroupChanged() + local is_change = self._ino:isChange() + if is_change then + print("Docker: cgroup changed.") + end + return is_change +end + +return Cdocker \ No newline at end of file diff --git a/source/tools/monitor/unity/collector/podMan/runtime/k8sApi.lua b/source/tools/monitor/unity/collector/podMan/runtime/k8sApi.lua new file mode 100644 index 0000000000000000000000000000000000000000..68ea173d13cddb0906f3e178ee84c25237669d81 --- /dev/null +++ b/source/tools/monitor/unity/collector/podMan/runtime/k8sApi.lua @@ -0,0 +1,248 @@ +require("common.class") +local unistd = require("posix.unistd") +local json = require("cjson") +local https = require("ssl.https") +local ltn12 = require("ltn12") +local ChttpCli = require("httplib.httpCli") + +local pystring = require("common.pystring") +local CinotifyPod = require("common.inotifyPod") + +local Ck8sApi = class("podsApi") +local default_token_path = "/var/run/secrets/kubernetes.io/serviceaccount/token" +local default_ca_path = "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt" + +local function spiltConId(conId) + local res = pystring:split(conId, "//", 1) + return res[2] +end + +local function getRuntime(mnt) + if unistd.access(mnt .. 
"var/run/docker/runtime-runc/moby/") == 0 then + return "docker" + end + return "cri-containerd" +end + +local function podPath(lpod) + local paths + if lpod.qos == "guaranteed" then + paths = { + "/kubepods.slice/kubepods-pod", + lpod.uid, + ".slice/" + } + else + paths = { + "/kubepods.slice/kubepods-", + lpod.qos, + ".slice/kubepods-", + lpod.qos, + "-pod", + lpod.uid, + ".slice/" + } + end + return pystring:join("", paths) +end + +local function joinContainerPath(pod_path, cell, runtime) + if pod_path == nil then + pod_path = podPath(cell.pod) + end + + local paths = { + pod_path, + runtime, + "-", + cell.id, + ".scope" + } + return pystring:join("", paths) +end + +local function getQosStr() + return {"BE", "NOR", "LS", "LS", "OT"} +end + +local function get_bvt(bvt_path) + -- local bvt_path = conpath.."cpu.bvt_warp_ns" + local value = 5 + if unistd.access(bvt_path) == 0 then + for line in io.lines(bvt_path) do + value = tonumber(line) + 2 + break + end + end + names = getQosStr() + return names[value] +end + +function Ck8sApi:queryPodsInfo() + if self._token == "" then + local f = io.open(self._token_path, "r") + if not f then + print("service token not exist!") + return nil + end + + self._token = f:read("*a") + f:close() + end + + local header = { + ["Authorization"] = "Bearer "..self._token + } + + local host_ip = os.getenv("HOST_IP") + if not host_ip then + host_ip = "127.0.0.1" + end + + local resp = {} + local params = { + url = "https://" .. host_ip .. 
":10250/pods", + method = "GET", + verify = "none", + protocol = "any", + headers = header, + cafile = self._ca_path, + sink = ltn12.sink.table(resp) + } + + local worked, code, _, _ = https.request(params) + if not worked then + print("failed to query k8s api: ", code) + return nil + end + + return table.concat(resp) +end + +function Ck8sApi:setupCons() + local mnt = self._resYaml.config.proc_path + local runtime = getRuntime(mnt) + local cli = ChttpCli.new() + local cons = {} + local c = 0 + local content = cli:get("http://127.0.0.1:10255/pods") + local obj = cli:jdecode(content.body) + if not obj then + local podsinfo = self:queryPodsInfo() + if not podsinfo then + print("Can't get pods info from kube api!") + return nil + end + obj = json.decode(podsinfo) + end + + for _, pod in ipairs(obj.items) do + local metadata = pod.metadata + if self._ns_blacklist[metadata.namespace] then + goto continue + end + + if not pod.status.qosClass then goto continue end + local lpod = { + name = metadata.name, + namespace = metadata.namespace, + uid = pystring:replace(metadata.uid, "-", "_"), + qos = pystring:lower(pod.status.qosClass), + volume = pod.spec.volumes + } + + -- watch pod dirs(containers' changes in pod) + local pod_path = podPath(lpod) + local full_pod_path = mnt .. "sys/fs/cgroup/cpu" .. 
pod_path + if unistd.access(full_pod_path) == 0 then + self._ino:addPodWatch(full_pod_path) + end + + local containerStatuses = pod.status.containerStatuses + local containers = pod.spec.containers + local containerResources = {} + -- record container resources for pod_storage_stat + for _, con in ipairs(containers) do + containerResources[con.name] = con.resources + end + + for _, con in ipairs(containerStatuses) do + if not con.containerID then goto cs_continue end + local cell = { + pod = lpod, + name = con.name, + id = spiltConId(con.containerID), + resources = containerResources[con.name] + } + + cell.path = joinContainerPath(pod_path, cell, runtime) + if unistd.access(mnt .. "sys/fs/cgroup/cpu/" .. cell.path) == 0 then + cell.bvt = get_bvt(mnt .. "sys/fs/cgroup/cpu/" .. cell.path .. "/cpu.bvt_warp_ns") + c = c + 1 + cons[c] = cell + end + ::cs_continue:: + end + + ::continue:: + end + + return cons +end + +-- for now, just check if 10255 and 10250 ports has pods' info +function Ck8sApi:checkRuntime() + local cli = ChttpCli.new() + local content = cli:get("http://127.0.0.1:10255/pods") + local obj = cli:jdecode(content.body) + if not obj then + local podsinfo = self:queryPodsInfo() + -- both 10250 and 10255 are unavaliable, k8s api not supported + if not podsinfo then + return 0 + end + end + print("Using kubernetes api.") + return 1 +end + +function Ck8sApi:cgroupChanged() + local is_change, events = self._ino:isChange() + if is_change then + print("k8s: cgroup changed.") + if events ~= nil then + -- reomeve inotify watch on deleted pod dirs + self._ino:RemoveDeletePodWatch(events) + end + end + return is_change +end + +function Ck8sApi:initInotify() + --[[ + in k8s, need to watch + "sys/fs/cgroup/cpu/kubepods.slice", + "sys/fs/cgroup/cpu/kubepods.slice/kubepods-besteffort.slice", + "sys/fs/cgroup/cpu/kubepods.slice/kubepods-burstable.slice" + --]] + self._ino = CinotifyPod.new() + self._ino:watchKubePod(self._mnt) + +end + +function Ck8sApi:_init_(resYaml, 
mnt) + self._resYaml = resYaml + self._mnt = mnt + self._token_path = default_token_path + self._ca_path = default_ca_path + self._token = "" + self._ino = {} + self._ns_blacklist = {} + + local ns_blacklist = resYaml.container.nsBlacklist + for _, ns in ipairs(ns_blacklist) do + self._ns_blacklist[ns] = 1 + end +end + +return Ck8sApi diff --git a/source/tools/monitor/unity/collector/postEngine/engine.lua b/source/tools/monitor/unity/collector/postEngine/engine.lua index 4ec4fefb1354ecc720098b206e389366fa0cdce7..4219f18eee854c5758e2cfb72e7b73f961ad8943 100644 --- a/source/tools/monitor/unity/collector/postEngine/engine.lua +++ b/source/tools/monitor/unity/collector/postEngine/engine.lua @@ -13,6 +13,8 @@ local system = require("common.system") local cjson = require("cjson.safe") local CexecBase = require("collector.postEngine.execBase") local CexecDiag = require("collector.postEngine.execDiag") +local CexecJobs = require("collector.postEngine.execJobs") +local ChttpReq = require("httplib.httpReq") local Cengine = class("engine", CvProto) local diagExec = { @@ -23,6 +25,8 @@ local diagExec = { net_edge = {block = 5 * 60, time = 60, so = {virtiostat = 5 * 3}}, } + + function Cengine:_init_(que, proto_q, fYaml, tid) CvProto._init_(self, CprotoData.new(que)) self._que = que @@ -33,6 +37,14 @@ function Cengine:_init_(que, proto_q, fYaml, tid) local res = system:parseYaml(fYaml) self._resDiag = res.diagnose self._diags = {} + + if self._resDiag then + self._fYamlJobs = res.diagnose.jobs + self._jobs = {} + self._auth = res.diagnose.token + self._host = res.diagnose.host + end + end function Cengine:setMainloop(main) @@ -43,6 +55,37 @@ function Cengine:setTask(taskMons) self._task = taskMons end +function Cengine:postReq(s, data) + local req = ChttpReq.new() + local url = self._host .. 
"/api/v1/tasks/sbs_task_result/" + local formData = { + task_id = data.task_id, + results = s + } + local headers = { + accept = "application/json", + ["Content-Type"] = "multipart/form-data", + authorization = self._auth + } + req:postFormData(url,headers,formData) +end + +function Cengine:postReqFile(s, data) + local req = ChttpReq.new() + local url = self._host .. "/api/v1/tasks/sbs_task_result/" + local formData = { + task_id = data.task_id, + files = s, + results = "" + } + local headers = { + accept = "application/json", + ["Content-Type"] = "multipart/form-data", + authorization = self._auth + } + req:postFormData(url,headers,formData) +end + function Cengine:run(e, res, diag) local args = res.args local second = res.second or diag.time @@ -60,13 +103,74 @@ function Cengine:run(e, res, diag) self._diags[res.exec] = diag.block end +function Cengine:runJobs(e, res, diag) + local cmd = res.jobs[1].cmd + local isFile = false + local filename + local filepath + if #res.jobs[1].fetch_file_list~=0 then + isFile = true + filename = res.jobs[1].fetch_file_list[1].name + filepath = res.jobs[1].fetch_file_list[1].remote_path + end + + local time + if diag and diag.time then + time = diag.time + else + time = 30 + end + + if cmd then + local exec = CexecJobs.new("/bin/bash", {"-c",cmd}, time, res.service_name) + --local exec = CexecJobs.new("/bin/bash", {"-c","sysak memgraph"}, time, res.service_name) + exec:addEvents(e) + if isFile then + local file = io.open(filepath, "rb") + if file then + local content = file:read("*a") + file:close() + local s = { + filename, + content, + "application/octet-stream" + } + self:postReqFile(s, res) + else + print("无法打开文件" .. 
filepath) + end + + else + local s = exec:readIn() + self:postReq(s, res) + end + + end + if diag and diag.block then + self._jobs[res.service_name] = diag.block + else + self._jobs[res.service_name] = 60 + end + +end + function Cengine:pushTask(e, msgs) local events = pystring:split(msgs, '\n') for _, msg in ipairs(events) do - print(msg) local res = cjson.decode(msg) + + local service_name = res.service_name + local cmd = res.cmd - if cmd == "mon_pid" then + if service_name ~= nil then + local diag = self._fYamlJobs[service_name] + if self._jobs[service_name] then + print(service_name .. " is blocking") + else + self:runJobs(e,res,diag) + end + + elseif cmd == "mon_pid" then self._task:add(res.pid, res.loop) elseif cmd == "exec" then -- exec a cmd local execCmd = res.exec @@ -79,7 +183,7 @@ function Cengine:pushTask(e, msgs) local diag = self._resDiag[exec] if diag then system:dumps(diag) - if self._diags[exec] then + if self._diags[exec] then --实现阻塞 print("cmd " .. exec .. " is blocking.") else self:run(e, res, diag) @@ -101,7 +205,7 @@ function Cengine:proc(t, event, msgs) self:pushTask(event, msgs) end -function Cengine:checkDiag() +function Cengine:checkDiag() -- local toDel = {} for k, v in pairs(self._diags) do if v > 0 then @@ -115,12 +219,30 @@ function Cengine:checkDiag() end end +function Cengine:checkJobs() -- + local toDel = {} + for k, v in pairs(self._jobs) do + if v > 0 then + self._jobs[k] = v - 1 + else + table.insert(toDel, k) + end + end + for _, k in ipairs(toDel) do + self._jobs[k] = nil + end +end + function Cengine:work(t, event) local msgs = postQue.pull() if msgs then self:proc(t, event, msgs) end self:checkDiag() + if self._resDiag then + self:checkJobs() + end + end return Cengine diff --git a/source/tools/monitor/unity/collector/postEngine/execBase.lua b/source/tools/monitor/unity/collector/postEngine/execBase.lua index 77b7cb94542d5586208c96d1c692d9abecd98892..fb5579c1c493982978736700563c2e43b386ba23 100644 --- 
a/source/tools/monitor/unity/collector/postEngine/execBase.lua +++ b/source/tools/monitor/unity/collector/postEngine/execBase.lua @@ -22,8 +22,8 @@ local function checkChild(ppid, pid) for line in f:lines() do if pystring:startswith(line, "PPid:") then - local _, s = pystring:split(line, ":", 1) - if tonumber(pystring:strip(s)) == ppid then + local s = pystring:split(line, ":", 1) + if tonumber(pystring:strip(s[2])) == ppid then ret = true end break @@ -35,13 +35,17 @@ local function checkChild(ppid, pid) return ret end -function CexecBase:_init_(cmd, args, seconds) +function CexecBase:_init_(cmd, args, seconds) -- seconds超时时间 self.cmd = cmd self._cnt = 0 self._loop = seconds / interval self._ppid = unistd.getpid() - self._pid = exec.run(cmd, args) + + self._fIn, self._fOut = unistd.pipe() + assert(self._fIn, "creat pipe failed.") + + self._pid = exec.run(cmd, args, self._fIn, self._fOut) end function CexecBase:addEvents(e) diff --git a/source/tools/monitor/unity/collector/postEngine/execJobs.lua b/source/tools/monitor/unity/collector/postEngine/execJobs.lua new file mode 100644 index 0000000000000000000000000000000000000000..c71044a0695629f072c653ca2fd99d6a715217b6 --- /dev/null +++ b/source/tools/monitor/unity/collector/postEngine/execJobs.lua @@ -0,0 +1,27 @@ +--- +--- Generated by EmmyLua(https://github.com/EmmyLua) +--- Created by wrp. 
+--- DateTime: 2023/8/25 16:47 +--- +require("common.class") +local system = require("common.system") +local unistd = require("posix.unistd") +local cjson = require("cjson.safe") +local CprotoData = require("common.protoData") +local CexecBase = require("collector.postEngine.execBase") +local pwait = require("posix.sys.wait") + +local CexecJobs = class("execJobs", CexecBase) + +function CexecJobs:_init_(cmd, args, seconds, service_name) + CexecBase._init_(self, cmd, args, seconds) + self.cmd = service_name +end + +function CexecJobs:readIn() + pwait.wait(self._pid) + return unistd.read(self._fIn, 2 * 1024 * 1024) +end + + +return CexecJobs diff --git a/source/tools/monitor/unity/collector/proc_bled.lua b/source/tools/monitor/unity/collector/proc_bled.lua index 4ba427d6511fc25b87d0a8259d29c178249ddbbc..243619fd32cab73e5371c84d930852e6eec82e65 100644 --- a/source/tools/monitor/unity/collector/proc_bled.lua +++ b/source/tools/monitor/unity/collector/proc_bled.lua @@ -11,7 +11,10 @@ local CvProc = require("collector.vproc") local CprocBled = class("procArp", CvProc) function CprocBled:_init_(proto, pffi, mnt, pFile) - CvProc._init_(self, proto, pffi, mnt, pFile or "dummy") + --CvProc._init_(self, proto, pffi, mnt, pFile or "dummy") + CvProc._init_(self, proto) + print("a") + self._record = 1 end diff --git a/source/tools/monitor/unity/collector/proc_diskstats.lua b/source/tools/monitor/unity/collector/proc_diskstats.lua index 173895e26100eda3193d5c9328aaedb41459b5f7..ceae464730624102860793ee93ab0a8d9df16857 100644 --- a/source/tools/monitor/unity/collector/proc_diskstats.lua +++ b/source/tools/monitor/unity/collector/proc_diskstats.lua @@ -4,7 +4,7 @@ --- DateTime: 2022/12/16 11:49 PM --- -require("common.class") +require("common.class") local system = require("common.system") local CvProc = require("collector.vproc") @@ -14,14 +14,15 @@ function CprocDiskstats:_init_(proto, pffi, mnt, pFile) CvProc._init_(self, proto, pffi, mnt, pFile or "proc/diskstats") 
self._lastData = {} self._lastDisk = {} - self._diskVNum = 11 + self._diskVNum = 15 end function CprocDiskstats:_diskIndex() return { "reads", "rmerge", "rkb", "rmsec", "writes", "wmerge", "wkb", "wmsec", - "inflight", "time", "backlog" + "inflight", "time", "backlog", + "discards", "dmerge", "dkb", "dmsec" } end @@ -29,21 +30,32 @@ function CprocDiskstats:_diffIndex() return { "reads", "rmerge", "rkb", "rmsec", "writes", "wmerge", "wkb", "wmsec", + "discards", "dmerge", "dkb", "dmsec", "backlog", "xfers" } end -- "reads", "rmerge", "rkb", "rmsec", "writes", "wmerge", "wkb", "wmsec", "inflight", "time", "backlog" +-- "discards", "dmerge", "dkb", "dmsec" function CprocDiskstats:_getNewValue(data) local now = {} local index = self:_diskIndex() for i = 1, self._diskVNum do - local head = index[i] - now[head] = tonumber(self._ffi.string(data.s[i + 2])) + if data.no >= 18 then + local head = index[i] + now[head] = tonumber(self._ffi.string(data.s[i + 2])) + elseif i <= 11 then + local head = index[i] + now[head] = tonumber(self._ffi.string(data.s[i + 2])) + else + now[index[i]] = 0 + end end now["rkb"] = now["rkb"] / 2 -- sectors = 512 bytes now['wkb'] = now['wkb'] / 2 + now['dkb'] = now['dkb'] / 2 + now['xfers'] = now['reads'] + now['writes'] if now['xfers'] == 0 then now['bsize'] = 0 @@ -89,6 +101,48 @@ function CprocDiskstats:_calcDiff(disk_name, now, last, elapsed) value = (now["time"] - last["time"]) / elapsed } table.insert(protoTable.vs, cell) + + local read_lat = 0 + if now["reads"] - last["reads"] == 0 + then + read_lat = 0 + else + read_lat = (now["rmsec"] - last["rmsec"]) / (now["reads"] - last["reads"]) + end + + cell = { + name = "read_lat", + value = read_lat + } + table.insert(protoTable.vs, cell) + + local write_lat = 0 + if now["writes"] - last["writes"] == 0 + then + write_lat = 0 + else + write_lat = (now["wmsec"] - last["wmsec"]) / (now["writes"] - last["writes"]) + end + + cell = { + name = "write_lat", + value = write_lat + } + 
table.insert(protoTable.vs, cell) + + local discard_lat = 0 + if now["discards"] - last["discards"] == 0 + then + discard_lat = 0 + else + discard_lat = (now["dmsec"] - last["dmsec"]) / (now["discards"] - last["discards"]) + end + cell = { + name = "discard_lat", + value = discard_lat + } + table.insert(protoTable.vs, cell) + self:appendLine(protoTable) end @@ -113,18 +167,18 @@ function CprocDiskstats:checkLastDisks() end function CprocDiskstats:_proc(line, elapsed) - local data = self._ffi.new("var_string_t") + local data = self._ffi.new("var_string_t") assert(self._cffi.var_input_string(self._ffi.string(line), data) == 0) - assert(data.no >= 14) + assert(data.no >= 14) - local disk_name = self._ffi.string(data.s[2]) + local disk_name = self._ffi.string(data.s[2]) self:_calcDisk(disk_name, data, elapsed) end function CprocDiskstats:proc(elapsed, lines) CvProc.proc(self) - for line in io.lines(self.pFile) do - self:_proc(line, elapsed) + for line in io.lines(self.pFile) do + self:_proc(line, elapsed) end self:checkLastDisks() self:push(lines) diff --git a/source/tools/monitor/unity/collector/proc_fd.lua b/source/tools/monitor/unity/collector/proc_fd.lua new file mode 100644 index 0000000000000000000000000000000000000000..8626fc3c0eff69e360b5c57a19737a4a677b21dd --- /dev/null +++ b/source/tools/monitor/unity/collector/proc_fd.lua @@ -0,0 +1,45 @@ +require("common.class") +local pystring = require("common.pystring") +local CvProc = require("collector.vproc") + +local CprocFd = class("procFd", CvProc) + +function CprocFd:_init_(proto, pffi, mnt, pFile) + CvProc._init_(self, proto, pffi, mnt, pFile or "proc/sys/fs/file-nr") +end + +function CprocFd:proc(elapsed, lines) + local c = 0 + CvProc.proc(self) + local values = {} + + local file = io.open(self.pFile, "r") + if file then + local line = file:read("*l") + if line then + local cells = pystring:split(line) + if cells[1] then + local ls + values = { + name = "file-nr", + value = cells[1], + } + 
self:appendLine(self:_packProto("procfd", nil, {values})) + end + + if cells[3] then + values = { + name = "file-max", + value = cells[3], + } + self:appendLine(self:_packProto("procfd", nil, {values})) + end + end + + file:close() + end + + self:push(lines) +end + +return CprocFd diff --git a/source/tools/monitor/unity/collector/proc_meminfo.lua b/source/tools/monitor/unity/collector/proc_meminfo.lua index 5c6ebc836c918418f40a92d509e51fecea708d81..baa5ffdc69e01dc4451dc2607a7b53b92e75ca82 100644 --- a/source/tools/monitor/unity/collector/proc_meminfo.lua +++ b/source/tools/monitor/unity/collector/proc_meminfo.lua @@ -34,15 +34,16 @@ end function CprocMeminfo:readVmalloc() local pages = 0 - for line in io.lines("/proc/vmallocinfo") do - if string.find(line,"vmalloc") and string.find(line,"pages=") then - local cells = pystring:split(pystring:split(pystring:strip(line),"pages=",1)[2]," ",1) - pages = pages + tonumber(cells[1]) - end - end + --for line in io.lines("/proc/vmallocinfo") do + -- if string.find(line,"vmalloc") and string.find(line,"pages=") then + -- local cells = pystring:split(pystring:split(pystring:strip(line),"pages=",1)[2]," ",1) + -- pages = pages + tonumber(cells[1]) + --end + --end self._protoTable_dict["vs"]["VmallocUsed"]=pages * 4 end +--[[ function CprocMeminfo:readUsed() local f = io.popen('free -k','r') io.input(f) @@ -55,6 +56,7 @@ function CprocMeminfo:readUsed() end f:close() end +]] function CprocMeminfo:readHugepage(size,name) local file = "/sys/kernel/mm/hugepages/hugepages-" .. size .. 
"kB/nr_hugepages" @@ -89,14 +91,18 @@ function CprocMeminfo:proc(elapsed, lines) end local tmp_dict = self._protoTable_dict.vs self:readVmalloc() - self:readUsed() + --self:readUsed() self:readHugepage(2048,"huge_2M") self:readHugepage(1048576,"huge_1G") local cell = {name="total", value=tmp_dict["MemTotal"]+tmp_dict["res"]} table.insert(self._protoTable["vs"], cell) - cell = {name="used", value=tmp_dict["free_used"]+tmp_dict["Shmem"]} + --cell = {name="used", value=tmp_dict["free_used"]+tmp_dict["Shmem"]} + --table.insert(self._protoTable["vs"], cell) + + local used = tmp_dict["MemTotal"] - tmp_dict["MemFree"] - tmp_dict["Cached"] - tmp_dict["Buffers"] - tmp_dict["SReclaimable"] + tmp_dict["Shmem"] + cell = {name="used", value = used } table.insert(self._protoTable["vs"], cell) local kernel_other = tmp_dict["Slab"]+tmp_dict["KernelStack"]+tmp_dict["PageTables"]+tmp_dict["VmallocUsed"] diff --git a/source/tools/monitor/unity/collector/proc_netdev.lua b/source/tools/monitor/unity/collector/proc_netdev.lua index d546ad006e68cc027043d1890121682c0a41840a..8bfeaf7c976e7a2857f2ffa4b75646f91819626c 100644 --- a/source/tools/monitor/unity/collector/proc_netdev.lua +++ b/source/tools/monitor/unity/collector/proc_netdev.lua @@ -63,6 +63,9 @@ function CprocNetdev:_proc(line, elapsed) assert(data.no == 16) local ifName = string.gsub(self._ffi.string(data.s), ":", "") + if string.sub(ifName, 1, 3) ~= "eth" then + return + end local last = self._lastData[ifName] if last then diff --git a/source/tools/monitor/unity/collector/proc_softnet_stat.lua b/source/tools/monitor/unity/collector/proc_softnet_stat.lua index 6aa952ea58caec01652d67e5bed90d7797f72c54..587e106aee0c8013e0f223add35b0190244303b0 100644 --- a/source/tools/monitor/unity/collector/proc_softnet_stat.lua +++ b/source/tools/monitor/unity/collector/proc_softnet_stat.lua @@ -22,12 +22,15 @@ function CsoftnetStat:proc(elapsed, lines) for line in io.lines(self.pFile) do local cell = {} local cells = pystring:split(line) + 
local length = #cells cell.packet_process = tonumber(cells[1], 16) cell.packet_drop = tonumber(cells[2], 16) cell.time_squeeze = tonumber(cells[3], 16) cell.cpu_collision = tonumber(cells[9], 16) cell.received_rps = tonumber(cells[10], 16) - cell.flow_limit_count = tonumber(cells[11], 16) + if length > 10 then + cell.flow_limit_count = tonumber(cells[11], 16) + end c = c + 1 softnets[c] = cell end @@ -50,4 +53,4 @@ function CsoftnetStat:proc(elapsed, lines) self:push(lines) end -return CsoftnetStat +return CsoftnetStat \ No newline at end of file diff --git a/source/tools/monitor/unity/collector/proc_statm.lua b/source/tools/monitor/unity/collector/proc_statm.lua index d497c6a5bc99451e7be1f506f306dd43dbf2170d..d2b26be263d80a278469a189155cb8af9333fcb4 100644 --- a/source/tools/monitor/unity/collector/proc_statm.lua +++ b/source/tools/monitor/unity/collector/proc_statm.lua @@ -11,8 +11,7 @@ local CprocStatm = class("procStatm", CvProc) function CprocStatm:_init_(proto, pffi, mnt, pFile) CvProc._init_(self, proto, pffi, mnt, pFile or nil) - local pid = unistd.getpid() - self._fstatm = "/proc/" .. pid .. "/statm" + self._fstatm = "/proc/self/statm" end function CprocStatm:proc(elapsed, lines) diff --git a/source/tools/monitor/unity/collector/rdt/plugin/rdt_llc_occupancy.lua b/source/tools/monitor/unity/collector/rdt/plugin/rdt_llc_occupancy.lua new file mode 100644 index 0000000000000000000000000000000000000000..d992bc67b9a0ee07ce26dfdfb1699163d1c3e5d1 --- /dev/null +++ b/source/tools/monitor/unity/collector/rdt/plugin/rdt_llc_occupancy.lua @@ -0,0 +1,34 @@ +require("common.class") +local pystring = require("common.pystring") +local system = require("common.system") +local unistd = require("posix.unistd") +local CvProc = require("collector.vproc") +local rdt_llc_occupancy = class("rdt_llc_occupancy", CvProc) + +function rdt_llc_occupancy:_init_(proto, pffi, mnt, path, labels) + CvProc._init_(self, proto, pffi, mnt, path .. "/" .. 
"llc_occupancy") + self.labels = labels +end + +function rdt_llc_occupancy:proc(elapsed, lines) + if not unistd.access(self.pFile) then + return + end + + CvProc.proc(self) + local values = {} + + for line in io.lines(self.pFile) do + local v = { + name = "llc_occ", + value = tonumber(line) + } + table.insert(values, v) + + self:appendLine(self:_packProto("rdt_usage", self.labels, values)) + end + + self:push(lines) +end + +return rdt_llc_occupancy diff --git a/source/tools/monitor/unity/collector/rdt/plugin/rdt_local_mem_bw.lua b/source/tools/monitor/unity/collector/rdt/plugin/rdt_local_mem_bw.lua new file mode 100644 index 0000000000000000000000000000000000000000..f603d2119cc84068fa21f5b1e8c4373621e0f4f8 --- /dev/null +++ b/source/tools/monitor/unity/collector/rdt/plugin/rdt_local_mem_bw.lua @@ -0,0 +1,39 @@ +require("common.class") +local pystring = require("common.pystring") +local system = require("common.system") +local unistd = require("posix.unistd") +local CvProc = require("collector.vproc") +local rdt_local_mem_bw = class("rdt_local_mem_bw", CvProc) +local rdtffi = require("collector.native.ffi_rdt_helper") + +function rdt_local_mem_bw:_init_(proto, pffi, mnt, path, labels) + CvProc._init_(self, proto, pffi, mnt, path .. "/" .. 
"mbm_local_bytes") + self._labels = labels + self._prev = nil + self._ffi = rdtffi["rawffi"] + self._rdtffi = rdtffi["rdtffi"] +end + +function rdt_local_mem_bw:proc(elapsed, lines) + if not unistd.access(self.pFile) then + return + end + + CvProc.proc(self) + local values = {} + + for line in io.lines(self.pFile) do + local v = { + name = "local_mem_bw", + value = self._rdtffi.calculate(line, self._prev or "0") + } + + table.insert(values, v) + self:appendLine(self:_packProto("rdt_usage", self._labels, values)) + self._prev = line + end + + self:push(lines) +end + +return rdt_local_mem_bw diff --git a/source/tools/monitor/unity/collector/rdt/plugin/rdt_size.lua b/source/tools/monitor/unity/collector/rdt/plugin/rdt_size.lua new file mode 100644 index 0000000000000000000000000000000000000000..37dca2d6f055c4d3181e24b929047c6486fc612f --- /dev/null +++ b/source/tools/monitor/unity/collector/rdt/plugin/rdt_size.lua @@ -0,0 +1,48 @@ +require("common.class") +local pystring = require("common.pystring") +local system = require("common.system") +local unistd = require("posix.unistd") +local CvProc = require("collector.vproc") +local rdt_size = class("rdt_size", CvProc) + +function rdt_size:_init_(proto, pffi, mnt, path, labels) + CvProc._init_(self, proto, pffi, mnt, path .. "/" .. 
"size") + self.labels = labels +end + +function rdt_size:proc(elapsed, lines) + if not unistd.access(self.pFile) then + return + end + + CvProc.proc(self) + -- MB:0=100;1=100 + -- L3:0=fff;1=fff + for line in io.lines(self.pFile) do + local arr = pystring:split(line, ":") + local type = pystring:strip(arr[1], ' ') -- MB or L3 + local info = pystring:split(arr[2], ";") -- 0=100;1=100 + + for _, resouce in ipairs(info) do + -- 0=100 + local values = {} + local data = pystring:split(resouce, "=") + local socketId = data[1] + local num = tonumber(data[2]) + + local v = { + name = type, + value = num + } + + table.insert(values, v) + local labels = system:deepcopy(self.labels) + table.insert(labels, { name = "socket", index = socketId }) + self:appendLine(self:_packProto("rdt_alloc_policy", labels, values)) + end + end + + self:push(lines) +end + +return rdt_size diff --git a/source/tools/monitor/unity/collector/rdt/plugin/rdt_total_mem_bw.lua b/source/tools/monitor/unity/collector/rdt/plugin/rdt_total_mem_bw.lua new file mode 100644 index 0000000000000000000000000000000000000000..a29cc79c19a685154c5d0105f233fafb8761c81a --- /dev/null +++ b/source/tools/monitor/unity/collector/rdt/plugin/rdt_total_mem_bw.lua @@ -0,0 +1,37 @@ +require("common.class") +local pystring = require("common.pystring") +local system = require("common.system") +local CvProc = require("collector.vproc") +local unistd = require("posix.unistd") +local rdt_total_mem_bw = class("rdt_total_mem_bw", CvProc) +local rdtffi = require("collector.native.ffi_rdt_helper") + +function rdt_total_mem_bw:_init_(proto, pffi, mnt, path, labels) + CvProc._init_(self, proto, pffi, mnt, path .. "/" .. 
"mbm_total_bytes") + self._labels = labels + self._prev = nil + self._ffi = rdtffi["rawffi"] + self._rdtffi = rdtffi["rdtffi"] +end + +function rdt_total_mem_bw:proc(elapsed, lines) + if not unistd.access(self.pFile) then + return + end + + CvProc.proc(self) + local values = {} + for line in io.lines(self.pFile) do + local v = { + name = "total_mem_bw", + value = self._rdtffi.calculate(line, self._prev or "0") + } + table.insert(values, v) + self:appendLine(self:_packProto("rdt_usage", self._labels, values)) + self._prev = line + end + + self:push(lines) +end + +return rdt_total_mem_bw diff --git a/source/tools/monitor/unity/collector/rdt/rdtManager.lua b/source/tools/monitor/unity/collector/rdt/rdtManager.lua new file mode 100644 index 0000000000000000000000000000000000000000..eb7637f364089f7baec4e34c85f9b38b6e053e08 --- /dev/null +++ b/source/tools/monitor/unity/collector/rdt/rdtManager.lua @@ -0,0 +1,249 @@ +require("common.class") + +local dirent = require("posix.dirent") +local unistd = require("posix.unistd") +local stat = require("posix.sys.stat") +local bit = require("bit") +local pystring = require("common.pystring") +local system = require("common.system") +local Cinotifies = require("common.inotifies") + +local rdtMgr = class("rdtManager") + +function rdtMgr:_init_(resYaml, proto, pffi, mnt) + local dir = mnt .. resYaml.resctrl.path + if unistd.access(dir) then + self._top = dir + self._path = resYaml.resctrl.path + self._enable = true + else + print("rdtManager: Machine or OS not support resctrl? 
Please check the resctrl path.\n") + self._enable = false + return + end + + self._verbose = false + self._proto = proto + self._pffi = pffi + self._mnt = mnt + self._resYaml = resYaml + self:resetPlugins(self._resYaml, self._proto, self._pffi, self._mnt) +end + +function rdtMgr:resetPlugins(resYaml, proto, pffi, mnt) + self._ino = Cinotifies.new() + if resYaml.resctrl.auto ~= nil and resYaml.resctrl.auto then + self._resDirs, self._monDirs = self:searchDirs() + else + self._resDirs, self._monDirs = self:checkDirs() + end + + self._plugins = self:setupPlugins(proto, pffi, mnt) + + for _, value in ipairs(self._resDirs) do + self._ino:add(value) + self._ino:add(value .. "/" .. "mon_groups") + if self._verbose then + print("res group: " .. value) + end + end + for _, value in ipairs(self._monDirs) do + if self._verbose then + print("mon group: " .. value) + end + end +end + +function rdtMgr:isSystemFile(file) + local black_list = { "cpus_list", "info", "mon_data", "schemata", "tasks", "cpus", "id", "mode", "size", "mon_groups" } + for _, fname in ipairs(black_list) do + if file == fname then + return true + end + end + return false +end + +function rdtMgr:searchDirs() + local topDir = self._top + local resDirs = {} + local monDirs = {} + + table.insert(resDirs, topDir) + + local fnames = self:getAllFiles(topDir) + -- get all res-group + for _, f in ipairs(fnames) do + if not self:isSystemFile(f) then + local path = topDir .. "/" .. f + table.insert(resDirs, path) + -- print("Add new res:" .. path) + end + end + + for _, resDir in ipairs(resDirs) do + local monDataPath = resDir .. "/" .. "mon_data" + self:addAllMonDirs(monDirs, monDataPath) + + local monGroupsPath = resDir .. "/" .. "mon_groups" + local files = self:getAllFiles(monGroupsPath) + for _, f in ipairs(files) do + local path = monGroupsPath .. "/" .. f .. 
"/mon_data" + self:addAllMonDirs(monDirs, path) + end + end + + return resDirs, monDirs +end + +function rdtMgr:addAllMonDirs(monDirs, path) + local files = self:getAllFiles(path) + for _, fname in ipairs(files) do + local p = path .. "/" .. fname + table.insert(monDirs, p) + -- print("Add new mon: " .. path .. "/" .. fname) + end +end + +function rdtMgr:setupPlugins(proto, pffi, mnt) + local plugins = {} + for i, path in ipairs(self._resDirs) do + -- print(string.format("rdt plugin[%d] path=%s", i, path)); + + path = pystring:lstrip(path, self._mnt) + for _, plugin in ipairs(self._resYaml.resctrl.resLuaPlugin) do + local CProcs = require("collector.rdt.plugin." .. plugin) + local lables = { + { + name = "path", + index = path + } + } + table.insert(plugins, CProcs.new(proto, pffi, mnt, path, lables)) + end + end + + for i, path in ipairs(self._monDirs) do + -- print(string.format("mon plugin[%d] path=%s", i, path)); + path = pystring:lstrip(path, self._mnt) + local lables = { + { + name = "path", + index = path + } + } + if pystring:find(path, "mon_groups") ~= nil then + local podname = '' + local conname = '' + local items = pystring:split(path, '/') + + for index, n in ipairs(items) do + if n == "mon_groups" then + local str = items[index + 1] + local ret = string.find(str, "#") + + if ret == nil then + break + end + + local strs = pystring:split(str, "#") + + table.insert(lables, { name = "podname", index = strs[1] }) + table.insert(lables, { name = "conname", index = strs[2] }) + break + end + end + end + for _, plugin in ipairs(self._resYaml.resctrl.monLuaPlugin) do + local CProcs = require("collector.rdt.plugin." .. 
plugin) + table.insert(plugins, CProcs.new(proto, pffi, mnt, path, lables)) + end + end + return plugins +end + +function rdtMgr:getAllFiles(path) + local res = {} + + local ok, files = pcall(dirent.files, path) + if not ok then + return res + end + + for f in files do + if not pystring:startswith(f, ".") then + table.insert(res, f) + end + end + return res +end + +function rdtMgr:checkDirs() + local resDirs = {} + local monDirs = {} + if self._resYaml.resctrl.group == nil then + return resDirs, monDirs + end + for _, rdtGroup in pairs(self._resYaml.resctrl.group) do + local resGrpName = rdtGroup.name + local mons = rdtGroup.monitor + local rdt_group_path + + if resGrpName == "" then + rdt_group_path = self._top + else + rdt_group_path = self._top .. "/" .. resGrpName + end + + if unistd.access(rdt_group_path) then + table.insert(resDirs, rdt_group_path) + local monDataPath = rdt_group_path .. "/" .. "mon_data" + local fnames = self:getAllFiles(monDataPath) + for _, f in ipairs(fnames) do + table.insert(monDirs, monDataPath .. "/" .. f) + end + else + print(string.format("rdtManager: Non-exist path %s", self._top .. "/" .. resGrpName)); + goto continue + end + + if mons == nil then + goto continue + end + for _, monGroup in pairs(mons) do + local monDataPath = pystring:join("/", { rdt_group_path, "mon_groups", monGroup, "mon_data" }) + + if unistd.access(monDataPath) then + local fnames = self:getAllFiles(monDataPath) + for _, f in ipairs(fnames) do + table.insert(monDirs, monDataPath .. "/" .. 
f) + end + else + print(string.format("rdtManager: Non-exist path %s", monDataPath)); + end + end + ::continue:: + end + return resDirs, monDirs +end + +function rdtMgr:proc(elapsed, lines) + if not self._enable then + return + end + + if self._ino:isChange() or #self._plugins == 0 then + print("rdtManager: resctrl dirs changed.") + self:resetPlugins(self._resYaml, self._proto, self._pffi, self._mnt) + end + + -- print(string.format("rdtMgr: proc")) + for i, plugin in ipairs(self._plugins) do + local stat, res = pcall(plugin.proc, plugin, elapsed, lines) + if not stat or res == -1 then + print("rdtManager: pcall plugin error.") + end + end +end + +return rdtMgr diff --git a/source/tools/monitor/unity/collector/rdt/rdt_helper/Makefile b/source/tools/monitor/unity/collector/rdt/rdt_helper/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..77f67a0faa73bcc9179a3a6c1cb4297eb9e8906b --- /dev/null +++ b/source/tools/monitor/unity/collector/rdt/rdt_helper/Makefile @@ -0,0 +1,20 @@ +CC := gcc +CFLAG := -g -fPIC +LDFLAG := -g -fpic -shared +OBJS := rdt_helper.o +SO := librdt_helper.so + +# DEPMOD= ../collector +all: $(SO) install + +%.o: %.c %.h + $(CC) -c $< -o $@ $(CFLAG) + +$(SO): $(OBJS) + $(CC) -o $@ $(OBJS) $(LDFLAG) + +install: $(SO) + cp $(SO) ../../native/ + +clean: + rm -f *.so $(OBJS) diff --git a/source/tools/monitor/unity/collector/rdt/rdt_helper/rdt_helper.c b/source/tools/monitor/unity/collector/rdt/rdt_helper/rdt_helper.c new file mode 100644 index 0000000000000000000000000000000000000000..2daa882343e807e5273487b22b6f3e9c480bfc7c --- /dev/null +++ b/source/tools/monitor/unity/collector/rdt/rdt_helper/rdt_helper.c @@ -0,0 +1,48 @@ +#include +#include +#include +#include +#include +#include + +#include "rdt_helper.h" + +#define MB (1024 * 1024) + +/* if out of range or no number found return nonzero */ +static int parse_ull(const char* str, uint64_t* retval) { + // printf("input str=[%s]\n", str); + int err = 0; + char* endptr; + 
errno = 0; + unsigned long long int val = strtoull(str, &endptr, 10); + // printf("parse val=%llu\n", val); + + /* Check for various possible errors */ + if (errno == ERANGE) { + fprintf(stderr, "Failed parse val.\n"); + err = errno; + return err; + } + + if (endptr == str) return err = -1; + *retval = val; + return err; +} + +int calculate(const char* now, const char* prev) { + uint64_t ret = 0; + // printf("hello now=%s, prev=%s\n", now, prev); + uint64_t now_val = 0, prev_val = 0; + parse_ull(now, &now_val); + parse_ull(prev, &prev_val); + + // printf("now_val =%llu prev_val =%llu\n", now_val, prev_val); + if (prev_val) { + ret = now_val >= prev_val ? now_val - prev_val + : now_val + UINT64_MAX - prev_val; + ret = ret / MB; + } + // printf("res=%llu\n", ret); + return ret; +} \ No newline at end of file diff --git a/source/tools/monitor/unity/collector/rdt/rdt_helper/rdt_helper.h b/source/tools/monitor/unity/collector/rdt/rdt_helper/rdt_helper.h new file mode 100644 index 0000000000000000000000000000000000000000..956485c4a04217c6671620150eef06689cb6b80d --- /dev/null +++ b/source/tools/monitor/unity/collector/rdt/rdt_helper/rdt_helper.h @@ -0,0 +1,3 @@ + + +int calculate(const char* now, const char* prev); \ No newline at end of file diff --git a/source/tools/monitor/unity/common/addition.lua b/source/tools/monitor/unity/common/addition.lua deleted file mode 100644 index e8a98515a72dccdd69a4d238dd891e45fd11b304..0000000000000000000000000000000000000000 Binary files a/source/tools/monitor/unity/common/addition.lua and /dev/null differ diff --git a/source/tools/monitor/unity/common/exec.lua b/source/tools/monitor/unity/common/exec.lua index ad98d63994f36c42c64a86ca8078739a373586a2..6c5b8a09d020a85a23732a6d1e17f7619078ba98 100644 --- a/source/tools/monitor/unity/common/exec.lua +++ b/source/tools/monitor/unity/common/exec.lua @@ -4,20 +4,49 @@ --- DateTime: 2023/5/4 10:50 --- +local system = require("common.system") local unistd = require("posix.unistd") local 
pwait = require("posix.sys.wait") local signal = require("posix.signal") local module = {} -function module.run(cmd, args) +local function setFdOpt(fd, size) + local err, errno + local fcntl = require("posix.fcntl") + local F_SETPIPE_SZ = 1024 + 7 -- F_LINUX_SPECIFIC_BASE + 7 refer to https://lxr.missinglinkelectronics.com/linux/include/uapi/linux/fcntl.h#L29 + + system:fdNonBlocking(fd) + _, err, errno = fcntl.fcntl(fd, F_SETPIPE_SZ, size) + assert(not err, "set file F_SETPIPE_SZ failed.") +end + +function module.run(cmd, args, fIn, fOut) + local buffSize = 32 * 1024 local pid, err = unistd.fork() if pid > 0 then -- for self print("pid: " .. pid) + if fIn then + setFdOpt(fIn, buffSize) + end + if fOut then + unistd.close(fOut) + end return pid elseif pid == 0 then -- for child - local errno - prctl_death_kill() + local err, errno + if prctl_death_kill then -- check prctl_death_kill function is already register. + prctl_death_kill() -- when parent exit, child process will be killed. + end + + if fIn then + unistd.close(fIn) + end + if fOut then + setFdOpt(fOut, buffSize) + _, err, errno = unistd.dup2(fOut, 1) + assert(not errno, "dup2 fd failed.") + end _, err, errno = unistd.exec(cmd, args) assert(not errno, "exec failed." .. err .. 
errno) else diff --git a/source/tools/monitor/unity/common/inotifyPod.lua b/source/tools/monitor/unity/common/inotifyPod.lua new file mode 100644 index 0000000000000000000000000000000000000000..5d97ebf24f0b29acc9ae908190ac5262e132f83a --- /dev/null +++ b/source/tools/monitor/unity/common/inotifyPod.lua @@ -0,0 +1,104 @@ +require("common.class") + +local Cinotifies = require("common.inotifies") +local system = require("common.system") +local dirent = require("posix.dirent") +local pstat = require("posix.sys.stat") +local inotify = require('inotify') + +local CinotifyPod = class("inotifyPod", Cinotifies) + +function CinotifyPod:_init_() + Cinotifies._init_(self, nil) + self.kube_pod_paths = { + "sys/fs/cgroup/cpu/kubepods.slice", + "sys/fs/cgroup/cpu/kubepods.slice/kubepods-besteffort.slice", + "sys/fs/cgroup/cpu/kubepods.slice/kubepods-burstable.slice", + "sys/fs/cgroup/cpu/kubepods", + "sys/fs/cgroup/cpu/kubepods/besteffort", + "sys/fs/cgroup/cpu/kubepods/burstable" + } + self._pod_ws = {} -- record ["pod_path"] = pod_ws(return value of addwatch) + self._kpp_map = {} -- record [kube_pod_path_ws] = "kube_pod_paths" +end + +-- watch kubepod dirs (to watch pods' changes) +function CinotifyPod:watchKubePod(mnt) + for _, path in ipairs(self.kube_pod_paths) do + local watch_path = mnt .. path + local w = self._handle:addwatch(watch_path, inotify.IN_CREATE, inotify.IN_MOVE, inotify.IN_DELETE) + if w ~= nil then + if w > 0 then + --print("watchKubePod: " .. watch_path) + table.insert(self._ws, w) + self._kpp_map[w] = watch_path + else + error("add" .. watch_path .. "to watch failed.") + end + end + end +end + +-- watch pod's dir (to watch container's changes) +function CinotifyPod:addPodWatch(pod_path) + if self._pod_ws[pod_path] ~= nil then + return + end + + local pws = self._handle:addwatch(pod_path, inotify.IN_CREATE, inotify.IN_MOVE, inotify.IN_DELETE) + if pws ~= nil then + if pws > 0 then + --print("add " .. pod_path .. 
"to watch") + self._pod_ws[pod_path] = pws + else + error("add " .. pod_path .. "to watch") + end + end +end + +-- check if the pod has been delete, if so, remove the watch +function CinotifyPod:RemoveDeletePodWatch(events) + local dp_paths = {} + for _, event in ipairs(events) do + -- todo: now pod creations can also pass this event.mask check + -- add another check in line 73 to filter creation temporary + if bit.band(event.mask, inotify.IN_DELETE) then + for wd, kube_path in pairs(self._kpp_map) do + if event.wd == wd then + local d_path = kube_path .. "/" .. event.name .. "/" + --print("delete_pod_path: " .. d_path) + table.insert(dp_paths, d_path) + end + end + end + end + + for _, d_path in ipairs(dp_paths) do + if self._pod_ws[d_path] == nil then goto continue end + self._handle:rmwatch(self._pod_ws[d_path]) + self._pod_ws[d_path] = nil + ::continue:: + end +end + +function CinotifyPod:_del_() + for _, w in ipairs(self._ws) do + self._handle:rmwatch(w) + end + for _, w in pairs(self._pod_ws) do + self._handle:rmwatch(w) + end + self._handle:close() +end + +function CinotifyPod:isChange() + local events = self._handle:read() + if events ~=nil then + if #events > 0 then + return true, events + end + end + return false, nil +end + +return CinotifyPod diff --git a/source/tools/monitor/unity/common/lineParse.lua b/source/tools/monitor/unity/common/lineParse.lua index a70e0d35758dbd0fc55508750e37c5d3695ce511..2e74ef305f6bbed908f10a2d456863c8a48d7bba 100644 --- a/source/tools/monitor/unity/common/lineParse.lua +++ b/source/tools/monitor/unity/common/lineParse.lua @@ -110,6 +110,52 @@ function module.pack(title, ls, vs) return line end +function module.packMetric(line) + local cells + local title = line.line + local ls = {} + local vs = {} + + if line.vs then + cells = line.ls + if cells then + for _, cell in ipairs(cells) do + ls[cell.name] = cell.index + end + end + + cells = line.vs + for _, cell in ipairs(cells) do + vs[cell.name] = cell.value + end + + return 
module.pack(title, ls, vs) + end +end + +function module.packLog(line) + local cells + local title = line.line + local ls = {} + local vs = {} + + if line.log then + cells = line.ls + if cells then + for _, cell in ipairs(cells) do + ls[cell.name] = cell.index + end + end + + cells = line.log + for _, cell in ipairs(cells) do + vs[cell.name] = cell.log + end + + return module.pack(title, ls, vs) + end +end + function module.packs(line) local cells local title = line.line diff --git a/source/tools/monitor/unity/common/system.lua b/source/tools/monitor/unity/common/system.lua index b1220ade32855aaf4e15b827236141876e7cb90b..0658dcd4723d06d389cc3ba34b75499fca4cfdba 100644 --- a/source/tools/monitor/unity/common/system.lua +++ b/source/tools/monitor/unity/common/system.lua @@ -225,4 +225,18 @@ function system:guid() ) end +function system:randomStr(n) + local t = { + "0","1","2","3","4","5","6","7","8","9", + "a","b","c","d","e","f","g","h","i","j","k","l","m","n","o","p","q","r","s","t","u","v","w","x","y","z", + "A","B","C","D","E","F","G","H","I","J","K","L","M","N","O","P","Q","R","S","T","U","V","W","X","Y","Z", + } + local tSize = #t + local s = {} + for i = 1, n do + s[i] = t[math.random(tSize)] + end + return table.concat(s) +end + return system \ No newline at end of file diff --git a/source/tools/monitor/unity/common/transPro.lua b/source/tools/monitor/unity/common/transPro.lua index 328e99988f53a9cb66ce8284c9f2d01dd90c24b8..15aa919a03ba66913b8ca20bf90cbd8deb714339 100644 --- a/source/tools/monitor/unity/common/transPro.lua +++ b/source/tools/monitor/unity/common/transPro.lua @@ -10,6 +10,20 @@ require("common.class") local CtransPro = class("CtransPro") +local function qFormDataDis(from,tData) + local res = {} + local len = #tData + local c = 0 + for i = len, 1, -1 do + local line = tData[i] + if from == line.title then + c = c + 1 + res[c] = line + end + end + return res +end + local function qFormData(from, tData) local res = {} local len = #tData @@ -17,7 
+31,6 @@ local function qFormData(from, tData) local c = 0 for i = len, 1, -1 do local line = tData[i] - print(line.title) if from == line.title then if last == 0 or last == line.time then c = c + 1 @@ -62,6 +75,7 @@ end function CtransPro:_init_(instance, fYaml, help, timestamps) self._instance = instance self._help = help or false + self._cluster_id = nil local ms = system:parseYaml(fYaml) self._timestamps = timestamps or false if self._timestamps == true then @@ -70,16 +84,69 @@ function CtransPro:_init_(instance, fYaml, help, timestamps) self.pack_line = packLine end self._tDescr = ms.metrics - print(self._tDescr) + + if ms.container then + if ms.container.cluster_id == true then + local cluster_id = os.getenv("CLUSTER_ID") + if cluster_id then + self._cluster_id = cluster_id + end + end + end + +end + +local function checkLine(blacklist, whitelist, labels) + if blacklist then + for k, v in pairs(blacklist) do + if labels[k] then + if type(v) == "table" then + for _, vv in ipairs(v) do + if labels[k]:match(vv) then + return false + end + end + else + if labels[k]:match(v) then + return false + end + end + end + end + return true + elseif whitelist then + for k, v in pairs(whitelist) do + if labels[k] then + if type(v) == "table" then + for _, vv in ipairs(v) do + if labels[k]:match(vv) then + return true + end + end + else + if labels[k]:match(v) then + return true + end + end + end + end + return false + end + return true end function CtransPro:export(datas) local res = {} local c = 0 for _, line in ipairs(self._tDescr) do - --local from = line.from -- cpu_total - --local tFroms = qFormData(from, datas) - if #datas then + local from = line.from -- cpu_total + local tFroms + if line.discrete then + tFroms = qFormDataDis(from, datas) + else + tFroms = qFormData(from, datas) + end + if #tFroms~=0 then local title = line.title --sysak_proc_cpu_total if self._help then local help = string.format("# HELP %s %s", title, line.help) @@ -90,25 +157,84 @@ function 
CtransPro:export(datas) res[c] = sType end - for _, tFrom in ipairs(datas) do - local labels = system:deepcopy(tFrom.labels) - if not labels then - labels = {} - end - labels.instance = self._instance - for k, v in pairs(tFrom.vs) do - labels[line.head] = v.name - c = c + 1 - res[c] = self.pack_line(title, labels, v.value, 1) + local blacklist = line.blacklist + local whitelist = line.whitelist + if blacklist and whitelist then + print("cannot set both blacklist and whitelist! ") + goto continue + end + for _, tFrom in ipairs(tFroms) do + if tFrom.values then + local labels = system:deepcopy(tFrom.labels) + if not labels then + labels = {} + end + labels.instance = self._instance + if self._cluster_id then + labels.cluster = self._cluster_id + end + for k, v in pairs(tFrom.values) do + labels[line.head] = k + if checkLine(blacklist, whitelist, labels)==true then + + c = c + 1 + res[c] = self.pack_line(title, labels, v, 1) + end + + end + end end + ::continue:: end end - --c = c + 1 - --res[c] = "" local lines = pystring:join("\n", res) - --print(lines) return lines end +function CtransPro:toMetric(datas) + local res = {} + local c = 0 + local instance = self._instance + for _, line in ipairs(self._tDescr) do + local from = line.from -- cpu_total + local tFroms + if line.discrete then + tFroms = qFormDataDis(from, datas) + else + tFroms = qFormData(from, datas) + end + if #tFroms ~= 0 then + local title = line.title --sysak_proc_cpu_total + + local blacklist = line.blacklist + local whitelist = line.whitelist + if blacklist and whitelist then + print("cannot set both blacklist and whitelist! 
") + goto continue + end + for _, tFrom in ipairs(tFroms) do + if tFrom.values then + local labels = system:deepcopy(tFrom.labels) + if not labels then + labels = {} + end + labels.instance = instance + for k, v in pairs(tFrom.values) do + local labels_u = system:deepcopy(labels) + labels_u[line.head] = k + if checkLine(blacklist, whitelist, labels_u) then + c = c + 1 + res[c] = {title, labels_u, v} + end + end + + end + end + ::continue:: + end + end + return res +end + return CtransPro \ No newline at end of file diff --git a/source/tools/monitor/unity/etc/base.yaml b/source/tools/monitor/unity/etc/base.yaml index 9350c344463efd2157cbe1163a849c6e24a1c578..8d357fc92e6e5ddaedc004b07609da03654a4938 100644 --- a/source/tools/monitor/unity/etc/base.yaml +++ b/source/tools/monitor/unity/etc/base.yaml @@ -1,8 +1,10 @@ config: - freq: 15 # unit second + freq: 30 # unit second port: 8400 # bind port bind_addr: 0.0.0.0 # bind ip backlog: 32 # listen backlog + daemon: true + url_safe: close identity: # support hostip, curl(need url arg), hostname, file(need path arg), specify(need name arg) # mode: curl # url: "http://100.100.100.200/latest/meta-data/instance-id" @@ -10,50 +12,105 @@ config: mode: hostip # real_timestamps: true # unix_socket: "/tmp/sysom_unity.sock" - proc_path: / # in container mode, like -v /:/mnt/host , should use /mnt/host/ + proc_path: / # in container mode, like -v /:/mnt/host , should use /mnt/host/ db: - rotate: 7 # tsdb file retention time, unit day + rotate: 7 # tsdb file retention time, unit day budget: 200 # max query buffer from tsdb. limit: cpu: 30 # unit % mem: 50 # unit mb tasks: 10 # monitor 10 pid max. + cellLimit: -1 # set guard limit time. 
guard time is unlimit when cellLimit is -1,default is 50(ms) + +#pushTo: +# to: "Influx" +# host: "192.168.0.127" +# port: 80 +# url: "/api/v1/cec_proxy/proxy/line_protocol" outline: - /var/sysom/outline +container: + mode: "pods" + runtime: ["k8sApi", "docker"] + nsBlacklist: ["kube-system", "arms-prom", "kube-public", "kube-node-lease"] + luaPlugin: ["cg_cpu_cfs_quota","cg_mem_drcm_glob_latency","cg_memory_util","cg_cpu_stat_sample", "cg_cpuacct_stat", + "cg_cpuacct_proc_stat", "cg_memory_drcm_latency", "cg_memory_fail_cnt","cg_memory_dcmp_latency", + "cg_cpuacct_wait_latency", "con_net_stat", "cg_blkio_stat", "cg_memory_oom_cnt", "podmem"] + +# cgroupv2: +# directPaths: ["kubepods/besteffort", "kubepods.slice/kubepods-besteffort.slices"] +# luaPlugin: ["cg_sched_cfs_stat_v2","cg_cpu_stat_v2"] + luaPlugins: ["proc_buddyinfo", "proc_diskstats", "proc_meminfo", "proc_mounts", "proc_netdev", "proc_snmp_stat", "proc_sockstat", "proc_stat", "proc_statm", "proc_vmstat", - "proc_uptime", "proc_arp", "proc_cgroups", "proc_interrupts", "proc_softirqs", "proc_softnet_stat", -] + "proc_uptime", "proc_arp", "proc_cgroups", "proc_softirqs", "proc_softnet_stat", + "proc_fd"] + +resctrl: + path: "sys/fs/resctrl" + + monLuaPlugin: ["rdt_llc_occupancy", "rdt_local_mem_bw", "rdt_total_mem_bw"] + + resLuaPlugin: ["rdt_size"] + + auto: true # search all mon-groups and res-group, it will ignore config arg group + + # group: + # - name: "" + # - name: "LS" + # # monitor: ["mon1", "mon2"] + # - name: "BE" + # monitor: ["pod3#con3"] plugins: - so: kmsg description: "collect dmesg info." - - - so: proc_schedstat + - so: proc_schedstat description: "collect schedule stat info of percpu" - - - so: proc_loadavg + - so: proc_loadavg description: "collect load avg" - so: net_health description: "tcp net health." - so: net_retrans description: "tcp retrans monitor." 
- - - so: unity_irqoff - description: "irqoff:detect irq turned off and can't response" - - - so: gpuinfo + - so: unity_nosched + description: "nosched:sys hold cpu and didn't scheduling" + # irqoff may eat up the perf_event_fd ,so pmu_events will fail; + # so we close it temporarily + # - so: unity_irqoff + # description: "irqoff:detect irq turned off and can't response" + - so: gpuinfo description: "collect gpuinfo" + - so: uncore_imc + description: "IMC latency and bandwidth" + - so: pmu_events + description: "collect pmu events" + - so: cpufreq + description: "collect cpu frequence of perf cpu" + - so: cpudist + description: "sched delay" + - so: numainfo + description: "numainfo" metrics: - - - title: sysom_proc_cpu_total + - title: sysom_java_app + from: sysom_java_app + head: value + help: "java application observe." + type: "gauge" + - title: sysom_observe + from: observe + head: value + help: "application observe." + type: "gauge" + - title: sysom_proc_cpu_total from: cpu_total head: mode help: "cpu usage info for total." type: "gauge" + discrete: true # datas are discrete.default is false - title: sysom_proc_cpus from: cpus head: mode @@ -104,6 +161,11 @@ metrics: head: counter help: "file system information." type: "gauge" + - title: sysom_file_descriptor + from: procfd + head: type + help: "used file descriptor number." 
+ type: "gauge" - title: sysom_sock_stat from: sock_stat head: value @@ -228,4 +290,177 @@ metrics: from: net_tcp_ext_count head: value help: "net stat net_tcp_ext_count" - type: "gauge" \ No newline at end of file + type: "gauge" + - title: sysom_ntopo_node + from: sysom_metrics_ntopo_node + head: mode + help: "net topology node" + type: "gauge" + - title: sysom_ntopo_edge + from: sysom_metrics_ntopo_edge + head: mode + help: "net topology edge" + type: "gauge" + - title: sysom_obser_app_rt_ntopo + from: sysom_metrics_ntopo_request + head: value + help: rt and req/resp bytes of apps + type: "gauge" + discrete: true + - title: sysom_obser_mysqld_os + from: sysom_obser_metrics_mysqld_os + head: value + help: "os level metrics of mysqld" + type: "gauge" + - title: sysom_obser_mysqld_process + from: sysom_obser_metrics_mysqld_process + head: value + help: "process level metrics of mysqld" + type: "gauge" + - title: sysom_obser_mysqld_innodb + from: sysom_obser_metrics_mysqld_innodb + head: value + help: "innodb metrics of mysqld" + type: "gauge" + - title: sysom_obser_mysqld_alarm + from: sysom_obser_mysqld_alarm + head: value + help: "alarm on process exception of mysqld" + type: "gauge" + - title: sysom_obser_os_alarm + from: sysom_obser_os_alarm + head: value + help: "alarm on os exception of mysqld" + type: "gauge" + - title: sysom_podmem + from: podmem + head: value + help: "file cache for pod" + type: "gauge" + - title: sysom_container_memfail_cnt + from: cg_memfail_cnt + head: value + help: "sysom_container_memFail_cnt" + type: "gauge" + - title: sysom_container_memory_oomcnt + from: cg_memoom_cnt + head: value + help: "container oom event count" + type: "counter" + - title: sysom_container_memUtil + from: cg_memory_util + head: value + help: "sysom_container_memory_util" + type: "gauge" + - title: sysom_container_memgdrcm_latency + from: cgGlbDrcmLatency + head: value + help: "sysom global memory latency" + type: "gauge" + - title: 
sysom_container_memdrcm_latency + from: cg_memdrcm_latency + head: value + help: "sysom_container_memdrcm_latency" + type: "gauge" + - title: sysom_container_memmcmp_latency + from: cg_memmcmp_latency + head: value + help: "sysom_container_memmcmp_latency" + type: "gauge" + - title: sysom_container_cpu_stat + from: cg_cpu_stat + head: value + help: "sysom_container_cpu_stat" + type: "gauge" + - title: sysom_container_cpuacct_stat + from: cg_cpuacct_stat + head: value + help: "cpuacct/cpuacct.stat" + type: "gauge" + - title: sysom_container_proc_stat + from: cg_proc_stat + head: value + help: "container cpuacct proc stat" + type: "gauge" + - title: sysom_container_cfs_quota + from: cgCpuQuota + head: value + help: "cfs quota" + type: "gauge" + - title: sysom_container_network_stat + from: con_net_stat + head: value + help: "network stat of containers" + type: "gauge" + - title: sysom_container_blkio_stat + from: cg_blkio_stat + head: value + help: "io stat from blkio cgroup" + type: "gauge" + - title: sysom_container_cpuacct_wait_latency + from: cg_wait_latency + head: value + help: "wait_latency of cgroups" + type: "gauge" + - title: sysom_imc_channel_event + from: imc_channel_event + head: value + help: "imc latency and bw for channels" + type: "gauge" + - title: sysom_imc_event_socket + from: imc_socket_event + head: value + help: "imc latency and bw for socket" + type: "gauge" + - title: sysom_imc_event_node + from: imc_node_event + head: value + help: "imc latency and bw for node" + type: "gauge" + - title: sysom_rdt_usage + from: rdt_usage + head: value + help: "RDT LLC and memory bandwidth usage" + type: "gauge" + - title: sysom_rdt_alloc_policy + from: rdt_alloc_policy + head: value + help: "RDT LLC and memory bandwidth allocation policy" + type: "gauge" + - title: sysom_pmu_events + from: pmu_events + head: value + help: "pmu events, such as cycles/instructions, llc events" + type: "gauge" + - title: sysom_pmu_events_percpu + from: pmu_events_percpu + head: 
value + help: "pmu events of percpu" + type: "gauge" + - title: sysom_cpu_freq + from: cpufreq + head: value + help: "the frequence of percpu" + type: "gauge" + - title: sysom_numainfo + from: numainfo + head: value + help: "numainfo of system from /sys/devices/system/" + type: "gauge" + - title: sysom_cg_sched_cfs_stat_v2 + from: cg_sched_cfs_stat_v2 + head: value + help: "cgroupv2 cg_sched_cfs_stat_v2 info" + type: "gauge" + - title: sysom_cg_cpu_stat_v2 + from: cg_cpu_stat_v2 + head: value + help: "cgroupv2 cg_cpu_stat_v2 info" + type: "gauge" + + +observe: + comms: + java: "cgroup" + mysqld: "cgroup" + period: 20000 diff --git a/source/tools/monitor/unity/etc/daemonset.yaml b/source/tools/monitor/unity/etc/daemonset.yaml index da1ef1e6565230b813867dffcb962c31af2cc0e8..a5c871b085f2cae306722323c343e245716ab1bd 100644 --- a/source/tools/monitor/unity/etc/daemonset.yaml +++ b/source/tools/monitor/unity/etc/daemonset.yaml @@ -16,6 +16,7 @@ rules: - nodes - nodes/status - nodes/pods + - nodes/stats - events verbs: - get @@ -62,10 +63,20 @@ spec: - command: - /bin/sh - '-c' - - cd /root/dist/app/beeQ && sh ./run.sh + - cd /root/dist/app/beeQ && sh ./run.sh ../etc/k8s.yaml image: 'ackpod-registry.cn-shanghai.cr.aliyuncs.com/mem/sysom:v2.2' imagePullPolicy: IfNotPresent name: sysom + env: + - name: NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + - name: CLUSTER_ID + valueFrom: + configMapKeyRef: + name: ack-cluster-profile + key: clusterid ports: - containerPort: 8889 hostPort: 8889 @@ -102,3 +113,4 @@ spec: maxSurge: 0 maxUnavailable: 1 type: RollingUpdate + diff --git a/source/tools/monitor/unity/etc/db_observ.yaml b/source/tools/monitor/unity/etc/db_observ.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e6afd53c91753882480f7891c0a0fbb866defbb5 --- /dev/null +++ b/source/tools/monitor/unity/etc/db_observ.yaml @@ -0,0 +1,134 @@ +config: + freq: 30 # unit second + port: 8400 # bind port + bind_addr: 0.0.0.0 # bind ip + backlog: 
32 # listen backlog + daemon: false + url_safe: close + identity: # support hostip, curl(need url arg), hostname, file(need path arg), specify(need name arg) + # mode: curl + # url: "http://100.100.100.200/latest/meta-data/instance-id" + # name: test_specify + mode: hostip + # real_timestamps: true + # unix_socket: "/tmp/sysom_unity.sock" + proc_path: / # in container mode, like -v /:/mnt/host , should use /mnt/host/ + db: + rotate: 7 # tsdb file retention time, unit day + budget: 200 # max query buffer from tsdb. + limit: + cpu: 30 # unit % + mem: 50 # unit mb + tasks: 10 # monitor 10 pid max. + cellLimit: -1 # set guard limit time. guard time is unlimit when cellLimit is -1,default is 50(ms) + +forkRun: + - cmd: "../../../ntopo" + args: + [] + - cmd: "../../../sql-obs" + args: + [] + - cmd: "../../../ioMonitor" + args: + ["-y", "/etc/sysak/base.yaml", "-s", "await=5,iops=1000,diagIolat=on"] + +cec: "http://localhost" + +outline: + - /var/sysom/outline + + +luaPlugins: ["proc_diskstats", "proc_stat"] + + # group: + # - name: "" + # - name: "LS" + # # monitor: ["mon1", "mon2"] + # - name: "BE" + # monitor: ["pod3#con3"] + +plugins: [] + +metrics: + - title: sysom_observe + from: observe + head: value + help: "application observe." + type: "gauge" + - title: sysom_proc_disks + from: disks + head: counter + help: "disk info from /proc/diskstats." 
+ type: "gauge" + - title: sysom_ntopo_node + from: sysom_metrics_ntopo_node + head: mode + help: "net topology node" + type: "gauge" + - title: sysom_ntopo_edge + from: sysom_metrics_ntopo_edge + head: mode + help: "net topology edge" + type: "gauge" + - title: sysom_obser_app_rt_ntopo + from: sysom_metrics_ntopo_request + head: value + help: rt and req/resp bytes of apps + type: "gauge" + discrete: true + - title: sysom_obser_mysqld_os + from: sysom_obser_metrics_mysqld_os + head: value + help: "os level metrics of mysqld" + type: "gauge" + - title: sysom_obser_mysqld_process + from: sysom_obser_metrics_mysqld_process + head: value + help: "process level metrics of mysqld" + type: "gauge" + - title: sysom_obser_mysqld_innodb + from: sysom_obser_metrics_mysqld_innodb + head: value + help: "innodb metrics of mysqld" + type: "gauge" + - title: sysom_obser_mysqld_alarm + from: sysom_obser_mysqld_alarm + head: value + help: "alarm on process exception of mysqld" + type: "gauge" + - title: sysom_obser_os_alarm + from: sysom_obser_os_alarm + head: value + help: "alarm on os exception of mysqld" + type: "gauge" + - title: sysom_IOMonIndForDisksIO + from: IOMonIndForDisksIO + head: value + help: "Disk IO indicators and abnormal events" + type: "gauge" + - title: sysom_IOMonIndForSystemIO + from: IOMonIndForSystemIO + head: value + help: "System indicators and abnormal events about IO" + type: "gauge" + # - title: sysom_IOMonDiagLog + # from: IOMonDiagLog + # head: value + # help: "Diagnose log for IO exception" + # type: "gauge" + - title: sysom_iolatency + from: sysom_iolatency + head: value + help: "IO average processing latency information" + type: "gauge" + - title: sysom_iolatency_max + from: sysom_iolatency_max + head: value + help: "IO Max processing latency information" + type: "gauge" + +observe: + comms: + mysqld: "cgroup" + period: 20000 diff --git a/source/tools/monitor/unity/etc/k8s.yaml b/source/tools/monitor/unity/etc/k8s.yaml index 
bddd986428f66b37318da80e98e8c08c43f5dc90..6cf2633faa813c5fbb3bc53e1374d0b5187b292d 100644 --- a/source/tools/monitor/unity/etc/k8s.yaml +++ b/source/tools/monitor/unity/etc/k8s.yaml @@ -3,6 +3,7 @@ config: port: 8889 # bind port bind_addr: 0.0.0.0 # bind ip backlog: 32 # listen backlog + url_safe: close identity: # support hostip, curl(need url arg), hostname, file(need path arg), specify(need name arg) # mode: curl # url: "http://100.100.100.200/latest/meta-data/instance-id" @@ -13,23 +14,26 @@ config: # unix_socket: "/tmp/sysom_unity.sock" proc_path: /mnt/host/ # in container mode, like -v /:/mnt/host , should use /mnt/host/ db: - rotate: 7 # tsdb file retention time, unit day + rotate: 3 # tsdb file retention time, unit day budget: 200 # max query buffer from tsdb. limit: cpu: 90 # unit % mem: 200 # unit mb tasks: 10 # monitor 10 pid max. + cellLimit: -1 # set guard limit time. guard time is unlimit when cellLimit is -1,default is 50(ms) outline: - /var/sysom/outline container: mode: "pods" - #mode:"cgroup" - #"cg_cpuacct_stat" is a substitute of cg_cpuacct_proc_stat - luaPlugin: ["cg_memory_fail_cnt", "cg_memory_util", "cg_memory_dcmp_latency", - "cg_memory_drcm_latency", "cg_cpuacct_wait_latency", "cg_cpuacct_proc_stat", - "cg_cpu_stat", "cg_pmu_events", "cg_cpu_cfs_quota", "cg_mem_drcm_glob_latency"] + cluster_id: true # if true && CLUSTER_ID env is set, use CLUSTER_ID env (from ack configmap) as cluster id. 
+ runtime: ["k8sApi", "cri_containerd", "docker"] + nsBlacklist: ["kube-system", "arms-prom", "kube-public", "kube-node-lease"] + luaPlugin: ["cg_cpu_cfs_quota","cg_memory_util","cg_cpu_stat_sample", "cg_cpuacct_stat", + "cg_cpuacct_proc_stat", "cg_memory_drcm_latency", "cg_memory_fail_cnt", + "cg_memory_dcmp_latency", "cg_memory_oom_cnt", "con_net_stat", "podmem", "pod_storage_stat"] + directCgPath: - "/" - "/kubepods.slice" @@ -52,14 +56,13 @@ container: child1: "/kubepods%-burstable%-pod" -luaPlugins: ["podmem","proc_buddyinfo", "proc_diskstats", "proc_meminfo", "proc_mounts", "proc_netdev", +luaPlugins: ["proc_buddyinfo", "proc_diskstats", "proc_meminfo", "proc_mounts", "proc_netdev", "proc_snmp_stat", "proc_sockstat", "proc_stat", "proc_statm", "proc_vmstat", - "proc_uptime", "proc_arp", "proc_cgroups", "proc_softirqs", "proc_softnet_stat", -] + "proc_uptime", "proc_arp", "proc_cgroups", "proc_fd"] plugins: - - so: kmsg - description: "collect dmesg info." + #- so: kmsg + # description: "collect dmesg info." - so: proc_schedstat description: "collect schedule stat info of percpu" @@ -91,6 +94,8 @@ metrics: head: mode help: "cpu usage info for per-cpu." type: "gauge" + whitelist: + mode: ["^softirq", "^user","^idle","^sys","^iowait"] - title: sysom_proc_sirq from: sirq head: type @@ -103,17 +108,26 @@ metrics: type: "gauge" - title: sysom_proc_meminfo from: meminfo - head: value + head: type help: "meminfo from /proc/meminfo." type: "gauge" + whitelist: + type: ["^MemFree","user","kernel_used","^MemTotal","MemAvailable","^total", + "^MemFree","^Cached","^Buffers","^AnonHugePages","^VmallocUsed","^SReclaimable", + "^SUnreclaim","^PageTables","^alloc_page","^KernelStack","^kernel_reserved"] - title: sysom_proc_vmstat from: vmstat - head: value + head: type help: "vmstat info from /proc/vmstat." 
type: "gauge" + whitelist: + type: ["^thp_fault_alloc","^thp_fault_fallback","^thp_collapse_alloc","^thp_collapse_alloc_failed", + "^pgpgin","^pgpgout","^pswpin","^pswpout","^pgfault","^pgmajfault","^pgfault","^compact_stall", + "^compact_success","^compact_fail","^pgscan_kswapd","^pgscan_direct","^oom_kill","^pgsteal_kswapd", + "^pgsteal_direct","^pgscan_direct_throttle"] - title: sysom_proc_self_statm from: self_statm - head: value + head: type help: "statm info from /proc/self/statm." type: "gauge" - title: sysom_proc_networks @@ -138,196 +152,223 @@ metrics: type: "gauge" - title: sysom_sock_stat from: sock_stat - head: value + head: type help: "sock stat counters from /proc/net/sockstat" type: "gauge" - title: sysom_proc_schedstat from: proc_schedstat - head: value + head: type help: "schedule state of percpu." type: "gauge" - title: sysom_proc_loadavg from: proc_loadavg - head: value + head: type help: "loadavg of system from /proc/loadavg" type: "gauge" - title: sysom_proc_buddyinfo from: buddyinfo - head: value + head: type help: "buddyinfo of system from /proc/buddyinfo" type: "gauge" - title: sysom_IOMonIndForDisksIO from: IOMonIndForDisksIO - head: value + head: type help: "Disk IO indicators and abnormal events" type: "gauge" - title: sysom_IOMonIndForSystemIO from: IOMonIndForSystemIO - head: value + head: type help: "System indicators and abnormal events about IO" type: "gauge" - title: sysom_IOMonDiagLog from: IOMonDiagLog - head: value + head: type help: "Diagnose log for IO exception" type: "gauge" - title: sched_moni_jitter from: sched_moni_jitter - head: value + head: type help: "nosched/irqoff:sys and irqoff hold cpu and didn't scheduling" type: "gauge" - title: sysom_cpu_dist from: cpu_dist - head: value + head: type help: "task cpu sched dist." 
type: "gauge" - title: sysom_net_health_hist from: net_health_hist - head: value + head: type help: "net_health_hist" type: "gauge" - title: sysom_net_health_count from: net_health_count - head: value + head: type help: "net_health_count" type: "gauge" - title: sysom_net_retrans_count from: net_retrans_count - head: value + head: type help: "net_retrans_count" type: "gauge" - title: sysom_gpuinfo from: gpuinfo - head: value + head: type help: "gpuinfo of system from nvidia-smi" type: "gauge" - title: sysom_uname from: uname - head: value + head: type help: "uname info" type: "gauge" - title: sysom_uptime from: uptime - head: value + head: type help: "uptime from /proc/uptime" type: "gauge" - title: sysom_system_release from: system_release - head: value + head: type help: "system_release from /etc/os-release" type: "gauge" - title: sysom_cgroups from: cgroups - head: value + head: type help: "cgroup number." type: "gauge" + - title: sysom_file_descriptor + from: procfd + head: type + help: "used file descriptor number." + type: "gauge" - title: sysom_per_sirqs from: per_sirqs - head: value + head: type help: "per_sirqs." type: "gauge" - title: sysom_softnets from: softnets - head: value + head: type help: "cgroup number." type: "gauge" - title: sysom_interrupts from: interrupts - head: value + head: type help: "interrupts." 
type: "gauge" - title: sysom_net_ip_count from: net_ip_count - head: value + head: type help: "net snmp net_ip_count" type: "gauge" - title: sysom_net_icmp_count from: net_icmp_count - head: value + head: type help: "net snmp net_icmp_count" type: "gauge" - title: sysom_net_udp_count from: net_udp_count - head: value + head: type help: "net snmp net_udp_count" type: "gauge" - title: sysom_net_tcp_count from: net_tcp_count - head: value + head: type help: "net snmp net_tcp_count" type: "gauge" - title: sysom_net_tcp_ext_count from: net_tcp_ext_count - head: value + head: type help: "net stat net_tcp_ext_count" type: "gauge" - - title: sysom_podmem + - title: sysom_container_memory_filecache from: podmem - head: value + head: type help: "file cache for pod" type: "gauge" - - title: sysom_alloc_page + - title: sysom_container_memory_allocpage from: pod_alloc - head: value + head: type help: "pod tcp memory" type: "gauge" - - title: sysom_cg_memfail_cnt + - title: sysom_container_memory_failcnt from: cg_memfail_cnt - head: value - help: "sysom_cg_memFail_cnt" - type: "gauge" - - title: sysom_cg_memUtil + head: type + help: "container memory alloc fail cnt" + type: "gauge" + - title: sysom_container_memory_oomcnt + from: cg_memoom_cnt + head: "type" + help: "container oom event count" + type: "counter" + - title: sysom_container_memory_stat from: cg_memory_util - head: value - help: "sysom_cg_memory_util" + head: type + help: "container memory util" type: "gauge" - - title: sysom_cg_mem_glob_drcm_latency + blacklist: + type: ["^pgfault","^dirty","^pgpgout","^pgpgin"] + - title: sysom_container_memory_gdrcm_latency from: cgGlbDrcmLatency - head: value - help: "sysom global memory latency" + head: type + help: "container global memory latency" type: "gauge" - - title: sysom_cg_mem_drcm_latency + - title: sysom_container_memory_cdrcm_latency from: cg_memdrcm_latency - head: value - help: "sysom_cg_memdrcm_latency" + head: type + help: "container cgroup memory reclaim 
latency" type: "gauge" - - title: sysom_cg_memmcmp_latency + whitelist: + type: ["^memDrcm_lat_10to100ms","^memDrcm_lat_100to500ms","^memDrcm_lat_500to1000ms","^memDrcm_lat_1000ms"] + - title: sysom_container_memory_cpt_latency from: cg_memmcmp_latency - head: value - help: "sysom_cg_memmcmp_latency" + head: type + help: "container memory compact latency" type: "gauge" - - title: sysom_cg_mem_dcmp_latency - from: cg_memdcmp_latency - head: value - help: "sysom_cg_mem_dcmp_latency" + whitelist: + type: ["^memDcmp_lat_10to100ms","^memDcmp_lat_100to500ms", "^memDcmp_lat_500to1000ms", "^memDcmp_lat_1000ms"] + - title: sysom_container_cpu_stat + from: cg_cpu_stat + head: type + help: " container cpu_stat" type: "gauge" - - title: sysom_cg_cpuacct_wait_latency - from: cg_wait_latency - head: value - help: "sysom_cg_cpuacct_wait_latency" + - title: sysom_container_cpu_acctstat + from: cg_cpuacct_stat + head: type + help: "container cpuacct/cpuacct.stat" type: "gauge" - - title: sysom_cg_cpuacct_proc_stat - from: cg_cpuacct_proc_stat - head: value - help: "sysom_cg_cpuacct_proc_stat" + - title: sysom_container_proc_stat + from: cg_proc_stat + head: type + help: "container cpuacct proc stat" type: "gauge" - - title: sysom_cg_cpu_quota + - title: sysom_container_cpu_cfsquota from: cgCpuQuota - head: value - help: "quota_us,peroid_us and quota/period" + head: type + help: "cfs quota" type: "gauge" - - title: sysom_cg_cpu_stat - from: cg_cpu_stat - head: value - help: "sysom_cg_cpu_stat" + - title: sysom_container_network_stat + from: con_net_stat + head: type + help: "network stat of containers" type: "gauge" - - title: sysom_cg_cpuacct_stat - from: cg_cpuacct_stat - head: value - help: "cpuacct/cpuacct.stat" + blacklist: + type: ["^net_tx_packets","^net_rx_packets"] + - title: sysom_container_blkio_stat + from: cg_blkio_stat + head: type + help: "io stat from blkio cgroup" type: "gauge" - - title: sysom_cg_pmu_events - from: pmu_cg_events - head: value - help: "pmu events of 
cgroups" + - title: sysom_pod_ephemeral_storage_usage + from: pod_ephemeral_storage + head: type + help: "pod ephemeral storage usage" + type: "gauge" + whitelist: + type: ["^usedBytes", "^inodesUsed"] + - title: sysom_container_ephemeral_storage_usage + from: container_ephemeral_storage + head: type + help: "Containers ephemeral storage usage" type: "gauge" + whitelist: + type: ["^usedBytes", "^inodesUsed", "^limit"] diff --git a/source/tools/monitor/unity/httplib/asyncHttp.lua b/source/tools/monitor/unity/httplib/asyncHttp.lua index 37ffa8ed0afb45d3c8c1f70097700a87646ea952..a8e62588a7b316a1e6a9db8fbe2f0c06a0a8d2aa 100644 --- a/source/tools/monitor/unity/httplib/asyncHttp.lua +++ b/source/tools/monitor/unity/httplib/asyncHttp.lua @@ -79,7 +79,7 @@ local function tryConnect(fd, tConn) if errno == 115 then return 1 else - print(string.format("socket connect failed, report:%d, %s", err, errno)) + print(string.format("socket connect failed, report:%d, %s", errno, err)) return end else @@ -159,13 +159,17 @@ local function readChunks(fread, tReq) if s then size, s = unpack(pystring:split(s, "\r\n", 1)) len = tonumber(size, 16) - bodies = waitChuckData(fread, s, len) - if bodies then - body = string.sub(bodies, 1, len) - s = string.sub(bodies, len + 2) - table.insert(cells, body) + if len then + bodies = waitChuckData(fread, s, len) + if bodies then + body = string.sub(bodies, 1, len) + s = string.sub(bodies, len + 2) + table.insert(cells, body) + else + return -2 + end else - return -2 + return -3 end else return -1 @@ -268,7 +272,7 @@ function CasyncHttp:result(fread) return self:parse(fread, stream) end -local function checkConnect(fd, connecting, toWake) +function CasyncHttp:checkConnect(fd, connecting, toWake) local res, msg if connecting > 0 then local e = coroutine.yield() @@ -280,6 +284,7 @@ local function checkConnect(fd, connecting, toWake) return -1 end end + return 0 end function CasyncHttp:procStream(fd, stream, toWake) @@ -288,7 +293,11 @@ function 
CasyncHttp:procStream(fd, stream, toWake) if res then local fread = g_lb:read(fd) local tReq = self:result(fread) - res, msg = coroutine.resume(toWake, tReq.data) + if tReq then + res, msg = coroutine.resume(toWake, tReq.data) + else + res, msg = coroutine.resume(toWake, 'procSSLStream no req.') + end assert(res, msg) else res, msg = coroutine.resume(toWake, "write failed.") @@ -297,17 +306,6 @@ function CasyncHttp:procStream(fd, stream, toWake) g_lb:co_exit(fd) end -function CasyncHttp:_get(fd) - local toWake, domain, uri, headers, body, connecting = coroutine.yield() - - if checkConnect(fd, connecting, toWake) == -1 then - g_lb:co_exit(fd) - return - end - local stream = self:pack('GET', domain, uri, headers, body) - self:procStream(fd, stream, toWake) -end - function CasyncHttp:connect(domain, uri, port, headers, body, cb) port = port or 80 local ip = getIp(domain) @@ -337,6 +335,29 @@ function CasyncHttp:connect(domain, uri, port, headers, body, cb) end end +function CasyncHttp:assertConnect(fd, connecting, toWake) + local res, msg + + if self:checkConnect(fd, connecting, toWake) == -1 then + g_lb:co_exit(fd) + res, msg = coroutine.resume(toWake, "domain connect failed.") + assert(res, msg) + return -1 + end + return 0 +end + +function CasyncHttp:_get(fd) + local toWake, domain, uri, headers, body, connecting = coroutine.yield() + + if self:assertConnect(fd, connecting, toWake) < 0 then + return + end + + local stream = self:pack('GET', domain, uri, headers, body) + self:procStream(fd, stream, toWake) +end + function CasyncHttp:get(domain, uri, port) return self:connect(domain, uri, port, {}, "", self._get) end @@ -344,10 +365,10 @@ end function CasyncHttp:_put(fd) local toWake, domain, uri, headers, body, connecting = coroutine.yield() - if checkConnect(fd, connecting, toWake) == -1 then - g_lb:co_exit(fd) + if self:assertConnect(fd, connecting, toWake) < 0 then return end + local stream = self:pack('PUT', domain, uri, headers, body) self:procStream(fd, 
stream, toWake) end @@ -359,10 +380,10 @@ end function CasyncHttp:_post(fd) local toWake, domain, uri, headers, body, connecting = coroutine.yield() - if checkConnect(fd, connecting, toWake) == -1 then - g_lb:co_exit(fd) + if self:assertConnect(fd, connecting, toWake) < 0 then return end + local stream = self:pack('POST', domain, uri, headers, body) self:procStream(fd, stream, toWake) end diff --git a/source/tools/monitor/unity/httplib/asyncHttps.lua b/source/tools/monitor/unity/httplib/asyncHttps.lua new file mode 100644 index 0000000000000000000000000000000000000000..4e150b0d61926076b5dbc0b65c1543ba7e305375 --- /dev/null +++ b/source/tools/monitor/unity/httplib/asyncHttps.lua @@ -0,0 +1,96 @@ +--- +--- Generated by EmmyLua(https://github.com/EmmyLua) +--- Created by liaozhaoyan. +--- DateTime: 2023/8/22 10:24 AM +--- + +require("common.class") +local CasyncHttp = require("httplib.asyncHttp") + +local CasyncHttps = class("CasyncHttps", CasyncHttp) + +function CasyncHttps:_init_() + CasyncHttp._init_(self) +end + +function CasyncHttps:procSSLStream(fd, handle, stream, toWake) + local res, msg + res = g_lb:ssl_write(fd, handle, stream) + if res then + local fread = g_lb:ssl_read(handle) + local tReq = self:result(fread) + if tReq then + res, msg = coroutine.resume(toWake, tReq.data) + else + res, msg = coroutine.resume(toWake, 'procSSLStream no req.') + end + assert(res, msg) + else + res, msg = coroutine.resume(toWake, "write failed.") + assert(res, msg) + end + + g_lb:ssl_del(handle) + g_lb:co_exit(fd) +end + +function CasyncHttps:handshake(fd, toWake) + local res, msg + local handle = g_lb:ssl_handshake(fd) + if not handle then + res, msg = coroutine.resume(toWake, "tls handshake failed.") + g_lb:co_exit(fd) + return nil + end + return handle +end + +function CasyncHttps:httpPre(fd, connecting, toWake) + if self:assertConnect(fd, connecting, toWake) < 0 then + return nil + end + + local handle = self:handshake(fd, toWake) + if not handle then + return + end + 
return handle +end + +function CasyncHttps:_get(fd) + local toWake, domain, uri, headers, body, connecting = coroutine.yield() + + local handle = self:httpPre(fd, connecting, toWake) + if not handle then + return + end + + local stream = self:pack('GET', domain, uri, headers, body) + self:procSSLStream(fd, handle, stream, toWake) +end + +function CasyncHttps:_put(fd) + local toWake, domain, uri, headers, body, connecting = coroutine.yield() + + local handle = self:httpPre(fd, connecting, toWake) + if not handle then + return + end + + local stream = self:pack('PUT', domain, uri, headers, body) + self:procSSLStream(fd, stream, toWake) +end + +function CasyncHttps:_post(fd) + local toWake, domain, uri, headers, body, connecting = coroutine.yield() + + local handle = self:httpPre(fd, connecting, toWake) + if not handle then + return + end + + local stream = self:pack('POST', domain, uri, headers, body) + self:procSSLStream(fd, stream, toWake) +end + +return CasyncHttps diff --git a/source/tools/monitor/unity/httplib/asyncOSS.lua b/source/tools/monitor/unity/httplib/asyncOSS.lua index b7aa9aa0976937d71f85bf493de0310eb24ef9d8..5479c4a40ad4823c5c56306db5f7fde5ab472e78 100644 --- a/source/tools/monitor/unity/httplib/asyncOSS.lua +++ b/source/tools/monitor/unity/httplib/asyncOSS.lua @@ -8,17 +8,16 @@ require("common.class") local system = require("common.system") local pystring = require("common.pystring") local sha1 = require("sha1") -local base64 = require("base64") +local sls_api = require("sls_api") local CasyncHttp = require("httplib.asyncHttp") - +local base64 = require("base64") local CasyncOSS = class("asyncOSS", CasyncHttp) function CasyncOSS:_init_(res) CasyncHttp._init_(self) self._bucket = res.bucket self._endPoint = res.endPoint - self._ak = base64.decode(res.ak) - self._sk = base64.decode(res.sk) + self._k1, self._k2 = sls_api.decode(res.addition) end function CasyncOSS:sign(cType, date, uri) @@ -30,11 +29,11 @@ function CasyncOSS:sign(cType, date, uri) uri, 
-- } local s = table.concat(ss, '\n') - return base64.encode(sha1.hmac_binary(self._sk, s)) + return base64.encode(sha1.hmac_binary(self._k2, s)) end function CasyncOSS:auth(cType, date, uri) - local ss = {"OSS ", self._ak, ":", self:sign(cType, date, uri)} + local ss = {"OSS ", self._k1, ":", self:sign(cType, date, uri)} return table.concat(ss) end diff --git a/source/tools/monitor/unity/httplib/coAutoMetrics.lua b/source/tools/monitor/unity/httplib/coAutoMetrics.lua index b7d87aa6d3d30c526434c176a4997cfdcaae3674..b1b40ebada2026140f05bcfe758d2e6d6484d0be 100644 --- a/source/tools/monitor/unity/httplib/coAutoMetrics.lua +++ b/source/tools/monitor/unity/httplib/coAutoMetrics.lua @@ -7,21 +7,42 @@ require("common.class") local CcoMetrics = require("httplib.coMetrics") +local CcoHttpCliInst = require("httplib.coHttpCliInst") +local CtransPro = require("common.transPro") local CcoAutoMetrics = class("coAutoMetrics", CcoMetrics) -function CcoAutoMetrics:_init_(fYaml) - CcoMetrics._init_(self,fYaml) - - local ts = io.popen('curl 100.100.100.200/latest/meta-data/region-id') - local regionid = ts:read("*all") +function CcoAutoMetrics:_init_(fYaml, config, instance) + CcoMetrics._init_(self, config, fYaml) + local ChttpCli = require("httplib.httpCli") + local cli = ChttpCli.new() + local res = cli:get("100.100.100.200/latest/meta-data/region-id") + local regionid =res.body self._project = "sysom-metrics-" .. regionid self._endpoint = regionid .. "-intranet.log.aliyuncs.com" self._metricstore = "auto" - self._url = "/prometheus/" ..self._project.."/"..self._metricstore.."/api/v1/write" - self._host = self._project .."." .. self._endpoint - + self._url = "/prometheus/" .. self._project .. "/" .. self._metricstore .. "/api/v1/write" + self._host = self._project .. "." .. 
self._endpoint + + local pushMetrics = { + host = self._host, + url = self._url, + port = 80 + } + + CcoHttpCliInst._init_(self, instance, pushMetrics) + -- go ffi + local ffi = require("common.protobuf.metricstore.ffi_lua") + self.ffi = ffi.ffi + self.awesome = ffi.awesome + + --fox ffi + local foxFFI = require("tsdb.native.foxffi") + self.foxffi = foxFFI.ffi + self.foxcffi = foxFFI.cffi + + self._transPro = CtransPro.new(instance, fYaml, false, false) end return CcoAutoMetrics \ No newline at end of file diff --git a/source/tools/monitor/unity/httplib/coCli.lua b/source/tools/monitor/unity/httplib/coCli.lua index c6155e281deab6e55213940dd6664248ffd79fd0..1bcee3680cd921e0f37bbd8b313c9918b4b733d6 100644 --- a/source/tools/monitor/unity/httplib/coCli.lua +++ b/source/tools/monitor/unity/httplib/coCli.lua @@ -40,18 +40,14 @@ local socketWakeTbl = { enumStat.closed, } -function CcoCli:filterLines(cli, lines, body) - return cli:trans(lines, body, nil) -end - -function CcoCli:coQueFunc(cli, cffi, efd) +local function coQueFunc(cli, cffi, efd) local body local ok, msg while true do local lines = coroutine.yield() local stat = cli.status if lines and #lines.lines > 0 then - body = self:filterLines(cli, lines, body) + body = cli:trans(lines, body, nil) end if body then --> has data to send. 
if cli.co == nil and stat == enumStat.closed then -- not active @@ -75,7 +71,7 @@ function CcoCli:coQueFunc(cli, cffi, efd) end end -function CcoCli:checkOvertime(cli, co, ffi) +local function checkOvertime(cli, co, ffi) local ok, msg if cli.status == enumStat.connecting and cli:checkTime() >= 2 then local e = ffi.new("native_event_t") @@ -138,20 +134,19 @@ local function read_stream(fd) end end -function CcoCli:pushMsg(coOut, bytes) +local function pushMsg(coOut, lines) local ok, msg - local lines = self._proto:decode(bytes) ok, msg = coroutine.resume(coOut, lines) if not ok then - print(string.format("coOut run failed %s", msg)) + print(string.format("coOut run failed %s, check your yaml", msg)) end return ok end function CcoCli:_newOut(cli) - local coOut = coroutine.create(self.coQueFunc) - local ok, msg = coroutine.resume(coOut, self, cli, self.cffi, self._efd, coOut) + local coOut = coroutine.create(coQueFunc) + local ok, msg = coroutine.resume(coOut, cli, self.cffi, self._efd, coOut) if not ok then error(string.format("coOut run failed %s", msg)) return nil @@ -159,57 +154,67 @@ function CcoCli:_newOut(cli) return coOut end -function CcoCli:_pollFd(bfd, cli, nes, coIn, coOut) +function CcoCli:_pollFd(bfd, nes, coIn, coOuts) local ok, msg for i = 0, nes.num - 1 do local e = nes.evs[i]; local fd = e.fd - if fd == bfd then - ok, msg = coroutine.resume(coIn, e) + if fd == bfd then -- get message from pipe + ok, msg = coroutine.resume(coIn, e) -- coIn will call read_stream, yield full message if ok then if msg then - ok = self:pushMsg(coOut, msg) - if not ok then - coOut = self:_newOut(cli) - assert(coOut) + local lines = self._proto:decode(msg) + for cli, coOut in pairs(coOuts) do + ok = pushMsg(coOut, lines) -- call coQueFunc + if not ok then + coOuts[cli] = self:_newOut(cli) + assert(coOuts[cli]) + end end end else error(string.format("coIn run failed %s", msg)) end - elseif fd == cli.fd then - ok, msg = coroutine.resume(cli.co, e) - if not ok then - 
error(string.format("cli.co run failed %s", msg)) - end - if system:valueIsIn(socketWakeTbl, cli.status) then - ok, msg = coroutine.resume(coOut, nil) - if not ok then - error(string.format("coOut run failed %s", msg)) + + else + local set = false + for cli, coOut in pairs(coOuts) do + if fd == cli.fd then + set = true + ok, msg = coroutine.resume(cli.co, e) -- will call cli work function + if not ok then + error(string.format("cli.co run failed %s", msg)) + end + if system:valueIsIn(socketWakeTbl, cli.status) then + ok, msg = coroutine.resume(coOut, nil) + if not ok then + error(string.format("coOut run failed %s", msg)) + end + end end end - else - print("bad fd " .. fd .. "use fd " .. cli.fd) + + if not set then + print("bad fd " .. fd) + end end end - return coOut end -function CcoCli:_poll(cli) +function CcoCli:_poll(clis) local ok, msg local bfd = self._bfd local efd = self._efd local ffi, cffi = self.ffi, self.cffi + local coOuts = {} - --local coOut = coroutine.create(self.coQueFunc) - --ok, msg = coroutine.resume(coOut, self, cli, cffi, efd, coOut) - --if not ok then - -- error(string.format("coOut run failed %s", msg)) - --end - - local coOut = self:_newOut(cli) - assert(coOut) + -- setup for push to + for _, cli in ipairs(clis) do + coOuts[cli] = self:_newOut(cli) + assert(coOuts[cli]) + end + -- setup for in local coIn = coroutine.create(read_stream) ok, msg = coroutine.resume(coIn, bfd) if not ok then @@ -224,15 +229,15 @@ function CcoCli:_poll(cli) return "end poll." 
end if nes.num > 0 then - coOut = self:_pollFd(bfd, cli, nes, coIn, coOut) + self:_pollFd(bfd, nes, coIn, coOuts) else - self:checkOvertime(cli, coOut, ffi) + checkOvertime(coOuts, ffi) end end end -function CcoCli:poll(cli) - local _, msg = pcall(self._poll, self, cli) +function CcoCli:poll(clis) + local _, msg = pcall(self._poll, self, clis) print(msg) return 0 end diff --git a/source/tools/monitor/unity/httplib/coHttpCli.lua b/source/tools/monitor/unity/httplib/coHttpCli.lua index 83c7b1a22da676def801b3ef1d8d1c76e36e8a70..cbc0211606bc33ff1cc22677ac985d5a922510af 100644 --- a/source/tools/monitor/unity/httplib/coHttpCli.lua +++ b/source/tools/monitor/unity/httplib/coHttpCli.lua @@ -286,7 +286,6 @@ local function waitHttpRest(fread, tReq) if tReq.header["content-length"] then local lenData = #tReq.data local lenInfo = tonumber(tReq.header["content-length"]) - print("len: " .. lenInfo) local rest = lenInfo - lenData if rest > 10 * 1024 * 1024 then -- limit max data len @@ -433,10 +432,11 @@ function CcoHttpCli:coWrite(cffi, efd, fd, stream) sent, err, errno = socket.send(fd, stream) if sent == nil then if errno == 11 then -- EAGAIN ? + sent = 0 goto continue end checkInt(errno, fd) - print(string.format("socket send failed, report:%d, %s", err, errno)) + print(string.format("socket send failed, report:%s, %d", err, errno)) return false end else -- need to read ? may something error or closed. 
@@ -454,7 +454,7 @@ function CcoHttpCli:coWrite(cffi, efd, fd, stream) return true else checkInt(errno, fd) - print(string.format("socket send failed, report:%d, %s", err, errno)) + print(string.format("socket send failed, report:%s, %d", err, errno)) return false end end diff --git a/source/tools/monitor/unity/httplib/coInflux.lua b/source/tools/monitor/unity/httplib/coInflux.lua index 61f8e8634563983894871a59aee443dfdd1e335d..e596d12992ce6845ccb0552808194f1451f17b3c 100644 --- a/source/tools/monitor/unity/httplib/coInflux.lua +++ b/source/tools/monitor/unity/httplib/coInflux.lua @@ -12,23 +12,18 @@ local system = require("common.system") local CcoInflux = class("coInflux", CcoHttpCliInst) -function CcoInflux:_init_(fYaml) - local res = system:parseYaml(fYaml) +function CcoInflux:_init_(fYaml, config, instance) local pushInflux = { - host = res.pushTo.host, - url = res.pushTo.url, - port = res.pushTo.port + host = config.host, + url = config.url, + port = config.port } - - local Cidentity = require("beaver.identity") - local inst = Cidentity.new(fYaml) - local instance = inst:id() - CcoHttpCliInst._init_(self, instance, pushInflux) end function CcoInflux:echo(tReq) - if tReq.code ~= "204" then + --if tReq.code ~= "200" then + if string.sub(tReq.code,1,1) ~= "2" then print(tReq.code, tReq.data) end end diff --git a/source/tools/monitor/unity/httplib/coMetrics.lua b/source/tools/monitor/unity/httplib/coMetrics.lua index 1f3a9316d41f0415cf7bc2eef22519363f4f259c..dc71af0ff900f1306423420caa8c22f762a42e61 100644 --- a/source/tools/monitor/unity/httplib/coMetrics.lua +++ b/source/tools/monitor/unity/httplib/coMetrics.lua @@ -12,53 +12,83 @@ local pystring = require("common.pystring") local lineParse = require("common.lineParse") local CtransPro = require("common.transPro") local base64 = require("base64") -local addition = require("common.addition") +local sls_api = require("sls_api") local CcoMetrics = class("coMetrics", CcoHttpCliInst) -function 
CcoMetrics:_init_(fYaml) +function CcoMetrics:_init_(fYaml, config, instance) local res = system:parseYaml(fYaml) local _metrics = res.metrics self._mhead = _metrics.head self._title = _metrics.title - local Cidentity = require("beaver.identity") - local inst = Cidentity.new(fYaml) - local instance = inst:id() + local _addition = config.addition - local _addition = res.pushTo.addition + self._key1, self._key2 = sls_api.decode(_addition) + self._project = config.project + self._endpoint = config.endpoint + self._metricstore = config.metricstore - self._key1, self._key2 = addition:decode(_addition) - self._project = res.pushTo.project - self._endpoint = res.pushTo.endpoint - self._metricstore = res.pushTo.metricstore - self._url = "/prometheus/" ..self._project.."/"..self._metricstore.."/api/v1/write" - self._host = self._project .."." .. self._endpoint + if self._project and self._endpoint and self._metricstore then + self._url = "/prometheus/" ..self._project.."/"..self._metricstore.."/api/v1/write" + self._host = self._project .."." .. 
self._endpoint + + local pushMetrics = { + host = self._host, + url = self._url, + port = 80 + } + CcoHttpCliInst._init_(self, instance, pushMetrics) + -- go ffi + local ffi = require("common.protobuf.metricstore.ffi_lua") + self.ffi = ffi.ffi + self.awesome = ffi.awesome + + --fox ffi + local foxFFI = require("tsdb.native.foxffi") + self.foxffi = foxFFI.ffi + self.foxcffi = foxFFI.cffi + + self._transPro = CtransPro.new(instance, fYaml, false, false) + end - local pushMetrics = { - host = self._host, - url = self._url, - port = 80 - } - CcoHttpCliInst._init_(self, instance, pushMetrics) - -- go ffi - local ffi = require("common.protobuf.metricstore.ffi_lua") - self.ffi = ffi.ffi - self.awesome = ffi.awesome - - --fox ffi - local foxFFI = require("tsdb.native.foxffi") - self.foxffi = foxFFI.ffi - self.foxcffi = foxFFI.cffi - - self._transPro = CtransPro.new(instance, fYaml, false, false) end function CcoMetrics:echo(tReq) - --if tReq.code ~= "204" then - print(tReq.code, tReq.data) - --end + --if tReq.code ~= "200" then + if string.sub(tReq.code,1,1) ~= "2" then + print(tReq.code, tReq.data) + end +end + +local function transLines(lines) + if not lines then + return {} + end + local res = {} + local c = 1 + for _, line in ipairs(lines) do + local cell = {title = line.line} + local labels = {} + if line.ls then + for _, vlabel in ipairs(line.ls) do + labels[vlabel.name] = vlabel.index + end + end + cell.labels = labels + + local values = {} + if line.vs then + for _, vvalue in ipairs(line.vs) do + values[vvalue.name] = vvalue.value + end + end + cell.values = values + res[c] = cell + c = c+1 + end + return res end function CcoMetrics:trans(msgs) @@ -66,7 +96,7 @@ function CcoMetrics:trans(msgs) local c = 0 local lines - lines = msgs.lines + lines = transLines(msgs.lines) res = self._transPro:export(lines) local prome = self.ffi.new("GoString") prome.p = res @@ -94,7 +124,6 @@ function CcoMetrics:pack(body) ["Authorization"] = "Basic " .. 
keys64, } local heads = self:packCliHeaders(head) - print("pack finish") return pystring:join("\r\n", {line, heads, body}) end diff --git a/source/tools/monitor/unity/httplib/coMetricstore.lua b/source/tools/monitor/unity/httplib/coMetricstore.lua new file mode 100644 index 0000000000000000000000000000000000000000..dfcf1c3856a2a2f7b69a53e7e291d1a5afc37233 --- /dev/null +++ b/source/tools/monitor/unity/httplib/coMetricstore.lua @@ -0,0 +1,102 @@ +--- +--- Generated by EmmyLua(https://github.com/EmmyLua) +--- Created by liaozhaoyan. +--- DateTime: 2023/11/22 12:15 AM +--- + +require("common.class") + +local CcoHttpCliInst = require("httplib.coHttpCliInst") +local system = require("common.system") +local pystring = require("common.pystring") +local snappy = require("snappy") +local base64 = require("base64") +local pro_encode = require("pro_encode") +local sls_api = require("sls_api") +local CtransPro = require("common.transPro") + +local CcoMetricstore = class("coMetricstore", CcoHttpCliInst) + +local headers = { + ["Content-Encoding"] = "snappy", + ["Content-Type"] = "application/x-protobuf", + ["X-Prometheus-Remote-Write-Version"] = "0.1.0", +} + +function CcoMetricstore:_init_(fYaml, config, instance) + + local k1, k2 = sls_api.decode(config.addition) + local s = base64.encode(k1 .. ":" .. k2) + self._headers = system:deepcopy(headers) + self._headers["Authorization"] = "Basic " .. 
s + self._headers.Host = config.host + + self._transPro = CtransPro.new(instance, fYaml, false, false) + + local metricstore = { + host = config.host, + url = config.url, + port = 80 + } + + CcoHttpCliInst._init_(self, instance, metricstore) +end + +function CcoMetricstore:echo(tReq) + if string.sub(tReq.code,1,1) ~= "2" then + print(tReq.code, tReq.data) + end +end + +local function transLines(lines) + if not lines then + return {} + end + local res = {} + local c = 1 + for _, line in ipairs(lines) do + local cell = {title = line.line} + local labels = {} + if line.ls then + for _, vlabel in ipairs(line.ls) do + labels[vlabel.name] = vlabel.index + end + end + cell.labels = labels + + local values = {} + if line.vs then + for _, vvalue in ipairs(line.vs) do + values[vvalue.name] = vvalue.value + end + end + cell.values = values + res[c] = cell + c = c + 1 + end + return res +end + +function CcoMetricstore:trans(msgs) + local res + local lines + + lines = transLines(msgs.lines) + res = self._transPro:toMetric(lines) + if res and #res > 0 then + local encode = pro_encode.encode(res, os.time() * 1000) + local compress = snappy.compress(encode) + return compress + else + return nil + end +end + +function CcoMetricstore:pack(body) + local line = self:packCliHead('POST', self._url) + self._headers["Content-Length"] = #body + local heads = self:packCliHeaders(self._headers) + return pystring:join("\r\n", {line, heads, body}) +end + +return CcoMetricstore \ No newline at end of file diff --git a/source/tools/monitor/unity/httplib/coSls.lua b/source/tools/monitor/unity/httplib/coSls.lua new file mode 100644 index 0000000000000000000000000000000000000000..71c55e50491e41fb6c7acf3f199ebd2d0382c6df --- /dev/null +++ b/source/tools/monitor/unity/httplib/coSls.lua @@ -0,0 +1,107 @@ +--- +--- Generated by EmmyLua(https://github.com/EmmyLua) +--- Created by liaozhaoyan. 
+--- DateTime: 2023/11/22 6:03 PM +--- + +require("common.class") +local sls_api = require("sls_api") +local enumStat = require("httplib.enumStat") +local pystring = require("common.pystring") +local lineParse = require("common.lineParse") + +local CcoSls = class("CcoSls") + +function CcoSls:_init_(fYaml, config, instance) + self._sls = sls_api.new({config.endpoint, config.project, config.logstore, config.addition}) + self._instance = instance + + self.status = enumStat.closed + self.fd = -1 +end + +function CcoSls:checkTime() + return os.time() - self.online +end + + +function CcoSls:addInstance(line) -- add instance id for line index. + local cells = line.ls + local hasInstance = false + + if cells then + for _, cell in ipairs(cells) do + if cell.name == "instance" then + hasInstance = true + end + end + end + + if not hasInstance then + local cell = { + name = "instance", + index = self._instance + } + if cells then + table.insert(cells, cell) + else + line.ls = {cell} + end + end +end + +function CcoSls:transLine(line) + return lineParse.packs(line) +end + +function CcoSls:trans(msgs, body, filter) + local res + local c = 0 + local lines + local ts = string.format(" %d000", os.time()) -- nano second + local bodies = {} + + lines = msgs.lines + for _, line in ipairs(lines) do + self:addInstance(line) + local log = self:transLine(line) + if log then + c = c + 1 + bodies[c] = log .. 
ts + end + end + + if #bodies then + res = pystring:join("\n", bodies) + end + if body and #body > 0 then + if res then + return pystring:join("\n", {body, res}) + else + return body + end + else + return res + end +end + +function CcoSls:pack(body) + return { + instance = self._instance, + log = body + } +end + +function CcoSls:work(cffi, efd) + self.status = enumStat.connected + + self.online = os.time() + while true do + local body = coroutine.yield() + local t = self:pack(body) + self._sls:send(t) + end +end + +return CcoSls + diff --git a/source/tools/monitor/unity/httplib/coSlsLog.lua b/source/tools/monitor/unity/httplib/coSlsLog.lua new file mode 100644 index 0000000000000000000000000000000000000000..b6bcd29830cf5ca2349ee9770acd2b393562ee23 --- /dev/null +++ b/source/tools/monitor/unity/httplib/coSlsLog.lua @@ -0,0 +1,21 @@ +--- +--- Generated by EmmyLua(https://github.com/EmmyLua) +--- Created by liaozhaoyan. +--- DateTime: 2023/11/22 6:03 PM +--- + +require("common.class") +local lineParse = require("common.lineParse") + +local coSls = require("httplib.coSls") +local CcoSlsLog = class("CcoSlsLog", coSls) + +function CcoSlsLog:_init_(fYaml, config, instance) + coSls._init_(self, fYaml, config, instance) +end + +function CcoSlsLog:transLine(line) + return lineParse.packLog(line) +end + +return CcoSlsLog diff --git a/source/tools/monitor/unity/httplib/coSlsMetric.lua b/source/tools/monitor/unity/httplib/coSlsMetric.lua new file mode 100644 index 0000000000000000000000000000000000000000..cc36f018199aad348416eee05e595d3377f99d08 --- /dev/null +++ b/source/tools/monitor/unity/httplib/coSlsMetric.lua @@ -0,0 +1,21 @@ +--- +--- Generated by EmmyLua(https://github.com/EmmyLua) +--- Created by liaozhaoyan. 
+--- DateTime: 2023/11/22 6:03 PM +--- + +require("common.class") +local lineParse = require("common.lineParse") + +local coSls = require("httplib.coSls") +local CcoSlsMetric = class("CcoSlsLog", coSls) + +function CcoSlsMetric:_init_(fYaml, config, instance) + coSls._init_(self, fYaml, config, instance) +end + +function CcoSlsMetric:transLine(line) + return lineParse.packMetric(line) +end + +return CcoSlsMetric diff --git a/source/tools/monitor/unity/httplib/httpApp.lua b/source/tools/monitor/unity/httplib/httpApp.lua index 996792efbde2ccbd057012e05c185ebf672f5c5e..ffc5df9464fe8ecbab4277906b6cc24c101c9f1d 100644 --- a/source/tools/monitor/unity/httplib/httpApp.lua +++ b/source/tools/monitor/unity/httplib/httpApp.lua @@ -21,7 +21,7 @@ function ChttpApp:echo(tRet, keep, code) ["Content-Type"] = "application/json", ["Connection"] = (keep and "keep-alive") or "close" } - local body = self:jencode(tRet) + local body = self:jencode(tRet) or "None." local headers = self:packServerHeaders(tHead, #body) local tHttp = {stat, headers, body} return pystring:join("\r\n", tHttp) diff --git a/source/tools/monitor/unity/httplib/httpCli.lua b/source/tools/monitor/unity/httplib/httpCli.lua index 5b25cf189f1a0400fa4186eeb31b463c4ff67b5f..05ed4a90f255ca29a5a1ee0746e6c1e7f9ef1f94 100644 --- a/source/tools/monitor/unity/httplib/httpCli.lua +++ b/source/tools/monitor/unity/httplib/httpCli.lua @@ -5,6 +5,7 @@ --- require("common.class") +local system = require("common.system") local ChttpComm = require("httplib.httpComm") local ChttpCli = class("httpCli", ChttpComm) @@ -90,4 +91,38 @@ function ChttpCli:postLine(Url, line) return self:post(Url, line, headers) end +local function addContent(content, c, line) + content[c] = line + return c + 1 +end + +function ChttpCli:postFormData(Url, fData) + local headers = { + ["accept"] = "application/json", + ["Content-Type"] = "multipart/form-data" + } + + local boundary = "----" .. 
system:randomStr(32) + local c = 1 + local content = {} + for k, v in pairs(fData) do + c = addContent(content, c, boundary) -- add boundary + if type(v) == "table" then -- file: name, stream, type + c = addContent(content, c, string.format('Content-Disposition: form-data; name="%s"; filename="%s"', k, v[1])) + c = addContent(content, c, string.format('Content-Type: %s', v[3])) + c = addContent(content, c, "") + c = addContent(content, c, v[2]) + else + c = addContent(content, c, string.format('content-disposition: form-data; name="%s"', k)) + c = addContent(content, c, "") + c = addContent(content, c, v) + end + end + addContent(content, c, boundary .. "--") + local s = table.concat(content, "\n") + headers["Content-type"] = string.format("multipart/form-data; boundary=%s", boundary) + headers["Content-Length"] = #s + return self:post(Url, s, headers) +end + return ChttpCli diff --git a/source/tools/monitor/unity/httplib/httpReq.lua b/source/tools/monitor/unity/httplib/httpReq.lua new file mode 100644 index 0000000000000000000000000000000000000000..70a0792169b00a40a952a315e2e1a25832cad7da --- /dev/null +++ b/source/tools/monitor/unity/httplib/httpReq.lua @@ -0,0 +1,117 @@ +--- +--- Generated by EmmyLua(https://github.com/EmmyLua) +--- Created by liaozhaoyan. 
+--- DateTime: 2023/8/27 11:29 PM +--- + +-- refer to https://github.com/daurnimator/lua-http/blob/master/examples/simple_request.lua +-- doc https://daurnimator.github.io/lua-http/0.4/#http.request + +require("common.class") +local request = require "http.request" +local system = require("common.system") +local ChttpComm = require("httplib.httpComm") +local ChttpReq = class("httpReq", ChttpComm) + + +function ChttpReq:_init_(proxy, tmo) + ChttpComm._init_(self) + self._proxy = proxy + self._tmo = tmo or 1 +end + +local function setReqConfig(obj, req) + if obj._proxy then + req.proxy = obj._proxy + end +end + +local function eachHeaders(header) + local head = {} + for k, v in header:each() do + head[k] = v + end + return head +end + +function ChttpReq:get(uri) + local req = request.new_from_uri(uri) + setReqConfig(self, req) + req.headers:upsert(":method", "GET") + + local headers, stream = req:go(self._tmo) + return { + code = headers:get(":status"), + head = eachHeaders(headers), + body = stream:get_body_as_string() + } +end + +function ChttpReq:post(uri, body, headers) + local req = request.new_from_uri(uri) + setReqConfig(self, req) + req.headers:upsert(":method", "POST") + for k, v in pairs(headers) do + req.headers:upsert(k, v) + end + req:set_body(body) + + local hdrs, stream = req:go(self._tmo) + return { + code = hdrs:get(":status"), + head = eachHeaders(hdrs), + body = stream:get_body_as_string() + } +end + +function ChttpReq:postTable(uri, t) + local req = self:jencode(t) + local headers = { + ["Content-Type"] = "application/json", + } + return self:post(uri, req, headers) +end + +function ChttpReq:postLine(uri, line) + local headers = { + ["Content-Type"] = "text/plain", + } + return self:post(uri, line, headers) +end + +local function addContent(content, c, line) + content[c] = line + return c + 1 +end + +function ChttpReq:postFormData(Url, headers, fData) + if not headers["accept"] then + headers["accept"] = "application/json" + end + if not 
headers["Content-Type"] then + headers["Content-Type"] = "multipart/form-data" + end + + local boundary = system:randomStr(32) + local c = 1 + local content = {} + for k, v in pairs(fData) do + c = addContent(content, c, "--" .. boundary) -- add boundary + if type(v) == "table" then -- file: name, stream, type -- files:["j"] + c = addContent(content, c, string.format('Content-Disposition: form-data; name="%s"; filename="%s"', k, v[1])) + c = addContent(content, c, string.format('Content-Type: %s', v[3])) + c = addContent(content, c, "") + c = addContent(content, c, v[2]) + else + c = addContent(content, c, string.format('Content-Disposition: form-data; name="%s"', k)) + c = addContent(content, c, "") + c = addContent(content, c, v) + end + end + addContent(content, c, "--" .. boundary .. "--") + local s = table.concat(content, "\r\n") + headers["Content-Type"] = string.format("multipart/form-data; boundary=%s", boundary) + return self:post(Url, s, headers) +end + +return ChttpReq diff --git a/source/tools/monitor/unity/httplib/slsCli.lua b/source/tools/monitor/unity/httplib/slsCli.lua index be15d31b59c8746c07eb29c8dd0bd91223699ebc..3cd60d51a74b57f25b76a863469901a7c9c26d3a 100644 --- a/source/tools/monitor/unity/httplib/slsCli.lua +++ b/source/tools/monitor/unity/httplib/slsCli.lua @@ -7,14 +7,13 @@ require("common.class") local sha1 = require("sha1") -local lz4 = require("lz4") local md5 = require("md5") local base64 = require("base64") local system = require("common.system") local pystring = require("common.pystring") +local log_encode = require("log_encode") local ChttpCli = require("httplib.httpCli") -local CslsProto = require("protobuf.slsProto") local CslsCli = class("slsCli", ChttpCli) function CslsCli:_init_(endPoint, project, store, key, pswd, proxy) @@ -24,7 +23,6 @@ function CslsCli:_init_(endPoint, project, store, key, pswd, proxy) self._store = store self._key = key self._pswd = pswd - self._proto = CslsProto.new() end local function packLog(vm, log) @@ 
-85,11 +83,11 @@ function CslsCli:signature(heads, uri, msg) heads["Authorization"] = string.format("LOG %s:%s", self._key, sign) end -function CslsCli:putLog(vm, log) +function CslsCli:putLog(log) local uri = "/logstores/" .. self._store .. "/shards/lb" - local logList = packLog(vm, log) - print(self:jencode(logList)) - local msg = self._proto:pack(logList) + + local msg = log_encode:encode(log) + system:hexdump(msg) local heads = packHead(msg, self._project, self._endPoint) self:signature(heads, uri, msg) local url = string.format("http://%s%s", self._endPoint, uri) diff --git a/source/tools/monitor/unity/test/collector/df/df.lua b/source/tools/monitor/unity/test/collector/df/df.lua index 0f7842b20e6ec602bf151c8650bf21f48dc91d64..f65e9a09df3d812d61b2e96969b2b0f08f1fbce0 100644 --- a/source/tools/monitor/unity/test/collector/df/df.lua +++ b/source/tools/monitor/unity/test/collector/df/df.lua @@ -9,6 +9,7 @@ local statvfs = require "posix.sys.statvfs".statvfs local pystring = require("pystring") local system = require("system") + function get_lines(fName) local lines = {} local fName = fName or "/proc/mounts" @@ -65,4 +66,4 @@ function walk_fs() end end -walk_fs() \ No newline at end of file +walk_fs() diff --git a/source/tools/monitor/unity/test/curl/diagnose/getToken.lua b/source/tools/monitor/unity/test/curl/diagnose/getToken.lua new file mode 100644 index 0000000000000000000000000000000000000000..1e1c2a3964bfdba6bdb4ff00e08f1f4bf8d9b5d9 --- /dev/null +++ b/source/tools/monitor/unity/test/curl/diagnose/getToken.lua @@ -0,0 +1,25 @@ +--- +--- Generated by EmmyLua(https://github.com/EmmyLua) +--- Created by wrp. +--- DateTime: 2023/8/25 09:29 +--- + +package.path = package.path .. 
";../../../?.lua;" +local system = require("common.system") +local ChttpCli = require("httplib.httpCli") + +local cli = ChttpCli.new() +local url = "http://127.0.0.1:8400/api/diag" + +local header = { + accept = "application/json", + ["Content-Type"] = "application/json" +} +local body = { + username = "admin", + password = "123456" +} +local req = {host = "192.168.0.121", uri = "/api/v1/auth/", headers = header, body = body} +local res = cli:postTable(url, req) + +system:dumps(res) \ No newline at end of file diff --git a/source/tools/monitor/unity/test/curl/postCec.py b/source/tools/monitor/unity/test/curl/postCec.py new file mode 100644 index 0000000000000000000000000000000000000000..1b170c9fbbad4b075645775308161539ae98185d --- /dev/null +++ b/source/tools/monitor/unity/test/curl/postCec.py @@ -0,0 +1,17 @@ +import json +import requests + +url = "http://127.0.0.1:8400/api/alert" +data = {"alert_item": "sysload_high", + "alert_category": "MONITOR", + "alert_source_type": "grafana", + "alert_level": "WARNING", + "status": "FIRING", + "labels": { + "instance": "192.168.23.6", + "grafana_folder": "rules", + "alertname": "test" + } +} +res = requests.post(url, data=json.dumps(data)) +print(res.content, res) \ No newline at end of file diff --git a/source/tools/monitor/unity/test/curl/postDiag.lua b/source/tools/monitor/unity/test/curl/postDiag.lua index 8082eb06e1d81abf5c646b131a48ff2ec09b7a00..da3f9f54fd66b23e03b90bb62f5b52bedb17aac7 100644 --- a/source/tools/monitor/unity/test/curl/postDiag.lua +++ b/source/tools/monitor/unity/test/curl/postDiag.lua @@ -9,8 +9,21 @@ local system = require("common.system") local ChttpCli = require("httplib.httpCli") local cli = ChttpCli.new() -local url = "http://127.0.0.1:8400/api/trig" +local url = "http://127.0.0.1:8400/api/diag" --local req = {cmd = "diag", exec = "io_hang", args = {"hangdetect", "vda"}} -local req = {cmd = "diag", exec = "io_hang", args = {"hangdetect", "vda"}, uid = system:guid()} +--local req = {cmd = "diag", 
exec = "io_hang", args = {"hangdetect", "vda"}, uid = system:guid()} +local headers = { + --accept = "application/json", + --["Content-Type"] = "application/json", + --authorization = "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpZCI6MSwidXNlcm5hbWUiOiJhZG1pbiIsImV4cCI6MTY5MzA0MzYwMi43NDc1NDh9.pm78vETkFeR8xX-TFA4ROVjVzO_VlfUuwUA3TzTxpfA" +} +local body = { + service_name = "iohang", + params = { + instance= "127.0.0.1" + } +} +local req = {host = "192.168.0.121", uri = "/api/v1/tasks/sbs_task_create/", headers = headers, body = body} local res = cli:postTable(url, req) + system:dumps(res) \ No newline at end of file diff --git a/source/tools/monitor/unity/test/curl/postDiag.py b/source/tools/monitor/unity/test/curl/postDiag.py new file mode 100644 index 0000000000000000000000000000000000000000..d8a19d9202235b047056131e76aab55b32135c17 --- /dev/null +++ b/source/tools/monitor/unity/test/curl/postDiag.py @@ -0,0 +1,9 @@ +import json +import requests + +url = "http://127.0.0.1:8400/api/diag" +params = {"instance" : "127.0.0.1", "nums" : "3"} +body = {"service_name": "jruntime", "params": params} +data = {"body": body} +res = requests.post(url, data=json.dumps(data)) +print(res.content, res) \ No newline at end of file diff --git a/source/tools/monitor/unity/test/curl/postFormData.lua b/source/tools/monitor/unity/test/curl/postFormData.lua new file mode 100644 index 0000000000000000000000000000000000000000..319a2fade55808e79558aca4eb1998d5a82a3818 --- /dev/null +++ b/source/tools/monitor/unity/test/curl/postFormData.lua @@ -0,0 +1,17 @@ +--- +--- Generated by EmmyLua(https://github.com/EmmyLua) +--- Created by liaozhaoyan. +--- DateTime: 2023/8/25 8:48 PM +--- + +package.path = package.path .. 
";../../?.lua;" +local system = require("common.system") +local ChttpCli = require("httplib.httpCli") + +local url = "http://127.0.0.1:8400/api/404" +local cli = ChttpCli.new() +local formData = { + hello = "world", + file = {"a.out", "hello, world.", "application/octet-stream"} +} +system:dumps(cli:postFormData(url, formData)) diff --git a/source/tools/monitor/unity/test/curl/postLine.lua b/source/tools/monitor/unity/test/curl/postLine.lua index e04791a91168261ab57b404481b8610bdf96c1e8..c114349f31c7c15d1ea45cdbe56ed3e4fcde8942 100644 --- a/source/tools/monitor/unity/test/curl/postLine.lua +++ b/source/tools/monitor/unity/test/curl/postLine.lua @@ -11,4 +11,5 @@ local ChttpCli = require("httplib.httpCli") local cli = ChttpCli.new() local url = "http://127.0.0.1:8400/api/line" local res = cli:postLine(url, "lineTable,index=abc value=1") +assert(res.code == "200") system:dumps(res) diff --git a/source/tools/monitor/unity/test/curl/postSSL.py b/source/tools/monitor/unity/test/curl/postSSL.py new file mode 100644 index 0000000000000000000000000000000000000000..e95b19985d0a5c1d016b27545280d06c209eae67 --- /dev/null +++ b/source/tools/monitor/unity/test/curl/postSSL.py @@ -0,0 +1,26 @@ +import json +import requests +import time +from threading import Thread + +class CsslThread(Thread): + def __init__(self): + super(CsslThread, self).__init__() + self.start() + + def run(self): + url = "http://127.0.0.1:8400/api/ssl" + data = {"host": "cn.bing.com", "uri": "/"} + while True: + try: + res = requests.post(url, data=json.dumps(data)) + except requests.exceptions.ConnectionError: + continue + assert(res.status_code == 200) + time.sleep(0.1) + +ps = [] +for i in range(10): + ps.append(CsslThread()) +for p in ps: + p.join() diff --git a/source/tools/monitor/unity/test/curl/reqGet.lua b/source/tools/monitor/unity/test/curl/reqGet.lua new file mode 100644 index 0000000000000000000000000000000000000000..8131121a34969211e42db25f67901f395d52603c --- /dev/null +++ 
b/source/tools/monitor/unity/test/curl/reqGet.lua @@ -0,0 +1,13 @@ +--- +--- Generated by EmmyLua(https://github.com/EmmyLua) +--- Created by liaozhaoyan. +--- DateTime: 2023/8/27 11:46 PM +--- + +package.path = package.path .. ";../../?.lua;" +local system = require("common.system") +local ChttpReq = require("httplib.httpReq") + +local req = ChttpReq.new() + +system:dumps(req:get("https://cn.bing.com/")) diff --git a/source/tools/monitor/unity/test/curl/reqLine.lua b/source/tools/monitor/unity/test/curl/reqLine.lua new file mode 100644 index 0000000000000000000000000000000000000000..3f1a2aaa790b46e5afe442fc230153de89be81ed --- /dev/null +++ b/source/tools/monitor/unity/test/curl/reqLine.lua @@ -0,0 +1,15 @@ +--- +--- Generated by EmmyLua(https://github.com/EmmyLua) +--- Created by liaozhaoyan. +--- DateTime: 2023/8/28 7:55 AM +--- + +package.path = package.path .. ";../../?.lua;" +local system = require("common.system") +local ChttpReq = require("httplib.httpReq") + +local cli = ChttpReq.new() +local url = "http://127.0.0.1:8400/api/line" +local res = cli:postLine(url, "lineTable,index=abc value=1") +assert(res.code == "200") +system:dumps(res) diff --git a/source/tools/monitor/unity/test/curl/sls/slsTest.lua b/source/tools/monitor/unity/test/curl/sls/slsTest.lua index a9e2e5752bc2ddbab80cb2046637f32eb83a26ec..82810e0a9ddb3f8fb2f75e86706243e5cd9e9768 100644 --- a/source/tools/monitor/unity/test/curl/sls/slsTest.lua +++ b/source/tools/monitor/unity/test/curl/sls/slsTest.lua @@ -4,13 +4,18 @@ --- DateTime: 2023/1/30 3:56 PM --- -package.path = package.path .. ";../../../common/?.lua;" -package.path = package.path .. ";../../../httplib/?.lua;" +package.path = package.path .. ";../../../?.lua;" +package.path = package.path .. ";../../../?.lua;" -package.path = package.path .. ";../../../protobuf/?.lua;" +package.path = package.path .. 
";../../../?.lua;" -local CslsCli = require("slsCli") +local CslsCli = require("httplib.slsCli") -local cli = CslsCli.new("xxx") +local lines = {} +for line in io.lines("config") do + table.insert(lines, line) +end -cli:putLog("abc", "hello.") +local cli = CslsCli.new(lines[1], lines[2], lines[3], lines[4], lines[5]) + +cli:putLog({vm = "abc", log = "hello."}) diff --git a/source/tools/monitor/unity/test/fox/fox.yaml b/source/tools/monitor/unity/test/fox/fox.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a3ec6801f9804f87bf97c10e79ba7ab2c4e27d16 --- /dev/null +++ b/source/tools/monitor/unity/test/fox/fox.yaml @@ -0,0 +1,4 @@ +config: + db: + rotate: 7 # tsdb file retention time, unit day + budget: 200 # max query buffer from tsdb. \ No newline at end of file diff --git a/source/tools/monitor/unity/test/fox/foxRead.lua b/source/tools/monitor/unity/test/fox/foxRead.lua new file mode 100644 index 0000000000000000000000000000000000000000..0f5e8e61a468391d9e8df79b4bc04c7377f8e741 --- /dev/null +++ b/source/tools/monitor/unity/test/fox/foxRead.lua @@ -0,0 +1,27 @@ +--- +--- Generated by EmmyLua(https://github.com/EmmyLua) +--- Created by liaozhaoyan. +--- DateTime: 2023/8/15 10:40 AM +--- + +package.path = package.path .. 
";../../?.lua" + +local CfoxTSDB = require("tsdb.foxTSDB") +local system = require("common.system") + +local fYaml = "fox.yaml" + +local rdb = CfoxTSDB.new(fYaml) +local res = {} + +rdb:_setupRead() +rdb:qlast(5, res) +assert(#res >= 4) + +system:sleep(5) +print("after.") +rdb:resize() +res = {} +rdb:qlast(6, res) +assert(#res >= 5) +print("db query test ok.") diff --git a/source/tools/monitor/unity/test/fox/foxWrite.lua b/source/tools/monitor/unity/test/fox/foxWrite.lua new file mode 100644 index 0000000000000000000000000000000000000000..f4f462de9290179df24bb351c33eca4067d9b30e --- /dev/null +++ b/source/tools/monitor/unity/test/fox/foxWrite.lua @@ -0,0 +1,33 @@ +--- +--- Generated by EmmyLua(https://github.com/EmmyLua) +--- Created by liaozhaoyan. +--- DateTime: 2023/8/12 5:48 PM +--- + +package.path = package.path .. ";../../?.lua" + +local pbApi = require("test.protoBuf.pbInterFace") +local CfoxTSDB = require("tsdb.foxTSDB") +local system = require("common.system") + +local fYaml = "fox.yaml" + +local wdb = CfoxTSDB.new(fYaml) +wdb:setupWrite() +system:dumps(wdb:fileNames()) + +local c = 1 +while true do + local lines = { + "hello,title=hello value1=3,value2=4", + string.format("test v=3,add=%d", c), + 'log,title=hello str="test.hello."' + } + local res = pbApi.protoLines(table.concat(lines, "\n")) + wdb:write(res) + + print("write.") + c = c + 1 + system:sleep(1) +end + diff --git a/source/tools/monitor/unity/test/fox/libPath.sh b/source/tools/monitor/unity/test/fox/libPath.sh new file mode 100644 index 0000000000000000000000000000000000000000..d9c01fc71c0242132016228d9d63e1a541f2a546 --- /dev/null +++ b/source/tools/monitor/unity/test/fox/libPath.sh @@ -0,0 +1,3 @@ +#jump to tsdb/native make then source. 
+export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:../../tsdb/native +export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/lib/ # for yaml \ No newline at end of file diff --git a/source/tools/monitor/unity/test/fox/query.py b/source/tools/monitor/unity/test/fox/query.py index 41d6e5567de86922e71c451451523f87a1cecc22..734a645bd0002c9b9946a7e2f5c1ae49eace526a 100644 --- a/source/tools/monitor/unity/test/fox/query.py +++ b/source/tools/monitor/unity/test/fox/query.py @@ -8,8 +8,8 @@ import json def post_test(d): url = "http://127.0.0.1:8400/api/query" res = requests.post(url, json=d) - ret = res.content.decode() - print(ret) + ret = json.loads(res.content.decode()) + print(len(ret)) def q_table(): @@ -22,19 +22,31 @@ def q_by_table(): def q_by_date(): now = datetime.datetime.now() - delta1 = datetime.timedelta(days=1, hours=1) - delta2 = datetime.timedelta(minutes=5) + delta1 = datetime.timedelta(minutes=0) + delta2 = datetime.timedelta(minutes=2) d1 = now - delta1 d2 = d1 - delta2 s1 = d1.strftime("%Y-%m-%d %H:%M:%S") s2 = d2.strftime("%Y-%m-%d %H:%M:%S") - print(s1, s2) - post_test({"mode": "date", "start": s2, "stop": s1, "tz": 8, "table": ["cpu_total", "cpus"]}) + print(s2, s1) + post_test({"mode": "date", "start": s2, "stop": s1, "tz": 8, "table": ["cpu_total"]}) + +def q_by_sql(): + post_test("SELECT net_rx, rcu FROM per_sirqs WHERE time > NOW(-10) and cpu = cpu1") + now = datetime.datetime.now() + delta1 = datetime.timedelta(hours=8) + delta2 = datetime.timedelta(minutes=5) + d1 = now + delta1 + d2 = d1 - delta2 + s1 = d1.strftime("%Y-%m-%d %H:%M:%S") + s2 = d2.strftime("%Y-%m-%d %H:%M:%S") + sqlclause = "SELECT net_rx, rcu FROM per_sirqs WHERE time BETWEEN '" + s2 + "' and '" + s1 + "' and cpu = cpu1" + post_test({"mode": "sql", "sql": sqlclause}) if __name__ == "__main__": # post_test({"mode": "last", "time": "4m"}) # q_table() - q_by_table() - # q_by_date() + # q_by_table() + q_by_date() diff --git a/source/tools/monitor/unity/test/fox/sqlquery.py 
b/source/tools/monitor/unity/test/fox/sqlquery.py new file mode 100644 index 0000000000000000000000000000000000000000..f6217c1546ec45cd1759cba61b1ecf6408834b3e --- /dev/null +++ b/source/tools/monitor/unity/test/fox/sqlquery.py @@ -0,0 +1,27 @@ +import time +import datetime +import requests +import random +import json + +def post_test(d): + url = "http://127.0.0.1:8400/api/sql" + res = requests.post(url, json=d) + ret = res.content.decode() + print(ret) + +def q_by_sql(): + post_test("SELECT net_rx, rcu FROM per_sirqs WHERE time > NOW(-10) and cpu = cpu1") + now = datetime.datetime.now() + delta1 = datetime.timedelta(hours=8) + delta2 = datetime.timedelta(minutes=5) + d1 = now + delta1 + d2 = d1 - delta2 + s1 = d1.strftime("%Y-%m-%d %H:%M:%S") + s2 = d2.strftime("%Y-%m-%d %H:%M:%S") + sqlclause = "SELECT net_rx, rcu FROM per_sirqs WHERE time BETWEEN '" + s2 + "' and '" + s1 + "' and cpu = cpu1" + post_test(sqlclause) + +if __name__ == "__main__": + + q_by_sql() \ No newline at end of file diff --git a/source/tools/monitor/unity/test/fox/tFoxSQL.lua b/source/tools/monitor/unity/test/fox/tFoxSQL.lua new file mode 100644 index 0000000000000000000000000000000000000000..582d008d8ea65677cbb4f51874fb9e5e79d0e8c6 --- /dev/null +++ b/source/tools/monitor/unity/test/fox/tFoxSQL.lua @@ -0,0 +1,266 @@ +--- +--- Generated by EmmyLua(https://github.com/EmmyLua) +--- Created by liaozhaoyan. +--- DateTime: 2023/8/14 12:26 AM +--- + +-- need to make in ../../tsdb/native then source libPath.sh at first. +package.path = package.path .. ";../../?.lua" + +local system = require("common.system") +local CfoxSQL = require("tsdb.foxSQL") +local fYaml = "fox.yaml" +local parser = CfoxSQL.new(fYaml) +local r +local correct + +local sql = "SELECT * FROM tbl_abc WHERE name == 'zhaoyan'" +r = parser:parse(sql) +assert(r.error == nil, "bad res for " .. sql) +assert(r.cursorpos == nil, "bad res for " .. 
sql) +assert(r.version == 130008, "check pgsql failed.") + +local SelectStmt = r.stmts[1].stmt.SelectStmt +assert( SelectStmt ~= nil, "SelectStmt parse error.") + +local targetList = SelectStmt.targetList +assert( type(targetList) == "table", "targetList parse error.") +assert(type(targetList[1].ResTarget.val.ColumnRef.fields[1].A_Star) == "table", "parse select * failed.") + +local fromClause = SelectStmt.fromClause +assert(fromClause[1].RangeVar.relname == "tbl_abc") + + +local whereClause = SelectStmt.whereClause +correct = parser:hasTimeLimit(whereClause) +if correct then + print(sql .. " is correct fox SQL") +else + print(sql .. " is not correct fox SQL") +end +assert(whereClause.A_Expr.kind == "AEXPR_OP", "AEXPR_OP failed.") +assert(whereClause.A_Expr.name[1].String.str == "==", "AEXPR_OP string failed.") +assert(whereClause.A_Expr.lexpr.ColumnRef.fields[1].String.str == "name", "parser op lexpr failed.") +assert( whereClause.A_Expr.rexpr.A_Const.val.String.str == "zhaoyan", "parser op rexpr failed.") + +assert(SelectStmt.limitOption == "LIMIT_OPTION_DEFAULT", "parser limitOption failed.") +assert(SelectStmt.op == "SETOP_NONE", "parser limitOption failed.") +print("parse sql \n\t" .. sql .. "\n ok.") + +local sql = "SELECT name FROM tbl_abc WHERE age > 15" +r = parser:parse(sql) +assert(r.error == nil, "bad res for " .. sql) +assert(r.cursorpos == nil, "bad res for " .. 
sql) +assert(r.version == 130008, "check pgsql failed.") + +local SelectStmt = r.stmts[1].stmt.SelectStmt +assert( SelectStmt ~= nil, "SelectStmt parse error.") + +local targetList = SelectStmt.targetList +assert( type(targetList) == "table", "targetList parse error.") +assert(targetList[1].ResTarget.val.ColumnRef.fields[1].String.str == "name", "parse select name failed.") + + +local fromClause = SelectStmt.fromClause +assert(fromClause[1].RangeVar.relname == "tbl_abc") + +local whereClause = SelectStmt.whereClause +assert(whereClause.A_Expr.kind == "AEXPR_OP", "AEXPR_OP failed.") +assert(whereClause.A_Expr.lexpr.ColumnRef.fields[1].String.str == "age", "parser op lexpr failed.") +assert(whereClause.A_Expr.name[1].String.str == ">", "parser op lexpr failed.") +assert( whereClause.A_Expr.rexpr.A_Const.val.Integer.ival == 15, "parser op rexpr failed.") + +assert(SelectStmt.limitOption == "LIMIT_OPTION_DEFAULT", "parser limitOption failed.") +assert(SelectStmt.op == "SETOP_NONE", "parser limitOption failed.") +print("parse sql \n\t" .. sql .. "\n ok.") + +local sql = "SELECT * FROM tbl_abc WHERE name NOT IN ('zhaoyan','qiaoke')" +r = parser:parse(sql) +assert(r.error == nil, "bad res for " .. sql) +assert(r.cursorpos == nil, "bad res for " .. sql) +assert(r.version == 130008, "check pgsql failed.") +print("parse sql \n\t" .. sql .. "\n ok.") + +sql = "SHOW TABLES" +r = parser:parse(sql) +assert(r.error == nil, "bad res for " .. sql) +assert(r.cursorpos == nil, "bad res for " .. sql) +assert(r.version == 130008, "check pgsql failed.") +print("parse sql \n\t" .. sql .. "\n ok.") + +sql = "SHOW DATABASES" +r = parser:parse(sql) +assert(r.error == nil, "bad res for " .. sql) +assert(r.cursorpos == nil, "bad res for " .. sql) +assert(r.version == 130008, "check pgsql failed.") +print("parse sql \n\t" .. sql .. "\n ok.") + +sql = "SELECT SQLtime()" +r = parser:parse(sql) +assert(r.error == nil, "bad res for " .. sql) +assert(r.cursorpos == nil, "bad res for " .. 
sql) +assert(r.version == 130008, "check pgsql failed.") +print("parse sql \n\t" .. sql .. "\n ok.") + +sql = "SELECT NOW() FROM tbl_abc" +r = parser:parse(sql) +assert(r.error == nil, "bad res for " .. sql) +assert(r.cursorpos == nil, "bad res for " .. sql) +assert(r.version == 130008, "check pgsql failed.") +print("parse sql \n\t" .. sql .. "\n ok.") + +sql = "SELECT * FROM tbl_abc WHERE time > NOW(-10)" +r = parser:parse(sql) +assert(r.error == nil, "bad res for " .. sql) +assert(r.cursorpos == nil, "bad res for " .. sql) +assert(r.version == 130008, "check pgsql failed.") + +local SelectStmt = r.stmts[1].stmt.SelectStmt +assert( SelectStmt ~= nil, "SelectStmt parse error.") + +local targetList = SelectStmt.targetList +assert( type(targetList) == "table", "targetList parse error.") +assert(type(targetList[1].ResTarget.val.ColumnRef.fields[1].A_Star) == "table", "parse select * failed.") + +local fromClause = SelectStmt.fromClause +assert(fromClause[1].RangeVar.relname == "tbl_abc") + + +local whereClause = SelectStmt.whereClause +--assert(whereClause.A_Expr.kind == "AEXPR_OP", "AEXPR_OP failed.") +--assert(whereClause.A_Expr.name[1].String.str == "==", "AEXPR_OP string failed.") +assert(whereClause.A_Expr.lexpr.ColumnRef.fields[1].String.str == "time", "parser op lexpr failed.") +--assert( whereClause.A_Expr.rexpr.A_Const.val.String.str == "zhaoyan", "parser op rexpr failed.") +correct = parser:hasTimeLimit(whereClause) +if correct then + print(sql .. " is correct fox SQL") +else + print(sql .. " is not correct fox SQL") +end +print("parse sql \n\t" .. sql .. "\n ok.") + +sql = "SELECT name, age FROM tbl_a,tbl_b WHERE age>15 and name IN ('zhaoyan','qiaoke')" +r = parser:parse(sql) +assert(r.error == nil, "bad res for " .. sql) +assert(r.cursorpos == nil, "bad res for " .. 
sql) +assert(r.version == 130008, "check pgsql failed.") +local SelectStmt = r.stmts[1].stmt.SelectStmt +local whereClause = SelectStmt.whereClause +correct = parser:hasTimeLimit(whereClause) +if correct then + print(sql .. " is correct fox SQL") +else + print(sql .. " is not correct fox SQL") +end +print("parse sql \n\t" .. sql .. "\n ok.") + +sql = "SELECT name, age FROM tbl_a,tbl_b WHERE time < NOW(-10)" +r = parser:parse(sql) +assert(r.error == nil, "bad res for " .. sql) +assert(r.cursorpos == nil, "bad res for " .. sql) +assert(r.version == 130008, "check pgsql failed.") +local SelectStmt = r.stmts[1].stmt.SelectStmt +local whereClause = SelectStmt.whereClause +correct = parser:hasTimeLimit(whereClause) +if correct then + print(sql .. " is correct fox SQL") +else + print(sql .. " is not correct fox SQL") +end +print("parse sql \n\t" .. sql .. "\n ok.") + +sql = "SELECT name, age FROM tbl_a,tbl_b WHERE time BETWEEN '1999-04-23 09:21:00' and '2000-04-23 09:21:00'" +r = parser:parse(sql) +assert(r.error == nil, "bad res for " .. sql) +assert(r.cursorpos == nil, "bad res for " .. sql) +assert(r.version == 130008, "check pgsql failed.") +local SelectStmt = r.stmts[1].stmt.SelectStmt +local whereClause = SelectStmt.whereClause +correct = parser:hasTimeLimit(whereClause) +if correct then + print(sql .. " is correct fox SQL") +else + print(sql .. " is not correct fox SQL") +end +print("parse sql \n\t" .. sql .. "\n ok.") + +sql = "SELECT name, age FROM tbl_a,tbl_b WHERE time > NOW(-10) and age>15 and name IN ('zhaoyan','qiaoke')" +r = parser:parse(sql) +assert(r.error == nil, "bad res for " .. sql) +assert(r.cursorpos == nil, "bad res for " .. sql) +assert(r.version == 130008, "check pgsql failed.") + +local SelectStmt = r.stmts[1].stmt.SelectStmt +local whereClause = SelectStmt.whereClause +correct = parser:hasTimeLimit(whereClause) +if correct then + print(sql .. " is correct fox SQL") +else + print(sql .. 
" is not correct fox SQL") +end +assert(whereClause.BoolExpr ~= nil, "BoolExpr is nil.") +assert(whereClause.BoolExpr.args[1].A_Expr.lexpr.ColumnRef.fields[1].String.str == "time", "parser op lexpr failed.") +print("parse sql \n\t" .. sql .. "\n ok.") + +sql = "SELECT name, age FROM tbl_a,tbl_b WHERE age>15 and time > NOW(-10) and name IN ('zhaoyan','qiaoke')" +r = parser:parse(sql) +assert(r.error == nil, "bad res for " .. sql) +assert(r.cursorpos == nil, "bad res for " .. sql) +assert(r.version == 130008, "check pgsql failed.") + +local SelectStmt = r.stmts[1].stmt.SelectStmt +local whereClause = SelectStmt.whereClause +correct = parser:hasTimeLimit(whereClause) +if correct then + print(sql .. " is correct fox SQL") +else + print(sql .. " is not correct fox SQL") +end +assert(whereClause.BoolExpr ~= nil, "BoolExpr is nil.") +assert(whereClause.BoolExpr.args[2].A_Expr.lexpr.ColumnRef.fields[1].String.str == "time", "parser op lexpr failed.") +print("parse sql \n\t" .. sql .. "\n ok.") + +sql = "SELECT name, age FROM tbl_a,tbl_b WHERE time BETWEEN 1 and 2 and age>15 and name IN ('zhaoyan','qiaoke')" +r = parser:parse(sql) +assert(r.error == nil, "bad res for " .. sql) +assert(r.cursorpos == nil, "bad res for " .. sql) +assert(r.version == 130008, "check pgsql failed.") +local SelectStmt = r.stmts[1].stmt.SelectStmt +local whereClause = SelectStmt.whereClause +correct = parser:hasTimeLimit(whereClause) +if correct then + print(sql .. " is correct fox SQL") +else + print(sql .. " is not correct fox SQL") +end +print("parse sql \n\t" .. sql .. "\n ok.") + +sql = "SELECT * FROM tbl_abc WHERE time > DATE_SUB(NOW(),10)" +r = parser:parse(sql) +assert(r.error == nil, "bad res for " .. sql) +assert(r.cursorpos == nil, "bad res for " .. 
sql) +assert(r.version == 130008, "check pgsql failed.") + +local SelectStmt = r.stmts[1].stmt.SelectStmt +assert( SelectStmt ~= nil, "SelectStmt parse error.") + +local targetList = SelectStmt.targetList +assert( type(targetList) == "table", "targetList parse error.") +assert(type(targetList[1].ResTarget.val.ColumnRef.fields[1].A_Star) == "table", "parse select * failed.") + +local fromClause = SelectStmt.fromClause +assert(fromClause[1].RangeVar.relname == "tbl_abc") + + +local whereClause = SelectStmt.whereClause +--assert(whereClause.A_Expr.kind == "AEXPR_OP", "AEXPR_OP failed.") +--assert(whereClause.A_Expr.name[1].String.str == "==", "AEXPR_OP string failed.") +assert(whereClause.A_Expr.lexpr.ColumnRef.fields[1].String.str == "time", "parser op lexpr failed.") +--assert( whereClause.A_Expr.rexpr.A_Const.val.String.str == "zhaoyan", "parser op rexpr failed.") +correct = parser:hasTimeLimit(whereClause) +if correct then + print(sql .. " is correct fox SQL") +else + print(sql .. " is not correct fox SQL") +end +print("parse sql \n\t" .. sql .. "\n ok.") \ No newline at end of file diff --git a/source/tools/monitor/unity/test/fox/timeZone.lua b/source/tools/monitor/unity/test/fox/timeZone.lua new file mode 100644 index 0000000000000000000000000000000000000000..f53ea8134900ac9cb90a3b180d5b4a2de26b0d9c --- /dev/null +++ b/source/tools/monitor/unity/test/fox/timeZone.lua @@ -0,0 +1,15 @@ +--- +--- Generated by EmmyLua(https://github.com/EmmyLua) +--- Created by liaozhaoyan. 
+--- DateTime: 2023/9/20 1:57 PM +--- + +local function get_timezone_offset() + local ts = os.time() + local utcdate = os.date("!*t", ts) + local localdate = os.date("*t", ts) + localdate.isdst = false -- this is the trick + return os.difftime(os.time(localdate), os.time(utcdate)) +end + +print(get_timezone_offset()) \ No newline at end of file diff --git a/source/tools/monitor/unity/test/lab/childOut.lua b/source/tools/monitor/unity/test/lab/childOut.lua new file mode 100644 index 0000000000000000000000000000000000000000..edc0cd02144c5c749e50a7c01c9e5a1abc69aa60 --- /dev/null +++ b/source/tools/monitor/unity/test/lab/childOut.lua @@ -0,0 +1,18 @@ +--- +--- Generated by EmmyLua(https://github.com/EmmyLua) +--- Created by liaozhaoyan. +--- DateTime: 2023/8/29 2:03 PM +--- + +package.path = package.path .. ";../../?.lua" + +local exec = require("common.exec") +local unistd = require("posix.unistd") +local pwait = require("posix.sys.wait") + +local fIn, fOut = unistd.pipe() +assert(fIn, "creat pipe failed.") + +local pid = exec.run("/bin/ls", {"-l"}, fIn, fOut) +pwait.wait(pid) +print("child say:\n" .. unistd.read(fIn, 1024)) \ No newline at end of file diff --git a/source/tools/monitor/unity/test/lab/inoFlag.lua b/source/tools/monitor/unity/test/lab/inoFlag.lua new file mode 100644 index 0000000000000000000000000000000000000000..fb45180da8134843b8086bfa7d3bb5066ab40fa9 --- /dev/null +++ b/source/tools/monitor/unity/test/lab/inoFlag.lua @@ -0,0 +1,20 @@ +--- +--- Generated by EmmyLua(https://github.com/EmmyLua) +--- Created by liaozhaoyan. +--- DateTime: 2023/8/20 2:47 PM +--- + +package.path = package.path .. 
";../../?.lua;" + +local inotify = require('inotify') +local system = require("common.system") + +local handle = inotify.init() +local w = handle:addwatch("./test.txt", inotify.IN_MODIFY) + +while true do + for e in handle:events() do + system:dumps(e) + end + system:sleep(1) +end diff --git a/source/tools/monitor/unity/test/lab/inoWrite.py b/source/tools/monitor/unity/test/lab/inoWrite.py new file mode 100644 index 0000000000000000000000000000000000000000..e96060b3f1f7a954618a346834596ac669b408ee --- /dev/null +++ b/source/tools/monitor/unity/test/lab/inoWrite.py @@ -0,0 +1,11 @@ + +import os +import time + +i = 1 +with open("./test.txt", 'w') as f: + while True: + time.sleep(i) + i += 1 + os.write(f.fileno(), "hello.".encode()) + print(i) \ No newline at end of file diff --git a/source/tools/monitor/unity/test/lab/randStr.lua b/source/tools/monitor/unity/test/lab/randStr.lua new file mode 100644 index 0000000000000000000000000000000000000000..4a3301d6653705c97ed21a4963e11322f5d14437 --- /dev/null +++ b/source/tools/monitor/unity/test/lab/randStr.lua @@ -0,0 +1,14 @@ +--- +--- Generated by EmmyLua(https://github.com/EmmyLua) +--- Created by liaozhaoyan. +--- DateTime: 2023/8/25 8:05 PM +--- + +package.path = package.path .. 
";../../?.lua;" + +local system = require("common.system") + +local len = 32 +local s = system:randomStr(len) +assert(#s == len) +print(s) diff --git a/source/tools/monitor/unity/test/lab/ssl_get/Makefile b/source/tools/monitor/unity/test/lab/ssl_get/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..469bc2fe81923801b9fb05faf3e25b303b3376c0 --- /dev/null +++ b/source/tools/monitor/unity/test/lab/ssl_get/Makefile @@ -0,0 +1,10 @@ +SOURCES=get_ssl +PROGRAMS = $(SOURCES:.cc=) + +default: $(PROGRAMS) + +clean: + rm *.o $(PROGRAMS) -f + +.cc: + g++ -Wall -g $< -o $@ -lssl -lcrypto \ No newline at end of file diff --git a/source/tools/monitor/unity/test/lab/ssl_get/get_ssl.cc b/source/tools/monitor/unity/test/lab/ssl_get/get_ssl.cc new file mode 100644 index 0000000000000000000000000000000000000000..bb6e4894de6b20f8620eb6fb7845b41ac37a476b --- /dev/null +++ b/source/tools/monitor/unity/test/lab/ssl_get/get_ssl.cc @@ -0,0 +1,182 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define log(...) do { printf(__VA_ARGS__); fflush(stdout); } while(0) +#define check0(x, ...) if(x) do { log( __VA_ARGS__); exit(1); } while(0) +#define check1(x, ...) 
if(!(x)) do { log( __VA_ARGS__); exit(1); } while(0) +SSL_CTX *sslContext; + +struct SSLCon{ + int socket; + SSL *sslHandle; + ~SSLCon() { + SSL_shutdown (sslHandle); + SSL_free (sslHandle); + close(socket); + } +}; + +int setNonBlock(int fd, bool value) { + int flags = fcntl(fd, F_GETFL, 0); + if (flags < 0) { + return errno; + } + if (value) { + return fcntl(fd, F_SETFL, flags | O_NONBLOCK); + } + return fcntl(fd, F_SETFL, flags & ~O_NONBLOCK); +} + +// Establish a regular tcp connection +int tcpConnect (const char* svr, short port) +{ + struct hostent *host = gethostbyname (svr); + int handle = socket (AF_INET, SOCK_STREAM, 0); + check1(handle >= 0, "socket return error"); + setNonBlock(handle, true); + struct sockaddr_in server; + bzero (&server, sizeof server); + server.sin_family = AF_INET; + server.sin_port = htons (port); + server.sin_addr = *((struct in_addr *) host->h_addr); + + log("connecting to %s %d\n", svr, port); + int r = connect (handle, (struct sockaddr *) &server, + sizeof (struct sockaddr)); + if (r < 0 && (errno == EWOULDBLOCK || errno == EAGAIN)) { + struct pollfd pfd; + pfd.fd = handle; + pfd.events = POLLOUT | POLLERR; + while (r == 0) { + r = poll(&pfd, 1, 100); + } + check1(pfd.revents == POLLOUT, "poll return error events: %d", pfd.revents); + } + check1(r, "connect to %s %d failed\n", svr, port); + log("connected to %s %d\n", svr, port); + return handle; +} + +void sslConnect (SSLCon* con, const char* host, short port) +{ + bzero(con, sizeof *con); + con->socket = tcpConnect (host, port); + + con->sslHandle = SSL_new (sslContext); + if (con->sslHandle == NULL) { + ERR_print_errors_fp (stderr); + check1(0, "SSL_new failed"); + } + + if (!SSL_set_fd (con->sslHandle, con->socket)) { + ERR_print_errors_fp (stderr); + check1(0, "SSL_set_fd failed"); + } + + SSL_set_connect_state (con->sslHandle); + int r = 0; + int events = POLLIN | POLLOUT | POLLERR; + while ((r = SSL_do_handshake(con->sslHandle)) != 1) { + int err = 
SSL_get_error(con->sslHandle, r); + if (err == SSL_ERROR_WANT_WRITE) { + events = POLLIN | POLLOUT | POLLERR; +// events &= ~POLLIN; + log("return want write set events %d\n", events); + } else if (err == SSL_ERROR_WANT_READ) { + events = POLLIN | POLLERR; +// events |= EPOLLIN; +// events &= ~EPOLLOUT; + log("return want read set events %d\n", events); + } else { + log("SSL_do_handshake return %d error %d errno %d msg %s\n", r, err, errno, strerror(errno)); + ERR_print_errors_fp(stderr); + check1(0, "do handshake error"); + } + struct pollfd pfd; + pfd.fd = con->socket; + pfd.events = events; + do { + r = poll(&pfd, 1, 100); + } while (r == 0); + check1(r == 1, "poll return %d error events: %d errno %d %s\n", r, pfd.revents, errno, strerror(errno)); + } + log("ssl connected \n"); +} + +#define READ_SIZE 16384 +void sslRead (SSLCon* con) +{ + char buf[READ_SIZE]; + int rd = 0; + int r = 1; + while (rd < int(sizeof buf) && r) { + log("reading\n"); + + struct pollfd pfd; + pfd.fd = con->socket; + pfd.events = POLLIN; + do { + r = poll(&pfd, 1, 100); + }while (r == 0); + + r = SSL_read(con->sslHandle, buf+rd, sizeof buf - rd); + if (r < 0) { + int err = SSL_get_error(con->sslHandle, r); + if (err == SSL_ERROR_WANT_READ) { + continue; + } + ERR_print_errors_fp (stderr); + } + check1(r >= 0, "SSL_read error return %d errno %d msg %s", r, errno, strerror(errno)); + log("read %d bytes\n", r); + rd += r; + } + log("read %d bytes contents:\n%.*s\n", rd, rd, buf); +} + +void sslWrite (SSLCon* con, const char *text) +{ + int len = strlen(text); + int wd = SSL_write (con->sslHandle, text, len); + check1(wd == len, "SSL_write error. 
return %d errno %d msg %s", wd, errno, strerror(errno)); + log("sslWrite %d bytes\n", len); +} + +int main (int argc, char **argv) +{ + if (argc < 3) { + printf("usage %s \n", argv[0]); + return 0; + } + SSL_load_error_strings (); + SSL_library_init (); + sslContext = SSL_CTX_new (SSLv23_client_method ()); + if (sslContext == NULL) + ERR_print_errors_fp (stderr); + { + SSLCon con; + sslConnect(&con, argv[1], atoi(argv[2])); + sslWrite (&con, "GET /\r\n\r\n"); + sslRead (&con); + } + SSL_CTX_free (sslContext); + return 0; +} \ No newline at end of file diff --git a/source/tools/monitor/unity/test/protoBuf/pbInterFace.lua b/source/tools/monitor/unity/test/protoBuf/pbInterFace.lua new file mode 100644 index 0000000000000000000000000000000000000000..39cee0a6d6ee4b7d489becb6ad19c320054e2a0c --- /dev/null +++ b/source/tools/monitor/unity/test/protoBuf/pbInterFace.lua @@ -0,0 +1,54 @@ +--- +--- Generated by EmmyLua(https://github.com/EmmyLua) +--- Created by liaozhaoyan. +--- DateTime: 2023/8/15 12:55 AM +--- + +local module = {} + +local lineParse = require("common.lineParse") +local CprotoData = require("common.protoData") +local system = require("common.system") +local pysting = require("common.pystring") + +local function protoLine(line) + local title, ls, vs, log = lineParse.parse(line) + + local lss = {} + for k, v in pairs(ls) do + table.insert(lss, {name = k, index = v}) + end + + local vss = {} + for k, v in pairs(vs) do + table.insert(vss, {name = k, value = v}) + end + + local logs = {} + for k, v in pairs(log) do + table.insert(logs, {name = k, log = v}) + end + + return { + line = title, + ls = lss, + vs = vss, + log = logs, + } +end + +local proto = CprotoData.new() +function module.protoLines(lines) + local lineTable = { + lines = {} + } + + local s_lines = pysting:split(lines, "\n") + for c, line in ipairs(s_lines) do + lineTable.lines[c] = protoLine(line) + end + + return proto:encode(lineTable) +end + +return module diff --git 
a/source/tools/monitor/unity/tsdb/foxSQL.lua b/source/tools/monitor/unity/tsdb/foxSQL.lua new file mode 100644 index 0000000000000000000000000000000000000000..309c3fc05b70163a6e25e4e268cb7133e6d38f17 --- /dev/null +++ b/source/tools/monitor/unity/tsdb/foxSQL.lua @@ -0,0 +1,454 @@ +--- +--- Generated by EmmyLua(https://github.com/EmmyLua) +--- Created by liaozhaoyan. +--- DateTime: 2023/8/12 9:37 PM +--- + +require("common.class") + +local cjson = require("cjson.safe") +local system = require("common.system") +local json = cjson.new() + +local foxFFI = require("tsdb.native.foxffi") +local CfoxTSDB = require("tsdb.foxTSDB") +local CfoxSQL = class("CfoxSQL",CfoxTSDB) + +local code2err = { + "no lower limit of time in where clauses" , + "bad time condition", + "sql parse to req error", + "bad label in where clause", + "bas value in select clause" +} + +function CfoxSQL:_init_(fYaml) + CfoxTSDB._init_(self,fYaml) +end + +function CfoxSQL:parse(sql) + local ffi, cffi = foxFFI.ffi, foxFFI.cffi + + local pgRes = ffi.gc(ffi.new("PgQueryParseResult"), cffi.parse_sql_free) -- will auto free by cffi.parse_sql_free + + cffi.parse_sql(sql, pgRes) + local perror = pgRes.error + local err = tonumber(ffi.cast("unsigned long", perror)) + if err ~= 0 then -- is NULL + return { + error = ffi.string(perror.message), + cursorpos = tonumber(perror.cursorpos), + } + else + local s = ffi.string(pgRes.parse_tree) + --print(s) + system:dumps(json.decode(s)) + return json.decode(s) + end +end + +function CfoxSQL:hasTimeLimit(whereClause) + if whereClause.A_Expr ~= nil then -- only one where + if whereClause.A_Expr.lexpr.ColumnRef.fields[1].String.str == "time" then + if whereClause.A_Expr.kind == "AEXPR_OP" then + local op = whereClause.A_Expr.name[1].String.str + if op == "<" or op == "<=" then + print("sql error: you must give a lower limit of time in where clauses") + return false + end + end + return true + else + return false + end + end + if whereClause.BoolExpr ~= nil then --not only 
one where condition + local args = whereClause.BoolExpr.args + for _, arg in ipairs(args) do + if arg.A_Expr.lexpr.ColumnRef.fields[1].String.str == "time" then + if arg.A_Expr.kind == "AEXPR_OP" then + local op = arg.A_Expr.name[1].String.str + if op == "<" or op == "<=" then + print("sql error: you must give a lower limit of time in where clauses") + return false + end + end + + return true + end + end + + end + return false +end + +function CfoxSQL:parseLabelsLimit(args) + local res = {} + for _, arg in ipairs(args) do + if arg.A_Expr.lexpr.ColumnRef.fields[1].String.str ~= "time" then + if arg.A_Expr.name[1].String.str ~= "=" then + print("sql error: only support equal in where condition") + else + local k = arg.A_Expr.lexpr.ColumnRef.fields[1].String.str + local v = arg.A_Expr.rexpr.ColumnRef.fields[1].String.str + res[k] = v + end + end + end + return res +end + + +function CfoxSQL:parseSqlToReq(sql) + local res = {} + local req = {} + local SelectStmt = sql.stmts[1].stmt.SelectStmt + local fromClauses = SelectStmt.fromClause + local fromtbls = {} + + local c = 1 + if fromClauses ~= nil then + for _, fromClause in ipairs(fromClauses) do + fromtbls[c] = fromClause.RangeVar.relname + c = c + 1 + end + end + req["fromtbls"] = fromtbls + + local targetLists = SelectStmt.targetList + local targetvals = {} + c = 1 + if type(targetLists[1].ResTarget.val.ColumnRef.fields[1].A_Star) == "table" then + req["selectAll"] = true + else + req["selectAll"] = false + for _, targetList in ipairs(targetLists) do + targetvals[c] = targetList.ResTarget.val.ColumnRef.fields[1].String.str + c = c + 1 + end + + end + req["targetvals"] = targetvals + + local whereClause = SelectStmt.whereClause + req["labelsLimit"] = {} + + if type(whereClause.A_Expr) == "table" then -- only one where + print("only one where") + if whereClause.A_Expr.kind == "AEXPR_OP" then --example:WHERE time > NOW(-10) + local op = whereClause.A_Expr.name[1].String.str + local secs = 0 + if 
type(whereClause.A_Expr.rexpr.FuncCall) == "table" and whereClause.A_Expr.rexpr.FuncCall.funcname[1].String.str == "now" then + local args = whereClause.A_Expr.rexpr.FuncCall.args + --assert(args[1].A_Const.val.Integer ~= nil,"now arg error") + if args[1].A_Const.val.Integer == nil then + print("now arg error") + res["code"] = -2 + return res + end + secs = args[1].A_Const.val.Integer.ival + secs = math.abs(secs) + --ms = self:qNow(secs,fromtbls) + else + print("sql error: you must time conditon like 'time > NOW(-secs)'") + res["code"] = -2 + return res + end + req["mode"] = "last" + req["secs"] = secs + elseif whereClause.A_Expr.kind == "AEXPR_BETWEEN" then + local start = whereClause.A_Expr.rexpr.List.items[1].A_Const.val.String.str + local stop = whereClause.A_Expr.rexpr.List.items[2].A_Const.val.String.str + + print("between " .. start .. " and " .. stop) + local dstart = self:str2date(start) + local dstop = self:str2date(stop) + if dstart == nil or dstop == nil then + res["code"] = -2 + return res + end + req["mode"] = "date" + req["start"] = dstart + req["stop"] = dstop + end + + elseif type(whereClause.BoolExpr) == "table" then --not only one where condition + local args = whereClause.BoolExpr.args + local lbs = self:parseLabelsLimit(args) + req["labelsLimit"] = lbs + for _, arg in ipairs(args) do + if arg.A_Expr.lexpr.ColumnRef.fields[1].String.str == "time" then + if arg.A_Expr.kind == "AEXPR_OP" then --example:WHERE time > NOW(-10) + local op = arg.A_Expr.name[1].String.str + local secs = 0 + if type(arg.A_Expr.rexpr.FuncCall) == "table" and arg.A_Expr.rexpr.FuncCall.funcname[1].String.str == "now" then + local as = arg.A_Expr.rexpr.FuncCall.args + assert(as[1].A_Const.val.Integer ~= nil,"now arg error") + if as[1].A_Const.val.Integer == nil then + print("now arg error") + res["code"] = -2 + return res + end + secs = as[1].A_Const.val.Integer.ival + secs = math.abs(secs) + --ms = self:qNow(secs,fromtbls) + req["mode"] = "last" + req["secs"] = secs + else 
+ print("sql error: you must time conditon like 'time > NOW(-secs)'") + res["code"] = -2 + return res + end + + elseif arg.A_Expr.kind == "AEXPR_BETWEEN" then + local start = arg.A_Expr.rexpr.List.items[1].A_Const.val.String.str + local stop = arg.A_Expr.rexpr.List.items[2].A_Const.val.String.str + + print("between " .. start .. " and " .. stop) + local dstart = self:str2date(start) + local dstop = self:str2date(stop) + --ms = self:qDate(dstart,dstop,fromtbls) + if dstart == nil or dstop == nil then + res["code"] = -2 + return res + end + req["mode"] = "date" + req["start"] = dstart + req["stop"] = dstop + end + break + end + end + + end + system:dumps(req) + print("return req") + res["code"] = 0 + res["req"] = req + return res +end + +function CfoxSQL:selectTables(req,ms) + local finalres = {} + local res = {} + if req == {} then + print("req is nil") + finalres["code"] = -3 + finalres["ms"] = {} + return finalres + end + + if req["mode"] == "last" then + local fromtbls = req["fromtbls"] + local secs = req["secs"] + if fromtbls == {} then + ms = self:qNow(secs) + else + ms = self:qNow(secs,fromtbls) + end + + print("select last ") + elseif req["mode"] == "date" then + local fromtbls = req["fromtbls"] + local dstart = req["start"] + local dstop = req["stop"] + if fromtbls == {} then + ms = self:qDate(dstart,dstop) + else + ms = self:qDate(dstart,dstop,fromtbls) + end + + print("select data ") + end + finalres["code"] = 0 + finalres["ms"] = ms + return finalres +end + +function CfoxSQL:selectLabels(req,ms) + local finalres = {} + local res = {} + if req == {} then + print("req is nil") + finalres["code"] = -3 + finalres["ms"] = {} + return finalres + end + if req["labelsLimit"] == {} then + print("there is no labels limit") + finalres["code"] = 0 + finalres["ms"] = ms + return finalres + end + for _, line in ipairs(ms) do + local match = true + for k,v in pairs(req["labelsLimit"]) do + if system:keyIsIn(line["labels"],k)==false then + finalres["code"] = -4 + 
finalres["ms"] = {} + return finalres + elseif line["labels"][k] ~= v then + match = false + end + + end + if match then + table.insert(res,line) + end + + end + finalres["code"] = 0 + finalres["ms"] = res + return finalres +end + +function CfoxSQL:selectVals(req,ms) + local finalres = {} + local res = {} + if req == {} then + print("req is nil") + finalres["code"] = -3 + finalres["ms"] = {} + return finalres + end + if req["selectAll"] == true then + finalres["code"] = 0 + finalres["ms"] = ms + return finalres + + else + + for _, line in ipairs(ms) do + local lineres = {} + for k,v in pairs(line) do + if k ~= "values" then + lineres[k] = v + else + local vs = {} + for _, targetval in ipairs(req["targetvals"]) do + if system:keyIsIn(v,targetval) then + vs[targetval] = v[targetval] + else + finalres["code"] = -5 + finalres["ms"] = {} + return finalres + end + end + lineres["values"] = vs + end + end + table.insert(res,lineres) + end + + end + finalres["code"] = 0 + finalres["ms"] = res + return finalres +end + +function CfoxSQL:getDateFromTime(ms) + --local finalres = {} + for _,line in ipairs(ms) do + local time = line["time"] + local date = self:getDateFrom_us(tonumber(time)) + --line["time"] = self:date2str(date) + line["time"] = os.date("%x %X", tonumber(time) / 1000000) + end + return ms +end + +function CfoxSQL:select(sql) -- make sure that sql.stmts[1].stmt.SelectStmt~=nil + local ms = {} + local res = { + status = "ok", + error = "", + data = {} + } + if self:hasTimeLimit(sql.stmts[1].stmt.SelectStmt.whereClause) then + local req = {} + local tmp = self:parseSqlToReq(sql,req) + local code = tmp["code"] + if code ~= 0 then + res["status"] = "failed" + code = math.abs(code) + res["error"] = code2err[code] + return res + end + req = tmp["req"] + system:dumps(req) + + tmp = self:selectTables(req,ms) + code = tmp["code"] + if code ~= 0 then + res["status"] = "failed" + code = math.abs(code) + res["error"] = code2err[code] + return res + end + ms = tmp["ms"] + 
system:dumps(ms) + + tmp = self:selectVals(req,ms) + code = tmp["code"] + if code ~= 0 then + res["status"] = "failed" + code = math.abs(code) + res["error"] = code2err[code] + return res + end + ms = tmp["ms"] + system:dumps(ms) + + tmp = self:selectLabels(req,ms) + code = tmp["code"] + if code ~= 0 then + res["status"] = "failed" + code = math.abs(code) + res["error"] = code2err[code] + return res + end + ms = tmp["ms"] + system:dumps(ms) + + ms = self:getDateFromTime(ms) + --code = tmp["code"] + --if code ~= 0 then + -- res["status"] = "failed" + -- code = math.abs(code) + -- res["error"] = code2err[code] + -- return res + --end + --ms = tmp["ms"] + system:dumps(ms) + + res["data"] = ms + return res + else + print("sql error: you must give one time condition in where clauses") + res["status"] = "failed" + res["error"] = code2err[1] + return res + end + + +end + +function CfoxSQL:show(sql) + +end + +function CfoxSQL:sql(sql) + local SelectStmt = sql.stmts[1].stmt.SelectStmt + local VariableShowStmt = sql.stmts[1].stmt.VariableShowStmt + if SelectStmt ~= nil then + print("sql select") + return self:select(sql) + elseif VariableShowStmt ~= nil then + print("sql show") + return self:show(sql) + + end + +end + +return CfoxSQL diff --git a/source/tools/monitor/unity/tsdb/foxTSDB.lua b/source/tools/monitor/unity/tsdb/foxTSDB.lua index 69f2c5e57481b9d21b562d1809c2db2e6590bba8..ccf7032ad68b92d69f5eb1d63ff58f5ba158ec16 100644 --- a/source/tools/monitor/unity/tsdb/foxTSDB.lua +++ b/source/tools/monitor/unity/tsdb/foxTSDB.lua @@ -11,30 +11,68 @@ local snappy = require("snappy") local pystring = require("common.pystring") local CprotoData = require("common.protoData") local foxFFI = require("tsdb.native.foxffi") +local dirent = require("posix.dirent") +local unistd = require("posix.unistd") +local cjson = require("cjson.safe") +local inotify = require('inotify') local CfoxTSDB = class("CfoxTSDB") +local function get_timezone_offset() + local ts = os.time() + local utcdate = 
os.date("!*t", ts) + local localdate = os.date("*t", ts) + localdate.isdst = false -- this is the trick + return os.difftime(os.time(localdate), os.time(utcdate)) +end + +local json = cjson.new() function CfoxTSDB:_init_(fYaml) self.ffi = foxFFI.ffi self.cffi = foxFFI.cffi self._proto = CprotoData.new(nil) + self.tz_sec = get_timezone_offset() self:setupConf(fYaml) end function CfoxTSDB:_del_() + if self._man then self.cffi.fox_del_man(self._man) + self._man = nil end - self._man = nil + + if self._ino then + for _, w in ipairs(self._ino_ws) do + self._ino:rmwatch(w) + end + self._ino:close() + self._ino = nil + end +end + +function CfoxTSDB:_initInotify() + self._ino = inotify.init() + system:fdNonBlocking(self._ino:getfd()) + self._ino_ws = {} end function CfoxTSDB:setupConf(fYaml) local conf = system:parseYaml(fYaml) - local dbConf = conf.db or {budget = 200, rotate=7} + local dbConf = conf.config.db or {budget = 200, rotate=7} self._qBudget = dbConf.budget or 200 self._rotate = dbConf.rotate or 7 end +function CfoxTSDB:fileNames() + if self._man then + local iname = self.ffi.string(self._man.iname) + local fname = self.ffi.string(self._man.fname) + return {iname = iname, + fname = fname} + end +end + function CfoxTSDB:get_us() return self.cffi.get_us() end @@ -56,7 +94,7 @@ function CfoxTSDB:getDate() end function CfoxTSDB:makeStamp(foxTime) - return self.cffi.make_stamp(foxTime) + return self.cffi.make_stamp(foxTime) + self.tz_sec * 1e6 end function CfoxTSDB:date2str(date) @@ -132,9 +170,6 @@ function CfoxTSDB:packLine(lines) end function CfoxTSDB:rotateDb() - local dirent = require("posix.dirent") - local unistd = require("posix.unistd") - local usec = self._man.now local sec = self._rotate * 24 * 60 * 60 @@ -152,7 +187,7 @@ function CfoxTSDB:rotateDb() local num = tonumber(sf) if num < level then print("delete " .. "./" .. f) - pcall(unistd.unlink, "./" .. f) + pcall(unistd.unlink, "./" .. 
f) --delete end end end @@ -164,33 +199,92 @@ function CfoxTSDB:setupWrite() local date = self:getDate() local us = self:get_us() local ret = self.cffi.fox_setup_write(self._man, date, us) + + if ret ~= 0 then + local usec = self:get_us() + + local foxTime = self:getDateFrom_us(usec) + local v = foxTime.year * 10000 + foxTime.mon * 100 + foxTime.mday + local fname = string.format("%08d.fox", v) + pcall(unistd.unlink, "./" .. fname) + fname = string.format("%08d.foxi", v) + pcall(unistd.unlink, "./" .. fname) + ret = self.cffi.fox_setup_write(self._man, date, us) + end assert(ret == 0) return ret end -function CfoxTSDB:write(buff) +local function getTables(proto, buff) + local tables = {} + + local lines = proto:decode(buff) + for _, line in ipairs(lines.lines) do + local title = line.line + if tables[title] then + tables[title] = tables[title] + 1 + else + tables[title] = 1 + end + end + return tables +end + +function CfoxTSDB:_write(tables, buff) assert(self._man ~= nil, "this fox object show setup for read or write, you should call setupWrite after new") local now = self:get_us() local date = self:getDateFrom_us(now) + local tableStream = snappy.compress(json.encode(tables)) local stream = snappy.compress(buff) - assert(self.cffi.fox_write(self._man, date, now, self.ffi.string(stream, #stream), #stream) == 0) + + local tableLen = #tableStream + local streamLen = #stream + + assert(self.cffi.fox_write(self._man, date, now, + tableStream, tableLen, + stream, streamLen) > 0 ) + if self._man.new_day > 0 then self:rotateDb() end - --assert(self.cffi.fox_write(self._man, date, now, self.ffi.string(buff), #buff) == 0) +end + +function CfoxTSDB:write(buff) + local tables = getTables(self._proto, buff) + if system:keyCount(tables) then + self:_write(tables, buff) + end +end + +local function addWatch(ino, ws, fname) + local w = ino:addwatch(fname, inotify.IN_MODIFY) + table.insert(ws, w) +end + +local function dbIsUpdate(ino) + local ret = false + for _ in ino:events() do 
+ ret = true + end + return ret end function CfoxTSDB:_setupRead(us) assert(self._man == nil, "one fox object should have only one manager.") self._man = self.ffi.new("struct fox_manager") - us = us or (self:get_us() - 15e6) + us = us or (self:get_us() - 1) local date = self:getDateFrom_us(us) local res = self.cffi.fox_setup_read(self._man, date, us) assert(res >= 0, string.format("setup read return %d.", res)) if res > 0 then self.cffi.fox_del_man(self._man) self._man = nil + else + self:_initInotify() + local names = self:fileNames() + addWatch(self._ino, self._ino_ws, names.iname) end + return res end @@ -198,46 +292,187 @@ function CfoxTSDB:curMove(us) assert(self._man) local ret = self.cffi.fox_cur_move(self._man, us) assert(ret >= 0, string.format("cur move bad ret: %d", ret)) - return self._man.pos + return self._man.r_index end -function CfoxTSDB:resize() - if self._man then +function CfoxTSDB:_resize() + if dbIsUpdate(self._ino) then local ret = self.cffi.fox_read_resize(self._man) assert(ret >= 0, string.format("resize bad ret: %d", ret)) end end -function CfoxTSDB:loadData(stop_us) +function CfoxTSDB:resize() + print("warn: resize function is no need any more.") +end + +local function loadFoxTable(ffi, cffi, pman) + local tables = {} + + local data = ffi.new("char* [1]") + local ret = cffi.fox_read_table(pman, data) + assert(ret >= 0) + if ret > 0 then + local stream = ffi.string(data[0], ret) + --local ustr = snappy.decompress(stream) + --tables = json.decode(ustr) + --cffi.fox_free_buffer(data) + local stat, ustr = pcall(snappy.decompress, stream) + if stat and ustr ~= nil then + tables = json.decode(ustr) + cffi.fox_free_buffer(data) + else + print("foxTSDB loadFoxTable: snappy error") + end + end + return tables +end + +local function loadFoxData(ffi, cffi, pman, proto) + local lines = {} + + local data = ffi.new("char* [1]") + local us = ffi.new("fox_time_t [1]") + local ret = cffi.fox_read_data(pman, data, us) + assert(ret >= 0) + if ret > 0 then + 
local stream = ffi.string(data[0], ret) + --local ustr = snappy.decompress(stream) + local stat, ustr = pcall(snappy.decompress, stream) + if stat and ustr ~= nil then + lines = proto:decode(ustr) + lines['time'] = tonumber(us[0]) + cffi.fox_free_buffer(data) + else + print("foxTSDB loadfoxData: snappy error") + end + + end + + return lines +end + +local function checkQTable(qtbl, ffi, cffi, pman) + if qtbl == nil then -- nil means get all + return true + end + + local tables = loadFoxTable(ffi, cffi, pman) + for _, tbl in ipairs(qtbl) do + if tables[tbl] then + return true + end + end + return false +end + +local function transLine(line, time, addLog) + local cell = {time = time, title = line.line} + + local labels = {} + if line.ls then + for _, vlabel in ipairs(line.ls) do + labels[vlabel.name] = vlabel.index + end + end + cell.labels = labels + + local values = {} + if line.vs then + for _, vvalue in ipairs(line.vs) do + values[vvalue.name] = vvalue.value + end + end + cell.values = values + + local logs = {} + if addLog then + if line.log then + for _, log in ipairs(line.log) do + logs[log.name] = log.log + end + end + end + cell.logs = logs + + return cell +end + +local function filterLines(qtbl, lines, cells, addLog) + local c = #cells + 1 + if not lines.lines then + return + end + local time = lines.time + + if qtbl == nil then -- nil means get all + for _, line in ipairs(lines.lines) do + cells[c] = transLine(line, time, addLog) + c = c + 1 + end + return + end + + for _, tbl in ipairs(qtbl) do + for _, line in ipairs(lines.lines) do + if line.line == tbl then + cells[c] = transLine(line, time, addLog) + c = c + 1 + end + end + end + return cells +end + +-- stop_us: end time; qtbl: query tables, if nil, get all; addLog, if nil only get values, +function CfoxTSDB:loadData(stop_us, qtbl, addLog) local stop = false local function fLoad() + local cells = {} + if stop then return nil end - local data = self.ffi.new("char* [1]") - local us = 
self.ffi.new("fox_time_t [1]") - local ret = self.cffi.fox_read(self._man, stop_us, data, us) - assert(ret >= 0) - if ret > 0 then - local stream = self.ffi.string(data[0], ret) - local ustr = snappy.decompress(stream) - local line = self._proto:decode(ustr) - self.cffi.fox_free_buffer(data) + if checkQTable(qtbl, self.ffi, self.cffi, self._man) then + local datas = loadFoxData(self.ffi, self.cffi, self._man, self._proto) + filterLines(qtbl, datas, cells, addLog) + end - if self._man.fsize == self._man.pos then -- this means cursor is at the end of file. - stop = true - end - line['time'] = tonumber(us[0]) - return line + local ret = self.cffi.fox_next(self._man, stop_us) + assert(ret >= 0, "for next failed.") + if ret > 0 then + stop = true end - return nil + return cells end return fLoad end -function CfoxTSDB:query(start, stop, ms) -- start stop should at the same mday +function CfoxTSDB:loadTable(stop_us) + local stop = false + + local function fTable() + if stop then + return nil + end + + local tables = loadFoxTable(self.ffi, self.cffi, self._man) + + local ret = self.cffi.fox_next(self._man, stop_us) + assert(ret >= 0, "for next failed.") + if ret > 0 then + stop = true + end + + return tables + end + return fTable +end + + +function CfoxTSDB:_loadMetric(start, stop, ms) -- start stop should at the same mday assert(stop > start) local dStart = self:getDateFrom_us(start) local dStop = self:getDateFrom_us(stop) @@ -247,54 +482,49 @@ function CfoxTSDB:query(start, stop, ms) -- start stop should at the same mday self:curMove(start) -- moveto position - local lenMs = #ms - for line in self:loadData(stop) do - local time = line.time - for _, v in ipairs(line.lines) do - local tCell = {time = time, title = v.line} - - local labels = {} - if v.ls then - for _, vlabel in ipairs(v.ls) do - labels[vlabel.name] = vlabel.index - end - end - tCell.labels = labels - - local values = {} - if v.vs then - for _, vvalue in ipairs(v.vs) do - values[vvalue.name] = vvalue.value - 
end - end - tCell.values = values - + local lenMs = #ms + 1 + for cells in self:loadData(stop, nil, false) do -- only for metric, do not need log. + for _, cell in ipairs(cells) do + ms[lenMs] = cell lenMs = lenMs + 1 - ms[lenMs] = tCell end end return ms end -function CfoxTSDB:_qlast(date, beg, stop, ms) +function CfoxTSDB:_preQuery(date, beg) if not self._man then -- check _man is already installed. if self:_setupRead(beg) ~= 0 then -- try to create new - return ms + return 1 -- beg is nil end + return 0 end if self.cffi.check_pman_date(self._man, date) == 1 then - return self:query(beg, stop, ms) + self:_resize() -- check is need to resize. + return 0 -- at the same day else - self:_del_() - if self:_setupRead(beg) ~= 0 then -- try to create new - return ms + self:_del_() -- to destroy.. + if self:_setupRead(beg) ~= 0 then -- try to setup new + return 1 end - return self:query(beg, stop, ms) + return 0 end end -function CfoxTSDB:qlast(last, ms) +function CfoxTSDB:_qLastMetric(date, beg, stop, ms) + local res = self:_preQuery(date, beg) + + if res > 0 then + return ms + elseif res == 0 then + return self:_loadMetric(beg, stop, ms) + else + error("bad preQuery state: " .. 
res) + end +end + +function CfoxTSDB:qLastMetric(last, ms) assert(last < 24 * 60 * 60) local now = self:get_us() @@ -304,7 +534,7 @@ function CfoxTSDB:qlast(last, ms) local dStop = self:getDateFrom_us(now) if self.cffi.check_foxdate(dStart, dStop) ~= 0 then - self:_qlast(dStart, beg, now, ms) + self:_qLastMetric(dStart, beg, now, ms) else dStop.hour, dStop.min, dStop.sec = 0, 0, 0 local beg1 = beg @@ -312,60 +542,27 @@ function CfoxTSDB:qlast(last, ms) local now1 = beg2 - 1 local now2 = now - self:_qlast(dStart, beg1, now1, ms) - self:_qlast(dStop, beg2, now2, ms) + self:_qLastMetric(dStart, beg1, now1, ms) + self:_qLastMetric(dStop, beg2, now2, ms) end end -function CfoxTSDB:qDay(start, stop, ms, tbls, budget) - if self._man then - self:_del_() - end - - if self:_setupRead(start) ~= 0 then - return {} +function CfoxTSDB:qDay(date, start, stop, ms, tbls, budget) + local ret = self:_preQuery(date, start) + if ret ~= 0 then + return ms end budget = budget or self._qBudget self:curMove(start) local inc = false - local lenMs = #ms - for line in self:loadData(stop) do + local lenMs = #ms + 1 + for cells in self:loadData(stop, tbls, true) do inc = false - local time = line.time - for _, v in ipairs(line.lines) do - local title = v.line - if not tbls or system:valueIsIn(tbls, title) then - local tCell = {time = string.format("%d", time), title = title} - - local labels = {} - if v.ls then - for _, vlabel in ipairs(v.ls) do - labels[vlabel.name] = vlabel.index - end - end - tCell.labels = labels - - local values = {} - if v.vs then - for _, vvalue in ipairs(v.vs) do - values[vvalue.name] = vvalue.value - end - end - tCell.values = values - - local logs = {} - if v.log then - for _, log in ipairs(v.log) do - logs[log.name] = log.log - end - end - tCell.logs = logs - - lenMs = lenMs + 1 - ms[lenMs] = tCell - inc = true - end + for _, cell in ipairs(cells) do + inc = true + ms[lenMs] = cell + lenMs = lenMs + 1 end if inc then @@ -378,23 +575,19 @@ function 
CfoxTSDB:qDay(start, stop, ms, tbls, budget) return ms end -function CfoxTSDB:qDayTables(start, stop, tbls) - if self._man then - self:_del_() - end - - if self:_setupRead(start) ~= 0 then - return {} +function CfoxTSDB:qDayTables(date, start, stop, tbls) + local ret = self:_preQuery(date, start) + if ret ~= 0 then + return tbls end self:curMove(start) - local lenTbls = #tbls - for line in self:loadData(stop) do - for _, v in ipairs(line.lines) do - local title = v.line - if not system:valueIsIn(tbls, title) then - lenTbls = lenTbls + 1 - tbls[lenTbls] = title + for cells in self:loadTable(stop) do + for k, v in pairs(cells) do + if tbls[k] then + tbls[k] = tbls[k] + v + else + tbls[k] = v end end end @@ -411,7 +604,7 @@ function CfoxTSDB:qDate(dStart, dStop, tbls) local ms = {} if self.cffi.check_foxdate(dStart, dStop) ~= 0 then - self:qDay(beg, now, ms, tbls) + self:qDay(dStart, beg, now, ms, tbls) else dStop.hour, dStop.min, dStop.sec = 0, 0, 0 local beg1 = beg @@ -419,10 +612,10 @@ function CfoxTSDB:qDate(dStart, dStop, tbls) local now1 = beg2 - 1 local now2 = now - self:qDay(beg1, now1, ms, tbls) + self:qDay(dStart, beg1, now1, ms, tbls) local budget = self._qBudget - #ms if budget > 0 then - self:qDay(beg2, now2, ms, tbls, budget) + self:qDay(dStop, beg2, now2, ms, tbls, budget) end end return ms @@ -440,7 +633,7 @@ function CfoxTSDB:qNow(sec, tbls) local ms = {} if self.cffi.check_foxdate(dStart, dStop) ~= 0 then - self:qDay(beg, now, ms, tbls) + self:qDay(dStart, beg, now, ms, tbls) else dStop.hour, dStop.min, dStop.sec = 0, 0, 0 local beg1 = beg @@ -448,10 +641,10 @@ function CfoxTSDB:qNow(sec, tbls) local now1 = beg2 - 1 local now2 = now - self:qDay(beg1, now1, ms, tbls) + self:qDay(dStart, beg1, now1, ms, tbls) local budget = self._qBudget - #ms if budget > 0 then - self:qDay(beg2, now2, ms, tbls, budget) + self:qDay(dStop, beg2, now2, ms, tbls, budget) end end return ms @@ -469,7 +662,7 @@ function CfoxTSDB:qTabelNow(sec) local tbls = {} if 
self.cffi.check_foxdate(dStart, dStop) ~= 0 then - self:qDayTables(beg, now, tbls) + self:qDayTables(dStart, beg, now, tbls) else dStop.hour, dStop.min, dStop.sec = 0, 0, 0 local beg1 = beg @@ -477,10 +670,17 @@ function CfoxTSDB:qTabelNow(sec) local now1 = beg2 - 1 local now2 = now - self:qDayTables(beg1, now1, tbls) - self:qDayTables(beg2, now2, tbls) + self:qDayTables(dStart, beg1, now1, tbls) + self:qDayTables(dStop, beg2, now2, tbls) end - return tbls + + local res = {} + local c = 1 + for k, _ in pairs(tbls) do + res[c] = k + c = c + 1 + end + return res end return CfoxTSDB diff --git a/source/tools/monitor/unity/tsdb/native/Makefile b/source/tools/monitor/unity/tsdb/native/Makefile index 85686fcc12bfa151fa2cdc8fd52392e5099d8db7..939a1bf3c2ebbcdf789500721b3bf3a8a1246011 100644 --- a/source/tools/monitor/unity/tsdb/native/Makefile +++ b/source/tools/monitor/unity/tsdb/native/Makefile @@ -1,6 +1,6 @@ CC := gcc CFLAG := -g -fpic -LDFLAG := -g -fpic -shared +LDFLAG := -g -fpic -shared -L./ -lpg_query -lpthread OBJS := foxTSDB.o SO := libfoxTSDB.so diff --git a/source/tools/monitor/unity/tsdb/native/foxTSDB.c b/source/tools/monitor/unity/tsdb/native/foxTSDB.c index 76d211f062dd865073240c32edd9d224f9c1f84b..83bd10fba32953a307bd12394c99c29c120149a7 100644 --- a/source/tools/monitor/unity/tsdb/native/foxTSDB.c +++ b/source/tools/monitor/unity/tsdb/native/foxTSDB.c @@ -15,20 +15,27 @@ #include #include -#define FNAME_SIZE 16 -#define FOX_MAGIC 0xf030 +#define FOX_MAGIC 0x0f030f03 #define MICRO_UNIT (1000 * 1000UL) -#define FOX_VALUE_FLAG (1 << 0ULL) -#define FOX_LOG_FLAG (1 << 1ULL) +// for lseek mode choose. 
+#ifndef FOX_LSEEK_MODE +#define FOX_LSEEK_MODE 64 +#endif -struct fox_head{ - unsigned int prev; - unsigned int next; +#if (FOX_LSEEK_MODE==64) +#define FOX_LSEEK lseek64 +#else +#define FOX_LSEEK lseek +#endif + +struct fox_index { fox_time_t t_us; - unsigned short magic; - unsigned short flag; + fox_off_t off; + int table_len; + int data_len; }; +#define FOX_INDEX_SIZE sizeof(struct fox_index) fox_time_t get_us(void) { fox_time_t res = 0; @@ -56,7 +63,7 @@ static void date2tm(struct foxDate * p, struct tm * ptm) { ptm->tm_hour = p->hour; ptm->tm_min = p->min; ptm->tm_sec = p->sec; - ptm->tm_isdst = -1; + ptm->tm_isdst = 0; } int get_date_from_us(fox_time_t us, struct foxDate * p) { @@ -108,6 +115,19 @@ static fox_time_t fox_read_day_max(struct fox_manager* pman) { return make_stamp(&date) + 24 * 60 * 60 * MICRO_UNIT; } +int check_foxdate(struct foxDate* d1, struct foxDate* d2) { + return (d1->year == d2->year) && \ + (d1->mon == d2->mon) && \ + (d1->mday == d2->mday); +} + +int check_pman_date(struct fox_manager* pman, struct foxDate* pdate) { + return (pman->year == pdate->year) && \ + (pman->mon == pdate->mon) && \ + (pman->mday == pdate->mday); +} + +// tell file size. static size_t fd_size(int fd) { int ret; struct stat file_info; @@ -119,202 +139,160 @@ static size_t fd_size(int fd) { return (!ret) ? 
file_info.st_size : -EACCES; } +// create .foxi file name +static void pack_iname(char* pname, struct foxDate * p) { + snprintf(pname, FNAME_SIZE, "%04d%02d%02d.foxi", p->year, p->mon, p->mday); +} + +// create .fox file name static void pack_fname(char* pname, struct foxDate * p) { snprintf(pname, FNAME_SIZE, "%04d%02d%02d.fox", p->year, p->mon, p->mday); } -static int fox_read_head(int fd, struct fox_head* phead) { - size_t size; +//open .foxi to write data index info +static int fox_open_w_fdi(struct foxDate * p, char * iname) { + int fd; + int ret; + fox_off_t off; - size = read(fd, phead, sizeof (struct fox_head)); + pack_iname(iname, p); - if (size == sizeof (struct fox_head)) { - if (phead->magic == FOX_MAGIC) { - return size; - } else { - fprintf(stderr, "bad magic: 0x%x, hope: 0x%x\n", phead->magic, FOX_MAGIC); - return -EINVAL; - } - } else if (size == 0) { - return 0; - } else { - fprintf(stderr, "read file failed. %d, %s\n", errno, strerror(errno)); - return -EACCES; + fd = open(iname, O_RDWR | O_APPEND | O_CREAT); + if (fd < 0) { + fprintf(stderr, "open %s error, return %d, %s\n", iname, errno, strerror(errno)); + ret = -ENOENT; + goto endOpen; } -} -static int fox_check_head(int fd, struct fox_manager* pman) { - int ret; - struct fox_head head; - off_t off = 0; - - head.prev = 0; - while (1) { - off = lseek(fd, off, SEEK_SET); - if (off < 0) { - fprintf(stderr, "seek file failed. %d, %s\n", errno, strerror(errno)); - ret = -EACCES; - goto endSeek; - } - - ret = fox_read_head(fd, &head); - if (ret > 0) { - off = head.next; - } else if (ret == 0) { - pman->last_pos = head.prev; - pman->pos = off; - break; - } else { - fprintf(stderr, "write file failed. 
pos: 0x%llx\n", off); - goto endHead; - } + off = FOX_LSEEK(fd, 0, SEEK_END); + if (off < 0) { + fprintf(stderr, "fox_open_w_fdi lseek %s error, return %d, %s\n", iname, errno, strerror(errno)); + ret = -errno; + goto endSeek; } - return ret; - endHead: + return fd; endSeek: + close(fd); + endOpen: return ret; } -static int fox_check(int fd, struct fox_manager* pman) { - int ret = 0; - int retLock = 0; - - ret = lockf(fd, F_LOCK, 0); - if (ret < 0) { - fprintf(stderr, "lock file failed. %d, %s\n", errno, strerror(errno)); - goto endLock; - } +//open .fox to write data index info +static int fox_open_w_fd(struct foxDate * p, char * fname) { + int fd; + int ret; + fox_off_t off; - ret = fox_check_head(fd, pman); - if (ret < 0) { - goto endCheck; + pack_fname(fname, p); + fd = open(fname, O_RDWR | O_APPEND | O_CREAT); + if (fd < 0) { + fprintf(stderr, "open %s error, return %d, %s", fname, errno, strerror(errno)); + ret = -ENOENT; + goto endOpen; } - ret = lockf(fd, F_ULOCK, 0); - if (ret < 0) { - fprintf(stderr, "lock file failed. %d, %s\n", errno, strerror(errno)); - goto endUnLock; + off = FOX_LSEEK(fd, 0, SEEK_END); + if (off < 0) { + fprintf(stderr, "fox_open_w_fd lseek %s error, return %d, %s\n", fname, errno, strerror(errno)); + ret = -errno; + goto endSeek; } - return ret; - - endCheck: - retLock = lockf(fd, F_ULOCK, 0); - if (retLock < 0) { - fprintf(stderr, "lock file failed. %d, %s\n", errno, strerror(errno)); - ret = retLock; - goto endUnLock; - } - return ret; - endUnLock: - endLock: + return fd; + endSeek: + close(fd); + endOpen: return ret; } +// export for setup write int fox_setup_write(struct fox_manager* pman, struct foxDate * p, fox_time_t now) { - char fname[FNAME_SIZE]; int ret = 0; + size_t pos; - pack_fname(fname, p); + pman->fdi = fox_open_w_fdi(p, pman->iname); // open fdi for data index. 
+ if (pman->fdi < 0) { + ret = pman->fdi; + goto endOpeni; + } - pman->fd = open(fname, O_RDWR|O_APPEND|O_CREAT); + pman->fd = fox_open_w_fd(p, pman->fname); // open fd for data. if (pman->fd < 0) { - fprintf(stderr, "open %s error, return %d, %s", fname, errno, strerror(errno)); - ret = -ENOENT; + ret = pman->fd; goto endOpen; } - ret = fox_check(pman->fd, pman); - if (ret < 0) { - goto endCheck; + pos = fd_size(pman->fd); + if (pos < 0) { + ret = pos; + goto endSize; } pman->year = p->year; pman->mon = p->mon; pman->mday = p->mday; pman->now = now; + pman->w_off = pos; return ret; - endCheck: + + endSize: close(pman->fd); - pman->fd = 0; endOpen: + pman->fd = 0; + close(pman->fdi); + endOpeni: + pman->fdi = 0; return ret; } -int check_foxdate(struct foxDate* d1, struct foxDate* d2) { - return (d1->year == d2->year) && \ - (d1->mon == d2->mon) && \ - (d1->mday == d2->mday); -} - -int check_pman_date(struct fox_manager* pman, struct foxDate* pdate) { - return (pman->year == pdate->year) && \ - (pman->mon == pdate->mon) && \ - (pman->mday == pdate->mday); -} - -static int fox_write_data(struct fox_manager* pman, fox_time_t us, const char* data, int len) { +static int fox_write_data(struct fox_manager* pman, fox_time_t us, + const char* tData, int tLen, + const char* data, int len) { int ret = 0; int fd = pman->fd; - struct fox_head head; + int fdi = pman->fdi; + struct fox_index index; - head.prev = pman->last_pos; - head.next = pman->pos + sizeof (struct fox_head) + len; - head.t_us = us; - head.magic = FOX_MAGIC; - head.flag = 0; + index.t_us = us; + index.off = pman->w_off; + index.table_len = tLen; + index.data_len = len; - ret = lockf(fd, F_LOCK, 0); + //write foxi + ret = write(fdi, &index, FOX_INDEX_SIZE); if (ret < 0) { - fprintf(stderr, "lock file failed. %d, %s\n", errno, strerror(errno)); - ret = -EACCES; - goto endLock; + fprintf(stderr, "write foxi file failed. 
%d, %s\n", errno, strerror(errno)); + goto endWrite; } - ret = write(fd, &head, sizeof (struct fox_head)); + //write fox + ret = write(fd, tData, tLen ); // for table if (ret < 0) { - fprintf(stderr, "write file failed. %d, %s\n", errno, strerror(errno)); + fprintf(stderr, "write data file table failed. %d, %s\n", errno, strerror(errno)); goto endWrite; } - ret = write(fd, data, len ); + ret = write(fd, data, len ); // for stream if (ret < 0) { fprintf(stderr, "write file failed. %d, %s\n", errno, strerror(errno)); goto endWrite; } - ret = lockf(fd, F_ULOCK, 0); - if (ret < 0) { - fprintf(stderr, "lock file failed. %d, %s\n", errno, strerror(errno)); - ret = -EACCES; - goto endUnLock; - } - pman->now = us; - pman->last_pos = pman->pos; // record last position - pman->pos = head.next; + pman->w_off += tLen + len; return ret; - endWrite: - ret = lockf(fd, F_ULOCK, 0); - if (ret < 0) { - fprintf(stderr, "lock file failed. %d, %s\n", errno, strerror(errno)); - ret = -EACCES; - goto endUnLock; - } - return ret; - endUnLock: - endLock: return ret; } int fox_write(struct fox_manager* pman, struct foxDate* pdate, fox_time_t us, - const char* data, int len) { + const char* tData, int tLen, + const char* data, int len) { int res = 0; if (!check_pman_date(pman, pdate)) { // new day? - close(pman->fd); // close this file at first. 
+ fox_del_man(pman); // free old res = fox_setup_write(pman, pdate, us); if (res < 0) { fprintf(stderr, "create new file failed.\n"); @@ -326,123 +304,133 @@ int fox_write(struct fox_manager* pman, struct foxDate* pdate, fox_time_t us, } if (pman->now <= us) { // time should monotonically increasing - res = fox_write_data(pman, us, data, len); + res = fox_write_data(pman, us, tData, tLen, data, len); } - return res; + return res; endCreateFile: return res; } -static int fox_cursor_left(int fd, struct fox_manager* pman, fox_time_t now) { - int ret; - off_t pos; - struct fox_head head; - fox_time_t last_t_us = pman->now; +static int fox_index_get(int fdi, fox_off_t index, struct fox_index *pindex) { + int ret = 0; + size_t size; + fox_off_t pos = index * FOX_INDEX_SIZE; - if (pman->pos == pman->fsize) { - pos = pman->last_pos; + ret = FOX_LSEEK(fdi, pos, SEEK_SET); + if (ret < 0) { + fprintf(stderr, "seek file failed. %d, %s\n", errno, strerror(errno)); + ret = -EACCES; + goto endSeek; + } + + size = read(fdi, pindex, FOX_INDEX_SIZE); + if (size == FOX_INDEX_SIZE) { + return size; + } else if (size > 0) { + fprintf(stderr, "read index file return %ld.\n", size); + ret = -EACCES; + goto endRead; } else { - pos = pman->pos; + fprintf(stderr, "read index file failed. %d, %s\n", errno, strerror(errno)); + ret = -EACCES; + goto endRead; } - while (1) { - ret = lseek(fd, pos, SEEK_SET); + return ret; + endRead: + endSeek: + return ret; +} + +static void fox_cursor_save(struct fox_manager* pman, struct fox_index *pindex, fox_off_t find) { + pman->r_index = find; + + pman->r_next = pman->r_index + 1; + if (pman->r_next > pman->cells - 1) { // r_next should less than pman->cells - 1 and 0; + pman->r_next = pman->cells ? 
pman->cells - 1 : 0 ; + } + pman->now = pindex->t_us; + pman->data_pos = pindex->off; + pman->table_len = pindex->table_len; + pman->data_len = pindex->data_len; +} + +//cursor left +static int fox_cursor_left(int fdi, struct fox_manager* pman, fox_time_t now) { + int ret = 0; + fox_off_t start = 0, end = pman->r_index; + fox_off_t mid = start; + struct fox_index index = {0, 0, 0, 0}; + fox_time_t us = 0; + + while (start < end) { + mid = start + (end - start) / 2; + + ret = fox_index_get(fdi, mid, &index); + us= index.t_us; if (ret < 0) { - fprintf(stderr, "seek file failed. %d, %s\n", errno, strerror(errno)); + fprintf(stderr, "fox index %ld get failed. %d, %s\n", mid, errno, strerror(errno)); ret = -EACCES; goto endSeek; } - ret = fox_read_head(fd, &head);; - if (ret < 0) { - goto endRead; - } - - if (head.t_us < now) { - pman->pos = head.next; - pman->now = last_t_us; - pman->last_pos = pos; - break; - } - - last_t_us = head.t_us; - pos = head.prev; - if (pos == 0) { //begin of file - pman->pos = pos; - pman->now = last_t_us; - pman->last_pos = pos; + if (now < us) { // now is little, upper half + end = mid; + } else if (now > us) { // now is large, lower region + start = mid + 1; + } else { // equal break; } } - ret = 0; - return ret; + fox_cursor_save(pman, &index, mid); + return 0; endSeek: - endRead: return ret; } -static int fox_cursor_right(int fd, struct fox_manager* pman, fox_time_t now) { - int ret; - off_t last_pos = pman->last_pos; - off_t pos = pman->pos; - struct fox_head head; - - while (1) { - if (pos == pman->fsize) { - pman->last_pos = last_pos; - pman->pos = pos; - pman->now = fox_read_day_max(pman); - ret = 0; - goto endEndoffile; - } else if (pos > pman->fsize) { - ret = -ERANGE; - goto endRange; - } +static int fox_cursor_right(int fdi, struct fox_manager* pman, fox_time_t now) { + int ret = 0; + fox_off_t start = pman->r_index, end = pman->cells ? 
pman->cells - 1 : 0; + fox_off_t mid = start; + struct fox_index index = {0, 0, 0, 0}; + fox_time_t us = 0; + + while (start < end) { + mid = start + (end - start) / 2; - ret = lseek(fd, pos, SEEK_SET); + ret = fox_index_get(fdi, mid, &index); + us = index.t_us; if (ret < 0) { - fprintf(stderr, "seek file failed. %d, %s\n", errno, strerror(errno)); + fprintf(stderr, "fox index %ld get failed. %d, %s\n", mid, errno, strerror(errno)); ret = -EACCES; goto endSeek; } - ret = fox_read_head(fd, &head);; - if (ret < 0) { - goto endRead; - } - - if (head.t_us >= now) { // just > - pman->pos = pos; - pman->now = head.t_us; - pman->last_pos = last_pos; + if (now < us) { // now is little, upper half + end = mid; + } else if (now > us) { // now is large, lower region + start = mid + 1; + } else { // equal break; } - last_pos = pos; - pos = head.next; } - ret = 0; - return ret; - endEndoffile: - return ret; - endRange: //out of file range. - return ret; + fox_cursor_save(pman, &index, mid); + return 0; endSeek: - endRead: - endSize: return ret; } -static int fox_cursor_work(int fd, struct fox_manager* pman, fox_time_t now) { +static int fox_cursor(int fdi, struct fox_manager* pman, fox_time_t now) { int ret = 0; if (pman->now > now) { - ret = fox_cursor_left(fd, pman, now); + ret = fox_cursor_left(fdi, pman, now); if (ret < 0) { goto endCursor; } } else if (pman->now < now) { - ret = fox_cursor_right(fd, pman, now); + ret = fox_cursor_right(fdi, pman, now); if (ret < 0) { goto endCursor; } @@ -453,163 +441,136 @@ static int fox_cursor_work(int fd, struct fox_manager* pman, fox_time_t now) { return ret; } -static int fox_cursor(int fd, struct fox_manager* pman, fox_time_t now) { - int ret; +int fox_cur_move(struct fox_manager* pman, fox_time_t now) { + int ret = fox_cursor(pman->fdi, pman, now); + return ret; +} + +static int fox_cursor_next(struct fox_manager* pman, struct fox_index* pindex) { + int ret = 0; - ret = fox_cursor_work(fd, pman, now); + ret = 
fox_index_get(pman->fdi, pman->r_next, pindex); if (ret < 0) { - goto endLock; + goto endIndex; } + fox_cursor_save(pman, pindex, pman->r_next); return ret; - endLock: + endIndex: return ret; } -int fox_cur_move(struct fox_manager* pman, fox_time_t now) { - int fd; - - fd = pman->fd; - - return fox_cursor(fd, pman, now); -} - -static int fox_cur_next(struct fox_manager* pman, struct fox_head* phead) { +int fox_read_resize(struct fox_manager* pman) { int ret = 0; - off_t pos = phead->next; - struct fox_head head; - int fd = pman->fd; - pman->pos = pos; - if (pos < pman->fsize) { - ret = lseek(fd, pos, SEEK_SET); - if (ret < 0) { - fprintf(stderr, "seek file failed. %d, %s\n", errno, strerror(errno)); - goto endSeek; - } - - ret = fox_read_head(fd, &head);; - if (ret < 0) { - goto endRead; + size_t isize = fd_size(pman->fdi); // fresh new file size. + if (isize < 0) { + ret = isize; + goto endSize; + } + pman->isize = isize; + pman->cells = isize / FOX_INDEX_SIZE; + if (pman->r_index == pman->r_next) { // end of file? check new next. + if (pman->r_next < pman->cells - 1) { + pman->r_next = pman->r_index + 1; } - pman->now = head.t_us; - } else { - pman->now = fox_read_day_max(pman); } return ret; - endSeek: - endRead: + endSize: return ret; } -static int fox_cur_back(struct fox_manager* pman) { +//open .foxi to write data index info +static int fox_open_r_fdi(struct foxDate * p, int* pfd, char *iname) { + int fd; int ret = 0; - off_t pos = pman->last_pos; - struct fox_head head; - int fd = pman->fd; - pman->pos = pos; - if (pos > 0) { - ret = lseek(fd, pos, SEEK_SET); - if (ret < 0) { - fprintf(stderr, "seek file failed. %d, %s\n", errno, strerror(errno)); - goto endSeek; - } - - ret = fox_read_head(fd, &head);; - if (ret < 0) { - goto endRead; + pack_iname(iname, p); + fd = open(iname, O_RDONLY); + if (fd < 0) { + if (errno == ENOENT) { // not such file. 
+ ret = errno; + goto endOpen; } - pman->now = head.t_us; - } else { - pman->now = fox_read_init_now(pman); + fprintf(stderr, "open %s error, return %d, %s", iname, errno, strerror(errno)); + ret = -errno; + goto endOpen; } + *pfd = fd; return ret; - endSeek: - endRead: + endOpen: return ret; } -int fox_read_resize(struct fox_manager* pman) { +//open .fox to write data index info +static int fox_open_r_fd(struct foxDate * p, int *pfd, char* fname) { + int fd; int ret = 0; - size_t fsize = fd_size(pman->fd); - if (fsize < 0) { - ret = fsize; - goto endSize; - } - - if (fsize > pman->fsize) { - if (pman->pos == pman->fsize) { // at the end of file. - ret = fox_cur_back(pman); - if (ret < 0) { - goto endCur; - } - } - pman->fsize = fsize; - } else { - struct foxDate d; - d.year = pman->year; - d.mon = pman->mon; - d.mday = pman->mday; - d.hour = 0; - d.min = 0; - d.sec = 0; - - fox_time_t now = make_stamp(&d); - - ret = fox_setup_read(pman, &d, now); - if (ret !=0 ) { - ret = -EACCES; - goto endSetup; + pack_fname(fname, p); + fd = open(fname, O_RDONLY); + if (fd < 0) { + if (errno == ENOENT) { // not such file. 
+ ret = errno; + goto endOpen; } + fprintf(stderr, "open %s error, return %d, %s", fname, errno, strerror(errno)); + ret = -ENOENT; + goto endOpen; } + + *pfd = fd; return ret; - endSetup: - endSize: - endCur: + endOpen: return ret; } int fox_setup_read(struct fox_manager* pman, struct foxDate * p, fox_time_t now) { - char fname[FNAME_SIZE]; int ret = 0; + size_t isize = 0; - pack_fname(fname, p); - pman->fd = open(fname, O_RDONLY); - if (pman->fd < 0) { - fprintf(stderr, "open %s failed, return %d, %s", fname, errno, strerror(errno)); - ret = 1; + ret = fox_open_r_fdi(p, &pman->fdi, pman->iname); // for index file + if (ret != 0) { + goto endOpeni; + } + + ret = fox_open_r_fd(p, &pman->fd, pman->fname); // for data file + if (ret != 0) { + ret = pman->fd; goto endOpen; } + isize = fd_size(pman->fdi); + if (isize < 0) { + ret = isize; + goto endSize; + } + pman->year = p->year; pman->mon = p->mon; pman->mday = p->mday; - pman->now = fox_read_init_now(pman); - pman->pos = 0; - pman->last_pos = 0; - pman->fsize = fd_size(pman->fd); - if (pman->fsize < 0) { - ret = -EACCES; - goto endSize; - } + pman->now = fox_read_init_now(pman); // at begin of a day default. + pman->isize = isize; + pman->cells = isize / FOX_INDEX_SIZE; + pman->r_index = 0; // at begin of the file. + pman->r_next = pman->cells ? 
pman->r_index + 1 : 0; // for empty file, next is 0 - ret = fox_cursor(pman->fd, pman, now); + ret = fox_cursor(pman->fdi, pman, now); if (ret < 0) { goto endCursor; } -// printf("setup %s, fd: %d, size: %ld, pos: %lld, pnow: %ld, now:%ld\n", -// fname, pman->fd, pman->fsize, pman->pos, pman->now, now); return ret; - endSize: endCursor: + endSize: close(pman->fd); endOpen: - pman->fd = -1; + pman->fd = 0; + close(pman->fdi); + endOpeni: + pman->fdi = 0; return ret; } @@ -627,77 +588,129 @@ static int fox_read_stream(int fd, char* stream, int size) { return ret; } -static int fox_read_work(int fd, struct fox_manager* pman, fox_time_t stop, - char **pp, int *len, fox_time_t *us) { +// cursor move to next, < 0 bad, equal 0,>0 end. +int fox_next(struct fox_manager* pman, fox_time_t stop) { int ret = 0; - off_t pos = pman->pos; - int size; - if (pman->now <= stop) { - struct fox_head head; - char *p; + if (pman->now <= stop && pman->r_index < pman->r_next) { // r_index equal r_next means is the end of file. + struct fox_index index; - ret = lseek(fd, pos, SEEK_SET); + ret = fox_cursor_next(pman, &index); // the cursor will point to next position. if (ret < 0) { - fprintf(stderr, "seek file failed. %d, %s\n", errno, strerror(errno)); - ret = -EACCES; - goto endSeek; + goto endNext; } + ret = 0; - ret = fox_read_head(fd, &head); - if (ret < 0) { - goto endRead; - } + } else { + ret = 1; + } + return ret; - size = head.next - pos - sizeof (struct fox_head); - if (size < 0) { - goto endSize; - } - p = malloc(size); - if (p == NULL) { - fprintf(stderr, "malloc %d failed. %d, %s\n", size, errno, strerror(errno)); - ret = -ENOMEM; - goto endMalloc; - } + endNext: + return ret; +} - ret = fox_read_stream(fd, p, size); - if (ret < 0) { - free(p); - goto endRead2; - } +// read function +static int _fox_read_table(struct fox_manager* pman, char **pp, int *len) { + int ret = 0; - ret = fox_cur_next(pman, &head); // the cursor will to next position. 
- if (ret < 0) { - free(p); - goto endNext; - } + char *p = NULL; + int size = 0; - *us = head.t_us; - *pp = p; - *len = size; - } else { - *pp = NULL; - *len = 0; + size = pman->table_len; + ret = FOX_LSEEK(pman->fd, pman->data_pos, SEEK_SET); + if (ret < 0) { + fprintf(stderr, "lseek data fd %d pos: %lld, failed. %d, %s\n", pman->fd, pman->data_pos, errno, strerror(errno)); + ret = -EACCES; + goto endSeek; + } + + p = malloc(size); + if (p == NULL) { + fprintf(stderr, "malloc %d failed. %d, %s\n", size, errno, strerror(errno)); + ret = -ENOMEM; + goto endMalloc; } + + ret = fox_read_stream(pman->fd, p, size); + if (ret < 0) { + free(p); + goto endRead; + } + + *pp = p; + *len = size; + return ret; - endNext: - endRead2: - endSize: - endMalloc: endRead: + endMalloc: endSeek: *pp = NULL; *len = 0; return ret; } -int fox_read(struct fox_manager* pman, fox_time_t stop, char **pp, fox_time_t *us) { +int fox_read_table(struct fox_manager* pman, char **pp) { int ret; int len; - int fd = pman->fd; - ret = fox_read_work(fd, pman, stop, pp, &len, us); + ret = _fox_read_table(pman, pp, &len); + if (ret < 0) { + goto endWork; + } + return len; + + endWork: + return ret; +} + +// read function +static int _fox_read_data(struct fox_manager* pman, char **pp, int *len, fox_time_t *us) { + int ret = 0; + + char *p = NULL; + int size = 0; + + size = pman->data_len; + ret = FOX_LSEEK(pman->fd, pman->data_pos + pman->table_len, SEEK_SET); + if (ret < 0) { + fprintf(stderr, "lseek data fd failed. %d, %s\n", errno, strerror(errno)); + ret = -EACCES; + goto endSeek; + } + + p = malloc(size); + if (p == NULL) { + fprintf(stderr, "malloc %d failed. 
%d, %s\n", size, errno, strerror(errno)); + ret = -ENOMEM; + goto endMalloc; + } + + ret = fox_read_stream(pman->fd, p, size); + if (ret < 0) { + free(p); + goto endRead; + } + + *pp = p; + *len = size; + *us = pman->now; + return ret; + + endRead: + endMalloc: + endSeek: + *pp = NULL; + *len = 0; + return ret; +} + +int fox_read_data(struct fox_manager* pman, char **pp, fox_time_t *us) { + int ret = 0; + int len = 0; + + ret = _fox_read_data(pman, pp, &len, us); if (ret < 0) { goto endWork; } @@ -714,7 +727,25 @@ void fox_free_buffer(char **pp) { void fox_del_man(struct fox_manager* pman) { if (pman->fd > 0) { + posix_fadvise(pman->fd, 0, 0, POSIX_FADV_DONTNEED); close(pman->fd); - pman->fd = -1; + pman->fd = 0; + } + if (pman->fdi > 0) { + close(pman->fdi); + pman->fdi = 0; } } + +int parse_sql(const char* sql, PgQueryParseResult* pRes) { + *pRes = pg_query_parse(sql); + return 0; +} + +void parse_sql_free(PgQueryParseResult* pRes) { + pg_query_free_parse_result(*pRes); +} + +void parse_sql_exit() { + pg_query_exit(); +} \ No newline at end of file diff --git a/source/tools/monitor/unity/tsdb/native/foxTSDB.h b/source/tools/monitor/unity/tsdb/native/foxTSDB.h index 88e024bcb7949bbfe2034a88e4275531163a9cee..203ca5a27c9ba10d04d3ba86bd4744ef051bcdd8 100644 --- a/source/tools/monitor/unity/tsdb/native/foxTSDB.h +++ b/source/tools/monitor/unity/tsdb/native/foxTSDB.h @@ -6,8 +6,12 @@ #define TINYINFO_FOXTSDB_H #include +#include "pg_query.h" -typedef unsigned long fox_time_t; +typedef signed long fox_time_t; +typedef size_t fox_off_t; + +#define FNAME_SIZE 16 struct foxDate { short year; @@ -20,10 +24,20 @@ struct foxDate { struct fox_manager { fox_time_t now; - off_t pos; //now offset; - off_t last_pos; // last pos - size_t fsize; // file size. + fox_off_t w_off; // now write offset; + fox_off_t r_index; // now index + fox_off_t r_next; // for next index + fox_off_t cells; // index max + size_t isize; //index file size; + size_t fsize; //data file size. 
+ fox_off_t data_pos; // data position + int table_len; // data table size + int data_len; // data size + int fd; + int fdi; + char iname[FNAME_SIZE]; // info file name, + char fname[FNAME_SIZE]; // data file name, int new_day; short year; @@ -31,6 +45,21 @@ struct fox_manager { char mday; }; +struct PgQueryError_t { + char* message; // exception message + char* funcname; // source function of exception (e.g. SearchSysCache) + char* filename; // source of exception (e.g. parse.l) + int lineno; // source of exception (e.g. 104) + int cursorpos; // char in query at which exception occurred + char* context; // additional context (optional, can be NULL) +}; + +struct PgQueryParseResult_t { + char* parse_tree; + char* stderr_buffer; + struct PgQueryError_t* error; +}; + fox_time_t get_us(void); int get_date_from_us(fox_time_t us, struct foxDate * p); int get_date(struct foxDate * p); @@ -40,12 +69,19 @@ int check_pman_date(struct fox_manager* pman, struct foxDate* pdate); int fox_setup_write(struct fox_manager* pman, struct foxDate * p, fox_time_t now); int fox_write(struct fox_manager* pman, struct foxDate* pdate, fox_time_t us, + const char* tData, int tLen, const char* data, int len); int fox_setup_read(struct fox_manager* pman, struct foxDate * p, fox_time_t now); int fox_cur_move(struct fox_manager* pman, fox_time_t now); int fox_read_resize(struct fox_manager* pman); -int fox_read(struct fox_manager* pman, fox_time_t stop, char **pp, fox_time_t *us); +int fox_next(struct fox_manager* pman, fox_time_t stop); +int fox_read_table(struct fox_manager* pman, char **pp); +int fox_read_data(struct fox_manager* pman, char **pp, fox_time_t *us); void fox_free_buffer(char **pp); void fox_del_man(struct fox_manager* pman); +int parse_sql(const char* sql, PgQueryParseResult* pRes); +void parse_sql_free(PgQueryParseResult* pRes); +void parse_sql_exit(); + #endif //TINYINFO_FOXTSDB_H diff --git a/source/tools/monitor/unity/tsdb/native/foxffi.lua 
b/source/tools/monitor/unity/tsdb/native/foxffi.lua index e6830816e527350b5623a4eb55c4245e5d63948f..8b194b47352257d91d8f4e5329ce5f201c142cd2 100644 --- a/source/tools/monitor/unity/tsdb/native/foxffi.lua +++ b/source/tools/monitor/unity/tsdb/native/foxffi.lua @@ -8,8 +8,8 @@ local ffi = require("ffi") local cffi = ffi.load('foxTSDB') ffi.cdef [[ -typedef unsigned long off_t; -typedef unsigned long fox_time_t; +typedef signed long fox_time_t; +typedef size_t fox_off_t; struct foxDate { short year; @@ -22,10 +22,20 @@ struct foxDate { struct fox_manager { fox_time_t now; - off_t pos; //now offset; - off_t last_pos; // last pos - size_t fsize; // file size. + fox_off_t w_off; // now write offset; + fox_off_t r_index; // now index + fox_off_t r_next; // for next index + fox_off_t cells; // index max + size_t isize; //index file size; + size_t fsize; //data file size. + fox_off_t data_pos; // data position + int table_len; // data table size + int data_len; // data size + int fd; + int fdi; + char iname[16]; // info file name, + char fname[16]; // data file name, int new_day; short year; @@ -33,6 +43,21 @@ struct fox_manager { char mday; }; +typedef struct { + char* message; // exception message + char* funcname; // source function of exception (e.g. SearchSysCache) + char* filename; // source of exception (e.g. parse.l) + int lineno; // source of exception (e.g. 
104) + int cursorpos; // char in query at which exception occurred + char* context; // additional context (optional, can be NULL) +} PgQueryError; + +typedef struct { + char* parse_tree; + char* stderr_buffer; + PgQueryError* error; +} PgQueryParseResult; + fox_time_t get_us(void); int get_date_from_us(fox_time_t us, struct foxDate * p); int get_date(struct foxDate * p); @@ -42,13 +67,20 @@ int check_pman_date(struct fox_manager* pman, struct foxDate* pdate); int fox_setup_write(struct fox_manager* pman, struct foxDate * p, fox_time_t now); int fox_write(struct fox_manager* pman, struct foxDate* pdate, fox_time_t us, + const char* tData, int tLen, const char* data, int len); int fox_setup_read(struct fox_manager* pman, struct foxDate * p, fox_time_t now); int fox_cur_move(struct fox_manager* pman, fox_time_t now); int fox_read_resize(struct fox_manager* pman); -int fox_read(struct fox_manager* pman, fox_time_t stop, char **pp, fox_time_t *us); +int fox_next(struct fox_manager* pman, fox_time_t stop); +int fox_read_table(struct fox_manager* pman, char **pp); +int fox_read_data(struct fox_manager* pman, char **pp, fox_time_t *us); void fox_free_buffer(char **pp); void fox_del_man(struct fox_manager* pman); + +int parse_sql(const char* sql, PgQueryParseResult* pRes); +void parse_sql_free(PgQueryParseResult* pRes); +void parse_sql_exit(); ]] return {ffi = ffi, cffi = cffi} diff --git a/source/tools/monitor/unity/tsdb/native/libpg_query.a b/source/tools/monitor/unity/tsdb/native/libpg_query.a new file mode 100644 index 0000000000000000000000000000000000000000..abf502037852b80bafa87d75853bbbd9bde320f1 Binary files /dev/null and b/source/tools/monitor/unity/tsdb/native/libpg_query.a differ diff --git a/source/tools/monitor/unity/tsdb/native/pg_query.h b/source/tools/monitor/unity/tsdb/native/pg_query.h new file mode 100644 index 0000000000000000000000000000000000000000..49efac4b93ea4f1a8c161185f38a78c38bd7b542 --- /dev/null +++ 
b/source/tools/monitor/unity/tsdb/native/pg_query.h @@ -0,0 +1,121 @@ +#ifndef PG_QUERY_H +#define PG_QUERY_H + +#include + +typedef struct { + char* message; // exception message + char* funcname; // source function of exception (e.g. SearchSysCache) + char* filename; // source of exception (e.g. parse.l) + int lineno; // source of exception (e.g. 104) + int cursorpos; // char in query at which exception occurred + char* context; // additional context (optional, can be NULL) +} PgQueryError; + +typedef struct { + unsigned int len; + char* data; +} PgQueryProtobuf; + +typedef struct { + PgQueryProtobuf pbuf; + char* stderr_buffer; + PgQueryError* error; +} PgQueryScanResult; + +typedef struct { + char* parse_tree; + char* stderr_buffer; + PgQueryError* error; +} PgQueryParseResult; + +typedef struct { + PgQueryProtobuf parse_tree; + char* stderr_buffer; + PgQueryError* error; +} PgQueryProtobufParseResult; + +typedef struct { + int stmt_location; + int stmt_len; +} PgQuerySplitStmt; + +typedef struct { + PgQuerySplitStmt **stmts; + int n_stmts; + char* stderr_buffer; + PgQueryError* error; +} PgQuerySplitResult; + +typedef struct { + char* query; + PgQueryError* error; +} PgQueryDeparseResult; + +typedef struct { + char* plpgsql_funcs; + PgQueryError* error; +} PgQueryPlpgsqlParseResult; + +typedef struct { + uint64_t fingerprint; + char* fingerprint_str; + char* stderr_buffer; + PgQueryError* error; +} PgQueryFingerprintResult; + +typedef struct { + char* normalized_query; + PgQueryError* error; +} PgQueryNormalizeResult; + +#ifdef __cplusplus +extern "C" { +#endif + +PgQueryNormalizeResult pg_query_normalize(const char* input); +PgQueryScanResult pg_query_scan(const char* input); +PgQueryParseResult pg_query_parse(const char* input); +PgQueryProtobufParseResult pg_query_parse_protobuf(const char* input); +PgQueryPlpgsqlParseResult pg_query_parse_plpgsql(const char* input); + +PgQueryFingerprintResult pg_query_fingerprint(const char* input); + +// Use 
pg_query_split_with_scanner when you need to split statements that may +// contain parse errors, otherwise pg_query_split_with_parser is recommended +// for improved accuracy due the parser adding additional token handling. +// +// Note that we try to support special cases like comments, strings containing +// ";" on both, as well as oddities like "CREATE RULE .. (SELECT 1; SELECT 2);" +// which is treated as as single statement. +PgQuerySplitResult pg_query_split_with_scanner(const char *input); +PgQuerySplitResult pg_query_split_with_parser(const char *input); + +PgQueryDeparseResult pg_query_deparse_protobuf(PgQueryProtobuf parse_tree); + +void pg_query_free_normalize_result(PgQueryNormalizeResult result); +void pg_query_free_scan_result(PgQueryScanResult result); +void pg_query_free_parse_result(PgQueryParseResult result); +void pg_query_free_split_result(PgQuerySplitResult result); +void pg_query_free_deparse_result(PgQueryDeparseResult result); +void pg_query_free_protobuf_parse_result(PgQueryProtobufParseResult result); +void pg_query_free_plpgsql_parse_result(PgQueryPlpgsqlParseResult result); +void pg_query_free_fingerprint_result(PgQueryFingerprintResult result); + +// Optional, cleans up the top-level memory context (automatically done for threads that exit) +void pg_query_exit(void); + +// Postgres version information +#define PG_VERSION "13.8" +#define PG_MAJORVERSION "13" +#define PG_VERSION_NUM 130008 + +// Deprecated APIs below + +void pg_query_init(void); // Deprecated as of 9.5-1.4.1, this is now run automatically as needed + +#ifdef __cplusplus +} +#endif + +#endif