From ffea15578a7c25fab3ef743911db2346d15484e1 Mon Sep 17 00:00:00 2001 From: Zhen Chen Date: Sat, 14 Jan 2023 06:21:55 +0800 Subject: [PATCH] sync bugfix patches from openeuler/gala-gopher - fix ksliprobe get invalid args occasionally at startup - fix error print when starting gala-gopher - add system_uuid field to distinguish client when post to pyroscope server - repair stackprobe caused cpu rush - add support to pyroscope - bugfix: add check if thread is 0 - fix stackprobe memory allocation and deallocation errors - normalize time format in flamegraph svg filename (cherry picked from commit 6aef5cc8e4e2a34324c3f01663d2b61c0462f4ac) --- add-configuration-instructions.patch | 273 ++++++++++++ add-support-to-pyroscope.patch | 421 ++++++++++++++++++ ...field-to-distinguish-client-when-pos.patch | 174 ++++++++ bugfix-add-check-if-thread-is-0.patch | 209 +++++++++ fix-bug.patch | 38 ++ ...t-invalid-args-occasionally-at-start.patch | 171 +++++++ ...emory-allocation-and-deallocation-er.patch | 56 +++ gala-gopher.spec | 26 +- repair-stackprobe-caused-cpu-rush.patch | 204 +++++++++ some-bugfix.patch | 58 +++ 10 files changed, 1627 insertions(+), 3 deletions(-) create mode 100644 add-configuration-instructions.patch create mode 100644 add-support-to-pyroscope.patch create mode 100644 add-system_uuid-field-to-distinguish-client-when-pos.patch create mode 100644 bugfix-add-check-if-thread-is-0.patch create mode 100644 fix-bug.patch create mode 100644 fix-ksliprobe-get-invalid-args-occasionally-at-start.patch create mode 100644 fix-stackprobe-memory-allocation-and-deallocation-er.patch create mode 100644 repair-stackprobe-caused-cpu-rush.patch create mode 100644 some-bugfix.patch diff --git a/add-configuration-instructions.patch b/add-configuration-instructions.patch new file mode 100644 index 0000000..6d829ad --- /dev/null +++ b/add-configuration-instructions.patch @@ -0,0 +1,273 @@ +From 61431063cc0d52d4cee266cf5af6caccb6bc7803 Mon Sep 17 00:00:00 2001 +From: wo_cow +Date: 
Mon, 12 Dec 2022 16:47:57 +0800 +Subject: [PATCH] add configuration instructions + +--- + doc/conf_introduction.md | 70 +++++++++++++++++++ + src/common/util.c | 8 +-- + .../cadvisor.probe/cadvisor_probe.conf | 4 +- + .../cadvisor.probe/cadvisor_probe.meta | 4 +- + .../python.probe/cadvisor.probe/readme.md | 49 +++++++++++++ + .../python.probe/pg_stat.probe/readme.md | 23 ++++++ + 6 files changed, 150 insertions(+), 8 deletions(-) + create mode 100644 src/probes/extends/python.probe/cadvisor.probe/readme.md + create mode 100644 src/probes/extends/python.probe/pg_stat.probe/readme.md + +diff --git a/doc/conf_introduction.md b/doc/conf_introduction.md +index ce15a92..a35a70a 100644 +--- a/doc/conf_introduction.md ++++ b/doc/conf_introduction.md +@@ -5,10 +5,20 @@ gala-gopher启动必须的外部参数通过配置文件`gala-gopher.conf`定义 + + gala-gopher支持用户配置观测的应用范围,即支持用户设置关注的、需要监测的具体应用,此项配置是在`gala-gopher-app.conf`配置文件中配置。 + ++部分extend探针有自己的配置文件,开启该探针前需要设置好探针的配置文件。 ++ + ## 配置介绍 + + 配置文件归档在[config目录](../config)。 + ++extend探针配置文件归档在探针同级目录下。目前有配置文件的探针有: ++ ++[stackprobe](../src/probes/extends/ebpf.probe/src/stackprobe) ++ ++[cadvisor.probe](../src/probes/extends/python.probe/cadvisor.probe) ++ ++[pg_stat.probe](../src/probes/extends/python.probe/pg_stat.probe) ++ + ### gala-gopher.conf + + `gala-gopher.conf`文件的安装路径为 `/opt/gala-gopher/gala-gopher.conf`。该文件配置项说明如下: +@@ -84,6 +94,66 @@ gala-gopher支持用户配置观测的应用范围,即支持用户设置关注 + 配置示例参见 [gala-gopher-app.conf示例](#gala-gopher-app.conf示例) 。 + + ++### stackprobe.conf ++ ++`stackprobe.conf`文件的安装路径为 `/opt/gala-gopher/extend_probes/stackprobe.conf`。该文件配置项说明如下: ++ ++- general:通用设置 ++ - period:火焰图生成周期 ++ - log_dir:stackprobe探针日志路径 ++ - svg_dir:svg格式火焰图存储路径 ++ - flame_dir:堆栈信息存储路径 ++ - debug_dir:调试信息文件路径 ++- flame_name:各类型火焰图开关 ++ - oncpu:oncpu火焰图开关 ++ - offcpu:offcpu火焰图开关 ++ - io:io火焰图开关 ++ - memleak:内存泄漏火焰图开关 ++- application:暂未使用 ++ ++ ++### cadvisor_probe.conf ++ ++`cadvisor_probe.conf`文件的安装路径为 `/opt/gala-gopher/extend_probes/cadvisor_probe.conf`。该文件配置项说明如下: ++ ++- 
version:配置文件版本号 ++- measurements:待集成到gala-gopher的观测指标 ++ - table_name: 数据表名称 ++ - entity_name: 观测对象名称 ++ - fields:数据字段 ++ - description:数据字段描述信息 ++ - type:数据字段类型,需和cAdvisor上报数据类型一致 ++ - name:数据字段名称,需和cAdvisor上报数据名称一致 ++ ++> 说明:cadvisor_probe.conf和cadvisor_probe.meta的字段需要一致。例外:若conf中type字段为counter,在meta中对应type字段应为gauge ++ ++ ++### pg_stat_probe.conf ++ ++`pg_stat_probe.conf`文件的安装路径为 `/opt/gala-gopher/extend_probes/pg_stat_probe.conf`。该文件配置项说明如下: ++ ++- servers:PostgreSQL服务端配置 ++ - ip:服务端IP ++ - port:服务端端口 ++ - dbname:服务端任意数据库名称 ++ - user:用户名 ++ - password:用户密码 ++ ++上述配置用户需能够访问pg_stat_database视图,配置最小权限的命令如下: ++ ++PostgreSQL: ++```shell ++grant SELECT ON pg_stat_database to ; ++grant pg_monitor to ; ++``` ++ ++GaussDB: ++```shell ++grant usage on schema dbe_perf to ; ++grant select on pg_stat_replication to ; ++``` ++ ++ + + ## 启动参数介绍 + +diff --git a/src/common/util.c b/src/common/util.c +index 1575546..e25e9ee 100644 +--- a/src/common/util.c ++++ b/src/common/util.c +@@ -24,7 +24,7 @@ + + char *get_cur_date(void) + { +- /* return date str, ex: 2021/5/17 */ ++ /* return date str, ex: 2021/05/17 */ + static char tm[TM_STR_LEN] = {0}; + struct tm *tmp_ptr = NULL; + time_t t; +@@ -34,7 +34,7 @@ char *get_cur_date(void) + tmp_ptr = localtime(&t); + (void)snprintf(tm, + TM_STR_LEN, +- "%d-%d-%d", ++ "%d-%02d-%02d", + (1900 + tmp_ptr->tm_year), + (1 + tmp_ptr->tm_mon), + tmp_ptr->tm_mday); +@@ -43,7 +43,7 @@ char *get_cur_date(void) + + char *get_cur_time(void) + { +- /* return time str, ex: 2021/5/17 19:56:03 */ ++ /* return time str, ex: 2021/05/17 19:56:03 */ + static char tm[TM_STR_LEN] = {0}; + struct tm *tmp_ptr = NULL; + time_t t; +@@ -53,7 +53,7 @@ char *get_cur_time(void) + tmp_ptr = localtime(&t); + (void)snprintf(tm, + TM_STR_LEN, +- "%d-%d-%d-%02d-%02d-%02d", ++ "%d-%02d-%02d-%02d-%02d-%02d", + (1900 + tmp_ptr->tm_year), + (1 + tmp_ptr->tm_mon), + tmp_ptr->tm_mday, +diff --git a/src/probes/extends/python.probe/cadvisor.probe/cadvisor_probe.conf 
b/src/probes/extends/python.probe/cadvisor.probe/cadvisor_probe.conf +index 215bb70..3027d4f 100644 +--- a/src/probes/extends/python.probe/cadvisor.probe/cadvisor_probe.conf ++++ b/src/probes/extends/python.probe/cadvisor.probe/cadvisor_probe.conf +@@ -189,12 +189,12 @@ measurements: + name: "container_id", + }, + { +- description: "...", ++ description: "failure type", + type: "label", + name: "failure_type", + }, + { +- description: "...", ++ description: "scope", + type: "label", + name: "scope", + }, +diff --git a/src/probes/extends/python.probe/cadvisor.probe/cadvisor_probe.meta b/src/probes/extends/python.probe/cadvisor.probe/cadvisor_probe.meta +index 598d585..178c750 100644 +--- a/src/probes/extends/python.probe/cadvisor.probe/cadvisor_probe.meta ++++ b/src/probes/extends/python.probe/cadvisor.probe/cadvisor_probe.meta +@@ -189,12 +189,12 @@ measurements: + name: "container_id", + }, + { +- description: "...", ++ description: "failure type", + type: "label", + name: "failure_type", + }, + { +- description: "...", ++ description: "scope", + type: "label", + name: "scope", + }, +diff --git a/src/probes/extends/python.probe/cadvisor.probe/readme.md b/src/probes/extends/python.probe/cadvisor.probe/readme.md +new file mode 100644 +index 0000000..62a5532 +--- /dev/null ++++ b/src/probes/extends/python.probe/cadvisor.probe/readme.md +@@ -0,0 +1,49 @@ ++# cadvisor 探针开发说明 ++ ++## 功能描述 ++ ++集成容器性能分析工具[cAdvisor](https://github.com/google/cadvisor)的统计数据。支持的功能有: ++ ++- 设置cAdvisor监听端口(必需) ++ ++ 通过-p参数设置,无默认值,示例: ++ ++ `python3 cadvisor_probe.py -p 8080` ++ ++ 表示监控cAdvisor输出,若cAdvisor未启动,则通过`cadvisor -port 8080`启动cAdvisor ++ ++- 设置观测周期 ++ ++ 通过-d参数设置,单位为秒,默认值5,示例: ++ ++ `python3 cadvisor_probe.py -p 8080 -d 5` ++ ++ 表示每隔5s输出统计信息 ++ ++- 开启观测白名单 ++ ++ 通过-F参数设置,配置为`task`表示按照`gala-gopher-app.conf`过滤,配置为具体进程的pid表示仅监控此进程,不配置则观测所有进程,默认不配置,示例: ++ ++ `python3 cadvisor_probe.py -p 8080 -F task` ++ ++ 表示只观测`gala-gopher-app.conf`中的进程 ++ ++ `python3 cadvisor_probe.py -p 8080 -F 1234` 
++ ++ 表示只观测pid为1234的进程 ++ ++- 设置容器观测指标 ++ ++ 通过cadvisor_probe.conf和cadvisor_probe.meta配置,二者需对应。配置方法详见[conf_introduction.md](../../../../../doc/conf_introduction.md#cadvisor_probe.conf) ++ ++- 容器运行信息监控,具体的观测指标信息参见`cadvisor_probe.meta`。 ++ ++## 采集方案 ++ ++拉起cAdvisor进程,并监控[cAdvisor原始Prometheus统计数据](https://github.com/google/cadvisor/blob/master/docs/storage/prometheus.md), ++采集cadvisor_probe.conf中配置的统计项,将数据格式转换后按照cadvisor_probe.meta输出为gala-gopher框架支持的格式。 ++ ++## 约束条件 ++ ++- 需要预先安装cAdvisor ++ +diff --git a/src/probes/extends/python.probe/pg_stat.probe/readme.md b/src/probes/extends/python.probe/pg_stat.probe/readme.md +new file mode 100644 +index 0000000..1ddd6b7 +--- /dev/null ++++ b/src/probes/extends/python.probe/pg_stat.probe/readme.md +@@ -0,0 +1,23 @@ ++# pg_stat 探针开发说明 ++ ++## 功能描述 ++ ++获取PostgreSQL Sever的TPS统计数据。支持的功能有: ++ ++- 设置被观测服务端信息 ++ ++ 通过pg_stat_probe.conf设置,支持多服务端,配置方法详见[conf_introduction.md](../../../../../doc/conf_introduction.md#pg_stat_probe.conf) ++ ++- 设置观测周期 ++ ++ 通过-d参数设置,单位为秒,默认值5,示例: ++ ++ `python3 pg_stat_probe.py -d 5` ++ ++ 表示每隔5s输出统计信息 ++ ++- 观测PostgreSQL Sever中各数据库的TPS统计数据,具体的观测指标信息参见`pg_stat_probe.meta` ++ ++## 采集方案 ++ ++通过计算数据库已提交的事务数在单位时间内的增长来计算TPS +-- +2.33.0 + diff --git a/add-support-to-pyroscope.patch b/add-support-to-pyroscope.patch new file mode 100644 index 0000000..7899897 --- /dev/null +++ b/add-support-to-pyroscope.patch @@ -0,0 +1,421 @@ +From 8099d91f2584fe6ee12eaea2d95b8d09a25075cb Mon Sep 17 00:00:00 2001 +From: wo_cow +Date: Fri, 16 Dec 2022 19:42:55 +0800 +Subject: [PATCH] add support to pyroscope + +--- + gala-gopher.spec | 4 +- + .../ebpf.probe/src/stackprobe/Makefile | 2 +- + .../src/stackprobe/conf/stackprobe.conf | 1 + + .../src/stackprobe/conf/stackprobe_conf.h | 1 + + .../src/stackprobe/conf/stackprobe_config.c | 6 + + .../ebpf.probe/src/stackprobe/flame_graph.c | 193 +++++++++++++++++- + .../ebpf.probe/src/stackprobe/flame_graph.h | 6 +- + .../ebpf.probe/src/stackprobe/stackprobe.c | 13 +- + 
.../ebpf.probe/src/stackprobe/stackprobe.h | 8 + + 9 files changed, 221 insertions(+), 13 deletions(-) + +diff --git a/gala-gopher.spec b/gala-gopher.spec +index a63351d..b8ed348 100644 +--- a/gala-gopher.spec ++++ b/gala-gopher.spec +@@ -14,8 +14,8 @@ BuildRequires: clang >= 10.0.1 + BuildRequires: llvm + BuildRequires: libconfig-devel librdkafka-devel libmicrohttpd-devel + BuildRequires: uthash-devel libbpf libbpf-devel log4cplus-devel +-BuildRequires: java-1.8.0-openjdk-devel +-Requires: bash glibc elfutils zlib elfutils-devel bpftool iproute erlang-eflame ++BuildRequires: java-1.8.0-openjdk-devel libcurl-devel ++Requires: bash glibc elfutils zlib elfutils-devel bpftool iproute erlang-eflame libcurl + + %description + gala-gopher is a low-overhead eBPF-based probes framework +diff --git a/src/probes/extends/ebpf.probe/src/stackprobe/Makefile b/src/probes/extends/ebpf.probe/src/stackprobe/Makefile +index a36224d..c5758fe 100644 +--- a/src/probes/extends/ebpf.probe/src/stackprobe/Makefile ++++ b/src/probes/extends/ebpf.probe/src/stackprobe/Makefile +@@ -31,7 +31,7 @@ deps: $(DEPS) + + app: $(APP) + %: %.c $(SRC_C) +- $(CC) $(CFLAGS) $(patsubst %.cpp, %.o, $(SRC_CPLUS)) $(INCLUDES) $^ $(LDFLAGS) $(LINK_TARGET) -o $@ ++ $(CC) $(CFLAGS) $(patsubst %.cpp, %.o, $(SRC_CPLUS)) $(INCLUDES) $^ $(LDFLAGS) $(LINK_TARGET) -lcurl -o $@ + @echo $@ "compiling completed." 
+ clean: + rm -rf $(DEPS) +diff --git a/src/probes/extends/ebpf.probe/src/stackprobe/conf/stackprobe.conf b/src/probes/extends/ebpf.probe/src/stackprobe/conf/stackprobe.conf +index a2edd5d..ab79572 100644 +--- a/src/probes/extends/ebpf.probe/src/stackprobe/conf/stackprobe.conf ++++ b/src/probes/extends/ebpf.probe/src/stackprobe/conf/stackprobe.conf +@@ -5,6 +5,7 @@ general = + svg_dir = "/var/log/gala-gopher/stacktrace"; + flame_dir = "/var/log/gala-gopher/flamegraph"; + debug_dir = "/usr/lib/debug"; ++ pyroscope_server = "localhost:4040"; + }; + + flame_name = +diff --git a/src/probes/extends/ebpf.probe/src/stackprobe/conf/stackprobe_conf.h b/src/probes/extends/ebpf.probe/src/stackprobe/conf/stackprobe_conf.h +index fa39dec..6c1787b 100644 +--- a/src/probes/extends/ebpf.probe/src/stackprobe/conf/stackprobe_conf.h ++++ b/src/probes/extends/ebpf.probe/src/stackprobe/conf/stackprobe_conf.h +@@ -35,6 +35,7 @@ typedef struct { + char svgDir[PATH_LEN]; + char flameDir[PATH_LEN]; + char debugDir[PATH_LEN]; ++ char pyroscopeServer[PATH_LEN]; + } GeneralConfig; + + typedef struct { +diff --git a/src/probes/extends/ebpf.probe/src/stackprobe/conf/stackprobe_config.c b/src/probes/extends/ebpf.probe/src/stackprobe/conf/stackprobe_config.c +index c191187..a9b5dfb 100644 +--- a/src/probes/extends/ebpf.probe/src/stackprobe/conf/stackprobe_config.c ++++ b/src/probes/extends/ebpf.probe/src/stackprobe/conf/stackprobe_config.c +@@ -205,6 +205,12 @@ static int configLoadGeneral(void *config, config_setting_t *settings) + } + (void)strncpy(generalConfig->debugDir, strVal, PATH_LEN - 1); + ++ ret = config_setting_lookup_string(settings, "pyroscope_server", &strVal); ++ if (ret == 0) { ++ strVal = ""; // will not post to pyroscope ++ } ++ (void)strncpy(generalConfig->pyroscopeServer, strVal, PATH_LEN - 1); ++ + return 0; + } + +diff --git a/src/probes/extends/ebpf.probe/src/stackprobe/flame_graph.c b/src/probes/extends/ebpf.probe/src/stackprobe/flame_graph.c +index 870c6d9..126b98d 
100644 +--- a/src/probes/extends/ebpf.probe/src/stackprobe/flame_graph.c ++++ b/src/probes/extends/ebpf.probe/src/stackprobe/flame_graph.c +@@ -25,6 +25,7 @@ + #include + #include + #include ++#include + + #ifdef BPF_PROG_KERN + #undef BPF_PROG_KERN +@@ -37,6 +38,27 @@ + #include "bpf.h" + #include "flame_graph.h" + ++struct post_info_s { ++ int post_flag; ++ int sk; ++ int remain_size; ++ char *buf_start; ++ char *buf; ++ CURL *curl; ++}; ++ ++struct MemoryStruct { ++ char *memory; ++ size_t size; ++}; ++ ++static char *appname[STACK_SVG_MAX] = { ++ "gala-gopher-oncpu", ++ "gala-gopher-offcpu", ++ "gala-gopher-io", ++ "gala-gopher-memleak" ++}; ++ + #if 1 + + static char __test_flame_graph_flags(struct stack_svg_mng_s *svg_mng, u32 flags) +@@ -170,8 +192,10 @@ static void __reopen_flame_graph_file(struct stack_svg_mng_s *svg_mng) + + #define HISTO_TMP_LEN (2 * STACK_SYMBS_LEN) + static char __histo_tmp_str[HISTO_TMP_LEN]; ++ ++#define POST_MAX_LEN 2048 + static int __do_wr_stack_histo(struct stack_svg_mng_s *svg_mng, +- struct stack_trace_histo_s *stack_trace_histo, int first) ++ struct stack_trace_histo_s *stack_trace_histo, int first, struct post_info_s *post_info) + { + FILE *fp = __get_flame_graph_fp(svg_mng); + if (!fp) { +@@ -188,34 +212,166 @@ static int __do_wr_stack_histo(struct stack_svg_mng_s *svg_mng, + (void)snprintf(__histo_tmp_str, HISTO_TMP_LEN, "\n%s %llu", + stack_trace_histo->stack_symbs_str, stack_trace_histo->count); + } ++ if (post_info->post_flag) { ++ (void)__snprintf(&post_info->buf, post_info->remain_size, &post_info->remain_size, "%s", __histo_tmp_str); ++ } ++ + (void)fputs(__histo_tmp_str, fp); + return 0; + } + +-static void __do_wr_flamegraph(struct stack_svg_mng_s *svg_mng, struct stack_trace_histo_s *head) ++static size_t __write_memory_cb(void *contents, size_t size, size_t nmemb, void *userp) ++{ ++ size_t realsize = size * nmemb; ++ struct MemoryStruct *mem = (struct MemoryStruct *)userp; ++ ++ char *ptr = realloc(mem->memory, 
mem->size + realsize + 1); ++ if(!ptr) { ++ /* out of memory! */ ++ printf("not enough memory (realloc returned NULL)\n"); ++ return 0; ++ } ++ ++ mem->memory = ptr; ++ memcpy(&(mem->memory[mem->size]), contents, realsize); ++ mem->size += realsize; ++ mem->memory[mem->size] = 0; ++ ++ return realsize; ++} ++ ++// http://localhost:4040/ingest?name=gala-gopher-oncpu&from=1671189474&until=1671189534 ++static int __build_url(char *url, struct post_server_s *post_server, int en_type) ++{ ++ time_t now, before; ++ (void)time(&now); ++ if (post_server->last_post_ts == 0) { ++ before = now - 60; // 60s ++ } else { ++ before = post_server->last_post_ts + 1; ++ } ++ post_server->last_post_ts = now; ++ ++ (void)snprintf(url, LINE_BUF_LEN, ++ "http://%s/ingest?name=%s&from=%ld&until=%ld", ++ post_server->host, ++ appname[en_type], ++ (long)before, ++ (long)now); ++ return 0; ++} ++ ++ ++static void __curl_post(struct post_server_s *post_server, struct post_info_s *post_info, int en_type) ++{ ++ CURLcode res; ++ CURL *curl = post_info->curl; ++ if (curl == NULL) { ++ return; ++ } ++ long post_len = (long)strlen(post_info->buf_start); ++ if (post_len == 0) { ++ DEBUG("[FLAMEGRAPH]: buf is null. 
No need to curl post post to %s\n", appname[en_type]); ++ return; ++ } ++ ++ char url[LINE_BUF_LEN] = {0}; ++ __build_url(url, post_server, en_type); ++ struct MemoryStruct chunk; ++ chunk.memory = malloc(1); /* will be grown as needed by realloc above */ ++ chunk.size = 0; /* no data at this point */ ++ ++ //curl_easy_setopt(curl, CURLOPT_URL, post_server->host); ++ curl_easy_setopt(curl, CURLOPT_URL, url); ++ ++ curl_easy_setopt(curl, CURLOPT_TIMEOUT, post_server->timeout); ++ ++ /* send all data to this function */ ++ curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, __write_memory_cb); ++ ++ /* we pass our 'chunk' struct to the callback function */ ++ curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void *)&chunk); ++ ++ /* some servers do not like requests that are made without a user-agent ++ field, so we provide one */ ++ curl_easy_setopt(curl, CURLOPT_USERAGENT, "libcurl-agent/1.0"); ++ ++ curl_easy_setopt(curl, CURLOPT_POSTFIELDS, post_info->buf_start); ++ ++ /* if we do not provide POSTFIELDSIZE, libcurl will strlen() by ++ itself */ ++ curl_easy_setopt(curl, CURLOPT_POSTFIELDSIZE, post_len); ++ ++ /* Perform the request, res will get the return code */ ++ res = curl_easy_perform(curl); ++ /* Check for errors */ ++ if(res != CURLE_OK) { ++ ERROR("[FLAMEGRAPH]: curl post failed: %s\n", curl_easy_strerror(res)); ++ } else { ++ INFO("[FLAMEGRAPH]: curl post post to %s success\n", appname[en_type]); ++ } ++ ++ /* always cleanup */ ++ curl_easy_cleanup(curl); ++ if (chunk.memory) { ++ free(chunk.memory); ++ chunk.memory = NULL; ++ } ++ free(post_info->buf_start); ++ post_info->buf_start = NULL; ++ ++ return; ++} ++ ++static void __init_curl_handle(struct post_server_s *post_server, struct post_info_s *post_info) ++{ ++ if (post_server == NULL || post_server->post_flag == 0) { ++ return; ++ } ++ ++ post_info->curl = curl_easy_init(); ++ if(post_info->curl) { ++ post_info->buf = (char *)malloc(POST_MAX_LEN); ++ post_info->buf_start = post_info->buf; ++ if (post_info->buf 
!= NULL) { ++ post_info->buf[0] = 0; ++ post_info->post_flag = 1; ++ } ++ } ++} ++ ++static void __do_wr_flamegraph(struct stack_svg_mng_s *svg_mng, struct stack_trace_histo_s *head, ++ struct post_server_s *post_server, int en_type) + { + int first_flag = 0; ++ struct post_info_s post_info = {.remain_size = POST_MAX_LEN, .post_flag = 0}; + + if (__test_flame_graph_flags(svg_mng, FLAME_GRAPH_NEW)) { + first_flag = 1; + } + +- struct stack_trace_histo_s *item, *tmp; ++ __init_curl_handle(post_server, &post_info); + ++ struct stack_trace_histo_s *item, *tmp; + H_ITER(head, item, tmp) { +- (void)__do_wr_stack_histo(svg_mng, item, first_flag); ++ (void)__do_wr_stack_histo(svg_mng, item, first_flag, &post_info); + first_flag = 0; + } +- ++ if (post_info.post_flag) { ++ __curl_post(post_server, &post_info, en_type); ++ } ++ + __flush_flame_graph_file(svg_mng); + __reset_flame_graph_flags(svg_mng, ~FLAME_GRAPH_NEW); + } + + #endif + +-void wr_flamegraph(struct stack_svg_mng_s *svg_mng, struct stack_trace_histo_s *head, int en_type) ++void wr_flamegraph(struct stack_svg_mng_s *svg_mng, struct stack_trace_histo_s *head, int en_type, ++ struct post_server_s *post_server) + { +- __do_wr_flamegraph(svg_mng, head); ++ __do_wr_flamegraph(svg_mng, head, post_server, en_type); ++ + if (is_svg_tmout(svg_mng)) { + (void)create_svg_file(svg_mng, + __get_flame_graph_file(svg_mng), en_type); +@@ -251,3 +407,26 @@ int set_flame_graph_path(struct stack_svg_mng_s *svg_mng, const char* path, cons + return 0; + } + ++int set_post_server(struct post_server_s *post_server, const char *server_str) ++{ ++ if (server_str == NULL) { ++ return -1; ++ } ++ ++ char *p = strrchr(server_str, ':'); ++ if (p == NULL) { ++ return -1; ++ } ++ ++ curl_global_init(CURL_GLOBAL_ALL); ++ post_server->post_flag = 1; ++ post_server->timeout = 3; ++ (void)strcpy(post_server->host, server_str); ++ ++ return 0; ++} ++ ++void clean_post_server() ++{ ++ curl_global_cleanup(); ++} +diff --git 
a/src/probes/extends/ebpf.probe/src/stackprobe/flame_graph.h b/src/probes/extends/ebpf.probe/src/stackprobe/flame_graph.h +index a9ed999..ea29107 100644 +--- a/src/probes/extends/ebpf.probe/src/stackprobe/flame_graph.h ++++ b/src/probes/extends/ebpf.probe/src/stackprobe/flame_graph.h +@@ -20,7 +20,9 @@ + #include "svg.h" + #include "stackprobe.h" + +-void wr_flamegraph(struct stack_svg_mng_s *svg_mng, struct stack_trace_histo_s *head, int en_type); ++void wr_flamegraph(struct stack_svg_mng_s *svg_mng, struct stack_trace_histo_s *head, int en_type, ++ struct post_server_s *post_server); + int set_flame_graph_path(struct stack_svg_mng_s *svg_mng, const char* path, const char *flame_name); +- ++int set_post_server(struct post_server_s *post_server, const char *pyroscopeServer); ++void clean_post_server(); + #endif +diff --git a/src/probes/extends/ebpf.probe/src/stackprobe/stackprobe.c b/src/probes/extends/ebpf.probe/src/stackprobe/stackprobe.c +index fa37a72..a4733e6 100644 +--- a/src/probes/extends/ebpf.probe/src/stackprobe/stackprobe.c ++++ b/src/probes/extends/ebpf.probe/src/stackprobe/stackprobe.c +@@ -753,6 +753,10 @@ static void destroy_stack_trace(struct stack_trace_s **ptr_st) + return; + } + ++ if (st->post_server.post_flag) { ++ clean_post_server(); ++ } ++ + for (int cpu = 0; cpu < st->cpus_num; cpu++) { + if (st->pmu_fd[cpu] > 0) { + ioctl(st->pmu_fd[cpu], PERF_EVENT_IOC_DISABLE); +@@ -843,6 +847,13 @@ static struct stack_trace_s *create_stack_trace(StackprobeConfig *conf) + } + #endif + ++ if (set_post_server(&st->post_server, conf->generalConfig->pyroscopeServer) != 0) { ++ INFO("[STACKPROBE]: Do not post to Pyroscope Server.\n"); ++ st->post_server.post_flag = 0; ++ } else { ++ INFO("[STACKPROBE]: Will post to Pyroscope Server: %s.\n", conf->generalConfig->pyroscopeServer); ++ } ++ + st->elf_reader = create_elf_reader(conf->generalConfig->debugDir); + if (!st->elf_reader) { + goto err; +@@ -1317,7 +1328,7 @@ static void switch_stackmap() + continue; + } 
+ (void)stack_id2histogram(st, i, st->is_stackmap_a); +- wr_flamegraph(st->svg_stack_traces[i]->svg_mng, st->svg_stack_traces[i]->histo_tbl, i); ++ wr_flamegraph(st->svg_stack_traces[i]->svg_mng, st->svg_stack_traces[i]->histo_tbl, i, &st->post_server); + clear_raw_stack_trace(st->svg_stack_traces[i], st->is_stackmap_a); + } + record_running_ctx(st); +diff --git a/src/probes/extends/ebpf.probe/src/stackprobe/stackprobe.h b/src/probes/extends/ebpf.probe/src/stackprobe/stackprobe.h +index 1f85225..657b6e7 100644 +--- a/src/probes/extends/ebpf.probe/src/stackprobe/stackprobe.h ++++ b/src/probes/extends/ebpf.probe/src/stackprobe/stackprobe.h +@@ -102,6 +102,13 @@ struct svg_stack_trace_s { + struct stack_trace_histo_s *histo_tbl; + }; + ++struct post_server_s { ++ char post_flag; ++ long timeout; // sec ++ char host[PATH_LEN]; ++ time_t last_post_ts; ++}; ++ + struct stack_trace_s { + char pad[3]; + int cpus_num; +@@ -112,6 +119,7 @@ struct stack_trace_s { + int stackmap_b_fd; + u64 convert_stack_count; + time_t running_times; ++ struct post_server_s post_server; + + struct svg_stack_trace_s *svg_stack_traces[STACK_SVG_MAX]; + struct ksymb_tbl_s *ksymbs; +-- +2.33.0 + diff --git a/add-system_uuid-field-to-distinguish-client-when-pos.patch b/add-system_uuid-field-to-distinguish-client-when-pos.patch new file mode 100644 index 0000000..55e6fe2 --- /dev/null +++ b/add-system_uuid-field-to-distinguish-client-when-pos.patch @@ -0,0 +1,174 @@ +From 60f0111ea9eeb1ab4e26de2acafb26e252d679a9 Mon Sep 17 00:00:00 2001 +From: wo_cow +Date: Mon, 9 Jan 2023 11:06:00 +0800 +Subject: [PATCH] add system_uuid field to distinguish client when post to + pyroscope server + +--- + src/common/common.h | 1 + + src/common/util.c | 20 ++++++++++++++++ + src/lib/imdb/imdb.c | 24 ++----------------- + .../ebpf.probe/src/stackprobe/flame_graph.c | 7 +++--- + .../ebpf.probe/src/stackprobe/stackprobe.c | 3 +++ + .../ebpf.probe/src/stackprobe/stackprobe.h | 2 ++ + 6 files changed, 32 insertions(+), 
25 deletions(-) + +diff --git a/src/common/common.h b/src/common/common.h +index f5feebf..64c040a 100644 +--- a/src/common/common.h ++++ b/src/common/common.h +@@ -192,5 +192,6 @@ int exec_cmd(const char *cmd, char *buf, unsigned int buf_len); + char is_exist_mod(const char *mod); + int __snprintf(char **buf, const int bufLen, int *remainLen, const char *format, ...); + char is_digit_str(const char *s); ++int get_system_uuid(char *buffer, size_t size); + + #endif +diff --git a/src/common/util.c b/src/common/util.c +index e25e9ee..945da65 100644 +--- a/src/common/util.c ++++ b/src/common/util.c +@@ -196,3 +196,23 @@ char is_digit_str(const char *s) + return 1; + } + ++int get_system_uuid(char *buffer, size_t size) ++{ ++ FILE *fp = NULL; ++ ++ fp = popen("dmidecode -s system-uuid | tr 'A-Z' 'a-z'", "r"); ++ if (fp == NULL) { ++ return -1; ++ } ++ ++ if (fgets(buffer, (int)size, fp) == NULL) { ++ pclose(fp); ++ return -1; ++ } ++ if (strlen(buffer) > 0 && buffer[strlen(buffer) - 1] == '\n') { ++ buffer[strlen(buffer) - 1] = '\0'; ++ } ++ ++ pclose(fp); ++ return 0; ++} +\ No newline at end of file +diff --git a/src/lib/imdb/imdb.c b/src/lib/imdb/imdb.c +index 767b930..bc562c2 100644 +--- a/src/lib/imdb/imdb.c ++++ b/src/lib/imdb/imdb.c +@@ -18,7 +18,7 @@ + #include + #include + #include +- ++#include "common.h" + #include "imdb.h" + + static uint32_t g_recordTimeout = 60; // default timeout: 60 seconds +@@ -250,26 +250,6 @@ void IMDB_TableDestroy(IMDB_Table *table) + return; + } + +-static int IMDB_GetSystemUuid(char *buffer, size_t size) +-{ +- FILE *fp = NULL; +- +- fp = popen("dmidecode -s system-uuid | tr 'A-Z' 'a-z'", "r"); +- if (fp == NULL) { +- return -1; +- } +- +- if (fgets(buffer, (int)size, fp) == NULL) { +- pclose(fp); +- return -1; +- } +- if (strlen(buffer) > 0 && buffer[strlen(buffer) - 1] == '\n') { +- buffer[strlen(buffer) - 1] = '\0'; +- } +- +- pclose(fp); +- return 0; +-} + + IMDB_DataBaseMgr *IMDB_DataBaseMgrCreate(uint32_t capacity) + { +@@ 
-282,7 +262,7 @@ IMDB_DataBaseMgr *IMDB_DataBaseMgrCreate(uint32_t capacity) + + memset(mgr, 0, sizeof(IMDB_DataBaseMgr)); + +- ret = IMDB_GetSystemUuid(mgr->nodeInfo.systemUuid, sizeof(mgr->nodeInfo.systemUuid)); ++ ret = get_system_uuid(mgr->nodeInfo.systemUuid, sizeof(mgr->nodeInfo.systemUuid)); + if (ret != 0) { + ERROR("[IMDB] Can not get system uuid.\n"); + free(mgr); +diff --git a/src/probes/extends/ebpf.probe/src/stackprobe/flame_graph.c b/src/probes/extends/ebpf.probe/src/stackprobe/flame_graph.c +index 40c6dcb..d6d2eb2 100644 +--- a/src/probes/extends/ebpf.probe/src/stackprobe/flame_graph.c ++++ b/src/probes/extends/ebpf.probe/src/stackprobe/flame_graph.c +@@ -247,16 +247,17 @@ static int __build_url(char *url, struct post_server_s *post_server, int en_type + time_t now, before; + (void)time(&now); + if (post_server->last_post_ts == 0) { +- before = now - 30; // 60s ++ before = now - TMOUT_PERIOD; + } else { + before = post_server->last_post_ts + 1; + } + post_server->last_post_ts = now; + + (void)snprintf(url, LINE_BUF_LEN, +- "http://%s/ingest?name=%s&from=%ld&until=%ld", ++ "http://%s/ingest?name=%s-%s&from=%ld&until=%ld", + post_server->host, + appname[en_type], ++ post_server->app_suffix, + (long)before, + (long)now); + return 0; +@@ -310,7 +311,7 @@ static void __curl_post(struct post_server_s *post_server, struct post_info_s *p + if(res != CURLE_OK) { + ERROR("[FLAMEGRAPH]: curl post to %s failed: %s\n", url, curl_easy_strerror(res)); + } else { +- INFO("[FLAMEGRAPH]: curl post post to %s success\n", url, post_info->remain_size); ++ INFO("[FLAMEGRAPH]: curl post post to %s success\n", url); + } + + if (chunk.memory) { +diff --git a/src/probes/extends/ebpf.probe/src/stackprobe/stackprobe.c b/src/probes/extends/ebpf.probe/src/stackprobe/stackprobe.c +index 9fa5079..207a5e8 100644 +--- a/src/probes/extends/ebpf.probe/src/stackprobe/stackprobe.c ++++ b/src/probes/extends/ebpf.probe/src/stackprobe/stackprobe.c +@@ -851,6 +851,9 @@ static struct 
stack_trace_s *create_stack_trace(StackprobeConfig *conf) + INFO("[STACKPROBE]: Do not post to Pyroscope Server.\n"); + st->post_server.post_enable = 0; + } else { ++ if (get_system_uuid(st->post_server.app_suffix, APP_SUFFIX_LEN) != 0) { ++ st->post_server.app_suffix[0] = 0; ++ } + INFO("[STACKPROBE]: Will post to Pyroscope Server: %s.\n", conf->generalConfig->pyroscopeServer); + } + +diff --git a/src/probes/extends/ebpf.probe/src/stackprobe/stackprobe.h b/src/probes/extends/ebpf.probe/src/stackprobe/stackprobe.h +index 97e5ea5..7d6bd39 100644 +--- a/src/probes/extends/ebpf.probe/src/stackprobe/stackprobe.h ++++ b/src/probes/extends/ebpf.probe/src/stackprobe/stackprobe.h +@@ -24,6 +24,7 @@ + + #define STACKPROBE_CONF_PATH_DEFAULT "/opt/gala-gopher/extend_probes/stackprobe.conf" + #define BPF_FUNC_NAME_LEN 32 ++#define APP_SUFFIX_LEN 64 + + struct stack_symbs_s { + struct addr_symb_s user_stack_symbs[PERF_MAX_STACK_DEPTH]; +@@ -106,6 +107,7 @@ struct post_server_s { + char post_enable; + long timeout; // sec + char host[PATH_LEN]; ++ char app_suffix[APP_SUFFIX_LEN]; + time_t last_post_ts; + }; + +-- +2.33.0 + diff --git a/bugfix-add-check-if-thread-is-0.patch b/bugfix-add-check-if-thread-is-0.patch new file mode 100644 index 0000000..51d3805 --- /dev/null +++ b/bugfix-add-check-if-thread-is-0.patch @@ -0,0 +1,209 @@ +From cffdb869f03aa454da35d06ad7baf4f2f330b9a0 Mon Sep 17 00:00:00 2001 +From: zhaoyuxing +Date: Tue, 27 Dec 2022 11:06:14 +0800 +Subject: [PATCH] bugfix: add check if thread is 0 + +--- + .../ebpf.probe/src/taskprobe/proc_probe.c | 2 +- + src/probes/system_infos.probe/system_cpu.c | 2 +- + src/probes/system_infos.probe/system_disk.c | 8 +-- + .../system_infos.probe/system_meminfo.c | 4 +- + src/probes/system_infos.probe/system_net.c | 53 +++++++++++-------- + 5 files changed, 38 insertions(+), 31 deletions(-) + +diff --git a/src/probes/extends/ebpf.probe/src/taskprobe/proc_probe.c b/src/probes/extends/ebpf.probe/src/taskprobe/proc_probe.c +index 
0374c32..40ddff1 100644 +--- a/src/probes/extends/ebpf.probe/src/taskprobe/proc_probe.c ++++ b/src/probes/extends/ebpf.probe/src/taskprobe/proc_probe.c +@@ -153,7 +153,7 @@ static void report_proc_metrics(struct proc_data_s *proc) + proc->dns_op.gethostname_failed); + } + +- if (proc->proc_io.iowait_us > latency_thr_us) { ++ if (latency_thr_us > 0 && proc->proc_io.iowait_us > latency_thr_us) { + report_logs(OO_NAME, + entityId, + "iowait_us", +diff --git a/src/probes/system_infos.probe/system_cpu.c b/src/probes/system_infos.probe/system_cpu.c +index f433ac4..0582d43 100644 +--- a/src/probes/system_infos.probe/system_cpu.c ++++ b/src/probes/system_infos.probe/system_cpu.c +@@ -85,7 +85,7 @@ static void report_cpu_status(struct probe_params *params) + entityId[0] = 0; + (void)strcpy(entityId, "cpu"); + +- if (util_per > params->res_percent_upper) { ++ if (params->res_percent_upper > 0 && util_per > params->res_percent_upper) { + report_logs(ENTITY_NAME, + entityId, + "total_used_per", +diff --git a/src/probes/system_infos.probe/system_disk.c b/src/probes/system_infos.probe/system_disk.c +index b249dbf..84065f9 100644 +--- a/src/probes/system_infos.probe/system_disk.c ++++ b/src/probes/system_infos.probe/system_disk.c +@@ -54,7 +54,7 @@ static void report_disk_status(df_stats inode_stats, df_stats blk_stats, struct + + entityid[0] = 0; + +- if (inode_stats.inode_or_blk_used_per > params->res_percent_upper) { ++ if (params->res_percent_upper > 0 && inode_stats.inode_or_blk_used_per > params->res_percent_upper) { + (void)strncpy(entityid, inode_stats.mount_on, LINE_BUF_LEN - 1); + report_logs(ENTITY_FS_NAME, + entityid, +@@ -63,7 +63,7 @@ static void report_disk_status(df_stats inode_stats, df_stats blk_stats, struct + "Too many Inodes consumed(%d%%).", + inode_stats.inode_or_blk_used_per); + } +- if (blk_stats.inode_or_blk_used_per > params->res_percent_upper) { ++ if (params->res_percent_upper > 0 && blk_stats.inode_or_blk_used_per > params->res_percent_upper) { + if 
(entityid[0] == 0) { + (void)strncpy(entityid, blk_stats.mount_on, LINE_BUF_LEN - 1); + } +@@ -212,7 +212,7 @@ static void report_disk_iostat(const char *disk_name, disk_io_stats *io_info, st + + entityid[0] = 0; + +- if (io_info->util > params->res_percent_upper) { ++ if (params->res_percent_upper > 0 && io_info->util > params->res_percent_upper) { + (void)strncpy(entityid, disk_name, LINE_BUF_LEN - 1); + report_logs(ENTITY_DISK_NAME, + entityid, +@@ -335,4 +335,4 @@ void system_iostat_destroy(void) + g_disk_stats = NULL; + } + return; +-} +\ No newline at end of file ++} +diff --git a/src/probes/system_infos.probe/system_meminfo.c b/src/probes/system_infos.probe/system_meminfo.c +index 5b6ba2d..ceb3815 100644 +--- a/src/probes/system_infos.probe/system_meminfo.c ++++ b/src/probes/system_infos.probe/system_meminfo.c +@@ -85,7 +85,7 @@ static void report_meminfo_status(struct probe_params *params, double mem_util, + (void)strcpy(entityId, "/proc/meminfo"); + (void)strcpy(entityName, "mem"); + // mem util +- if (mem_util > params->res_percent_upper) { ++ if (params->res_percent_upper > 0 && mem_util > params->res_percent_upper) { + report_logs(entityName, + entityId, + "util", +@@ -94,7 +94,7 @@ static void report_meminfo_status(struct probe_params *params, double mem_util, + mem_util); + } + // swap util +- if (swap_util > params->res_percent_upper) { ++ if (params->res_percent_upper > 0 && swap_util > params->res_percent_upper) { + report_logs(entityName, + entityId, + "swap_util", +diff --git a/src/probes/system_infos.probe/system_net.c b/src/probes/system_infos.probe/system_net.c +index a096464..525aa54 100644 +--- a/src/probes/system_infos.probe/system_net.c ++++ b/src/probes/system_infos.probe/system_net.c +@@ -1,4 +1,4 @@ +-/****************************************************************************** ++/****************************************************************************** + * Copyright (c) Huawei Technologies Co., Ltd. 2022. All rights reserved. 
+ * gala-gopher licensed under the Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. +@@ -101,16 +101,19 @@ int system_tcp_probe(void) + METRICS_TCP_NAME, + "/proc/dev/snmp", + g_snmp_stats.tcp_curr_estab, +- g_snmp_stats.tcp_in_segs - temp.tcp_in_segs, +- g_snmp_stats.tcp_out_segs - temp.tcp_out_segs, +- g_snmp_stats.tcp_retrans_segs - temp.tcp_retrans_segs, +- g_snmp_stats.tcp_in_errs - temp.tcp_in_errs); ++ (g_snmp_stats.tcp_in_segs > temp.tcp_in_segs) ? (g_snmp_stats.tcp_in_segs - temp.tcp_in_segs) : 0, ++ (g_snmp_stats.tcp_out_segs > temp.tcp_out_segs) ? (g_snmp_stats.tcp_out_segs - temp.tcp_out_segs) : 0, ++ (g_snmp_stats.tcp_retrans_segs > temp.tcp_retrans_segs) ? ++ (g_snmp_stats.tcp_retrans_segs - temp.tcp_retrans_segs) : 0, ++ (g_snmp_stats.tcp_in_errs > temp.tcp_in_errs) ? (g_snmp_stats.tcp_in_errs - temp.tcp_in_errs) : 0); + + (void)nprobe_fprintf(stdout, "|%s|%s|%llu|%llu|\n", + METRICS_UDP_NAME, + "/proc/dev/snmp", +- g_snmp_stats.udp_in_datagrams - temp.udp_in_datagrams, +- g_snmp_stats.udp_out_datagrams - temp.udp_out_datagrams); ++ (g_snmp_stats.udp_in_datagrams > temp.udp_in_datagrams) ? ++ (g_snmp_stats.udp_in_datagrams - temp.udp_in_datagrams) : 0, ++ (g_snmp_stats.udp_out_datagrams > temp.udp_out_datagrams) ? 
++ (g_snmp_stats.udp_out_datagrams - temp.udp_out_datagrams) : 0); + + (void)fclose(f); + return 0; +@@ -278,7 +281,7 @@ static void report_netdev(net_dev_stat *new_info, net_dev_stat *old_info, struct + tx_errs = new_info->tx_errs - old_info->tx_errs; + rx_errs = new_info->rx_errs - old_info->rx_errs; + +- if (tx_drops > params->drops_count_thr) { ++ if (params->drops_count_thr > 0 && tx_drops > params->drops_count_thr) { + (void)strncpy(entityid, new_info->dev_name, LINE_BUF_LEN - 1); + report_logs(ENTITY_NIC_NAME, + entityid, +@@ -287,7 +290,7 @@ static void report_netdev(net_dev_stat *new_info, net_dev_stat *old_info, struct + "net device tx queue drops(%llu).", + tx_drops); + } +- if (rx_drops > params->drops_count_thr) { ++ if (params->drops_count_thr > 0 && rx_drops > params->drops_count_thr) { + if (entityid[0] == 0) { + (void)strncpy(entityid, new_info->dev_name, LINE_BUF_LEN - 1); + } +@@ -298,7 +301,7 @@ static void report_netdev(net_dev_stat *new_info, net_dev_stat *old_info, struct + "net device rx queue drops(%llu).", + rx_drops); + } +- if (tx_errs > params->drops_count_thr) { ++ if (params->drops_count_thr > 0 && tx_errs > params->drops_count_thr) { + if (entityid[0] == 0) { + (void)strncpy(entityid, new_info->dev_name, LINE_BUF_LEN - 1); + } +@@ -309,7 +312,7 @@ static void report_netdev(net_dev_stat *new_info, net_dev_stat *old_info, struct + "net device tx queue errors(%llu).", + tx_errs); + } +- if (rx_errs > params->drops_count_thr) { ++ if (params->drops_count_thr > 0 && rx_errs > params->drops_count_thr) { + if (entityid[0] == 0) { + (void)strncpy(entityid, new_info->dev_name, LINE_BUF_LEN - 1); + } +@@ -375,18 +378,22 @@ int system_net_probe(struct probe_params *params) + METRICS_NIC_NAME, + g_dev_stats[index].dev_name, + g_dev_stats[index].net_status == 1 ? 
"UP" : "DOWN", +- g_dev_stats[index].rx_bytes - temp.rx_bytes, +- g_dev_stats[index].rx_packets - temp.rx_packets, +- g_dev_stats[index].rx_errs - temp.rx_errs, +- g_dev_stats[index].rx_dropped - temp.rx_dropped, +- g_dev_stats[index].tx_bytes - temp.tx_bytes, +- g_dev_stats[index].tx_packets - temp.tx_packets, +- g_dev_stats[index].tx_errs - temp.tx_errs, +- g_dev_stats[index].tx_dropped - temp.tx_dropped, +- SPEED_VALUE(temp.rx_bytes, g_dev_stats[index].rx_bytes, params->period), +- SPEED_VALUE(temp.tx_bytes, g_dev_stats[index].tx_bytes, params->period), +- g_dev_stats[index].tc_sent_drop_count - temp.tc_sent_drop_count, +- g_dev_stats[index].tc_sent_overlimits_count - temp.tc_sent_overlimits_count, ++ (g_dev_stats[index].rx_bytes > temp.rx_bytes) ? (g_dev_stats[index].rx_bytes - temp.rx_bytes) : 0, ++ (g_dev_stats[index].rx_packets > temp.rx_packets) ? (g_dev_stats[index].rx_packets - temp.rx_packets) : 0, ++ (g_dev_stats[index].rx_errs > temp.rx_errs) ? (g_dev_stats[index].rx_errs - temp.rx_errs) : 0, ++ (g_dev_stats[index].rx_dropped > temp.rx_dropped) ? (g_dev_stats[index].rx_dropped - temp.rx_dropped) : 0, ++ (g_dev_stats[index].tx_bytes > temp.tx_bytes) ? (g_dev_stats[index].tx_bytes - temp.tx_bytes) : 0, ++ (g_dev_stats[index].tx_packets > temp.tx_packets) ? (g_dev_stats[index].tx_packets - temp.tx_packets) : 0, ++ (g_dev_stats[index].tx_errs > temp.tx_errs) ? (g_dev_stats[index].tx_errs - temp.tx_errs) : 0, ++ (g_dev_stats[index].tx_dropped > temp.tx_dropped) ? (g_dev_stats[index].tx_dropped - temp.tx_dropped) : 0, ++ (g_dev_stats[index].rx_bytes > temp.rx_bytes) ? ++ SPEED_VALUE(temp.rx_bytes, g_dev_stats[index].rx_bytes, params->period) : 0, ++ (g_dev_stats[index].tx_bytes > temp.tx_bytes) ? ++ SPEED_VALUE(temp.tx_bytes, g_dev_stats[index].tx_bytes, params->period) : 0, ++ (g_dev_stats[index].tc_sent_drop_count > temp.tc_sent_drop_count) ? 
++ (g_dev_stats[index].tc_sent_drop_count - temp.tc_sent_drop_count) : 0, ++ (g_dev_stats[index].tc_sent_overlimits_count > temp.tc_sent_overlimits_count) ? ++ (g_dev_stats[index].tc_sent_overlimits_count - temp.tc_sent_overlimits_count) : 0, + g_dev_stats[index].tc_backlog_count, + g_dev_stats[index].tc_ecn_mark); + /* output event */ +-- +2.33.0 + diff --git a/fix-bug.patch b/fix-bug.patch new file mode 100644 index 0000000..fe959a8 --- /dev/null +++ b/fix-bug.patch @@ -0,0 +1,38 @@ +From a43af4c9df148d3a71666fdcfda3f2bb5a83fc99 Mon Sep 17 00:00:00 2001 +From: wo_cow +Date: Mon, 9 Jan 2023 16:14:15 +0800 +Subject: [PATCH] fix bug + +--- + src/common/common.h | 2 +- + src/common/util.c | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + +diff --git a/src/common/common.h b/src/common/common.h +index 64c040a..72848f8 100644 +--- a/src/common/common.h ++++ b/src/common/common.h +@@ -192,6 +192,6 @@ int exec_cmd(const char *cmd, char *buf, unsigned int buf_len); + char is_exist_mod(const char *mod); + int __snprintf(char **buf, const int bufLen, int *remainLen, const char *format, ...); + char is_digit_str(const char *s); +-int get_system_uuid(char *buffer, size_t size); ++int get_system_uuid(char *buffer, unsigned int size); + + #endif +diff --git a/src/common/util.c b/src/common/util.c +index 945da65..b7a928f 100644 +--- a/src/common/util.c ++++ b/src/common/util.c +@@ -196,7 +196,7 @@ char is_digit_str(const char *s) + return 1; + } + +-int get_system_uuid(char *buffer, size_t size) ++int get_system_uuid(char *buffer, unsigned int size) + { + FILE *fp = NULL; + +-- +2.33.0 + diff --git a/fix-ksliprobe-get-invalid-args-occasionally-at-start.patch b/fix-ksliprobe-get-invalid-args-occasionally-at-start.patch new file mode 100644 index 0000000..88e8701 --- /dev/null +++ b/fix-ksliprobe-get-invalid-args-occasionally-at-start.patch @@ -0,0 +1,171 @@ +From 6c2753778bcc7e124353a1e21612a1ebdb92f241 Mon Sep 17 00:00:00 2001 +From: wo_cow +Date: Tue, 10 Jan 2023 
22:39:28 +0800 +Subject: [PATCH] fix ksliprobe get invalid args occasionally at startup + +--- + src/common/args.c | 2 +- + src/common/args.h | 2 +- + .../ebpf.probe/src/ksliprobe/ksliprobe.bpf.c | 20 ++++++++++++------- + .../ebpf.probe/src/ksliprobe/ksliprobe.c | 8 ++++---- + .../ebpf.probe/src/ksliprobe/ksliprobe.h | 4 ++-- + 5 files changed, 21 insertions(+), 15 deletions(-) + +diff --git a/src/common/args.c b/src/common/args.c +index 6c8e2d1..5c6bee7 100644 +--- a/src/common/args.c ++++ b/src/common/args.c +@@ -157,7 +157,7 @@ static int __period_arg_parse(char opt, char *arg, struct probe_params *params) + } + break; + case 'C': +- params->cycle_sampling_flag = 1; ++ params->continuous_sampling_flag = 1; + break; + case 'k': + params->kafka_port = (unsigned int)atoi(arg); +diff --git a/src/common/args.h b/src/common/args.h +index f8ab447..8fd5139 100644 +--- a/src/common/args.h ++++ b/src/common/args.h +@@ -44,7 +44,7 @@ struct probe_params { + char res_percent_upper; // [-U <>] Upper limit of resource percentage, default is 0% + char res_percent_lower; // [-L <>] Lower limit of resource percentage, default is 0% + unsigned char cport_flag; // [-c <>] Indicates whether the probes(such as tcp) identifies the client port, default is 0 (no identify) +- char cycle_sampling_flag; // [-C <>] Enables the cycle sampling, default is 0 ++ char continuous_sampling_flag; // [-C <>] Enables the continuous sampling, default is 0 + char target_dev[DEV_NAME]; // [-d <>] Device name, default is null + char elf_path[MAX_PATH_LEN]; // [-p <>] Set ELF file path of the monitored software, default is null + char task_whitelist[MAX_PATH_LEN]; // [-w <>] Filtering app monitoring ranges, default is null +diff --git a/src/probes/extends/ebpf.probe/src/ksliprobe/ksliprobe.bpf.c b/src/probes/extends/ebpf.probe/src/ksliprobe/ksliprobe.bpf.c +index 6fa64ab..f54df41 100644 +--- a/src/probes/extends/ebpf.probe/src/ksliprobe/ksliprobe.bpf.c ++++ 
b/src/probes/extends/ebpf.probe/src/ksliprobe/ksliprobe.bpf.c +@@ -116,17 +116,17 @@ static __always_inline void get_args(struct conn_data_t *conn_data) + { + u32 key = 0; + u64 period = __PERIOD; +- char cycle_sampling_flag = 0; ++ char continuous_sampling_flag = 0; + + struct ksli_args_s *args; + args = (struct ksli_args_s *)bpf_map_lookup_elem(&args_map, &key); + if (args) { + period = args->period; +- cycle_sampling_flag = args->cycle_sampling_flag; ++ continuous_sampling_flag = args->continuous_sampling_flag; + } + + conn_data->report_period = period; +- conn_data->cycle_sampling_flag = cycle_sampling_flag; ++ conn_data->continuous_sampling_flag = continuous_sampling_flag; + + return; + } +@@ -262,7 +262,13 @@ static __always_inline int periodic_report(u64 ts_nsec, struct conn_data_t *conn + { + long err; + int ret = 0; +- u64 period = conn_data->report_period; ++ ++ // period cannot be 0, so it is considered that the user mode has not written to args_map by now. ++ // therefore we try to get the value again. ++ if (conn_data->report_period == 0) ++ get_args(conn_data); ++ ++ u64 period = (conn_data->report_period != 0) ? 
conn_data->report_period : __PERIOD; + + // 表示没有任何采样数据,不上报 + if (conn_data->latency.rtt_nsec == 0) { +@@ -299,7 +305,7 @@ static __always_inline void sample_finished(struct conn_data_t *conn_data, struc + conn_data->latency.rtt_nsec = csd->rtt_ts_nsec; + __builtin_memcpy(&conn_data->latency.command, &csd->command, MAX_COMMAND_REQ_SIZE); + } +- if (conn_data->cycle_sampling_flag) { ++ if (conn_data->continuous_sampling_flag) { + if (conn_data->max.rtt_nsec < csd->rtt_ts_nsec) { + conn_data->max.rtt_nsec = csd->rtt_ts_nsec; + __builtin_memcpy(&conn_data->max.command, &csd->command, MAX_COMMAND_REQ_SIZE); +@@ -350,7 +356,7 @@ static __always_inline void process_rd_msg(u32 tgid, int fd, const char *buf, co + } + + // 非循环采样每次上报后就返回,等待下次上报周期再采样。这种方式无法获取周期内max sli +- if (!conn_data->cycle_sampling_flag && reported) ++ if (!conn_data->continuous_sampling_flag && reported) + return; + + // 连接的协议类型未知时,连续3次read报文时解析不出是redis协议,就确认此条连接非redis请求连接,不做采样 +@@ -403,7 +409,7 @@ KPROBE(ksys_read, pt_regs) + if (conn_data->id.protocol == PROTOCOL_NO_REDIS) + return; + +- if (!conn_data->cycle_sampling_flag) { ++ if (!conn_data->continuous_sampling_flag) { + if (bpf_ktime_get_ns() - conn_data->last_report_ts_nsec < conn_data->report_period) + return; + } +diff --git a/src/probes/extends/ebpf.probe/src/ksliprobe/ksliprobe.c b/src/probes/extends/ebpf.probe/src/ksliprobe/ksliprobe.c +index d8221c9..ac8484e 100644 +--- a/src/probes/extends/ebpf.probe/src/ksliprobe/ksliprobe.c ++++ b/src/probes/extends/ebpf.probe/src/ksliprobe/ksliprobe.c +@@ -40,7 +40,7 @@ + #define MAX_SLI_TBL_NAME "redis_max_sli" + + static volatile sig_atomic_t stop; +-static struct probe_params params = {.period = DEFAULT_PERIOD, .cycle_sampling_flag = 0}; ++static struct probe_params params = {.period = DEFAULT_PERIOD, .continuous_sampling_flag = 0}; + + static void sig_int(int signo) + { +@@ -113,7 +113,7 @@ static void msg_event_handler(void *ctx, int cpu, void *data, unsigned int size) + cli_ip_str, + 
ntohs(msg_evt_data->client_ip_info.port), + msg_evt_data->latency.rtt_nsec); +- if (params.cycle_sampling_flag) { ++ if (params.continuous_sampling_flag) { + fprintf(stdout, + "|%s|%d|%d|%s|%s|%s|%u|%s|%u|%llu|\n", + MAX_SLI_TBL_NAME, +@@ -173,7 +173,7 @@ static void load_args(int args_fd, struct probe_params* params) + struct ksli_args_s args = {0}; + + args.period = NS(params->period); +- args.cycle_sampling_flag = params->cycle_sampling_flag; ++ args.continuous_sampling_flag = params->continuous_sampling_flag; + + (void)bpf_map_update_elem(args_fd, &key, &args, BPF_ANY); + } +@@ -187,7 +187,7 @@ int main(int argc, char **argv) + return -1; + } + printf("arg parse interval time:%us\n", params.period); +- printf("arg parse if cycle sampling:%s\n", params.cycle_sampling_flag ? "true": "false"); ++ printf("arg parse if cycle sampling:%s\n", params.continuous_sampling_flag ? "true": "false"); + + #ifdef KERNEL_SUPPORT_TSTAMP + load_tc_bpf(params.netcard_list, TC_PROG, TC_TYPE_INGRESS); +diff --git a/src/probes/extends/ebpf.probe/src/ksliprobe/ksliprobe.h b/src/probes/extends/ebpf.probe/src/ksliprobe/ksliprobe.h +index 40d2398..023e0e7 100644 +--- a/src/probes/extends/ebpf.probe/src/ksliprobe/ksliprobe.h ++++ b/src/probes/extends/ebpf.probe/src/ksliprobe/ksliprobe.h +@@ -36,7 +36,7 @@ + + struct ksli_args_s { + __u64 period; // Sampling period, unit ns +- char cycle_sampling_flag; // Enables the sampling of max sli within a period (which cause some performance degradation) ++ char continuous_sampling_flag; // Enables the sampling of max sli within a period (which cause some performance degradation) + }; + + enum msg_event_rw_t { +@@ -89,7 +89,7 @@ struct conn_data_t { + struct rtt_cmd_t current; + __u64 last_report_ts_nsec; // 上一次上报完成的时间点 + __u64 report_period; // 上报周期 +- char cycle_sampling_flag; ++ char continuous_sampling_flag; + char procotol_check_times; + }; + +-- +2.33.0 + diff --git a/fix-stackprobe-memory-allocation-and-deallocation-er.patch 
b/fix-stackprobe-memory-allocation-and-deallocation-er.patch new file mode 100644 index 0000000..b2b0a55 --- /dev/null +++ b/fix-stackprobe-memory-allocation-and-deallocation-er.patch @@ -0,0 +1,56 @@ +From 01f2d382131f26ca81b0997532d0b73515ca33c7 Mon Sep 17 00:00:00 2001 +From: wo_cow +Date: Wed, 14 Dec 2022 16:32:13 +0800 +Subject: [PATCH] fix stackprobe memory allocation and deallocation errors. + +--- + .../extends/ebpf.probe/src/stackprobe/stackprobe.c | 12 +++++++++--- + 1 file changed, 9 insertions(+), 3 deletions(-) + +diff --git a/src/probes/extends/ebpf.probe/src/stackprobe/stackprobe.c b/src/probes/extends/ebpf.probe/src/stackprobe/stackprobe.c +index da09094..fa37a72 100644 +--- a/src/probes/extends/ebpf.probe/src/stackprobe/stackprobe.c ++++ b/src/probes/extends/ebpf.probe/src/stackprobe/stackprobe.c +@@ -105,7 +105,7 @@ typedef struct { + struct bpf_link_hash_value { + enum pid_state_t pid_state; + char elf_path[MAX_PATH_LEN]; +- struct bpf_link *bpf_links[32]; ++ struct bpf_link *bpf_links[32]; // 32 cover num of probes in memleak.bpf.c + }; + + struct bpf_link_hash_t { +@@ -314,7 +314,7 @@ static struct raw_stack_trace_s *create_raw_stack_trace(struct stack_trace_s *st + + size_t stack_size = st->cpus_num * PERCPU_SAMPLE_COUNT; + size_t mem_size = sizeof(struct raw_stack_trace_s); +- mem_size += (stack_size * sizeof(struct stack_id_s)); ++ mem_size += (stack_size * sizeof(struct raw_trace_s)); + + raw_stack_trace = (struct raw_stack_trace_s *)malloc(mem_size); + if (!raw_stack_trace) { +@@ -747,7 +747,6 @@ static void destroy_svg_stack_trace(struct svg_stack_trace_s **ptr_svg_st) + + static void destroy_stack_trace(struct stack_trace_s **ptr_st) + { +- // TODO:destroy_svg_stack_trace? 
+ struct stack_trace_s *st = *ptr_st; + *ptr_st = NULL; + if (!st) { +@@ -761,6 +760,13 @@ static void destroy_stack_trace(struct stack_trace_s **ptr_st) + } + } + ++ for (int i = 0; i < STACK_SVG_MAX; i++) { ++ if (st->svg_stack_traces[i] == NULL) { ++ continue; ++ } ++ destroy_svg_stack_trace(&st->svg_stack_traces[i]); ++ } ++ + if (st->ksymbs) { + destroy_ksymbs_tbl(st->ksymbs); + (void)free(st->ksymbs); +-- +2.33.0 + diff --git a/gala-gopher.spec b/gala-gopher.spec index 0f534a5..e1d7acd 100644 --- a/gala-gopher.spec +++ b/gala-gopher.spec @@ -5,18 +5,28 @@ Summary: Intelligent ops toolkit for openEuler Name: gala-gopher Version: 1.0.1 -Release: 1 +Release: 2 License: Mulan PSL v2 URL: https://gitee.com/openeuler/gala-gopher Source: %{name}-%{version}.tar.gz BuildRoot: %{_builddir}/%{name}-%{version} -BuildRequires: systemd cmake gcc-c++ elfutils-devel +BuildRequires: systemd cmake gcc-c++ elfutils-devel libcurl-devel BuildRequires: clang >= 10.0.1 llvm java-1.8.0-openjdk-devel BuildRequires: libconfig-devel librdkafka-devel libmicrohttpd-devel BuildRequires: uthash-devel libbpf libbpf-devel log4cplus-devel Requires: bash glibc elfutils zlib elfutils-devel bpftool Requires: dmidecode python3-psycopg2 python3-yaml erlang-eflame -Requires: flamegraph-stackcollapse iproute +Requires: flamegraph-stackcollapse iproute libcurl + +Patch1: add-configuration-instructions.patch +Patch2: fix-stackprobe-memory-allocation-and-deallocation-er.patch +Patch3: bugfix-add-check-if-thread-is-0.patch +Patch4: some-bugfix.patch +Patch5: add-support-to-pyroscope.patch +Patch6: repair-stackprobe-caused-cpu-rush.patch +Patch7: add-system_uuid-field-to-distinguish-client-when-pos.patch +Patch8: fix-bug.patch +Patch9: fix-ksliprobe-get-invalid-args-occasionally-at-start.patch %description gala-gopher is a low-overhead eBPF-based probes framework @@ -64,6 +74,16 @@ popd /usr/lib/systemd/system/gala-gopher.service %changelog +* Sat Jan 14 2023 Zhen Chen - 1.0.1-2 +- fix ksliprobe get 
invalid args occasionally at startup +- fix error print when starting gala-gopher +- add system_uuid field to distinguish client when post to pyroscope server +- repair stackprobe caused cpu rush +- add support to pyroscope +- bugfix: add check if thread is 0 +- fix stackprobe memory allocation and deallocation errors. +- normalize time format in flamegraph svg filename + * Mon Dec 12 2022 Zhen Chen - 1.0.1-1 - Update to 1.0.1 diff --git a/repair-stackprobe-caused-cpu-rush.patch b/repair-stackprobe-caused-cpu-rush.patch new file mode 100644 index 0000000..c177083 --- /dev/null +++ b/repair-stackprobe-caused-cpu-rush.patch @@ -0,0 +1,204 @@ +From 8051b4711f4c93e6f6858847555ef0c4c4dd1db5 Mon Sep 17 00:00:00 2001 +From: wo_cow +Date: Fri, 6 Jan 2023 17:18:07 +0800 +Subject: [PATCH] repair stackprobe caused cpu rush + +--- + .../ebpf.probe/src/stackprobe/flame_graph.c | 32 +++++++++++-------- + .../extends/ebpf.probe/src/stackprobe/stack.h | 4 +-- + .../ebpf.probe/src/stackprobe/stackprobe.c | 16 +++++----- + .../ebpf.probe/src/stackprobe/stackprobe.h | 2 +- + 4 files changed, 29 insertions(+), 25 deletions(-) + +diff --git a/src/probes/extends/ebpf.probe/src/stackprobe/flame_graph.c b/src/probes/extends/ebpf.probe/src/stackprobe/flame_graph.c +index 126b98d..40c6dcb 100644 +--- a/src/probes/extends/ebpf.probe/src/stackprobe/flame_graph.c ++++ b/src/probes/extends/ebpf.probe/src/stackprobe/flame_graph.c +@@ -38,6 +38,8 @@ + #include "bpf.h" + #include "flame_graph.h" + ++#define POST_MAX_LEN 131072 ++ + struct post_info_s { + int post_flag; + int sk; +@@ -193,7 +195,6 @@ static void __reopen_flame_graph_file(struct stack_svg_mng_s *svg_mng) + #define HISTO_TMP_LEN (2 * STACK_SYMBS_LEN) + static char __histo_tmp_str[HISTO_TMP_LEN]; + +-#define POST_MAX_LEN 2048 + static int __do_wr_stack_histo(struct stack_svg_mng_s *svg_mng, + struct stack_trace_histo_s *stack_trace_histo, int first, struct post_info_s *post_info) + { +@@ -246,7 +247,7 @@ static int __build_url(char 
*url, struct post_server_s *post_server, int en_type + time_t now, before; + (void)time(&now); + if (post_server->last_post_ts == 0) { +- before = now - 60; // 60s ++ before = now - 30; // 30s + } else { + before = post_server->last_post_ts + 1; + } +@@ -267,12 +268,13 @@ static void __curl_post(struct post_server_s *post_server, struct post_info_s *p + CURLcode res; + CURL *curl = post_info->curl; + if (curl == NULL) { +- return; ++ goto end2; + } ++ + long post_len = (long)strlen(post_info->buf_start); + if (post_len == 0) { + DEBUG("[FLAMEGRAPH]: buf is null. No need to curl post post to %s\n", appname[en_type]); +- return; ++ goto end1; + } + + char url[LINE_BUF_LEN] = {0}; +@@ -306,26 +308,28 @@ static void __curl_post(struct post_server_s *post_server, struct post_info_s *p + res = curl_easy_perform(curl); + /* Check for errors */ + if(res != CURLE_OK) { +- ERROR("[FLAMEGRAPH]: curl post failed: %s\n", curl_easy_strerror(res)); ++ ERROR("[FLAMEGRAPH]: curl post to %s failed: %s\n", url, curl_easy_strerror(res)); + } else { +- INFO("[FLAMEGRAPH]: curl post post to %s success\n", appname[en_type]); ++ INFO("[FLAMEGRAPH]: curl post post to %s success\n", url, post_info->remain_size); + } + +- /* always cleanup */ +- curl_easy_cleanup(curl); + if (chunk.memory) { + free(chunk.memory); +- chunk.memory = NULL; + } +- free(post_info->buf_start); +- post_info->buf_start = NULL; +- ++end1: ++ /* always cleanup */ ++ curl_easy_cleanup(curl); ++end2: ++ if (post_info->buf_start != NULL) { ++ free(post_info->buf_start); ++ post_info->buf_start = NULL; ++ } + return; + } + + static void __init_curl_handle(struct post_server_s *post_server, struct post_info_s *post_info) + { +- if (post_server == NULL || post_server->post_flag == 0) { ++ if (post_server == NULL || post_server->post_enable == 0) { + return; + } + +@@ -419,7 +423,7 @@ int set_post_server(struct post_server_s *post_server, const char *server_str) + } + + curl_global_init(CURL_GLOBAL_ALL); +- 
post_server->post_flag = 1; ++ post_server->post_enable = 1; + post_server->timeout = 3; + (void)strcpy(post_server->host, server_str); + +diff --git a/src/probes/extends/ebpf.probe/src/stackprobe/stack.h b/src/probes/extends/ebpf.probe/src/stackprobe/stack.h +index 5a932b1..976ff81 100644 +--- a/src/probes/extends/ebpf.probe/src/stackprobe/stack.h ++++ b/src/probes/extends/ebpf.probe/src/stackprobe/stack.h +@@ -19,10 +19,10 @@ + + #include "common.h" + +-#define AGGRE_PERIOD (1 * 60 * 1000) // 1min ++#define AGGRE_PERIOD (1 * 30 * 1000) // 30s + #define SAMPLE_PERIOD (10) // 10ms + #define TMOUT_PERIOD (AGGRE_PERIOD / 1000) // Second as unit +-#define PROC_CACHE_MAX_COUNT 10 // Cache 10 proc symbols ++#define PROC_CACHE_MAX_COUNT 100 // Cache 100 proc symbols + #define DIV_ROUND_UP(NUM, DEN) ((NUM + DEN - 1) / DEN) + + #define PERCPU_SAMPLE_COUNT (2 * DIV_ROUND_UP(AGGRE_PERIOD, SAMPLE_PERIOD)) +diff --git a/src/probes/extends/ebpf.probe/src/stackprobe/stackprobe.c b/src/probes/extends/ebpf.probe/src/stackprobe/stackprobe.c +index a4733e6..9fa5079 100644 +--- a/src/probes/extends/ebpf.probe/src/stackprobe/stackprobe.c ++++ b/src/probes/extends/ebpf.probe/src/stackprobe/stackprobe.c +@@ -753,7 +753,7 @@ static void destroy_stack_trace(struct stack_trace_s **ptr_st) + return; + } + +- if (st->post_server.post_flag) { ++ if (st->post_server.post_enable) { + clean_post_server(); + } + +@@ -849,7 +849,7 @@ static struct stack_trace_s *create_stack_trace(StackprobeConfig *conf) + + if (set_post_server(&st->post_server, conf->generalConfig->pyroscopeServer) != 0) { + INFO("[STACKPROBE]: Do not post to Pyroscope Server.\n"); +- st->post_server.post_flag = 0; ++ st->post_server.post_enable = 0; + } else { + INFO("[STACKPROBE]: Will post to Pyroscope Server: %s.\n", conf->generalConfig->pyroscopeServer); + } +@@ -1085,9 +1085,9 @@ static int add_pids() + // find_bpf_link and add_bpf_link will set bpf_link status + if (!find_bpf_link(pid)) { + if (add_bpf_link(pid) != 0) { +- 
fprintf(stderr, "add pid %u failed\n", pid); ++ ERROR("[STACKPROBE]: add pid %u failed\n", pid); + } else { +- printf("add of pid %u success\n", pid); ++ INFO("[STACKPROBE]: add of pid %u success\n", pid); + } + } + } +@@ -1103,7 +1103,7 @@ static void clear_invalid_pids() + } + H_ITER(bpf_link_head, pid_bpf_links, tmp) { + if (pid_bpf_links->v.pid_state == PID_NOEXIST) { +- printf("clear bpf link of pid %u\n", pid_bpf_links->pid); ++ INFO("[STACKPROBE]: clear bpf link of pid %u\n", pid_bpf_links->pid); + H_DEL(bpf_link_head, pid_bpf_links); + (void)free(pid_bpf_links); + } +@@ -1344,11 +1344,11 @@ static void init_wr_flame_pthreads(struct svg_stack_trace_s *svg_st, const char + + err = pthread_create(&wr_flame_thd, NULL, __running, (void *)svg_st); + if (err != 0) { +- fprintf(stderr, "Failed to create %s wr_flame_pthread.\n", flame_name); ++ ERROR("[STACKPROBE]: Failed to create %s wr_flame_pthread.\n", flame_name); + g_stop = 1; + return; + } +- printf("%s wr_flame_pthread successfully started!\n", flame_name); ++ INFO("[STACKPROBE]: %s wr_flame_pthread successfully started!\n", flame_name); + + return; + } +@@ -1410,7 +1410,7 @@ int main(int argc, char **argv) + StackprobeConfig *conf; + + if (signal(SIGINT, sig_int) == SIG_ERR) { +- fprintf(stderr, "can't set signal handler: %d\n", errno); ++ ERROR("[STACKPROBE]: can't set signal handler: %d\n", errno); + return errno; + } + +diff --git a/src/probes/extends/ebpf.probe/src/stackprobe/stackprobe.h b/src/probes/extends/ebpf.probe/src/stackprobe/stackprobe.h +index 657b6e7..97e5ea5 100644 +--- a/src/probes/extends/ebpf.probe/src/stackprobe/stackprobe.h ++++ b/src/probes/extends/ebpf.probe/src/stackprobe/stackprobe.h +@@ -103,7 +103,7 @@ struct svg_stack_trace_s { + }; + + struct post_server_s { +- char post_flag; ++ char post_enable; + long timeout; // sec + char host[PATH_LEN]; + time_t last_post_ts; +-- +2.33.0 + diff --git a/some-bugfix.patch b/some-bugfix.patch new file mode 100644 index 0000000..511ed60 --- 
/dev/null +++ b/some-bugfix.patch @@ -0,0 +1,58 @@ +From 0b1750325993ec3af54aee27a63cb0ecbf625c6f Mon Sep 17 00:00:00 2001 +From: dowzyx +Date: Wed, 28 Dec 2022 09:49:32 +0800 +Subject: [PATCH] some bugfix + +--- + src/probes/extends/ebpf.probe/build.sh | 2 +- + .../ebpf.probe/src/taskprobe/taskprobe.c | 4 +-- + .../cadvisor.probe/cadvisor_probe.py | 5 +++- + 3 files changed, 7 insertions(+), 4 deletions(-) + +diff --git a/src/probes/extends/ebpf.probe/build.sh b/src/probes/extends/ebpf.probe/build.sh +index 4094ed5..eb15fb4 100755 +--- a/src/probes/extends/ebpf.probe/build.sh ++++ b/src/probes/extends/ebpf.probe/build.sh +@@ -81,7 +81,7 @@ function compile_probe() + echo "please create vmlinux.h manually." + echo "methods:" + echo " 1. generate linux_xxx.h by compile the kernel, refer to gen_vmlinux_h.sh;" +- echo " 2. ln -s vmlinux.h linux_xxx.h, (there are some include files in directory src/include)" ++ echo " 2. ln -s linux_xxx.h vmlinux.h, (there are some include files in directory src/include)" + echo " if your kernel version is similar to the include files provided, you can use method 2" + echo "==========================================================================================" + exit +diff --git a/src/probes/extends/ebpf.probe/src/taskprobe/taskprobe.c b/src/probes/extends/ebpf.probe/src/taskprobe/taskprobe.c +index dfa24d3..ec67d10 100644 +--- a/src/probes/extends/ebpf.probe/src/taskprobe/taskprobe.c ++++ b/src/probes/extends/ebpf.probe/src/taskprobe/taskprobe.c +@@ -105,8 +105,8 @@ static void get_wl_proc(struct task_probe_s* probep) + char comm[TASK_COMM_LEN]; + char cmdline[PROC_CMDLINE_LEN]; + char command[COMMAND_LEN]; +- const char *get_comm_fmt = "/usr/bin/cat /proc/%u/comm"; +- const char *get_cmdline_fmt = "/usr/bin/cat /proc/%u/cmdline"; ++ const char *get_comm_fmt = "/usr/bin/cat /proc/%u/comm 2>/dev/null"; ++ const char *get_cmdline_fmt = "/usr/bin/cat /proc/%u/cmdline 2>/dev/null"; + + dir = opendir("/proc"); + if (dir == NULL) { 
+diff --git a/src/probes/extends/python.probe/cadvisor.probe/cadvisor_probe.py b/src/probes/extends/python.probe/cadvisor.probe/cadvisor_probe.py +index 8b12aac..bbca1b7 100644 +--- a/src/probes/extends/python.probe/cadvisor.probe/cadvisor_probe.py ++++ b/src/probes/extends/python.probe/cadvisor.probe/cadvisor_probe.py +@@ -360,7 +360,10 @@ def print_metrics(): + value = "" + else: + if field_type == COUNTER: +- value = str(record[field_name][1] - record[field_name][0]) ++ if record[field_name][1] > record[field_name][0]: ++ value = str(record[field_name][1] - record[field_name][0]) ++ else: ++ value = "0" + record[field_name][0] = record[field_name][1] + else: + value = record[field_name] +-- +2.33.0 + -- Gitee