From ec5a75d6a237c0d796185f8a1679a14ae0ad5725 Mon Sep 17 00:00:00 2001 From: Qizhi Zhang Date: Thu, 16 Oct 2025 19:10:15 +0800 Subject: [PATCH] Add SOC Ring sentry function and Add testcase tc_ring for SOC Ring sentry and Add MulanV2 License statement Add SOC Ring sentry function and Add testcase tc_ring for SOC Ring sentry and Add MulanV2 License statement. testcase tc_ring cleancode. Fix issue cores with isolcpus set blacklist. Fix issue inconsistent status and result after single inspection. Use panic instead of coredump file. Fix Security Scan Warning and Fix two code review comments. Signed-off-by: Qizhi Zhang --- Add-MulanV2-License-statement.patch | 110 +++ Add-SOC-Ring-sentry-function.patch | 478 +++++++++ ...testcase-tc_ring-for-SOC-Ring-sentry.patch | 809 ++++++++++++++++ Fix-Security-Scan-Warning.patch | 60 ++ ...sue-cores-with-isolcpus-set-blacklis.patch | 61 ++ ...sue-inconsistent-status-and-result-a.patch | 57 ++ Fix-two-code-review-comments.patch | 45 + Use-panic-instead-of-coredump-file.patch | 78 ++ sysSentry.spec | 43 +- testcase-tc_ring-cleancode.patch | 914 ++++++++++++++++++ 10 files changed, 2654 insertions(+), 1 deletion(-) create mode 100644 Add-MulanV2-License-statement.patch create mode 100644 Add-SOC-Ring-sentry-function.patch create mode 100644 Add-testcase-tc_ring-for-SOC-Ring-sentry.patch create mode 100644 Fix-Security-Scan-Warning.patch create mode 100644 Fix-issue-cores-with-isolcpus-set-blacklis.patch create mode 100644 Fix-issue-inconsistent-status-and-result-a.patch create mode 100644 Fix-two-code-review-comments.patch create mode 100644 Use-panic-instead-of-coredump-file.patch create mode 100644 testcase-tc_ring-cleancode.patch diff --git a/Add-MulanV2-License-statement.patch b/Add-MulanV2-License-statement.patch new file mode 100644 index 0000000..41bfc66 --- /dev/null +++ b/Add-MulanV2-License-statement.patch @@ -0,0 +1,110 @@ +From 7bfe3bfb4cd7d1db43b1baeefc0cfbf2f648a092 Mon Sep 17 00:00:00 2001 +From: Xiaofei Tan +Date: 
Tue, 23 Sep 2025 11:58:03 +0800 +Subject: [PATCH 9/9] sysSentry: Add MulanV2 License statement + +driver inclusion +category: feature +bugzilla: https://gitee.com/src-openeuler/sysSentry/issues/ID1UOY +CVE: NA + +---------------------------------------------------------------------- + +Add MulanV2 license statement to the header of each file. + +Signed-off-by: Xiaofei Tan +Signed-off-by: Qizhi Zhang +--- + src/sentryPlugins/soc_ring_sentry/soc_ring_sentry.c | 12 +++++++++++- + src/sentryPlugins/soc_ring_sentry/soc_ring_sentry.h | 12 +++++++++++- + src/sentryPlugins/soc_ring_sentry/tc_ring_one.c | 12 +++++++++++- + src/sentryPlugins/soc_ring_sentry/tc_ring_one.h | 12 +++++++++++- + 4 files changed, 44 insertions(+), 4 deletions(-) + +diff --git a/src/sentryPlugins/soc_ring_sentry/soc_ring_sentry.c b/src/sentryPlugins/soc_ring_sentry/soc_ring_sentry.c +index df195ae..d5e615c 100644 +--- a/src/sentryPlugins/soc_ring_sentry/soc_ring_sentry.c ++++ b/src/sentryPlugins/soc_ring_sentry/soc_ring_sentry.c +@@ -1,5 +1,15 @@ + /* +- * Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved. ++ * Copyright (c) 2025 Huawei Technologies Co., Ltd. ++ * sysSentry is licensed under Mulan PSL v2. ++ * You can use this software according to the terms and conditions of the Mulan PSL v2. ++ * You may obtain a copy of Mulan PSL v2 at: ++ * http://license.coscl.org.cn/MulanPSL2 ++ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, ++ * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, ++ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. ++ * ++ * See the Mulan PSL v2 for more details. 
++ * + * Description: SOC Ring sentry main program + * Author: Yihang Li + * Create: 2025-7-10 +diff --git a/src/sentryPlugins/soc_ring_sentry/soc_ring_sentry.h b/src/sentryPlugins/soc_ring_sentry/soc_ring_sentry.h +index 0566496..34fb5ec 100644 +--- a/src/sentryPlugins/soc_ring_sentry/soc_ring_sentry.h ++++ b/src/sentryPlugins/soc_ring_sentry/soc_ring_sentry.h +@@ -1,5 +1,15 @@ + /* +- * Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved. ++ * Copyright (c) 2025 Huawei Technologies Co., Ltd. ++ * sysSentry is licensed under Mulan PSL v2. ++ * You can use this software according to the terms and conditions of the Mulan PSL v2. ++ * You may obtain a copy of Mulan PSL v2 at: ++ * http://license.coscl.org.cn/MulanPSL2 ++ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, ++ * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, ++ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. ++ * ++ * See the Mulan PSL v2 for more details. ++ * + * Description: SOC Ring sentry main header + * Author: Yihang Li + * Create: 2025-7-10 +diff --git a/src/sentryPlugins/soc_ring_sentry/tc_ring_one.c b/src/sentryPlugins/soc_ring_sentry/tc_ring_one.c +index 3dda5e0..bae7ada 100755 +--- a/src/sentryPlugins/soc_ring_sentry/tc_ring_one.c ++++ b/src/sentryPlugins/soc_ring_sentry/tc_ring_one.c +@@ -1,5 +1,15 @@ + /* +- * Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved. ++ * Copyright (c) 2025 Huawei Technologies Co., Ltd. ++ * sysSentry is licensed under Mulan PSL v2. ++ * You can use this software according to the terms and conditions of the Mulan PSL v2. ++ * You may obtain a copy of Mulan PSL v2 at: ++ * http://license.coscl.org.cn/MulanPSL2 ++ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, ++ * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, ++ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. 
++ * ++ * See the Mulan PSL v2 for more details. ++ * + * Description: tc ring testcase program + * Author: lizixian + * Create: 2025-7-10 +diff --git a/src/sentryPlugins/soc_ring_sentry/tc_ring_one.h b/src/sentryPlugins/soc_ring_sentry/tc_ring_one.h +index 5e93a56..126f9cb 100755 +--- a/src/sentryPlugins/soc_ring_sentry/tc_ring_one.h ++++ b/src/sentryPlugins/soc_ring_sentry/tc_ring_one.h +@@ -1,5 +1,15 @@ + /* +- * Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved. ++ * Copyright (c) 2025 Huawei Technologies Co., Ltd. ++ * sysSentry is licensed under Mulan PSL v2. ++ * You can use this software according to the terms and conditions of the Mulan PSL v2. ++ * You may obtain a copy of Mulan PSL v2 at: ++ * http://license.coscl.org.cn/MulanPSL2 ++ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, ++ * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, ++ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. ++ * ++ * See the Mulan PSL v2 for more details. 
++ * + * Description: tc ring testcase header + * Author: lizixian + * Create: 2025-7-10 +-- +2.33.0 + diff --git a/Add-SOC-Ring-sentry-function.patch b/Add-SOC-Ring-sentry-function.patch new file mode 100644 index 0000000..f5fe6d7 --- /dev/null +++ b/Add-SOC-Ring-sentry-function.patch @@ -0,0 +1,478 @@ +From 1a01994447c2c031daca77b75325a0418104a91f Mon Sep 17 00:00:00 2001 +From: Qizhi Zhang +Date: Thu, 16 Oct 2025 18:47:37 +0800 +Subject: [PATCH] sysSentry: Add SOC Ring sentry function + +driver inclusion +category: feature +bugzilla: https://gitee.com/src-openeuler/sysSentry/issues/ID1UOY +CVE: NA + +---------------------------------------------------------------------- + +Signed-off-by: Yihang Li +Signed-off-by: Qizhi Zhang +--- + Makefile | 17 +- + config/env/soc_ring_sentry.env | 6 + + config/tasks/soc_ring_sentry.mod | 7 + + src/sentryPlugins/soc_ring_sentry/Makefile | 9 + + src/sentryPlugins/soc_ring_sentry/README.md | 34 +++ + .../soc_ring_sentry/soc_ring_sentry.c | 261 ++++++++++++++++++ + .../soc_ring_sentry/soc_ring_sentry.h | 27 ++ + 7 files changed, 359 insertions(+), 2 deletions(-) + create mode 100644 config/env/soc_ring_sentry.env + create mode 100644 config/tasks/soc_ring_sentry.mod + create mode 100644 src/sentryPlugins/soc_ring_sentry/Makefile + create mode 100644 src/sentryPlugins/soc_ring_sentry/README.md + create mode 100644 src/sentryPlugins/soc_ring_sentry/soc_ring_sentry.c + create mode 100644 src/sentryPlugins/soc_ring_sentry/soc_ring_sentry.h + +diff --git a/Makefile b/Makefile +index 54e62f4..43991f9 100644 +--- a/Makefile ++++ b/Makefile +@@ -27,7 +27,7 @@ PYTHON_VERSION := $(shell $(PYBIN) --version 2>&1 | awk '{print $$2}' | cut -d ' + PKGVER := syssentry-$(VERSION)-py$(PYTHON_VERSION) + PKGVEREGG := syssentry-$(VERSION)-py$(PYTHON_VERSION).egg-info + +-all: lib ebpf hbm_online_repair sentry_msg_monitor bmc_block_io ++all: lib ebpf hbm_online_repair sentry_msg_monitor bmc_block_io soc_ring_sentry + + lib:libxalarm log + +@@ -53,6 
+53,9 @@ sentry_msg_monitor: lib + bmc_block_io: lib + cd $(CURSRCDIR)/sentryPlugins/bmc_block_io/ && sh build.sh + ++soc_ring_sentry: lib ++ cd $(CURSRCDIR)/sentryPlugins/soc_ring_sentry/ && make ++ + install: all dirs isentry + + dirs: +@@ -129,6 +132,11 @@ isentry: + install -m 600 $(CURCONFIGDIR)/env/hbm_online_repair.env $(ETCDIR)/sysconfig/ + install -m 600 $(CURCONFIGDIR)/tasks/hbm_online_repair.mod $(ETCDIR)/sysSentry/tasks/ + ++ # soc_ring_sentry ++ install -m 750 $(CURSRCDIR)/sentryPlugins/soc_ring_sentry/soc_ring_sentry $(BINDIR) ++ install -m 600 $(CURCONFIGDIR)/env/soc_ring_sentry.env $(ETCDIR)/sysconfig/ ++ install -m 600 $(CURCONFIGDIR)/tasks/soc_ring_sentry.mod $(ETCDIR)/sysSentry/tasks/ ++ + # sentry_msg_monitor + install -m 550 $(CURSRCDIR)/sentryPlugins/sentry_msg_monitor/sentry_msg_monitor $(BINDIR) + install -m 600 $(CURCONFIGDIR)/env/sentry_msg_monitor.env $(ETCDIR)/sysconfig/ +@@ -172,7 +180,10 @@ smm_clean: + bmc_clean: + cd $(CURSRCDIR)/sentryPlugins/bmc_block_io && sh build.sh clean + +-clean: ebpf_clean hbm_clean smm_clean bmc_clean ++srs_clean: ++ cd $(CURSRCDIR)/sentryPlugins/soc_ring_sentry && make clean ++ ++clean: ebpf_clean hbm_clean smm_clean bmc_clean srs_clean + rm -rf $(CURLIBDIR)/build + rm -rf $(CURSRCDIR)/build + rm -rf $(CURSRCDIR)/libsentry/c/log/build +@@ -185,6 +196,7 @@ uninstall: + rm -rf $(BINDIR)/xalarmd + rm -rf $(BINDIR)/sentryCollector + rm -rf $(BINDIR)/hbm_online_repair ++ rm -rf $(BINDIR)/soc_ring_sentry + rm -rf $(BINDIR)/sentry_msg_monitor + rm -rf $(BINDIR)/bmc_block_io + rm -rf $(BINDIR)/ebpf_collector +@@ -194,6 +206,7 @@ uninstall: + rm -rf $(INCLUDEDIR)/libsentry + rm -rf $(ETCDIR)/sysSentry + rm -rf $(ETCDIR)/hbm_online_repair.env ++ rm -rf $(ETCDIR)/soc_ring_sentry.env + rm -rf $(ETCDIR)/sentry_msg_monitor.env + rm -rf $(LOGSAVEDIR)/sysSentry + rm -rf $(PYDIR)/syssentry +diff --git a/config/env/soc_ring_sentry.env b/config/env/soc_ring_sentry.env +new file mode 100644 +index 0000000..b1fdd68 +--- 
/dev/null ++++ b/config/env/soc_ring_sentry.env +@@ -0,0 +1,6 @@ ++LOG_LEVEL=info ++SOC_RING_SENTRY_INTENSITY_DELAY=600 ++SOC_RING_SENTRY_MEM_SIZE=4096 ++SOC_RING_SENTRY_LOOP_CNT=0 ++SOC_RING_SENTRY_FAULT_HANDLING=1 ++SOC_RING_SENTRY_BLACKLIST= +\ No newline at end of file +diff --git a/config/tasks/soc_ring_sentry.mod b/config/tasks/soc_ring_sentry.mod +new file mode 100644 +index 0000000..303567a +--- /dev/null ++++ b/config/tasks/soc_ring_sentry.mod +@@ -0,0 +1,7 @@ ++[common] ++enabled=yes ++task_start=/usr/bin/soc_ring_sentry ++task_stop=pkill -f soc_ring_sentry ++type=oneshot ++onstart=yes ++env_file=/etc/sysconfig/soc_ring_sentry.env +\ No newline at end of file +diff --git a/src/sentryPlugins/soc_ring_sentry/Makefile b/src/sentryPlugins/soc_ring_sentry/Makefile +new file mode 100644 +index 0000000..02032d9 +--- /dev/null ++++ b/src/sentryPlugins/soc_ring_sentry/Makefile +@@ -0,0 +1,9 @@ ++all: soc_ring_sentry ++ ++soc_ring_sentry: ++ gcc *.c -Wall -o3 -I../../libsentry/c/log -L../../libsentry/c/log/build -I../../libs/libxalarm -L../../libs/build/libxalarm -lxalarm -lsentry_log -ljson-c -pthread -lnuma -o soc_ring_sentry ++ ++clean: ++ rm -f soc_ring_sentry ++ ++.PHONY: all clean +diff --git a/src/sentryPlugins/soc_ring_sentry/README.md b/src/sentryPlugins/soc_ring_sentry/README.md +new file mode 100644 +index 0000000..e4b3c53 +--- /dev/null ++++ b/src/sentryPlugins/soc_ring_sentry/README.md +@@ -0,0 +1,34 @@ ++# soc_ring_sentry ++ ++#### 介绍 ++soc_ring_sentry是一款依赖sysSentry并用于SOC STL巡检的插件,该插件的使用方法是: ++ ++usage: soc_ring_sentry [OPTIONS] ++ ++Options: ++ -h, Show this help message and exit. ++ -g, Get the SOC Ring sentry case. ++ ++用户可以通过 `/etc/sysconfig/soc_ring_sentry.env` 修改环境变量以配置不同参数 ++该文件中各个环境变量含义为: ++`LOG_LEVEL` ++日志等级配置,默认配置为info级别。也可以配置为debug, warning, 或者error. 
/*
 * Parse a base-10, non-negative integer from an environment-variable string.
 *
 * @param env    string returned by getenv(); may be NULL
 * @param value  receives the parsed number; written only on success
 * @return true when env holds a valid number optionally followed by
 *         whitespace, false when env is NULL, negative, out of range,
 *         empty, or has trailing non-space characters
 */
static bool soc_ring_sentry_envtoull(char *env, uint64_t *value)
{
    unsigned long long parsed;
    char *endptr;

    if (!env) {
        return false;
    }

    /*
     * strtoull() skips leading whitespace and then silently negates a
     * '-'-prefixed number, so the sign has to be checked after the same
     * whitespace has been skipped here (otherwise " -5" slips through).
     */
    while (isspace((unsigned char)*env)) {
        env++;
    }

    if (*env == '-') {
        logging_error("Negative input not allowed.\n");
        return false;
    }

    errno = 0;
    parsed = strtoull(env, &endptr, 10);
    if (errno != 0 || endptr == env) {
        /* out of range, or no digits consumed at all */
        return false;
    }

    /* trailing whitespace is tolerated, anything else is not */
    while (isspace((unsigned char)*endptr)) {
        endptr++;
    }

    if (*endptr != '\0') {
        return false;
    }

    *value = parsed;
    return true;
}
g_loop_cnt); ++ return; ++ } ++ ++ logging_warn("Environment variable SOC_RING_SENTRY_LOOP_CNT invalid, using default value %lu\n", g_loop_cnt); ++} ++ ++static void soc_ring_sentry_blacklist_init(size_t core_num) ++{ ++ char *env = getenv("SOC_RING_SENTRY_BLACKLIST"); ++ char *log_buf, *log_end_ptr; ++ size_t log_buf_len, i; ++ int offset; ++ ++ g_blacklist = (bool *)calloc(core_num, sizeof(bool)); ++ if (!g_blacklist) { ++ logging_error("Failed to allocate memory for blacklist, none CPU set to blacklist\n"); ++ return; ++ } ++ ++ if (env && strlen(env) > 0) { ++ struct bitmask *cpuset = numa_parse_cpustring(env); ++ ++ if (!cpuset) { ++ logging_error("Failed to parse environment variable SOC_RING_SENTRY_BLACKLIST: %s\n", env); ++ return; ++ } ++ ++ for (i = 0; i < core_num; i++) { ++ if (numa_bitmask_isbitset(cpuset, i)) { ++ g_blacklist[i] = true; ++ } ++ } ++ ++ numa_bitmask_free(cpuset); ++ cpuset = NULL; ++ logging_info("soc_ring_sentry blacklist set successful\n"); ++ log_buf_len = strlen("blacklist cores: ") + core_num * 4 + 2; ++ log_buf = (char *)calloc(log_buf_len, sizeof(char)); ++ if (log_buf) { ++ offset = snprintf(log_buf, log_buf_len * sizeof(char), "blacklist cores: "); ++ log_end_ptr = log_buf + offset; ++ for (i = 0; i < core_num; i++) { ++ if (g_blacklist[i]) { ++ offset = snprintf(log_end_ptr, log_buf_len - (log_end_ptr - log_buf), "%ld ", i); ++ if (offset < 0 || offset >= (log_buf_len - (log_end_ptr - log_buf))) { ++ logging_error("Log buffer overflow during snprintf\n"); ++ break; ++ } ++ ++ log_end_ptr += offset; ++ } ++ } ++ ++ logging_info("%s\n", log_buf); ++ free(log_buf); ++ log_buf = NULL; ++ } ++ } ++} ++ ++static void soc_ring_sentry_init(size_t core_num) ++{ ++ soc_ring_sentry_log_level_init(); ++ soc_ring_sentry_intensity_delay_init(); ++ soc_ring_sentry_handle_init(); ++ soc_ring_sentry_mem_size_init(); ++ soc_ring_sentry_loop_cnt_init(); ++ soc_ring_sentry_blacklist_init(core_num); ++} ++ ++static int 
/*
 * Program entry point.
 *
 * No argument : run the inspection.
 * -h          : print usage, exit 0.
 * -g          : print the test case list, exit 0.
 * Anything else (unknown option, positional argument, more than one
 * argument) prints usage and returns -1.
 */
int main(int argc, char *argv[])
{
    if (argc <= 1) {
        soc_ring_sentry_exec();
        return 0;
    }

    if (argc == 2) {
        /*
         * Exactly one argument: a single getopt() call decides. It returns
         * -1 for a non-option word such as "foo", which previously fell
         * through and exited 0 without doing anything.
         */
        switch (getopt(argc, argv, "hg")) {
        case 'h':
            print_opts_help();
            return 0;
        case 'g':
            soc_ring_sentry_case_get();
            return 0;
        default:
            break;
        }
    }

    print_opts_help();
    return -1;
}
++ * Description: SOC Ring sentry main header ++ * Author: Yihang Li ++ * Create: 2025-7-10 ++ */ ++ ++#ifndef __SOC_RING_SENTRY_H ++#define __SOC_RING_SENTRY_H ++ ++#define TOOL_NAME "soc_ring_sentry" ++ ++enum handle_level { ++ HANDLE_NONE, ++ HANDLE_PANIC, ++ HANDLE_POWEROFF, ++ HANDLE_REBOOT, ++ HANDLE_LEVEL_INVALID ++}; ++ ++extern uint64_t g_intensity_delay; ++extern uint64_t g_handle; ++extern uint64_t g_mem_size; ++extern uint64_t g_loop_cnt; ++extern bool *g_blacklist; ++ ++#endif +-- +2.34.1 + diff --git a/Add-testcase-tc_ring-for-SOC-Ring-sentry.patch b/Add-testcase-tc_ring-for-SOC-Ring-sentry.patch new file mode 100644 index 0000000..81ac1ea --- /dev/null +++ b/Add-testcase-tc_ring-for-SOC-Ring-sentry.patch @@ -0,0 +1,809 @@ +From 6946a70721c26c74b973bf419b6b3c4e8747e419 Mon Sep 17 00:00:00 2001 +From: Yihang Li +Date: Thu, 17 Jul 2025 20:47:44 +0800 +Subject: [PATCH 2/9] sysSentry: Add testcase tc_ring for SOC Ring sentry + +driver inclusion +category: feature +bugzilla: https://gitee.com/src-openeuler/sysSentry/issues/ID1UOY +CVE: NA + +---------------------------------------------------------------------- + +Signed-off-by: Yihang Li +Signed-off-by: Qizhi Zhang +--- + .../soc_ring_sentry/soc_ring_sentry.c | 12 +- + .../soc_ring_sentry/tc_ring_one.c | 689 ++++++++++++++++++ + .../soc_ring_sentry/tc_ring_one.h | 38 + + 3 files changed, 736 insertions(+), 3 deletions(-) + create mode 100755 src/sentryPlugins/soc_ring_sentry/tc_ring_one.c + create mode 100755 src/sentryPlugins/soc_ring_sentry/tc_ring_one.h + +diff --git a/src/sentryPlugins/soc_ring_sentry/soc_ring_sentry.c b/src/sentryPlugins/soc_ring_sentry/soc_ring_sentry.c +index f407fb8..21c78b6 100644 +--- a/src/sentryPlugins/soc_ring_sentry/soc_ring_sentry.c ++++ b/src/sentryPlugins/soc_ring_sentry/soc_ring_sentry.c +@@ -9,6 +9,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -16,6 +17,7 @@ + #include "register_xalarm.h" + #include "log_utils.h" + #include 
"soc_ring_sentry.h" ++#include "tc_ring_one.h" + + #define DEFAULT_INTENSITY_DELAY 600 + #define DEFAULT_HANDLE 1 +@@ -41,7 +43,7 @@ static void print_opts_help() + + static void soc_ring_sentry_case_get() + { +- //todo: print the test case name ++ printf("1. [soc stl] ring data bit line scan tescase.\n"); + } + + static bool soc_ring_sentry_envtoull(char *env, uint64_t *value) +@@ -203,8 +205,12 @@ static void soc_ring_sentry_init(size_t core_num) + + static int soc_ring_sentry_delivery(size_t core_num) + { +- //todo: add delivery test +- return 0; ++ int ret; ++ ++ ret = tc_ring_one_create_threads(g_mem_size, g_loop_cnt, g_intensity_delay, g_handle, g_blacklist, core_num); ++ tc_ring_one_post_process(ret); ++ ++ return ret; + } + + size_t get_system_core_num(void) +diff --git a/src/sentryPlugins/soc_ring_sentry/tc_ring_one.c b/src/sentryPlugins/soc_ring_sentry/tc_ring_one.c +new file mode 100755 +index 0000000..9713495 +--- /dev/null ++++ b/src/sentryPlugins/soc_ring_sentry/tc_ring_one.c +@@ -0,0 +1,689 @@ ++/* ++ * Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved. 
/* Run-time configuration and shared state of the tc_ring_one test case. */
typedef struct tc_ring_one_config {
    void** test_space_base;         // pointers to each NUMA node's test space; the array itself is allocated dynamically according to the system's NUMA node count
    size_t space_size;              // size of each NUMA node's test space
    size_t block_size;              // size of each test block, fixed at 64 KB
    uint64_t loop_total;            // total number of test loops; 0 means test forever
    int64_t sleep_ms;               // sleep time after each data block has been read; <= 0 means no sleep, continue with the next block
    int64_t rd_loop;                // number of data scans per scanned bit within one loop, fixed at 0x80
    pthread_t* tc_core_threads;     // test thread of each core
    pthread_barrier_t tc_barrier;   // synchronization barrier
    pthread_mutex_t* tc_node_mutex; // per-NUMA-node lock for data updates; a node's test space may only be refreshed by one test core of that node
    uint32_t* node_update_flag;     // refresh-status flag of the test-space data
    uint64_t err_handle;            // post-error handling
    bool* black_list;               // test blacklist
    int tc_core_total;              // total number of test cores
    int sys_core_total;             // total number of system cores
    int scan_bit;                   // bit being scanned
    int err_cnt;                    // error count
} tc_ring_one_config_t;
/*
 * Report whether a CPU core is online according to
 * /sys/devices/system/cpu/cpu<core_id>/online.
 *
 * @param core_id  CPU index
 * @return the integer read from the sysfs file (non-zero = online);
 *         1 for core 0 when the file does not exist;
 *         0 when the file cannot be opened or parsed
 */
static int is_cpu_online(int core_id)
{
    char online_file[64];
    struct stat buffer;
    int online = 0;
    FILE *fp;

    snprintf(online_file, sizeof(online_file), "/sys/devices/system/cpu/cpu%d/online", core_id);
    /* core 0 with no online file is reported as online (presumably it cannot be hot-unplugged) */
    if ((core_id == 0) && (lstat(online_file, &buffer) != 0)) {
        return 1;
    }

    fp = fopen(online_file, "r");
    if (!fp) {
        logging_error("Failed to open %s\n", online_file);
        return 0;
    }

    /* an unchecked fscanf() would leave 'online' indeterminate on a short or garbled read */
    if (fscanf(fp, "%d", &online) != 1) {
        logging_error("Failed to read %s\n", online_file);
        online = 0;
    }
    fclose(fp);

    return online;
}
/**
 * Allocate memory preferably on the given NUMA node, falling back to the
 * other allowed nodes and finally to an interleaved allocation.
 *
 * @param preferred_node  NUMA node to try first
 * @param size            number of bytes to allocate
 * @return pointer to the memory on success, NULL on failure
 */
static void *numa_alloc_fallback(int preferred_node, size_t size)
{
    struct bitmask *allowed_nodes;
    void *ptr = NULL;
    int max_node;
    int node;

    /* 1. Get the set of NUMA nodes this process may allocate from */
    allowed_nodes = numa_get_mems_allowed();
    if (!allowed_nodes) {
        logging_error("Failed to get allowed NUMA nodes");
        return NULL;
    }

    /* 2. Try the preferred node first */
    if (numa_bitmask_isbitset(allowed_nodes, preferred_node)) {
        ptr = numa_alloc_onnode(size, preferred_node);
        if (ptr != NULL) {
            logging_debug("Allocated %#zx bytes on NUMA assigned node %d, addr: %p \n", size, preferred_node, ptr);
            goto out;
        }
    }

    /* 3. Walk every other allowed node */
    max_node = numa_max_node();
    for (node = 0; node <= max_node; node++) {
        if (node == preferred_node || !numa_bitmask_isbitset(allowed_nodes, node)) {
            continue; /* skip the preferred node and nodes that are not allowed */
        }

        ptr = numa_alloc_onnode(size, node);
        if (ptr != NULL) {
            logging_debug("Allocated %#zx bytes on NUMA other node %d, addr: %p \n", size, node, ptr);
            goto out;
        }
    }

    /* 4. Every single node failed: try an allocation interleaved across nodes */
    ptr = numa_alloc_interleaved(size);
    if (ptr == NULL) {
        logging_error("Failed to allocate %#zx bytes on any NUMA node\n", size);
    }

out:
    /* the mask returned by numa_get_mems_allowed() is heap-allocated and owned by the caller */
    numa_bitmask_free(allowed_nodes);
    return ptr;
}
pthread_mutex_destroy(&config->tc_node_mutex[i]); ++ } ++ free(config->tc_node_mutex); ++ return TC_RING_ONE_PRAMA_ERR; ++ } ++ ++ config->sys_core_total = tc_core_total; ++ pthread_barrier_init(&config->tc_barrier, NULL, tc_core_total); ++ ++ return TC_RING_ONE_SUCCESS; ++} ++ ++static int tc_ring_one_init(tc_ring_one_config_t *config) ++{ ++ int numa_node_num; ++ int ret = 0; ++ void *ptr; ++ int i; ++ ++ if (numa_available() < 0) { ++ logging_error("NUMA is not available on this system"); ++ return TC_RING_ONE_PRAMA_ERR; ++ } ++ ++ // 为每个 NUMA 节点分配测试内存空间 ++ numa_node_num = numa_max_node() + 1; ++ config->test_space_base = (void **)malloc(sizeof(void *) * numa_node_num); ++ if (config->test_space_base == NULL) { ++ logging_error("Failed to allocate memory for test_space_base"); ++ return TC_RING_ONE_PRAMA_ERR; ++ } ++ ++ for (i = 0; i < numa_node_num; i++) { ++ ptr = numa_alloc_fallback(i, config->space_size); ++ if (ptr == NULL) { ++ ret = TC_RING_ONE_PRAMA_ERR; ++ goto numa_alloc_fail; ++ } ++ ++ tc_ring_one_space_init(ptr, config->space_size); ++ config->test_space_base[i] = ptr; ++ } ++ ++ config->node_update_flag = (uint32_t *)malloc(sizeof(uint32_t) * numa_node_num); ++ if (config->node_update_flag == NULL) { ++ logging_error("Failed to allocate memory for node_update_flag"); ++ ret = TC_RING_ONE_PRAMA_ERR; ++ goto numa_alloc_fail; ++ } else { ++ for (i = 0; i < numa_node_num; i++) { ++ config->node_update_flag[i] = 0; ++ } ++ } ++ ++ ret = tc_ring_one_ctrl_var_init(config); ++ if (ret != 0) { ++ logging_error("tc_ring_one_ctrl_var_init fail ret:%d", ret); ++ goto ctrl_var_init_fail; ++ } ++ ++ return ret; ++ ++ctrl_var_init_fail: ++ free(config->node_update_flag); ++ config->node_update_flag = NULL; ++ ++numa_alloc_fail: ++ for (i = 0; i < numa_node_num; i++) { ++ numa_free(config->test_space_base[i], config->space_size); ++ } ++ ++ free(config->test_space_base); ++ config->test_space_base = NULL; ++ return ret; ++} ++ ++static void 
tc_ring_one_release(tc_ring_one_config_t *config) ++{ ++ int numa_node_num; ++ int node; ++ ++ numa_node_num = numa_max_node() + 1; ++ for (int i = 0; i < numa_node_num; i++) { ++ numa_free(config->test_space_base[i], config->space_size); ++ } ++ free(config->test_space_base); ++ config->test_space_base = NULL; ++ free(config->node_update_flag); ++ config->node_update_flag = NULL; ++ ++ pthread_barrier_destroy(&(config->tc_barrier)); ++ for (node = 0; node < numa_node_num; node++) { ++ pthread_mutex_destroy(&config->tc_node_mutex[node]); ++ } ++ free(config->tc_node_mutex); ++ config->tc_node_mutex = NULL; ++} ++ ++static int is_core_run_tc(tc_ring_one_config_t *config, int core_id) ++{ ++ if (is_cpu_online(core_id) == 0) { ++ return 0; ++ } ++ ++ if ((config->black_list != NULL) && (config->black_list[core_id] == 1)) { ++ return 0; ++ } ++ ++ return 1; ++} ++ ++// 将线程绑定到指定 CORE ++static int bind_thread_to_core(pthread_t thread, int core_id) ++{ ++ cpu_set_t cpuset; ++ int ret = 0; ++ ++ CPU_ZERO(&cpuset); ++ CPU_SET(core_id, &cpuset); ++ ++ ret = pthread_setaffinity_np(thread, sizeof(cpu_set_t), &cpuset); ++ if (ret != 0) { ++ logging_error("pthread_setaffinity_np failed"); ++ ret = TC_RING_ONE_PRAMA_ERR; ++ } ++ ++ return ret; ++} ++ ++int tc_ring_one_create_threads(uint64_t mem_size, uint64_t loop_cnt, uint64_t delay, ++ uint64_t err_handle, bool *blacklist, size_t core_num) ++{ ++ int ret = 0; ++ int i; ++ ++ g_tc_config.space_size = mem_size; ++ g_tc_config.loop_total = loop_cnt; ++ g_tc_config.sleep_ms = delay; ++ g_tc_config.err_handle = err_handle; ++ g_tc_config.black_list = blacklist; ++ g_tc_config.sys_core_total = core_num; ++ g_tc_config.block_size = TC_RING_ONE_BLOCK_SIZE; ++ g_tc_config.rd_loop = TC_RING_ONE_RD_LOOP; ++ ++ ret = tc_ring_one_init(&g_tc_config); ++ if (ret != 0) { ++ logging_error("tc_ring_one_init fail ret:%d", ret); ++ return ret; ++ } ++ ++ for (i = 0; i < core_num; i++) { ++ if (is_core_run_tc(&g_tc_config, i) == 0) { ++ // 跳过黑名单 & 
offline 的core ++ continue; ++ } ++ ret = pthread_create(&g_tc_config.tc_core_threads[i], NULL, tc_ring_one_thread_entry, (void *)(&g_tc_config)); ++ if (ret != 0) { ++ logging_error("Failed to create thread for core %d", i); ++ tc_ring_one_release(&g_tc_config); ++ free(g_tc_config.tc_core_threads); ++ return ret; ++ } ++ ++ ret = bind_thread_to_core(g_tc_config.tc_core_threads[i], i); ++ if (ret != 0) { ++ logging_error("Failed to bind thread to core %d", i); ++ tc_ring_one_release(&g_tc_config); ++ free(g_tc_config.tc_core_threads); ++ return ret; ++ } ++ } ++ ++ // 等待所有线程完成 ++ for (i = 0; i < core_num; i++) { ++ if (is_core_run_tc(&g_tc_config, i) == 0) { ++ // 跳过黑名单 & offline 的core ++ continue; ++ } ++ pthread_join(g_tc_config.tc_core_threads[i], NULL); ++ } ++ ++ tc_ring_one_release(&g_tc_config); ++ free(g_tc_config.tc_core_threads); ++ if (g_tc_config.err_cnt > 0) { ++ return TC_RING_ONE_FAIL; ++ } else { ++ return TC_RING_ONE_SUCCESS; ++ } ++} ++ ++static void tc_ring_one_init_data_pattern(uintptr_t base, size_t size, int scan_bit) ++{ ++ uint64_t dat_pattern = (1ULL << (scan_bit & 0x3F)); ++ size_t word_offset = (scan_bit >> 6) << 3; // 待测试bit在cacheline中的word的偏移位置(一个word为64 bit) ++ int i; ++ ++ for (i = 0; i < size; i += TC_RING_ONE_DATA_UNIT) { ++ *((uint64_t *)(base + i + word_offset + TC_RING_ONE_CACHELINE_SIZE)) = dat_pattern; ++ } ++} ++ ++static void tc_ring_one_data_clear(uintptr_t base, size_t size, int scan_bit) ++{ ++ size_t word_offset = (scan_bit >> 6) << 3; // 待测试bit在cacheline中的word的偏移位置(一个word为64 bit) ++ int i; ++ ++ for (i = 0; i < size; i += TC_RING_ONE_DATA_UNIT) { ++ *((uint64_t *)(base + i + word_offset + TC_RING_ONE_CACHELINE_SIZE)) = 0; ++ } ++} ++ ++static void tc_ring_one_testspace_update(tc_ring_one_config_t *config, int scan_bit) ++{ ++ int core_id = get_core_id_by_thread(config->tc_core_threads); ++ int numa_node = get_numa_node_of_core(core_id); ++ ++ pthread_barrier_wait(&config->tc_barrier); ++ 
pthread_mutex_lock(&config->tc_node_mutex[numa_node]); ++ if (config->node_update_flag[numa_node] == 0) { ++ tc_ring_one_init_data_pattern((uintptr_t)config->test_space_base[numa_node], config->space_size, scan_bit); ++ config->node_update_flag[numa_node] = 1; ++ } ++ pthread_mutex_unlock(&config->tc_node_mutex[numa_node]); ++ pthread_barrier_wait(&config->tc_barrier); ++} ++ ++static void tc_ring_one_testspace_recover(tc_ring_one_config_t *config, int scan_bit) ++{ ++ uint32_t core_id = get_core_id_by_thread(config->tc_core_threads); ++ int numa_node = get_numa_node_of_core(core_id); ++ ++ pthread_barrier_wait(&config->tc_barrier); ++ pthread_mutex_lock(&config->tc_node_mutex[numa_node]); ++ if (config->node_update_flag[numa_node] == 1) { ++ tc_ring_one_data_clear((uintptr_t)config->test_space_base[numa_node], config->space_size, scan_bit); ++ config->node_update_flag[numa_node] = 0; ++ } ++ pthread_mutex_unlock(&config->tc_node_mutex[numa_node]); ++ pthread_barrier_wait(&config->tc_barrier); ++} ++ ++/** ++ * tc_ring_one_scan_test_block - 针对测试空间的指定块空间进行读扫描, 确保在块空间的的指定bit位置不存在非预期的由1跳变成0的情况 ++ * @base_addr: 测试空间中指定测试块首地址(必须为128B对齐地址) ++ * @scan_bit: 待测试bit位置(0-511) ++ * @block_size: 待测试块空间大小 ++ * @loop: 测试循环 ++ * @err_cnt: 记录错误次数的内存地址 ++ * ++ * 在调用本函数之前,要保证测试空间已经被待验证的数据Pattern初始化。 ++ * 待验证的数据Pattern: ++ * - base_addr + n * 64 各个数据bit为1 ++ * - base_addr + (n + 1) * 64 待验证的数据bit为1, 其余数据bit为0 ++ * ++ * 注意: 调用者要保证 base_addr + block_size 不能超过测试空间长度 ++ */ ++static void tc_ring_one_scan_test_block(uintptr_t base_addr, int scan_bit, size_t block_size, int *err_cnt) ++{ ++ uint64_t tgt_dat_pattern = (1ULL << (scan_bit & 0x3F)); ++ size_t word_offset = (scan_bit >> 6) << 3; // 待测试bit在cacheline中的word的偏移位置(一个word为64 Byte) ++ uint64_t tgt_dat_all_one = ~0x0ULL; ++ char json_result[2048]; ++ char err_msg[1024]; ++ uint64_t rd_data[2]; ++ int i; ++ ++ for (i = 0; i < block_size; i += TC_RING_ONE_DATA_UNIT) { ++ rd_data[0] = *((uint64_t *)(base_addr + i + word_offset)); // 
base_addr + n * 64 + word_offset ++ rd_data[1] = *((uint64_t *)(base_addr + i + TC_RING_ONE_CACHELINE_SIZE + word_offset)); // base_addr + (n + 1) * 64 + word_offset ++ if((rd_data[0] != tgt_dat_all_one) || (rd_data[1] != tgt_dat_pattern)) { ++ __atomic_add_fetch(err_cnt, 1, __ATOMIC_SEQ_CST); // 错误次数加1 ++ snprintf(err_msg, sizeof(err_msg), "[ERROR][CORE%d] vaddr = %#lx paddr = %#lx read_data = %#llx read_disturb = %#llx target_data = " ++ "%#llx bit_index = %d offset = %#lx block_size = %#lx\n", ++ sched_getcpu(), ++ (base_addr + i + word_offset), ++ vaddr_to_phys(base_addr + i + word_offset), ++ rd_data[1], ++ rd_data[0], ++ (1ULL << (scan_bit & 0x3F)), ++ scan_bit, ++ word_offset, ++ block_size); ++ logging_error("%s", err_msg); ++ snprintf(json_result, sizeof(json_result), "{\"msg\":\"%s\", \"code\":2001}", err_msg); ++ report_result(TOOL_NAME, RESULT_LEVEL_MAJOR_ALM, json_result); ++ break; ++ } ++ } ++} ++ ++static void tc_ring_one_scan_bit(tc_ring_one_config_t *config, uint64_t loop) ++{ ++ int core_id = sched_getcpu(); ++ int numa_node; ++ uint32_t i, j; ++ ++ numa_node = get_numa_node_of_core(core_id); ++ for (i = 0; i < config->rd_loop; i++) { ++ for (j = 0; j < config->space_size; j += config->block_size) { ++ // 按测试块大小,扫描测试空间 ++ tc_ring_one_scan_test_block((uintptr_t)((char *)(config->test_space_base[numa_node])) + j, ++ config->scan_bit, ++ config->block_size, ++ &(config->err_cnt)); ++ ++ if (config->err_cnt > 0) { ++ logging_error("[ERROR][CORE%d] dbls_scan_bit error, scan_bit = %d, err_cnt = %d vir_base_addr = %p " ++ "phy_base_addr = %p block = %ld rd_loop = %d loop = %d\n", ++ sched_getcpu(), ++ config->scan_bit, ++ config->err_cnt, ++ config->test_space_base[numa_node], ++ vaddr_to_phys((uintptr_t)(config->test_space_base[numa_node])), ++ j / config->block_size, ++ i, loop); ++ return; ++ } ++ ++ if (config->sleep_ms > 0) { ++ // 每扫描完一个测试块就休眠一段时间 ++ usleep(config->sleep_ms * 1000); ++ } ++ } ++ } ++} ++ ++static void* 
tc_ring_one_thread_entry(void *arg) ++{ ++ tc_ring_one_config_t *config = arg; ++ int scan_sequence_id = 0; ++ int scan_special_id = 0; ++ uint64_t loop_cnt = 0; ++ uint64_t tc_flag = 1; ++ ++ // 等待所有线程准备就绪 ++ pthread_barrier_wait(&config->tc_barrier); ++ ++ while (tc_flag == 1) { ++ config->scan_bit = scan_sequence_id; ++ tc_ring_one_testspace_update(config, scan_sequence_id); ++ tc_ring_one_scan_bit(config, loop_cnt); ++ tc_ring_one_testspace_recover(config, scan_sequence_id); ++ if (config->err_cnt > 0) { ++ break; ++ } ++ scan_sequence_id = (scan_sequence_id + 1) % (TC_RING_ONE_CACHELINE_SIZE * 8); ++ ++ scan_special_id = g_tc_ring_one_special_bits[rand() % 8]; ++ config->scan_bit = scan_special_id; ++ pthread_barrier_wait(&config->tc_barrier); ++ tc_ring_one_testspace_update(config, config->scan_bit); ++ tc_ring_one_scan_bit(config, loop_cnt); ++ tc_ring_one_testspace_recover(config, config->scan_bit); ++ if (config->err_cnt > 0) { ++ break; ++ } ++ loop_cnt++; ++ if ((config->loop_total != 0) && (loop_cnt >= config->loop_total)) { ++ tc_flag = 0; ++ } ++ } ++ ++ return config; ++} ++ ++void tc_ring_one_post_process(int result) ++{ ++ if (result == TC_RING_ONE_SUCCESS) { ++ logging_info("tc_ring_one test pass\n"); ++ } else if (result == TC_RING_ONE_PRAMA_ERR) { ++ // 通过log打印 ++ logging_error("The system can not run the tc_ring_one:\n"); ++ logging_error("1. the system must support NUMA\n"); ++ logging_error("2. 
the memory in the system maybe too small\n"); ++ report_result(TOOL_NAME, RESULT_LEVEL_FAIL, "{\"msg\":\"The system can not run the tc_ring_one testcase\", \"code\":1001}"); ++ } else if (result == TC_RING_ONE_FAIL) { ++ switch (g_tc_config.err_handle) { // 根据错误处理策略进行相应的处理 ++ case TC_ERROR_HANDLE_NONE: ++ logging_error("the system administrator must handle this error!!!\n"); ++ break; ++ case TC_ERROR_HANDLE_SHUTDOWN: ++ logging_error("Execute 'shutdown'\n"); ++ if (reboot(RB_POWER_OFF) < 0) { ++ logging_error("ERROR: Failed to execute 'shutdown'\n"); ++ } ++ break; ++ case TC_ERROR_HANDLE_REBOOT: ++ logging_error("Execute 'reboot'\n"); ++ if (reboot(RB_AUTOBOOT) < 0) { ++ logging_error("ERROR: Failed to execute 'reboot'\n"); ++ } ++ break; ++ default: // panic ++ abort(); ++ } ++ } ++} ++ ++// end of tc_ring_one.c +diff --git a/src/sentryPlugins/soc_ring_sentry/tc_ring_one.h b/src/sentryPlugins/soc_ring_sentry/tc_ring_one.h +new file mode 100755 +index 0000000..015a5b3 +--- /dev/null ++++ b/src/sentryPlugins/soc_ring_sentry/tc_ring_one.h +@@ -0,0 +1,38 @@ ++/* ++ * Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved. 
++ * Description: tc ring testcase header ++ * Author: lizixian ++ * Create: 2025-7-10 ++ */ ++ ++#ifndef __TC_RING_ONE_H__ ++#define __TC_RING_ONE_H__ ++#include "soc_ring_sentry.h" ++ ++ ++ ++/** ++ * tc_ring_one_create_threads - ++ * 测试用例总入口,该函数会为测试申请测试内存空间,为每个测试核创建测试线程,并将测试线程调度到测试核上 ++ * @mem_size: 用户指定的测试内存空间大小,系统每个numa节点均提供对用大小的测试空间,用于巡检测试 ++ * @loop_cnt: 测试循环次数, 大于0,则按照对应的循环做巡检测试,等于0,则巡检线程持续驻留在测试核中 ++ * @delay: 每个测试块扫描完成后的休眠时长,单位ms, ≤0 则不休眠。 ++ * @err_handle: 用于指示是否检测到错误后,是否需要做相关后处理操作 ++ * 0 - 不处理,错误处理交由上层软件进行 ++ * 1 - 主动触发panic(默认处理) ++ * 2 - 关机 ++ * 3 - 重启(不建议,该用例检测的失效错误为数据出错的致命错误,设备不应继续工作) ++ * 其他 - 非法输入。保持为默认处理。 ++ * @blacklist: 行巡检用例执行核的黑名单数组,用于指定某个核是否执行巡检测试线程,数值大小为测试核个数 ++ * NULL - 无黑名单,系统所有在线核均需要调度巡检线程 ++ * @core_num: 系统核总数(包含online & offline的core的总数),调用者需要保证该参数正确性 ++ * ++ * return: 0 - 在测试块中未检出错误 ++ * -1 - 系统状态不支持巡检用例执行(系统不支持numa or 系统内存空间不足) ++ * -2 - 检出到数据错误 ++ * ++ */ ++int tc_ring_one_create_threads(uint64_t mem_size, uint64_t loop_cnt, uint64_t delay, uint64_t err_handle, bool *blacklist, size_t core_num); ++void tc_ring_one_post_process(int result); ++ ++#endif/*__TC_RING_ONE_H__*/ +\ No newline at end of file +-- +2.33.0 + diff --git a/Fix-Security-Scan-Warning.patch b/Fix-Security-Scan-Warning.patch new file mode 100644 index 0000000..6f5c404 --- /dev/null +++ b/Fix-Security-Scan-Warning.patch @@ -0,0 +1,60 @@ +From 32a6b1c277bc825be22083eb9286622f65dd562c Mon Sep 17 00:00:00 2001 +From: Yihang Li +Date: Mon, 4 Aug 2025 11:34:47 +0800 +Subject: [PATCH 7/9] sysSentry: Fix Security Scan Warning + +driver inclusion +category: feature +bugzilla: https://gitee.com/src-openeuler/sysSentry/issues/ID1UOY +CVE: NA + +---------------------------------------------------------------------- + +Fix Security Scan Warning: +a. There should be one space between the comment symbol and the comment +content. +b. Use snprintf instead of sprintf. 
+ +Signed-off-by: Yihang Li +Signed-off-by: Qizhi Zhang +--- + src/sentryPlugins/soc_ring_sentry/tc_ring_one.c | 4 ++-- + src/sentryPlugins/soc_ring_sentry/tc_ring_one.h | 2 +- + 2 files changed, 3 insertions(+), 3 deletions(-) + +diff --git a/src/sentryPlugins/soc_ring_sentry/tc_ring_one.c b/src/sentryPlugins/soc_ring_sentry/tc_ring_one.c +index 5101070..ea8fb34 100755 +--- a/src/sentryPlugins/soc_ring_sentry/tc_ring_one.c ++++ b/src/sentryPlugins/soc_ring_sentry/tc_ring_one.c +@@ -48,7 +48,7 @@ struct tc_ring_one_config { + int sys_core_total; // 系统核总数 + int scan_bit; // 扫描的bit + int err_flag; // 错误计数 +- int numa_node; //系统numa总数 ++ int numa_node; // 系统numa总数 + }; + + #define TC_RING_ONE_BLOCK_SIZE 0x10000 +@@ -119,7 +119,7 @@ static uintptr_t vaddr_to_phys(uintptr_t vaddr) + int fd; + + offset = vaddr / page_size * (sizeof(pinfo)); +- sprintf(page_map_name, "/proc/%d/pagemap", pid); ++ snprintf(page_map_name, sizeof(page_map_name), "/proc/%d/pagemap", pid); + fd = open(page_map_name, O_RDONLY); + if (fd < 0) { + logging_error("Failed to open %s\n", page_map_name); +diff --git a/src/sentryPlugins/soc_ring_sentry/tc_ring_one.h b/src/sentryPlugins/soc_ring_sentry/tc_ring_one.h +index d5a25ee..5e93a56 100755 +--- a/src/sentryPlugins/soc_ring_sentry/tc_ring_one.h ++++ b/src/sentryPlugins/soc_ring_sentry/tc_ring_one.h +@@ -34,4 +34,4 @@ + */ + int tc_ring_one_main(uint64_t mem_size, uint64_t loop_cnt, uint64_t delay, uint64_t err_handle, bool *blacklist, size_t core_num); + +-#endif/*__TC_RING_ONE_H__*/ +\ No newline at end of file ++#endif /*__TC_RING_ONE_H__*/ +-- +2.33.0 + diff --git a/Fix-issue-cores-with-isolcpus-set-blacklis.patch b/Fix-issue-cores-with-isolcpus-set-blacklis.patch new file mode 100644 index 0000000..72e3ba1 --- /dev/null +++ b/Fix-issue-cores-with-isolcpus-set-blacklis.patch @@ -0,0 +1,61 @@ +From a29c44ecdb6357a38f112e3d1cc07258da33acc8 Mon Sep 17 00:00:00 2001 +From: Yihang Li +Date: Thu, 24 Jul 2025 20:31:17 +0800 +Subject: [PATCH 4/9] 
sysSentry: Fix issue cores with isolcpus set blacklist + failed + +driver inclusion +category: feature +bugzilla: https://gitee.com/src-openeuler/sysSentry/issues/ID1UOY +CVE: NA + +---------------------------------------------------------------------- + +Signed-off-by: Yihang Li +Signed-off-by: Qizhi Zhang +--- + src/sentryPlugins/soc_ring_sentry/soc_ring_sentry.c | 2 +- + src/sentryPlugins/soc_ring_sentry/tc_ring_one.c | 7 +++++-- + 2 files changed, 6 insertions(+), 3 deletions(-) + +diff --git a/src/sentryPlugins/soc_ring_sentry/soc_ring_sentry.c b/src/sentryPlugins/soc_ring_sentry/soc_ring_sentry.c +index 1baffe6..df195ae 100644 +--- a/src/sentryPlugins/soc_ring_sentry/soc_ring_sentry.c ++++ b/src/sentryPlugins/soc_ring_sentry/soc_ring_sentry.c +@@ -153,7 +153,7 @@ static void soc_ring_sentry_blacklist_init(size_t core_num) + } + + if (env && strlen(env) > 0) { +- struct bitmask *cpuset = numa_parse_cpustring(env); ++ struct bitmask *cpuset = numa_parse_cpustring_all(env); + + if (!cpuset) { + logging_error("Failed to parse environment variable SOC_RING_SENTRY_BLACKLIST: %s\n", env); +diff --git a/src/sentryPlugins/soc_ring_sentry/tc_ring_one.c b/src/sentryPlugins/soc_ring_sentry/tc_ring_one.c +index f926d5f..9473d68 100755 +--- a/src/sentryPlugins/soc_ring_sentry/tc_ring_one.c ++++ b/src/sentryPlugins/soc_ring_sentry/tc_ring_one.c +@@ -410,6 +410,7 @@ static int tc_ring_one_exec(struct tc_ring_one_config *config) + pthread_join(config->tc_core_threads[i], NULL); + } + ++ i = i - 1; + if (config->err_flag > 0) { + ret = TC_RING_ONE_FAIL; + } else { +@@ -417,8 +418,10 @@ static int tc_ring_one_exec(struct tc_ring_one_config *config) + } + + pthread_bind_fail: +- pthread_cancel(config->tc_core_threads[i]); +- pthread_join(config->tc_core_threads[i], NULL); ++ if (!is_core_invalid(config, i)) { ++ pthread_cancel(config->tc_core_threads[i]); ++ pthread_join(config->tc_core_threads[i], NULL); ++ } + + pthread_create_fail: + for (i = i - 1; i >= 0; i--) { +-- +2.33.0 
+ diff --git a/Fix-issue-inconsistent-status-and-result-a.patch b/Fix-issue-inconsistent-status-and-result-a.patch new file mode 100644 index 0000000..669ea66 --- /dev/null +++ b/Fix-issue-inconsistent-status-and-result-a.patch @@ -0,0 +1,57 @@ +From 053e1d40f510214343e812a1ae9fbc13b7c8858b Mon Sep 17 00:00:00 2001 +From: Yihang Li +Date: Thu, 31 Jul 2025 14:43:55 +0800 +Subject: [PATCH 5/9] sysSentry: Fix issue inconsistent status and result after + single inspection + +driver inclusion +category: feature +bugzilla: https://gitee.com/src-openeuler/sysSentry/issues/ID1UOY +CVE: NA + +---------------------------------------------------------------------- + +When configured for a single inspection, the inspection status is set to +FAILED after the inspection is completed, but the inspection result is +empty, which does not meet expectations. + +All inspection threads should directly release resources, return the +corresponding results, and exit after completion. + +Signed-off-by: Yihang Li +Signed-off-by: Qizhi Zhang +--- + src/sentryPlugins/soc_ring_sentry/tc_ring_one.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/src/sentryPlugins/soc_ring_sentry/tc_ring_one.c b/src/sentryPlugins/soc_ring_sentry/tc_ring_one.c +index 9473d68..e8740c0 100755 +--- a/src/sentryPlugins/soc_ring_sentry/tc_ring_one.c ++++ b/src/sentryPlugins/soc_ring_sentry/tc_ring_one.c +@@ -410,13 +410,14 @@ static int tc_ring_one_exec(struct tc_ring_one_config *config) + pthread_join(config->tc_core_threads[i], NULL); + } + +- i = i - 1; + if (config->err_flag > 0) { + ret = TC_RING_ONE_FAIL; + } else { + ret = TC_RING_ONE_SUCCESS; + } + ++ goto out; ++ + pthread_bind_fail: + if (!is_core_invalid(config, i)) { + pthread_cancel(config->tc_core_threads[i]); +@@ -433,6 +434,7 @@ pthread_create_fail: + pthread_join(config->tc_core_threads[i], NULL); + } + ++out: + tc_ring_one_release(config); + free(config->tc_core_threads); + config->tc_core_threads = NULL; +-- +2.33.0 + diff 
--git a/Fix-two-code-review-comments.patch b/Fix-two-code-review-comments.patch new file mode 100644 index 0000000..d28d577 --- /dev/null +++ b/Fix-two-code-review-comments.patch @@ -0,0 +1,45 @@ +From 0fe147f30031154d43f6a504874bdce1d77047e2 Mon Sep 17 00:00:00 2001 +From: Yihang Li +Date: Thu, 7 Aug 2025 16:06:10 +0800 +Subject: [PATCH 8/9] sysSentry: Fix two code review comments + +driver inclusion +category: feature +bugzilla: https://gitee.com/src-openeuler/sysSentry/issues/ID1UOY +CVE: NA + +---------------------------------------------------------------------- + +Fix two code review comments: +a. When a uint64_t type loop_cnt is passed as a parameter to an int type, +truncation occurs. To resolve this issue, the parameter type is changed +to uint64_t. + +b. The loop counter loop_cnt is of type uint64_t. When it increments to +its maximum value and then adds 1, it overflows to 0, causing an infinite +loop that cannot be exited. To address this problem, a loop condition is +added to ensure that the loop continues only if loop_cnt does not overflow +to 0 after incrementing. 
+ +Signed-off-by: Yihang Li +Signed-off-by: Qizhi Zhang +--- + src/sentryPlugins/soc_ring_sentry/tc_ring_one.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/src/sentryPlugins/soc_ring_sentry/tc_ring_one.c b/src/sentryPlugins/soc_ring_sentry/tc_ring_one.c +index ea8fb34..3dda5e0 100755 +--- a/src/sentryPlugins/soc_ring_sentry/tc_ring_one.c ++++ b/src/sentryPlugins/soc_ring_sentry/tc_ring_one.c +@@ -641,7 +641,7 @@ static bool tc_ring_one_scan_bit(struct tc_ring_one_config *config, int scan_bit + return true; + } + +-static bool tc_ring_one_test_issue(struct tc_ring_one_config *config, int scan_bit, int loop_cnt) ++static bool tc_ring_one_test_issue(struct tc_ring_one_config *config, int scan_bit, uint64_t loop_cnt) + { + bool ret; + +-- +2.33.0 + diff --git a/Use-panic-instead-of-coredump-file.patch b/Use-panic-instead-of-coredump-file.patch new file mode 100644 index 0000000..495dabe --- /dev/null +++ b/Use-panic-instead-of-coredump-file.patch @@ -0,0 +1,78 @@ +From 9b631cf00449a9e16373600b70031df534a8ff1d Mon Sep 17 00:00:00 2001 +From: Yihang Li +Date: Thu, 31 Jul 2025 15:01:20 +0800 +Subject: [PATCH 6/9] sysSentry: Use panic instead of coredump file + +driver inclusion +category: feature +bugzilla: https://gitee.com/src-openeuler/sysSentry/issues/ID1UOY +CVE: NA + +---------------------------------------------------------------------- + +Prior to this, when the post-processing flag was configured to 1, +detecting an error would generate a coredump file, which was not as +expected. Instead, a panic should be used to replace the coredump file. + +Additionally, some formatting errors were cleaned up. 
+ +Signed-off-by: Yihang Li +Signed-off-by: Qizhi Zhang +--- + .../soc_ring_sentry/tc_ring_one.c | 23 ++++++++++++++++--- + 1 file changed, 20 insertions(+), 3 deletions(-) + +diff --git a/src/sentryPlugins/soc_ring_sentry/tc_ring_one.c b/src/sentryPlugins/soc_ring_sentry/tc_ring_one.c +index e8740c0..5101070 100755 +--- a/src/sentryPlugins/soc_ring_sentry/tc_ring_one.c ++++ b/src/sentryPlugins/soc_ring_sentry/tc_ring_one.c +@@ -441,6 +441,22 @@ out: + return ret; + } + ++static void tc_ring_one_execute_panic(void) ++{ ++ FILE *file = fopen("/proc/sysrq-trigger", "w"); ++ ++ if (file == NULL) { ++ logging_error("Failed to open /proc/sysrq-trigger"); ++ return; ++ } ++ ++ if (fwrite("c", sizeof(char), 1, file) != 1) { ++ logging_error("Failed to write to /proc/sysrq-trigger"); ++ } ++ ++ fclose(file); ++} ++ + void tc_ring_one_post_process(uint64_t err_handle, int result) + { + if (result == TC_RING_ONE_SUCCESS) { +@@ -451,19 +467,20 @@ void tc_ring_one_post_process(uint64_t err_handle, int result) + logging_error("the system administrator must handle this error!!!\n"); + break; + case TC_ERROR_HANDLE_SHUTDOWN: +- logging_error("Execute 'shutdown'\n"); ++ logging_error("Execute 'shutdown'\n"); + if (reboot(RB_POWER_OFF) < 0) { + logging_error("ERROR: Failed to execute 'shutdown'\n"); + } + break; + case TC_ERROR_HANDLE_REBOOT: +- logging_error("Execute 'reboot'\n"); ++ logging_error("Execute 'reboot'\n"); + if (reboot(RB_AUTOBOOT) < 0) { + logging_error("ERROR: Failed to execute 'reboot'\n"); + } + break; + default: // panic +- abort(); ++ logging_error("Execute 'panic'\n"); ++ tc_ring_one_execute_panic(); + break; + } + } else { +-- +2.33.0 + diff --git a/sysSentry.spec b/sysSentry.spec index b0f09a0..1195072 100644 --- a/sysSentry.spec +++ b/sysSentry.spec @@ -4,7 +4,7 @@ Summary: System Inspection Framework Name: sysSentry Version: 1.0.3 -Release: 15 +Release: 16 License: Mulan PSL v2 Group: System Environment/Daemons Source0: 
https://gitee.com/openeuler/sysSentry/releases/download/v%{version}/%{name}-%{version}.tar.gz @@ -28,6 +28,15 @@ Patch16: Use-malloc-to-allocate-memory-as-much-as-possible.patch Patch17: fix-cpu_sentry-result-when-found_fault_cores_number-.patch Patch18: fix-some-code-bugs.patch Patch19: add-bmc_block_io.patch +Patch20: Add-SOC-Ring-sentry-function.patch +Patch21: Add-testcase-tc_ring-for-SOC-Ring-sentry.patch +Patch22: testcase-tc_ring-cleancode.patch +Patch23: Fix-issue-cores-with-isolcpus-set-blacklis.patch +Patch24: Fix-issue-inconsistent-status-and-result-a.patch +Patch25: Use-panic-instead-of-coredump-file.patch +Patch26: Fix-Security-Scan-Warning.patch +Patch27: Fix-two-code-review-comments.patch +Patch28: Add-MulanV2-License-statement.patch BuildRequires: cmake gcc-c++ BuildRequires: python3 python3-setuptools @@ -35,6 +44,7 @@ BuildRequires: json-c-devel BuildRequires: chrpath BuildRequires: elfutils-devel clang libbpf-devel bpftool BuildRequires: python3-numpy python3-pytest +BuildRequires: numactl-libs numactl-devel Requires: pyxalarm = %{version} Requires: libbpf @@ -130,6 +140,15 @@ Requires: sysSentry = %{version}-%{release} %description -n bmc_block_io This package provides bmc_block_io for the sysSentry. +%package -n soc_ring_sentry +Summary: soc_ring_sentry for the sysSentry +Provides: soc_ring_sentry = %{version} +BuildRequires: numactl-libs numactl-devel +Requires: sysSentry = %{version}-%{release} + +%description -n soc_ring_sentry +This package provides soc_ring_sentry for the sysSentry. 
+ %prep %autosetup -n %{name}-%{version} -p1 @@ -215,6 +234,10 @@ rm -rf /var/run/sysSentry | : %exclude %{_sysconfdir}/sysconfig/sentry_msg_monitor.env %exclude %{_sysconfdir}/sysSentry/tasks/sentry_msg_monitor.mod +# soc_ring_sentry +%exclude %{_sysconfdir}/sysconfig/soc_ring_sentry.env +%exclude %{_sysconfdir}/sysSentry/tasks/sentry_msg_monitor.mod + %files -n libxalarm %attr(0555,root,root) %{_libdir}/libxalarm.so @@ -260,7 +283,25 @@ rm -rf /var/run/sysSentry | : %attr(0600,root,root) %{_sysconfdir}/sysSentry/plugins/bmc_block_io.ini %attr(0600,root,root) %{_sysconfdir}/sysSentry/tasks/bmc_block_io.mod +%files -n soc_ring_sentry +%attr(0750,root,root) %{_bindir}/soc_ring_sentry +%attr(0600,root,root) %config(noreplace) %{_sysconfdir}/sysconfig/soc_ring_sentry.env + %changelog +* Thu Oct 16 2025 Qizhi Zhang - 1.0.3-16 +- Type:feature +- CVE:NA +- SUG:NA +- DESC:Add SOC Ring sentry function + Add testcase tc_ring for SOC Ring sentry + testcase tc_ring cleancode + Fix issue cores with isolcpus set blacklist failed + Fix issue inconsistent status and result after single inspection + Use panic instead of coredump file + Fix Security Scan Warning + Fix two code review comments + Add MulanV2 License statement + * Wed Sep 24 2025 hewanhan - 1.0.3-15 - Type:feature - CVE:NA diff --git a/testcase-tc_ring-cleancode.patch b/testcase-tc_ring-cleancode.patch new file mode 100644 index 0000000..6d067fd --- /dev/null +++ b/testcase-tc_ring-cleancode.patch @@ -0,0 +1,914 @@ +From cba3b2805f825187d4222d8564c0e888f06a1582 Mon Sep 17 00:00:00 2001 +From: Yihang Li +Date: Tue, 22 Jul 2025 09:33:39 +0800 +Subject: [PATCH 3/9] sysSentry: testcase tc_ring cleancode + +driver inclusion +category: feature +bugzilla: https://gitee.com/src-openeuler/sysSentry/issues/ID1UOY +CVE: NA + +---------------------------------------------------------------------- + +Signed-off-by: Yihang Li +Signed-off-by: Qizhi Zhang +--- + .../soc_ring_sentry/soc_ring_sentry.c | 18 +- + 
.../soc_ring_sentry/soc_ring_sentry.h | 2 + + .../soc_ring_sentry/tc_ring_one.c | 514 +++++++++--------- + .../soc_ring_sentry/tc_ring_one.h | 5 +- + 4 files changed, 266 insertions(+), 273 deletions(-) + +diff --git a/src/sentryPlugins/soc_ring_sentry/soc_ring_sentry.c b/src/sentryPlugins/soc_ring_sentry/soc_ring_sentry.c +index 21c78b6..1baffe6 100644 +--- a/src/sentryPlugins/soc_ring_sentry/soc_ring_sentry.c ++++ b/src/sentryPlugins/soc_ring_sentry/soc_ring_sentry.c +@@ -207,8 +207,7 @@ static int soc_ring_sentry_delivery(size_t core_num) + { + int ret; + +- ret = tc_ring_one_create_threads(g_mem_size, g_loop_cnt, g_intensity_delay, g_handle, g_blacklist, core_num); +- tc_ring_one_post_process(ret); ++ ret = tc_ring_one_main(g_mem_size, g_loop_cnt, g_intensity_delay, g_handle, g_blacklist, core_num); + + return ret; + } +@@ -220,6 +219,19 @@ size_t get_system_core_num(void) + return (core_num > 0) ? (size_t)core_num : 1; + } + ++void soc_ring_sentry_report(enum RESULT_LEVEL result_level, const char *report_data) ++{ ++ char json_result[2048]; ++ ++ snprintf(json_result, sizeof(json_result), "{\"msg\":\"%s\", \"code\":1001}", report_data); ++ report_result(TOOL_NAME, result_level, json_result); ++ if (result_level == RESULT_LEVEL_PASS) { ++ logging_info("%s\n", report_data); ++ } else { ++ logging_error("%s\n", report_data); ++ } ++} ++ + static void soc_ring_sentry_exec() + { + size_t core_num = get_system_core_num(); +@@ -228,7 +240,7 @@ static void soc_ring_sentry_exec() + soc_ring_sentry_init(core_num); + ret = soc_ring_sentry_delivery(core_num); + if (ret == 0) { +- report_result(TOOL_NAME, RESULT_LEVEL_PASS, "{\"msg\":\"SOC STL test pass\", \"code\":1001}"); ++ soc_ring_sentry_report(RESULT_LEVEL_PASS, "SOC STL test pass"); + } + + if (g_blacklist) { +diff --git a/src/sentryPlugins/soc_ring_sentry/soc_ring_sentry.h b/src/sentryPlugins/soc_ring_sentry/soc_ring_sentry.h +index bea991f..0566496 100644 +--- a/src/sentryPlugins/soc_ring_sentry/soc_ring_sentry.h 
++++ b/src/sentryPlugins/soc_ring_sentry/soc_ring_sentry.h +@@ -24,4 +24,6 @@ extern uint64_t g_mem_size; + extern uint64_t g_loop_cnt; + extern bool *g_blacklist; + ++void soc_ring_sentry_report(enum RESULT_LEVEL result_level, const char *report_data); ++ + #endif +diff --git a/src/sentryPlugins/soc_ring_sentry/tc_ring_one.c b/src/sentryPlugins/soc_ring_sentry/tc_ring_one.c +index 9713495..f926d5f 100755 +--- a/src/sentryPlugins/soc_ring_sentry/tc_ring_one.c ++++ b/src/sentryPlugins/soc_ring_sentry/tc_ring_one.c +@@ -31,12 +31,12 @@ + #include "soc_ring_sentry.h" + #include "tc_ring_one.h" + +-typedef struct tc_ring_one_config { ++struct tc_ring_one_config { + void** test_space_base; // 存储每个numa节点测试空间的指针,需要根据当前系统numa节点数量动态申请存储空间 +- size_t space_size; // 每个numa节点测试空间内存大小 ++ size_t mem_size; // 每个numa节点测试空间内存大小 + size_t block_size; // 每个测试块的大小,固定为64kByte +- uint64_t loop_total; // 测试循环总数,0 -- 无限测试 +- int64_t sleep_ms; // 每次读完一个数据块后的休眠时间,≤ 0则不休眠,继续下一个数据块的扫描 ++ uint64_t loop_cnt; // 测试循环总数,0 -- 无限测试 ++ int64_t delay_ms; // 每次读完一个数据块后的休眠时间,≤ 0则不休眠,继续下一个数据块的扫描 + int64_t rd_loop; // 每个扫描bit单次循环内的数据扫描次数,固定为0x80 + pthread_t* tc_core_threads; // 记录每个核的测试线程 + pthread_barrier_t tc_barrier; // 同步标志 +@@ -47,8 +47,9 @@ typedef struct tc_ring_one_config { + int tc_core_total; // 测试核总数 + int sys_core_total; // 系统核总数 + int scan_bit; // 扫描的bit +- int err_cnt; // 错误计数 +-} tc_ring_one_config_t; ++ int err_flag; // 错误计数 ++ int numa_node; //系统numa总数 ++}; + + #define TC_RING_ONE_BLOCK_SIZE 0x10000 + #define TC_RING_ONE_RD_LOOP 0x80 +@@ -64,7 +65,6 @@ typedef struct tc_ring_one_config { + #define TC_ERROR_HANDLE_SHUTDOWN 2 + #define TC_ERROR_HANDLE_REBOOT 3 + +-static tc_ring_one_config_t g_tc_config = { 0 }; + static const uint32_t tc_ring_one_pattern[32] = { + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, + 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, +@@ -75,43 +75,14 @@ static const uint32_t tc_ring_one_pattern[32] = { + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 
0x00000000, 0x00000000, 0x00000000, 0x00000000, + }; ++ + static const uint32_t g_tc_ring_one_special_bits[] = {394, 397, 405, 343, 394, 393, 392, 377}; + + static void* tc_ring_one_thread_entry(void* arg); + +-static int get_system_online_core_total(void) ++static int get_numa_node(void) + { +- int core_num = 0; +- +- core_num = sysconf(_SC_NPROCESSORS_ONLN); +- core_num = (core_num > 0) ? core_num : 1; +- +- return core_num; +-} +- +-static int get_system_core_total(void) +-{ +- int core_num = 0; +- +- core_num = sysconf(_SC_NPROCESSORS_CONF); +- core_num = (core_num > 0) ? core_num : 1; +- +- return core_num; +-} +- +-static int get_core_id_by_thread(pthread_t *thread) +-{ +- int core_total = get_system_core_total(); +- pthread_t my_thread_id = pthread_self(); +- int core_id; +- +- for (core_id = 0; core_id < core_total; core_id++) { +- if (my_thread_id == thread[core_id]) { +- return core_id; +- } +- } +- +- return -1; ++ return numa_max_node() + 1; + } + + static int is_cpu_online(int core_id) +@@ -131,6 +102,7 @@ static int is_cpu_online(int core_id) + logging_error("Failed to open %s\n", online_file); + return 0; + } ++ + fscanf(fp, "%d", &online); + fclose(fp); + +@@ -170,12 +142,10 @@ static uintptr_t vaddr_to_phys(uintptr_t vaddr) + + static int get_numa_node_of_core(int core_id) + { +- int numa_node; ++ int numa_node = numa_node_of_cpu(core_id);; + +- numa_node = numa_node_of_cpu(core_id); + if (numa_node < 0) { + logging_error("[CORE%d] numa_node_of_cpu failed, errno:%d\n", core_id, errno); +- return 0; + } + + return numa_node; +@@ -183,21 +153,21 @@ static int get_numa_node_of_core(int core_id) + + /** + * 从指定 NUMA 节点优先分配内存,若失败则尝试其他节点 +- * @param preferred_node 优先分配的 NUMA 节点号 +- * @param size 需要分配的内存大小(字节) +- * @return 成功返回内存指针,失败返回 NULL ++ * @numa_node: 系统numa总数 ++ * @preferred_node: 优先分配的 NUMA 节点号 ++ * @size: 需要分配的内存大小(字节) ++ * @return: 成功返回内存指针,失败返回 NULL + */ +-static void *numa_alloc_fallback(int preferred_node, size_t size) ++static void 
*numa_alloc_fallback(int numa_node, int preferred_node, size_t size) + { + struct bitmask *allowed_nodes; + void *ptr = NULL; +- int max_node; + int node; + + // 1. 获取所有可用的 NUMA 节点 + allowed_nodes = numa_get_mems_allowed(); + if (!allowed_nodes) { +- logging_error("Failed to get allowed NUMA nodes"); ++ logging_error("Failed to get allowed NUMA nodes\n"); + return NULL; + } + +@@ -205,21 +175,20 @@ static void *numa_alloc_fallback(int preferred_node, size_t size) + if (numa_bitmask_isbitset(allowed_nodes, preferred_node)) { + ptr = numa_alloc_onnode(size, preferred_node); + if (ptr != NULL) { +- logging_debug("Allocated %#x bytes on NUMA assigned node %d, addr: %p \n", size, preferred_node, ptr, __LINE__); ++ logging_debug("Allocated %#x bytes on NUMA assigned node %d, addr: %p\n", size, preferred_node, ptr); + return ptr; + } + } + + // 3. 遍历所有节点(跳过优先节点) +- max_node = numa_max_node(); +- for (node = 0; node <= max_node; node++) { ++ for (node = 0; node < numa_node; node++) { + if (node == preferred_node || !numa_bitmask_isbitset(allowed_nodes, node)) { + continue; // 跳过优先节点或不允许的节点 + } + + ptr = numa_alloc_onnode(size, node); + if (ptr != NULL) { +- logging_debug("Allocated %#x bytes on NUMA other node %d, addr: %p \n", size, node, ptr, __LINE__); ++ logging_debug("Allocated %#x bytes on NUMA other node %d, addr: %p\n", size, node, ptr); + return ptr; + } + } +@@ -242,109 +211,107 @@ static void tc_ring_one_space_init(void *base, size_t size) + } + } + +-static int tc_ring_one_ctrl_var_init(tc_ring_one_config_t *config) ++static bool is_core_invalid(struct tc_ring_one_config *config, int core_id) ++{ ++ return config->black_list[core_id] || !is_cpu_online(core_id); ++} ++ ++static int tc_ring_one_ctrl_var_init(struct tc_ring_one_config *config) + { + pthread_mutexattr_t attr; +- int online_core_total; +- int system_core_total; +- int tc_core_total; +- int numa_node_num; ++ int valid_core_num = 0; + int i; + +- system_core_total = config->sys_core_total; +- 
online_core_total = get_system_online_core_total(); +- tc_core_total = online_core_total; +- numa_node_num = numa_max_node() + 1; +- +- for (i = 0; i < system_core_total; i++) { +- if (config->black_list[i] && is_cpu_online(i)) { +- // 跳过黑名单中的 CPU 核心 +- tc_core_total--; ++ for (i = 0; i < config->sys_core_total; i++) { ++ if (!is_core_invalid(config, i)) { ++ valid_core_num++; + } + } + + if (pthread_mutexattr_init(&attr) != 0) { +- logging_error("Failed to initialize mutex attribute"); ++ logging_error("Failed to initialize mutex attribute\n"); + return TC_RING_ONE_PRAMA_ERR; + } ++ + pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ERRORCHECK); +- config->tc_node_mutex = (pthread_mutex_t *)malloc(sizeof(pthread_mutex_t) * numa_node_num); ++ config->tc_node_mutex = (pthread_mutex_t *)calloc(config->numa_node, sizeof(pthread_mutex_t)); + if (config->tc_node_mutex == NULL) { +- logging_error("Failed to allocate memory for tc_mutex"); +- pthread_mutexattr_destroy(&attr); +- return TC_RING_ONE_PRAMA_ERR; ++ logging_error("Failed to allocate memory for tc_mutex\n"); ++ goto mutex_alloc_fail; + } +- for (i = 0; i < numa_node_num; i++) { ++ ++ for (i = 0; i < config->numa_node; i++) { + if (pthread_mutex_init(&(config->tc_node_mutex[i]), &attr) != 0) { +- logging_error("Failed to initialize mutex %d", i); +- free(config->tc_node_mutex); +- pthread_mutexattr_destroy(&attr); +- return TC_RING_ONE_PRAMA_ERR; ++ logging_error("Failed to initialize mutex %d\n", i); ++ goto mutex_init_fail; + } + } +- pthread_mutexattr_destroy(&attr); + +- config->tc_core_threads = (pthread_t *)malloc(sizeof(pthread_t) * system_core_total); ++ config->tc_core_threads = (pthread_t *)calloc(config->sys_core_total, sizeof(pthread_t)); + if (config->tc_core_threads == NULL) { +- logging_error("Failed to allocate memory for tc_core_threads"); +- for (i = 0; i < numa_node_num; i++) { +- pthread_mutex_destroy(&config->tc_node_mutex[i]); +- } +- free(config->tc_node_mutex); +- return 
TC_RING_ONE_PRAMA_ERR; ++ logging_error("Failed to allocate memory for tc_core_threads\n"); ++ goto mutex_init_fail; + } + +- config->sys_core_total = tc_core_total; +- pthread_barrier_init(&config->tc_barrier, NULL, tc_core_total); +- ++ pthread_mutexattr_destroy(&attr); ++ pthread_barrier_init(&config->tc_barrier, NULL, valid_core_num); + return TC_RING_ONE_SUCCESS; ++ ++mutex_init_fail: ++ for (i = i - 1; i >= 0; i--) { ++ pthread_mutex_destroy(&config->tc_node_mutex[i]); ++ } ++ ++ free(config->tc_node_mutex); ++ config->tc_node_mutex = NULL; ++ ++mutex_alloc_fail: ++ pthread_mutexattr_destroy(&attr); ++ return TC_RING_ONE_PRAMA_ERR; + } + +-static int tc_ring_one_init(tc_ring_one_config_t *config) ++static int tc_ring_one_init(struct tc_ring_one_config *config) + { +- int numa_node_num; +- int ret = 0; ++ int ret, i; + void *ptr; +- int i; + + if (numa_available() < 0) { +- logging_error("NUMA is not available on this system"); ++ logging_error("NUMA is not available on this system\n"); + return TC_RING_ONE_PRAMA_ERR; + } + + // 为每个 NUMA 节点分配测试内存空间 +- numa_node_num = numa_max_node() + 1; +- config->test_space_base = (void **)malloc(sizeof(void *) * numa_node_num); ++ config->test_space_base = (void **)calloc(config->numa_node, sizeof(void *)); + if (config->test_space_base == NULL) { +- logging_error("Failed to allocate memory for test_space_base"); ++ logging_error("Failed to allocate memory for test_space_base\n"); + return TC_RING_ONE_PRAMA_ERR; + } + +- for (i = 0; i < numa_node_num; i++) { +- ptr = numa_alloc_fallback(i, config->space_size); ++ for (i = 0; i < config->numa_node; i++) { ++ ptr = numa_alloc_fallback(config->numa_node, i, config->mem_size); + if (ptr == NULL) { + ret = TC_RING_ONE_PRAMA_ERR; + goto numa_alloc_fail; + } + +- tc_ring_one_space_init(ptr, config->space_size); ++ tc_ring_one_space_init(ptr, config->mem_size); + config->test_space_base[i] = ptr; + } + +- config->node_update_flag = (uint32_t *)malloc(sizeof(uint32_t) * 
numa_node_num); ++ config->node_update_flag = (uint32_t *)calloc(config->numa_node, sizeof(uint32_t)); + if (config->node_update_flag == NULL) { +- logging_error("Failed to allocate memory for node_update_flag"); ++ logging_error("Failed to allocate memory for node_update_flag\n"); + ret = TC_RING_ONE_PRAMA_ERR; + goto numa_alloc_fail; +- } else { +- for (i = 0; i < numa_node_num; i++) { ++ } ++ ++ for (i = 0; i < config->numa_node; i++) { + config->node_update_flag[i] = 0; +- } + } + + ret = tc_ring_one_ctrl_var_init(config); + if (ret != 0) { +- logging_error("tc_ring_one_ctrl_var_init fail ret:%d", ret); ++ logging_error("tc_ring_one_ctrl_var_init fail ret:%d\n", ret); + goto ctrl_var_init_fail; + } + +@@ -355,8 +322,8 @@ ctrl_var_init_fail: + config->node_update_flag = NULL; + + numa_alloc_fail: +- for (i = 0; i < numa_node_num; i++) { +- numa_free(config->test_space_base[i], config->space_size); ++ for (i = i - 1; i >= 0; i--) { ++ numa_free(config->test_space_base[i], config->mem_size); + } + + free(config->test_space_base); +@@ -364,41 +331,27 @@ numa_alloc_fail: + return ret; + } + +-static void tc_ring_one_release(tc_ring_one_config_t *config) ++static void tc_ring_one_release(struct tc_ring_one_config *config) + { +- int numa_node_num; + int node; + +- numa_node_num = numa_max_node() + 1; +- for (int i = 0; i < numa_node_num; i++) { +- numa_free(config->test_space_base[i], config->space_size); ++ for (int i = 0; i < config->numa_node; i++) { ++ numa_free(config->test_space_base[i], config->mem_size); + } ++ + free(config->test_space_base); + config->test_space_base = NULL; + free(config->node_update_flag); + config->node_update_flag = NULL; +- + pthread_barrier_destroy(&(config->tc_barrier)); +- for (node = 0; node < numa_node_num; node++) { ++ for (node = 0; node < config->numa_node; node++) { + pthread_mutex_destroy(&config->tc_node_mutex[node]); + } ++ + free(config->tc_node_mutex); + config->tc_node_mutex = NULL; + } + +-static int 
is_core_run_tc(tc_ring_one_config_t *config, int core_id) +-{ +- if (is_cpu_online(core_id) == 0) { +- return 0; +- } +- +- if ((config->black_list != NULL) && (config->black_list[core_id] == 1)) { +- return 0; +- } +- +- return 1; +-} +- + // 将线程绑定到指定 CORE + static int bind_thread_to_core(pthread_t thread, int core_id) + { +@@ -410,72 +363,133 @@ static int bind_thread_to_core(pthread_t thread, int core_id) + + ret = pthread_setaffinity_np(thread, sizeof(cpu_set_t), &cpuset); + if (ret != 0) { +- logging_error("pthread_setaffinity_np failed"); ++ logging_error("pthread_setaffinity_np failed\n"); + ret = TC_RING_ONE_PRAMA_ERR; + } + + return ret; + } + +-int tc_ring_one_create_threads(uint64_t mem_size, uint64_t loop_cnt, uint64_t delay, +- uint64_t err_handle, bool *blacklist, size_t core_num) ++static int tc_ring_one_exec(struct tc_ring_one_config *config) + { +- int ret = 0; ++ int ret; + int i; + +- g_tc_config.space_size = mem_size; +- g_tc_config.loop_total = loop_cnt; +- g_tc_config.sleep_ms = delay; +- g_tc_config.err_handle = err_handle; +- g_tc_config.black_list = blacklist; +- g_tc_config.sys_core_total = core_num; +- g_tc_config.block_size = TC_RING_ONE_BLOCK_SIZE; +- g_tc_config.rd_loop = TC_RING_ONE_RD_LOOP; +- +- ret = tc_ring_one_init(&g_tc_config); ++ ret = tc_ring_one_init(config); + if (ret != 0) { +- logging_error("tc_ring_one_init fail ret:%d", ret); ++ logging_error("tc_ring_one_init fail ret:%d\n", ret); + return ret; + } + +- for (i = 0; i < core_num; i++) { +- if (is_core_run_tc(&g_tc_config, i) == 0) { ++ for (i = 0; i < config->sys_core_total; i++) { ++ if (is_core_invalid(config, i)) { + // 跳过黑名单 & offline 的core + continue; + } +- ret = pthread_create(&g_tc_config.tc_core_threads[i], NULL, tc_ring_one_thread_entry, (void *)(&g_tc_config)); ++ ++ ret = pthread_create(&config->tc_core_threads[i], NULL, tc_ring_one_thread_entry, (void *)config); + if (ret != 0) { +- logging_error("Failed to create thread for core %d", i); +- 
tc_ring_one_release(&g_tc_config); +- free(g_tc_config.tc_core_threads); +- return ret; ++ logging_error("Failed to create thread for core %d\n", i); ++ goto pthread_create_fail; + } + +- ret = bind_thread_to_core(g_tc_config.tc_core_threads[i], i); ++ ret = bind_thread_to_core(config->tc_core_threads[i], i); + if (ret != 0) { +- logging_error("Failed to bind thread to core %d", i); +- tc_ring_one_release(&g_tc_config); +- free(g_tc_config.tc_core_threads); +- return ret; ++ logging_error("Failed to bind thread to core %d\n", i); ++ goto pthread_bind_fail; + } + } + + // 等待所有线程完成 +- for (i = 0; i < core_num; i++) { +- if (is_core_run_tc(&g_tc_config, i) == 0) { ++ for (i = 0; i < config->sys_core_total; i++) { ++ if (is_core_invalid(config, i)) { + // 跳过黑名单 & offline 的core + continue; + } +- pthread_join(g_tc_config.tc_core_threads[i], NULL); ++ ++ pthread_join(config->tc_core_threads[i], NULL); + } + +- tc_ring_one_release(&g_tc_config); +- free(g_tc_config.tc_core_threads); +- if (g_tc_config.err_cnt > 0) { +- return TC_RING_ONE_FAIL; ++ if (config->err_flag > 0) { ++ ret = TC_RING_ONE_FAIL; + } else { +- return TC_RING_ONE_SUCCESS; ++ ret = TC_RING_ONE_SUCCESS; ++ } ++ ++pthread_bind_fail: ++ pthread_cancel(config->tc_core_threads[i]); ++ pthread_join(config->tc_core_threads[i], NULL); ++ ++pthread_create_fail: ++ for (i = i - 1; i >= 0; i--) { ++ if (is_core_invalid(config, i)) { ++ continue; ++ } ++ ++ pthread_cancel(config->tc_core_threads[i]); ++ pthread_join(config->tc_core_threads[i], NULL); + } ++ ++ tc_ring_one_release(config); ++ free(config->tc_core_threads); ++ config->tc_core_threads = NULL; ++ return ret; ++} ++ ++void tc_ring_one_post_process(uint64_t err_handle, int result) ++{ ++ if (result == TC_RING_ONE_SUCCESS) { ++ logging_info("tc_ring_one test pass\n"); ++ } else if (result == TC_RING_ONE_FAIL) { ++ switch (err_handle) { // 根据错误处理策略进行相应的处理 ++ case TC_ERROR_HANDLE_NONE: ++ logging_error("the system administrator must handle this 
error!!!\n"); ++ break; ++ case TC_ERROR_HANDLE_SHUTDOWN: ++ logging_error("Execute 'shutdown'\n"); ++ if (reboot(RB_POWER_OFF) < 0) { ++ logging_error("ERROR: Failed to execute 'shutdown'\n"); ++ } ++ break; ++ case TC_ERROR_HANDLE_REBOOT: ++ logging_error("Execute 'reboot'\n"); ++ if (reboot(RB_AUTOBOOT) < 0) { ++ logging_error("ERROR: Failed to execute 'reboot'\n"); ++ } ++ break; ++ default: // panic ++ abort(); ++ break; ++ } ++ } else { ++ // 通过log打印 ++ logging_error("The system can not run the tc_ring_one:\n"); ++ logging_error("1. the system must support NUMA\n"); ++ logging_error("2. the memory in the system maybe too small\n"); ++ soc_ring_sentry_report(RESULT_LEVEL_SKIP, "The system can not run the tc_ring_one testcase"); ++ } ++} ++ ++int tc_ring_one_main(uint64_t mem_size, uint64_t loop_cnt, uint64_t delay, ++ uint64_t err_handle, bool *blacklist, size_t core_num) ++{ ++ struct tc_ring_one_config tc_config = { 0 }; ++ int ret; ++ ++ tc_config.mem_size = mem_size; ++ tc_config.loop_cnt = loop_cnt; ++ tc_config.delay_ms = delay; ++ tc_config.err_handle = err_handle; ++ tc_config.black_list = blacklist; ++ tc_config.sys_core_total = core_num; ++ tc_config.block_size = TC_RING_ONE_BLOCK_SIZE; ++ tc_config.rd_loop = TC_RING_ONE_RD_LOOP; ++ tc_config.numa_node = get_numa_node(); ++ ++ ret = tc_ring_one_exec(&tc_config); ++ tc_ring_one_post_process(err_handle, ret); ++ ++ return ret; + } + + static void tc_ring_one_init_data_pattern(uintptr_t base, size_t size, int scan_bit) +@@ -499,33 +513,40 @@ static void tc_ring_one_data_clear(uintptr_t base, size_t size, int scan_bit) + } + } + +-static void tc_ring_one_testspace_update(tc_ring_one_config_t *config, int scan_bit) ++static void tc_ring_one_testspace_update(struct tc_ring_one_config *config, int scan_bit) + { +- int core_id = get_core_id_by_thread(config->tc_core_threads); ++ int core_id = sched_getcpu(); + int numa_node = get_numa_node_of_core(core_id); + + pthread_barrier_wait(&config->tc_barrier); +- 
pthread_mutex_lock(&config->tc_node_mutex[numa_node]); +- if (config->node_update_flag[numa_node] == 0) { +- tc_ring_one_init_data_pattern((uintptr_t)config->test_space_base[numa_node], config->space_size, scan_bit); +- config->node_update_flag[numa_node] = 1; ++ if (numa_node >= 0) { ++ pthread_mutex_lock(&config->tc_node_mutex[numa_node]); ++ if (config->node_update_flag[numa_node] == 0) { ++ tc_ring_one_init_data_pattern((uintptr_t)config->test_space_base[numa_node], config->mem_size, scan_bit); ++ config->node_update_flag[numa_node] = 1; ++ } ++ ++ pthread_mutex_unlock(&config->tc_node_mutex[numa_node]); + } +- pthread_mutex_unlock(&config->tc_node_mutex[numa_node]); ++ + pthread_barrier_wait(&config->tc_barrier); + } + +-static void tc_ring_one_testspace_recover(tc_ring_one_config_t *config, int scan_bit) ++static void tc_ring_one_testspace_recover(struct tc_ring_one_config *config, int scan_bit) + { +- uint32_t core_id = get_core_id_by_thread(config->tc_core_threads); ++ uint32_t core_id = sched_getcpu(); + int numa_node = get_numa_node_of_core(core_id); + + pthread_barrier_wait(&config->tc_barrier); +- pthread_mutex_lock(&config->tc_node_mutex[numa_node]); +- if (config->node_update_flag[numa_node] == 1) { +- tc_ring_one_data_clear((uintptr_t)config->test_space_base[numa_node], config->space_size, scan_bit); +- config->node_update_flag[numa_node] = 0; ++ if (numa_node >= 0) { ++ pthread_mutex_lock(&config->tc_node_mutex[numa_node]); ++ if (config->node_update_flag[numa_node] == 1) { ++ tc_ring_one_data_clear((uintptr_t)config->test_space_base[numa_node], config->mem_size, scan_bit); ++ config->node_update_flag[numa_node] = 0; ++ } ++ pthread_mutex_unlock(&config->tc_node_mutex[numa_node]); + } +- pthread_mutex_unlock(&config->tc_node_mutex[numa_node]); ++ + pthread_barrier_wait(&config->tc_barrier); + } + +@@ -534,8 +555,7 @@ static void tc_ring_one_testspace_recover(tc_ring_one_config_t *config, int scan + * @base_addr: 测试空间中指定测试块首地址(必须为128B对齐地址) + * 
@scan_bit: 待测试bit位置(0-511) + * @block_size: 待测试块空间大小 +- * @loop: 测试循环 +- * @err_cnt: 记录错误次数的内存地址 ++ * @loop_cnt: 巡检次数 + * + * 在调用本函数之前,要保证测试空间已经被待验证的数据Pattern初始化。 + * 待验证的数据Pattern: +@@ -544,108 +564,101 @@ static void tc_ring_one_testspace_recover(tc_ring_one_config_t *config, int scan + * + * 注意: 调用者要保证 base_addr + block_size 不能超过测试空间长度 + */ +-static void tc_ring_one_scan_test_block(uintptr_t base_addr, int scan_bit, size_t block_size, int *err_cnt) ++static bool tc_ring_one_scan_test_block(uintptr_t base_addr, int scan_bit, size_t block_size, uint64_t loop_cnt) + { + uint64_t tgt_dat_pattern = (1ULL << (scan_bit & 0x3F)); + size_t word_offset = (scan_bit >> 6) << 3; // 待测试bit在cacheline中的word的偏移位置(一个word为64 Byte) + uint64_t tgt_dat_all_one = ~0x0ULL; +- char json_result[2048]; + char err_msg[1024]; + uint64_t rd_data[2]; +- int i; ++ size_t i; + + for (i = 0; i < block_size; i += TC_RING_ONE_DATA_UNIT) { + rd_data[0] = *((uint64_t *)(base_addr + i + word_offset)); // base_addr + n * 64 + word_offset + rd_data[1] = *((uint64_t *)(base_addr + i + TC_RING_ONE_CACHELINE_SIZE + word_offset)); // base_addr + (n + 1) * 64 + word_offset + if((rd_data[0] != tgt_dat_all_one) || (rd_data[1] != tgt_dat_pattern)) { +- __atomic_add_fetch(err_cnt, 1, __ATOMIC_SEQ_CST); // 错误次数加1 +- snprintf(err_msg, sizeof(err_msg), "[ERROR][CORE%d] vaddr = %#lx paddr = %#lx read_data = %#llx read_disturb = %#llx target_data = " +- "%#llx bit_index = %d offset = %#lx block_size = %#lx\n", +- sched_getcpu(), +- (base_addr + i + word_offset), +- vaddr_to_phys(base_addr + i + word_offset), +- rd_data[1], +- rd_data[0], +- (1ULL << (scan_bit & 0x3F)), +- scan_bit, +- word_offset, +- block_size); +- logging_error("%s", err_msg); +- snprintf(json_result, sizeof(json_result), "{\"msg\":\"%s\", \"code\":2001}", err_msg); +- report_result(TOOL_NAME, RESULT_LEVEL_MAJOR_ALM, json_result); +- break; ++ snprintf(err_msg, sizeof(err_msg), "[ERROR][CORE%d] test loop %lu vaddr = %#lx paddr = %#lx read_data = 
%#llx read_disturb = %#llx " ++ "target_data = %#llx bit_index = %d offset = %#lx block_size = %#lx", ++ sched_getcpu(), loop_cnt, (base_addr + i + word_offset), vaddr_to_phys(base_addr + i + word_offset), rd_data[1], rd_data[0], ++ tgt_dat_pattern, scan_bit, word_offset, block_size); ++ soc_ring_sentry_report(RESULT_LEVEL_MAJOR_ALM, err_msg); ++ return false; + } + } ++ ++ return true; + } + +-static void tc_ring_one_scan_bit(tc_ring_one_config_t *config, uint64_t loop) ++static bool tc_ring_one_scan_bit(struct tc_ring_one_config *config, int scan_bit, uint64_t loop_cnt) + { + int core_id = sched_getcpu(); +- int numa_node; ++ int numa_node = get_numa_node_of_core(core_id);; + uint32_t i, j; + +- numa_node = get_numa_node_of_core(core_id); ++ // numa为无效值时直接返回,不再执行扫描测试 ++ if (numa_node < 0) { ++ return true; ++ } ++ + for (i = 0; i < config->rd_loop; i++) { +- for (j = 0; j < config->space_size; j += config->block_size) { ++ for (j = 0; j < config->mem_size; j += config->block_size) { + // 按测试块大小,扫描测试空间 +- tc_ring_one_scan_test_block((uintptr_t)((char *)(config->test_space_base[numa_node])) + j, +- config->scan_bit, +- config->block_size, +- &(config->err_cnt)); +- +- if (config->err_cnt > 0) { +- logging_error("[ERROR][CORE%d] dbls_scan_bit error, scan_bit = %d, err_cnt = %d vir_base_addr = %p " +- "phy_base_addr = %p block = %ld rd_loop = %d loop = %d\n", +- sched_getcpu(), +- config->scan_bit, +- config->err_cnt, +- config->test_space_base[numa_node], +- vaddr_to_phys((uintptr_t)(config->test_space_base[numa_node])), +- j / config->block_size, +- i, loop); +- return; ++ if (!tc_ring_one_scan_test_block((uintptr_t)((char *)(config->test_space_base[numa_node])) + j, ++ scan_bit, config->block_size, loop_cnt)) { ++ __atomic_add_fetch(&config->err_flag, 1, __ATOMIC_SEQ_CST); ++ return false; + } + +- if (config->sleep_ms > 0) { ++ if (config->delay_ms > 0) { + // 每扫描完一个测试块就休眠一段时间 +- usleep(config->sleep_ms * 1000); ++ usleep(config->delay_ms * 1000); + } + } + } ++ 
++ return true; ++} ++ ++static bool tc_ring_one_test_issue(struct tc_ring_one_config *config, int scan_bit, int loop_cnt) ++{ ++ bool ret; ++ ++ tc_ring_one_testspace_update(config, scan_bit); ++ ret = tc_ring_one_scan_bit(config, scan_bit, loop_cnt); ++ tc_ring_one_testspace_recover(config, scan_bit); ++ ++ if (config->err_flag) { ++ return false; ++ } ++ ++ return ret; + } + + static void* tc_ring_one_thread_entry(void *arg) + { +- tc_ring_one_config_t *config = arg; ++ struct tc_ring_one_config *config = arg; + int scan_sequence_id = 0; + int scan_special_id = 0; + uint64_t loop_cnt = 0; + uint64_t tc_flag = 1; + +- // 等待所有线程准备就绪 +- pthread_barrier_wait(&config->tc_barrier); +- +- while (tc_flag == 1) { ++ while (tc_flag == 1 && config->err_flag == 0) { + config->scan_bit = scan_sequence_id; +- tc_ring_one_testspace_update(config, scan_sequence_id); +- tc_ring_one_scan_bit(config, loop_cnt); +- tc_ring_one_testspace_recover(config, scan_sequence_id); +- if (config->err_cnt > 0) { ++ pthread_barrier_wait(&config->tc_barrier); ++ if (!tc_ring_one_test_issue(config, config->scan_bit, loop_cnt)) { + break; + } +- scan_sequence_id = (scan_sequence_id + 1) % (TC_RING_ONE_CACHELINE_SIZE * 8); + +- scan_special_id = g_tc_ring_one_special_bits[rand() % 8]; ++ scan_sequence_id = (scan_sequence_id + 1) % (TC_RING_ONE_CACHELINE_SIZE * 8); ++ scan_special_id = g_tc_ring_one_special_bits[random() % 8]; + config->scan_bit = scan_special_id; + pthread_barrier_wait(&config->tc_barrier); +- tc_ring_one_testspace_update(config, config->scan_bit); +- tc_ring_one_scan_bit(config, loop_cnt); +- tc_ring_one_testspace_recover(config, config->scan_bit); +- if (config->err_cnt > 0) { ++ if (!tc_ring_one_test_issue(config, config->scan_bit, loop_cnt)) { + break; + } ++ + loop_cnt++; +- if ((config->loop_total != 0) && (loop_cnt >= config->loop_total)) { ++ if ((config->loop_cnt != 0) && (loop_cnt >= config->loop_cnt)) { + tc_flag = 0; + } + } +@@ -653,37 +666,4 @@ static void* 
tc_ring_one_thread_entry(void *arg) + return config; + } + +-void tc_ring_one_post_process(int result) +-{ +- if (result == TC_RING_ONE_SUCCESS) { +- logging_info("tc_ring_one test pass\n"); +- } else if (result == TC_RING_ONE_PRAMA_ERR) { +- // 通过log打印 +- logging_error("The system can not run the tc_ring_one:\n"); +- logging_error("1. the system must support NUMA\n"); +- logging_error("2. the memory in the system maybe too small\n"); +- report_result(TOOL_NAME, RESULT_LEVEL_FAIL, "{\"msg\":\"The system can not run the tc_ring_one testcase\", \"code\":1001}"); +- } else if (result == TC_RING_ONE_FAIL) { +- switch (g_tc_config.err_handle) { // 根据错误处理策略进行相应的处理 +- case TC_ERROR_HANDLE_NONE: +- logging_error("the system administrator must handle this error!!!\n"); +- break; +- case TC_ERROR_HANDLE_SHUTDOWN: +- logging_error("Execute 'shutdown'\n"); +- if (reboot(RB_POWER_OFF) < 0) { +- logging_error("ERROR: Failed to execute 'shutdown'\n"); +- } +- break; +- case TC_ERROR_HANDLE_REBOOT: +- logging_error("Execute 'reboot'\n"); +- if (reboot(RB_AUTOBOOT) < 0) { +- logging_error("ERROR: Failed to execute 'reboot'\n"); +- } +- break; +- default: // panic +- abort(); +- } +- } +-} +- + // end of tc_ring_one.c +diff --git a/src/sentryPlugins/soc_ring_sentry/tc_ring_one.h b/src/sentryPlugins/soc_ring_sentry/tc_ring_one.h +index 015a5b3..d5a25ee 100755 +--- a/src/sentryPlugins/soc_ring_sentry/tc_ring_one.h ++++ b/src/sentryPlugins/soc_ring_sentry/tc_ring_one.h +@@ -12,7 +12,7 @@ + + + /** +- * tc_ring_one_create_threads - ++ * tc_ring_one_main - + * 测试用例总入口,该函数会为测试申请测试内存空间,为每个测试核创建测试线程,并将测试线程调度到测试核上 + * @mem_size: 用户指定的测试内存空间大小,系统每个numa节点均提供对用大小的测试空间,用于巡检测试 + * @loop_cnt: 测试循环次数, 大于0,则按照对应的循环做巡检测试,等于0,则巡检线程持续驻留在测试核中 +@@ -32,7 +32,6 @@ + * -2 - 检出到数据错误 + * + */ +-int tc_ring_one_create_threads(uint64_t mem_size, uint64_t loop_cnt, uint64_t delay, uint64_t err_handle, bool *blacklist, size_t core_num); +-void tc_ring_one_post_process(int result); ++int tc_ring_one_main(uint64_t 
mem_size, uint64_t loop_cnt, uint64_t delay, uint64_t err_handle, bool *blacklist, size_t core_num); + + #endif/*__TC_RING_ONE_H__*/ +\ No newline at end of file +-- +2.33.0 + -- Gitee