diff --git a/common/inc/pwrerr.h b/common/inc/pwrerr.h
index 19166a04c6701fbb86c9829af0b5c18fef706c2d..68dda32c6e8eb113b61282d90a8a918259fbbfc5 100644
--- a/common/inc/pwrerr.h
+++ b/common/inc/pwrerr.h
@@ -64,5 +64,6 @@ enum PWR_RtnCode {
     PWR_ERR_FILE_OPEN_FAILED,
     PWR_ERR_FILE_SPRINTF_FAILED,
     PWR_ERR_HBM_NOT_SUPPORTED = 600,
+    PWR_ERR_HBM_SET_POWER_STATE_FAILED = 601,
 };
 #endif
\ No newline at end of file
diff --git a/pwrapis/src/hbmservice.c b/pwrapis/src/hbmservice.c
index 2516f75903be3cca49297a987ef1de4e001af3b5..d712837957c260c77a33f4379fc79775596e1156 100644
--- a/pwrapis/src/hbmservice.c
+++ b/pwrapis/src/hbmservice.c
@@ -13,6 +13,7 @@
  * Description: provide hbm service
  * **************************************************************************** */
 
+#include "config.h"
 #include "string.h"
 #include "pwrerr.h"
 #include "server.h"
@@ -30,32 +31,144 @@
         pclose(fp); \
     } while (0)
 
-static int GetHbmMode(PWR_HBM_SYS_STATE *state)
+/*
+ * Return 1 when the first line of <node_path>/cpulist is empty (the node lists
+ * no CPUs); return 0 otherwise, including when the file cannot be opened.
+ */
+static int IsNodeEmptyCpuList(const char *node_path)
 {
-    char cache_mod_cmd[] = "find /sys/devices/LNXSYSTM* -name 'HISI04A1*'";
-    char flat_mod_cmd[] = "find /sys/devices/LNXSYSTM* -name 'PNP0C80*'";
-    *state = PWR_HBM_NOT_SUPPORT;
+    char cpulist_file[MAX_FULL_NAME];
+    FILE *cpulist_fp;
+    char cpulist_buf[256];
+
+    snprintf(cpulist_file, sizeof(cpulist_file), "%s/cpulist", node_path);
+    cpulist_fp = fopen(cpulist_file, "r");
+    if (cpulist_fp == NULL) {
+        return 0;
+    }
+
+    if (fgets(cpulist_buf, sizeof(cpulist_buf), cpulist_fp) != NULL &&
+        (strlen(cpulist_buf) == 0 || strcmp(cpulist_buf, "\n") == 0)) {
+        fclose(cpulist_fp);
+        return 1;
+    }
 
-    FILE *cache_mod_fp = popen(cache_mod_cmd, "r");
-    if (cache_mod_fp == NULL) {
+    fclose(cpulist_fp);
+    return 0;
+}
+
+/*
+ * Write "offline" to the state file of every "memory*" entry under node_path.
+ * Entries whose path does not fit the buffer are logged and skipped. Returns
+ * PWR_ERR_COMMON if the directory cannot be opened or a write fails.
+ */
+static int OfflineMemoryState(const char *node_path)
+{
+    char memory_dir_path[MAX_FULL_NAME];
+    char memory_state_file[MAX_FULL_NAME];
+    DIR *memory_dir;
+    struct dirent *memory_entry;
+
+    snprintf(memory_dir_path, sizeof(memory_dir_path), "%s", node_path);
+    memory_dir = opendir(memory_dir_path);
+    if (memory_dir == NULL) {
+        Logger(ERROR, MD_NM_SVR_HBM, "Failed to open memory directory");
         return PWR_ERR_COMMON;
     }
-    char cache_buf[PWR_MAX_STRING_LEN] = {0};
-    if (fgets(cache_buf, PWR_MAX_STRING_LEN, cache_mod_fp) != NULL) {
-        *state |= PWR_HBM_FLAT_MOD;
+
+    while ((memory_entry = readdir(memory_dir)) != NULL) {
+        if (strncmp(memory_entry->d_name, "memory", 6) != 0) {
+            continue;
+        }
+
+        int ret = snprintf(memory_state_file, sizeof(memory_state_file), "%s/%s/state", memory_dir_path,
+            memory_entry->d_name);
+        if (ret >= (int)sizeof(memory_state_file)) {
+            Logger(ERROR, MD_NM_SVR_HBM, "Buffer overflow detected in memory_state_file");
+            continue;
+        }
+        if (WriteFile(memory_state_file, "offline", strlen("offline")) != PWR_SUCCESS) {
+            Logger(ERROR, MD_NM_SVR_HBM, "Failed to write to memory state file");
+            closedir(memory_dir);
+            return PWR_ERR_COMMON;
+        }
     }
 
-    FILE *flat_mod_fp = popen(flat_mod_cmd, "r");
-    if (flat_mod_fp == NULL) {
-        pclose(cache_mod_fp);
+    closedir(memory_dir);
+    return PWR_SUCCESS;
+}
+
+/*
+ * Walk /sys/devices/system/node and offline the memory of every "node*" entry
+ * whose cpulist is empty. A node that fails to offline is skipped, so the
+ * result is PWR_SUCCESS whenever the node directory could be opened.
+ */
+static int OfflineAllHBMNode(void)
+{
+    DIR *dir;
+    struct dirent *entry;
+    char node_path[MAX_FULL_NAME];
+
+    dir = opendir("/sys/devices/system/node");
+    if (dir == NULL) {
+        Logger(ERROR, MD_NM_SVR_HBM, "Failed to open /sys/devices/system/node directory");
         return PWR_ERR_COMMON;
     }
-    char flat_buf[PWR_MAX_STRING_LEN] = {0};
-    if (fgets(flat_buf, PWR_MAX_STRING_LEN, flat_mod_fp) != NULL) {
-        *state |= PWR_HBM_CACHE_MOD;
+
+    while ((entry = readdir(dir)) != NULL) {
+        if (strncmp(entry->d_name, "node", 4) == 0) {
+            snprintf(node_path, sizeof(node_path), "/sys/devices/system/node/%s", entry->d_name);
+
+            // An empty cpulist means the node has no CPUs: take its memory offline.
+            if (IsNodeEmptyCpuList(node_path)) {
+                if (OfflineMemoryState(node_path) != PWR_SUCCESS) {
+                    continue; // Offlining failed; move on to the next node.
+                }
+            }
+        }
     }
-    pclose(cache_mod_fp);
-    pclose(flat_mod_fp);
+
+    closedir(dir);
+    return PWR_SUCCESS;
+}
+
+/*
+ * Report the HBM mode through *state. Without the HBMMode EFI variable the
+ * result is PWR_ERR_HBM_NOT_SUPPORTED. Otherwise the variable is hexdumped and
+ * the lines matching '0000000 0007 0000 0001' are counted: 0 selects flat mode,
+ * 1 selects cache mode, anything else leaves PWR_HBM_NOT_SUPPORT.
+ */
+static int GetHbmMode(PWR_HBM_SYS_STATE *state)
+{
+    *state = PWR_HBM_NOT_SUPPORT;
+
+    char hbm_mode_file[] = "/sys/firmware/efi/efivars/HBMMode-21f3b3c5-946d-41c1-838c-194e48aa41e2";
+    if (access(hbm_mode_file, F_OK) != 0) {
+        return PWR_ERR_HBM_NOT_SUPPORTED;
+    }
+
+    char hbm_cmd[] =
+        "hexdump /sys/firmware/efi/efivars/HBMMode-21f3b3c5-946d-41c1-838c-194e48aa41e2 | grep '0000000 0007 0000 "
+        "0001' | wc -l";
+    FILE *fp = popen(hbm_cmd, "r");
+    if (fp == NULL) {
+        return PWR_ERR_COMMON;
+    }
+
+    char result_buf[PWR_MAX_STRING_LEN] = {0};
+    if (fgets(result_buf, sizeof(result_buf), fp) != NULL) {
+        int count = atoi(result_buf);
+        if (count == 0) {
+            *state = PWR_HBM_FLAT_MOD;
+        } else if (count == 1) {
+            *state = PWR_HBM_CACHE_MOD;
+        } else {
+            *state = PWR_HBM_NOT_SUPPORT;
+        }
+    }
+
+    pclose(fp);
+    return PWR_SUCCESS;
 }
 
 
@@ -76,38 +189,122 @@ void GetHbmSysState(PwrMsg *req)
     }
 }
 
-static int SetPowerState(int powerState)
+/*
+ * Cache mode: write "offline" (powerState == 0) or "online" to every state
+ * file under /sys/kernel/hbm_cache, then read the firmware_node status files
+ * back. Returns PWR_SUCCESS only when that check prints "Success".
+ */
+static int HandleCacheMode(const int powerState)
 {
-    PWR_HBM_SYS_STATE hbmState = PWR_HBM_NOT_SUPPORT;
-    if (GetHbmMode(&hbmState) != PWR_SUCCESS) {
-        Logger(ERROR, MD_NM_SVR_HBM, "GetHbmMode failed");
+    char cmd[PWR_MAX_STRING_LEN] = {0};
+    const char *stateStr = (powerState == 0) ? "offline" : "online";
+
+    snprintf(cmd, sizeof(cmd),
+        "find /sys/kernel/hbm_cache/*/state -type f | xargs -I {} sh -c "
+        "'echo \"%s\" > {}'",
+        stateStr);
+    EXEC_COMMAND(cmd);
+
+    // Verify that the state change actually took effect.
+    const char *check_cmd;
+    if (powerState == 0) {
+        check_cmd =
+            "find /sys/kernel/hbm_cache/*/firmware_node/status -type f | xargs -I {} cat {} | grep -q -v '0' && echo "
+            "\"Failure\" || echo \"Success\"";
+    } else {
+        check_cmd =
+            "find /sys/kernel/hbm_cache/*/firmware_node/status -type f | xargs -I {} cat {} | grep -q -v '15' && echo "
+            "\"Failure\" || echo \"Success\"";
+    }
+
+    FILE *fp = popen(check_cmd, "r");
+    if (fp == NULL) {
+        Logger(ERROR, MD_NM_SVR_HBM, "Failed to run command");
+        return PWR_ERR_COMMON;
+    }
+
+    char result[PWR_MAX_NAME_LEN];
+    int ret = PWR_ERR_HBM_SET_POWER_STATE_FAILED;
+    if (fgets(result, sizeof(result), fp) != NULL && strncmp(result, "Success", 7) == 0) {
+        ret = PWR_SUCCESS;
+    }
+    pclose(fp);
+
+    return ret;
+}
+
+/*
+ * Flat mode: when going offline, first offline the memory of the CPU-less
+ * nodes; then write the requested state to every PNP0A06 container and read
+ * the PNP0C80 status files back. Returns PWR_SUCCESS only when that check
+ * prints "Success".
+ */
+static int HandleFlatMode(const int powerState)
+{
+    if (powerState == 0) {
+        if (OfflineAllHBMNode() != PWR_SUCCESS) {
+            return PWR_ERR_COMMON;
+        }
+    }
+
+    const char *stateStr = (powerState == 0) ? "offline" : "online";
+
+    char cmd[PWR_MAX_STRING_LEN] = {0};
+    snprintf(cmd, sizeof(cmd),
+        "find /sys/devices/system/container/PNP0A06*/state -type f | xargs -I {} sh -c 'echo "
+        "\"%s\" > {}'",
+        stateStr);
+    EXEC_COMMAND(cmd);
+
+    const char *check_cmd;
+    if (powerState == 0) {
+        check_cmd =
+            "find /sys/devices/system/container/PNP0A06*/firmware_node/PNP0C80*/status -type f | xargs -I {} cat {} | "
+            "grep -q -v '13' && echo \"Failure\" || echo \"Success\"";
+    } else {
+        check_cmd =
+            "find /sys/devices/system/container/PNP0A06*/firmware_node/PNP0C80*/status -type f | xargs -I {} cat {} | "
+            "grep -q -v '15' && echo \"Failure\" || echo \"Success\"";
+    }
+
+    FILE *fp = popen(check_cmd, "r");
+    if (fp == NULL) {
+        Logger(ERROR, MD_NM_SVR_HBM, "Failed to run command");
         return PWR_ERR_COMMON;
     }
-    if (hbmState == PWR_HBM_NOT_SUPPORT) {
+
+    char result[PWR_MAX_NAME_LEN];
+    int ret = PWR_ERR_HBM_SET_POWER_STATE_FAILED;
+    if (fgets(result, sizeof(result), fp) != NULL && strncmp(result, "Success", 7) == 0) {
+        ret = PWR_SUCCESS;
+    }
+    pclose(fp);
+
+    return ret;
+}
+
+/*
+ * Apply powerState (0 = offline, otherwise online) using the handler for the
+ * detected HBM mode; PWR_ERR_HBM_NOT_SUPPORTED when no mode is detected.
+ */
+static int SetPowerState(int powerState)
+{
+    PWR_HBM_SYS_STATE hbmState = PWR_HBM_NOT_SUPPORT;
+    int ret = PWR_ERR_HBM_SET_POWER_STATE_FAILED;
+    if (GetHbmMode(&hbmState) != PWR_SUCCESS || hbmState == PWR_HBM_NOT_SUPPORT) {
         Logger(ERROR, MD_NM_SVR_HBM, "SetHbmAllPowerState: HBM is not support");
         return PWR_ERR_HBM_NOT_SUPPORTED;
     }
 
-    const char *state_str = (powerState == 0) ? "offline" : "online";
-    if (hbmState == PWR_HBM_CACHE_MOD || hbmState == PWR_HBM_HYBRID_MOD) {
-        char cmd[PWR_MAX_STRING_LEN] = {0};
-        snprintf(
-            cmd, sizeof(cmd),
-            "find /sys/kernel/hbm_cache/*/state -type f | xargs -I {} sh -c 'echo \"%s\" > {}'",
-            state_str);
-        EXEC_COMMAND(cmd);
+    if (hbmState == PWR_HBM_CACHE_MOD) {
+        ret = HandleCacheMode(powerState);
     }
 
-    if (hbmState == PWR_HBM_FLAT_MOD || hbmState == PWR_HBM_HYBRID_MOD) {
-        char cmd[PWR_MAX_STRING_LEN] = {0};
-        snprintf(cmd, sizeof(cmd),
-            "find /sys/devices/system/container/PNP0A06*/state -type f | xargs -I {} sh -c 'echo "
-            "\"%s\" > {}'",
-            state_str);
-        EXEC_COMMAND(cmd);
+    if (hbmState == PWR_HBM_FLAT_MOD) {
+        ret = HandleFlatMode(powerState);
     }
 
-    return PWR_SUCCESS;
+    return ret;
 }
 
 void SetHbmAllPowerState(PwrMsg *req)