From ca1b5cce34f6fc7a8c27fb6ecd7302820b2a841f Mon Sep 17 00:00:00 2001
From: JiangShui Yang
Date: Mon, 10 Mar 2025 09:58:05 +0800
Subject: [PATCH] Revert "Support initializing HBW nodes from memory_locality" patch

The kernel interface that supported initializing high-bandwidth (HBW)
nodes from memory_locality has been removed from the operating system
kernel. The memkind interface has to work together with that kernel
interface, so the corresponding memkind patch is removed as well.

Signed-off-by: JiangShui Yang
(cherry picked from commit 597108c78540e6eb3d10866d765929f776d9f1f4)
---
 ...izing-HBW-nodes-from-memory_locality.patch | 340 ------------------
 memkind.spec                                  |   6 +-
 2 files changed, 4 insertions(+), 342 deletions(-)
 delete mode 100644 0002-Support-initializing-HBW-nodes-from-memory_locality.patch

diff --git a/0002-Support-initializing-HBW-nodes-from-memory_locality.patch b/0002-Support-initializing-HBW-nodes-from-memory_locality.patch
deleted file mode 100644
index 4df39fc..0000000
--- a/0002-Support-initializing-HBW-nodes-from-memory_locality.patch
+++ /dev/null
@@ -1,340 +0,0 @@
-From 448eb95b45b0cf6ecc7cf1a3e24056a2fdae85bd Mon Sep 17 00:00:00 2001
-From: Yicong Yang
-Date: Fri, 13 Oct 2023 15:21:11 +0800
-Subject: [PATCH] Support initializing HBW nodes from memory_locality
-
-In current implementation we mainly infer the HBW nodes from the
-HMAT/SLIT, which may not describe all the cases. For example
-the HMAT/SLIT cannot describe the topology below:
-
-[       Node 0       ]
-[ CPU 0-3 ][ CPU 4-7 ]
-     |          |
-[  HBM 0  ][  HBM 1  ]
-[  Node 1 ][  Node 2 ]
-
-CPU 0-7 are in one NUMA node, but CPU 0-3 is closest to HBM 0 while
-CPU 4-7 is closest to HBM 1. Current HMAT/SLIT cannot support this
-case.
-
-In order to support this, openeuler has merged a HBM device driver
-to export the topology by sysfs[1]. The description of above topology
-will be like:
-$ cat /sys/kernel/hbm_memory/memory_topo/memory_locality
-1 0-3
-1 4-7
-
-This patch cooperate with the HBM device driver to support initializing
-the HBW nodes from memory_locality for memkind. Will try to obtains
-the HBW nodes by parsing the memory_locality first, on failure or there
-is no memory_locality on the system will fallback to HMAT/SLIT. User
-can disable this function by MEMKIND_DISABLE_MEMORY_LOCALITY=1 as well.
-
-[1] https://gitee.com/openeuler/kernel/pulls/451
-Signed-off-by: Yicong Yang
----
- include/memkind/internal/memkind_bitmask.h |   2 +
- src/memkind_bitmask.c                      | 185 +++++++++++++++++++++
- src/memkind_hbw.c                          |  42 +++++
- 3 files changed, 229 insertions(+)
-
-diff --git a/include/memkind/internal/memkind_bitmask.h b/include/memkind/internal/memkind_bitmask.h
-index 5c5b8434..6b0c3f64 100644
---- a/include/memkind/internal/memkind_bitmask.h
-+++ b/include/memkind/internal/memkind_bitmask.h
-@@ -12,6 +12,8 @@ extern "C" {
- 
- typedef int (*get_node_bitmask)(struct bitmask **);
- 
-+int set_numanode_from_memory_locality(void **numanode,
-+                                      memkind_node_variant_t node_variant);
- int set_closest_numanode(get_node_bitmask get_bitmask, void **numanode,
-                          memkind_node_variant_t node_variant);
- int set_bitmask_for_current_numanode(unsigned long *nodemask,
-diff --git a/src/memkind_bitmask.c b/src/memkind_bitmask.c
-index 4f6d9f00..84300395 100644
---- a/src/memkind_bitmask.c
-+++ b/src/memkind_bitmask.c
-@@ -1,9 +1,11 @@
- // SPDX-License-Identifier: BSD-2-Clause
- /* Copyright (C) 2019 - 2021 Intel Corporation. */
- 
-+#include 
- #include 
- #include 
- #include 
-+#include 
- 
- #include 
- #include 
-@@ -12,6 +14,89 @@
- // Vector of CPUs with memory NUMA Node id(s)
- VEC(vec_cpu_node, int);
- 
-+void init_node_closet_cpu(cpu_set_t **cpunode_mask, int num_cpu, int num_nodes)
-+{
-+    char *line = NULL;
-+    size_t len = 0;
-+    ssize_t n;
-+    FILE *f;
-+
-+    /*
-+     * The content of /sys/kernel/hbm_memory/memory_topo/memory_locality should
-+     * be like:
-+     * 2 0-3
-+     * 3 4-7
-+     * 4 8-11
-+     * 5 12-15
-+     * 6 16-19
-+     * 7 20-23
-+     * 8 24-27
-+     * 9 28-31
-+     *
-+     * The 1st column is the HBW node number and the 2nd column is the CPU list
-+     * which is closet to the HBW node.
-+     */
-+    f = fopen("/sys/kernel/hbm_memory/memory_topo/memory_locality", "r");
-+    if (!f)
-+        return;
-+
-+    while ((n = getline(&line, &len, f)) != -1) {
-+        long int node, begin_cpu, end_cpu;
-+        char *begin, *end;
-+
-+        /* Get the node number first */
-+        node = strtol(line, &end, 0);
-+
-+        /* Either the node number is invalid or the whole line is invalid */
-+        if (line == end || node == LONG_MAX || node == LONG_MIN)
-+            break;
-+
-+        if (node >= num_nodes) {
-+            log_err("Invalid node number provided by memory_locality.");
-+            break;
-+        }
-+
-+        /* Try to find the beginning of the CPU list string */
-+        while (*end == ' ' && end != line + len)
-+            end++;
-+
-+        if (end == line + len || !isdigit(*end))
-+            break;
-+
-+        begin = end;
-+        do {
-+            begin_cpu = strtol(begin, &end, 0);
-+            if (begin == end || begin_cpu == LONG_MAX || begin_cpu == LONG_MIN)
-+                break;
-+
-+            /* End of the line */
-+            if (*end == '\0' || *end == '\n') {
-+                CPU_SET_S(begin_cpu, CPU_ALLOC_SIZE(num_cpu), cpunode_mask[node]);
-+                break;
-+            } else if (*end == ',') {
-+                CPU_SET_S(begin_cpu, CPU_ALLOC_SIZE(num_cpu), cpunode_mask[node]);
-+            } else if (*end == '-' && isdigit(*(++end))) {
-+                begin = end;
-+                end_cpu = strtol(begin, &end, 0);
-+                if (begin == end || end_cpu == LONG_MAX || end_cpu == LONG_MIN)
-+                    break;
-+
-+                while (begin_cpu <= end_cpu) {
-+                    CPU_SET_S(begin_cpu, CPU_ALLOC_SIZE(num_cpu), cpunode_mask[node]);
-+                    ++begin_cpu;
-+                }
-+            } else {
-+                break;
-+            }
-+
-+            begin = end + 1;
-+        } while (begin < line + len);
-+    }
-+
-+    free(line);
-+    fclose(f);
-+}
-+
- int memkind_env_get_nodemask(char *nodes_env, struct bitmask **bm)
- {
-     *bm = numa_parse_nodestring(nodes_env);
-@@ -22,6 +107,106 @@ int memkind_env_get_nodemask(char *nodes_env, struct bitmask **bm)
-     return MEMKIND_SUCCESS;
- }
- 
-+int set_numanode_from_memory_locality(void **numanode,
-+                                      memkind_node_variant_t node_variant)
-+{
-+    int num_cpu = numa_num_configured_cpus();
-+    int cpuset_size = CPU_ALLOC_SIZE(num_cpu);
-+    int max_node_id = numa_max_node();
-+    cpu_set_t **cpunode_mask;
-+    int init_node, cpu_id;
-+    int status;
-+
-+    cpunode_mask = calloc(max_node_id + 1, sizeof(*cpunode_mask));
-+    if (!cpunode_mask) {
-+        status = MEMKIND_ERROR_MALLOC;
-+        log_err("calloc() failed.");
-+        goto out;
-+    }
-+
-+    for (init_node = 0; init_node <= max_node_id; init_node++) {
-+        cpunode_mask[init_node] = CPU_ALLOC(num_cpu);
-+        if (!cpunode_mask[init_node]) {
-+            while (init_node >= 0) {
-+                CPU_FREE(cpunode_mask[init_node]);
-+                init_node--;
-+            }
-+
-+            status = MEMKIND_ERROR_MALLOC;
-+            log_err("CPU_ALLOC_SIZE() failed.");
-+            goto free_cpunode_mask;
-+        }
-+
-+        CPU_ZERO_S(cpuset_size, cpunode_mask[init_node]);
-+    }
-+
-+    init_node_closet_cpu(cpunode_mask, num_cpu, max_node_id + 1);
-+
-+    struct vec_cpu_node *node_arr =
-+        (struct vec_cpu_node *)calloc(num_cpu, sizeof(struct vec_cpu_node));
-+    if (!node_arr) {
-+        status = MEMKIND_ERROR_MALLOC;
-+        log_err("calloc() failed.");
-+        goto free_cpunode_mask_array;
-+    }
-+
-+    /* Scan CPUs once. Assuming the CPU number are much more bigger than NUMA Nodes */
-+    for (cpu_id = 0; cpu_id < num_cpu; cpu_id++) {
-+        for (init_node = 0; init_node <= max_node_id; init_node++) {
-+            if (CPU_ISSET_S(cpu_id, cpuset_size, cpunode_mask[init_node])) {
-+                VEC_PUSH_BACK(&node_arr[cpu_id], init_node);
-+
-+                /*
-+                 * A cpu should always have one closet node, log error if
-+                 * violate this.
-+                 */
-+                if (node_variant == NODE_VARIANT_SINGLE &&
-+                    VEC_SIZE(&node_arr[cpu_id]) > 1) {
-+                    log_err("CPU%d has more than one closet node.", cpu_id);
-+                    status = MEMKIND_ERROR_RUNTIME;
-+                    for (cpu_id = 0; cpu_id < num_cpu; cpu_id++) {
-+                        if (VEC_CAPACITY(&node_arr[cpu_id]))
-+                            VEC_DELETE(&node_arr[cpu_id]);
-+                    }
-+
-+                    goto free_node_arr;
-+                }
-+            }
-+        }
-+    }
-+
-+    /* Sanity Check each node_arr */
-+    for (cpu_id = 0; cpu_id < num_cpu; cpu_id++) {
-+        if (VEC_SIZE(&node_arr[cpu_id]) == 0) {
-+            log_err("CPU%d's nodemask is not initialized.", cpu_id);
-+            status = MEMKIND_ERROR_RUNTIME;
-+            for (cpu_id = 0; cpu_id < num_cpu; cpu_id++) {
-+                if (VEC_CAPACITY(&node_arr[cpu_id]))
-+                    VEC_DELETE(&node_arr[cpu_id]);
-+            }
-+
-+            goto free_node_arr;
-+        }
-+    }
-+
-+    *numanode = node_arr;
-+    status = MEMKIND_SUCCESS;
-+    goto free_cpunode_mask_array;
-+
-+free_node_arr:
-+    free(node_arr);
-+
-+free_cpunode_mask_array:
-+    for (init_node = 0; init_node <= max_node_id; init_node++)
-+        CPU_FREE(cpunode_mask[init_node]);
-+
-+free_cpunode_mask:
-+    free(cpunode_mask);
-+
-+out:
-+    return status;
-+}
-+
- int set_closest_numanode(get_node_bitmask get_bitmask, void **numanode,
-                          memkind_node_variant_t node_variant)
- {
-diff --git a/src/memkind_hbw.c b/src/memkind_hbw.c
-index 077660ab..e9948593 100644
---- a/src/memkind_hbw.c
-+++ b/src/memkind_hbw.c
-@@ -363,10 +363,36 @@ static bool is_hmat_supported(void)
-     return true;
- }
- 
-+/*
-+ * OS may provide further information of HBW topology in
-+ * /sys/kernel/hbm_memory/memory_topo/memory_locality. Use it unless user
-+ * specified HBW nodes or disabled using of memory_locality.
-+ */
-+static bool use_memory_locality(void)
-+{
-+    char *memory_locality_disable = memkind_get_env("MEMKIND_DISABLE_MEMORY_LOCALITY");
-+
-+    if (memory_locality_disable && !strncmp(memory_locality_disable, "1", 1))
-+        return false;
-+
-+    if (memkind_get_env("MEMKIND_HBW_NODES"))
-+        return false;
-+
-+    return true;
-+}
-+
- static void memkind_hbw_closest_numanode_init(void)
- {
-     struct hbw_numanode_t *g = &memkind_hbw_numanode_g[NODE_VARIANT_MULTIPLE];
-     g->numanode = NULL;
-+
-+    if (use_memory_locality()) {
-+        g->init_err = set_numanode_from_memory_locality(&g->numanode,
-+                                                        NODE_VARIANT_MULTIPLE);
-+        if (!g->init_err)
-+            return;
-+    }
-+
-     if (!is_hmat_supported()) {
-         g->init_err = set_closest_numanode(memkind_hbw_get_nodemask,
-                                            &g->numanode, NODE_VARIANT_MULTIPLE);
-@@ -380,6 +406,14 @@ static void memkind_hbw_closest_preferred_numanode_init(void)
- {
-     struct hbw_numanode_t *g = &memkind_hbw_numanode_g[NODE_VARIANT_SINGLE];
-     g->numanode = NULL;
-+
-+    if (use_memory_locality()) {
-+        g->init_err = set_numanode_from_memory_locality(&g->numanode,
-+                                                        NODE_VARIANT_SINGLE);
-+        if (!g->init_err)
-+            return;
-+    }
-+
-     if (!is_hmat_supported()) {
-         g->init_err = set_closest_numanode(memkind_hbw_get_nodemask,
-                                            &g->numanode, NODE_VARIANT_SINGLE);
-@@ -393,6 +427,14 @@ static void memkind_hbw_all_numanode_init(void)
- {
-     struct hbw_numanode_t *g = &memkind_hbw_numanode_g[NODE_VARIANT_ALL];
-     g->numanode = NULL;
-+
-+    if (use_memory_locality()) {
-+        g->init_err = set_numanode_from_memory_locality(&g->numanode,
-+                                                        NODE_VARIANT_ALL);
-+        if (!g->init_err)
-+            return;
-+    }
-+
-     if (!is_hmat_supported()) {
-         g->init_err = set_closest_numanode(memkind_hbw_get_nodemask,
-                                            &g->numanode, NODE_VARIANT_ALL);
---
-2.24.0
-
diff --git a/memkind.spec b/memkind.spec
index 5ed776c..9306cf3 100644
--- a/memkind.spec
+++ b/memkind.spec
@@ -1,12 +1,11 @@
 Name: memkind
 Summary: Extensible Heap Manager for User
 Version: 1.14.0
-Release: 6
+Release: 7
 License: BSD
 URL: http://memkind.github.io/memkind
 Source0: https://github.com/memkind/memkind/archive/v1.14.0/%{name}-%{version}.tar.gz
 Patch0001: 0001-support-multi-threading-build.patch
-Patch0002: 0002-Support-initializing-HBW-nodes-from-memory_locality.patch
 Patch0003: 0003-memkind-add-sw64-support.patch
 
 BuildRequires: automake libtool numactl-devel systemd gcc gcc-c++ hwloc-devel
@@ -84,6 +83,9 @@ popd
 %{_mandir}/man7/*
 
 %changelog
+* Thu Feb 20 2025 JiangShui Yang - 1.14.0-7
+- Revert "Support initializing HBW nodes from memory_locality" patch
+
 * Mon Feb 17 2025 maqi - 1.14.0-6
 - add support sw_64
 
--
Gitee