From 0b1d7f54a597bd7dbbae1a036276cc0c08fd4bbe Mon Sep 17 00:00:00 2001 From: wangxiaomeng Date: Wed, 7 Sep 2022 10:30:03 +0800 Subject: [PATCH] x86: fix a crash when adding both legacy AMD and CPUID 0x04 caches --- ...when-adding-both-legacy-AMD-and-CPUI.patch | 60 +++++++++++++++++++ openmpi.spec | 7 ++- 2 files changed, 66 insertions(+), 1 deletion(-) create mode 100644 0001-x86-fix-a-crash-when-adding-both-legacy-AMD-and-CPUI.patch diff --git a/0001-x86-fix-a-crash-when-adding-both-legacy-AMD-and-CPUI.patch b/0001-x86-fix-a-crash-when-adding-both-legacy-AMD-and-CPUI.patch new file mode 100644 index 0000000..312e0b1 --- /dev/null +++ b/0001-x86-fix-a-crash-when-adding-both-legacy-AMD-and-CPUI.patch @@ -0,0 +1,60 @@ +From a6f013c52c33d7ade3a2522a84b463657baeebc0 Mon Sep 17 00:00:00 2001 +From: Brice Goglin +Date: Sun, 14 Jan 2018 18:01:41 +0100 +Subject: [PATCH] x86: fix a crash when adding both legacy AMD and CPUID +0x04 caches + +The old code increased numcaches in the second case +but it added additional caches at the beginning of the array. +Uninitialized caches in the array caused a divide by zero +(cache->nbthreads_sharing) when used later. + +Only occurs if the CPUID vendor isn't recognized +(neither Intel, nor AMD, nor Zhaoxin) or in case +of clang 4.0 bug on FreeBSD11.1 (#282). + +That's also why the code was crashing on Zhaoxin +instead of just reporting wrong topology (#279). 
+ +Signed-off-by: Brice Goglin +Signed-off-by: Wangxiaomeng +--- + opal/mca/hwloc/hwloc1112/hwloc/src/topology-x86.c | 8 +++++++- + 1 file changed, 7 insertions(+), 1 deletion(-) + +diff --git a/opal/mca/hwloc/hwloc1112/hwloc/src/topology-x86.c b/opal/mca/hwloc/hwloc1112/hwloc/src/topology-x86.c +index ab6de7c..063e007 100644 +--- a/opal/mca/hwloc/hwloc1112/hwloc/src/topology-x86.c ++++ b/opal/mca/hwloc/hwloc1112/hwloc/src/topology-x86.c +@@ -238,6 +238,9 @@ static void look_proc(struct hwloc_backend *backend, struct procinfo *infos, uns + if (cpuid_type != intel && has_topoext(features)) { + unsigned apic_id, node_id, nodes_per_proc, unit_id, cores_per_unit; + ++ /* the code below doesn't want any other cache yet */ ++ assert(!infos->numcaches); ++ + eax = 0x8000001e; + hwloc_x86_cpuid(&eax, &ebx, &ecx, &edx); + infos->apicid = apic_id = eax; +@@ -327,6 +330,8 @@ static void look_proc(struct hwloc_backend *backend, struct procinfo *infos, uns + */ + if (cpuid_type != amd && highest_cpuid >= 0x04) { + unsigned level; ++ unsigned oldnumcaches = infos->numcaches; /* in case we got caches above */ ++ + for (cachenum = 0; ; cachenum++) { + unsigned type; + eax = 0x04; +@@ -356,7 +361,8 @@ static void look_proc(struct hwloc_backend *backend, struct procinfo *infos, uns + } + } + +- cache = infos->cache = malloc(infos->numcaches * sizeof(*infos->cache)); ++ infos->cache = realloc(infos->cache, infos->numcaches * sizeof(*infos->cache)); ++ cache = &infos->cache[oldnumcaches]; + + for (cachenum = 0; ; cachenum++) { + unsigned long linesize, linepart, ways, sets; +-- +2.27.0 + diff --git a/openmpi.spec b/openmpi.spec index 2f3d130..9f80090 100644 --- a/openmpi.spec +++ b/openmpi.spec @@ -1,6 +1,6 @@ Name: openmpi Version: 2.1.1 -Release: 18 +Release: 19 Summary: Open Source High Performance Computing License: BSD and MIT and Romio URL: http://www.open-mpi.org/ @@ -10,6 +10,8 @@ Source2: openmpi.pth.py2 Source3: openmpi.pth.py3 Source4: macros.openmpi +Patch0: 
0001-x86-fix-a-crash-when-adding-both-legacy-AMD-and-CPUI.patch + BuildRequires: gcc-c++, gcc-gfortran BuildRequires: valgrind-devel, hwloc-devel, java-devel, libfabric-devel, papi-devel BuildRequires: libibverbs-devel >= 1.1.3, opensm-devel > 3.3.0 @@ -211,6 +213,9 @@ make check %{_mandir}/%{name_all}/man*/* %changelog +* Wed Sep 7 2022 wangxiaomeng - 2.1.1-19 +- Backport hwloc patch to fix the mpirun coredump (divide by zero in x86 cache discovery) on Hygon + * Mon Dec 07 2020 liuweibo - 2.1.1-18 - Add help package requires to Main -- Gitee