From 0b9f6d775a2ab180e8dd99f43c168a476f9c669f Mon Sep 17 00:00:00 2001 From: root Date: Fri, 13 Sep 2024 14:17:51 +0800 Subject: [PATCH 1/2] add sw64 support --- openmpi-4.1.4-1007-sw.patch | 1714 +++++++++++++++++++++++++++ openmpi-Add-sw64-architecture.patch | 64 + openmpi.spec | 8 +- 3 files changed, 1785 insertions(+), 1 deletion(-) create mode 100644 openmpi-4.1.4-1007-sw.patch create mode 100644 openmpi-Add-sw64-architecture.patch diff --git a/openmpi-4.1.4-1007-sw.patch b/openmpi-4.1.4-1007-sw.patch new file mode 100644 index 0000000..c7a6997 --- /dev/null +++ b/openmpi-4.1.4-1007-sw.patch @@ -0,0 +1,1714 @@ +diff -uNar openmpi-4.1.4.org/aclocal.m4 openmpi-4.1.4.sw/aclocal.m4 +--- openmpi-4.1.4.org/aclocal.m4 2022-05-26 22:34:29.000000000 +0800 ++++ openmpi-4.1.4.sw/aclocal.m4 2024-03-13 19:37:28.134848433 +0800 +@@ -1,4 +1,4 @@ +-# generated automatically by aclocal 1.15 -*- Autoconf -*- ++# generated automatically by aclocal 1.16 -*- Autoconf -*- + + # Copyright (C) 1996-2014 Free Software Foundation, Inc. + +@@ -32,10 +32,10 @@ + # generated from the m4 files accompanying Automake X.Y. + # (This private macro should not be called outside this file.) + AC_DEFUN([AM_AUTOMAKE_VERSION], +-[am__api_version='1.15' ++[am__api_version='1.16' + dnl Some users find AM_AUTOMAKE_VERSION and mistake it for a way to + dnl require some minimum version. Point them to the right macro. +-m4_if([$1], [1.15], [], ++m4_if([$1], [1.16.5], [], + [AC_FATAL([Do not call $0, use AM_INIT_AUTOMAKE([$1]).])])dnl + ]) + +@@ -51,7 +51,7 @@ + # Call AM_AUTOMAKE_VERSION and AM_AUTOMAKE_VERSION so they can be traced. + # This function is AC_REQUIREd by AM_INIT_AUTOMAKE. 
+ AC_DEFUN([AM_SET_CURRENT_AUTOMAKE_VERSION], +-[AM_AUTOMAKE_VERSION([1.15])dnl ++[AM_AUTOMAKE_VERSION([1.16.5])dnl + m4_ifndef([AC_AUTOCONF_VERSION], + [m4_copy([m4_PACKAGE_VERSION], [AC_AUTOCONF_VERSION])])dnl + _AM_AUTOCONF_VERSION(m4_defn([AC_AUTOCONF_VERSION]))]) +diff -uNar openmpi-4.1.4.org/config/config.guess openmpi-4.1.4.sw/config/config.guess +--- openmpi-4.1.4.org/config/config.guess 2022-05-26 22:35:49.000000000 +0800 ++++ openmpi-4.1.4.sw/config/config.guess 2024-03-13 19:37:28.134848433 +0800 +@@ -935,6 +935,14 @@ + UNAME_MACHINE=aarch64_be + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; ++ sw_64:Linux:*:*) ++ case $(sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' /proc/cpuinfo 2>/dev/null) in ++ sw) UNAME_MACHINE=sw_64 ;; ++ esac ++ objdump --private-headers /bin/sh | grep -q ld.so.1 ++ if test "$?" = 0 ; then LIBC=gnulibc1 ; fi ++ echo "$UNAME_MACHINE"-sunway-linux-"$LIBC" ++ exit ;; + alpha:Linux:*:*) + case $(sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' /proc/cpuinfo 2>/dev/null) in + EV5) UNAME_MACHINE=alphaev5 ;; +diff -uNar openmpi-4.1.4.org/config/config.sub openmpi-4.1.4.sw/config/config.sub +--- openmpi-4.1.4.org/config/config.sub 2022-05-26 22:35:49.000000000 +0800 ++++ openmpi-4.1.4.sw/config/config.sub 2024-03-13 19:37:28.134848433 +0800 +@@ -1158,6 +1158,7 @@ + case $cpu in + 1750a | 580 \ + | a29k \ ++ | sw_64 \ + | aarch64 | aarch64_be \ + | abacus \ + | alpha | alphaev[4-8] | alphaev56 | alphaev6[78] \ +diff -uNar openmpi-4.1.4.org/config/make_manpage.pl openmpi-4.1.4.sw/config/make_manpage.pl +--- openmpi-4.1.4.org/config/make_manpage.pl 2022-05-26 22:32:35.000000000 +0800 ++++ openmpi-4.1.4.sw/config/make_manpage.pl 2024-03-13 19:37:28.134848433 +0800 +@@ -13,7 +13,7 @@ + use Getopt::Long; + + my $package_name; +-my $package_version; ++my $package_version='4.1.4'; + my $ompi_date; + my $opal_date; + my $orte_date; +diff -uNar openmpi-4.1.4.org/config/opal_config_asm.m4 openmpi-4.1.4.sw/config/opal_config_asm.m4 +--- 
openmpi-4.1.4.org/config/opal_config_asm.m4 2022-05-26 22:32:35.000000000 +0800 ++++ openmpi-4.1.4.sw/config/opal_config_asm.m4 2024-03-13 19:37:28.134848433 +0800 +@@ -1070,6 +1070,12 @@ + OPAL_CHECK_SYNC_BUILTINS([opal_cv_asm_builtin="BUILTIN_SYNC"], + [AC_MSG_ERROR([No atomic primitives available for $host])]) + ;; ++ ++ sw_64*) ++ opal_cv_asm_arch="SW_64" ++ OPAL_ASM_SUPPORT_64BIT=1 ++ OPAL_GCC_INLINE_ASSIGN='"mov 0, %0" : "=&r"(ret)' ++ ;; + aarch64*) + opal_cv_asm_arch="ARM64" + OPAL_ASM_SUPPORT_64BIT=1 +diff -uNar openmpi-4.1.4.org/configure openmpi-4.1.4.sw/configure +--- openmpi-4.1.4.org/configure 2022-05-26 22:35:48.000000000 +0800 ++++ openmpi-4.1.4.sw/configure 2024-03-13 19:37:28.314848433 +0800 +@@ -7582,7 +7582,7 @@ + # + # Init automake + # +-am__api_version='1.15' ++am__api_version='1.16' + + # Find a good install program. We prefer a C program (faster), + # so one script is as good as another. But avoid the broken or +@@ -40758,6 +40758,13 @@ + + + ;; ++ ++ sw_64*) ++ opal_cv_asm_arch="SW_64" ++ OPAL_ASM_SUPPORT_64BIT=1 ++ OPAL_GCC_INLINE_ASSIGN='"mov 0, %0" : "=&r"(ret)' ++ ;; ++ + aarch64*) + opal_cv_asm_arch="ARM64" + OPAL_ASM_SUPPORT_64BIT=1 +@@ -163992,7 +163999,7 @@ + cat confdefs.h - <<_ACEOF >conftest.$ac_ext + /* end confdefs.h. 
*/ + +-#if !defined(__i386__) && !defined(__x86_64__) && !defined(__PPC__) && !defined(__aarch64__) ++#if !defined(__i386__) && !defined(__x86_64__) && !defined(__PPC__) && !defined(__aarch64__) && !defined(__sw_64__) + #error "platform not supported" + #endif + +@@ -186984,7 +186991,7 @@ + fi + + case "${host}" in +- i?86-*linux*|x86_64*linux*|ia64-*linux*|powerpc-*linux*|powerpc64-*linux*|powerpc64le-*linux*|powerpcle-*linux*|sparc*-*linux*|aarch64-*linux*) ++ i?86-*linux*|x86_64*linux*|ia64-*linux*|powerpc-*linux*|powerpc64-*linux*|powerpc64le-*linux*|powerpcle-*linux*|sparc*-*linux*|aarch64-*linux*|sw_64-*linux*) + if test "$timer_linux_happy" = "yes"; then : + if test -r "/proc/cpuinfo"; then : + timer_linux_happy="yes" +diff -uNar openmpi-4.1.4.org/ompi/mca/io/romio321/romio/aclocal.m4 openmpi-4.1.4.sw/ompi/mca/io/romio321/romio/aclocal.m4 +--- openmpi-4.1.4.org/ompi/mca/io/romio321/romio/aclocal.m4 2022-05-26 22:33:24.000000000 +0800 ++++ openmpi-4.1.4.sw/ompi/mca/io/romio321/romio/aclocal.m4 2024-03-13 19:37:28.334848433 +0800 +@@ -1,4 +1,4 @@ +-# generated automatically by aclocal 1.15 -*- Autoconf -*- ++# generated automatically by aclocal 1.16 -*- Autoconf -*- + + # Copyright (C) 1996-2014 Free Software Foundation, Inc. + +@@ -32,10 +32,10 @@ + # generated from the m4 files accompanying Automake X.Y. + # (This private macro should not be called outside this file.) + AC_DEFUN([AM_AUTOMAKE_VERSION], +-[am__api_version='1.15' ++[am__api_version='1.16' + dnl Some users find AM_AUTOMAKE_VERSION and mistake it for a way to + dnl require some minimum version. Point them to the right macro. +-m4_if([$1], [1.15], [], ++m4_if([$1], [1.16.5], [], + [AC_FATAL([Do not call $0, use AM_INIT_AUTOMAKE([$1]).])])dnl + ]) + +@@ -51,7 +51,7 @@ + # Call AM_AUTOMAKE_VERSION and AM_AUTOMAKE_VERSION so they can be traced. + # This function is AC_REQUIREd by AM_INIT_AUTOMAKE. 
+ AC_DEFUN([AM_SET_CURRENT_AUTOMAKE_VERSION], +-[AM_AUTOMAKE_VERSION([1.15])dnl ++[AM_AUTOMAKE_VERSION([1.16.5])dnl + m4_ifndef([AC_AUTOCONF_VERSION], + [m4_copy([m4_PACKAGE_VERSION], [AC_AUTOCONF_VERSION])])dnl + _AM_AUTOCONF_VERSION(m4_defn([AC_AUTOCONF_VERSION]))]) +diff -uNar openmpi-4.1.4.org/ompi/mca/io/romio321/romio/confdb/config.guess openmpi-4.1.4.sw/ompi/mca/io/romio321/romio/confdb/config.guess +--- openmpi-4.1.4.org/ompi/mca/io/romio321/romio/confdb/config.guess 2022-05-26 22:35:49.000000000 +0800 ++++ openmpi-4.1.4.sw/ompi/mca/io/romio321/romio/confdb/config.guess 2024-03-13 19:37:28.334848433 +0800 +@@ -935,6 +935,14 @@ + UNAME_MACHINE=aarch64_be + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; ++ sw_64:Linux:*:*) ++ case $(sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' /proc/cpuinfo 2>/dev/null) in ++ sw) UNAME_MACHINE=sw_64 ;; ++ esac ++ objdump --private-headers /bin/sh | grep -q ld.so.1 ++ if test "$?" = 0 ; then LIBC=gnulibc1 ; fi ++ echo "$UNAME_MACHINE"-sunway-linux-"$LIBC" ++ exit ;; + alpha:Linux:*:*) + case $(sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' /proc/cpuinfo 2>/dev/null) in + EV5) UNAME_MACHINE=alphaev5 ;; +diff -uNar openmpi-4.1.4.org/ompi/mca/io/romio321/romio/confdb/config.sub openmpi-4.1.4.sw/ompi/mca/io/romio321/romio/confdb/config.sub +--- openmpi-4.1.4.org/ompi/mca/io/romio321/romio/confdb/config.sub 2022-05-26 22:35:49.000000000 +0800 ++++ openmpi-4.1.4.sw/ompi/mca/io/romio321/romio/confdb/config.sub 2024-03-13 19:37:28.334848433 +0800 +@@ -1158,6 +1158,7 @@ + case $cpu in + 1750a | 580 \ + | a29k \ ++ | sw_64 \ + | aarch64 | aarch64_be \ + | abacus \ + | alpha | alphaev[4-8] | alphaev56 | alphaev6[78] \ +diff -uNar openmpi-4.1.4.org/ompi/mca/io/romio321/romio/configure openmpi-4.1.4.sw/ompi/mca/io/romio321/romio/configure +--- openmpi-4.1.4.org/ompi/mca/io/romio321/romio/configure 2022-05-26 22:33:27.000000000 +0800 ++++ openmpi-4.1.4.sw/ompi/mca/io/romio321/romio/configure 2024-03-13 19:37:28.344848433 +0800 +@@ -2985,7 
+2985,7 @@ + + + +-am__api_version='1.15' ++am__api_version='1.16' + + # Find a good install program. We prefer a C program (faster), + # so one script is as good as another. But avoid the broken or +diff -uNar openmpi-4.1.4.org/opal/include/Makefile.in openmpi-4.1.4.sw/opal/include/Makefile.in +--- openmpi-4.1.4.org/opal/include/Makefile.in 2022-05-26 22:35:13.000000000 +0800 ++++ openmpi-4.1.4.sw/opal/include/Makefile.in 2024-03-13 19:37:28.344848433 +0800 +@@ -669,6 +669,7 @@ + opal/sys/cma.h opal/sys/x86_64/atomic.h \ + opal/sys/x86_64/timer.h opal/sys/arm/atomic.h \ + opal/sys/arm/timer.h opal/sys/arm64/atomic.h \ ++ opal/sys/sw_64/timer.h opal/sys/sw_64/atomic.h \ + opal/sys/arm64/timer.h opal/sys/ia32/atomic.h \ + opal/sys/ia32/timer.h opal/sys/powerpc/atomic.h \ + opal/sys/powerpc/timer.h opal/sys/sparcv9/atomic.h \ +@@ -727,6 +728,7 @@ + $(srcdir)/opal/sys/Makefile.am \ + $(srcdir)/opal/sys/arm/Makefile.am \ + $(srcdir)/opal/sys/arm64/Makefile.am \ ++ $(srcdir)/opal/sys/sw_64/Makefile.am \ + $(srcdir)/opal/sys/gcc_builtin/Makefile.am \ + $(srcdir)/opal/sys/ia32/Makefile.am \ + $(srcdir)/opal/sys/powerpc/Makefile.am \ +@@ -1952,6 +1954,7 @@ + opal/sys/x86_64/atomic.h opal/sys/x86_64/timer.h \ + opal/sys/arm/atomic.h opal/sys/arm/timer.h \ + opal/sys/arm64/atomic.h opal/sys/arm64/timer.h \ ++ opal/sys/sw_64/atomic.h opal/sys/sw_64/timer.h \ + opal/sys/ia32/atomic.h opal/sys/ia32/timer.h \ + opal/sys/powerpc/atomic.h opal/sys/powerpc/timer.h \ + opal/sys/sparcv9/atomic.h opal/sys/sparcv9/timer.h \ +@@ -1965,7 +1968,7 @@ + $(MAKE) $(AM_MAKEFLAGS) all-am + + .SUFFIXES: +-$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(srcdir)/opal/Makefile.am $(srcdir)/opal/sys/Makefile.am $(srcdir)/opal/sys/x86_64/Makefile.am $(srcdir)/opal/sys/arm/Makefile.am $(srcdir)/opal/sys/arm64/Makefile.am $(srcdir)/opal/sys/ia32/Makefile.am $(srcdir)/opal/sys/powerpc/Makefile.am $(srcdir)/opal/sys/sparcv9/Makefile.am $(srcdir)/opal/sys/sync_builtin/Makefile.am 
$(srcdir)/opal/sys/gcc_builtin/Makefile.am $(am__configure_deps) ++$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(srcdir)/opal/Makefile.am $(srcdir)/opal/sys/Makefile.am $(srcdir)/opal/sys/x86_64/Makefile.am $(srcdir)/opal/sys/arm/Makefile.am $(srcdir)/opal/sys/arm64/Makefile.am $(srcdir)/opal/sys/sw_64/Makefile.am $(srcdir)/opal/sys/ia32/Makefile.am $(srcdir)/opal/sys/powerpc/Makefile.am $(srcdir)/opal/sys/sparcv9/Makefile.am $(srcdir)/opal/sys/sync_builtin/Makefile.am $(srcdir)/opal/sys/gcc_builtin/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ +@@ -1985,7 +1988,7 @@ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ + esac; +-$(srcdir)/opal/Makefile.am $(srcdir)/opal/sys/Makefile.am $(srcdir)/opal/sys/x86_64/Makefile.am $(srcdir)/opal/sys/arm/Makefile.am $(srcdir)/opal/sys/arm64/Makefile.am $(srcdir)/opal/sys/ia32/Makefile.am $(srcdir)/opal/sys/powerpc/Makefile.am $(srcdir)/opal/sys/sparcv9/Makefile.am $(srcdir)/opal/sys/sync_builtin/Makefile.am $(srcdir)/opal/sys/gcc_builtin/Makefile.am $(am__empty): ++$(srcdir)/opal/Makefile.am $(srcdir)/opal/sys/Makefile.am $(srcdir)/opal/sys/x86_64/Makefile.am $(srcdir)/opal/sys/arm/Makefile.am $(srcdir)/opal/sys/arm64/Makefile.am $(srcdir)/opal/sys/sw_64/Makefile.am $(srcdir)/opal/sys/ia32/Makefile.am $(srcdir)/opal/sys/powerpc/Makefile.am $(srcdir)/opal/sys/sparcv9/Makefile.am $(srcdir)/opal/sys/sync_builtin/Makefile.am $(srcdir)/opal/sys/gcc_builtin/Makefile.am $(am__empty): + + $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +diff -uNar openmpi-4.1.4.org/opal/include/opal/sys/architecture.h openmpi-4.1.4.sw/opal/include/opal/sys/architecture.h +--- openmpi-4.1.4.org/opal/include/opal/sys/architecture.h 2022-05-26 22:32:35.000000000 +0800 
++++ openmpi-4.1.4.sw/opal/include/opal/sys/architecture.h 2024-03-13 19:37:28.344848433 +0800 +@@ -42,6 +42,7 @@ + #define OPAL_MIPS 0070 + #define OPAL_ARM 0100 + #define OPAL_ARM64 0101 ++#define OPAL_SW_64 0120 + #define OPAL_S390 0110 + #define OPAL_S390X 0111 + #define OPAL_BUILTIN_SYNC 0200 +diff -uNar openmpi-4.1.4.org/opal/include/opal/sys/atomic.h openmpi-4.1.4.sw/opal/include/opal/sys/atomic.h +--- openmpi-4.1.4.org/opal/include/opal/sys/atomic.h 2022-05-26 22:32:35.000000000 +0800 ++++ openmpi-4.1.4.sw/opal/include/opal/sys/atomic.h 2024-03-13 19:37:28.344848433 +0800 +@@ -165,6 +165,8 @@ + #include "opal/sys/arm/atomic.h" + #elif OPAL_ASSEMBLY_ARCH == OPAL_ARM64 + #include "opal/sys/arm64/atomic.h" ++#elif OPAL_ASSEMBLY_ARCH == OPAL_SW_64 ++#include "opal/sys/sw_64/atomic.h" + #elif OPAL_ASSEMBLY_ARCH == OPAL_IA32 + #include "opal/sys/ia32/atomic.h" + #elif OPAL_ASSEMBLY_ARCH == OPAL_IA64 +diff -uNar openmpi-4.1.4.org/opal/include/opal/sys/cma.h openmpi-4.1.4.sw/opal/include/opal/sys/cma.h +--- openmpi-4.1.4.org/opal/include/opal/sys/cma.h 2022-05-26 22:32:35.000000000 +0800 ++++ openmpi-4.1.4.sw/opal/include/opal/sys/cma.h 2024-03-13 19:37:28.344848433 +0800 +@@ -64,6 +64,10 @@ + #define __NR_process_vm_readv 270 + #define __NR_process_vm_writev 271 + ++#elif OPAL_ASSEMBLY_ARCH == OPAL_SW_64 ++#define __NR_process_vm_readv 504 ++#define __NR_process_vm_writev 505 ++ + #elif OPAL_ASSEMBLY_ARCH == OPAL_MIPS + + #if _MIPS_SIM == _MIPS_SIM_ABI64 +diff -uNar openmpi-4.1.4.org/opal/include/opal/sys/Makefile.am openmpi-4.1.4.sw/opal/include/opal/sys/Makefile.am +--- openmpi-4.1.4.org/opal/include/opal/sys/Makefile.am 2022-05-26 22:32:35.000000000 +0800 ++++ openmpi-4.1.4.sw/opal/include/opal/sys/Makefile.am 2024-03-13 19:37:28.344848433 +0800 +@@ -34,6 +34,7 @@ + include opal/sys/x86_64/Makefile.am + include opal/sys/arm/Makefile.am + include opal/sys/arm64/Makefile.am ++include opal/sys/sw_64/Makefile.am + include opal/sys/ia32/Makefile.am + include 
opal/sys/powerpc/Makefile.am + include opal/sys/sparcv9/Makefile.am +diff -uNar openmpi-4.1.4.org/opal/include/opal/sys/sw_64/atomic.h openmpi-4.1.4.sw/opal/include/opal/sys/sw_64/atomic.h +--- openmpi-4.1.4.org/opal/include/opal/sys/sw_64/atomic.h 1970-01-01 08:00:00.000000000 +0800 ++++ openmpi-4.1.4.sw/opal/include/opal/sys/sw_64/atomic.h 2024-03-13 21:16:36.744847015 +0800 +@@ -0,0 +1,500 @@ ++/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ ++/* ++ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana ++ * University Research and Technology ++ * Corporation. All rights reserved. ++ * Copyright (c) 2004-2005 The University of Tennessee and The University ++ * of Tennessee Research Foundation. All rights ++ * reserved. ++ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, ++ * University of Stuttgart. All rights reserved. ++ * Copyright (c) 2004-2005 The Regents of the University of California. ++ * All rights reserved. ++ * Copyright (c) 2010 IBM Corporation. All rights reserved. ++ * Copyright (c) 2010 ARM ltd. All rights reserved. ++ * Copyright (c) 2016-2017 Los Alamos National Security, LLC. All rights ++ * reserved. 
++ * $COPYRIGHT$ ++ * ++ * Additional copyrights may follow ++ * ++ * $HEADER$ ++ */ ++ ++#if !defined(OPAL_SYS_ARCH_ATOMIC_H) ++ ++#define OPAL_SYS_ARCH_ATOMIC_H 1 ++ ++#if OPAL_GCC_INLINE_ASSEMBLY ++ ++#define OPAL_HAVE_ATOMIC_MEM_BARRIER 1 ++#define OPAL_HAVE_ATOMIC_LLSC_32 1 ++#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 1 ++#define OPAL_HAVE_ATOMIC_SWAP_32 1 ++#define OPAL_HAVE_ATOMIC_MATH_32 1 ++#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 1 ++#define OPAL_HAVE_ATOMIC_SWAP_64 1 ++#define OPAL_HAVE_ATOMIC_LLSC_64 1 ++#define OPAL_HAVE_ATOMIC_ADD_32 1 ++#define OPAL_HAVE_ATOMIC_AND_32 1 ++#define OPAL_HAVE_ATOMIC_OR_32 1 ++#define OPAL_HAVE_ATOMIC_XOR_32 1 ++#define OPAL_HAVE_ATOMIC_SUB_32 1 ++#define OPAL_HAVE_ATOMIC_ADD_64 1 ++#define OPAL_HAVE_ATOMIC_AND_64 1 ++#define OPAL_HAVE_ATOMIC_OR_64 1 ++#define OPAL_HAVE_ATOMIC_XOR_64 1 ++#define OPAL_HAVE_ATOMIC_SUB_64 1 ++ ++#define MB() __asm__ __volatile__ ("memb" : : : "memory") ++#define RMB() __asm__ __volatile__ ("memb" : : : "memory") ++#define WMB() __asm__ __volatile__ ("memb" : : : "memory") ++ ++/********************************************************************** ++ * ++ * Memory Barriers ++ * ++ *********************************************************************/ ++ ++static inline void opal_atomic_mb (void) ++{ ++ MB(); ++} ++ ++static inline void opal_atomic_rmb (void) ++{ ++ RMB(); ++} ++ ++static inline void opal_atomic_wmb (void) ++{ ++ WMB(); ++} ++ ++static inline void opal_atomic_isync (void) ++{ ++ __asm__ __volatile__ ("memb"); ++} ++ ++/********************************************************************** ++ * ++ * Atomic math operations ++ * ++ *********************************************************************/ ++ ++static inline bool opal_atomic_compare_exchange_strong_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval) ++{ ++ int32_t prev, tmp; ++ bool ret; ++ int32_t tmp1, tmp2; ++ ++ __asm__ __volatile__ (" ldi %2, %4 \n" //tmp1=addr ++ "1: lldl %0, 0(%2) \n" 
//ret=*tmp1 ++ " cmpeq %0, %5, %3 \n" ++ " mov %6, %1 \n" //tmp = newval ++ " lstw %1, 0(%2) \n" //*tmp=tmp1 ++ " beq %3,2f \n" //if(tmp2 == 0) goto 2 ++ " beq %1, 1b \n" //if(tmp == 0) goto 1, if store option failed goto 1 ++ "2: \n" ++ : "=&r" (prev), "=&r" (tmp), "=&r" (tmp1), "=&r" (tmp2) ++ : "r" (addr), "r" (*oldval), "r" (newval) ++ : "cc", "memory"); ++ ++ ret = (prev == *oldval); ++ *oldval = prev; ++ return ret; ++} ++ ++static inline int32_t opal_atomic_swap_32(volatile int32_t *addr, int32_t newval) ++{ ++ int32_t ret, tmp; ++ int32_t tmp1; ++ ++ __asm__ __volatile__ (" ldi %2, %3 \n" //tmp1 = addr ++ "1: lldw %0, 0(%2) \n" //ret=*tmp1 ++ " mov %4, %1 \n" //tmp=newval ++ " lstw %1, 0(%2) \n" //*tmp1=tmp ++ " beq %1, 1b \n" //if(tmp == 0) goto 1, start again on atomic error ++ : "=&r" (ret), "=&r" (tmp),"=&r" (tmp1) ++ : "r" (addr), "r" (newval) ++ : "cc", "memory"); ++ ++ return ret; ++} ++ ++/* these two functions aren't inlined in the non-gcc case because then ++ there would be two function calls (since neither cmpset_32 nor ++ atomic_?mb can be inlined). 
Instead, we "inline" them by hand in ++ the assembly, meaning there is one function call overhead instead ++ of two */ ++static inline bool opal_atomic_compare_exchange_strong_acq_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval) ++{ ++ int32_t prev, tmp; ++ bool ret; ++ int32_t tmp1, tmp2; ++ ++ __asm__ __volatile__ (" ldi %2, %4 \n" //tmp1=addr ++ "1: lldw %0, 0(%2) \n" //ret=*tmp1 ++ " cmpeq %0, %5, %3 \n" ++ " mov %6, %1 \n" //tmp=newval ++ " lstw %1, 0(%2) \n" //*tmp1=tmp ++ " beq %3, 2f \n" //if(tmp2 == 0) goto 2 ++ " beq %1, 1b \n" //if(tmp == 0) goto 1, if store option failed goto 1 ++ "2: \n" ++ : "=&r" (prev), "=&r" (tmp), "=&r" (tmp1), "=&r" (tmp2) ++ : "r" (addr), "r" (*oldval), "r" (newval) ++ : "cc", "memory"); ++ ++ ret = (prev == *oldval); ++ *oldval = prev; ++ return ret; ++} ++ ++ ++static inline bool opal_atomic_compare_exchange_strong_rel_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval) ++{ ++ int32_t prev, tmp; ++ bool ret; ++ int32_t tmp1, tmp2; ++ ++ __asm__ __volatile__ (" ldi %2, %4 \n" //tmp1=addr ++ "1: lldw %0, 0(%2) \n" //ret=*tmp1 ++ " cmpeq %0, %5, %3 \n" ++ " mov %6, %1 \n" //tmp=newval ++ " lstw %1, 0(%2) \n" //*tmp1=tmp ++ " beq %3, 2f \n" //if(tmp2 == 0) goto 2 ++ " beq %1, 1b \n" //if(tmp == 0) goto 1, if store option failed goto 1 ++ "2: \n" ++ : "=&r" (prev), "=&r" (tmp), "=&r" (tmp1), "=&r" (tmp2) ++ : "r" (addr), "r" (*oldval), "r" (newval) ++ : "cc", "memory"); ++ ++ ret = (prev == *oldval); ++ *oldval = prev; ++ return ret; ++} ++ ++#define opal_atomic_ll_32(addr, ret) \ ++ do { \ ++ volatile int32_t *_addr = (addr); \ ++ int32_t _ret; \ ++ \ ++ __asm__ __volatile__ ( \ ++ "ldl %0,0(%1) \n" \ ++ : "=&r" (ret) \ ++ : "m" (*addr)); \ ++ \ ++ ret = (typeof(ret)) _ret; \ ++ } while (0) ++ ++#define opal_atomic_sc_32(addr, newval, ret) \ ++ do { \ ++ volatile int32_t *_addr = (addr); \ ++ int32_t _newval = (int32_t) newval; \ ++ int _ret; \ ++ \ ++ __asm__ __volatile__ ( \ ++ "stl %2,0(%1) \n" \ ++ : "=&r" 
(ret) \ ++ : "m" (*addr), "r" (newval) \ ++ : "memory"); \ ++ \ ++ ret = (_ret == 0); \ ++ } while (0) ++ ++static inline bool opal_atomic_compare_exchange_strong_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval) ++{ ++ int64_t prev; ++ int tmp; ++ bool ret; ++ int64_t tmp1, tmp2; ++ ++ __asm__ __volatile__ (" ldi %2, %4 \n" //tmp1=addr ++ "1: lldl %0, 0(%2) \n" //ret=*tmp1 ++ " cmpeq %0, %5, %3 \n" ++ " mov %6, %1 \n" //tmp = newval ++ " lstw %1, 0(%2) \n" //*tmp=tmp1 ++ " beq %3,2f \n" //if(tmp2 == 0) goto 2 ++ " beq %1, 1b \n" //if(tmp == 0) goto 1, if store option failed goto 1 ++ "2: \n" ++ : "=&r" (prev), "=&r" (tmp), "=&r" (tmp1), "=&r" (tmp2) ++ : "r" (addr), "r" (*oldval), "r" (newval) ++ : "cc", "memory"); ++ ++ ret = (prev == *oldval); ++ *oldval = prev; ++ return ret; ++} ++ ++static inline int64_t opal_atomic_swap_64 (volatile int64_t *addr, int64_t newval) ++{ ++ int64_t ret; ++ int tmp; ++ int64_t tmp1; ++ ++ __asm__ __volatile__ (" ldi %2, %3 \n" //tmp1 = addr ++ "1: lldw %0, 0(%2) \n" //ret=*tmp1 ++ " mov %4, %1 \n" //tmp=newval ++ " lstw %1, 0(%2) \n" //*tmp1=tmp ++ " beq %1, 1b \n" //if(tmp == 0) goto 1, start again on atomic error ++ : "=&r" (ret), "=&r" (tmp),"=&r" (tmp1) ++ : "r" (addr), "r" (newval) ++ : "cc", "memory"); ++ ++ return ret; ++} ++ ++/* these two functions aren't inlined in the non-gcc case because then ++ there would be two function calls (since neither cmpset_64 nor ++ atomic_?mb can be inlined). 
Instead, we "inline" them by hand in ++ the assembly, meaning there is one function call overhead instead ++ of two */ ++static inline bool opal_atomic_compare_exchange_strong_acq_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval) ++{ ++ int64_t prev; ++ int tmp; ++ bool ret; ++ int64_t tmp1, tmp2; ++ ++ __asm__ __volatile__ (" ldi %2, %4 \n" //tmp1=addr ++ "1: lldl %0, 0(%2) \n" //ret=*tmp1 ++ " cmpeq %0, %5, %3 \n" ++ " mov %6, %1 \n" //tmp = newval ++ " lstw %1, 0(%2) \n" //*tmp=tmp1 ++ " beq %3,2f \n" //if(tmp2 == 0) goto 2 ++ " beq %1, 1b \n" //if(tmp == 0) goto 1, if store option failed goto 1 ++ "2: \n" ++ : "=&r" (prev), "=&r" (tmp), "=&r" (tmp1), "=&r" (tmp2) ++ : "r" (addr), "r" (*oldval), "r" (newval) ++ : "cc", "memory"); ++ ++ ret = (prev == *oldval); ++ *oldval = prev; ++ return ret; ++} ++ ++ ++static inline bool opal_atomic_compare_exchange_strong_rel_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval) ++{ ++ int64_t prev; ++ int tmp; ++ bool ret; ++ int64_t tmp1, tmp2; ++ ++ __asm__ __volatile__ (" ldi %2, %4 \n" //tmp1=addr ++ "1: lldl %0, 0(%2) \n" //ret=*tmp1 ++ " cmpeq %0, %5, %3 \n" ++ " mov %6, %1 \n" //tmp = newval ++ " lstw %1, 0(%2) \n" //*tmp=tmp1 ++ " beq %3,2f \n" //if(tmp2 == 0) goto 2 ++ " beq %1, 1b \n" //if(tmp == 0) goto 1, if store option failed goto 1 ++ "2: \n" ++ : "=&r" (prev), "=&r" (tmp), "=&r" (tmp1), "=&r" (tmp2) ++ : "r" (addr), "r" (*oldval), "r" (newval) ++ : "cc", "memory"); ++ ++ ret = (prev == *oldval); ++ *oldval = prev; ++ return ret; ++} ++ ++#define opal_atomic_ll_64(addr, ret) \ ++ do { \ ++ volatile int64_t *_addr = (addr); \ ++ int64_t _ret; \ ++ \ ++ __asm__ __volatile__ ("ldi $1, %1 \n" \ ++ "lldl %0,0($1) \n" \ ++ : "=&r" (ret) \ ++ : "m" (*addr)); \ ++ \ ++ ret = (typeof(ret)) _ret; \ ++ } while (0) ++ ++#define opal_atomic_sc_64(addr, newval, ret) \ ++ do { \ ++ volatile int64_t *_addr = (addr); \ ++ int64_t _newval = (int64_t) newval; \ ++ int _ret; \ ++ \ ++ __asm__ __volatile__ ("stl 
%2,0(%1) \n" \ ++ : "=&r" (_ret) \ ++ : "r" (_addr), "r" (_newval) \ ++ : "cc", "memory"); \ ++ \ ++ ret = (_ret == 0); \ ++ } while (0) ++ ++static inline int32_t opal_atomic_fetch_add_32(volatile int32_t *addr, int32_t value) ++{ ++ int32_t newval, old; ++ unsigned long ptr; ++ ++ __asm__ __volatile__(" ldi %2, %3 \n"//ptr = addr ++ "1: lldw %1, 0(%2) \n"//old = *ptr ++ " addw %1, %4, %0 \n"//newval = old + value ++ " lstw %0, 0(%2) \n"//*ptr = newval(with atomic check) ++ " beq %0, 1b \n"//if(tmp == 0) goto 1, start again on atomic error ++ : "=&r" (newval), "=&r" (old), "=&r" (ptr) ++ : "r" (addr), "r" (value) ++ : "memory"); ++ ++ return old; ++} ++ ++static inline int32_t opal_atomic_fetch_sub_32(volatile int32_t *addr, int32_t value) ++{ ++ int32_t newval, old; ++ unsigned long ptr; ++ ++ __asm__ __volatile__(" ldi %2, %3 \n"//ptr = addr ++ "1: lldw %1, 0(%2) \n"//old = *ptr ++ " subw %1, %4, %0 \n"//newval = old - value ++ " lstw %0, 0(%2) \n"//*ptr = newval(with atomic check) ++ " beq %0, 1b \n"//if(tmp == 0) goto 1, start again on atomic error ++ : "=&r" (newval), "=&r" (old), "=&r" (ptr) ++ : "r" (addr), "r" (value) ++ : "memory"); ++ ++ return old; ++} ++ ++static inline int32_t opal_atomic_fetch_and_32(volatile int32_t *addr, int32_t value) ++{ ++ int32_t newval, old; ++ unsigned long ptr; ++ ++ __asm__ __volatile__(" ldi %2, %3 \n"//ptr = addr ++ "1: lldw %1, 0(%2) \n"//old = *ptr ++ " and %1, %4, %0 \n"//newval = old & value ++ " lstw %0, 0(%2) \n"//*ptr = newval(with atomic check) ++ " beq %0, 1b \n"//if(tmp == 0) goto 1, start again on atomic error ++ : "=&r" (newval), "=&r" (old),"=&r" (ptr) ++ : "r" (addr), "r" (value) ++ : "memory"); ++ ++ return old; ++} ++ ++static inline int32_t opal_atomic_fetch_or_32(volatile int32_t *addr, int32_t value) ++{ ++ int32_t newval, old; ++ unsigned long ptr; ++ ++ __asm__ __volatile__(" ldi %2, %3 \n"//ptr = addr ++ "1: lldw %1, 0(%2) \n"//old = *ptr ++ " bis %1, %4, %0 \n"//newval = old | value ++ " lstw %0, 
0(%2) \n"//*ptr = newval(with atomic check) ++ " beq %0, 1b \n"//if(tmp == 0) goto 1, start again on atomic error ++ : "=&r" (newval), "=&r" (old), "=&r" (ptr) ++ : "r" (addr), "r" (value) ++ : "memory"); ++ ++ return old; ++} ++ ++static inline int32_t opal_atomic_fetch_xor_32(volatile int32_t *addr, int32_t value) ++{ ++ int32_t newval, old; ++ unsigned long ptr; ++ ++ __asm__ __volatile__(" ldi %2, %3 \n"//ptr = addr ++ "1: lldw %1, 0(%2) \n"//old = *ptr ++ " xor %1, %4, %0 \n"//newval = old ^ value ++ " lstw %0, 0(%2) \n"//*ptr = newval(with atomic check) ++ " beq %0, 1b \n"//if(tmp == 0) goto 1, start again on atomic error ++ : "=&r" (newval), "=&r" (old), "=&r" (ptr) ++ : "r" (addr), "r" (value) ++ : "memory"); ++ ++ return old; ++} ++ ++static inline int64_t opal_atomic_fetch_add_64(volatile int64_t *addr, int64_t value) ++{ ++ int64_t newval, old; ++ unsigned long ptr; ++ ++ __asm__ __volatile__(" ldi %2, %3 \n"//ptr = addr ++ "1: lldl %1, 0(%2) \n"//old = *ptr ++ " addl %1, %4, %0 \n"//newval = old + value ++ " lstl %0, 0(%2) \n"//*ptr = newval(with atomic check) ++ " beq %0, 1b \n"//if(tmp == 0) goto 1, start again on atomic error ++ : "=&r" (newval), "=&r" (old), "=&r" (ptr) ++ : "r" (addr), "r" (value) ++ : "memory"); ++ ++ return old; ++} ++ ++static inline int64_t opal_atomic_fetch_sub_64(volatile int64_t *addr, int64_t value) ++{ ++ int64_t newval, old; ++ unsigned long ptr; ++ ++ __asm__ __volatile__(" ldi %2, %3 \n"//ptr = addr ++ "1: lldl %1, 0(%2) \n"//old = *ptr ++ " subl %1, %4, %0 \n"//newval = old - value ++ " lstl %0, 0(%2) \n"//*ptr = newval(with atomic check) ++ " beq %0, 1b \n"//if(tmp == 0) goto 1, start again on atomic error ++ : "=&r" (newval), "=&r" (old),"=&r" (ptr) ++ : "r" (addr), "r" (value) ++ : "memory"); ++ ++ return old; ++} ++ ++static inline int64_t opal_atomic_fetch_and_64(volatile int64_t *addr, int64_t value) ++{ ++ int64_t newval, old; ++ unsigned long ptr; ++ ++ __asm__ __volatile__(" ldi %2, %3 \n"//ptr = addr ++ "1: 
lldl %1, 0(%2) \n"//old = *ptr ++ " and %1, %4, %0 \n"//newval = old & value ++ " lstl %0, 0(%2) \n"//*ptr = newval(with atomic check) ++ " beq %0, 1b \n"//if(tmp == 0) goto 1, start again on atomic error ++ : "=&r" (newval), "=&r" (old), "=&r" (ptr) ++ : "r" (addr), "r" (value) ++ : "memory"); ++ ++ return old; ++} ++ ++static inline int64_t opal_atomic_fetch_or_64(volatile int64_t *addr, int64_t value) ++{ ++ int64_t newval, old; ++ unsigned long ptr; ++ ++ __asm__ __volatile__(" ldi %2, %3 \n"//ptr = addr ++ "1: lldl %1, 0(%2) \n"//old = *ptr ++ " bis %1, %4, %0 \n"//newval = old | value ++ " lstl %0, 0(%2) \n"//*ptr = newval(with atomic check) ++ " beq %0, 1b \n"//if(tmp == 0) goto 1, start again on atomic error ++ : "=&r" (newval), "=&r" (old), "=&r" (ptr) ++ : "r" (addr), "r" (value) ++ : "memory"); ++ ++ return old; ++} ++ ++static inline int64_t opal_atomic_fetch_xor_64(volatile int64_t *addr, int64_t value) ++{ ++ int64_t newval, old; ++ unsigned long ptr; ++ ++ __asm__ __volatile__(" ldi %2, %3 \n"//ptr = addr ++ "1: lldl %1, 0(%2) \n"//old = *ptr ++ " xor %1, %4, %0 \n"//newval = old ^ value ++ " lstl %0, 0(%2) \n"//*ptr = newval(with atomic check) ++ " beq %0, 1b \n"//if(tmp == 0) goto 1, start again on atomic error ++ : "=&r" (newval), "=&r" (old),"=&r" (ptr) ++ : "r" (addr), "r" (value) ++ : "memory"); ++ ++ return old; ++} ++ ++#endif /* OPAL_GCC_INLINE_ASSEMBLY */ ++ ++#endif /* ! OPAL_SYS_ARCH_ATOMIC_H */ +diff -uNar openmpi-4.1.4.org/opal/include/opal/sys/sw_64/Makefile.am openmpi-4.1.4.sw/opal/include/opal/sys/sw_64/Makefile.am +--- openmpi-4.1.4.org/opal/include/opal/sys/sw_64/Makefile.am 1970-01-01 08:00:00.000000000 +0800 ++++ openmpi-4.1.4.sw/opal/include/opal/sys/sw_64/Makefile.am 2024-03-13 19:37:28.344848433 +0800 +@@ -0,0 +1,24 @@ ++# ++# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana ++# University Research and Technology ++# Corporation. All rights reserved. 
++# Copyright (c) 2004-2008 The University of Tennessee and The University ++# of Tennessee Research Foundation. All rights ++# reserved. ++# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, ++# University of Stuttgart. All rights reserved. ++# Copyright (c) 2004-2005 The Regents of the University of California. ++# All rights reserved. ++# $COPYRIGHT$ ++# ++# Additional copyrights may follow ++# ++# $HEADER$ ++# ++ ++# This makefile.am does not stand on its own - it is included from opal/include/Makefile.am ++ ++headers += \ ++ opal/sys/sw_64/atomic.h \ ++ opal/sys/sw_64/timer.h ++ +diff -uNar openmpi-4.1.4.org/opal/include/opal/sys/sw_64/timer.h openmpi-4.1.4.sw/opal/include/opal/sys/sw_64/timer.h +--- openmpi-4.1.4.org/opal/include/opal/sys/sw_64/timer.h 1970-01-01 08:00:00.000000000 +0800 ++++ openmpi-4.1.4.sw/opal/include/opal/sys/sw_64/timer.h 2024-03-13 19:37:28.344848433 +0800 +@@ -0,0 +1,45 @@ ++/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ ++/* ++ * Copyright (c) 2008 The University of Tennessee and The University ++ * of Tennessee Research Foundation. All rights ++ * reserved. ++ * Copyright (c) 2016 Broadcom Limited. All rights reserved. ++ * Copyright (c) 2016 Los Alamos National Security, LLC. All rights ++ * reserved. ++ * $COPYRIGHT$ ++ * ++ * Additional copyrights may follow ++ * ++ * $HEADER$ ++ */ ++ ++#ifndef OPAL_SYS_ARCH_TIMER_H ++#define OPAL_SYS_ARCH_TIMER_H 1 ++ ++#include ++ ++typedef uint64_t opal_timer_t; ++ ++static inline opal_timer_t ++opal_sys_timer_get_cycles(void) ++{ ++ opal_timer_t ret; ++ ++ __asm__ __volatile__ ("memb" ::: "memory"); ++ __asm__ __volatile__ ("rtc %0" : "=r" (ret)); ++ ++ return ret; ++} ++ ++ ++static inline opal_timer_t ++opal_sys_timer_freq(void) ++{ ++ opal_timer_t freq; ++ __asm__ __volatile__ ("rtc %0" : "=r" (freq)); ++ return (opal_timer_t)(freq); ++} ++ ++#define OPAL_HAVE_SYS_TIMER_GET_CYCLES 1 ++ ++#endif /* ! 
OPAL_SYS_ARCH_TIMER_H */ +diff -uNar openmpi-4.1.4.org/opal/include/opal/sys/timer.h openmpi-4.1.4.sw/opal/include/opal/sys/timer.h +--- openmpi-4.1.4.org/opal/include/opal/sys/timer.h 2022-05-26 22:32:35.000000000 +0800 ++++ openmpi-4.1.4.sw/opal/include/opal/sys/timer.h 2024-03-13 19:37:28.344848433 +0800 +@@ -76,6 +76,8 @@ + #include "opal/sys/arm/timer.h" + #elif OPAL_ASSEMBLY_ARCH == OPAL_ARM64 + #include "opal/sys/arm64/timer.h" ++#elif OPAL_ASSEMBLY_ARCH == OPAL_SW_64 ++#include "opal/sys/sw_64/timer.h" + #elif OPAL_ASSEMBLY_ARCH == OPAL_IA32 + #include "opal/sys/ia32/timer.h" + #elif OPAL_ASSEMBLY_ARCH == OPAL_IA64 +diff -uNar openmpi-4.1.4.org/opal/mca/event/libevent2022/libevent/aclocal.m4 openmpi-4.1.4.sw/opal/mca/event/libevent2022/libevent/aclocal.m4 +--- openmpi-4.1.4.org/opal/mca/event/libevent2022/libevent/aclocal.m4 2022-05-26 22:32:54.000000000 +0800 ++++ openmpi-4.1.4.sw/opal/mca/event/libevent2022/libevent/aclocal.m4 2024-03-13 19:37:28.354848433 +0800 +@@ -1,4 +1,4 @@ +-# generated automatically by aclocal 1.15 -*- Autoconf -*- ++# generated automatically by aclocal 1.16 -*- Autoconf -*- + + # Copyright (C) 1996-2014 Free Software Foundation, Inc. + +@@ -32,10 +32,10 @@ + # generated from the m4 files accompanying Automake X.Y. + # (This private macro should not be called outside this file.) + AC_DEFUN([AM_AUTOMAKE_VERSION], +-[am__api_version='1.15' ++[am__api_version='1.16' + dnl Some users find AM_AUTOMAKE_VERSION and mistake it for a way to + dnl require some minimum version. Point them to the right macro. +-m4_if([$1], [1.15], [], ++m4_if([$1], [1.16.5], [], + [AC_FATAL([Do not call $0, use AM_INIT_AUTOMAKE([$1]).])])dnl + ]) + +@@ -51,7 +51,7 @@ + # Call AM_AUTOMAKE_VERSION and AM_AUTOMAKE_VERSION so they can be traced. + # This function is AC_REQUIREd by AM_INIT_AUTOMAKE. 
+ AC_DEFUN([AM_SET_CURRENT_AUTOMAKE_VERSION], +-[AM_AUTOMAKE_VERSION([1.15])dnl ++[AM_AUTOMAKE_VERSION([1.16.5])dnl + m4_ifndef([AC_AUTOCONF_VERSION], + [m4_copy([m4_PACKAGE_VERSION], [AC_AUTOCONF_VERSION])])dnl + _AM_AUTOCONF_VERSION(m4_defn([AC_AUTOCONF_VERSION]))]) +diff -uNar openmpi-4.1.4.org/opal/mca/event/libevent2022/libevent/config.guess openmpi-4.1.4.sw/opal/mca/event/libevent2022/libevent/config.guess +--- openmpi-4.1.4.org/opal/mca/event/libevent2022/libevent/config.guess 2022-05-26 22:35:49.000000000 +0800 ++++ openmpi-4.1.4.sw/opal/mca/event/libevent2022/libevent/config.guess 2024-03-13 19:37:28.354848433 +0800 +@@ -935,6 +935,14 @@ + UNAME_MACHINE=aarch64_be + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; ++ sw_64:Linux:*:*) ++ case $(sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' /proc/cpuinfo 2>/dev/null) in ++ sw) UNAME_MACHINE=sw_64 ;; ++ esac ++ objdump --private-headers /bin/sh | grep -q ld.so.1 ++ if test "$?" = 0 ; then LIBC=gnulibc1 ; fi ++ echo "$UNAME_MACHINE"-sunway-linux-"$LIBC" ++ exit ;; + alpha:Linux:*:*) + case $(sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' /proc/cpuinfo 2>/dev/null) in + EV5) UNAME_MACHINE=alphaev5 ;; +diff -uNar openmpi-4.1.4.org/opal/mca/event/libevent2022/libevent/config.sub openmpi-4.1.4.sw/opal/mca/event/libevent2022/libevent/config.sub +--- openmpi-4.1.4.org/opal/mca/event/libevent2022/libevent/config.sub 2022-05-26 22:35:49.000000000 +0800 ++++ openmpi-4.1.4.sw/opal/mca/event/libevent2022/libevent/config.sub 2024-03-13 19:37:28.354848433 +0800 +@@ -1158,6 +1158,7 @@ + case $cpu in + 1750a | 580 \ + | a29k \ ++ | sw_64 \ + | aarch64 | aarch64_be \ + | abacus \ + | alpha | alphaev[4-8] | alphaev56 | alphaev6[78] \ +diff -uNar openmpi-4.1.4.org/opal/mca/event/libevent2022/libevent/configure openmpi-4.1.4.sw/opal/mca/event/libevent2022/libevent/configure +--- openmpi-4.1.4.org/opal/mca/event/libevent2022/libevent/configure 2022-05-26 22:32:56.000000000 +0800 ++++ 
openmpi-4.1.4.sw/opal/mca/event/libevent2022/libevent/configure 2024-03-13 19:37:28.354848433 +0800 +@@ -2642,7 +2642,7 @@ + + + # Open MPI: changed to one 1 arg AM INIT_AUTOMAKE +-am__api_version='1.15' ++am__api_version='1.16' + + ac_aux_dir= + for ac_dir in "$srcdir" "$srcdir/.." "$srcdir/../.."; do +diff -uNar openmpi-4.1.4.org/opal/mca/pmix/pmix3x/pmix/aclocal.m4 openmpi-4.1.4.sw/opal/mca/pmix/pmix3x/pmix/aclocal.m4 +--- openmpi-4.1.4.org/opal/mca/pmix/pmix3x/pmix/aclocal.m4 2022-05-26 22:33:09.000000000 +0800 ++++ openmpi-4.1.4.sw/opal/mca/pmix/pmix3x/pmix/aclocal.m4 2024-03-13 19:37:28.364848433 +0800 +@@ -1,4 +1,4 @@ +-# generated automatically by aclocal 1.15 -*- Autoconf -*- ++# generated automatically by aclocal 1.16 -*- Autoconf -*- + + # Copyright (C) 1996-2014 Free Software Foundation, Inc. + +@@ -32,10 +32,10 @@ + # generated from the m4 files accompanying Automake X.Y. + # (This private macro should not be called outside this file.) + AC_DEFUN([AM_AUTOMAKE_VERSION], +-[am__api_version='1.15' ++[am__api_version='1.16' + dnl Some users find AM_AUTOMAKE_VERSION and mistake it for a way to + dnl require some minimum version. Point them to the right macro. +-m4_if([$1], [1.15], [], ++m4_if([$1], [1.16.5], [], + [AC_FATAL([Do not call $0, use AM_INIT_AUTOMAKE([$1]).])])dnl + ]) + +@@ -51,7 +51,7 @@ + # Call AM_AUTOMAKE_VERSION and AM_AUTOMAKE_VERSION so they can be traced. + # This function is AC_REQUIREd by AM_INIT_AUTOMAKE. 
+ AC_DEFUN([AM_SET_CURRENT_AUTOMAKE_VERSION], +-[AM_AUTOMAKE_VERSION([1.15])dnl ++[AM_AUTOMAKE_VERSION([1.16.5])dnl + m4_ifndef([AC_AUTOCONF_VERSION], + [m4_copy([m4_PACKAGE_VERSION], [AC_AUTOCONF_VERSION])])dnl + _AM_AUTOCONF_VERSION(m4_defn([AC_AUTOCONF_VERSION]))]) +diff -uNar openmpi-4.1.4.org/opal/mca/pmix/pmix3x/pmix/config/config.guess openmpi-4.1.4.sw/opal/mca/pmix/pmix3x/pmix/config/config.guess +--- openmpi-4.1.4.org/opal/mca/pmix/pmix3x/pmix/config/config.guess 2022-05-26 22:35:49.000000000 +0800 ++++ openmpi-4.1.4.sw/opal/mca/pmix/pmix3x/pmix/config/config.guess 2024-03-13 19:37:28.364848433 +0800 +@@ -935,6 +935,14 @@ + UNAME_MACHINE=aarch64_be + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; ++ sw_64:Linux:*:*) ++ case $(sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' /proc/cpuinfo 2>/dev/null) in ++ sw) UNAME_MACHINE=sw_64 ;; ++ esac ++ objdump --private-headers /bin/sh | grep -q ld.so.1 ++ if test "$?" = 0 ; then LIBC=gnulibc1 ; fi ++ echo "$UNAME_MACHINE"-sunway-linux-"$LIBC" ++ exit ;; + alpha:Linux:*:*) + case $(sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' /proc/cpuinfo 2>/dev/null) in + EV5) UNAME_MACHINE=alphaev5 ;; +diff -uNar openmpi-4.1.4.org/opal/mca/pmix/pmix3x/pmix/config/config.sub openmpi-4.1.4.sw/opal/mca/pmix/pmix3x/pmix/config/config.sub +--- openmpi-4.1.4.org/opal/mca/pmix/pmix3x/pmix/config/config.sub 2022-05-26 22:35:49.000000000 +0800 ++++ openmpi-4.1.4.sw/opal/mca/pmix/pmix3x/pmix/config/config.sub 2024-03-13 19:37:28.364848433 +0800 +@@ -1158,6 +1158,7 @@ + case $cpu in + 1750a | 580 \ + | a29k \ ++ | sw_64 \ + | aarch64 | aarch64_be \ + | abacus \ + | alpha | alphaev[4-8] | alphaev56 | alphaev6[78] \ +diff -uNar openmpi-4.1.4.org/opal/mca/pmix/pmix3x/pmix/config/pmix_config_asm.m4 openmpi-4.1.4.sw/opal/mca/pmix/pmix3x/pmix/config/pmix_config_asm.m4 +--- openmpi-4.1.4.org/opal/mca/pmix/pmix3x/pmix/config/pmix_config_asm.m4 2022-05-26 22:32:35.000000000 +0800 ++++ openmpi-4.1.4.sw/opal/mca/pmix/pmix3x/pmix/config/pmix_config_asm.m4 
2024-03-13 19:37:28.364848433 +0800 +@@ -1051,6 +1051,11 @@ + PMIX_ASM_SUPPORT_64BIT=0 + fi + case "${host}" in ++ sw_64-*) ++ pmix_cv_asm_arch="SW_64" ++ PMIX_ASM_SUPPORT_64BIT=1 ++ PMIX_GCC_INLINE_ASSIGN='"mov 0,%0" : "=&r"(ret)' ++ ;; + x86_64-*x32) + pmix_cv_asm_arch="X86_64" + PMIX_ASM_SUPPORT_64BIT=1 +diff -uNar openmpi-4.1.4.org/opal/mca/pmix/pmix3x/pmix/configure openmpi-4.1.4.sw/opal/mca/pmix/pmix3x/pmix/configure +--- openmpi-4.1.4.org/opal/mca/pmix/pmix3x/pmix/configure 2022-05-26 22:33:16.000000000 +0800 ++++ openmpi-4.1.4.sw/opal/mca/pmix/pmix3x/pmix/configure 2024-03-13 19:37:28.384848433 +0800 +@@ -3629,7 +3629,7 @@ + program_prefix=${target_alias}- + + # Init automake +-am__api_version='1.15' ++am__api_version='1.16' + + # Find a good install program. We prefer a C program (faster), + # so one script is as good as another. But avoid the broken or +diff -uNar openmpi-4.1.4.org/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/architecture.h openmpi-4.1.4.sw/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/architecture.h +--- openmpi-4.1.4.org/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/architecture.h 2022-05-26 22:32:35.000000000 +0800 ++++ openmpi-4.1.4.sw/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/architecture.h 2024-03-13 19:37:28.384848433 +0800 +@@ -37,6 +37,7 @@ + #define PMIX_POWERPC64 0051 + #define PMIX_ARM 0100 + #define PMIX_ARM64 0101 ++#define PMIX_SW_64 0120 + #define PMIX_BUILTIN_GCC 0202 + #define PMIX_BUILTIN_NO 0203 + #define PMIX_BUILTIN_C11 0204 +diff -uNar openmpi-4.1.4.org/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/atomic.h openmpi-4.1.4.sw/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/atomic.h +--- openmpi-4.1.4.org/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/atomic.h 2022-05-26 22:32:35.000000000 +0800 ++++ openmpi-4.1.4.sw/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/atomic.h 2024-03-13 19:37:28.384848433 +0800 +@@ -170,6 +170,8 @@ + #include "src/atomics/sys/x86_64/atomic.h" + #elif PMIX_ASSEMBLY_ARCH == PMIX_ARM + #include
"src/atomics/sys/arm/atomic.h" ++#elif PMIX_ASSEMBLY_ARCH == PMIX_SW_64 ++#include "src/atomics/sys/sw_64/atomic.h" + #elif PMIX_ASSEMBLY_ARCH == PMIX_ARM64 + #include "src/atomics/sys/arm64/atomic.h" + #elif PMIX_ASSEMBLY_ARCH == PMIX_IA32 +diff -uNar openmpi-4.1.4.org/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/cma.h openmpi-4.1.4.sw/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/cma.h +--- openmpi-4.1.4.org/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/cma.h 2022-05-26 22:32:35.000000000 +0800 ++++ openmpi-4.1.4.sw/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/cma.h 2024-03-13 19:37:28.384848433 +0800 +@@ -62,6 +62,9 @@ + #define __NR_process_vm_readv 270 + #define __NR_process_vm_writev 271 + ++#elif PMIX_ASSEMBLY_ARCH == PMIX_SW_64 /* continues the existing #if/#elif chain; a fresh #if here would leave it unterminated */ ++#define __NR_process_vm_readv 504 ++#define __NR_process_vm_writev 505 + + #else + #error "Unsupported architecture for process_vm_readv and process_vm_writev syscalls" +diff -uNar openmpi-4.1.4.org/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/Makefile.include openmpi-4.1.4.sw/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/Makefile.include +--- openmpi-4.1.4.org/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/Makefile.include 2022-05-26 22:32:35.000000000 +0800 ++++ openmpi-4.1.4.sw/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/Makefile.include 2024-03-13 19:37:28.384848433 +0800 +@@ -36,6 +36,7 @@ + include atomics/sys/x86_64/Makefile.include + include atomics/sys/arm/Makefile.include + include atomics/sys/arm64/Makefile.include ++include atomics/sys/sw_64/Makefile.include + include atomics/sys/ia32/Makefile.include + include atomics/sys/powerpc/Makefile.include + include atomics/sys/gcc_builtin/Makefile.include +diff -uNar openmpi-4.1.4.org/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/sw_64/atomic.h openmpi-4.1.4.sw/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/sw_64/atomic.h +--- openmpi-4.1.4.org/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/sw_64/atomic.h 1970-01-01 08:00:00.000000000 +0800 ++++ openmpi-4.1.4.sw/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/sw_64/atomic.h
2024-03-13 21:16:10.364847021 +0800 +@@ -0,0 +1,499 @@ ++/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ ++/* ++ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana ++ * University Research and Technology ++ * Corporation. All rights reserved. ++ * Copyright (c) 2004-2005 The University of Tennessee and The University ++ * of Tennessee Research Foundation. All rights ++ * reserved. ++ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, ++ * University of Stuttgart. All rights reserved. ++ * Copyright (c) 2004-2005 The Regents of the University of California. ++ * All rights reserved. ++ * Copyright (c) 2010 IBM Corporation. All rights reserved. ++ * Copyright (c) 2010 ARM ltd. All rights reserved. ++ * Copyright (c) 2016-2018 Los Alamos National Security, LLC. All rights ++ * reserved. ++ * Copyright (c) 2018 Intel, Inc. All rights reserved. ++ * $COPYRIGHT$ ++ * ++ * Additional copyrights may follow ++ * ++ * $HEADER$ ++ */ ++ ++#if !defined(PMIX_SYS_ARCH_ATOMIC_H) ++ ++#define PMIX_SYS_ARCH_ATOMIC_H 1 ++ ++#if PMIX_GCC_INLINE_ASSEMBLY ++ ++#define PMIX_HAVE_ATOMIC_MEM_BARRIER 1 ++#define PMIX_HAVE_ATOMIC_LLSC_32 1 ++#define PMIX_HAVE_ATOMIC_COMPARE_EXCHANGE_32 1 ++#define PMIX_HAVE_ATOMIC_SWAP_32 1 ++#define PMIX_HAVE_ATOMIC_MATH_32 1 ++#define PMIX_HAVE_ATOMIC_COMPARE_EXCHANGE_64 1 ++#define PMIX_HAVE_ATOMIC_SWAP_64 1 ++#define PMIX_HAVE_ATOMIC_LLSC_64 1 ++#define PMIX_HAVE_ATOMIC_ADD_32 1 ++#define PMIX_HAVE_ATOMIC_AND_32 1 ++#define PMIX_HAVE_ATOMIC_OR_32 1 ++#define PMIX_HAVE_ATOMIC_XOR_32 1 ++#define PMIX_HAVE_ATOMIC_SUB_32 1 ++#define PMIX_HAVE_ATOMIC_ADD_64 1 ++#define PMIX_HAVE_ATOMIC_AND_64 1 ++#define PMIX_HAVE_ATOMIC_OR_64 1 ++#define PMIX_HAVE_ATOMIC_XOR_64 1 ++#define PMIX_HAVE_ATOMIC_SUB_64 1 ++ ++#define PMIXMB() __asm__ __volatile__ ("memb" : : : "memory") ++#define PMIXRMB() __asm__ __volatile__ ("memb" : : : "memory") ++#define PMIXWMB() __asm__ __volatile__ ("memb" : : : "memory") ++ 
++/********************************************************************** ++ * ++ * Memory Barriers ++ * ++ *********************************************************************/ ++ ++static inline void pmix_atomic_mb (void) ++{ ++ PMIXMB(); ++} ++ ++static inline void pmix_atomic_rmb (void) ++{ ++ PMIXRMB(); ++} ++ ++static inline void pmix_atomic_wmb (void) ++{ ++ PMIXWMB(); ++} ++ ++static inline void pmix_atomic_isync (void) ++{ ++ __asm__ __volatile__ ("memb"); ++} ++ ++/********************************************************************** ++ * ++ * Atomic math operations ++ * ++ *********************************************************************/ ++ ++static inline bool pmix_atomic_compare_exchange_strong_32 (pmix_atomic_int32_t *addr, int32_t *oldval, int32_t newval) ++{ ++ int32_t prev, tmp; ++ bool ret; ++ unsigned long tmp1, tmp2; ++ ++ __asm__ __volatile__ (" ldi %2, %4 \n" //tmp1 = addr ++ "1: lldw %0, 0(%2) \n" //prev = *tmp1 (32-bit load-locked, pairs with lstw) ++ " cmpeq %0, %5, %3 \n" //tmp2 = (prev == *oldval) ++ " beq %3, 2f \n" //if(tmp2 == 0) goto 2: mismatch, leave *addr untouched ++ " mov %6, %1 \n" //tmp = newval ++ " lstw %1, 0(%2) \n" //store-conditional *tmp1 = tmp; tmp becomes the success flag ++ " beq %1, 1b \n" //if(tmp == 0) goto 1: the store failed, start again ++ "2: \n" ++ : "=&r" (prev), "=&r" (tmp), "=&r" (tmp1), "=&r" (tmp2) ++ : "r" (addr), "r" (*oldval), "r" (newval) ++ : "cc", "memory"); ++ ++ ret = (prev == *oldval); ++ *oldval = prev; ++ return ret; ++} ++ ++static inline int32_t pmix_atomic_swap_32(pmix_atomic_int32_t *addr, int32_t newval) ++{ ++ int32_t ret, tmp; ++ unsigned long tmp1; ++ ++ __asm__ __volatile__ (" ldi %2, %3 \n" //tmp1 = addr ++ "1: lldw %0, 0(%2) \n" //ret = *tmp1 (load-locked) ++ " mov %4, %1 \n" //tmp = newval ++ " lstw %1, 0(%2) \n" //store-conditional *tmp1 = tmp; tmp becomes the success flag ++ " beq %1, 1b \n" //if(tmp == 0) goto 1: the store failed, start again ++ : "=&r" (ret), "=&r" (tmp),"=&r" (tmp1) ++ : "r" (addr), "r" (newval) ++ : "cc", "memory"); ++ ++ return ret; ++} ++ ++/* these two functions aren't inlined in the non-gcc case because then ++ there would be two function calls (since neither cmpset_32 nor ++
atomic_?mb can be inlined). Instead, we "inline" them by hand in ++ the assembly, meaning there is one function call overhead instead ++ of two. NOTE(review): no memb is issued by the _acq/_rel variants below - confirm that ordering is intended */ ++static inline bool pmix_atomic_compare_exchange_strong_acq_32 (pmix_atomic_int32_t *addr, int32_t *oldval, int32_t newval) ++{ ++ int32_t prev, tmp; ++ bool ret; ++ unsigned long tmp1, tmp2; ++ ++ __asm__ __volatile__ (" ldi %2, %4 \n" //tmp1 = addr ++ "1: lldw %0, 0(%2) \n" //prev = *tmp1 (load-locked) ++ " cmpeq %0, %5, %3 \n" //tmp2 = (prev == *oldval) ++ " beq %3, 2f \n" //if(tmp2 == 0) goto 2: mismatch, leave *addr untouched ++ " mov %6, %1 \n" //tmp = newval ++ " lstw %1, 0(%2) \n" //store-conditional *tmp1 = tmp; tmp becomes the success flag ++ " beq %1, 1b \n" //if(tmp == 0) goto 1: the store failed, start again ++ "2: \n" ++ : "=&r" (prev), "=&r" (tmp), "=&r" (tmp1), "=&r" (tmp2) ++ : "r" (addr), "r" (*oldval), "r" (newval) ++ : "cc", "memory"); ++ ++ ret = (prev == *oldval); ++ *oldval = prev; ++ return ret; ++} ++ ++ ++static inline bool pmix_atomic_compare_exchange_strong_rel_32 (pmix_atomic_int32_t *addr, int32_t *oldval, int32_t newval) ++{ ++ int32_t prev, tmp; ++ bool ret; ++ unsigned long tmp1, tmp2; ++ ++ __asm__ __volatile__ (" ldi %2, %4 \n" //tmp1 = addr ++ "1: lldw %0, 0(%2) \n" //prev = *tmp1 (load-locked) ++ " cmpeq %0, %5, %3 \n" //tmp2 = (prev == *oldval) ++ " beq %3, 2f \n" //if(tmp2 == 0) goto 2: mismatch, leave *addr untouched ++ " mov %6, %1 \n" //tmp = newval ++ " lstw %1, 0(%2) \n" //store-conditional *tmp1 = tmp; tmp becomes the success flag ++ " beq %1, 1b \n" //if(tmp == 0) goto 1: the store failed, start again ++ "2: \n" ++ : "=&r" (prev), "=&r" (tmp), "=&r" (tmp1), "=&r" (tmp2) ++ : "r" (addr), "r" (*oldval), "r" (newval) ++ : "cc", "memory"); ++ ++ ret = (prev == *oldval); ++ *oldval = prev; ++ return ret; ++} ++ ++#define pmix_atomic_ll_32(addr, ret) \ ++ do { \ ++ pmix_atomic_int32_t *_addr = (addr); \ ++ int32_t _ret; \ ++ \ ++ __asm__ __volatile__ ("lldw %0, 0(%1) \n" \ ++ : "=&r" (_ret) \ ++ : "r" (_addr)); \ ++ \ ++ ret = (typeof(ret)) _ret; \ ++ } while (0) ++ ++#define pmix_atomic_sc_32(addr, newval, ret) \ ++ do { \ ++ pmix_atomic_int32_t *_addr = (addr); \ ++ int32_t _newval = (int32_t) newval; \ ++ int32_t _ret = _newval; \ ++ \ ++ __asm__ __volatile__
("lstw %0, 0(%1) \n" \ ++ : "+r" (_ret) \ ++ : "r" (_addr), "r" (_newval) \ ++ : "cc", "memory"); \ ++ \ ++ ret = (_ret != 0); \ ++ } while (0) ++ ++static inline bool pmix_atomic_compare_exchange_strong_64 (pmix_atomic_int64_t *addr, int64_t *oldval, int64_t newval) ++{ ++ int64_t prev; ++ int64_t tmp; ++ bool ret; ++ int64_t tmp1, tmp2; ++ ++ __asm__ __volatile__ (" ldi %2, %4 \n" //tmp1 = addr ++ "1: lldl %0, 0(%2) \n" //prev = *tmp1 (64-bit load-locked) ++ " cmpeq %0, %5, %3 \n" //tmp2 = (prev == *oldval) ++ " beq %3, 2f \n" //if(tmp2 == 0) goto 2: mismatch, leave *addr untouched ++ " mov %6, %1 \n" //tmp = newval ++ " lstl %1, 0(%2) \n" //64-bit store-conditional *tmp1 = tmp; tmp becomes the success flag ++ " beq %1, 1b \n" //if(tmp == 0) goto 1: the store failed, start again ++ "2: \n" ++ : "=&r" (prev), "=&r" (tmp), "=&r" (tmp1), "=&r" (tmp2) ++ : "r" (addr), "r" (*oldval), "r" (newval) ++ : "cc", "memory"); ++ ++ ret = (prev == *oldval); ++ *oldval = prev; ++ return ret; ++} ++ ++static inline int64_t pmix_atomic_swap_64 (pmix_atomic_int64_t *addr, int64_t newval) ++{ ++ int64_t ret; ++ int64_t tmp; ++ int64_t tmp1; ++ ++ __asm__ __volatile__ (" ldi %2, %3 \n" //tmp1 = addr ++ "1: lldl %0, 0(%2) \n" //ret = *tmp1 (64-bit load-locked) ++ " mov %4, %1 \n" //tmp = newval ++ " lstl %1, 0(%2) \n" //64-bit store-conditional *tmp1 = tmp; tmp becomes the success flag ++ " beq %1, 1b \n" //if(tmp == 0) goto 1: the store failed, start again ++ : "=&r" (ret), "=&r" (tmp),"=&r" (tmp1) ++ : "r" (addr), "r" (newval) ++ : "cc", "memory"); ++ ++ return ret; ++} ++ ++/* these two functions aren't inlined in the non-gcc case because then ++ there would be two function calls (since neither cmpset_64 nor ++ atomic_?mb can be inlined).
Instead, we "inline" them by hand in ++ the assembly, meaning there is one function call overhead instead ++ of two. NOTE(review): no memb is issued by the _acq/_rel variants below - confirm that ordering is intended */ ++static inline bool pmix_atomic_compare_exchange_strong_acq_64 (pmix_atomic_int64_t *addr, int64_t *oldval, int64_t newval) ++{ ++ int64_t prev; ++ int64_t tmp; ++ bool ret; ++ int64_t tmp1, tmp2; ++ ++ __asm__ __volatile__ (" ldi %2, %4 \n" //tmp1 = addr ++ "1: lldl %0, 0(%2) \n" //prev = *tmp1 (64-bit load-locked) ++ " cmpeq %0, %5, %3 \n" //tmp2 = (prev == *oldval) ++ " beq %3, 2f \n" //if(tmp2 == 0) goto 2: mismatch, leave *addr untouched ++ " mov %6, %1 \n" //tmp = newval ++ " lstl %1, 0(%2) \n" //64-bit store-conditional *tmp1 = tmp; tmp becomes the success flag ++ " beq %1, 1b \n" //if(tmp == 0) goto 1: the store failed, start again ++ "2: \n" ++ : "=&r" (prev), "=&r" (tmp), "=&r" (tmp1), "=&r" (tmp2) ++ : "r" (addr), "r" (*oldval), "r" (newval) ++ : "cc", "memory"); ++ ++ ret = (prev == *oldval); ++ *oldval = prev; ++ return ret; ++} ++ ++ ++static inline bool pmix_atomic_compare_exchange_strong_rel_64 (pmix_atomic_int64_t *addr, int64_t *oldval, int64_t newval) ++{ ++ int64_t prev; ++ int64_t tmp; ++ bool ret; ++ int64_t tmp1, tmp2; ++ ++ __asm__ __volatile__ (" ldi %2, %4 \n" //tmp1 = addr ++ "1: lldl %0, 0(%2) \n" //prev = *tmp1 (64-bit load-locked) ++ " cmpeq %0, %5, %3 \n" //tmp2 = (prev == *oldval) ++ " beq %3, 2f \n" //if(tmp2 == 0) goto 2: mismatch, leave *addr untouched ++ " mov %6, %1 \n" //tmp = newval ++ " lstl %1, 0(%2) \n" //64-bit store-conditional *tmp1 = tmp; tmp becomes the success flag ++ " beq %1, 1b \n" //if(tmp == 0) goto 1: the store failed, start again ++ "2: \n" ++ : "=&r" (prev), "=&r" (tmp), "=&r" (tmp1), "=&r" (tmp2) ++ : "r" (addr), "r" (*oldval), "r" (newval) ++ : "cc", "memory"); ++ ++ ret = (prev == *oldval); ++ *oldval = prev; ++ return ret; ++} ++ ++#define pmix_atomic_ll_64(addr, ret) \ ++ do { \ ++ pmix_atomic_int64_t *_addr = (addr); \ ++ int64_t _ret; \ ++ \ ++ __asm__ __volatile__ ("lldl %0, 0(%1) \n" \ ++ /* address is taken straight from %1, so no undeclared scratch register is written */ \ ++ : "=&r" (_ret) \ ++ : "r" (_addr)); \ ++ \ ++ ret = (typeof(ret)) _ret; \ ++ } while (0) ++ ++#define pmix_atomic_sc_64(addr, newval, ret) \ ++ do { \ ++ pmix_atomic_int64_t *_addr = (addr); \ ++ int64_t _newval = (int64_t) newval; \ ++ int64_t _ret = _newval; \ ++ \ ++ __asm__
__volatile__ ("lstl %0, 0(%1) \n" \ ++ : "+r" (_ret) \ ++ : "r" (_addr), "r" (_newval) \ ++ : "cc", "memory"); \ ++ \ ++ ret = (_ret != 0); \ ++ } while (0) ++ ++static inline int32_t pmix_atomic_fetch_add_32(pmix_atomic_int32_t *addr, int32_t value) ++{ ++ int32_t newval, old; ++ unsigned long ptr; ++ ++ __asm__ __volatile__(" ldi %2, %3 \n"//ptr = addr ++ "1: lldw %1, 0(%2) \n"//old = *ptr ++ " addw %1, %4, %0 \n"//newval = old + value ++ " lstw %0, 0(%2) \n"//store-conditional *ptr = newval; %0 becomes the success flag ++ " beq %0, 1b \n"//if(flag == 0) goto 1: the store failed, start again ++ : "=&r" (newval), "=&r" (old), "=&r" (ptr) ++ : "r" (addr), "r" (value) ++ : "memory"); ++ ++ return old; ++} ++ ++static inline int32_t pmix_atomic_fetch_sub_32(pmix_atomic_int32_t *addr, int32_t value) ++{ ++ int32_t newval, old; ++ unsigned long ptr; ++ ++ __asm__ __volatile__(" ldi %2, %3 \n"//ptr = addr ++ "1: lldw %1, 0(%2) \n"//old = *ptr ++ " subw %1, %4, %0 \n"//newval = old - value ++ " lstw %0, 0(%2) \n"//store-conditional *ptr = newval; %0 becomes the success flag ++ " beq %0, 1b \n"//if(flag == 0) goto 1: the store failed, start again ++ : "=&r" (newval), "=&r" (old), "=&r" (ptr) ++ : "r" (addr), "r" (value) ++ : "memory"); ++ ++ return old; ++} ++ ++static inline int32_t pmix_atomic_fetch_and_32(pmix_atomic_int32_t *addr, int32_t value) ++{ ++ int32_t newval, old; ++ unsigned long ptr; ++ ++ __asm__ __volatile__(" ldi %2, %3 \n"//ptr = addr ++ "1: lldw %1, 0(%2) \n"//old = *ptr ++ " and %1, %4, %0 \n"//newval = old & value ++ " lstw %0, 0(%2) \n"//store-conditional *ptr = newval; %0 becomes the success flag ++ " beq %0, 1b \n"//if(flag == 0) goto 1: the store failed, start again ++ : "=&r" (newval), "=&r" (old), "=&r" (ptr) ++ : "r" (addr), "r" (value) ++ : "memory"); ++ ++ return old; ++} ++ ++static inline int32_t pmix_atomic_fetch_or_32(pmix_atomic_int32_t *addr, int32_t value) ++{ ++ int32_t newval, old; ++ unsigned long ptr; ++ ++ __asm__ __volatile__(" ldi %2, %3 \n"//ptr = addr ++ "1: lldw %1, 0(%2) \n"//old = *ptr ++ " bis %1, %4, %0
\n"//newval = old | value ++ " lstw %0, 0(%2) \n"//store-conditional *ptr = newval; %0 becomes the success flag ++ " beq %0, 1b \n"//if(flag == 0) goto 1: the store failed, start again ++ : "=&r" (newval), "=&r" (old), "=&r" (ptr) ++ : "r" (addr), "r" (value) ++ : "memory"); ++ ++ return old; ++} ++ ++static inline int32_t pmix_atomic_fetch_xor_32(pmix_atomic_int32_t *addr, int32_t value) ++{ ++ int32_t newval, old; ++ unsigned long ptr; ++ ++ __asm__ __volatile__(" ldi %2, %3 \n"//ptr = addr ++ "1: lldw %1, 0(%2) \n"//old = *ptr ++ " xor %1, %4, %0 \n"//newval = old ^ value ++ " lstw %0, 0(%2) \n"//store-conditional *ptr = newval; %0 becomes the success flag ++ " beq %0, 1b \n"//if(flag == 0) goto 1: the store failed, start again ++ : "=&r" (newval), "=&r" (old), "=&r" (ptr) ++ : "r" (addr), "r" (value) ++ : "memory"); ++ ++ return old; ++} ++ ++static inline int64_t pmix_atomic_fetch_add_64(pmix_atomic_int64_t *addr, int64_t value) ++{ ++ int64_t newval, old; ++ unsigned long ptr; ++ ++ __asm__ __volatile__(" ldi %2, %3 \n"//ptr = addr ++ "1: lldl %1, 0(%2) \n"//old = *ptr ++ " addl %1, %4, %0 \n"//newval = old + value ++ " lstl %0, 0(%2) \n"//store-conditional *ptr = newval; %0 becomes the success flag ++ " beq %0, 1b \n"//if(flag == 0) goto 1: the store failed, start again ++ : "=&r" (newval), "=&r" (old), "=&r" (ptr) ++ : "r" (addr), "r" (value) ++ : "memory"); ++ ++ return old; ++} ++ ++static inline int64_t pmix_atomic_fetch_sub_64(pmix_atomic_int64_t *addr, int64_t value) ++{ ++ int64_t newval, old; ++ unsigned long ptr; ++ ++ __asm__ __volatile__(" ldi %2, %3 \n"//ptr = addr ++ "1: lldl %1, 0(%2) \n"//old = *ptr ++ " subl %1, %4, %0 \n"//newval = old - value ++ " lstl %0, 0(%2) \n"//store-conditional *ptr = newval; %0 becomes the success flag ++ " beq %0, 1b \n"//if(flag == 0) goto 1: the store failed, start again ++ : "=&r" (newval), "=&r" (old), "=&r" (ptr) ++ : "r" (addr), "r" (value) ++ : "memory"); ++ ++ return old; ++} ++ ++static inline int64_t pmix_atomic_fetch_and_64(pmix_atomic_int64_t *addr, int64_t value) ++{ ++ int64_t newval, old; ++ unsigned long ptr; ++
__asm__ __volatile__(" ldi %2, %3 \n"//ptr = addr ++ "1: lldl %1, 0(%2) \n"//old = *ptr (64-bit load-locked) ++ " and %1, %4, %0 \n"//newval = old & value ++ " lstl %0, 0(%2) \n"//store-conditional *ptr = newval; %0 becomes the success flag ++ " beq %0, 1b \n"//if(flag == 0) goto 1: the store failed, start again ++ : "=&r" (newval), "=&r" (old), "=&r" (ptr) ++ : "r" (addr), "r" (value) ++ : "memory"); ++ ++ return old; ++} ++ ++static inline int64_t pmix_atomic_fetch_or_64(pmix_atomic_int64_t *addr, int64_t value) ++{ ++ int64_t newval, old; ++ unsigned long ptr; ++ ++ __asm__ __volatile__(" ldi %2, %3 \n"//ptr = addr ++ "1: lldl %1, 0(%2) \n"//old = *ptr (64-bit load-locked) ++ " bis %1, %4, %0 \n"//newval = old | value ++ " lstl %0, 0(%2) \n"//store-conditional *ptr = newval; %0 becomes the success flag ++ " beq %0, 1b \n"//if(flag == 0) goto 1: the store failed, start again ++ : "=&r" (newval), "=&r" (old), "=&r" (ptr) ++ : "r" (addr), "r" (value) ++ : "memory"); ++ ++ return old; ++} ++ ++static inline int64_t pmix_atomic_fetch_xor_64(pmix_atomic_int64_t *addr, int64_t value) ++{ ++ int64_t newval, old; ++ unsigned long ptr; ++ ++ __asm__ __volatile__(" ldi %2, %3 \n"//ptr = addr ++ "1: lldl %1, 0(%2) \n"//old = *ptr (64-bit load-locked) ++ " xor %1, %4, %0 \n"//newval = old ^ value ++ " lstl %0, 0(%2) \n"//store-conditional *ptr = newval; %0 becomes the success flag ++ " beq %0, 1b \n"//if(flag == 0) goto 1: the store failed, start again ++ : "=&r" (newval), "=&r" (old), "=&r" (ptr) ++ : "r" (addr), "r" (value) ++ : "memory"); ++ ++ return old; ++} ++ ++#endif /* PMIX_GCC_INLINE_ASSEMBLY */ ++ ++#endif /* !
PMIX_SYS_ARCH_ATOMIC_H */ +diff -uNar openmpi-4.1.4.org/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/sw_64/Makefile.include openmpi-4.1.4.sw/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/sw_64/Makefile.include +--- openmpi-4.1.4.org/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/sw_64/Makefile.include 1970-01-01 08:00:00.000000000 +0800 ++++ openmpi-4.1.4.sw/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/sw_64/Makefile.include 2024-03-13 19:37:28.394848433 +0800 +@@ -0,0 +1,24 @@ ++# ++# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana ++# University Research and Technology ++# Corporation. All rights reserved. ++# Copyright (c) 2004-2008 The University of Tennessee and The University ++# of Tennessee Research Foundation. All rights ++# reserved. ++# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, ++# University of Stuttgart. All rights reserved. ++# Copyright (c) 2004-2005 The Regents of the University of California. ++# All rights reserved. ++# Copyright (c) 2017 Intel, Inc. All rights reserved. ++# $COPYRIGHT$ ++# ++# Additional copyrights may follow ++# ++# $HEADER$ ++# ++ ++# This makefile.am does not stand on its own - it is included from pmix/include/Makefile.am ++ ++headers += \ ++ atomics/sys/sw_64/atomic.h \ ++ atomics/sys/sw_64/timer.h +diff -uNar openmpi-4.1.4.org/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/sw_64/timer.h openmpi-4.1.4.sw/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/sw_64/timer.h +--- openmpi-4.1.4.org/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/sw_64/timer.h 1970-01-01 08:00:00.000000000 +0800 ++++ openmpi-4.1.4.sw/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/sw_64/timer.h 2024-03-13 19:37:28.394848433 +0800 +@@ -0,0 +1,45 @@ ++/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ ++/* ++ * Copyright (c) 2008 The University of Tennessee and The University ++ * of Tennessee Research Foundation. All rights ++ * reserved. ++ * Copyright (c) 2016 Broadcom Limited. All rights reserved. 
++ * Copyright (c) 2016 Los Alamos National Security, LLC. All rights ++ * reserved. ++ * $COPYRIGHT$ ++ * ++ * Additional copyrights may follow ++ * ++ * $HEADER$ ++ */ ++ ++#ifndef PMIX_SYS_ARCH_TIMER_H ++#define PMIX_SYS_ARCH_TIMER_H 1 ++ ++#include <stdint.h> /* uint64_t, used by pmix_timer_t below */ ++ ++typedef uint64_t pmix_timer_t; ++ ++static inline pmix_timer_t ++pmix_sys_timer_get_cycles(void) ++{ ++ pmix_timer_t ret; ++ ++ __asm__ __volatile__ ("memb" ::: "memory"); /* barrier first so the counter read is not hoisted above earlier accesses */ ++ __asm__ __volatile__ ("rtc %0" : "=r" (ret)); /* read the counter into ret */ ++ ++ return ret; ++} ++ ++ ++static inline pmix_timer_t ++pmix_sys_timer_freq(void) ++{ ++ pmix_timer_t freq; ++ __asm__ __volatile__ ("rtc %0" : "=r" (freq)); /* NOTE(review): rtc is the same instruction pmix_sys_timer_get_cycles() uses to read the counter, so this looks like a tick count rather than a frequency - confirm */ ++ return (pmix_timer_t)(freq); ++} ++ ++#define PMIX_HAVE_SYS_TIMER_GET_CYCLES 1 ++ ++#endif /* ! PMIX_SYS_ARCH_TIMER_H */ +diff -uNar openmpi-4.1.4.org/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/timer.h openmpi-4.1.4.sw/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/timer.h +--- openmpi-4.1.4.org/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/timer.h 2022-05-26 22:32:35.000000000 +0800 ++++ openmpi-4.1.4.sw/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/timer.h 2024-03-13 19:37:28.394848433 +0800 +@@ -75,6 +75,8 @@ + #include "src/atomics/sys/x86_64/timer.h" + #elif PMIX_ASSEMBLY_ARCH == PMIX_ARM + #include "src/atomics/sys/arm/timer.h" ++#elif PMIX_ASSEMBLY_ARCH == PMIX_SW_64 ++#include "src/atomics/sys/sw_64/timer.h" + #elif PMIX_ASSEMBLY_ARCH == PMIX_ARM64 + #include "src/atomics/sys/arm64/timer.h" + #elif PMIX_ASSEMBLY_ARCH == PMIX_IA32 +diff -uNar openmpi-4.1.4.org/opal/mca/timer/linux/configure.m4 openmpi-4.1.4.sw/opal/mca/timer/linux/configure.m4 +--- openmpi-4.1.4.org/opal/mca/timer/linux/configure.m4 2022-05-26 22:32:35.000000000 +0800 ++++ openmpi-4.1.4.sw/opal/mca/timer/linux/configure.m4 2024-03-13 19:37:28.394848433 +0800 +@@ -47,7 +47,7 @@ + [timer_linux_happy="no"])]) + + case "${host}" in +- i?86-*linux*|x86_64*linux*|ia64-*linux*|powerpc-*linux*|powerpc64-*linux*|powerpc64le-*linux*|powerpcle-*linux*|sparc*-*linux*|aarch64-*linux*) ++
i?86-*linux*|x86_64*linux*|ia64-*linux*|powerpc-*linux*|powerpc64-*linux*|powerpc64le-*linux*|powerpcle-*linux*|sparc*-*linux*|aarch64-*linux*|sw_64-*linux*) + AS_IF([test "$timer_linux_happy" = "yes"], + [AS_IF([test -r "/proc/cpuinfo"], + [timer_linux_happy="yes"], +diff -uNar openmpi-4.1.4.org/opal/mca/timer/linux/timer_linux_component.c openmpi-4.1.4.sw/opal/mca/timer/linux/timer_linux_component.c +--- openmpi-4.1.4.org/opal/mca/timer/linux/timer_linux_component.c 2022-05-26 22:32:35.000000000 +0800 ++++ openmpi-4.1.4.sw/opal/mca/timer/linux/timer_linux_component.c 2024-03-13 19:37:28.394848433 +0800 +@@ -120,6 +120,10 @@ + #if OPAL_ASSEMBLY_ARCH == OPAL_ARM64 + opal_timer_linux_freq = opal_sys_timer_freq(); + #endif ++#if OPAL_ASSEMBLY_ARCH == OPAL_SW_64 ++ opal_timer_linux_freq = opal_sys_timer_freq(); ++#endif ++ + + if (0 == opal_timer_linux_freq) { + /* first, look for a timebase field. probably only on PPC, diff --git a/openmpi-Add-sw64-architecture.patch b/openmpi-Add-sw64-architecture.patch new file mode 100644 index 0000000..c1205b0 --- /dev/null +++ b/openmpi-Add-sw64-architecture.patch @@ -0,0 +1,64 @@ +diff -uNar openmpi-4.1.5.org/config/from-savannah/upstream-config.guess openmpi-4.1.5.sw/config/from-savannah/upstream-config.guess +--- openmpi-4.1.5.org/config/from-savannah/upstream-config.guess 2023-02-23 12:25:03.000000000 +0800 ++++ openmpi-4.1.5.sw/config/from-savannah/upstream-config.guess 2024-09-13 17:37:04.709496046 +0800 +@@ -928,6 +928,9 @@ + *:Minix:*:*) + echo "$UNAME_MACHINE"-unknown-minix + exit ;; ++ sw_64:Linux:*:*) ++ echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" ++ exit ;; + aarch64:Linux:*:*) + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; +diff -uNar openmpi-4.1.5.org/config/from-savannah/upstream-config.sub openmpi-4.1.5.sw/config/from-savannah/upstream-config.sub +--- openmpi-4.1.5.org/config/from-savannah/upstream-config.sub 2023-02-23 12:25:03.000000000 +0800 ++++ 
openmpi-4.1.5.sw/config/from-savannah/upstream-config.sub 2024-09-13 17:36:47.519510896 +0800 +@@ -1158,6 +1158,7 @@ + case $cpu in + 1750a | 580 \ + | a29k \ ++ | sw_64 \ + | aarch64 | aarch64_be \ + | abacus \ + | alpha | alphaev[4-8] | alphaev56 | alphaev6[78] \ +diff -uNar openmpi-4.1.5.org/config/opal_config_asm.m4 openmpi-4.1.5.sw/config/opal_config_asm.m4 +--- openmpi-4.1.5.org/config/opal_config_asm.m4 2024-09-13 16:11:54.223091670 +0800 ++++ openmpi-4.1.5.sw/config/opal_config_asm.m4 2024-09-13 16:44:15.802135676 +0800 +@@ -1070,6 +1070,11 @@ + OPAL_CHECK_SYNC_BUILTINS([opal_cv_asm_builtin="BUILTIN_SYNC"], + [AC_MSG_ERROR([No atomic primitives available for $host])]) + ;; ++ sw_64*) ++ opal_cv_asm_arch="SW_64" ++ OPAL_ASM_SUPPORT_64BIT=1 ++ OPAL_GCC_INLINE_ASSIGN='"mov 0, %0" : "=&r"(ret)' ++ ;; + aarch64*) + opal_cv_asm_arch="ARM64" + OPAL_ASM_SUPPORT_64BIT=1 +diff -uNar openmpi-4.1.5.org/opal/include/opal/sys/architecture.h openmpi-4.1.5.sw/opal/include/opal/sys/architecture.h +--- openmpi-4.1.5.org/opal/include/opal/sys/architecture.h 2024-09-13 16:11:54.223091670 +0800 ++++ openmpi-4.1.5.sw/opal/include/opal/sys/architecture.h 2024-09-13 16:50:37.031845858 +0800 +@@ -45,6 +45,7 @@ + #define OPAL_S390 0110 + #define OPAL_S390X 0111 + #define OPAL_RISCV64 0120 ++#define OPAL_SW_64 0130 + #define OPAL_BUILTIN_SYNC 0200 + #define OPAL_BUILTIN_GCC 0202 + #define OPAL_BUILTIN_NO 0203 +diff -uNar openmpi-4.1.5.org/opal/include/opal/sys/cma.h openmpi-4.1.5.sw/opal/include/opal/sys/cma.h +--- openmpi-4.1.5.org/opal/include/opal/sys/cma.h 2024-09-13 16:11:54.223091670 +0800 ++++ openmpi-4.1.5.sw/opal/include/opal/sys/cma.h 2024-09-13 16:48:41.881936359 +0800 +@@ -64,6 +64,10 @@ + #define __NR_process_vm_readv 270 + #define __NR_process_vm_writev 271 + ++#elif OPAL_ASSEMBLY_ARCH == OPAL_SW_64 ++#define __NR_process_vm_readv 504 ++#define __NR_process_vm_writev 505 ++ + #elif OPAL_ASSEMBLY_ARCH == OPAL_MIPS + + #if _MIPS_SIM == _MIPS_SIM_ABI64 diff --git 
a/openmpi.spec b/openmpi.spec index 7b2d88b..43b4386 100644 --- a/openmpi.spec +++ b/openmpi.spec @@ -1,6 +1,6 @@ Name: openmpi Version: 4.1.5 -Release: 7 +Release: 8 Summary: Open Source High Performance Computing License: BSD-3-Clause URL: http://www.open-mpi.org/ @@ -13,6 +13,7 @@ Patch1000: add-riscv64-support.patch Patch1001: 0001-add-loongarch64-support-for-openmpi.patch %endif Patch1002: 0002-modify-multicore-timeout-support.patch +Patch1003: openmpi-Add-sw64-architecture.patch BuildRequires: gcc-c++, gcc-gfortran %ifarch %{valgrind_arches} @@ -56,6 +57,8 @@ community in order to build the best MPI library available. %global name_all openmpi-x86_64 %elifarch loongarch64 %global name_all openmpi-loongarch64 +%elifarch sw_64 +%global name_all openmpi-sw_64 %else %global namearch openmpi-%{_arch} %endif @@ -234,6 +237,9 @@ make check %{_mandir}/%{name_all}/man*/* %changelog +* Fri Sep 13 2024 wuzx - 4.1.5-8 +- Add sw64 support + * Mon Aug 19 2024 zhangzijian <1220292247@qq.com> - 4.1.5-7 - Modify multicore causing finalize timeout error -- Gitee From 2c385450f7c2524956cb0517bf2e66656f384765 Mon Sep 17 00:00:00 2001 From: root Date: Fri, 13 Sep 2024 14:19:51 +0800 Subject: [PATCH 2/2] add sw64 support --- openmpi-4.1.4-1007-sw.patch | 1714 ----------------------------------- 1 file changed, 1714 deletions(-) delete mode 100644 openmpi-4.1.4-1007-sw.patch diff --git a/openmpi-4.1.4-1007-sw.patch b/openmpi-4.1.4-1007-sw.patch deleted file mode 100644 index c7a6997..0000000 --- a/openmpi-4.1.4-1007-sw.patch +++ /dev/null @@ -1,1714 +0,0 @@ -diff -uNar openmpi-4.1.4.org/aclocal.m4 openmpi-4.1.4.sw/aclocal.m4 ---- openmpi-4.1.4.org/aclocal.m4 2022-05-26 22:34:29.000000000 +0800 -+++ openmpi-4.1.4.sw/aclocal.m4 2024-03-13 19:37:28.134848433 +0800 -@@ -1,4 +1,4 @@ --# generated automatically by aclocal 1.15 -*- Autoconf -*- -+# generated automatically by aclocal 1.16 -*- Autoconf -*- - - # Copyright (C) 1996-2014 Free Software Foundation, Inc. 
- -@@ -32,10 +32,10 @@ - # generated from the m4 files accompanying Automake X.Y. - # (This private macro should not be called outside this file.) - AC_DEFUN([AM_AUTOMAKE_VERSION], --[am__api_version='1.15' -+[am__api_version='1.16' - dnl Some users find AM_AUTOMAKE_VERSION and mistake it for a way to - dnl require some minimum version. Point them to the right macro. --m4_if([$1], [1.15], [], -+m4_if([$1], [1.16.5], [], - [AC_FATAL([Do not call $0, use AM_INIT_AUTOMAKE([$1]).])])dnl - ]) - -@@ -51,7 +51,7 @@ - # Call AM_AUTOMAKE_VERSION and AM_AUTOMAKE_VERSION so they can be traced. - # This function is AC_REQUIREd by AM_INIT_AUTOMAKE. - AC_DEFUN([AM_SET_CURRENT_AUTOMAKE_VERSION], --[AM_AUTOMAKE_VERSION([1.15])dnl -+[AM_AUTOMAKE_VERSION([1.16.5])dnl - m4_ifndef([AC_AUTOCONF_VERSION], - [m4_copy([m4_PACKAGE_VERSION], [AC_AUTOCONF_VERSION])])dnl - _AM_AUTOCONF_VERSION(m4_defn([AC_AUTOCONF_VERSION]))]) -diff -uNar openmpi-4.1.4.org/config/config.guess openmpi-4.1.4.sw/config/config.guess ---- openmpi-4.1.4.org/config/config.guess 2022-05-26 22:35:49.000000000 +0800 -+++ openmpi-4.1.4.sw/config/config.guess 2024-03-13 19:37:28.134848433 +0800 -@@ -935,6 +935,14 @@ - UNAME_MACHINE=aarch64_be - echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" - exit ;; -+ sw_64:Linux:*:*) -+ case $(sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' /proc/cpuinfo 2>/dev/null) in -+ sw) UNAME_MACHINE=sw_64 ;; -+ esac -+ objdump --private-headers /bin/sh | grep -q ld.so.1 -+ if test "$?" 
= 0 ; then LIBC=gnulibc1 ; fi -+ echo "$UNAME_MACHINE"-sunway-linux-"$LIBC" -+ exit ;; - alpha:Linux:*:*) - case $(sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' /proc/cpuinfo 2>/dev/null) in - EV5) UNAME_MACHINE=alphaev5 ;; -diff -uNar openmpi-4.1.4.org/config/config.sub openmpi-4.1.4.sw/config/config.sub ---- openmpi-4.1.4.org/config/config.sub 2022-05-26 22:35:49.000000000 +0800 -+++ openmpi-4.1.4.sw/config/config.sub 2024-03-13 19:37:28.134848433 +0800 -@@ -1158,6 +1158,7 @@ - case $cpu in - 1750a | 580 \ - | a29k \ -+ | sw_64 \ - | aarch64 | aarch64_be \ - | abacus \ - | alpha | alphaev[4-8] | alphaev56 | alphaev6[78] \ -diff -uNar openmpi-4.1.4.org/config/make_manpage.pl openmpi-4.1.4.sw/config/make_manpage.pl ---- openmpi-4.1.4.org/config/make_manpage.pl 2022-05-26 22:32:35.000000000 +0800 -+++ openmpi-4.1.4.sw/config/make_manpage.pl 2024-03-13 19:37:28.134848433 +0800 -@@ -13,7 +13,7 @@ - use Getopt::Long; - - my $package_name; --my $package_version; -+my $package_version='4.1.4'; - my $ompi_date; - my $opal_date; - my $orte_date; -diff -uNar openmpi-4.1.4.org/config/opal_config_asm.m4 openmpi-4.1.4.sw/config/opal_config_asm.m4 ---- openmpi-4.1.4.org/config/opal_config_asm.m4 2022-05-26 22:32:35.000000000 +0800 -+++ openmpi-4.1.4.sw/config/opal_config_asm.m4 2024-03-13 19:37:28.134848433 +0800 -@@ -1070,6 +1070,12 @@ - OPAL_CHECK_SYNC_BUILTINS([opal_cv_asm_builtin="BUILTIN_SYNC"], - [AC_MSG_ERROR([No atomic primitives available for $host])]) - ;; -+ -+ sw_64*) -+ opal_cv_asm_arch="SW_64" -+ OPAL_ASM_SUPPORT_64BIT=1 -+ OPAL_GCC_INLINE_ASSIGN='"mov 0, %0" : "=&r"(ret)' -+ ;; - aarch64*) - opal_cv_asm_arch="ARM64" - OPAL_ASM_SUPPORT_64BIT=1 -diff -uNar openmpi-4.1.4.org/configure openmpi-4.1.4.sw/configure ---- openmpi-4.1.4.org/configure 2022-05-26 22:35:48.000000000 +0800 -+++ openmpi-4.1.4.sw/configure 2024-03-13 19:37:28.314848433 +0800 -@@ -7582,7 +7582,7 @@ - # - # Init automake - # --am__api_version='1.15' -+am__api_version='1.16' - - # Find a good install 
program. We prefer a C program (faster), - # so one script is as good as another. But avoid the broken or -@@ -40758,6 +40758,13 @@ - - - ;; -+ -+ sw_64*) -+ opal_cv_asm_arch="SW_64" -+ OPAL_ASM_SUPPORT_64BIT=1 -+ OPAL_GCC_INLINE_ASSIGN='"mov 0, %0" : "=&r"(ret)' -+ ;; -+ - aarch64*) - opal_cv_asm_arch="ARM64" - OPAL_ASM_SUPPORT_64BIT=1 -@@ -163992,7 +163999,7 @@ - cat confdefs.h - <<_ACEOF >conftest.$ac_ext - /* end confdefs.h. */ - --#if !defined(__i386__) && !defined(__x86_64__) && !defined(__PPC__) && !defined(__aarch64__) -+#if !defined(__i386__) && !defined(__x86_64__) && !defined(__PPC__) && !defined(__aarch64__) && !defined(__sw_64__) - #error "platform not supported" - #endif - -@@ -186984,7 +186991,7 @@ - fi - - case "${host}" in -- i?86-*linux*|x86_64*linux*|ia64-*linux*|powerpc-*linux*|powerpc64-*linux*|powerpc64le-*linux*|powerpcle-*linux*|sparc*-*linux*|aarch64-*linux*) -+ i?86-*linux*|x86_64*linux*|ia64-*linux*|powerpc-*linux*|powerpc64-*linux*|powerpc64le-*linux*|powerpcle-*linux*|sparc*-*linux*|aarch64-*linux*|sw_64-*linux*) - if test "$timer_linux_happy" = "yes"; then : - if test -r "/proc/cpuinfo"; then : - timer_linux_happy="yes" -diff -uNar openmpi-4.1.4.org/ompi/mca/io/romio321/romio/aclocal.m4 openmpi-4.1.4.sw/ompi/mca/io/romio321/romio/aclocal.m4 ---- openmpi-4.1.4.org/ompi/mca/io/romio321/romio/aclocal.m4 2022-05-26 22:33:24.000000000 +0800 -+++ openmpi-4.1.4.sw/ompi/mca/io/romio321/romio/aclocal.m4 2024-03-13 19:37:28.334848433 +0800 -@@ -1,4 +1,4 @@ --# generated automatically by aclocal 1.15 -*- Autoconf -*- -+# generated automatically by aclocal 1.16 -*- Autoconf -*- - - # Copyright (C) 1996-2014 Free Software Foundation, Inc. - -@@ -32,10 +32,10 @@ - # generated from the m4 files accompanying Automake X.Y. - # (This private macro should not be called outside this file.) 
- AC_DEFUN([AM_AUTOMAKE_VERSION], --[am__api_version='1.15' -+[am__api_version='1.16' - dnl Some users find AM_AUTOMAKE_VERSION and mistake it for a way to - dnl require some minimum version. Point them to the right macro. --m4_if([$1], [1.15], [], -+m4_if([$1], [1.16.5], [], - [AC_FATAL([Do not call $0, use AM_INIT_AUTOMAKE([$1]).])])dnl - ]) - -@@ -51,7 +51,7 @@ - # Call AM_AUTOMAKE_VERSION and AM_AUTOMAKE_VERSION so they can be traced. - # This function is AC_REQUIREd by AM_INIT_AUTOMAKE. - AC_DEFUN([AM_SET_CURRENT_AUTOMAKE_VERSION], --[AM_AUTOMAKE_VERSION([1.15])dnl -+[AM_AUTOMAKE_VERSION([1.16.5])dnl - m4_ifndef([AC_AUTOCONF_VERSION], - [m4_copy([m4_PACKAGE_VERSION], [AC_AUTOCONF_VERSION])])dnl - _AM_AUTOCONF_VERSION(m4_defn([AC_AUTOCONF_VERSION]))]) -diff -uNar openmpi-4.1.4.org/ompi/mca/io/romio321/romio/confdb/config.guess openmpi-4.1.4.sw/ompi/mca/io/romio321/romio/confdb/config.guess ---- openmpi-4.1.4.org/ompi/mca/io/romio321/romio/confdb/config.guess 2022-05-26 22:35:49.000000000 +0800 -+++ openmpi-4.1.4.sw/ompi/mca/io/romio321/romio/confdb/config.guess 2024-03-13 19:37:28.334848433 +0800 -@@ -935,6 +935,14 @@ - UNAME_MACHINE=aarch64_be - echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" - exit ;; -+ sw_64:Linux:*:*) -+ case $(sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' /proc/cpuinfo 2>/dev/null) in -+ sw) UNAME_MACHINE=sw_64 ;; -+ esac -+ objdump --private-headers /bin/sh | grep -q ld.so.1 -+ if test "$?" 
= 0 ; then LIBC=gnulibc1 ; fi -+ echo "$UNAME_MACHINE"-sunway-linux-"$LIBC" -+ exit ;; - alpha:Linux:*:*) - case $(sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' /proc/cpuinfo 2>/dev/null) in - EV5) UNAME_MACHINE=alphaev5 ;; -diff -uNar openmpi-4.1.4.org/ompi/mca/io/romio321/romio/confdb/config.sub openmpi-4.1.4.sw/ompi/mca/io/romio321/romio/confdb/config.sub ---- openmpi-4.1.4.org/ompi/mca/io/romio321/romio/confdb/config.sub 2022-05-26 22:35:49.000000000 +0800 -+++ openmpi-4.1.4.sw/ompi/mca/io/romio321/romio/confdb/config.sub 2024-03-13 19:37:28.334848433 +0800 -@@ -1158,6 +1158,7 @@ - case $cpu in - 1750a | 580 \ - | a29k \ -+ | sw_64 \ - | aarch64 | aarch64_be \ - | abacus \ - | alpha | alphaev[4-8] | alphaev56 | alphaev6[78] \ -diff -uNar openmpi-4.1.4.org/ompi/mca/io/romio321/romio/configure openmpi-4.1.4.sw/ompi/mca/io/romio321/romio/configure ---- openmpi-4.1.4.org/ompi/mca/io/romio321/romio/configure 2022-05-26 22:33:27.000000000 +0800 -+++ openmpi-4.1.4.sw/ompi/mca/io/romio321/romio/configure 2024-03-13 19:37:28.344848433 +0800 -@@ -2985,7 +2985,7 @@ - - - --am__api_version='1.15' -+am__api_version='1.16' - - # Find a good install program. We prefer a C program (faster), - # so one script is as good as another. 
But avoid the broken or -diff -uNar openmpi-4.1.4.org/opal/include/Makefile.in openmpi-4.1.4.sw/opal/include/Makefile.in ---- openmpi-4.1.4.org/opal/include/Makefile.in 2022-05-26 22:35:13.000000000 +0800 -+++ openmpi-4.1.4.sw/opal/include/Makefile.in 2024-03-13 19:37:28.344848433 +0800 -@@ -669,6 +669,7 @@ - opal/sys/cma.h opal/sys/x86_64/atomic.h \ - opal/sys/x86_64/timer.h opal/sys/arm/atomic.h \ - opal/sys/arm/timer.h opal/sys/arm64/atomic.h \ -+ opal/sys/sw_64/timer.h opal/sys/sw_64/atomic.h \ - opal/sys/arm64/timer.h opal/sys/ia32/atomic.h \ - opal/sys/ia32/timer.h opal/sys/powerpc/atomic.h \ - opal/sys/powerpc/timer.h opal/sys/sparcv9/atomic.h \ -@@ -727,6 +728,7 @@ - $(srcdir)/opal/sys/Makefile.am \ - $(srcdir)/opal/sys/arm/Makefile.am \ - $(srcdir)/opal/sys/arm64/Makefile.am \ -+ $(srcdir)/opal/sys/sw_64/Makefile.am \ - $(srcdir)/opal/sys/gcc_builtin/Makefile.am \ - $(srcdir)/opal/sys/ia32/Makefile.am \ - $(srcdir)/opal/sys/powerpc/Makefile.am \ -@@ -1952,6 +1954,7 @@ - opal/sys/x86_64/atomic.h opal/sys/x86_64/timer.h \ - opal/sys/arm/atomic.h opal/sys/arm/timer.h \ - opal/sys/arm64/atomic.h opal/sys/arm64/timer.h \ -+ opal/sys/sw_64/atomic.h opal/sys/sw_64/timer.h \ - opal/sys/ia32/atomic.h opal/sys/ia32/timer.h \ - opal/sys/powerpc/atomic.h opal/sys/powerpc/timer.h \ - opal/sys/sparcv9/atomic.h opal/sys/sparcv9/timer.h \ -@@ -1965,7 +1968,7 @@ - $(MAKE) $(AM_MAKEFLAGS) all-am - - .SUFFIXES: --$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(srcdir)/opal/Makefile.am $(srcdir)/opal/sys/Makefile.am $(srcdir)/opal/sys/x86_64/Makefile.am $(srcdir)/opal/sys/arm/Makefile.am $(srcdir)/opal/sys/arm64/Makefile.am $(srcdir)/opal/sys/ia32/Makefile.am $(srcdir)/opal/sys/powerpc/Makefile.am $(srcdir)/opal/sys/sparcv9/Makefile.am $(srcdir)/opal/sys/sync_builtin/Makefile.am $(srcdir)/opal/sys/gcc_builtin/Makefile.am $(am__configure_deps) -+$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(srcdir)/opal/Makefile.am $(srcdir)/opal/sys/Makefile.am 
$(srcdir)/opal/sys/x86_64/Makefile.am $(srcdir)/opal/sys/arm/Makefile.am $(srcdir)/opal/sys/arm64/Makefile.am $(srcdir)/opal/sys/sw_64/Makefile.am $(srcdir)/opal/sys/ia32/Makefile.am $(srcdir)/opal/sys/powerpc/Makefile.am $(srcdir)/opal/sys/sparcv9/Makefile.am $(srcdir)/opal/sys/sync_builtin/Makefile.am $(srcdir)/opal/sys/gcc_builtin/Makefile.am $(am__configure_deps) - @for dep in $?; do \ - case '$(am__configure_deps)' in \ - *$$dep*) \ -@@ -1985,7 +1988,7 @@ - echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ - cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ - esac; --$(srcdir)/opal/Makefile.am $(srcdir)/opal/sys/Makefile.am $(srcdir)/opal/sys/x86_64/Makefile.am $(srcdir)/opal/sys/arm/Makefile.am $(srcdir)/opal/sys/arm64/Makefile.am $(srcdir)/opal/sys/ia32/Makefile.am $(srcdir)/opal/sys/powerpc/Makefile.am $(srcdir)/opal/sys/sparcv9/Makefile.am $(srcdir)/opal/sys/sync_builtin/Makefile.am $(srcdir)/opal/sys/gcc_builtin/Makefile.am $(am__empty): -+$(srcdir)/opal/Makefile.am $(srcdir)/opal/sys/Makefile.am $(srcdir)/opal/sys/x86_64/Makefile.am $(srcdir)/opal/sys/arm/Makefile.am $(srcdir)/opal/sys/arm64/Makefile.am $(srcdir)/opal/sys/sw_64/Makefile.am $(srcdir)/opal/sys/ia32/Makefile.am $(srcdir)/opal/sys/powerpc/Makefile.am $(srcdir)/opal/sys/sparcv9/Makefile.am $(srcdir)/opal/sys/sync_builtin/Makefile.am $(srcdir)/opal/sys/gcc_builtin/Makefile.am $(am__empty): - - $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) - cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh -diff -uNar openmpi-4.1.4.org/opal/include/opal/sys/architecture.h openmpi-4.1.4.sw/opal/include/opal/sys/architecture.h ---- openmpi-4.1.4.org/opal/include/opal/sys/architecture.h 2022-05-26 22:32:35.000000000 +0800 -+++ openmpi-4.1.4.sw/opal/include/opal/sys/architecture.h 2024-03-13 19:37:28.344848433 +0800 -@@ -42,6 +42,7 @@ - #define OPAL_MIPS 0070 - #define OPAL_ARM 0100 - 
#define OPAL_ARM64 0101 -+#define OPAL_SW_64 0120 - #define OPAL_S390 0110 - #define OPAL_S390X 0111 - #define OPAL_BUILTIN_SYNC 0200 -diff -uNar openmpi-4.1.4.org/opal/include/opal/sys/atomic.h openmpi-4.1.4.sw/opal/include/opal/sys/atomic.h ---- openmpi-4.1.4.org/opal/include/opal/sys/atomic.h 2022-05-26 22:32:35.000000000 +0800 -+++ openmpi-4.1.4.sw/opal/include/opal/sys/atomic.h 2024-03-13 19:37:28.344848433 +0800 -@@ -165,6 +165,8 @@ - #include "opal/sys/arm/atomic.h" - #elif OPAL_ASSEMBLY_ARCH == OPAL_ARM64 - #include "opal/sys/arm64/atomic.h" -+#elif OPAL_ASSEMBLY_ARCH == OPAL_SW_64 -+#include "opal/sys/sw_64/atomic.h" - #elif OPAL_ASSEMBLY_ARCH == OPAL_IA32 - #include "opal/sys/ia32/atomic.h" - #elif OPAL_ASSEMBLY_ARCH == OPAL_IA64 -diff -uNar openmpi-4.1.4.org/opal/include/opal/sys/cma.h openmpi-4.1.4.sw/opal/include/opal/sys/cma.h ---- openmpi-4.1.4.org/opal/include/opal/sys/cma.h 2022-05-26 22:32:35.000000000 +0800 -+++ openmpi-4.1.4.sw/opal/include/opal/sys/cma.h 2024-03-13 19:37:28.344848433 +0800 -@@ -64,6 +64,10 @@ - #define __NR_process_vm_readv 270 - #define __NR_process_vm_writev 271 - -+#elif OPAL_ASSEMBLY_ARCH == OPAL_SW_64 -+#define __NR_process_vm_readv 504 -+#define __NR_process_vm_writev 505 -+ - #elif OPAL_ASSEMBLY_ARCH == OPAL_MIPS - - #if _MIPS_SIM == _MIPS_SIM_ABI64 -diff -uNar openmpi-4.1.4.org/opal/include/opal/sys/Makefile.am openmpi-4.1.4.sw/opal/include/opal/sys/Makefile.am ---- openmpi-4.1.4.org/opal/include/opal/sys/Makefile.am 2022-05-26 22:32:35.000000000 +0800 -+++ openmpi-4.1.4.sw/opal/include/opal/sys/Makefile.am 2024-03-13 19:37:28.344848433 +0800 -@@ -34,6 +34,7 @@ - include opal/sys/x86_64/Makefile.am - include opal/sys/arm/Makefile.am - include opal/sys/arm64/Makefile.am -+include opal/sys/sw_64/Makefile.am - include opal/sys/ia32/Makefile.am - include opal/sys/powerpc/Makefile.am - include opal/sys/sparcv9/Makefile.am -diff -uNar openmpi-4.1.4.org/opal/include/opal/sys/sw_64/atomic.h 
openmpi-4.1.4.sw/opal/include/opal/sys/sw_64/atomic.h ---- openmpi-4.1.4.org/opal/include/opal/sys/sw_64/atomic.h 1970-01-01 08:00:00.000000000 +0800 -+++ openmpi-4.1.4.sw/opal/include/opal/sys/sw_64/atomic.h 2024-03-13 21:16:36.744847015 +0800 -@@ -0,0 +1,500 @@ -+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -+/* -+ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -+ * University Research and Technology -+ * Corporation. All rights reserved. -+ * Copyright (c) 2004-2005 The University of Tennessee and The University -+ * of Tennessee Research Foundation. All rights -+ * reserved. -+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -+ * University of Stuttgart. All rights reserved. -+ * Copyright (c) 2004-2005 The Regents of the University of California. -+ * All rights reserved. -+ * Copyright (c) 2010 IBM Corporation. All rights reserved. -+ * Copyright (c) 2010 ARM ltd. All rights reserved. -+ * Copyright (c) 2016-2017 Los Alamos National Security, LLC. All rights -+ * reserved. 
-+ * $COPYRIGHT$ -+ * -+ * Additional copyrights may follow -+ * -+ * $HEADER$ -+ */ -+ -+#if !defined(OPAL_SYS_ARCH_ATOMIC_H) -+ -+#define OPAL_SYS_ARCH_ATOMIC_H 1 -+ -+#if OPAL_GCC_INLINE_ASSEMBLY -+ -+#define OPAL_HAVE_ATOMIC_MEM_BARRIER 1 -+#define OPAL_HAVE_ATOMIC_LLSC_32 1 -+#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_32 1 -+#define OPAL_HAVE_ATOMIC_SWAP_32 1 -+#define OPAL_HAVE_ATOMIC_MATH_32 1 -+#define OPAL_HAVE_ATOMIC_COMPARE_EXCHANGE_64 1 -+#define OPAL_HAVE_ATOMIC_SWAP_64 1 -+#define OPAL_HAVE_ATOMIC_LLSC_64 1 -+#define OPAL_HAVE_ATOMIC_ADD_32 1 -+#define OPAL_HAVE_ATOMIC_AND_32 1 -+#define OPAL_HAVE_ATOMIC_OR_32 1 -+#define OPAL_HAVE_ATOMIC_XOR_32 1 -+#define OPAL_HAVE_ATOMIC_SUB_32 1 -+#define OPAL_HAVE_ATOMIC_ADD_64 1 -+#define OPAL_HAVE_ATOMIC_AND_64 1 -+#define OPAL_HAVE_ATOMIC_OR_64 1 -+#define OPAL_HAVE_ATOMIC_XOR_64 1 -+#define OPAL_HAVE_ATOMIC_SUB_64 1 -+ -+#define MB() __asm__ __volatile__ ("memb" : : : "memory") -+#define RMB() __asm__ __volatile__ ("memb" : : : "memory") -+#define WMB() __asm__ __volatile__ ("memb" : : : "memory") -+ -+/********************************************************************** -+ * -+ * Memory Barriers -+ * -+ *********************************************************************/ -+ -+static inline void opal_atomic_mb (void) -+{ -+ MB(); -+} -+ -+static inline void opal_atomic_rmb (void) -+{ -+ RMB(); -+} -+ -+static inline void opal_atomic_wmb (void) -+{ -+ WMB(); -+} -+ -+static inline void opal_atomic_isync (void) -+{ -+ __asm__ __volatile__ ("memb"); -+} -+ -+/********************************************************************** -+ * -+ * Atomic math operations -+ * -+ *********************************************************************/ -+ -+static inline bool opal_atomic_compare_exchange_strong_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval) -+{ -+ int32_t prev, tmp; -+ bool ret; -+ int32_t tmp1, tmp2; -+ -+ __asm__ __volatile__ (" ldi %2, %4 \n" //tmp1=addr -+ "1: lldl %0, 0(%2) \n" 
//ret=*tmp1 -+ " cmpeq %0, %5, %3 \n" -+ " mov %6, %1 \n" //tmp = newval -+ " lstw %1, 0(%2) \n" //*tmp=tmp1 -+ " beq %3,2f \n" //if(tmp2 == 0) goto 2 -+ " beq %1, 1b \n" //if(tmp == 0) goto 1, if store option failed goto 1 -+ "2: \n" -+ : "=&r" (prev), "=&r" (tmp), "=&r" (tmp1), "=&r" (tmp2) -+ : "r" (addr), "r" (*oldval), "r" (newval) -+ : "cc", "memory"); -+ -+ ret = (prev == *oldval); -+ *oldval = prev; -+ return ret; -+} -+ -+static inline int32_t opal_atomic_swap_32(volatile int32_t *addr, int32_t newval) -+{ -+ int32_t ret, tmp; -+ int32_t tmp1; -+ -+ __asm__ __volatile__ (" ldi %2, %3 \n" //tmp1 = addr -+ "1: lldw %0, 0(%2) \n" //ret=*tmp1 -+ " mov %4, %1 \n" //tmp=newval -+ " lstw %1, 0(%2) \n" //*tmp1=tmp -+ " beq %1, 1b \n" //if(tmp == 0) goto 1, start again on atomic error -+ : "=&r" (ret), "=&r" (tmp),"=&r" (tmp1) -+ : "r" (addr), "r" (newval) -+ : "cc", "memory"); -+ -+ return ret; -+} -+ -+/* these two functions aren't inlined in the non-gcc case because then -+ there would be two function calls (since neither cmpset_32 nor -+ atomic_?mb can be inlined). 
Instead, we "inline" them by hand in -+ the assembly, meaning there is one function call overhead instead -+ of two */ -+static inline bool opal_atomic_compare_exchange_strong_acq_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval) -+{ -+ int32_t prev, tmp; -+ bool ret; -+ int32_t tmp1, tmp2; -+ -+ __asm__ __volatile__ (" ldi %2, %4 \n" //tmp1=addr -+ "1: lldw %0, 0(%2) \n" //ret=*tmp1 -+ " cmpeq %0, %5, %3 \n" -+ " mov %6, %1 \n" //tmp=newval -+ " lstw %1, 0(%2) \n" //*tmp1=tmp -+ " beq %3, 2f \n" //if(tmp2 == 0) goto 2 -+ " beq %1, 1b \n" //if(tmp == 0) goto 1, if store option failed goto 1 -+ "2: \n" -+ : "=&r" (prev), "=&r" (tmp), "=&r" (tmp1), "=&r" (tmp2) -+ : "r" (addr), "r" (*oldval), "r" (newval) -+ : "cc", "memory"); -+ -+ ret = (prev == *oldval); -+ *oldval = prev; -+ return ret; -+} -+ -+ -+static inline bool opal_atomic_compare_exchange_strong_rel_32 (volatile int32_t *addr, int32_t *oldval, int32_t newval) -+{ -+ int32_t prev, tmp; -+ bool ret; -+ int32_t tmp1, tmp2; -+ -+ __asm__ __volatile__ (" ldi %2, %4 \n" //tmp1=addr -+ "1: lldw %0, 0(%2) \n" //ret=*tmp1 -+ " cmpeq %0, %5, %3 \n" -+ " mov %6, %1 \n" //tmp=newval -+ " lstw %1, 0(%2) \n" //*tmp1=tmp -+ " beq %3, 2f \n" //if(tmp2 == 0) goto 2 -+ " beq %1, 1b \n" //if(tmp == 0) goto 1, if store option failed goto 1 -+ "2: \n" -+ : "=&r" (prev), "=&r" (tmp), "=&r" (tmp1), "=&r" (tmp2) -+ : "r" (addr), "r" (*oldval), "r" (newval) -+ : "cc", "memory"); -+ -+ ret = (prev == *oldval); -+ *oldval = prev; -+ return ret; -+} -+ -+#define opal_atomic_ll_32(addr, ret) \ -+ do { \ -+ volatile int32_t *_addr = (addr); \ -+ int32_t _ret; \ -+ \ -+ __asm__ __volatile__ ( \ -+ "ldl %0,0(%1) \n" \ -+ : "=&r" (ret) \ -+ : "m" (*addr)); \ -+ \ -+ ret = (typeof(ret)) _ret; \ -+ } while (0) -+ -+#define opal_atomic_sc_32(addr, newval, ret) \ -+ do { \ -+ volatile int32_t *_addr = (addr); \ -+ int32_t _newval = (int32_t) newval; \ -+ int _ret; \ -+ \ -+ __asm__ __volatile__ ( \ -+ "stl %2,0(%1) \n" \ -+ : "=&r" 
(ret) \ -+ : "m" (*addr), "r" (newval) \ -+ : "memory"); \ -+ \ -+ ret = (_ret == 0); \ -+ } while (0) -+ -+static inline bool opal_atomic_compare_exchange_strong_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval) -+{ -+ int64_t prev; -+ int tmp; -+ bool ret; -+ int64_t tmp1, tmp2; -+ -+ __asm__ __volatile__ (" ldi %2, %4 \n" //tmp1=addr -+ "1: lldl %0, 0(%2) \n" //ret=*tmp1 -+ " cmpeq %0, %5, %3 \n" -+ " mov %6, %1 \n" //tmp = newval -+ " lstw %1, 0(%2) \n" //*tmp=tmp1 -+ " beq %3,2f \n" //if(tmp2 == 0) goto 2 -+ " beq %1, 1b \n" //if(tmp == 0) goto 1, if store option failed goto 1 -+ "2: \n" -+ : "=&r" (prev), "=&r" (tmp), "=&r" (tmp1), "=&r" (tmp2) -+ : "r" (addr), "r" (*oldval), "r" (newval) -+ : "cc", "memory"); -+ -+ ret = (prev == *oldval); -+ *oldval = prev; -+ return ret; -+} -+ -+static inline int64_t opal_atomic_swap_64 (volatile int64_t *addr, int64_t newval) -+{ -+ int64_t ret; -+ int tmp; -+ int64_t tmp1; -+ -+ __asm__ __volatile__ (" ldi %2, %3 \n" //tmp1 = addr -+ "1: lldw %0, 0(%2) \n" //ret=*tmp1 -+ " mov %4, %1 \n" //tmp=newval -+ " lstw %1, 0(%2) \n" //*tmp1=tmp -+ " beq %1, 1b \n" //if(tmp == 0) goto 1, start again on atomic error -+ : "=&r" (ret), "=&r" (tmp),"=&r" (tmp1) -+ : "r" (addr), "r" (newval) -+ : "cc", "memory"); -+ -+ return ret; -+} -+ -+/* these two functions aren't inlined in the non-gcc case because then -+ there would be two function calls (since neither cmpset_64 nor -+ atomic_?mb can be inlined). 
Instead, we "inline" them by hand in -+ the assembly, meaning there is one function call overhead instead -+ of two */ -+static inline bool opal_atomic_compare_exchange_strong_acq_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval) -+{ -+ int64_t prev; -+ int tmp; -+ bool ret; -+ int64_t tmp1, tmp2; -+ -+ __asm__ __volatile__ (" ldi %2, %4 \n" //tmp1=addr -+ "1: lldl %0, 0(%2) \n" //ret=*tmp1 -+ " cmpeq %0, %5, %3 \n" -+ " mov %6, %1 \n" //tmp = newval -+ " lstw %1, 0(%2) \n" //*tmp=tmp1 -+ " beq %3,2f \n" //if(tmp2 == 0) goto 2 -+ " beq %1, 1b \n" //if(tmp == 0) goto 1, if store option failed goto 1 -+ "2: \n" -+ : "=&r" (prev), "=&r" (tmp), "=&r" (tmp1), "=&r" (tmp2) -+ : "r" (addr), "r" (*oldval), "r" (newval) -+ : "cc", "memory"); -+ -+ ret = (prev == *oldval); -+ *oldval = prev; -+ return ret; -+} -+ -+ -+static inline bool opal_atomic_compare_exchange_strong_rel_64 (volatile int64_t *addr, int64_t *oldval, int64_t newval) -+{ -+ int64_t prev; -+ int tmp; -+ bool ret; -+ int64_t tmp1, tmp2; -+ -+ __asm__ __volatile__ (" ldi %2, %4 \n" //tmp1=addr -+ "1: lldl %0, 0(%2) \n" //ret=*tmp1 -+ " cmpeq %0, %5, %3 \n" -+ " mov %6, %1 \n" //tmp = newval -+ " lstw %1, 0(%2) \n" //*tmp=tmp1 -+ " beq %3,2f \n" //if(tmp2 == 0) goto 2 -+ " beq %1, 1b \n" //if(tmp == 0) goto 1, if store option failed goto 1 -+ "2: \n" -+ : "=&r" (prev), "=&r" (tmp), "=&r" (tmp1), "=&r" (tmp2) -+ : "r" (addr), "r" (*oldval), "r" (newval) -+ : "cc", "memory"); -+ -+ ret = (prev == *oldval); -+ *oldval = prev; -+ return ret; -+} -+ -+#define opal_atomic_ll_64(addr, ret) \ -+ do { \ -+ volatile int64_t *_addr = (addr); \ -+ int64_t _ret; \ -+ \ -+ __asm__ __volatile__ ("ldi $1, %1 \n" \ -+ "lldl %0,0($1) \n" \ -+ : "=&r" (ret) \ -+ : "m" (*addr)); \ -+ \ -+ ret = (typeof(ret)) _ret; \ -+ } while (0) -+ -+#define opal_atomic_sc_64(addr, newval, ret) \ -+ do { \ -+ volatile int64_t *_addr = (addr); \ -+ int64_t _newval = (int64_t) newval; \ -+ int _ret; \ -+ \ -+ __asm__ __volatile__ ("stl 
%2,0(%1) \n" \ -+ : "=&r" (_ret) \ -+ : "r" (_addr), "r" (_newval) \ -+ : "cc", "memory"); \ -+ \ -+ ret = (_ret == 0); \ -+ } while (0) -+ -+static inline int32_t opal_atomic_fetch_add_32(volatile int32_t *addr, int32_t value) -+{ -+ int32_t newval, old; -+ unsigned long ptr; -+ -+ __asm__ __volatile__(" ldi %2, %3 \n"//ptr = addr -+ "1: lldw %1, 0(%2) \n"//old = *ptr -+ " addw %1, %4, %0 \n"//newval = old + value -+ " lstw %0, 0(%2) \n"//*ptr = newval(with atomic check) -+ " beq %0, 1b \n"//if(tmp == 0) goto 1, start again on atomic error -+ : "=&r" (newval), "=&r" (old), "=&r" (ptr) -+ : "r" (addr), "r" (value) -+ : "memory"); -+ -+ return old; -+} -+ -+static inline int32_t opal_atomic_fetch_sub_32(volatile int32_t *addr, int32_t value) -+{ -+ int32_t newval, old; -+ unsigned long ptr; -+ -+ __asm__ __volatile__(" ldi %2, %3 \n"//ptr = addr -+ "1: lldw %1, 0(%2) \n"//old = *ptr -+ " subw %1, %4, %0 \n"//newval = old - value -+ " lstw %0, 0(%2) \n"//*ptr = newval(with atomic check) -+ " beq %0, 1b \n"//if(tmp == 0) goto 1, start again on atomic error -+ : "=&r" (newval), "=&r" (old), "=&r" (ptr) -+ : "r" (addr), "r" (value) -+ : "memory"); -+ -+ return old; -+} -+ -+static inline int32_t opal_atomic_fetch_and_32(volatile int32_t *addr, int32_t value) -+{ -+ int32_t newval, old; -+ unsigned long ptr; -+ -+ __asm__ __volatile__(" ldi %2, %3 \n"//ptr = addr -+ "1: lldw %1, 0(%2) \n"//old = *ptr -+ " and %1, %4, %0 \n"//newval = old & value -+ " lstw %0, 0(%2) \n"//*ptr = newval(with atomic check) -+ " beq %0, 1b \n"//if(tmp == 0) goto 1, start again on atomic error -+ : "=&r" (newval), "=&r" (old),"=&r" (ptr) -+ : "r" (addr), "r" (value) -+ : "memory"); -+ -+ return old; -+} -+ -+static inline int32_t opal_atomic_fetch_or_32(volatile int32_t *addr, int32_t value) -+{ -+ int32_t newval, old; -+ unsigned long ptr; -+ -+ __asm__ __volatile__(" ldi %2, %3 \n"//ptr = addr -+ "1: lldw %1, 0(%2) \n"//old = *ptr -+ " bis %1, %4, %0 \n"//newval = old | value -+ " lstw %0, 
0(%2) \n"//*ptr = newval(with atomic check) -+ " beq %0, 1b \n"//if(tmp == 0) goto 1, start again on atomic error -+ : "=&r" (newval), "=&r" (old), "=&r" (ptr) -+ : "r" (addr), "r" (value) -+ : "memory"); -+ -+ return old; -+} -+ -+static inline int32_t opal_atomic_fetch_xor_32(volatile int32_t *addr, int32_t value) -+{ -+ int32_t newval, old; -+ unsigned long ptr; -+ -+ __asm__ __volatile__(" ldi %2, %3 \n"//ptr = addr -+ "1: lldw %1, 0(%2) \n"//old = *ptr -+ " xor %1, %4, %0 \n"//newval = old ^ value -+ " lstw %0, 0(%2) \n"//*ptr = newval(with atomic check) -+ " beq %0, 1b \n"//if(tmp == 0) goto 1, start again on atomic error -+ : "=&r" (newval), "=&r" (old), "=&r" (ptr) -+ : "r" (addr), "r" (value) -+ : "memory"); -+ -+ return old; -+} -+ -+static inline int64_t opal_atomic_fetch_add_64(volatile int64_t *addr, int64_t value) -+{ -+ int64_t newval, old; -+ unsigned long ptr; -+ -+ __asm__ __volatile__(" ldi %2, %3 \n"//ptr = addr -+ "1: lldl %1, 0(%2) \n"//old = *ptr -+ " addl %1, %4, %0 \n"//newval = old + value -+ " lstl %0, 0(%2) \n"//*ptr = newval(with atomic check) -+ " beq %0, 1b \n"//if(tmp == 0) goto 1, start again on atomic error -+ : "=&r" (newval), "=&r" (old), "=&r" (ptr) -+ : "r" (addr), "r" (value) -+ : "memory"); -+ -+ return old; -+} -+ -+static inline int64_t opal_atomic_fetch_sub_64(volatile int64_t *addr, int64_t value) -+{ -+ int64_t newval, old; -+ unsigned long ptr; -+ -+ __asm__ __volatile__(" ldi %2, %3 \n"//ptr = addr -+ "1: lldl %1, 0(%2) \n"//old = *ptr -+ " subl %1, %4, %0 \n"//newval = old - value -+ " lstl %0, 0(%2) \n"//*ptr = newval(with atomic check) -+ " beq %0, 1b \n"//if(tmp == 0) goto 1, start again on atomic error -+ : "=&r" (newval), "=&r" (old),"=&r" (ptr) -+ : "r" (addr), "r" (value) -+ : "memory"); -+ -+ return old; -+} -+ -+static inline int64_t opal_atomic_fetch_and_64(volatile int64_t *addr, int64_t value) -+{ -+ int64_t newval, old; -+ unsigned long ptr; -+ -+ __asm__ __volatile__(" ldi %2, %3 \n"//ptr = addr -+ "1: 
lldl %1, 0(%2) \n"//old = *ptr -+ " and %1, %4, %0 \n"//newval = old & value -+ " lstl %0, 0(%2) \n"//*ptr = newval(with atomic check) -+ " beq %0, 1b \n"//if(tmp == 0) goto 1, start again on atomic error -+ : "=&r" (newval), "=&r" (old), "=&r" (ptr) -+ : "r" (addr), "r" (value) -+ : "memory"); -+ -+ return old; -+} -+ -+static inline int64_t opal_atomic_fetch_or_64(volatile int64_t *addr, int64_t value) -+{ -+ int64_t newval, old; -+ unsigned long ptr; -+ -+ __asm__ __volatile__(" ldi %2, %3 \n"//ptr = addr -+ "1: lldl %1, 0(%2) \n"//old = *ptr -+ " bis %1, %4, %0 \n"//newval = old | value -+ " lstl %0, 0(%2) \n"//*ptr = newval(with atomic check) -+ " beq %0, 1b \n"//if(tmp == 0) goto 1, start again on atomic error -+ : "=&r" (newval), "=&r" (old), "=&r" (ptr) -+ : "r" (addr), "r" (value) -+ : "memory"); -+ -+ return old; -+} -+ -+static inline int64_t opal_atomic_fetch_xor_64(volatile int64_t *addr, int64_t value) -+{ -+ int64_t newval, old; -+ unsigned long ptr; -+ -+ __asm__ __volatile__(" ldi %2, %3 \n"//ptr = addr -+ "1: lldl %1, 0(%2) \n"//old = *ptr -+ " xor %1, %4, %0 \n"//newval = old ^ value -+ " lstl %0, 0(%2) \n"//*ptr = newval(with atomic check) -+ " beq %0, 1b \n"//if(tmp == 0) goto 1, start again on atomic error -+ : "=&r" (newval), "=&r" (old),"=&r" (ptr) -+ : "r" (addr), "r" (value) -+ : "memory"); -+ -+ return old; -+} -+ -+#endif /* OPAL_GCC_INLINE_ASSEMBLY */ -+ -+#endif /* ! OPAL_SYS_ARCH_ATOMIC_H */ -diff -uNar openmpi-4.1.4.org/opal/include/opal/sys/sw_64/Makefile.am openmpi-4.1.4.sw/opal/include/opal/sys/sw_64/Makefile.am ---- openmpi-4.1.4.org/opal/include/opal/sys/sw_64/Makefile.am 1970-01-01 08:00:00.000000000 +0800 -+++ openmpi-4.1.4.sw/opal/include/opal/sys/sw_64/Makefile.am 2024-03-13 19:37:28.344848433 +0800 -@@ -0,0 +1,24 @@ -+# -+# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -+# University Research and Technology -+# Corporation. All rights reserved. 
-+# Copyright (c) 2004-2008 The University of Tennessee and The University
-+#                         of Tennessee Research Foundation.  All rights
-+#                         reserved.
-+# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
-+#                         University of Stuttgart.  All rights reserved.
-+# Copyright (c) 2004-2005 The Regents of the University of California.
-+#                         All rights reserved.
-+# $COPYRIGHT$
-+#
-+# Additional copyrights may follow
-+#
-+# $HEADER$
-+#
-+
-+# This makefile.am does not stand on its own - it is included from opal/include/Makefile.am
-+
-+headers += \
-+    opal/sys/sw_64/atomic.h \
-+    opal/sys/sw_64/timer.h
-+
-diff -uNar openmpi-4.1.4.org/opal/include/opal/sys/sw_64/timer.h openmpi-4.1.4.sw/opal/include/opal/sys/sw_64/timer.h
---- openmpi-4.1.4.org/opal/include/opal/sys/sw_64/timer.h	1970-01-01 08:00:00.000000000 +0800
-+++ openmpi-4.1.4.sw/opal/include/opal/sys/sw_64/timer.h	2024-03-13 19:37:28.344848433 +0800
-@@ -0,0 +1,45 @@
-+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
-+/*
-+ * Copyright (c) 2008      The University of Tennessee and The University
-+ *                         of Tennessee Research Foundation.  All rights
-+ *                         reserved.
-+ * Copyright (c) 2016      Broadcom Limited. All rights reserved.
-+ * Copyright (c) 2016      Los Alamos National Security, LLC. All rights
-+ *                         reserved.
-+ * $COPYRIGHT$
-+ *
-+ * Additional copyrights may follow
-+ *
-+ * $HEADER$
-+ */
-+
-+#ifndef OPAL_SYS_ARCH_TIMER_H
-+#define OPAL_SYS_ARCH_TIMER_H 1
-+
-+#include /* NOTE(review): the header name is missing in this view; uint64_t below needs a declaration - confirm against the real file */
-+
-+typedef uint64_t opal_timer_t; /* type of the value produced by the "rtc" reads below */
-+
-+static inline opal_timer_t
-+opal_sys_timer_get_cycles(void) /* returns one "rtc" sample, taken after a "memb" */
-+{
-+    opal_timer_t ret;
-+
-+    __asm__ __volatile__ ("memb" ::: "memory");   /* "memb" with a memory clobber, issued ahead of the read */
-+    __asm__ __volatile__ ("rtc %0" : "=r" (ret)); /* the "rtc" result lands in ret */
-+
-+    return ret;
-+}
-+
-+
-+static inline opal_timer_t
-+opal_sys_timer_freq(void) /* returns whatever a single "rtc" read yields */
-+{
-+    opal_timer_t freq;
-+    __asm__ __volatile__ ("rtc %0" : "=r" (freq)); /* NOTE(review): same instruction as opal_sys_timer_get_cycles(), so this looks like a counter sample rather than a frequency - confirm before relying on it */
-+    return (opal_timer_t)(freq);
-+}
-+
-+#define OPAL_HAVE_SYS_TIMER_GET_CYCLES 1
-+
-+#endif /* !
OPAL_SYS_ARCH_TIMER_H */ -diff -uNar openmpi-4.1.4.org/opal/include/opal/sys/timer.h openmpi-4.1.4.sw/opal/include/opal/sys/timer.h ---- openmpi-4.1.4.org/opal/include/opal/sys/timer.h 2022-05-26 22:32:35.000000000 +0800 -+++ openmpi-4.1.4.sw/opal/include/opal/sys/timer.h 2024-03-13 19:37:28.344848433 +0800 -@@ -76,6 +76,8 @@ - #include "opal/sys/arm/timer.h" - #elif OPAL_ASSEMBLY_ARCH == OPAL_ARM64 - #include "opal/sys/arm64/timer.h" -+#elif OPAL_ASSEMBLY_ARCH == OPAL_SW_64 -+#include "opal/sys/sw_64/timer.h" - #elif OPAL_ASSEMBLY_ARCH == OPAL_IA32 - #include "opal/sys/ia32/timer.h" - #elif OPAL_ASSEMBLY_ARCH == OPAL_IA64 -diff -uNar openmpi-4.1.4.org/opal/mca/event/libevent2022/libevent/aclocal.m4 openmpi-4.1.4.sw/opal/mca/event/libevent2022/libevent/aclocal.m4 ---- openmpi-4.1.4.org/opal/mca/event/libevent2022/libevent/aclocal.m4 2022-05-26 22:32:54.000000000 +0800 -+++ openmpi-4.1.4.sw/opal/mca/event/libevent2022/libevent/aclocal.m4 2024-03-13 19:37:28.354848433 +0800 -@@ -1,4 +1,4 @@ --# generated automatically by aclocal 1.15 -*- Autoconf -*- -+# generated automatically by aclocal 1.16 -*- Autoconf -*- - - # Copyright (C) 1996-2014 Free Software Foundation, Inc. - -@@ -32,10 +32,10 @@ - # generated from the m4 files accompanying Automake X.Y. - # (This private macro should not be called outside this file.) - AC_DEFUN([AM_AUTOMAKE_VERSION], --[am__api_version='1.15' -+[am__api_version='1.16' - dnl Some users find AM_AUTOMAKE_VERSION and mistake it for a way to - dnl require some minimum version. Point them to the right macro. --m4_if([$1], [1.15], [], -+m4_if([$1], [1.16.5], [], - [AC_FATAL([Do not call $0, use AM_INIT_AUTOMAKE([$1]).])])dnl - ]) - -@@ -51,7 +51,7 @@ - # Call AM_AUTOMAKE_VERSION and AM_AUTOMAKE_VERSION so they can be traced. - # This function is AC_REQUIREd by AM_INIT_AUTOMAKE. 
- AC_DEFUN([AM_SET_CURRENT_AUTOMAKE_VERSION], --[AM_AUTOMAKE_VERSION([1.15])dnl -+[AM_AUTOMAKE_VERSION([1.16.5])dnl - m4_ifndef([AC_AUTOCONF_VERSION], - [m4_copy([m4_PACKAGE_VERSION], [AC_AUTOCONF_VERSION])])dnl - _AM_AUTOCONF_VERSION(m4_defn([AC_AUTOCONF_VERSION]))]) -diff -uNar openmpi-4.1.4.org/opal/mca/event/libevent2022/libevent/config.guess openmpi-4.1.4.sw/opal/mca/event/libevent2022/libevent/config.guess ---- openmpi-4.1.4.org/opal/mca/event/libevent2022/libevent/config.guess 2022-05-26 22:35:49.000000000 +0800 -+++ openmpi-4.1.4.sw/opal/mca/event/libevent2022/libevent/config.guess 2024-03-13 19:37:28.354848433 +0800 -@@ -935,6 +935,14 @@ - UNAME_MACHINE=aarch64_be - echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" - exit ;; -+ sw_64:Linux:*:*) -+ case $(sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' /proc/cpuinfo 2>/dev/null) in -+ sw) UNAME_MACHINE=sw_64 ;; -+ esac -+ objdump --private-headers /bin/sh | grep -q ld.so.1 -+ if test "$?" = 0 ; then LIBC=gnulibc1 ; fi -+ echo "$UNAME_MACHINE"-sunway-linux-"$LIBC" -+ exit ;; - alpha:Linux:*:*) - case $(sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' /proc/cpuinfo 2>/dev/null) in - EV5) UNAME_MACHINE=alphaev5 ;; -diff -uNar openmpi-4.1.4.org/opal/mca/event/libevent2022/libevent/config.sub openmpi-4.1.4.sw/opal/mca/event/libevent2022/libevent/config.sub ---- openmpi-4.1.4.org/opal/mca/event/libevent2022/libevent/config.sub 2022-05-26 22:35:49.000000000 +0800 -+++ openmpi-4.1.4.sw/opal/mca/event/libevent2022/libevent/config.sub 2024-03-13 19:37:28.354848433 +0800 -@@ -1158,6 +1158,7 @@ - case $cpu in - 1750a | 580 \ - | a29k \ -+ | sw_64 \ - | aarch64 | aarch64_be \ - | abacus \ - | alpha | alphaev[4-8] | alphaev56 | alphaev6[78] \ -diff -uNar openmpi-4.1.4.org/opal/mca/event/libevent2022/libevent/configure openmpi-4.1.4.sw/opal/mca/event/libevent2022/libevent/configure ---- openmpi-4.1.4.org/opal/mca/event/libevent2022/libevent/configure 2022-05-26 22:32:56.000000000 +0800 -+++ 
openmpi-4.1.4.sw/opal/mca/event/libevent2022/libevent/configure 2024-03-13 19:37:28.354848433 +0800 -@@ -2642,7 +2642,7 @@ - - - # Open MPI: changed to one 1 arg AM INIT_AUTOMAKE --am__api_version='1.15' -+am__api_version='1.16' - - ac_aux_dir= - for ac_dir in "$srcdir" "$srcdir/.." "$srcdir/../.."; do -diff -uNar openmpi-4.1.4.org/opal/mca/pmix/pmix3x/pmix/aclocal.m4 openmpi-4.1.4.sw/opal/mca/pmix/pmix3x/pmix/aclocal.m4 ---- openmpi-4.1.4.org/opal/mca/pmix/pmix3x/pmix/aclocal.m4 2022-05-26 22:33:09.000000000 +0800 -+++ openmpi-4.1.4.sw/opal/mca/pmix/pmix3x/pmix/aclocal.m4 2024-03-13 19:37:28.364848433 +0800 -@@ -1,4 +1,4 @@ --# generated automatically by aclocal 1.15 -*- Autoconf -*- -+# generated automatically by aclocal 1.16 -*- Autoconf -*- - - # Copyright (C) 1996-2014 Free Software Foundation, Inc. - -@@ -32,10 +32,10 @@ - # generated from the m4 files accompanying Automake X.Y. - # (This private macro should not be called outside this file.) - AC_DEFUN([AM_AUTOMAKE_VERSION], --[am__api_version='1.15' -+[am__api_version='1.16' - dnl Some users find AM_AUTOMAKE_VERSION and mistake it for a way to - dnl require some minimum version. Point them to the right macro. --m4_if([$1], [1.15], [], -+m4_if([$1], [1.16.5], [], - [AC_FATAL([Do not call $0, use AM_INIT_AUTOMAKE([$1]).])])dnl - ]) - -@@ -51,7 +51,7 @@ - # Call AM_AUTOMAKE_VERSION and AM_AUTOMAKE_VERSION so they can be traced. - # This function is AC_REQUIREd by AM_INIT_AUTOMAKE. 
- AC_DEFUN([AM_SET_CURRENT_AUTOMAKE_VERSION], --[AM_AUTOMAKE_VERSION([1.15])dnl -+[AM_AUTOMAKE_VERSION([1.16.5])dnl - m4_ifndef([AC_AUTOCONF_VERSION], - [m4_copy([m4_PACKAGE_VERSION], [AC_AUTOCONF_VERSION])])dnl - _AM_AUTOCONF_VERSION(m4_defn([AC_AUTOCONF_VERSION]))]) -diff -uNar openmpi-4.1.4.org/opal/mca/pmix/pmix3x/pmix/config/config.guess openmpi-4.1.4.sw/opal/mca/pmix/pmix3x/pmix/config/config.guess ---- openmpi-4.1.4.org/opal/mca/pmix/pmix3x/pmix/config/config.guess 2022-05-26 22:35:49.000000000 +0800 -+++ openmpi-4.1.4.sw/opal/mca/pmix/pmix3x/pmix/config/config.guess 2024-03-13 19:37:28.364848433 +0800 -@@ -935,6 +935,14 @@ - UNAME_MACHINE=aarch64_be - echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" - exit ;; -+ sw_64:Linux:*:*) -+ case $(sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' /proc/cpuinfo 2>/dev/null) in -+ sw) UNAME_MACHINE=sw_64 ;; -+ esac -+ objdump --private-headers /bin/sh | grep -q ld.so.1 -+ if test "$?" = 0 ; then LIBC=gnulibc1 ; fi -+ echo "$UNAME_MACHINE"-sunway-linux-"$LIBC" -+ exit ;; - alpha:Linux:*:*) - case $(sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' /proc/cpuinfo 2>/dev/null) in - EV5) UNAME_MACHINE=alphaev5 ;; -diff -uNar openmpi-4.1.4.org/opal/mca/pmix/pmix3x/pmix/config/config.sub openmpi-4.1.4.sw/opal/mca/pmix/pmix3x/pmix/config/config.sub ---- openmpi-4.1.4.org/opal/mca/pmix/pmix3x/pmix/config/config.sub 2022-05-26 22:35:49.000000000 +0800 -+++ openmpi-4.1.4.sw/opal/mca/pmix/pmix3x/pmix/config/config.sub 2024-03-13 19:37:28.364848433 +0800 -@@ -1158,6 +1158,7 @@ - case $cpu in - 1750a | 580 \ - | a29k \ -+ | sw_64 \ - | aarch64 | aarch64_be \ - | abacus \ - | alpha | alphaev[4-8] | alphaev56 | alphaev6[78] \ -diff -uNar openmpi-4.1.4.org/opal/mca/pmix/pmix3x/pmix/config/pmix_config_asm.m4 openmpi-4.1.4.sw/opal/mca/pmix/pmix3x/pmix/config/pmix_config_asm.m4 ---- openmpi-4.1.4.org/opal/mca/pmix/pmix3x/pmix/config/pmix_config_asm.m4 2022-05-26 22:32:35.000000000 +0800 -+++ openmpi-4.1.4.sw/opal/mca/pmix/pmix3x/pmix/config/pmix_config_asm.m4 
2024-03-13 19:37:28.364848433 +0800
-@@ -1051,6 +1051,11 @@
-             PMIX_ASM_SUPPORT_64BIT=0
-         fi
-         case "${host}" in
-+	sw_64-*)
-+            pmix_cv_asm_arch="SW_64"
-+            PMIX_ASM_SUPPORT_64BIT=1
-+            PMIX_GCC_INLINE_ASSIGN='"mov 0,%0" : "=&r"(ret)'
-+            ;;
-             x86_64-*x32)
-                 pmix_cv_asm_arch="X86_64"
-                 PMIX_ASM_SUPPORT_64BIT=1
-diff -uNar openmpi-4.1.4.org/opal/mca/pmix/pmix3x/pmix/configure openmpi-4.1.4.sw/opal/mca/pmix/pmix3x/pmix/configure
---- openmpi-4.1.4.org/opal/mca/pmix/pmix3x/pmix/configure	2022-05-26 22:33:16.000000000 +0800
-+++ openmpi-4.1.4.sw/opal/mca/pmix/pmix3x/pmix/configure	2024-03-13 19:37:28.384848433 +0800
-@@ -3629,7 +3629,7 @@
- program_prefix=${target_alias}-
- 
- # Init automake
--am__api_version='1.15'
-+am__api_version='1.16'
- 
- # Find a good install program.  We prefer a C program (faster),
- # so one script is as good as another.  But avoid the broken or
-diff -uNar openmpi-4.1.4.org/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/architecture.h openmpi-4.1.4.sw/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/architecture.h
---- openmpi-4.1.4.org/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/architecture.h	2022-05-26 22:32:35.000000000 +0800
-+++ openmpi-4.1.4.sw/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/architecture.h	2024-03-13 19:37:28.384848433 +0800
-@@ -37,6 +37,7 @@
- #define PMIX_POWERPC64      0051
- #define PMIX_ARM            0100
- #define PMIX_ARM64          0101
-+#define PMIX_SW_64          0120
- #define PMIX_BUILTIN_GCC    0202
- #define PMIX_BUILTIN_NO     0203
- #define PMIX_BUILTIN_C11    0204
-diff -uNar openmpi-4.1.4.org/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/atomic.h openmpi-4.1.4.sw/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/atomic.h
---- openmpi-4.1.4.org/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/atomic.h	2022-05-26 22:32:35.000000000 +0800
-+++ openmpi-4.1.4.sw/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/atomic.h	2024-03-13 19:37:28.384848433 +0800
-@@ -170,6 +170,8 @@
- #include "src/atomics/sys/x86_64/atomic.h"
- #elif PMIX_ASSEMBLY_ARCH == PMIX_ARM
- #include "src/atomics/sys/arm/atomic.h"
-+#elif PMIX_ASSEMBLY_ARCH == PMIX_SW_64
-+#include "src/atomics/sys/sw_64/atomic.h"
- #elif PMIX_ASSEMBLY_ARCH == PMIX_ARM64
- #include "src/atomics/sys/arm64/atomic.h"
- #elif PMIX_ASSEMBLY_ARCH == PMIX_IA32
-diff -uNar openmpi-4.1.4.org/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/cma.h openmpi-4.1.4.sw/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/cma.h
---- openmpi-4.1.4.org/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/cma.h	2022-05-26 22:32:35.000000000 +0800
-+++ openmpi-4.1.4.sw/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/cma.h	2024-03-13 19:37:28.384848433 +0800
-@@ -62,6 +62,9 @@
- #define __NR_process_vm_readv 270
- #define __NR_process_vm_writev 271
- 
-+#elif PMIX_ASSEMBLY_ARCH == PMIX_SW_64 /* one more arm of the per-architecture chain that ends in the #else/#error below */
-+#define __NR_process_vm_readv 504
-+#define __NR_process_vm_writev 505
- 
- #else
- #error "Unsupported architecture for process_vm_readv and process_vm_writev syscalls"
-diff -uNar openmpi-4.1.4.org/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/Makefile.include openmpi-4.1.4.sw/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/Makefile.include
---- openmpi-4.1.4.org/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/Makefile.include	2022-05-26 22:32:35.000000000 +0800
-+++ openmpi-4.1.4.sw/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/Makefile.include	2024-03-13 19:37:28.384848433 +0800
-@@ -36,6 +36,7 @@
- include atomics/sys/x86_64/Makefile.include
- include atomics/sys/arm/Makefile.include
- include atomics/sys/arm64/Makefile.include
-+include atomics/sys/sw_64/Makefile.include
- include atomics/sys/ia32/Makefile.include
- include atomics/sys/powerpc/Makefile.include
- include atomics/sys/gcc_builtin/Makefile.include
-diff -uNar openmpi-4.1.4.org/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/sw_64/atomic.h openmpi-4.1.4.sw/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/sw_64/atomic.h
---- openmpi-4.1.4.org/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/sw_64/atomic.h	1970-01-01 08:00:00.000000000 +0800
-+++ openmpi-4.1.4.sw/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/sw_64/atomic.h
2024-03-13 21:16:10.364847021 +0800 -@@ -0,0 +1,499 @@ -+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -+/* -+ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -+ * University Research and Technology -+ * Corporation. All rights reserved. -+ * Copyright (c) 2004-2005 The University of Tennessee and The University -+ * of Tennessee Research Foundation. All rights -+ * reserved. -+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -+ * University of Stuttgart. All rights reserved. -+ * Copyright (c) 2004-2005 The Regents of the University of California. -+ * All rights reserved. -+ * Copyright (c) 2010 IBM Corporation. All rights reserved. -+ * Copyright (c) 2010 ARM ltd. All rights reserved. -+ * Copyright (c) 2016-2018 Los Alamos National Security, LLC. All rights -+ * reserved. -+ * Copyright (c) 2018 Intel, Inc. All rights reserved. -+ * $COPYRIGHT$ -+ * -+ * Additional copyrights may follow -+ * -+ * $HEADER$ -+ */ -+ -+#if !defined(PMIX_SYS_ARCH_ATOMIC_H) -+ -+#define PMIX_SYS_ARCH_ATOMIC_H 1 -+ -+#if PMIX_GCC_INLINE_ASSEMBLY -+ -+#define PMIX_HAVE_ATOMIC_MEM_BARRIER 1 -+#define PMIX_HAVE_ATOMIC_LLSC_32 1 -+#define PMIX_HAVE_ATOMIC_COMPARE_EXCHANGE_32 1 -+#define PMIX_HAVE_ATOMIC_SWAP_32 1 -+#define PMIX_HAVE_ATOMIC_MATH_32 1 -+#define PMIX_HAVE_ATOMIC_COMPARE_EXCHANGE_64 1 -+#define PMIX_HAVE_ATOMIC_SWAP_64 1 -+#define PMIX_HAVE_ATOMIC_LLSC_64 1 -+#define PMIX_HAVE_ATOMIC_ADD_32 1 -+#define PMIX_HAVE_ATOMIC_AND_32 1 -+#define PMIX_HAVE_ATOMIC_OR_32 1 -+#define PMIX_HAVE_ATOMIC_XOR_32 1 -+#define PMIX_HAVE_ATOMIC_SUB_32 1 -+#define PMIX_HAVE_ATOMIC_ADD_64 1 -+#define PMIX_HAVE_ATOMIC_AND_64 1 -+#define PMIX_HAVE_ATOMIC_OR_64 1 -+#define PMIX_HAVE_ATOMIC_XOR_64 1 -+#define PMIX_HAVE_ATOMIC_SUB_64 1 -+ -+#define PMIXMB() __asm__ __volatile__ ("memb" : : : "memory") -+#define PMIXRMB() __asm__ __volatile__ ("memb" : : : "memory") -+#define PMIXWMB() __asm__ __volatile__ ("memb" : : : "memory") -+ 
-+/**********************************************************************
-+ *
-+ * Memory Barriers
-+ *
-+ *********************************************************************/
-+
-+static inline void pmix_atomic_mb (void)
-+{
-+    PMIXMB();
-+}
-+
-+static inline void pmix_atomic_rmb (void)
-+{
-+    PMIXRMB();
-+}
-+
-+static inline void pmix_atomic_wmb (void)
-+{
-+    PMIXWMB();
-+}
-+
-+static inline void pmix_atomic_isync (void)
-+{
-+    __asm__ __volatile__ ("memb");
-+}
-+
-+/**********************************************************************
-+ *
-+ * Atomic math operations
-+ *
-+ *********************************************************************/
-+
-+static inline bool pmix_atomic_compare_exchange_strong_32 (pmix_atomic_int32_t *addr, int32_t *oldval, int32_t newval)
-+{   /* stores newval only when *addr == *oldval; *oldval receives the value read; returns true on a match */
-+    int32_t prev, tmp;
-+    bool ret;
-+    unsigned long tmp1, tmp2;   /* tmp1 carries the address, tmp2 the cmpeq result */
-+
-+    __asm__ __volatile__ ("      ldi   %2, %4      \n"   //tmp1 = addr
-+                          "1:    lldw  %0, 0(%2)   \n"   //prev = *tmp1 (32-bit load-locked)
-+                          "      cmpeq %0, %5, %3  \n"   //tmp2 = (prev == *oldval)
-+                          "      beq   %3, 2f      \n"   //mismatch: leave before anything is stored
-+                          "      mov   %6, %1      \n"   //tmp = newval
-+                          "      lstw  %1, 0(%2)   \n"   //conditional store; tmp becomes the status
-+                          "      beq   %1, 1b      \n"   //status 0: the store failed, retry
-+                          "2:                      \n"
-+                          : "=&r" (prev), "=&r" (tmp), "=&r" (tmp1), "=&r" (tmp2)
-+                          : "r" (addr), "r" (*oldval), "r" (newval)
-+                          : "cc", "memory");
-+
-+    ret = (prev == *oldval);
-+    *oldval = prev;
-+    return ret;
-+}
-+
-+static inline int32_t pmix_atomic_swap_32(pmix_atomic_int32_t *addr, int32_t newval)
-+{   /* unconditionally replaces *addr with newval and returns the previous value */
-+    int32_t ret, tmp;
-+    unsigned long tmp1;         /* carries the address inside the asm */
-+
-+    __asm__ __volatile__ ("      ldi   %2, %3      \n"   //tmp1 = addr
-+                          "1:    lldw  %0, 0(%2)   \n"   //ret = *tmp1 (load-locked)
-+                          "      mov   %4, %1      \n"   //tmp = newval
-+                          "      lstw  %1, 0(%2)   \n"   //conditional store; tmp becomes the status
-+                          "      beq   %1, 1b      \n"   //status 0: the store failed, retry
-+                          : "=&r" (ret), "=&r" (tmp), "=&r" (tmp1)
-+                          : "r" (addr), "r" (newval)
-+                          : "cc", "memory");
-+
-+    return ret;
-+}
-+
-+/* these two functions aren't inlined in the non-gcc case because then
-+   there would be two function calls (since neither cmpset_32 nor
-+
atomic_?mb can be inlined).  Instead, we "inline" them by hand in
-+   the assembly, meaning there is one function call overhead instead
-+   of two */
-+static inline bool pmix_atomic_compare_exchange_strong_acq_32 (pmix_atomic_int32_t *addr, int32_t *oldval, int32_t newval)
-+{   /* same contract as the plain 32-bit CAS, followed by a "memb" barrier (acquire) */
-+    int32_t prev, tmp;
-+    bool ret;
-+    unsigned long tmp1, tmp2;   /* tmp1 carries the address, tmp2 the cmpeq result */
-+
-+    __asm__ __volatile__ ("      ldi   %2, %4      \n"   //tmp1 = addr
-+                          "1:    lldw  %0, 0(%2)   \n"   //prev = *tmp1 (32-bit load-locked)
-+                          "      cmpeq %0, %5, %3  \n"   //tmp2 = (prev == *oldval)
-+                          "      beq   %3, 2f      \n"   //mismatch: leave before anything is stored
-+                          "      mov   %6, %1      \n"   //tmp = newval
-+                          "      lstw  %1, 0(%2)   \n"   //conditional store; tmp becomes the status
-+                          "      beq   %1, 1b      \n"   //status 0: the store failed, retry
-+                          "2:    memb              \n"   //barrier after the operation, on both exits
-+                          : "=&r" (prev), "=&r" (tmp), "=&r" (tmp1), "=&r" (tmp2)
-+                          : "r" (addr), "r" (*oldval), "r" (newval)
-+                          : "cc", "memory");
-+
-+    ret = (prev == *oldval);
-+    *oldval = prev;
-+    return ret;
-+}
-+
-+
-+static inline bool pmix_atomic_compare_exchange_strong_rel_32 (pmix_atomic_int32_t *addr, int32_t *oldval, int32_t newval)
-+{   /* same contract as the plain 32-bit CAS, preceded by a "memb" barrier (release) */
-+    int32_t prev, tmp;
-+    bool ret;
-+    unsigned long tmp1, tmp2;   /* tmp1 carries the address, tmp2 the cmpeq result */
-+
-+    __asm__ __volatile__ ("      memb              \n      ldi   %2, %4      \n"   //barrier first, then tmp1 = addr
-+                          "1:    lldw  %0, 0(%2)   \n"   //prev = *tmp1 (32-bit load-locked)
-+                          "      cmpeq %0, %5, %3  \n"   //tmp2 = (prev == *oldval)
-+                          "      beq   %3, 2f      \n"   //mismatch: leave before anything is stored
-+                          "      mov   %6, %1      \n"   //tmp = newval
-+                          "      lstw  %1, 0(%2)   \n"   //conditional store; tmp becomes the status
-+                          "      beq   %1, 1b      \n"   //status 0: the store failed, retry
-+                          "2:                      \n"
-+                          : "=&r" (prev), "=&r" (tmp), "=&r" (tmp1), "=&r" (tmp2)
-+                          : "r" (addr), "r" (*oldval), "r" (newval)
-+                          : "cc", "memory");
-+
-+    ret = (prev == *oldval);
-+    *oldval = prev;
-+    return ret;
-+}
-+
-+#define pmix_atomic_ll_32(addr, ret)                                    \
-+    do {                                                                \
-+        pmix_atomic_int32_t *_addr = (addr);                            \
-+        int32_t _ret;                                                   \
-+                                                                        \
-+        __asm__ __volatile__ ("lldw %0, 0(%1) \n" /* 32-bit load-locked */ \
-+                              : "=&r" (_ret)                            \
-+                              : "r" (_addr));                           \
-+                                                                        \
-+        ret = (typeof(ret)) _ret;                                       \
-+    } while (0)
-+
-+#define pmix_atomic_sc_32(addr, newval, ret)                            \
-+    do {                                                                \
-+        pmix_atomic_int32_t *_addr = (addr);                            \
-+        int32_t _newval = (int32_t) newval;                             \
-+        int _ret = _newval; /* value in, store status out */            \
-+                                                                        \
-+        __asm__ __volatile__ ("lstw %0, 0(%1) \n" /* conditional store */ \
-+                              : "+&r" (_ret)                            \
-+                              : "r" (_addr)                             \
-+                              : "cc", "memory");                        \
-+                                                                        \
-+        ret = (_ret != 0); /* 0 is the failure status the retry loops in this file branch on */ \
-+    } while (0)
-+
-+static inline bool pmix_atomic_compare_exchange_strong_64 (pmix_atomic_int64_t *addr, int64_t *oldval, int64_t newval)
-+{   /* stores newval only when *addr == *oldval; *oldval receives the value read; returns true on a match */
-+    int64_t prev;
-+    int64_t tmp;                /* newval on the way in, store status afterwards */
-+    bool ret;
-+    int64_t tmp1, tmp2;         /* tmp1 carries the address, tmp2 the cmpeq result */
-+
-+    __asm__ __volatile__ ("      ldi   %2, %4      \n"   //tmp1 = addr
-+                          "1:    lldl  %0, 0(%2)   \n"   //prev = *tmp1 (64-bit load-locked)
-+                          "      cmpeq %0, %5, %3  \n"   //tmp2 = (prev == *oldval)
-+                          "      beq   %3, 2f      \n"   //mismatch: leave before anything is stored
-+                          "      mov   %6, %1      \n"   //tmp = newval
-+                          "      lstl  %1, 0(%2)   \n"   //64-bit conditional store; tmp becomes the status
-+                          "      beq   %1, 1b      \n"   //status 0: the store failed, retry
-+                          "2:                      \n"
-+                          : "=&r" (prev), "=&r" (tmp), "=&r" (tmp1), "=&r" (tmp2)
-+                          : "r" (addr), "r" (*oldval), "r" (newval)
-+                          : "cc", "memory");
-+
-+    ret = (prev == *oldval);
-+    *oldval = prev;
-+    return ret;
-+}
-+
-+static inline int64_t pmix_atomic_swap_64 (pmix_atomic_int64_t *addr, int64_t newval)
-+{   /* unconditionally replaces *addr with newval and returns the previous value */
-+    int64_t ret;
-+    int64_t tmp;                /* newval on the way in, store status afterwards */
-+    int64_t tmp1;               /* carries the address inside the asm */
-+
-+    __asm__ __volatile__ ("      ldi   %2, %3      \n"   //tmp1 = addr
-+                          "1:    lldl  %0, 0(%2)   \n"   //ret = *tmp1 (64-bit load-locked)
-+                          "      mov   %4, %1      \n"   //tmp = newval
-+                          "      lstl  %1, 0(%2)   \n"   //64-bit conditional store; tmp becomes the status
-+                          "      beq   %1, 1b      \n"   //status 0: the store failed, retry
-+                          : "=&r" (ret), "=&r" (tmp), "=&r" (tmp1)
-+                          : "r" (addr), "r" (newval)
-+                          : "cc", "memory");
-+
-+    return ret;
-+}
-+
-+/* these two functions aren't inlined in the non-gcc case because then
-+   there would be two function calls (since neither cmpset_64 nor
-+   atomic_?mb can be inlined).
Instead, we "inline" them by hand in
-+   the assembly, meaning there is one function call overhead instead
-+   of two */
-+static inline bool pmix_atomic_compare_exchange_strong_acq_64 (pmix_atomic_int64_t *addr, int64_t *oldval, int64_t newval)
-+{   /* same contract as the plain 64-bit CAS, followed by a "memb" barrier (acquire) */
-+    int64_t prev;
-+    int64_t tmp;                /* newval on the way in, store status afterwards */
-+    bool ret;
-+    int64_t tmp1, tmp2;         /* tmp1 carries the address, tmp2 the cmpeq result */
-+
-+    __asm__ __volatile__ ("      ldi   %2, %4      \n"   //tmp1 = addr
-+                          "1:    lldl  %0, 0(%2)   \n"   //prev = *tmp1 (64-bit load-locked)
-+                          "      cmpeq %0, %5, %3  \n"   //tmp2 = (prev == *oldval)
-+                          "      beq   %3, 2f      \n"   //mismatch: leave before anything is stored
-+                          "      mov   %6, %1      \n"   //tmp = newval
-+                          "      lstl  %1, 0(%2)   \n"   //64-bit conditional store; tmp becomes the status
-+                          "      beq   %1, 1b      \n"   //status 0: the store failed, retry
-+                          "2:    memb              \n"   //barrier after the operation, on both exits
-+                          : "=&r" (prev), "=&r" (tmp), "=&r" (tmp1), "=&r" (tmp2)
-+                          : "r" (addr), "r" (*oldval), "r" (newval)
-+                          : "cc", "memory");
-+
-+    ret = (prev == *oldval);
-+    *oldval = prev;
-+    return ret;
-+}
-+
-+
-+static inline bool pmix_atomic_compare_exchange_strong_rel_64 (pmix_atomic_int64_t *addr, int64_t *oldval, int64_t newval)
-+{   /* same contract as the plain 64-bit CAS, preceded by a "memb" barrier (release) */
-+    int64_t prev;
-+    int64_t tmp;                /* newval on the way in, store status afterwards */
-+    bool ret;
-+    int64_t tmp1, tmp2;         /* tmp1 carries the address, tmp2 the cmpeq result */
-+
-+    __asm__ __volatile__ ("      memb              \n      ldi   %2, %4      \n"   //barrier first, then tmp1 = addr
-+                          "1:    lldl  %0, 0(%2)   \n"   //prev = *tmp1 (64-bit load-locked)
-+                          "      cmpeq %0, %5, %3  \n"   //tmp2 = (prev == *oldval)
-+                          "      beq   %3, 2f      \n"   //mismatch: leave before anything is stored
-+                          "      mov   %6, %1      \n"   //tmp = newval
-+                          "      lstl  %1, 0(%2)   \n"   //64-bit conditional store; tmp becomes the status
-+                          "      beq   %1, 1b      \n"   //status 0: the store failed, retry
-+                          "2:                      \n"
-+                          : "=&r" (prev), "=&r" (tmp), "=&r" (tmp1), "=&r" (tmp2)
-+                          : "r" (addr), "r" (*oldval), "r" (newval)
-+                          : "cc", "memory");
-+
-+    ret = (prev == *oldval);
-+    *oldval = prev;
-+    return ret;
-+}
-+
-+#define pmix_atomic_ll_64(addr, ret)                                    \
-+    do {                                                                \
-+        pmix_atomic_int64_t *_addr = (addr);                            \
-+        int64_t _ret;                                                   \
-+                                                                        \
-+        /* 64-bit load-locked through the address operand itself, as pmix_atomic_ll_32 does; no fixed scratch register */ \
-+        __asm__ __volatile__ ("lldl %0, 0(%1) \n"                       \
-+                              : "=&r" (_ret)                            \
-+                              : "r" (_addr));                           \
-+                                                                        \
-+        ret = (typeof(ret)) _ret;                                       \
-+    } while (0)
-+
-+#define pmix_atomic_sc_64(addr, newval, ret)                            \
-+    do {                                                                \
-+        pmix_atomic_int64_t *_addr = (addr);                            \
-+        int64_t _newval = (int64_t) newval;                             \
-+        int64_t _ret = _newval; /* value in, store status out */        \
-+                                                                        \
-+        __asm__ __volatile__ ("lstl %0, 0(%1) \n" /* conditional store */ \
-+                              : "+&r" (_ret)                            \
-+                              : "r" (_addr)                             \
-+                              : "cc", "memory");                        \
-+                                                                        \
-+        ret = (_ret != 0); /* 0 is the failure status the retry loops in this file branch on */ \
-+    } while (0)
-+
-+static inline int32_t pmix_atomic_fetch_add_32(pmix_atomic_int32_t *addr, int32_t value)
-+{   /* atomically adds value to *addr and returns the value held before */
-+    int32_t newval, old;
-+    unsigned long ptr;
-+
-+    __asm__ __volatile__("      ldi   %2, %3      \n"//ptr = addr
-+                         "1:    lldw  %1, 0(%2)   \n"//old = *ptr (load-locked)
-+                         "      addw  %1, %4, %0  \n"//newval = old + value
-+                         "      lstw  %0, 0(%2)   \n"//conditional store; newval becomes the status
-+                         "      beq   %0, 1b      \n"//status 0: the store failed, retry
-+                         : "=&r" (newval), "=&r" (old), "=&r" (ptr)
-+                         : "r" (addr), "r" (value)
-+                         : "memory");
-+
-+    return old;
-+}
-+
-+static inline int32_t pmix_atomic_fetch_sub_32(pmix_atomic_int32_t *addr, int32_t value)
-+{   /* atomically subtracts value from *addr and returns the value held before */
-+    int32_t newval, old;
-+    unsigned long ptr;
-+
-+    __asm__ __volatile__("      ldi   %2, %3      \n"//ptr = addr
-+                         "1:    lldw  %1, 0(%2)   \n"//old = *ptr (load-locked)
-+                         "      subw  %1, %4, %0  \n"//newval = old - value
-+                         "      lstw  %0, 0(%2)   \n"//conditional store; newval becomes the status
-+                         "      beq   %0, 1b      \n"//status 0: the store failed, retry
-+                         : "=&r" (newval), "=&r" (old), "=&r" (ptr)
-+                         : "r" (addr), "r" (value)
-+                         : "memory");
-+
-+    return old;
-+}
-+
-+static inline int32_t pmix_atomic_fetch_and_32(pmix_atomic_int32_t *addr, int32_t value)
-+{   /* atomically ANDs value into *addr and returns the value held before */
-+    int32_t newval, old;
-+    unsigned long ptr;
-+
-+    __asm__ __volatile__("      ldi   %2, %3      \n"//ptr = addr
-+                         "1:    lldw  %1, 0(%2)   \n"//old = *ptr (load-locked)
-+                         "      and   %1, %4, %0  \n"//newval = old & value
-+                         "      lstw  %0, 0(%2)   \n"//conditional store; newval becomes the status
-+                         "      beq   %0, 1b      \n"//status 0: the store failed, retry
-+                         : "=&r" (newval), "=&r" (old), "=&r" (ptr)
-+                         : "r" (addr), "r" (value)
-+                         : "memory");
-+
-+    return old;
-+}
-+
-+static inline int32_t pmix_atomic_fetch_or_32(pmix_atomic_int32_t *addr, int32_t value)
-+{   /* atomically ORs value into *addr and returns the value held before */
-+    int32_t newval, old;
-+    unsigned long ptr;
-+
-+    __asm__ __volatile__("      ldi   %2, %3      \n"//ptr = addr
-+                         "1:    lldw  %1, 0(%2)   \n"//old = *ptr (load-locked)
-+                         "      bis   %1, %4, %0  \n"//newval = old | value
-+                         "      lstw  %0, 0(%2)   \n"//conditional store; newval becomes the status
-+                         "      beq   %0, 1b      \n"//status 0: the store failed, retry
-+                         : "=&r" (newval), "=&r" (old), "=&r" (ptr)
-+                         : "r" (addr), "r" (value)
-+                         : "memory");
-+
-+    return old;
-+}
-+
-+static inline int32_t pmix_atomic_fetch_xor_32(pmix_atomic_int32_t *addr, int32_t value)
-+{   /* atomically XORs value into *addr and returns the value held before */
-+    int32_t newval, old;
-+    unsigned long ptr;
-+
-+    __asm__ __volatile__("      ldi   %2, %3      \n"//ptr = addr
-+                         "1:    lldw  %1, 0(%2)   \n"//old = *ptr (load-locked)
-+                         "      xor   %1, %4, %0  \n"//newval = old ^ value
-+                         "      lstw  %0, 0(%2)   \n"//conditional store; newval becomes the status
-+                         "      beq   %0, 1b      \n"//status 0: the store failed, retry
-+                         : "=&r" (newval), "=&r" (old), "=&r" (ptr)
-+                         : "r" (addr), "r" (value)
-+                         : "memory");
-+
-+    return old;
-+}
-+
-+static inline int64_t pmix_atomic_fetch_add_64(pmix_atomic_int64_t *addr, int64_t value)
-+{   /* atomically adds value to *addr and returns the value held before */
-+    int64_t newval, old;
-+    unsigned long ptr;
-+
-+    __asm__ __volatile__("      ldi   %2, %3      \n"//ptr = addr
-+                         "1:    lldl  %1, 0(%2)   \n"//old = *ptr (load-locked)
-+                         "      addl  %1, %4, %0  \n"//newval = old + value
-+                         "      lstl  %0, 0(%2)   \n"//conditional store; newval becomes the status
-+                         "      beq   %0, 1b      \n"//status 0: the store failed, retry
-+                         : "=&r" (newval), "=&r" (old), "=&r" (ptr)
-+                         : "r" (addr), "r" (value)
-+                         : "memory");
-+
-+    return old;
-+}
-+
-+static inline int64_t pmix_atomic_fetch_sub_64(pmix_atomic_int64_t *addr, int64_t value)
-+{   /* atomically subtracts value from *addr and returns the value held before */
-+    int64_t newval, old;
-+    unsigned long ptr;
-+
-+    __asm__ __volatile__("      ldi   %2, %3      \n"//ptr = addr
-+                         "1:    lldl  %1, 0(%2)   \n"//old = *ptr (load-locked)
-+                         "      subl  %1, %4, %0  \n"//newval = old - value
-+                         "      lstl  %0, 0(%2)   \n"//conditional store; newval becomes the status
-+                         "      beq   %0, 1b      \n"//status 0: the store failed, retry
-+                         : "=&r" (newval), "=&r" (old), "=&r" (ptr)
-+                         : "r" (addr), "r" (value)
-+                         : "memory");
-+
-+    return old;
-+}
-+
-+static inline int64_t pmix_atomic_fetch_and_64(pmix_atomic_int64_t *addr, int64_t value)
-+{   /* atomically ANDs value into *addr and returns the value held before */
-+    int64_t newval, old;
-+    unsigned long ptr;
-+
-+    __asm__ __volatile__("      ldi   %2, %3      \n"//ptr = addr
-+                         "1:    lldl  %1, 0(%2)   \n"//old = *ptr (load-locked)
-+                         "      and   %1, %4, %0  \n"//newval = old & value
-+                         "      lstl  %0, 0(%2)   \n"//conditional store; newval becomes the status
-+                         "      beq   %0, 1b      \n"//status 0: the store failed, retry
-+                         : "=&r" (newval), "=&r" (old), "=&r" (ptr)
-+                         : "r" (addr), "r" (value)
-+                         : "memory");
-+
-+    return old;
-+}
-+
-+static inline int64_t pmix_atomic_fetch_or_64(pmix_atomic_int64_t *addr, int64_t value)
-+{   /* atomically ORs value into *addr and returns the value held before */
-+    int64_t newval, old;
-+    unsigned long ptr;
-+
-+    __asm__ __volatile__("      ldi   %2, %3      \n"//ptr = addr
-+                         "1:    lldl  %1, 0(%2)   \n"//old = *ptr (load-locked)
-+                         "      bis   %1, %4, %0  \n"//newval = old | value
-+                         "      lstl  %0, 0(%2)   \n"//conditional store; newval becomes the status
-+                         "      beq   %0, 1b      \n"//status 0: the store failed, retry
-+                         : "=&r" (newval), "=&r" (old), "=&r" (ptr)
-+                         : "r" (addr), "r" (value)
-+                         : "memory");
-+
-+    return old;
-+}
-+
-+static inline int64_t pmix_atomic_fetch_xor_64(pmix_atomic_int64_t *addr, int64_t value)
-+{   /* atomically XORs value into *addr and returns the value held before */
-+    int64_t newval, old;
-+    unsigned long ptr;
-+
-+    __asm__ __volatile__("      ldi   %2, %3      \n"//ptr = addr
-+                         "1:    lldl  %1, 0(%2)   \n"//old = *ptr (load-locked)
-+                         "      xor   %1, %4, %0  \n"//newval = old ^ value
-+                         "      lstl  %0, 0(%2)   \n"//conditional store; newval becomes the status
-+                         "      beq   %0, 1b      \n"//status 0: the store failed, retry
-+                         : "=&r" (newval), "=&r" (old), "=&r" (ptr)
-+                         : "r" (addr), "r" (value)
-+                         : "memory");
-+
-+    return old;
-+}
-+
-+#endif /* PMIX_GCC_INLINE_ASSEMBLY */
-+
-+#endif /* !
PMIX_SYS_ARCH_ATOMIC_H */ -diff -uNar openmpi-4.1.4.org/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/sw_64/Makefile.include openmpi-4.1.4.sw/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/sw_64/Makefile.include ---- openmpi-4.1.4.org/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/sw_64/Makefile.include 1970-01-01 08:00:00.000000000 +0800 -+++ openmpi-4.1.4.sw/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/sw_64/Makefile.include 2024-03-13 19:37:28.394848433 +0800 -@@ -0,0 +1,24 @@ -+# -+# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -+# University Research and Technology -+# Corporation. All rights reserved. -+# Copyright (c) 2004-2008 The University of Tennessee and The University -+# of Tennessee Research Foundation. All rights -+# reserved. -+# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -+# University of Stuttgart. All rights reserved. -+# Copyright (c) 2004-2005 The Regents of the University of California. -+# All rights reserved. -+# Copyright (c) 2017 Intel, Inc. All rights reserved. -+# $COPYRIGHT$ -+# -+# Additional copyrights may follow -+# -+# $HEADER$ -+# -+ -+# This makefile.am does not stand on its own - it is included from pmix/include/Makefile.am -+ -+headers += \ -+ atomics/sys/sw_64/atomic.h \ -+ atomics/sys/sw_64/timer.h -diff -uNar openmpi-4.1.4.org/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/sw_64/timer.h openmpi-4.1.4.sw/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/sw_64/timer.h ---- openmpi-4.1.4.org/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/sw_64/timer.h 1970-01-01 08:00:00.000000000 +0800 -+++ openmpi-4.1.4.sw/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/sw_64/timer.h 2024-03-13 19:37:28.394848433 +0800 -@@ -0,0 +1,45 @@ -+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -+/* -+ * Copyright (c) 2008 The University of Tennessee and The University -+ * of Tennessee Research Foundation. All rights -+ * reserved. -+ * Copyright (c) 2016 Broadcom Limited. All rights reserved. 
-+ * Copyright (c) 2016      Los Alamos National Security, LLC. All rights
-+ *                         reserved.
-+ * $COPYRIGHT$
-+ *
-+ * Additional copyrights may follow
-+ *
-+ * $HEADER$
-+ */
-+
-+#ifndef PMIX_SYS_ARCH_TIMER_H
-+#define PMIX_SYS_ARCH_TIMER_H 1
-+
-+#include /* NOTE(review): the header name is missing in this view; uint64_t below needs a declaration - confirm against the real file */
-+
-+typedef uint64_t pmix_timer_t; /* type of the value produced by the "rtc" reads below */
-+
-+static inline pmix_timer_t
-+pmix_sys_timer_get_cycles(void) /* returns one "rtc" sample, taken after a "memb" */
-+{
-+    pmix_timer_t ret;
-+
-+    __asm__ __volatile__ ("memb" ::: "memory");   /* "memb" with a memory clobber, issued ahead of the read */
-+    __asm__ __volatile__ ("rtc %0" : "=r" (ret)); /* the "rtc" result lands in ret */
-+
-+    return ret;
-+}
-+
-+
-+static inline pmix_timer_t
-+pmix_sys_timer_freq(void) /* returns whatever a single "rtc" read yields */
-+{
-+    pmix_timer_t freq;
-+    __asm__ __volatile__ ("rtc %0" : "=r" (freq)); /* NOTE(review): same instruction as pmix_sys_timer_get_cycles(), so this looks like a counter sample rather than a frequency - confirm before relying on it */
-+    return (pmix_timer_t)(freq);
-+}
-+
-+#define PMIX_HAVE_SYS_TIMER_GET_CYCLES 1
-+
-+#endif /* ! PMIX_SYS_ARCH_TIMER_H */
-diff -uNar openmpi-4.1.4.org/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/timer.h openmpi-4.1.4.sw/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/timer.h
---- openmpi-4.1.4.org/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/timer.h	2022-05-26 22:32:35.000000000 +0800
-+++ openmpi-4.1.4.sw/opal/mca/pmix/pmix3x/pmix/src/atomics/sys/timer.h	2024-03-13 19:37:28.394848433 +0800
-@@ -75,6 +75,8 @@
- #include "src/atomics/sys/x86_64/timer.h"
- #elif PMIX_ASSEMBLY_ARCH == PMIX_ARM
- #include "src/atomics/sys/arm/timer.h"
-+#elif PMIX_ASSEMBLY_ARCH == PMIX_SW_64
-+#include "src/atomics/sys/sw_64/timer.h"
- #elif PMIX_ASSEMBLY_ARCH == PMIX_ARM64
- #include "src/atomics/sys/arm64/timer.h"
- #elif PMIX_ASSEMBLY_ARCH == PMIX_IA32
-diff -uNar openmpi-4.1.4.org/opal/mca/timer/linux/configure.m4 openmpi-4.1.4.sw/opal/mca/timer/linux/configure.m4
---- openmpi-4.1.4.org/opal/mca/timer/linux/configure.m4	2022-05-26 22:32:35.000000000 +0800
-+++ openmpi-4.1.4.sw/opal/mca/timer/linux/configure.m4	2024-03-13 19:37:28.394848433 +0800
-@@ -47,7 +47,7 @@
-                         [timer_linux_happy="no"])])
- 
-    case "${host}" in
--    i?86-*linux*|x86_64*linux*|ia64-*linux*|powerpc-*linux*|powerpc64-*linux*|powerpc64le-*linux*|powerpcle-*linux*|sparc*-*linux*|aarch64-*linux*)
-+
i?86-*linux*|x86_64*linux*|ia64-*linux*|powerpc-*linux*|powerpc64-*linux*|powerpc64le-*linux*|powerpcle-*linux*|sparc*-*linux*|aarch64-*linux*|sw_64-*linux*) - AS_IF([test "$timer_linux_happy" = "yes"], - [AS_IF([test -r "/proc/cpuinfo"], - [timer_linux_happy="yes"], -diff -uNar openmpi-4.1.4.org/opal/mca/timer/linux/timer_linux_component.c openmpi-4.1.4.sw/opal/mca/timer/linux/timer_linux_component.c ---- openmpi-4.1.4.org/opal/mca/timer/linux/timer_linux_component.c 2022-05-26 22:32:35.000000000 +0800 -+++ openmpi-4.1.4.sw/opal/mca/timer/linux/timer_linux_component.c 2024-03-13 19:37:28.394848433 +0800 -@@ -120,6 +120,10 @@ - #if OPAL_ASSEMBLY_ARCH == OPAL_ARM64 - opal_timer_linux_freq = opal_sys_timer_freq(); - #endif -+#if OPAL_ASSEMBLY_ARCH == OPAL_SW_64 -+ opal_timer_linux_freq = opal_sys_timer_freq(); -+#endif -+ - - if (0 == opal_timer_linux_freq) { - /* first, look for a timebase field. probably only on PPC, -- Gitee