From e0f77135397c29de2dbc7e0fbd92771dbd1e59b6 Mon Sep 17 00:00:00 2001 From: Jingyun Hua Date: Mon, 9 Jan 2023 19:51:22 +0800 Subject: [PATCH] add support loongarch64 Signed-off-by: Jingyun Hua --- add-support-loongarch64.patch | 352 ++++++++++++++++++++++++++++++++++ gperftools.spec | 9 +- 2 files changed, 360 insertions(+), 1 deletion(-) create mode 100644 add-support-loongarch64.patch diff --git a/add-support-loongarch64.patch b/add-support-loongarch64.patch new file mode 100644 index 0000000..0f7b2d1 --- /dev/null +++ b/add-support-loongarch64.patch @@ -0,0 +1,352 @@ +From 6dbb6c66164dec104344d9a0758807a4ec997105 Mon Sep 17 00:00:00 2001 +From: Jingyun Hua +Date: Sat, 7 Jan 2023 16:27:22 +0800 +Subject: [PATCH] support loongarch64 + +Signed-off-by: Jingyun Hua +--- + m4/pc_from_ucontext.m4 | 1 + + src/base/basictypes.h | 2 + + src/base/linux_syscall_support.h | 199 ++++++++++++++++++++++++++++++- + src/base/linuxthreads.h | 2 +- + src/malloc_hook_mmap_linux.h | 1 + + src/stacktrace.cc | 2 +- + src/tcmalloc.cc | 2 +- + 7 files changed, 201 insertions(+), 8 deletions(-) + +diff --git a/m4/pc_from_ucontext.m4 b/m4/pc_from_ucontext.m4 +index 159b01d..ffe0764 100644 +--- a/m4/pc_from_ucontext.m4 ++++ b/m4/pc_from_ucontext.m4 +@@ -26,6 +26,7 @@ AC_DEFUN([AC_PC_FROM_UCONTEXT], + pc_fields="$pc_fields uc_mcontext.gregs[[REG_EIP]]" # Linux (i386) + pc_fields="$pc_fields uc_mcontext.gregs[[REG_RIP]]" # Linux (x86_64) + pc_fields="$pc_fields uc_mcontext.sc_ip" # Linux (ia64) ++ pc_fields="$pc_fields uc_mcontext.__pc" # Linux (loongarch64) + pc_fields="$pc_fields uc_mcontext.pc" # Linux (mips) + pc_fields="$pc_fields uc_mcontext.uc_regs->gregs[[PT_NIP]]" # Linux (ppc) + pc_fields="$pc_fields uc_mcontext.__gregs[[REG_PC]]" # Linux (riscv64) +diff --git a/src/base/basictypes.h b/src/base/basictypes.h +index fe5c75b..375c902 100644 +--- a/src/base/basictypes.h ++++ b/src/base/basictypes.h +@@ -389,6 +389,8 @@ class AssignAttributeStartEnd { + # define CACHELINE_ALIGNED 
__attribute__((aligned(64))) + # elif (defined(__e2k__)) + # define CACHELINE_ALIGNED __attribute__((aligned(64))) ++# elif defined(__loongarch64) ++# define CACHELINE_ALIGNED __attribute__((aligned(64))) + # else + # error Could not determine cache line length - unknown architecture + # endif +diff --git a/src/base/linux_syscall_support.h b/src/base/linux_syscall_support.h +index d6899b8..658fe3a 100644 +--- a/src/base/linux_syscall_support.h ++++ b/src/base/linux_syscall_support.h +@@ -131,12 +131,12 @@ + #define SYS_LINUX_SYSCALL_SUPPORT_H + + /* We currently only support x86-32, x86-64, ARM, MIPS, PPC/PPC64, Aarch64, +- * s390, s390x, and riscv64 on Linux. ++ * s390, s390x, riscv64, and loongarch64 on Linux. + * Porting to other related platforms should not be difficult. + */ + #if (defined(__i386__) || defined(__x86_64__) || defined(__arm__) || \ + defined(__mips__) || defined(__mips64) || defined(__mips64el__) || defined(__PPC__) || \ +- defined(__aarch64__) || defined(__s390__) || defined(__riscv)) \ ++ defined(__aarch64__) || defined(__s390__) || defined(__riscv) || defined(__loongarch64)) \ + && (defined(__linux)) + + #ifndef SYS_CPLUSPLUS +@@ -541,6 +541,31 @@ struct kernel_stat { + unsigned long __unused4; + unsigned long __unused5; + }; ++ ++/*From linux/include/uapi/asm-generic/stat.h */ ++#elif defined(__loongarch64) ++struct kernel_stat { ++ unsigned long st_dev; ++ unsigned long st_ino; ++ unsigned int st_mode; ++ unsigned int st_nlink; ++ unsigned int st_uid; ++ unsigned int st_gid; ++ unsigned long st_rdev; ++ unsigned long __pad1; ++ long st_size; ++ int st_blksize; ++ int __pad2; ++ long st_blocks; ++ long st_atime_; ++ unsigned long st_atime_nsec_; ++ long st_mtime_; ++ unsigned long st_mtime_nsec_; ++ long st_ctime_; ++ unsigned long st_ctime_nsec_; ++ unsigned int __unused4; ++ unsigned int __unused5; ++}; + #endif + + +@@ -954,8 +979,24 @@ struct kernel_stat { + # ifndef __NR_fstatat + # define __NR_fstatat 79 + # endif +-#endif + ++#elif
defined(__loongarch64) ++#ifndef __NR_gettid ++#define __NR_gettid 178 ++#endif ++#ifndef __NR_futex ++#define __NR_futex 98 ++#endif ++#ifndef __NR_openat ++#define __NR_openat 56 ++#endif ++#ifndef __NR_fstatat ++#define __NR_fstatat 79 ++#endif ++#ifndef __NR_getdents64 ++#define __NR_getdents64 61 ++#endif /* End of loongarch64 definitions */ ++#endif + + /* After forking, we must make sure to only call system calls. */ + #if __BOUNDED_POINTERS__ +@@ -1017,7 +1058,8 @@ struct kernel_stat { + + #undef LSS_RETURN + #if (defined(__i386__) || defined(__x86_64__) || defined(__arm__) || \ +- defined(__aarch64__) || defined(__s390__) || defined(__riscv)) ++ defined(__aarch64__) || defined(__s390__) || defined(__riscv)) || \ ++ defined(__loongarch64) + /* Failing system calls return a negative result in the range of + * -1..-4095. These are "errno" values with the sign inverted. + */ +@@ -2564,6 +2606,152 @@ struct kernel_stat { + LSS_BODY(type, name, "r"(__a1), "r"(__a2), "r"(__a3), "r"(__a4), \ + "r"(__a5)); \ + } ++ ++/* From glibc/sysdeps/unix/sysv/linux/loongarch/sysdep.h */ ++ #elif defined(__loongarch64) ++ #undef LSS_REG ++ #define LSS_REG(r,a) register long __a##r __asm__("$a"#r) = (long)a ++ #define LOONGARCH__SYSCALL_CLOBBERS \ ++ "$t0", "$t1", "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t8",\ ++ "memory" ++ #undef LSS_BODY ++ #define LSS_BODY(type,name,args...) 
\ ++ register long __a7 __asm__("$a7") = __NR_##name; /* syscall number is passed in $a7 */ \ ++ long __res; \ ++ __asm__ __volatile__ ( \ ++ "syscall 0\n\t" \ ++ : "+r" (__a0) /* $a0 is read and written by the asm; its value becomes __res */ \ ++ : "r" (__a7), ##args \ ++ : LOONGARCH__SYSCALL_CLOBBERS); \ ++ __res = __a0; \ ++ LSS_RETURN(type, __res) ++ #undef _syscall0 ++ #define _syscall0(type,name) \ ++ type LSS_NAME(name)(void) { \ ++ register long __a7 __asm__("$a7") = __NR_##name; \ ++ register long __a0 __asm__("$a0"); /* no argument: $a0 is output-only here */ \ ++ long __res; \ ++ __asm__ __volatile__ ( \ ++ "syscall 0\n\t" \ ++ : "=r" (__a0) \ ++ : "r" (__a7) \ ++ : LOONGARCH__SYSCALL_CLOBBERS); \ ++ __res = __a0; \ ++ LSS_RETURN(type, __res); \ ++ } ++ #undef _syscall1 ++ #define _syscall1(type,name,type1,arg1) \ ++ type LSS_NAME(name)(type1 arg1) { \ ++ LSS_REG(0, arg1); /* binds arg1 to $a0 as __a0, which LSS_BODY uses */ \ ++ LSS_BODY(type,name); \ ++ } ++ #undef _syscall2 ++ #define _syscall2(type,name,type1,arg1,type2,arg2) \ ++ type LSS_NAME(name)(type1 arg1, type2 arg2) { \ ++ LSS_REG(0,arg1); \ ++ LSS_REG(1,arg2); \ ++ LSS_BODY(type, name,"r"(__a1)); /* __a0 is already an operand of LSS_BODY; only extra registers are listed */ \ ++ } ++ #undef _syscall3 ++ #define _syscall3(type,name,type1,arg1,type2,arg2,type3,arg3) \ ++ type LSS_NAME(name)(type1 arg1,type2 arg2,type3 arg3) { \ ++ LSS_REG(0,arg1); \ ++ LSS_REG(1,arg2); \ ++ LSS_REG(2,arg3); \ ++ LSS_BODY(type, name,"r"(__a1), "r"(__a2)); \ ++ } ++ #undef _syscall4 ++ #define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3, \ ++ type4,arg4) \ ++ type LSS_NAME(name)(type1 arg1,type2 arg2,type3 arg3,type4 arg4) { \ ++ LSS_REG(0,arg1); \ ++ LSS_REG(1,arg2); \ ++ LSS_REG(2,arg3); \ ++ LSS_REG(3,arg4); \ ++ LSS_BODY(type,name, "r"(__a1), "r"(__a2), "r"(__a3)); \ ++ } ++ #undef _syscall5 ++ #define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3, \ ++ type4,arg4,type5,arg5) \ ++ type LSS_NAME(name)(type1 arg1,type2 arg2,type3 arg3,type4 arg4, \ ++ type5 arg5) { \ ++ LSS_REG(0,arg1); \ ++ LSS_REG(1,arg2); \ ++ LSS_REG(2,arg3); \ ++ LSS_REG(3,arg4); \ ++ LSS_REG(4,arg5); \ ++ LSS_BODY(type,name,"r"(__a1), "r"(__a2), "r"(__a3), "r"(__a4));\ ++ } ++ #undef _syscall6 
++ #define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3, \ ++ type4,arg4,type5,arg5,type6,arg6) \ ++ type LSS_NAME(name)(type1 arg1,type2 arg2,type3 arg3,type4 arg4, \ ++ type5 arg5,type6 arg6) { \ ++ LSS_REG(0,arg1); \ ++ LSS_REG(1,arg2); \ ++ LSS_REG(2,arg3); \ ++ LSS_REG(3,arg4); \ ++ LSS_REG(4,arg5); \ ++ LSS_REG(5,arg6); \ ++ LSS_BODY(type,name,"r"(__a1), "r"(__a2), "r"(__a3), "r"(__a4), \ ++ "r"(__a5)); \ ++ } ++ ++/* clone function adapted from glibc 2.28 clone.S: the parent gets the child tid (or a negated errno) in __res; the child calls fn(arg) on child_stack and exits with fn's return value, it never returns to the caller. */ ++ LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *arg), void *child_stack, ++ int flags, void *arg, int *parent_tidptr, ++ void *newtls, int *child_tidptr) { ++ long __res; ++ { ++ register int (*__fn)(void *) __asm__("$a0") = fn; ++ register void *__stack __asm__("$a1") = child_stack; ++ register int __flags __asm__("$a2") = flags; ++ register void *__arg __asm__("$a3") = arg; ++ register int *__ptid __asm__("$a4") = parent_tidptr; ++ register void *__tls __asm__("$a5") = newtls; ++ register int *__ctid __asm__("$a6") = child_tidptr; ++ __asm__ __volatile__(/*Sanity check arguments */ ++ /* Align stack to 16 or 8 bytes per the ABI. */ ++#ifdef __loongarch_lp64 ++ "bstrins.d $a1,$zero, 3, 0\n" ++#else ++#error "32bit LoongArch systems are not supported" ++#endif ++ "beqz $a0, 1f\n" /* No NULL function pointers. */ ++ "beqz $a1, 1f\n" /* No NULL stack pointers. */ ++ /*Save argument pointer */ ++ "addi.d $a1, $a1, -16\n" /* Reserve argument save space. */ ++ "st.d $a0, $a1, 0\n" /* Save function pointer. */ ++ "st.d $a3, $a1, 8\n" /* Save argument pointer. SZREG is 8 */ ++ /* The syscall expects the args to be in different slots. */ ++ "or $a0, $a2, $zero\n" ++ "or $a2, $a4, $zero\n" ++ "or $a3, $a6, $zero\n" ++ "or $a4, $a5, $zero\n" ++ /* Do the system call */ ++ "li.d $a7, %9\n" /* %9 is __NR_clone */ ++ "syscall 0\n" ++ ++ "bnez $a0, 2f\n" /* Parent (tid > 0) or failure (< 0): return $a0 as is. */ ++ "ld.d $a1, $sp, 0\n" /* Child: reload the function pointer saved above. */ ++ "ld.d $a0, $sp, 8\n" /* Child: reload the argument pointer saved above. */ ++ "jirl $ra, $a1, 0\n" /* Child: call fn(arg). */ ++ "li.d $a7, %10\n" /* %10 is __NR_exit */ ++ "syscall 0\n" /* Child: exit with fn's return value; does not return. */ ++ "1:\n" ++ "li.d $a0, %1\n" /* Something bad happened -- no child created.
*/ ++ "2:\n" ++ "or %0, $a0, $zero\n" /* Copy the syscall result into __res. */ ++ : "=r" (__res) ++ : "i"(-EINVAL), ++ "r"(__fn), "r"(__stack), "r"(__flags), "r"(__arg), ++ "r"(__ptid), "r"(__tls), "r"(__ctid), ++ "i"(__NR_clone), "i"(__NR_exit) ++ : "$a7", LOONGARCH__SYSCALL_CLOBBERS); ++ } ++LSS_RETURN(int, __res); ++ } ++ + #endif + #define __NR__exit __NR_exit + #define __NR__gettid __NR_gettid +@@ -2657,7 +2845,7 @@ struct kernel_stat { + LSS_INLINE _syscall3(long, getcpu, unsigned *, cpu, + unsigned *, node, void *, unused); + #endif +- #if defined(__x86_64__) || defined(__aarch64__) || \ ++ #if defined(__x86_64__) || defined(__aarch64__) || defined(__loongarch64) || \ + (defined(__mips__) && _MIPS_SIM != _MIPS_SIM_ABI32) + LSS_INLINE _syscall3(int, socket, int, d, + int, t, int, p) +@@ -2691,6 +2879,7 @@ struct kernel_stat { + } + #endif + #if (defined(__aarch64__)) || \ ++ (defined(__loongarch64)) || \ + (defined(__mips__) \ + && (_MIPS_SIM == _MIPS_SIM_ABI64 || _MIPS_SIM == _MIPS_SIM_NABI32)) + LSS_INLINE int LSS_NAME(sigaction)(int signum, +diff --git a/src/base/linuxthreads.h b/src/base/linuxthreads.h +index a087628..3b488f2 100644 +--- a/src/base/linuxthreads.h ++++ b/src/base/linuxthreads.h +@@ -42,7 +42,7 @@ + * related platforms should not be difficult. 
+ */ + #if (defined(__i386__) || defined(__x86_64__) || defined(__arm__) || \ +- defined(__mips__) || defined(__PPC__) || defined(__aarch64__) || \ ++ defined(__mips__) || defined(__PPC__) || defined(__aarch64__) || defined(__loongarch64) || \ + defined(__s390__)) && defined(__linux) + + /* Define the THREADS symbol to make sure that there is exactly one core dumper +diff --git a/src/malloc_hook_mmap_linux.h b/src/malloc_hook_mmap_linux.h +index 50e19c8..4bd7985 100644 +--- a/src/malloc_hook_mmap_linux.h ++++ b/src/malloc_hook_mmap_linux.h +@@ -54,6 +54,7 @@ + #if defined(__x86_64__) \ + || defined(__PPC64__) \ + || defined(__aarch64__) \ ++ || defined(__loongarch64) \ + || (defined(_MIPS_SIM) && (_MIPS_SIM == _ABI64 || _MIPS_SIM == _ABIN32)) \ + || defined(__s390__) || (defined(__riscv) && __riscv_xlen == 64) \ + || defined(__e2k__) || defined(__sw_64__) +diff --git a/src/stacktrace.cc b/src/stacktrace.cc +index 2a2c648..32b16fa 100644 +--- a/src/stacktrace.cc ++++ b/src/stacktrace.cc +@@ -219,7 +219,7 @@ static GetStackImplementation *all_impls[] = { + + // ppc and i386 implementations prefer arch-specific asm implementations. + // arm's asm implementation is broken +-#if defined(__i386__) || defined(__ppc__) || defined(__PPC__) ++#if defined(__i386__) || defined(__ppc__) || defined(__PPC__) || defined(__loongarch64) + #if !defined(NO_FRAME_POINTER) + #define TCMALLOC_DONT_PREFER_LIBUNWIND + #endif +diff --git a/src/tcmalloc.cc b/src/tcmalloc.cc +index 9ec663e..b9c5408 100644 +--- a/src/tcmalloc.cc ++++ b/src/tcmalloc.cc +@@ -178,7 +178,7 @@ DECLARE_int64(tcmalloc_heap_limit_mb); + // jump. I am not able to reproduce that anymore. 
+ #if !defined(__i386__) && !defined(__x86_64__) && \ + !defined(__ppc__) && !defined(__PPC__) && \ +- !defined(__aarch64__) && !defined(__mips__) && !defined(__arm__) ++ !defined(__aarch64__) && !defined(__mips__) && !defined(__arm__) && !defined(__loongarch64) + #undef TCMALLOC_NO_ALIASES + #define TCMALLOC_NO_ALIASES + #endif +-- +2.33.0 + diff --git a/gperftools.spec b/gperftools.spec index 174c9d8..e5317e6 100644 --- a/gperftools.spec +++ b/gperftools.spec @@ -1,6 +1,6 @@ Name: gperftools Version: 2.9.1 -Release: 6 +Release: 7 Summary: high-performance malloc and performance analysis tools License: BSD @@ -15,6 +15,7 @@ Patch9003: avoid-exceed-int-range.patch Patch9004: skip-tcm_asserts_unittest.patch Patch9005: Continue-to-release-span-until-the-end-of-one-round.patch Patch9006: gperftools-2.9.1-sw.patch +Patch9007: add-support-loongarch64.patch BuildRequires: autoconf automake gcc-c++ @@ -81,7 +82,10 @@ CXXFLAGS=`echo $RPM_OPT_FLAGS -fno-strict-aliasing -Wno-unused-local-typedefs -D make %{?_smp_mflags} %check +#disable check for loongarch64 +%ifnarch loongarch64 LD_LIBRARY_PATH=./.libs make check +%endif %install %make_install @@ -110,6 +114,9 @@ LD_LIBRARY_PATH=./.libs make check %{_mandir}/man1/*.1.gz %changelog +* Mon Jan 9 2023 huajingyun - 2.9.1-7 +- add loongarch64 support + * Thu Oct 20 2022 wuzx - 2.9.1-6 - add sw64 patch -- Gitee