From a2e1725d583b1535564d3cec6cda968d820fc9ec Mon Sep 17 00:00:00 2001
From: ticat_fp
Date: Fri, 23 Feb 2024 14:30:42 +0800
Subject: [PATCH] LoongArch: sync patch from upstream.

glibc version >= 2.34 does not support libnsl for LoongArch.
Fix the date format in the spec file.

Signed-off-by: ticat_fp
---
 0001-LoongArch-update-from-upstream.patch | 10719 ++++++++++++++++++++
 glibc.spec | 39 +-
 2 files changed, 10748 insertions(+), 10 deletions(-)
 create mode 100644 0001-LoongArch-update-from-upstream.patch

diff --git a/0001-LoongArch-update-from-upstream.patch b/0001-LoongArch-update-from-upstream.patch
new file mode 100644
index 0000000..1bb3950
--- /dev/null
+++ b/0001-LoongArch-update-from-upstream.patch
@@ -0,0 +1,10719 @@
+From ba7d73e755aed2f9394e0f3ef3b03ce995181486 Mon Sep 17 00:00:00 2001
+From: ticat_fp
+Date: Fri, 23 Feb 2024 10:08:08 +0800
+Subject: [PATCH] LoongArch: update from upstream
+
+Signed-off-by: ticat_fp
+---
+ config.h.in | 5 -
+ elf/elf.h | 14 +
+ sysdeps/loongarch/__longjmp.S | 20 +-
+ sysdeps/loongarch/bits/link.h | 24 +-
+ sysdeps/loongarch/bits/link_lavcurrent.h | 25 +
+ sysdeps/loongarch/configure | 41 +-
+ sysdeps/loongarch/configure.ac | 34 +-
+ sysdeps/loongarch/dl-audit-check.h | 23 +
+ sysdeps/loongarch/dl-link.sym | 8 +-
+ sysdeps/loongarch/dl-machine.h | 17 +-
+ sysdeps/loongarch/dl-trampoline.S | 179 +---
+ sysdeps/loongarch/dl-trampoline.h | 242 ++++++
+ sysdeps/loongarch/lp64/multiarch/Makefile | 52 ++
+ .../lp64/multiarch/dl-symbol-redir-ifunc.h | 24 +
+ .../lp64/multiarch/ifunc-impl-list.c | 164 ++++
+ sysdeps/loongarch/lp64/multiarch/ifunc-lasx.h | 45 +
+ .../loongarch/lp64/multiarch/ifunc-memchr.h | 40 +
+ .../loongarch/lp64/multiarch/ifunc-memcmp.h | 40 +
+ .../loongarch/lp64/multiarch/ifunc-memrchr.h | 40 +
+ .../lp64/multiarch/ifunc-rawmemchr.h | 40 +
+ .../loongarch/lp64/multiarch/ifunc-strchr.h | 41 +
+ .../lp64/multiarch/ifunc-strchrnul.h | 41 +
+ .../loongarch/lp64/multiarch/ifunc-strcmp.h | 38 +
+ .../loongarch/lp64/multiarch/ifunc-strlen.h | 40 +
+ .../loongarch/lp64/multiarch/ifunc-strncmp.h | 38 +
+ .../loongarch/lp64/multiarch/ifunc-strnlen.h | 41 +
+ .../loongarch/lp64/multiarch/ifunc-strrchr.h | 41 +
+ .../loongarch/lp64/multiarch/memchr-aligned.S | 95 +++
+ .../loongarch/lp64/multiarch/memchr-lasx.S | 117 +++
+ sysdeps/loongarch/lp64/multiarch/memchr-lsx.S | 102 +++
+ sysdeps/loongarch/lp64/multiarch/memchr.c | 37 +
+ .../loongarch/lp64/multiarch/memcmp-aligned.S | 292 +++++++
+ .../loongarch/lp64/multiarch/memcmp-lasx.S | 207 +++++
+ sysdeps/loongarch/lp64/multiarch/memcmp-lsx.S | 269 ++++++
+ sysdeps/loongarch/lp64/multiarch/memcmp.c | 43 +
+ .../loongarch/lp64/multiarch/memcpy-aligned.S | 783 ++++++++++++++++++
+ .../loongarch/lp64/multiarch/memcpy-lasx.S | 20 +
+ sysdeps/loongarch/lp64/multiarch/memcpy-lsx.S | 20 +
+ .../lp64/multiarch/memcpy-unaligned.S | 247 ++++++
+ sysdeps/loongarch/lp64/multiarch/memcpy.c | 37 +
+ .../lp64/multiarch/memmove-aligned.S | 20 +
+ .../loongarch/lp64/multiarch/memmove-lasx.S | 287 +++++++
+ .../loongarch/lp64/multiarch/memmove-lsx.S | 534 ++++++++++++
+ .../lp64/multiarch/memmove-unaligned.S | 380 +++++++++
+ sysdeps/loongarch/lp64/multiarch/memmove.c | 38 +
+ .../lp64/multiarch/memrchr-generic.c | 23 +
+ .../loongarch/lp64/multiarch/memrchr-lasx.S | 123 +++
+ .../loongarch/lp64/multiarch/memrchr-lsx.S | 105 +++
+ sysdeps/loongarch/lp64/multiarch/memrchr.c | 33 +
+ .../loongarch/lp64/multiarch/memset-aligned.S | 174 ++++
+ .../loongarch/lp64/multiarch/memset-lasx.S | 142 ++++
+
sysdeps/loongarch/lp64/multiarch/memset-lsx.S | 135 +++ + .../lp64/multiarch/memset-unaligned.S | 162 ++++ + sysdeps/loongarch/lp64/multiarch/memset.c | 37 + + .../lp64/multiarch/rawmemchr-aligned.S | 124 +++ + .../loongarch/lp64/multiarch/rawmemchr-lasx.S | 82 ++ + .../loongarch/lp64/multiarch/rawmemchr-lsx.S | 71 ++ + sysdeps/loongarch/lp64/multiarch/rawmemchr.c | 37 + + .../loongarch/lp64/multiarch/stpcpy-aligned.S | 27 + + .../loongarch/lp64/multiarch/stpcpy-lasx.S | 22 + + sysdeps/loongarch/lp64/multiarch/stpcpy-lsx.S | 22 + + .../lp64/multiarch/stpcpy-unaligned.S | 22 + + sysdeps/loongarch/lp64/multiarch/stpcpy.c | 42 + + .../loongarch/lp64/multiarch/strchr-aligned.S | 99 +++ + .../loongarch/lp64/multiarch/strchr-lasx.S | 91 ++ + sysdeps/loongarch/lp64/multiarch/strchr-lsx.S | 73 ++ + sysdeps/loongarch/lp64/multiarch/strchr.c | 36 + + .../lp64/multiarch/strchrnul-aligned.S | 95 +++ + .../loongarch/lp64/multiarch/strchrnul-lasx.S | 22 + + .../loongarch/lp64/multiarch/strchrnul-lsx.S | 22 + + sysdeps/loongarch/lp64/multiarch/strchrnul.c | 39 + + .../loongarch/lp64/multiarch/strcmp-aligned.S | 179 ++++ + sysdeps/loongarch/lp64/multiarch/strcmp-lsx.S | 165 ++++ + sysdeps/loongarch/lp64/multiarch/strcmp.c | 35 + + .../loongarch/lp64/multiarch/strcpy-aligned.S | 202 +++++ + .../loongarch/lp64/multiarch/strcpy-lasx.S | 215 +++++ + sysdeps/loongarch/lp64/multiarch/strcpy-lsx.S | 212 +++++ + .../lp64/multiarch/strcpy-unaligned.S | 138 +++ + sysdeps/loongarch/lp64/multiarch/strcpy.c | 35 + + .../loongarch/lp64/multiarch/strlen-aligned.S | 100 +++ + .../loongarch/lp64/multiarch/strlen-lasx.S | 63 ++ + sysdeps/loongarch/lp64/multiarch/strlen-lsx.S | 71 ++ + sysdeps/loongarch/lp64/multiarch/strlen.c | 37 + + .../lp64/multiarch/strncmp-aligned.S | 218 +++++ + .../loongarch/lp64/multiarch/strncmp-lsx.S | 208 +++++ + sysdeps/loongarch/lp64/multiarch/strncmp.c | 35 + + .../lp64/multiarch/strnlen-aligned.S | 102 +++ + .../loongarch/lp64/multiarch/strnlen-lasx.S | 100 +++ + .../loongarch/lp64/multiarch/strnlen-lsx.S | 89 ++ + sysdeps/loongarch/lp64/multiarch/strnlen.c | 39 + + .../lp64/multiarch/strrchr-aligned.S | 170 ++++ + .../loongarch/lp64/multiarch/strrchr-lasx.S | 176 ++++ + .../loongarch/lp64/multiarch/strrchr-lsx.S | 144 ++++ + sysdeps/loongarch/lp64/multiarch/strrchr.c | 36 + + sysdeps/loongarch/setjmp.S | 18 +- + sysdeps/loongarch/start.S | 19 +- + sysdeps/loongarch/sys/asm.h | 36 +- + sysdeps/loongarch/sys/regdef.h | 57 ++ + .../unix/sysv/linux/loongarch/bits/hwcap.h | 1 + + sysdeps/unix/sysv/linux/loongarch/configure | 2 +- + .../unix/sysv/linux/loongarch/configure.ac | 2 +- + .../unix/sysv/linux/loongarch/cpu-features.h | 2 + + .../unix/sysv/linux/loongarch/pointer_guard.h | 10 +- + 103 files changed, 9365 insertions(+), 296 deletions(-) + create mode 100644 sysdeps/loongarch/bits/link_lavcurrent.h + create mode 100644 sysdeps/loongarch/dl-audit-check.h + create mode 100644 sysdeps/loongarch/lp64/multiarch/Makefile + create mode 100644 sysdeps/loongarch/lp64/multiarch/dl-symbol-redir-ifunc.h + create mode 100644 sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c + create mode 100644 sysdeps/loongarch/lp64/multiarch/ifunc-lasx.h + create mode 100644 sysdeps/loongarch/lp64/multiarch/ifunc-memchr.h + create mode 100644 sysdeps/loongarch/lp64/multiarch/ifunc-memcmp.h + create mode 100644 sysdeps/loongarch/lp64/multiarch/ifunc-memrchr.h + create mode 100644 sysdeps/loongarch/lp64/multiarch/ifunc-rawmemchr.h + create mode 100644 sysdeps/loongarch/lp64/multiarch/ifunc-strchr.h + create mode 
100644 sysdeps/loongarch/lp64/multiarch/ifunc-strchrnul.h + create mode 100644 sysdeps/loongarch/lp64/multiarch/ifunc-strcmp.h + create mode 100644 sysdeps/loongarch/lp64/multiarch/ifunc-strlen.h + create mode 100644 sysdeps/loongarch/lp64/multiarch/ifunc-strncmp.h + create mode 100644 sysdeps/loongarch/lp64/multiarch/ifunc-strnlen.h + create mode 100644 sysdeps/loongarch/lp64/multiarch/ifunc-strrchr.h + create mode 100644 sysdeps/loongarch/lp64/multiarch/memchr-aligned.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/memchr-lasx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/memchr-lsx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/memchr.c + create mode 100644 sysdeps/loongarch/lp64/multiarch/memcmp-aligned.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/memcmp-lasx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/memcmp-lsx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/memcmp.c + create mode 100644 sysdeps/loongarch/lp64/multiarch/memcpy-aligned.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/memcpy-lasx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/memcpy-lsx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/memcpy-unaligned.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/memcpy.c + create mode 100644 sysdeps/loongarch/lp64/multiarch/memmove-aligned.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/memmove-lasx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/memmove-lsx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/memmove-unaligned.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/memmove.c + create mode 100644 sysdeps/loongarch/lp64/multiarch/memrchr-generic.c + create mode 100644 sysdeps/loongarch/lp64/multiarch/memrchr-lasx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/memrchr-lsx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/memrchr.c + create mode 100644 sysdeps/loongarch/lp64/multiarch/memset-aligned.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/memset-lasx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/memset-lsx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/memset-unaligned.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/memset.c + create mode 100644 sysdeps/loongarch/lp64/multiarch/rawmemchr-aligned.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/rawmemchr-lasx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/rawmemchr-lsx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/rawmemchr.c + create mode 100644 sysdeps/loongarch/lp64/multiarch/stpcpy-aligned.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/stpcpy-lasx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/stpcpy-lsx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/stpcpy-unaligned.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/stpcpy.c + create mode 100644 sysdeps/loongarch/lp64/multiarch/strchr-aligned.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/strchr-lasx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/strchr-lsx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/strchr.c + create mode 100644 sysdeps/loongarch/lp64/multiarch/strchrnul-aligned.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/strchrnul-lasx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/strchrnul-lsx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/strchrnul.c + create mode 100644 sysdeps/loongarch/lp64/multiarch/strcmp-aligned.S + create mode 100644 
sysdeps/loongarch/lp64/multiarch/strcmp-lsx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/strcmp.c + create mode 100644 sysdeps/loongarch/lp64/multiarch/strcpy-aligned.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/strcpy-lasx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/strcpy-lsx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/strcpy-unaligned.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/strcpy.c + create mode 100644 sysdeps/loongarch/lp64/multiarch/strlen-aligned.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/strlen-lasx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/strlen-lsx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/strlen.c + create mode 100644 sysdeps/loongarch/lp64/multiarch/strncmp-aligned.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/strncmp-lsx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/strncmp.c + create mode 100644 sysdeps/loongarch/lp64/multiarch/strnlen-aligned.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/strnlen-lasx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/strnlen-lsx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/strnlen.c + create mode 100644 sysdeps/loongarch/lp64/multiarch/strrchr-aligned.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/strrchr-lasx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/strrchr-lsx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/strrchr.c + +diff --git a/config.h.in b/config.h.in +index 0dedc124..44a34072 100644 +--- a/config.h.in ++++ b/config.h.in +@@ -141,11 +141,6 @@ + /* LOONGARCH floating-point ABI for ld.so. */ + #undef LOONGARCH_ABI_FRLEN + +-/* Assembler support LoongArch LASX/LSX vector instructions. +- This macro becomes obsolete when glibc increased the minimum +- required version of GNU 'binutils' to 2.41 or later. */ +-#define HAVE_LOONGARCH_VEC_ASM 0 +- + /* Linux specific: minimum supported kernel version. */ + #undef __LINUX_KERNEL_VERSION + +diff --git a/elf/elf.h b/elf/elf.h +index 89fc8021..51633079 100644 +--- a/elf/elf.h ++++ b/elf/elf.h +@@ -794,6 +794,7 @@ typedef struct + #define NT_386_TLS 0x200 /* i386 TLS slots (struct user_desc) */ + #define NT_386_IOPERM 0x201 /* x86 io permission bitmap (1=deny) */ + #define NT_X86_XSTATE 0x202 /* x86 extended state using xsave */ ++#define NT_X86_SHSTK 0x204 /* x86 SHSTK state */ + #define NT_S390_HIGH_GPRS 0x300 /* s390 upper register halves */ + #define NT_S390_TIMER 0x301 /* s390 timer register */ + #define NT_S390_TODCMP 0x302 /* s390 TOD clock comparator register */ +@@ -832,6 +833,8 @@ typedef struct + #define NT_MIPS_DSP 0x800 /* MIPS DSP ASE registers. */ + #define NT_MIPS_FP_MODE 0x801 /* MIPS floating-point mode. */ + #define NT_MIPS_MSA 0x802 /* MIPS SIMD registers. */ ++#define NT_RISCV_CSR 0x900 /* RISC-V Control and Status Registers */ ++#define NT_RISCV_VECTOR 0x901 /* RISC-V vector registers */ + #define NT_LOONGARCH_CPUCFG 0xa00 /* LoongArch CPU config registers. */ + #define NT_LOONGARCH_CSR 0xa01 /* LoongArch control and + status registers. */ +@@ -841,6 +844,8 @@ typedef struct + SIMD Extension registers. */ + #define NT_LOONGARCH_LBT 0xa04 /* LoongArch Loongson Binary + Translation registers. */ ++#define NT_LOONGARCH_HW_BREAK 0xa05 /* LoongArch hardware breakpoint registers */ ++#define NT_LOONGARCH_HW_WATCH 0xa06 /* LoongArch hardware watchpoint registers */ + + /* Legal values for the note segment descriptor types for object files. 
*/ + +@@ -4205,6 +4210,15 @@ enum + #define R_LARCH_TLS_GD_HI20 98 + #define R_LARCH_32_PCREL 99 + #define R_LARCH_RELAX 100 ++#define R_LARCH_DELETE 101 ++#define R_LARCH_ALIGN 102 ++#define R_LARCH_PCREL20_S2 103 ++#define R_LARCH_CFA 104 ++#define R_LARCH_ADD6 105 ++#define R_LARCH_SUB6 106 ++#define R_LARCH_ADD_ULEB128 107 ++#define R_LARCH_SUB_ULEB128 108 ++#define R_LARCH_64_PCREL 109 + + /* ARC specific declarations. */ + +diff --git a/sysdeps/loongarch/__longjmp.S b/sysdeps/loongarch/__longjmp.S +index cbde1946..e87ce311 100644 +--- a/sysdeps/loongarch/__longjmp.S ++++ b/sysdeps/loongarch/__longjmp.S +@@ -43,18 +43,18 @@ ENTRY (__longjmp) + REG_L s8, a0, 12*SZREG + + #ifndef __loongarch_soft_float +- FREG_L $f24, a0, 13*SZREG + 0*SZFREG +- FREG_L $f25, a0, 13*SZREG + 1*SZFREG +- FREG_L $f26, a0, 13*SZREG + 2*SZFREG +- FREG_L $f27, a0, 13*SZREG + 3*SZFREG +- FREG_L $f28, a0, 13*SZREG + 4*SZFREG +- FREG_L $f29, a0, 13*SZREG + 5*SZFREG +- FREG_L $f30, a0, 13*SZREG + 6*SZFREG +- FREG_L $f31, a0, 13*SZREG + 7*SZFREG ++ FREG_L fs0, a0, 13*SZREG + 0*SZFREG ++ FREG_L fs1, a0, 13*SZREG + 1*SZFREG ++ FREG_L fs2, a0, 13*SZREG + 2*SZFREG ++ FREG_L fs3, a0, 13*SZREG + 3*SZFREG ++ FREG_L fs4, a0, 13*SZREG + 4*SZFREG ++ FREG_L fs5, a0, 13*SZREG + 5*SZFREG ++ FREG_L fs6, a0, 13*SZREG + 6*SZFREG ++ FREG_L fs7, a0, 13*SZREG + 7*SZFREG + #endif + +- sltui a0,a1,1 ++ sltui a0, a1, 1 + ADD a0, a0, a1 # a0 = (a1 == 0) ? 1 : a1 +- jirl zero,ra,0 ++ jirl zero, ra, 0 + + END (__longjmp) +diff --git a/sysdeps/loongarch/bits/link.h b/sysdeps/loongarch/bits/link.h +index 7fa61312..00f6f25f 100644 +--- a/sysdeps/loongarch/bits/link.h ++++ b/sysdeps/loongarch/bits/link.h +@@ -20,10 +20,26 @@ + #error "Never include directly; use instead." + #endif + ++#ifndef __loongarch_soft_float ++typedef float La_loongarch_vr ++ __attribute__ ((__vector_size__ (16), __aligned__ (16))); ++typedef float La_loongarch_xr ++ __attribute__ ((__vector_size__ (32), __aligned__ (16))); ++ ++typedef union ++{ ++ double fpreg[4]; ++ La_loongarch_vr vr[2]; ++ La_loongarch_xr xr[1]; ++} La_loongarch_vector __attribute__ ((__aligned__ (16))); ++#endif ++ + typedef struct La_loongarch_regs + { + unsigned long int lr_reg[8]; /* a0 - a7 */ +- double lr_fpreg[8]; /* fa0 - fa7 */ ++#ifndef __loongarch_soft_float ++ La_loongarch_vector lr_vec[8]; /* fa0 - fa7 or vr0 - vr7 or xr0 - xr7*/ ++#endif + unsigned long int lr_ra; + unsigned long int lr_sp; + } La_loongarch_regs; +@@ -33,8 +49,10 @@ typedef struct La_loongarch_retval + { + unsigned long int lrv_a0; + unsigned long int lrv_a1; +- double lrv_fa0; +- double lrv_fa1; ++#ifndef __loongarch_soft_float ++ La_loongarch_vector lrv_vec0; ++ La_loongarch_vector lrv_vec1; ++#endif + } La_loongarch_retval; + + __BEGIN_DECLS +diff --git a/sysdeps/loongarch/bits/link_lavcurrent.h b/sysdeps/loongarch/bits/link_lavcurrent.h +new file mode 100644 +index 00000000..15f1eb84 +--- /dev/null ++++ b/sysdeps/loongarch/bits/link_lavcurrent.h +@@ -0,0 +1,25 @@ ++/* Data structure for communication from the run-time dynamic linker for ++ loaded ELF shared objects. LAV_CURRENT definition. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. 
++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#ifndef _LINK_H ++# error "Never include directly; use instead." ++#endif ++ ++/* Version numbers for la_version handshake interface. */ ++#define LAV_CURRENT 3 +diff --git a/sysdeps/loongarch/configure b/sysdeps/loongarch/configure +index 7f1dabbc..30b60d19 100644 +--- a/sysdeps/loongarch/configure ++++ b/sysdeps/loongarch/configure +@@ -4,21 +4,19 @@ + printf "%s\n" "#define HIDDEN_VAR_NEEDS_DYNAMIC_RELOC 1" >>confdefs.h + + +-{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if the toolchain is sufficient to build static PIE on LoongArch" >&5 +-printf %s "checking if the toolchain is sufficient to build static PIE on LoongArch... " >&6; } ++{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if ${CC-cc} is sufficient to build static PIE on LoongArch" >&5 ++printf %s "checking if ${CC-cc} is sufficient to build static PIE on LoongArch... " >&6; } + if test ${libc_cv_static_pie_on_loongarch+y} + then : + printf %s "(cached) " >&6 + else $as_nop + +- cat > conftest1.S <<\EOF ++ cat > conftest.S <<\EOF + .global _start + .type _start, @function + _start: + li.w $a7, 93 +- /* This ensures the assembler supports explicit reloc. */ +- pcalau12i $a0, %pc_hi20(x) +- ld.w $a0, $a0, %pc_lo12(x) ++ li.w $a0, 0 + syscall 0 + + .data +@@ -27,41 +25,21 @@ x: + /* This should produce an R_LARCH_RELATIVE in the static PIE. */ + .dword _start + EOF +- cat > conftest2.S <<\EOF +-.global f +-.type f, @function +-f: +- /* The linker should be able to handle this and produce a PLT entry. */ +- la.pcrel $t0, $t0, external_func +- jirl $zero, $t0, 0 +-EOF + + libc_cv_static_pie_on_loongarch=no +- if { ac_try='${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS -static-pie -nostdlib -fPIE -o conftest1 conftest1.S' +- { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 +- (eval $ac_try) 2>&5 +- ac_status=$? +- printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 +- test $ac_status = 0; }; } \ +- && { ac_try='LC_ALL=C $READELF -Wr conftest1 | grep -q R_LARCH_RELATIVE' ++ if { ac_try='${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS -static-pie -nostdlib -fPIE -o conftest conftest.S' + { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 + (eval $ac_try) 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; } \ +- && ! { ac_try='LC_ALL=C $READELF -Wl conftest1 | grep -q INTERP' ++ && { ac_try='LC_ALL=C $READELF -Wr conftest | grep -q R_LARCH_RELATIVE' + { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 + (eval $ac_try) 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; } \ +- && { ac_try='${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS -shared -fPIC -o conftest2.so conftest2.S' +- { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 +- (eval $ac_try) 2>&5 +- ac_status=$? +- printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 +- test $ac_status = 0; }; } \ +- && { ac_try='LC_ALL=C $READELF -Wr conftest2.so | grep -q 'R_LARCH_JUMP_SLOT.*external_func'' ++ && ! 
{ ac_try='LC_ALL=C $READELF -Wl conftest | grep -q INTERP' + { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 + (eval $ac_try) 2>&5 + ac_status=$? +@@ -128,8 +106,7 @@ rm -f conftest* + fi + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $libc_cv_loongarch_vec_asm" >&5 + printf "%s\n" "$libc_cv_loongarch_vec_asm" >&6; } +-if test $libc_cv_loongarch_vec_asm = yes; then +- printf "%s\n" "#define HAVE_LOONGARCH_VEC_ASM 1" >>confdefs.h +- ++if test $libc_cv_loongarch_vec_asm = no; then ++ as_fn_error $? "binutils version is too old, use 2.41 or newer version" "$LINENO" 5 + fi + +diff --git a/sysdeps/loongarch/configure.ac b/sysdeps/loongarch/configure.ac +index 39efccfd..28a8ae54 100644 +--- a/sysdeps/loongarch/configure.ac ++++ b/sysdeps/loongarch/configure.ac +@@ -8,19 +8,17 @@ AC_DEFINE(HIDDEN_VAR_NEEDS_DYNAMIC_RELOC) + dnl Test if the toolchain is new enough for static PIE. + dnl We need a GAS supporting explicit reloc (older GAS produces stack-based + dnl reloc and triggers an internal error in the linker). And, we need GCC to +-dnl pass the correct linker flags for static PIE. GCC >= 13 and GAS >= 2.40 +-dnl satisfy the requirement, but a distro may backport static PIE support into +-dnl earlier GCC or Binutils releases as well. +-AC_CACHE_CHECK([if the toolchain is sufficient to build static PIE on LoongArch], ++dnl pass the correct linker flags for static PIE. We strictly require GAS >= ++dnl 2.41 so we don't need to check the assembler capability, but we need to ++dnl check if GCC is doing the correct thing. ++AC_CACHE_CHECK([if ${CC-cc} is sufficient to build static PIE on LoongArch], + libc_cv_static_pie_on_loongarch, [ +- cat > conftest1.S <<\EOF ++ cat > conftest.S <<\EOF + .global _start + .type _start, @function + _start: + li.w $a7, 93 +- /* This ensures the assembler supports explicit reloc. */ +- pcalau12i $a0, %pc_hi20(x) +- ld.w $a0, $a0, %pc_lo12(x) ++ li.w $a0, 0 + syscall 0 + + .data +@@ -29,21 +27,11 @@ x: + /* This should produce an R_LARCH_RELATIVE in the static PIE. */ + .dword _start + EOF +- cat > conftest2.S <<\EOF +-.global f +-.type f, @function +-f: +- /* The linker should be able to handle this and produce a PLT entry. */ +- la.pcrel $t0, $t0, external_func +- jirl $zero, $t0, 0 +-EOF + + libc_cv_static_pie_on_loongarch=no +- if AC_TRY_COMMAND([${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS -static-pie -nostdlib -fPIE -o conftest1 conftest1.S]) \ +- && AC_TRY_COMMAND([LC_ALL=C $READELF -Wr conftest1 | grep -q R_LARCH_RELATIVE]) \ +- && ! AC_TRY_COMMAND([LC_ALL=C $READELF -Wl conftest1 | grep -q INTERP]) \ +- && AC_TRY_COMMAND([${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS -shared -fPIC -o conftest2.so conftest2.S]) \ +- && AC_TRY_COMMAND([LC_ALL=C $READELF -Wr conftest2.so | grep -q 'R_LARCH_JUMP_SLOT.*external_func']) ++ if AC_TRY_COMMAND([${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS -static-pie -nostdlib -fPIE -o conftest conftest.S]) \ ++ && AC_TRY_COMMAND([LC_ALL=C $READELF -Wr conftest | grep -q R_LARCH_RELATIVE]) \ ++ && ! 
AC_TRY_COMMAND([LC_ALL=C $READELF -Wl conftest | grep -q INTERP]) + then + libc_cv_static_pie_on_loongarch=yes + fi +@@ -74,6 +62,6 @@ else + libc_cv_loongarch_vec_asm=no + fi + rm -f conftest*]) +-if test $libc_cv_loongarch_vec_asm = yes; then +- AC_DEFINE(HAVE_LOONGARCH_VEC_ASM) ++if test $libc_cv_loongarch_vec_asm = no; then ++ AC_MSG_ERROR([binutils version is too old, use 2.41 or newer version]) + fi +diff --git a/sysdeps/loongarch/dl-audit-check.h b/sysdeps/loongarch/dl-audit-check.h +new file mode 100644 +index 00000000..a139c939 +--- /dev/null ++++ b/sysdeps/loongarch/dl-audit-check.h +@@ -0,0 +1,23 @@ ++/* rtld-audit version check. LoongArch version. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++static inline bool ++_dl_audit_check_version (unsigned int lav) ++{ ++ return lav == LAV_CURRENT; ++} +diff --git a/sysdeps/loongarch/dl-link.sym b/sysdeps/loongarch/dl-link.sym +index 868ab7c6..b534968e 100644 +--- a/sysdeps/loongarch/dl-link.sym ++++ b/sysdeps/loongarch/dl-link.sym +@@ -6,9 +6,13 @@ DL_SIZEOF_RG sizeof(struct La_loongarch_regs) + DL_SIZEOF_RV sizeof(struct La_loongarch_retval) + + DL_OFFSET_RG_A0 offsetof(struct La_loongarch_regs, lr_reg) +-DL_OFFSET_RG_FA0 offsetof(struct La_loongarch_regs, lr_fpreg) ++#ifndef __loongarch_soft_float ++DL_OFFSET_RG_VEC0 offsetof(struct La_loongarch_regs, lr_vec) ++#endif + DL_OFFSET_RG_RA offsetof(struct La_loongarch_regs, lr_ra) + DL_OFFSET_RG_SP offsetof(struct La_loongarch_regs, lr_sp) + + DL_OFFSET_RV_A0 offsetof(struct La_loongarch_retval, lrv_a0) +-DL_OFFSET_RV_FA0 offsetof(struct La_loongarch_retval, lrv_a1) ++#ifndef __loongarch_soft_float ++DL_OFFSET_RV_VEC0 offsetof(struct La_loongarch_retval, lrv_vec0) ++#endif +diff --git a/sysdeps/loongarch/dl-machine.h b/sysdeps/loongarch/dl-machine.h +index 51ce9af8..57913cef 100644 +--- a/sysdeps/loongarch/dl-machine.h ++++ b/sysdeps/loongarch/dl-machine.h +@@ -90,7 +90,7 @@ static inline ElfW (Addr) elf_machine_dynamic (void) + or $a0, $sp, $zero \n\ + bl _dl_start \n\ + # Stash user entry point in s0. \n\ +- or $s0, $v0, $zero \n\ ++ or $s0, $a0, $zero \n\ + # Load the original argument count. \n\ + ld.d $a1, $sp, 0 \n\ + # Call _dl_init (struct link_map *main_map, int argc, \ +@@ -270,9 +270,11 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[], + /* If using PLTs, fill in the first two entries of .got.plt. 
*/ + if (l->l_info[DT_JMPREL]) + { +-#if HAVE_LOONGARCH_VEC_ASM && !defined __loongarch_soft_float ++#if !defined __loongarch_soft_float + extern void _dl_runtime_resolve_lasx (void) attribute_hidden; + extern void _dl_runtime_resolve_lsx (void) attribute_hidden; ++ extern void _dl_runtime_profile_lasx (void) attribute_hidden; ++ extern void _dl_runtime_profile_lsx (void) attribute_hidden; + #endif + extern void _dl_runtime_resolve (void) attribute_hidden; + extern void _dl_runtime_profile (void) attribute_hidden; +@@ -287,7 +289,14 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[], + end in this function. */ + if (profile != 0) + { +- gotplt[0] = (ElfW(Addr)) &_dl_runtime_profile; ++#if !defined __loongarch_soft_float ++ if (SUPPORT_LASX) ++ gotplt[0] = (ElfW(Addr)) &_dl_runtime_profile_lasx; ++ else if (SUPPORT_LSX) ++ gotplt[0] = (ElfW(Addr)) &_dl_runtime_profile_lsx; ++ else ++#endif ++ gotplt[0] = (ElfW(Addr)) &_dl_runtime_profile; + + if (GLRO(dl_profile) != NULL + && _dl_name_match_p (GLRO(dl_profile), l)) +@@ -300,7 +309,7 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[], + /* This function will get called to fix up the GOT entry + indicated by the offset on the stack, and then jump to + the resolved address. */ +-#if HAVE_LOONGARCH_VEC_ASM && !defined __loongarch_soft_float ++#if !defined __loongarch_soft_float + if (SUPPORT_LASX) + gotplt[0] = (ElfW(Addr)) &_dl_runtime_resolve_lasx; + else if (SUPPORT_LSX) +diff --git a/sysdeps/loongarch/dl-trampoline.S b/sysdeps/loongarch/dl-trampoline.S +index f6ba5e44..bb449ecf 100644 +--- a/sysdeps/loongarch/dl-trampoline.S ++++ b/sysdeps/loongarch/dl-trampoline.S +@@ -19,193 +19,24 @@ + #include + #include + +-#if HAVE_LOONGARCH_VEC_ASM && !defined __loongarch_soft_float ++#if !defined __loongarch_soft_float + #define USE_LASX + #define _dl_runtime_resolve _dl_runtime_resolve_lasx ++#define _dl_runtime_profile _dl_runtime_profile_lasx + #include "dl-trampoline.h" + #undef FRAME_SIZE + #undef USE_LASX + #undef _dl_runtime_resolve ++#undef _dl_runtime_profile + + #define USE_LSX + #define _dl_runtime_resolve _dl_runtime_resolve_lsx ++#define _dl_runtime_profile _dl_runtime_profile_lsx + #include "dl-trampoline.h" + #undef FRAME_SIZE + #undef USE_LSX + #undef _dl_runtime_resolve ++#undef _dl_runtime_profile + #endif + + #include "dl-trampoline.h" +- +-#include "dl-link.h" +- +-ENTRY (_dl_runtime_profile) +- /* LoongArch we get called with: +- t0 linkr_map pointer +- t1 the scaled offset stored in t0, which can be used +- to calculate the offset of the current symbol in .rela.plt +- t2 %hi(%pcrel(.got.plt)) stored in t2, no use in this function +- t3 dl resolver entry point, no use in this function +- +- Stack frame layout: +- [sp, #96] La_loongarch_regs +- [sp, #48] La_loongarch_retval +- [sp, #40] frame size return from pltenter +- [sp, #32] dl_profile_call saved a1 +- [sp, #24] dl_profile_call saved a0 +- [sp, #16] T1 +- [sp, #0] ra, fp <- fp +- */ +- +-# define OFFSET_T1 16 +-# define OFFSET_SAVED_CALL_A0 OFFSET_T1 + 8 +-# define OFFSET_FS OFFSET_SAVED_CALL_A0 + 16 +-# define OFFSET_RV OFFSET_FS + 8 +-# define OFFSET_RG OFFSET_RV + DL_SIZEOF_RV +- +-# define SF_SIZE (-(-(OFFSET_RG + DL_SIZEOF_RG) & ALMASK)) +- +- /* Save arguments to stack. 
*/ +- ADDI sp, sp, -SF_SIZE +- REG_S ra, sp, 0 +- REG_S fp, sp, 8 +- +- or fp, sp, zero +- +- REG_S a0, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 0*SZREG +- REG_S a1, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 1*SZREG +- REG_S a2, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 2*SZREG +- REG_S a3, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 3*SZREG +- REG_S a4, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 4*SZREG +- REG_S a5, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 5*SZREG +- REG_S a6, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 6*SZREG +- REG_S a7, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 7*SZREG +- +-#ifndef __loongarch_soft_float +- FREG_S fa0, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 0*SZFREG +- FREG_S fa1, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 1*SZFREG +- FREG_S fa2, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 2*SZFREG +- FREG_S fa3, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 3*SZFREG +- FREG_S fa4, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 4*SZFREG +- FREG_S fa5, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 5*SZFREG +- FREG_S fa6, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 6*SZFREG +- FREG_S fa7, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 7*SZFREG +-#endif +- +- /* Update .got.plt and obtain runtime address of callee. */ +- SLLI a1, t1, 1 +- or a0, t0, zero +- ADD a1, a1, t1 +- or a2, ra, zero /* return addr */ +- ADDI a3, fp, OFFSET_RG /* La_loongarch_regs pointer */ +- ADDI a4, fp, OFFSET_FS /* frame size return from pltenter */ +- +- REG_S a0, fp, OFFSET_SAVED_CALL_A0 +- REG_S a1, fp, OFFSET_SAVED_CALL_A0 + SZREG +- +- la t2, _dl_profile_fixup +- jirl ra, t2, 0 +- +- REG_L t3, fp, OFFSET_FS +- bge t3, zero, 1f +- +- /* Save the return. */ +- or t4, v0, zero +- +- /* Restore arguments from stack. */ +- REG_L a0, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 0*SZREG +- REG_L a1, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 1*SZREG +- REG_L a2, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 2*SZREG +- REG_L a3, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 3*SZREG +- REG_L a4, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 4*SZREG +- REG_L a5, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 5*SZREG +- REG_L a6, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 6*SZREG +- REG_L a7, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 7*SZREG +- +-#ifndef __loongarch_soft_float +- FREG_L fa0, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 0*SZFREG +- FREG_L fa1, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 1*SZFREG +- FREG_L fa2, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 2*SZFREG +- FREG_L fa3, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 3*SZFREG +- FREG_L fa4, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 4*SZFREG +- FREG_L fa5, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 5*SZFREG +- FREG_L fa6, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 6*SZFREG +- FREG_L fa7, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 7*SZFREG +-#endif +- +- REG_L ra, fp, 0 +- REG_L fp, fp, SZREG +- +- ADDI sp, sp, SF_SIZE +- jirl zero, t4, 0 +- +-1: +- /* The new frame size is in t3. */ +- SUB sp, fp, t3 +- BSTRINS sp, zero, 3, 0 +- +- REG_S a0, fp, OFFSET_T1 +- +- or a0, sp, zero +- ADDI a1, fp, SF_SIZE +- or a2, t3, zero +- la t5, memcpy +- jirl ra, t5, 0 +- +- REG_L t6, fp, OFFSET_T1 +- +- /* Call the function. 
*/ +- REG_L a0, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 0*SZREG +- REG_L a1, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 1*SZREG +- REG_L a2, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 2*SZREG +- REG_L a3, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 3*SZREG +- REG_L a4, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 4*SZREG +- REG_L a5, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 5*SZREG +- REG_L a6, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 6*SZREG +- REG_L a7, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 7*SZREG +- +-#ifndef __loongarch_soft_float +- FREG_L fa0, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 0*SZFREG +- FREG_L fa1, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 1*SZFREG +- FREG_L fa2, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 2*SZFREG +- FREG_L fa3, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 3*SZFREG +- FREG_L fa4, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 4*SZFREG +- FREG_L fa5, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 5*SZFREG +- FREG_L fa6, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 6*SZFREG +- FREG_L fa7, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 7*SZFREG +-#endif +- jirl ra, t6, 0 +- +- REG_S a0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_A0 +- REG_S a1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_A0 + SZREG +- +-#ifndef __loongarch_soft_float +- FREG_S fa0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_FA0 +- FREG_S fa1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_FA0 + SZFREG +-#endif +- +- /* Setup call to pltexit. */ +- REG_L a0, fp, OFFSET_SAVED_CALL_A0 +- REG_L a1, fp, OFFSET_SAVED_CALL_A0 + SZREG +- ADDI a2, fp, OFFSET_RG +- ADDI a3, fp, OFFSET_RV +- la t7, _dl_audit_pltexit +- jirl ra, t7, 0 +- +- REG_L a0, fp, OFFSET_RV + DL_OFFSET_RV_A0 +- REG_L a1, fp, OFFSET_RV + DL_OFFSET_RV_A0 + SZREG +- +-#ifndef __loongarch_soft_float +- FREG_L fa0, fp, OFFSET_RV + DL_OFFSET_RV_FA0 +- FREG_L fa1, fp, OFFSET_RV + DL_OFFSET_RV_FA0 + SZFREG +-#endif +- +- /* RA from within La_loongarch_reg. */ +- REG_L ra, fp, OFFSET_RG + DL_OFFSET_RG_RA +- or sp, fp, zero +- ADDI sp, sp, SF_SIZE +- REG_S fp, fp, SZREG +- +- jirl zero, ra, 0 +- +-END (_dl_runtime_profile) +diff --git a/sysdeps/loongarch/dl-trampoline.h b/sysdeps/loongarch/dl-trampoline.h +index 99fcacab..e298439d 100644 +--- a/sysdeps/loongarch/dl-trampoline.h ++++ b/sysdeps/loongarch/dl-trampoline.h +@@ -125,3 +125,245 @@ ENTRY (_dl_runtime_resolve) + /* Invoke the callee. */ + jirl zero, t1, 0 + END (_dl_runtime_resolve) ++ ++#include "dl-link.h" ++ ++ENTRY (_dl_runtime_profile) ++ /* LoongArch we get called with: ++ t0 linkr_map pointer ++ t1 the scaled offset stored in t0, which can be used ++ to calculate the offset of the current symbol in .rela.plt ++ t2 %hi(%pcrel(.got.plt)) stored in t2, no use in this function ++ t3 dl resolver entry point, no use in this function ++ ++ Stack frame layout: ++ [sp, #208] La_loongarch_regs ++ [sp, #128] La_loongarch_retval // align: 16 ++ [sp, #112] frame size return from pltenter ++ [sp, #80 ] dl_profile_call saved vec1 ++ [sp, #48 ] dl_profile_call saved vec0 // align: 16 ++ [sp, #32 ] dl_profile_call saved a1 ++ [sp, #24 ] dl_profile_call saved a0 ++ [sp, #16 ] T1 ++ [sp, #0 ] ra, fp <- fp ++ */ ++ ++# define OFFSET_T1 16 ++# define OFFSET_SAVED_CALL_A0 OFFSET_T1 + 8 ++# define OFFSET_FS OFFSET_SAVED_CALL_A0 + 16 + 8 + 64 ++# define OFFSET_RV OFFSET_FS + 8 + 8 ++# define OFFSET_RG OFFSET_RV + DL_SIZEOF_RV ++ ++# define SF_SIZE (-(-(OFFSET_RG + DL_SIZEOF_RG) & ALMASK)) ++ ++ /* Save arguments to stack. 
*/ ++ ADDI sp, sp, -SF_SIZE ++ REG_S ra, sp, 0 ++ REG_S fp, sp, 8 ++ ++ or fp, sp, zero ++ ++ REG_S a0, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 0*SZREG ++ REG_S a1, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 1*SZREG ++ REG_S a2, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 2*SZREG ++ REG_S a3, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 3*SZREG ++ REG_S a4, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 4*SZREG ++ REG_S a5, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 5*SZREG ++ REG_S a6, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 6*SZREG ++ REG_S a7, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 7*SZREG ++ ++#ifdef USE_LASX ++ xvst xr0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZXREG ++ xvst xr1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZXREG ++ xvst xr2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZXREG ++ xvst xr3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZXREG ++ xvst xr4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZXREG ++ xvst xr5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZXREG ++ xvst xr6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZXREG ++ xvst xr7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZXREG ++#elif defined USE_LSX ++ vst vr0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZVREG ++ vst vr1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZVREG ++ vst vr2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZVREG ++ vst vr3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZVREG ++ vst vr4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZVREG ++ vst vr5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZVREG ++ vst vr6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZVREG ++ vst vr7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZVREG ++#elif !defined __loongarch_soft_float ++ FREG_S fa0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZFREG ++ FREG_S fa1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZFREG ++ FREG_S fa2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZFREG ++ FREG_S fa3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZFREG ++ FREG_S fa4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZFREG ++ FREG_S fa5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZFREG ++ FREG_S fa6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZFREG ++ FREG_S fa7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZFREG ++#endif ++ ++ /* Update .got.plt and obtain runtime address of callee. */ ++ SLLI a1, t1, 1 ++ or a0, t0, zero ++ ADD a1, a1, t1 ++ or a2, ra, zero /* return addr */ ++ ADDI a3, fp, OFFSET_RG /* La_loongarch_regs pointer */ ++ ADDI a4, fp, OFFSET_FS /* frame size return from pltenter */ ++ ++ REG_S a0, fp, OFFSET_SAVED_CALL_A0 ++ REG_S a1, fp, OFFSET_SAVED_CALL_A0 + SZREG ++ ++ la t2, _dl_profile_fixup ++ jirl ra, t2, 0 ++ ++ REG_L t3, fp, OFFSET_FS ++ bge t3, zero, 1f ++ ++ /* Save the return. */ ++ or t4, v0, zero ++ ++ /* Restore arguments from stack. 
*/ ++ REG_L a0, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 0*SZREG ++ REG_L a1, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 1*SZREG ++ REG_L a2, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 2*SZREG ++ REG_L a3, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 3*SZREG ++ REG_L a4, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 4*SZREG ++ REG_L a5, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 5*SZREG ++ REG_L a6, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 6*SZREG ++ REG_L a7, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 7*SZREG ++ ++#ifdef USE_LASX ++ xvld xr0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZXREG ++ xvld xr1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZXREG ++ xvld xr2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZXREG ++ xvld xr3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZXREG ++ xvld xr4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZXREG ++ xvld xr5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZXREG ++ xvld xr6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZXREG ++ xvld xr7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZXREG ++#elif defined USE_LSX ++ vld vr0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZVREG ++ vld vr1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZVREG ++ vld vr2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZVREG ++ vld vr3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZVREG ++ vld vr4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZVREG ++ vld vr5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZVREG ++ vld vr6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZVREG ++ vld vr7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZVREG ++#elif !defined __loongarch_soft_float ++ FREG_L fa0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZFREG ++ FREG_L fa1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZFREG ++ FREG_L fa2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZFREG ++ FREG_L fa3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZFREG ++ FREG_L fa4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZFREG ++ FREG_L fa5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZFREG ++ FREG_L fa6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZFREG ++ FREG_L fa7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZFREG ++#endif ++ ++ REG_L ra, fp, 0 ++ REG_L fp, fp, SZREG ++ ++ ADDI sp, sp, SF_SIZE ++ jirl zero, t4, 0 ++ ++1: ++ /* The new frame size is in t3. */ ++ SUB sp, fp, t3 ++ BSTRINS sp, zero, 3, 0 ++ ++ REG_S a0, fp, OFFSET_T1 ++ ++ or a0, sp, zero ++ ADDI a1, fp, SF_SIZE ++ or a2, t3, zero ++ la t5, memcpy ++ jirl ra, t5, 0 ++ ++ REG_L t6, fp, OFFSET_T1 ++ ++ /* Call the function. 
*/ ++ REG_L a0, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 0*SZREG ++ REG_L a1, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 1*SZREG ++ REG_L a2, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 2*SZREG ++ REG_L a3, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 3*SZREG ++ REG_L a4, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 4*SZREG ++ REG_L a5, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 5*SZREG ++ REG_L a6, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 6*SZREG ++ REG_L a7, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 7*SZREG ++ ++#ifdef USE_LASX ++ xvld xr0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZXREG ++ xvld xr1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZXREG ++ xvld xr2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZXREG ++ xvld xr3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZXREG ++ xvld xr4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZXREG ++ xvld xr5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZXREG ++ xvld xr6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZXREG ++ xvld xr7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZXREG ++#elif defined USE_LSX ++ vld vr0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZVREG ++ vld vr1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZVREG ++ vld vr2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZVREG ++ vld vr3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZVREG ++ vld vr4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZVREG ++ vld vr5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZVREG ++ vld vr6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZVREG ++ vld vr7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZVREG ++#elif !defined __loongarch_soft_float ++ FREG_L fa0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZFREG ++ FREG_L fa1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZFREG ++ FREG_L fa2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZFREG ++ FREG_L fa3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZFREG ++ FREG_L fa4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZFREG ++ FREG_L fa5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZFREG ++ FREG_L fa6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZFREG ++ FREG_L fa7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZFREG ++#endif ++ ++ jirl ra, t6, 0 ++ ++ REG_S a0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_A0 ++ REG_S a1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_A0 + SZREG ++ ++#ifdef USE_LASX ++ xvst xr0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 ++ xvst xr1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 + SZXREG ++#elif defined USE_LSX ++ vst vr0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 ++ vst vr1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 + SZVREG ++#elif !defined __loongarch_soft_float ++ FREG_S fa0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 ++ FREG_S fa1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 + SZFREG ++#endif ++ ++ /* Setup call to pltexit. */ ++ REG_L a0, fp, OFFSET_SAVED_CALL_A0 ++ REG_L a1, fp, OFFSET_SAVED_CALL_A0 + SZREG ++ ADDI a2, fp, OFFSET_RG ++ ADDI a3, fp, OFFSET_RV ++ la t7, _dl_audit_pltexit ++ jirl ra, t7, 0 ++ ++ REG_L a0, fp, OFFSET_RV + DL_OFFSET_RV_A0 ++ REG_L a1, fp, OFFSET_RV + DL_OFFSET_RV_A0 + SZREG ++ ++#ifdef USE_LASX ++ xvld xr0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 ++ xvld xr1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 + SZXREG ++#elif defined USE_LSX ++ vld vr0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 ++ vld vr1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 + SZVREG ++#elif !defined __loongarch_soft_float ++ FREG_L fa0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 ++ FREG_L fa1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 + SZFREG ++#endif ++ ++ /* RA from within La_loongarch_reg. 
*/ ++ REG_L ra, fp, OFFSET_RG + DL_OFFSET_RG_RA ++ or sp, fp, zero ++ ADDI sp, sp, SF_SIZE ++ REG_S fp, fp, SZREG ++ ++ jirl zero, ra, 0 ++ ++END (_dl_runtime_profile) +diff --git a/sysdeps/loongarch/lp64/multiarch/Makefile b/sysdeps/loongarch/lp64/multiarch/Makefile +new file mode 100644 +index 00000000..fe863e1b +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/Makefile +@@ -0,0 +1,52 @@ ++ifeq ($(subdir),string) ++sysdep_routines += \ ++ strlen-aligned \ ++ strlen-lsx \ ++ strlen-lasx \ ++ strnlen-aligned \ ++ strnlen-lsx \ ++ strnlen-lasx \ ++ strchr-aligned \ ++ strchr-lsx \ ++ strchr-lasx \ ++ strrchr-aligned \ ++ strrchr-lsx \ ++ strrchr-lasx \ ++ strchrnul-aligned \ ++ strchrnul-lsx \ ++ strchrnul-lasx \ ++ strcmp-aligned \ ++ strcmp-lsx \ ++ strncmp-aligned \ ++ strncmp-lsx \ ++ strcpy-aligned \ ++ strcpy-unaligned \ ++ strcpy-lsx \ ++ strcpy-lasx \ ++ stpcpy-aligned \ ++ stpcpy-unaligned \ ++ stpcpy-lsx \ ++ stpcpy-lasx \ ++ memcpy-aligned \ ++ memcpy-unaligned \ ++ memmove-unaligned \ ++ memmove-lsx \ ++ memmove-lasx \ ++ rawmemchr-aligned \ ++ rawmemchr-lsx \ ++ rawmemchr-lasx \ ++ memchr-aligned \ ++ memchr-lsx \ ++ memchr-lasx \ ++ memrchr-generic \ ++ memrchr-lsx \ ++ memrchr-lasx \ ++ memset-aligned \ ++ memset-unaligned \ ++ memset-lsx \ ++ memset-lasx \ ++ memcmp-aligned \ ++ memcmp-lsx \ ++ memcmp-lasx \ ++# sysdep_routines ++endif +diff --git a/sysdeps/loongarch/lp64/multiarch/dl-symbol-redir-ifunc.h b/sysdeps/loongarch/lp64/multiarch/dl-symbol-redir-ifunc.h +new file mode 100644 +index 00000000..e2723873 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/dl-symbol-redir-ifunc.h +@@ -0,0 +1,24 @@ ++/* Symbol rediretion for loader/static initialization code. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#ifndef _DL_IFUNC_GENERIC_H ++#define _DL_IFUNC_GENERIC_H ++ ++asm ("memset = __memset_aligned"); ++ ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c +new file mode 100644 +index 00000000..529e2369 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c +@@ -0,0 +1,164 @@ ++/* Enumerate available IFUNC implementations of a function LoongArch64 version. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. 
++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++size_t ++__libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, ++ size_t max) ++{ ++ ++ size_t i = max; ++ ++ IFUNC_IMPL (i, name, strlen, ++#if !defined __loongarch_soft_float ++ IFUNC_IMPL_ADD (array, i, strlen, SUPPORT_LASX, __strlen_lasx) ++ IFUNC_IMPL_ADD (array, i, strlen, SUPPORT_LSX, __strlen_lsx) ++#endif ++ IFUNC_IMPL_ADD (array, i, strlen, 1, __strlen_aligned) ++ ) ++ ++ IFUNC_IMPL (i, name, strnlen, ++#if !defined __loongarch_soft_float ++ IFUNC_IMPL_ADD (array, i, strnlen, SUPPORT_LASX, __strnlen_lasx) ++ IFUNC_IMPL_ADD (array, i, strnlen, SUPPORT_LSX, __strnlen_lsx) ++#endif ++ IFUNC_IMPL_ADD (array, i, strnlen, 1, __strnlen_aligned) ++ ) ++ ++ IFUNC_IMPL (i, name, strchr, ++#if !defined __loongarch_soft_float ++ IFUNC_IMPL_ADD (array, i, strchr, SUPPORT_LASX, __strchr_lasx) ++ IFUNC_IMPL_ADD (array, i, strchr, SUPPORT_LSX, __strchr_lsx) ++#endif ++ IFUNC_IMPL_ADD (array, i, strchr, 1, __strchr_aligned) ++ ) ++ ++ IFUNC_IMPL (i, name, strchrnul, ++#if !defined __loongarch_soft_float ++ IFUNC_IMPL_ADD (array, i, strchrnul, SUPPORT_LASX, __strchrnul_lasx) ++ IFUNC_IMPL_ADD (array, i, strchrnul, SUPPORT_LSX, __strchrnul_lsx) ++#endif ++ IFUNC_IMPL_ADD (array, i, strchrnul, 1, __strchrnul_aligned) ++ ) ++ ++ IFUNC_IMPL (i, name, strcmp, ++#if !defined __loongarch_soft_float ++ IFUNC_IMPL_ADD (array, i, strcmp, SUPPORT_LSX, __strcmp_lsx) ++#endif ++ IFUNC_IMPL_ADD (array, i, strcmp, 1, __strcmp_aligned) ++ ) ++ ++ IFUNC_IMPL (i, name, strncmp, ++#if !defined __loongarch_soft_float ++ IFUNC_IMPL_ADD (array, i, strncmp, SUPPORT_LSX, __strncmp_lsx) ++#endif ++ IFUNC_IMPL_ADD (array, i, strncmp, 1, __strncmp_aligned) ++ ) ++ ++ IFUNC_IMPL (i, name, strcpy, ++#if !defined __loongarch_soft_float ++ IFUNC_IMPL_ADD (array, i, strcpy, SUPPORT_LASX, __strcpy_lasx) ++ IFUNC_IMPL_ADD (array, i, strcpy, SUPPORT_LSX, __strcpy_lsx) ++#endif ++ IFUNC_IMPL_ADD (array, i, strcpy, SUPPORT_UAL, __strcpy_unaligned) ++ IFUNC_IMPL_ADD (array, i, strcpy, 1, __strcpy_aligned) ++ ) ++ ++ IFUNC_IMPL (i, name, stpcpy, ++#if !defined __loongarch_soft_float ++ IFUNC_IMPL_ADD (array, i, stpcpy, SUPPORT_LASX, __stpcpy_lasx) ++ IFUNC_IMPL_ADD (array, i, stpcpy, SUPPORT_LSX, __stpcpy_lsx) ++#endif ++ IFUNC_IMPL_ADD (array, i, stpcpy, SUPPORT_UAL, __stpcpy_unaligned) ++ IFUNC_IMPL_ADD (array, i, stpcpy, 1, __stpcpy_aligned) ++ ) ++ ++ IFUNC_IMPL (i, name, strrchr, ++#if !defined __loongarch_soft_float ++ IFUNC_IMPL_ADD (array, i, strrchr, SUPPORT_LASX, __strrchr_lasx) ++ IFUNC_IMPL_ADD (array, i, strrchr, SUPPORT_LSX, __strrchr_lsx) ++#endif ++ IFUNC_IMPL_ADD (array, i, strrchr, 1, __strrchr_aligned) ++ ) ++ ++ IFUNC_IMPL (i, name, memcpy, ++#if !defined __loongarch_soft_float ++ IFUNC_IMPL_ADD (array, i, memcpy, SUPPORT_LASX, __memcpy_lasx) ++ IFUNC_IMPL_ADD (array, i, memcpy, SUPPORT_LSX, __memcpy_lsx) ++#endif ++ IFUNC_IMPL_ADD (array, i, memcpy, SUPPORT_UAL, __memcpy_unaligned) ++ IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_aligned) ++ ) ++ ++ IFUNC_IMPL (i, name, memmove, ++#if !defined __loongarch_soft_float ++ IFUNC_IMPL_ADD (array, i, memmove, SUPPORT_LASX, __memmove_lasx) ++ IFUNC_IMPL_ADD (array, i, memmove, SUPPORT_LSX, __memmove_lsx) ++#endif ++ IFUNC_IMPL_ADD (array, i, memmove, SUPPORT_UAL, __memmove_unaligned) ++ IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_aligned) ++ ) ++ 
++ IFUNC_IMPL (i, name, rawmemchr, ++#if !defined __loongarch_soft_float ++ IFUNC_IMPL_ADD (array, i, rawmemchr, SUPPORT_LASX, __rawmemchr_lasx) ++ IFUNC_IMPL_ADD (array, i, rawmemchr, SUPPORT_LSX, __rawmemchr_lsx) ++#endif ++ IFUNC_IMPL_ADD (array, i, rawmemchr, 1, __rawmemchr_aligned) ++ ) ++ ++ IFUNC_IMPL (i, name, memchr, ++#if !defined __loongarch_soft_float ++ IFUNC_IMPL_ADD (array, i, memchr, SUPPORT_LASX, __memchr_lasx) ++ IFUNC_IMPL_ADD (array, i, memchr, SUPPORT_LSX, __memchr_lsx) ++#endif ++ IFUNC_IMPL_ADD (array, i, memchr, 1, __memchr_aligned) ++ ) ++ ++ IFUNC_IMPL (i, name, memrchr, ++#if !defined __loongarch_soft_float ++ IFUNC_IMPL_ADD (array, i, memrchr, SUPPORT_LASX, __memrchr_lasx) ++ IFUNC_IMPL_ADD (array, i, memrchr, SUPPORT_LSX, __memrchr_lsx) ++#endif ++ IFUNC_IMPL_ADD (array, i, memrchr, 1, __memrchr_generic) ++ ) ++ ++ IFUNC_IMPL (i, name, memset, ++#if !defined __loongarch_soft_float ++ IFUNC_IMPL_ADD (array, i, memset, SUPPORT_LASX, __memset_lasx) ++ IFUNC_IMPL_ADD (array, i, memset, SUPPORT_LSX, __memset_lsx) ++#endif ++ IFUNC_IMPL_ADD (array, i, memset, SUPPORT_UAL, __memset_unaligned) ++ IFUNC_IMPL_ADD (array, i, memset, 1, __memset_aligned) ++ ) ++ ++ IFUNC_IMPL (i, name, memcmp, ++#if !defined __loongarch_soft_float ++ IFUNC_IMPL_ADD (array, i, memcmp, SUPPORT_LASX, __memcmp_lasx) ++ IFUNC_IMPL_ADD (array, i, memcmp, SUPPORT_LSX, __memcmp_lsx) ++#endif ++ IFUNC_IMPL_ADD (array, i, memcmp, 1, __memcmp_aligned) ++ ) ++ return i; ++} +diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-lasx.h b/sysdeps/loongarch/lp64/multiarch/ifunc-lasx.h +new file mode 100644 +index 00000000..3be67da6 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/ifunc-lasx.h +@@ -0,0 +1,45 @@ ++/* Common definition for ifunc selection implementation. ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . 
*/ ++ ++#include ++#include ++ ++#if !defined __loongarch_soft_float ++extern __typeof (REDIRECT_NAME) OPTIMIZE (lasx) attribute_hidden; ++extern __typeof (REDIRECT_NAME) OPTIMIZE (lsx) attribute_hidden; ++#endif ++ ++extern __typeof (REDIRECT_NAME) OPTIMIZE (aligned) attribute_hidden; ++extern __typeof (REDIRECT_NAME) OPTIMIZE (unaligned) attribute_hidden; ++ ++static inline void * ++IFUNC_SELECTOR (void) ++{ ++#if !defined __loongarch_soft_float ++ if (SUPPORT_LASX) ++ return OPTIMIZE (lasx); ++ else if (SUPPORT_LSX) ++ return OPTIMIZE (lsx); ++ else ++#endif ++ if (SUPPORT_UAL) ++ return OPTIMIZE (unaligned); ++ else ++ return OPTIMIZE (aligned); ++} +diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-memchr.h b/sysdeps/loongarch/lp64/multiarch/ifunc-memchr.h +new file mode 100644 +index 00000000..9060ccd5 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/ifunc-memchr.h +@@ -0,0 +1,40 @@ ++/* Common definition for memchr ifunc selections. ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++ ++#if !defined __loongarch_soft_float ++extern __typeof (REDIRECT_NAME) OPTIMIZE (lasx) attribute_hidden; ++extern __typeof (REDIRECT_NAME) OPTIMIZE (lsx) attribute_hidden; ++#endif ++extern __typeof (REDIRECT_NAME) OPTIMIZE (aligned) attribute_hidden; ++ ++static inline void * ++IFUNC_SELECTOR (void) ++{ ++#if !defined __loongarch_soft_float ++ if (SUPPORT_LASX) ++ return OPTIMIZE (lasx); ++ else if (SUPPORT_LSX) ++ return OPTIMIZE (lsx); ++ else ++#endif ++ return OPTIMIZE (aligned); ++} +diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-memcmp.h b/sysdeps/loongarch/lp64/multiarch/ifunc-memcmp.h +new file mode 100644 +index 00000000..04adc2e5 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/ifunc-memcmp.h +@@ -0,0 +1,40 @@ ++/* Common definition for memcmp ifunc selections. ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . 
*/ ++ ++#include ++#include ++ ++#if !defined __loongarch_soft_float ++extern __typeof (REDIRECT_NAME) OPTIMIZE (lasx) attribute_hidden; ++extern __typeof (REDIRECT_NAME) OPTIMIZE (lsx) attribute_hidden; ++#endif ++extern __typeof (REDIRECT_NAME) OPTIMIZE (aligned) attribute_hidden; ++ ++static inline void * ++IFUNC_SELECTOR (void) ++{ ++#if !defined __loongarch_soft_float ++ if (SUPPORT_LASX) ++ return OPTIMIZE (lasx); ++ else if (SUPPORT_LSX) ++ return OPTIMIZE (lsx); ++ else ++#endif ++ return OPTIMIZE (aligned); ++} +diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-memrchr.h b/sysdeps/loongarch/lp64/multiarch/ifunc-memrchr.h +new file mode 100644 +index 00000000..8215f9ad +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/ifunc-memrchr.h +@@ -0,0 +1,40 @@ ++/* Common definition for memrchr implementation. ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++ ++#if !defined __loongarch_soft_float ++extern __typeof (REDIRECT_NAME) OPTIMIZE (lasx) attribute_hidden; ++extern __typeof (REDIRECT_NAME) OPTIMIZE (lsx) attribute_hidden; ++#endif ++extern __typeof (REDIRECT_NAME) OPTIMIZE (generic) attribute_hidden; ++ ++static inline void * ++IFUNC_SELECTOR (void) ++{ ++#if !defined __loongarch_soft_float ++ if (SUPPORT_LASX) ++ return OPTIMIZE (lasx); ++ else if (SUPPORT_LSX) ++ return OPTIMIZE (lsx); ++ else ++#endif ++ return OPTIMIZE (generic); ++} +diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-rawmemchr.h b/sysdeps/loongarch/lp64/multiarch/ifunc-rawmemchr.h +new file mode 100644 +index 00000000..a7bb4cf9 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/ifunc-rawmemchr.h +@@ -0,0 +1,40 @@ ++/* Common definition for rawmemchr ifunc selections. ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . 
*/ ++ ++#include ++#include ++ ++#if !defined __loongarch_soft_float ++extern __typeof (REDIRECT_NAME) OPTIMIZE (lasx) attribute_hidden; ++extern __typeof (REDIRECT_NAME) OPTIMIZE (lsx) attribute_hidden; ++#endif ++extern __typeof (REDIRECT_NAME) OPTIMIZE (aligned) attribute_hidden; ++ ++static inline void * ++IFUNC_SELECTOR (void) ++{ ++#if !defined __loongarch_soft_float ++ if (SUPPORT_LASX) ++ return OPTIMIZE (lasx); ++ else if (SUPPORT_LSX) ++ return OPTIMIZE (lsx); ++ else ++#endif ++ return OPTIMIZE (aligned); ++} +diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-strchr.h b/sysdeps/loongarch/lp64/multiarch/ifunc-strchr.h +new file mode 100644 +index 00000000..4494db79 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/ifunc-strchr.h +@@ -0,0 +1,41 @@ ++/* Common definition for strchr ifunc selections. ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++ ++#if !defined __loongarch_soft_float ++extern __typeof (REDIRECT_NAME) OPTIMIZE (lasx) attribute_hidden; ++extern __typeof (REDIRECT_NAME) OPTIMIZE (lsx) attribute_hidden; ++#endif ++ ++extern __typeof (REDIRECT_NAME) OPTIMIZE (aligned) attribute_hidden; ++ ++static inline void * ++IFUNC_SELECTOR (void) ++{ ++#if !defined __loongarch_soft_float ++ if (SUPPORT_LASX) ++ return OPTIMIZE (lasx); ++ else if (SUPPORT_LSX) ++ return OPTIMIZE (lsx); ++ else ++#endif ++ return OPTIMIZE (aligned); ++} +diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-strchrnul.h b/sysdeps/loongarch/lp64/multiarch/ifunc-strchrnul.h +new file mode 100644 +index 00000000..8a925120 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/ifunc-strchrnul.h +@@ -0,0 +1,41 @@ ++/* Common definition for strchrnul ifunc selections. ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . 
*/ ++ ++#include ++#include ++ ++#if !defined __loongarch_soft_float ++extern __typeof (REDIRECT_NAME) OPTIMIZE (lasx) attribute_hidden; ++extern __typeof (REDIRECT_NAME) OPTIMIZE (lsx) attribute_hidden; ++#endif ++ ++extern __typeof (REDIRECT_NAME) OPTIMIZE (aligned) attribute_hidden; ++ ++static inline void * ++IFUNC_SELECTOR (void) ++{ ++#if !defined __loongarch_soft_float ++ if (SUPPORT_LASX) ++ return OPTIMIZE (lasx); ++ else if (SUPPORT_LSX) ++ return OPTIMIZE (lsx); ++ else ++#endif ++ return OPTIMIZE (aligned); ++} +diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-strcmp.h b/sysdeps/loongarch/lp64/multiarch/ifunc-strcmp.h +new file mode 100644 +index 00000000..ca26352b +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/ifunc-strcmp.h +@@ -0,0 +1,38 @@ ++/* Common definition for strcmp ifunc selection. ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++ ++#if !defined __loongarch_soft_float ++extern __typeof (REDIRECT_NAME) OPTIMIZE (lsx) attribute_hidden; ++#endif ++ ++extern __typeof (REDIRECT_NAME) OPTIMIZE (aligned) attribute_hidden; ++ ++static inline void * ++IFUNC_SELECTOR (void) ++{ ++#if !defined __loongarch_soft_float ++ if (SUPPORT_LSX) ++ return OPTIMIZE (lsx); ++ else ++#endif ++ return OPTIMIZE (aligned); ++} +diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-strlen.h b/sysdeps/loongarch/lp64/multiarch/ifunc-strlen.h +new file mode 100644 +index 00000000..6258bb76 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/ifunc-strlen.h +@@ -0,0 +1,40 @@ ++/* Common definition for strlen ifunc selections. ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . 
*/ ++ ++#include ++#include ++ ++#if !defined __loongarch_soft_float ++extern __typeof (REDIRECT_NAME) OPTIMIZE (lasx) attribute_hidden; ++extern __typeof (REDIRECT_NAME) OPTIMIZE (lsx) attribute_hidden; ++#endif ++extern __typeof (REDIRECT_NAME) OPTIMIZE (aligned) attribute_hidden; ++ ++static inline void * ++IFUNC_SELECTOR (void) ++{ ++#if !defined __loongarch_soft_float ++ if (SUPPORT_LASX) ++ return OPTIMIZE (lasx); ++ else if (SUPPORT_LSX) ++ return OPTIMIZE (lsx); ++ else ++#endif ++ return OPTIMIZE (aligned); ++} +diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-strncmp.h b/sysdeps/loongarch/lp64/multiarch/ifunc-strncmp.h +new file mode 100644 +index 00000000..1a7dc36b +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/ifunc-strncmp.h +@@ -0,0 +1,38 @@ ++/* Common definition for strncmp ifunc selection. ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++ ++#if !defined __loongarch_soft_float ++extern __typeof (REDIRECT_NAME) OPTIMIZE (lsx) attribute_hidden; ++#endif ++ ++extern __typeof (REDIRECT_NAME) OPTIMIZE (aligned) attribute_hidden; ++ ++static inline void * ++IFUNC_SELECTOR (void) ++{ ++#if !defined __loongarch_soft_float ++ if (SUPPORT_LSX) ++ return OPTIMIZE (lsx); ++ else ++#endif ++ return OPTIMIZE (aligned); ++} +diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-strnlen.h b/sysdeps/loongarch/lp64/multiarch/ifunc-strnlen.h +new file mode 100644 +index 00000000..5cf89810 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/ifunc-strnlen.h +@@ -0,0 +1,41 @@ ++/* Common definition for strnlen ifunc selections. ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . 
*/ ++ ++#include ++#include ++ ++#if !defined __loongarch_soft_float ++extern __typeof (REDIRECT_NAME) OPTIMIZE (lasx) attribute_hidden; ++extern __typeof (REDIRECT_NAME) OPTIMIZE (lsx) attribute_hidden; ++#endif ++ ++extern __typeof (REDIRECT_NAME) OPTIMIZE (aligned) attribute_hidden; ++ ++static inline void * ++IFUNC_SELECTOR (void) ++{ ++#if !defined __loongarch_soft_float ++ if (SUPPORT_LASX) ++ return OPTIMIZE (lasx); ++ else if (SUPPORT_LSX) ++ return OPTIMIZE (lsx); ++ else ++#endif ++ return OPTIMIZE (aligned); ++} +diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-strrchr.h b/sysdeps/loongarch/lp64/multiarch/ifunc-strrchr.h +new file mode 100644 +index 00000000..bbb34089 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/ifunc-strrchr.h +@@ -0,0 +1,41 @@ ++/* Common definition for strrchr ifunc selections. ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++ ++#if !defined __loongarch_soft_float ++extern __typeof (REDIRECT_NAME) OPTIMIZE (lasx) attribute_hidden; ++extern __typeof (REDIRECT_NAME) OPTIMIZE (lsx) attribute_hidden; ++#endif ++ ++extern __typeof (REDIRECT_NAME) OPTIMIZE (aligned) attribute_hidden; ++ ++static inline void * ++IFUNC_SELECTOR (void) ++{ ++#if !defined __loongarch_soft_float ++ if (SUPPORT_LASX) ++ return OPTIMIZE (lasx); ++ else if (SUPPORT_LSX) ++ return OPTIMIZE (lsx); ++ else ++#endif ++ return OPTIMIZE (aligned); ++} +diff --git a/sysdeps/loongarch/lp64/multiarch/memchr-aligned.S b/sysdeps/loongarch/lp64/multiarch/memchr-aligned.S +new file mode 100644 +index 00000000..81d0d004 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/memchr-aligned.S +@@ -0,0 +1,95 @@ ++/* Optimized memchr implementation using basic LoongArch instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ . 
*/ ++ ++#include ++#include ++#include ++ ++#if IS_IN (libc) ++# define MEMCHR_NAME __memchr_aligned ++#else ++# define MEMCHR_NAME memchr ++#endif ++ ++LEAF(MEMCHR_NAME, 6) ++ beqz a2, L(out) ++ andi t1, a0, 0x7 ++ add.d a5, a0, a2 ++ bstrins.d a0, zero, 2, 0 ++ ++ ld.d t0, a0, 0 ++ bstrins.d a1, a1, 15, 8 ++ lu12i.w a3, 0x01010 ++ slli.d t2, t1, 03 ++ ++ bstrins.d a1, a1, 31, 16 ++ ori a3, a3, 0x101 ++ li.d t7, -1 ++ li.d t8, 8 ++ ++ bstrins.d a1, a1, 63, 32 ++ bstrins.d a3, a3, 63, 32 ++ sll.d t2, t7, t2 ++ xor t0, t0, a1 ++ ++ ++ addi.d a6, a5, -1 ++ slli.d a4, a3, 7 ++ sub.d t1, t8, t1 ++ orn t0, t0, t2 ++ ++ sub.d t2, t0, a3 ++ andn t3, a4, t0 ++ bstrins.d a6, zero, 2, 0 ++ and t0, t2, t3 ++ ++ bgeu t1, a2, L(end) ++L(loop): ++ bnez t0, L(found) ++ ld.d t1, a0, 8 ++ xor t0, t1, a1 ++ ++ addi.d a0, a0, 8 ++ sub.d t2, t0, a3 ++ andn t3, a4, t0 ++ and t0, t2, t3 ++ ++ ++ bne a0, a6, L(loop) ++L(end): ++ sub.d t1, a5, a6 ++ ctz.d t0, t0 ++ srli.d t0, t0, 3 ++ ++ sltu t1, t0, t1 ++ add.d a0, a0, t0 ++ maskeqz a0, a0, t1 ++ jr ra ++ ++L(found): ++ ctz.d t0, t0 ++ srli.d t0, t0, 3 ++ add.d a0, a0, t0 ++ jr ra ++ ++L(out): ++ move a0, zero ++ jr ra ++END(MEMCHR_NAME) ++ ++libc_hidden_builtin_def (MEMCHR_NAME) +diff --git a/sysdeps/loongarch/lp64/multiarch/memchr-lasx.S b/sysdeps/loongarch/lp64/multiarch/memchr-lasx.S +new file mode 100644 +index 00000000..a26cdf48 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/memchr-lasx.S +@@ -0,0 +1,117 @@ ++/* Optimized memchr implementation using LoongArch LASX instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ . 
*/ ++ ++#include ++#include ++#include ++ ++#if IS_IN (libc) && !defined __loongarch_soft_float ++ ++# define MEMCHR __memchr_lasx ++ ++LEAF(MEMCHR, 6) ++ beqz a2, L(ret0) ++ add.d a3, a0, a2 ++ andi t0, a0, 0x3f ++ bstrins.d a0, zero, 5, 0 ++ ++ xvld xr0, a0, 0 ++ xvld xr1, a0, 32 ++ li.d t1, -1 ++ li.d t2, 64 ++ ++ xvreplgr2vr.b xr2, a1 ++ sll.d t3, t1, t0 ++ sub.d t2, t2, t0 ++ xvseq.b xr0, xr0, xr2 ++ ++ xvseq.b xr1, xr1, xr2 ++ xvmsknz.b xr0, xr0 ++ xvmsknz.b xr1, xr1 ++ xvpickve.w xr3, xr0, 4 ++ ++ ++ xvpickve.w xr4, xr1, 4 ++ vilvl.h vr0, vr3, vr0 ++ vilvl.h vr1, vr4, vr1 ++ vilvl.w vr0, vr1, vr0 ++ ++ movfr2gr.d t0, fa0 ++ and t0, t0, t3 ++ bgeu t2, a2, L(end) ++ bnez t0, L(found) ++ ++ addi.d a4, a3, -1 ++ bstrins.d a4, zero, 5, 0 ++L(loop): ++ xvld xr0, a0, 64 ++ xvld xr1, a0, 96 ++ ++ addi.d a0, a0, 64 ++ xvseq.b xr0, xr0, xr2 ++ xvseq.b xr1, xr1, xr2 ++ beq a0, a4, L(out) ++ ++ ++ xvmax.bu xr3, xr0, xr1 ++ xvseteqz.v fcc0, xr3 ++ bcnez fcc0, L(loop) ++ xvmsknz.b xr0, xr0 ++ ++ xvmsknz.b xr1, xr1 ++ xvpickve.w xr3, xr0, 4 ++ xvpickve.w xr4, xr1, 4 ++ vilvl.h vr0, vr3, vr0 ++ ++ vilvl.h vr1, vr4, vr1 ++ vilvl.w vr0, vr1, vr0 ++ movfr2gr.d t0, fa0 ++L(found): ++ ctz.d t1, t0 ++ ++ add.d a0, a0, t1 ++ jr ra ++L(ret0): ++ move a0, zero ++ jr ra ++ ++ ++L(out): ++ xvmsknz.b xr0, xr0 ++ xvmsknz.b xr1, xr1 ++ xvpickve.w xr3, xr0, 4 ++ xvpickve.w xr4, xr1, 4 ++ ++ vilvl.h vr0, vr3, vr0 ++ vilvl.h vr1, vr4, vr1 ++ vilvl.w vr0, vr1, vr0 ++ movfr2gr.d t0, fa0 ++ ++L(end): ++ sub.d t2, zero, a3 ++ srl.d t1, t1, t2 ++ and t0, t0, t1 ++ ctz.d t1, t0 ++ ++ add.d a0, a0, t1 ++ maskeqz a0, a0, t0 ++ jr ra ++END(MEMCHR) ++ ++libc_hidden_builtin_def (MEMCHR) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/memchr-lsx.S b/sysdeps/loongarch/lp64/multiarch/memchr-lsx.S +new file mode 100644 +index 00000000..a73ecd25 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/memchr-lsx.S +@@ -0,0 +1,102 @@ ++/* Optimized memchr implementation using LoongArch LSX instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ . 
*/ ++ ++#include ++#include ++#include ++ ++#if IS_IN (libc) && !defined __loongarch_soft_float ++ ++# define MEMCHR __memchr_lsx ++ ++LEAF(MEMCHR, 6) ++ beqz a2, L(ret0) ++ add.d a3, a0, a2 ++ andi t0, a0, 0x1f ++ bstrins.d a0, zero, 4, 0 ++ ++ vld vr0, a0, 0 ++ vld vr1, a0, 16 ++ li.d t1, -1 ++ li.d t2, 32 ++ ++ vreplgr2vr.b vr2, a1 ++ sll.d t3, t1, t0 ++ sub.d t2, t2, t0 ++ vseq.b vr0, vr0, vr2 ++ ++ vseq.b vr1, vr1, vr2 ++ vmsknz.b vr0, vr0 ++ vmsknz.b vr1, vr1 ++ vilvl.h vr0, vr1, vr0 ++ ++ ++ movfr2gr.s t0, fa0 ++ and t0, t0, t3 ++ bgeu t2, a2, L(end) ++ bnez t0, L(found) ++ ++ addi.d a4, a3, -1 ++ bstrins.d a4, zero, 4, 0 ++L(loop): ++ vld vr0, a0, 32 ++ vld vr1, a0, 48 ++ ++ addi.d a0, a0, 32 ++ vseq.b vr0, vr0, vr2 ++ vseq.b vr1, vr1, vr2 ++ beq a0, a4, L(out) ++ ++ vmax.bu vr3, vr0, vr1 ++ vseteqz.v fcc0, vr3 ++ bcnez fcc0, L(loop) ++ vmsknz.b vr0, vr0 ++ ++ ++ vmsknz.b vr1, vr1 ++ vilvl.h vr0, vr1, vr0 ++ movfr2gr.s t0, fa0 ++L(found): ++ ctz.w t0, t0 ++ ++ add.d a0, a0, t0 ++ jr ra ++L(ret0): ++ move a0, zero ++ jr ra ++ ++L(out): ++ vmsknz.b vr0, vr0 ++ vmsknz.b vr1, vr1 ++ vilvl.h vr0, vr1, vr0 ++ movfr2gr.s t0, fa0 ++ ++L(end): ++ sub.d t2, zero, a3 ++ srl.w t1, t1, t2 ++ and t0, t0, t1 ++ ctz.w t1, t0 ++ ++ ++ add.d a0, a0, t1 ++ maskeqz a0, a0, t0 ++ jr ra ++END(MEMCHR) ++ ++libc_hidden_builtin_def (MEMCHR) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/memchr.c b/sysdeps/loongarch/lp64/multiarch/memchr.c +new file mode 100644 +index 00000000..059479c0 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/memchr.c +@@ -0,0 +1,37 @@ ++/* Multiple versions of memchr. ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++/* Define multiple versions only for the definition in libc. */ ++#if IS_IN (libc) ++# define memchr __redirect_memchr ++# include ++# undef memchr ++ ++# define SYMBOL_NAME memchr ++# include "ifunc-memchr.h" ++ ++libc_ifunc_redirected (__redirect_memchr, memchr, ++ IFUNC_SELECTOR ()); ++ ++# ifdef SHARED ++__hidden_ver1 (memchr, __GI_memchr, __redirect_memchr) ++ __attribute__ ((visibility ("hidden"))) __attribute_copy__ (memchr); ++# endif ++ ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/memcmp-aligned.S b/sysdeps/loongarch/lp64/multiarch/memcmp-aligned.S +new file mode 100644 +index 00000000..14a7caa9 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/memcmp-aligned.S +@@ -0,0 +1,292 @@ ++/* Optimized memcmp implementation using basic LoongArch instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. 
++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ . */ ++ ++#include ++#include ++#include ++ ++#if IS_IN (libc) ++# define MEMCMP_NAME __memcmp_aligned ++#else ++# define MEMCMP_NAME memcmp ++#endif ++ ++LEAF(MEMCMP_NAME, 6) ++ beqz a2, L(ret) ++ andi a4, a1, 0x7 ++ andi a3, a0, 0x7 ++ sltu a5, a4, a3 ++ ++ xor t0, a0, a1 ++ li.w t8, 8 ++ maskeqz t0, t0, a5 ++ li.w t7, -1 ++ ++ xor a0, a0, t0 ++ xor a1, a1, t0 ++ andi a3, a0, 0x7 ++ andi a4, a1, 0x7 ++ ++ xor a0, a0, a3 ++ xor a1, a1, a4 ++ ld.d t2, a0, 0 ++ ld.d t1, a1, 0 ++ ++ slli.d t3, a3, 3 ++ slli.d t4, a4, 3 ++ sub.d a6, t3, t4 ++ srl.d t1, t1, t4 ++ ++ srl.d t0, t2, t3 ++ srl.d t5, t7, t4 ++ sub.d t6, t0, t1 ++ and t6, t6, t5 ++ ++ sub.d t5, t8, a4 ++ bnez t6, L(first_out) ++ bgeu t5, a2, L(ret) ++ sub.d a2, a2, t5 ++ ++ bnez a6, L(unaligned) ++ blt a2, t8, L(al_less_8bytes) ++ andi t1, a2, 31 ++ beq t1, a2, L(al_less_32bytes) ++ ++ sub.d t2, a2, t1 ++ add.d a4, a0, t2 ++ move a2, t1 ++ ++L(al_loop): ++ ld.d t0, a0, 8 ++ ++ ld.d t1, a1, 8 ++ ld.d t2, a0, 16 ++ ld.d t3, a1, 16 ++ ld.d t4, a0, 24 ++ ++ ld.d t5, a1, 24 ++ ld.d t6, a0, 32 ++ ld.d t7, a1, 32 ++ addi.d a0, a0, 32 ++ ++ addi.d a1, a1, 32 ++ bne t0, t1, L(out1) ++ bne t2, t3, L(out2) ++ bne t4, t5, L(out3) ++ ++ bne t6, t7, L(out4) ++ bne a0, a4, L(al_loop) ++ ++L(al_less_32bytes): ++ srai.d a4, a2, 4 ++ beqz a4, L(al_less_16bytes) ++ ++ ld.d t0, a0, 8 ++ ld.d t1, a1, 8 ++ ld.d t2, a0, 16 ++ ld.d t3, a1, 16 ++ ++ addi.d a0, a0, 16 ++ addi.d a1, a1, 16 ++ addi.d a2, a2, -16 ++ bne t0, t1, L(out1) ++ ++ bne t2, t3, L(out2) ++ ++L(al_less_16bytes): ++ srai.d a4, a2, 3 ++ beqz a4, L(al_less_8bytes) ++ ld.d t0, a0, 8 ++ ++ ld.d t1, a1, 8 ++ addi.d a0, a0, 8 ++ addi.d a1, a1, 8 ++ addi.d a2, a2, -8 ++ ++ bne t0, t1, L(out1) ++ ++L(al_less_8bytes): ++ beqz a2, L(ret) ++ ld.d t0, a0, 8 ++ ld.d t1, a1, 8 ++ ++ li.d t7, -1 ++ slli.d t2, a2, 3 ++ sll.d t2, t7, t2 ++ sub.d t3, t0, t1 ++ ++ andn t6, t3, t2 ++ bnez t6, L(count_diff) ++ ++L(ret): ++ move a0, zero ++ jr ra ++ ++L(out4): ++ move t0, t6 ++ move t1, t7 ++ sub.d t6, t6, t7 ++ b L(count_diff) ++ ++L(out3): ++ move t0, t4 ++ move t1, t5 ++ sub.d t6, t4, t5 ++ b L(count_diff) ++ ++L(out2): ++ move t0, t2 ++ move t1, t3 ++L(out1): ++ sub.d t6, t0, t1 ++ b L(count_diff) ++ ++L(first_out): ++ slli.d t4, a2, 3 ++ slt t3, a2, t5 ++ sll.d t4, t7, t4 ++ maskeqz t4, t4, t3 ++ ++ andn t6, t6, t4 ++ ++L(count_diff): ++ ctz.d t2, t6 ++ bstrins.d t2, zero, 2, 0 ++ srl.d t0, t0, t2 ++ ++ srl.d t1, t1, t2 ++ andi t0, t0, 0xff ++ andi t1, t1, 0xff ++ sub.d t2, t0, t1 ++ ++ sub.d t3, t1, t0 ++ masknez t2, t2, a5 ++ maskeqz t3, t3, a5 ++ or a0, t2, t3 ++ ++ jr ra ++ ++L(unaligned): ++ sub.d a7, zero, a6 ++ srl.d t0, t2, a6 ++ blt a2, t8, L(un_less_8bytes) ++ ++ andi t1, a2, 31 ++ beq t1, a2, L(un_less_32bytes) ++ sub.d t2, a2, t1 ++ add.d a4, a0, t2 ++ ++ move a2, t1 ++ ++L(un_loop): ++ ld.d t2, a0, 8 ++ ld.d t1, a1, 8 ++ ld.d t4, a0, 16 ++ ++ ld.d 
t3, a1, 16 ++ ld.d t6, a0, 24 ++ ld.d t5, a1, 24 ++ ld.d t8, a0, 32 ++ ++ ld.d t7, a1, 32 ++ addi.d a0, a0, 32 ++ addi.d a1, a1, 32 ++ sll.d a3, t2, a7 ++ ++ or t0, a3, t0 ++ bne t0, t1, L(out1) ++ srl.d t0, t2, a6 ++ sll.d a3, t4, a7 ++ ++ or t2, a3, t0 ++ bne t2, t3, L(out2) ++ srl.d t0, t4, a6 ++ sll.d a3, t6, a7 ++ ++ or t4, a3, t0 ++ bne t4, t5, L(out3) ++ srl.d t0, t6, a6 ++ sll.d a3, t8, a7 ++ ++ or t6, t0, a3 ++ bne t6, t7, L(out4) ++ srl.d t0, t8, a6 ++ bne a0, a4, L(un_loop) ++ ++L(un_less_32bytes): ++ srai.d a4, a2, 4 ++ beqz a4, L(un_less_16bytes) ++ ld.d t2, a0, 8 ++ ld.d t1, a1, 8 ++ ++ ld.d t4, a0, 16 ++ ld.d t3, a1, 16 ++ addi.d a0, a0, 16 ++ addi.d a1, a1, 16 ++ ++ addi.d a2, a2, -16 ++ sll.d a3, t2, a7 ++ or t0, a3, t0 ++ bne t0, t1, L(out1) ++ ++ srl.d t0, t2, a6 ++ sll.d a3, t4, a7 ++ or t2, a3, t0 ++ bne t2, t3, L(out2) ++ ++ srl.d t0, t4, a6 ++ ++L(un_less_16bytes): ++ srai.d a4, a2, 3 ++ beqz a4, L(un_less_8bytes) ++ ld.d t2, a0, 8 ++ ++ ld.d t1, a1, 8 ++ addi.d a0, a0, 8 ++ addi.d a1, a1, 8 ++ addi.d a2, a2, -8 ++ ++ sll.d a3, t2, a7 ++ or t0, a3, t0 ++ bne t0, t1, L(out1) ++ srl.d t0, t2, a6 ++ ++L(un_less_8bytes): ++ beqz a2, L(ret) ++ andi a7, a7, 63 ++ slli.d a4, a2, 3 ++ bgeu a7, a4, L(last_cmp) ++ ++ ld.d t2, a0, 8 ++ sll.d a3, t2, a7 ++ or t0, a3, t0 ++ ++L(last_cmp): ++ ld.d t1, a1, 8 ++ ++ li.d t7, -1 ++ sll.d t2, t7, a4 ++ sub.d t3, t0, t1 ++ andn t6, t3, t2 ++ ++ bnez t6, L(count_diff) ++ move a0, zero ++ jr ra ++END(MEMCMP_NAME) ++ ++libc_hidden_builtin_def (MEMCMP_NAME) +diff --git a/sysdeps/loongarch/lp64/multiarch/memcmp-lasx.S b/sysdeps/loongarch/lp64/multiarch/memcmp-lasx.S +new file mode 100644 +index 00000000..3151a179 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/memcmp-lasx.S +@@ -0,0 +1,207 @@ ++/* Optimized memcmp implementation using LoongArch LASX instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ . 
*/ ++ ++#include ++#include ++#include ++ ++#if IS_IN (libc) && !defined __loongarch_soft_float ++ ++# define MEMCMP __memcmp_lasx ++ ++LEAF(MEMCMP, 6) ++ li.d t2, 32 ++ add.d a3, a0, a2 ++ add.d a4, a1, a2 ++ bgeu t2, a2, L(less32) ++ ++ li.d t1, 160 ++ bgeu a2, t1, L(make_aligned) ++L(loop32): ++ xvld xr0, a0, 0 ++ xvld xr1, a1, 0 ++ ++ addi.d a0, a0, 32 ++ addi.d a1, a1, 32 ++ addi.d a2, a2, -32 ++ xvseq.b xr2, xr0, xr1 ++ ++ xvsetanyeqz.b fcc0, xr2 ++ bcnez fcc0, L(end) ++L(last_bytes): ++ bltu t2, a2, L(loop32) ++ xvld xr0, a3, -32 ++ ++ ++ xvld xr1, a4, -32 ++ xvseq.b xr2, xr0, xr1 ++L(end): ++ xvmsknz.b xr2, xr2 ++ xvpermi.q xr4, xr0, 1 ++ ++ xvpickve.w xr3, xr2, 4 ++ xvpermi.q xr5, xr1, 1 ++ vilvl.h vr2, vr3, vr2 ++ movfr2gr.s t0, fa2 ++ ++ cto.w t0, t0 ++ vreplgr2vr.b vr2, t0 ++ vshuf.b vr0, vr4, vr0, vr2 ++ vshuf.b vr1, vr5, vr1, vr2 ++ ++ vpickve2gr.bu t0, vr0, 0 ++ vpickve2gr.bu t1, vr1, 0 ++ sub.d a0, t0, t1 ++ jr ra ++ ++ ++L(less32): ++ srli.d t0, a2, 4 ++ beqz t0, L(less16) ++ vld vr0, a0, 0 ++ vld vr1, a1, 0 ++ ++ vld vr2, a3, -16 ++ vld vr3, a4, -16 ++L(short_ret): ++ vseq.b vr4, vr0, vr1 ++ vseq.b vr5, vr2, vr3 ++ ++ vmsknz.b vr4, vr4 ++ vmsknz.b vr5, vr5 ++ vilvl.h vr4, vr5, vr4 ++ movfr2gr.s t0, fa4 ++ ++ cto.w t0, t0 ++ vreplgr2vr.b vr4, t0 ++ vshuf.b vr0, vr2, vr0, vr4 ++ vshuf.b vr1, vr3, vr1, vr4 ++ ++ ++ vpickve2gr.bu t0, vr0, 0 ++ vpickve2gr.bu t1, vr1, 0 ++ sub.d a0, t0, t1 ++ jr ra ++ ++L(less16): ++ srli.d t0, a2, 3 ++ beqz t0, L(less8) ++ vldrepl.d vr0, a0, 0 ++ vldrepl.d vr1, a1, 0 ++ ++ vldrepl.d vr2, a3, -8 ++ vldrepl.d vr3, a4, -8 ++ b L(short_ret) ++ nop ++ ++L(less8): ++ srli.d t0, a2, 2 ++ beqz t0, L(less4) ++ vldrepl.w vr0, a0, 0 ++ vldrepl.w vr1, a1, 0 ++ ++ ++ vldrepl.w vr2, a3, -4 ++ vldrepl.w vr3, a4, -4 ++ b L(short_ret) ++ nop ++ ++L(less4): ++ srli.d t0, a2, 1 ++ beqz t0, L(less2) ++ vldrepl.h vr0, a0, 0 ++ vldrepl.h vr1, a1, 0 ++ ++ vldrepl.h vr2, a3, -2 ++ vldrepl.h vr3, a4, -2 ++ b L(short_ret) ++ nop ++ ++L(less2): ++ beqz a2, L(ret0) ++ ld.bu t0, a0, 0 ++ ld.bu t1, a1, 0 ++ sub.d a0, t0, t1 ++ ++ jr ra ++L(ret0): ++ move a0, zero ++ jr ra ++ ++L(make_aligned): ++ xvld xr0, a0, 0 ++ ++ xvld xr1, a1, 0 ++ xvseq.b xr2, xr0, xr1 ++ xvsetanyeqz.b fcc0, xr2 ++ bcnez fcc0, L(end) ++ ++ andi t0, a0, 0x1f ++ sub.d t0, t2, t0 ++ sub.d t1, a2, t0 ++ add.d a0, a0, t0 ++ ++ add.d a1, a1, t0 ++ andi a2, t1, 0x3f ++ sub.d t0, t1, a2 ++ add.d a5, a0, t0 ++ ++ ++L(loop_align): ++ xvld xr0, a0, 0 ++ xvld xr1, a1, 0 ++ xvld xr2, a0, 32 ++ xvld xr3, a1, 32 ++ ++ xvseq.b xr0, xr0, xr1 ++ xvseq.b xr1, xr2, xr3 ++ xvmin.bu xr2, xr1, xr0 ++ xvsetanyeqz.b fcc0, xr2 ++ ++ bcnez fcc0, L(pair_end) ++ addi.d a0, a0, 64 ++ addi.d a1, a1, 64 ++ bne a0, a5, L(loop_align) ++ ++ bnez a2, L(last_bytes) ++ move a0, zero ++ jr ra ++ nop ++ ++ ++L(pair_end): ++ xvmsknz.b xr0, xr0 ++ xvmsknz.b xr1, xr1 ++ xvpickve.w xr2, xr0, 4 ++ xvpickve.w xr3, xr1, 4 ++ ++ vilvl.h vr0, vr2, vr0 ++ vilvl.h vr1, vr3, vr1 ++ vilvl.w vr0, vr1, vr0 ++ movfr2gr.d t0, fa0 ++ ++ cto.d t0, t0 ++ ldx.bu t1, a0, t0 ++ ldx.bu t2, a1, t0 ++ sub.d a0, t1, t2 ++ ++ jr ra ++END(MEMCMP) ++ ++libc_hidden_builtin_def (MEMCMP) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/memcmp-lsx.S b/sysdeps/loongarch/lp64/multiarch/memcmp-lsx.S +new file mode 100644 +index 00000000..38a50a4c +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/memcmp-lsx.S +@@ -0,0 +1,269 @@ ++/* Optimized memcmp implementation using LoongArch LSX instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. 
++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ . */ ++ ++#include ++#include ++#include ++ ++#if IS_IN (libc) && !defined __loongarch_soft_float ++ ++#define MEMCMP __memcmp_lsx ++ ++LEAF(MEMCMP, 6) ++ beqz a2, L(out) ++ pcalau12i t0, %pc_hi20(L(INDEX)) ++ andi a3, a0, 0xf ++ vld vr5, t0, %pc_lo12(L(INDEX)) ++ ++ andi a4, a1, 0xf ++ bne a3, a4, L(unaligned) ++ bstrins.d a0, zero, 3, 0 ++ xor a1, a1, a4 ++ ++ vld vr0, a0, 0 ++ vld vr1, a1, 0 ++ li.d t0, 16 ++ vreplgr2vr.b vr3, a3 ++ ++ sub.d t1, t0, a3 ++ vadd.b vr3, vr3, vr5 ++ vshuf.b vr0, vr3, vr0, vr3 ++ vshuf.b vr1, vr3, vr1, vr3 ++ ++ ++ vseq.b vr4, vr0, vr1 ++ bgeu t1, a2, L(al_end) ++ vsetanyeqz.b fcc0, vr4 ++ bcnez fcc0, L(al_found) ++ ++ sub.d t1, a2, t1 ++ andi a2, t1, 31 ++ beq a2, t1, L(al_less_32bytes) ++ sub.d t2, t1, a2 ++ ++ add.d a4, a0, t2 ++L(al_loop): ++ vld vr0, a0, 16 ++ vld vr1, a1, 16 ++ vld vr2, a0, 32 ++ ++ vld vr3, a1, 32 ++ addi.d a0, a0, 32 ++ addi.d a1, a1, 32 ++ vseq.b vr4, vr0, vr1 ++ ++ ++ vseq.b vr6, vr2, vr3 ++ vand.v vr6, vr4, vr6 ++ vsetanyeqz.b fcc0, vr6 ++ bcnez fcc0, L(al_pair_end) ++ ++ bne a0, a4, L(al_loop) ++L(al_less_32bytes): ++ bgeu t0, a2, L(al_less_16bytes) ++ vld vr0, a0, 16 ++ vld vr1, a1, 16 ++ ++ vld vr2, a0, 32 ++ vld vr3, a1, 32 ++ addi.d a2, a2, -16 ++ vreplgr2vr.b vr6, a2 ++ ++ vslt.b vr5, vr5, vr6 ++ vseq.b vr4, vr0, vr1 ++ vseq.b vr6, vr2, vr3 ++ vorn.v vr6, vr6, vr5 ++ ++ ++L(al_pair_end): ++ vsetanyeqz.b fcc0, vr4 ++ bcnez fcc0, L(al_found) ++ vnori.b vr4, vr6, 0 ++ vfrstpi.b vr4, vr4, 0 ++ ++ vshuf.b vr0, vr2, vr2, vr4 ++ vshuf.b vr1, vr3, vr3, vr4 ++ vpickve2gr.bu t0, vr0, 0 ++ vpickve2gr.bu t1, vr1, 0 ++ ++ sub.d a0, t0, t1 ++ jr ra ++ nop ++ nop ++ ++L(al_less_16bytes): ++ beqz a2, L(out) ++ vld vr0, a0, 16 ++ vld vr1, a1, 16 ++ vseq.b vr4, vr0, vr1 ++ ++ ++L(al_end): ++ vreplgr2vr.b vr6, a2 ++ vslt.b vr5, vr5, vr6 ++ vorn.v vr4, vr4, vr5 ++ nop ++ ++L(al_found): ++ vnori.b vr4, vr4, 0 ++ vfrstpi.b vr4, vr4, 0 ++ vshuf.b vr0, vr0, vr0, vr4 ++ vshuf.b vr1, vr1, vr1, vr4 ++ ++ vpickve2gr.bu t0, vr0, 0 ++ vpickve2gr.bu t1, vr1, 0 ++ sub.d a0, t0, t1 ++ jr ra ++ ++L(out): ++ move a0, zero ++ jr ra ++ nop ++ nop ++ ++ ++L(unaligned): ++ xor t2, a0, a1 ++ sltu a5, a3, a4 ++ masknez t2, t2, a5 ++ xor a0, a0, t2 ++ ++ xor a1, a1, t2 ++ andi a3, a0, 0xf ++ andi a4, a1, 0xf ++ bstrins.d a0, zero, 3, 0 ++ ++ xor a1, a1, a4 ++ vld vr4, a0, 0 ++ vld vr1, a1, 0 ++ li.d t0, 16 ++ ++ vreplgr2vr.b vr2, a4 ++ sub.d a6, a4, a3 ++ sub.d t1, t0, a4 ++ sub.d t2, t0, a6 ++ ++ ++ vadd.b vr2, vr2, vr5 ++ vreplgr2vr.b vr6, t2 ++ vadd.b vr6, vr6, vr5 ++ vshuf.b vr0, vr4, vr4, vr6 ++ ++ vshuf.b vr1, vr2, vr1, vr2 ++ vshuf.b vr0, vr2, vr0, vr2 ++ vseq.b vr7, vr0, vr1 ++ bgeu t1, a2, L(un_end) ++ ++ vsetanyeqz.b fcc0, vr7 ++ bcnez fcc0, L(un_found) ++ sub.d a2, a2, t1 ++ andi t1, a2, 31 ++ ++ beq a2, t1, L(un_less_32bytes) ++ sub.d t2, a2, t1 ++ move a2, t1 ++ add.d 
a4, a1, t2 ++ ++ ++L(un_loop): ++ vld vr2, a0, 16 ++ vld vr1, a1, 16 ++ vld vr3, a1, 32 ++ addi.d a1, a1, 32 ++ ++ addi.d a0, a0, 32 ++ vshuf.b vr0, vr2, vr4, vr6 ++ vld vr4, a0, 0 ++ vseq.b vr7, vr0, vr1 ++ ++ vshuf.b vr2, vr4, vr2, vr6 ++ vseq.b vr8, vr2, vr3 ++ vand.v vr8, vr7, vr8 ++ vsetanyeqz.b fcc0, vr8 ++ ++ bcnez fcc0, L(un_pair_end) ++ bne a1, a4, L(un_loop) ++ ++L(un_less_32bytes): ++ bltu a2, t0, L(un_less_16bytes) ++ vld vr2, a0, 16 ++ vld vr1, a1, 16 ++ addi.d a0, a0, 16 ++ ++ addi.d a1, a1, 16 ++ addi.d a2, a2, -16 ++ vshuf.b vr0, vr2, vr4, vr6 ++ vor.v vr4, vr2, vr2 ++ ++ vseq.b vr7, vr0, vr1 ++ vsetanyeqz.b fcc0, vr7 ++ bcnez fcc0, L(un_found) ++L(un_less_16bytes): ++ beqz a2, L(out) ++ vld vr1, a1, 16 ++ bgeu a6, a2, 1f ++ ++ vld vr2, a0, 16 ++1: ++ vshuf.b vr0, vr2, vr4, vr6 ++ vseq.b vr7, vr0, vr1 ++L(un_end): ++ vreplgr2vr.b vr3, a2 ++ ++ ++ vslt.b vr3, vr5, vr3 ++ vorn.v vr7, vr7, vr3 ++ ++L(un_found): ++ vnori.b vr7, vr7, 0 ++ vfrstpi.b vr7, vr7, 0 ++ ++ vshuf.b vr0, vr0, vr0, vr7 ++ vshuf.b vr1, vr1, vr1, vr7 ++L(calc_result): ++ vpickve2gr.bu t0, vr0, 0 ++ vpickve2gr.bu t1, vr1, 0 ++ ++ sub.d t2, t0, t1 ++ sub.d t3, t1, t0 ++ masknez t0, t3, a5 ++ maskeqz t1, t2, a5 ++ ++ or a0, t0, t1 ++ jr ra ++L(un_pair_end): ++ vsetanyeqz.b fcc0, vr7 ++ bcnez fcc0, L(un_found) ++ ++ ++ vnori.b vr7, vr8, 0 ++ vfrstpi.b vr7, vr7, 0 ++ vshuf.b vr0, vr2, vr2, vr7 ++ vshuf.b vr1, vr3, vr3, vr7 ++ ++ b L(calc_result) ++END(MEMCMP) ++ ++ .section .rodata.cst16,"M",@progbits,16 ++ .align 4 ++L(INDEX): ++ .dword 0x0706050403020100 ++ .dword 0x0f0e0d0c0b0a0908 ++ ++libc_hidden_builtin_def (MEMCMP) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/memcmp.c b/sysdeps/loongarch/lp64/multiarch/memcmp.c +new file mode 100644 +index 00000000..32eccac2 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/memcmp.c +@@ -0,0 +1,43 @@ ++/* Multiple versions of memcmp. ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++/* Define multiple versions only for the definition in libc. 
*/ ++#if IS_IN (libc) ++# define memcmp __redirect_memcmp ++# include ++# undef memcmp ++ ++# define SYMBOL_NAME memcmp ++# include "ifunc-memcmp.h" ++ ++libc_ifunc_redirected (__redirect_memcmp, memcmp, ++ IFUNC_SELECTOR ()); ++# undef bcmp ++weak_alias (memcmp, bcmp) ++ ++# undef __memcmpeq ++strong_alias (memcmp, __memcmpeq) ++libc_hidden_def (__memcmpeq) ++ ++# ifdef SHARED ++__hidden_ver1 (memcmp, __GI_memcmp, __redirect_memcmp) ++ __attribute__ ((visibility ("hidden"))) __attribute_copy__ (memcmp); ++# endif ++ ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/memcpy-aligned.S b/sysdeps/loongarch/lp64/multiarch/memcpy-aligned.S +new file mode 100644 +index 00000000..7eb34395 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/memcpy-aligned.S +@@ -0,0 +1,783 @@ ++/* Optimized memcpy_aligned implementation using basic LoongArch instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ . */ ++ ++#include ++#include ++#include ++ ++#if IS_IN (libc) ++# define MEMCPY_NAME __memcpy_aligned ++# define MEMMOVE_NAME __memmove_aligned ++#else ++# define MEMCPY_NAME memcpy ++# define MEMMOVE_NAME memmove ++#endif ++ ++#define LD_64(reg, n) \ ++ ld.d t0, reg, n; \ ++ ld.d t1, reg, n + 8; \ ++ ld.d t2, reg, n + 16; \ ++ ld.d t3, reg, n + 24; \ ++ ld.d t4, reg, n + 32; \ ++ ld.d t5, reg, n + 40; \ ++ ld.d t6, reg, n + 48; \ ++ ld.d t7, reg, n + 56; ++ ++#define ST_64(reg, n) \ ++ st.d t0, reg, n; \ ++ st.d t1, reg, n + 8; \ ++ st.d t2, reg, n + 16; \ ++ st.d t3, reg, n + 24; \ ++ st.d t4, reg, n + 32; \ ++ st.d t5, reg, n + 40; \ ++ st.d t6, reg, n + 48; \ ++ st.d t7, reg, n + 56; ++ ++LEAF(MEMMOVE_NAME, 6) ++ sub.d t0, a0, a1 ++ bltu t0, a2, L(copy_back) ++END(MEMMOVE_NAME) ++ ++LEAF_NO_ALIGN(MEMCPY_NAME) ++ srai.d a3, a2, 4 ++ beqz a3, L(short_data) ++ ++ move a4, a0 ++ andi a5, a0, 0x7 ++ andi a6, a1, 0x7 ++ li.d t8, 8 ++ beqz a5, L(check_align) ++ ++ sub.d t2, t8, a5 ++ sub.d a2, a2, t2 ++ pcaddi t1, 20 ++ slli.d t3, t2, 3 ++ ++ add.d a1, a1, t2 ++ sub.d t1, t1, t3 ++ add.d a4, a4, t2 ++ jr t1 ++ ++L(al7): ++ ld.b t0, a1, -7 ++ st.b t0, a4, -7 ++L(al6): ++ ld.b t0, a1, -6 ++ st.b t0, a4, -6 ++L(al5): ++ ld.b t0, a1, -5 ++ st.b t0, a4, -5 ++L(al4): ++ ld.b t0, a1, -4 ++ st.b t0, a4, -4 ++L(al3): ++ ld.b t0, a1, -3 ++ st.b t0, a4, -3 ++L(al2): ++ ld.b t0, a1, -2 ++ st.b t0, a4, -2 ++L(al1): ++ ld.b t0, a1, -1 ++ st.b t0, a4, -1 ++ ++L(check_align): ++ bne a5, a6, L(unalign) ++ srai.d a3, a2, 4 ++ beqz a3, L(al_less_16bytes) ++ andi a3, a2, 0x3f ++ ++ beq a3, a2, L(al_less_64bytes) ++ sub.d t0, a2, a3 ++ move a2, a3 ++ add.d a5, a1, t0 ++ ++L(loop_64bytes): ++ LD_64(a1, 0) ++ addi.d a1, a1, 64 ++ ST_64(a4, 0) ++ ++ addi.d a4, a4, 64 ++ bne a1, a5, L(loop_64bytes) ++ ++L(al_less_64bytes): ++ srai.d a3, a2, 5 ++ beqz a3, L(al_less_32bytes) ++ ++ ld.d t0, a1, 0 ++ ld.d t1, a1, 8 ++ ld.d t2, 
a1, 16 ++ ld.d t3, a1, 24 ++ ++ addi.d a1, a1, 32 ++ addi.d a2, a2, -32 ++ ++ st.d t0, a4, 0 ++ st.d t1, a4, 8 ++ st.d t2, a4, 16 ++ st.d t3, a4, 24 ++ ++ addi.d a4, a4, 32 ++ ++L(al_less_32bytes): ++ srai.d a3, a2, 4 ++ beqz a3, L(al_less_16bytes) ++ ++ ld.d t0, a1, 0 ++ ld.d t1, a1, 8 ++ addi.d a1, a1, 16 ++ addi.d a2, a2, -16 ++ ++ st.d t0, a4, 0 ++ st.d t1, a4, 8 ++ addi.d a4, a4, 16 ++ ++L(al_less_16bytes): ++ srai.d a3, a2, 3 ++ beqz a3, L(al_less_8bytes) ++ ++ ld.d t0, a1, 0 ++ addi.d a1, a1, 8 ++ addi.d a2, a2, -8 ++ st.d t0, a4, 0 ++ addi.d a4, a4, 8 ++ ++L(al_less_8bytes): ++ srai.d a3, a2, 2 ++ beqz a3, L(al_less_4bytes) ++ ++ ld.w t0, a1, 0 ++ addi.d a1, a1, 4 ++ addi.d a2, a2, -4 ++ st.w t0, a4, 0 ++ addi.d a4, a4, 4 ++ ++L(al_less_4bytes): ++ srai.d a3, a2, 1 ++ beqz a3, L(al_less_2bytes) ++ ++ ld.h t0, a1, 0 ++ addi.d a1, a1, 2 ++ addi.d a2, a2, -2 ++ st.h t0, a4, 0 ++ addi.d a4, a4, 2 ++ ++L(al_less_2bytes): ++ beqz a2, L(al_less_1byte) ++ ++ ld.b t0, a1, 0 ++ st.b t0, a4, 0 ++ ++L(al_less_1byte): ++ jr ra ++ ++L(unalign): ++ andi a5, a1, 0x7 ++ bstrins.d a1, zero, 2, 0 ++ sub.d t8, t8, a5 ++ slli.d a5, a5, 3 ++ ++ ld.d t0, a1, 0 ++ addi.d a1, a1, 8 ++ slli.d a6, t8, 3 ++ srl.d a7, t0, a5 ++ ++ srai.d a3, a2, 4 ++ beqz a3, L(un_less_16bytes) ++ andi a3, a2, 0x3f ++ beq a3, a2, L(un_less_64bytes) ++ ++ sub.d t0, a2, a3 ++ move a2, a3 ++ add.d a3, a1, t0 ++ ++L(un_long_bytes): ++ ld.d t0, a1, 0 ++ ld.d t1, a1, 8 ++ ld.d t2, a1, 16 ++ ld.d t3, a1, 24 ++ ++ srl.d t4, t0, a5 ++ sll.d t0, t0, a6 ++ srl.d t5, t1, a5 ++ sll.d t1, t1, a6 ++ ++ srl.d t6, t2, a5 ++ sll.d t2, t2, a6 ++ srl.d t7, t3, a5 ++ sll.d t3, t3, a6 ++ ++ or t0, a7, t0 ++ or t1, t4, t1 ++ or t2, t5, t2 ++ or t3, t6, t3 ++ ++ ld.d t4, a1, 32 ++ ld.d t5, a1, 40 ++ ld.d t6, a1, 48 ++ ld.d a7, a1, 56 ++ ++ st.d t0, a4, 0 ++ st.d t1, a4, 8 ++ st.d t2, a4, 16 ++ st.d t3, a4, 24 ++ ++ addi.d a1, a1, 64 ++ ++ srl.d t0, t4, a5 ++ sll.d t4, t4, a6 ++ srl.d t1, t5, a5 ++ sll.d t5, t5, a6 ++ ++ srl.d t2, t6, a5 ++ sll.d t6, t6, a6 ++ sll.d t3, a7, a6 ++ srl.d a7, a7, a5 ++ ++ or t4, t7, t4 ++ or t5, t0, t5 ++ or t6, t1, t6 ++ or t3, t2, t3 ++ ++ st.d t4, a4, 32 ++ st.d t5, a4, 40 ++ st.d t6, a4, 48 ++ st.d t3, a4, 56 ++ ++ addi.d a4, a4, 64 ++ bne a3, a1, L(un_long_bytes) ++ ++L(un_less_64bytes): ++ srai.d a3, a2, 5 ++ beqz a3, L(un_less_32bytes) ++ ++ ld.d t0, a1, 0 ++ ld.d t1, a1, 8 ++ ld.d t2, a1, 16 ++ ld.d t3, a1, 24 ++ ++ addi.d a1, a1, 32 ++ addi.d a2, a2, -32 ++ ++ srl.d t4, t0, a5 ++ sll.d t0, t0, a6 ++ srl.d t5, t1, a5 ++ sll.d t1, t1, a6 ++ ++ srl.d t6, t2, a5 ++ sll.d t2, t2, a6 ++ or t0, a7, t0 ++ srl.d a7, t3, a5 ++ sll.d t3, t3, a6 ++ ++ or t1, t4, t1 ++ or t2, t5, t2 ++ or t3, t6, t3 ++ ++ st.d t0, a4, 0 ++ st.d t1, a4, 8 ++ st.d t2, a4, 16 ++ st.d t3, a4, 24 ++ ++ addi.d a4, a4, 32 ++ ++L(un_less_32bytes): ++ srai.d a3, a2, 4 ++ beqz a3, L(un_less_16bytes) ++ ++ ld.d t0, a1, 0 ++ ld.d t1, a1, 8 ++ addi.d a1, a1, 16 ++ addi.d a2, a2, -16 ++ ++ srl.d t2, t0, a5 ++ sll.d t3, t0, a6 ++ sll.d t4, t1, a6 ++ or t3, a7, t3 ++ or t4, t2, t4 ++ ++ srl.d a7, t1, a5 ++ st.d t3, a4, 0 ++ st.d t4, a4, 8 ++ addi.d a4, a4, 16 ++ ++L(un_less_16bytes): ++ srai.d a3, a2, 3 ++ beqz a3, L(un_less_8bytes) ++ ++ ld.d t0, a1, 0 ++ addi.d a1, a1, 8 ++ addi.d a2, a2, -8 ++ sll.d t1, t0, a6 ++ ++ or t2, a7, t1 ++ srl.d a7, t0, a5 ++ st.d t2, a4, 0 ++ addi.d a4, a4, 8 ++ ++L(un_less_8bytes): ++ beqz a2, L(un_less_1byte) ++ bge t8, a2, 1f ++ ++ ld.d t0, a1, 0 ++ sll.d t0, t0, a6 ++ or a7, a7, t0 ++ ++1: ++ srai.d a3, a2, 2 ++ beqz a3, 
L(un_less_4bytes) ++ ++ addi.d a2, a2, -4 ++ st.w a7, a4, 0 ++ addi.d a4, a4, 4 ++ srai.d a7, a7, 32 ++ ++L(un_less_4bytes): ++ srai.d a3, a2, 1 ++ beqz a3, L(un_less_2bytes) ++ ++ addi.d a2, a2, -2 ++ st.h a7, a4, 0 ++ addi.d a4, a4, 2 ++ srai.d a7, a7, 16 ++ ++L(un_less_2bytes): ++ beqz a2, L(un_less_1byte) ++ st.b a7, a4, 0 ++ ++L(un_less_1byte): ++ jr ra ++ ++L(short_data): ++ pcaddi t1, 36 ++ slli.d t2, a2, 3 ++ add.d a4, a0, a2 ++ sub.d t1, t1, t2 ++ add.d a1, a1, a2 ++ jr t1 ++ ++L(short_15_bytes): ++ ld.b t0, a1, -15 ++ st.b t0, a4, -15 ++L(short_14_bytes): ++ ld.b t0, a1, -14 ++ st.b t0, a4, -14 ++L(short_13_bytes): ++ ld.b t0, a1, -13 ++ st.b t0, a4, -13 ++L(short_12_bytes): ++ ld.b t0, a1, -12 ++ st.b t0, a4, -12 ++L(short_11_bytes): ++ ld.b t0, a1, -11 ++ st.b t0, a4, -11 ++L(short_10_bytes): ++ ld.b t0, a1, -10 ++ st.b t0, a4, -10 ++L(short_9_bytes): ++ ld.b t0, a1, -9 ++ st.b t0, a4, -9 ++L(short_8_bytes): ++ ld.b t0, a1, -8 ++ st.b t0, a4, -8 ++L(short_7_bytes): ++ ld.b t0, a1, -7 ++ st.b t0, a4, -7 ++L(short_6_bytes): ++ ld.b t0, a1, -6 ++ st.b t0, a4, -6 ++L(short_5_bytes): ++ ld.b t0, a1, -5 ++ st.b t0, a4, -5 ++L(short_4_bytes): ++ ld.b t0, a1, -4 ++ st.b t0, a4, -4 ++L(short_3_bytes): ++ ld.b t0, a1, -3 ++ st.b t0, a4, -3 ++L(short_2_bytes): ++ ld.b t0, a1, -2 ++ st.b t0, a4, -2 ++L(short_1_bytes): ++ ld.b t0, a1, -1 ++ st.b t0, a4, -1 ++ jr ra ++ ++L(copy_back): ++ srai.d a3, a2, 4 ++ beqz a3, L(back_short_data) ++ ++ add.d a4, a0, a2 ++ add.d a1, a1, a2 ++ ++ andi a5, a4, 0x7 ++ andi a6, a1, 0x7 ++ beqz a5, L(back_check_align) ++ ++ sub.d a2, a2, a5 ++ sub.d a1, a1, a5 ++ sub.d a4, a4, a5 ++ ++ pcaddi t1, 18 ++ slli.d t3, a5, 3 ++ sub.d t1, t1, t3 ++ jr t1 ++ ++ ld.b t0, a1, 6 ++ st.b t0, a4, 6 ++ ld.b t0, a1, 5 ++ st.b t0, a4, 5 ++ ld.b t0, a1, 4 ++ st.b t0, a4, 4 ++ ld.b t0, a1, 3 ++ st.b t0, a4, 3 ++ ld.b t0, a1, 2 ++ st.b t0, a4, 2 ++ ld.b t0, a1, 1 ++ st.b t0, a4, 1 ++ ld.b t0, a1, 0 ++ st.b t0, a4, 0 ++ ++L(back_check_align): ++ bne a5, a6, L(back_unalign) ++ ++ srai.d a3, a2, 4 ++ beqz a3, L(back_less_16bytes) ++ ++ andi a3, a2, 0x3f ++ beq a3, a2, L(back_less_64bytes) ++ ++ sub.d t0, a2, a3 ++ move a2, a3 ++ sub.d a5, a1, t0 ++ ++L(back_loop_64bytes): ++ LD_64(a1, -64) ++ addi.d a1, a1, -64 ++ ST_64(a4, -64) ++ ++ addi.d a4, a4, -64 ++ bne a1, a5, L(back_loop_64bytes) ++ ++L(back_less_64bytes): ++ srai.d a3, a2, 5 ++ beqz a3, L(back_less_32bytes) ++ ++ ld.d t0, a1, -32 ++ ld.d t1, a1, -24 ++ ld.d t2, a1, -16 ++ ld.d t3, a1, -8 ++ ++ addi.d a1, a1, -32 ++ addi.d a2, a2, -32 ++ ++ st.d t0, a4, -32 ++ st.d t1, a4, -24 ++ st.d t2, a4, -16 ++ st.d t3, a4, -8 ++ ++ addi.d a4, a4, -32 ++ ++L(back_less_32bytes): ++ srai.d a3, a2, 4 ++ beqz a3, L(back_less_16bytes) ++ ++ ld.d t0, a1, -16 ++ ld.d t1, a1, -8 ++ ++ addi.d a2, a2, -16 ++ addi.d a1, a1, -16 ++ ++ st.d t0, a4, -16 ++ st.d t1, a4, -8 ++ addi.d a4, a4, -16 ++ ++L(back_less_16bytes): ++ srai.d a3, a2, 3 ++ beqz a3, L(back_less_8bytes) ++ ++ ld.d t0, a1, -8 ++ addi.d a2, a2, -8 ++ addi.d a1, a1, -8 ++ ++ st.d t0, a4, -8 ++ addi.d a4, a4, -8 ++ ++L(back_less_8bytes): ++ srai.d a3, a2, 2 ++ beqz a3, L(back_less_4bytes) ++ ++ ld.w t0, a1, -4 ++ addi.d a2, a2, -4 ++ addi.d a1, a1, -4 ++ ++ st.w t0, a4, -4 ++ addi.d a4, a4, -4 ++ ++L(back_less_4bytes): ++ srai.d a3, a2, 1 ++ beqz a3, L(back_less_2bytes) ++ ++ ld.h t0, a1, -2 ++ addi.d a2, a2, -2 ++ addi.d a1, a1, -2 ++ ++ st.h t0, a4, -2 ++ addi.d a4, a4, -2 ++ ++L(back_less_2bytes): ++ beqz a2, L(back_less_1byte) ++ ++ ld.b t0, a1, -1 ++ st.b t0, a4, -1 ++ 
++L(back_less_1byte): ++ jr ra ++ ++L(back_unalign): ++ andi t8, a1, 0x7 ++ bstrins.d a1, zero, 2, 0 ++ ++ sub.d a6, zero, t8 ++ ++ ld.d t0, a1, 0 ++ slli.d a6, a6, 3 ++ slli.d a5, t8, 3 ++ sll.d a7, t0, a6 ++ ++ srai.d a3, a2, 4 ++ beqz a3, L(back_un_less_16bytes) ++ ++ andi a3, a2, 0x3f ++ beq a3, a2, L(back_un_less_64bytes) ++ ++ sub.d t0, a2, a3 ++ move a2, a3 ++ sub.d a3, a1, t0 ++ ++L(back_un_long_bytes): ++ ld.d t0, a1, -8 ++ ld.d t1, a1, -16 ++ ld.d t2, a1, -24 ++ ld.d t3, a1, -32 ++ ++ sll.d t4, t0, a6 ++ srl.d t0, t0, a5 ++ ++ sll.d t5, t1, a6 ++ srl.d t1, t1, a5 ++ ++ sll.d t6, t2, a6 ++ srl.d t2, t2, a5 ++ ++ sll.d t7, t3, a6 ++ srl.d t3, t3, a5 ++ ++ or t0, t0, a7 ++ or t1, t1, t4 ++ or t2, t2, t5 ++ or t3, t3, t6 ++ ++ ld.d t4, a1, -40 ++ ld.d t5, a1, -48 ++ ld.d t6, a1, -56 ++ ld.d a7, a1, -64 ++ st.d t0, a4, -8 ++ st.d t1, a4, -16 ++ st.d t2, a4, -24 ++ st.d t3, a4, -32 ++ ++ addi.d a1, a1, -64 ++ ++ sll.d t0, t4, a6 ++ srl.d t4, t4, a5 ++ ++ sll.d t1, t5, a6 ++ srl.d t5, t5, a5 ++ ++ sll.d t2, t6, a6 ++ srl.d t6, t6, a5 ++ ++ srl.d t3, a7, a5 ++ sll.d a7, a7, a6 ++ ++ or t4, t7, t4 ++ or t5, t0, t5 ++ or t6, t1, t6 ++ or t3, t2, t3 ++ ++ st.d t4, a4, -40 ++ st.d t5, a4, -48 ++ st.d t6, a4, -56 ++ st.d t3, a4, -64 ++ ++ addi.d a4, a4, -64 ++ bne a3, a1, L(back_un_long_bytes) ++ ++L(back_un_less_64bytes): ++ srai.d a3, a2, 5 ++ beqz a3, L(back_un_less_32bytes) ++ ++ ld.d t0, a1, -8 ++ ld.d t1, a1, -16 ++ ld.d t2, a1, -24 ++ ld.d t3, a1, -32 ++ ++ addi.d a1, a1, -32 ++ addi.d a2, a2, -32 ++ ++ sll.d t4, t0, a6 ++ srl.d t0, t0, a5 ++ ++ sll.d t5, t1, a6 ++ srl.d t1, t1, a5 ++ ++ sll.d t6, t2, a6 ++ srl.d t2, t2, a5 ++ ++ or t0, a7, t0 ++ ++ sll.d a7, t3, a6 ++ srl.d t3, t3, a5 ++ ++ or t1, t4, t1 ++ or t2, t5, t2 ++ or t3, t6, t3 ++ ++ st.d t0, a4, -8 ++ st.d t1, a4, -16 ++ st.d t2, a4, -24 ++ st.d t3, a4, -32 ++ ++ addi.d a4, a4, -32 ++ ++L(back_un_less_32bytes): ++ srai.d a3, a2, 4 ++ beqz a3, L(back_un_less_16bytes) ++ ++ ld.d t0, a1, -8 ++ ld.d t1, a1, -16 ++ ++ addi.d a1, a1, -16 ++ addi.d a2, a2, -16 ++ ++ sll.d t2, t0, a6 ++ srl.d t3, t0, a5 ++ ++ srl.d t4, t1, a5 ++ or t3, a7, t3 ++ or t4, t2, t4 ++ sll.d a7, t1, a6 ++ ++ st.d t3, a4, -8 ++ st.d t4, a4, -16 ++ ++ addi.d a4, a4, -16 ++ ++L(back_un_less_16bytes): ++ srai.d a3, a2, 3 ++ beqz a3, L(back_un_less_8bytes) ++ ++ ld.d t0, a1, -8 ++ ++ addi.d a1, a1, -8 ++ addi.d a2, a2, -8 ++ ++ srl.d t1, t0, a5 ++ or t2, a7, t1 ++ sll.d a7, t0, a6 ++ ++ st.d t2, a4, -8 ++ addi.d a4, a4, -8 ++ ++L(back_un_less_8bytes): ++ beqz a2, L(back_end) ++ bge t8, a2, 1f ++ ++ ld.d t0, a1, -8 ++ srl.d t0, t0, a5 ++ or a7, a7, t0 ++ ++1: ++ srai.d a3, a2, 2 ++ beqz a3, L(back_un_less_4bytes) ++ ++ srai.d t0, a7, 32 ++ addi.d a2, a2, -4 ++ st.w t0, a4, -4 ++ addi.d a4, a4, -4 ++ slli.d a7, a7, 32 ++ ++L(back_un_less_4bytes): ++ srai.d a3, a2, 1 ++ beqz a3, L(back_un_less_2bytes) ++ srai.d t0, a7, 48 ++ addi.d a2, a2, -2 ++ st.h t0, a4, -2 ++ addi.d a4, a4, -2 ++ slli.d a7, a7, 16 ++L(back_un_less_2bytes): ++ beqz a2, L(back_un_less_1byte) ++ srai.d t0, a7, 56 ++ st.b t0, a4, -1 ++L(back_un_less_1byte): ++ jr ra ++ ++L(back_short_data): ++ pcaddi t1, 34 ++ slli.d t2, a2, 3 ++ sub.d t1, t1, t2 ++ jr t1 ++ ++ ld.b t0, a1, 14 ++ st.b t0, a0, 14 ++ ld.b t0, a1, 13 ++ st.b t0, a0, 13 ++ ld.b t0, a1, 12 ++ st.b t0, a0, 12 ++ ld.b t0, a1, 11 ++ st.b t0, a0, 11 ++ ld.b t0, a1, 10 ++ st.b t0, a0, 10 ++ ld.b t0, a1, 9 ++ st.b t0, a0, 9 ++ ld.b t0, a1, 8 ++ st.b t0, a0, 8 ++ ld.b t0, a1, 7 ++ st.b t0, a0, 7 ++ ld.b t0, a1, 6 ++ st.b t0, a0, 6 ++ ld.b 
t0, a1, 5 ++ st.b t0, a0, 5 ++ ld.b t0, a1, 4 ++ st.b t0, a0, 4 ++ ld.b t0, a1, 3 ++ st.b t0, a0, 3 ++ ld.b t0, a1, 2 ++ st.b t0, a0, 2 ++ ld.b t0, a1, 1 ++ st.b t0, a0, 1 ++ ld.b t0, a1, 0 ++ st.b t0, a0, 0 ++L(back_end): ++ jr ra ++ ++END(MEMCPY_NAME) ++ ++libc_hidden_builtin_def (MEMMOVE_NAME) ++libc_hidden_builtin_def (MEMCPY_NAME) +diff --git a/sysdeps/loongarch/lp64/multiarch/memcpy-lasx.S b/sysdeps/loongarch/lp64/multiarch/memcpy-lasx.S +new file mode 100644 +index 00000000..ae148df5 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/memcpy-lasx.S +@@ -0,0 +1,20 @@ ++/* Optimized memcpy implementation using LoongArch LASX instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ . */ ++ ++/* memcpy is part of memmove.S */ +diff --git a/sysdeps/loongarch/lp64/multiarch/memcpy-lsx.S b/sysdeps/loongarch/lp64/multiarch/memcpy-lsx.S +new file mode 100644 +index 00000000..feb2bb0e +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/memcpy-lsx.S +@@ -0,0 +1,20 @@ ++/* Optimized memcpy implementation using LoongArch LSX instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ . */ ++ ++/* memcpy is part of memmove.S */ +diff --git a/sysdeps/loongarch/lp64/multiarch/memcpy-unaligned.S b/sysdeps/loongarch/lp64/multiarch/memcpy-unaligned.S +new file mode 100644 +index 00000000..31019b13 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/memcpy-unaligned.S +@@ -0,0 +1,247 @@ ++/* Optimized unaligned memcpy implementation using basic LoongArch instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. 
++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ . */ ++ ++#include ++#include ++#include ++ ++#if IS_IN (libc) ++ ++# define MEMCPY_NAME __memcpy_unaligned ++ ++# define LD_64(reg, n) \ ++ ld.d t0, reg, n; \ ++ ld.d t1, reg, n + 8; \ ++ ld.d t2, reg, n + 16; \ ++ ld.d t3, reg, n + 24; \ ++ ld.d t4, reg, n + 32; \ ++ ld.d t5, reg, n + 40; \ ++ ld.d t6, reg, n + 48; \ ++ ld.d t7, reg, n + 56; ++ ++# define ST_64(reg, n) \ ++ st.d t0, reg, n; \ ++ st.d t1, reg, n + 8; \ ++ st.d t2, reg, n + 16; \ ++ st.d t3, reg, n + 24; \ ++ st.d t4, reg, n + 32; \ ++ st.d t5, reg, n + 40; \ ++ st.d t6, reg, n + 48; \ ++ st.d t7, reg, n + 56; ++ ++LEAF(MEMCPY_NAME, 3) ++ add.d a4, a1, a2 ++ add.d a3, a0, a2 ++ li.w a6, 16 ++ bge a6, a2, L(less_16bytes) ++ ++ li.w a6, 128 ++ blt a6, a2, L(long_bytes) ++ li.w a6, 64 ++ blt a6, a2, L(more_64bytes) ++ ++ li.w a6, 32 ++ blt a6, a2, L(more_32bytes) ++ ++ ld.d t0, a1, 0 ++ ld.d t1, a1, 8 ++ ld.d t2, a4, -16 ++ ld.d t3, a4, -8 ++ ++ st.d t0, a0, 0 ++ st.d t1, a0, 8 ++ st.d t2, a3, -16 ++ st.d t3, a3, -8 ++ jr ra ++ ++L(more_64bytes): ++ srli.d t8, a0, 3 ++ slli.d t8, t8, 3 ++ addi.d t8, t8, 0x8 ++ sub.d a7, a0, t8 ++ ++ ld.d t0, a1, 0 ++ sub.d a1, a1, a7 ++ st.d t0, a0, 0 ++ add.d a7, a7, a2 ++ addi.d a7, a7, -0x20 ++ ++L(loop_32): ++ ld.d t0, a1, 0 ++ ld.d t1, a1, 8 ++ ld.d t2, a1, 16 ++ ld.d t3, a1, 24 ++ ++ st.d t0, t8, 0 ++ st.d t1, t8, 8 ++ st.d t2, t8, 16 ++ st.d t3, t8, 24 ++ ++ addi.d t8, t8, 0x20 ++ addi.d a1, a1, 0x20 ++ addi.d a7, a7, -0x20 ++ blt zero, a7, L(loop_32) ++ ++ ld.d t4, a4, -32 ++ ld.d t5, a4, -24 ++ ld.d t6, a4, -16 ++ ld.d t7, a4, -8 ++ ++ st.d t4, a3, -32 ++ st.d t5, a3, -24 ++ st.d t6, a3, -16 ++ st.d t7, a3, -8 ++ ++ jr ra ++ ++L(more_32bytes): ++ ld.d t0, a1, 0 ++ ld.d t1, a1, 8 ++ ld.d t2, a1, 16 ++ ld.d t3, a1, 24 ++ ++ ld.d t4, a4, -32 ++ ld.d t5, a4, -24 ++ ld.d t6, a4, -16 ++ ld.d t7, a4, -8 ++ ++ st.d t0, a0, 0 ++ st.d t1, a0, 8 ++ st.d t2, a0, 16 ++ st.d t3, a0, 24 ++ ++ st.d t4, a3, -32 ++ st.d t5, a3, -24 ++ st.d t6, a3, -16 ++ st.d t7, a3, -8 ++ ++ jr ra ++ ++L(less_16bytes): ++ srai.d a6, a2, 3 ++ beqz a6, L(less_8bytes) ++ ++ ld.d t0, a1, 0 ++ ld.d t1, a4, -8 ++ st.d t0, a0, 0 ++ st.d t1, a3, -8 ++ ++ jr ra ++ ++L(less_8bytes): ++ srai.d a6, a2, 2 ++ beqz a6, L(less_4bytes) ++ ++ ld.w t0, a1, 0 ++ ld.w t1, a4, -4 ++ st.w t0, a0, 0 ++ st.w t1, a3, -4 ++ ++ jr ra ++ ++L(less_4bytes): ++ srai.d a6, a2, 1 ++ beqz a6, L(less_2bytes) ++ ++ ld.h t0, a1, 0 ++ ld.h t1, a4, -2 ++ st.h t0, a0, 0 ++ st.h t1, a3, -2 ++ ++ jr ra ++ ++L(less_2bytes): ++ beqz a2, L(less_1bytes) ++ ++ ld.b t0, a1, 0 ++ st.b t0, a0, 0 ++ jr ra ++ ++L(less_1bytes): ++ jr ra ++ ++L(long_bytes): ++ srli.d t8, a0, 3 ++ slli.d t8, t8, 3 ++ beq a0, t8, L(start) ++ ld.d t0, a1, 0 ++ ++ addi.d t8, t8, 0x8 ++ st.d t0, a0, 0 ++ sub.d a7, a0, t8 ++ sub.d a1, a1, a7 ++ ++L(start): ++ addi.d a5, a3, -0x80 ++ blt a5, t8, L(align_end_proc) ++ ++L(loop_128): ++ LD_64(a1, 0) ++ ST_64(t8, 0) ++ LD_64(a1, 64) ++ addi.d a1, a1, 0x80 ++ ST_64(t8, 64) ++ addi.d t8, t8, 0x80 ++ bge a5, t8, L(loop_128) ++ ++L(align_end_proc): ++ sub.d a2, a3, t8 ++ pcaddi t1, 34 ++ andi t2, a2, 0x78 ++ sub.d t1, t1, t2 ++ jr t1 ++ ++ ld.d t0, a1, 112 ++ st.d t0, t8, 112 ++ ld.d t0, a1, 104 ++ st.d t0, t8, 104 ++ ld.d t0, a1, 96 ++ st.d t0, t8, 96 ++ ld.d t0, a1, 88 ++ st.d t0, t8, 88 ++ ld.d t0, a1, 80 ++ st.d t0, t8, 80 ++ ld.d t0, a1, 72 ++ st.d t0, t8, 72 ++ ld.d t0, a1, 64 ++ st.d t0, t8, 64 ++ ld.d t0, a1, 56 ++ st.d 
t0, t8, 56
++ ld.d t0, a1, 48
++ st.d t0, t8, 48
++ ld.d t0, a1, 40
++ st.d t0, t8, 40
++ ld.d t0, a1, 32
++ st.d t0, t8, 32
++ ld.d t0, a1, 24
++ st.d t0, t8, 24
++ ld.d t0, a1, 16
++ st.d t0, t8, 16
++ ld.d t0, a1, 8
++ st.d t0, t8, 8
++ ld.d t0, a1, 0
++ st.d t0, t8, 0
++ ld.d t0, a4, -8
++ st.d t0, a3, -8
++
++ jr ra
++END(MEMCPY_NAME)
++
++libc_hidden_builtin_def (MEMCPY_NAME)
++#endif
+diff --git a/sysdeps/loongarch/lp64/multiarch/memcpy.c b/sysdeps/loongarch/lp64/multiarch/memcpy.c
+new file mode 100644
+index 00000000..93b238ce
+--- /dev/null
++++ b/sysdeps/loongarch/lp64/multiarch/memcpy.c
+@@ -0,0 +1,37 @@
++/* Multiple versions of memcpy.
++ All versions must be listed in ifunc-impl-list.c.
++ Copyright (C) 2023 Free Software Foundation, Inc.
++ This file is part of the GNU C Library.
++
++ The GNU C Library is free software; you can redistribute it and/or
++ modify it under the terms of the GNU Lesser General Public
++ License as published by the Free Software Foundation; either
++ version 2.1 of the License, or (at your option) any later version.
++
++ The GNU C Library is distributed in the hope that it will be useful,
++ but WITHOUT ANY WARRANTY; without even the implied warranty of
++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ Lesser General Public License for more details.
++
++ You should have received a copy of the GNU Lesser General Public
++ License along with the GNU C Library; if not, see
++ <https://www.gnu.org/licenses/>. */
++
++/* Define multiple versions only for the definition in libc. */
++#if IS_IN (libc)
++# define memcpy __redirect_memcpy
++# include <string.h>
++# undef memcpy
++
++# define SYMBOL_NAME memcpy
++# include "ifunc-lasx.h"
++
++libc_ifunc_redirected (__redirect_memcpy, memcpy,
++ IFUNC_SELECTOR ());
++
++# ifdef SHARED
++__hidden_ver1 (memcpy, __GI_memcpy, __redirect_memcpy)
++ __attribute__ ((visibility ("hidden"))) __attribute_copy__ (memcpy);
++# endif
++
++#endif
+diff --git a/sysdeps/loongarch/lp64/multiarch/memmove-aligned.S b/sysdeps/loongarch/lp64/multiarch/memmove-aligned.S
+new file mode 100644
+index 00000000..a02114c0
+--- /dev/null
++++ b/sysdeps/loongarch/lp64/multiarch/memmove-aligned.S
+@@ -0,0 +1,20 @@
++/* Optimized memmove_aligned implementation using basic LoongArch instructions.
++ Copyright (C) 2023 Free Software Foundation, Inc.
++
++ This file is part of the GNU C Library.
++
++ The GNU C Library is free software; you can redistribute it and/or
++ modify it under the terms of the GNU Lesser General Public
++ License as published by the Free Software Foundation; either
++ version 2.1 of the License, or (at your option) any later version.
++
++ The GNU C Library is distributed in the hope that it will be useful,
++ but WITHOUT ANY WARRANTY; without even the implied warranty of
++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ Lesser General Public License for more details.
++
++ You should have received a copy of the GNU Lesser General Public
++ License along with the GNU C Library. If not, see
++ <https://www.gnu.org/licenses/>. */
++
++/* memmove_aligned is part of memcpy_aligned, see memcpy-aligned.S. */
+diff --git a/sysdeps/loongarch/lp64/multiarch/memmove-lasx.S b/sysdeps/loongarch/lp64/multiarch/memmove-lasx.S
+new file mode 100644
+index 00000000..95d8ee7b
+--- /dev/null
++++ b/sysdeps/loongarch/lp64/multiarch/memmove-lasx.S
+@@ -0,0 +1,287 @@
++/* Optimized memmove implementation using LoongArch LASX instructions.
++ Copyright (C) 2023 Free Software Foundation, Inc.
++
++ This file is part of the GNU C Library.
++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ . */ ++ ++#include ++#include ++#include ++ ++#if IS_IN (libc) && !defined __loongarch_soft_float ++ ++#ifndef MEMCPY_NAME ++# define MEMCPY_NAME __memcpy_lasx ++#endif ++ ++#ifndef MEMMOVE_NAME ++# define MEMMOVE_NAME __memmove_lasx ++#endif ++ ++LEAF(MEMCPY_NAME, 6) ++ li.d t0, 32 ++ add.d a3, a0, a2 ++ add.d a4, a1, a2 ++ bgeu t0, a2, L(less_32bytes) ++ ++ li.d t1, 64 ++ bltu t1, a2, L(copy_long) ++ xvld xr0, a1, 0 ++ xvld xr1, a4, -32 ++ ++ xvst xr0, a0, 0 ++ xvst xr1, a3, -32 ++ jr ra ++L(less_32bytes): ++ srli.d t0, a2, 4 ++ ++ beqz t0, L(less_16bytes) ++ vld vr0, a1, 0 ++ vld vr1, a4, -16 ++ vst vr0, a0, 0 ++ ++ ++ vst vr1, a3, -16 ++ jr ra ++L(less_16bytes): ++ srli.d t0, a2, 3 ++ beqz t0, L(less_8bytes) ++ ++ ld.d t0, a1, 0 ++ ld.d t1, a4, -8 ++ st.d t0, a0, 0 ++ st.d t1, a3, -8 ++ ++ jr ra ++L(less_8bytes): ++ srli.d t0, a2, 2 ++ beqz t0, L(less_4bytes) ++ ld.w t0, a1, 0 ++ ++ ld.w t1, a4, -4 ++ st.w t0, a0, 0 ++ st.w t1, a3, -4 ++ jr ra ++ ++ ++L(less_4bytes): ++ srli.d t0, a2, 1 ++ beqz t0, L(less_2bytes) ++ ld.h t0, a1, 0 ++ ld.h t1, a4, -2 ++ ++ st.h t0, a0, 0 ++ st.h t1, a3, -2 ++ jr ra ++L(less_2bytes): ++ beqz a2, L(less_1bytes) ++ ++ ld.b t0, a1, 0 ++ st.b t0, a0, 0 ++L(less_1bytes): ++ jr ra ++END(MEMCPY_NAME) ++ ++LEAF(MEMMOVE_NAME, 6) ++ ++ li.d t0, 32 ++ add.d a3, a0, a2 ++ add.d a4, a1, a2 ++ bgeu t0, a2, L(less_32bytes) ++ ++ li.d t1, 64 ++ bltu t1, a2, L(move_long) ++ xvld xr0, a1, 0 ++ xvld xr1, a4, -32 ++ ++ xvst xr0, a0, 0 ++ xvst xr1, a3, -32 ++ jr ra ++L(move_long): ++ sub.d t2, a0, a1 ++ ++ bltu t2, a2, L(copy_back) ++L(copy_long): ++ andi t2, a0, 0x1f ++ addi.d a2, a2, -1 ++ sub.d t2, t0, t2 ++ ++ ++ xvld xr8, a1, 0 ++ xvld xr9, a4, -32 ++ sub.d t3, a2, t2 ++ add.d a5, a0, t2 ++ ++ andi a2, t3, 0xff ++ add.d a1, a1, t2 ++ beq a2, t3, L(lt256) ++ sub.d a6, a4, a2 ++ ++ addi.d a6, a6, -1 ++L(loop_256): ++ xvld xr0, a1, 0 ++ xvld xr1, a1, 32 ++ xvld xr2, a1, 64 ++ ++ xvld xr3, a1, 96 ++ xvld xr4, a1, 128 ++ xvld xr5, a1, 160 ++ xvld xr6, a1, 192 ++ ++ ++ xvld xr7, a1, 224 ++ addi.d a1, a1, 256 ++ xvst xr0, a5, 0 ++ xvst xr1, a5, 32 ++ ++ xvst xr2, a5, 64 ++ xvst xr3, a5, 96 ++ xvst xr4, a5, 128 ++ xvst xr5, a5, 160 ++ ++ xvst xr6, a5, 192 ++ xvst xr7, a5, 224 ++ addi.d a5, a5, 256 ++ bne a1, a6, L(loop_256) ++ ++L(lt256): ++ srli.d t2, a2, 7 ++ beqz t2, L(lt128) ++ xvld xr0, a1, 0 ++ xvld xr1, a1, 32 ++ ++ ++ xvld xr2, a1, 64 ++ xvld xr3, a1, 96 ++ addi.d a1, a1, 128 ++ addi.d a2, a2, -128 ++ ++ xvst xr0, a5, 0 ++ xvst xr1, a5, 32 ++ xvst xr2, a5, 64 ++ xvst xr3, a5, 96 ++ ++ addi.d a5, a5, 128 ++L(lt128): ++ bltu a2, t1, L(lt64) ++ xvld xr0, a1, 0 ++ xvld xr1, a1, 32 ++ ++ addi.d a1, a1, 64 ++ addi.d a2, a2, -64 ++ xvst xr0, a5, 0 ++ xvst xr1, a5, 32 ++ ++ ++ addi.d a5, a5, 64 ++L(lt64): ++ bltu a2, t0, L(lt32) ++ xvld xr0, a1, 0 ++ xvst xr0, a5, 0 ++ ++L(lt32): ++ xvst xr8, a0, 0 ++ xvst xr9, a3, -32 ++ jr ra ++ nop 
++ ++L(copy_back): ++ addi.d a3, a3, -1 ++ addi.d a2, a2, -2 ++ andi t2, a3, 0x1f ++ xvld xr8, a1, 0 ++ ++ xvld xr9, a4, -32 ++ sub.d t3, a2, t2 ++ sub.d a5, a3, t2 ++ sub.d a4, a4, t2 ++ ++ ++ andi a2, t3, 0xff ++ beq a2, t3, L(back_lt256) ++ add.d a6, a1, a2 ++ addi.d a6, a6, 2 ++ ++L(back_loop_256): ++ xvld xr0, a4, -33 ++ xvld xr1, a4, -65 ++ xvld xr2, a4, -97 ++ xvld xr3, a4, -129 ++ ++ xvld xr4, a4, -161 ++ xvld xr5, a4, -193 ++ xvld xr6, a4, -225 ++ xvld xr7, a4, -257 ++ ++ addi.d a4, a4, -256 ++ xvst xr0, a5, -32 ++ xvst xr1, a5, -64 ++ xvst xr2, a5, -96 ++ ++ ++ xvst xr3, a5, -128 ++ xvst xr4, a5, -160 ++ xvst xr5, a5, -192 ++ xvst xr6, a5, -224 ++ ++ xvst xr7, a5, -256 ++ addi.d a5, a5, -256 ++ bne a4, a6, L(back_loop_256) ++L(back_lt256): ++ srli.d t2, a2, 7 ++ ++ beqz t2, L(back_lt128) ++ xvld xr0, a4, -33 ++ xvld xr1, a4, -65 ++ xvld xr2, a4, -97 ++ ++ xvld xr3, a4, -129 ++ addi.d a2, a2, -128 ++ addi.d a4, a4, -128 ++ xvst xr0, a5, -32 ++ ++ ++ xvst xr1, a5, -64 ++ xvst xr2, a5, -96 ++ xvst xr3, a5, -128 ++ addi.d a5, a5, -128 ++ ++L(back_lt128): ++ blt a2, t1, L(back_lt64) ++ xvld xr0, a4, -33 ++ xvld xr1, a4, -65 ++ addi.d a2, a2, -64 ++ ++ addi.d a4, a4, -64 ++ xvst xr0, a5, -32 ++ xvst xr1, a5, -64 ++ addi.d a5, a5, -64 ++ ++L(back_lt64): ++ bltu a2, t0, L(back_lt32) ++ xvld xr0, a4, -33 ++ xvst xr0, a5, -32 ++L(back_lt32): ++ xvst xr8, a0, 0 ++ ++ ++ xvst xr9, a3, -31 ++ jr ra ++END(MEMMOVE_NAME) ++ ++libc_hidden_builtin_def (MEMCPY_NAME) ++libc_hidden_builtin_def (MEMMOVE_NAME) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/memmove-lsx.S b/sysdeps/loongarch/lp64/multiarch/memmove-lsx.S +new file mode 100644 +index 00000000..5eb819ef +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/memmove-lsx.S +@@ -0,0 +1,534 @@ ++/* Optimized memmove implementation using LoongArch LSX instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ . 
*/ ++ ++#include ++#include ++#include ++ ++#if IS_IN (libc) && !defined __loongarch_soft_float ++ ++# define MEMCPY_NAME __memcpy_lsx ++# define MEMMOVE_NAME __memmove_lsx ++ ++LEAF(MEMCPY_NAME, 6) ++ li.d t6, 16 ++ add.d a3, a0, a2 ++ add.d a4, a1, a2 ++ bgeu t6, a2, L(less_16bytes) ++ ++ li.d t8, 64 ++ li.d t7, 32 ++ bltu t8, a2, L(copy_long) ++ bltu t7, a2, L(more_32bytes) ++ ++ vld vr0, a1, 0 ++ vld vr1, a4, -16 ++ vst vr0, a0, 0 ++ vst vr1, a3, -16 ++ ++ jr ra ++L(more_32bytes): ++ vld vr0, a1, 0 ++ vld vr1, a1, 16 ++ vld vr2, a4, -32 ++ ++ ++ vld vr3, a4, -16 ++ vst vr0, a0, 0 ++ vst vr1, a0, 16 ++ vst vr2, a3, -32 ++ ++ vst vr3, a3, -16 ++ jr ra ++L(less_16bytes): ++ srli.d t0, a2, 3 ++ beqz t0, L(less_8bytes) ++ ++ vldrepl.d vr0, a1, 0 ++ vldrepl.d vr1, a4, -8 ++ vstelm.d vr0, a0, 0, 0 ++ vstelm.d vr1, a3, -8, 0 ++ ++ jr ra ++L(less_8bytes): ++ srli.d t0, a2, 2 ++ beqz t0, L(less_4bytes) ++ vldrepl.w vr0, a1, 0 ++ ++ ++ vldrepl.w vr1, a4, -4 ++ vstelm.w vr0, a0, 0, 0 ++ vstelm.w vr1, a3, -4, 0 ++ jr ra ++ ++L(less_4bytes): ++ srli.d t0, a2, 1 ++ beqz t0, L(less_2bytes) ++ vldrepl.h vr0, a1, 0 ++ vldrepl.h vr1, a4, -2 ++ ++ vstelm.h vr0, a0, 0, 0 ++ vstelm.h vr1, a3, -2, 0 ++ jr ra ++L(less_2bytes): ++ beqz a2, L(less_1bytes) ++ ++ ld.b t0, a1, 0 ++ st.b t0, a0, 0 ++L(less_1bytes): ++ jr ra ++ nop ++END(MEMCPY_NAME) ++ ++LEAF(MEMMOVE_NAME, 6) ++ li.d t6, 16 ++ add.d a3, a0, a2 ++ add.d a4, a1, a2 ++ bgeu t6, a2, L(less_16bytes) ++ ++ li.d t8, 64 ++ li.d t7, 32 ++ bltu t8, a2, L(move_long) ++ bltu t7, a2, L(more_32bytes) ++ ++ vld vr0, a1, 0 ++ vld vr1, a4, -16 ++ vst vr0, a0, 0 ++ vst vr1, a3, -16 ++ ++ jr ra ++ nop ++L(move_long): ++ sub.d t0, a0, a1 ++ bltu t0, a2, L(copy_back) ++ ++ ++L(copy_long): ++ vld vr2, a1, 0 ++ andi t0, a0, 0xf ++ sub.d t0, t6, t0 ++ add.d a1, a1, t0 ++ ++ sub.d a2, a2, t0 ++ andi t1, a1, 0xf ++ bnez t1, L(unaligned) ++ vld vr0, a1, 0 ++ ++ addi.d a2, a2, -16 ++ vst vr2, a0, 0 ++ andi t2, a2, 0x7f ++ add.d a5, a0, t0 ++ ++ beq a2, t2, L(al_less_128) ++ sub.d t3, a2, t2 ++ move a2, t2 ++ add.d a6, a1, t3 ++ ++ ++L(al_loop): ++ vld vr1, a1, 16 ++ vld vr2, a1, 32 ++ vld vr3, a1, 48 ++ vld vr4, a1, 64 ++ ++ vld vr5, a1, 80 ++ vld vr6, a1, 96 ++ vld vr7, a1, 112 ++ vst vr0, a5, 0 ++ ++ vld vr0, a1, 128 ++ addi.d a1, a1, 128 ++ vst vr1, a5, 16 ++ vst vr2, a5, 32 ++ ++ vst vr3, a5, 48 ++ vst vr4, a5, 64 ++ vst vr5, a5, 80 ++ vst vr6, a5, 96 ++ ++ ++ vst vr7, a5, 112 ++ addi.d a5, a5, 128 ++ bne a1, a6, L(al_loop) ++L(al_less_128): ++ blt a2, t8, L(al_less_64) ++ ++ vld vr1, a1, 16 ++ vld vr2, a1, 32 ++ vld vr3, a1, 48 ++ addi.d a2, a2, -64 ++ ++ vst vr0, a5, 0 ++ vld vr0, a1, 64 ++ addi.d a1, a1, 64 ++ vst vr1, a5, 16 ++ ++ vst vr2, a5, 32 ++ vst vr3, a5, 48 ++ addi.d a5, a5, 64 ++L(al_less_64): ++ blt a2, t7, L(al_less_32) ++ ++ ++ vld vr1, a1, 16 ++ addi.d a2, a2, -32 ++ vst vr0, a5, 0 ++ vld vr0, a1, 32 ++ ++ addi.d a1, a1, 32 ++ vst vr1, a5, 16 ++ addi.d a5, a5, 32 ++L(al_less_32): ++ blt a2, t6, L(al_less_16) ++ ++ vst vr0, a5, 0 ++ vld vr0, a1, 16 ++ addi.d a5, a5, 16 ++L(al_less_16): ++ vld vr1, a4, -16 ++ ++ vst vr0, a5, 0 ++ vst vr1, a3, -16 ++ jr ra ++ nop ++ ++ ++L(unaligned): ++ pcalau12i t2, %pc_hi20(L(INDEX)) ++ bstrins.d a1, zero, 3, 0 ++ vld vr8, t2, %pc_lo12(L(INDEX)) ++ vld vr0, a1, 0 ++ ++ vld vr1, a1, 16 ++ addi.d a2, a2, -16 ++ vst vr2, a0, 0 ++ add.d a5, a0, t0 ++ ++ vreplgr2vr.b vr9, t1 ++ andi t2, a2, 0x7f ++ vadd.b vr9, vr9, vr8 ++ addi.d a1, a1, 32 ++ ++ ++ beq t2, a2, L(un_less_128) ++ sub.d t3, a2, t2 ++ move a2, t2 ++ add.d a6, a1, 
t3 ++ ++L(un_loop): ++ vld vr2, a1, 0 ++ vld vr3, a1, 16 ++ vld vr4, a1, 32 ++ vld vr5, a1, 48 ++ ++ vld vr6, a1, 64 ++ vld vr7, a1, 80 ++ vshuf.b vr8, vr1, vr0, vr9 ++ vld vr0, a1, 96 ++ ++ vst vr8, a5, 0 ++ vshuf.b vr8, vr2, vr1, vr9 ++ vld vr1, a1, 112 ++ vst vr8, a5, 16 ++ ++ ++ addi.d a1, a1, 128 ++ vshuf.b vr2, vr3, vr2, vr9 ++ vshuf.b vr3, vr4, vr3, vr9 ++ vst vr2, a5, 32 ++ ++ vshuf.b vr4, vr5, vr4, vr9 ++ vst vr3, a5, 48 ++ vshuf.b vr5, vr6, vr5, vr9 ++ vst vr4, a5, 64 ++ ++ vshuf.b vr6, vr7, vr6, vr9 ++ vst vr5, a5, 80 ++ vshuf.b vr7, vr0, vr7, vr9 ++ vst vr6, a5, 96 ++ ++ vst vr7, a5, 112 ++ addi.d a5, a5, 128 ++ bne a1, a6, L(un_loop) ++L(un_less_128): ++ blt a2, t8, L(un_less_64) ++ ++ ++ vld vr2, a1, 0 ++ vld vr3, a1, 16 ++ vshuf.b vr4, vr1, vr0, vr9 ++ vld vr0, a1, 32 ++ ++ vst vr4, a5, 0 ++ addi.d a2, a2, -64 ++ vshuf.b vr4, vr2, vr1, vr9 ++ vld vr1, a1, 48 ++ ++ addi.d a1, a1, 64 ++ vst vr4, a5, 16 ++ vshuf.b vr2, vr3, vr2, vr9 ++ vshuf.b vr3, vr0, vr3, vr9 ++ ++ vst vr2, a5, 32 ++ vst vr3, a5, 48 ++ addi.d a5, a5, 64 ++L(un_less_64): ++ blt a2, t7, L(un_less_32) ++ ++ ++ vshuf.b vr3, vr1, vr0, vr9 ++ vld vr0, a1, 0 ++ vst vr3, a5, 0 ++ addi.d a2, a2, -32 ++ ++ vshuf.b vr3, vr0, vr1, vr9 ++ vld vr1, a1, 16 ++ addi.d a1, a1, 32 ++ vst vr3, a5, 16 ++ ++ addi.d a5, a5, 32 ++L(un_less_32): ++ blt a2, t6, L(un_less_16) ++ vshuf.b vr2, vr1, vr0, vr9 ++ vor.v vr0, vr1, vr1 ++ ++ vld vr1, a1, 0 ++ vst vr2, a5, 0 ++ addi.d a5, a5, 16 ++L(un_less_16): ++ vld vr2, a4, -16 ++ ++ ++ vshuf.b vr0, vr1, vr0, vr9 ++ vst vr0, a5, 0 ++ vst vr2, a3, -16 ++ jr ra ++ ++L(copy_back): ++ addi.d t0, a3, -1 ++ vld vr2, a4, -16 ++ andi t0, t0, 0xf ++ addi.d t0, t0, 1 ++ ++ sub.d a4, a4, t0 ++ sub.d a2, a2, t0 ++ andi t1, a4, 0xf ++ bnez t1, L(back_unaligned) ++ ++ vld vr0, a4, -16 ++ addi.d a2, a2, -16 ++ vst vr2, a3, -16 ++ andi t2, a2, 0x7f ++ ++ ++ sub.d a3, a3, t0 ++ beq t2, a2, L(back_al_less_128) ++ sub.d t3, a2, t2 ++ move a2, t2 ++ ++ sub.d a6, a4, t3 ++L(back_al_loop): ++ vld vr1, a4, -32 ++ vld vr2, a4, -48 ++ vld vr3, a4, -64 ++ ++ vld vr4, a4, -80 ++ vld vr5, a4, -96 ++ vld vr6, a4, -112 ++ vld vr7, a4, -128 ++ ++ vst vr0, a3, -16 ++ vld vr0, a4, -144 ++ addi.d a4, a4, -128 ++ vst vr1, a3, -32 ++ ++ ++ vst vr2, a3, -48 ++ vst vr3, a3, -64 ++ vst vr4, a3, -80 ++ vst vr5, a3, -96 ++ ++ vst vr6, a3, -112 ++ vst vr7, a3, -128 ++ addi.d a3, a3, -128 ++ bne a4, a6, L(back_al_loop) ++ ++L(back_al_less_128): ++ blt a2, t8, L(back_al_less_64) ++ vld vr1, a4, -32 ++ vld vr2, a4, -48 ++ vld vr3, a4, -64 ++ ++ addi.d a2, a2, -64 ++ vst vr0, a3, -16 ++ vld vr0, a4, -80 ++ addi.d a4, a4, -64 ++ ++ ++ vst vr1, a3, -32 ++ vst vr2, a3, -48 ++ vst vr3, a3, -64 ++ addi.d a3, a3, -64 ++ ++L(back_al_less_64): ++ blt a2, t7, L(back_al_less_32) ++ vld vr1, a4, -32 ++ addi.d a2, a2, -32 ++ vst vr0, a3, -16 ++ ++ vld vr0, a4, -48 ++ vst vr1, a3, -32 ++ addi.d a3, a3, -32 ++ addi.d a4, a4, -32 ++ ++L(back_al_less_32): ++ blt a2, t6, L(back_al_less_16) ++ vst vr0, a3, -16 ++ vld vr0, a4, -32 ++ addi.d a3, a3, -16 ++ ++ ++L(back_al_less_16): ++ vld vr1, a1, 0 ++ vst vr0, a3, -16 ++ vst vr1, a0, 0 ++ jr ra ++ ++L(back_unaligned): ++ pcalau12i t2, %pc_hi20(L(INDEX)) ++ bstrins.d a4, zero, 3, 0 ++ vld vr8, t2, %pc_lo12(L(INDEX)) ++ vld vr0, a4, 0 ++ ++ vld vr1, a4, -16 ++ addi.d a2, a2, -16 ++ vst vr2, a3, -16 ++ sub.d a3, a3, t0 ++ ++ ++ vreplgr2vr.b vr9, t1 ++ andi t2, a2, 0x7f ++ vadd.b vr9, vr9, vr8 ++ addi.d a4, a4, -16 ++ ++ beq t2, a2, L(back_un_less_128) ++ sub.d t3, a2, t2 ++ move a2, t2 ++ sub.d a6, a4, t3 
++ ++L(back_un_loop): ++ vld vr2, a4, -16 ++ vld vr3, a4, -32 ++ vld vr4, a4, -48 ++ ++ vld vr5, a4, -64 ++ vld vr6, a4, -80 ++ vld vr7, a4, -96 ++ vshuf.b vr8, vr0, vr1, vr9 ++ ++ ++ vld vr0, a4, -112 ++ vst vr8, a3, -16 ++ vshuf.b vr8, vr1, vr2, vr9 ++ vld vr1, a4, -128 ++ ++ vst vr8, a3, -32 ++ addi.d a4, a4, -128 ++ vshuf.b vr2, vr2, vr3, vr9 ++ vshuf.b vr3, vr3, vr4, vr9 ++ ++ vst vr2, a3, -48 ++ vshuf.b vr4, vr4, vr5, vr9 ++ vst vr3, a3, -64 ++ vshuf.b vr5, vr5, vr6, vr9 ++ ++ vst vr4, a3, -80 ++ vshuf.b vr6, vr6, vr7, vr9 ++ vst vr5, a3, -96 ++ vshuf.b vr7, vr7, vr0, vr9 ++ ++ ++ vst vr6, a3, -112 ++ vst vr7, a3, -128 ++ addi.d a3, a3, -128 ++ bne a4, a6, L(back_un_loop) ++ ++L(back_un_less_128): ++ blt a2, t8, L(back_un_less_64) ++ vld vr2, a4, -16 ++ vld vr3, a4, -32 ++ vshuf.b vr4, vr0, vr1, vr9 ++ ++ vld vr0, a4, -48 ++ vst vr4, a3, -16 ++ addi.d a2, a2, -64 ++ vshuf.b vr4, vr1, vr2, vr9 ++ ++ vld vr1, a4, -64 ++ addi.d a4, a4, -64 ++ vst vr4, a3, -32 ++ vshuf.b vr2, vr2, vr3, vr9 ++ ++ ++ vshuf.b vr3, vr3, vr0, vr9 ++ vst vr2, a3, -48 ++ vst vr3, a3, -64 ++ addi.d a3, a3, -64 ++ ++L(back_un_less_64): ++ blt a2, t7, L(back_un_less_32) ++ vshuf.b vr3, vr0, vr1, vr9 ++ vld vr0, a4, -16 ++ vst vr3, a3, -16 ++ ++ addi.d a2, a2, -32 ++ vshuf.b vr3, vr1, vr0, vr9 ++ vld vr1, a4, -32 ++ addi.d a4, a4, -32 ++ ++ vst vr3, a3, -32 ++ addi.d a3, a3, -32 ++L(back_un_less_32): ++ blt a2, t6, L(back_un_less_16) ++ vshuf.b vr2, vr0, vr1, vr9 ++ ++ ++ vor.v vr0, vr1, vr1 ++ vld vr1, a4, -16 ++ vst vr2, a3, -16 ++ addi.d a3, a3, -16 ++ ++L(back_un_less_16): ++ vld vr2, a1, 0 ++ vshuf.b vr0, vr0, vr1, vr9 ++ vst vr0, a3, -16 ++ vst vr2, a0, 0 ++ ++ jr ra ++END(MEMMOVE_NAME) ++ ++ .section .rodata.cst16,"M",@progbits,16 ++ .align 4 ++L(INDEX): ++ .dword 0x0706050403020100 ++ .dword 0x0f0e0d0c0b0a0908 ++ ++libc_hidden_builtin_def (MEMCPY_NAME) ++libc_hidden_builtin_def (MEMMOVE_NAME) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/memmove-unaligned.S b/sysdeps/loongarch/lp64/multiarch/memmove-unaligned.S +new file mode 100644 +index 00000000..3284ce25 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/memmove-unaligned.S +@@ -0,0 +1,380 @@ ++/* Optimized memmove_unaligned implementation using basic LoongArch instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ . 
*/ ++ ++#include ++#include ++#include ++ ++#if IS_IN (libc) ++ ++# define MEMMOVE_NAME __memmove_unaligned ++ ++# define LD_64(reg, n) \ ++ ld.d t0, reg, n; \ ++ ld.d t1, reg, n + 8; \ ++ ld.d t2, reg, n + 16; \ ++ ld.d t3, reg, n + 24; \ ++ ld.d t4, reg, n + 32; \ ++ ld.d t5, reg, n + 40; \ ++ ld.d t6, reg, n + 48; \ ++ ld.d t7, reg, n + 56; ++ ++# define ST_64(reg, n) \ ++ st.d t0, reg, n; \ ++ st.d t1, reg, n + 8; \ ++ st.d t2, reg, n + 16; \ ++ st.d t3, reg, n + 24; \ ++ st.d t4, reg, n + 32; \ ++ st.d t5, reg, n + 40; \ ++ st.d t6, reg, n + 48; \ ++ st.d t7, reg, n + 56; ++ ++LEAF(MEMMOVE_NAME, 3) ++ add.d a4, a1, a2 ++ add.d a3, a0, a2 ++ beq a1, a0, L(less_1bytes) ++ move t8, a0 ++ ++ srai.d a6, a2, 4 ++ beqz a6, L(less_16bytes) ++ srai.d a6, a2, 6 ++ bnez a6, L(more_64bytes) ++ srai.d a6, a2, 5 ++ beqz a6, L(less_32bytes) ++ ++ ld.d t0, a1, 0 ++ ld.d t1, a1, 8 ++ ld.d t2, a1, 16 ++ ld.d t3, a1, 24 ++ ++ ld.d t4, a4, -32 ++ ld.d t5, a4, -24 ++ ld.d t6, a4, -16 ++ ld.d t7, a4, -8 ++ ++ st.d t0, a0, 0 ++ st.d t1, a0, 8 ++ st.d t2, a0, 16 ++ st.d t3, a0, 24 ++ ++ st.d t4, a3, -32 ++ st.d t5, a3, -24 ++ st.d t6, a3, -16 ++ st.d t7, a3, -8 ++ ++ jr ra ++ ++L(less_32bytes): ++ ld.d t0, a1, 0 ++ ld.d t1, a1, 8 ++ ld.d t2, a4, -16 ++ ld.d t3, a4, -8 ++ ++ st.d t0, a0, 0 ++ st.d t1, a0, 8 ++ st.d t2, a3, -16 ++ st.d t3, a3, -8 ++ ++ jr ra ++ ++L(less_16bytes): ++ srai.d a6, a2, 3 ++ beqz a6, L(less_8bytes) ++ ++ ld.d t0, a1, 0 ++ ld.d t1, a4, -8 ++ st.d t0, a0, 0 ++ st.d t1, a3, -8 ++ ++ jr ra ++ ++L(less_8bytes): ++ srai.d a6, a2, 2 ++ beqz a6, L(less_4bytes) ++ ++ ld.w t0, a1, 0 ++ ld.w t1, a4, -4 ++ st.w t0, a0, 0 ++ st.w t1, a3, -4 ++ ++ jr ra ++ ++L(less_4bytes): ++ srai.d a6, a2, 1 ++ beqz a6, L(less_2bytes) ++ ++ ld.h t0, a1, 0 ++ ld.h t1, a4, -2 ++ st.h t0, a0, 0 ++ st.h t1, a3, -2 ++ ++ jr ra ++ ++L(less_2bytes): ++ beqz a2, L(less_1bytes) ++ ++ ld.b t0, a1, 0 ++ st.b t0, a0, 0 ++ ++ jr ra ++ ++L(less_1bytes): ++ jr ra ++ ++L(more_64bytes): ++ sub.d a7, a0, a1 ++ bltu a7, a2, L(copy_backward) ++ ++L(copy_forward): ++ srli.d a0, a0, 3 ++ slli.d a0, a0, 3 ++ beq a0, t8, L(all_align) ++ addi.d a0, a0, 0x8 ++ sub.d a7, t8, a0 ++ sub.d a1, a1, a7 ++ add.d a2, a7, a2 ++ ++L(start_unalign_proc): ++ pcaddi t1, 18 ++ slli.d a6, a7, 3 ++ add.d t1, t1, a6 ++ jr t1 ++ ++ ld.b t0, a1, -7 ++ st.b t0, a0, -7 ++ ld.b t0, a1, -6 ++ st.b t0, a0, -6 ++ ld.b t0, a1, -5 ++ st.b t0, a0, -5 ++ ld.b t0, a1, -4 ++ st.b t0, a0, -4 ++ ld.b t0, a1, -3 ++ st.b t0, a0, -3 ++ ld.b t0, a1, -2 ++ st.b t0, a0, -2 ++ ld.b t0, a1, -1 ++ st.b t0, a0, -1 ++L(start_over): ++ ++ addi.d a2, a2, -0x80 ++ blt a2, zero, L(end_unalign_proc) ++ ++L(loop_less): ++ LD_64(a1, 0) ++ ST_64(a0, 0) ++ LD_64(a1, 64) ++ ST_64(a0, 64) ++ ++ addi.d a0, a0, 0x80 ++ addi.d a1, a1, 0x80 ++ addi.d a2, a2, -0x80 ++ bge a2, zero, L(loop_less) ++ ++L(end_unalign_proc): ++ addi.d a2, a2, 0x80 ++ ++ pcaddi t1, 36 ++ andi t2, a2, 0x78 ++ add.d a1, a1, t2 ++ add.d a0, a0, t2 ++ sub.d t1, t1, t2 ++ jr t1 ++ ++ ld.d t0, a1, -120 ++ st.d t0, a0, -120 ++ ld.d t0, a1, -112 ++ st.d t0, a0, -112 ++ ld.d t0, a1, -104 ++ st.d t0, a0, -104 ++ ld.d t0, a1, -96 ++ st.d t0, a0, -96 ++ ld.d t0, a1, -88 ++ st.d t0, a0, -88 ++ ld.d t0, a1, -80 ++ st.d t0, a0, -80 ++ ld.d t0, a1, -72 ++ st.d t0, a0, -72 ++ ld.d t0, a1, -64 ++ st.d t0, a0, -64 ++ ld.d t0, a1, -56 ++ st.d t0, a0, -56 ++ ld.d t0, a1, -48 ++ st.d t0, a0, -48 ++ ld.d t0, a1, -40 ++ st.d t0, a0, -40 ++ ld.d t0, a1, -32 ++ st.d t0, a0, -32 ++ ld.d t0, a1, -24 ++ st.d t0, a0, -24 ++ ld.d t0, a1, -16 ++ 
st.d t0, a0, -16 ++ ld.d t0, a1, -8 ++ st.d t0, a0, -8 ++ ++ andi a2, a2, 0x7 ++ pcaddi t1, 18 ++ slli.d a2, a2, 3 ++ sub.d t1, t1, a2 ++ jr t1 ++ ++ ld.b t0, a4, -7 ++ st.b t0, a3, -7 ++ ld.b t0, a4, -6 ++ st.b t0, a3, -6 ++ ld.b t0, a4, -5 ++ st.b t0, a3, -5 ++ ld.b t0, a4, -4 ++ st.b t0, a3, -4 ++ ld.b t0, a4, -3 ++ st.b t0, a3, -3 ++ ld.b t0, a4, -2 ++ st.b t0, a3, -2 ++ ld.b t0, a4, -1 ++ st.b t0, a3, -1 ++L(end): ++ move a0, t8 ++ jr ra ++ ++L(all_align): ++ addi.d a1, a1, 0x8 ++ addi.d a0, a0, 0x8 ++ ld.d t0, a1, -8 ++ st.d t0, a0, -8 ++ addi.d a2, a2, -8 ++ b L(start_over) ++ ++L(all_align_back): ++ addi.d a4, a4, -0x8 ++ addi.d a3, a3, -0x8 ++ ld.d t0, a4, 0 ++ st.d t0, a3, 0 ++ addi.d a2, a2, -8 ++ b L(start_over_back) ++ ++L(copy_backward): ++ move a5, a3 ++ srli.d a3, a3, 3 ++ slli.d a3, a3, 3 ++ beq a3, a5, L(all_align_back) ++ sub.d a7, a3, a5 ++ add.d a4, a4, a7 ++ add.d a2, a7, a2 ++ ++ pcaddi t1, 18 ++ slli.d a6, a7, 3 ++ add.d t1, t1, a6 ++ jr t1 ++ ++ ld.b t0, a4, 6 ++ st.b t0, a3, 6 ++ ld.b t0, a4, 5 ++ st.b t0, a3, 5 ++ ld.b t0, a4, 4 ++ st.b t0, a3, 4 ++ ld.b t0, a4, 3 ++ st.b t0, a3, 3 ++ ld.b t0, a4, 2 ++ st.b t0, a3, 2 ++ ld.b t0, a4, 1 ++ st.b t0, a3, 1 ++ ld.b t0, a4, 0 ++ st.b t0, a3, 0 ++L(start_over_back): ++ addi.d a2, a2, -0x80 ++ blt a2, zero, L(end_unalign_proc_back) ++ ++L(loop_less_back): ++ LD_64(a4, -64) ++ ST_64(a3, -64) ++ LD_64(a4, -128) ++ ST_64(a3, -128) ++ ++ addi.d a4, a4, -0x80 ++ addi.d a3, a3, -0x80 ++ addi.d a2, a2, -0x80 ++ bge a2, zero, L(loop_less_back) ++ ++L(end_unalign_proc_back): ++ addi.d a2, a2, 0x80 ++ ++ pcaddi t1, 36 ++ andi t2, a2, 0x78 ++ sub.d a4, a4, t2 ++ sub.d a3, a3, t2 ++ sub.d t1, t1, t2 ++ jr t1 ++ ++ ld.d t0, a4, 112 ++ st.d t0, a3, 112 ++ ld.d t0, a4, 104 ++ st.d t0, a3, 104 ++ ld.d t0, a4, 96 ++ st.d t0, a3, 96 ++ ld.d t0, a4, 88 ++ st.d t0, a3, 88 ++ ld.d t0, a4, 80 ++ st.d t0, a3, 80 ++ ld.d t0, a4, 72 ++ st.d t0, a3, 72 ++ ld.d t0, a4, 64 ++ st.d t0, a3, 64 ++ ld.d t0, a4, 56 ++ st.d t0, a3, 56 ++ ld.d t0, a4, 48 ++ st.d t0, a3, 48 ++ ld.d t0, a4, 40 ++ st.d t0, a3, 40 ++ ld.d t0, a4, 32 ++ st.d t0, a3, 32 ++ ld.d t0, a4, 24 ++ st.d t0, a3, 24 ++ ld.d t0, a4, 16 ++ st.d t0, a3, 16 ++ ld.d t0, a4, 8 ++ st.d t0, a3, 8 ++ ld.d t0, a4, 0 ++ st.d t0, a3, 0 ++ ++ andi a2, a2, 0x7 ++ pcaddi t1, 18 ++ slli.d a2, a2, 3 ++ sub.d t1, t1, a2 ++ jr t1 ++ ++ ld.b t0, a1, 6 ++ st.b t0, a0, 6 ++ ld.b t0, a1, 5 ++ st.b t0, a0, 5 ++ ld.b t0, a1, 4 ++ st.b t0, a0, 4 ++ ld.b t0, a1, 3 ++ st.b t0, a0, 3 ++ ld.b t0, a1, 2 ++ st.b t0, a0, 2 ++ ld.b t0, a1, 1 ++ st.b t0, a0, 1 ++ ld.b t0, a1, 0 ++ st.b t0, a0, 0 ++ ++ move a0, t8 ++ jr ra ++END(MEMMOVE_NAME) ++ ++libc_hidden_builtin_def (MEMMOVE_NAME) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/memmove.c b/sysdeps/loongarch/lp64/multiarch/memmove.c +new file mode 100644 +index 00000000..7e3ca4c4 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/memmove.c +@@ -0,0 +1,38 @@ ++/* Multiple versions of memmove. ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. 
++
++ The GNU C Library is distributed in the hope that it will be useful,
++ but WITHOUT ANY WARRANTY; without even the implied warranty of
++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ Lesser General Public License for more details.
++
++ You should have received a copy of the GNU Lesser General Public
++ License along with the GNU C Library; if not, see
++ <https://www.gnu.org/licenses/>. */
++
++/* Define multiple versions only for the definition in libc. */
++#if IS_IN (libc)
++# define memmove __redirect_memmove
++# include <string.h>
++# undef memmove
++
++# define SYMBOL_NAME memmove
++# include "ifunc-lasx.h"
++
++libc_ifunc_redirected (__redirect_memmove, __libc_memmove,
++ IFUNC_SELECTOR ());
++strong_alias (__libc_memmove, memmove);
++
++# ifdef SHARED
++__hidden_ver1 (__libc_memmove, __GI_memmove, __redirect_memmove)
++ __attribute__ ((visibility ("hidden")));
++# endif
++
++#endif
+diff --git a/sysdeps/loongarch/lp64/multiarch/memrchr-generic.c b/sysdeps/loongarch/lp64/multiarch/memrchr-generic.c
+new file mode 100644
+index 00000000..ced61ebc
+--- /dev/null
++++ b/sysdeps/loongarch/lp64/multiarch/memrchr-generic.c
+@@ -0,0 +1,23 @@
++/* Generic implementation of memrchr.
++ Copyright (C) 2023 Free Software Foundation, Inc.
++ This file is part of the GNU C Library.
++
++ The GNU C Library is free software; you can redistribute it and/or
++ modify it under the terms of the GNU Lesser General Public
++ License as published by the Free Software Foundation; either
++ version 2.1 of the License, or (at your option) any later version.
++
++ The GNU C Library is distributed in the hope that it will be useful,
++ but WITHOUT ANY WARRANTY; without even the implied warranty of
++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ Lesser General Public License for more details.
++
++ You should have received a copy of the GNU Lesser General Public
++ License along with the GNU C Library. If not, see
++ <https://www.gnu.org/licenses/>. */
++
++#if IS_IN (libc)
++# define MEMRCHR __memrchr_generic
++#endif
++
++#include <string/memrchr.c>
+diff --git a/sysdeps/loongarch/lp64/multiarch/memrchr-lasx.S b/sysdeps/loongarch/lp64/multiarch/memrchr-lasx.S
+new file mode 100644
+index 00000000..5f3e0d06
+--- /dev/null
++++ b/sysdeps/loongarch/lp64/multiarch/memrchr-lasx.S
+@@ -0,0 +1,123 @@
++/* Optimized memrchr implementation using LoongArch LASX instructions.
++ Copyright (C) 2023 Free Software Foundation, Inc.
++ This file is part of the GNU C Library.
++
++ The GNU C Library is free software; you can redistribute it and/or
++ modify it under the terms of the GNU Lesser General Public
++ License as published by the Free Software Foundation; either
++ version 2.1 of the License, or (at your option) any later version.
++
++ The GNU C Library is distributed in the hope that it will be useful,
++ but WITHOUT ANY WARRANTY; without even the implied warranty of
++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ Lesser General Public License for more details.
++
++ You should have received a copy of the GNU Lesser General Public
++ License along with the GNU C Library. If not, see
++ <https://www.gnu.org/licenses/>.
*/ ++ ++#include ++#include ++#include ++ ++#if IS_IN (libc) && !defined __loongarch_soft_float ++ ++#ifndef MEMRCHR ++# define MEMRCHR __memrchr_lasx ++#endif ++ ++LEAF(MEMRCHR, 6) ++ beqz a2, L(ret0) ++ addi.d a2, a2, -1 ++ add.d a3, a0, a2 ++ andi t1, a3, 0x3f ++ ++ bstrins.d a3, zero, 5, 0 ++ addi.d t1, t1, 1 ++ xvld xr0, a3, 0 ++ xvld xr1, a3, 32 ++ ++ sub.d t2, zero, t1 ++ li.d t3, -1 ++ xvreplgr2vr.b xr2, a1 ++ andi t4, a0, 0x3f ++ ++ srl.d t2, t3, t2 ++ xvseq.b xr0, xr0, xr2 ++ xvseq.b xr1, xr1, xr2 ++ xvmsknz.b xr0, xr0 ++ ++ ++ xvmsknz.b xr1, xr1 ++ xvpickve.w xr3, xr0, 4 ++ xvpickve.w xr4, xr1, 4 ++ vilvl.h vr0, vr3, vr0 ++ ++ vilvl.h vr1, vr4, vr1 ++ vilvl.w vr0, vr1, vr0 ++ movfr2gr.d t0, fa0 ++ and t0, t0, t2 ++ ++ bltu a2, t1, L(end) ++ bnez t0, L(found) ++ bstrins.d a0, zero, 5, 0 ++L(loop): ++ xvld xr0, a3, -64 ++ ++ xvld xr1, a3, -32 ++ addi.d a3, a3, -64 ++ xvseq.b xr0, xr0, xr2 ++ xvseq.b xr1, xr1, xr2 ++ ++ ++ beq a0, a3, L(out) ++ xvmax.bu xr3, xr0, xr1 ++ xvseteqz.v fcc0, xr3 ++ bcnez fcc0, L(loop) ++ ++ xvmsknz.b xr0, xr0 ++ xvmsknz.b xr1, xr1 ++ xvpickve.w xr3, xr0, 4 ++ xvpickve.w xr4, xr1, 4 ++ ++ vilvl.h vr0, vr3, vr0 ++ vilvl.h vr1, vr4, vr1 ++ vilvl.w vr0, vr1, vr0 ++ movfr2gr.d t0, fa0 ++ ++L(found): ++ addi.d a0, a3, 63 ++ clz.d t1, t0 ++ sub.d a0, a0, t1 ++ jr ra ++ ++ ++L(out): ++ xvmsknz.b xr0, xr0 ++ xvmsknz.b xr1, xr1 ++ xvpickve.w xr3, xr0, 4 ++ xvpickve.w xr4, xr1, 4 ++ ++ vilvl.h vr0, vr3, vr0 ++ vilvl.h vr1, vr4, vr1 ++ vilvl.w vr0, vr1, vr0 ++ movfr2gr.d t0, fa0 ++ ++L(end): ++ sll.d t2, t3, t4 ++ and t0, t0, t2 ++ addi.d a0, a3, 63 ++ clz.d t1, t0 ++ ++ sub.d a0, a0, t1 ++ maskeqz a0, a0, t0 ++ jr ra ++L(ret0): ++ move a0, zero ++ ++ ++ jr ra ++END(MEMRCHR) ++ ++libc_hidden_builtin_def (MEMRCHR) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/memrchr-lsx.S b/sysdeps/loongarch/lp64/multiarch/memrchr-lsx.S +new file mode 100644 +index 00000000..39a7c8b0 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/memrchr-lsx.S +@@ -0,0 +1,105 @@ ++/* Optimized memrchr implementation using LoongArch LSX instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ . 
*/ ++ ++#include ++#include ++#include ++ ++#if IS_IN (libc) && !defined __loongarch_soft_float ++ ++# define MEMRCHR __memrchr_lsx ++ ++LEAF(MEMRCHR, 6) ++ beqz a2, L(ret0) ++ addi.d a2, a2, -1 ++ add.d a3, a0, a2 ++ andi t1, a3, 0x1f ++ ++ bstrins.d a3, zero, 4, 0 ++ addi.d t1, t1, 1 ++ vld vr0, a3, 0 ++ vld vr1, a3, 16 ++ ++ sub.d t2, zero, t1 ++ li.d t3, -1 ++ vreplgr2vr.b vr2, a1 ++ andi t4, a0, 0x1f ++ ++ srl.d t2, t3, t2 ++ vseq.b vr0, vr0, vr2 ++ vseq.b vr1, vr1, vr2 ++ vmsknz.b vr0, vr0 ++ ++ ++ vmsknz.b vr1, vr1 ++ vilvl.h vr0, vr1, vr0 ++ movfr2gr.s t0, fa0 ++ and t0, t0, t2 ++ ++ bltu a2, t1, L(end) ++ bnez t0, L(found) ++ bstrins.d a0, zero, 4, 0 ++L(loop): ++ vld vr0, a3, -32 ++ ++ vld vr1, a3, -16 ++ addi.d a3, a3, -32 ++ vseq.b vr0, vr0, vr2 ++ vseq.b vr1, vr1, vr2 ++ ++ beq a0, a3, L(out) ++ vmax.bu vr3, vr0, vr1 ++ vseteqz.v fcc0, vr3 ++ bcnez fcc0, L(loop) ++ ++ ++ vmsknz.b vr0, vr0 ++ vmsknz.b vr1, vr1 ++ vilvl.h vr0, vr1, vr0 ++ movfr2gr.s t0, fa0 ++ ++L(found): ++ addi.d a0, a3, 31 ++ clz.w t1, t0 ++ sub.d a0, a0, t1 ++ jr ra ++ ++L(out): ++ vmsknz.b vr0, vr0 ++ vmsknz.b vr1, vr1 ++ vilvl.h vr0, vr1, vr0 ++ movfr2gr.s t0, fa0 ++ ++L(end): ++ sll.d t2, t3, t4 ++ and t0, t0, t2 ++ addi.d a0, a3, 31 ++ clz.w t1, t0 ++ ++ ++ sub.d a0, a0, t1 ++ maskeqz a0, a0, t0 ++ jr ra ++L(ret0): ++ move a0, zero ++ ++ jr ra ++END(MEMRCHR) ++ ++libc_hidden_builtin_def (MEMRCHR) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/memrchr.c b/sysdeps/loongarch/lp64/multiarch/memrchr.c +new file mode 100644 +index 00000000..8baba9ab +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/memrchr.c +@@ -0,0 +1,33 @@ ++/* Multiple versions of memrchr. ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++/* Define multiple versions only for the definition in libc. */ ++#if IS_IN (libc) ++# define memrchr __redirect_memrchr ++# include ++# undef memrchr ++ ++# define SYMBOL_NAME memrchr ++# include "ifunc-memrchr.h" ++ ++libc_ifunc_redirected (__redirect_memrchr, __memrchr, IFUNC_SELECTOR ()); ++libc_hidden_def (__memrchr) ++weak_alias (__memrchr, memrchr) ++ ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/memset-aligned.S b/sysdeps/loongarch/lp64/multiarch/memset-aligned.S +new file mode 100644 +index 00000000..1fce95b7 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/memset-aligned.S +@@ -0,0 +1,174 @@ ++/* Optimized memset aligned implementation using basic LoongArch instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. 
++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ . */ ++ ++#include ++#include ++#include ++ ++#if IS_IN (libc) ++# define MEMSET_NAME __memset_aligned ++#else ++# define MEMSET_NAME memset ++#endif ++ ++LEAF(MEMSET_NAME, 6) ++ move t0, a0 ++ andi a3, a0, 0x7 ++ li.w t6, 16 ++ beqz a3, L(align) ++ bltu a2, t6, L(short_data) ++ ++L(make_align): ++ li.w t8, 8 ++ sub.d t2, t8, a3 ++ pcaddi t1, 11 ++ slli.d t3, t2, 2 ++ sub.d t1, t1, t3 ++ jr t1 ++ ++L(al7): ++ st.b a1, t0, 6 ++L(al6): ++ st.b a1, t0, 5 ++L(al5): ++ st.b a1, t0, 4 ++L(al4): ++ st.b a1, t0, 3 ++L(al3): ++ st.b a1, t0, 2 ++L(al2): ++ st.b a1, t0, 1 ++L(al1): ++ st.b a1, t0, 0 ++L(al0): ++ add.d t0, t0, t2 ++ sub.d a2, a2, t2 ++ ++L(align): ++ bstrins.d a1, a1, 15, 8 ++ bstrins.d a1, a1, 31, 16 ++ bstrins.d a1, a1, 63, 32 ++ bltu a2, t6, L(less_16bytes) ++ ++ andi a4, a2, 0x3f ++ beq a4, a2, L(less_64bytes) ++ ++ sub.d t1, a2, a4 ++ move a2, a4 ++ add.d a5, t0, t1 ++ ++L(loop_64bytes): ++ addi.d t0, t0, 64 ++ st.d a1, t0, -64 ++ st.d a1, t0, -56 ++ st.d a1, t0, -48 ++ st.d a1, t0, -40 ++ ++ st.d a1, t0, -32 ++ st.d a1, t0, -24 ++ st.d a1, t0, -16 ++ st.d a1, t0, -8 ++ bne t0, a5, L(loop_64bytes) ++ ++L(less_64bytes): ++ srai.d a4, a2, 5 ++ beqz a4, L(less_32bytes) ++ addi.d a2, a2, -32 ++ st.d a1, t0, 0 ++ ++ st.d a1, t0, 8 ++ st.d a1, t0, 16 ++ st.d a1, t0, 24 ++ addi.d t0, t0, 32 ++ ++L(less_32bytes): ++ bltu a2, t6, L(less_16bytes) ++ addi.d a2, a2, -16 ++ st.d a1, t0, 0 ++ st.d a1, t0, 8 ++ addi.d t0, t0, 16 ++ ++L(less_16bytes): ++ srai.d a4, a2, 3 ++ beqz a4, L(less_8bytes) ++ addi.d a2, a2, -8 ++ st.d a1, t0, 0 ++ addi.d t0, t0, 8 ++ ++L(less_8bytes): ++ beqz a2, L(less_1byte) ++ srai.d a4, a2, 2 ++ beqz a4, L(less_4bytes) ++ addi.d a2, a2, -4 ++ st.w a1, t0, 0 ++ addi.d t0, t0, 4 ++ ++L(less_4bytes): ++ srai.d a3, a2, 1 ++ beqz a3, L(less_2bytes) ++ addi.d a2, a2, -2 ++ st.h a1, t0, 0 ++ addi.d t0, t0, 2 ++ ++L(less_2bytes): ++ beqz a2, L(less_1byte) ++ st.b a1, t0, 0 ++L(less_1byte): ++ jr ra ++ ++L(short_data): ++ pcaddi t1, 19 ++ slli.d t3, a2, 2 ++ sub.d t1, t1, t3 ++ jr t1 ++L(short_15): ++ st.b a1, a0, 14 ++L(short_14): ++ st.b a1, a0, 13 ++L(short_13): ++ st.b a1, a0, 12 ++L(short_12): ++ st.b a1, a0, 11 ++L(short_11): ++ st.b a1, a0, 10 ++L(short_10): ++ st.b a1, a0, 9 ++L(short_9): ++ st.b a1, a0, 8 ++L(short_8): ++ st.b a1, a0, 7 ++L(short_7): ++ st.b a1, a0, 6 ++L(short_6): ++ st.b a1, a0, 5 ++L(short_5): ++ st.b a1, a0, 4 ++L(short_4): ++ st.b a1, a0, 3 ++L(short_3): ++ st.b a1, a0, 2 ++L(short_2): ++ st.b a1, a0, 1 ++L(short_1): ++ st.b a1, a0, 0 ++L(short_0): ++ jr ra ++END(MEMSET_NAME) ++ ++libc_hidden_builtin_def (MEMSET_NAME) +diff --git a/sysdeps/loongarch/lp64/multiarch/memset-lasx.S b/sysdeps/loongarch/lp64/multiarch/memset-lasx.S +new file mode 100644 +index 00000000..041abbac +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/memset-lasx.S +@@ -0,0 +1,142 @@ ++/* Optimized memset implementation using 
LoongArch LASX instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ . */ ++ ++#include ++#include ++#include ++ ++#if IS_IN (libc) && !defined __loongarch_soft_float ++ ++# define MEMSET __memset_lasx ++ ++LEAF(MEMSET, 6) ++ li.d t1, 32 ++ move a3, a0 ++ xvreplgr2vr.b xr0, a1 ++ add.d a4, a0, a2 ++ ++ bgeu t1, a2, L(less_32bytes) ++ li.d t3, 128 ++ li.d t2, 64 ++ blt t3, a2, L(long_bytes) ++ ++L(less_128bytes): ++ bgeu t2, a2, L(less_64bytes) ++ xvst xr0, a3, 0 ++ xvst xr0, a3, 32 ++ xvst xr0, a4, -32 ++ ++ xvst xr0, a4, -64 ++ jr ra ++L(less_64bytes): ++ xvst xr0, a3, 0 ++ xvst xr0, a4, -32 ++ ++ ++ jr ra ++L(less_32bytes): ++ srli.d t0, a2, 4 ++ beqz t0, L(less_16bytes) ++ vst vr0, a3, 0 ++ ++ vst vr0, a4, -16 ++ jr ra ++L(less_16bytes): ++ srli.d t0, a2, 3 ++ beqz t0, L(less_8bytes) ++ ++ vstelm.d vr0, a3, 0, 0 ++ vstelm.d vr0, a4, -8, 0 ++ jr ra ++L(less_8bytes): ++ srli.d t0, a2, 2 ++ ++ beqz t0, L(less_4bytes) ++ vstelm.w vr0, a3, 0, 0 ++ vstelm.w vr0, a4, -4, 0 ++ jr ra ++ ++ ++L(less_4bytes): ++ srli.d t0, a2, 1 ++ beqz t0, L(less_2bytes) ++ vstelm.h vr0, a3, 0, 0 ++ vstelm.h vr0, a4, -2, 0 ++ ++ jr ra ++L(less_2bytes): ++ beqz a2, L(less_1bytes) ++ st.b a1, a3, 0 ++L(less_1bytes): ++ jr ra ++ ++L(long_bytes): ++ xvst xr0, a3, 0 ++ bstrins.d a3, zero, 4, 0 ++ addi.d a3, a3, 32 ++ sub.d a2, a4, a3 ++ ++ andi t0, a2, 0xff ++ beq t0, a2, L(long_end) ++ move a2, t0 ++ sub.d t0, a4, t0 ++ ++ ++L(loop_256): ++ xvst xr0, a3, 0 ++ xvst xr0, a3, 32 ++ xvst xr0, a3, 64 ++ xvst xr0, a3, 96 ++ ++ xvst xr0, a3, 128 ++ xvst xr0, a3, 160 ++ xvst xr0, a3, 192 ++ xvst xr0, a3, 224 ++ ++ addi.d a3, a3, 256 ++ bne a3, t0, L(loop_256) ++L(long_end): ++ bltu a2, t3, L(end_less_128) ++ addi.d a2, a2, -128 ++ ++ xvst xr0, a3, 0 ++ xvst xr0, a3, 32 ++ xvst xr0, a3, 64 ++ xvst xr0, a3, 96 ++ ++ ++ addi.d a3, a3, 128 ++L(end_less_128): ++ bltu a2, t2, L(end_less_64) ++ addi.d a2, a2, -64 ++ xvst xr0, a3, 0 ++ ++ xvst xr0, a3, 32 ++ addi.d a3, a3, 64 ++L(end_less_64): ++ bltu a2, t1, L(end_less_32) ++ xvst xr0, a3, 0 ++ ++L(end_less_32): ++ xvst xr0, a4, -32 ++ jr ra ++END(MEMSET) ++ ++libc_hidden_builtin_def (MEMSET) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/memset-lsx.S b/sysdeps/loongarch/lp64/multiarch/memset-lsx.S +new file mode 100644 +index 00000000..3d3982aa +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/memset-lsx.S +@@ -0,0 +1,135 @@ ++/* Optimized memset implementation using LoongArch LSX instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. 
++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ . */ ++ ++#include ++#include ++#include ++ ++#if IS_IN (libc) && !defined __loongarch_soft_float ++ ++# define MEMSET __memset_lsx ++ ++LEAF(MEMSET, 6) ++ li.d t1, 16 ++ move a3, a0 ++ vreplgr2vr.b vr0, a1 ++ add.d a4, a0, a2 ++ ++ bgeu t1, a2, L(less_16bytes) ++ li.d t3, 64 ++ li.d t2, 32 ++ bgeu a2, t3, L(long_bytes) ++ ++L(less_64bytes): ++ bgeu t2, a2, L(less_32bytes) ++ vst vr0, a3, 0 ++ vst vr0, a3, 16 ++ vst vr0, a4, -32 ++ ++ vst vr0, a4, -16 ++ jr ra ++L(less_32bytes): ++ vst vr0, a3, 0 ++ vst vr0, a4, -16 ++ ++ ++ jr ra ++L(less_16bytes): ++ srli.d t0, a2, 3 ++ beqz t0, L(less_8bytes) ++ vstelm.d vr0, a3, 0, 0 ++ ++ vstelm.d vr0, a4, -8, 0 ++ jr ra ++L(less_8bytes): ++ srli.d t0, a2, 2 ++ beqz t0, L(less_4bytes) ++ ++ vstelm.w vr0, a3, 0, 0 ++ vstelm.w vr0, a4, -4, 0 ++ jr ra ++L(less_4bytes): ++ srli.d t0, a2, 1 ++ ++ beqz t0, L(less_2bytes) ++ vstelm.h vr0, a3, 0, 0 ++ vstelm.h vr0, a4, -2, 0 ++ jr ra ++ ++ ++L(less_2bytes): ++ beqz a2, L(less_1bytes) ++ vstelm.b vr0, a3, 0, 0 ++L(less_1bytes): ++ jr ra ++L(long_bytes): ++ vst vr0, a3, 0 ++ ++ bstrins.d a3, zero, 3, 0 ++ addi.d a3, a3, 16 ++ sub.d a2, a4, a3 ++ andi t0, a2, 0x7f ++ ++ beq t0, a2, L(long_end) ++ move a2, t0 ++ sub.d t0, a4, t0 ++ ++L(loop_128): ++ vst vr0, a3, 0 ++ ++ vst vr0, a3, 16 ++ vst vr0, a3, 32 ++ vst vr0, a3, 48 ++ vst vr0, a3, 64 ++ ++ ++ vst vr0, a3, 80 ++ vst vr0, a3, 96 ++ vst vr0, a3, 112 ++ addi.d a3, a3, 128 ++ ++ bne a3, t0, L(loop_128) ++L(long_end): ++ bltu a2, t3, L(end_less_64) ++ addi.d a2, a2, -64 ++ vst vr0, a3, 0 ++ ++ vst vr0, a3, 16 ++ vst vr0, a3, 32 ++ vst vr0, a3, 48 ++ addi.d a3, a3, 64 ++ ++L(end_less_64): ++ bltu a2, t2, L(end_less_32) ++ addi.d a2, a2, -32 ++ vst vr0, a3, 0 ++ vst vr0, a3, 16 ++ ++ addi.d a3, a3, 32 ++L(end_less_32): ++ bltu a2, t1, L(end_less_16) ++ vst vr0, a3, 0 ++ ++L(end_less_16): ++ vst vr0, a4, -16 ++ jr ra ++END(MEMSET) ++ ++libc_hidden_builtin_def (MEMSET) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/memset-unaligned.S b/sysdeps/loongarch/lp64/multiarch/memset-unaligned.S +new file mode 100644 +index 00000000..f7d32039 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/memset-unaligned.S +@@ -0,0 +1,162 @@ ++/* Optimized memset unaligned implementation using basic LoongArch instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ . 
*/ ++ ++#include ++#include ++#include ++ ++#if IS_IN (libc) ++ ++# define MEMSET_NAME __memset_unaligned ++ ++#define ST_128(n) \ ++ st.d a1, a0, n; \ ++ st.d a1, a0, n+8 ; \ ++ st.d a1, a0, n+16 ; \ ++ st.d a1, a0, n+24 ; \ ++ st.d a1, a0, n+32 ; \ ++ st.d a1, a0, n+40 ; \ ++ st.d a1, a0, n+48 ; \ ++ st.d a1, a0, n+56 ; \ ++ st.d a1, a0, n+64 ; \ ++ st.d a1, a0, n+72 ; \ ++ st.d a1, a0, n+80 ; \ ++ st.d a1, a0, n+88 ; \ ++ st.d a1, a0, n+96 ; \ ++ st.d a1, a0, n+104; \ ++ st.d a1, a0, n+112; \ ++ st.d a1, a0, n+120; ++ ++LEAF(MEMSET_NAME, 6) ++ bstrins.d a1, a1, 15, 8 ++ add.d t7, a0, a2 ++ bstrins.d a1, a1, 31, 16 ++ move t0, a0 ++ ++ bstrins.d a1, a1, 63, 32 ++ srai.d t8, a2, 4 ++ beqz t8, L(less_16bytes) ++ srai.d t8, a2, 6 ++ ++ bnez t8, L(more_64bytes) ++ srai.d t8, a2, 5 ++ beqz t8, L(less_32bytes) ++ ++ st.d a1, a0, 0 ++ st.d a1, a0, 8 ++ st.d a1, a0, 16 ++ st.d a1, a0, 24 ++ ++ st.d a1, t7, -32 ++ st.d a1, t7, -24 ++ st.d a1, t7, -16 ++ st.d a1, t7, -8 ++ ++ jr ra ++ ++L(less_32bytes): ++ st.d a1, a0, 0 ++ st.d a1, a0, 8 ++ st.d a1, t7, -16 ++ st.d a1, t7, -8 ++ ++ jr ra ++ ++L(less_16bytes): ++ srai.d t8, a2, 3 ++ beqz t8, L(less_8bytes) ++ st.d a1, a0, 0 ++ st.d a1, t7, -8 ++ ++ jr ra ++ ++L(less_8bytes): ++ srai.d t8, a2, 2 ++ beqz t8, L(less_4bytes) ++ st.w a1, a0, 0 ++ st.w a1, t7, -4 ++ ++ jr ra ++ ++L(less_4bytes): ++ srai.d t8, a2, 1 ++ beqz t8, L(less_2bytes) ++ st.h a1, a0, 0 ++ st.h a1, t7, -2 ++ ++ jr ra ++ ++L(less_2bytes): ++ beqz a2, L(less_1bytes) ++ st.b a1, a0, 0 ++ ++ jr ra ++ ++L(less_1bytes): ++ jr ra ++ ++L(more_64bytes): ++ srli.d a0, a0, 3 ++ slli.d a0, a0, 3 ++ addi.d a0, a0, 0x8 ++ st.d a1, t0, 0 ++ ++ sub.d t2, t0, a0 ++ add.d a2, t2, a2 ++ addi.d a2, a2, -0x80 ++ blt a2, zero, L(end_unalign_proc) ++ ++L(loop_less): ++ ST_128(0) ++ addi.d a0, a0, 0x80 ++ addi.d a2, a2, -0x80 ++ bge a2, zero, L(loop_less) ++ ++L(end_unalign_proc): ++ addi.d a2, a2, 0x80 ++ pcaddi t1, 20 ++ andi t5, a2, 0x78 ++ srli.d t5, t5, 1 ++ ++ sub.d t1, t1, t5 ++ jr t1 ++ ++ st.d a1, a0, 112 ++ st.d a1, a0, 104 ++ st.d a1, a0, 96 ++ st.d a1, a0, 88 ++ st.d a1, a0, 80 ++ st.d a1, a0, 72 ++ st.d a1, a0, 64 ++ st.d a1, a0, 56 ++ st.d a1, a0, 48 ++ st.d a1, a0, 40 ++ st.d a1, a0, 32 ++ st.d a1, a0, 24 ++ st.d a1, a0, 16 ++ st.d a1, a0, 8 ++ st.d a1, a0, 0 ++ st.d a1, t7, -8 ++ ++ move a0, t0 ++ jr ra ++END(MEMSET_NAME) ++ ++libc_hidden_builtin_def (MEMSET_NAME) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/memset.c b/sysdeps/loongarch/lp64/multiarch/memset.c +new file mode 100644 +index 00000000..3ff60d8a +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/memset.c +@@ -0,0 +1,37 @@ ++/* Multiple versions of memset. ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . 
*/ ++ ++/* Define multiple versions only for the definition in libc. */ ++#if IS_IN (libc) ++# define memset __redirect_memset ++# include ++# undef memset ++ ++# define SYMBOL_NAME memset ++# include "ifunc-lasx.h" ++ ++libc_ifunc_redirected (__redirect_memset, memset, ++ IFUNC_SELECTOR ()); ++ ++# ifdef SHARED ++__hidden_ver1 (memset, __GI_memset, __redirect_memset) ++ __attribute__ ((visibility ("hidden"))) __attribute_copy__ (memset); ++# endif ++ ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/rawmemchr-aligned.S b/sysdeps/loongarch/lp64/multiarch/rawmemchr-aligned.S +new file mode 100644 +index 00000000..9c7155ae +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/rawmemchr-aligned.S +@@ -0,0 +1,124 @@ ++/* Optimized rawmemchr implementation using basic LoongArch instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ . */ ++ ++#include ++#include ++#include ++ ++#if IS_IN (libc) ++# define RAWMEMCHR_NAME __rawmemchr_aligned ++#else ++# define RAWMEMCHR_NAME __rawmemchr ++#endif ++ ++LEAF(RAWMEMCHR_NAME, 6) ++ andi t1, a0, 0x7 ++ bstrins.d a0, zero, 2, 0 ++ lu12i.w a2, 0x01010 ++ bstrins.d a1, a1, 15, 8 ++ ++ ld.d t0, a0, 0 ++ slli.d t1, t1, 3 ++ ori a2, a2, 0x101 ++ bstrins.d a1, a1, 31, 16 ++ ++ li.w t8, -1 ++ bstrins.d a1, a1, 63, 32 ++ bstrins.d a2, a2, 63, 32 ++ sll.d t2, t8, t1 ++ ++ sll.d t3, a1, t1 ++ orn t0, t0, t2 ++ slli.d a3, a2, 7 ++ beqz a1, L(find_zero) ++ ++ xor t0, t0, t3 ++ sub.d t1, t0, a2 ++ andn t2, a3, t0 ++ and t3, t1, t2 ++ ++ bnez t3, L(count_pos) ++ addi.d a0, a0, 8 ++ ++L(loop): ++ ld.d t0, a0, 0 ++ xor t0, t0, a1 ++ ++ sub.d t1, t0, a2 ++ andn t2, a3, t0 ++ and t3, t1, t2 ++ bnez t3, L(count_pos) ++ ++ ld.d t0, a0, 8 ++ addi.d a0, a0, 16 ++ xor t0, t0, a1 ++ sub.d t1, t0, a2 ++ ++ andn t2, a3, t0 ++ and t3, t1, t2 ++ beqz t3, L(loop) ++ addi.d a0, a0, -8 ++L(count_pos): ++ ctz.d t0, t3 ++ srli.d t0, t0, 3 ++ add.d a0, a0, t0 ++ jr ra ++ ++L(loop_7bit): ++ ld.d t0, a0, 0 ++L(find_zero): ++ sub.d t1, t0, a2 ++ and t2, t1, a3 ++ bnez t2, L(more_check) ++ ++ ld.d t0, a0, 8 ++ addi.d a0, a0, 16 ++ sub.d t1, t0, a2 ++ and t2, t1, a3 ++ ++ beqz t2, L(loop_7bit) ++ addi.d a0, a0, -8 ++ ++L(more_check): ++ andn t2, a3, t0 ++ and t3, t1, t2 ++ bnez t3, L(count_pos) ++ addi.d a0, a0, 8 ++ ++L(loop_8bit): ++ ld.d t0, a0, 0 ++ ++ sub.d t1, t0, a2 ++ andn t2, a3, t0 ++ and t3, t1, t2 ++ bnez t3, L(count_pos) ++ ++ ld.d t0, a0, 8 ++ addi.d a0, a0, 16 ++ sub.d t1, t0, a2 ++ ++ andn t2, a3, t0 ++ and t3, t1, t2 ++ beqz t3, L(loop_8bit) ++ ++ addi.d a0, a0, -8 ++ b L(count_pos) ++ ++END(RAWMEMCHR_NAME) ++ ++libc_hidden_builtin_def (__rawmemchr) +diff --git a/sysdeps/loongarch/lp64/multiarch/rawmemchr-lasx.S b/sysdeps/loongarch/lp64/multiarch/rawmemchr-lasx.S +new file mode 100644 +index 00000000..be2eb59d +--- /dev/null ++++ 
b/sysdeps/loongarch/lp64/multiarch/rawmemchr-lasx.S +@@ -0,0 +1,82 @@ ++/* Optimized rawmemchr implementation using LoongArch LASX instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ . */ ++ ++#include ++#include ++#include ++ ++#if IS_IN (libc) && !defined __loongarch_soft_float ++ ++# define RAWMEMCHR __rawmemchr_lasx ++ ++LEAF(RAWMEMCHR, 6) ++ move a2, a0 ++ bstrins.d a0, zero, 5, 0 ++ xvld xr0, a0, 0 ++ xvld xr1, a0, 32 ++ ++ xvreplgr2vr.b xr2, a1 ++ xvseq.b xr0, xr0, xr2 ++ xvseq.b xr1, xr1, xr2 ++ xvmsknz.b xr0, xr0 ++ ++ xvmsknz.b xr1, xr1 ++ xvpickve.w xr3, xr0, 4 ++ xvpickve.w xr4, xr1, 4 ++ vilvl.h vr0, vr3, vr0 ++ ++ vilvl.h vr1, vr4, vr1 ++ vilvl.w vr0, vr1, vr0 ++ movfr2gr.d t0, fa0 ++ sra.d t0, t0, a2 ++ ++ ++ beqz t0, L(loop) ++ ctz.d t0, t0 ++ add.d a0, a2, t0 ++ jr ra ++ ++L(loop): ++ xvld xr0, a0, 64 ++ xvld xr1, a0, 96 ++ addi.d a0, a0, 64 ++ xvseq.b xr0, xr0, xr2 ++ ++ xvseq.b xr1, xr1, xr2 ++ xvmax.bu xr3, xr0, xr1 ++ xvseteqz.v fcc0, xr3 ++ bcnez fcc0, L(loop) ++ ++ xvmsknz.b xr0, xr0 ++ xvmsknz.b xr1, xr1 ++ xvpickve.w xr3, xr0, 4 ++ xvpickve.w xr4, xr1, 4 ++ ++ ++ vilvl.h vr0, vr3, vr0 ++ vilvl.h vr1, vr4, vr1 ++ vilvl.w vr0, vr1, vr0 ++ movfr2gr.d t0, fa0 ++ ++ ctz.d t0, t0 ++ add.d a0, a0, t0 ++ jr ra ++END(RAWMEMCHR) ++ ++libc_hidden_builtin_def (RAWMEMCHR) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/rawmemchr-lsx.S b/sysdeps/loongarch/lp64/multiarch/rawmemchr-lsx.S +new file mode 100644 +index 00000000..2f6fe024 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/rawmemchr-lsx.S +@@ -0,0 +1,71 @@ ++/* Optimized rawmemchr implementation using LoongArch LSX instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ . 
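The aligned rawmemchr variant further up relies on the usual word-at-a-time byte test: a2 holds 0x0101010101010101, a3 holds 0x8080808080808080, and the sub.d/andn/and triple flags any byte of the XORed word that became zero. A minimal C sketch of that per-word test, for reference only:

    #include <stdint.h>

    #define ONES  0x0101010101010101ULL   /* register a2 in the assembly      */
    #define HIGHS 0x8080808080808080ULL   /* register a3, i.e. ONES << 7      */

    /* Non-zero iff some byte of w equals c; applied to each 8-byte word.  */
    static inline uint64_t
    byte_match (uint64_t w, unsigned char c)
    {
      uint64_t x = w ^ (c * ONES);        /* matching bytes become 0          */
      return (x - ONES) & ~x & HIGHS;     /* the sub.d / andn / and sequence  */
    }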
*/ ++ ++#include ++#include ++#include ++ ++#if IS_IN (libc) && !defined __loongarch_soft_float ++ ++# define RAWMEMCHR __rawmemchr_lsx ++ ++LEAF(RAWMEMCHR, 6) ++ move a2, a0 ++ bstrins.d a0, zero, 4, 0 ++ vld vr0, a0, 0 ++ vld vr1, a0, 16 ++ ++ vreplgr2vr.b vr2, a1 ++ vseq.b vr0, vr0, vr2 ++ vseq.b vr1, vr1, vr2 ++ vmsknz.b vr0, vr0 ++ ++ vmsknz.b vr1, vr1 ++ vilvl.h vr0, vr1, vr0 ++ movfr2gr.s t0, fa0 ++ sra.w t0, t0, a2 ++ ++ beqz t0, L(loop) ++ ctz.w t0, t0 ++ add.d a0, a2, t0 ++ jr ra ++ ++ ++L(loop): ++ vld vr0, a0, 32 ++ vld vr1, a0, 48 ++ addi.d a0, a0, 32 ++ vseq.b vr0, vr0, vr2 ++ ++ vseq.b vr1, vr1, vr2 ++ vmax.bu vr3, vr0, vr1 ++ vseteqz.v fcc0, vr3 ++ bcnez fcc0, L(loop) ++ ++ vmsknz.b vr0, vr0 ++ vmsknz.b vr1, vr1 ++ vilvl.h vr0, vr1, vr0 ++ movfr2gr.s t0, fa0 ++ ++ ctz.w t0, t0 ++ add.d a0, a0, t0 ++ jr ra ++END(RAWMEMCHR) ++ ++libc_hidden_builtin_def (RAWMEMCHR) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/rawmemchr.c b/sysdeps/loongarch/lp64/multiarch/rawmemchr.c +new file mode 100644 +index 00000000..89c7ffff +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/rawmemchr.c +@@ -0,0 +1,37 @@ ++/* Multiple versions of rawmemchr. ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#if IS_IN (libc) ++# define rawmemchr __redirect_rawmemchr ++# define __rawmemchr __redirect___rawmemchr ++# include ++# undef rawmemchr ++# undef __rawmemchr ++ ++# define SYMBOL_NAME rawmemchr ++# include "ifunc-rawmemchr.h" ++ ++libc_ifunc_redirected (__redirect_rawmemchr, __rawmemchr, ++ IFUNC_SELECTOR ()); ++weak_alias (__rawmemchr, rawmemchr) ++# ifdef SHARED ++__hidden_ver1 (__rawmemchr, __GI___rawmemchr, __redirect___rawmemchr) ++ __attribute__((visibility ("hidden"))); ++# endif ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/stpcpy-aligned.S b/sysdeps/loongarch/lp64/multiarch/stpcpy-aligned.S +new file mode 100644 +index 00000000..1f763db6 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/stpcpy-aligned.S +@@ -0,0 +1,27 @@ ++/* stpcpy-aligned implementation is in strcpy-aligned.S. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. 
If not, see ++ . */ ++ ++#if IS_IN (libc) ++# define STPCPY __stpcpy_aligned ++#else ++# define STPCPY __stpcpy ++#endif ++ ++#define USE_AS_STPCPY ++#define STRCPY STPCPY ++#include "strcpy-aligned.S" +diff --git a/sysdeps/loongarch/lp64/multiarch/stpcpy-lasx.S b/sysdeps/loongarch/lp64/multiarch/stpcpy-lasx.S +new file mode 100644 +index 00000000..13d6c953 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/stpcpy-lasx.S +@@ -0,0 +1,22 @@ ++/* stpcpy-lasx implementation is in strcpy-lasx.S. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ . */ ++ ++#define STPCPY __stpcpy_lasx ++#define USE_AS_STPCPY ++#define STRCPY STPCPY ++#include "strcpy-lasx.S" +diff --git a/sysdeps/loongarch/lp64/multiarch/stpcpy-lsx.S b/sysdeps/loongarch/lp64/multiarch/stpcpy-lsx.S +new file mode 100644 +index 00000000..e0f17ab5 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/stpcpy-lsx.S +@@ -0,0 +1,22 @@ ++/* stpcpy-lsx implementation is in strcpy-lsx.S. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ . */ ++ ++#define STPCPY __stpcpy_lsx ++#define USE_AS_STPCPY ++#define STRCPY STPCPY ++#include "strcpy-lsx.S" +diff --git a/sysdeps/loongarch/lp64/multiarch/stpcpy-unaligned.S b/sysdeps/loongarch/lp64/multiarch/stpcpy-unaligned.S +new file mode 100644 +index 00000000..cc2f9712 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/stpcpy-unaligned.S +@@ -0,0 +1,22 @@ ++/* stpcpy-unaligned implementation is in strcpy-unaligned.S. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. 
++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ . */ ++ ++#define STPCPY __stpcpy_unaligned ++#define USE_AS_STPCPY ++#define STRCPY STPCPY ++#include "strcpy-unaligned.S" +diff --git a/sysdeps/loongarch/lp64/multiarch/stpcpy.c b/sysdeps/loongarch/lp64/multiarch/stpcpy.c +new file mode 100644 +index 00000000..d4860d7a +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/stpcpy.c +@@ -0,0 +1,42 @@ ++/* Multiple versions of stpcpy. ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2017-2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++/* Define multiple versions only for the definition in libc. */ ++#if IS_IN (libc) ++# define stpcpy __redirect_stpcpy ++# define __stpcpy __redirect___stpcpy ++# define NO_MEMPCPY_STPCPY_REDIRECT ++# define __NO_STRING_INLINES ++# include ++# undef stpcpy ++# undef __stpcpy ++ ++# define SYMBOL_NAME stpcpy ++# include "ifunc-lasx.h" ++ ++libc_ifunc_redirected (__redirect_stpcpy, __stpcpy, IFUNC_SELECTOR ()); ++ ++weak_alias (__stpcpy, stpcpy) ++# ifdef SHARED ++__hidden_ver1 (__stpcpy, __GI___stpcpy, __redirect___stpcpy) ++ __attribute__ ((visibility ("hidden"))) __attribute_copy__ (stpcpy); ++__hidden_ver1 (stpcpy, __GI_stpcpy, __redirect_stpcpy) ++ __attribute__ ((visibility ("hidden"))) __attribute_copy__ (stpcpy); ++# endif ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/strchr-aligned.S b/sysdeps/loongarch/lp64/multiarch/strchr-aligned.S +new file mode 100644 +index 00000000..62020054 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strchr-aligned.S +@@ -0,0 +1,99 @@ ++/* Optimized strchr implementation using basic LoongArch instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ . 
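The ifunc wrappers above (rawmemchr.c, stpcpy.c) take IFUNC_SELECTOR from the new ifunc-*.h headers, whose bodies are not visible in this part of the hunk. Judging from the variants listed in the diffstat, the selector presumably has roughly the following shape; the SUPPORT_* names and the unaligned branch are assumptions, not text from the patch:

    /* Hypothetical sketch of an IFUNC_SELECTOR such as ifunc-lasx.h provides.  */
    static inline void *
    IFUNC_SELECTOR (void)
    {
      if (SUPPORT_LASX)                /* assumed hwcap test for LASX        */
        return OPTIMIZE (lasx);
      else if (SUPPORT_LSX)            /* assumed hwcap test for LSX         */
        return OPTIMIZE (lsx);
      else if (SUPPORT_UAL)            /* unaligned-access variant, if any   */
        return OPTIMIZE (unaligned);
      else
        return OPTIMIZE (aligned);
    }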
*/ ++ ++#include ++#include ++#include ++ ++#if IS_IN (libc) ++# define STRCHR_NAME __strchr_aligned ++#else ++# define STRCHR_NAME strchr ++#endif ++ ++LEAF(STRCHR_NAME, 6) ++ slli.d t1, a0, 3 ++ bstrins.d a0, zero, 2, 0 ++ lu12i.w a2, 0x01010 ++ ld.d t2, a0, 0 ++ ++ ori a2, a2, 0x101 ++ andi a1, a1, 0xff ++ bstrins.d a2, a2, 63, 32 ++ li.w t0, -1 ++ ++ mul.d a1, a1, a2 ++ sll.d t0, t0, t1 ++ slli.d a3, a2, 7 ++ orn t2, t2, t0 ++ ++ sll.d t3, a1, t1 ++ xor t4, t2, t3 ++ sub.d a4, t2, a2 ++ sub.d a5, t4, a2 ++ ++ ++ andn a4, a4, t2 ++ andn a5, a5, t4 ++ or t0, a4, a5 ++ and t0, t0, a3 ++ ++ bnez t0, L(end) ++ addi.d a0, a0, 8 ++L(loop): ++ ld.d t4, a0, 0 ++ xor t2, t4, a1 ++ ++ sub.d a4, t4, a2 ++ sub.d a5, t2, a2 ++ andn a4, a4, t4 ++ andn a5, a5, t2 ++ ++ or t0, a4, a5 ++ and t0, t0, a3 ++ bnez t0, L(end) ++ ld.d t4, a0, 8 ++ ++ ++ addi.d a0, a0, 16 ++ xor t2, t4, a1 ++ sub.d a4, t4, a2 ++ sub.d a5, t2, a2 ++ ++ andn a4, a4, t4 ++ andn a5, a5, t2 ++ or t0, a4, a5 ++ and t0, t0, a3 ++ ++ beqz t0, L(loop) ++ addi.d a0, a0, -8 ++L(end): ++ and t0, a5, a3 ++ and t1, a4, a3 ++ ++ ctz.d t0, t0 ++ ctz.d t1, t1 ++ srli.w t2, t0, 3 ++ sltu t3, t1, t0 ++ ++ ++ add.d a0, a0, t2 ++ masknez a0, a0, t3 ++ jr ra ++END(STRCHR_NAME) +diff --git a/sysdeps/loongarch/lp64/multiarch/strchr-lasx.S b/sysdeps/loongarch/lp64/multiarch/strchr-lasx.S +new file mode 100644 +index 00000000..4d3cc588 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strchr-lasx.S +@@ -0,0 +1,91 @@ ++/* Optimized strchr implementation using LoongArch LASX instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ . 
*/ ++ ++#include ++#include ++#include ++ ++#if IS_IN (libc) && !defined __loongarch_soft_float ++ ++#ifndef AS_STRCHRNUL ++# define STRCHR __strchr_lasx ++#endif ++ ++LEAF(STRCHR, 6) ++ andi t1, a0, 0x1f ++ bstrins.d a0, zero, 4, 0 ++ xvld xr0, a0, 0 ++ li.d t2, -1 ++ ++ xvreplgr2vr.b xr1, a1 ++ sll.d t1, t2, t1 ++ xvxor.v xr2, xr0, xr1 ++ xvmin.bu xr0, xr0, xr2 ++ ++ xvmsknz.b xr0, xr0 ++ xvpickve.w xr3, xr0, 4 ++ vilvl.h vr0, vr3, vr0 ++ movfr2gr.s t0, fa0 ++ ++ orn t0, t0, t1 ++ bne t0, t2, L(end) ++ addi.d a0, a0, 32 ++ nop ++ ++ ++L(loop): ++ xvld xr0, a0, 0 ++ xvxor.v xr2, xr0, xr1 ++ xvmin.bu xr0, xr0, xr2 ++ xvsetanyeqz.b fcc0, xr0 ++ ++ bcnez fcc0, L(loop_end) ++ xvld xr0, a0, 32 ++ addi.d a0, a0, 64 ++ xvxor.v xr2, xr0, xr1 ++ ++ xvmin.bu xr0, xr0, xr2 ++ xvsetanyeqz.b fcc0, xr0 ++ bceqz fcc0, L(loop) ++ addi.d a0, a0, -32 ++ ++L(loop_end): ++ xvmsknz.b xr0, xr0 ++ xvpickve.w xr1, xr0, 4 ++ vilvl.h vr0, vr1, vr0 ++ movfr2gr.s t0, fa0 ++ ++ ++L(end): ++ cto.w t0, t0 ++ add.d a0, a0, t0 ++#ifndef AS_STRCHRNUL ++ vreplgr2vr.b vr0, t0 ++ xvpermi.q xr3, xr2, 1 ++ ++ vshuf.b vr0, vr3, vr2, vr0 ++ vpickve2gr.bu t0, vr0, 0 ++ masknez a0, a0, t0 ++#endif ++ jr ra ++ ++END(STRCHR) ++ ++libc_hidden_builtin_def(STRCHR) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/strchr-lsx.S b/sysdeps/loongarch/lp64/multiarch/strchr-lsx.S +new file mode 100644 +index 00000000..8b78c35c +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strchr-lsx.S +@@ -0,0 +1,73 @@ ++/* Optimized strlen implementation using LoongArch LSX instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ . */ ++ ++#include ++#include ++#include ++ ++#if IS_IN (libc) && !defined __loongarch_soft_float ++ ++#ifndef AS_STRCHRNUL ++# define STRCHR __strchr_lsx ++#endif ++ ++LEAF(STRCHR, 6) ++ andi t1, a0, 0xf ++ bstrins.d a0, zero, 3, 0 ++ vld vr0, a0, 0 ++ li.d t2, -1 ++ ++ vreplgr2vr.b vr1, a1 ++ sll.d t3, t2, t1 ++ vxor.v vr2, vr0, vr1 ++ vmin.bu vr0, vr0, vr2 ++ ++ vmsknz.b vr0, vr0 ++ movfr2gr.s t0, fa0 ++ ext.w.h t0, t0 ++ orn t0, t0, t3 ++ ++ beq t0, t2, L(loop) ++L(found): ++ cto.w t0, t0 ++ add.d a0, a0, t0 ++#ifndef AS_STRCHRNUL ++ vreplve.b vr2, vr2, t0 ++ vpickve2gr.bu t1, vr2, 0 ++ masknez a0, a0, t1 ++#endif ++ jr ra ++ ++ ++L(loop): ++ vld vr0, a0, 16 ++ addi.d a0, a0, 16 ++ vxor.v vr2, vr0, vr1 ++ vmin.bu vr0, vr0, vr2 ++ ++ vsetanyeqz.b fcc0, vr0 ++ bceqz fcc0, L(loop) ++ vmsknz.b vr0, vr0 ++ movfr2gr.s t0, fa0 ++ ++ b L(found) ++END(STRCHR) ++ ++libc_hidden_builtin_def (STRCHR) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/strchr.c b/sysdeps/loongarch/lp64/multiarch/strchr.c +new file mode 100644 +index 00000000..404e97bd +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strchr.c +@@ -0,0 +1,36 @@ ++/* Multiple versions of strchr. ++ All versions must be listed in ifunc-impl-list.c. 
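In the LASX/LSX strchr variants above, a single vector pass detects both the searched character and the terminating NUL: xvxor.v turns matching bytes into zero, and xvmin.bu folds that in with the original bytes, so any zero lane means "stop here". Stated per byte in scalar C, as a sketch of the semantics only:

    /* Per-byte meaning of the vxor + vmin.bu combination.  */
    static inline int
    stop_here (unsigned char s, unsigned char c)
    {
      unsigned char t = s ^ c;             /* 0 iff s == c                */
      unsigned char m = s < t ? s : t;     /* the vmin.bu lane            */
      return m == 0;                       /* true iff s == 0 or s == c   */
    }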
++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++/* Define multiple versions only for the definition in libc. */ ++#if IS_IN (libc) ++# define strchr __redirect_strchr ++# include ++# undef strchr ++ ++# define SYMBOL_NAME strchr ++# include "ifunc-strchr.h" ++ ++libc_ifunc_redirected (__redirect_strchr, strchr, IFUNC_SELECTOR ()); ++weak_alias(strchr, index) ++# ifdef SHARED ++__hidden_ver1 (strchr, __GI_strchr, __redirect_strchr) ++ __attribute__ ((visibility ("hidden"))) __attribute_copy__ (strchr); ++# endif ++ ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/strchrnul-aligned.S b/sysdeps/loongarch/lp64/multiarch/strchrnul-aligned.S +new file mode 100644 +index 00000000..20856a06 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strchrnul-aligned.S +@@ -0,0 +1,95 @@ ++/* Optimized strchrnul implementation using basic LoongArch instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ . 
*/ ++ ++#include ++#include ++#include ++ ++#if IS_IN (libc) ++# define STRCHRNUL_NAME __strchrnul_aligned ++#else ++# define STRCHRNUL_NAME __strchrnul ++#endif ++ ++LEAF(STRCHRNUL_NAME, 6) ++ slli.d t1, a0, 3 ++ bstrins.d a0, zero, 2, 0 ++ lu12i.w a2, 0x01010 ++ ld.d t2, a0, 0 ++ ++ ori a2, a2, 0x101 ++ andi a1, a1, 0xff ++ bstrins.d a2, a2, 63, 32 ++ li.w t0, -1 ++ ++ mul.d a1, a1, a2 ++ sll.d t0, t0, t1 ++ slli.d a3, a2, 7 ++ orn t2, t2, t0 ++ ++ sll.d t3, a1, t1 ++ xor t4, t2, t3 ++ sub.d a4, t2, a2 ++ sub.d a5, t4, a2 ++ ++ ++ andn a4, a4, t2 ++ andn a5, a5, t4 ++ or t0, a4, a5 ++ and t0, t0, a3 ++ ++ bnez t0, L(end) ++ addi.d a0, a0, 8 ++L(loop): ++ ld.d t4, a0, 0 ++ xor t2, t4, a1 ++ ++ sub.d a4, t4, a2 ++ sub.d a5, t2, a2 ++ andn a4, a4, t4 ++ andn a5, a5, t2 ++ ++ or t0, a4, a5 ++ and t0, t0, a3 ++ bnez t0, L(end) ++ ld.d t4, a0, 8 ++ ++ ++ addi.d a0, a0, 16 ++ xor t2, t4, a1 ++ sub.d a4, t4, a2 ++ sub.d a5, t2, a2 ++ ++ andn a4, a4, t4 ++ andn a5, a5, t2 ++ or t0, a4, a5 ++ and t0, t0, a3 ++ ++ beqz t0, L(loop) ++ addi.d a0, a0, -8 ++L(end): ++ ctz.d t0, t0 ++ srli.w t0, t0, 3 ++ ++ ++ add.d a0, a0, t0 ++ jr ra ++END(STRCHRNUL_NAME) ++ ++libc_hidden_builtin_def (STRCHRNUL_NAME) +diff --git a/sysdeps/loongarch/lp64/multiarch/strchrnul-lasx.S b/sysdeps/loongarch/lp64/multiarch/strchrnul-lasx.S +new file mode 100644 +index 00000000..4753d4ce +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strchrnul-lasx.S +@@ -0,0 +1,22 @@ ++/* Optimized strchrnul implementation using LoongArch LASX instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ . */ ++ ++#define STRCHR __strchrnul_lasx ++#define AS_STRCHRNUL ++#include "strchr-lasx.S" +diff --git a/sysdeps/loongarch/lp64/multiarch/strchrnul-lsx.S b/sysdeps/loongarch/lp64/multiarch/strchrnul-lsx.S +new file mode 100644 +index 00000000..671e740c +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strchrnul-lsx.S +@@ -0,0 +1,22 @@ ++/* Optimized strchrnul implementation using LoongArch LSX instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ . 
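As the small strchrnul-lasx.S wrapper above shows, the strchrnul variants are not separate bodies: they define AS_STRCHRNUL and include the matching strchr-*.S, which then skips the final "return NULL on NUL" fix-up. The externally visible difference is only the miss case, e.g.:

    #define _GNU_SOURCE
    #include <string.h>

    void
    example (void)
    {
      const char *s = "hello";
      char *a = strchr (s, 'x');      /* NULL: 'x' not found              */
      char *b = strchrnul (s, 'x');   /* s + 5: points at the final NUL   */
      (void) a; (void) b;
    }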
*/ ++ ++#define STRCHR __strchrnul_lsx ++#define AS_STRCHRNUL ++#include "strchr-lsx.S" +diff --git a/sysdeps/loongarch/lp64/multiarch/strchrnul.c b/sysdeps/loongarch/lp64/multiarch/strchrnul.c +new file mode 100644 +index 00000000..f3b8296e +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strchrnul.c +@@ -0,0 +1,39 @@ ++/* Multiple versions of strchrnul. ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++/* Define multiple versions only for the definition in libc. */ ++ ++#if IS_IN (libc) ++# define strchrnul __redirect_strchrnul ++# define __strchrnul __redirect___strchrnul ++# include ++# undef __strchrnul ++# undef strchrnul ++ ++# define SYMBOL_NAME strchrnul ++# include "ifunc-strchrnul.h" ++ ++libc_ifunc_redirected (__redirect_strchrnul, __strchrnul, ++ IFUNC_SELECTOR ()); ++weak_alias (__strchrnul, strchrnul) ++# ifdef SHARED ++__hidden_ver1 (__strchrnul, __GI___strchrnul, __redirect_strchrnul) ++ __attribute__((visibility ("hidden"))) __attribute_copy__ (strchrnul); ++# endif ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/strcmp-aligned.S b/sysdeps/loongarch/lp64/multiarch/strcmp-aligned.S +new file mode 100644 +index 00000000..ba1f9667 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strcmp-aligned.S +@@ -0,0 +1,179 @@ ++/* Optimized strcmp implementation using basic LoongArch instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ . 
*/ ++ ++#include ++#include ++#include ++ ++#if IS_IN (libc) ++# define STRCMP_NAME __strcmp_aligned ++#else ++# define STRCMP_NAME strcmp ++#endif ++ ++LEAF(STRCMP_NAME, 6) ++ lu12i.w a4, 0x01010 ++ andi a2, a0, 0x7 ++ ori a4, a4, 0x101 ++ andi a3, a1, 0x7 ++ ++ bstrins.d a4, a4, 63, 32 ++ li.d t7, -1 ++ li.d t8, 8 ++ slli.d a5, a4, 7 ++ ++ bne a2, a3, L(unaligned) ++ bstrins.d a0, zero, 2, 0 ++ bstrins.d a1, zero, 2, 0 ++ ld.d t0, a0, 0 ++ ++ ld.d t1, a1, 0 ++ slli.d t3, a2, 3 ++ sll.d t2, t7, t3 ++ orn t0, t0, t2 ++ ++ ++ orn t1, t1, t2 ++ sub.d t2, t0, a4 ++ andn t3, a5, t0 ++ and t2, t2, t3 ++ ++ bne t0, t1, L(al_end) ++L(al_loop): ++ bnez t2, L(ret0) ++ ldx.d t0, a0, t8 ++ ldx.d t1, a1, t8 ++ ++ addi.d t8, t8, 8 ++ sub.d t2, t0, a4 ++ andn t3, a5, t0 ++ and t2, t2, t3 ++ ++ beq t0, t1, L(al_loop) ++L(al_end): ++ xor t3, t0, t1 ++ or t2, t2, t3 ++ ctz.d t3, t2 ++ ++ ++ bstrins.d t3, zero, 2, 0 ++ srl.d t0, t0, t3 ++ srl.d t1, t1, t3 ++ andi t0, t0, 0xff ++ ++ andi t1, t1, 0xff ++ sub.d a0, t0, t1 ++ jr ra ++ nop ++ ++L(ret0): ++ move a0, zero ++ jr ra ++ nop ++ nop ++ ++L(unaligned): ++ slt a6, a3, a2 ++ xor t0, a0, a1 ++ maskeqz t0, t0, a6 ++ xor a0, a0, t0 ++ ++ ++ xor a1, a1, t0 ++ andi a2, a0, 0x7 ++ andi a3, a1, 0x7 ++ bstrins.d a0, zero, 2, 0 ++ ++ bstrins.d a1, zero, 2, 0 ++ ld.d t4, a0, 0 ++ ld.d t1, a1, 0 ++ slli.d a2, a2, 3 ++ ++ slli.d a3, a3, 3 ++ srl.d t0, t4, a2 ++ srl.d t1, t1, a3 ++ srl.d t5, t7, a3 ++ ++ orn t0, t0, t5 ++ orn t1, t1, t5 ++ bne t0, t1, L(not_equal) ++ sll.d t5, t7, a2 ++ ++ ++ sub.d a3, a2, a3 ++ orn t4, t4, t5 ++ sub.d a2, zero, a3 ++ sub.d t2, t4, a4 ++ ++ andn t3, a5, t4 ++ and t2, t2, t3 ++ bnez t2, L(find_zero) ++L(un_loop): ++ srl.d t5, t4, a3 ++ ++ ldx.d t4, a0, t8 ++ ldx.d t1, a1, t8 ++ addi.d t8, t8, 8 ++ sll.d t0, t4, a2 ++ ++ or t0, t0, t5 ++ bne t0, t1, L(not_equal) ++ sub.d t2, t4, a4 ++ andn t3, a5, t4 ++ ++ ++ and t2, t2, t3 ++ beqz t2, L(un_loop) ++L(find_zero): ++ sub.d t2, t0, a4 ++ andn t3, a5, t0 ++ ++ and t2, t2, t3 ++ bnez t2, L(ret0) ++ ldx.d t1, a1, t8 ++ srl.d t0, t4, a3 ++ ++L(not_equal): ++ sub.d t2, t0, a4 ++ andn t3, a5, t0 ++ and t2, t2, t3 ++ xor t3, t0, t1 ++ ++ or t2, t2, t3 ++L(un_end): ++ ctz.d t3, t2 ++ bstrins.d t3, zero, 2, 0 ++ srl.d t0, t0, t3 ++ ++ ++ srl.d t1, t1, t3 ++ andi t0, t0, 0xff ++ andi t1, t1, 0xff ++ sub.d t2, t0, t1 ++ ++ ++ sub.d t3, t1, t0 ++ masknez t0, t2, a6 ++ maskeqz t1, t3, a6 ++ or a0, t0, t1 ++ ++ jr ra ++END(STRCMP_NAME) ++ ++libc_hidden_builtin_def (STRCMP_NAME) +diff --git a/sysdeps/loongarch/lp64/multiarch/strcmp-lsx.S b/sysdeps/loongarch/lp64/multiarch/strcmp-lsx.S +new file mode 100644 +index 00000000..091c8c9e +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strcmp-lsx.S +@@ -0,0 +1,165 @@ ++/* Optimized strcmp implementation using LoongArch LSX instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. 
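For the mixed-alignment path of __strcmp_aligned above, the slt/xor/maskeqz/xor sequence is a branchless conditional swap: the two pointers are exchanged when their misalignments are in the "wrong" order, the decision is remembered in a6, and the sign of the final byte difference is corrected at the end with masknez/maskeqz. The swap step alone, restated as a C sketch:

    #include <stdint.h>

    /* Branchless conditional swap of *p0 and *p1 when swap_needed is 1.  */
    static inline void
    cond_swap (uintptr_t *p0, uintptr_t *p1, int swap_needed)
    {
      uintptr_t m = (*p0 ^ *p1) & -(uintptr_t) swap_needed;   /* maskeqz */
      *p0 ^= m;
      *p1 ^= m;
    }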
++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ . */ ++ ++#include ++#include ++#include ++ ++#if IS_IN (libc) && !defined __loongarch_soft_float ++ ++# define STRCMP __strcmp_lsx ++ ++LEAF(STRCMP, 6) ++ pcalau12i t0, %pc_hi20(L(INDEX)) ++ andi a2, a0, 0xf ++ vld vr2, t0, %pc_lo12(L(INDEX)) ++ andi a3, a1, 0xf ++ ++ bne a2, a3, L(unaligned) ++ bstrins.d a0, zero, 3, 0 ++ bstrins.d a1, zero, 3, 0 ++ vld vr0, a0, 0 ++ ++ vld vr1, a1, 0 ++ vreplgr2vr.b vr3, a2 ++ vslt.b vr2, vr2, vr3 ++ vseq.b vr3, vr0, vr1 ++ ++ vmin.bu vr3, vr0, vr3 ++ vor.v vr3, vr3, vr2 ++ vsetanyeqz.b fcc0, vr3 ++ bcnez fcc0, L(al_out) ++ ++ ++L(al_loop): ++ vld vr0, a0, 16 ++ vld vr1, a1, 16 ++ addi.d a0, a0, 16 ++ addi.d a1, a1, 16 ++ ++ vseq.b vr3, vr0, vr1 ++ vmin.bu vr3, vr0, vr3 ++ vsetanyeqz.b fcc0, vr3 ++ bceqz fcc0, L(al_loop) ++ ++L(al_out): ++ vseqi.b vr3, vr3, 0 ++ vfrstpi.b vr3, vr3, 0 ++ vshuf.b vr0, vr0, vr0, vr3 ++ vshuf.b vr1, vr1, vr1, vr3 ++ ++ vpickve2gr.bu t0, vr0, 0 ++ vpickve2gr.bu t1, vr1, 0 ++ sub.d a0, t0, t1 ++ jr ra ++ ++ ++L(unaligned): ++ slt a4, a3, a2 ++ xor t0, a0, a1 ++ maskeqz t0, t0, a4 ++ xor a0, a0, t0 ++ ++ xor a1, a1, t0 ++ andi a2, a0, 0xf ++ andi a3, a1, 0xf ++ bstrins.d a0, zero, 3, 0 ++ ++ bstrins.d a1, zero, 3, 0 ++ vld vr3, a0, 0 ++ vld vr1, a1, 0 ++ vreplgr2vr.b vr4, a2 ++ ++ vreplgr2vr.b vr5, a3 ++ vslt.b vr7, vr2, vr5 ++ vsub.b vr5, vr5, vr4 ++ vaddi.bu vr6, vr2, 16 ++ ++ ++ vsub.b vr6, vr6, vr5 ++ vshuf.b vr0, vr3, vr3, vr6 ++ vor.v vr0, vr0, vr7 ++ vor.v vr1, vr1, vr7 ++ ++ vseq.b vr5, vr0, vr1 ++ vsetanyeqz.b fcc0, vr5 ++ bcnez fcc0, L(not_equal) ++ vslt.b vr4, vr2, vr4 ++ ++ vor.v vr0, vr3, vr4 ++ vsetanyeqz.b fcc0, vr0 ++ bcnez fcc0, L(find_zero) ++ nop ++ ++L(un_loop): ++ vld vr3, a0, 16 ++ vld vr1, a1, 16 ++ addi.d a0, a0, 16 ++ addi.d a1, a1, 16 ++ ++ ++ vshuf.b vr0, vr3, vr0, vr6 ++ vseq.b vr5, vr0, vr1 ++ vsetanyeqz.b fcc0, vr5 ++ bcnez fcc0, L(not_equal) ++ ++ vsetanyeqz.b fcc0, vr3 ++ vor.v vr0, vr3, vr3 ++ bceqz fcc0, L(un_loop) ++L(find_zero): ++ vmin.bu vr5, vr1, vr5 ++ ++ vsetanyeqz.b fcc0, vr5 ++ bcnez fcc0, L(ret0) ++ vld vr1, a1, 16 ++ vshuf.b vr0, vr3, vr3, vr6 ++ ++ vseq.b vr5, vr0, vr1 ++L(not_equal): ++ vmin.bu vr5, vr0, vr5 ++L(un_end): ++ vseqi.b vr5, vr5, 0 ++ vfrstpi.b vr5, vr5, 0 ++ ++ ++ vshuf.b vr0, vr0, vr0, vr5 ++ vshuf.b vr1, vr1, vr1, vr5 ++ vpickve2gr.bu t0, vr0, 0 ++ vpickve2gr.bu t1, vr1, 0 ++ ++ sub.d t3, t0, t1 ++ sub.d t4, t1, t0 ++ masknez t0, t3, a4 ++ maskeqz t1, t4, a4 ++ ++ or a0, t0, t1 ++ jr ra ++L(ret0): ++ move a0, zero ++ jr ra ++END(STRCMP) ++ ++ .section .rodata.cst16,"M",@progbits,16 ++ .align 4 ++L(INDEX): ++ .dword 0x0706050403020100 ++ .dword 0x0f0e0d0c0b0a0908 ++ ++libc_hidden_builtin_def (STRCMP) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/strcmp.c b/sysdeps/loongarch/lp64/multiarch/strcmp.c +new file mode 100644 +index 00000000..6f249c0b +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strcmp.c +@@ -0,0 +1,35 @@ ++/* Multiple versions of strcmp. ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. 
++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++/* Define multiple versions only for the definition in libc. */ ++#if IS_IN (libc) ++# define strcmp __redirect_strcmp ++# include ++# undef strcmp ++ ++# define SYMBOL_NAME strcmp ++# include "ifunc-strcmp.h" ++ ++libc_ifunc_redirected (__redirect_strcmp, strcmp, IFUNC_SELECTOR ()); ++ ++# ifdef SHARED ++__hidden_ver1 (strcmp, __GI_strcmp, __redirect_strcmp) ++ __attribute__ ((visibility ("hidden"))) __attribute_copy__ (strcmp); ++# endif ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/strcpy-aligned.S b/sysdeps/loongarch/lp64/multiarch/strcpy-aligned.S +new file mode 100644 +index 00000000..4ed539fd +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strcpy-aligned.S +@@ -0,0 +1,202 @@ ++/* Optimized strcpy stpcpy aligned implementation using basic LoongArch ++ instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ . 
*/ ++ ++#include ++#include ++#include ++ ++#if IS_IN (libc) ++# ifndef STRCPY ++# define STRCPY __strcpy_aligned ++# endif ++#else ++# ifndef STRCPY ++# define STRCPY strcpy ++# endif ++#endif ++ ++LEAF(STRCPY, 6) ++ andi a3, a0, 0x7 ++ move a2, a0 ++ beqz a3, L(dest_align) ++ sub.d a5, a1, a3 ++ addi.d a5, a5, 8 ++ ++L(make_dest_align): ++ ld.b t0, a1, 0 ++ addi.d a1, a1, 1 ++ st.b t0, a2, 0 ++ addi.d a2, a2, 1 ++ beqz t0, L(al_out) ++ ++ bne a1, a5, L(make_dest_align) ++ ++L(dest_align): ++ andi a4, a1, 7 ++ bstrins.d a1, zero, 2, 0 ++ ++ lu12i.w t5, 0x1010 ++ ld.d t0, a1, 0 ++ ori t5, t5, 0x101 ++ bstrins.d t5, t5, 63, 32 ++ ++ slli.d t6, t5, 0x7 ++ bnez a4, L(unalign) ++ sub.d t1, t0, t5 ++ andn t2, t6, t0 ++ ++ and t3, t1, t2 ++ bnez t3, L(al_end) ++ ++L(al_loop): ++ st.d t0, a2, 0 ++ ld.d t0, a1, 8 ++ ++ addi.d a1, a1, 8 ++ addi.d a2, a2, 8 ++ sub.d t1, t0, t5 ++ andn t2, t6, t0 ++ ++ and t3, t1, t2 ++ beqz t3, L(al_loop) ++ ++L(al_end): ++ ctz.d t1, t3 ++ srli.d t1, t1, 3 ++ addi.d t1, t1, 1 ++ ++ andi a3, t1, 8 ++ andi a4, t1, 4 ++ andi a5, t1, 2 ++ andi a6, t1, 1 ++ ++L(al_end_8): ++ beqz a3, L(al_end_4) ++ st.d t0, a2, 0 ++#ifdef USE_AS_STPCPY ++ addi.d a0, a2, 7 ++#endif ++ jr ra ++L(al_end_4): ++ beqz a4, L(al_end_2) ++ st.w t0, a2, 0 ++ addi.d a2, a2, 4 ++ srli.d t0, t0, 32 ++L(al_end_2): ++ beqz a5, L(al_end_1) ++ st.h t0, a2, 0 ++ addi.d a2, a2, 2 ++ srli.d t0, t0, 16 ++L(al_end_1): ++ beqz a6, L(al_out) ++ st.b t0, a2, 0 ++ addi.d a2, a2, 1 ++L(al_out): ++#ifdef USE_AS_STPCPY ++ addi.d a0, a2, -1 ++#endif ++ jr ra ++ ++ .align 4 ++L(unalign): ++ slli.d a5, a4, 3 ++ li.d t1, -1 ++ sub.d a6, zero, a5 ++ ++ srl.d a7, t0, a5 ++ sll.d t7, t1, a6 ++ ++ or t0, a7, t7 ++ sub.d t1, t0, t5 ++ andn t2, t6, t0 ++ and t3, t1, t2 ++ ++ bnez t3, L(un_end) ++ ++ ld.d t4, a1, 8 ++ ++ sub.d t1, t4, t5 ++ andn t2, t6, t4 ++ sll.d t0, t4, a6 ++ and t3, t1, t2 ++ ++ or t0, t0, a7 ++ bnez t3, L(un_end_with_remaining) ++ ++L(un_loop): ++ srl.d a7, t4, a5 ++ ++ ld.d t4, a1, 16 ++ addi.d a1, a1, 8 ++ ++ st.d t0, a2, 0 ++ addi.d a2, a2, 8 ++ ++ sub.d t1, t4, t5 ++ andn t2, t6, t4 ++ sll.d t0, t4, a6 ++ and t3, t1, t2 ++ ++ or t0, t0, a7 ++ beqz t3, L(un_loop) ++ ++L(un_end_with_remaining): ++ ctz.d t1, t3 ++ srli.d t1, t1, 3 ++ addi.d t1, t1, 1 ++ sub.d t1, t1, a4 ++ ++ blt t1, zero, L(un_end_less_8) ++ st.d t0, a2, 0 ++ addi.d a2, a2, 8 ++ beqz t1, L(un_out) ++ srl.d t0, t4, a5 ++ b L(un_end_less_8) ++ ++L(un_end): ++ ctz.d t1, t3 ++ srli.d t1, t1, 3 ++ addi.d t1, t1, 1 ++ ++L(un_end_less_8): ++ andi a4, t1, 4 ++ andi a5, t1, 2 ++ andi a6, t1, 1 ++L(un_end_4): ++ beqz a4, L(un_end_2) ++ st.w t0, a2, 0 ++ addi.d a2, a2, 4 ++ srli.d t0, t0, 32 ++L(un_end_2): ++ beqz a5, L(un_end_1) ++ st.h t0, a2, 0 ++ addi.d a2, a2, 2 ++ srli.d t0, t0, 16 ++L(un_end_1): ++ beqz a6, L(un_out) ++ st.b t0, a2, 0 ++ addi.d a2, a2, 1 ++L(un_out): ++#ifdef USE_AS_STPCPY ++ addi.d a0, a2, -1 ++#endif ++ jr ra ++END(STRCPY) ++ ++libc_hidden_builtin_def (STRCPY) +diff --git a/sysdeps/loongarch/lp64/multiarch/strcpy-lasx.S b/sysdeps/loongarch/lp64/multiarch/strcpy-lasx.S +new file mode 100644 +index 00000000..c2825612 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strcpy-lasx.S +@@ -0,0 +1,215 @@ ++/* Optimized strcpy stpcpy implementation using LoongArch LASX instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. 
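strcpy-aligned.S above doubles as the stpcpy body: the stpcpy-*.S files only define USE_AS_STPCPY and STRCPY before including it, and the #ifdef USE_AS_STPCPY blocks adjust what is left in a0. The externally visible difference is just the return value:

    #define _GNU_SOURCE
    #include <string.h>

    void
    example (void)
    {
      char buf[16];
      char *r1 = strcpy (buf, "abc");   /* r1 == buf                       */
      char *r2 = stpcpy (buf, "abc");   /* r2 == buf + 3, the written NUL  */
      (void) r1; (void) r2;
    }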
++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ . */ ++ ++#include ++#include ++#include ++ ++#if IS_IN (libc) && !defined __loongarch_soft_float ++ ++# ifndef STRCPY ++# define STRCPY __strcpy_lasx ++# endif ++ ++# ifdef USE_AS_STPCPY ++# define dstend a0 ++# else ++# define dstend a4 ++# endif ++ ++LEAF(STRCPY, 6) ++ ori t8, zero, 0xfe0 ++ andi t0, a1, 0xfff ++ li.d t7, -1 ++ move a2, a0 ++ ++ bltu t8, t0, L(page_cross_start) ++L(start_entry): ++ xvld xr0, a1, 0 ++ li.d t0, 32 ++ andi t1, a2, 0x1f ++ ++ xvsetanyeqz.b fcc0, xr0 ++ sub.d t0, t0, t1 ++ bcnez fcc0, L(end) ++ add.d a1, a1, t0 ++ ++ xvst xr0, a2, 0 ++ andi a3, a1, 0x1f ++ add.d a2, a2, t0 ++ bnez a3, L(unaligned) ++ ++ ++ xvld xr0, a1, 0 ++ xvsetanyeqz.b fcc0, xr0 ++ bcnez fcc0, L(al_end) ++L(al_loop): ++ xvst xr0, a2, 0 ++ ++ xvld xr0, a1, 32 ++ addi.d a2, a2, 32 ++ addi.d a1, a1, 32 ++ xvsetanyeqz.b fcc0, xr0 ++ ++ bceqz fcc0, L(al_loop) ++L(al_end): ++ xvmsknz.b xr0, xr0 ++ xvpickve.w xr1, xr0, 4 ++ vilvl.h vr0, vr1, vr0 ++ ++ movfr2gr.s t0, fa0 ++ cto.w t0, t0 ++ add.d a1, a1, t0 ++ xvld xr0, a1, -31 ++ ++ ++ add.d dstend, a2, t0 ++ xvst xr0, dstend, -31 ++ jr ra ++ nop ++ ++L(page_cross_start): ++ move a4, a1 ++ bstrins.d a4, zero, 4, 0 ++ xvld xr0, a4, 0 ++ xvmsknz.b xr0, xr0 ++ ++ xvpickve.w xr1, xr0, 4 ++ vilvl.h vr0, vr1, vr0 ++ movfr2gr.s t0, fa0 ++ sra.w t0, t0, a1 ++ ++ beq t0, t7, L(start_entry) ++ b L(tail) ++L(unaligned): ++ andi t0, a1, 0xfff ++ bltu t8, t0, L(un_page_cross) ++ ++ ++L(un_start_entry): ++ xvld xr0, a1, 0 ++ xvsetanyeqz.b fcc0, xr0 ++ bcnez fcc0, L(un_end) ++ addi.d a1, a1, 32 ++ ++L(un_loop): ++ xvst xr0, a2, 0 ++ andi t0, a1, 0xfff ++ addi.d a2, a2, 32 ++ bltu t8, t0, L(page_cross_loop) ++ ++L(un_loop_entry): ++ xvld xr0, a1, 0 ++ addi.d a1, a1, 32 ++ xvsetanyeqz.b fcc0, xr0 ++ bceqz fcc0, L(un_loop) ++ ++ addi.d a1, a1, -32 ++L(un_end): ++ xvmsknz.b xr0, xr0 ++ xvpickve.w xr1, xr0, 4 ++ vilvl.h vr0, vr1, vr0 ++ ++ ++ movfr2gr.s t0, fa0 ++L(un_tail): ++ cto.w t0, t0 ++ add.d a1, a1, t0 ++ xvld xr0, a1, -31 ++ ++ add.d dstend, a2, t0 ++ xvst xr0, dstend, -31 ++ jr ra ++L(un_page_cross): ++ sub.d a4, a1, a3 ++ ++ xvld xr0, a4, 0 ++ xvmsknz.b xr0, xr0 ++ xvpickve.w xr1, xr0, 4 ++ vilvl.h vr0, vr1, vr0 ++ ++ movfr2gr.s t0, fa0 ++ sra.w t0, t0, a1 ++ beq t0, t7, L(un_start_entry) ++ b L(un_tail) ++ ++ ++L(page_cross_loop): ++ sub.d a4, a1, a3 ++ xvld xr0, a4, 0 ++ xvmsknz.b xr0, xr0 ++ xvpickve.w xr1, xr0, 4 ++ ++ vilvl.h vr0, vr1, vr0 ++ movfr2gr.s t0, fa0 ++ sra.w t0, t0, a1 ++ beq t0, t7, L(un_loop_entry) ++ ++ b L(un_tail) ++L(end): ++ xvmsknz.b xr0, xr0 ++ xvpickve.w xr1, xr0, 4 ++ vilvl.h vr0, vr1, vr0 ++ ++ movfr2gr.s t0, fa0 ++L(tail): ++ cto.w t0, t0 ++ add.d dstend, a2, t0 ++ add.d a5, a1, t0 ++ ++L(less_32): ++ srli.d t1, t0, 4 ++ beqz t1, L(less_16) ++ vld vr0, a1, 0 ++ vld vr1, a5, -15 ++ ++ vst vr0, a2, 0 ++ vst vr1, dstend, -15 ++ jr ra ++L(less_16): ++ srli.d t1, t0, 3 ++ ++ beqz t1, 
L(less_8) ++ ld.d t2, a1, 0 ++ ld.d t3, a5, -7 ++ st.d t2, a2, 0 ++ ++ st.d t3, dstend, -7 ++ jr ra ++L(less_8): ++ li.d t1, 3 ++ bltu t0, t1, L(less_3) ++ ++ ld.w t2, a1, 0 ++ ld.w t3, a5, -3 ++ st.w t2, a2, 0 ++ st.w t3, dstend, -3 ++ ++ jr ra ++L(less_3): ++ beqz t0, L(zero_byte) ++ ld.h t2, a1, 0 ++ ++ st.h t2, a2, 0 ++L(zero_byte): ++ st.b zero, dstend, 0 ++ jr ra ++END(STRCPY) ++ ++libc_hidden_builtin_def (STRCPY) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/strcpy-lsx.S b/sysdeps/loongarch/lp64/multiarch/strcpy-lsx.S +new file mode 100644 +index 00000000..fc2498f7 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strcpy-lsx.S +@@ -0,0 +1,212 @@ ++/* Optimized strcpy stpcpy implementation using LoongArch LSX instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ . */ ++ ++#include ++#include ++#include ++ ++#if IS_IN (libc) && !defined __loongarch_soft_float ++ ++# ifndef STRCPY ++# define STRCPY __strcpy_lsx ++# endif ++ ++LEAF(STRCPY, 6) ++ pcalau12i t0, %pc_hi20(L(INDEX)) ++ andi a4, a1, 0xf ++ vld vr1, t0, %pc_lo12(L(INDEX)) ++ move a2, a0 ++ ++ beqz a4, L(load_start) ++ xor t0, a1, a4 ++ vld vr0, t0, 0 ++ vreplgr2vr.b vr2, a4 ++ ++ vadd.b vr2, vr2, vr1 ++ vshuf.b vr0, vr2, vr0, vr2 ++ vsetanyeqz.b fcc0, vr0 ++ bcnez fcc0, L(end) ++ ++L(load_start): ++ vld vr0, a1, 0 ++ li.d t1, 16 ++ andi a3, a2, 0xf ++ vsetanyeqz.b fcc0, vr0 ++ ++ ++ sub.d t0, t1, a3 ++ bcnez fcc0, L(end) ++ add.d a1, a1, t0 ++ vst vr0, a2, 0 ++ ++ andi a3, a1, 0xf ++ add.d a2, a2, t0 ++ bnez a3, L(unaligned) ++ vld vr0, a1, 0 ++ ++ vsetanyeqz.b fcc0, vr0 ++ bcnez fcc0, L(al_end) ++L(al_loop): ++ vst vr0, a2, 0 ++ vld vr0, a1, 16 ++ ++ addi.d a2, a2, 16 ++ addi.d a1, a1, 16 ++ vsetanyeqz.b fcc0, vr0 ++ bceqz fcc0, L(al_loop) ++ ++ ++L(al_end): ++ vmsknz.b vr1, vr0 ++ movfr2gr.s t0, fa1 ++ cto.w t0, t0 ++ add.d a1, a1, t0 ++ ++ vld vr0, a1, -15 ++# ifdef USE_AS_STPCPY ++ add.d a0, a2, t0 ++ vst vr0, a0, -15 ++# else ++ add.d a2, a2, t0 ++ vst vr0, a2, -15 ++# endif ++ jr ra ++ ++L(end): ++ vmsknz.b vr1, vr0 ++ movfr2gr.s t0, fa1 ++ cto.w t0, t0 ++ addi.d t0, t0, 1 ++ ++L(end_16): ++ andi t1, t0, 16 ++ beqz t1, L(end_8) ++ vst vr0, a2, 0 ++# ifdef USE_AS_STPCPY ++ addi.d a0, a2, 15 ++# endif ++ jr ra ++ ++L(end_8): ++ andi t2, t0, 8 ++ andi t3, t0, 4 ++ andi t4, t0, 2 ++ andi t5, t0, 1 ++ ++ beqz t2, L(end_4) ++ vstelm.d vr0, a2, 0, 0 ++ addi.d a2, a2, 8 ++ vbsrl.v vr0, vr0, 8 ++ ++L(end_4): ++ beqz t3, L(end_2) ++ vstelm.w vr0, a2, 0, 0 ++ addi.d a2, a2, 4 ++ vbsrl.v vr0, vr0, 4 ++ ++L(end_2): ++ beqz t4, L(end_1) ++ vstelm.h vr0, a2, 0, 0 ++ addi.d a2, a2, 2 ++ vbsrl.v vr0, vr0, 2 ++ ++ ++L(end_1): ++ beqz t5, L(out) ++ vstelm.b vr0, a2, 0, 0 ++ addi.d a2, a2, 1 ++L(out): ++# ifdef USE_AS_STPCPY ++ addi.d a0, a2, -1 ++# endif ++ jr ra ++ ++ .align 4 ++L(unaligned): ++ bstrins.d a1, zero, 3, 0 ++ 
vld vr2, a1, 0 ++ vreplgr2vr.b vr3, a3 ++ vslt.b vr4, vr1, vr3 ++ ++ vor.v vr0, vr2, vr4 ++ vsetanyeqz.b fcc0, vr0 ++ bcnez fcc0, L(un_first_end) ++ vld vr0, a1, 16 ++ ++ vadd.b vr3, vr3, vr1 ++ vshuf.b vr4, vr0, vr2, vr3 ++ vsetanyeqz.b fcc0, vr0 ++ bcnez fcc0, L(un_end) ++ ++ ++ vor.v vr2, vr0, vr0 ++ addi.d a1, a1, 16 ++L(un_loop): ++ vld vr0, a1, 16 ++ vst vr4, a2, 0 ++ ++ addi.d a2, a2, 16 ++ vshuf.b vr4, vr0, vr2, vr3 ++ vsetanyeqz.b fcc0, vr0 ++ bcnez fcc0, L(un_end) ++ ++ vld vr2, a1, 32 ++ vst vr4, a2, 0 ++ addi.d a1, a1, 32 ++ addi.d a2, a2, 16 ++ ++ vshuf.b vr4, vr2, vr0, vr3 ++ vsetanyeqz.b fcc0, vr2 ++ bceqz fcc0, L(un_loop) ++ vor.v vr0, vr2, vr2 ++ ++ ++ addi.d a1, a1, -16 ++L(un_end): ++ vsetanyeqz.b fcc0, vr4 ++ bcnez fcc0, 1f ++ vst vr4, a2, 0 ++ ++1: ++ vmsknz.b vr1, vr0 ++ movfr2gr.s t0, fa1 ++ cto.w t0, t0 ++ add.d a1, a1, t0 ++ ++ vld vr0, a1, 1 ++ add.d a2, a2, t0 ++ sub.d a2, a2, a3 ++ vst vr0, a2, 1 ++# ifdef USE_AS_STPCPY ++ addi.d a0, a2, 16 ++# endif ++ jr ra ++L(un_first_end): ++ addi.d a2, a2, -16 ++ addi.d a1, a1, -16 ++ b 1b ++END(STRCPY) ++ ++ .section .rodata.cst16,"M",@progbits,16 ++ .align 4 ++L(INDEX): ++ .dword 0x0706050403020100 ++ .dword 0x0f0e0d0c0b0a0908 ++ ++libc_hidden_builtin_def (STRCPY) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/strcpy-unaligned.S b/sysdeps/loongarch/lp64/multiarch/strcpy-unaligned.S +new file mode 100644 +index 00000000..9e31883b +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strcpy-unaligned.S +@@ -0,0 +1,138 @@ ++/* Optimized strcpy unaligned implementation using basic LoongArch ++ instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ . 
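A word on the 0xfe0 test near the top of __strcpy_lasx further up in this hunk: a 32-byte vector load may read past the terminating NUL, which is safe only while it stays inside the source's page. The guard therefore checks whether the load would cross a 4 KiB boundary and, if so, takes the aligned page-cross path first. As a sketch of the check:

    #include <stdint.h>

    /* True when an n-byte load starting at s would cross a 4 KiB page.
       For the LASX path n is 32, hence the constant 0xfe0 (4096 - 32).  */
    static inline int
    crosses_page (const void *s, unsigned n)
    {
      return ((uintptr_t) s & 0xfff) > (0x1000u - n);
    }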
*/ ++ ++#include ++#include ++#include ++ ++#if IS_IN (libc) ++ ++# ifndef STRCPY ++# define STRCPY __strcpy_unaligned ++# endif ++ ++# ifdef USE_AS_STPCPY ++# define dstend a0 ++# else ++# define dstend a4 ++# endif ++ ++LEAF(STRCPY, 6) ++ lu12i.w t5, 0x01010 ++ li.w t0, 0xff8 ++ ori t5, t5, 0x101 ++ andi t1, a1, 0xfff ++ ++ bstrins.d t5, t5, 63, 32 ++ move a2, a0 ++ slli.d t6, t5, 7 ++ bltu t0, t1, L(page_cross) ++ ++L(start_entry): ++ ld.d t0, a1, 0 ++ li.d t3, 8 ++ andi a3, a1, 0x7 ++ sub.d t1, t0, t5 ++ ++ andn t2, t6, t0 ++ sub.d t3, t3, a3 ++ and t1, t1, t2 ++ bnez t1, L(end) ++ ++ ++ add.d a1, a1, t3 ++ st.d t0, a2, 0 ++ add.d a2, a2, t3 ++ ld.d t0, a1, 0 ++ ++ sub.d t1, t0, t5 ++ andn t2, t6, t0 ++ and t1, t1, t2 ++ bnez t1, L(long_end) ++ ++L(loop): ++ st.d t0, a2, 0 ++ ld.d t0, a1, 8 ++ addi.d a2, a2, 8 ++ addi.d a1, a1, 8 ++ ++ sub.d t1, t0, t5 ++ andn t2, t6, t0 ++ and t1, t1, t2 ++ beqz t1, L(loop) ++ ++ ++L(long_end): ++ ctz.d t1, t1 ++ srli.d t1, t1, 3 ++ add.d a1, a1, t1 ++ ld.d t0, a1, -7 ++ ++ add.d dstend, a2, t1 ++ st.d t0, dstend, -7 ++ jr ra ++ nop ++ ++L(end): ++ ctz.d t1, t1 ++ srli.d t1, t1, 3 ++ add.d a3, a1, t1 ++ add.d dstend, a2, t1 ++ ++L(less_8): ++ li.d t0, 3 ++ bltu t1, t0, L(less_3) ++ ld.w t1, a1, 0 ++ ld.w t2, a3, -3 ++ ++ ++ st.w t1, a2, 0 ++ st.w t2, dstend, -3 ++ jr ra ++L(less_3): ++ beqz t1, L(zero_bytes) ++ ++ ld.h t1, a1, 0 ++ st.h t1, a2, 0 ++L(zero_bytes): ++ st.b zero, dstend, 0 ++ jr ra ++ ++L(page_cross): ++ move a4, a1 ++ bstrins.d a4, zero, 2, 0 ++ ld.d t0, a4, 0 ++ li.d t3, -1 ++ ++ slli.d t4, a1, 3 ++ srl.d t3, t3, t4 ++ srl.d t0, t0, t4 ++ orn t0, t0, t3 ++ ++ ++ sub.d t1, t0, t5 ++ andn t2, t6, t0 ++ and t1, t1, t2 ++ beqz t1, L(start_entry) ++ ++ b L(end) ++END(STRCPY) ++ ++libc_hidden_builtin_def (STRCPY) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/strcpy.c b/sysdeps/loongarch/lp64/multiarch/strcpy.c +new file mode 100644 +index 00000000..46afd068 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strcpy.c +@@ -0,0 +1,35 @@ ++/* Multiple versions of strcpy. ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++/* Define multiple versions only for the definition in libc. 
*/ ++#if IS_IN (libc) ++# define strcpy __redirect_strcpy ++# include ++# undef strcpy ++ ++# define SYMBOL_NAME strcpy ++# include "ifunc-lasx.h" ++ ++libc_ifunc_redirected (__redirect_strcpy, strcpy, IFUNC_SELECTOR ()); ++ ++# ifdef SHARED ++__hidden_ver1 (strcpy, __GI_strcpy, __redirect_strcpy) ++ __attribute__ ((visibility ("hidden"))) __attribute_copy__ (strcpy); ++# endif ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/strlen-aligned.S b/sysdeps/loongarch/lp64/multiarch/strlen-aligned.S +new file mode 100644 +index 00000000..ed0548e4 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strlen-aligned.S +@@ -0,0 +1,100 @@ ++/* Optimized strlen implementation using basic LoongArch instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ . */ ++ ++#include ++#include ++#include ++ ++#if IS_IN (libc) ++# define STRLEN __strlen_aligned ++#else ++# define STRLEN strlen ++#endif ++ ++LEAF(STRLEN, 6) ++ move a1, a0 ++ bstrins.d a0, zero, 2, 0 ++ lu12i.w a2, 0x01010 ++ li.w t0, -1 ++ ++ ld.d t2, a0, 0 ++ andi t1, a1, 0x7 ++ ori a2, a2, 0x101 ++ slli.d t1, t1, 3 ++ ++ bstrins.d a2, a2, 63, 32 ++ sll.d t1, t0, t1 ++ slli.d t3, a2, 7 ++ nor a3, zero, t3 ++ ++ orn t2, t2, t1 ++ sub.d t0, t2, a2 ++ nor t1, t2, a3 ++ and t0, t0, t1 ++ ++ ++ bnez t0, L(count_pos) ++ addi.d a0, a0, 8 ++L(loop_16_7bit): ++ ld.d t2, a0, 0 ++ sub.d t1, t2, a2 ++ ++ and t0, t1, t3 ++ bnez t0, L(more_check) ++ ld.d t2, a0, 8 ++ sub.d t1, t2, a2 ++ ++ and t0, t1, t3 ++ addi.d a0, a0, 16 ++ beqz t0, L(loop_16_7bit) ++ addi.d a0, a0, -8 ++ ++L(more_check): ++ nor t0, t2, a3 ++ and t0, t1, t0 ++ bnez t0, L(count_pos) ++ addi.d a0, a0, 8 ++ ++ ++L(loop_16_8bit): ++ ld.d t2, a0, 0 ++ sub.d t1, t2, a2 ++ nor t0, t2, a3 ++ and t0, t0, t1 ++ ++ bnez t0, L(count_pos) ++ ld.d t2, a0, 8 ++ addi.d a0, a0, 16 ++ sub.d t1, t2, a2 ++ ++ nor t0, t2, a3 ++ and t0, t0, t1 ++ beqz t0, L(loop_16_8bit) ++ addi.d a0, a0, -8 ++ ++L(count_pos): ++ ctz.d t1, t0 ++ sub.d a0, a0, a1 ++ srli.d t1, t1, 3 ++ add.d a0, a0, t1 ++ ++ jr ra ++END(STRLEN) ++ ++libc_hidden_builtin_def (STRLEN) +diff --git a/sysdeps/loongarch/lp64/multiarch/strlen-lasx.S b/sysdeps/loongarch/lp64/multiarch/strlen-lasx.S +new file mode 100644 +index 00000000..91342f34 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strlen-lasx.S +@@ -0,0 +1,63 @@ ++/* Optimized strlen implementation using LoongArch LASX instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. 
++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ . */ ++ ++#include ++#include ++#include ++ ++#if IS_IN (libc) && !defined __loongarch_soft_float ++ ++# define STRLEN __strlen_lasx ++ ++LEAF(STRLEN, 6) ++ move a1, a0 ++ bstrins.d a0, zero, 4, 0 ++ li.d t1, -1 ++ xvld xr0, a0, 0 ++ ++ xvmsknz.b xr0, xr0 ++ xvpickve.w xr1, xr0, 4 ++ vilvl.h vr0, vr1, vr0 ++ movfr2gr.s t0, fa0 # sign extend ++ ++ sra.w t0, t0, a1 ++ beq t0, t1, L(loop) ++ cto.w a0, t0 ++ jr ra ++ ++L(loop): ++ xvld xr0, a0, 32 ++ addi.d a0, a0, 32 ++ xvsetanyeqz.b fcc0, xr0 ++ bceqz fcc0, L(loop) ++ ++ ++ xvmsknz.b xr0, xr0 ++ sub.d a0, a0, a1 ++ xvpickve.w xr1, xr0, 4 ++ vilvl.h vr0, vr1, vr0 ++ ++ movfr2gr.s t0, fa0 ++ cto.w t0, t0 ++ add.d a0, a0, t0 ++ jr ra ++END(STRLEN) ++ ++libc_hidden_builtin_def (STRLEN) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/strlen-lsx.S b/sysdeps/loongarch/lp64/multiarch/strlen-lsx.S +new file mode 100644 +index 00000000..b09c12e0 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strlen-lsx.S +@@ -0,0 +1,71 @@ ++/* Optimized strlen implementation using LoongArch LSX instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ . */ ++ ++#include ++#include ++#include ++ ++#if IS_IN (libc) && !defined __loongarch_soft_float ++ ++# define STRLEN __strlen_lsx ++ ++LEAF(STRLEN, 6) ++ move a1, a0 ++ bstrins.d a0, zero, 4, 0 ++ vld vr0, a0, 0 ++ vld vr1, a0, 16 ++ ++ li.d t1, -1 ++ vmsknz.b vr0, vr0 ++ vmsknz.b vr1, vr1 ++ vilvl.h vr0, vr1, vr0 ++ ++ movfr2gr.s t0, fa0 ++ sra.w t0, t0, a1 ++ beq t0, t1, L(loop) ++ cto.w a0, t0 ++ ++ jr ra ++ nop ++ nop ++ nop ++ ++ ++L(loop): ++ vld vr0, a0, 32 ++ vld vr1, a0, 48 ++ addi.d a0, a0, 32 ++ vmin.bu vr2, vr0, vr1 ++ ++ vsetanyeqz.b fcc0, vr2 ++ bceqz fcc0, L(loop) ++ vmsknz.b vr0, vr0 ++ vmsknz.b vr1, vr1 ++ ++ vilvl.h vr0, vr1, vr0 ++ sub.d a0, a0, a1 ++ movfr2gr.s t0, fa0 ++ cto.w t0, t0 ++ ++ add.d a0, a0, t0 ++ jr ra ++END(STRLEN) ++ ++libc_hidden_builtin_def (STRLEN) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/strlen.c b/sysdeps/loongarch/lp64/multiarch/strlen.c +new file mode 100644 +index 00000000..381c2daa +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strlen.c +@@ -0,0 +1,37 @@ ++/* Multiple versions of strlen. ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. 
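The vector strlen variants above reduce each 32-byte block to a bitmask with xvmsknz.b (bit i set iff byte i is non-zero), merge the two halves' masks with xvpickve.w/vilvl.h, shift away the bytes before the real start pointer, and then use cto (count trailing ones) to obtain the offset of the first NUL. The last step in C terms, as a sketch:

    #include <stdint.h>

    /* m: per-byte non-zero mask of a block, already shifted so bit 0 is the
       first byte of interest.  Valid only when the block is known to contain
       the terminating NUL, i.e. ~m != 0.  */
    static inline unsigned
    len_in_block (uint32_t m)
    {
      return __builtin_ctz (~m);   /* cto.w: count trailing one bits */
    }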
++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++/* Define multiple versions only for the definition in libc. */ ++ ++#if IS_IN (libc) ++# define strlen __redirect_strlen ++# include ++# undef strlen ++ ++# define SYMBOL_NAME strlen ++# include "ifunc-strlen.h" ++ ++libc_ifunc_redirected (__redirect_strlen, strlen, IFUNC_SELECTOR ()); ++ ++# ifdef SHARED ++__hidden_ver1 (strlen, __GI_strlen, __redirect_strlen) ++ __attribute__ ((visibility ("hidden"))) __attribute_copy__ (strlen); ++# endif ++ ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/strncmp-aligned.S b/sysdeps/loongarch/lp64/multiarch/strncmp-aligned.S +new file mode 100644 +index 00000000..f63de872 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strncmp-aligned.S +@@ -0,0 +1,218 @@ ++/* Optimized strncmp implementation using basic LoongArch instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ . 
*/ ++ ++#include ++#include ++#include ++ ++#if IS_IN (libc) ++# define STRNCMP __strncmp_aligned ++#else ++# define STRNCMP strncmp ++#endif ++ ++LEAF(STRNCMP, 6) ++ beqz a2, L(ret0) ++ lu12i.w a5, 0x01010 ++ andi a3, a0, 0x7 ++ ori a5, a5, 0x101 ++ ++ andi a4, a1, 0x7 ++ bstrins.d a5, a5, 63, 32 ++ li.d t7, -1 ++ li.d t8, 8 ++ ++ addi.d a2, a2, -1 ++ slli.d a6, a5, 7 ++ bne a3, a4, L(unaligned) ++ bstrins.d a0, zero, 2, 0 ++ ++ bstrins.d a1, zero, 2, 0 ++ ld.d t0, a0, 0 ++ ld.d t1, a1, 0 ++ slli.d t2, a3, 3 ++ ++ ++ sub.d t5, t8, a3 ++ srl.d t3, t7, t2 ++ srl.d t0, t0, t2 ++ srl.d t1, t1, t2 ++ ++ orn t0, t0, t3 ++ orn t1, t1, t3 ++ sub.d t2, t0, a5 ++ andn t3, a6, t0 ++ ++ and t2, t2, t3 ++ bne t0, t1, L(al_end) ++ sltu t4, a2, t5 ++ sub.d a2, a2, t5 ++ ++L(al_loop): ++ or t4, t2, t4 ++ bnez t4, L(ret0) ++ ldx.d t0, a0, t8 ++ ldx.d t1, a1, t8 ++ ++ ++ addi.d t8, t8, 8 ++ sltui t4, a2, 8 ++ addi.d a2, a2, -8 ++ sub.d t2, t0, a5 ++ ++ andn t3, a6, t0 ++ and t2, t2, t3 ++ beq t0, t1, L(al_loop) ++ addi.d a2, a2, 8 ++ ++L(al_end): ++ xor t3, t0, t1 ++ or t2, t2, t3 ++ ctz.d t2, t2 ++ srli.d t4, t2, 3 ++ ++ bstrins.d t2, zero, 2, 0 ++ srl.d t0, t0, t2 ++ srl.d t1, t1, t2 ++ andi t0, t0, 0xff ++ ++ ++ andi t1, t1, 0xff ++ sltu t2, a2, t4 ++ sub.d a0, t0, t1 ++ masknez a0, a0, t2 ++ ++ jr ra ++L(ret0): ++ move a0, zero ++ jr ra ++ nop ++ ++L(unaligned): ++ slt a7, a4, a3 ++ xor t0, a0, a1 ++ maskeqz t0, t0, a7 ++ xor a0, a0, t0 ++ ++ xor a1, a1, t0 ++ andi a3, a0, 0x7 ++ andi a4, a1, 0x7 ++ bstrins.d a0, zero, 2, 0 ++ ++ ++ bstrins.d a1, zero, 2, 0 ++ ld.d t4, a0, 0 ++ ld.d t1, a1, 0 ++ slli.d t2, a3, 3 ++ ++ slli.d t3, a4, 3 ++ srl.d t5, t7, t3 ++ srl.d t0, t4, t2 ++ srl.d t1, t1, t3 ++ ++ orn t0, t0, t5 ++ orn t1, t1, t5 ++ bne t0, t1, L(not_equal) ++ sub.d t6, t8, a4 ++ ++ sub.d a4, t2, t3 ++ sll.d t2, t7, t2 ++ sub.d t5, t8, a3 ++ orn t4, t4, t2 ++ ++ ++ sub.d t2, t4, a5 ++ andn t3, a6, t4 ++ sltu t7, a2, t5 ++ and t2, t2, t3 ++ ++ sub.d a3, zero, a4 ++ or t2, t2, t7 ++ bnez t2, L(un_end) ++ sub.d t7, t5, t6 ++ ++ sub.d a2, a2, t5 ++ sub.d t6, t8, t7 ++L(un_loop): ++ srl.d t5, t4, a4 ++ ldx.d t4, a0, t8 ++ ++ ldx.d t1, a1, t8 ++ addi.d t8, t8, 8 ++ sll.d t0, t4, a3 ++ or t0, t0, t5 ++ ++ ++ bne t0, t1, L(loop_not_equal) ++ sub.d t2, t4, a5 ++ andn t3, a6, t4 ++ sltui t5, a2, 8 ++ ++ and t2, t2, t3 ++ addi.d a2, a2, -8 ++ or t3, t2, t5 ++ beqz t3, L(un_loop) ++ ++ addi.d a2, a2, 8 ++L(un_end): ++ sub.d t2, t0, a5 ++ andn t3, a6, t0 ++ sltu t5, a2, t6 ++ ++ and t2, t2, t3 ++ or t2, t2, t5 ++ bnez t2, L(ret0) ++ ldx.d t1, a1, t8 ++ ++ ++ srl.d t0, t4, a4 ++ sub.d a2, a2, t6 ++L(not_equal): ++ sub.d t2, t0, a5 ++ andn t3, a6, t0 ++ ++ xor t4, t0, t1 ++ and t2, t2, t3 ++ or t2, t2, t4 ++ ctz.d t2, t2 ++ ++ bstrins.d t2, zero, 2, 0 ++ srli.d t4, t2, 3 ++ srl.d t0, t0, t2 ++ srl.d t1, t1, t2 ++ ++ andi t0, t0, 0xff ++ andi t1, t1, 0xff ++ sub.d t2, t0, t1 ++ sub.d t3, t1, t0 ++ ++ ++ masknez t0, t2, a7 ++ maskeqz t1, t3, a7 ++ sltu t2, a2, t4 ++ or a0, t0, t1 ++ ++ masknez a0, a0, t2 ++ jr ra ++L(loop_not_equal): ++ add.d a2, a2, t7 ++ b L(not_equal) ++END(STRNCMP) ++ ++libc_hidden_builtin_def (STRNCMP) +diff --git a/sysdeps/loongarch/lp64/multiarch/strncmp-lsx.S b/sysdeps/loongarch/lp64/multiarch/strncmp-lsx.S +new file mode 100644 +index 00000000..83cb801d +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strncmp-lsx.S +@@ -0,0 +1,208 @@ ++/* Optimized strncmp implementation using LoongArch LSX instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. 
++ ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ . */ ++ ++#include ++#include ++#include ++ ++#if IS_IN (libc) && !defined __loongarch_soft_float ++ ++# define STRNCMP __strncmp_lsx ++ ++LEAF(STRNCMP, 6) ++ beqz a2, L(ret0) ++ pcalau12i t0, %pc_hi20(L(INDEX)) ++ andi a3, a0, 0xf ++ vld vr2, t0, %pc_lo12(L(INDEX)) ++ ++ andi a4, a1, 0xf ++ li.d t2, 16 ++ bne a3, a4, L(unaligned) ++ xor t0, a0, a3 ++ ++ xor t1, a1, a4 ++ vld vr0, t0, 0 ++ vld vr1, t1, 0 ++ vreplgr2vr.b vr3, a3 ++ ++ ++ sub.d t2, t2, a3 ++ vadd.b vr3, vr3, vr2 ++ vshuf.b vr0, vr3, vr0, vr3 ++ vshuf.b vr1, vr3, vr1, vr3 ++ ++ vseq.b vr3, vr0, vr1 ++ vmin.bu vr3, vr0, vr3 ++ bgeu t2, a2, L(al_early_end) ++ vsetanyeqz.b fcc0, vr3 ++ ++ bcnez fcc0, L(al_end) ++ add.d a3, a0, a2 ++ addi.d a4, a3, -1 ++ bstrins.d a4, zero, 3, 0 ++ ++ sub.d a2, a3, a4 ++L(al_loop): ++ vld vr0, t0, 16 ++ vld vr1, t1, 16 ++ addi.d t0, t0, 16 ++ ++ ++ addi.d t1, t1, 16 ++ vseq.b vr3, vr0, vr1 ++ vmin.bu vr3, vr0, vr3 ++ beq t0, a4, L(al_early_end) ++ ++ vsetanyeqz.b fcc0, vr3 ++ bceqz fcc0, L(al_loop) ++L(al_end): ++ vseqi.b vr3, vr3, 0 ++ vfrstpi.b vr3, vr3, 0 ++ ++ vshuf.b vr0, vr0, vr0, vr3 ++ vshuf.b vr1, vr1, vr1, vr3 ++ vpickve2gr.bu t0, vr0, 0 ++ vpickve2gr.bu t1, vr1, 0 ++ ++ sub.d a0, t0, t1 ++ jr ra ++L(al_early_end): ++ vreplgr2vr.b vr4, a2 ++ vslt.b vr4, vr2, vr4 ++ ++ ++ vorn.v vr3, vr3, vr4 ++ b L(al_end) ++L(unaligned): ++ slt a5, a3, a4 ++ xor t0, a0, a1 ++ ++ maskeqz t0, t0, a5 ++ xor a0, a0, t0 ++ xor a1, a1, t0 ++ andi a3, a0, 0xf ++ ++ andi a4, a1, 0xf ++ xor t0, a0, a3 ++ xor t1, a1, a4 ++ vld vr0, t0, 0 ++ ++ vld vr3, t1, 0 ++ sub.d t2, t2, a3 ++ vreplgr2vr.b vr4, a3 ++ vreplgr2vr.b vr5, a4 ++ ++ ++ vaddi.bu vr6, vr2, 16 ++ vsub.b vr7, vr4, vr5 ++ vsub.b vr6, vr6, vr7 ++ vadd.b vr4, vr2, vr4 ++ ++ vshuf.b vr1, vr3, vr3, vr6 ++ vshuf.b vr0, vr7, vr0, vr4 ++ vshuf.b vr1, vr7, vr1, vr4 ++ vseq.b vr4, vr0, vr1 ++ ++ vmin.bu vr4, vr0, vr4 ++ bgeu t2, a2, L(un_early_end) ++ vsetanyeqz.b fcc0, vr4 ++ bcnez fcc0, L(un_end) ++ ++ add.d a6, a0, a2 ++ vslt.b vr5, vr2, vr5 ++ addi.d a7, a6, -1 ++ vor.v vr3, vr3, vr5 ++ ++ ++ bstrins.d a7, zero, 3, 0 ++ sub.d a2, a6, a7 ++L(un_loop): ++ vld vr0, t0, 16 ++ addi.d t0, t0, 16 ++ ++ vsetanyeqz.b fcc0, vr3 ++ bcnez fcc0, L(has_zero) ++ beq t0, a7, L(end_with_len) ++ vor.v vr1, vr3, vr3 ++ ++ vld vr3, t1, 16 ++ addi.d t1, t1, 16 ++ vshuf.b vr1, vr3, vr1, vr6 ++ vseq.b vr4, vr0, vr1 ++ ++ vmin.bu vr4, vr0, vr4 ++ vsetanyeqz.b fcc0, vr4 ++ bceqz fcc0, L(un_loop) ++L(un_end): ++ vseqi.b vr4, vr4, 0 ++ ++ ++ vfrstpi.b vr4, vr4, 0 ++ vshuf.b vr0, vr0, vr0, vr4 ++ vshuf.b vr1, vr1, vr1, vr4 ++ vpickve2gr.bu t0, vr0, 0 ++ ++ vpickve2gr.bu t1, vr1, 0 ++ sub.d t2, t0, t1 ++ sub.d t3, t1, t0 ++ masknez t0, t2, a5 ++ ++ maskeqz t1, t3, a5 ++ or a0, t0, t1 ++ jr ra ++L(has_zero): ++ vshuf.b vr1, vr3, vr3, vr6 ++ ++ vseq.b vr4, vr0, vr1 ++ vmin.bu vr4, vr0, vr4 ++ bne 
t0, a7, L(un_end) ++L(un_early_end): ++ vreplgr2vr.b vr5, a2 ++ ++ vslt.b vr5, vr2, vr5 ++ vorn.v vr4, vr4, vr5 ++ b L(un_end) ++L(end_with_len): ++ sub.d a6, a3, a4 ++ ++ bgeu a6, a2, 1f ++ vld vr4, t1, 16 ++1: ++ vshuf.b vr1, vr4, vr3, vr6 ++ vseq.b vr4, vr0, vr1 ++ ++ vmin.bu vr4, vr0, vr4 ++ vreplgr2vr.b vr5, a2 ++ vslt.b vr5, vr2, vr5 ++ vorn.v vr4, vr4, vr5 ++ ++ b L(un_end) ++L(ret0): ++ move a0, zero ++ jr ra ++END(STRNCMP) ++ ++ .section .rodata.cst16,"M",@progbits,16 ++ .align 4 ++L(INDEX): ++ .dword 0x0706050403020100 ++ .dword 0x0f0e0d0c0b0a0908 ++ ++libc_hidden_builtin_def (STRNCMP) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/strncmp.c b/sysdeps/loongarch/lp64/multiarch/strncmp.c +new file mode 100644 +index 00000000..af6d0bc4 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strncmp.c +@@ -0,0 +1,35 @@ ++/* Multiple versions of strncmp. ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++/* Define multiple versions only for the definition in libc. */ ++#if IS_IN (libc) ++# define strncmp __redirect_strncmp ++# include ++# undef strncmp ++ ++# define SYMBOL_NAME strncmp ++# include "ifunc-strncmp.h" ++ ++libc_ifunc_redirected (__redirect_strncmp, strncmp, IFUNC_SELECTOR ()); ++ ++# ifdef SHARED ++__hidden_ver1 (strncmp, __GI_strncmp, __redirect_strncmp) ++ __attribute__ ((visibility ("hidden"))) __attribute_copy__ (strncmp); ++# endif ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/strnlen-aligned.S b/sysdeps/loongarch/lp64/multiarch/strnlen-aligned.S +new file mode 100644 +index 00000000..a8296a1b +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strnlen-aligned.S +@@ -0,0 +1,102 @@ ++/* Optimized strnlen implementation using basic LoongArch instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ . 
*/ ++ ++#include ++#include ++#include ++ ++#if IS_IN (libc) ++# define STRNLEN __strnlen_aligned ++#else ++# define STRNLEN __strnlen ++#endif ++ ++LEAF(STRNLEN, 6) ++ beqz a1, L(out) ++ lu12i.w a2, 0x01010 ++ andi t1, a0, 0x7 ++ move t4, a0 ++ ++ bstrins.d a0, zero, 2, 0 ++ ori a2, a2, 0x101 ++ li.w t0, -1 ++ ld.d t2, a0, 0 ++ ++ slli.d t3, t1, 3 ++ bstrins.d a2, a2, 63, 32 ++ li.w t5, 8 ++ slli.d a3, a2, 7 ++ ++ sub.w t1, t5, t1 ++ sll.d t0, t0, t3 ++ orn t2, t2, t0 ++ sub.d t0, t2, a2 ++ ++ ++ andn t3, a3, t2 ++ and t0, t0, t3 ++ bnez t0, L(count_pos) ++ sub.d t5, a1, t1 ++ ++ bgeu t1, a1, L(out) ++ addi.d a0, a0, 8 ++L(loop): ++ ld.d t2, a0, 0 ++ sub.d t0, t2, a2 ++ ++ andn t1, a3, t2 ++ sltui t6, t5, 9 ++ and t0, t0, t1 ++ or t7, t0, t6 ++ ++ bnez t7, L(count_pos) ++ ld.d t2, a0, 8 ++ addi.d a0, a0, 16 ++ sub.d t0, t2, a2 ++ ++ ++ andn t1, a3, t2 ++ sltui t6, t5, 17 ++ and t0, t0, t1 ++ addi.d t5, t5, -16 ++ ++ or t7, t0, t6 ++ beqz t7, L(loop) ++ addi.d a0, a0, -8 ++L(count_pos): ++ ctz.d t1, t0 ++ ++ sub.d a0, a0, t4 ++ srli.d t1, t1, 3 ++ add.d a0, t1, a0 ++ sltu t0, a0, a1 ++ ++ masknez t1, a1, t0 ++ maskeqz a0, a0, t0 ++ or a0, a0, t1 ++ jr ra ++ ++ ++L(out): ++ move a0, a1 ++ jr ra ++END(STRNLEN) ++ ++weak_alias (STRNLEN, strnlen) ++libc_hidden_builtin_def (STRNLEN) +diff --git a/sysdeps/loongarch/lp64/multiarch/strnlen-lasx.S b/sysdeps/loongarch/lp64/multiarch/strnlen-lasx.S +new file mode 100644 +index 00000000..aa6c812d +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strnlen-lasx.S +@@ -0,0 +1,100 @@ ++/* Optimized strnlen implementation using LoongArch LASX instructions ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ . 
*/ ++ ++#include ++#include ++#include ++ ++#if IS_IN (libc) && !defined __loongarch_soft_float ++ ++# define STRNLEN __strnlen_lasx ++ ++LEAF(STRNLEN, 6) ++ beqz a1, L(ret0) ++ andi t1, a0, 0x3f ++ li.d t3, 65 ++ sub.d a2, a0, t1 ++ ++ xvld xr0, a2, 0 ++ xvld xr1, a2, 32 ++ sub.d t1, t3, t1 ++ move a3, a0 ++ ++ sltu t1, a1, t1 ++ xvmsknz.b xr0, xr0 ++ xvmsknz.b xr1, xr1 ++ xvpickve.w xr2, xr0, 4 ++ ++ xvpickve.w xr3, xr1, 4 ++ vilvl.h vr0, vr2, vr0 ++ vilvl.h vr1, vr3, vr1 ++ vilvl.w vr0, vr1, vr0 ++ ++ ++ movfr2gr.d t0, fa0 ++ sra.d t0, t0, a0 ++ orn t1, t1, t0 ++ bnez t1, L(end) ++ ++ add.d a4, a0, a1 ++ move a0, a2 ++ addi.d a4, a4, -1 ++ bstrins.d a4, zero, 5, 0 ++ ++L(loop): ++ xvld xr0, a0, 64 ++ xvld xr1, a0, 96 ++ addi.d a0, a0, 64 ++ beq a0, a4, L(out) ++ ++ xvmin.bu xr2, xr0, xr1 ++ xvsetanyeqz.b fcc0, xr2 ++ bceqz fcc0, L(loop) ++L(out): ++ xvmsknz.b xr0, xr0 ++ ++ ++ xvmsknz.b xr1, xr1 ++ xvpickve.w xr2, xr0, 4 ++ xvpickve.w xr3, xr1, 4 ++ vilvl.h vr0, vr2, vr0 ++ ++ vilvl.h vr1, vr3, vr1 ++ vilvl.w vr0, vr1, vr0 ++ movfr2gr.d t0, fa0 ++L(end): ++ sub.d a0, a0, a3 ++ ++ cto.d t0, t0 ++ add.d a0, a0, t0 ++ sltu t1, a0, a1 ++ masknez t0, a1, t1 ++ ++ maskeqz t1, a0, t1 ++ or a0, t0, t1 ++ jr ra ++L(ret0): ++ move a0, zero ++ ++ ++ jr ra ++END(STRNLEN) ++ ++libc_hidden_def (STRNLEN) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/strnlen-lsx.S b/sysdeps/loongarch/lp64/multiarch/strnlen-lsx.S +new file mode 100644 +index 00000000..d0febe3e +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strnlen-lsx.S +@@ -0,0 +1,89 @@ ++/* Optimized strnlen implementation using LoongArch LSX instructions ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ . 
*/ ++ ++#include ++#include ++#include ++ ++#if IS_IN (libc) && !defined __loongarch_soft_float ++ ++# define STRNLEN __strnlen_lsx ++ ++LEAF(STRNLEN, 6) ++ beqz a1, L(ret0) ++ andi t1, a0, 0x1f ++ li.d t3, 33 ++ sub.d a2, a0, t1 ++ ++ vld vr0, a2, 0 ++ vld vr1, a2, 16 ++ sub.d t1, t3, t1 ++ move a3, a0 ++ ++ sltu t1, a1, t1 ++ vmsknz.b vr0, vr0 ++ vmsknz.b vr1, vr1 ++ vilvl.h vr0, vr1, vr0 ++ ++ movfr2gr.s t0, fa0 ++ sra.w t0, t0, a0 ++ orn t1, t1, t0 ++ bnez t1, L(end) ++ ++ ++ add.d a4, a0, a1 ++ move a0, a2 ++ addi.d a4, a4, -1 ++ bstrins.d a4, zero, 4, 0 ++ ++L(loop): ++ vld vr0, a0, 32 ++ vld vr1, a0, 48 ++ addi.d a0, a0, 32 ++ beq a0, a4, L(out) ++ ++ vmin.bu vr2, vr0, vr1 ++ vsetanyeqz.b fcc0, vr2 ++ bceqz fcc0, L(loop) ++L(out): ++ vmsknz.b vr0, vr0 ++ ++ vmsknz.b vr1, vr1 ++ vilvl.h vr0, vr1, vr0 ++ movfr2gr.s t0, fa0 ++L(end): ++ sub.d a0, a0, a3 ++ ++ ++ cto.w t0, t0 ++ add.d a0, a0, t0 ++ sltu t1, a0, a1 ++ masknez t0, a1, t1 ++ ++ maskeqz t1, a0, t1 ++ or a0, t0, t1 ++ jr ra ++L(ret0): ++ move a0, zero ++ ++ jr ra ++END(STRNLEN) ++ ++libc_hidden_builtin_def (STRNLEN) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/strnlen.c b/sysdeps/loongarch/lp64/multiarch/strnlen.c +new file mode 100644 +index 00000000..38b7a25a +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strnlen.c +@@ -0,0 +1,39 @@ ++/* Multiple versions of strnlen. ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++/* Define multiple versions only for the definition in libc. */ ++#if IS_IN (libc) ++# define strnlen __redirect_strnlen ++# define __strnlen __redirect___strnlen ++# include ++# undef __strnlen ++# undef strnlen ++ ++# define SYMBOL_NAME strnlen ++# include "ifunc-strnlen.h" ++ ++libc_ifunc_redirected (__redirect_strnlen, __strnlen, IFUNC_SELECTOR ()); ++weak_alias (__strnlen, strnlen); ++# ifdef SHARED ++__hidden_ver1 (__strnlen, __GI___strnlen, __redirect___strnlen) ++ __attribute__((visibility ("hidden"))) __attribute_copy__ (strnlen); ++__hidden_ver1 (strnlen, __GI_strnlen, __redirect_strnlen) ++ __attribute__((weak, visibility ("hidden"))) __attribute_copy__ (strnlen); ++# endif ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/strrchr-aligned.S b/sysdeps/loongarch/lp64/multiarch/strrchr-aligned.S +new file mode 100644 +index 00000000..a73deb78 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strrchr-aligned.S +@@ -0,0 +1,170 @@ ++/* Optimized strrchr implementation using basic LoongArch instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. 
++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ . */ ++ ++#include ++#include ++#include ++ ++#if IS_IN (libc) ++# define STRRCHR __strrchr_aligned ++#else ++# define STRRCHR strrchr ++#endif ++ ++LEAF(STRRCHR, 6) ++ slli.d t0, a0, 3 ++ bstrins.d a0, zero, 2, 0 ++ lu12i.w a2, 0x01010 ++ ld.d t2, a0, 0 ++ ++ andi a1, a1, 0xff ++ ori a2, a2, 0x101 ++ li.d t3, -1 ++ bstrins.d a2, a2, 63, 32 ++ ++ sll.d t5, t3, t0 ++ slli.d a3, a2, 7 ++ orn t4, t2, t5 ++ mul.d a1, a1, a2 ++ ++ sub.d t0, t4, a2 ++ andn t1, a3, t4 ++ and t1, t0, t1 ++ beqz t1, L(find_tail) ++ ++ ++ ctz.d t0, t1 ++ orn t0, zero, t0 ++ xor t2, t4, a1 ++ srl.d t0, t3, t0 ++ ++ orn t2, t2, t0 ++ orn t2, t2, t5 ++ revb.d t2, t2 ++ sub.d t1, t2, a2 ++ ++ andn t0, a3, t2 ++ and t1, t0, t1 ++ ctz.d t0, t1 ++ srli.d t0, t0, 3 ++ ++ addi.d a0, a0, 7 ++ sub.d a0, a0, t0 ++ maskeqz a0, a0, t1 ++ jr ra ++ ++ ++L(find_tail): ++ addi.d a4, a0, 8 ++ addi.d a0, a0, 8 ++L(loop_ascii): ++ ld.d t2, a0, 0 ++ sub.d t1, t2, a2 ++ ++ and t0, t1, a3 ++ bnez t0, L(more_check) ++ ld.d t2, a0, 8 ++ sub.d t1, t2, a2 ++ ++ and t0, t1, a3 ++ addi.d a0, a0, 16 ++ beqz t0, L(loop_ascii) ++ addi.d a0, a0, -8 ++ ++L(more_check): ++ andn t0, a3, t2 ++ and t1, t1, t0 ++ bnez t1, L(tail) ++ addi.d a0, a0, 8 ++ ++ ++L(loop_nonascii): ++ ld.d t2, a0, 0 ++ sub.d t1, t2, a2 ++ andn t0, a3, t2 ++ and t1, t0, t1 ++ ++ bnez t1, L(tail) ++ ld.d t2, a0, 8 ++ addi.d a0, a0, 16 ++ sub.d t1, t2, a2 ++ ++ andn t0, a3, t2 ++ and t1, t0, t1 ++ beqz t1, L(loop_nonascii) ++ addi.d a0, a0, -8 ++ ++L(tail): ++ ctz.d t0, t1 ++ orn t0, zero, t0 ++ xor t2, t2, a1 ++ srl.d t0, t3, t0 ++ ++ ++ orn t2, t2, t0 ++ revb.d t2, t2 ++ sub.d t1, t2, a2 ++ andn t0, a3, t2 ++ ++ and t1, t0, t1 ++ bnez t1, L(count_pos) ++L(find_loop): ++ beq a0, a4, L(find_end) ++ ld.d t2, a0, -8 ++ ++ addi.d a0, a0, -8 ++ xor t2, t2, a1 ++ sub.d t1, t2, a2 ++ andn t0, a3, t2 ++ ++ and t1, t0, t1 ++ beqz t1, L(find_loop) ++ revb.d t2, t2 ++ sub.d t1, t2, a2 ++ ++ ++ andn t0, a3, t2 ++ and t1, t0, t1 ++L(count_pos): ++ ctz.d t0, t1 ++ addi.d a0, a0, 7 ++ ++ srli.d t0, t0, 3 ++ sub.d a0, a0, t0 ++ jr ra ++ nop ++ ++L(find_end): ++ xor t2, t4, a1 ++ orn t2, t2, t5 ++ revb.d t2, t2 ++ sub.d t1, t2, a2 ++ ++ ++ andn t0, a3, t2 ++ and t1, t0, t1 ++ ctz.d t0, t1 ++ srli.d t0, t0, 3 ++ ++ addi.d a0, a4, -1 ++ sub.d a0, a0, t0 ++ maskeqz a0, a0, t1 ++ jr ra ++END(STRRCHR) ++ ++libc_hidden_builtin_def(STRRCHR) +diff --git a/sysdeps/loongarch/lp64/multiarch/strrchr-lasx.S b/sysdeps/loongarch/lp64/multiarch/strrchr-lasx.S +new file mode 100644 +index 00000000..5a6e2297 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strrchr-lasx.S +@@ -0,0 +1,176 @@ ++/* Optimized strrchr implementation using LoongArch LASX instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. 
++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ . */ ++ ++#include ++#include ++#include ++ ++#if IS_IN (libc) && !defined __loongarch_soft_float ++ ++#define STRRCHR __strrchr_lasx ++ ++LEAF(STRRCHR, 6) ++ move a2, a0 ++ bstrins.d a0, zero, 5, 0 ++ xvld xr0, a0, 0 ++ xvld xr1, a0, 32 ++ ++ li.d t2, -1 ++ xvreplgr2vr.b xr4, a1 ++ xvmsknz.b xr2, xr0 ++ xvmsknz.b xr3, xr1 ++ ++ xvpickve.w xr5, xr2, 4 ++ xvpickve.w xr6, xr3, 4 ++ vilvl.h vr2, vr5, vr2 ++ vilvl.h vr3, vr6, vr3 ++ ++ vilvl.w vr2, vr3, vr2 ++ movfr2gr.d t0, fa2 ++ sra.d t0, t0, a2 ++ beq t0, t2, L(find_tail) ++ ++ ++ xvseq.b xr2, xr0, xr4 ++ xvseq.b xr3, xr1, xr4 ++ xvmsknz.b xr2, xr2 ++ xvmsknz.b xr3, xr3 ++ ++ xvpickve.w xr4, xr2, 4 ++ xvpickve.w xr5, xr3, 4 ++ vilvl.h vr2, vr4, vr2 ++ vilvl.h vr3, vr5, vr3 ++ ++ vilvl.w vr1, vr3, vr2 ++ slli.d t3, t2, 1 ++ movfr2gr.d t1, fa1 ++ cto.d t0, t0 ++ ++ srl.d t1, t1, a2 ++ sll.d t3, t3, t0 ++ addi.d a0, a2, 63 ++ andn t1, t1, t3 ++ ++ ++ clz.d t0, t1 ++ sub.d a0, a0, t0 ++ maskeqz a0, a0, t1 ++ jr ra ++ ++ .align 5 ++L(find_tail): ++ addi.d a3, a0, 64 ++L(loop): ++ xvld xr2, a0, 64 ++ xvld xr3, a0, 96 ++ addi.d a0, a0, 64 ++ ++ xvmin.bu xr5, xr2, xr3 ++ xvsetanyeqz.b fcc0, xr5 ++ bceqz fcc0, L(loop) ++ xvmsknz.b xr5, xr2 ++ ++ ++ xvmsknz.b xr6, xr3 ++ xvpickve.w xr7, xr5, 4 ++ xvpickve.w xr8, xr6, 4 ++ vilvl.h vr5, vr7, vr5 ++ ++ vilvl.h vr6, vr8, vr6 ++ xvseq.b xr2, xr2, xr4 ++ xvseq.b xr3, xr3, xr4 ++ xvmsknz.b xr2, xr2 ++ ++ xvmsknz.b xr3, xr3 ++ xvpickve.w xr7, xr2, 4 ++ xvpickve.w xr8, xr3, 4 ++ vilvl.h vr2, vr7, vr2 ++ ++ vilvl.h vr3, vr8, vr3 ++ vilvl.w vr5, vr6, vr5 ++ vilvl.w vr2, vr3, vr2 ++ movfr2gr.d t0, fa5 ++ ++ ++ movfr2gr.d t1, fa2 ++ slli.d t3, t2, 1 ++ cto.d t0, t0 ++ sll.d t3, t3, t0 ++ ++ andn t1, t1, t3 ++ beqz t1, L(find_loop) ++ clz.d t0, t1 ++ addi.d a0, a0, 63 ++ ++ sub.d a0, a0, t0 ++ jr ra ++L(find_loop): ++ beq a0, a3, L(find_end) ++ xvld xr2, a0, -64 ++ ++ xvld xr3, a0, -32 ++ addi.d a0, a0, -64 ++ xvseq.b xr2, xr2, xr4 ++ xvseq.b xr3, xr3, xr4 ++ ++ ++ xvmax.bu xr5, xr2, xr3 ++ xvseteqz.v fcc0, xr5 ++ bcnez fcc0, L(find_loop) ++ xvmsknz.b xr0, xr2 ++ ++ xvmsknz.b xr1, xr3 ++ xvpickve.w xr2, xr0, 4 ++ xvpickve.w xr3, xr1, 4 ++ vilvl.h vr0, vr2, vr0 ++ ++ vilvl.h vr1, vr3, vr1 ++ vilvl.w vr0, vr1, vr0 ++ movfr2gr.d t0, fa0 ++ addi.d a0, a0, 63 ++ ++ clz.d t0, t0 ++ sub.d a0, a0, t0 ++ jr ra ++ nop ++ ++ ++L(find_end): ++ xvseq.b xr2, xr0, xr4 ++ xvseq.b xr3, xr1, xr4 ++ xvmsknz.b xr2, xr2 ++ xvmsknz.b xr3, xr3 ++ ++ xvpickve.w xr4, xr2, 4 ++ xvpickve.w xr5, xr3, 4 ++ vilvl.h vr2, vr4, vr2 ++ vilvl.h vr3, vr5, vr3 ++ ++ vilvl.w vr1, vr3, vr2 ++ movfr2gr.d t1, fa1 ++ addi.d a0, a2, 63 ++ srl.d t1, t1, a2 ++ ++ clz.d t0, t1 ++ sub.d a0, a0, t0 ++ maskeqz a0, a0, t1 ++ jr ra ++END(STRRCHR) ++ ++libc_hidden_builtin_def(STRRCHR) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/strrchr-lsx.S b/sysdeps/loongarch/lp64/multiarch/strrchr-lsx.S +new file mode 
100644 +index 00000000..8f2fd22e +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strrchr-lsx.S +@@ -0,0 +1,144 @@ ++/* Optimized strrchr implementation using LoongArch LSX instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ . */ ++ ++#include ++#include ++#include ++ ++#if IS_IN (libc) && !defined __loongarch_soft_float ++ ++#define STRRCHR __strrchr_lsx ++ ++LEAF(STRRCHR, 6) ++ move a2, a0 ++ bstrins.d a0, zero, 4, 0 ++ vld vr0, a0, 0 ++ vld vr1, a0, 16 ++ ++ li.d t2, -1 ++ vreplgr2vr.b vr4, a1 ++ vmsknz.b vr2, vr0 ++ vmsknz.b vr3, vr1 ++ ++ vilvl.h vr2, vr3, vr2 ++ movfr2gr.s t0, fa2 ++ sra.w t0, t0, a2 ++ beq t0, t2, L(find_tail) ++ ++ vseq.b vr2, vr0, vr4 ++ vseq.b vr3, vr1, vr4 ++ vmsknz.b vr2, vr2 ++ vmsknz.b vr3, vr3 ++ ++ ++ vilvl.h vr1, vr3, vr2 ++ slli.d t3, t2, 1 ++ movfr2gr.s t1, fa1 ++ cto.w t0, t0 ++ ++ srl.w t1, t1, a2 ++ sll.d t3, t3, t0 ++ addi.d a0, a2, 31 ++ andn t1, t1, t3 ++ ++ clz.w t0, t1 ++ sub.d a0, a0, t0 ++ maskeqz a0, a0, t1 ++ jr ra ++ ++ .align 5 ++L(find_tail): ++ addi.d a3, a0, 32 ++L(loop): ++ vld vr2, a0, 32 ++ vld vr3, a0, 48 ++ addi.d a0, a0, 32 ++ ++ vmin.bu vr5, vr2, vr3 ++ vsetanyeqz.b fcc0, vr5 ++ bceqz fcc0, L(loop) ++ vmsknz.b vr5, vr2 ++ ++ vmsknz.b vr6, vr3 ++ vilvl.h vr5, vr6, vr5 ++ vseq.b vr2, vr2, vr4 ++ vseq.b vr3, vr3, vr4 ++ ++ vmsknz.b vr2, vr2 ++ vmsknz.b vr3, vr3 ++ vilvl.h vr2, vr3, vr2 ++ movfr2gr.s t0, fa5 ++ ++ ++ movfr2gr.s t1, fa2 ++ slli.d t3, t2, 1 ++ cto.w t0, t0 ++ sll.d t3, t3, t0 ++ ++ andn t1, t1, t3 ++ beqz t1, L(find_loop) ++ clz.w t0, t1 ++ addi.d a0, a0, 31 ++ ++ sub.d a0, a0, t0 ++ jr ra ++L(find_loop): ++ beq a0, a3, L(find_end) ++ vld vr2, a0, -32 ++ ++ vld vr3, a0, -16 ++ addi.d a0, a0, -32 ++ vseq.b vr2, vr2, vr4 ++ vseq.b vr3, vr3, vr4 ++ ++ ++ vmax.bu vr5, vr2, vr3 ++ vseteqz.v fcc0, vr5 ++ bcnez fcc0, L(find_loop) ++ vmsknz.b vr0, vr2 ++ ++ vmsknz.b vr1, vr3 ++ vilvl.h vr0, vr1, vr0 ++ movfr2gr.s t0, fa0 ++ addi.d a0, a0, 31 ++ ++ clz.w t0, t0 ++ sub.d a0, a0, t0 ++ jr ra ++ nop ++ ++L(find_end): ++ vseq.b vr2, vr0, vr4 ++ vseq.b vr3, vr1, vr4 ++ vmsknz.b vr2, vr2 ++ vmsknz.b vr3, vr3 ++ ++ ++ vilvl.h vr1, vr3, vr2 ++ movfr2gr.s t1, fa1 ++ addi.d a0, a2, 31 ++ srl.w t1, t1, a2 ++ ++ clz.w t0, t1 ++ sub.d a0, a0, t0 ++ maskeqz a0, a0, t1 ++ jr ra ++END(STRRCHR) ++ ++libc_hidden_builtin_def(STRRCHR) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/strrchr.c b/sysdeps/loongarch/lp64/multiarch/strrchr.c +new file mode 100644 +index 00000000..d9c9f660 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strrchr.c +@@ -0,0 +1,36 @@ ++/* Multiple versions of strrchr. ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. 
++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++/* Define multiple versions only for the definition in libc. */ ++#if IS_IN (libc) ++# define strrchr __redirect_strrchr ++# include ++# undef strrchr ++ ++# define SYMBOL_NAME strrchr ++# include "ifunc-strrchr.h" ++ ++libc_ifunc_redirected (__redirect_strrchr, strrchr, IFUNC_SELECTOR ()); ++weak_alias (strrchr, rindex) ++# ifdef SHARED ++__hidden_ver1 (strrchr, __GI_strrchr, __redirect_strrchr) ++ __attribute__ ((visibility ("hidden"))) __attribute_copy__ (strrchr); ++# endif ++ ++#endif +diff --git a/sysdeps/loongarch/setjmp.S b/sysdeps/loongarch/setjmp.S +index 6c7065cd..b6e4f727 100644 +--- a/sysdeps/loongarch/setjmp.S ++++ b/sysdeps/loongarch/setjmp.S +@@ -52,19 +52,19 @@ ENTRY (__sigsetjmp) + REG_S s8, a0, 12*SZREG + + #ifndef __loongarch_soft_float +- FREG_S $f24, a0, 13*SZREG + 0*SZFREG +- FREG_S $f25, a0, 13*SZREG + 1*SZFREG +- FREG_S $f26, a0, 13*SZREG + 2*SZFREG +- FREG_S $f27, a0, 13*SZREG + 3*SZFREG +- FREG_S $f28, a0, 13*SZREG + 4*SZFREG +- FREG_S $f29, a0, 13*SZREG + 5*SZFREG +- FREG_S $f30, a0, 13*SZREG + 6*SZFREG +- FREG_S $f31, a0, 13*SZREG + 7*SZFREG ++ FREG_S fs0, a0, 13*SZREG + 0*SZFREG ++ FREG_S fs1, a0, 13*SZREG + 1*SZFREG ++ FREG_S fs2, a0, 13*SZREG + 2*SZFREG ++ FREG_S fs3, a0, 13*SZREG + 3*SZFREG ++ FREG_S fs4, a0, 13*SZREG + 4*SZFREG ++ FREG_S fs5, a0, 13*SZREG + 5*SZFREG ++ FREG_S fs6, a0, 13*SZREG + 6*SZFREG ++ FREG_S fs7, a0, 13*SZREG + 7*SZFREG + #endif + + #if !IS_IN (libc) && IS_IN(rtld) + li.w v0, 0 +- jirl zero,ra,0 ++ jirl zero, ra, 0 + #else + b __sigjmp_save + #endif +diff --git a/sysdeps/loongarch/start.S b/sysdeps/loongarch/start.S +index e9d82033..bf6bfc9e 100644 +--- a/sysdeps/loongarch/start.S ++++ b/sysdeps/loongarch/start.S +@@ -60,20 +60,7 @@ ENTRY (ENTRY_POINT) + cfi_undefined (1) + or a5, a0, zero /* rtld_fini */ + +-#if ENABLE_STATIC_PIE +-/* For static PIE, the GOT cannot be used in _start because the GOT entries are +- offsets instead of real addresses before __libc_start_main. +- __libc_start_main and/or main may be not local, so we rely on the linker to +- produce PLT entries for them. GNU ld >= 2.40 supports this. */ +-# define LA la.pcrel +-#else +-/* Old GNU ld (< 2.40) cannot handle PC relative address against a non-local +- function correctly. We deem these old linkers failing to support static PIE +- and load the addresses from GOT. 
*/ +-# define LA la.got +-#endif +- +- LA a0, t0, main ++ la.pcrel a0, t0, main + REG_L a1, sp, 0 + ADDI a2, sp, SZREG + +@@ -84,9 +71,9 @@ ENTRY (ENTRY_POINT) + move a4, zero /* used to be fini */ + or a6, sp, zero /* stack_end */ + +- LA ra, t0, __libc_start_main ++ la.pcrel ra, t0, __libc_start_main + jirl ra, ra, 0 + +- LA ra, t0, abort ++ la.pcrel ra, t0, abort + jirl ra, ra, 0 + END (ENTRY_POINT) +diff --git a/sysdeps/loongarch/sys/asm.h b/sysdeps/loongarch/sys/asm.h +index d1a279b8..c5eb8afa 100644 +--- a/sysdeps/loongarch/sys/asm.h ++++ b/sysdeps/loongarch/sys/asm.h +@@ -39,16 +39,32 @@ + #define FREG_L fld.d + #define FREG_S fst.d + +-/* Declare leaf routine. */ +-#define LEAF(symbol) \ +- .text; \ +- .globl symbol; \ +- .align 3; \ +- cfi_startproc; \ +- .type symbol, @function; \ +- symbol: +- +-#define ENTRY(symbol) LEAF (symbol) ++/* Declare leaf routine. ++ The usage of macro LEAF/ENTRY is as follows: ++ 1. LEAF(fcn) -- the align value of fcn is .align 3 (default value) ++ 2. LEAF(fcn, 6) -- the align value of fcn is .align 6 ++*/ ++#define LEAF_IMPL(symbol, aln, ...) \ ++ .text; \ ++ .globl symbol; \ ++ .align aln; \ ++ .type symbol, @function; \ ++symbol: \ ++ cfi_startproc; ++ ++ ++#define LEAF(...) LEAF_IMPL(__VA_ARGS__, 3) ++#define ENTRY(...) LEAF(__VA_ARGS__) ++ ++#define LEAF_NO_ALIGN(symbol) \ ++ .text; \ ++ .globl symbol; \ ++ .type symbol, @function; \ ++symbol: \ ++ cfi_startproc; ++ ++#define ENTRY_NO_ALIGN(symbol) LEAF_NO_ALIGN(symbol) ++ + + /* Mark end of function. */ + #undef END +diff --git a/sysdeps/loongarch/sys/regdef.h b/sysdeps/loongarch/sys/regdef.h +index 5100f36d..524d2e32 100644 +--- a/sysdeps/loongarch/sys/regdef.h ++++ b/sysdeps/loongarch/sys/regdef.h +@@ -89,6 +89,14 @@ + #define fs5 $f29 + #define fs6 $f30 + #define fs7 $f31 ++#define fcc0 $fcc0 ++#define fcc1 $fcc1 ++#define fcc2 $fcc2 ++#define fcc3 $fcc3 ++#define fcc4 $fcc4 ++#define fcc5 $fcc5 ++#define fcc6 $fcc6 ++#define fcc7 $fcc7 + + #define vr0 $vr0 + #define vr1 $vr1 +@@ -98,6 +106,30 @@ + #define vr5 $vr5 + #define vr6 $vr6 + #define vr7 $vr7 ++#define vr8 $vr8 ++#define vr9 $vr9 ++#define vr10 $vr10 ++#define vr11 $vr11 ++#define vr12 $vr12 ++#define vr13 $vr13 ++#define vr14 $vr14 ++#define vr15 $vr15 ++#define vr16 $vr16 ++#define vr17 $vr17 ++#define vr18 $vr18 ++#define vr19 $vr19 ++#define vr20 $vr20 ++#define vr21 $vr21 ++#define vr22 $vr22 ++#define vr23 $vr23 ++#define vr24 $vr24 ++#define vr25 $vr25 ++#define vr26 $vr26 ++#define vr27 $vr27 ++#define vr28 $vr28 ++#define vr29 $vr29 ++#define vr30 $vr30 ++#define vr31 $vr31 + + #define xr0 $xr0 + #define xr1 $xr1 +@@ -107,5 +139,30 @@ + #define xr5 $xr5 + #define xr6 $xr6 + #define xr7 $xr7 ++#define xr7 $xr7 ++#define xr8 $xr8 ++#define xr9 $xr9 ++#define xr10 $xr10 ++#define xr11 $xr11 ++#define xr12 $xr12 ++#define xr13 $xr13 ++#define xr14 $xr14 ++#define xr15 $xr15 ++#define xr16 $xr16 ++#define xr17 $xr17 ++#define xr18 $xr18 ++#define xr19 $xr19 ++#define xr20 $xr20 ++#define xr21 $xr21 ++#define xr22 $xr22 ++#define xr23 $xr23 ++#define xr24 $xr24 ++#define xr25 $xr25 ++#define xr26 $xr26 ++#define xr27 $xr27 ++#define xr28 $xr28 ++#define xr29 $xr29 ++#define xr30 $xr30 ++#define xr31 $xr31 + + #endif /* _SYS_REGDEF_H */ +diff --git a/sysdeps/unix/sysv/linux/loongarch/bits/hwcap.h b/sysdeps/unix/sysv/linux/loongarch/bits/hwcap.h +index 5104b69c..7acec23d 100644 +--- a/sysdeps/unix/sysv/linux/loongarch/bits/hwcap.h ++++ b/sysdeps/unix/sysv/linux/loongarch/bits/hwcap.h +@@ -35,3 +35,4 @@ + #define 
HWCAP_LOONGARCH_LBT_X86 (1 << 10) + #define HWCAP_LOONGARCH_LBT_ARM (1 << 11) + #define HWCAP_LOONGARCH_LBT_MIPS (1 << 12) ++#define HWCAP_LOONGARCH_PTW (1 << 13) +diff --git a/sysdeps/unix/sysv/linux/loongarch/configure b/sysdeps/unix/sysv/linux/loongarch/configure +index 0d1159e9..8e744d3a 100644 +--- a/sysdeps/unix/sysv/linux/loongarch/configure ++++ b/sysdeps/unix/sysv/linux/loongarch/configure +@@ -1,7 +1,7 @@ + # This file is generated from configure.ac by Autoconf. DO NOT EDIT! + # Local configure fragment for sysdeps/unix/sysv/linux/loongarch. + +-arch_minimum_kernel=5.19.0 ++arch_minimum_kernel=4.15.0 + + libc_cv_loongarch_int_abi=no + +diff --git a/sysdeps/unix/sysv/linux/loongarch/configure.ac b/sysdeps/unix/sysv/linux/loongarch/configure.ac +index 04e9150a..00048d47 100644 +--- a/sysdeps/unix/sysv/linux/loongarch/configure.ac ++++ b/sysdeps/unix/sysv/linux/loongarch/configure.ac +@@ -2,7 +2,7 @@ sinclude(./aclocal.m4)dnl Autoconf lossage + GLIBC_PROVIDES dnl See aclocal.m4 in the top level source directory. + # Local configure fragment for sysdeps/unix/sysv/linux/loongarch. + +-arch_minimum_kernel=5.19.0 ++arch_minimum_kernel=4.15.0 + + libc_cv_loongarch_int_abi=no + AC_EGREP_CPP(4 8 8, [__SIZEOF_INT__ __SIZEOF_LONG__ __SIZEOF_POINTER__ +diff --git a/sysdeps/unix/sysv/linux/loongarch/cpu-features.h b/sysdeps/unix/sysv/linux/loongarch/cpu-features.h +index e371e13b..d1a280a5 100644 +--- a/sysdeps/unix/sysv/linux/loongarch/cpu-features.h ++++ b/sysdeps/unix/sysv/linux/loongarch/cpu-features.h +@@ -25,5 +25,7 @@ + #define SUPPORT_LSX (GLRO (dl_hwcap) & HWCAP_LOONGARCH_LSX) + #define SUPPORT_LASX (GLRO (dl_hwcap) & HWCAP_LOONGARCH_LASX) + ++#define INIT_ARCH() ++ + #endif /* _CPU_FEATURES_LOONGARCH64_H */ + +diff --git a/sysdeps/unix/sysv/linux/loongarch/pointer_guard.h b/sysdeps/unix/sysv/linux/loongarch/pointer_guard.h +index b25e353b..d6c78687 100644 +--- a/sysdeps/unix/sysv/linux/loongarch/pointer_guard.h ++++ b/sysdeps/unix/sysv/linux/loongarch/pointer_guard.h +@@ -19,17 +19,15 @@ + #ifndef POINTER_GUARD_H + #define POINTER_GUARD_H + +-/* Load a got-relative EXPR into G, using T. +- Note G and T are register names. */ ++/* Load a got-relative EXPR into register G. */ + #define LD_GLOBAL(G, EXPR) \ + la.global G, EXPR; \ + REG_L G, G, 0; + +-/* Load a pc-relative EXPR into G, using T. +- Note G and T are register names. */ ++/* Load a pc-relative EXPR into register G. 
*/ + #define LD_PCREL(G, EXPR) \ +- la.pcrel G, EXPR; \ +- REG_L G, G, 0; ++ pcalau12i G, %pc_hi20(EXPR); \ ++ REG_L G, G, %pc_lo12(EXPR); + + #if (IS_IN (rtld) \ + || (!defined SHARED && (IS_IN (libc) \ +-- +2.33.0 + diff --git a/glibc.spec b/glibc.spec index cf8246b..3810358 100644 --- a/glibc.spec +++ b/glibc.spec @@ -48,10 +48,14 @@ %undefine with_valgrind %endif +%ifarch loongarch64 +%global ENABLE_RELOC 0 +%else %global ENABLE_RELOC 1 +%endif # Only some architectures have static PIE support -%define pie_arches %{ix86} x86_64 aarch64 +%define pie_arches %{ix86} x86_64 aarch64 loongarch64 %define enablekernel 3.2 %define target %{_target_cpu}-%{_vendor}-linux @@ -67,7 +71,7 @@ ############################################################################## Name: glibc Version: 2.38 -Release: 21 +Release: 22 Summary: The GNU libc libraries License: %{all_license} URL: http://www.gnu.org/software/glibc/ @@ -161,6 +165,7 @@ Patch9016: add-GB18030-2022-charmap-BZ-30243.patch Patch9017: fix-Segmentation-fault-in-nss-module.patch Patch9018: fix_nss_database_check_reload_and_get_memleak.patch Patch9019: 0001-fix-glibc-build-error-on-x86.patch +Patch9020: 0001-LoongArch-update-from-upstream.patch %if %{ENABLE_RELOC} Patch9021: reserve-relocation-information-for-sysboost.patch @@ -758,7 +763,9 @@ touch devel.filelist touch nscd.filelist touch nss_modules.filelist touch nss-devel.filelist +%ifnarch loongarch64 touch libnsl.filelist +%endif touch debugutils.filelist touch benchtests.filelist touch help.filelist @@ -817,7 +824,9 @@ cat master.filelist \ -e '%{_prefix}/share' \ -e '/var/db/Makefile' \ -e '/libnss_.*\.so[0-9.]*$' \ +%ifnarch loongarch64 -e '/libnsl' \ +%endif -e 'glibc-benchtests' \ -e 'aux-cache' \ > glibc.filelist @@ -890,8 +899,10 @@ grep '/libnss_[a-z]*\.so$' master.filelist > nss-devel.filelist ############################################################################## # libnsl subpackage ############################################################################## +%ifnarch loongarch64 grep -E '/libnsl\.so\.[0-9]+$' master.filelist > libnsl.filelist test $(wc -l < libnsl.filelist) -eq 1 +%endif ############################################################################## # glibc debugutils sub-package @@ -1072,7 +1083,8 @@ elf/ld.so --library-path .:elf:nptl:dlfcn \ %endif popd -%endif # %{run_glibc_tests} +%endif +#%{run_glibc_tests} ############################################################################## # Install and uninstall scripts @@ -1330,8 +1342,10 @@ fi %files -f nss-devel.filelist nss-devel +%ifnarch loongarch64 %files -f libnsl.filelist -n libnsl /%{_lib}/libnsl.so.1 +%endif %files -f debugutils.filelist debugutils @@ -1353,10 +1367,15 @@ fi %endif %changelog -* Tue Feb 6 Qingqing Li - 2.38-21 +* Thu Feb 22 2024 Peng Fan - 2.38-22 +- LoongArch: sync patch from upstream. +- glibc-version >= 2.34 not support libnsl for LoongArch. +- Fix spec file format about date. 
+ +* Tue Feb 6 2024 Qingqing Li - 2.38-21 - arm: Remove wrong ldr from _dl_start_user (BZ 31339) -* Mon Feb 5 Qingqing Li - 2.38-20 +* Mon Feb 5 2024 Qingqing Li - 2.38-20 - x86_64: Optimize ffsll function code size - S390: Fix building with disable mutli arch (BZ 31196) - sparc: Fix broken memset for sparc32 (BZ 31068) @@ -1364,20 +1383,20 @@ fi - sparc: Fix sparc64 memmove length comparison (BZ 31266) - sparc: Remove unwind information from signal return stubs (BZ 31244) -* Thu Feb 1 Hewenliang - 2.38-19 +* Thu Feb 1 2024 Hewenliang - 2.38-19 - backport:fix CVE-2023-6779 CVE-2023-6780 -* Wed Jan 31 Qingqing Li - 2.38-18 +* Wed Jan 31 2024 Qingqing Li - 2.38-18 - backport:fix CVE-2023-6246. -* Sat Jan 13 Qingqing Li - 2.38-17 +* Sat Jan 13 2024 Qingqing Li - 2.38-17 - elf: Add a way to check if tunable is set (BZ 27069) - malloc: Improve MAPE_HUGETLB with glibc.malloc.hugetlb=2 -* Wed Jan 3 Qingqing Li - 2.38-16 +* Wed Jan 3 2024 Qingqing Li - 2.38-16 - backport patches from glibc upstream 2.38 branch -* Thu Dec 14 shixuantong - 2.38-15 +* Thu Dec 14 2023 shixuantong - 2.38-15 - elf: Handle non-directory name in search path (BZ 31035) * Fri Dec 8 2023 Qingqing Li - 2.38-14 -- Gitee