From a2e1725d583b1535564d3cec6cda968d820fc9ec Mon Sep 17 00:00:00 2001
From: ticat_fp
Date: Fri, 23 Feb 2024 14:30:42 +0800
Subject: [PATCH] LoongArch: sync patch from upstream.

glibc version >= 2.34 does not support libnsl for LoongArch.
Fix the date format in the spec file.

Signed-off-by: ticat_fp
---
 0001-LoongArch-update-from-upstream.patch | 10719 ++++++++++++++++++++
 glibc.spec | 39 +-
 2 files changed, 10748 insertions(+), 10 deletions(-)
 create mode 100644 0001-LoongArch-update-from-upstream.patch

diff --git a/0001-LoongArch-update-from-upstream.patch b/0001-LoongArch-update-from-upstream.patch
new file mode 100644
index 0000000..1bb3950
--- /dev/null
+++ b/0001-LoongArch-update-from-upstream.patch
@@ -0,0 +1,10719 @@
+From ba7d73e755aed2f9394e0f3ef3b03ce995181486 Mon Sep 17 00:00:00 2001
+From: ticat_fp
+Date: Fri, 23 Feb 2024 10:08:08 +0800
+Subject: [PATCH] LoongArch: update from upstream
+
+Signed-off-by: ticat_fp
+---
+ config.h.in | 5 -
+ elf/elf.h | 14 +
+ sysdeps/loongarch/__longjmp.S | 20 +-
+ sysdeps/loongarch/bits/link.h | 24 +-
+ sysdeps/loongarch/bits/link_lavcurrent.h | 25 +
+ sysdeps/loongarch/configure | 41 +-
+ sysdeps/loongarch/configure.ac | 34 +-
+ sysdeps/loongarch/dl-audit-check.h | 23 +
+ sysdeps/loongarch/dl-link.sym | 8 +-
+ sysdeps/loongarch/dl-machine.h | 17 +-
+ sysdeps/loongarch/dl-trampoline.S | 179 +---
+ sysdeps/loongarch/dl-trampoline.h | 242 ++++++
+ sysdeps/loongarch/lp64/multiarch/Makefile | 52 ++
+ .../lp64/multiarch/dl-symbol-redir-ifunc.h | 24 +
+ .../lp64/multiarch/ifunc-impl-list.c | 164 ++++
+ sysdeps/loongarch/lp64/multiarch/ifunc-lasx.h | 45 +
+ .../loongarch/lp64/multiarch/ifunc-memchr.h | 40 +
+ .../loongarch/lp64/multiarch/ifunc-memcmp.h | 40 +
+ .../loongarch/lp64/multiarch/ifunc-memrchr.h | 40 +
+ .../lp64/multiarch/ifunc-rawmemchr.h | 40 +
+ .../loongarch/lp64/multiarch/ifunc-strchr.h | 41 +
+ .../lp64/multiarch/ifunc-strchrnul.h | 41 +
+ .../loongarch/lp64/multiarch/ifunc-strcmp.h | 38 +
+ .../loongarch/lp64/multiarch/ifunc-strlen.h | 40 +
+ .../loongarch/lp64/multiarch/ifunc-strncmp.h | 38 +
+ .../loongarch/lp64/multiarch/ifunc-strnlen.h | 41 +
+ .../loongarch/lp64/multiarch/ifunc-strrchr.h | 41 +
+ .../loongarch/lp64/multiarch/memchr-aligned.S | 95 +++
+ .../loongarch/lp64/multiarch/memchr-lasx.S | 117 +++
+ sysdeps/loongarch/lp64/multiarch/memchr-lsx.S | 102 +++
+ sysdeps/loongarch/lp64/multiarch/memchr.c | 37 +
+ .../loongarch/lp64/multiarch/memcmp-aligned.S | 292 +++++++
+ .../loongarch/lp64/multiarch/memcmp-lasx.S | 207 +++++
+ sysdeps/loongarch/lp64/multiarch/memcmp-lsx.S | 269 ++++++
+ sysdeps/loongarch/lp64/multiarch/memcmp.c | 43 +
+ .../loongarch/lp64/multiarch/memcpy-aligned.S | 783 ++++++++++++++++++
+ .../loongarch/lp64/multiarch/memcpy-lasx.S | 20 +
+ sysdeps/loongarch/lp64/multiarch/memcpy-lsx.S | 20 +
+ .../lp64/multiarch/memcpy-unaligned.S | 247 ++++++
+ sysdeps/loongarch/lp64/multiarch/memcpy.c | 37 +
+ .../lp64/multiarch/memmove-aligned.S | 20 +
+ .../loongarch/lp64/multiarch/memmove-lasx.S | 287 +++++++
+ .../loongarch/lp64/multiarch/memmove-lsx.S | 534 ++++++++++++
+ .../lp64/multiarch/memmove-unaligned.S | 380 +++++++++
+ sysdeps/loongarch/lp64/multiarch/memmove.c | 38 +
+ .../lp64/multiarch/memrchr-generic.c | 23 +
+ .../loongarch/lp64/multiarch/memrchr-lasx.S | 123 +++
+ .../loongarch/lp64/multiarch/memrchr-lsx.S | 105 +++
+ sysdeps/loongarch/lp64/multiarch/memrchr.c | 33 +
+ .../loongarch/lp64/multiarch/memset-aligned.S | 174 ++++
+ .../loongarch/lp64/multiarch/memset-lasx.S | 142 ++++
+
sysdeps/loongarch/lp64/multiarch/memset-lsx.S | 135 +++ + .../lp64/multiarch/memset-unaligned.S | 162 ++++ + sysdeps/loongarch/lp64/multiarch/memset.c | 37 + + .../lp64/multiarch/rawmemchr-aligned.S | 124 +++ + .../loongarch/lp64/multiarch/rawmemchr-lasx.S | 82 ++ + .../loongarch/lp64/multiarch/rawmemchr-lsx.S | 71 ++ + sysdeps/loongarch/lp64/multiarch/rawmemchr.c | 37 + + .../loongarch/lp64/multiarch/stpcpy-aligned.S | 27 + + .../loongarch/lp64/multiarch/stpcpy-lasx.S | 22 + + sysdeps/loongarch/lp64/multiarch/stpcpy-lsx.S | 22 + + .../lp64/multiarch/stpcpy-unaligned.S | 22 + + sysdeps/loongarch/lp64/multiarch/stpcpy.c | 42 + + .../loongarch/lp64/multiarch/strchr-aligned.S | 99 +++ + .../loongarch/lp64/multiarch/strchr-lasx.S | 91 ++ + sysdeps/loongarch/lp64/multiarch/strchr-lsx.S | 73 ++ + sysdeps/loongarch/lp64/multiarch/strchr.c | 36 + + .../lp64/multiarch/strchrnul-aligned.S | 95 +++ + .../loongarch/lp64/multiarch/strchrnul-lasx.S | 22 + + .../loongarch/lp64/multiarch/strchrnul-lsx.S | 22 + + sysdeps/loongarch/lp64/multiarch/strchrnul.c | 39 + + .../loongarch/lp64/multiarch/strcmp-aligned.S | 179 ++++ + sysdeps/loongarch/lp64/multiarch/strcmp-lsx.S | 165 ++++ + sysdeps/loongarch/lp64/multiarch/strcmp.c | 35 + + .../loongarch/lp64/multiarch/strcpy-aligned.S | 202 +++++ + .../loongarch/lp64/multiarch/strcpy-lasx.S | 215 +++++ + sysdeps/loongarch/lp64/multiarch/strcpy-lsx.S | 212 +++++ + .../lp64/multiarch/strcpy-unaligned.S | 138 +++ + sysdeps/loongarch/lp64/multiarch/strcpy.c | 35 + + .../loongarch/lp64/multiarch/strlen-aligned.S | 100 +++ + .../loongarch/lp64/multiarch/strlen-lasx.S | 63 ++ + sysdeps/loongarch/lp64/multiarch/strlen-lsx.S | 71 ++ + sysdeps/loongarch/lp64/multiarch/strlen.c | 37 + + .../lp64/multiarch/strncmp-aligned.S | 218 +++++ + .../loongarch/lp64/multiarch/strncmp-lsx.S | 208 +++++ + sysdeps/loongarch/lp64/multiarch/strncmp.c | 35 + + .../lp64/multiarch/strnlen-aligned.S | 102 +++ + .../loongarch/lp64/multiarch/strnlen-lasx.S | 100 +++ + .../loongarch/lp64/multiarch/strnlen-lsx.S | 89 ++ + sysdeps/loongarch/lp64/multiarch/strnlen.c | 39 + + .../lp64/multiarch/strrchr-aligned.S | 170 ++++ + .../loongarch/lp64/multiarch/strrchr-lasx.S | 176 ++++ + .../loongarch/lp64/multiarch/strrchr-lsx.S | 144 ++++ + sysdeps/loongarch/lp64/multiarch/strrchr.c | 36 + + sysdeps/loongarch/setjmp.S | 18 +- + sysdeps/loongarch/start.S | 19 +- + sysdeps/loongarch/sys/asm.h | 36 +- + sysdeps/loongarch/sys/regdef.h | 57 ++ + .../unix/sysv/linux/loongarch/bits/hwcap.h | 1 + + sysdeps/unix/sysv/linux/loongarch/configure | 2 +- + .../unix/sysv/linux/loongarch/configure.ac | 2 +- + .../unix/sysv/linux/loongarch/cpu-features.h | 2 + + .../unix/sysv/linux/loongarch/pointer_guard.h | 10 +- + 103 files changed, 9365 insertions(+), 296 deletions(-) + create mode 100644 sysdeps/loongarch/bits/link_lavcurrent.h + create mode 100644 sysdeps/loongarch/dl-audit-check.h + create mode 100644 sysdeps/loongarch/lp64/multiarch/Makefile + create mode 100644 sysdeps/loongarch/lp64/multiarch/dl-symbol-redir-ifunc.h + create mode 100644 sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c + create mode 100644 sysdeps/loongarch/lp64/multiarch/ifunc-lasx.h + create mode 100644 sysdeps/loongarch/lp64/multiarch/ifunc-memchr.h + create mode 100644 sysdeps/loongarch/lp64/multiarch/ifunc-memcmp.h + create mode 100644 sysdeps/loongarch/lp64/multiarch/ifunc-memrchr.h + create mode 100644 sysdeps/loongarch/lp64/multiarch/ifunc-rawmemchr.h + create mode 100644 sysdeps/loongarch/lp64/multiarch/ifunc-strchr.h + create mode 
100644 sysdeps/loongarch/lp64/multiarch/ifunc-strchrnul.h + create mode 100644 sysdeps/loongarch/lp64/multiarch/ifunc-strcmp.h + create mode 100644 sysdeps/loongarch/lp64/multiarch/ifunc-strlen.h + create mode 100644 sysdeps/loongarch/lp64/multiarch/ifunc-strncmp.h + create mode 100644 sysdeps/loongarch/lp64/multiarch/ifunc-strnlen.h + create mode 100644 sysdeps/loongarch/lp64/multiarch/ifunc-strrchr.h + create mode 100644 sysdeps/loongarch/lp64/multiarch/memchr-aligned.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/memchr-lasx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/memchr-lsx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/memchr.c + create mode 100644 sysdeps/loongarch/lp64/multiarch/memcmp-aligned.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/memcmp-lasx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/memcmp-lsx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/memcmp.c + create mode 100644 sysdeps/loongarch/lp64/multiarch/memcpy-aligned.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/memcpy-lasx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/memcpy-lsx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/memcpy-unaligned.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/memcpy.c + create mode 100644 sysdeps/loongarch/lp64/multiarch/memmove-aligned.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/memmove-lasx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/memmove-lsx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/memmove-unaligned.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/memmove.c + create mode 100644 sysdeps/loongarch/lp64/multiarch/memrchr-generic.c + create mode 100644 sysdeps/loongarch/lp64/multiarch/memrchr-lasx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/memrchr-lsx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/memrchr.c + create mode 100644 sysdeps/loongarch/lp64/multiarch/memset-aligned.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/memset-lasx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/memset-lsx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/memset-unaligned.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/memset.c + create mode 100644 sysdeps/loongarch/lp64/multiarch/rawmemchr-aligned.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/rawmemchr-lasx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/rawmemchr-lsx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/rawmemchr.c + create mode 100644 sysdeps/loongarch/lp64/multiarch/stpcpy-aligned.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/stpcpy-lasx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/stpcpy-lsx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/stpcpy-unaligned.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/stpcpy.c + create mode 100644 sysdeps/loongarch/lp64/multiarch/strchr-aligned.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/strchr-lasx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/strchr-lsx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/strchr.c + create mode 100644 sysdeps/loongarch/lp64/multiarch/strchrnul-aligned.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/strchrnul-lasx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/strchrnul-lsx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/strchrnul.c + create mode 100644 sysdeps/loongarch/lp64/multiarch/strcmp-aligned.S + create mode 100644 
sysdeps/loongarch/lp64/multiarch/strcmp-lsx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/strcmp.c + create mode 100644 sysdeps/loongarch/lp64/multiarch/strcpy-aligned.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/strcpy-lasx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/strcpy-lsx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/strcpy-unaligned.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/strcpy.c + create mode 100644 sysdeps/loongarch/lp64/multiarch/strlen-aligned.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/strlen-lasx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/strlen-lsx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/strlen.c + create mode 100644 sysdeps/loongarch/lp64/multiarch/strncmp-aligned.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/strncmp-lsx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/strncmp.c + create mode 100644 sysdeps/loongarch/lp64/multiarch/strnlen-aligned.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/strnlen-lasx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/strnlen-lsx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/strnlen.c + create mode 100644 sysdeps/loongarch/lp64/multiarch/strrchr-aligned.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/strrchr-lasx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/strrchr-lsx.S + create mode 100644 sysdeps/loongarch/lp64/multiarch/strrchr.c + +diff --git a/config.h.in b/config.h.in +index 0dedc124..44a34072 100644 +--- a/config.h.in ++++ b/config.h.in +@@ -141,11 +141,6 @@ + /* LOONGARCH floating-point ABI for ld.so. */ + #undef LOONGARCH_ABI_FRLEN + +-/* Assembler support LoongArch LASX/LSX vector instructions. +- This macro becomes obsolete when glibc increased the minimum +- required version of GNU 'binutils' to 2.41 or later. */ +-#define HAVE_LOONGARCH_VEC_ASM 0 +- + /* Linux specific: minimum supported kernel version. */ + #undef __LINUX_KERNEL_VERSION + +diff --git a/elf/elf.h b/elf/elf.h +index 89fc8021..51633079 100644 +--- a/elf/elf.h ++++ b/elf/elf.h +@@ -794,6 +794,7 @@ typedef struct + #define NT_386_TLS 0x200 /* i386 TLS slots (struct user_desc) */ + #define NT_386_IOPERM 0x201 /* x86 io permission bitmap (1=deny) */ + #define NT_X86_XSTATE 0x202 /* x86 extended state using xsave */ ++#define NT_X86_SHSTK 0x204 /* x86 SHSTK state */ + #define NT_S390_HIGH_GPRS 0x300 /* s390 upper register halves */ + #define NT_S390_TIMER 0x301 /* s390 timer register */ + #define NT_S390_TODCMP 0x302 /* s390 TOD clock comparator register */ +@@ -832,6 +833,8 @@ typedef struct + #define NT_MIPS_DSP 0x800 /* MIPS DSP ASE registers. */ + #define NT_MIPS_FP_MODE 0x801 /* MIPS floating-point mode. */ + #define NT_MIPS_MSA 0x802 /* MIPS SIMD registers. */ ++#define NT_RISCV_CSR 0x900 /* RISC-V Control and Status Registers */ ++#define NT_RISCV_VECTOR 0x901 /* RISC-V vector registers */ + #define NT_LOONGARCH_CPUCFG 0xa00 /* LoongArch CPU config registers. */ + #define NT_LOONGARCH_CSR 0xa01 /* LoongArch control and + status registers. */ +@@ -841,6 +844,8 @@ typedef struct + SIMD Extension registers. */ + #define NT_LOONGARCH_LBT 0xa04 /* LoongArch Loongson Binary + Translation registers. */ ++#define NT_LOONGARCH_HW_BREAK 0xa05 /* LoongArch hardware breakpoint registers */ ++#define NT_LOONGARCH_HW_WATCH 0xa06 /* LoongArch hardware watchpoint registers */ + + /* Legal values for the note segment descriptor types for object files. 
*/ + +@@ -4205,6 +4210,15 @@ enum + #define R_LARCH_TLS_GD_HI20 98 + #define R_LARCH_32_PCREL 99 + #define R_LARCH_RELAX 100 ++#define R_LARCH_DELETE 101 ++#define R_LARCH_ALIGN 102 ++#define R_LARCH_PCREL20_S2 103 ++#define R_LARCH_CFA 104 ++#define R_LARCH_ADD6 105 ++#define R_LARCH_SUB6 106 ++#define R_LARCH_ADD_ULEB128 107 ++#define R_LARCH_SUB_ULEB128 108 ++#define R_LARCH_64_PCREL 109 + + /* ARC specific declarations. */ + +diff --git a/sysdeps/loongarch/__longjmp.S b/sysdeps/loongarch/__longjmp.S +index cbde1946..e87ce311 100644 +--- a/sysdeps/loongarch/__longjmp.S ++++ b/sysdeps/loongarch/__longjmp.S +@@ -43,18 +43,18 @@ ENTRY (__longjmp) + REG_L s8, a0, 12*SZREG + + #ifndef __loongarch_soft_float +- FREG_L $f24, a0, 13*SZREG + 0*SZFREG +- FREG_L $f25, a0, 13*SZREG + 1*SZFREG +- FREG_L $f26, a0, 13*SZREG + 2*SZFREG +- FREG_L $f27, a0, 13*SZREG + 3*SZFREG +- FREG_L $f28, a0, 13*SZREG + 4*SZFREG +- FREG_L $f29, a0, 13*SZREG + 5*SZFREG +- FREG_L $f30, a0, 13*SZREG + 6*SZFREG +- FREG_L $f31, a0, 13*SZREG + 7*SZFREG ++ FREG_L fs0, a0, 13*SZREG + 0*SZFREG ++ FREG_L fs1, a0, 13*SZREG + 1*SZFREG ++ FREG_L fs2, a0, 13*SZREG + 2*SZFREG ++ FREG_L fs3, a0, 13*SZREG + 3*SZFREG ++ FREG_L fs4, a0, 13*SZREG + 4*SZFREG ++ FREG_L fs5, a0, 13*SZREG + 5*SZFREG ++ FREG_L fs6, a0, 13*SZREG + 6*SZFREG ++ FREG_L fs7, a0, 13*SZREG + 7*SZFREG + #endif + +- sltui a0,a1,1 ++ sltui a0, a1, 1 + ADD a0, a0, a1 # a0 = (a1 == 0) ? 1 : a1 +- jirl zero,ra,0 ++ jirl zero, ra, 0 + + END (__longjmp) +diff --git a/sysdeps/loongarch/bits/link.h b/sysdeps/loongarch/bits/link.h +index 7fa61312..00f6f25f 100644 +--- a/sysdeps/loongarch/bits/link.h ++++ b/sysdeps/loongarch/bits/link.h +@@ -20,10 +20,26 @@ + #error "Never include directly; use instead." + #endif + ++#ifndef __loongarch_soft_float ++typedef float La_loongarch_vr ++ __attribute__ ((__vector_size__ (16), __aligned__ (16))); ++typedef float La_loongarch_xr ++ __attribute__ ((__vector_size__ (32), __aligned__ (16))); ++ ++typedef union ++{ ++ double fpreg[4]; ++ La_loongarch_vr vr[2]; ++ La_loongarch_xr xr[1]; ++} La_loongarch_vector __attribute__ ((__aligned__ (16))); ++#endif ++ + typedef struct La_loongarch_regs + { + unsigned long int lr_reg[8]; /* a0 - a7 */ +- double lr_fpreg[8]; /* fa0 - fa7 */ ++#ifndef __loongarch_soft_float ++ La_loongarch_vector lr_vec[8]; /* fa0 - fa7 or vr0 - vr7 or xr0 - xr7*/ ++#endif + unsigned long int lr_ra; + unsigned long int lr_sp; + } La_loongarch_regs; +@@ -33,8 +49,10 @@ typedef struct La_loongarch_retval + { + unsigned long int lrv_a0; + unsigned long int lrv_a1; +- double lrv_fa0; +- double lrv_fa1; ++#ifndef __loongarch_soft_float ++ La_loongarch_vector lrv_vec0; ++ La_loongarch_vector lrv_vec1; ++#endif + } La_loongarch_retval; + + __BEGIN_DECLS +diff --git a/sysdeps/loongarch/bits/link_lavcurrent.h b/sysdeps/loongarch/bits/link_lavcurrent.h +new file mode 100644 +index 00000000..15f1eb84 +--- /dev/null ++++ b/sysdeps/loongarch/bits/link_lavcurrent.h +@@ -0,0 +1,25 @@ ++/* Data structure for communication from the run-time dynamic linker for ++ loaded ELF shared objects. LAV_CURRENT definition. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. 
++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#ifndef _LINK_H ++# error "Never include directly; use instead." ++#endif ++ ++/* Version numbers for la_version handshake interface. */ ++#define LAV_CURRENT 3 +diff --git a/sysdeps/loongarch/configure b/sysdeps/loongarch/configure +index 7f1dabbc..30b60d19 100644 +--- a/sysdeps/loongarch/configure ++++ b/sysdeps/loongarch/configure +@@ -4,21 +4,19 @@ + printf "%s\n" "#define HIDDEN_VAR_NEEDS_DYNAMIC_RELOC 1" >>confdefs.h + + +-{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if the toolchain is sufficient to build static PIE on LoongArch" >&5 +-printf %s "checking if the toolchain is sufficient to build static PIE on LoongArch... " >&6; } ++{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if ${CC-cc} is sufficient to build static PIE on LoongArch" >&5 ++printf %s "checking if ${CC-cc} is sufficient to build static PIE on LoongArch... " >&6; } + if test ${libc_cv_static_pie_on_loongarch+y} + then : + printf %s "(cached) " >&6 + else $as_nop + +- cat > conftest1.S <<\EOF ++ cat > conftest.S <<\EOF + .global _start + .type _start, @function + _start: + li.w $a7, 93 +- /* This ensures the assembler supports explicit reloc. */ +- pcalau12i $a0, %pc_hi20(x) +- ld.w $a0, $a0, %pc_lo12(x) ++ li.w $a0, 0 + syscall 0 + + .data +@@ -27,41 +25,21 @@ x: + /* This should produce an R_LARCH_RELATIVE in the static PIE. */ + .dword _start + EOF +- cat > conftest2.S <<\EOF +-.global f +-.type f, @function +-f: +- /* The linker should be able to handle this and produce a PLT entry. */ +- la.pcrel $t0, $t0, external_func +- jirl $zero, $t0, 0 +-EOF + + libc_cv_static_pie_on_loongarch=no +- if { ac_try='${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS -static-pie -nostdlib -fPIE -o conftest1 conftest1.S' +- { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 +- (eval $ac_try) 2>&5 +- ac_status=$? +- printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 +- test $ac_status = 0; }; } \ +- && { ac_try='LC_ALL=C $READELF -Wr conftest1 | grep -q R_LARCH_RELATIVE' ++ if { ac_try='${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS -static-pie -nostdlib -fPIE -o conftest conftest.S' + { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 + (eval $ac_try) 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; } \ +- && ! { ac_try='LC_ALL=C $READELF -Wl conftest1 | grep -q INTERP' ++ && { ac_try='LC_ALL=C $READELF -Wr conftest | grep -q R_LARCH_RELATIVE' + { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 + (eval $ac_try) 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; } \ +- && { ac_try='${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS -shared -fPIC -o conftest2.so conftest2.S' +- { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 +- (eval $ac_try) 2>&5 +- ac_status=$? +- printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 +- test $ac_status = 0; }; } \ +- && { ac_try='LC_ALL=C $READELF -Wr conftest2.so | grep -q 'R_LARCH_JUMP_SLOT.*external_func'' ++ && ! 
{ ac_try='LC_ALL=C $READELF -Wl conftest | grep -q INTERP' + { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 + (eval $ac_try) 2>&5 + ac_status=$? +@@ -128,8 +106,7 @@ rm -f conftest* + fi + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $libc_cv_loongarch_vec_asm" >&5 + printf "%s\n" "$libc_cv_loongarch_vec_asm" >&6; } +-if test $libc_cv_loongarch_vec_asm = yes; then +- printf "%s\n" "#define HAVE_LOONGARCH_VEC_ASM 1" >>confdefs.h +- ++if test $libc_cv_loongarch_vec_asm = no; then ++ as_fn_error $? "binutils version is too old, use 2.41 or newer version" "$LINENO" 5 + fi + +diff --git a/sysdeps/loongarch/configure.ac b/sysdeps/loongarch/configure.ac +index 39efccfd..28a8ae54 100644 +--- a/sysdeps/loongarch/configure.ac ++++ b/sysdeps/loongarch/configure.ac +@@ -8,19 +8,17 @@ AC_DEFINE(HIDDEN_VAR_NEEDS_DYNAMIC_RELOC) + dnl Test if the toolchain is new enough for static PIE. + dnl We need a GAS supporting explicit reloc (older GAS produces stack-based + dnl reloc and triggers an internal error in the linker). And, we need GCC to +-dnl pass the correct linker flags for static PIE. GCC >= 13 and GAS >= 2.40 +-dnl satisfy the requirement, but a distro may backport static PIE support into +-dnl earlier GCC or Binutils releases as well. +-AC_CACHE_CHECK([if the toolchain is sufficient to build static PIE on LoongArch], ++dnl pass the correct linker flags for static PIE. We strictly require GAS >= ++dnl 2.41 so we don't need to check the assembler capability, but we need to ++dnl check if GCC is doing the correct thing. ++AC_CACHE_CHECK([if ${CC-cc} is sufficient to build static PIE on LoongArch], + libc_cv_static_pie_on_loongarch, [ +- cat > conftest1.S <<\EOF ++ cat > conftest.S <<\EOF + .global _start + .type _start, @function + _start: + li.w $a7, 93 +- /* This ensures the assembler supports explicit reloc. */ +- pcalau12i $a0, %pc_hi20(x) +- ld.w $a0, $a0, %pc_lo12(x) ++ li.w $a0, 0 + syscall 0 + + .data +@@ -29,21 +27,11 @@ x: + /* This should produce an R_LARCH_RELATIVE in the static PIE. */ + .dword _start + EOF +- cat > conftest2.S <<\EOF +-.global f +-.type f, @function +-f: +- /* The linker should be able to handle this and produce a PLT entry. */ +- la.pcrel $t0, $t0, external_func +- jirl $zero, $t0, 0 +-EOF + + libc_cv_static_pie_on_loongarch=no +- if AC_TRY_COMMAND([${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS -static-pie -nostdlib -fPIE -o conftest1 conftest1.S]) \ +- && AC_TRY_COMMAND([LC_ALL=C $READELF -Wr conftest1 | grep -q R_LARCH_RELATIVE]) \ +- && ! AC_TRY_COMMAND([LC_ALL=C $READELF -Wl conftest1 | grep -q INTERP]) \ +- && AC_TRY_COMMAND([${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS -shared -fPIC -o conftest2.so conftest2.S]) \ +- && AC_TRY_COMMAND([LC_ALL=C $READELF -Wr conftest2.so | grep -q 'R_LARCH_JUMP_SLOT.*external_func']) ++ if AC_TRY_COMMAND([${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS -static-pie -nostdlib -fPIE -o conftest conftest.S]) \ ++ && AC_TRY_COMMAND([LC_ALL=C $READELF -Wr conftest | grep -q R_LARCH_RELATIVE]) \ ++ && ! 
AC_TRY_COMMAND([LC_ALL=C $READELF -Wl conftest | grep -q INTERP]) + then + libc_cv_static_pie_on_loongarch=yes + fi +@@ -74,6 +62,6 @@ else + libc_cv_loongarch_vec_asm=no + fi + rm -f conftest*]) +-if test $libc_cv_loongarch_vec_asm = yes; then +- AC_DEFINE(HAVE_LOONGARCH_VEC_ASM) ++if test $libc_cv_loongarch_vec_asm = no; then ++ AC_MSG_ERROR([binutils version is too old, use 2.41 or newer version]) + fi +diff --git a/sysdeps/loongarch/dl-audit-check.h b/sysdeps/loongarch/dl-audit-check.h +new file mode 100644 +index 00000000..a139c939 +--- /dev/null ++++ b/sysdeps/loongarch/dl-audit-check.h +@@ -0,0 +1,23 @@ ++/* rtld-audit version check. LoongArch version. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++static inline bool ++_dl_audit_check_version (unsigned int lav) ++{ ++ return lav == LAV_CURRENT; ++} +diff --git a/sysdeps/loongarch/dl-link.sym b/sysdeps/loongarch/dl-link.sym +index 868ab7c6..b534968e 100644 +--- a/sysdeps/loongarch/dl-link.sym ++++ b/sysdeps/loongarch/dl-link.sym +@@ -6,9 +6,13 @@ DL_SIZEOF_RG sizeof(struct La_loongarch_regs) + DL_SIZEOF_RV sizeof(struct La_loongarch_retval) + + DL_OFFSET_RG_A0 offsetof(struct La_loongarch_regs, lr_reg) +-DL_OFFSET_RG_FA0 offsetof(struct La_loongarch_regs, lr_fpreg) ++#ifndef __loongarch_soft_float ++DL_OFFSET_RG_VEC0 offsetof(struct La_loongarch_regs, lr_vec) ++#endif + DL_OFFSET_RG_RA offsetof(struct La_loongarch_regs, lr_ra) + DL_OFFSET_RG_SP offsetof(struct La_loongarch_regs, lr_sp) + + DL_OFFSET_RV_A0 offsetof(struct La_loongarch_retval, lrv_a0) +-DL_OFFSET_RV_FA0 offsetof(struct La_loongarch_retval, lrv_a1) ++#ifndef __loongarch_soft_float ++DL_OFFSET_RV_VEC0 offsetof(struct La_loongarch_retval, lrv_vec0) ++#endif +diff --git a/sysdeps/loongarch/dl-machine.h b/sysdeps/loongarch/dl-machine.h +index 51ce9af8..57913cef 100644 +--- a/sysdeps/loongarch/dl-machine.h ++++ b/sysdeps/loongarch/dl-machine.h +@@ -90,7 +90,7 @@ static inline ElfW (Addr) elf_machine_dynamic (void) + or $a0, $sp, $zero \n\ + bl _dl_start \n\ + # Stash user entry point in s0. \n\ +- or $s0, $v0, $zero \n\ ++ or $s0, $a0, $zero \n\ + # Load the original argument count. \n\ + ld.d $a1, $sp, 0 \n\ + # Call _dl_init (struct link_map *main_map, int argc, \ +@@ -270,9 +270,11 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[], + /* If using PLTs, fill in the first two entries of .got.plt. 
*/ + if (l->l_info[DT_JMPREL]) + { +-#if HAVE_LOONGARCH_VEC_ASM && !defined __loongarch_soft_float ++#if !defined __loongarch_soft_float + extern void _dl_runtime_resolve_lasx (void) attribute_hidden; + extern void _dl_runtime_resolve_lsx (void) attribute_hidden; ++ extern void _dl_runtime_profile_lasx (void) attribute_hidden; ++ extern void _dl_runtime_profile_lsx (void) attribute_hidden; + #endif + extern void _dl_runtime_resolve (void) attribute_hidden; + extern void _dl_runtime_profile (void) attribute_hidden; +@@ -287,7 +289,14 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[], + end in this function. */ + if (profile != 0) + { +- gotplt[0] = (ElfW(Addr)) &_dl_runtime_profile; ++#if !defined __loongarch_soft_float ++ if (SUPPORT_LASX) ++ gotplt[0] = (ElfW(Addr)) &_dl_runtime_profile_lasx; ++ else if (SUPPORT_LSX) ++ gotplt[0] = (ElfW(Addr)) &_dl_runtime_profile_lsx; ++ else ++#endif ++ gotplt[0] = (ElfW(Addr)) &_dl_runtime_profile; + + if (GLRO(dl_profile) != NULL + && _dl_name_match_p (GLRO(dl_profile), l)) +@@ -300,7 +309,7 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[], + /* This function will get called to fix up the GOT entry + indicated by the offset on the stack, and then jump to + the resolved address. */ +-#if HAVE_LOONGARCH_VEC_ASM && !defined __loongarch_soft_float ++#if !defined __loongarch_soft_float + if (SUPPORT_LASX) + gotplt[0] = (ElfW(Addr)) &_dl_runtime_resolve_lasx; + else if (SUPPORT_LSX) +diff --git a/sysdeps/loongarch/dl-trampoline.S b/sysdeps/loongarch/dl-trampoline.S +index f6ba5e44..bb449ecf 100644 +--- a/sysdeps/loongarch/dl-trampoline.S ++++ b/sysdeps/loongarch/dl-trampoline.S +@@ -19,193 +19,24 @@ + #include + #include + +-#if HAVE_LOONGARCH_VEC_ASM && !defined __loongarch_soft_float ++#if !defined __loongarch_soft_float + #define USE_LASX + #define _dl_runtime_resolve _dl_runtime_resolve_lasx ++#define _dl_runtime_profile _dl_runtime_profile_lasx + #include "dl-trampoline.h" + #undef FRAME_SIZE + #undef USE_LASX + #undef _dl_runtime_resolve ++#undef _dl_runtime_profile + + #define USE_LSX + #define _dl_runtime_resolve _dl_runtime_resolve_lsx ++#define _dl_runtime_profile _dl_runtime_profile_lsx + #include "dl-trampoline.h" + #undef FRAME_SIZE + #undef USE_LSX + #undef _dl_runtime_resolve ++#undef _dl_runtime_profile + #endif + + #include "dl-trampoline.h" +- +-#include "dl-link.h" +- +-ENTRY (_dl_runtime_profile) +- /* LoongArch we get called with: +- t0 linkr_map pointer +- t1 the scaled offset stored in t0, which can be used +- to calculate the offset of the current symbol in .rela.plt +- t2 %hi(%pcrel(.got.plt)) stored in t2, no use in this function +- t3 dl resolver entry point, no use in this function +- +- Stack frame layout: +- [sp, #96] La_loongarch_regs +- [sp, #48] La_loongarch_retval +- [sp, #40] frame size return from pltenter +- [sp, #32] dl_profile_call saved a1 +- [sp, #24] dl_profile_call saved a0 +- [sp, #16] T1 +- [sp, #0] ra, fp <- fp +- */ +- +-# define OFFSET_T1 16 +-# define OFFSET_SAVED_CALL_A0 OFFSET_T1 + 8 +-# define OFFSET_FS OFFSET_SAVED_CALL_A0 + 16 +-# define OFFSET_RV OFFSET_FS + 8 +-# define OFFSET_RG OFFSET_RV + DL_SIZEOF_RV +- +-# define SF_SIZE (-(-(OFFSET_RG + DL_SIZEOF_RG) & ALMASK)) +- +- /* Save arguments to stack. 
*/ +- ADDI sp, sp, -SF_SIZE +- REG_S ra, sp, 0 +- REG_S fp, sp, 8 +- +- or fp, sp, zero +- +- REG_S a0, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 0*SZREG +- REG_S a1, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 1*SZREG +- REG_S a2, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 2*SZREG +- REG_S a3, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 3*SZREG +- REG_S a4, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 4*SZREG +- REG_S a5, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 5*SZREG +- REG_S a6, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 6*SZREG +- REG_S a7, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 7*SZREG +- +-#ifndef __loongarch_soft_float +- FREG_S fa0, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 0*SZFREG +- FREG_S fa1, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 1*SZFREG +- FREG_S fa2, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 2*SZFREG +- FREG_S fa3, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 3*SZFREG +- FREG_S fa4, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 4*SZFREG +- FREG_S fa5, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 5*SZFREG +- FREG_S fa6, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 6*SZFREG +- FREG_S fa7, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 7*SZFREG +-#endif +- +- /* Update .got.plt and obtain runtime address of callee. */ +- SLLI a1, t1, 1 +- or a0, t0, zero +- ADD a1, a1, t1 +- or a2, ra, zero /* return addr */ +- ADDI a3, fp, OFFSET_RG /* La_loongarch_regs pointer */ +- ADDI a4, fp, OFFSET_FS /* frame size return from pltenter */ +- +- REG_S a0, fp, OFFSET_SAVED_CALL_A0 +- REG_S a1, fp, OFFSET_SAVED_CALL_A0 + SZREG +- +- la t2, _dl_profile_fixup +- jirl ra, t2, 0 +- +- REG_L t3, fp, OFFSET_FS +- bge t3, zero, 1f +- +- /* Save the return. */ +- or t4, v0, zero +- +- /* Restore arguments from stack. */ +- REG_L a0, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 0*SZREG +- REG_L a1, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 1*SZREG +- REG_L a2, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 2*SZREG +- REG_L a3, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 3*SZREG +- REG_L a4, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 4*SZREG +- REG_L a5, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 5*SZREG +- REG_L a6, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 6*SZREG +- REG_L a7, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 7*SZREG +- +-#ifndef __loongarch_soft_float +- FREG_L fa0, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 0*SZFREG +- FREG_L fa1, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 1*SZFREG +- FREG_L fa2, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 2*SZFREG +- FREG_L fa3, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 3*SZFREG +- FREG_L fa4, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 4*SZFREG +- FREG_L fa5, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 5*SZFREG +- FREG_L fa6, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 6*SZFREG +- FREG_L fa7, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 7*SZFREG +-#endif +- +- REG_L ra, fp, 0 +- REG_L fp, fp, SZREG +- +- ADDI sp, sp, SF_SIZE +- jirl zero, t4, 0 +- +-1: +- /* The new frame size is in t3. */ +- SUB sp, fp, t3 +- BSTRINS sp, zero, 3, 0 +- +- REG_S a0, fp, OFFSET_T1 +- +- or a0, sp, zero +- ADDI a1, fp, SF_SIZE +- or a2, t3, zero +- la t5, memcpy +- jirl ra, t5, 0 +- +- REG_L t6, fp, OFFSET_T1 +- +- /* Call the function. 
*/ +- REG_L a0, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 0*SZREG +- REG_L a1, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 1*SZREG +- REG_L a2, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 2*SZREG +- REG_L a3, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 3*SZREG +- REG_L a4, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 4*SZREG +- REG_L a5, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 5*SZREG +- REG_L a6, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 6*SZREG +- REG_L a7, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 7*SZREG +- +-#ifndef __loongarch_soft_float +- FREG_L fa0, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 0*SZFREG +- FREG_L fa1, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 1*SZFREG +- FREG_L fa2, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 2*SZFREG +- FREG_L fa3, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 3*SZFREG +- FREG_L fa4, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 4*SZFREG +- FREG_L fa5, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 5*SZFREG +- FREG_L fa6, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 6*SZFREG +- FREG_L fa7, fp, OFFSET_RG + DL_OFFSET_RG_FA0 + 7*SZFREG +-#endif +- jirl ra, t6, 0 +- +- REG_S a0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_A0 +- REG_S a1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_A0 + SZREG +- +-#ifndef __loongarch_soft_float +- FREG_S fa0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_FA0 +- FREG_S fa1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_FA0 + SZFREG +-#endif +- +- /* Setup call to pltexit. */ +- REG_L a0, fp, OFFSET_SAVED_CALL_A0 +- REG_L a1, fp, OFFSET_SAVED_CALL_A0 + SZREG +- ADDI a2, fp, OFFSET_RG +- ADDI a3, fp, OFFSET_RV +- la t7, _dl_audit_pltexit +- jirl ra, t7, 0 +- +- REG_L a0, fp, OFFSET_RV + DL_OFFSET_RV_A0 +- REG_L a1, fp, OFFSET_RV + DL_OFFSET_RV_A0 + SZREG +- +-#ifndef __loongarch_soft_float +- FREG_L fa0, fp, OFFSET_RV + DL_OFFSET_RV_FA0 +- FREG_L fa1, fp, OFFSET_RV + DL_OFFSET_RV_FA0 + SZFREG +-#endif +- +- /* RA from within La_loongarch_reg. */ +- REG_L ra, fp, OFFSET_RG + DL_OFFSET_RG_RA +- or sp, fp, zero +- ADDI sp, sp, SF_SIZE +- REG_S fp, fp, SZREG +- +- jirl zero, ra, 0 +- +-END (_dl_runtime_profile) +diff --git a/sysdeps/loongarch/dl-trampoline.h b/sysdeps/loongarch/dl-trampoline.h +index 99fcacab..e298439d 100644 +--- a/sysdeps/loongarch/dl-trampoline.h ++++ b/sysdeps/loongarch/dl-trampoline.h +@@ -125,3 +125,245 @@ ENTRY (_dl_runtime_resolve) + /* Invoke the callee. */ + jirl zero, t1, 0 + END (_dl_runtime_resolve) ++ ++#include "dl-link.h" ++ ++ENTRY (_dl_runtime_profile) ++ /* LoongArch we get called with: ++ t0 linkr_map pointer ++ t1 the scaled offset stored in t0, which can be used ++ to calculate the offset of the current symbol in .rela.plt ++ t2 %hi(%pcrel(.got.plt)) stored in t2, no use in this function ++ t3 dl resolver entry point, no use in this function ++ ++ Stack frame layout: ++ [sp, #208] La_loongarch_regs ++ [sp, #128] La_loongarch_retval // align: 16 ++ [sp, #112] frame size return from pltenter ++ [sp, #80 ] dl_profile_call saved vec1 ++ [sp, #48 ] dl_profile_call saved vec0 // align: 16 ++ [sp, #32 ] dl_profile_call saved a1 ++ [sp, #24 ] dl_profile_call saved a0 ++ [sp, #16 ] T1 ++ [sp, #0 ] ra, fp <- fp ++ */ ++ ++# define OFFSET_T1 16 ++# define OFFSET_SAVED_CALL_A0 OFFSET_T1 + 8 ++# define OFFSET_FS OFFSET_SAVED_CALL_A0 + 16 + 8 + 64 ++# define OFFSET_RV OFFSET_FS + 8 + 8 ++# define OFFSET_RG OFFSET_RV + DL_SIZEOF_RV ++ ++# define SF_SIZE (-(-(OFFSET_RG + DL_SIZEOF_RG) & ALMASK)) ++ ++ /* Save arguments to stack. 
*/ ++ ADDI sp, sp, -SF_SIZE ++ REG_S ra, sp, 0 ++ REG_S fp, sp, 8 ++ ++ or fp, sp, zero ++ ++ REG_S a0, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 0*SZREG ++ REG_S a1, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 1*SZREG ++ REG_S a2, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 2*SZREG ++ REG_S a3, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 3*SZREG ++ REG_S a4, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 4*SZREG ++ REG_S a5, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 5*SZREG ++ REG_S a6, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 6*SZREG ++ REG_S a7, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 7*SZREG ++ ++#ifdef USE_LASX ++ xvst xr0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZXREG ++ xvst xr1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZXREG ++ xvst xr2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZXREG ++ xvst xr3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZXREG ++ xvst xr4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZXREG ++ xvst xr5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZXREG ++ xvst xr6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZXREG ++ xvst xr7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZXREG ++#elif defined USE_LSX ++ vst vr0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZVREG ++ vst vr1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZVREG ++ vst vr2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZVREG ++ vst vr3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZVREG ++ vst vr4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZVREG ++ vst vr5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZVREG ++ vst vr6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZVREG ++ vst vr7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZVREG ++#elif !defined __loongarch_soft_float ++ FREG_S fa0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZFREG ++ FREG_S fa1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZFREG ++ FREG_S fa2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZFREG ++ FREG_S fa3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZFREG ++ FREG_S fa4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZFREG ++ FREG_S fa5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZFREG ++ FREG_S fa6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZFREG ++ FREG_S fa7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZFREG ++#endif ++ ++ /* Update .got.plt and obtain runtime address of callee. */ ++ SLLI a1, t1, 1 ++ or a0, t0, zero ++ ADD a1, a1, t1 ++ or a2, ra, zero /* return addr */ ++ ADDI a3, fp, OFFSET_RG /* La_loongarch_regs pointer */ ++ ADDI a4, fp, OFFSET_FS /* frame size return from pltenter */ ++ ++ REG_S a0, fp, OFFSET_SAVED_CALL_A0 ++ REG_S a1, fp, OFFSET_SAVED_CALL_A0 + SZREG ++ ++ la t2, _dl_profile_fixup ++ jirl ra, t2, 0 ++ ++ REG_L t3, fp, OFFSET_FS ++ bge t3, zero, 1f ++ ++ /* Save the return. */ ++ or t4, v0, zero ++ ++ /* Restore arguments from stack. 
*/ ++ REG_L a0, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 0*SZREG ++ REG_L a1, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 1*SZREG ++ REG_L a2, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 2*SZREG ++ REG_L a3, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 3*SZREG ++ REG_L a4, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 4*SZREG ++ REG_L a5, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 5*SZREG ++ REG_L a6, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 6*SZREG ++ REG_L a7, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 7*SZREG ++ ++#ifdef USE_LASX ++ xvld xr0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZXREG ++ xvld xr1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZXREG ++ xvld xr2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZXREG ++ xvld xr3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZXREG ++ xvld xr4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZXREG ++ xvld xr5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZXREG ++ xvld xr6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZXREG ++ xvld xr7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZXREG ++#elif defined USE_LSX ++ vld vr0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZVREG ++ vld vr1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZVREG ++ vld vr2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZVREG ++ vld vr3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZVREG ++ vld vr4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZVREG ++ vld vr5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZVREG ++ vld vr6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZVREG ++ vld vr7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZVREG ++#elif !defined __loongarch_soft_float ++ FREG_L fa0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZFREG ++ FREG_L fa1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZFREG ++ FREG_L fa2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZFREG ++ FREG_L fa3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZFREG ++ FREG_L fa4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZFREG ++ FREG_L fa5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZFREG ++ FREG_L fa6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZFREG ++ FREG_L fa7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZFREG ++#endif ++ ++ REG_L ra, fp, 0 ++ REG_L fp, fp, SZREG ++ ++ ADDI sp, sp, SF_SIZE ++ jirl zero, t4, 0 ++ ++1: ++ /* The new frame size is in t3. */ ++ SUB sp, fp, t3 ++ BSTRINS sp, zero, 3, 0 ++ ++ REG_S a0, fp, OFFSET_T1 ++ ++ or a0, sp, zero ++ ADDI a1, fp, SF_SIZE ++ or a2, t3, zero ++ la t5, memcpy ++ jirl ra, t5, 0 ++ ++ REG_L t6, fp, OFFSET_T1 ++ ++ /* Call the function. 
*/ ++ REG_L a0, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 0*SZREG ++ REG_L a1, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 1*SZREG ++ REG_L a2, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 2*SZREG ++ REG_L a3, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 3*SZREG ++ REG_L a4, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 4*SZREG ++ REG_L a5, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 5*SZREG ++ REG_L a6, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 6*SZREG ++ REG_L a7, fp, OFFSET_RG + DL_OFFSET_RG_A0 + 7*SZREG ++ ++#ifdef USE_LASX ++ xvld xr0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZXREG ++ xvld xr1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZXREG ++ xvld xr2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZXREG ++ xvld xr3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZXREG ++ xvld xr4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZXREG ++ xvld xr5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZXREG ++ xvld xr6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZXREG ++ xvld xr7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZXREG ++#elif defined USE_LSX ++ vld vr0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZVREG ++ vld vr1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZVREG ++ vld vr2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZVREG ++ vld vr3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZVREG ++ vld vr4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZVREG ++ vld vr5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZVREG ++ vld vr6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZVREG ++ vld vr7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZVREG ++#elif !defined __loongarch_soft_float ++ FREG_L fa0, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 0*SZFREG ++ FREG_L fa1, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 1*SZFREG ++ FREG_L fa2, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 2*SZFREG ++ FREG_L fa3, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 3*SZFREG ++ FREG_L fa4, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 4*SZFREG ++ FREG_L fa5, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 5*SZFREG ++ FREG_L fa6, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 6*SZFREG ++ FREG_L fa7, fp, OFFSET_RG + DL_OFFSET_RG_VEC0 + 7*SZFREG ++#endif ++ ++ jirl ra, t6, 0 ++ ++ REG_S a0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_A0 ++ REG_S a1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_A0 + SZREG ++ ++#ifdef USE_LASX ++ xvst xr0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 ++ xvst xr1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 + SZXREG ++#elif defined USE_LSX ++ vst vr0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 ++ vst vr1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 + SZVREG ++#elif !defined __loongarch_soft_float ++ FREG_S fa0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 ++ FREG_S fa1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 + SZFREG ++#endif ++ ++ /* Setup call to pltexit. */ ++ REG_L a0, fp, OFFSET_SAVED_CALL_A0 ++ REG_L a1, fp, OFFSET_SAVED_CALL_A0 + SZREG ++ ADDI a2, fp, OFFSET_RG ++ ADDI a3, fp, OFFSET_RV ++ la t7, _dl_audit_pltexit ++ jirl ra, t7, 0 ++ ++ REG_L a0, fp, OFFSET_RV + DL_OFFSET_RV_A0 ++ REG_L a1, fp, OFFSET_RV + DL_OFFSET_RV_A0 + SZREG ++ ++#ifdef USE_LASX ++ xvld xr0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 ++ xvld xr1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 + SZXREG ++#elif defined USE_LSX ++ vld vr0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 ++ vld vr1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 + SZVREG ++#elif !defined __loongarch_soft_float ++ FREG_L fa0, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 ++ FREG_L fa1, fp, OFFSET_SAVED_CALL_A0 + DL_OFFSET_RV_VEC0 + SZFREG ++#endif ++ ++ /* RA from within La_loongarch_reg. 
*/ ++ REG_L ra, fp, OFFSET_RG + DL_OFFSET_RG_RA ++ or sp, fp, zero ++ ADDI sp, sp, SF_SIZE ++ REG_S fp, fp, SZREG ++ ++ jirl zero, ra, 0 ++ ++END (_dl_runtime_profile) +diff --git a/sysdeps/loongarch/lp64/multiarch/Makefile b/sysdeps/loongarch/lp64/multiarch/Makefile +new file mode 100644 +index 00000000..fe863e1b +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/Makefile +@@ -0,0 +1,52 @@ ++ifeq ($(subdir),string) ++sysdep_routines += \ ++ strlen-aligned \ ++ strlen-lsx \ ++ strlen-lasx \ ++ strnlen-aligned \ ++ strnlen-lsx \ ++ strnlen-lasx \ ++ strchr-aligned \ ++ strchr-lsx \ ++ strchr-lasx \ ++ strrchr-aligned \ ++ strrchr-lsx \ ++ strrchr-lasx \ ++ strchrnul-aligned \ ++ strchrnul-lsx \ ++ strchrnul-lasx \ ++ strcmp-aligned \ ++ strcmp-lsx \ ++ strncmp-aligned \ ++ strncmp-lsx \ ++ strcpy-aligned \ ++ strcpy-unaligned \ ++ strcpy-lsx \ ++ strcpy-lasx \ ++ stpcpy-aligned \ ++ stpcpy-unaligned \ ++ stpcpy-lsx \ ++ stpcpy-lasx \ ++ memcpy-aligned \ ++ memcpy-unaligned \ ++ memmove-unaligned \ ++ memmove-lsx \ ++ memmove-lasx \ ++ rawmemchr-aligned \ ++ rawmemchr-lsx \ ++ rawmemchr-lasx \ ++ memchr-aligned \ ++ memchr-lsx \ ++ memchr-lasx \ ++ memrchr-generic \ ++ memrchr-lsx \ ++ memrchr-lasx \ ++ memset-aligned \ ++ memset-unaligned \ ++ memset-lsx \ ++ memset-lasx \ ++ memcmp-aligned \ ++ memcmp-lsx \ ++ memcmp-lasx \ ++# sysdep_routines ++endif +diff --git a/sysdeps/loongarch/lp64/multiarch/dl-symbol-redir-ifunc.h b/sysdeps/loongarch/lp64/multiarch/dl-symbol-redir-ifunc.h +new file mode 100644 +index 00000000..e2723873 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/dl-symbol-redir-ifunc.h +@@ -0,0 +1,24 @@ ++/* Symbol rediretion for loader/static initialization code. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#ifndef _DL_IFUNC_GENERIC_H ++#define _DL_IFUNC_GENERIC_H ++ ++asm ("memset = __memset_aligned"); ++ ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c +new file mode 100644 +index 00000000..529e2369 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/ifunc-impl-list.c +@@ -0,0 +1,164 @@ ++/* Enumerate available IFUNC implementations of a function LoongArch64 version. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. 
++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++size_t ++__libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, ++ size_t max) ++{ ++ ++ size_t i = max; ++ ++ IFUNC_IMPL (i, name, strlen, ++#if !defined __loongarch_soft_float ++ IFUNC_IMPL_ADD (array, i, strlen, SUPPORT_LASX, __strlen_lasx) ++ IFUNC_IMPL_ADD (array, i, strlen, SUPPORT_LSX, __strlen_lsx) ++#endif ++ IFUNC_IMPL_ADD (array, i, strlen, 1, __strlen_aligned) ++ ) ++ ++ IFUNC_IMPL (i, name, strnlen, ++#if !defined __loongarch_soft_float ++ IFUNC_IMPL_ADD (array, i, strnlen, SUPPORT_LASX, __strnlen_lasx) ++ IFUNC_IMPL_ADD (array, i, strnlen, SUPPORT_LSX, __strnlen_lsx) ++#endif ++ IFUNC_IMPL_ADD (array, i, strnlen, 1, __strnlen_aligned) ++ ) ++ ++ IFUNC_IMPL (i, name, strchr, ++#if !defined __loongarch_soft_float ++ IFUNC_IMPL_ADD (array, i, strchr, SUPPORT_LASX, __strchr_lasx) ++ IFUNC_IMPL_ADD (array, i, strchr, SUPPORT_LSX, __strchr_lsx) ++#endif ++ IFUNC_IMPL_ADD (array, i, strchr, 1, __strchr_aligned) ++ ) ++ ++ IFUNC_IMPL (i, name, strchrnul, ++#if !defined __loongarch_soft_float ++ IFUNC_IMPL_ADD (array, i, strchrnul, SUPPORT_LASX, __strchrnul_lasx) ++ IFUNC_IMPL_ADD (array, i, strchrnul, SUPPORT_LSX, __strchrnul_lsx) ++#endif ++ IFUNC_IMPL_ADD (array, i, strchrnul, 1, __strchrnul_aligned) ++ ) ++ ++ IFUNC_IMPL (i, name, strcmp, ++#if !defined __loongarch_soft_float ++ IFUNC_IMPL_ADD (array, i, strcmp, SUPPORT_LSX, __strcmp_lsx) ++#endif ++ IFUNC_IMPL_ADD (array, i, strcmp, 1, __strcmp_aligned) ++ ) ++ ++ IFUNC_IMPL (i, name, strncmp, ++#if !defined __loongarch_soft_float ++ IFUNC_IMPL_ADD (array, i, strncmp, SUPPORT_LSX, __strncmp_lsx) ++#endif ++ IFUNC_IMPL_ADD (array, i, strncmp, 1, __strncmp_aligned) ++ ) ++ ++ IFUNC_IMPL (i, name, strcpy, ++#if !defined __loongarch_soft_float ++ IFUNC_IMPL_ADD (array, i, strcpy, SUPPORT_LASX, __strcpy_lasx) ++ IFUNC_IMPL_ADD (array, i, strcpy, SUPPORT_LSX, __strcpy_lsx) ++#endif ++ IFUNC_IMPL_ADD (array, i, strcpy, SUPPORT_UAL, __strcpy_unaligned) ++ IFUNC_IMPL_ADD (array, i, strcpy, 1, __strcpy_aligned) ++ ) ++ ++ IFUNC_IMPL (i, name, stpcpy, ++#if !defined __loongarch_soft_float ++ IFUNC_IMPL_ADD (array, i, stpcpy, SUPPORT_LASX, __stpcpy_lasx) ++ IFUNC_IMPL_ADD (array, i, stpcpy, SUPPORT_LSX, __stpcpy_lsx) ++#endif ++ IFUNC_IMPL_ADD (array, i, stpcpy, SUPPORT_UAL, __stpcpy_unaligned) ++ IFUNC_IMPL_ADD (array, i, stpcpy, 1, __stpcpy_aligned) ++ ) ++ ++ IFUNC_IMPL (i, name, strrchr, ++#if !defined __loongarch_soft_float ++ IFUNC_IMPL_ADD (array, i, strrchr, SUPPORT_LASX, __strrchr_lasx) ++ IFUNC_IMPL_ADD (array, i, strrchr, SUPPORT_LSX, __strrchr_lsx) ++#endif ++ IFUNC_IMPL_ADD (array, i, strrchr, 1, __strrchr_aligned) ++ ) ++ ++ IFUNC_IMPL (i, name, memcpy, ++#if !defined __loongarch_soft_float ++ IFUNC_IMPL_ADD (array, i, memcpy, SUPPORT_LASX, __memcpy_lasx) ++ IFUNC_IMPL_ADD (array, i, memcpy, SUPPORT_LSX, __memcpy_lsx) ++#endif ++ IFUNC_IMPL_ADD (array, i, memcpy, SUPPORT_UAL, __memcpy_unaligned) ++ IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_aligned) ++ ) ++ ++ IFUNC_IMPL (i, name, memmove, ++#if !defined __loongarch_soft_float ++ IFUNC_IMPL_ADD (array, i, memmove, SUPPORT_LASX, __memmove_lasx) ++ IFUNC_IMPL_ADD (array, i, memmove, SUPPORT_LSX, __memmove_lsx) ++#endif ++ IFUNC_IMPL_ADD (array, i, memmove, SUPPORT_UAL, __memmove_unaligned) ++ IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_aligned) ++ ) ++ 
++ IFUNC_IMPL (i, name, rawmemchr, ++#if !defined __loongarch_soft_float ++ IFUNC_IMPL_ADD (array, i, rawmemchr, SUPPORT_LASX, __rawmemchr_lasx) ++ IFUNC_IMPL_ADD (array, i, rawmemchr, SUPPORT_LSX, __rawmemchr_lsx) ++#endif ++ IFUNC_IMPL_ADD (array, i, rawmemchr, 1, __rawmemchr_aligned) ++ ) ++ ++ IFUNC_IMPL (i, name, memchr, ++#if !defined __loongarch_soft_float ++ IFUNC_IMPL_ADD (array, i, memchr, SUPPORT_LASX, __memchr_lasx) ++ IFUNC_IMPL_ADD (array, i, memchr, SUPPORT_LSX, __memchr_lsx) ++#endif ++ IFUNC_IMPL_ADD (array, i, memchr, 1, __memchr_aligned) ++ ) ++ ++ IFUNC_IMPL (i, name, memrchr, ++#if !defined __loongarch_soft_float ++ IFUNC_IMPL_ADD (array, i, memrchr, SUPPORT_LASX, __memrchr_lasx) ++ IFUNC_IMPL_ADD (array, i, memrchr, SUPPORT_LSX, __memrchr_lsx) ++#endif ++ IFUNC_IMPL_ADD (array, i, memrchr, 1, __memrchr_generic) ++ ) ++ ++ IFUNC_IMPL (i, name, memset, ++#if !defined __loongarch_soft_float ++ IFUNC_IMPL_ADD (array, i, memset, SUPPORT_LASX, __memset_lasx) ++ IFUNC_IMPL_ADD (array, i, memset, SUPPORT_LSX, __memset_lsx) ++#endif ++ IFUNC_IMPL_ADD (array, i, memset, SUPPORT_UAL, __memset_unaligned) ++ IFUNC_IMPL_ADD (array, i, memset, 1, __memset_aligned) ++ ) ++ ++ IFUNC_IMPL (i, name, memcmp, ++#if !defined __loongarch_soft_float ++ IFUNC_IMPL_ADD (array, i, memcmp, SUPPORT_LASX, __memcmp_lasx) ++ IFUNC_IMPL_ADD (array, i, memcmp, SUPPORT_LSX, __memcmp_lsx) ++#endif ++ IFUNC_IMPL_ADD (array, i, memcmp, 1, __memcmp_aligned) ++ ) ++ return i; ++} +diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-lasx.h b/sysdeps/loongarch/lp64/multiarch/ifunc-lasx.h +new file mode 100644 +index 00000000..3be67da6 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/ifunc-lasx.h +@@ -0,0 +1,45 @@ ++/* Common definition for ifunc selection implementation. ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . 
*/ ++ ++#include ++#include ++ ++#if !defined __loongarch_soft_float ++extern __typeof (REDIRECT_NAME) OPTIMIZE (lasx) attribute_hidden; ++extern __typeof (REDIRECT_NAME) OPTIMIZE (lsx) attribute_hidden; ++#endif ++ ++extern __typeof (REDIRECT_NAME) OPTIMIZE (aligned) attribute_hidden; ++extern __typeof (REDIRECT_NAME) OPTIMIZE (unaligned) attribute_hidden; ++ ++static inline void * ++IFUNC_SELECTOR (void) ++{ ++#if !defined __loongarch_soft_float ++ if (SUPPORT_LASX) ++ return OPTIMIZE (lasx); ++ else if (SUPPORT_LSX) ++ return OPTIMIZE (lsx); ++ else ++#endif ++ if (SUPPORT_UAL) ++ return OPTIMIZE (unaligned); ++ else ++ return OPTIMIZE (aligned); ++} +diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-memchr.h b/sysdeps/loongarch/lp64/multiarch/ifunc-memchr.h +new file mode 100644 +index 00000000..9060ccd5 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/ifunc-memchr.h +@@ -0,0 +1,40 @@ ++/* Common definition for memchr ifunc selections. ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++ ++#if !defined __loongarch_soft_float ++extern __typeof (REDIRECT_NAME) OPTIMIZE (lasx) attribute_hidden; ++extern __typeof (REDIRECT_NAME) OPTIMIZE (lsx) attribute_hidden; ++#endif ++extern __typeof (REDIRECT_NAME) OPTIMIZE (aligned) attribute_hidden; ++ ++static inline void * ++IFUNC_SELECTOR (void) ++{ ++#if !defined __loongarch_soft_float ++ if (SUPPORT_LASX) ++ return OPTIMIZE (lasx); ++ else if (SUPPORT_LSX) ++ return OPTIMIZE (lsx); ++ else ++#endif ++ return OPTIMIZE (aligned); ++} +diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-memcmp.h b/sysdeps/loongarch/lp64/multiarch/ifunc-memcmp.h +new file mode 100644 +index 00000000..04adc2e5 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/ifunc-memcmp.h +@@ -0,0 +1,40 @@ ++/* Common definition for memcmp ifunc selections. ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . 
*/ ++ ++#include ++#include ++ ++#if !defined __loongarch_soft_float ++extern __typeof (REDIRECT_NAME) OPTIMIZE (lasx) attribute_hidden; ++extern __typeof (REDIRECT_NAME) OPTIMIZE (lsx) attribute_hidden; ++#endif ++extern __typeof (REDIRECT_NAME) OPTIMIZE (aligned) attribute_hidden; ++ ++static inline void * ++IFUNC_SELECTOR (void) ++{ ++#if !defined __loongarch_soft_float ++ if (SUPPORT_LASX) ++ return OPTIMIZE (lasx); ++ else if (SUPPORT_LSX) ++ return OPTIMIZE (lsx); ++ else ++#endif ++ return OPTIMIZE (aligned); ++} +diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-memrchr.h b/sysdeps/loongarch/lp64/multiarch/ifunc-memrchr.h +new file mode 100644 +index 00000000..8215f9ad +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/ifunc-memrchr.h +@@ -0,0 +1,40 @@ ++/* Common definition for memrchr implementation. ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++ ++#if !defined __loongarch_soft_float ++extern __typeof (REDIRECT_NAME) OPTIMIZE (lasx) attribute_hidden; ++extern __typeof (REDIRECT_NAME) OPTIMIZE (lsx) attribute_hidden; ++#endif ++extern __typeof (REDIRECT_NAME) OPTIMIZE (generic) attribute_hidden; ++ ++static inline void * ++IFUNC_SELECTOR (void) ++{ ++#if !defined __loongarch_soft_float ++ if (SUPPORT_LASX) ++ return OPTIMIZE (lasx); ++ else if (SUPPORT_LSX) ++ return OPTIMIZE (lsx); ++ else ++#endif ++ return OPTIMIZE (generic); ++} +diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-rawmemchr.h b/sysdeps/loongarch/lp64/multiarch/ifunc-rawmemchr.h +new file mode 100644 +index 00000000..a7bb4cf9 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/ifunc-rawmemchr.h +@@ -0,0 +1,40 @@ ++/* Common definition for rawmemchr ifunc selections. ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . 
*/ ++ ++#include ++#include ++ ++#if !defined __loongarch_soft_float ++extern __typeof (REDIRECT_NAME) OPTIMIZE (lasx) attribute_hidden; ++extern __typeof (REDIRECT_NAME) OPTIMIZE (lsx) attribute_hidden; ++#endif ++extern __typeof (REDIRECT_NAME) OPTIMIZE (aligned) attribute_hidden; ++ ++static inline void * ++IFUNC_SELECTOR (void) ++{ ++#if !defined __loongarch_soft_float ++ if (SUPPORT_LASX) ++ return OPTIMIZE (lasx); ++ else if (SUPPORT_LSX) ++ return OPTIMIZE (lsx); ++ else ++#endif ++ return OPTIMIZE (aligned); ++} +diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-strchr.h b/sysdeps/loongarch/lp64/multiarch/ifunc-strchr.h +new file mode 100644 +index 00000000..4494db79 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/ifunc-strchr.h +@@ -0,0 +1,41 @@ ++/* Common definition for strchr ifunc selections. ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++ ++#if !defined __loongarch_soft_float ++extern __typeof (REDIRECT_NAME) OPTIMIZE (lasx) attribute_hidden; ++extern __typeof (REDIRECT_NAME) OPTIMIZE (lsx) attribute_hidden; ++#endif ++ ++extern __typeof (REDIRECT_NAME) OPTIMIZE (aligned) attribute_hidden; ++ ++static inline void * ++IFUNC_SELECTOR (void) ++{ ++#if !defined __loongarch_soft_float ++ if (SUPPORT_LASX) ++ return OPTIMIZE (lasx); ++ else if (SUPPORT_LSX) ++ return OPTIMIZE (lsx); ++ else ++#endif ++ return OPTIMIZE (aligned); ++} +diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-strchrnul.h b/sysdeps/loongarch/lp64/multiarch/ifunc-strchrnul.h +new file mode 100644 +index 00000000..8a925120 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/ifunc-strchrnul.h +@@ -0,0 +1,41 @@ ++/* Common definition for strchrnul ifunc selections. ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . 
*/ ++ ++#include ++#include ++ ++#if !defined __loongarch_soft_float ++extern __typeof (REDIRECT_NAME) OPTIMIZE (lasx) attribute_hidden; ++extern __typeof (REDIRECT_NAME) OPTIMIZE (lsx) attribute_hidden; ++#endif ++ ++extern __typeof (REDIRECT_NAME) OPTIMIZE (aligned) attribute_hidden; ++ ++static inline void * ++IFUNC_SELECTOR (void) ++{ ++#if !defined __loongarch_soft_float ++ if (SUPPORT_LASX) ++ return OPTIMIZE (lasx); ++ else if (SUPPORT_LSX) ++ return OPTIMIZE (lsx); ++ else ++#endif ++ return OPTIMIZE (aligned); ++} +diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-strcmp.h b/sysdeps/loongarch/lp64/multiarch/ifunc-strcmp.h +new file mode 100644 +index 00000000..ca26352b +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/ifunc-strcmp.h +@@ -0,0 +1,38 @@ ++/* Common definition for strcmp ifunc selection. ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++ ++#if !defined __loongarch_soft_float ++extern __typeof (REDIRECT_NAME) OPTIMIZE (lsx) attribute_hidden; ++#endif ++ ++extern __typeof (REDIRECT_NAME) OPTIMIZE (aligned) attribute_hidden; ++ ++static inline void * ++IFUNC_SELECTOR (void) ++{ ++#if !defined __loongarch_soft_float ++ if (SUPPORT_LSX) ++ return OPTIMIZE (lsx); ++ else ++#endif ++ return OPTIMIZE (aligned); ++} +diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-strlen.h b/sysdeps/loongarch/lp64/multiarch/ifunc-strlen.h +new file mode 100644 +index 00000000..6258bb76 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/ifunc-strlen.h +@@ -0,0 +1,40 @@ ++/* Common definition for strlen ifunc selections. ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . 
*/ ++ ++#include ++#include ++ ++#if !defined __loongarch_soft_float ++extern __typeof (REDIRECT_NAME) OPTIMIZE (lasx) attribute_hidden; ++extern __typeof (REDIRECT_NAME) OPTIMIZE (lsx) attribute_hidden; ++#endif ++extern __typeof (REDIRECT_NAME) OPTIMIZE (aligned) attribute_hidden; ++ ++static inline void * ++IFUNC_SELECTOR (void) ++{ ++#if !defined __loongarch_soft_float ++ if (SUPPORT_LASX) ++ return OPTIMIZE (lasx); ++ else if (SUPPORT_LSX) ++ return OPTIMIZE (lsx); ++ else ++#endif ++ return OPTIMIZE (aligned); ++} +diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-strncmp.h b/sysdeps/loongarch/lp64/multiarch/ifunc-strncmp.h +new file mode 100644 +index 00000000..1a7dc36b +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/ifunc-strncmp.h +@@ -0,0 +1,38 @@ ++/* Common definition for strncmp ifunc selection. ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++ ++#if !defined __loongarch_soft_float ++extern __typeof (REDIRECT_NAME) OPTIMIZE (lsx) attribute_hidden; ++#endif ++ ++extern __typeof (REDIRECT_NAME) OPTIMIZE (aligned) attribute_hidden; ++ ++static inline void * ++IFUNC_SELECTOR (void) ++{ ++#if !defined __loongarch_soft_float ++ if (SUPPORT_LSX) ++ return OPTIMIZE (lsx); ++ else ++#endif ++ return OPTIMIZE (aligned); ++} +diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-strnlen.h b/sysdeps/loongarch/lp64/multiarch/ifunc-strnlen.h +new file mode 100644 +index 00000000..5cf89810 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/ifunc-strnlen.h +@@ -0,0 +1,41 @@ ++/* Common definition for strnlen ifunc selections. ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . 
*/ ++ ++#include ++#include ++ ++#if !defined __loongarch_soft_float ++extern __typeof (REDIRECT_NAME) OPTIMIZE (lasx) attribute_hidden; ++extern __typeof (REDIRECT_NAME) OPTIMIZE (lsx) attribute_hidden; ++#endif ++ ++extern __typeof (REDIRECT_NAME) OPTIMIZE (aligned) attribute_hidden; ++ ++static inline void * ++IFUNC_SELECTOR (void) ++{ ++#if !defined __loongarch_soft_float ++ if (SUPPORT_LASX) ++ return OPTIMIZE (lasx); ++ else if (SUPPORT_LSX) ++ return OPTIMIZE (lsx); ++ else ++#endif ++ return OPTIMIZE (aligned); ++} +diff --git a/sysdeps/loongarch/lp64/multiarch/ifunc-strrchr.h b/sysdeps/loongarch/lp64/multiarch/ifunc-strrchr.h +new file mode 100644 +index 00000000..bbb34089 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/ifunc-strrchr.h +@@ -0,0 +1,41 @@ ++/* Common definition for strrchr ifunc selections. ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++ ++#if !defined __loongarch_soft_float ++extern __typeof (REDIRECT_NAME) OPTIMIZE (lasx) attribute_hidden; ++extern __typeof (REDIRECT_NAME) OPTIMIZE (lsx) attribute_hidden; ++#endif ++ ++extern __typeof (REDIRECT_NAME) OPTIMIZE (aligned) attribute_hidden; ++ ++static inline void * ++IFUNC_SELECTOR (void) ++{ ++#if !defined __loongarch_soft_float ++ if (SUPPORT_LASX) ++ return OPTIMIZE (lasx); ++ else if (SUPPORT_LSX) ++ return OPTIMIZE (lsx); ++ else ++#endif ++ return OPTIMIZE (aligned); ++} +diff --git a/sysdeps/loongarch/lp64/multiarch/memchr-aligned.S b/sysdeps/loongarch/lp64/multiarch/memchr-aligned.S +new file mode 100644 +index 00000000..81d0d004 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/memchr-aligned.S +@@ -0,0 +1,95 @@ ++/* Optimized memchr implementation using basic LoongArch instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ . 
*/ ++ ++#include ++#include ++#include ++ ++#if IS_IN (libc) ++# define MEMCHR_NAME __memchr_aligned ++#else ++# define MEMCHR_NAME memchr ++#endif ++ ++LEAF(MEMCHR_NAME, 6) ++ beqz a2, L(out) ++ andi t1, a0, 0x7 ++ add.d a5, a0, a2 ++ bstrins.d a0, zero, 2, 0 ++ ++ ld.d t0, a0, 0 ++ bstrins.d a1, a1, 15, 8 ++ lu12i.w a3, 0x01010 ++ slli.d t2, t1, 03 ++ ++ bstrins.d a1, a1, 31, 16 ++ ori a3, a3, 0x101 ++ li.d t7, -1 ++ li.d t8, 8 ++ ++ bstrins.d a1, a1, 63, 32 ++ bstrins.d a3, a3, 63, 32 ++ sll.d t2, t7, t2 ++ xor t0, t0, a1 ++ ++ ++ addi.d a6, a5, -1 ++ slli.d a4, a3, 7 ++ sub.d t1, t8, t1 ++ orn t0, t0, t2 ++ ++ sub.d t2, t0, a3 ++ andn t3, a4, t0 ++ bstrins.d a6, zero, 2, 0 ++ and t0, t2, t3 ++ ++ bgeu t1, a2, L(end) ++L(loop): ++ bnez t0, L(found) ++ ld.d t1, a0, 8 ++ xor t0, t1, a1 ++ ++ addi.d a0, a0, 8 ++ sub.d t2, t0, a3 ++ andn t3, a4, t0 ++ and t0, t2, t3 ++ ++ ++ bne a0, a6, L(loop) ++L(end): ++ sub.d t1, a5, a6 ++ ctz.d t0, t0 ++ srli.d t0, t0, 3 ++ ++ sltu t1, t0, t1 ++ add.d a0, a0, t0 ++ maskeqz a0, a0, t1 ++ jr ra ++ ++L(found): ++ ctz.d t0, t0 ++ srli.d t0, t0, 3 ++ add.d a0, a0, t0 ++ jr ra ++ ++L(out): ++ move a0, zero ++ jr ra ++END(MEMCHR_NAME) ++ ++libc_hidden_builtin_def (MEMCHR_NAME) +diff --git a/sysdeps/loongarch/lp64/multiarch/memchr-lasx.S b/sysdeps/loongarch/lp64/multiarch/memchr-lasx.S +new file mode 100644 +index 00000000..a26cdf48 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/memchr-lasx.S +@@ -0,0 +1,117 @@ ++/* Optimized memchr implementation using LoongArch LASX instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ . 
*/ ++ ++#include ++#include ++#include ++ ++#if IS_IN (libc) && !defined __loongarch_soft_float ++ ++# define MEMCHR __memchr_lasx ++ ++LEAF(MEMCHR, 6) ++ beqz a2, L(ret0) ++ add.d a3, a0, a2 ++ andi t0, a0, 0x3f ++ bstrins.d a0, zero, 5, 0 ++ ++ xvld xr0, a0, 0 ++ xvld xr1, a0, 32 ++ li.d t1, -1 ++ li.d t2, 64 ++ ++ xvreplgr2vr.b xr2, a1 ++ sll.d t3, t1, t0 ++ sub.d t2, t2, t0 ++ xvseq.b xr0, xr0, xr2 ++ ++ xvseq.b xr1, xr1, xr2 ++ xvmsknz.b xr0, xr0 ++ xvmsknz.b xr1, xr1 ++ xvpickve.w xr3, xr0, 4 ++ ++ ++ xvpickve.w xr4, xr1, 4 ++ vilvl.h vr0, vr3, vr0 ++ vilvl.h vr1, vr4, vr1 ++ vilvl.w vr0, vr1, vr0 ++ ++ movfr2gr.d t0, fa0 ++ and t0, t0, t3 ++ bgeu t2, a2, L(end) ++ bnez t0, L(found) ++ ++ addi.d a4, a3, -1 ++ bstrins.d a4, zero, 5, 0 ++L(loop): ++ xvld xr0, a0, 64 ++ xvld xr1, a0, 96 ++ ++ addi.d a0, a0, 64 ++ xvseq.b xr0, xr0, xr2 ++ xvseq.b xr1, xr1, xr2 ++ beq a0, a4, L(out) ++ ++ ++ xvmax.bu xr3, xr0, xr1 ++ xvseteqz.v fcc0, xr3 ++ bcnez fcc0, L(loop) ++ xvmsknz.b xr0, xr0 ++ ++ xvmsknz.b xr1, xr1 ++ xvpickve.w xr3, xr0, 4 ++ xvpickve.w xr4, xr1, 4 ++ vilvl.h vr0, vr3, vr0 ++ ++ vilvl.h vr1, vr4, vr1 ++ vilvl.w vr0, vr1, vr0 ++ movfr2gr.d t0, fa0 ++L(found): ++ ctz.d t1, t0 ++ ++ add.d a0, a0, t1 ++ jr ra ++L(ret0): ++ move a0, zero ++ jr ra ++ ++ ++L(out): ++ xvmsknz.b xr0, xr0 ++ xvmsknz.b xr1, xr1 ++ xvpickve.w xr3, xr0, 4 ++ xvpickve.w xr4, xr1, 4 ++ ++ vilvl.h vr0, vr3, vr0 ++ vilvl.h vr1, vr4, vr1 ++ vilvl.w vr0, vr1, vr0 ++ movfr2gr.d t0, fa0 ++ ++L(end): ++ sub.d t2, zero, a3 ++ srl.d t1, t1, t2 ++ and t0, t0, t1 ++ ctz.d t1, t0 ++ ++ add.d a0, a0, t1 ++ maskeqz a0, a0, t0 ++ jr ra ++END(MEMCHR) ++ ++libc_hidden_builtin_def (MEMCHR) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/memchr-lsx.S b/sysdeps/loongarch/lp64/multiarch/memchr-lsx.S +new file mode 100644 +index 00000000..a73ecd25 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/memchr-lsx.S +@@ -0,0 +1,102 @@ ++/* Optimized memchr implementation using LoongArch LSX instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ . 
*/ ++ ++#include ++#include ++#include ++ ++#if IS_IN (libc) && !defined __loongarch_soft_float ++ ++# define MEMCHR __memchr_lsx ++ ++LEAF(MEMCHR, 6) ++ beqz a2, L(ret0) ++ add.d a3, a0, a2 ++ andi t0, a0, 0x1f ++ bstrins.d a0, zero, 4, 0 ++ ++ vld vr0, a0, 0 ++ vld vr1, a0, 16 ++ li.d t1, -1 ++ li.d t2, 32 ++ ++ vreplgr2vr.b vr2, a1 ++ sll.d t3, t1, t0 ++ sub.d t2, t2, t0 ++ vseq.b vr0, vr0, vr2 ++ ++ vseq.b vr1, vr1, vr2 ++ vmsknz.b vr0, vr0 ++ vmsknz.b vr1, vr1 ++ vilvl.h vr0, vr1, vr0 ++ ++ ++ movfr2gr.s t0, fa0 ++ and t0, t0, t3 ++ bgeu t2, a2, L(end) ++ bnez t0, L(found) ++ ++ addi.d a4, a3, -1 ++ bstrins.d a4, zero, 4, 0 ++L(loop): ++ vld vr0, a0, 32 ++ vld vr1, a0, 48 ++ ++ addi.d a0, a0, 32 ++ vseq.b vr0, vr0, vr2 ++ vseq.b vr1, vr1, vr2 ++ beq a0, a4, L(out) ++ ++ vmax.bu vr3, vr0, vr1 ++ vseteqz.v fcc0, vr3 ++ bcnez fcc0, L(loop) ++ vmsknz.b vr0, vr0 ++ ++ ++ vmsknz.b vr1, vr1 ++ vilvl.h vr0, vr1, vr0 ++ movfr2gr.s t0, fa0 ++L(found): ++ ctz.w t0, t0 ++ ++ add.d a0, a0, t0 ++ jr ra ++L(ret0): ++ move a0, zero ++ jr ra ++ ++L(out): ++ vmsknz.b vr0, vr0 ++ vmsknz.b vr1, vr1 ++ vilvl.h vr0, vr1, vr0 ++ movfr2gr.s t0, fa0 ++ ++L(end): ++ sub.d t2, zero, a3 ++ srl.w t1, t1, t2 ++ and t0, t0, t1 ++ ctz.w t1, t0 ++ ++ ++ add.d a0, a0, t1 ++ maskeqz a0, a0, t0 ++ jr ra ++END(MEMCHR) ++ ++libc_hidden_builtin_def (MEMCHR) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/memchr.c b/sysdeps/loongarch/lp64/multiarch/memchr.c +new file mode 100644 +index 00000000..059479c0 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/memchr.c +@@ -0,0 +1,37 @@ ++/* Multiple versions of memchr. ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++/* Define multiple versions only for the definition in libc. */ ++#if IS_IN (libc) ++# define memchr __redirect_memchr ++# include ++# undef memchr ++ ++# define SYMBOL_NAME memchr ++# include "ifunc-memchr.h" ++ ++libc_ifunc_redirected (__redirect_memchr, memchr, ++ IFUNC_SELECTOR ()); ++ ++# ifdef SHARED ++__hidden_ver1 (memchr, __GI_memchr, __redirect_memchr) ++ __attribute__ ((visibility ("hidden"))) __attribute_copy__ (memchr); ++# endif ++ ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/memcmp-aligned.S b/sysdeps/loongarch/lp64/multiarch/memcmp-aligned.S +new file mode 100644 +index 00000000..14a7caa9 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/memcmp-aligned.S +@@ -0,0 +1,292 @@ ++/* Optimized memcmp implementation using basic LoongArch instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. 
++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ . */ ++ ++#include ++#include ++#include ++ ++#if IS_IN (libc) ++# define MEMCMP_NAME __memcmp_aligned ++#else ++# define MEMCMP_NAME memcmp ++#endif ++ ++LEAF(MEMCMP_NAME, 6) ++ beqz a2, L(ret) ++ andi a4, a1, 0x7 ++ andi a3, a0, 0x7 ++ sltu a5, a4, a3 ++ ++ xor t0, a0, a1 ++ li.w t8, 8 ++ maskeqz t0, t0, a5 ++ li.w t7, -1 ++ ++ xor a0, a0, t0 ++ xor a1, a1, t0 ++ andi a3, a0, 0x7 ++ andi a4, a1, 0x7 ++ ++ xor a0, a0, a3 ++ xor a1, a1, a4 ++ ld.d t2, a0, 0 ++ ld.d t1, a1, 0 ++ ++ slli.d t3, a3, 3 ++ slli.d t4, a4, 3 ++ sub.d a6, t3, t4 ++ srl.d t1, t1, t4 ++ ++ srl.d t0, t2, t3 ++ srl.d t5, t7, t4 ++ sub.d t6, t0, t1 ++ and t6, t6, t5 ++ ++ sub.d t5, t8, a4 ++ bnez t6, L(first_out) ++ bgeu t5, a2, L(ret) ++ sub.d a2, a2, t5 ++ ++ bnez a6, L(unaligned) ++ blt a2, t8, L(al_less_8bytes) ++ andi t1, a2, 31 ++ beq t1, a2, L(al_less_32bytes) ++ ++ sub.d t2, a2, t1 ++ add.d a4, a0, t2 ++ move a2, t1 ++ ++L(al_loop): ++ ld.d t0, a0, 8 ++ ++ ld.d t1, a1, 8 ++ ld.d t2, a0, 16 ++ ld.d t3, a1, 16 ++ ld.d t4, a0, 24 ++ ++ ld.d t5, a1, 24 ++ ld.d t6, a0, 32 ++ ld.d t7, a1, 32 ++ addi.d a0, a0, 32 ++ ++ addi.d a1, a1, 32 ++ bne t0, t1, L(out1) ++ bne t2, t3, L(out2) ++ bne t4, t5, L(out3) ++ ++ bne t6, t7, L(out4) ++ bne a0, a4, L(al_loop) ++ ++L(al_less_32bytes): ++ srai.d a4, a2, 4 ++ beqz a4, L(al_less_16bytes) ++ ++ ld.d t0, a0, 8 ++ ld.d t1, a1, 8 ++ ld.d t2, a0, 16 ++ ld.d t3, a1, 16 ++ ++ addi.d a0, a0, 16 ++ addi.d a1, a1, 16 ++ addi.d a2, a2, -16 ++ bne t0, t1, L(out1) ++ ++ bne t2, t3, L(out2) ++ ++L(al_less_16bytes): ++ srai.d a4, a2, 3 ++ beqz a4, L(al_less_8bytes) ++ ld.d t0, a0, 8 ++ ++ ld.d t1, a1, 8 ++ addi.d a0, a0, 8 ++ addi.d a1, a1, 8 ++ addi.d a2, a2, -8 ++ ++ bne t0, t1, L(out1) ++ ++L(al_less_8bytes): ++ beqz a2, L(ret) ++ ld.d t0, a0, 8 ++ ld.d t1, a1, 8 ++ ++ li.d t7, -1 ++ slli.d t2, a2, 3 ++ sll.d t2, t7, t2 ++ sub.d t3, t0, t1 ++ ++ andn t6, t3, t2 ++ bnez t6, L(count_diff) ++ ++L(ret): ++ move a0, zero ++ jr ra ++ ++L(out4): ++ move t0, t6 ++ move t1, t7 ++ sub.d t6, t6, t7 ++ b L(count_diff) ++ ++L(out3): ++ move t0, t4 ++ move t1, t5 ++ sub.d t6, t4, t5 ++ b L(count_diff) ++ ++L(out2): ++ move t0, t2 ++ move t1, t3 ++L(out1): ++ sub.d t6, t0, t1 ++ b L(count_diff) ++ ++L(first_out): ++ slli.d t4, a2, 3 ++ slt t3, a2, t5 ++ sll.d t4, t7, t4 ++ maskeqz t4, t4, t3 ++ ++ andn t6, t6, t4 ++ ++L(count_diff): ++ ctz.d t2, t6 ++ bstrins.d t2, zero, 2, 0 ++ srl.d t0, t0, t2 ++ ++ srl.d t1, t1, t2 ++ andi t0, t0, 0xff ++ andi t1, t1, 0xff ++ sub.d t2, t0, t1 ++ ++ sub.d t3, t1, t0 ++ masknez t2, t2, a5 ++ maskeqz t3, t3, a5 ++ or a0, t2, t3 ++ ++ jr ra ++ ++L(unaligned): ++ sub.d a7, zero, a6 ++ srl.d t0, t2, a6 ++ blt a2, t8, L(un_less_8bytes) ++ ++ andi t1, a2, 31 ++ beq t1, a2, L(un_less_32bytes) ++ sub.d t2, a2, t1 ++ add.d a4, a0, t2 ++ ++ move a2, t1 ++ ++L(un_loop): ++ ld.d t2, a0, 8 ++ ld.d t1, a1, 8 ++ ld.d t4, a0, 16 ++ ++ ld.d 
t3, a1, 16 ++ ld.d t6, a0, 24 ++ ld.d t5, a1, 24 ++ ld.d t8, a0, 32 ++ ++ ld.d t7, a1, 32 ++ addi.d a0, a0, 32 ++ addi.d a1, a1, 32 ++ sll.d a3, t2, a7 ++ ++ or t0, a3, t0 ++ bne t0, t1, L(out1) ++ srl.d t0, t2, a6 ++ sll.d a3, t4, a7 ++ ++ or t2, a3, t0 ++ bne t2, t3, L(out2) ++ srl.d t0, t4, a6 ++ sll.d a3, t6, a7 ++ ++ or t4, a3, t0 ++ bne t4, t5, L(out3) ++ srl.d t0, t6, a6 ++ sll.d a3, t8, a7 ++ ++ or t6, t0, a3 ++ bne t6, t7, L(out4) ++ srl.d t0, t8, a6 ++ bne a0, a4, L(un_loop) ++ ++L(un_less_32bytes): ++ srai.d a4, a2, 4 ++ beqz a4, L(un_less_16bytes) ++ ld.d t2, a0, 8 ++ ld.d t1, a1, 8 ++ ++ ld.d t4, a0, 16 ++ ld.d t3, a1, 16 ++ addi.d a0, a0, 16 ++ addi.d a1, a1, 16 ++ ++ addi.d a2, a2, -16 ++ sll.d a3, t2, a7 ++ or t0, a3, t0 ++ bne t0, t1, L(out1) ++ ++ srl.d t0, t2, a6 ++ sll.d a3, t4, a7 ++ or t2, a3, t0 ++ bne t2, t3, L(out2) ++ ++ srl.d t0, t4, a6 ++ ++L(un_less_16bytes): ++ srai.d a4, a2, 3 ++ beqz a4, L(un_less_8bytes) ++ ld.d t2, a0, 8 ++ ++ ld.d t1, a1, 8 ++ addi.d a0, a0, 8 ++ addi.d a1, a1, 8 ++ addi.d a2, a2, -8 ++ ++ sll.d a3, t2, a7 ++ or t0, a3, t0 ++ bne t0, t1, L(out1) ++ srl.d t0, t2, a6 ++ ++L(un_less_8bytes): ++ beqz a2, L(ret) ++ andi a7, a7, 63 ++ slli.d a4, a2, 3 ++ bgeu a7, a4, L(last_cmp) ++ ++ ld.d t2, a0, 8 ++ sll.d a3, t2, a7 ++ or t0, a3, t0 ++ ++L(last_cmp): ++ ld.d t1, a1, 8 ++ ++ li.d t7, -1 ++ sll.d t2, t7, a4 ++ sub.d t3, t0, t1 ++ andn t6, t3, t2 ++ ++ bnez t6, L(count_diff) ++ move a0, zero ++ jr ra ++END(MEMCMP_NAME) ++ ++libc_hidden_builtin_def (MEMCMP_NAME) +diff --git a/sysdeps/loongarch/lp64/multiarch/memcmp-lasx.S b/sysdeps/loongarch/lp64/multiarch/memcmp-lasx.S +new file mode 100644 +index 00000000..3151a179 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/memcmp-lasx.S +@@ -0,0 +1,207 @@ ++/* Optimized memcmp implementation using LoongArch LASX instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ . 
*/ ++ ++#include ++#include ++#include ++ ++#if IS_IN (libc) && !defined __loongarch_soft_float ++ ++# define MEMCMP __memcmp_lasx ++ ++LEAF(MEMCMP, 6) ++ li.d t2, 32 ++ add.d a3, a0, a2 ++ add.d a4, a1, a2 ++ bgeu t2, a2, L(less32) ++ ++ li.d t1, 160 ++ bgeu a2, t1, L(make_aligned) ++L(loop32): ++ xvld xr0, a0, 0 ++ xvld xr1, a1, 0 ++ ++ addi.d a0, a0, 32 ++ addi.d a1, a1, 32 ++ addi.d a2, a2, -32 ++ xvseq.b xr2, xr0, xr1 ++ ++ xvsetanyeqz.b fcc0, xr2 ++ bcnez fcc0, L(end) ++L(last_bytes): ++ bltu t2, a2, L(loop32) ++ xvld xr0, a3, -32 ++ ++ ++ xvld xr1, a4, -32 ++ xvseq.b xr2, xr0, xr1 ++L(end): ++ xvmsknz.b xr2, xr2 ++ xvpermi.q xr4, xr0, 1 ++ ++ xvpickve.w xr3, xr2, 4 ++ xvpermi.q xr5, xr1, 1 ++ vilvl.h vr2, vr3, vr2 ++ movfr2gr.s t0, fa2 ++ ++ cto.w t0, t0 ++ vreplgr2vr.b vr2, t0 ++ vshuf.b vr0, vr4, vr0, vr2 ++ vshuf.b vr1, vr5, vr1, vr2 ++ ++ vpickve2gr.bu t0, vr0, 0 ++ vpickve2gr.bu t1, vr1, 0 ++ sub.d a0, t0, t1 ++ jr ra ++ ++ ++L(less32): ++ srli.d t0, a2, 4 ++ beqz t0, L(less16) ++ vld vr0, a0, 0 ++ vld vr1, a1, 0 ++ ++ vld vr2, a3, -16 ++ vld vr3, a4, -16 ++L(short_ret): ++ vseq.b vr4, vr0, vr1 ++ vseq.b vr5, vr2, vr3 ++ ++ vmsknz.b vr4, vr4 ++ vmsknz.b vr5, vr5 ++ vilvl.h vr4, vr5, vr4 ++ movfr2gr.s t0, fa4 ++ ++ cto.w t0, t0 ++ vreplgr2vr.b vr4, t0 ++ vshuf.b vr0, vr2, vr0, vr4 ++ vshuf.b vr1, vr3, vr1, vr4 ++ ++ ++ vpickve2gr.bu t0, vr0, 0 ++ vpickve2gr.bu t1, vr1, 0 ++ sub.d a0, t0, t1 ++ jr ra ++ ++L(less16): ++ srli.d t0, a2, 3 ++ beqz t0, L(less8) ++ vldrepl.d vr0, a0, 0 ++ vldrepl.d vr1, a1, 0 ++ ++ vldrepl.d vr2, a3, -8 ++ vldrepl.d vr3, a4, -8 ++ b L(short_ret) ++ nop ++ ++L(less8): ++ srli.d t0, a2, 2 ++ beqz t0, L(less4) ++ vldrepl.w vr0, a0, 0 ++ vldrepl.w vr1, a1, 0 ++ ++ ++ vldrepl.w vr2, a3, -4 ++ vldrepl.w vr3, a4, -4 ++ b L(short_ret) ++ nop ++ ++L(less4): ++ srli.d t0, a2, 1 ++ beqz t0, L(less2) ++ vldrepl.h vr0, a0, 0 ++ vldrepl.h vr1, a1, 0 ++ ++ vldrepl.h vr2, a3, -2 ++ vldrepl.h vr3, a4, -2 ++ b L(short_ret) ++ nop ++ ++L(less2): ++ beqz a2, L(ret0) ++ ld.bu t0, a0, 0 ++ ld.bu t1, a1, 0 ++ sub.d a0, t0, t1 ++ ++ jr ra ++L(ret0): ++ move a0, zero ++ jr ra ++ ++L(make_aligned): ++ xvld xr0, a0, 0 ++ ++ xvld xr1, a1, 0 ++ xvseq.b xr2, xr0, xr1 ++ xvsetanyeqz.b fcc0, xr2 ++ bcnez fcc0, L(end) ++ ++ andi t0, a0, 0x1f ++ sub.d t0, t2, t0 ++ sub.d t1, a2, t0 ++ add.d a0, a0, t0 ++ ++ add.d a1, a1, t0 ++ andi a2, t1, 0x3f ++ sub.d t0, t1, a2 ++ add.d a5, a0, t0 ++ ++ ++L(loop_align): ++ xvld xr0, a0, 0 ++ xvld xr1, a1, 0 ++ xvld xr2, a0, 32 ++ xvld xr3, a1, 32 ++ ++ xvseq.b xr0, xr0, xr1 ++ xvseq.b xr1, xr2, xr3 ++ xvmin.bu xr2, xr1, xr0 ++ xvsetanyeqz.b fcc0, xr2 ++ ++ bcnez fcc0, L(pair_end) ++ addi.d a0, a0, 64 ++ addi.d a1, a1, 64 ++ bne a0, a5, L(loop_align) ++ ++ bnez a2, L(last_bytes) ++ move a0, zero ++ jr ra ++ nop ++ ++ ++L(pair_end): ++ xvmsknz.b xr0, xr0 ++ xvmsknz.b xr1, xr1 ++ xvpickve.w xr2, xr0, 4 ++ xvpickve.w xr3, xr1, 4 ++ ++ vilvl.h vr0, vr2, vr0 ++ vilvl.h vr1, vr3, vr1 ++ vilvl.w vr0, vr1, vr0 ++ movfr2gr.d t0, fa0 ++ ++ cto.d t0, t0 ++ ldx.bu t1, a0, t0 ++ ldx.bu t2, a1, t0 ++ sub.d a0, t1, t2 ++ ++ jr ra ++END(MEMCMP) ++ ++libc_hidden_builtin_def (MEMCMP) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/memcmp-lsx.S b/sysdeps/loongarch/lp64/multiarch/memcmp-lsx.S +new file mode 100644 +index 00000000..38a50a4c +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/memcmp-lsx.S +@@ -0,0 +1,269 @@ ++/* Optimized memcmp implementation using LoongArch LSX instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. 
++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ . */ ++ ++#include ++#include ++#include ++ ++#if IS_IN (libc) && !defined __loongarch_soft_float ++ ++#define MEMCMP __memcmp_lsx ++ ++LEAF(MEMCMP, 6) ++ beqz a2, L(out) ++ pcalau12i t0, %pc_hi20(L(INDEX)) ++ andi a3, a0, 0xf ++ vld vr5, t0, %pc_lo12(L(INDEX)) ++ ++ andi a4, a1, 0xf ++ bne a3, a4, L(unaligned) ++ bstrins.d a0, zero, 3, 0 ++ xor a1, a1, a4 ++ ++ vld vr0, a0, 0 ++ vld vr1, a1, 0 ++ li.d t0, 16 ++ vreplgr2vr.b vr3, a3 ++ ++ sub.d t1, t0, a3 ++ vadd.b vr3, vr3, vr5 ++ vshuf.b vr0, vr3, vr0, vr3 ++ vshuf.b vr1, vr3, vr1, vr3 ++ ++ ++ vseq.b vr4, vr0, vr1 ++ bgeu t1, a2, L(al_end) ++ vsetanyeqz.b fcc0, vr4 ++ bcnez fcc0, L(al_found) ++ ++ sub.d t1, a2, t1 ++ andi a2, t1, 31 ++ beq a2, t1, L(al_less_32bytes) ++ sub.d t2, t1, a2 ++ ++ add.d a4, a0, t2 ++L(al_loop): ++ vld vr0, a0, 16 ++ vld vr1, a1, 16 ++ vld vr2, a0, 32 ++ ++ vld vr3, a1, 32 ++ addi.d a0, a0, 32 ++ addi.d a1, a1, 32 ++ vseq.b vr4, vr0, vr1 ++ ++ ++ vseq.b vr6, vr2, vr3 ++ vand.v vr6, vr4, vr6 ++ vsetanyeqz.b fcc0, vr6 ++ bcnez fcc0, L(al_pair_end) ++ ++ bne a0, a4, L(al_loop) ++L(al_less_32bytes): ++ bgeu t0, a2, L(al_less_16bytes) ++ vld vr0, a0, 16 ++ vld vr1, a1, 16 ++ ++ vld vr2, a0, 32 ++ vld vr3, a1, 32 ++ addi.d a2, a2, -16 ++ vreplgr2vr.b vr6, a2 ++ ++ vslt.b vr5, vr5, vr6 ++ vseq.b vr4, vr0, vr1 ++ vseq.b vr6, vr2, vr3 ++ vorn.v vr6, vr6, vr5 ++ ++ ++L(al_pair_end): ++ vsetanyeqz.b fcc0, vr4 ++ bcnez fcc0, L(al_found) ++ vnori.b vr4, vr6, 0 ++ vfrstpi.b vr4, vr4, 0 ++ ++ vshuf.b vr0, vr2, vr2, vr4 ++ vshuf.b vr1, vr3, vr3, vr4 ++ vpickve2gr.bu t0, vr0, 0 ++ vpickve2gr.bu t1, vr1, 0 ++ ++ sub.d a0, t0, t1 ++ jr ra ++ nop ++ nop ++ ++L(al_less_16bytes): ++ beqz a2, L(out) ++ vld vr0, a0, 16 ++ vld vr1, a1, 16 ++ vseq.b vr4, vr0, vr1 ++ ++ ++L(al_end): ++ vreplgr2vr.b vr6, a2 ++ vslt.b vr5, vr5, vr6 ++ vorn.v vr4, vr4, vr5 ++ nop ++ ++L(al_found): ++ vnori.b vr4, vr4, 0 ++ vfrstpi.b vr4, vr4, 0 ++ vshuf.b vr0, vr0, vr0, vr4 ++ vshuf.b vr1, vr1, vr1, vr4 ++ ++ vpickve2gr.bu t0, vr0, 0 ++ vpickve2gr.bu t1, vr1, 0 ++ sub.d a0, t0, t1 ++ jr ra ++ ++L(out): ++ move a0, zero ++ jr ra ++ nop ++ nop ++ ++ ++L(unaligned): ++ xor t2, a0, a1 ++ sltu a5, a3, a4 ++ masknez t2, t2, a5 ++ xor a0, a0, t2 ++ ++ xor a1, a1, t2 ++ andi a3, a0, 0xf ++ andi a4, a1, 0xf ++ bstrins.d a0, zero, 3, 0 ++ ++ xor a1, a1, a4 ++ vld vr4, a0, 0 ++ vld vr1, a1, 0 ++ li.d t0, 16 ++ ++ vreplgr2vr.b vr2, a4 ++ sub.d a6, a4, a3 ++ sub.d t1, t0, a4 ++ sub.d t2, t0, a6 ++ ++ ++ vadd.b vr2, vr2, vr5 ++ vreplgr2vr.b vr6, t2 ++ vadd.b vr6, vr6, vr5 ++ vshuf.b vr0, vr4, vr4, vr6 ++ ++ vshuf.b vr1, vr2, vr1, vr2 ++ vshuf.b vr0, vr2, vr0, vr2 ++ vseq.b vr7, vr0, vr1 ++ bgeu t1, a2, L(un_end) ++ ++ vsetanyeqz.b fcc0, vr7 ++ bcnez fcc0, L(un_found) ++ sub.d a2, a2, t1 ++ andi t1, a2, 31 ++ ++ beq a2, t1, L(un_less_32bytes) ++ sub.d t2, a2, t1 ++ move a2, t1 ++ add.d 
a4, a1, t2 ++ ++ ++L(un_loop): ++ vld vr2, a0, 16 ++ vld vr1, a1, 16 ++ vld vr3, a1, 32 ++ addi.d a1, a1, 32 ++ ++ addi.d a0, a0, 32 ++ vshuf.b vr0, vr2, vr4, vr6 ++ vld vr4, a0, 0 ++ vseq.b vr7, vr0, vr1 ++ ++ vshuf.b vr2, vr4, vr2, vr6 ++ vseq.b vr8, vr2, vr3 ++ vand.v vr8, vr7, vr8 ++ vsetanyeqz.b fcc0, vr8 ++ ++ bcnez fcc0, L(un_pair_end) ++ bne a1, a4, L(un_loop) ++ ++L(un_less_32bytes): ++ bltu a2, t0, L(un_less_16bytes) ++ vld vr2, a0, 16 ++ vld vr1, a1, 16 ++ addi.d a0, a0, 16 ++ ++ addi.d a1, a1, 16 ++ addi.d a2, a2, -16 ++ vshuf.b vr0, vr2, vr4, vr6 ++ vor.v vr4, vr2, vr2 ++ ++ vseq.b vr7, vr0, vr1 ++ vsetanyeqz.b fcc0, vr7 ++ bcnez fcc0, L(un_found) ++L(un_less_16bytes): ++ beqz a2, L(out) ++ vld vr1, a1, 16 ++ bgeu a6, a2, 1f ++ ++ vld vr2, a0, 16 ++1: ++ vshuf.b vr0, vr2, vr4, vr6 ++ vseq.b vr7, vr0, vr1 ++L(un_end): ++ vreplgr2vr.b vr3, a2 ++ ++ ++ vslt.b vr3, vr5, vr3 ++ vorn.v vr7, vr7, vr3 ++ ++L(un_found): ++ vnori.b vr7, vr7, 0 ++ vfrstpi.b vr7, vr7, 0 ++ ++ vshuf.b vr0, vr0, vr0, vr7 ++ vshuf.b vr1, vr1, vr1, vr7 ++L(calc_result): ++ vpickve2gr.bu t0, vr0, 0 ++ vpickve2gr.bu t1, vr1, 0 ++ ++ sub.d t2, t0, t1 ++ sub.d t3, t1, t0 ++ masknez t0, t3, a5 ++ maskeqz t1, t2, a5 ++ ++ or a0, t0, t1 ++ jr ra ++L(un_pair_end): ++ vsetanyeqz.b fcc0, vr7 ++ bcnez fcc0, L(un_found) ++ ++ ++ vnori.b vr7, vr8, 0 ++ vfrstpi.b vr7, vr7, 0 ++ vshuf.b vr0, vr2, vr2, vr7 ++ vshuf.b vr1, vr3, vr3, vr7 ++ ++ b L(calc_result) ++END(MEMCMP) ++ ++ .section .rodata.cst16,"M",@progbits,16 ++ .align 4 ++L(INDEX): ++ .dword 0x0706050403020100 ++ .dword 0x0f0e0d0c0b0a0908 ++ ++libc_hidden_builtin_def (MEMCMP) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/memcmp.c b/sysdeps/loongarch/lp64/multiarch/memcmp.c +new file mode 100644 +index 00000000..32eccac2 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/memcmp.c +@@ -0,0 +1,43 @@ ++/* Multiple versions of memcmp. ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++/* Define multiple versions only for the definition in libc. 
*/ ++#if IS_IN (libc) ++# define memcmp __redirect_memcmp ++# include ++# undef memcmp ++ ++# define SYMBOL_NAME memcmp ++# include "ifunc-memcmp.h" ++ ++libc_ifunc_redirected (__redirect_memcmp, memcmp, ++ IFUNC_SELECTOR ()); ++# undef bcmp ++weak_alias (memcmp, bcmp) ++ ++# undef __memcmpeq ++strong_alias (memcmp, __memcmpeq) ++libc_hidden_def (__memcmpeq) ++ ++# ifdef SHARED ++__hidden_ver1 (memcmp, __GI_memcmp, __redirect_memcmp) ++ __attribute__ ((visibility ("hidden"))) __attribute_copy__ (memcmp); ++# endif ++ ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/memcpy-aligned.S b/sysdeps/loongarch/lp64/multiarch/memcpy-aligned.S +new file mode 100644 +index 00000000..7eb34395 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/memcpy-aligned.S +@@ -0,0 +1,783 @@ ++/* Optimized memcpy_aligned implementation using basic LoongArch instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ . */ ++ ++#include ++#include ++#include ++ ++#if IS_IN (libc) ++# define MEMCPY_NAME __memcpy_aligned ++# define MEMMOVE_NAME __memmove_aligned ++#else ++# define MEMCPY_NAME memcpy ++# define MEMMOVE_NAME memmove ++#endif ++ ++#define LD_64(reg, n) \ ++ ld.d t0, reg, n; \ ++ ld.d t1, reg, n + 8; \ ++ ld.d t2, reg, n + 16; \ ++ ld.d t3, reg, n + 24; \ ++ ld.d t4, reg, n + 32; \ ++ ld.d t5, reg, n + 40; \ ++ ld.d t6, reg, n + 48; \ ++ ld.d t7, reg, n + 56; ++ ++#define ST_64(reg, n) \ ++ st.d t0, reg, n; \ ++ st.d t1, reg, n + 8; \ ++ st.d t2, reg, n + 16; \ ++ st.d t3, reg, n + 24; \ ++ st.d t4, reg, n + 32; \ ++ st.d t5, reg, n + 40; \ ++ st.d t6, reg, n + 48; \ ++ st.d t7, reg, n + 56; ++ ++LEAF(MEMMOVE_NAME, 6) ++ sub.d t0, a0, a1 ++ bltu t0, a2, L(copy_back) ++END(MEMMOVE_NAME) ++ ++LEAF_NO_ALIGN(MEMCPY_NAME) ++ srai.d a3, a2, 4 ++ beqz a3, L(short_data) ++ ++ move a4, a0 ++ andi a5, a0, 0x7 ++ andi a6, a1, 0x7 ++ li.d t8, 8 ++ beqz a5, L(check_align) ++ ++ sub.d t2, t8, a5 ++ sub.d a2, a2, t2 ++ pcaddi t1, 20 ++ slli.d t3, t2, 3 ++ ++ add.d a1, a1, t2 ++ sub.d t1, t1, t3 ++ add.d a4, a4, t2 ++ jr t1 ++ ++L(al7): ++ ld.b t0, a1, -7 ++ st.b t0, a4, -7 ++L(al6): ++ ld.b t0, a1, -6 ++ st.b t0, a4, -6 ++L(al5): ++ ld.b t0, a1, -5 ++ st.b t0, a4, -5 ++L(al4): ++ ld.b t0, a1, -4 ++ st.b t0, a4, -4 ++L(al3): ++ ld.b t0, a1, -3 ++ st.b t0, a4, -3 ++L(al2): ++ ld.b t0, a1, -2 ++ st.b t0, a4, -2 ++L(al1): ++ ld.b t0, a1, -1 ++ st.b t0, a4, -1 ++ ++L(check_align): ++ bne a5, a6, L(unalign) ++ srai.d a3, a2, 4 ++ beqz a3, L(al_less_16bytes) ++ andi a3, a2, 0x3f ++ ++ beq a3, a2, L(al_less_64bytes) ++ sub.d t0, a2, a3 ++ move a2, a3 ++ add.d a5, a1, t0 ++ ++L(loop_64bytes): ++ LD_64(a1, 0) ++ addi.d a1, a1, 64 ++ ST_64(a4, 0) ++ ++ addi.d a4, a4, 64 ++ bne a1, a5, L(loop_64bytes) ++ ++L(al_less_64bytes): ++ srai.d a3, a2, 5 ++ beqz a3, L(al_less_32bytes) ++ ++ ld.d t0, a1, 0 ++ ld.d t1, a1, 8 ++ ld.d t2, 
a1, 16 ++ ld.d t3, a1, 24 ++ ++ addi.d a1, a1, 32 ++ addi.d a2, a2, -32 ++ ++ st.d t0, a4, 0 ++ st.d t1, a4, 8 ++ st.d t2, a4, 16 ++ st.d t3, a4, 24 ++ ++ addi.d a4, a4, 32 ++ ++L(al_less_32bytes): ++ srai.d a3, a2, 4 ++ beqz a3, L(al_less_16bytes) ++ ++ ld.d t0, a1, 0 ++ ld.d t1, a1, 8 ++ addi.d a1, a1, 16 ++ addi.d a2, a2, -16 ++ ++ st.d t0, a4, 0 ++ st.d t1, a4, 8 ++ addi.d a4, a4, 16 ++ ++L(al_less_16bytes): ++ srai.d a3, a2, 3 ++ beqz a3, L(al_less_8bytes) ++ ++ ld.d t0, a1, 0 ++ addi.d a1, a1, 8 ++ addi.d a2, a2, -8 ++ st.d t0, a4, 0 ++ addi.d a4, a4, 8 ++ ++L(al_less_8bytes): ++ srai.d a3, a2, 2 ++ beqz a3, L(al_less_4bytes) ++ ++ ld.w t0, a1, 0 ++ addi.d a1, a1, 4 ++ addi.d a2, a2, -4 ++ st.w t0, a4, 0 ++ addi.d a4, a4, 4 ++ ++L(al_less_4bytes): ++ srai.d a3, a2, 1 ++ beqz a3, L(al_less_2bytes) ++ ++ ld.h t0, a1, 0 ++ addi.d a1, a1, 2 ++ addi.d a2, a2, -2 ++ st.h t0, a4, 0 ++ addi.d a4, a4, 2 ++ ++L(al_less_2bytes): ++ beqz a2, L(al_less_1byte) ++ ++ ld.b t0, a1, 0 ++ st.b t0, a4, 0 ++ ++L(al_less_1byte): ++ jr ra ++ ++L(unalign): ++ andi a5, a1, 0x7 ++ bstrins.d a1, zero, 2, 0 ++ sub.d t8, t8, a5 ++ slli.d a5, a5, 3 ++ ++ ld.d t0, a1, 0 ++ addi.d a1, a1, 8 ++ slli.d a6, t8, 3 ++ srl.d a7, t0, a5 ++ ++ srai.d a3, a2, 4 ++ beqz a3, L(un_less_16bytes) ++ andi a3, a2, 0x3f ++ beq a3, a2, L(un_less_64bytes) ++ ++ sub.d t0, a2, a3 ++ move a2, a3 ++ add.d a3, a1, t0 ++ ++L(un_long_bytes): ++ ld.d t0, a1, 0 ++ ld.d t1, a1, 8 ++ ld.d t2, a1, 16 ++ ld.d t3, a1, 24 ++ ++ srl.d t4, t0, a5 ++ sll.d t0, t0, a6 ++ srl.d t5, t1, a5 ++ sll.d t1, t1, a6 ++ ++ srl.d t6, t2, a5 ++ sll.d t2, t2, a6 ++ srl.d t7, t3, a5 ++ sll.d t3, t3, a6 ++ ++ or t0, a7, t0 ++ or t1, t4, t1 ++ or t2, t5, t2 ++ or t3, t6, t3 ++ ++ ld.d t4, a1, 32 ++ ld.d t5, a1, 40 ++ ld.d t6, a1, 48 ++ ld.d a7, a1, 56 ++ ++ st.d t0, a4, 0 ++ st.d t1, a4, 8 ++ st.d t2, a4, 16 ++ st.d t3, a4, 24 ++ ++ addi.d a1, a1, 64 ++ ++ srl.d t0, t4, a5 ++ sll.d t4, t4, a6 ++ srl.d t1, t5, a5 ++ sll.d t5, t5, a6 ++ ++ srl.d t2, t6, a5 ++ sll.d t6, t6, a6 ++ sll.d t3, a7, a6 ++ srl.d a7, a7, a5 ++ ++ or t4, t7, t4 ++ or t5, t0, t5 ++ or t6, t1, t6 ++ or t3, t2, t3 ++ ++ st.d t4, a4, 32 ++ st.d t5, a4, 40 ++ st.d t6, a4, 48 ++ st.d t3, a4, 56 ++ ++ addi.d a4, a4, 64 ++ bne a3, a1, L(un_long_bytes) ++ ++L(un_less_64bytes): ++ srai.d a3, a2, 5 ++ beqz a3, L(un_less_32bytes) ++ ++ ld.d t0, a1, 0 ++ ld.d t1, a1, 8 ++ ld.d t2, a1, 16 ++ ld.d t3, a1, 24 ++ ++ addi.d a1, a1, 32 ++ addi.d a2, a2, -32 ++ ++ srl.d t4, t0, a5 ++ sll.d t0, t0, a6 ++ srl.d t5, t1, a5 ++ sll.d t1, t1, a6 ++ ++ srl.d t6, t2, a5 ++ sll.d t2, t2, a6 ++ or t0, a7, t0 ++ srl.d a7, t3, a5 ++ sll.d t3, t3, a6 ++ ++ or t1, t4, t1 ++ or t2, t5, t2 ++ or t3, t6, t3 ++ ++ st.d t0, a4, 0 ++ st.d t1, a4, 8 ++ st.d t2, a4, 16 ++ st.d t3, a4, 24 ++ ++ addi.d a4, a4, 32 ++ ++L(un_less_32bytes): ++ srai.d a3, a2, 4 ++ beqz a3, L(un_less_16bytes) ++ ++ ld.d t0, a1, 0 ++ ld.d t1, a1, 8 ++ addi.d a1, a1, 16 ++ addi.d a2, a2, -16 ++ ++ srl.d t2, t0, a5 ++ sll.d t3, t0, a6 ++ sll.d t4, t1, a6 ++ or t3, a7, t3 ++ or t4, t2, t4 ++ ++ srl.d a7, t1, a5 ++ st.d t3, a4, 0 ++ st.d t4, a4, 8 ++ addi.d a4, a4, 16 ++ ++L(un_less_16bytes): ++ srai.d a3, a2, 3 ++ beqz a3, L(un_less_8bytes) ++ ++ ld.d t0, a1, 0 ++ addi.d a1, a1, 8 ++ addi.d a2, a2, -8 ++ sll.d t1, t0, a6 ++ ++ or t2, a7, t1 ++ srl.d a7, t0, a5 ++ st.d t2, a4, 0 ++ addi.d a4, a4, 8 ++ ++L(un_less_8bytes): ++ beqz a2, L(un_less_1byte) ++ bge t8, a2, 1f ++ ++ ld.d t0, a1, 0 ++ sll.d t0, t0, a6 ++ or a7, a7, t0 ++ ++1: ++ srai.d a3, a2, 2 ++ beqz a3, 
L(un_less_4bytes) ++ ++ addi.d a2, a2, -4 ++ st.w a7, a4, 0 ++ addi.d a4, a4, 4 ++ srai.d a7, a7, 32 ++ ++L(un_less_4bytes): ++ srai.d a3, a2, 1 ++ beqz a3, L(un_less_2bytes) ++ ++ addi.d a2, a2, -2 ++ st.h a7, a4, 0 ++ addi.d a4, a4, 2 ++ srai.d a7, a7, 16 ++ ++L(un_less_2bytes): ++ beqz a2, L(un_less_1byte) ++ st.b a7, a4, 0 ++ ++L(un_less_1byte): ++ jr ra ++ ++L(short_data): ++ pcaddi t1, 36 ++ slli.d t2, a2, 3 ++ add.d a4, a0, a2 ++ sub.d t1, t1, t2 ++ add.d a1, a1, a2 ++ jr t1 ++ ++L(short_15_bytes): ++ ld.b t0, a1, -15 ++ st.b t0, a4, -15 ++L(short_14_bytes): ++ ld.b t0, a1, -14 ++ st.b t0, a4, -14 ++L(short_13_bytes): ++ ld.b t0, a1, -13 ++ st.b t0, a4, -13 ++L(short_12_bytes): ++ ld.b t0, a1, -12 ++ st.b t0, a4, -12 ++L(short_11_bytes): ++ ld.b t0, a1, -11 ++ st.b t0, a4, -11 ++L(short_10_bytes): ++ ld.b t0, a1, -10 ++ st.b t0, a4, -10 ++L(short_9_bytes): ++ ld.b t0, a1, -9 ++ st.b t0, a4, -9 ++L(short_8_bytes): ++ ld.b t0, a1, -8 ++ st.b t0, a4, -8 ++L(short_7_bytes): ++ ld.b t0, a1, -7 ++ st.b t0, a4, -7 ++L(short_6_bytes): ++ ld.b t0, a1, -6 ++ st.b t0, a4, -6 ++L(short_5_bytes): ++ ld.b t0, a1, -5 ++ st.b t0, a4, -5 ++L(short_4_bytes): ++ ld.b t0, a1, -4 ++ st.b t0, a4, -4 ++L(short_3_bytes): ++ ld.b t0, a1, -3 ++ st.b t0, a4, -3 ++L(short_2_bytes): ++ ld.b t0, a1, -2 ++ st.b t0, a4, -2 ++L(short_1_bytes): ++ ld.b t0, a1, -1 ++ st.b t0, a4, -1 ++ jr ra ++ ++L(copy_back): ++ srai.d a3, a2, 4 ++ beqz a3, L(back_short_data) ++ ++ add.d a4, a0, a2 ++ add.d a1, a1, a2 ++ ++ andi a5, a4, 0x7 ++ andi a6, a1, 0x7 ++ beqz a5, L(back_check_align) ++ ++ sub.d a2, a2, a5 ++ sub.d a1, a1, a5 ++ sub.d a4, a4, a5 ++ ++ pcaddi t1, 18 ++ slli.d t3, a5, 3 ++ sub.d t1, t1, t3 ++ jr t1 ++ ++ ld.b t0, a1, 6 ++ st.b t0, a4, 6 ++ ld.b t0, a1, 5 ++ st.b t0, a4, 5 ++ ld.b t0, a1, 4 ++ st.b t0, a4, 4 ++ ld.b t0, a1, 3 ++ st.b t0, a4, 3 ++ ld.b t0, a1, 2 ++ st.b t0, a4, 2 ++ ld.b t0, a1, 1 ++ st.b t0, a4, 1 ++ ld.b t0, a1, 0 ++ st.b t0, a4, 0 ++ ++L(back_check_align): ++ bne a5, a6, L(back_unalign) ++ ++ srai.d a3, a2, 4 ++ beqz a3, L(back_less_16bytes) ++ ++ andi a3, a2, 0x3f ++ beq a3, a2, L(back_less_64bytes) ++ ++ sub.d t0, a2, a3 ++ move a2, a3 ++ sub.d a5, a1, t0 ++ ++L(back_loop_64bytes): ++ LD_64(a1, -64) ++ addi.d a1, a1, -64 ++ ST_64(a4, -64) ++ ++ addi.d a4, a4, -64 ++ bne a1, a5, L(back_loop_64bytes) ++ ++L(back_less_64bytes): ++ srai.d a3, a2, 5 ++ beqz a3, L(back_less_32bytes) ++ ++ ld.d t0, a1, -32 ++ ld.d t1, a1, -24 ++ ld.d t2, a1, -16 ++ ld.d t3, a1, -8 ++ ++ addi.d a1, a1, -32 ++ addi.d a2, a2, -32 ++ ++ st.d t0, a4, -32 ++ st.d t1, a4, -24 ++ st.d t2, a4, -16 ++ st.d t3, a4, -8 ++ ++ addi.d a4, a4, -32 ++ ++L(back_less_32bytes): ++ srai.d a3, a2, 4 ++ beqz a3, L(back_less_16bytes) ++ ++ ld.d t0, a1, -16 ++ ld.d t1, a1, -8 ++ ++ addi.d a2, a2, -16 ++ addi.d a1, a1, -16 ++ ++ st.d t0, a4, -16 ++ st.d t1, a4, -8 ++ addi.d a4, a4, -16 ++ ++L(back_less_16bytes): ++ srai.d a3, a2, 3 ++ beqz a3, L(back_less_8bytes) ++ ++ ld.d t0, a1, -8 ++ addi.d a2, a2, -8 ++ addi.d a1, a1, -8 ++ ++ st.d t0, a4, -8 ++ addi.d a4, a4, -8 ++ ++L(back_less_8bytes): ++ srai.d a3, a2, 2 ++ beqz a3, L(back_less_4bytes) ++ ++ ld.w t0, a1, -4 ++ addi.d a2, a2, -4 ++ addi.d a1, a1, -4 ++ ++ st.w t0, a4, -4 ++ addi.d a4, a4, -4 ++ ++L(back_less_4bytes): ++ srai.d a3, a2, 1 ++ beqz a3, L(back_less_2bytes) ++ ++ ld.h t0, a1, -2 ++ addi.d a2, a2, -2 ++ addi.d a1, a1, -2 ++ ++ st.h t0, a4, -2 ++ addi.d a4, a4, -2 ++ ++L(back_less_2bytes): ++ beqz a2, L(back_less_1byte) ++ ++ ld.b t0, a1, -1 ++ st.b t0, a4, -1 ++ 
++L(back_less_1byte): ++ jr ra ++ ++L(back_unalign): ++ andi t8, a1, 0x7 ++ bstrins.d a1, zero, 2, 0 ++ ++ sub.d a6, zero, t8 ++ ++ ld.d t0, a1, 0 ++ slli.d a6, a6, 3 ++ slli.d a5, t8, 3 ++ sll.d a7, t0, a6 ++ ++ srai.d a3, a2, 4 ++ beqz a3, L(back_un_less_16bytes) ++ ++ andi a3, a2, 0x3f ++ beq a3, a2, L(back_un_less_64bytes) ++ ++ sub.d t0, a2, a3 ++ move a2, a3 ++ sub.d a3, a1, t0 ++ ++L(back_un_long_bytes): ++ ld.d t0, a1, -8 ++ ld.d t1, a1, -16 ++ ld.d t2, a1, -24 ++ ld.d t3, a1, -32 ++ ++ sll.d t4, t0, a6 ++ srl.d t0, t0, a5 ++ ++ sll.d t5, t1, a6 ++ srl.d t1, t1, a5 ++ ++ sll.d t6, t2, a6 ++ srl.d t2, t2, a5 ++ ++ sll.d t7, t3, a6 ++ srl.d t3, t3, a5 ++ ++ or t0, t0, a7 ++ or t1, t1, t4 ++ or t2, t2, t5 ++ or t3, t3, t6 ++ ++ ld.d t4, a1, -40 ++ ld.d t5, a1, -48 ++ ld.d t6, a1, -56 ++ ld.d a7, a1, -64 ++ st.d t0, a4, -8 ++ st.d t1, a4, -16 ++ st.d t2, a4, -24 ++ st.d t3, a4, -32 ++ ++ addi.d a1, a1, -64 ++ ++ sll.d t0, t4, a6 ++ srl.d t4, t4, a5 ++ ++ sll.d t1, t5, a6 ++ srl.d t5, t5, a5 ++ ++ sll.d t2, t6, a6 ++ srl.d t6, t6, a5 ++ ++ srl.d t3, a7, a5 ++ sll.d a7, a7, a6 ++ ++ or t4, t7, t4 ++ or t5, t0, t5 ++ or t6, t1, t6 ++ or t3, t2, t3 ++ ++ st.d t4, a4, -40 ++ st.d t5, a4, -48 ++ st.d t6, a4, -56 ++ st.d t3, a4, -64 ++ ++ addi.d a4, a4, -64 ++ bne a3, a1, L(back_un_long_bytes) ++ ++L(back_un_less_64bytes): ++ srai.d a3, a2, 5 ++ beqz a3, L(back_un_less_32bytes) ++ ++ ld.d t0, a1, -8 ++ ld.d t1, a1, -16 ++ ld.d t2, a1, -24 ++ ld.d t3, a1, -32 ++ ++ addi.d a1, a1, -32 ++ addi.d a2, a2, -32 ++ ++ sll.d t4, t0, a6 ++ srl.d t0, t0, a5 ++ ++ sll.d t5, t1, a6 ++ srl.d t1, t1, a5 ++ ++ sll.d t6, t2, a6 ++ srl.d t2, t2, a5 ++ ++ or t0, a7, t0 ++ ++ sll.d a7, t3, a6 ++ srl.d t3, t3, a5 ++ ++ or t1, t4, t1 ++ or t2, t5, t2 ++ or t3, t6, t3 ++ ++ st.d t0, a4, -8 ++ st.d t1, a4, -16 ++ st.d t2, a4, -24 ++ st.d t3, a4, -32 ++ ++ addi.d a4, a4, -32 ++ ++L(back_un_less_32bytes): ++ srai.d a3, a2, 4 ++ beqz a3, L(back_un_less_16bytes) ++ ++ ld.d t0, a1, -8 ++ ld.d t1, a1, -16 ++ ++ addi.d a1, a1, -16 ++ addi.d a2, a2, -16 ++ ++ sll.d t2, t0, a6 ++ srl.d t3, t0, a5 ++ ++ srl.d t4, t1, a5 ++ or t3, a7, t3 ++ or t4, t2, t4 ++ sll.d a7, t1, a6 ++ ++ st.d t3, a4, -8 ++ st.d t4, a4, -16 ++ ++ addi.d a4, a4, -16 ++ ++L(back_un_less_16bytes): ++ srai.d a3, a2, 3 ++ beqz a3, L(back_un_less_8bytes) ++ ++ ld.d t0, a1, -8 ++ ++ addi.d a1, a1, -8 ++ addi.d a2, a2, -8 ++ ++ srl.d t1, t0, a5 ++ or t2, a7, t1 ++ sll.d a7, t0, a6 ++ ++ st.d t2, a4, -8 ++ addi.d a4, a4, -8 ++ ++L(back_un_less_8bytes): ++ beqz a2, L(back_end) ++ bge t8, a2, 1f ++ ++ ld.d t0, a1, -8 ++ srl.d t0, t0, a5 ++ or a7, a7, t0 ++ ++1: ++ srai.d a3, a2, 2 ++ beqz a3, L(back_un_less_4bytes) ++ ++ srai.d t0, a7, 32 ++ addi.d a2, a2, -4 ++ st.w t0, a4, -4 ++ addi.d a4, a4, -4 ++ slli.d a7, a7, 32 ++ ++L(back_un_less_4bytes): ++ srai.d a3, a2, 1 ++ beqz a3, L(back_un_less_2bytes) ++ srai.d t0, a7, 48 ++ addi.d a2, a2, -2 ++ st.h t0, a4, -2 ++ addi.d a4, a4, -2 ++ slli.d a7, a7, 16 ++L(back_un_less_2bytes): ++ beqz a2, L(back_un_less_1byte) ++ srai.d t0, a7, 56 ++ st.b t0, a4, -1 ++L(back_un_less_1byte): ++ jr ra ++ ++L(back_short_data): ++ pcaddi t1, 34 ++ slli.d t2, a2, 3 ++ sub.d t1, t1, t2 ++ jr t1 ++ ++ ld.b t0, a1, 14 ++ st.b t0, a0, 14 ++ ld.b t0, a1, 13 ++ st.b t0, a0, 13 ++ ld.b t0, a1, 12 ++ st.b t0, a0, 12 ++ ld.b t0, a1, 11 ++ st.b t0, a0, 11 ++ ld.b t0, a1, 10 ++ st.b t0, a0, 10 ++ ld.b t0, a1, 9 ++ st.b t0, a0, 9 ++ ld.b t0, a1, 8 ++ st.b t0, a0, 8 ++ ld.b t0, a1, 7 ++ st.b t0, a0, 7 ++ ld.b t0, a1, 6 ++ st.b t0, a0, 6 ++ ld.b 
t0, a1, 5 ++ st.b t0, a0, 5 ++ ld.b t0, a1, 4 ++ st.b t0, a0, 4 ++ ld.b t0, a1, 3 ++ st.b t0, a0, 3 ++ ld.b t0, a1, 2 ++ st.b t0, a0, 2 ++ ld.b t0, a1, 1 ++ st.b t0, a0, 1 ++ ld.b t0, a1, 0 ++ st.b t0, a0, 0 ++L(back_end): ++ jr ra ++ ++END(MEMCPY_NAME) ++ ++libc_hidden_builtin_def (MEMMOVE_NAME) ++libc_hidden_builtin_def (MEMCPY_NAME) +diff --git a/sysdeps/loongarch/lp64/multiarch/memcpy-lasx.S b/sysdeps/loongarch/lp64/multiarch/memcpy-lasx.S +new file mode 100644 +index 00000000..ae148df5 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/memcpy-lasx.S +@@ -0,0 +1,20 @@ ++/* Optimized memcpy implementation using LoongArch LASX instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ . */ ++ ++/* memcpy is part of memmove.S */ +diff --git a/sysdeps/loongarch/lp64/multiarch/memcpy-lsx.S b/sysdeps/loongarch/lp64/multiarch/memcpy-lsx.S +new file mode 100644 +index 00000000..feb2bb0e +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/memcpy-lsx.S +@@ -0,0 +1,20 @@ ++/* Optimized memcpy implementation using LoongArch LSX instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ . */ ++ ++/* memcpy is part of memmove.S */ +diff --git a/sysdeps/loongarch/lp64/multiarch/memcpy-unaligned.S b/sysdeps/loongarch/lp64/multiarch/memcpy-unaligned.S +new file mode 100644 +index 00000000..31019b13 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/memcpy-unaligned.S +@@ -0,0 +1,247 @@ ++/* Optimized unaligned memcpy implementation using basic LoongArch instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. 
++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ . */ ++ ++#include ++#include ++#include ++ ++#if IS_IN (libc) ++ ++# define MEMCPY_NAME __memcpy_unaligned ++ ++# define LD_64(reg, n) \ ++ ld.d t0, reg, n; \ ++ ld.d t1, reg, n + 8; \ ++ ld.d t2, reg, n + 16; \ ++ ld.d t3, reg, n + 24; \ ++ ld.d t4, reg, n + 32; \ ++ ld.d t5, reg, n + 40; \ ++ ld.d t6, reg, n + 48; \ ++ ld.d t7, reg, n + 56; ++ ++# define ST_64(reg, n) \ ++ st.d t0, reg, n; \ ++ st.d t1, reg, n + 8; \ ++ st.d t2, reg, n + 16; \ ++ st.d t3, reg, n + 24; \ ++ st.d t4, reg, n + 32; \ ++ st.d t5, reg, n + 40; \ ++ st.d t6, reg, n + 48; \ ++ st.d t7, reg, n + 56; ++ ++LEAF(MEMCPY_NAME, 3) ++ add.d a4, a1, a2 ++ add.d a3, a0, a2 ++ li.w a6, 16 ++ bge a6, a2, L(less_16bytes) ++ ++ li.w a6, 128 ++ blt a6, a2, L(long_bytes) ++ li.w a6, 64 ++ blt a6, a2, L(more_64bytes) ++ ++ li.w a6, 32 ++ blt a6, a2, L(more_32bytes) ++ ++ ld.d t0, a1, 0 ++ ld.d t1, a1, 8 ++ ld.d t2, a4, -16 ++ ld.d t3, a4, -8 ++ ++ st.d t0, a0, 0 ++ st.d t1, a0, 8 ++ st.d t2, a3, -16 ++ st.d t3, a3, -8 ++ jr ra ++ ++L(more_64bytes): ++ srli.d t8, a0, 3 ++ slli.d t8, t8, 3 ++ addi.d t8, t8, 0x8 ++ sub.d a7, a0, t8 ++ ++ ld.d t0, a1, 0 ++ sub.d a1, a1, a7 ++ st.d t0, a0, 0 ++ add.d a7, a7, a2 ++ addi.d a7, a7, -0x20 ++ ++L(loop_32): ++ ld.d t0, a1, 0 ++ ld.d t1, a1, 8 ++ ld.d t2, a1, 16 ++ ld.d t3, a1, 24 ++ ++ st.d t0, t8, 0 ++ st.d t1, t8, 8 ++ st.d t2, t8, 16 ++ st.d t3, t8, 24 ++ ++ addi.d t8, t8, 0x20 ++ addi.d a1, a1, 0x20 ++ addi.d a7, a7, -0x20 ++ blt zero, a7, L(loop_32) ++ ++ ld.d t4, a4, -32 ++ ld.d t5, a4, -24 ++ ld.d t6, a4, -16 ++ ld.d t7, a4, -8 ++ ++ st.d t4, a3, -32 ++ st.d t5, a3, -24 ++ st.d t6, a3, -16 ++ st.d t7, a3, -8 ++ ++ jr ra ++ ++L(more_32bytes): ++ ld.d t0, a1, 0 ++ ld.d t1, a1, 8 ++ ld.d t2, a1, 16 ++ ld.d t3, a1, 24 ++ ++ ld.d t4, a4, -32 ++ ld.d t5, a4, -24 ++ ld.d t6, a4, -16 ++ ld.d t7, a4, -8 ++ ++ st.d t0, a0, 0 ++ st.d t1, a0, 8 ++ st.d t2, a0, 16 ++ st.d t3, a0, 24 ++ ++ st.d t4, a3, -32 ++ st.d t5, a3, -24 ++ st.d t6, a3, -16 ++ st.d t7, a3, -8 ++ ++ jr ra ++ ++L(less_16bytes): ++ srai.d a6, a2, 3 ++ beqz a6, L(less_8bytes) ++ ++ ld.d t0, a1, 0 ++ ld.d t1, a4, -8 ++ st.d t0, a0, 0 ++ st.d t1, a3, -8 ++ ++ jr ra ++ ++L(less_8bytes): ++ srai.d a6, a2, 2 ++ beqz a6, L(less_4bytes) ++ ++ ld.w t0, a1, 0 ++ ld.w t1, a4, -4 ++ st.w t0, a0, 0 ++ st.w t1, a3, -4 ++ ++ jr ra ++ ++L(less_4bytes): ++ srai.d a6, a2, 1 ++ beqz a6, L(less_2bytes) ++ ++ ld.h t0, a1, 0 ++ ld.h t1, a4, -2 ++ st.h t0, a0, 0 ++ st.h t1, a3, -2 ++ ++ jr ra ++ ++L(less_2bytes): ++ beqz a2, L(less_1bytes) ++ ++ ld.b t0, a1, 0 ++ st.b t0, a0, 0 ++ jr ra ++ ++L(less_1bytes): ++ jr ra ++ ++L(long_bytes): ++ srli.d t8, a0, 3 ++ slli.d t8, t8, 3 ++ beq a0, t8, L(start) ++ ld.d t0, a1, 0 ++ ++ addi.d t8, t8, 0x8 ++ st.d t0, a0, 0 ++ sub.d a7, a0, t8 ++ sub.d a1, a1, a7 ++ ++L(start): ++ addi.d a5, a3, -0x80 ++ blt a5, t8, L(align_end_proc) ++ ++L(loop_128): ++ LD_64(a1, 0) ++ ST_64(t8, 0) ++ LD_64(a1, 64) ++ addi.d a1, a1, 0x80 ++ ST_64(t8, 64) ++ addi.d t8, t8, 0x80 ++ bge a5, t8, L(loop_128) ++ ++L(align_end_proc): ++ sub.d a2, a3, t8 ++ pcaddi t1, 34 ++ andi t2, a2, 0x78 ++ sub.d t1, t1, t2 ++ jr t1 ++ ++ ld.d t0, a1, 112 ++ st.d t0, t8, 112 ++ ld.d t0, a1, 104 ++ st.d t0, t8, 104 ++ ld.d t0, a1, 96 ++ st.d t0, t8, 96 ++ ld.d t0, a1, 88 ++ st.d t0, t8, 88 ++ ld.d t0, a1, 80 ++ st.d t0, t8, 80 ++ ld.d t0, a1, 72 ++ st.d t0, t8, 72 ++ ld.d t0, a1, 64 ++ st.d t0, t8, 64 ++ ld.d t0, a1, 56 ++ st.d 
t0, t8, 56
++ ld.d t0, a1, 48
++ st.d t0, t8, 48
++ ld.d t0, a1, 40
++ st.d t0, t8, 40
++ ld.d t0, a1, 32
++ st.d t0, t8, 32
++ ld.d t0, a1, 24
++ st.d t0, t8, 24
++ ld.d t0, a1, 16
++ st.d t0, t8, 16
++ ld.d t0, a1, 8
++ st.d t0, t8, 8
++ ld.d t0, a1, 0
++ st.d t0, t8, 0
++ ld.d t0, a4, -8
++ st.d t0, a3, -8
++
++ jr ra
++END(MEMCPY_NAME)
++
++libc_hidden_builtin_def (MEMCPY_NAME)
++#endif
+diff --git a/sysdeps/loongarch/lp64/multiarch/memcpy.c b/sysdeps/loongarch/lp64/multiarch/memcpy.c
+new file mode 100644
+index 00000000..93b238ce
+--- /dev/null
++++ b/sysdeps/loongarch/lp64/multiarch/memcpy.c
+@@ -0,0 +1,37 @@
++/* Multiple versions of memcpy.
++ All versions must be listed in ifunc-impl-list.c.
++ Copyright (C) 2023 Free Software Foundation, Inc.
++ This file is part of the GNU C Library.
++
++ The GNU C Library is free software; you can redistribute it and/or
++ modify it under the terms of the GNU Lesser General Public
++ License as published by the Free Software Foundation; either
++ version 2.1 of the License, or (at your option) any later version.
++
++ The GNU C Library is distributed in the hope that it will be useful,
++ but WITHOUT ANY WARRANTY; without even the implied warranty of
++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ Lesser General Public License for more details.
++
++ You should have received a copy of the GNU Lesser General Public
++ License along with the GNU C Library; if not, see
++ <https://www.gnu.org/licenses/>. */
++
++/* Define multiple versions only for the definition in libc. */
++#if IS_IN (libc)
++# define memcpy __redirect_memcpy
++# include <string.h>
++# undef memcpy
++
++# define SYMBOL_NAME memcpy
++# include "ifunc-lasx.h"
++
++libc_ifunc_redirected (__redirect_memcpy, memcpy,
++ IFUNC_SELECTOR ());
++
++# ifdef SHARED
++__hidden_ver1 (memcpy, __GI_memcpy, __redirect_memcpy)
++ __attribute__ ((visibility ("hidden"))) __attribute_copy__ (memcpy);
++# endif
++
++#endif
+diff --git a/sysdeps/loongarch/lp64/multiarch/memmove-aligned.S b/sysdeps/loongarch/lp64/multiarch/memmove-aligned.S
+new file mode 100644
+index 00000000..a02114c0
+--- /dev/null
++++ b/sysdeps/loongarch/lp64/multiarch/memmove-aligned.S
+@@ -0,0 +1,20 @@
++/* Optimized memmove_aligned implementation using basic LoongArch instructions.
++ Copyright (C) 2023 Free Software Foundation, Inc.
++
++ This file is part of the GNU C Library.
++
++ The GNU C Library is free software; you can redistribute it and/or
++ modify it under the terms of the GNU Lesser General Public
++ License as published by the Free Software Foundation; either
++ version 2.1 of the License, or (at your option) any later version.
++
++ The GNU C Library is distributed in the hope that it will be useful,
++ but WITHOUT ANY WARRANTY; without even the implied warranty of
++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ Lesser General Public License for more details.
++
++ You should have received a copy of the GNU Lesser General Public
++ License along with the GNU C Library. If not, see
++ <https://www.gnu.org/licenses/>. */
++
++/* memmove_aligned is part of memcpy_aligned, see memcpy-aligned.S. */
+diff --git a/sysdeps/loongarch/lp64/multiarch/memmove-lasx.S b/sysdeps/loongarch/lp64/multiarch/memmove-lasx.S
+new file mode 100644
+index 00000000..95d8ee7b
+--- /dev/null
++++ b/sysdeps/loongarch/lp64/multiarch/memmove-lasx.S
+@@ -0,0 +1,287 @@
++/* Optimized memmove implementation using LoongArch LASX instructions.
++ Copyright (C) 2023 Free Software Foundation, Inc.
++
++ This file is part of the GNU C Library.
++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ . */ ++ ++#include ++#include ++#include ++ ++#if IS_IN (libc) && !defined __loongarch_soft_float ++ ++#ifndef MEMCPY_NAME ++# define MEMCPY_NAME __memcpy_lasx ++#endif ++ ++#ifndef MEMMOVE_NAME ++# define MEMMOVE_NAME __memmove_lasx ++#endif ++ ++LEAF(MEMCPY_NAME, 6) ++ li.d t0, 32 ++ add.d a3, a0, a2 ++ add.d a4, a1, a2 ++ bgeu t0, a2, L(less_32bytes) ++ ++ li.d t1, 64 ++ bltu t1, a2, L(copy_long) ++ xvld xr0, a1, 0 ++ xvld xr1, a4, -32 ++ ++ xvst xr0, a0, 0 ++ xvst xr1, a3, -32 ++ jr ra ++L(less_32bytes): ++ srli.d t0, a2, 4 ++ ++ beqz t0, L(less_16bytes) ++ vld vr0, a1, 0 ++ vld vr1, a4, -16 ++ vst vr0, a0, 0 ++ ++ ++ vst vr1, a3, -16 ++ jr ra ++L(less_16bytes): ++ srli.d t0, a2, 3 ++ beqz t0, L(less_8bytes) ++ ++ ld.d t0, a1, 0 ++ ld.d t1, a4, -8 ++ st.d t0, a0, 0 ++ st.d t1, a3, -8 ++ ++ jr ra ++L(less_8bytes): ++ srli.d t0, a2, 2 ++ beqz t0, L(less_4bytes) ++ ld.w t0, a1, 0 ++ ++ ld.w t1, a4, -4 ++ st.w t0, a0, 0 ++ st.w t1, a3, -4 ++ jr ra ++ ++ ++L(less_4bytes): ++ srli.d t0, a2, 1 ++ beqz t0, L(less_2bytes) ++ ld.h t0, a1, 0 ++ ld.h t1, a4, -2 ++ ++ st.h t0, a0, 0 ++ st.h t1, a3, -2 ++ jr ra ++L(less_2bytes): ++ beqz a2, L(less_1bytes) ++ ++ ld.b t0, a1, 0 ++ st.b t0, a0, 0 ++L(less_1bytes): ++ jr ra ++END(MEMCPY_NAME) ++ ++LEAF(MEMMOVE_NAME, 6) ++ ++ li.d t0, 32 ++ add.d a3, a0, a2 ++ add.d a4, a1, a2 ++ bgeu t0, a2, L(less_32bytes) ++ ++ li.d t1, 64 ++ bltu t1, a2, L(move_long) ++ xvld xr0, a1, 0 ++ xvld xr1, a4, -32 ++ ++ xvst xr0, a0, 0 ++ xvst xr1, a3, -32 ++ jr ra ++L(move_long): ++ sub.d t2, a0, a1 ++ ++ bltu t2, a2, L(copy_back) ++L(copy_long): ++ andi t2, a0, 0x1f ++ addi.d a2, a2, -1 ++ sub.d t2, t0, t2 ++ ++ ++ xvld xr8, a1, 0 ++ xvld xr9, a4, -32 ++ sub.d t3, a2, t2 ++ add.d a5, a0, t2 ++ ++ andi a2, t3, 0xff ++ add.d a1, a1, t2 ++ beq a2, t3, L(lt256) ++ sub.d a6, a4, a2 ++ ++ addi.d a6, a6, -1 ++L(loop_256): ++ xvld xr0, a1, 0 ++ xvld xr1, a1, 32 ++ xvld xr2, a1, 64 ++ ++ xvld xr3, a1, 96 ++ xvld xr4, a1, 128 ++ xvld xr5, a1, 160 ++ xvld xr6, a1, 192 ++ ++ ++ xvld xr7, a1, 224 ++ addi.d a1, a1, 256 ++ xvst xr0, a5, 0 ++ xvst xr1, a5, 32 ++ ++ xvst xr2, a5, 64 ++ xvst xr3, a5, 96 ++ xvst xr4, a5, 128 ++ xvst xr5, a5, 160 ++ ++ xvst xr6, a5, 192 ++ xvst xr7, a5, 224 ++ addi.d a5, a5, 256 ++ bne a1, a6, L(loop_256) ++ ++L(lt256): ++ srli.d t2, a2, 7 ++ beqz t2, L(lt128) ++ xvld xr0, a1, 0 ++ xvld xr1, a1, 32 ++ ++ ++ xvld xr2, a1, 64 ++ xvld xr3, a1, 96 ++ addi.d a1, a1, 128 ++ addi.d a2, a2, -128 ++ ++ xvst xr0, a5, 0 ++ xvst xr1, a5, 32 ++ xvst xr2, a5, 64 ++ xvst xr3, a5, 96 ++ ++ addi.d a5, a5, 128 ++L(lt128): ++ bltu a2, t1, L(lt64) ++ xvld xr0, a1, 0 ++ xvld xr1, a1, 32 ++ ++ addi.d a1, a1, 64 ++ addi.d a2, a2, -64 ++ xvst xr0, a5, 0 ++ xvst xr1, a5, 32 ++ ++ ++ addi.d a5, a5, 64 ++L(lt64): ++ bltu a2, t0, L(lt32) ++ xvld xr0, a1, 0 ++ xvst xr0, a5, 0 ++ ++L(lt32): ++ xvst xr8, a0, 0 ++ xvst xr9, a3, -32 ++ jr ra ++ nop 
++ ++L(copy_back): ++ addi.d a3, a3, -1 ++ addi.d a2, a2, -2 ++ andi t2, a3, 0x1f ++ xvld xr8, a1, 0 ++ ++ xvld xr9, a4, -32 ++ sub.d t3, a2, t2 ++ sub.d a5, a3, t2 ++ sub.d a4, a4, t2 ++ ++ ++ andi a2, t3, 0xff ++ beq a2, t3, L(back_lt256) ++ add.d a6, a1, a2 ++ addi.d a6, a6, 2 ++ ++L(back_loop_256): ++ xvld xr0, a4, -33 ++ xvld xr1, a4, -65 ++ xvld xr2, a4, -97 ++ xvld xr3, a4, -129 ++ ++ xvld xr4, a4, -161 ++ xvld xr5, a4, -193 ++ xvld xr6, a4, -225 ++ xvld xr7, a4, -257 ++ ++ addi.d a4, a4, -256 ++ xvst xr0, a5, -32 ++ xvst xr1, a5, -64 ++ xvst xr2, a5, -96 ++ ++ ++ xvst xr3, a5, -128 ++ xvst xr4, a5, -160 ++ xvst xr5, a5, -192 ++ xvst xr6, a5, -224 ++ ++ xvst xr7, a5, -256 ++ addi.d a5, a5, -256 ++ bne a4, a6, L(back_loop_256) ++L(back_lt256): ++ srli.d t2, a2, 7 ++ ++ beqz t2, L(back_lt128) ++ xvld xr0, a4, -33 ++ xvld xr1, a4, -65 ++ xvld xr2, a4, -97 ++ ++ xvld xr3, a4, -129 ++ addi.d a2, a2, -128 ++ addi.d a4, a4, -128 ++ xvst xr0, a5, -32 ++ ++ ++ xvst xr1, a5, -64 ++ xvst xr2, a5, -96 ++ xvst xr3, a5, -128 ++ addi.d a5, a5, -128 ++ ++L(back_lt128): ++ blt a2, t1, L(back_lt64) ++ xvld xr0, a4, -33 ++ xvld xr1, a4, -65 ++ addi.d a2, a2, -64 ++ ++ addi.d a4, a4, -64 ++ xvst xr0, a5, -32 ++ xvst xr1, a5, -64 ++ addi.d a5, a5, -64 ++ ++L(back_lt64): ++ bltu a2, t0, L(back_lt32) ++ xvld xr0, a4, -33 ++ xvst xr0, a5, -32 ++L(back_lt32): ++ xvst xr8, a0, 0 ++ ++ ++ xvst xr9, a3, -31 ++ jr ra ++END(MEMMOVE_NAME) ++ ++libc_hidden_builtin_def (MEMCPY_NAME) ++libc_hidden_builtin_def (MEMMOVE_NAME) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/memmove-lsx.S b/sysdeps/loongarch/lp64/multiarch/memmove-lsx.S +new file mode 100644 +index 00000000..5eb819ef +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/memmove-lsx.S +@@ -0,0 +1,534 @@ ++/* Optimized memmove implementation using LoongArch LSX instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ . 
*/ ++ ++#include ++#include ++#include ++ ++#if IS_IN (libc) && !defined __loongarch_soft_float ++ ++# define MEMCPY_NAME __memcpy_lsx ++# define MEMMOVE_NAME __memmove_lsx ++ ++LEAF(MEMCPY_NAME, 6) ++ li.d t6, 16 ++ add.d a3, a0, a2 ++ add.d a4, a1, a2 ++ bgeu t6, a2, L(less_16bytes) ++ ++ li.d t8, 64 ++ li.d t7, 32 ++ bltu t8, a2, L(copy_long) ++ bltu t7, a2, L(more_32bytes) ++ ++ vld vr0, a1, 0 ++ vld vr1, a4, -16 ++ vst vr0, a0, 0 ++ vst vr1, a3, -16 ++ ++ jr ra ++L(more_32bytes): ++ vld vr0, a1, 0 ++ vld vr1, a1, 16 ++ vld vr2, a4, -32 ++ ++ ++ vld vr3, a4, -16 ++ vst vr0, a0, 0 ++ vst vr1, a0, 16 ++ vst vr2, a3, -32 ++ ++ vst vr3, a3, -16 ++ jr ra ++L(less_16bytes): ++ srli.d t0, a2, 3 ++ beqz t0, L(less_8bytes) ++ ++ vldrepl.d vr0, a1, 0 ++ vldrepl.d vr1, a4, -8 ++ vstelm.d vr0, a0, 0, 0 ++ vstelm.d vr1, a3, -8, 0 ++ ++ jr ra ++L(less_8bytes): ++ srli.d t0, a2, 2 ++ beqz t0, L(less_4bytes) ++ vldrepl.w vr0, a1, 0 ++ ++ ++ vldrepl.w vr1, a4, -4 ++ vstelm.w vr0, a0, 0, 0 ++ vstelm.w vr1, a3, -4, 0 ++ jr ra ++ ++L(less_4bytes): ++ srli.d t0, a2, 1 ++ beqz t0, L(less_2bytes) ++ vldrepl.h vr0, a1, 0 ++ vldrepl.h vr1, a4, -2 ++ ++ vstelm.h vr0, a0, 0, 0 ++ vstelm.h vr1, a3, -2, 0 ++ jr ra ++L(less_2bytes): ++ beqz a2, L(less_1bytes) ++ ++ ld.b t0, a1, 0 ++ st.b t0, a0, 0 ++L(less_1bytes): ++ jr ra ++ nop ++END(MEMCPY_NAME) ++ ++LEAF(MEMMOVE_NAME, 6) ++ li.d t6, 16 ++ add.d a3, a0, a2 ++ add.d a4, a1, a2 ++ bgeu t6, a2, L(less_16bytes) ++ ++ li.d t8, 64 ++ li.d t7, 32 ++ bltu t8, a2, L(move_long) ++ bltu t7, a2, L(more_32bytes) ++ ++ vld vr0, a1, 0 ++ vld vr1, a4, -16 ++ vst vr0, a0, 0 ++ vst vr1, a3, -16 ++ ++ jr ra ++ nop ++L(move_long): ++ sub.d t0, a0, a1 ++ bltu t0, a2, L(copy_back) ++ ++ ++L(copy_long): ++ vld vr2, a1, 0 ++ andi t0, a0, 0xf ++ sub.d t0, t6, t0 ++ add.d a1, a1, t0 ++ ++ sub.d a2, a2, t0 ++ andi t1, a1, 0xf ++ bnez t1, L(unaligned) ++ vld vr0, a1, 0 ++ ++ addi.d a2, a2, -16 ++ vst vr2, a0, 0 ++ andi t2, a2, 0x7f ++ add.d a5, a0, t0 ++ ++ beq a2, t2, L(al_less_128) ++ sub.d t3, a2, t2 ++ move a2, t2 ++ add.d a6, a1, t3 ++ ++ ++L(al_loop): ++ vld vr1, a1, 16 ++ vld vr2, a1, 32 ++ vld vr3, a1, 48 ++ vld vr4, a1, 64 ++ ++ vld vr5, a1, 80 ++ vld vr6, a1, 96 ++ vld vr7, a1, 112 ++ vst vr0, a5, 0 ++ ++ vld vr0, a1, 128 ++ addi.d a1, a1, 128 ++ vst vr1, a5, 16 ++ vst vr2, a5, 32 ++ ++ vst vr3, a5, 48 ++ vst vr4, a5, 64 ++ vst vr5, a5, 80 ++ vst vr6, a5, 96 ++ ++ ++ vst vr7, a5, 112 ++ addi.d a5, a5, 128 ++ bne a1, a6, L(al_loop) ++L(al_less_128): ++ blt a2, t8, L(al_less_64) ++ ++ vld vr1, a1, 16 ++ vld vr2, a1, 32 ++ vld vr3, a1, 48 ++ addi.d a2, a2, -64 ++ ++ vst vr0, a5, 0 ++ vld vr0, a1, 64 ++ addi.d a1, a1, 64 ++ vst vr1, a5, 16 ++ ++ vst vr2, a5, 32 ++ vst vr3, a5, 48 ++ addi.d a5, a5, 64 ++L(al_less_64): ++ blt a2, t7, L(al_less_32) ++ ++ ++ vld vr1, a1, 16 ++ addi.d a2, a2, -32 ++ vst vr0, a5, 0 ++ vld vr0, a1, 32 ++ ++ addi.d a1, a1, 32 ++ vst vr1, a5, 16 ++ addi.d a5, a5, 32 ++L(al_less_32): ++ blt a2, t6, L(al_less_16) ++ ++ vst vr0, a5, 0 ++ vld vr0, a1, 16 ++ addi.d a5, a5, 16 ++L(al_less_16): ++ vld vr1, a4, -16 ++ ++ vst vr0, a5, 0 ++ vst vr1, a3, -16 ++ jr ra ++ nop ++ ++ ++L(unaligned): ++ pcalau12i t2, %pc_hi20(L(INDEX)) ++ bstrins.d a1, zero, 3, 0 ++ vld vr8, t2, %pc_lo12(L(INDEX)) ++ vld vr0, a1, 0 ++ ++ vld vr1, a1, 16 ++ addi.d a2, a2, -16 ++ vst vr2, a0, 0 ++ add.d a5, a0, t0 ++ ++ vreplgr2vr.b vr9, t1 ++ andi t2, a2, 0x7f ++ vadd.b vr9, vr9, vr8 ++ addi.d a1, a1, 32 ++ ++ ++ beq t2, a2, L(un_less_128) ++ sub.d t3, a2, t2 ++ move a2, t2 ++ add.d a6, a1, 
t3 ++ ++L(un_loop): ++ vld vr2, a1, 0 ++ vld vr3, a1, 16 ++ vld vr4, a1, 32 ++ vld vr5, a1, 48 ++ ++ vld vr6, a1, 64 ++ vld vr7, a1, 80 ++ vshuf.b vr8, vr1, vr0, vr9 ++ vld vr0, a1, 96 ++ ++ vst vr8, a5, 0 ++ vshuf.b vr8, vr2, vr1, vr9 ++ vld vr1, a1, 112 ++ vst vr8, a5, 16 ++ ++ ++ addi.d a1, a1, 128 ++ vshuf.b vr2, vr3, vr2, vr9 ++ vshuf.b vr3, vr4, vr3, vr9 ++ vst vr2, a5, 32 ++ ++ vshuf.b vr4, vr5, vr4, vr9 ++ vst vr3, a5, 48 ++ vshuf.b vr5, vr6, vr5, vr9 ++ vst vr4, a5, 64 ++ ++ vshuf.b vr6, vr7, vr6, vr9 ++ vst vr5, a5, 80 ++ vshuf.b vr7, vr0, vr7, vr9 ++ vst vr6, a5, 96 ++ ++ vst vr7, a5, 112 ++ addi.d a5, a5, 128 ++ bne a1, a6, L(un_loop) ++L(un_less_128): ++ blt a2, t8, L(un_less_64) ++ ++ ++ vld vr2, a1, 0 ++ vld vr3, a1, 16 ++ vshuf.b vr4, vr1, vr0, vr9 ++ vld vr0, a1, 32 ++ ++ vst vr4, a5, 0 ++ addi.d a2, a2, -64 ++ vshuf.b vr4, vr2, vr1, vr9 ++ vld vr1, a1, 48 ++ ++ addi.d a1, a1, 64 ++ vst vr4, a5, 16 ++ vshuf.b vr2, vr3, vr2, vr9 ++ vshuf.b vr3, vr0, vr3, vr9 ++ ++ vst vr2, a5, 32 ++ vst vr3, a5, 48 ++ addi.d a5, a5, 64 ++L(un_less_64): ++ blt a2, t7, L(un_less_32) ++ ++ ++ vshuf.b vr3, vr1, vr0, vr9 ++ vld vr0, a1, 0 ++ vst vr3, a5, 0 ++ addi.d a2, a2, -32 ++ ++ vshuf.b vr3, vr0, vr1, vr9 ++ vld vr1, a1, 16 ++ addi.d a1, a1, 32 ++ vst vr3, a5, 16 ++ ++ addi.d a5, a5, 32 ++L(un_less_32): ++ blt a2, t6, L(un_less_16) ++ vshuf.b vr2, vr1, vr0, vr9 ++ vor.v vr0, vr1, vr1 ++ ++ vld vr1, a1, 0 ++ vst vr2, a5, 0 ++ addi.d a5, a5, 16 ++L(un_less_16): ++ vld vr2, a4, -16 ++ ++ ++ vshuf.b vr0, vr1, vr0, vr9 ++ vst vr0, a5, 0 ++ vst vr2, a3, -16 ++ jr ra ++ ++L(copy_back): ++ addi.d t0, a3, -1 ++ vld vr2, a4, -16 ++ andi t0, t0, 0xf ++ addi.d t0, t0, 1 ++ ++ sub.d a4, a4, t0 ++ sub.d a2, a2, t0 ++ andi t1, a4, 0xf ++ bnez t1, L(back_unaligned) ++ ++ vld vr0, a4, -16 ++ addi.d a2, a2, -16 ++ vst vr2, a3, -16 ++ andi t2, a2, 0x7f ++ ++ ++ sub.d a3, a3, t0 ++ beq t2, a2, L(back_al_less_128) ++ sub.d t3, a2, t2 ++ move a2, t2 ++ ++ sub.d a6, a4, t3 ++L(back_al_loop): ++ vld vr1, a4, -32 ++ vld vr2, a4, -48 ++ vld vr3, a4, -64 ++ ++ vld vr4, a4, -80 ++ vld vr5, a4, -96 ++ vld vr6, a4, -112 ++ vld vr7, a4, -128 ++ ++ vst vr0, a3, -16 ++ vld vr0, a4, -144 ++ addi.d a4, a4, -128 ++ vst vr1, a3, -32 ++ ++ ++ vst vr2, a3, -48 ++ vst vr3, a3, -64 ++ vst vr4, a3, -80 ++ vst vr5, a3, -96 ++ ++ vst vr6, a3, -112 ++ vst vr7, a3, -128 ++ addi.d a3, a3, -128 ++ bne a4, a6, L(back_al_loop) ++ ++L(back_al_less_128): ++ blt a2, t8, L(back_al_less_64) ++ vld vr1, a4, -32 ++ vld vr2, a4, -48 ++ vld vr3, a4, -64 ++ ++ addi.d a2, a2, -64 ++ vst vr0, a3, -16 ++ vld vr0, a4, -80 ++ addi.d a4, a4, -64 ++ ++ ++ vst vr1, a3, -32 ++ vst vr2, a3, -48 ++ vst vr3, a3, -64 ++ addi.d a3, a3, -64 ++ ++L(back_al_less_64): ++ blt a2, t7, L(back_al_less_32) ++ vld vr1, a4, -32 ++ addi.d a2, a2, -32 ++ vst vr0, a3, -16 ++ ++ vld vr0, a4, -48 ++ vst vr1, a3, -32 ++ addi.d a3, a3, -32 ++ addi.d a4, a4, -32 ++ ++L(back_al_less_32): ++ blt a2, t6, L(back_al_less_16) ++ vst vr0, a3, -16 ++ vld vr0, a4, -32 ++ addi.d a3, a3, -16 ++ ++ ++L(back_al_less_16): ++ vld vr1, a1, 0 ++ vst vr0, a3, -16 ++ vst vr1, a0, 0 ++ jr ra ++ ++L(back_unaligned): ++ pcalau12i t2, %pc_hi20(L(INDEX)) ++ bstrins.d a4, zero, 3, 0 ++ vld vr8, t2, %pc_lo12(L(INDEX)) ++ vld vr0, a4, 0 ++ ++ vld vr1, a4, -16 ++ addi.d a2, a2, -16 ++ vst vr2, a3, -16 ++ sub.d a3, a3, t0 ++ ++ ++ vreplgr2vr.b vr9, t1 ++ andi t2, a2, 0x7f ++ vadd.b vr9, vr9, vr8 ++ addi.d a4, a4, -16 ++ ++ beq t2, a2, L(back_un_less_128) ++ sub.d t3, a2, t2 ++ move a2, t2 ++ sub.d a6, a4, t3 
++ ++L(back_un_loop): ++ vld vr2, a4, -16 ++ vld vr3, a4, -32 ++ vld vr4, a4, -48 ++ ++ vld vr5, a4, -64 ++ vld vr6, a4, -80 ++ vld vr7, a4, -96 ++ vshuf.b vr8, vr0, vr1, vr9 ++ ++ ++ vld vr0, a4, -112 ++ vst vr8, a3, -16 ++ vshuf.b vr8, vr1, vr2, vr9 ++ vld vr1, a4, -128 ++ ++ vst vr8, a3, -32 ++ addi.d a4, a4, -128 ++ vshuf.b vr2, vr2, vr3, vr9 ++ vshuf.b vr3, vr3, vr4, vr9 ++ ++ vst vr2, a3, -48 ++ vshuf.b vr4, vr4, vr5, vr9 ++ vst vr3, a3, -64 ++ vshuf.b vr5, vr5, vr6, vr9 ++ ++ vst vr4, a3, -80 ++ vshuf.b vr6, vr6, vr7, vr9 ++ vst vr5, a3, -96 ++ vshuf.b vr7, vr7, vr0, vr9 ++ ++ ++ vst vr6, a3, -112 ++ vst vr7, a3, -128 ++ addi.d a3, a3, -128 ++ bne a4, a6, L(back_un_loop) ++ ++L(back_un_less_128): ++ blt a2, t8, L(back_un_less_64) ++ vld vr2, a4, -16 ++ vld vr3, a4, -32 ++ vshuf.b vr4, vr0, vr1, vr9 ++ ++ vld vr0, a4, -48 ++ vst vr4, a3, -16 ++ addi.d a2, a2, -64 ++ vshuf.b vr4, vr1, vr2, vr9 ++ ++ vld vr1, a4, -64 ++ addi.d a4, a4, -64 ++ vst vr4, a3, -32 ++ vshuf.b vr2, vr2, vr3, vr9 ++ ++ ++ vshuf.b vr3, vr3, vr0, vr9 ++ vst vr2, a3, -48 ++ vst vr3, a3, -64 ++ addi.d a3, a3, -64 ++ ++L(back_un_less_64): ++ blt a2, t7, L(back_un_less_32) ++ vshuf.b vr3, vr0, vr1, vr9 ++ vld vr0, a4, -16 ++ vst vr3, a3, -16 ++ ++ addi.d a2, a2, -32 ++ vshuf.b vr3, vr1, vr0, vr9 ++ vld vr1, a4, -32 ++ addi.d a4, a4, -32 ++ ++ vst vr3, a3, -32 ++ addi.d a3, a3, -32 ++L(back_un_less_32): ++ blt a2, t6, L(back_un_less_16) ++ vshuf.b vr2, vr0, vr1, vr9 ++ ++ ++ vor.v vr0, vr1, vr1 ++ vld vr1, a4, -16 ++ vst vr2, a3, -16 ++ addi.d a3, a3, -16 ++ ++L(back_un_less_16): ++ vld vr2, a1, 0 ++ vshuf.b vr0, vr0, vr1, vr9 ++ vst vr0, a3, -16 ++ vst vr2, a0, 0 ++ ++ jr ra ++END(MEMMOVE_NAME) ++ ++ .section .rodata.cst16,"M",@progbits,16 ++ .align 4 ++L(INDEX): ++ .dword 0x0706050403020100 ++ .dword 0x0f0e0d0c0b0a0908 ++ ++libc_hidden_builtin_def (MEMCPY_NAME) ++libc_hidden_builtin_def (MEMMOVE_NAME) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/memmove-unaligned.S b/sysdeps/loongarch/lp64/multiarch/memmove-unaligned.S +new file mode 100644 +index 00000000..3284ce25 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/memmove-unaligned.S +@@ -0,0 +1,380 @@ ++/* Optimized memmove_unaligned implementation using basic LoongArch instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ . 
*/ ++ ++#include ++#include ++#include ++ ++#if IS_IN (libc) ++ ++# define MEMMOVE_NAME __memmove_unaligned ++ ++# define LD_64(reg, n) \ ++ ld.d t0, reg, n; \ ++ ld.d t1, reg, n + 8; \ ++ ld.d t2, reg, n + 16; \ ++ ld.d t3, reg, n + 24; \ ++ ld.d t4, reg, n + 32; \ ++ ld.d t5, reg, n + 40; \ ++ ld.d t6, reg, n + 48; \ ++ ld.d t7, reg, n + 56; ++ ++# define ST_64(reg, n) \ ++ st.d t0, reg, n; \ ++ st.d t1, reg, n + 8; \ ++ st.d t2, reg, n + 16; \ ++ st.d t3, reg, n + 24; \ ++ st.d t4, reg, n + 32; \ ++ st.d t5, reg, n + 40; \ ++ st.d t6, reg, n + 48; \ ++ st.d t7, reg, n + 56; ++ ++LEAF(MEMMOVE_NAME, 3) ++ add.d a4, a1, a2 ++ add.d a3, a0, a2 ++ beq a1, a0, L(less_1bytes) ++ move t8, a0 ++ ++ srai.d a6, a2, 4 ++ beqz a6, L(less_16bytes) ++ srai.d a6, a2, 6 ++ bnez a6, L(more_64bytes) ++ srai.d a6, a2, 5 ++ beqz a6, L(less_32bytes) ++ ++ ld.d t0, a1, 0 ++ ld.d t1, a1, 8 ++ ld.d t2, a1, 16 ++ ld.d t3, a1, 24 ++ ++ ld.d t4, a4, -32 ++ ld.d t5, a4, -24 ++ ld.d t6, a4, -16 ++ ld.d t7, a4, -8 ++ ++ st.d t0, a0, 0 ++ st.d t1, a0, 8 ++ st.d t2, a0, 16 ++ st.d t3, a0, 24 ++ ++ st.d t4, a3, -32 ++ st.d t5, a3, -24 ++ st.d t6, a3, -16 ++ st.d t7, a3, -8 ++ ++ jr ra ++ ++L(less_32bytes): ++ ld.d t0, a1, 0 ++ ld.d t1, a1, 8 ++ ld.d t2, a4, -16 ++ ld.d t3, a4, -8 ++ ++ st.d t0, a0, 0 ++ st.d t1, a0, 8 ++ st.d t2, a3, -16 ++ st.d t3, a3, -8 ++ ++ jr ra ++ ++L(less_16bytes): ++ srai.d a6, a2, 3 ++ beqz a6, L(less_8bytes) ++ ++ ld.d t0, a1, 0 ++ ld.d t1, a4, -8 ++ st.d t0, a0, 0 ++ st.d t1, a3, -8 ++ ++ jr ra ++ ++L(less_8bytes): ++ srai.d a6, a2, 2 ++ beqz a6, L(less_4bytes) ++ ++ ld.w t0, a1, 0 ++ ld.w t1, a4, -4 ++ st.w t0, a0, 0 ++ st.w t1, a3, -4 ++ ++ jr ra ++ ++L(less_4bytes): ++ srai.d a6, a2, 1 ++ beqz a6, L(less_2bytes) ++ ++ ld.h t0, a1, 0 ++ ld.h t1, a4, -2 ++ st.h t0, a0, 0 ++ st.h t1, a3, -2 ++ ++ jr ra ++ ++L(less_2bytes): ++ beqz a2, L(less_1bytes) ++ ++ ld.b t0, a1, 0 ++ st.b t0, a0, 0 ++ ++ jr ra ++ ++L(less_1bytes): ++ jr ra ++ ++L(more_64bytes): ++ sub.d a7, a0, a1 ++ bltu a7, a2, L(copy_backward) ++ ++L(copy_forward): ++ srli.d a0, a0, 3 ++ slli.d a0, a0, 3 ++ beq a0, t8, L(all_align) ++ addi.d a0, a0, 0x8 ++ sub.d a7, t8, a0 ++ sub.d a1, a1, a7 ++ add.d a2, a7, a2 ++ ++L(start_unalign_proc): ++ pcaddi t1, 18 ++ slli.d a6, a7, 3 ++ add.d t1, t1, a6 ++ jr t1 ++ ++ ld.b t0, a1, -7 ++ st.b t0, a0, -7 ++ ld.b t0, a1, -6 ++ st.b t0, a0, -6 ++ ld.b t0, a1, -5 ++ st.b t0, a0, -5 ++ ld.b t0, a1, -4 ++ st.b t0, a0, -4 ++ ld.b t0, a1, -3 ++ st.b t0, a0, -3 ++ ld.b t0, a1, -2 ++ st.b t0, a0, -2 ++ ld.b t0, a1, -1 ++ st.b t0, a0, -1 ++L(start_over): ++ ++ addi.d a2, a2, -0x80 ++ blt a2, zero, L(end_unalign_proc) ++ ++L(loop_less): ++ LD_64(a1, 0) ++ ST_64(a0, 0) ++ LD_64(a1, 64) ++ ST_64(a0, 64) ++ ++ addi.d a0, a0, 0x80 ++ addi.d a1, a1, 0x80 ++ addi.d a2, a2, -0x80 ++ bge a2, zero, L(loop_less) ++ ++L(end_unalign_proc): ++ addi.d a2, a2, 0x80 ++ ++ pcaddi t1, 36 ++ andi t2, a2, 0x78 ++ add.d a1, a1, t2 ++ add.d a0, a0, t2 ++ sub.d t1, t1, t2 ++ jr t1 ++ ++ ld.d t0, a1, -120 ++ st.d t0, a0, -120 ++ ld.d t0, a1, -112 ++ st.d t0, a0, -112 ++ ld.d t0, a1, -104 ++ st.d t0, a0, -104 ++ ld.d t0, a1, -96 ++ st.d t0, a0, -96 ++ ld.d t0, a1, -88 ++ st.d t0, a0, -88 ++ ld.d t0, a1, -80 ++ st.d t0, a0, -80 ++ ld.d t0, a1, -72 ++ st.d t0, a0, -72 ++ ld.d t0, a1, -64 ++ st.d t0, a0, -64 ++ ld.d t0, a1, -56 ++ st.d t0, a0, -56 ++ ld.d t0, a1, -48 ++ st.d t0, a0, -48 ++ ld.d t0, a1, -40 ++ st.d t0, a0, -40 ++ ld.d t0, a1, -32 ++ st.d t0, a0, -32 ++ ld.d t0, a1, -24 ++ st.d t0, a0, -24 ++ ld.d t0, a1, -16 ++ 
st.d t0, a0, -16 ++ ld.d t0, a1, -8 ++ st.d t0, a0, -8 ++ ++ andi a2, a2, 0x7 ++ pcaddi t1, 18 ++ slli.d a2, a2, 3 ++ sub.d t1, t1, a2 ++ jr t1 ++ ++ ld.b t0, a4, -7 ++ st.b t0, a3, -7 ++ ld.b t0, a4, -6 ++ st.b t0, a3, -6 ++ ld.b t0, a4, -5 ++ st.b t0, a3, -5 ++ ld.b t0, a4, -4 ++ st.b t0, a3, -4 ++ ld.b t0, a4, -3 ++ st.b t0, a3, -3 ++ ld.b t0, a4, -2 ++ st.b t0, a3, -2 ++ ld.b t0, a4, -1 ++ st.b t0, a3, -1 ++L(end): ++ move a0, t8 ++ jr ra ++ ++L(all_align): ++ addi.d a1, a1, 0x8 ++ addi.d a0, a0, 0x8 ++ ld.d t0, a1, -8 ++ st.d t0, a0, -8 ++ addi.d a2, a2, -8 ++ b L(start_over) ++ ++L(all_align_back): ++ addi.d a4, a4, -0x8 ++ addi.d a3, a3, -0x8 ++ ld.d t0, a4, 0 ++ st.d t0, a3, 0 ++ addi.d a2, a2, -8 ++ b L(start_over_back) ++ ++L(copy_backward): ++ move a5, a3 ++ srli.d a3, a3, 3 ++ slli.d a3, a3, 3 ++ beq a3, a5, L(all_align_back) ++ sub.d a7, a3, a5 ++ add.d a4, a4, a7 ++ add.d a2, a7, a2 ++ ++ pcaddi t1, 18 ++ slli.d a6, a7, 3 ++ add.d t1, t1, a6 ++ jr t1 ++ ++ ld.b t0, a4, 6 ++ st.b t0, a3, 6 ++ ld.b t0, a4, 5 ++ st.b t0, a3, 5 ++ ld.b t0, a4, 4 ++ st.b t0, a3, 4 ++ ld.b t0, a4, 3 ++ st.b t0, a3, 3 ++ ld.b t0, a4, 2 ++ st.b t0, a3, 2 ++ ld.b t0, a4, 1 ++ st.b t0, a3, 1 ++ ld.b t0, a4, 0 ++ st.b t0, a3, 0 ++L(start_over_back): ++ addi.d a2, a2, -0x80 ++ blt a2, zero, L(end_unalign_proc_back) ++ ++L(loop_less_back): ++ LD_64(a4, -64) ++ ST_64(a3, -64) ++ LD_64(a4, -128) ++ ST_64(a3, -128) ++ ++ addi.d a4, a4, -0x80 ++ addi.d a3, a3, -0x80 ++ addi.d a2, a2, -0x80 ++ bge a2, zero, L(loop_less_back) ++ ++L(end_unalign_proc_back): ++ addi.d a2, a2, 0x80 ++ ++ pcaddi t1, 36 ++ andi t2, a2, 0x78 ++ sub.d a4, a4, t2 ++ sub.d a3, a3, t2 ++ sub.d t1, t1, t2 ++ jr t1 ++ ++ ld.d t0, a4, 112 ++ st.d t0, a3, 112 ++ ld.d t0, a4, 104 ++ st.d t0, a3, 104 ++ ld.d t0, a4, 96 ++ st.d t0, a3, 96 ++ ld.d t0, a4, 88 ++ st.d t0, a3, 88 ++ ld.d t0, a4, 80 ++ st.d t0, a3, 80 ++ ld.d t0, a4, 72 ++ st.d t0, a3, 72 ++ ld.d t0, a4, 64 ++ st.d t0, a3, 64 ++ ld.d t0, a4, 56 ++ st.d t0, a3, 56 ++ ld.d t0, a4, 48 ++ st.d t0, a3, 48 ++ ld.d t0, a4, 40 ++ st.d t0, a3, 40 ++ ld.d t0, a4, 32 ++ st.d t0, a3, 32 ++ ld.d t0, a4, 24 ++ st.d t0, a3, 24 ++ ld.d t0, a4, 16 ++ st.d t0, a3, 16 ++ ld.d t0, a4, 8 ++ st.d t0, a3, 8 ++ ld.d t0, a4, 0 ++ st.d t0, a3, 0 ++ ++ andi a2, a2, 0x7 ++ pcaddi t1, 18 ++ slli.d a2, a2, 3 ++ sub.d t1, t1, a2 ++ jr t1 ++ ++ ld.b t0, a1, 6 ++ st.b t0, a0, 6 ++ ld.b t0, a1, 5 ++ st.b t0, a0, 5 ++ ld.b t0, a1, 4 ++ st.b t0, a0, 4 ++ ld.b t0, a1, 3 ++ st.b t0, a0, 3 ++ ld.b t0, a1, 2 ++ st.b t0, a0, 2 ++ ld.b t0, a1, 1 ++ st.b t0, a0, 1 ++ ld.b t0, a1, 0 ++ st.b t0, a0, 0 ++ ++ move a0, t8 ++ jr ra ++END(MEMMOVE_NAME) ++ ++libc_hidden_builtin_def (MEMMOVE_NAME) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/memmove.c b/sysdeps/loongarch/lp64/multiarch/memmove.c +new file mode 100644 +index 00000000..7e3ca4c4 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/memmove.c +@@ -0,0 +1,38 @@ ++/* Multiple versions of memmove. ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. 
++
++ The GNU C Library is distributed in the hope that it will be useful,
++ but WITHOUT ANY WARRANTY; without even the implied warranty of
++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ Lesser General Public License for more details.
++
++ You should have received a copy of the GNU Lesser General Public
++ License along with the GNU C Library; if not, see
++ <https://www.gnu.org/licenses/>. */
++
++/* Define multiple versions only for the definition in libc. */
++#if IS_IN (libc)
++# define memmove __redirect_memmove
++# include <string.h>
++# undef memmove
++
++# define SYMBOL_NAME memmove
++# include "ifunc-lasx.h"
++
++libc_ifunc_redirected (__redirect_memmove, __libc_memmove,
++ IFUNC_SELECTOR ());
++strong_alias (__libc_memmove, memmove);
++
++# ifdef SHARED
++__hidden_ver1 (__libc_memmove, __GI_memmove, __redirect_memmove)
++ __attribute__ ((visibility ("hidden")));
++# endif
++
++#endif
+diff --git a/sysdeps/loongarch/lp64/multiarch/memrchr-generic.c b/sysdeps/loongarch/lp64/multiarch/memrchr-generic.c
+new file mode 100644
+index 00000000..ced61ebc
+--- /dev/null
++++ b/sysdeps/loongarch/lp64/multiarch/memrchr-generic.c
+@@ -0,0 +1,23 @@
++/* Generic implementation of memrchr.
++ Copyright (C) 2023 Free Software Foundation, Inc.
++ This file is part of the GNU C Library.
++
++ The GNU C Library is free software; you can redistribute it and/or
++ modify it under the terms of the GNU Lesser General Public
++ License as published by the Free Software Foundation; either
++ version 2.1 of the License, or (at your option) any later version.
++
++ The GNU C Library is distributed in the hope that it will be useful,
++ but WITHOUT ANY WARRANTY; without even the implied warranty of
++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ Lesser General Public License for more details.
++
++ You should have received a copy of the GNU Lesser General Public
++ License along with the GNU C Library. If not, see
++ <https://www.gnu.org/licenses/>. */
++
++#if IS_IN (libc)
++# define MEMRCHR __memrchr_generic
++#endif
++
++#include <string/memrchr.c>
+diff --git a/sysdeps/loongarch/lp64/multiarch/memrchr-lasx.S b/sysdeps/loongarch/lp64/multiarch/memrchr-lasx.S
+new file mode 100644
+index 00000000..5f3e0d06
+--- /dev/null
++++ b/sysdeps/loongarch/lp64/multiarch/memrchr-lasx.S
+@@ -0,0 +1,123 @@
++/* Optimized memrchr implementation using LoongArch LASX instructions.
++ Copyright (C) 2023 Free Software Foundation, Inc.
++ This file is part of the GNU C Library.
++
++ The GNU C Library is free software; you can redistribute it and/or
++ modify it under the terms of the GNU Lesser General Public
++ License as published by the Free Software Foundation; either
++ version 2.1 of the License, or (at your option) any later version.
++
++ The GNU C Library is distributed in the hope that it will be useful,
++ but WITHOUT ANY WARRANTY; without even the implied warranty of
++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ Lesser General Public License for more details.
++
++ You should have received a copy of the GNU Lesser General Public
++ License along with the GNU C Library. If not, see
++ <https://www.gnu.org/licenses/>.
*/ ++ ++#include ++#include ++#include ++ ++#if IS_IN (libc) && !defined __loongarch_soft_float ++ ++#ifndef MEMRCHR ++# define MEMRCHR __memrchr_lasx ++#endif ++ ++LEAF(MEMRCHR, 6) ++ beqz a2, L(ret0) ++ addi.d a2, a2, -1 ++ add.d a3, a0, a2 ++ andi t1, a3, 0x3f ++ ++ bstrins.d a3, zero, 5, 0 ++ addi.d t1, t1, 1 ++ xvld xr0, a3, 0 ++ xvld xr1, a3, 32 ++ ++ sub.d t2, zero, t1 ++ li.d t3, -1 ++ xvreplgr2vr.b xr2, a1 ++ andi t4, a0, 0x3f ++ ++ srl.d t2, t3, t2 ++ xvseq.b xr0, xr0, xr2 ++ xvseq.b xr1, xr1, xr2 ++ xvmsknz.b xr0, xr0 ++ ++ ++ xvmsknz.b xr1, xr1 ++ xvpickve.w xr3, xr0, 4 ++ xvpickve.w xr4, xr1, 4 ++ vilvl.h vr0, vr3, vr0 ++ ++ vilvl.h vr1, vr4, vr1 ++ vilvl.w vr0, vr1, vr0 ++ movfr2gr.d t0, fa0 ++ and t0, t0, t2 ++ ++ bltu a2, t1, L(end) ++ bnez t0, L(found) ++ bstrins.d a0, zero, 5, 0 ++L(loop): ++ xvld xr0, a3, -64 ++ ++ xvld xr1, a3, -32 ++ addi.d a3, a3, -64 ++ xvseq.b xr0, xr0, xr2 ++ xvseq.b xr1, xr1, xr2 ++ ++ ++ beq a0, a3, L(out) ++ xvmax.bu xr3, xr0, xr1 ++ xvseteqz.v fcc0, xr3 ++ bcnez fcc0, L(loop) ++ ++ xvmsknz.b xr0, xr0 ++ xvmsknz.b xr1, xr1 ++ xvpickve.w xr3, xr0, 4 ++ xvpickve.w xr4, xr1, 4 ++ ++ vilvl.h vr0, vr3, vr0 ++ vilvl.h vr1, vr4, vr1 ++ vilvl.w vr0, vr1, vr0 ++ movfr2gr.d t0, fa0 ++ ++L(found): ++ addi.d a0, a3, 63 ++ clz.d t1, t0 ++ sub.d a0, a0, t1 ++ jr ra ++ ++ ++L(out): ++ xvmsknz.b xr0, xr0 ++ xvmsknz.b xr1, xr1 ++ xvpickve.w xr3, xr0, 4 ++ xvpickve.w xr4, xr1, 4 ++ ++ vilvl.h vr0, vr3, vr0 ++ vilvl.h vr1, vr4, vr1 ++ vilvl.w vr0, vr1, vr0 ++ movfr2gr.d t0, fa0 ++ ++L(end): ++ sll.d t2, t3, t4 ++ and t0, t0, t2 ++ addi.d a0, a3, 63 ++ clz.d t1, t0 ++ ++ sub.d a0, a0, t1 ++ maskeqz a0, a0, t0 ++ jr ra ++L(ret0): ++ move a0, zero ++ ++ ++ jr ra ++END(MEMRCHR) ++ ++libc_hidden_builtin_def (MEMRCHR) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/memrchr-lsx.S b/sysdeps/loongarch/lp64/multiarch/memrchr-lsx.S +new file mode 100644 +index 00000000..39a7c8b0 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/memrchr-lsx.S +@@ -0,0 +1,105 @@ ++/* Optimized memrchr implementation using LoongArch LSX instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ . 
*/ ++ ++#include ++#include ++#include ++ ++#if IS_IN (libc) && !defined __loongarch_soft_float ++ ++# define MEMRCHR __memrchr_lsx ++ ++LEAF(MEMRCHR, 6) ++ beqz a2, L(ret0) ++ addi.d a2, a2, -1 ++ add.d a3, a0, a2 ++ andi t1, a3, 0x1f ++ ++ bstrins.d a3, zero, 4, 0 ++ addi.d t1, t1, 1 ++ vld vr0, a3, 0 ++ vld vr1, a3, 16 ++ ++ sub.d t2, zero, t1 ++ li.d t3, -1 ++ vreplgr2vr.b vr2, a1 ++ andi t4, a0, 0x1f ++ ++ srl.d t2, t3, t2 ++ vseq.b vr0, vr0, vr2 ++ vseq.b vr1, vr1, vr2 ++ vmsknz.b vr0, vr0 ++ ++ ++ vmsknz.b vr1, vr1 ++ vilvl.h vr0, vr1, vr0 ++ movfr2gr.s t0, fa0 ++ and t0, t0, t2 ++ ++ bltu a2, t1, L(end) ++ bnez t0, L(found) ++ bstrins.d a0, zero, 4, 0 ++L(loop): ++ vld vr0, a3, -32 ++ ++ vld vr1, a3, -16 ++ addi.d a3, a3, -32 ++ vseq.b vr0, vr0, vr2 ++ vseq.b vr1, vr1, vr2 ++ ++ beq a0, a3, L(out) ++ vmax.bu vr3, vr0, vr1 ++ vseteqz.v fcc0, vr3 ++ bcnez fcc0, L(loop) ++ ++ ++ vmsknz.b vr0, vr0 ++ vmsknz.b vr1, vr1 ++ vilvl.h vr0, vr1, vr0 ++ movfr2gr.s t0, fa0 ++ ++L(found): ++ addi.d a0, a3, 31 ++ clz.w t1, t0 ++ sub.d a0, a0, t1 ++ jr ra ++ ++L(out): ++ vmsknz.b vr0, vr0 ++ vmsknz.b vr1, vr1 ++ vilvl.h vr0, vr1, vr0 ++ movfr2gr.s t0, fa0 ++ ++L(end): ++ sll.d t2, t3, t4 ++ and t0, t0, t2 ++ addi.d a0, a3, 31 ++ clz.w t1, t0 ++ ++ ++ sub.d a0, a0, t1 ++ maskeqz a0, a0, t0 ++ jr ra ++L(ret0): ++ move a0, zero ++ ++ jr ra ++END(MEMRCHR) ++ ++libc_hidden_builtin_def (MEMRCHR) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/memrchr.c b/sysdeps/loongarch/lp64/multiarch/memrchr.c +new file mode 100644 +index 00000000..8baba9ab +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/memrchr.c +@@ -0,0 +1,33 @@ ++/* Multiple versions of memrchr. ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++/* Define multiple versions only for the definition in libc. */ ++#if IS_IN (libc) ++# define memrchr __redirect_memrchr ++# include ++# undef memrchr ++ ++# define SYMBOL_NAME memrchr ++# include "ifunc-memrchr.h" ++ ++libc_ifunc_redirected (__redirect_memrchr, __memrchr, IFUNC_SELECTOR ()); ++libc_hidden_def (__memrchr) ++weak_alias (__memrchr, memrchr) ++ ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/memset-aligned.S b/sysdeps/loongarch/lp64/multiarch/memset-aligned.S +new file mode 100644 +index 00000000..1fce95b7 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/memset-aligned.S +@@ -0,0 +1,174 @@ ++/* Optimized memset aligned implementation using basic LoongArch instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. 
++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ . */ ++ ++#include ++#include ++#include ++ ++#if IS_IN (libc) ++# define MEMSET_NAME __memset_aligned ++#else ++# define MEMSET_NAME memset ++#endif ++ ++LEAF(MEMSET_NAME, 6) ++ move t0, a0 ++ andi a3, a0, 0x7 ++ li.w t6, 16 ++ beqz a3, L(align) ++ bltu a2, t6, L(short_data) ++ ++L(make_align): ++ li.w t8, 8 ++ sub.d t2, t8, a3 ++ pcaddi t1, 11 ++ slli.d t3, t2, 2 ++ sub.d t1, t1, t3 ++ jr t1 ++ ++L(al7): ++ st.b a1, t0, 6 ++L(al6): ++ st.b a1, t0, 5 ++L(al5): ++ st.b a1, t0, 4 ++L(al4): ++ st.b a1, t0, 3 ++L(al3): ++ st.b a1, t0, 2 ++L(al2): ++ st.b a1, t0, 1 ++L(al1): ++ st.b a1, t0, 0 ++L(al0): ++ add.d t0, t0, t2 ++ sub.d a2, a2, t2 ++ ++L(align): ++ bstrins.d a1, a1, 15, 8 ++ bstrins.d a1, a1, 31, 16 ++ bstrins.d a1, a1, 63, 32 ++ bltu a2, t6, L(less_16bytes) ++ ++ andi a4, a2, 0x3f ++ beq a4, a2, L(less_64bytes) ++ ++ sub.d t1, a2, a4 ++ move a2, a4 ++ add.d a5, t0, t1 ++ ++L(loop_64bytes): ++ addi.d t0, t0, 64 ++ st.d a1, t0, -64 ++ st.d a1, t0, -56 ++ st.d a1, t0, -48 ++ st.d a1, t0, -40 ++ ++ st.d a1, t0, -32 ++ st.d a1, t0, -24 ++ st.d a1, t0, -16 ++ st.d a1, t0, -8 ++ bne t0, a5, L(loop_64bytes) ++ ++L(less_64bytes): ++ srai.d a4, a2, 5 ++ beqz a4, L(less_32bytes) ++ addi.d a2, a2, -32 ++ st.d a1, t0, 0 ++ ++ st.d a1, t0, 8 ++ st.d a1, t0, 16 ++ st.d a1, t0, 24 ++ addi.d t0, t0, 32 ++ ++L(less_32bytes): ++ bltu a2, t6, L(less_16bytes) ++ addi.d a2, a2, -16 ++ st.d a1, t0, 0 ++ st.d a1, t0, 8 ++ addi.d t0, t0, 16 ++ ++L(less_16bytes): ++ srai.d a4, a2, 3 ++ beqz a4, L(less_8bytes) ++ addi.d a2, a2, -8 ++ st.d a1, t0, 0 ++ addi.d t0, t0, 8 ++ ++L(less_8bytes): ++ beqz a2, L(less_1byte) ++ srai.d a4, a2, 2 ++ beqz a4, L(less_4bytes) ++ addi.d a2, a2, -4 ++ st.w a1, t0, 0 ++ addi.d t0, t0, 4 ++ ++L(less_4bytes): ++ srai.d a3, a2, 1 ++ beqz a3, L(less_2bytes) ++ addi.d a2, a2, -2 ++ st.h a1, t0, 0 ++ addi.d t0, t0, 2 ++ ++L(less_2bytes): ++ beqz a2, L(less_1byte) ++ st.b a1, t0, 0 ++L(less_1byte): ++ jr ra ++ ++L(short_data): ++ pcaddi t1, 19 ++ slli.d t3, a2, 2 ++ sub.d t1, t1, t3 ++ jr t1 ++L(short_15): ++ st.b a1, a0, 14 ++L(short_14): ++ st.b a1, a0, 13 ++L(short_13): ++ st.b a1, a0, 12 ++L(short_12): ++ st.b a1, a0, 11 ++L(short_11): ++ st.b a1, a0, 10 ++L(short_10): ++ st.b a1, a0, 9 ++L(short_9): ++ st.b a1, a0, 8 ++L(short_8): ++ st.b a1, a0, 7 ++L(short_7): ++ st.b a1, a0, 6 ++L(short_6): ++ st.b a1, a0, 5 ++L(short_5): ++ st.b a1, a0, 4 ++L(short_4): ++ st.b a1, a0, 3 ++L(short_3): ++ st.b a1, a0, 2 ++L(short_2): ++ st.b a1, a0, 1 ++L(short_1): ++ st.b a1, a0, 0 ++L(short_0): ++ jr ra ++END(MEMSET_NAME) ++ ++libc_hidden_builtin_def (MEMSET_NAME) +diff --git a/sysdeps/loongarch/lp64/multiarch/memset-lasx.S b/sysdeps/loongarch/lp64/multiarch/memset-lasx.S +new file mode 100644 +index 00000000..041abbac +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/memset-lasx.S +@@ -0,0 +1,142 @@ ++/* Optimized memset implementation using 
LoongArch LASX instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ . */ ++ ++#include ++#include ++#include ++ ++#if IS_IN (libc) && !defined __loongarch_soft_float ++ ++# define MEMSET __memset_lasx ++ ++LEAF(MEMSET, 6) ++ li.d t1, 32 ++ move a3, a0 ++ xvreplgr2vr.b xr0, a1 ++ add.d a4, a0, a2 ++ ++ bgeu t1, a2, L(less_32bytes) ++ li.d t3, 128 ++ li.d t2, 64 ++ blt t3, a2, L(long_bytes) ++ ++L(less_128bytes): ++ bgeu t2, a2, L(less_64bytes) ++ xvst xr0, a3, 0 ++ xvst xr0, a3, 32 ++ xvst xr0, a4, -32 ++ ++ xvst xr0, a4, -64 ++ jr ra ++L(less_64bytes): ++ xvst xr0, a3, 0 ++ xvst xr0, a4, -32 ++ ++ ++ jr ra ++L(less_32bytes): ++ srli.d t0, a2, 4 ++ beqz t0, L(less_16bytes) ++ vst vr0, a3, 0 ++ ++ vst vr0, a4, -16 ++ jr ra ++L(less_16bytes): ++ srli.d t0, a2, 3 ++ beqz t0, L(less_8bytes) ++ ++ vstelm.d vr0, a3, 0, 0 ++ vstelm.d vr0, a4, -8, 0 ++ jr ra ++L(less_8bytes): ++ srli.d t0, a2, 2 ++ ++ beqz t0, L(less_4bytes) ++ vstelm.w vr0, a3, 0, 0 ++ vstelm.w vr0, a4, -4, 0 ++ jr ra ++ ++ ++L(less_4bytes): ++ srli.d t0, a2, 1 ++ beqz t0, L(less_2bytes) ++ vstelm.h vr0, a3, 0, 0 ++ vstelm.h vr0, a4, -2, 0 ++ ++ jr ra ++L(less_2bytes): ++ beqz a2, L(less_1bytes) ++ st.b a1, a3, 0 ++L(less_1bytes): ++ jr ra ++ ++L(long_bytes): ++ xvst xr0, a3, 0 ++ bstrins.d a3, zero, 4, 0 ++ addi.d a3, a3, 32 ++ sub.d a2, a4, a3 ++ ++ andi t0, a2, 0xff ++ beq t0, a2, L(long_end) ++ move a2, t0 ++ sub.d t0, a4, t0 ++ ++ ++L(loop_256): ++ xvst xr0, a3, 0 ++ xvst xr0, a3, 32 ++ xvst xr0, a3, 64 ++ xvst xr0, a3, 96 ++ ++ xvst xr0, a3, 128 ++ xvst xr0, a3, 160 ++ xvst xr0, a3, 192 ++ xvst xr0, a3, 224 ++ ++ addi.d a3, a3, 256 ++ bne a3, t0, L(loop_256) ++L(long_end): ++ bltu a2, t3, L(end_less_128) ++ addi.d a2, a2, -128 ++ ++ xvst xr0, a3, 0 ++ xvst xr0, a3, 32 ++ xvst xr0, a3, 64 ++ xvst xr0, a3, 96 ++ ++ ++ addi.d a3, a3, 128 ++L(end_less_128): ++ bltu a2, t2, L(end_less_64) ++ addi.d a2, a2, -64 ++ xvst xr0, a3, 0 ++ ++ xvst xr0, a3, 32 ++ addi.d a3, a3, 64 ++L(end_less_64): ++ bltu a2, t1, L(end_less_32) ++ xvst xr0, a3, 0 ++ ++L(end_less_32): ++ xvst xr0, a4, -32 ++ jr ra ++END(MEMSET) ++ ++libc_hidden_builtin_def (MEMSET) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/memset-lsx.S b/sysdeps/loongarch/lp64/multiarch/memset-lsx.S +new file mode 100644 +index 00000000..3d3982aa +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/memset-lsx.S +@@ -0,0 +1,135 @@ ++/* Optimized memset implementation using LoongArch LSX instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. 
++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ . */ ++ ++#include ++#include ++#include ++ ++#if IS_IN (libc) && !defined __loongarch_soft_float ++ ++# define MEMSET __memset_lsx ++ ++LEAF(MEMSET, 6) ++ li.d t1, 16 ++ move a3, a0 ++ vreplgr2vr.b vr0, a1 ++ add.d a4, a0, a2 ++ ++ bgeu t1, a2, L(less_16bytes) ++ li.d t3, 64 ++ li.d t2, 32 ++ bgeu a2, t3, L(long_bytes) ++ ++L(less_64bytes): ++ bgeu t2, a2, L(less_32bytes) ++ vst vr0, a3, 0 ++ vst vr0, a3, 16 ++ vst vr0, a4, -32 ++ ++ vst vr0, a4, -16 ++ jr ra ++L(less_32bytes): ++ vst vr0, a3, 0 ++ vst vr0, a4, -16 ++ ++ ++ jr ra ++L(less_16bytes): ++ srli.d t0, a2, 3 ++ beqz t0, L(less_8bytes) ++ vstelm.d vr0, a3, 0, 0 ++ ++ vstelm.d vr0, a4, -8, 0 ++ jr ra ++L(less_8bytes): ++ srli.d t0, a2, 2 ++ beqz t0, L(less_4bytes) ++ ++ vstelm.w vr0, a3, 0, 0 ++ vstelm.w vr0, a4, -4, 0 ++ jr ra ++L(less_4bytes): ++ srli.d t0, a2, 1 ++ ++ beqz t0, L(less_2bytes) ++ vstelm.h vr0, a3, 0, 0 ++ vstelm.h vr0, a4, -2, 0 ++ jr ra ++ ++ ++L(less_2bytes): ++ beqz a2, L(less_1bytes) ++ vstelm.b vr0, a3, 0, 0 ++L(less_1bytes): ++ jr ra ++L(long_bytes): ++ vst vr0, a3, 0 ++ ++ bstrins.d a3, zero, 3, 0 ++ addi.d a3, a3, 16 ++ sub.d a2, a4, a3 ++ andi t0, a2, 0x7f ++ ++ beq t0, a2, L(long_end) ++ move a2, t0 ++ sub.d t0, a4, t0 ++ ++L(loop_128): ++ vst vr0, a3, 0 ++ ++ vst vr0, a3, 16 ++ vst vr0, a3, 32 ++ vst vr0, a3, 48 ++ vst vr0, a3, 64 ++ ++ ++ vst vr0, a3, 80 ++ vst vr0, a3, 96 ++ vst vr0, a3, 112 ++ addi.d a3, a3, 128 ++ ++ bne a3, t0, L(loop_128) ++L(long_end): ++ bltu a2, t3, L(end_less_64) ++ addi.d a2, a2, -64 ++ vst vr0, a3, 0 ++ ++ vst vr0, a3, 16 ++ vst vr0, a3, 32 ++ vst vr0, a3, 48 ++ addi.d a3, a3, 64 ++ ++L(end_less_64): ++ bltu a2, t2, L(end_less_32) ++ addi.d a2, a2, -32 ++ vst vr0, a3, 0 ++ vst vr0, a3, 16 ++ ++ addi.d a3, a3, 32 ++L(end_less_32): ++ bltu a2, t1, L(end_less_16) ++ vst vr0, a3, 0 ++ ++L(end_less_16): ++ vst vr0, a4, -16 ++ jr ra ++END(MEMSET) ++ ++libc_hidden_builtin_def (MEMSET) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/memset-unaligned.S b/sysdeps/loongarch/lp64/multiarch/memset-unaligned.S +new file mode 100644 +index 00000000..f7d32039 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/memset-unaligned.S +@@ -0,0 +1,162 @@ ++/* Optimized memset unaligned implementation using basic LoongArch instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ . 
*/ ++ ++#include ++#include ++#include ++ ++#if IS_IN (libc) ++ ++# define MEMSET_NAME __memset_unaligned ++ ++#define ST_128(n) \ ++ st.d a1, a0, n; \ ++ st.d a1, a0, n+8 ; \ ++ st.d a1, a0, n+16 ; \ ++ st.d a1, a0, n+24 ; \ ++ st.d a1, a0, n+32 ; \ ++ st.d a1, a0, n+40 ; \ ++ st.d a1, a0, n+48 ; \ ++ st.d a1, a0, n+56 ; \ ++ st.d a1, a0, n+64 ; \ ++ st.d a1, a0, n+72 ; \ ++ st.d a1, a0, n+80 ; \ ++ st.d a1, a0, n+88 ; \ ++ st.d a1, a0, n+96 ; \ ++ st.d a1, a0, n+104; \ ++ st.d a1, a0, n+112; \ ++ st.d a1, a0, n+120; ++ ++LEAF(MEMSET_NAME, 6) ++ bstrins.d a1, a1, 15, 8 ++ add.d t7, a0, a2 ++ bstrins.d a1, a1, 31, 16 ++ move t0, a0 ++ ++ bstrins.d a1, a1, 63, 32 ++ srai.d t8, a2, 4 ++ beqz t8, L(less_16bytes) ++ srai.d t8, a2, 6 ++ ++ bnez t8, L(more_64bytes) ++ srai.d t8, a2, 5 ++ beqz t8, L(less_32bytes) ++ ++ st.d a1, a0, 0 ++ st.d a1, a0, 8 ++ st.d a1, a0, 16 ++ st.d a1, a0, 24 ++ ++ st.d a1, t7, -32 ++ st.d a1, t7, -24 ++ st.d a1, t7, -16 ++ st.d a1, t7, -8 ++ ++ jr ra ++ ++L(less_32bytes): ++ st.d a1, a0, 0 ++ st.d a1, a0, 8 ++ st.d a1, t7, -16 ++ st.d a1, t7, -8 ++ ++ jr ra ++ ++L(less_16bytes): ++ srai.d t8, a2, 3 ++ beqz t8, L(less_8bytes) ++ st.d a1, a0, 0 ++ st.d a1, t7, -8 ++ ++ jr ra ++ ++L(less_8bytes): ++ srai.d t8, a2, 2 ++ beqz t8, L(less_4bytes) ++ st.w a1, a0, 0 ++ st.w a1, t7, -4 ++ ++ jr ra ++ ++L(less_4bytes): ++ srai.d t8, a2, 1 ++ beqz t8, L(less_2bytes) ++ st.h a1, a0, 0 ++ st.h a1, t7, -2 ++ ++ jr ra ++ ++L(less_2bytes): ++ beqz a2, L(less_1bytes) ++ st.b a1, a0, 0 ++ ++ jr ra ++ ++L(less_1bytes): ++ jr ra ++ ++L(more_64bytes): ++ srli.d a0, a0, 3 ++ slli.d a0, a0, 3 ++ addi.d a0, a0, 0x8 ++ st.d a1, t0, 0 ++ ++ sub.d t2, t0, a0 ++ add.d a2, t2, a2 ++ addi.d a2, a2, -0x80 ++ blt a2, zero, L(end_unalign_proc) ++ ++L(loop_less): ++ ST_128(0) ++ addi.d a0, a0, 0x80 ++ addi.d a2, a2, -0x80 ++ bge a2, zero, L(loop_less) ++ ++L(end_unalign_proc): ++ addi.d a2, a2, 0x80 ++ pcaddi t1, 20 ++ andi t5, a2, 0x78 ++ srli.d t5, t5, 1 ++ ++ sub.d t1, t1, t5 ++ jr t1 ++ ++ st.d a1, a0, 112 ++ st.d a1, a0, 104 ++ st.d a1, a0, 96 ++ st.d a1, a0, 88 ++ st.d a1, a0, 80 ++ st.d a1, a0, 72 ++ st.d a1, a0, 64 ++ st.d a1, a0, 56 ++ st.d a1, a0, 48 ++ st.d a1, a0, 40 ++ st.d a1, a0, 32 ++ st.d a1, a0, 24 ++ st.d a1, a0, 16 ++ st.d a1, a0, 8 ++ st.d a1, a0, 0 ++ st.d a1, t7, -8 ++ ++ move a0, t0 ++ jr ra ++END(MEMSET_NAME) ++ ++libc_hidden_builtin_def (MEMSET_NAME) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/memset.c b/sysdeps/loongarch/lp64/multiarch/memset.c +new file mode 100644 +index 00000000..3ff60d8a +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/memset.c +@@ -0,0 +1,37 @@ ++/* Multiple versions of memset. ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . 
*/ ++ ++/* Define multiple versions only for the definition in libc. */ ++#if IS_IN (libc) ++# define memset __redirect_memset ++# include ++# undef memset ++ ++# define SYMBOL_NAME memset ++# include "ifunc-lasx.h" ++ ++libc_ifunc_redirected (__redirect_memset, memset, ++ IFUNC_SELECTOR ()); ++ ++# ifdef SHARED ++__hidden_ver1 (memset, __GI_memset, __redirect_memset) ++ __attribute__ ((visibility ("hidden"))) __attribute_copy__ (memset); ++# endif ++ ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/rawmemchr-aligned.S b/sysdeps/loongarch/lp64/multiarch/rawmemchr-aligned.S +new file mode 100644 +index 00000000..9c7155ae +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/rawmemchr-aligned.S +@@ -0,0 +1,124 @@ ++/* Optimized rawmemchr implementation using basic LoongArch instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ . */ ++ ++#include ++#include ++#include ++ ++#if IS_IN (libc) ++# define RAWMEMCHR_NAME __rawmemchr_aligned ++#else ++# define RAWMEMCHR_NAME __rawmemchr ++#endif ++ ++LEAF(RAWMEMCHR_NAME, 6) ++ andi t1, a0, 0x7 ++ bstrins.d a0, zero, 2, 0 ++ lu12i.w a2, 0x01010 ++ bstrins.d a1, a1, 15, 8 ++ ++ ld.d t0, a0, 0 ++ slli.d t1, t1, 3 ++ ori a2, a2, 0x101 ++ bstrins.d a1, a1, 31, 16 ++ ++ li.w t8, -1 ++ bstrins.d a1, a1, 63, 32 ++ bstrins.d a2, a2, 63, 32 ++ sll.d t2, t8, t1 ++ ++ sll.d t3, a1, t1 ++ orn t0, t0, t2 ++ slli.d a3, a2, 7 ++ beqz a1, L(find_zero) ++ ++ xor t0, t0, t3 ++ sub.d t1, t0, a2 ++ andn t2, a3, t0 ++ and t3, t1, t2 ++ ++ bnez t3, L(count_pos) ++ addi.d a0, a0, 8 ++ ++L(loop): ++ ld.d t0, a0, 0 ++ xor t0, t0, a1 ++ ++ sub.d t1, t0, a2 ++ andn t2, a3, t0 ++ and t3, t1, t2 ++ bnez t3, L(count_pos) ++ ++ ld.d t0, a0, 8 ++ addi.d a0, a0, 16 ++ xor t0, t0, a1 ++ sub.d t1, t0, a2 ++ ++ andn t2, a3, t0 ++ and t3, t1, t2 ++ beqz t3, L(loop) ++ addi.d a0, a0, -8 ++L(count_pos): ++ ctz.d t0, t3 ++ srli.d t0, t0, 3 ++ add.d a0, a0, t0 ++ jr ra ++ ++L(loop_7bit): ++ ld.d t0, a0, 0 ++L(find_zero): ++ sub.d t1, t0, a2 ++ and t2, t1, a3 ++ bnez t2, L(more_check) ++ ++ ld.d t0, a0, 8 ++ addi.d a0, a0, 16 ++ sub.d t1, t0, a2 ++ and t2, t1, a3 ++ ++ beqz t2, L(loop_7bit) ++ addi.d a0, a0, -8 ++ ++L(more_check): ++ andn t2, a3, t0 ++ and t3, t1, t2 ++ bnez t3, L(count_pos) ++ addi.d a0, a0, 8 ++ ++L(loop_8bit): ++ ld.d t0, a0, 0 ++ ++ sub.d t1, t0, a2 ++ andn t2, a3, t0 ++ and t3, t1, t2 ++ bnez t3, L(count_pos) ++ ++ ld.d t0, a0, 8 ++ addi.d a0, a0, 16 ++ sub.d t1, t0, a2 ++ ++ andn t2, a3, t0 ++ and t3, t1, t2 ++ beqz t3, L(loop_8bit) ++ ++ addi.d a0, a0, -8 ++ b L(count_pos) ++ ++END(RAWMEMCHR_NAME) ++ ++libc_hidden_builtin_def (__rawmemchr) +diff --git a/sysdeps/loongarch/lp64/multiarch/rawmemchr-lasx.S b/sysdeps/loongarch/lp64/multiarch/rawmemchr-lasx.S +new file mode 100644 +index 00000000..be2eb59d +--- /dev/null ++++ 
b/sysdeps/loongarch/lp64/multiarch/rawmemchr-lasx.S +@@ -0,0 +1,82 @@ ++/* Optimized rawmemchr implementation using LoongArch LASX instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ . */ ++ ++#include ++#include ++#include ++ ++#if IS_IN (libc) && !defined __loongarch_soft_float ++ ++# define RAWMEMCHR __rawmemchr_lasx ++ ++LEAF(RAWMEMCHR, 6) ++ move a2, a0 ++ bstrins.d a0, zero, 5, 0 ++ xvld xr0, a0, 0 ++ xvld xr1, a0, 32 ++ ++ xvreplgr2vr.b xr2, a1 ++ xvseq.b xr0, xr0, xr2 ++ xvseq.b xr1, xr1, xr2 ++ xvmsknz.b xr0, xr0 ++ ++ xvmsknz.b xr1, xr1 ++ xvpickve.w xr3, xr0, 4 ++ xvpickve.w xr4, xr1, 4 ++ vilvl.h vr0, vr3, vr0 ++ ++ vilvl.h vr1, vr4, vr1 ++ vilvl.w vr0, vr1, vr0 ++ movfr2gr.d t0, fa0 ++ sra.d t0, t0, a2 ++ ++ ++ beqz t0, L(loop) ++ ctz.d t0, t0 ++ add.d a0, a2, t0 ++ jr ra ++ ++L(loop): ++ xvld xr0, a0, 64 ++ xvld xr1, a0, 96 ++ addi.d a0, a0, 64 ++ xvseq.b xr0, xr0, xr2 ++ ++ xvseq.b xr1, xr1, xr2 ++ xvmax.bu xr3, xr0, xr1 ++ xvseteqz.v fcc0, xr3 ++ bcnez fcc0, L(loop) ++ ++ xvmsknz.b xr0, xr0 ++ xvmsknz.b xr1, xr1 ++ xvpickve.w xr3, xr0, 4 ++ xvpickve.w xr4, xr1, 4 ++ ++ ++ vilvl.h vr0, vr3, vr0 ++ vilvl.h vr1, vr4, vr1 ++ vilvl.w vr0, vr1, vr0 ++ movfr2gr.d t0, fa0 ++ ++ ctz.d t0, t0 ++ add.d a0, a0, t0 ++ jr ra ++END(RAWMEMCHR) ++ ++libc_hidden_builtin_def (RAWMEMCHR) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/rawmemchr-lsx.S b/sysdeps/loongarch/lp64/multiarch/rawmemchr-lsx.S +new file mode 100644 +index 00000000..2f6fe024 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/rawmemchr-lsx.S +@@ -0,0 +1,71 @@ ++/* Optimized rawmemchr implementation using LoongArch LSX instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ . 
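The aligned rawmemchr variant further up relies on the usual word-at-a-time byte test: a2 holds 0x0101010101010101, a3 holds 0x8080808080808080, and the sub.d/andn/and triple flags any byte of the XORed word that became zero. A minimal C sketch of that per-word test, for reference only:

    #include <stdint.h>

    #define ONES  0x0101010101010101ULL   /* register a2 in the assembly      */
    #define HIGHS 0x8080808080808080ULL   /* register a3, i.e. ONES << 7      */

    /* Non-zero iff some byte of w equals c; applied to each 8-byte word.  */
    static inline uint64_t
    byte_match (uint64_t w, unsigned char c)
    {
      uint64_t x = w ^ (c * ONES);        /* matching bytes become 0          */
      return (x - ONES) & ~x & HIGHS;     /* the sub.d / andn / and sequence  */
    }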
*/ ++ ++#include ++#include ++#include ++ ++#if IS_IN (libc) && !defined __loongarch_soft_float ++ ++# define RAWMEMCHR __rawmemchr_lsx ++ ++LEAF(RAWMEMCHR, 6) ++ move a2, a0 ++ bstrins.d a0, zero, 4, 0 ++ vld vr0, a0, 0 ++ vld vr1, a0, 16 ++ ++ vreplgr2vr.b vr2, a1 ++ vseq.b vr0, vr0, vr2 ++ vseq.b vr1, vr1, vr2 ++ vmsknz.b vr0, vr0 ++ ++ vmsknz.b vr1, vr1 ++ vilvl.h vr0, vr1, vr0 ++ movfr2gr.s t0, fa0 ++ sra.w t0, t0, a2 ++ ++ beqz t0, L(loop) ++ ctz.w t0, t0 ++ add.d a0, a2, t0 ++ jr ra ++ ++ ++L(loop): ++ vld vr0, a0, 32 ++ vld vr1, a0, 48 ++ addi.d a0, a0, 32 ++ vseq.b vr0, vr0, vr2 ++ ++ vseq.b vr1, vr1, vr2 ++ vmax.bu vr3, vr0, vr1 ++ vseteqz.v fcc0, vr3 ++ bcnez fcc0, L(loop) ++ ++ vmsknz.b vr0, vr0 ++ vmsknz.b vr1, vr1 ++ vilvl.h vr0, vr1, vr0 ++ movfr2gr.s t0, fa0 ++ ++ ctz.w t0, t0 ++ add.d a0, a0, t0 ++ jr ra ++END(RAWMEMCHR) ++ ++libc_hidden_builtin_def (RAWMEMCHR) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/rawmemchr.c b/sysdeps/loongarch/lp64/multiarch/rawmemchr.c +new file mode 100644 +index 00000000..89c7ffff +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/rawmemchr.c +@@ -0,0 +1,37 @@ ++/* Multiple versions of rawmemchr. ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#if IS_IN (libc) ++# define rawmemchr __redirect_rawmemchr ++# define __rawmemchr __redirect___rawmemchr ++# include ++# undef rawmemchr ++# undef __rawmemchr ++ ++# define SYMBOL_NAME rawmemchr ++# include "ifunc-rawmemchr.h" ++ ++libc_ifunc_redirected (__redirect_rawmemchr, __rawmemchr, ++ IFUNC_SELECTOR ()); ++weak_alias (__rawmemchr, rawmemchr) ++# ifdef SHARED ++__hidden_ver1 (__rawmemchr, __GI___rawmemchr, __redirect___rawmemchr) ++ __attribute__((visibility ("hidden"))); ++# endif ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/stpcpy-aligned.S b/sysdeps/loongarch/lp64/multiarch/stpcpy-aligned.S +new file mode 100644 +index 00000000..1f763db6 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/stpcpy-aligned.S +@@ -0,0 +1,27 @@ ++/* stpcpy-aligned implementation is in strcpy-aligned.S. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. 
If not, see ++ . */ ++ ++#if IS_IN (libc) ++# define STPCPY __stpcpy_aligned ++#else ++# define STPCPY __stpcpy ++#endif ++ ++#define USE_AS_STPCPY ++#define STRCPY STPCPY ++#include "strcpy-aligned.S" +diff --git a/sysdeps/loongarch/lp64/multiarch/stpcpy-lasx.S b/sysdeps/loongarch/lp64/multiarch/stpcpy-lasx.S +new file mode 100644 +index 00000000..13d6c953 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/stpcpy-lasx.S +@@ -0,0 +1,22 @@ ++/* stpcpy-lasx implementation is in strcpy-lasx.S. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ . */ ++ ++#define STPCPY __stpcpy_lasx ++#define USE_AS_STPCPY ++#define STRCPY STPCPY ++#include "strcpy-lasx.S" +diff --git a/sysdeps/loongarch/lp64/multiarch/stpcpy-lsx.S b/sysdeps/loongarch/lp64/multiarch/stpcpy-lsx.S +new file mode 100644 +index 00000000..e0f17ab5 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/stpcpy-lsx.S +@@ -0,0 +1,22 @@ ++/* stpcpy-lsx implementation is in strcpy-lsx.S. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ . */ ++ ++#define STPCPY __stpcpy_lsx ++#define USE_AS_STPCPY ++#define STRCPY STPCPY ++#include "strcpy-lsx.S" +diff --git a/sysdeps/loongarch/lp64/multiarch/stpcpy-unaligned.S b/sysdeps/loongarch/lp64/multiarch/stpcpy-unaligned.S +new file mode 100644 +index 00000000..cc2f9712 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/stpcpy-unaligned.S +@@ -0,0 +1,22 @@ ++/* stpcpy-unaligned implementation is in strcpy-unaligned.S. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. 
++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ . */ ++ ++#define STPCPY __stpcpy_unaligned ++#define USE_AS_STPCPY ++#define STRCPY STPCPY ++#include "strcpy-unaligned.S" +diff --git a/sysdeps/loongarch/lp64/multiarch/stpcpy.c b/sysdeps/loongarch/lp64/multiarch/stpcpy.c +new file mode 100644 +index 00000000..d4860d7a +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/stpcpy.c +@@ -0,0 +1,42 @@ ++/* Multiple versions of stpcpy. ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2017-2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++/* Define multiple versions only for the definition in libc. */ ++#if IS_IN (libc) ++# define stpcpy __redirect_stpcpy ++# define __stpcpy __redirect___stpcpy ++# define NO_MEMPCPY_STPCPY_REDIRECT ++# define __NO_STRING_INLINES ++# include ++# undef stpcpy ++# undef __stpcpy ++ ++# define SYMBOL_NAME stpcpy ++# include "ifunc-lasx.h" ++ ++libc_ifunc_redirected (__redirect_stpcpy, __stpcpy, IFUNC_SELECTOR ()); ++ ++weak_alias (__stpcpy, stpcpy) ++# ifdef SHARED ++__hidden_ver1 (__stpcpy, __GI___stpcpy, __redirect___stpcpy) ++ __attribute__ ((visibility ("hidden"))) __attribute_copy__ (stpcpy); ++__hidden_ver1 (stpcpy, __GI_stpcpy, __redirect_stpcpy) ++ __attribute__ ((visibility ("hidden"))) __attribute_copy__ (stpcpy); ++# endif ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/strchr-aligned.S b/sysdeps/loongarch/lp64/multiarch/strchr-aligned.S +new file mode 100644 +index 00000000..62020054 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strchr-aligned.S +@@ -0,0 +1,99 @@ ++/* Optimized strchr implementation using basic LoongArch instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ . 
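The ifunc wrappers above (rawmemchr.c, stpcpy.c) take IFUNC_SELECTOR from the new ifunc-*.h headers, whose bodies are not visible in this part of the hunk. Judging from the variants listed in the diffstat, the selector presumably has roughly the following shape; the SUPPORT_* names and the unaligned branch are assumptions, not text from the patch:

    /* Hypothetical sketch of an IFUNC_SELECTOR such as ifunc-lasx.h provides.  */
    static inline void *
    IFUNC_SELECTOR (void)
    {
      if (SUPPORT_LASX)                /* assumed hwcap test for LASX        */
        return OPTIMIZE (lasx);
      else if (SUPPORT_LSX)            /* assumed hwcap test for LSX         */
        return OPTIMIZE (lsx);
      else if (SUPPORT_UAL)            /* unaligned-access variant, if any   */
        return OPTIMIZE (unaligned);
      else
        return OPTIMIZE (aligned);
    }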
*/ ++ ++#include ++#include ++#include ++ ++#if IS_IN (libc) ++# define STRCHR_NAME __strchr_aligned ++#else ++# define STRCHR_NAME strchr ++#endif ++ ++LEAF(STRCHR_NAME, 6) ++ slli.d t1, a0, 3 ++ bstrins.d a0, zero, 2, 0 ++ lu12i.w a2, 0x01010 ++ ld.d t2, a0, 0 ++ ++ ori a2, a2, 0x101 ++ andi a1, a1, 0xff ++ bstrins.d a2, a2, 63, 32 ++ li.w t0, -1 ++ ++ mul.d a1, a1, a2 ++ sll.d t0, t0, t1 ++ slli.d a3, a2, 7 ++ orn t2, t2, t0 ++ ++ sll.d t3, a1, t1 ++ xor t4, t2, t3 ++ sub.d a4, t2, a2 ++ sub.d a5, t4, a2 ++ ++ ++ andn a4, a4, t2 ++ andn a5, a5, t4 ++ or t0, a4, a5 ++ and t0, t0, a3 ++ ++ bnez t0, L(end) ++ addi.d a0, a0, 8 ++L(loop): ++ ld.d t4, a0, 0 ++ xor t2, t4, a1 ++ ++ sub.d a4, t4, a2 ++ sub.d a5, t2, a2 ++ andn a4, a4, t4 ++ andn a5, a5, t2 ++ ++ or t0, a4, a5 ++ and t0, t0, a3 ++ bnez t0, L(end) ++ ld.d t4, a0, 8 ++ ++ ++ addi.d a0, a0, 16 ++ xor t2, t4, a1 ++ sub.d a4, t4, a2 ++ sub.d a5, t2, a2 ++ ++ andn a4, a4, t4 ++ andn a5, a5, t2 ++ or t0, a4, a5 ++ and t0, t0, a3 ++ ++ beqz t0, L(loop) ++ addi.d a0, a0, -8 ++L(end): ++ and t0, a5, a3 ++ and t1, a4, a3 ++ ++ ctz.d t0, t0 ++ ctz.d t1, t1 ++ srli.w t2, t0, 3 ++ sltu t3, t1, t0 ++ ++ ++ add.d a0, a0, t2 ++ masknez a0, a0, t3 ++ jr ra ++END(STRCHR_NAME) +diff --git a/sysdeps/loongarch/lp64/multiarch/strchr-lasx.S b/sysdeps/loongarch/lp64/multiarch/strchr-lasx.S +new file mode 100644 +index 00000000..4d3cc588 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strchr-lasx.S +@@ -0,0 +1,91 @@ ++/* Optimized strchr implementation using LoongArch LASX instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ . 
*/ ++ ++#include ++#include ++#include ++ ++#if IS_IN (libc) && !defined __loongarch_soft_float ++ ++#ifndef AS_STRCHRNUL ++# define STRCHR __strchr_lasx ++#endif ++ ++LEAF(STRCHR, 6) ++ andi t1, a0, 0x1f ++ bstrins.d a0, zero, 4, 0 ++ xvld xr0, a0, 0 ++ li.d t2, -1 ++ ++ xvreplgr2vr.b xr1, a1 ++ sll.d t1, t2, t1 ++ xvxor.v xr2, xr0, xr1 ++ xvmin.bu xr0, xr0, xr2 ++ ++ xvmsknz.b xr0, xr0 ++ xvpickve.w xr3, xr0, 4 ++ vilvl.h vr0, vr3, vr0 ++ movfr2gr.s t0, fa0 ++ ++ orn t0, t0, t1 ++ bne t0, t2, L(end) ++ addi.d a0, a0, 32 ++ nop ++ ++ ++L(loop): ++ xvld xr0, a0, 0 ++ xvxor.v xr2, xr0, xr1 ++ xvmin.bu xr0, xr0, xr2 ++ xvsetanyeqz.b fcc0, xr0 ++ ++ bcnez fcc0, L(loop_end) ++ xvld xr0, a0, 32 ++ addi.d a0, a0, 64 ++ xvxor.v xr2, xr0, xr1 ++ ++ xvmin.bu xr0, xr0, xr2 ++ xvsetanyeqz.b fcc0, xr0 ++ bceqz fcc0, L(loop) ++ addi.d a0, a0, -32 ++ ++L(loop_end): ++ xvmsknz.b xr0, xr0 ++ xvpickve.w xr1, xr0, 4 ++ vilvl.h vr0, vr1, vr0 ++ movfr2gr.s t0, fa0 ++ ++ ++L(end): ++ cto.w t0, t0 ++ add.d a0, a0, t0 ++#ifndef AS_STRCHRNUL ++ vreplgr2vr.b vr0, t0 ++ xvpermi.q xr3, xr2, 1 ++ ++ vshuf.b vr0, vr3, vr2, vr0 ++ vpickve2gr.bu t0, vr0, 0 ++ masknez a0, a0, t0 ++#endif ++ jr ra ++ ++END(STRCHR) ++ ++libc_hidden_builtin_def(STRCHR) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/strchr-lsx.S b/sysdeps/loongarch/lp64/multiarch/strchr-lsx.S +new file mode 100644 +index 00000000..8b78c35c +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strchr-lsx.S +@@ -0,0 +1,73 @@ ++/* Optimized strlen implementation using LoongArch LSX instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ . */ ++ ++#include ++#include ++#include ++ ++#if IS_IN (libc) && !defined __loongarch_soft_float ++ ++#ifndef AS_STRCHRNUL ++# define STRCHR __strchr_lsx ++#endif ++ ++LEAF(STRCHR, 6) ++ andi t1, a0, 0xf ++ bstrins.d a0, zero, 3, 0 ++ vld vr0, a0, 0 ++ li.d t2, -1 ++ ++ vreplgr2vr.b vr1, a1 ++ sll.d t3, t2, t1 ++ vxor.v vr2, vr0, vr1 ++ vmin.bu vr0, vr0, vr2 ++ ++ vmsknz.b vr0, vr0 ++ movfr2gr.s t0, fa0 ++ ext.w.h t0, t0 ++ orn t0, t0, t3 ++ ++ beq t0, t2, L(loop) ++L(found): ++ cto.w t0, t0 ++ add.d a0, a0, t0 ++#ifndef AS_STRCHRNUL ++ vreplve.b vr2, vr2, t0 ++ vpickve2gr.bu t1, vr2, 0 ++ masknez a0, a0, t1 ++#endif ++ jr ra ++ ++ ++L(loop): ++ vld vr0, a0, 16 ++ addi.d a0, a0, 16 ++ vxor.v vr2, vr0, vr1 ++ vmin.bu vr0, vr0, vr2 ++ ++ vsetanyeqz.b fcc0, vr0 ++ bceqz fcc0, L(loop) ++ vmsknz.b vr0, vr0 ++ movfr2gr.s t0, fa0 ++ ++ b L(found) ++END(STRCHR) ++ ++libc_hidden_builtin_def (STRCHR) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/strchr.c b/sysdeps/loongarch/lp64/multiarch/strchr.c +new file mode 100644 +index 00000000..404e97bd +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strchr.c +@@ -0,0 +1,36 @@ ++/* Multiple versions of strchr. ++ All versions must be listed in ifunc-impl-list.c. 
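In the LASX/LSX strchr variants above, a single vector pass detects both the searched character and the terminating NUL: xvxor.v turns matching bytes into zero, and xvmin.bu folds that in with the original bytes, so any zero lane means "stop here". Stated per byte in scalar C, as a sketch of the semantics only:

    /* Per-byte meaning of the vxor + vmin.bu combination.  */
    static inline int
    stop_here (unsigned char s, unsigned char c)
    {
      unsigned char t = s ^ c;             /* 0 iff s == c                */
      unsigned char m = s < t ? s : t;     /* the vmin.bu lane            */
      return m == 0;                       /* true iff s == 0 or s == c   */
    }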
++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++/* Define multiple versions only for the definition in libc. */ ++#if IS_IN (libc) ++# define strchr __redirect_strchr ++# include ++# undef strchr ++ ++# define SYMBOL_NAME strchr ++# include "ifunc-strchr.h" ++ ++libc_ifunc_redirected (__redirect_strchr, strchr, IFUNC_SELECTOR ()); ++weak_alias(strchr, index) ++# ifdef SHARED ++__hidden_ver1 (strchr, __GI_strchr, __redirect_strchr) ++ __attribute__ ((visibility ("hidden"))) __attribute_copy__ (strchr); ++# endif ++ ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/strchrnul-aligned.S b/sysdeps/loongarch/lp64/multiarch/strchrnul-aligned.S +new file mode 100644 +index 00000000..20856a06 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strchrnul-aligned.S +@@ -0,0 +1,95 @@ ++/* Optimized strchrnul implementation using basic LoongArch instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ . 
*/ ++ ++#include ++#include ++#include ++ ++#if IS_IN (libc) ++# define STRCHRNUL_NAME __strchrnul_aligned ++#else ++# define STRCHRNUL_NAME __strchrnul ++#endif ++ ++LEAF(STRCHRNUL_NAME, 6) ++ slli.d t1, a0, 3 ++ bstrins.d a0, zero, 2, 0 ++ lu12i.w a2, 0x01010 ++ ld.d t2, a0, 0 ++ ++ ori a2, a2, 0x101 ++ andi a1, a1, 0xff ++ bstrins.d a2, a2, 63, 32 ++ li.w t0, -1 ++ ++ mul.d a1, a1, a2 ++ sll.d t0, t0, t1 ++ slli.d a3, a2, 7 ++ orn t2, t2, t0 ++ ++ sll.d t3, a1, t1 ++ xor t4, t2, t3 ++ sub.d a4, t2, a2 ++ sub.d a5, t4, a2 ++ ++ ++ andn a4, a4, t2 ++ andn a5, a5, t4 ++ or t0, a4, a5 ++ and t0, t0, a3 ++ ++ bnez t0, L(end) ++ addi.d a0, a0, 8 ++L(loop): ++ ld.d t4, a0, 0 ++ xor t2, t4, a1 ++ ++ sub.d a4, t4, a2 ++ sub.d a5, t2, a2 ++ andn a4, a4, t4 ++ andn a5, a5, t2 ++ ++ or t0, a4, a5 ++ and t0, t0, a3 ++ bnez t0, L(end) ++ ld.d t4, a0, 8 ++ ++ ++ addi.d a0, a0, 16 ++ xor t2, t4, a1 ++ sub.d a4, t4, a2 ++ sub.d a5, t2, a2 ++ ++ andn a4, a4, t4 ++ andn a5, a5, t2 ++ or t0, a4, a5 ++ and t0, t0, a3 ++ ++ beqz t0, L(loop) ++ addi.d a0, a0, -8 ++L(end): ++ ctz.d t0, t0 ++ srli.w t0, t0, 3 ++ ++ ++ add.d a0, a0, t0 ++ jr ra ++END(STRCHRNUL_NAME) ++ ++libc_hidden_builtin_def (STRCHRNUL_NAME) +diff --git a/sysdeps/loongarch/lp64/multiarch/strchrnul-lasx.S b/sysdeps/loongarch/lp64/multiarch/strchrnul-lasx.S +new file mode 100644 +index 00000000..4753d4ce +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strchrnul-lasx.S +@@ -0,0 +1,22 @@ ++/* Optimized strchrnul implementation using LoongArch LASX instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ . */ ++ ++#define STRCHR __strchrnul_lasx ++#define AS_STRCHRNUL ++#include "strchr-lasx.S" +diff --git a/sysdeps/loongarch/lp64/multiarch/strchrnul-lsx.S b/sysdeps/loongarch/lp64/multiarch/strchrnul-lsx.S +new file mode 100644 +index 00000000..671e740c +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strchrnul-lsx.S +@@ -0,0 +1,22 @@ ++/* Optimized strchrnul implementation using LoongArch LSX instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ . 
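As the small strchrnul-lasx.S wrapper above shows, the strchrnul variants are not separate bodies: they define AS_STRCHRNUL and include the matching strchr-*.S, which then skips the final "return NULL on NUL" fix-up. The externally visible difference is only the miss case, e.g.:

    #define _GNU_SOURCE
    #include <string.h>

    void
    example (void)
    {
      const char *s = "hello";
      char *a = strchr (s, 'x');      /* NULL: 'x' not found              */
      char *b = strchrnul (s, 'x');   /* s + 5: points at the final NUL   */
      (void) a; (void) b;
    }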
*/ ++ ++#define STRCHR __strchrnul_lsx ++#define AS_STRCHRNUL ++#include "strchr-lsx.S" +diff --git a/sysdeps/loongarch/lp64/multiarch/strchrnul.c b/sysdeps/loongarch/lp64/multiarch/strchrnul.c +new file mode 100644 +index 00000000..f3b8296e +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strchrnul.c +@@ -0,0 +1,39 @@ ++/* Multiple versions of strchrnul. ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++/* Define multiple versions only for the definition in libc. */ ++ ++#if IS_IN (libc) ++# define strchrnul __redirect_strchrnul ++# define __strchrnul __redirect___strchrnul ++# include ++# undef __strchrnul ++# undef strchrnul ++ ++# define SYMBOL_NAME strchrnul ++# include "ifunc-strchrnul.h" ++ ++libc_ifunc_redirected (__redirect_strchrnul, __strchrnul, ++ IFUNC_SELECTOR ()); ++weak_alias (__strchrnul, strchrnul) ++# ifdef SHARED ++__hidden_ver1 (__strchrnul, __GI___strchrnul, __redirect_strchrnul) ++ __attribute__((visibility ("hidden"))) __attribute_copy__ (strchrnul); ++# endif ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/strcmp-aligned.S b/sysdeps/loongarch/lp64/multiarch/strcmp-aligned.S +new file mode 100644 +index 00000000..ba1f9667 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strcmp-aligned.S +@@ -0,0 +1,179 @@ ++/* Optimized strcmp implementation using basic LoongArch instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ . 
*/ ++ ++#include ++#include ++#include ++ ++#if IS_IN (libc) ++# define STRCMP_NAME __strcmp_aligned ++#else ++# define STRCMP_NAME strcmp ++#endif ++ ++LEAF(STRCMP_NAME, 6) ++ lu12i.w a4, 0x01010 ++ andi a2, a0, 0x7 ++ ori a4, a4, 0x101 ++ andi a3, a1, 0x7 ++ ++ bstrins.d a4, a4, 63, 32 ++ li.d t7, -1 ++ li.d t8, 8 ++ slli.d a5, a4, 7 ++ ++ bne a2, a3, L(unaligned) ++ bstrins.d a0, zero, 2, 0 ++ bstrins.d a1, zero, 2, 0 ++ ld.d t0, a0, 0 ++ ++ ld.d t1, a1, 0 ++ slli.d t3, a2, 3 ++ sll.d t2, t7, t3 ++ orn t0, t0, t2 ++ ++ ++ orn t1, t1, t2 ++ sub.d t2, t0, a4 ++ andn t3, a5, t0 ++ and t2, t2, t3 ++ ++ bne t0, t1, L(al_end) ++L(al_loop): ++ bnez t2, L(ret0) ++ ldx.d t0, a0, t8 ++ ldx.d t1, a1, t8 ++ ++ addi.d t8, t8, 8 ++ sub.d t2, t0, a4 ++ andn t3, a5, t0 ++ and t2, t2, t3 ++ ++ beq t0, t1, L(al_loop) ++L(al_end): ++ xor t3, t0, t1 ++ or t2, t2, t3 ++ ctz.d t3, t2 ++ ++ ++ bstrins.d t3, zero, 2, 0 ++ srl.d t0, t0, t3 ++ srl.d t1, t1, t3 ++ andi t0, t0, 0xff ++ ++ andi t1, t1, 0xff ++ sub.d a0, t0, t1 ++ jr ra ++ nop ++ ++L(ret0): ++ move a0, zero ++ jr ra ++ nop ++ nop ++ ++L(unaligned): ++ slt a6, a3, a2 ++ xor t0, a0, a1 ++ maskeqz t0, t0, a6 ++ xor a0, a0, t0 ++ ++ ++ xor a1, a1, t0 ++ andi a2, a0, 0x7 ++ andi a3, a1, 0x7 ++ bstrins.d a0, zero, 2, 0 ++ ++ bstrins.d a1, zero, 2, 0 ++ ld.d t4, a0, 0 ++ ld.d t1, a1, 0 ++ slli.d a2, a2, 3 ++ ++ slli.d a3, a3, 3 ++ srl.d t0, t4, a2 ++ srl.d t1, t1, a3 ++ srl.d t5, t7, a3 ++ ++ orn t0, t0, t5 ++ orn t1, t1, t5 ++ bne t0, t1, L(not_equal) ++ sll.d t5, t7, a2 ++ ++ ++ sub.d a3, a2, a3 ++ orn t4, t4, t5 ++ sub.d a2, zero, a3 ++ sub.d t2, t4, a4 ++ ++ andn t3, a5, t4 ++ and t2, t2, t3 ++ bnez t2, L(find_zero) ++L(un_loop): ++ srl.d t5, t4, a3 ++ ++ ldx.d t4, a0, t8 ++ ldx.d t1, a1, t8 ++ addi.d t8, t8, 8 ++ sll.d t0, t4, a2 ++ ++ or t0, t0, t5 ++ bne t0, t1, L(not_equal) ++ sub.d t2, t4, a4 ++ andn t3, a5, t4 ++ ++ ++ and t2, t2, t3 ++ beqz t2, L(un_loop) ++L(find_zero): ++ sub.d t2, t0, a4 ++ andn t3, a5, t0 ++ ++ and t2, t2, t3 ++ bnez t2, L(ret0) ++ ldx.d t1, a1, t8 ++ srl.d t0, t4, a3 ++ ++L(not_equal): ++ sub.d t2, t0, a4 ++ andn t3, a5, t0 ++ and t2, t2, t3 ++ xor t3, t0, t1 ++ ++ or t2, t2, t3 ++L(un_end): ++ ctz.d t3, t2 ++ bstrins.d t3, zero, 2, 0 ++ srl.d t0, t0, t3 ++ ++ ++ srl.d t1, t1, t3 ++ andi t0, t0, 0xff ++ andi t1, t1, 0xff ++ sub.d t2, t0, t1 ++ ++ ++ sub.d t3, t1, t0 ++ masknez t0, t2, a6 ++ maskeqz t1, t3, a6 ++ or a0, t0, t1 ++ ++ jr ra ++END(STRCMP_NAME) ++ ++libc_hidden_builtin_def (STRCMP_NAME) +diff --git a/sysdeps/loongarch/lp64/multiarch/strcmp-lsx.S b/sysdeps/loongarch/lp64/multiarch/strcmp-lsx.S +new file mode 100644 +index 00000000..091c8c9e +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strcmp-lsx.S +@@ -0,0 +1,165 @@ ++/* Optimized strcmp implementation using LoongArch LSX instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. 
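For the mixed-alignment path of __strcmp_aligned above, the slt/xor/maskeqz/xor sequence is a branchless conditional swap: the two pointers are exchanged when their misalignments are in the "wrong" order, the decision is remembered in a6, and the sign of the final byte difference is corrected at the end with masknez/maskeqz. The swap step alone, restated as a C sketch:

    #include <stdint.h>

    /* Branchless conditional swap of *p0 and *p1 when swap_needed is 1.  */
    static inline void
    cond_swap (uintptr_t *p0, uintptr_t *p1, int swap_needed)
    {
      uintptr_t m = (*p0 ^ *p1) & -(uintptr_t) swap_needed;   /* maskeqz */
      *p0 ^= m;
      *p1 ^= m;
    }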
++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ . */ ++ ++#include ++#include ++#include ++ ++#if IS_IN (libc) && !defined __loongarch_soft_float ++ ++# define STRCMP __strcmp_lsx ++ ++LEAF(STRCMP, 6) ++ pcalau12i t0, %pc_hi20(L(INDEX)) ++ andi a2, a0, 0xf ++ vld vr2, t0, %pc_lo12(L(INDEX)) ++ andi a3, a1, 0xf ++ ++ bne a2, a3, L(unaligned) ++ bstrins.d a0, zero, 3, 0 ++ bstrins.d a1, zero, 3, 0 ++ vld vr0, a0, 0 ++ ++ vld vr1, a1, 0 ++ vreplgr2vr.b vr3, a2 ++ vslt.b vr2, vr2, vr3 ++ vseq.b vr3, vr0, vr1 ++ ++ vmin.bu vr3, vr0, vr3 ++ vor.v vr3, vr3, vr2 ++ vsetanyeqz.b fcc0, vr3 ++ bcnez fcc0, L(al_out) ++ ++ ++L(al_loop): ++ vld vr0, a0, 16 ++ vld vr1, a1, 16 ++ addi.d a0, a0, 16 ++ addi.d a1, a1, 16 ++ ++ vseq.b vr3, vr0, vr1 ++ vmin.bu vr3, vr0, vr3 ++ vsetanyeqz.b fcc0, vr3 ++ bceqz fcc0, L(al_loop) ++ ++L(al_out): ++ vseqi.b vr3, vr3, 0 ++ vfrstpi.b vr3, vr3, 0 ++ vshuf.b vr0, vr0, vr0, vr3 ++ vshuf.b vr1, vr1, vr1, vr3 ++ ++ vpickve2gr.bu t0, vr0, 0 ++ vpickve2gr.bu t1, vr1, 0 ++ sub.d a0, t0, t1 ++ jr ra ++ ++ ++L(unaligned): ++ slt a4, a3, a2 ++ xor t0, a0, a1 ++ maskeqz t0, t0, a4 ++ xor a0, a0, t0 ++ ++ xor a1, a1, t0 ++ andi a2, a0, 0xf ++ andi a3, a1, 0xf ++ bstrins.d a0, zero, 3, 0 ++ ++ bstrins.d a1, zero, 3, 0 ++ vld vr3, a0, 0 ++ vld vr1, a1, 0 ++ vreplgr2vr.b vr4, a2 ++ ++ vreplgr2vr.b vr5, a3 ++ vslt.b vr7, vr2, vr5 ++ vsub.b vr5, vr5, vr4 ++ vaddi.bu vr6, vr2, 16 ++ ++ ++ vsub.b vr6, vr6, vr5 ++ vshuf.b vr0, vr3, vr3, vr6 ++ vor.v vr0, vr0, vr7 ++ vor.v vr1, vr1, vr7 ++ ++ vseq.b vr5, vr0, vr1 ++ vsetanyeqz.b fcc0, vr5 ++ bcnez fcc0, L(not_equal) ++ vslt.b vr4, vr2, vr4 ++ ++ vor.v vr0, vr3, vr4 ++ vsetanyeqz.b fcc0, vr0 ++ bcnez fcc0, L(find_zero) ++ nop ++ ++L(un_loop): ++ vld vr3, a0, 16 ++ vld vr1, a1, 16 ++ addi.d a0, a0, 16 ++ addi.d a1, a1, 16 ++ ++ ++ vshuf.b vr0, vr3, vr0, vr6 ++ vseq.b vr5, vr0, vr1 ++ vsetanyeqz.b fcc0, vr5 ++ bcnez fcc0, L(not_equal) ++ ++ vsetanyeqz.b fcc0, vr3 ++ vor.v vr0, vr3, vr3 ++ bceqz fcc0, L(un_loop) ++L(find_zero): ++ vmin.bu vr5, vr1, vr5 ++ ++ vsetanyeqz.b fcc0, vr5 ++ bcnez fcc0, L(ret0) ++ vld vr1, a1, 16 ++ vshuf.b vr0, vr3, vr3, vr6 ++ ++ vseq.b vr5, vr0, vr1 ++L(not_equal): ++ vmin.bu vr5, vr0, vr5 ++L(un_end): ++ vseqi.b vr5, vr5, 0 ++ vfrstpi.b vr5, vr5, 0 ++ ++ ++ vshuf.b vr0, vr0, vr0, vr5 ++ vshuf.b vr1, vr1, vr1, vr5 ++ vpickve2gr.bu t0, vr0, 0 ++ vpickve2gr.bu t1, vr1, 0 ++ ++ sub.d t3, t0, t1 ++ sub.d t4, t1, t0 ++ masknez t0, t3, a4 ++ maskeqz t1, t4, a4 ++ ++ or a0, t0, t1 ++ jr ra ++L(ret0): ++ move a0, zero ++ jr ra ++END(STRCMP) ++ ++ .section .rodata.cst16,"M",@progbits,16 ++ .align 4 ++L(INDEX): ++ .dword 0x0706050403020100 ++ .dword 0x0f0e0d0c0b0a0908 ++ ++libc_hidden_builtin_def (STRCMP) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/strcmp.c b/sysdeps/loongarch/lp64/multiarch/strcmp.c +new file mode 100644 +index 00000000..6f249c0b +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strcmp.c +@@ -0,0 +1,35 @@ ++/* Multiple versions of strcmp. ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. 
++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++/* Define multiple versions only for the definition in libc. */ ++#if IS_IN (libc) ++# define strcmp __redirect_strcmp ++# include ++# undef strcmp ++ ++# define SYMBOL_NAME strcmp ++# include "ifunc-strcmp.h" ++ ++libc_ifunc_redirected (__redirect_strcmp, strcmp, IFUNC_SELECTOR ()); ++ ++# ifdef SHARED ++__hidden_ver1 (strcmp, __GI_strcmp, __redirect_strcmp) ++ __attribute__ ((visibility ("hidden"))) __attribute_copy__ (strcmp); ++# endif ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/strcpy-aligned.S b/sysdeps/loongarch/lp64/multiarch/strcpy-aligned.S +new file mode 100644 +index 00000000..4ed539fd +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strcpy-aligned.S +@@ -0,0 +1,202 @@ ++/* Optimized strcpy stpcpy aligned implementation using basic LoongArch ++ instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ . 
*/ ++ ++#include ++#include ++#include ++ ++#if IS_IN (libc) ++# ifndef STRCPY ++# define STRCPY __strcpy_aligned ++# endif ++#else ++# ifndef STRCPY ++# define STRCPY strcpy ++# endif ++#endif ++ ++LEAF(STRCPY, 6) ++ andi a3, a0, 0x7 ++ move a2, a0 ++ beqz a3, L(dest_align) ++ sub.d a5, a1, a3 ++ addi.d a5, a5, 8 ++ ++L(make_dest_align): ++ ld.b t0, a1, 0 ++ addi.d a1, a1, 1 ++ st.b t0, a2, 0 ++ addi.d a2, a2, 1 ++ beqz t0, L(al_out) ++ ++ bne a1, a5, L(make_dest_align) ++ ++L(dest_align): ++ andi a4, a1, 7 ++ bstrins.d a1, zero, 2, 0 ++ ++ lu12i.w t5, 0x1010 ++ ld.d t0, a1, 0 ++ ori t5, t5, 0x101 ++ bstrins.d t5, t5, 63, 32 ++ ++ slli.d t6, t5, 0x7 ++ bnez a4, L(unalign) ++ sub.d t1, t0, t5 ++ andn t2, t6, t0 ++ ++ and t3, t1, t2 ++ bnez t3, L(al_end) ++ ++L(al_loop): ++ st.d t0, a2, 0 ++ ld.d t0, a1, 8 ++ ++ addi.d a1, a1, 8 ++ addi.d a2, a2, 8 ++ sub.d t1, t0, t5 ++ andn t2, t6, t0 ++ ++ and t3, t1, t2 ++ beqz t3, L(al_loop) ++ ++L(al_end): ++ ctz.d t1, t3 ++ srli.d t1, t1, 3 ++ addi.d t1, t1, 1 ++ ++ andi a3, t1, 8 ++ andi a4, t1, 4 ++ andi a5, t1, 2 ++ andi a6, t1, 1 ++ ++L(al_end_8): ++ beqz a3, L(al_end_4) ++ st.d t0, a2, 0 ++#ifdef USE_AS_STPCPY ++ addi.d a0, a2, 7 ++#endif ++ jr ra ++L(al_end_4): ++ beqz a4, L(al_end_2) ++ st.w t0, a2, 0 ++ addi.d a2, a2, 4 ++ srli.d t0, t0, 32 ++L(al_end_2): ++ beqz a5, L(al_end_1) ++ st.h t0, a2, 0 ++ addi.d a2, a2, 2 ++ srli.d t0, t0, 16 ++L(al_end_1): ++ beqz a6, L(al_out) ++ st.b t0, a2, 0 ++ addi.d a2, a2, 1 ++L(al_out): ++#ifdef USE_AS_STPCPY ++ addi.d a0, a2, -1 ++#endif ++ jr ra ++ ++ .align 4 ++L(unalign): ++ slli.d a5, a4, 3 ++ li.d t1, -1 ++ sub.d a6, zero, a5 ++ ++ srl.d a7, t0, a5 ++ sll.d t7, t1, a6 ++ ++ or t0, a7, t7 ++ sub.d t1, t0, t5 ++ andn t2, t6, t0 ++ and t3, t1, t2 ++ ++ bnez t3, L(un_end) ++ ++ ld.d t4, a1, 8 ++ ++ sub.d t1, t4, t5 ++ andn t2, t6, t4 ++ sll.d t0, t4, a6 ++ and t3, t1, t2 ++ ++ or t0, t0, a7 ++ bnez t3, L(un_end_with_remaining) ++ ++L(un_loop): ++ srl.d a7, t4, a5 ++ ++ ld.d t4, a1, 16 ++ addi.d a1, a1, 8 ++ ++ st.d t0, a2, 0 ++ addi.d a2, a2, 8 ++ ++ sub.d t1, t4, t5 ++ andn t2, t6, t4 ++ sll.d t0, t4, a6 ++ and t3, t1, t2 ++ ++ or t0, t0, a7 ++ beqz t3, L(un_loop) ++ ++L(un_end_with_remaining): ++ ctz.d t1, t3 ++ srli.d t1, t1, 3 ++ addi.d t1, t1, 1 ++ sub.d t1, t1, a4 ++ ++ blt t1, zero, L(un_end_less_8) ++ st.d t0, a2, 0 ++ addi.d a2, a2, 8 ++ beqz t1, L(un_out) ++ srl.d t0, t4, a5 ++ b L(un_end_less_8) ++ ++L(un_end): ++ ctz.d t1, t3 ++ srli.d t1, t1, 3 ++ addi.d t1, t1, 1 ++ ++L(un_end_less_8): ++ andi a4, t1, 4 ++ andi a5, t1, 2 ++ andi a6, t1, 1 ++L(un_end_4): ++ beqz a4, L(un_end_2) ++ st.w t0, a2, 0 ++ addi.d a2, a2, 4 ++ srli.d t0, t0, 32 ++L(un_end_2): ++ beqz a5, L(un_end_1) ++ st.h t0, a2, 0 ++ addi.d a2, a2, 2 ++ srli.d t0, t0, 16 ++L(un_end_1): ++ beqz a6, L(un_out) ++ st.b t0, a2, 0 ++ addi.d a2, a2, 1 ++L(un_out): ++#ifdef USE_AS_STPCPY ++ addi.d a0, a2, -1 ++#endif ++ jr ra ++END(STRCPY) ++ ++libc_hidden_builtin_def (STRCPY) +diff --git a/sysdeps/loongarch/lp64/multiarch/strcpy-lasx.S b/sysdeps/loongarch/lp64/multiarch/strcpy-lasx.S +new file mode 100644 +index 00000000..c2825612 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strcpy-lasx.S +@@ -0,0 +1,215 @@ ++/* Optimized strcpy stpcpy implementation using LoongArch LASX instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. 
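strcpy-aligned.S above doubles as the stpcpy body: the stpcpy-*.S files only define USE_AS_STPCPY and STRCPY before including it, and the #ifdef USE_AS_STPCPY blocks adjust what is left in a0. The externally visible difference is just the return value:

    #define _GNU_SOURCE
    #include <string.h>

    void
    example (void)
    {
      char buf[16];
      char *r1 = strcpy (buf, "abc");   /* r1 == buf                       */
      char *r2 = stpcpy (buf, "abc");   /* r2 == buf + 3, the written NUL  */
      (void) r1; (void) r2;
    }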
++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ . */ ++ ++#include ++#include ++#include ++ ++#if IS_IN (libc) && !defined __loongarch_soft_float ++ ++# ifndef STRCPY ++# define STRCPY __strcpy_lasx ++# endif ++ ++# ifdef USE_AS_STPCPY ++# define dstend a0 ++# else ++# define dstend a4 ++# endif ++ ++LEAF(STRCPY, 6) ++ ori t8, zero, 0xfe0 ++ andi t0, a1, 0xfff ++ li.d t7, -1 ++ move a2, a0 ++ ++ bltu t8, t0, L(page_cross_start) ++L(start_entry): ++ xvld xr0, a1, 0 ++ li.d t0, 32 ++ andi t1, a2, 0x1f ++ ++ xvsetanyeqz.b fcc0, xr0 ++ sub.d t0, t0, t1 ++ bcnez fcc0, L(end) ++ add.d a1, a1, t0 ++ ++ xvst xr0, a2, 0 ++ andi a3, a1, 0x1f ++ add.d a2, a2, t0 ++ bnez a3, L(unaligned) ++ ++ ++ xvld xr0, a1, 0 ++ xvsetanyeqz.b fcc0, xr0 ++ bcnez fcc0, L(al_end) ++L(al_loop): ++ xvst xr0, a2, 0 ++ ++ xvld xr0, a1, 32 ++ addi.d a2, a2, 32 ++ addi.d a1, a1, 32 ++ xvsetanyeqz.b fcc0, xr0 ++ ++ bceqz fcc0, L(al_loop) ++L(al_end): ++ xvmsknz.b xr0, xr0 ++ xvpickve.w xr1, xr0, 4 ++ vilvl.h vr0, vr1, vr0 ++ ++ movfr2gr.s t0, fa0 ++ cto.w t0, t0 ++ add.d a1, a1, t0 ++ xvld xr0, a1, -31 ++ ++ ++ add.d dstend, a2, t0 ++ xvst xr0, dstend, -31 ++ jr ra ++ nop ++ ++L(page_cross_start): ++ move a4, a1 ++ bstrins.d a4, zero, 4, 0 ++ xvld xr0, a4, 0 ++ xvmsknz.b xr0, xr0 ++ ++ xvpickve.w xr1, xr0, 4 ++ vilvl.h vr0, vr1, vr0 ++ movfr2gr.s t0, fa0 ++ sra.w t0, t0, a1 ++ ++ beq t0, t7, L(start_entry) ++ b L(tail) ++L(unaligned): ++ andi t0, a1, 0xfff ++ bltu t8, t0, L(un_page_cross) ++ ++ ++L(un_start_entry): ++ xvld xr0, a1, 0 ++ xvsetanyeqz.b fcc0, xr0 ++ bcnez fcc0, L(un_end) ++ addi.d a1, a1, 32 ++ ++L(un_loop): ++ xvst xr0, a2, 0 ++ andi t0, a1, 0xfff ++ addi.d a2, a2, 32 ++ bltu t8, t0, L(page_cross_loop) ++ ++L(un_loop_entry): ++ xvld xr0, a1, 0 ++ addi.d a1, a1, 32 ++ xvsetanyeqz.b fcc0, xr0 ++ bceqz fcc0, L(un_loop) ++ ++ addi.d a1, a1, -32 ++L(un_end): ++ xvmsknz.b xr0, xr0 ++ xvpickve.w xr1, xr0, 4 ++ vilvl.h vr0, vr1, vr0 ++ ++ ++ movfr2gr.s t0, fa0 ++L(un_tail): ++ cto.w t0, t0 ++ add.d a1, a1, t0 ++ xvld xr0, a1, -31 ++ ++ add.d dstend, a2, t0 ++ xvst xr0, dstend, -31 ++ jr ra ++L(un_page_cross): ++ sub.d a4, a1, a3 ++ ++ xvld xr0, a4, 0 ++ xvmsknz.b xr0, xr0 ++ xvpickve.w xr1, xr0, 4 ++ vilvl.h vr0, vr1, vr0 ++ ++ movfr2gr.s t0, fa0 ++ sra.w t0, t0, a1 ++ beq t0, t7, L(un_start_entry) ++ b L(un_tail) ++ ++ ++L(page_cross_loop): ++ sub.d a4, a1, a3 ++ xvld xr0, a4, 0 ++ xvmsknz.b xr0, xr0 ++ xvpickve.w xr1, xr0, 4 ++ ++ vilvl.h vr0, vr1, vr0 ++ movfr2gr.s t0, fa0 ++ sra.w t0, t0, a1 ++ beq t0, t7, L(un_loop_entry) ++ ++ b L(un_tail) ++L(end): ++ xvmsknz.b xr0, xr0 ++ xvpickve.w xr1, xr0, 4 ++ vilvl.h vr0, vr1, vr0 ++ ++ movfr2gr.s t0, fa0 ++L(tail): ++ cto.w t0, t0 ++ add.d dstend, a2, t0 ++ add.d a5, a1, t0 ++ ++L(less_32): ++ srli.d t1, t0, 4 ++ beqz t1, L(less_16) ++ vld vr0, a1, 0 ++ vld vr1, a5, -15 ++ ++ vst vr0, a2, 0 ++ vst vr1, dstend, -15 ++ jr ra ++L(less_16): ++ srli.d t1, t0, 3 ++ ++ beqz t1, 
L(less_8) ++ ld.d t2, a1, 0 ++ ld.d t3, a5, -7 ++ st.d t2, a2, 0 ++ ++ st.d t3, dstend, -7 ++ jr ra ++L(less_8): ++ li.d t1, 3 ++ bltu t0, t1, L(less_3) ++ ++ ld.w t2, a1, 0 ++ ld.w t3, a5, -3 ++ st.w t2, a2, 0 ++ st.w t3, dstend, -3 ++ ++ jr ra ++L(less_3): ++ beqz t0, L(zero_byte) ++ ld.h t2, a1, 0 ++ ++ st.h t2, a2, 0 ++L(zero_byte): ++ st.b zero, dstend, 0 ++ jr ra ++END(STRCPY) ++ ++libc_hidden_builtin_def (STRCPY) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/strcpy-lsx.S b/sysdeps/loongarch/lp64/multiarch/strcpy-lsx.S +new file mode 100644 +index 00000000..fc2498f7 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strcpy-lsx.S +@@ -0,0 +1,212 @@ ++/* Optimized strcpy stpcpy implementation using LoongArch LSX instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ . */ ++ ++#include ++#include ++#include ++ ++#if IS_IN (libc) && !defined __loongarch_soft_float ++ ++# ifndef STRCPY ++# define STRCPY __strcpy_lsx ++# endif ++ ++LEAF(STRCPY, 6) ++ pcalau12i t0, %pc_hi20(L(INDEX)) ++ andi a4, a1, 0xf ++ vld vr1, t0, %pc_lo12(L(INDEX)) ++ move a2, a0 ++ ++ beqz a4, L(load_start) ++ xor t0, a1, a4 ++ vld vr0, t0, 0 ++ vreplgr2vr.b vr2, a4 ++ ++ vadd.b vr2, vr2, vr1 ++ vshuf.b vr0, vr2, vr0, vr2 ++ vsetanyeqz.b fcc0, vr0 ++ bcnez fcc0, L(end) ++ ++L(load_start): ++ vld vr0, a1, 0 ++ li.d t1, 16 ++ andi a3, a2, 0xf ++ vsetanyeqz.b fcc0, vr0 ++ ++ ++ sub.d t0, t1, a3 ++ bcnez fcc0, L(end) ++ add.d a1, a1, t0 ++ vst vr0, a2, 0 ++ ++ andi a3, a1, 0xf ++ add.d a2, a2, t0 ++ bnez a3, L(unaligned) ++ vld vr0, a1, 0 ++ ++ vsetanyeqz.b fcc0, vr0 ++ bcnez fcc0, L(al_end) ++L(al_loop): ++ vst vr0, a2, 0 ++ vld vr0, a1, 16 ++ ++ addi.d a2, a2, 16 ++ addi.d a1, a1, 16 ++ vsetanyeqz.b fcc0, vr0 ++ bceqz fcc0, L(al_loop) ++ ++ ++L(al_end): ++ vmsknz.b vr1, vr0 ++ movfr2gr.s t0, fa1 ++ cto.w t0, t0 ++ add.d a1, a1, t0 ++ ++ vld vr0, a1, -15 ++# ifdef USE_AS_STPCPY ++ add.d a0, a2, t0 ++ vst vr0, a0, -15 ++# else ++ add.d a2, a2, t0 ++ vst vr0, a2, -15 ++# endif ++ jr ra ++ ++L(end): ++ vmsknz.b vr1, vr0 ++ movfr2gr.s t0, fa1 ++ cto.w t0, t0 ++ addi.d t0, t0, 1 ++ ++L(end_16): ++ andi t1, t0, 16 ++ beqz t1, L(end_8) ++ vst vr0, a2, 0 ++# ifdef USE_AS_STPCPY ++ addi.d a0, a2, 15 ++# endif ++ jr ra ++ ++L(end_8): ++ andi t2, t0, 8 ++ andi t3, t0, 4 ++ andi t4, t0, 2 ++ andi t5, t0, 1 ++ ++ beqz t2, L(end_4) ++ vstelm.d vr0, a2, 0, 0 ++ addi.d a2, a2, 8 ++ vbsrl.v vr0, vr0, 8 ++ ++L(end_4): ++ beqz t3, L(end_2) ++ vstelm.w vr0, a2, 0, 0 ++ addi.d a2, a2, 4 ++ vbsrl.v vr0, vr0, 4 ++ ++L(end_2): ++ beqz t4, L(end_1) ++ vstelm.h vr0, a2, 0, 0 ++ addi.d a2, a2, 2 ++ vbsrl.v vr0, vr0, 2 ++ ++ ++L(end_1): ++ beqz t5, L(out) ++ vstelm.b vr0, a2, 0, 0 ++ addi.d a2, a2, 1 ++L(out): ++# ifdef USE_AS_STPCPY ++ addi.d a0, a2, -1 ++# endif ++ jr ra ++ ++ .align 4 ++L(unaligned): ++ bstrins.d a1, zero, 3, 0 ++ 
vld vr2, a1, 0 ++ vreplgr2vr.b vr3, a3 ++ vslt.b vr4, vr1, vr3 ++ ++ vor.v vr0, vr2, vr4 ++ vsetanyeqz.b fcc0, vr0 ++ bcnez fcc0, L(un_first_end) ++ vld vr0, a1, 16 ++ ++ vadd.b vr3, vr3, vr1 ++ vshuf.b vr4, vr0, vr2, vr3 ++ vsetanyeqz.b fcc0, vr0 ++ bcnez fcc0, L(un_end) ++ ++ ++ vor.v vr2, vr0, vr0 ++ addi.d a1, a1, 16 ++L(un_loop): ++ vld vr0, a1, 16 ++ vst vr4, a2, 0 ++ ++ addi.d a2, a2, 16 ++ vshuf.b vr4, vr0, vr2, vr3 ++ vsetanyeqz.b fcc0, vr0 ++ bcnez fcc0, L(un_end) ++ ++ vld vr2, a1, 32 ++ vst vr4, a2, 0 ++ addi.d a1, a1, 32 ++ addi.d a2, a2, 16 ++ ++ vshuf.b vr4, vr2, vr0, vr3 ++ vsetanyeqz.b fcc0, vr2 ++ bceqz fcc0, L(un_loop) ++ vor.v vr0, vr2, vr2 ++ ++ ++ addi.d a1, a1, -16 ++L(un_end): ++ vsetanyeqz.b fcc0, vr4 ++ bcnez fcc0, 1f ++ vst vr4, a2, 0 ++ ++1: ++ vmsknz.b vr1, vr0 ++ movfr2gr.s t0, fa1 ++ cto.w t0, t0 ++ add.d a1, a1, t0 ++ ++ vld vr0, a1, 1 ++ add.d a2, a2, t0 ++ sub.d a2, a2, a3 ++ vst vr0, a2, 1 ++# ifdef USE_AS_STPCPY ++ addi.d a0, a2, 16 ++# endif ++ jr ra ++L(un_first_end): ++ addi.d a2, a2, -16 ++ addi.d a1, a1, -16 ++ b 1b ++END(STRCPY) ++ ++ .section .rodata.cst16,"M",@progbits,16 ++ .align 4 ++L(INDEX): ++ .dword 0x0706050403020100 ++ .dword 0x0f0e0d0c0b0a0908 ++ ++libc_hidden_builtin_def (STRCPY) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/strcpy-unaligned.S b/sysdeps/loongarch/lp64/multiarch/strcpy-unaligned.S +new file mode 100644 +index 00000000..9e31883b +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strcpy-unaligned.S +@@ -0,0 +1,138 @@ ++/* Optimized strcpy unaligned implementation using basic LoongArch ++ instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ . 
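A word on the 0xfe0 test near the top of __strcpy_lasx further up in this hunk: a 32-byte vector load may read past the terminating NUL, which is safe only while it stays inside the source's page. The guard therefore checks whether the load would cross a 4 KiB boundary and, if so, takes the aligned page-cross path first. As a sketch of the check:

    #include <stdint.h>

    /* True when an n-byte load starting at s would cross a 4 KiB page.
       For the LASX path n is 32, hence the constant 0xfe0 (4096 - 32).  */
    static inline int
    crosses_page (const void *s, unsigned n)
    {
      return ((uintptr_t) s & 0xfff) > (0x1000u - n);
    }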
*/ ++ ++#include ++#include ++#include ++ ++#if IS_IN (libc) ++ ++# ifndef STRCPY ++# define STRCPY __strcpy_unaligned ++# endif ++ ++# ifdef USE_AS_STPCPY ++# define dstend a0 ++# else ++# define dstend a4 ++# endif ++ ++LEAF(STRCPY, 6) ++ lu12i.w t5, 0x01010 ++ li.w t0, 0xff8 ++ ori t5, t5, 0x101 ++ andi t1, a1, 0xfff ++ ++ bstrins.d t5, t5, 63, 32 ++ move a2, a0 ++ slli.d t6, t5, 7 ++ bltu t0, t1, L(page_cross) ++ ++L(start_entry): ++ ld.d t0, a1, 0 ++ li.d t3, 8 ++ andi a3, a1, 0x7 ++ sub.d t1, t0, t5 ++ ++ andn t2, t6, t0 ++ sub.d t3, t3, a3 ++ and t1, t1, t2 ++ bnez t1, L(end) ++ ++ ++ add.d a1, a1, t3 ++ st.d t0, a2, 0 ++ add.d a2, a2, t3 ++ ld.d t0, a1, 0 ++ ++ sub.d t1, t0, t5 ++ andn t2, t6, t0 ++ and t1, t1, t2 ++ bnez t1, L(long_end) ++ ++L(loop): ++ st.d t0, a2, 0 ++ ld.d t0, a1, 8 ++ addi.d a2, a2, 8 ++ addi.d a1, a1, 8 ++ ++ sub.d t1, t0, t5 ++ andn t2, t6, t0 ++ and t1, t1, t2 ++ beqz t1, L(loop) ++ ++ ++L(long_end): ++ ctz.d t1, t1 ++ srli.d t1, t1, 3 ++ add.d a1, a1, t1 ++ ld.d t0, a1, -7 ++ ++ add.d dstend, a2, t1 ++ st.d t0, dstend, -7 ++ jr ra ++ nop ++ ++L(end): ++ ctz.d t1, t1 ++ srli.d t1, t1, 3 ++ add.d a3, a1, t1 ++ add.d dstend, a2, t1 ++ ++L(less_8): ++ li.d t0, 3 ++ bltu t1, t0, L(less_3) ++ ld.w t1, a1, 0 ++ ld.w t2, a3, -3 ++ ++ ++ st.w t1, a2, 0 ++ st.w t2, dstend, -3 ++ jr ra ++L(less_3): ++ beqz t1, L(zero_bytes) ++ ++ ld.h t1, a1, 0 ++ st.h t1, a2, 0 ++L(zero_bytes): ++ st.b zero, dstend, 0 ++ jr ra ++ ++L(page_cross): ++ move a4, a1 ++ bstrins.d a4, zero, 2, 0 ++ ld.d t0, a4, 0 ++ li.d t3, -1 ++ ++ slli.d t4, a1, 3 ++ srl.d t3, t3, t4 ++ srl.d t0, t0, t4 ++ orn t0, t0, t3 ++ ++ ++ sub.d t1, t0, t5 ++ andn t2, t6, t0 ++ and t1, t1, t2 ++ beqz t1, L(start_entry) ++ ++ b L(end) ++END(STRCPY) ++ ++libc_hidden_builtin_def (STRCPY) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/strcpy.c b/sysdeps/loongarch/lp64/multiarch/strcpy.c +new file mode 100644 +index 00000000..46afd068 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strcpy.c +@@ -0,0 +1,35 @@ ++/* Multiple versions of strcpy. ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++/* Define multiple versions only for the definition in libc. 
*/ ++#if IS_IN (libc) ++# define strcpy __redirect_strcpy ++# include ++# undef strcpy ++ ++# define SYMBOL_NAME strcpy ++# include "ifunc-lasx.h" ++ ++libc_ifunc_redirected (__redirect_strcpy, strcpy, IFUNC_SELECTOR ()); ++ ++# ifdef SHARED ++__hidden_ver1 (strcpy, __GI_strcpy, __redirect_strcpy) ++ __attribute__ ((visibility ("hidden"))) __attribute_copy__ (strcpy); ++# endif ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/strlen-aligned.S b/sysdeps/loongarch/lp64/multiarch/strlen-aligned.S +new file mode 100644 +index 00000000..ed0548e4 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strlen-aligned.S +@@ -0,0 +1,100 @@ ++/* Optimized strlen implementation using basic LoongArch instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ . */ ++ ++#include ++#include ++#include ++ ++#if IS_IN (libc) ++# define STRLEN __strlen_aligned ++#else ++# define STRLEN strlen ++#endif ++ ++LEAF(STRLEN, 6) ++ move a1, a0 ++ bstrins.d a0, zero, 2, 0 ++ lu12i.w a2, 0x01010 ++ li.w t0, -1 ++ ++ ld.d t2, a0, 0 ++ andi t1, a1, 0x7 ++ ori a2, a2, 0x101 ++ slli.d t1, t1, 3 ++ ++ bstrins.d a2, a2, 63, 32 ++ sll.d t1, t0, t1 ++ slli.d t3, a2, 7 ++ nor a3, zero, t3 ++ ++ orn t2, t2, t1 ++ sub.d t0, t2, a2 ++ nor t1, t2, a3 ++ and t0, t0, t1 ++ ++ ++ bnez t0, L(count_pos) ++ addi.d a0, a0, 8 ++L(loop_16_7bit): ++ ld.d t2, a0, 0 ++ sub.d t1, t2, a2 ++ ++ and t0, t1, t3 ++ bnez t0, L(more_check) ++ ld.d t2, a0, 8 ++ sub.d t1, t2, a2 ++ ++ and t0, t1, t3 ++ addi.d a0, a0, 16 ++ beqz t0, L(loop_16_7bit) ++ addi.d a0, a0, -8 ++ ++L(more_check): ++ nor t0, t2, a3 ++ and t0, t1, t0 ++ bnez t0, L(count_pos) ++ addi.d a0, a0, 8 ++ ++ ++L(loop_16_8bit): ++ ld.d t2, a0, 0 ++ sub.d t1, t2, a2 ++ nor t0, t2, a3 ++ and t0, t0, t1 ++ ++ bnez t0, L(count_pos) ++ ld.d t2, a0, 8 ++ addi.d a0, a0, 16 ++ sub.d t1, t2, a2 ++ ++ nor t0, t2, a3 ++ and t0, t0, t1 ++ beqz t0, L(loop_16_8bit) ++ addi.d a0, a0, -8 ++ ++L(count_pos): ++ ctz.d t1, t0 ++ sub.d a0, a0, a1 ++ srli.d t1, t1, 3 ++ add.d a0, a0, t1 ++ ++ jr ra ++END(STRLEN) ++ ++libc_hidden_builtin_def (STRLEN) +diff --git a/sysdeps/loongarch/lp64/multiarch/strlen-lasx.S b/sysdeps/loongarch/lp64/multiarch/strlen-lasx.S +new file mode 100644 +index 00000000..91342f34 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strlen-lasx.S +@@ -0,0 +1,63 @@ ++/* Optimized strlen implementation using LoongArch LASX instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. 
++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ . */ ++ ++#include ++#include ++#include ++ ++#if IS_IN (libc) && !defined __loongarch_soft_float ++ ++# define STRLEN __strlen_lasx ++ ++LEAF(STRLEN, 6) ++ move a1, a0 ++ bstrins.d a0, zero, 4, 0 ++ li.d t1, -1 ++ xvld xr0, a0, 0 ++ ++ xvmsknz.b xr0, xr0 ++ xvpickve.w xr1, xr0, 4 ++ vilvl.h vr0, vr1, vr0 ++ movfr2gr.s t0, fa0 # sign extend ++ ++ sra.w t0, t0, a1 ++ beq t0, t1, L(loop) ++ cto.w a0, t0 ++ jr ra ++ ++L(loop): ++ xvld xr0, a0, 32 ++ addi.d a0, a0, 32 ++ xvsetanyeqz.b fcc0, xr0 ++ bceqz fcc0, L(loop) ++ ++ ++ xvmsknz.b xr0, xr0 ++ sub.d a0, a0, a1 ++ xvpickve.w xr1, xr0, 4 ++ vilvl.h vr0, vr1, vr0 ++ ++ movfr2gr.s t0, fa0 ++ cto.w t0, t0 ++ add.d a0, a0, t0 ++ jr ra ++END(STRLEN) ++ ++libc_hidden_builtin_def (STRLEN) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/strlen-lsx.S b/sysdeps/loongarch/lp64/multiarch/strlen-lsx.S +new file mode 100644 +index 00000000..b09c12e0 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strlen-lsx.S +@@ -0,0 +1,71 @@ ++/* Optimized strlen implementation using LoongArch LSX instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ . */ ++ ++#include ++#include ++#include ++ ++#if IS_IN (libc) && !defined __loongarch_soft_float ++ ++# define STRLEN __strlen_lsx ++ ++LEAF(STRLEN, 6) ++ move a1, a0 ++ bstrins.d a0, zero, 4, 0 ++ vld vr0, a0, 0 ++ vld vr1, a0, 16 ++ ++ li.d t1, -1 ++ vmsknz.b vr0, vr0 ++ vmsknz.b vr1, vr1 ++ vilvl.h vr0, vr1, vr0 ++ ++ movfr2gr.s t0, fa0 ++ sra.w t0, t0, a1 ++ beq t0, t1, L(loop) ++ cto.w a0, t0 ++ ++ jr ra ++ nop ++ nop ++ nop ++ ++ ++L(loop): ++ vld vr0, a0, 32 ++ vld vr1, a0, 48 ++ addi.d a0, a0, 32 ++ vmin.bu vr2, vr0, vr1 ++ ++ vsetanyeqz.b fcc0, vr2 ++ bceqz fcc0, L(loop) ++ vmsknz.b vr0, vr0 ++ vmsknz.b vr1, vr1 ++ ++ vilvl.h vr0, vr1, vr0 ++ sub.d a0, a0, a1 ++ movfr2gr.s t0, fa0 ++ cto.w t0, t0 ++ ++ add.d a0, a0, t0 ++ jr ra ++END(STRLEN) ++ ++libc_hidden_builtin_def (STRLEN) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/strlen.c b/sysdeps/loongarch/lp64/multiarch/strlen.c +new file mode 100644 +index 00000000..381c2daa +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strlen.c +@@ -0,0 +1,37 @@ ++/* Multiple versions of strlen. ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. 
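The vector strlen variants above reduce each 32-byte block to a bitmask with xvmsknz.b (bit i set iff byte i is non-zero), merge the two halves' masks with xvpickve.w/vilvl.h, shift away the bytes before the real start pointer, and then use cto (count trailing ones) to obtain the offset of the first NUL. The last step in C terms, as a sketch:

    #include <stdint.h>

    /* m: per-byte non-zero mask of a block, already shifted so bit 0 is the
       first byte of interest.  Valid only when the block is known to contain
       the terminating NUL, i.e. ~m != 0.  */
    static inline unsigned
    len_in_block (uint32_t m)
    {
      return __builtin_ctz (~m);   /* cto.w: count trailing one bits */
    }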
++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++/* Define multiple versions only for the definition in libc. */ ++ ++#if IS_IN (libc) ++# define strlen __redirect_strlen ++# include ++# undef strlen ++ ++# define SYMBOL_NAME strlen ++# include "ifunc-strlen.h" ++ ++libc_ifunc_redirected (__redirect_strlen, strlen, IFUNC_SELECTOR ()); ++ ++# ifdef SHARED ++__hidden_ver1 (strlen, __GI_strlen, __redirect_strlen) ++ __attribute__ ((visibility ("hidden"))) __attribute_copy__ (strlen); ++# endif ++ ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/strncmp-aligned.S b/sysdeps/loongarch/lp64/multiarch/strncmp-aligned.S +new file mode 100644 +index 00000000..f63de872 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strncmp-aligned.S +@@ -0,0 +1,218 @@ ++/* Optimized strncmp implementation using basic LoongArch instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ . 
*/ ++ ++#include ++#include ++#include ++ ++#if IS_IN (libc) ++# define STRNCMP __strncmp_aligned ++#else ++# define STRNCMP strncmp ++#endif ++ ++LEAF(STRNCMP, 6) ++ beqz a2, L(ret0) ++ lu12i.w a5, 0x01010 ++ andi a3, a0, 0x7 ++ ori a5, a5, 0x101 ++ ++ andi a4, a1, 0x7 ++ bstrins.d a5, a5, 63, 32 ++ li.d t7, -1 ++ li.d t8, 8 ++ ++ addi.d a2, a2, -1 ++ slli.d a6, a5, 7 ++ bne a3, a4, L(unaligned) ++ bstrins.d a0, zero, 2, 0 ++ ++ bstrins.d a1, zero, 2, 0 ++ ld.d t0, a0, 0 ++ ld.d t1, a1, 0 ++ slli.d t2, a3, 3 ++ ++ ++ sub.d t5, t8, a3 ++ srl.d t3, t7, t2 ++ srl.d t0, t0, t2 ++ srl.d t1, t1, t2 ++ ++ orn t0, t0, t3 ++ orn t1, t1, t3 ++ sub.d t2, t0, a5 ++ andn t3, a6, t0 ++ ++ and t2, t2, t3 ++ bne t0, t1, L(al_end) ++ sltu t4, a2, t5 ++ sub.d a2, a2, t5 ++ ++L(al_loop): ++ or t4, t2, t4 ++ bnez t4, L(ret0) ++ ldx.d t0, a0, t8 ++ ldx.d t1, a1, t8 ++ ++ ++ addi.d t8, t8, 8 ++ sltui t4, a2, 8 ++ addi.d a2, a2, -8 ++ sub.d t2, t0, a5 ++ ++ andn t3, a6, t0 ++ and t2, t2, t3 ++ beq t0, t1, L(al_loop) ++ addi.d a2, a2, 8 ++ ++L(al_end): ++ xor t3, t0, t1 ++ or t2, t2, t3 ++ ctz.d t2, t2 ++ srli.d t4, t2, 3 ++ ++ bstrins.d t2, zero, 2, 0 ++ srl.d t0, t0, t2 ++ srl.d t1, t1, t2 ++ andi t0, t0, 0xff ++ ++ ++ andi t1, t1, 0xff ++ sltu t2, a2, t4 ++ sub.d a0, t0, t1 ++ masknez a0, a0, t2 ++ ++ jr ra ++L(ret0): ++ move a0, zero ++ jr ra ++ nop ++ ++L(unaligned): ++ slt a7, a4, a3 ++ xor t0, a0, a1 ++ maskeqz t0, t0, a7 ++ xor a0, a0, t0 ++ ++ xor a1, a1, t0 ++ andi a3, a0, 0x7 ++ andi a4, a1, 0x7 ++ bstrins.d a0, zero, 2, 0 ++ ++ ++ bstrins.d a1, zero, 2, 0 ++ ld.d t4, a0, 0 ++ ld.d t1, a1, 0 ++ slli.d t2, a3, 3 ++ ++ slli.d t3, a4, 3 ++ srl.d t5, t7, t3 ++ srl.d t0, t4, t2 ++ srl.d t1, t1, t3 ++ ++ orn t0, t0, t5 ++ orn t1, t1, t5 ++ bne t0, t1, L(not_equal) ++ sub.d t6, t8, a4 ++ ++ sub.d a4, t2, t3 ++ sll.d t2, t7, t2 ++ sub.d t5, t8, a3 ++ orn t4, t4, t2 ++ ++ ++ sub.d t2, t4, a5 ++ andn t3, a6, t4 ++ sltu t7, a2, t5 ++ and t2, t2, t3 ++ ++ sub.d a3, zero, a4 ++ or t2, t2, t7 ++ bnez t2, L(un_end) ++ sub.d t7, t5, t6 ++ ++ sub.d a2, a2, t5 ++ sub.d t6, t8, t7 ++L(un_loop): ++ srl.d t5, t4, a4 ++ ldx.d t4, a0, t8 ++ ++ ldx.d t1, a1, t8 ++ addi.d t8, t8, 8 ++ sll.d t0, t4, a3 ++ or t0, t0, t5 ++ ++ ++ bne t0, t1, L(loop_not_equal) ++ sub.d t2, t4, a5 ++ andn t3, a6, t4 ++ sltui t5, a2, 8 ++ ++ and t2, t2, t3 ++ addi.d a2, a2, -8 ++ or t3, t2, t5 ++ beqz t3, L(un_loop) ++ ++ addi.d a2, a2, 8 ++L(un_end): ++ sub.d t2, t0, a5 ++ andn t3, a6, t0 ++ sltu t5, a2, t6 ++ ++ and t2, t2, t3 ++ or t2, t2, t5 ++ bnez t2, L(ret0) ++ ldx.d t1, a1, t8 ++ ++ ++ srl.d t0, t4, a4 ++ sub.d a2, a2, t6 ++L(not_equal): ++ sub.d t2, t0, a5 ++ andn t3, a6, t0 ++ ++ xor t4, t0, t1 ++ and t2, t2, t3 ++ or t2, t2, t4 ++ ctz.d t2, t2 ++ ++ bstrins.d t2, zero, 2, 0 ++ srli.d t4, t2, 3 ++ srl.d t0, t0, t2 ++ srl.d t1, t1, t2 ++ ++ andi t0, t0, 0xff ++ andi t1, t1, 0xff ++ sub.d t2, t0, t1 ++ sub.d t3, t1, t0 ++ ++ ++ masknez t0, t2, a7 ++ maskeqz t1, t3, a7 ++ sltu t2, a2, t4 ++ or a0, t0, t1 ++ ++ masknez a0, a0, t2 ++ jr ra ++L(loop_not_equal): ++ add.d a2, a2, t7 ++ b L(not_equal) ++END(STRNCMP) ++ ++libc_hidden_builtin_def (STRNCMP) +diff --git a/sysdeps/loongarch/lp64/multiarch/strncmp-lsx.S b/sysdeps/loongarch/lp64/multiarch/strncmp-lsx.S +new file mode 100644 +index 00000000..83cb801d +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strncmp-lsx.S +@@ -0,0 +1,208 @@ ++/* Optimized strncmp implementation using LoongArch LSX instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. 
++ ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ . */ ++ ++#include ++#include ++#include ++ ++#if IS_IN (libc) && !defined __loongarch_soft_float ++ ++# define STRNCMP __strncmp_lsx ++ ++LEAF(STRNCMP, 6) ++ beqz a2, L(ret0) ++ pcalau12i t0, %pc_hi20(L(INDEX)) ++ andi a3, a0, 0xf ++ vld vr2, t0, %pc_lo12(L(INDEX)) ++ ++ andi a4, a1, 0xf ++ li.d t2, 16 ++ bne a3, a4, L(unaligned) ++ xor t0, a0, a3 ++ ++ xor t1, a1, a4 ++ vld vr0, t0, 0 ++ vld vr1, t1, 0 ++ vreplgr2vr.b vr3, a3 ++ ++ ++ sub.d t2, t2, a3 ++ vadd.b vr3, vr3, vr2 ++ vshuf.b vr0, vr3, vr0, vr3 ++ vshuf.b vr1, vr3, vr1, vr3 ++ ++ vseq.b vr3, vr0, vr1 ++ vmin.bu vr3, vr0, vr3 ++ bgeu t2, a2, L(al_early_end) ++ vsetanyeqz.b fcc0, vr3 ++ ++ bcnez fcc0, L(al_end) ++ add.d a3, a0, a2 ++ addi.d a4, a3, -1 ++ bstrins.d a4, zero, 3, 0 ++ ++ sub.d a2, a3, a4 ++L(al_loop): ++ vld vr0, t0, 16 ++ vld vr1, t1, 16 ++ addi.d t0, t0, 16 ++ ++ ++ addi.d t1, t1, 16 ++ vseq.b vr3, vr0, vr1 ++ vmin.bu vr3, vr0, vr3 ++ beq t0, a4, L(al_early_end) ++ ++ vsetanyeqz.b fcc0, vr3 ++ bceqz fcc0, L(al_loop) ++L(al_end): ++ vseqi.b vr3, vr3, 0 ++ vfrstpi.b vr3, vr3, 0 ++ ++ vshuf.b vr0, vr0, vr0, vr3 ++ vshuf.b vr1, vr1, vr1, vr3 ++ vpickve2gr.bu t0, vr0, 0 ++ vpickve2gr.bu t1, vr1, 0 ++ ++ sub.d a0, t0, t1 ++ jr ra ++L(al_early_end): ++ vreplgr2vr.b vr4, a2 ++ vslt.b vr4, vr2, vr4 ++ ++ ++ vorn.v vr3, vr3, vr4 ++ b L(al_end) ++L(unaligned): ++ slt a5, a3, a4 ++ xor t0, a0, a1 ++ ++ maskeqz t0, t0, a5 ++ xor a0, a0, t0 ++ xor a1, a1, t0 ++ andi a3, a0, 0xf ++ ++ andi a4, a1, 0xf ++ xor t0, a0, a3 ++ xor t1, a1, a4 ++ vld vr0, t0, 0 ++ ++ vld vr3, t1, 0 ++ sub.d t2, t2, a3 ++ vreplgr2vr.b vr4, a3 ++ vreplgr2vr.b vr5, a4 ++ ++ ++ vaddi.bu vr6, vr2, 16 ++ vsub.b vr7, vr4, vr5 ++ vsub.b vr6, vr6, vr7 ++ vadd.b vr4, vr2, vr4 ++ ++ vshuf.b vr1, vr3, vr3, vr6 ++ vshuf.b vr0, vr7, vr0, vr4 ++ vshuf.b vr1, vr7, vr1, vr4 ++ vseq.b vr4, vr0, vr1 ++ ++ vmin.bu vr4, vr0, vr4 ++ bgeu t2, a2, L(un_early_end) ++ vsetanyeqz.b fcc0, vr4 ++ bcnez fcc0, L(un_end) ++ ++ add.d a6, a0, a2 ++ vslt.b vr5, vr2, vr5 ++ addi.d a7, a6, -1 ++ vor.v vr3, vr3, vr5 ++ ++ ++ bstrins.d a7, zero, 3, 0 ++ sub.d a2, a6, a7 ++L(un_loop): ++ vld vr0, t0, 16 ++ addi.d t0, t0, 16 ++ ++ vsetanyeqz.b fcc0, vr3 ++ bcnez fcc0, L(has_zero) ++ beq t0, a7, L(end_with_len) ++ vor.v vr1, vr3, vr3 ++ ++ vld vr3, t1, 16 ++ addi.d t1, t1, 16 ++ vshuf.b vr1, vr3, vr1, vr6 ++ vseq.b vr4, vr0, vr1 ++ ++ vmin.bu vr4, vr0, vr4 ++ vsetanyeqz.b fcc0, vr4 ++ bceqz fcc0, L(un_loop) ++L(un_end): ++ vseqi.b vr4, vr4, 0 ++ ++ ++ vfrstpi.b vr4, vr4, 0 ++ vshuf.b vr0, vr0, vr0, vr4 ++ vshuf.b vr1, vr1, vr1, vr4 ++ vpickve2gr.bu t0, vr0, 0 ++ ++ vpickve2gr.bu t1, vr1, 0 ++ sub.d t2, t0, t1 ++ sub.d t3, t1, t0 ++ masknez t0, t2, a5 ++ ++ maskeqz t1, t3, a5 ++ or a0, t0, t1 ++ jr ra ++L(has_zero): ++ vshuf.b vr1, vr3, vr3, vr6 ++ ++ vseq.b vr4, vr0, vr1 ++ vmin.bu vr4, vr0, vr4 ++ bne 
t0, a7, L(un_end) ++L(un_early_end): ++ vreplgr2vr.b vr5, a2 ++ ++ vslt.b vr5, vr2, vr5 ++ vorn.v vr4, vr4, vr5 ++ b L(un_end) ++L(end_with_len): ++ sub.d a6, a3, a4 ++ ++ bgeu a6, a2, 1f ++ vld vr4, t1, 16 ++1: ++ vshuf.b vr1, vr4, vr3, vr6 ++ vseq.b vr4, vr0, vr1 ++ ++ vmin.bu vr4, vr0, vr4 ++ vreplgr2vr.b vr5, a2 ++ vslt.b vr5, vr2, vr5 ++ vorn.v vr4, vr4, vr5 ++ ++ b L(un_end) ++L(ret0): ++ move a0, zero ++ jr ra ++END(STRNCMP) ++ ++ .section .rodata.cst16,"M",@progbits,16 ++ .align 4 ++L(INDEX): ++ .dword 0x0706050403020100 ++ .dword 0x0f0e0d0c0b0a0908 ++ ++libc_hidden_builtin_def (STRNCMP) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/strncmp.c b/sysdeps/loongarch/lp64/multiarch/strncmp.c +new file mode 100644 +index 00000000..af6d0bc4 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strncmp.c +@@ -0,0 +1,35 @@ ++/* Multiple versions of strncmp. ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++/* Define multiple versions only for the definition in libc. */ ++#if IS_IN (libc) ++# define strncmp __redirect_strncmp ++# include ++# undef strncmp ++ ++# define SYMBOL_NAME strncmp ++# include "ifunc-strncmp.h" ++ ++libc_ifunc_redirected (__redirect_strncmp, strncmp, IFUNC_SELECTOR ()); ++ ++# ifdef SHARED ++__hidden_ver1 (strncmp, __GI_strncmp, __redirect_strncmp) ++ __attribute__ ((visibility ("hidden"))) __attribute_copy__ (strncmp); ++# endif ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/strnlen-aligned.S b/sysdeps/loongarch/lp64/multiarch/strnlen-aligned.S +new file mode 100644 +index 00000000..a8296a1b +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strnlen-aligned.S +@@ -0,0 +1,102 @@ ++/* Optimized strnlen implementation using basic LoongArch instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ . 
*/ ++ ++#include ++#include ++#include ++ ++#if IS_IN (libc) ++# define STRNLEN __strnlen_aligned ++#else ++# define STRNLEN __strnlen ++#endif ++ ++LEAF(STRNLEN, 6) ++ beqz a1, L(out) ++ lu12i.w a2, 0x01010 ++ andi t1, a0, 0x7 ++ move t4, a0 ++ ++ bstrins.d a0, zero, 2, 0 ++ ori a2, a2, 0x101 ++ li.w t0, -1 ++ ld.d t2, a0, 0 ++ ++ slli.d t3, t1, 3 ++ bstrins.d a2, a2, 63, 32 ++ li.w t5, 8 ++ slli.d a3, a2, 7 ++ ++ sub.w t1, t5, t1 ++ sll.d t0, t0, t3 ++ orn t2, t2, t0 ++ sub.d t0, t2, a2 ++ ++ ++ andn t3, a3, t2 ++ and t0, t0, t3 ++ bnez t0, L(count_pos) ++ sub.d t5, a1, t1 ++ ++ bgeu t1, a1, L(out) ++ addi.d a0, a0, 8 ++L(loop): ++ ld.d t2, a0, 0 ++ sub.d t0, t2, a2 ++ ++ andn t1, a3, t2 ++ sltui t6, t5, 9 ++ and t0, t0, t1 ++ or t7, t0, t6 ++ ++ bnez t7, L(count_pos) ++ ld.d t2, a0, 8 ++ addi.d a0, a0, 16 ++ sub.d t0, t2, a2 ++ ++ ++ andn t1, a3, t2 ++ sltui t6, t5, 17 ++ and t0, t0, t1 ++ addi.d t5, t5, -16 ++ ++ or t7, t0, t6 ++ beqz t7, L(loop) ++ addi.d a0, a0, -8 ++L(count_pos): ++ ctz.d t1, t0 ++ ++ sub.d a0, a0, t4 ++ srli.d t1, t1, 3 ++ add.d a0, t1, a0 ++ sltu t0, a0, a1 ++ ++ masknez t1, a1, t0 ++ maskeqz a0, a0, t0 ++ or a0, a0, t1 ++ jr ra ++ ++ ++L(out): ++ move a0, a1 ++ jr ra ++END(STRNLEN) ++ ++weak_alias (STRNLEN, strnlen) ++libc_hidden_builtin_def (STRNLEN) +diff --git a/sysdeps/loongarch/lp64/multiarch/strnlen-lasx.S b/sysdeps/loongarch/lp64/multiarch/strnlen-lasx.S +new file mode 100644 +index 00000000..aa6c812d +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strnlen-lasx.S +@@ -0,0 +1,100 @@ ++/* Optimized strnlen implementation using LoongArch LASX instructions ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ . 
*/ ++ ++#include ++#include ++#include ++ ++#if IS_IN (libc) && !defined __loongarch_soft_float ++ ++# define STRNLEN __strnlen_lasx ++ ++LEAF(STRNLEN, 6) ++ beqz a1, L(ret0) ++ andi t1, a0, 0x3f ++ li.d t3, 65 ++ sub.d a2, a0, t1 ++ ++ xvld xr0, a2, 0 ++ xvld xr1, a2, 32 ++ sub.d t1, t3, t1 ++ move a3, a0 ++ ++ sltu t1, a1, t1 ++ xvmsknz.b xr0, xr0 ++ xvmsknz.b xr1, xr1 ++ xvpickve.w xr2, xr0, 4 ++ ++ xvpickve.w xr3, xr1, 4 ++ vilvl.h vr0, vr2, vr0 ++ vilvl.h vr1, vr3, vr1 ++ vilvl.w vr0, vr1, vr0 ++ ++ ++ movfr2gr.d t0, fa0 ++ sra.d t0, t0, a0 ++ orn t1, t1, t0 ++ bnez t1, L(end) ++ ++ add.d a4, a0, a1 ++ move a0, a2 ++ addi.d a4, a4, -1 ++ bstrins.d a4, zero, 5, 0 ++ ++L(loop): ++ xvld xr0, a0, 64 ++ xvld xr1, a0, 96 ++ addi.d a0, a0, 64 ++ beq a0, a4, L(out) ++ ++ xvmin.bu xr2, xr0, xr1 ++ xvsetanyeqz.b fcc0, xr2 ++ bceqz fcc0, L(loop) ++L(out): ++ xvmsknz.b xr0, xr0 ++ ++ ++ xvmsknz.b xr1, xr1 ++ xvpickve.w xr2, xr0, 4 ++ xvpickve.w xr3, xr1, 4 ++ vilvl.h vr0, vr2, vr0 ++ ++ vilvl.h vr1, vr3, vr1 ++ vilvl.w vr0, vr1, vr0 ++ movfr2gr.d t0, fa0 ++L(end): ++ sub.d a0, a0, a3 ++ ++ cto.d t0, t0 ++ add.d a0, a0, t0 ++ sltu t1, a0, a1 ++ masknez t0, a1, t1 ++ ++ maskeqz t1, a0, t1 ++ or a0, t0, t1 ++ jr ra ++L(ret0): ++ move a0, zero ++ ++ ++ jr ra ++END(STRNLEN) ++ ++libc_hidden_def (STRNLEN) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/strnlen-lsx.S b/sysdeps/loongarch/lp64/multiarch/strnlen-lsx.S +new file mode 100644 +index 00000000..d0febe3e +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strnlen-lsx.S +@@ -0,0 +1,89 @@ ++/* Optimized strnlen implementation using LoongArch LSX instructions ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ . 
*/ ++ ++#include ++#include ++#include ++ ++#if IS_IN (libc) && !defined __loongarch_soft_float ++ ++# define STRNLEN __strnlen_lsx ++ ++LEAF(STRNLEN, 6) ++ beqz a1, L(ret0) ++ andi t1, a0, 0x1f ++ li.d t3, 33 ++ sub.d a2, a0, t1 ++ ++ vld vr0, a2, 0 ++ vld vr1, a2, 16 ++ sub.d t1, t3, t1 ++ move a3, a0 ++ ++ sltu t1, a1, t1 ++ vmsknz.b vr0, vr0 ++ vmsknz.b vr1, vr1 ++ vilvl.h vr0, vr1, vr0 ++ ++ movfr2gr.s t0, fa0 ++ sra.w t0, t0, a0 ++ orn t1, t1, t0 ++ bnez t1, L(end) ++ ++ ++ add.d a4, a0, a1 ++ move a0, a2 ++ addi.d a4, a4, -1 ++ bstrins.d a4, zero, 4, 0 ++ ++L(loop): ++ vld vr0, a0, 32 ++ vld vr1, a0, 48 ++ addi.d a0, a0, 32 ++ beq a0, a4, L(out) ++ ++ vmin.bu vr2, vr0, vr1 ++ vsetanyeqz.b fcc0, vr2 ++ bceqz fcc0, L(loop) ++L(out): ++ vmsknz.b vr0, vr0 ++ ++ vmsknz.b vr1, vr1 ++ vilvl.h vr0, vr1, vr0 ++ movfr2gr.s t0, fa0 ++L(end): ++ sub.d a0, a0, a3 ++ ++ ++ cto.w t0, t0 ++ add.d a0, a0, t0 ++ sltu t1, a0, a1 ++ masknez t0, a1, t1 ++ ++ maskeqz t1, a0, t1 ++ or a0, t0, t1 ++ jr ra ++L(ret0): ++ move a0, zero ++ ++ jr ra ++END(STRNLEN) ++ ++libc_hidden_builtin_def (STRNLEN) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/strnlen.c b/sysdeps/loongarch/lp64/multiarch/strnlen.c +new file mode 100644 +index 00000000..38b7a25a +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strnlen.c +@@ -0,0 +1,39 @@ ++/* Multiple versions of strnlen. ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++/* Define multiple versions only for the definition in libc. */ ++#if IS_IN (libc) ++# define strnlen __redirect_strnlen ++# define __strnlen __redirect___strnlen ++# include ++# undef __strnlen ++# undef strnlen ++ ++# define SYMBOL_NAME strnlen ++# include "ifunc-strnlen.h" ++ ++libc_ifunc_redirected (__redirect_strnlen, __strnlen, IFUNC_SELECTOR ()); ++weak_alias (__strnlen, strnlen); ++# ifdef SHARED ++__hidden_ver1 (__strnlen, __GI___strnlen, __redirect___strnlen) ++ __attribute__((visibility ("hidden"))) __attribute_copy__ (strnlen); ++__hidden_ver1 (strnlen, __GI_strnlen, __redirect_strnlen) ++ __attribute__((weak, visibility ("hidden"))) __attribute_copy__ (strnlen); ++# endif ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/strrchr-aligned.S b/sysdeps/loongarch/lp64/multiarch/strrchr-aligned.S +new file mode 100644 +index 00000000..a73deb78 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strrchr-aligned.S +@@ -0,0 +1,170 @@ ++/* Optimized strrchr implementation using basic LoongArch instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. 
++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ . */ ++ ++#include ++#include ++#include ++ ++#if IS_IN (libc) ++# define STRRCHR __strrchr_aligned ++#else ++# define STRRCHR strrchr ++#endif ++ ++LEAF(STRRCHR, 6) ++ slli.d t0, a0, 3 ++ bstrins.d a0, zero, 2, 0 ++ lu12i.w a2, 0x01010 ++ ld.d t2, a0, 0 ++ ++ andi a1, a1, 0xff ++ ori a2, a2, 0x101 ++ li.d t3, -1 ++ bstrins.d a2, a2, 63, 32 ++ ++ sll.d t5, t3, t0 ++ slli.d a3, a2, 7 ++ orn t4, t2, t5 ++ mul.d a1, a1, a2 ++ ++ sub.d t0, t4, a2 ++ andn t1, a3, t4 ++ and t1, t0, t1 ++ beqz t1, L(find_tail) ++ ++ ++ ctz.d t0, t1 ++ orn t0, zero, t0 ++ xor t2, t4, a1 ++ srl.d t0, t3, t0 ++ ++ orn t2, t2, t0 ++ orn t2, t2, t5 ++ revb.d t2, t2 ++ sub.d t1, t2, a2 ++ ++ andn t0, a3, t2 ++ and t1, t0, t1 ++ ctz.d t0, t1 ++ srli.d t0, t0, 3 ++ ++ addi.d a0, a0, 7 ++ sub.d a0, a0, t0 ++ maskeqz a0, a0, t1 ++ jr ra ++ ++ ++L(find_tail): ++ addi.d a4, a0, 8 ++ addi.d a0, a0, 8 ++L(loop_ascii): ++ ld.d t2, a0, 0 ++ sub.d t1, t2, a2 ++ ++ and t0, t1, a3 ++ bnez t0, L(more_check) ++ ld.d t2, a0, 8 ++ sub.d t1, t2, a2 ++ ++ and t0, t1, a3 ++ addi.d a0, a0, 16 ++ beqz t0, L(loop_ascii) ++ addi.d a0, a0, -8 ++ ++L(more_check): ++ andn t0, a3, t2 ++ and t1, t1, t0 ++ bnez t1, L(tail) ++ addi.d a0, a0, 8 ++ ++ ++L(loop_nonascii): ++ ld.d t2, a0, 0 ++ sub.d t1, t2, a2 ++ andn t0, a3, t2 ++ and t1, t0, t1 ++ ++ bnez t1, L(tail) ++ ld.d t2, a0, 8 ++ addi.d a0, a0, 16 ++ sub.d t1, t2, a2 ++ ++ andn t0, a3, t2 ++ and t1, t0, t1 ++ beqz t1, L(loop_nonascii) ++ addi.d a0, a0, -8 ++ ++L(tail): ++ ctz.d t0, t1 ++ orn t0, zero, t0 ++ xor t2, t2, a1 ++ srl.d t0, t3, t0 ++ ++ ++ orn t2, t2, t0 ++ revb.d t2, t2 ++ sub.d t1, t2, a2 ++ andn t0, a3, t2 ++ ++ and t1, t0, t1 ++ bnez t1, L(count_pos) ++L(find_loop): ++ beq a0, a4, L(find_end) ++ ld.d t2, a0, -8 ++ ++ addi.d a0, a0, -8 ++ xor t2, t2, a1 ++ sub.d t1, t2, a2 ++ andn t0, a3, t2 ++ ++ and t1, t0, t1 ++ beqz t1, L(find_loop) ++ revb.d t2, t2 ++ sub.d t1, t2, a2 ++ ++ ++ andn t0, a3, t2 ++ and t1, t0, t1 ++L(count_pos): ++ ctz.d t0, t1 ++ addi.d a0, a0, 7 ++ ++ srli.d t0, t0, 3 ++ sub.d a0, a0, t0 ++ jr ra ++ nop ++ ++L(find_end): ++ xor t2, t4, a1 ++ orn t2, t2, t5 ++ revb.d t2, t2 ++ sub.d t1, t2, a2 ++ ++ ++ andn t0, a3, t2 ++ and t1, t0, t1 ++ ctz.d t0, t1 ++ srli.d t0, t0, 3 ++ ++ addi.d a0, a4, -1 ++ sub.d a0, a0, t0 ++ maskeqz a0, a0, t1 ++ jr ra ++END(STRRCHR) ++ ++libc_hidden_builtin_def(STRRCHR) +diff --git a/sysdeps/loongarch/lp64/multiarch/strrchr-lasx.S b/sysdeps/loongarch/lp64/multiarch/strrchr-lasx.S +new file mode 100644 +index 00000000..5a6e2297 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strrchr-lasx.S +@@ -0,0 +1,176 @@ ++/* Optimized strrchr implementation using LoongArch LASX instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. 
++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ . */ ++ ++#include ++#include ++#include ++ ++#if IS_IN (libc) && !defined __loongarch_soft_float ++ ++#define STRRCHR __strrchr_lasx ++ ++LEAF(STRRCHR, 6) ++ move a2, a0 ++ bstrins.d a0, zero, 5, 0 ++ xvld xr0, a0, 0 ++ xvld xr1, a0, 32 ++ ++ li.d t2, -1 ++ xvreplgr2vr.b xr4, a1 ++ xvmsknz.b xr2, xr0 ++ xvmsknz.b xr3, xr1 ++ ++ xvpickve.w xr5, xr2, 4 ++ xvpickve.w xr6, xr3, 4 ++ vilvl.h vr2, vr5, vr2 ++ vilvl.h vr3, vr6, vr3 ++ ++ vilvl.w vr2, vr3, vr2 ++ movfr2gr.d t0, fa2 ++ sra.d t0, t0, a2 ++ beq t0, t2, L(find_tail) ++ ++ ++ xvseq.b xr2, xr0, xr4 ++ xvseq.b xr3, xr1, xr4 ++ xvmsknz.b xr2, xr2 ++ xvmsknz.b xr3, xr3 ++ ++ xvpickve.w xr4, xr2, 4 ++ xvpickve.w xr5, xr3, 4 ++ vilvl.h vr2, vr4, vr2 ++ vilvl.h vr3, vr5, vr3 ++ ++ vilvl.w vr1, vr3, vr2 ++ slli.d t3, t2, 1 ++ movfr2gr.d t1, fa1 ++ cto.d t0, t0 ++ ++ srl.d t1, t1, a2 ++ sll.d t3, t3, t0 ++ addi.d a0, a2, 63 ++ andn t1, t1, t3 ++ ++ ++ clz.d t0, t1 ++ sub.d a0, a0, t0 ++ maskeqz a0, a0, t1 ++ jr ra ++ ++ .align 5 ++L(find_tail): ++ addi.d a3, a0, 64 ++L(loop): ++ xvld xr2, a0, 64 ++ xvld xr3, a0, 96 ++ addi.d a0, a0, 64 ++ ++ xvmin.bu xr5, xr2, xr3 ++ xvsetanyeqz.b fcc0, xr5 ++ bceqz fcc0, L(loop) ++ xvmsknz.b xr5, xr2 ++ ++ ++ xvmsknz.b xr6, xr3 ++ xvpickve.w xr7, xr5, 4 ++ xvpickve.w xr8, xr6, 4 ++ vilvl.h vr5, vr7, vr5 ++ ++ vilvl.h vr6, vr8, vr6 ++ xvseq.b xr2, xr2, xr4 ++ xvseq.b xr3, xr3, xr4 ++ xvmsknz.b xr2, xr2 ++ ++ xvmsknz.b xr3, xr3 ++ xvpickve.w xr7, xr2, 4 ++ xvpickve.w xr8, xr3, 4 ++ vilvl.h vr2, vr7, vr2 ++ ++ vilvl.h vr3, vr8, vr3 ++ vilvl.w vr5, vr6, vr5 ++ vilvl.w vr2, vr3, vr2 ++ movfr2gr.d t0, fa5 ++ ++ ++ movfr2gr.d t1, fa2 ++ slli.d t3, t2, 1 ++ cto.d t0, t0 ++ sll.d t3, t3, t0 ++ ++ andn t1, t1, t3 ++ beqz t1, L(find_loop) ++ clz.d t0, t1 ++ addi.d a0, a0, 63 ++ ++ sub.d a0, a0, t0 ++ jr ra ++L(find_loop): ++ beq a0, a3, L(find_end) ++ xvld xr2, a0, -64 ++ ++ xvld xr3, a0, -32 ++ addi.d a0, a0, -64 ++ xvseq.b xr2, xr2, xr4 ++ xvseq.b xr3, xr3, xr4 ++ ++ ++ xvmax.bu xr5, xr2, xr3 ++ xvseteqz.v fcc0, xr5 ++ bcnez fcc0, L(find_loop) ++ xvmsknz.b xr0, xr2 ++ ++ xvmsknz.b xr1, xr3 ++ xvpickve.w xr2, xr0, 4 ++ xvpickve.w xr3, xr1, 4 ++ vilvl.h vr0, vr2, vr0 ++ ++ vilvl.h vr1, vr3, vr1 ++ vilvl.w vr0, vr1, vr0 ++ movfr2gr.d t0, fa0 ++ addi.d a0, a0, 63 ++ ++ clz.d t0, t0 ++ sub.d a0, a0, t0 ++ jr ra ++ nop ++ ++ ++L(find_end): ++ xvseq.b xr2, xr0, xr4 ++ xvseq.b xr3, xr1, xr4 ++ xvmsknz.b xr2, xr2 ++ xvmsknz.b xr3, xr3 ++ ++ xvpickve.w xr4, xr2, 4 ++ xvpickve.w xr5, xr3, 4 ++ vilvl.h vr2, vr4, vr2 ++ vilvl.h vr3, vr5, vr3 ++ ++ vilvl.w vr1, vr3, vr2 ++ movfr2gr.d t1, fa1 ++ addi.d a0, a2, 63 ++ srl.d t1, t1, a2 ++ ++ clz.d t0, t1 ++ sub.d a0, a0, t0 ++ maskeqz a0, a0, t1 ++ jr ra ++END(STRRCHR) ++ ++libc_hidden_builtin_def(STRRCHR) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/strrchr-lsx.S b/sysdeps/loongarch/lp64/multiarch/strrchr-lsx.S +new file mode 
100644 +index 00000000..8f2fd22e +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strrchr-lsx.S +@@ -0,0 +1,144 @@ ++/* Optimized strrchr implementation using LoongArch LSX instructions. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library. If not, see ++ . */ ++ ++#include ++#include ++#include ++ ++#if IS_IN (libc) && !defined __loongarch_soft_float ++ ++#define STRRCHR __strrchr_lsx ++ ++LEAF(STRRCHR, 6) ++ move a2, a0 ++ bstrins.d a0, zero, 4, 0 ++ vld vr0, a0, 0 ++ vld vr1, a0, 16 ++ ++ li.d t2, -1 ++ vreplgr2vr.b vr4, a1 ++ vmsknz.b vr2, vr0 ++ vmsknz.b vr3, vr1 ++ ++ vilvl.h vr2, vr3, vr2 ++ movfr2gr.s t0, fa2 ++ sra.w t0, t0, a2 ++ beq t0, t2, L(find_tail) ++ ++ vseq.b vr2, vr0, vr4 ++ vseq.b vr3, vr1, vr4 ++ vmsknz.b vr2, vr2 ++ vmsknz.b vr3, vr3 ++ ++ ++ vilvl.h vr1, vr3, vr2 ++ slli.d t3, t2, 1 ++ movfr2gr.s t1, fa1 ++ cto.w t0, t0 ++ ++ srl.w t1, t1, a2 ++ sll.d t3, t3, t0 ++ addi.d a0, a2, 31 ++ andn t1, t1, t3 ++ ++ clz.w t0, t1 ++ sub.d a0, a0, t0 ++ maskeqz a0, a0, t1 ++ jr ra ++ ++ .align 5 ++L(find_tail): ++ addi.d a3, a0, 32 ++L(loop): ++ vld vr2, a0, 32 ++ vld vr3, a0, 48 ++ addi.d a0, a0, 32 ++ ++ vmin.bu vr5, vr2, vr3 ++ vsetanyeqz.b fcc0, vr5 ++ bceqz fcc0, L(loop) ++ vmsknz.b vr5, vr2 ++ ++ vmsknz.b vr6, vr3 ++ vilvl.h vr5, vr6, vr5 ++ vseq.b vr2, vr2, vr4 ++ vseq.b vr3, vr3, vr4 ++ ++ vmsknz.b vr2, vr2 ++ vmsknz.b vr3, vr3 ++ vilvl.h vr2, vr3, vr2 ++ movfr2gr.s t0, fa5 ++ ++ ++ movfr2gr.s t1, fa2 ++ slli.d t3, t2, 1 ++ cto.w t0, t0 ++ sll.d t3, t3, t0 ++ ++ andn t1, t1, t3 ++ beqz t1, L(find_loop) ++ clz.w t0, t1 ++ addi.d a0, a0, 31 ++ ++ sub.d a0, a0, t0 ++ jr ra ++L(find_loop): ++ beq a0, a3, L(find_end) ++ vld vr2, a0, -32 ++ ++ vld vr3, a0, -16 ++ addi.d a0, a0, -32 ++ vseq.b vr2, vr2, vr4 ++ vseq.b vr3, vr3, vr4 ++ ++ ++ vmax.bu vr5, vr2, vr3 ++ vseteqz.v fcc0, vr5 ++ bcnez fcc0, L(find_loop) ++ vmsknz.b vr0, vr2 ++ ++ vmsknz.b vr1, vr3 ++ vilvl.h vr0, vr1, vr0 ++ movfr2gr.s t0, fa0 ++ addi.d a0, a0, 31 ++ ++ clz.w t0, t0 ++ sub.d a0, a0, t0 ++ jr ra ++ nop ++ ++L(find_end): ++ vseq.b vr2, vr0, vr4 ++ vseq.b vr3, vr1, vr4 ++ vmsknz.b vr2, vr2 ++ vmsknz.b vr3, vr3 ++ ++ ++ vilvl.h vr1, vr3, vr2 ++ movfr2gr.s t1, fa1 ++ addi.d a0, a2, 31 ++ srl.w t1, t1, a2 ++ ++ clz.w t0, t1 ++ sub.d a0, a0, t0 ++ maskeqz a0, a0, t1 ++ jr ra ++END(STRRCHR) ++ ++libc_hidden_builtin_def(STRRCHR) ++#endif +diff --git a/sysdeps/loongarch/lp64/multiarch/strrchr.c b/sysdeps/loongarch/lp64/multiarch/strrchr.c +new file mode 100644 +index 00000000..d9c9f660 +--- /dev/null ++++ b/sysdeps/loongarch/lp64/multiarch/strrchr.c +@@ -0,0 +1,36 @@ ++/* Multiple versions of strrchr. ++ All versions must be listed in ifunc-impl-list.c. ++ Copyright (C) 2023 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. 
++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++/* Define multiple versions only for the definition in libc. */ ++#if IS_IN (libc) ++# define strrchr __redirect_strrchr ++# include ++# undef strrchr ++ ++# define SYMBOL_NAME strrchr ++# include "ifunc-strrchr.h" ++ ++libc_ifunc_redirected (__redirect_strrchr, strrchr, IFUNC_SELECTOR ()); ++weak_alias (strrchr, rindex) ++# ifdef SHARED ++__hidden_ver1 (strrchr, __GI_strrchr, __redirect_strrchr) ++ __attribute__ ((visibility ("hidden"))) __attribute_copy__ (strrchr); ++# endif ++ ++#endif +diff --git a/sysdeps/loongarch/setjmp.S b/sysdeps/loongarch/setjmp.S +index 6c7065cd..b6e4f727 100644 +--- a/sysdeps/loongarch/setjmp.S ++++ b/sysdeps/loongarch/setjmp.S +@@ -52,19 +52,19 @@ ENTRY (__sigsetjmp) + REG_S s8, a0, 12*SZREG + + #ifndef __loongarch_soft_float +- FREG_S $f24, a0, 13*SZREG + 0*SZFREG +- FREG_S $f25, a0, 13*SZREG + 1*SZFREG +- FREG_S $f26, a0, 13*SZREG + 2*SZFREG +- FREG_S $f27, a0, 13*SZREG + 3*SZFREG +- FREG_S $f28, a0, 13*SZREG + 4*SZFREG +- FREG_S $f29, a0, 13*SZREG + 5*SZFREG +- FREG_S $f30, a0, 13*SZREG + 6*SZFREG +- FREG_S $f31, a0, 13*SZREG + 7*SZFREG ++ FREG_S fs0, a0, 13*SZREG + 0*SZFREG ++ FREG_S fs1, a0, 13*SZREG + 1*SZFREG ++ FREG_S fs2, a0, 13*SZREG + 2*SZFREG ++ FREG_S fs3, a0, 13*SZREG + 3*SZFREG ++ FREG_S fs4, a0, 13*SZREG + 4*SZFREG ++ FREG_S fs5, a0, 13*SZREG + 5*SZFREG ++ FREG_S fs6, a0, 13*SZREG + 6*SZFREG ++ FREG_S fs7, a0, 13*SZREG + 7*SZFREG + #endif + + #if !IS_IN (libc) && IS_IN(rtld) + li.w v0, 0 +- jirl zero,ra,0 ++ jirl zero, ra, 0 + #else + b __sigjmp_save + #endif +diff --git a/sysdeps/loongarch/start.S b/sysdeps/loongarch/start.S +index e9d82033..bf6bfc9e 100644 +--- a/sysdeps/loongarch/start.S ++++ b/sysdeps/loongarch/start.S +@@ -60,20 +60,7 @@ ENTRY (ENTRY_POINT) + cfi_undefined (1) + or a5, a0, zero /* rtld_fini */ + +-#if ENABLE_STATIC_PIE +-/* For static PIE, the GOT cannot be used in _start because the GOT entries are +- offsets instead of real addresses before __libc_start_main. +- __libc_start_main and/or main may be not local, so we rely on the linker to +- produce PLT entries for them. GNU ld >= 2.40 supports this. */ +-# define LA la.pcrel +-#else +-/* Old GNU ld (< 2.40) cannot handle PC relative address against a non-local +- function correctly. We deem these old linkers failing to support static PIE +- and load the addresses from GOT. 
*/ +-# define LA la.got +-#endif +- +- LA a0, t0, main ++ la.pcrel a0, t0, main + REG_L a1, sp, 0 + ADDI a2, sp, SZREG + +@@ -84,9 +71,9 @@ ENTRY (ENTRY_POINT) + move a4, zero /* used to be fini */ + or a6, sp, zero /* stack_end */ + +- LA ra, t0, __libc_start_main ++ la.pcrel ra, t0, __libc_start_main + jirl ra, ra, 0 + +- LA ra, t0, abort ++ la.pcrel ra, t0, abort + jirl ra, ra, 0 + END (ENTRY_POINT) +diff --git a/sysdeps/loongarch/sys/asm.h b/sysdeps/loongarch/sys/asm.h +index d1a279b8..c5eb8afa 100644 +--- a/sysdeps/loongarch/sys/asm.h ++++ b/sysdeps/loongarch/sys/asm.h +@@ -39,16 +39,32 @@ + #define FREG_L fld.d + #define FREG_S fst.d + +-/* Declare leaf routine. */ +-#define LEAF(symbol) \ +- .text; \ +- .globl symbol; \ +- .align 3; \ +- cfi_startproc; \ +- .type symbol, @function; \ +- symbol: +- +-#define ENTRY(symbol) LEAF (symbol) ++/* Declare leaf routine. ++ The usage of macro LEAF/ENTRY is as follows: ++ 1. LEAF(fcn) -- the align value of fcn is .align 3 (default value) ++ 2. LEAF(fcn, 6) -- the align value of fcn is .align 6 ++*/ ++#define LEAF_IMPL(symbol, aln, ...) \ ++ .text; \ ++ .globl symbol; \ ++ .align aln; \ ++ .type symbol, @function; \ ++symbol: \ ++ cfi_startproc; ++ ++ ++#define LEAF(...) LEAF_IMPL(__VA_ARGS__, 3) ++#define ENTRY(...) LEAF(__VA_ARGS__) ++ ++#define LEAF_NO_ALIGN(symbol) \ ++ .text; \ ++ .globl symbol; \ ++ .type symbol, @function; \ ++symbol: \ ++ cfi_startproc; ++ ++#define ENTRY_NO_ALIGN(symbol) LEAF_NO_ALIGN(symbol) ++ + + /* Mark end of function. */ + #undef END +diff --git a/sysdeps/loongarch/sys/regdef.h b/sysdeps/loongarch/sys/regdef.h +index 5100f36d..524d2e32 100644 +--- a/sysdeps/loongarch/sys/regdef.h ++++ b/sysdeps/loongarch/sys/regdef.h +@@ -89,6 +89,14 @@ + #define fs5 $f29 + #define fs6 $f30 + #define fs7 $f31 ++#define fcc0 $fcc0 ++#define fcc1 $fcc1 ++#define fcc2 $fcc2 ++#define fcc3 $fcc3 ++#define fcc4 $fcc4 ++#define fcc5 $fcc5 ++#define fcc6 $fcc6 ++#define fcc7 $fcc7 + + #define vr0 $vr0 + #define vr1 $vr1 +@@ -98,6 +106,30 @@ + #define vr5 $vr5 + #define vr6 $vr6 + #define vr7 $vr7 ++#define vr8 $vr8 ++#define vr9 $vr9 ++#define vr10 $vr10 ++#define vr11 $vr11 ++#define vr12 $vr12 ++#define vr13 $vr13 ++#define vr14 $vr14 ++#define vr15 $vr15 ++#define vr16 $vr16 ++#define vr17 $vr17 ++#define vr18 $vr18 ++#define vr19 $vr19 ++#define vr20 $vr20 ++#define vr21 $vr21 ++#define vr22 $vr22 ++#define vr23 $vr23 ++#define vr24 $vr24 ++#define vr25 $vr25 ++#define vr26 $vr26 ++#define vr27 $vr27 ++#define vr28 $vr28 ++#define vr29 $vr29 ++#define vr30 $vr30 ++#define vr31 $vr31 + + #define xr0 $xr0 + #define xr1 $xr1 +@@ -107,5 +139,30 @@ + #define xr5 $xr5 + #define xr6 $xr6 + #define xr7 $xr7 ++#define xr7 $xr7 ++#define xr8 $xr8 ++#define xr9 $xr9 ++#define xr10 $xr10 ++#define xr11 $xr11 ++#define xr12 $xr12 ++#define xr13 $xr13 ++#define xr14 $xr14 ++#define xr15 $xr15 ++#define xr16 $xr16 ++#define xr17 $xr17 ++#define xr18 $xr18 ++#define xr19 $xr19 ++#define xr20 $xr20 ++#define xr21 $xr21 ++#define xr22 $xr22 ++#define xr23 $xr23 ++#define xr24 $xr24 ++#define xr25 $xr25 ++#define xr26 $xr26 ++#define xr27 $xr27 ++#define xr28 $xr28 ++#define xr29 $xr29 ++#define xr30 $xr30 ++#define xr31 $xr31 + + #endif /* _SYS_REGDEF_H */ +diff --git a/sysdeps/unix/sysv/linux/loongarch/bits/hwcap.h b/sysdeps/unix/sysv/linux/loongarch/bits/hwcap.h +index 5104b69c..7acec23d 100644 +--- a/sysdeps/unix/sysv/linux/loongarch/bits/hwcap.h ++++ b/sysdeps/unix/sysv/linux/loongarch/bits/hwcap.h +@@ -35,3 +35,4 @@ + #define 
HWCAP_LOONGARCH_LBT_X86 (1 << 10) + #define HWCAP_LOONGARCH_LBT_ARM (1 << 11) + #define HWCAP_LOONGARCH_LBT_MIPS (1 << 12) ++#define HWCAP_LOONGARCH_PTW (1 << 13) +diff --git a/sysdeps/unix/sysv/linux/loongarch/configure b/sysdeps/unix/sysv/linux/loongarch/configure +index 0d1159e9..8e744d3a 100644 +--- a/sysdeps/unix/sysv/linux/loongarch/configure ++++ b/sysdeps/unix/sysv/linux/loongarch/configure +@@ -1,7 +1,7 @@ + # This file is generated from configure.ac by Autoconf. DO NOT EDIT! + # Local configure fragment for sysdeps/unix/sysv/linux/loongarch. + +-arch_minimum_kernel=5.19.0 ++arch_minimum_kernel=4.15.0 + + libc_cv_loongarch_int_abi=no + +diff --git a/sysdeps/unix/sysv/linux/loongarch/configure.ac b/sysdeps/unix/sysv/linux/loongarch/configure.ac +index 04e9150a..00048d47 100644 +--- a/sysdeps/unix/sysv/linux/loongarch/configure.ac ++++ b/sysdeps/unix/sysv/linux/loongarch/configure.ac +@@ -2,7 +2,7 @@ sinclude(./aclocal.m4)dnl Autoconf lossage + GLIBC_PROVIDES dnl See aclocal.m4 in the top level source directory. + # Local configure fragment for sysdeps/unix/sysv/linux/loongarch. + +-arch_minimum_kernel=5.19.0 ++arch_minimum_kernel=4.15.0 + + libc_cv_loongarch_int_abi=no + AC_EGREP_CPP(4 8 8, [__SIZEOF_INT__ __SIZEOF_LONG__ __SIZEOF_POINTER__ +diff --git a/sysdeps/unix/sysv/linux/loongarch/cpu-features.h b/sysdeps/unix/sysv/linux/loongarch/cpu-features.h +index e371e13b..d1a280a5 100644 +--- a/sysdeps/unix/sysv/linux/loongarch/cpu-features.h ++++ b/sysdeps/unix/sysv/linux/loongarch/cpu-features.h +@@ -25,5 +25,7 @@ + #define SUPPORT_LSX (GLRO (dl_hwcap) & HWCAP_LOONGARCH_LSX) + #define SUPPORT_LASX (GLRO (dl_hwcap) & HWCAP_LOONGARCH_LASX) + ++#define INIT_ARCH() ++ + #endif /* _CPU_FEATURES_LOONGARCH64_H */ + +diff --git a/sysdeps/unix/sysv/linux/loongarch/pointer_guard.h b/sysdeps/unix/sysv/linux/loongarch/pointer_guard.h +index b25e353b..d6c78687 100644 +--- a/sysdeps/unix/sysv/linux/loongarch/pointer_guard.h ++++ b/sysdeps/unix/sysv/linux/loongarch/pointer_guard.h +@@ -19,17 +19,15 @@ + #ifndef POINTER_GUARD_H + #define POINTER_GUARD_H + +-/* Load a got-relative EXPR into G, using T. +- Note G and T are register names. */ ++/* Load a got-relative EXPR into register G. */ + #define LD_GLOBAL(G, EXPR) \ + la.global G, EXPR; \ + REG_L G, G, 0; + +-/* Load a pc-relative EXPR into G, using T. +- Note G and T are register names. */ ++/* Load a pc-relative EXPR into register G. 
*/ + #define LD_PCREL(G, EXPR) \ +- la.pcrel G, EXPR; \ +- REG_L G, G, 0; ++ pcalau12i G, %pc_hi20(EXPR); \ ++ REG_L G, G, %pc_lo12(EXPR); + + #if (IS_IN (rtld) \ + || (!defined SHARED && (IS_IN (libc) \ +-- +2.33.0 + diff --git a/glibc.spec b/glibc.spec index cf8246b..3810358 100644 --- a/glibc.spec +++ b/glibc.spec @@ -48,10 +48,14 @@ %undefine with_valgrind %endif +%ifarch loongarch64 +%global ENABLE_RELOC 0 +%else %global ENABLE_RELOC 1 +%endif # Only some architectures have static PIE support -%define pie_arches %{ix86} x86_64 aarch64 +%define pie_arches %{ix86} x86_64 aarch64 loongarch64 %define enablekernel 3.2 %define target %{_target_cpu}-%{_vendor}-linux @@ -67,7 +71,7 @@ ############################################################################## Name: glibc Version: 2.38 -Release: 21 +Release: 22 Summary: The GNU libc libraries License: %{all_license} URL: http://www.gnu.org/software/glibc/ @@ -161,6 +165,7 @@ Patch9016: add-GB18030-2022-charmap-BZ-30243.patch Patch9017: fix-Segmentation-fault-in-nss-module.patch Patch9018: fix_nss_database_check_reload_and_get_memleak.patch Patch9019: 0001-fix-glibc-build-error-on-x86.patch +Patch9020: 0001-LoongArch-update-from-upstream.patch %if %{ENABLE_RELOC} Patch9021: reserve-relocation-information-for-sysboost.patch @@ -758,7 +763,9 @@ touch devel.filelist touch nscd.filelist touch nss_modules.filelist touch nss-devel.filelist +%ifnarch loongarch64 touch libnsl.filelist +%endif touch debugutils.filelist touch benchtests.filelist touch help.filelist @@ -817,7 +824,9 @@ cat master.filelist \ -e '%{_prefix}/share' \ -e '/var/db/Makefile' \ -e '/libnss_.*\.so[0-9.]*$' \ +%ifnarch loongarch64 -e '/libnsl' \ +%endif -e 'glibc-benchtests' \ -e 'aux-cache' \ > glibc.filelist @@ -890,8 +899,10 @@ grep '/libnss_[a-z]*\.so$' master.filelist > nss-devel.filelist ############################################################################## # libnsl subpackage ############################################################################## +%ifnarch loongarch64 grep -E '/libnsl\.so\.[0-9]+$' master.filelist > libnsl.filelist test $(wc -l < libnsl.filelist) -eq 1 +%endif ############################################################################## # glibc debugutils sub-package @@ -1072,7 +1083,8 @@ elf/ld.so --library-path .:elf:nptl:dlfcn \ %endif popd -%endif # %{run_glibc_tests} +%endif +#%{run_glibc_tests} ############################################################################## # Install and uninstall scripts @@ -1330,8 +1342,10 @@ fi %files -f nss-devel.filelist nss-devel +%ifnarch loongarch64 %files -f libnsl.filelist -n libnsl /%{_lib}/libnsl.so.1 +%endif %files -f debugutils.filelist debugutils @@ -1353,10 +1367,15 @@ fi %endif %changelog -* Tue Feb 6 Qingqing Li - 2.38-21 +* Thu Feb 22 2024 Peng Fan - 2.38-22 +- LoongArch: sync patch from upstream. +- glibc-version >= 2.34 not support libnsl for LoongArch. +- Fix spec file format about date. 
+ +* Tue Feb 6 2024 Qingqing Li - 2.38-21 - arm: Remove wrong ldr from _dl_start_user (BZ 31339) -* Mon Feb 5 Qingqing Li - 2.38-20 +* Mon Feb 5 2024 Qingqing Li - 2.38-20 - x86_64: Optimize ffsll function code size - S390: Fix building with disable mutli arch (BZ 31196) - sparc: Fix broken memset for sparc32 (BZ 31068) @@ -1364,20 +1383,20 @@ fi - sparc: Fix sparc64 memmove length comparison (BZ 31266) - sparc: Remove unwind information from signal return stubs (BZ 31244) -* Thu Feb 1 Hewenliang - 2.38-19 +* Thu Feb 1 2024 Hewenliang - 2.38-19 - backport:fix CVE-2023-6779 CVE-2023-6780 -* Wed Jan 31 Qingqing Li - 2.38-18 +* Wed Jan 31 2024 Qingqing Li - 2.38-18 - backport:fix CVE-2023-6246. -* Sat Jan 13 Qingqing Li - 2.38-17 +* Sat Jan 13 2024 Qingqing Li - 2.38-17 - elf: Add a way to check if tunable is set (BZ 27069) - malloc: Improve MAPE_HUGETLB with glibc.malloc.hugetlb=2 -* Wed Jan 3 Qingqing Li - 2.38-16 +* Wed Jan 3 2024 Qingqing Li - 2.38-16 - backport patches from glibc upstream 2.38 branch -* Thu Dec 14 shixuantong - 2.38-15 +* Thu Dec 14 2023 shixuantong - 2.38-15 - elf: Handle non-directory name in search path (BZ 31035) * Fri Dec 8 2023 Qingqing Li - 2.38-14 -- Gitee