From c61b4a142d8edb0e1dac1a3114f9b5dac53990b9 Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Mon, 4 Aug 2025 09:26:28 +0800 Subject: [PATCH] LoongArch: common sync from upstream Signed-off-by: Peng Fan --- ...ault-to-a-maximum-page-size-of-64KiB.patch | 161 +++++ ...PC-relative-relocations-for-shared-l.patch | 207 +++++++ ...solution-of-undefined-weak-hidden-pr.patch | 159 +++++ ...R_LARCH_PCALA_HI20-or-R_LARCH_PCREL2.patch | 67 ++ 0005-loongson-buffer-overflow.patch | 32 + ...sassembly-option-parsing-stopping-at.patch | 110 ++++ ...h-gas-resolving-constant-expressions.patch | 72 +++ ...shift-in-loongarch_elf_add_sub_reloc.patch | 54 ++ 0009-loongarch-gcc-4.5-build-fixes.patch | 151 +++++ ...delete-bytes-at-the-end-of-each-rela.patch | 574 ++++++++++++++++++ ...to-relax-instructions-into-NOPs-afte.patch | 405 ++++++++++++ ...ix-errors-due-to-version-differences.patch | 103 ++++ binutils.spec | 19 +- 13 files changed, 2112 insertions(+), 2 deletions(-) create mode 100644 0001-LoongArch-Default-to-a-maximum-page-size-of-64KiB.patch create mode 100644 0002-LoongArch-Check-PC-relative-relocations-for-shared-l.patch create mode 100644 0003-LoongArch-Fix-resolution-of-undefined-weak-hidden-pr.patch create mode 100644 0004-LoongArch-Allow-R_LARCH_PCALA_HI20-or-R_LARCH_PCREL2.patch create mode 100644 0005-loongson-buffer-overflow.patch create mode 100644 0006-LoongArch-Fix-disassembly-option-parsing-stopping-at.patch create mode 100644 0007-loongarch-gas-resolving-constant-expressions.patch create mode 100644 0008-ubsan-undefined-shift-in-loongarch_elf_add_sub_reloc.patch create mode 100644 0009-loongarch-gcc-4.5-build-fixes.patch create mode 100644 0010-LoongArch-Batch-delete-bytes-at-the-end-of-each-rela.patch create mode 100644 0011-LoongArch-Allow-to-relax-instructions-into-NOPs-afte.patch create mode 100644 0012-LoongArch-Fix-errors-due-to-version-differences.patch diff --git a/0001-LoongArch-Default-to-a-maximum-page-size-of-64KiB.patch 
b/0001-LoongArch-Default-to-a-maximum-page-size-of-64KiB.patch new file mode 100644 index 0000000..f376420 --- /dev/null +++ b/0001-LoongArch-Default-to-a-maximum-page-size-of-64KiB.patch @@ -0,0 +1,161 @@ +From 4b10a30d6d02675892fd0220fa1d722d9f342359 Mon Sep 17 00:00:00 2001 +From: WANG Xuerui +Date: Sat, 19 Oct 2024 22:11:52 +0800 +Subject: [PATCH 01/12] LoongArch: Default to a maximum page size of 64KiB + +As per the spec (Section 7.5.10, LoongArch Reference Manual Vol. 1), +LoongArch machines are not limited in page size choices, and currently +page sizes of 4KiB, 16KiB and 64KiB are supported by mainline Linux. +While 16KiB is the most common, the current BFD code says it is the +maximum; this is not correct, and as an effect, almost all existing +binaries are incompatible with a 64KiB kernel because the sections are +not sufficiently aligned, while being totally fine otherwise. +This is needlessly complicating integration testing [1]. + +This patch fixes the inconsistency, and also brings BFD behavior in line +with that of LLD [2]. + +[1] https://github.com/loongson-community/discussions/issues/47 +[2] https://github.com/llvm/llvm-project/blob/llvmorg-19.1.0/lld/ELF/Arch/LoongArch.cpp#L174-L183 + +bfd/ + * elfnn-loongarch.c (ELF_MAXPAGESIZE): Bump to 64KiB. + (ELF_MINPAGESIZE): Define as 4KiB. + (ELF_COMMONPAGESIZE): Define as 16KiB. + +ld/ + * testsuite/ld-loongarch-elf/64_pcrel.d: Update assertions after + changing the target max page size to 64KiB. + * testsuite/ld-loongarch-elf/data-got.d: Likewise. + * testsuite/ld-loongarch-elf/desc-relax.d: Likewise. + * testsuite/ld-loongarch-elf/relax-align-ignore-start.d: Likewise. + * testsuite/ld-loongarch-elf/tlsdesc_abs.d: Make the fuzzy match work + as intended by not checking exact instruction words. + * testsuite/ld-loongarch-elf/tlsdesc_extreme.d: Likewise. 
+ +Signed-off-by: WANG Xuerui +--- + bfd/elfnn-loongarch.c | 4 +++- + ld/testsuite/ld-loongarch-elf/64_pcrel.d | 2 +- + ld/testsuite/ld-loongarch-elf/data-got.d | 2 +- + ld/testsuite/ld-loongarch-elf/desc-relax.d | 2 +- + .../ld-loongarch-elf/relax-align-ignore-start.d | 2 +- + ld/testsuite/ld-loongarch-elf/tlsdesc_abs.d | 12 ++++++------ + ld/testsuite/ld-loongarch-elf/tlsdesc_extreme.d | 8 ++++---- + 7 files changed, 17 insertions(+), 15 deletions(-) + +diff --git a/bfd/elfnn-loongarch.c b/bfd/elfnn-loongarch.c +index 13216ef9..8dfb5ea1 100644 +--- a/bfd/elfnn-loongarch.c ++++ b/bfd/elfnn-loongarch.c +@@ -6171,7 +6171,9 @@ elf_loongarch64_hash_symbol (struct elf_link_hash_entry *h) + #define ELF_ARCH bfd_arch_loongarch + #define ELF_TARGET_ID LARCH_ELF_DATA + #define ELF_MACHINE_CODE EM_LOONGARCH +-#define ELF_MAXPAGESIZE 0x4000 ++#define ELF_MINPAGESIZE 0x1000 ++#define ELF_MAXPAGESIZE 0x10000 ++#define ELF_COMMONPAGESIZE 0x4000 + #define bfd_elfNN_bfd_reloc_type_lookup loongarch_reloc_type_lookup + #define bfd_elfNN_bfd_link_hash_table_create \ + loongarch_elf_link_hash_table_create +diff --git a/ld/testsuite/ld-loongarch-elf/64_pcrel.d b/ld/testsuite/ld-loongarch-elf/64_pcrel.d +index 2ea063b7..1f4664ea 100644 +--- a/ld/testsuite/ld-loongarch-elf/64_pcrel.d ++++ b/ld/testsuite/ld-loongarch-elf/64_pcrel.d +@@ -1,4 +1,4 @@ + #... +-.*0xffffbffc.* ++.*0xfffefffc.* + .*0xffffffff.* + #pass +diff --git a/ld/testsuite/ld-loongarch-elf/data-got.d b/ld/testsuite/ld-loongarch-elf/data-got.d +index a754478a..4a26c72d 100644 +--- a/ld/testsuite/ld-loongarch-elf/data-got.d ++++ b/ld/testsuite/ld-loongarch-elf/data-got.d +@@ -8,7 +8,7 @@ + #skip: loongarch32-*-* + + #... +- GNU_RELRO 0x003c10 0x0000000000007c10 0x0000000000007c10 0x0003f0 0x0003f0 R 0x1 ++ GNU_RELRO 0x00fc10 0x000000000001fc10 0x000000000001fc10 0x0003f0 0x0003f0 R 0x1 + #... + 01 .dynamic .got .got.plt .data + #... 
+diff --git a/ld/testsuite/ld-loongarch-elf/desc-relax.d b/ld/testsuite/ld-loongarch-elf/desc-relax.d +index c885953c..48f2d1e3 100644 +--- a/ld/testsuite/ld-loongarch-elf/desc-relax.d ++++ b/ld/testsuite/ld-loongarch-elf/desc-relax.d +@@ -9,7 +9,7 @@ + Disassembly of section .text: + + 0+188 <.*>: +- 188: 18020844 pcaddi \$a0, 4162 ++ 188: 18080844 pcaddi \$a0, 16450 + 18c: 28c00081 ld.d \$ra, \$a0, 0 + 190: 4c000021 jirl \$ra, \$ra, 0 + 194: 0010888c add.d \$t0, \$a0, \$tp +diff --git a/ld/testsuite/ld-loongarch-elf/relax-align-ignore-start.d b/ld/testsuite/ld-loongarch-elf/relax-align-ignore-start.d +index 939cf427..70763ff0 100644 +--- a/ld/testsuite/ld-loongarch-elf/relax-align-ignore-start.d ++++ b/ld/testsuite/ld-loongarch-elf/relax-align-ignore-start.d +@@ -12,5 +12,5 @@ Disassembly of section bbb: + 0000000120000080 : + [ ]+120000080:[ ]+4c000020[ ]+ret + Disassembly of section ccc: +-0000000120000090 <__bss_start-0x4004>: ++0000000120000090 <__bss_start-0x10004>: + [ ]+120000090:[ ]+4c000020[ ]+ret +diff --git a/ld/testsuite/ld-loongarch-elf/tlsdesc_abs.d b/ld/testsuite/ld-loongarch-elf/tlsdesc_abs.d +index 62f5a2a0..7e23454b 100644 +--- a/ld/testsuite/ld-loongarch-elf/tlsdesc_abs.d ++++ b/ld/testsuite/ld-loongarch-elf/tlsdesc_abs.d +@@ -9,15 +9,15 @@ + Disassembly of section .text: + + 0+120000100 <.*>: +- 120000100: 14400084 lu12i.w \$a0, .* +- 120000104: 03850084 ori \$a0, \$a0, .* +- 120000108: 16000024 lu32i.d \$a0, .* ++ 120000100: ........ lu12i.w \$a0, .* ++ 120000104: ........ ori \$a0, \$a0, .* ++ 120000108: ........ lu32i.d \$a0, .* + 12000010c: 03000084 lu52i.d \$a0, \$a0, 0 + 120000110: 28c00081 ld.d \$ra, \$a0, 0 + 120000114: 4c000021 jirl \$ra, \$ra, 0 +- 120000118: 14400084 lu12i.w \$a0, .* +- 12000011c: 03850084 ori \$a0, \$a0, .* +- 120000120: 16000024 lu32i.d \$a0, .* ++ 120000118: ........ lu12i.w \$a0, .* ++ 12000011c: ........ ori \$a0, \$a0, .* ++ 120000120: ........ 
lu32i.d \$a0, .* + 120000124: 03000084 lu52i.d \$a0, \$a0, 0 + 120000128: 28c00081 ld.d \$ra, \$a0, 0 + 12000012c: 4c000021 jirl \$ra, \$ra, 0 +diff --git a/ld/testsuite/ld-loongarch-elf/tlsdesc_extreme.d b/ld/testsuite/ld-loongarch-elf/tlsdesc_extreme.d +index 55179997..fbc1d945 100644 +--- a/ld/testsuite/ld-loongarch-elf/tlsdesc_extreme.d ++++ b/ld/testsuite/ld-loongarch-elf/tlsdesc_extreme.d +@@ -9,15 +9,15 @@ + Disassembly of section .text: + + 0+120000100 <.*>: +- 120000100: 1a000084 pcalau12i \$a0, .* +- 120000104: 02c52001 li.d \$ra, .* ++ 120000100: ........ pcalau12i \$a0, .* ++ 120000104: ........ li.d \$ra, .* + 120000108: 16000001 lu32i.d \$ra, 0 + 12000010c: 03000021 lu52i.d \$ra, \$ra, 0 + 120000110: 00108484 add.d \$a0, \$a0, \$ra + 120000114: 28c00081 ld.d \$ra, \$a0, 0 + 120000118: 4c000021 jirl \$ra, \$ra, 0 +- 12000011c: 1a000084 pcalau12i \$a0, .* +- 120000120: 02c5200d li.d \$t1, .* ++ 12000011c: ........ pcalau12i \$a0, .* ++ 120000120: ........ li.d \$t1, .* + 120000124: 1600000d lu32i.d \$t1, 0 + 120000128: 030001ad lu52i.d \$t1, \$t1, 0 + 12000012c: 0010b484 add.d \$a0, \$a0, \$t1 +-- +2.47.3 + diff --git a/0002-LoongArch-Check-PC-relative-relocations-for-shared-l.patch b/0002-LoongArch-Check-PC-relative-relocations-for-shared-l.patch new file mode 100644 index 0000000..48aa60f --- /dev/null +++ b/0002-LoongArch-Check-PC-relative-relocations-for-shared-l.patch @@ -0,0 +1,207 @@ +From d63d442331671ff2e1a747d0d4bcf58d3cbe8802 Mon Sep 17 00:00:00 2001 +From: Lulu Cai +Date: Mon, 30 Sep 2024 16:08:59 +0800 +Subject: [PATCH 02/12] LoongArch: Check PC-relative relocations for shared + libraries + +Building shared libraries should not be allowed for PC-relative +relocations against external symbols. +Currently LoongArch has no corresponding checks and silently +generates wrong shared libraries. + +However, In the first version of the medium cmodel, pcalau12i+jirl was +used for function calls, in which case PC-relative relocations were +allowed. 
+--- + bfd/elfnn-loongarch.c | 21 +++++++++++++++++++ + .../ld-loongarch-elf/bad_pcala_hi20_global.d | 5 +++++ + .../ld-loongarch-elf/bad_pcala_hi20_global.s | 8 +++++++ + .../ld-loongarch-elf/bad_pcala_hi20_weak.d | 5 +++++ + .../ld-loongarch-elf/bad_pcala_hi20_weak.s | 9 ++++++++ + .../ld-loongarch-elf/bad_pcrel20_s2_global.d | 5 +++++ + .../ld-loongarch-elf/bad_pcrel20_s2_global.s | 8 +++++++ + .../ld-loongarch-elf/bad_pcrel20_s2_weak.d | 5 +++++ + .../ld-loongarch-elf/bad_pcrel20_s2_weak.s | 9 ++++++++ + ld/testsuite/ld-loongarch-elf/data-plt.s | 1 + + .../ld-loongarch-elf/ld-loongarch-elf.exp | 4 ++++ + 11 files changed, 80 insertions(+) + create mode 100644 ld/testsuite/ld-loongarch-elf/bad_pcala_hi20_global.d + create mode 100644 ld/testsuite/ld-loongarch-elf/bad_pcala_hi20_global.s + create mode 100644 ld/testsuite/ld-loongarch-elf/bad_pcala_hi20_weak.d + create mode 100644 ld/testsuite/ld-loongarch-elf/bad_pcala_hi20_weak.s + create mode 100644 ld/testsuite/ld-loongarch-elf/bad_pcrel20_s2_global.d + create mode 100644 ld/testsuite/ld-loongarch-elf/bad_pcrel20_s2_global.s + create mode 100644 ld/testsuite/ld-loongarch-elf/bad_pcrel20_s2_weak.d + create mode 100644 ld/testsuite/ld-loongarch-elf/bad_pcrel20_s2_weak.s + +diff --git a/bfd/elfnn-loongarch.c b/bfd/elfnn-loongarch.c +index 8dfb5ea1..46fd1a11 100644 +--- a/bfd/elfnn-loongarch.c ++++ b/bfd/elfnn-loongarch.c +@@ -1079,6 +1079,18 @@ loongarch_elf_check_relocs (bfd *abfd, struct bfd_link_info *info, + h->non_got_ref = 1; + break; + ++ /* Since shared library global symbols interpose, any ++ PC-relative relocations against external symbols ++ should not be used to build shared libraries. */ ++ case R_LARCH_PCREL20_S2: ++ if (bfd_link_pic (info) ++ && (sec->flags & SEC_ALLOC) != 0 ++ && (sec->flags & SEC_READONLY) != 0 ++ && ! LARCH_REF_LOCAL (info, h)) ++ return bad_static_reloc (abfd, rel, sec, r_type, h, NULL); ++ ++ break; ++ + /* For normal cmodel, pcalau12i + addi.d/w used to data. 
+ For first version medium cmodel, pcalau12i + jirl are used to + function call, it need to creat PLT entry for STT_FUNC and +@@ -1096,6 +1108,15 @@ loongarch_elf_check_relocs (bfd *abfd, struct bfd_link_info *info, + h->pointer_equality_needed = 1; + } + ++ /* PC-relative relocations are allowed For first version ++ medium cmodel function call. */ ++ if (h != NULL && !h->needs_plt ++ && bfd_link_pic (info) ++ && (sec->flags & SEC_ALLOC) != 0 ++ && (sec->flags & SEC_READONLY) != 0 ++ && ! LARCH_REF_LOCAL (info, h)) ++ return bad_static_reloc (abfd, rel, sec, r_type, h, NULL); ++ + break; + + case R_LARCH_B16: +diff --git a/ld/testsuite/ld-loongarch-elf/bad_pcala_hi20_global.d b/ld/testsuite/ld-loongarch-elf/bad_pcala_hi20_global.d +new file mode 100644 +index 00000000..6ecefd1d +--- /dev/null ++++ b/ld/testsuite/ld-loongarch-elf/bad_pcala_hi20_global.d +@@ -0,0 +1,5 @@ ++#name: PC-relative relocation making shared ++#source: bad_pcala_hi20_global.s ++#target: [check_shared_lib_support] ++#ld: -shared --defsym global_a=0x10 --defsym global_b=0x20 ++#error: .*: relocation R_LARCH_PCALA_HI20 against `global_b` can not be used when making a shared object; recompile with -fPIC +diff --git a/ld/testsuite/ld-loongarch-elf/bad_pcala_hi20_global.s b/ld/testsuite/ld-loongarch-elf/bad_pcala_hi20_global.s +new file mode 100644 +index 00000000..d8189e45 +--- /dev/null ++++ b/ld/testsuite/ld-loongarch-elf/bad_pcala_hi20_global.s +@@ -0,0 +1,8 @@ ++ .hidden global_a ++ .text ++ .align 2 ++main: ++ # Symbols defined .hidden are bound local and ++ # the linker should differenciate them. 
++ la.pcrel $a0, global_a ++ la.pcrel $a0, global_b +diff --git a/ld/testsuite/ld-loongarch-elf/bad_pcala_hi20_weak.d b/ld/testsuite/ld-loongarch-elf/bad_pcala_hi20_weak.d +new file mode 100644 +index 00000000..cefc42cf +--- /dev/null ++++ b/ld/testsuite/ld-loongarch-elf/bad_pcala_hi20_weak.d +@@ -0,0 +1,5 @@ ++#name: PC-relative relocation making shared ++#source: bad_pcala_hi20_weak.s ++#target: [check_shared_lib_support] ++#ld: -shared --defsym global_a=0x10 --defsym global_b=0x20 ++#error: .*: relocation R_LARCH_PCALA_HI20 against `global_b` can not be used when making a shared object; recompile with -fPIC +diff --git a/ld/testsuite/ld-loongarch-elf/bad_pcala_hi20_weak.s b/ld/testsuite/ld-loongarch-elf/bad_pcala_hi20_weak.s +new file mode 100644 +index 00000000..73c6ec5f +--- /dev/null ++++ b/ld/testsuite/ld-loongarch-elf/bad_pcala_hi20_weak.s +@@ -0,0 +1,9 @@ ++ .hidden global_a ++ .weak global_b ++ .text ++ .align 2 ++main: ++ # Symbols defined .hidden are bound local and ++ # the linker should differenciate them. 
++ la.pcrel $a0, global_a ++ la.pcrel $a0, global_b +diff --git a/ld/testsuite/ld-loongarch-elf/bad_pcrel20_s2_global.d b/ld/testsuite/ld-loongarch-elf/bad_pcrel20_s2_global.d +new file mode 100644 +index 00000000..8e063dfb +--- /dev/null ++++ b/ld/testsuite/ld-loongarch-elf/bad_pcrel20_s2_global.d +@@ -0,0 +1,5 @@ ++#name: PC-relative relocation making shared ++#source: bad_pcrel20_s2_global.s ++#target: [check_shared_lib_support] ++#ld: -shared --defsym global_a=0x10 --defsym global_b=0x20 ++#error: .*: relocation R_LARCH_PCREL20_S2 against `global_b` can not be used when making a shared object; recompile with -fPIC +diff --git a/ld/testsuite/ld-loongarch-elf/bad_pcrel20_s2_global.s b/ld/testsuite/ld-loongarch-elf/bad_pcrel20_s2_global.s +new file mode 100644 +index 00000000..39cedbd2 +--- /dev/null ++++ b/ld/testsuite/ld-loongarch-elf/bad_pcrel20_s2_global.s +@@ -0,0 +1,8 @@ ++ .hidden global_a ++ .text ++ .align 2 ++main: ++ # Symbols defined .hidden are bound local and ++ # the linker should differenciate them. 
++ pcaddi $a0, %pcrel_20(global_a) ++ pcaddi $a0, %pcrel_20(global_b) +diff --git a/ld/testsuite/ld-loongarch-elf/bad_pcrel20_s2_weak.d b/ld/testsuite/ld-loongarch-elf/bad_pcrel20_s2_weak.d +new file mode 100644 +index 00000000..605df249 +--- /dev/null ++++ b/ld/testsuite/ld-loongarch-elf/bad_pcrel20_s2_weak.d +@@ -0,0 +1,5 @@ ++#name: PC-relative relocation making shared ++#source: bad_pcrel20_s2_weak.s ++#target: [check_shared_lib_support] ++#ld: -shared --defsym global_a=0x10 --defsym global_b=0x20 ++#error: .*: relocation R_LARCH_PCREL20_S2 against `global_b` can not be used when making a shared object; recompile with -fPIC +diff --git a/ld/testsuite/ld-loongarch-elf/bad_pcrel20_s2_weak.s b/ld/testsuite/ld-loongarch-elf/bad_pcrel20_s2_weak.s +new file mode 100644 +index 00000000..82faa5bb +--- /dev/null ++++ b/ld/testsuite/ld-loongarch-elf/bad_pcrel20_s2_weak.s +@@ -0,0 +1,9 @@ ++ .hidden global_a ++ .weak global_b ++ .text ++ .align 2 ++main: ++ # Symbols defined .hidden are bound local and ++ # the linker should differenciate them. ++ pcaddi $a0, %pcrel_20(global_a) ++ pcaddi $a0, %pcrel_20(global_b) +diff --git a/ld/testsuite/ld-loongarch-elf/data-plt.s b/ld/testsuite/ld-loongarch-elf/data-plt.s +index faff052c..6c1a8eac 100644 +--- a/ld/testsuite/ld-loongarch-elf/data-plt.s ++++ b/ld/testsuite/ld-loongarch-elf/data-plt.s +@@ -2,6 +2,7 @@ + # R_LARCH_PCALA_HI20 only need to generate PLT entry for function symbols. 
+ .text + .globl a ++ .hidden a + + .data + .align 2 +diff --git a/ld/testsuite/ld-loongarch-elf/ld-loongarch-elf.exp b/ld/testsuite/ld-loongarch-elf/ld-loongarch-elf.exp +index 555ebf92..615548dc 100644 +--- a/ld/testsuite/ld-loongarch-elf/ld-loongarch-elf.exp ++++ b/ld/testsuite/ld-loongarch-elf/ld-loongarch-elf.exp +@@ -166,6 +166,10 @@ if [istarget "loongarch64-*-*"] { + run_dump_test "relr-got-shared" + run_dump_test "relr-text-shared" + run_dump_test "abssym_shared" ++ run_dump_test "bad_pcala_hi20_global" ++ run_dump_test "bad_pcala_hi20_weak" ++ run_dump_test "bad_pcrel20_s2_global" ++ run_dump_test "bad_pcrel20_s2_weak" + } + + if [check_pie_support] { +-- +2.47.3 + diff --git a/0003-LoongArch-Fix-resolution-of-undefined-weak-hidden-pr.patch b/0003-LoongArch-Fix-resolution-of-undefined-weak-hidden-pr.patch new file mode 100644 index 0000000..e8e130c --- /dev/null +++ b/0003-LoongArch-Fix-resolution-of-undefined-weak-hidden-pr.patch @@ -0,0 +1,159 @@ +From 333a4f786b4568ab1b8d5292ec9cab471c6a449a Mon Sep 17 00:00:00 2001 +From: Xi Ruoyao +Date: Wed, 25 Dec 2024 12:41:43 +0800 +Subject: [PATCH 03/12] LoongArch: Fix resolution of undefined weak + hidden/protected symbols + +An undefined weak hidden/protected symbol should be resolved to runtime +address 0, but we were actually resolving it to link-time address 0. So +in PIE or DSO the runtime address would be incorrect. + +Fix the issue by rewriting pcalau12i to lu12i.w, and pcaddi to addi.w. +The latter does not always work because the immediate field of addi.w is +narrower; report an error in the case the addend is too large. 
+ +Signed-off-by: Xi Ruoyao +--- + bfd/elfnn-loongarch.c | 83 ++++++++++++++++++++++++++++---------- + include/opcode/loongarch.h | 2 + + 2 files changed, 63 insertions(+), 22 deletions(-) + +diff --git a/bfd/elfnn-loongarch.c b/bfd/elfnn-loongarch.c +index 46fd1a11..1a1a9c34 100644 +--- a/bfd/elfnn-loongarch.c ++++ b/bfd/elfnn-loongarch.c +@@ -3235,6 +3235,7 @@ loongarch_elf_relocate_section (bfd *output_bfd, struct bfd_link_info *info, + char tls_type; + bfd_vma relocation, off, ie_off, desc_off; + int i, j; ++ bool resolve_pcrel_undef_weak = false; + + /* When an unrecognized relocation is encountered, which usually + occurs when using a newer assembler but an older linker, an error +@@ -4051,23 +4052,74 @@ loongarch_elf_relocate_section (bfd *output_bfd, struct bfd_link_info *info, + + break; + ++ case R_LARCH_PCALA64_HI12: ++ pc -= 4; ++ /* Fall through. */ ++ case R_LARCH_PCALA64_LO20: ++ pc -= 8; ++ /* Fall through. */ + case R_LARCH_PCREL20_S2: +- unresolved_reloc = false; +- if (h && h->plt.offset != MINUS_ONE) +- relocation = sec_addr (plt) + h->plt.offset; +- else +- relocation += rel->r_addend; +- relocation -= pc; +- break; +- + case R_LARCH_PCALA_HI20: + unresolved_reloc = false; ++ ++ /* If sym is hidden undefined weak, (sym + addend) should be ++ resolved to runtime address (0 + addend). 
*/ ++ resolve_pcrel_undef_weak = ++ (is_undefweak ++ && h ++ && ELF_ST_VISIBILITY (h->other) != STV_DEFAULT); ++ ++ if (resolve_pcrel_undef_weak) ++ pc = 0; ++ + if (h && h->plt.offset != MINUS_ONE) + relocation = sec_addr (plt) + h->plt.offset; + else + relocation += rel->r_addend; + +- RELOCATE_CALC_PC32_HI20 (relocation, pc); ++ switch (r_type) ++ { ++ case R_LARCH_PCREL20_S2: ++ relocation -= pc; ++ if (resolve_pcrel_undef_weak) ++ { ++ bfd_signed_vma addr = (bfd_signed_vma) relocation; ++ if (addr >= 2048 || addr < -2048) ++ { ++ const char *msg = ++ _("cannot resolve R_LARCH_PCREL20_S2 against " ++ "undefined weak symbol with addend out of " ++ "[-2048, 2048)"); ++ fatal = ++ loongarch_reloc_is_fatal (info, input_bfd, ++ input_section, rel, ++ howto, ++ bfd_reloc_notsupported, ++ is_undefweak, name, msg); ++ break; ++ } ++ ++ uint32_t insn = bfd_get (32, input_bfd, ++ contents + rel->r_offset); ++ insn = LARCH_GET_RD (insn) | LARCH_OP_ADDI_W; ++ insn |= (relocation & 0xfff) << 10; ++ bfd_put_32 (input_bfd, insn, contents + rel->r_offset); ++ r = bfd_reloc_continue; ++ } ++ break; ++ case R_LARCH_PCALA_HI20: ++ RELOCATE_CALC_PC32_HI20 (relocation, pc); ++ if (resolve_pcrel_undef_weak) ++ { ++ uint32_t insn = bfd_get (32, input_bfd, ++ contents + rel->r_offset); ++ insn = LARCH_GET_RD (insn) | LARCH_OP_LU12I_W; ++ bfd_put_32 (input_bfd, insn, contents + rel->r_offset); ++ } ++ break; ++ default: ++ RELOCATE_CALC_PC64_HI32 (relocation, pc); ++ } + break; + + case R_LARCH_TLS_LE_HI20_R: +@@ -4104,19 +4156,6 @@ loongarch_elf_relocate_section (bfd *output_bfd, struct bfd_link_info *info, + } + break; + +- case R_LARCH_PCALA64_HI12: +- pc -= 4; +- /* Fall through. */ +- case R_LARCH_PCALA64_LO20: +- if (h && h->plt.offset != MINUS_ONE) +- relocation = sec_addr (plt) + h->plt.offset; +- else +- relocation += rel->r_addend; +- +- RELOCATE_CALC_PC64_HI32 (relocation, pc - 8); +- +- break; +- + case R_LARCH_GOT_PC_HI20: + case R_LARCH_GOT_HI20: + /* Calc got offset. 
*/ +diff --git a/include/opcode/loongarch.h b/include/opcode/loongarch.h +index 1dbc16fc..aa54aabd 100644 +--- a/include/opcode/loongarch.h ++++ b/include/opcode/loongarch.h +@@ -36,6 +36,7 @@ extern "C" + + #define LARCH_MK_ADDI_D 0xffc00000 + #define LARCH_OP_ADDI_D 0x02c00000 ++ #define LARCH_OP_ADDI_W 0x02800000 + #define LARCH_MK_PCADDI 0xfe000000 + #define LARCH_OP_PCADDI 0x18000000 + #define LARCH_MK_B 0xfc000000 +@@ -44,6 +45,7 @@ extern "C" + #define LARCH_OP_BL 0x54000000 + #define LARCH_MK_ORI 0xffc00000 + #define LARCH_OP_ORI 0x03800000 ++ #define LARCH_OP_OR 0x00150000 + #define LARCH_MK_LU12I_W 0xfe000000 + #define LARCH_OP_LU12I_W 0x14000000 + #define LARCH_MK_LD_D 0xffc00000 +-- +2.47.3 + diff --git a/0004-LoongArch-Allow-R_LARCH_PCALA_HI20-or-R_LARCH_PCREL2.patch b/0004-LoongArch-Allow-R_LARCH_PCALA_HI20-or-R_LARCH_PCREL2.patch new file mode 100644 index 0000000..8f44f3b --- /dev/null +++ b/0004-LoongArch-Allow-R_LARCH_PCALA_HI20-or-R_LARCH_PCREL2.patch @@ -0,0 +1,67 @@ +From 12a20ace5e553e1b1b3a020721dfbd65e6e605d5 Mon Sep 17 00:00:00 2001 +From: Peng Fan +Date: Thu, 17 Jul 2025 12:09:03 +0000 +Subject: [PATCH 04/12] LoongArch: Allow R_LARCH_PCALA_HI20 or + R_LARCH_PCREL20_S2 against undefined weak symbols for static PIE + +In a static PIE, undefined weak symbols should be just resolved to +runtime address 0, like those symbols with non-default visibility. This +was silently broken in all prior Binutils releases with "-static-pie +-mdirect-extern-access": + + $ cat t.c + int x (void) __attribute__ ((weak)); + + int + main (void) + { + __builtin_printf("%p\n", x); + } + $ gcc t.c -static-pie -mdirect-extern-access + $ ./a.out + 0x7ffff1d64000 + +Since commit 4cb77761d687 ("LoongArch: Check PC-relative relocations for +shared libraries"), the situation has been improved: the linker errors +out instead of silently producing a wrong output file. 
+ +But logically, using -mdirect-extern-access for a static PIE perfectly +makes sense, and we should not prevent that even if the programmer uses +weak symbols. Linux kernel is such an example, and Linux < 6.10 now +fails to build with Binutils trunk. (The silent breakage with prior +Binutils releases was "benign" due to some blind luck.) + +While since the 6.10 release Linux has removed those potentially +undefined weak symbols (due to performance issue), we still should +support weak symbols in -mdirect-extern-access -static-pie and unbreak +building old kernels. +--- + bfd/elfnn-loongarch.c | 11 ++++++----- + 1 file changed, 6 insertions(+), 5 deletions(-) + +diff --git a/bfd/elfnn-loongarch.c b/bfd/elfnn-loongarch.c +index 1a1a9c34..12108476 100644 +--- a/bfd/elfnn-loongarch.c ++++ b/bfd/elfnn-loongarch.c +@@ -4062,12 +4062,13 @@ loongarch_elf_relocate_section (bfd *output_bfd, struct bfd_link_info *info, + case R_LARCH_PCALA_HI20: + unresolved_reloc = false; + +- /* If sym is hidden undefined weak, (sym + addend) should be +- resolved to runtime address (0 + addend). */ ++ /* If sym is undef weak and it's hidden or we are doing a static ++ link, (sym + addend) should be resolved to runtime address ++ (0 + addend). 
*/ + resolve_pcrel_undef_weak = +- (is_undefweak +- && h +- && ELF_ST_VISIBILITY (h->other) != STV_DEFAULT); ++ ((info->nointerp ++ || (h && ELF_ST_VISIBILITY (h->other) != STV_DEFAULT)) ++ && is_undefweak); + + if (resolve_pcrel_undef_weak) + pc = 0; +-- +2.47.3 + diff --git a/0005-loongson-buffer-overflow.patch b/0005-loongson-buffer-overflow.patch new file mode 100644 index 0000000..4f83148 --- /dev/null +++ b/0005-loongson-buffer-overflow.patch @@ -0,0 +1,32 @@ +From b59e3612808c69d8032d5aba1c639d33f4053a09 Mon Sep 17 00:00:00 2001 +From: Alan Modra +Date: Sun, 26 Jan 2025 13:12:45 +1030 +Subject: [PATCH 05/12] loongson buffer overflow + +bfd_elfNN_loongarch_set_data_segment_info can be called from the target +after_allocation function with a non-ELF hash table. This is seen in +the ld-elf pr21884 testcase. Fix the problem by first checking the +hash table type before writing to a loongarch_elf_hash_table field. +--- + bfd/elfnn-loongarch.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/bfd/elfnn-loongarch.c b/bfd/elfnn-loongarch.c +index 12108476..e42c8088 100644 +--- a/bfd/elfnn-loongarch.c ++++ b/bfd/elfnn-loongarch.c +@@ -5159,8 +5159,9 @@ void + bfd_elfNN_loongarch_set_data_segment_info (struct bfd_link_info *info, + int *data_segment_phase) + { +- struct loongarch_elf_link_hash_table *htab = loongarch_elf_hash_table (info); +- htab->data_segment_phase = data_segment_phase; ++ if (is_elf_hash_table (info->hash) ++ && elf_hash_table_id (elf_hash_table (info)) == LARCH_ELF_DATA) ++ loongarch_elf_hash_table (info)->data_segment_phase = data_segment_phase; + } + + /* Implement R_LARCH_ALIGN by deleting excess alignment NOPs. 
+-- +2.47.3 + diff --git a/0006-LoongArch-Fix-disassembly-option-parsing-stopping-at.patch b/0006-LoongArch-Fix-disassembly-option-parsing-stopping-at.patch new file mode 100644 index 0000000..2a89467 --- /dev/null +++ b/0006-LoongArch-Fix-disassembly-option-parsing-stopping-at.patch @@ -0,0 +1,110 @@ +From 5cf8efae2bce2b09d3041f2d63fa6933908c1317 Mon Sep 17 00:00:00 2001 +From: WANG Xuerui +Date: Mon, 24 Mar 2025 15:54:25 +0800 +Subject: [PATCH 06/12] LoongArch: Fix disassembly option parsing stopping at + the first option + +Turns out the return value of parse_loongarch_dis_option acts as an +error code, and previously the function always signified failure with +a non-zero return value, making only the first disassembly option get +to take effect. + +Fix by adding the missing `return 0`'s to the two success code paths. + +Signed-off-by: WANG Xuerui +--- + .../binutils-all/loongarch64/dis-options-multi.d | 10 ++++++++++ + .../binutils-all/loongarch64/dis-options-no-alises.d | 10 ++++++++++ + .../binutils-all/loongarch64/dis-options-numeric.d | 10 ++++++++++ + .../testsuite/binutils-all/loongarch64/dis-options.s | 3 +++ + opcodes/loongarch-dis.c | 7 ++++++- + 5 files changed, 39 insertions(+), 1 deletion(-) + create mode 100644 binutils/testsuite/binutils-all/loongarch64/dis-options-multi.d + create mode 100644 binutils/testsuite/binutils-all/loongarch64/dis-options-no-alises.d + create mode 100644 binutils/testsuite/binutils-all/loongarch64/dis-options-numeric.d + create mode 100644 binutils/testsuite/binutils-all/loongarch64/dis-options.s + +diff --git a/binutils/testsuite/binutils-all/loongarch64/dis-options-multi.d b/binutils/testsuite/binutils-all/loongarch64/dis-options-multi.d +new file mode 100644 +index 00000000..2f34cb86 +--- /dev/null ++++ b/binutils/testsuite/binutils-all/loongarch64/dis-options-multi.d +@@ -0,0 +1,10 @@ ++#name: LoongArch disassembler options: multiple ++#source: dis-options.s ++#objdump: -d --no-show-raw-insn -M no-aliases,numeric ++ 
++#... ++Disassembly of section \.text: ++ ++[0-9a-f]+ <\.text>: ++ [0-9a-f]+:[ ]+or[ ]+\$r4, \$r21, \$r0 ++ [0-9a-f]+:[ ]+jirl[ ]+\$r0, \$r1, 0 +diff --git a/binutils/testsuite/binutils-all/loongarch64/dis-options-no-alises.d b/binutils/testsuite/binutils-all/loongarch64/dis-options-no-alises.d +new file mode 100644 +index 00000000..eb4ea62a +--- /dev/null ++++ b/binutils/testsuite/binutils-all/loongarch64/dis-options-no-alises.d +@@ -0,0 +1,10 @@ ++#name: LoongArch disassembler options: no-aliases ++#source: dis-options.s ++#objdump: -d --no-show-raw-insn -M no-aliases ++ ++#... ++Disassembly of section \.text: ++ ++[0-9a-f]+ <\.text>: ++ [0-9a-f]+:[ ]+or[ ]+\$a0, \$r21, \$zero ++ [0-9a-f]+:[ ]+jirl[ ]+\$zero, \$ra, 0 +diff --git a/binutils/testsuite/binutils-all/loongarch64/dis-options-numeric.d b/binutils/testsuite/binutils-all/loongarch64/dis-options-numeric.d +new file mode 100644 +index 00000000..e669cef3 +--- /dev/null ++++ b/binutils/testsuite/binutils-all/loongarch64/dis-options-numeric.d +@@ -0,0 +1,10 @@ ++#name: LoongArch disassembler options: numeric ++#source: dis-options.s ++#objdump: -d --no-show-raw-insn -M numeric ++ ++#... 
++Disassembly of section \.text: ++ ++[0-9a-f]+ <\.text>: ++ [0-9a-f]+:[ ]+move[ ]+\$r4, \$r21 ++ [0-9a-f]+:[ ]+ret +diff --git a/binutils/testsuite/binutils-all/loongarch64/dis-options.s b/binutils/testsuite/binutils-all/loongarch64/dis-options.s +new file mode 100644 +index 00000000..a3a4469e +--- /dev/null ++++ b/binutils/testsuite/binutils-all/loongarch64/dis-options.s +@@ -0,0 +1,3 @@ ++.text ++ move $a0, $r21 ++ ret +diff --git a/opcodes/loongarch-dis.c b/opcodes/loongarch-dis.c +index 941bf363..ce7dffa9 100644 +--- a/opcodes/loongarch-dis.c ++++ b/opcodes/loongarch-dis.c +@@ -95,13 +95,18 @@ static int + parse_loongarch_dis_option (const char *option) + { + if (strcmp (option, "no-aliases") == 0) +- loongarch_dis_show_aliases = false; ++ { ++ loongarch_dis_show_aliases = false; ++ return 0; ++ } + + if (strcmp (option, "numeric") == 0) + { + loongarch_r_disname = loongarch_r_normal_name; + loongarch_f_disname = loongarch_f_normal_name; ++ return 0; + } ++ + return -1; + } + +-- +2.47.3 + diff --git a/0007-loongarch-gas-resolving-constant-expressions.patch b/0007-loongarch-gas-resolving-constant-expressions.patch new file mode 100644 index 0000000..61079fb --- /dev/null +++ b/0007-loongarch-gas-resolving-constant-expressions.patch @@ -0,0 +1,72 @@ +From 9f7dd59c803f303a04fac1bdd51f043178411618 Mon Sep 17 00:00:00 2001 +From: Alan Modra +Date: Thu, 17 Jul 2025 12:16:42 +0000 +Subject: [PATCH 07/12] loongarch gas resolving constant expressions + +The test added in commit 4fe96ddaf614 results in an asan complaint: +loongarch-parse.y:225:16: runtime error: left shift of negative value -1 +To avoid the complaint, perform left shifts as unsigned (which gives +the same result on 2's complement machines). Do the same for +addition, subtraction and multiplication. Furthermore, warn on +divide/modulus by zero. 
+--- + gas/config/loongarch-parse.y | 31 +++++++++++++++++++++++-------- + 1 file changed, 23 insertions(+), 8 deletions(-) + +diff --git a/gas/config/loongarch-parse.y b/gas/config/loongarch-parse.y +index ec5a4166..dbb49800 100644 +--- a/gas/config/loongarch-parse.y ++++ b/gas/config/loongarch-parse.y +@@ -207,26 +207,41 @@ emit_bin (int op) + switch (op) + { + case '*': +- opr1 = opr1 * opr2; ++ opr1 = (valueT) opr1 * (valueT) opr2; + break; + case '/': +- opr1 = opr1 / opr2; ++ if (opr2 == 0) ++ { ++ as_warn (_("Divide by zero!")); ++ opr1 = 0; ++ } ++ else ++ opr1 = opr1 / opr2; + break; + case '%': +- opr1 = opr1 % opr2; ++ if (opr2 == 0) ++ { ++ as_warn (_("Divide by zero!")); ++ opr1 = 0; ++ } ++ else ++ opr1 = opr1 % opr2; + break; + case '+': +- opr1 = opr1 + opr2; ++ opr1 = (valueT) opr1 + (valueT) opr2; + break; + case '-': +- opr1 = opr1 - opr2; ++ opr1 = (valueT) opr1 - (valueT) opr2; + break; + case LEFT_OP: +- opr1 = opr1 << opr2; ++ opr1 = (valueT) opr1 << opr2; + break; + case RIGHT_OP: +- /* Algorithm right shift. */ +- opr1 = (offsetT)opr1 >> (offsetT)opr2; ++ if (opr1 < 0) ++ as_warn (_("Right shift of negative numbers may be changed " ++ "from arithmetic right shift to logical right shift!")); ++ /* Arithmetic right shift. 
*/ ++ opr1 = opr1 >> opr2; + break; + case '<': + opr1 = opr1 < opr2; +-- +2.47.3 + diff --git a/0008-ubsan-undefined-shift-in-loongarch_elf_add_sub_reloc.patch b/0008-ubsan-undefined-shift-in-loongarch_elf_add_sub_reloc.patch new file mode 100644 index 0000000..261372f --- /dev/null +++ b/0008-ubsan-undefined-shift-in-loongarch_elf_add_sub_reloc.patch @@ -0,0 +1,54 @@ +From f43135ff3c30e5fad573369dcd192fba32da8ec3 Mon Sep 17 00:00:00 2001 +From: Alan Modra +Date: Tue, 20 May 2025 15:22:13 +0930 +Subject: [PATCH 08/12] ubsan: undefined shift in + loongarch_elf_add_sub_reloc_uleb128 + +An oss-fuzz testcase found: +runtime error: shift exponent 140 is too large for 32-bit type 'int' +OK, that's just a completely silly uleb, but we ought to be able to +handle 64 bits here. + + * elfxx-loongarch.c (loongarch_elf_add_sub_reloc_uleb128): Formatting. + Don't left shift int. Avoid shifts larger than bits in a bfd_vma. +--- + bfd/elfxx-loongarch.c | 15 +++++++++------ + 1 file changed, 9 insertions(+), 6 deletions(-) + +diff --git a/bfd/elfxx-loongarch.c b/bfd/elfxx-loongarch.c +index ee1323ea..83b4ee64 100644 +--- a/bfd/elfxx-loongarch.c ++++ b/bfd/elfxx-loongarch.c +@@ -2171,11 +2171,11 @@ loongarch_elf_add_sub_reloc_uleb128 (bfd *abfd, + if (output_bfd != NULL) + return bfd_reloc_continue; + +- relocation = symbol->value + symbol->section->output_section->vma +- + symbol->section->output_offset + reloc_entry->addend; ++ relocation = (symbol->value + symbol->section->output_section->vma ++ + symbol->section->output_offset + reloc_entry->addend); + +- bfd_size_type octets = reloc_entry->address +- * bfd_octets_per_byte (abfd, input_section); ++ bfd_size_type octets = (reloc_entry->address ++ * bfd_octets_per_byte (abfd, input_section)); + if (!bfd_reloc_offset_in_range (reloc_entry->howto, abfd, + input_section, octets)) + return bfd_reloc_outofrange; +@@ -2195,8 +2195,11 @@ loongarch_elf_add_sub_reloc_uleb128 (bfd *abfd, + break; + } + +- bfd_vma mask = (1 << (7 * len)) - 
1; +- relocation = relocation & mask; ++ if (7 * len < sizeof (bfd_vma)) ++ { ++ bfd_vma mask = ((bfd_vma) 1 << (7 * len)) - 1; ++ relocation = relocation & mask; ++ } + loongarch_write_unsigned_leb128 (p, len, relocation); + return bfd_reloc_ok; + } +-- +2.47.3 + diff --git a/0009-loongarch-gcc-4.5-build-fixes.patch b/0009-loongarch-gcc-4.5-build-fixes.patch new file mode 100644 index 0000000..8ac62b7 --- /dev/null +++ b/0009-loongarch-gcc-4.5-build-fixes.patch @@ -0,0 +1,151 @@ +From 0de08292f8bf1ad019e86d2ab7f942f44e0d5f9e Mon Sep 17 00:00:00 2001 +From: Alan Modra +Date: Tue, 10 Jun 2025 20:29:33 +0930 +Subject: [PATCH 09/12] loongarch gcc-4.5 build fixes + +Yet another case of missing fields in struct initialisation, which +I've replaced with a memset, and some complaints about identifiers +shadowing global declarations. Fixing the shadowing in +loongarch-parse.y is easy. This one isn't so easy: +gas/expr.c: In function 'expr': +gas/expr.c:1891:12: error: declaration of 'is_unsigned' shadows a global declaration +include/opcode/loongarch.h:224:14: error: shadowed declaration is here + +opcode/loongarch.h declares lots of stuff that shouldn't be made +available to generic gas code, so I've removed that header from +tc-loongarch.h and moved the parts of TC_FORCE_RELOCATION_SUB_LOCAL +and TC_FORCE_RELOCATION_SUB_SAME that need LARCH_opts to functions +in tc-loongarch.c + + * config/loongarch-parse.y (loongarch_parse_expr): Rename + param to avoid shadowing. + * config/tc-loongarch.c (loongarch_assemble_INSNs): Use memset + rather than struct initialisation. + (loongarch_force_relocation_sub_local): New function. + (loongarch_force_relocation_sub_same): Likewise. + * config/tc-loongarch.h: Don't include opcode/loongarch.h. + (loongarch_force_relocation_sub_local): Declare, and.. + (TC_FORCE_RELOCATION_SUB_LOCAL): ..use here. + (loongarch_force_relocation_sub_same): Declare, and.. + (TC_FORCE_RELOCATION_SUB_SAME): ..use here. 
+--- + gas/config/loongarch-parse.y | 4 ++-- + gas/config/tc-loongarch.c | 26 +++++++++++++++++++++++++- + gas/config/tc-loongarch.h | 21 ++++++--------------- + 3 files changed, 33 insertions(+), 18 deletions(-) + +diff --git a/gas/config/loongarch-parse.y b/gas/config/loongarch-parse.y +index dbb49800..960e0997 100644 +--- a/gas/config/loongarch-parse.y ++++ b/gas/config/loongarch-parse.y +@@ -42,7 +42,7 @@ is_const (struct reloc_info *info) + } + + int +-loongarch_parse_expr (const char *expr, ++loongarch_parse_expr (const char *exp, + struct reloc_info *reloc_stack_top, + size_t max_reloc_num, + size_t *reloc_num, +@@ -52,7 +52,7 @@ loongarch_parse_expr (const char *expr, + struct yy_buffer_state *buffstate; + top = reloc_stack_top; + end = top + max_reloc_num; +- buffstate = yy_scan_string (expr); ++ buffstate = yy_scan_string (exp); + ret = yyparse (); + + if (ret == 0) +diff --git a/gas/config/tc-loongarch.c b/gas/config/tc-loongarch.c +index cc733675..1d13cbb6 100644 +--- a/gas/config/tc-loongarch.c ++++ b/gas/config/tc-loongarch.c +@@ -1392,7 +1392,8 @@ loongarch_assemble_INSNs (char *str, unsigned int expand_from_macro) + if (*str == '\0') + break; + +- struct loongarch_cl_insn the_one = { 0 }; ++ struct loongarch_cl_insn the_one; ++ memset (&the_one, 0, sizeof (the_one)); + the_one.name = str; + the_one.expand_from_macro = expand_from_macro; + +@@ -1493,6 +1494,29 @@ loongarch_force_relocation (struct fix *fixp) + return generic_force_reloc (fixp); + } + ++/* If subsy of BFD_RELOC32/64 and PC in same segment, and without relax ++ or PC at start of subsy or with relax but sub_symbol_segment not in ++ SEC_CODE, we generate 32/64_PCREL. 
*/ ++bool ++loongarch_force_relocation_sub_local (fixS *fixp, segT sec ATTRIBUTE_UNUSED) ++{ ++ return !(LARCH_opts.thin_add_sub ++ && (fixp->fx_r_type == BFD_RELOC_32 ++ || fixp->fx_r_type == BFD_RELOC_64) ++ && (!LARCH_opts.relax ++ || (S_GET_VALUE (fixp->fx_subsy) ++ == fixp->fx_frag->fr_address + fixp->fx_where) ++ || (S_GET_SEGMENT (fixp->fx_subsy)->flags & SEC_CODE) == 0)); ++} ++ ++/* Postpone text-section label subtraction calculation until linking, since ++ linker relaxations might change the deltas. */ ++bool ++loongarch_force_relocation_sub_same(fixS *fixp ATTRIBUTE_UNUSED, segT sec) ++{ ++ return LARCH_opts.relax && (sec->flags & SEC_CODE) != 0; ++} ++ + static void fix_reloc_insn (fixS *fixP, bfd_vma reloc_val, char *buf) + { + reloc_howto_type *howto; +diff --git a/gas/config/tc-loongarch.h b/gas/config/tc-loongarch.h +index da8b0547..5511946f 100644 +--- a/gas/config/tc-loongarch.h ++++ b/gas/config/tc-loongarch.h +@@ -21,8 +21,6 @@ + #ifndef TC_LOONGARCH + #define TC_LOONGARCH + +-#include "opcode/loongarch.h" +- + #define TARGET_BYTES_BIG_ENDIAN 0 + #define TARGET_ARCH bfd_arch_loongarch + +@@ -80,26 +78,19 @@ extern int loongarch_force_relocation (struct fix *); + /* If subsy of BFD_RELOC32/64 and PC in same segment, and without relax + or PC at start of subsy or with relax but sub_symbol_segment not in + SEC_CODE, we generate 32/64_PCREL. 
*/ +-#define TC_FORCE_RELOCATION_SUB_LOCAL(FIX, SEG) \ +- (!(LARCH_opts.thin_add_sub \ +- && ((FIX)->fx_r_type == BFD_RELOC_32 \ +- ||(FIX)->fx_r_type == BFD_RELOC_64) \ +- && (!LARCH_opts.relax \ +- || S_GET_VALUE (FIX->fx_subsy) \ +- == FIX->fx_frag->fr_address + FIX->fx_where \ +- || (LARCH_opts.relax \ +- && ((S_GET_SEGMENT (FIX->fx_subsy)->flags & SEC_CODE) == 0))))) ++extern bool loongarch_force_relocation_sub_local (struct fix *, asection *); ++#define TC_FORCE_RELOCATION_SUB_LOCAL(FIX, SEC) \ ++ loongarch_force_relocation_sub_local (FIX, SEC) + + #define TC_VALIDATE_FIX_SUB(FIX, SEG) 1 + #define DIFF_EXPR_OK 1 + + /* Postpone text-section label subtraction calculation until linking, since + linker relaxations might change the deltas. */ ++extern bool loongarch_force_relocation_sub_same(struct fix *, asection *); + #define TC_FORCE_RELOCATION_SUB_SAME(FIX, SEC) \ +- (LARCH_opts.relax ? \ +- (GENERIC_FORCE_RELOCATION_SUB_SAME (FIX, SEC) \ +- || ((SEC)->flags & SEC_CODE) != 0) \ +- : (GENERIC_FORCE_RELOCATION_SUB_SAME (FIX, SEC))) \ ++ (loongarch_force_relocation_sub_same (FIX, SEC) \ ++ || GENERIC_FORCE_RELOCATION_SUB_SAME (FIX, SEC)) + + #define TC_LINKRELAX_FIXUP(seg) ((seg->flags & SEC_CODE) \ + || (seg->flags & SEC_DEBUGGING)) +-- +2.47.3 + diff --git a/0010-LoongArch-Batch-delete-bytes-at-the-end-of-each-rela.patch b/0010-LoongArch-Batch-delete-bytes-at-the-end-of-each-rela.patch new file mode 100644 index 0000000..38b97bc --- /dev/null +++ b/0010-LoongArch-Batch-delete-bytes-at-the-end-of-each-rela.patch @@ -0,0 +1,574 @@ +From cb502b4b5946a66a0217de104d9b6d4f34a09263 Mon Sep 17 00:00:00 2001 +From: WANG Xuerui +Date: Tue, 17 Jun 2025 16:12:02 +0800 +Subject: [PATCH 10/12] LoongArch: Batch-delete bytes at the end of each relax + trip + +Previously, memmove and reloc/symbol adjustments happened at each +loongarch_relax_delete_bytes() call, which is O(n^2) time complexity and +leads to unacceptable (multiple hours) linking times for certain inputs +with 
huge number of relaxable sites -- see the linked issue for details. + +To get rid of the quadratic behavior, defer all delete ops to the end of +each relax trip, with the buffer implemented with the splay tree from +libiberty. The individual relaxation handlers are converted to handle +symbol values and relocation offsets as if all preceding deletions +actually happened, by querying a cumulative offset from the splay tree; +the accesses should be efficient because they are mostly sequential +during a relaxation trip. The exact relaxation behavior remains largely +unchanged. + +Example running times before and after the change with the test case in +the linked issue (mypy transpiled C), cross-linking on Threadripper +3990X: +Before: 4192.80s user 1.09s system 98% cpu 1:10:53.52 total +After: 1.76s user 0.74s system 98% cpu 2.539 total - ~1/2382 the time! + +Also tested with binutils (bootstrapping self), CPython 3.14 and LLVM +20.1.6; all passed the respective test suites. + +Link: https://github.com/loongson-community/discussions/issues/56 +Signed-off-by: WANG Xuerui +--- + bfd/elfnn-loongarch.c | 344 +++++++++++++++++++++++++++++++++++------- + 1 file changed, 293 insertions(+), 51 deletions(-) + +diff --git a/bfd/elfnn-loongarch.c b/bfd/elfnn-loongarch.c +index e42c8088..05439fde 100644 +--- a/bfd/elfnn-loongarch.c ++++ b/bfd/elfnn-loongarch.c +@@ -25,6 +25,7 @@ + #define ARCH_SIZE NN + #include "elf-bfd.h" + #include "objalloc.h" ++#include "splay-tree.h" + #include "elf/loongarch.h" + #include "elfxx-loongarch.h" + #include "opcode/loongarch.h" +@@ -135,6 +136,10 @@ struct loongarch_elf_link_hash_table + a partially updated state (some sections have vma updated but the + others do not), and it's unsafe to do the normal relaxation. */ + bool layout_mutating_for_relr; ++ ++ /* Pending relaxation (byte deletion) operations meant for roughly ++ sequential access. 
*/ ++ splay_tree pending_delete_ops; + }; + + struct loongarch_elf_section_data +@@ -4628,12 +4633,147 @@ loongarch_elf_relocate_section (bfd *output_bfd, struct bfd_link_info *info, + return !fatal; + } + +-static bool ++/* A pending delete op during a linker relaxation trip, to be stored in a ++ splay tree. ++ The key is the starting offset of this op's deletion range, interpreted ++ as if no delete op were executed for this trip. */ ++struct pending_delete_op ++{ ++ /* Number of bytes to delete at the address. */ ++ bfd_size_type size; ++ ++ /* The total offset adjustment at the address as if all preceding delete ++ ops had been executed. Used for calculating expected addresses after ++ relaxation without actually adjusting anything. */ ++ bfd_size_type cumulative_offset; ++}; ++ ++static int ++pending_delete_op_compare (splay_tree_key a, splay_tree_key b) ++{ ++ bfd_vma off_a = (bfd_vma)a; ++ bfd_vma off_b = (bfd_vma)b; ++ ++ if (off_a < off_b) ++ return -1; ++ else if (off_a > off_b) ++ return 1; ++ else ++ return 0; ++} ++ ++static void * ++_allocate_on_bfd (int wanted, void *data) ++{ ++ bfd *abfd = (bfd *)data; ++ return bfd_alloc (abfd, wanted); ++} ++ ++static void ++_deallocate_on_bfd (void *p ATTRIBUTE_UNUSED, void *data ATTRIBUTE_UNUSED) ++{ ++ /* Nothing to do; the data will get released along with the associated BFD ++ or an early bfd_release call. */ ++} ++ ++static splay_tree ++pending_delete_ops_new (bfd *abfd) ++{ ++ /* The node values are allocated with bfd_zalloc, so they are automatically ++ taken care of at BFD release time. 
*/ ++ return splay_tree_new_with_allocator (pending_delete_op_compare, NULL, NULL, ++ _allocate_on_bfd, _deallocate_on_bfd, abfd); ++} ++ ++static bfd_vma ++loongarch_calc_relaxed_addr (struct bfd_link_info *info, bfd_vma offset) ++{ ++ struct loongarch_elf_link_hash_table *htab = loongarch_elf_hash_table (info); ++ splay_tree pdops = htab->pending_delete_ops; ++ struct pending_delete_op *op; ++ splay_tree_node node; ++ ++ BFD_ASSERT (pdops != NULL); ++ ++ /* Find the op that starts just before the given address. */ ++ node = splay_tree_predecessor (pdops, (splay_tree_key)offset); ++ if (node == NULL) ++ /* Nothing has been deleted yet. */ ++ return offset; ++ BFD_ASSERT (((bfd_vma)node->key) < offset); ++ op = (struct pending_delete_op *)node->value; ++ ++ /* If offset is inside this op's range, it is actually one of the deleted ++ bytes, so the adjusted node->key should be returned in this case. */ ++ bfd_vma op_end_off = (bfd_vma)node->key + op->size; ++ if (offset < op_end_off) ++ { ++ offset = (bfd_vma)node->key; ++ node = splay_tree_predecessor (pdops, node->key); ++ op = node ? (struct pending_delete_op *)node->value : NULL; ++ } ++ ++ return offset - (op ? op->cumulative_offset : 0); ++} ++ ++static void + loongarch_relax_delete_bytes (bfd *abfd, +- asection *sec, + bfd_vma addr, + size_t count, + struct bfd_link_info *link_info) ++{ ++ struct loongarch_elf_link_hash_table *htab ++ = loongarch_elf_hash_table (link_info); ++ splay_tree pdops = htab->pending_delete_ops; ++ splay_tree_node node; ++ struct pending_delete_op *op = NULL, *new_op = NULL; ++ bool need_new_node = true; ++ ++ if (count == 0) ++ return; ++ ++ BFD_ASSERT (pdops != NULL); ++ ++ node = splay_tree_predecessor (pdops, addr); ++ if (node) ++ { ++ op = (struct pending_delete_op *)node->value; ++ if ((bfd_vma)node->key + op->size >= addr) ++ { ++ /* The previous op already covers this offset, coalesce the new op ++ into it. 
*/ ++ op->size += count; ++ op->cumulative_offset += count; ++ need_new_node = false; ++ } ++ } ++ ++ if (need_new_node) ++ { ++ new_op = bfd_zalloc (abfd, sizeof (struct pending_delete_op)); ++ new_op->size = count; ++ new_op->cumulative_offset = (op ? op->cumulative_offset : 0) + count; ++ node = splay_tree_insert (pdops, (splay_tree_key)addr, ++ (splay_tree_value)new_op); ++ } ++ ++ /* Adjust all cumulative offsets after this op. At this point either: ++ - a new node is created, in which case `node` has been updated with the ++ new value, or ++ - an existing node is to be reused, in which case `node` is untouched by ++ the new node logic above and appropriate to use, ++ so we can just re-use `node` here. */ ++ for (node = splay_tree_successor (pdops, node->key); node != NULL; ++ node = splay_tree_successor (pdops, node->key)) ++ { ++ op = (struct pending_delete_op *)node->value; ++ op->cumulative_offset += count; ++ } ++} ++ ++static void ++loongarch_relax_perform_deletes (bfd *abfd, asection *sec, ++ struct bfd_link_info *link_info) + { + unsigned int i, symcount; + bfd_vma toaddr = sec->size; +@@ -4641,30 +4781,82 @@ loongarch_relax_delete_bytes (bfd *abfd, + Elf_Internal_Shdr *symtab_hdr = &elf_tdata (abfd)->symtab_hdr; + unsigned int sec_shndx = _bfd_elf_section_from_bfd_section (abfd, sec); + struct bfd_elf_section_data *data = elf_section_data (sec); +- bfd_byte *contents = data->this_hdr.contents; ++ bfd_byte *contents = data->this_hdr.contents, *contents_end = NULL; + struct relr_entry *relr = loongarch_elf_section_data (sec)->relr; + struct loongarch_elf_link_hash_table *htab = + loongarch_elf_hash_table (link_info); + struct relr_entry *relr_end = NULL; ++ splay_tree pdops = htab->pending_delete_ops; ++ splay_tree_node node1 = NULL, node2 = NULL; + + if (htab->relr_count) + relr_end = htab->relr + htab->relr_count; + +- /* Actually delete the bytes. 
*/ +- sec->size -= count; +- memmove (contents + addr, contents + addr + count, toaddr - addr - count); ++ BFD_ASSERT (pdops != NULL); ++ node1 = splay_tree_min (pdops); ++ ++ if (node1 == NULL) ++ /* No pending delete ops, nothing to do. */ ++ return; ++ ++ /* Actually delete the bytes. For each delete op the pointer arithmetics ++ look like this: ++ ++ node1->key -\ /- node2->key ++ |<- op1->size ->| | ++ v v v ++ ...-DDDDDD-------xxxxxxxxxxxxxxxxxSSSSSSxxxxxxxxxx----... ++ ^ ^ ^ ++ contents_end node1->key + op1->size ++ | ++ contents_end after this memmove ++ ++ where the "S" and "D" bytes are the memmove's source and destination ++ respectively. In case node1 is the first op, contents_end is initialized ++ to the op's start; in case node2 == NULL, the chunk's end is the section's ++ end. The contents_end pointer will be bumped to the new end of content ++ after each memmove. As no byte is added during the process, it is ++ guaranteed to trail behind the delete ops, and all bytes overwritten are ++ either already copied by an earlier memmove or meant to be discarded. ++ ++ For memmove, we need to translate offsets to pointers by adding them to ++ `contents`. */ ++ for (; node1; node1 = node2) ++ { ++ struct pending_delete_op *op1 = (struct pending_delete_op *)node1->value; ++ bfd_vma op1_start_off = (bfd_vma)node1->key; ++ bfd_vma op1_end_off = op1_start_off + op1->size; ++ node2 = splay_tree_successor (pdops, node1->key); ++ bfd_vma op2_start_off = node2 ? (bfd_vma)node2->key : toaddr; ++ bfd_size_type count = op2_start_off - op1_end_off; ++ ++ if (count) ++ { ++ if (contents_end == NULL) ++ /* Start from the end of the first unmodified content chunk. */ ++ contents_end = contents + op1_start_off; ++ ++ memmove (contents_end, contents + op1_end_off, count); ++ contents_end += count; ++ } ++ ++ /* Adjust the section size once, when we have reached the end. 
*/ ++ if (node2 == NULL) ++ sec->size -= op1->cumulative_offset; ++ } + + /* Adjust the location of all of the relocs. Note that we need not + adjust the addends, since all PC-relative references must be against + symbols, which we will adjust below. */ + for (i = 0; i < sec->reloc_count; i++) +- if (data->relocs[i].r_offset > addr && data->relocs[i].r_offset < toaddr) +- data->relocs[i].r_offset -= count; ++ if (data->relocs[i].r_offset < toaddr) ++ data->relocs[i].r_offset = loongarch_calc_relaxed_addr ( ++ link_info, data->relocs[i].r_offset); + + /* Likewise for relative relocs to be packed into .relr. */ + for (; relr && relr < relr_end && relr->sec == sec; relr++) +- if (relr->off > addr && relr->off < toaddr) +- relr->off -= count; ++ if (relr->off < toaddr) ++ relr->off = loongarch_calc_relaxed_addr (link_info, relr->off); + + /* Adjust the local symbols defined in this section. */ + for (i = 0; i < symtab_hdr->sh_info; i++) +@@ -4672,24 +4864,35 @@ loongarch_relax_delete_bytes (bfd *abfd, + Elf_Internal_Sym *sym = (Elf_Internal_Sym *) symtab_hdr->contents + i; + if (sym->st_shndx == sec_shndx) + { +- /* If the symbol is in the range of memory we just moved, we +- have to adjust its value. */ +- if (sym->st_value > addr && sym->st_value <= toaddr) +- sym->st_value -= count; ++ bfd_vma orig_value = sym->st_value; ++ if (orig_value <= toaddr) ++ sym->st_value ++ = loongarch_calc_relaxed_addr (link_info, orig_value); + +- /* If the symbol *spans* the bytes we just deleted (i.e. its +- *end* is in the moved bytes but its *start* isn't), then we +- must adjust its size. ++ /* If the symbol *spans* some deleted bytes, that is its *end* is in ++ the moved bytes but its *start* isn't, then we must adjust its ++ size. + + This test needs to use the original value of st_value, otherwise + we might accidentally decrease size when deleting bytes right +- before the symbol. 
But since deleted relocs can't span across +- symbols, we can't have both a st_value and a st_size decrease, +- so it is simpler to just use an else. */ +- else if (sym->st_value <= addr +- && sym->st_value + sym->st_size > addr +- && sym->st_value + sym->st_size <= toaddr) +- sym->st_size -= count; ++ before the symbol. */ ++ bfd_vma sym_end = orig_value + sym->st_size; ++ if (sym_end <= toaddr) ++ { ++ splay_tree_node node = splay_tree_predecessor ( ++ pdops, (splay_tree_key)orig_value); ++ for (; node; node = splay_tree_successor (pdops, node->key)) ++ { ++ bfd_vma addr = (bfd_vma)node->key; ++ struct pending_delete_op *op ++ = (struct pending_delete_op *)node->value; ++ ++ if (addr >= sym_end) ++ break; ++ if (orig_value <= addr && sym_end > addr) ++ sym->st_size -= op->size; ++ } ++ } + } + } + +@@ -4734,20 +4937,33 @@ loongarch_relax_delete_bytes (bfd *abfd, + || sym_hash->root.type == bfd_link_hash_defweak) + && sym_hash->root.u.def.section == sec) + { +- /* As above, adjust the value if needed. */ +- if (sym_hash->root.u.def.value > addr +- && sym_hash->root.u.def.value <= toaddr) +- sym_hash->root.u.def.value -= count; ++ bfd_vma orig_value = sym_hash->root.u.def.value; ++ ++ /* As above, adjust the value. */ ++ if (orig_value <= toaddr) ++ sym_hash->root.u.def.value ++ = loongarch_calc_relaxed_addr (link_info, orig_value); + + /* As above, adjust the size if needed. 
*/ +- else if (sym_hash->root.u.def.value <= addr +- && sym_hash->root.u.def.value + sym_hash->size > addr +- && sym_hash->root.u.def.value + sym_hash->size <= toaddr) +- sym_hash->size -= count; ++ bfd_vma sym_end = orig_value + sym_hash->size; ++ if (sym_end <= toaddr) ++ { ++ splay_tree_node node = splay_tree_predecessor ( ++ pdops, (splay_tree_key)orig_value); ++ for (; node; node = splay_tree_successor (pdops, node->key)) ++ { ++ bfd_vma addr = (bfd_vma)node->key; ++ struct pending_delete_op *op ++ = (struct pending_delete_op *)node->value; ++ ++ if (addr >= sym_end) ++ break; ++ if (orig_value <= addr && sym_end > addr) ++ sym_hash->size -= op->size; ++ } ++ } + } + } +- +- return true; + } + + /* Start perform TLS type transition. +@@ -4823,7 +5039,7 @@ loongarch_tls_perform_trans (bfd *abfd, asection *sec, + bfd_put (32, abfd, LARCH_NOP, contents + rel->r_offset); + /* link with -relax option will delete NOP. */ + if (!info->disable_target_specific_optimizations) +- loongarch_relax_delete_bytes (abfd, sec, rel->r_offset, 4, info); ++ loongarch_relax_delete_bytes (abfd, rel->r_offset, 4, info); + return true; + + case R_LARCH_TLS_IE_PC_HI20: +@@ -4912,8 +5128,7 @@ loongarch_tls_perform_trans (bfd *abfd, asection *sec, + lu52i.d $rd,$rd,%le64_hi12(sym) => (deleted) + */ + static bool +-loongarch_relax_tls_le (bfd *abfd, asection *sec, +- asection *sym_sec ATTRIBUTE_UNUSED, ++loongarch_relax_tls_le (bfd *abfd, asection *sec, asection *sym_sec, + Elf_Internal_Rela *rel, bfd_vma symval, + struct bfd_link_info *link_info, + bool *agin ATTRIBUTE_UNUSED, +@@ -4923,6 +5138,8 @@ loongarch_relax_tls_le (bfd *abfd, asection *sec, + uint32_t insn = bfd_get (32, abfd, contents + rel->r_offset); + static uint32_t insn_rj,insn_rd; + symval = symval - elf_hash_table (link_info)->tls_sec->vma; ++ if (sym_sec == sec) ++ symval = loongarch_calc_relaxed_addr (link_info, symval); + /* The old LE instruction sequence can be relaxed when the symbol offset + is smaller than the 
12-bit range. */ + if (symval <= 0xfff) +@@ -4937,7 +5154,7 @@ loongarch_relax_tls_le (bfd *abfd, asection *sec, + if (symval < 0x800) + { + rel->r_info = ELFNN_R_INFO (0, R_LARCH_NONE); +- loongarch_relax_delete_bytes (abfd, sec, rel->r_offset, ++ loongarch_relax_delete_bytes (abfd, rel->r_offset, + 4, link_info); + } + break; +@@ -4962,7 +5179,7 @@ loongarch_relax_tls_le (bfd *abfd, asection *sec, + case R_LARCH_TLS_LE64_LO20: + case R_LARCH_TLS_LE64_HI12: + rel->r_info = ELFNN_R_INFO (0, R_LARCH_NONE); +- loongarch_relax_delete_bytes (abfd, sec, rel->r_offset, ++ loongarch_relax_delete_bytes (abfd, rel->r_offset, + 4, link_info); + break; + +@@ -5020,7 +5237,11 @@ loongarch_relax_pcala_addi (bfd *abfd, asection *sec, asection *sym_sec, + size_input_section already took care of updating it after relaxation, + so we additionally update once here. */ + sec->output_offset = sec->output_section->size; +- bfd_vma pc = sec_addr (sec) + rel_hi->r_offset; ++ bfd_vma pc = sec_addr (sec) ++ + loongarch_calc_relaxed_addr (info, rel_hi->r_offset); ++ if (sym_sec == sec) ++ symval = sec_addr (sec) ++ + loongarch_calc_relaxed_addr (info, symval - sec_addr (sec)); + + /* If pc and symbol not in the same segment, add/sub segment alignment. */ + if (!loongarch_two_sections_in_same_segment (info->output_bfd, +@@ -5059,7 +5280,7 @@ loongarch_relax_pcala_addi (bfd *abfd, asection *sec, asection *sym_sec, + R_LARCH_PCREL20_S2); + rel_lo->r_info = ELFNN_R_INFO (0, R_LARCH_NONE); + +- loongarch_relax_delete_bytes (abfd, sec, rel_lo->r_offset, 4, info); ++ loongarch_relax_delete_bytes (abfd, rel_lo->r_offset, 4, info); + + return true; + } +@@ -5081,7 +5302,11 @@ loongarch_relax_call36 (bfd *abfd, asection *sec, asection *sym_sec, + size_input_section already took care of updating it after relaxation, + so we additionally update once here. 
*/ + sec->output_offset = sec->output_section->size; +- bfd_vma pc = sec_addr (sec) + rel->r_offset; ++ bfd_vma pc = sec_addr (sec) ++ + loongarch_calc_relaxed_addr (info, rel->r_offset); ++ if (sym_sec == sec) ++ symval = sec_addr (sec) ++ + loongarch_calc_relaxed_addr (info, symval - sec_addr (sec)); + + /* If pc and symbol not in the same segment, add/sub segment alignment. */ + if (!loongarch_two_sections_in_same_segment (info->output_bfd, +@@ -5115,7 +5340,7 @@ loongarch_relax_call36 (bfd *abfd, asection *sec, asection *sym_sec, + /* Adjust relocations. */ + rel->r_info = ELFNN_R_INFO (ELFNN_R_SYM (rel->r_info), R_LARCH_B26); + /* Delete jirl instruction. */ +- loongarch_relax_delete_bytes (abfd, sec, rel->r_offset + 4, 4, info); ++ loongarch_relax_delete_bytes (abfd, rel->r_offset + 4, 4, info); + return true; + } + +@@ -5169,7 +5394,7 @@ bfd_elfNN_loongarch_set_data_segment_info (struct bfd_link_info *info, + static bool + loongarch_relax_align (bfd *abfd, asection *sec, asection *sym_sec, + Elf_Internal_Rela *rel, +- bfd_vma symval ATTRIBUTE_UNUSED, ++ bfd_vma symval, + struct bfd_link_info *link_info, + bool *again ATTRIBUTE_UNUSED, + bfd_vma max_alignment ATTRIBUTE_UNUSED) +@@ -5185,6 +5410,10 @@ loongarch_relax_align (bfd *abfd, asection *sec, asection *sym_sec, + else + alignment = rel->r_addend + 4; + ++ if (sym_sec == sec) ++ symval = sec_addr (sec) ++ + loongarch_calc_relaxed_addr (link_info, symval - sec_addr (sec)); ++ + addend = alignment - 4; /* The bytes of NOPs added by R_LARCH_ALIGN. */ + symval -= addend; /* The address of first NOP added by R_LARCH_ALIGN. */ + bfd_vma aligned_addr = ((symval - 1) & ~(alignment - 1)) + alignment; +@@ -5210,17 +5439,19 @@ loongarch_relax_align (bfd *abfd, asection *sec, asection *sym_sec, + /* If skipping more bytes than the specified maximum, + then the alignment is not done at all and delete all NOPs. 
*/ + if (max > 0 && need_nop_bytes > max) +- return loongarch_relax_delete_bytes (abfd, sec, rel->r_offset, +- addend, link_info); ++ { ++ loongarch_relax_delete_bytes (abfd, rel->r_offset, addend, link_info); ++ return true; ++ } + + /* If the number of NOPs is already correct, there's nothing to do. */ + if (need_nop_bytes == addend) + return true; + + /* Delete the excess NOPs. */ +- return loongarch_relax_delete_bytes (abfd, sec, +- rel->r_offset + need_nop_bytes, +- addend - need_nop_bytes, link_info); ++ loongarch_relax_delete_bytes (abfd, rel->r_offset + need_nop_bytes, ++ addend - need_nop_bytes, link_info); ++ return true; + } + + /* Relax pcalau12i + addi.d of TLS LD/GD/DESC to pcaddi. */ +@@ -5241,7 +5472,11 @@ loongarch_relax_tls_ld_gd_desc (bfd *abfd, asection *sec, asection *sym_sec, + size_input_section already took care of updating it after relaxation, + so we additionally update once here. */ + sec->output_offset = sec->output_section->size; +- bfd_vma pc = sec_addr (sec) + rel_hi->r_offset; ++ bfd_vma pc = sec_addr (sec) ++ + loongarch_calc_relaxed_addr (info, rel_hi->r_offset); ++ if (sym_sec == sec) ++ symval = sec_addr (sec) ++ + loongarch_calc_relaxed_addr (info, symval - sec_addr (sec)); + + /* If pc and symbol not in the same segment, add/sub segment alignment. 
*/ + if (!loongarch_two_sections_in_same_segment (info->output_bfd, +@@ -5296,7 +5531,7 @@ loongarch_relax_tls_ld_gd_desc (bfd *abfd, asection *sec, asection *sym_sec, + } + rel_lo->r_info = ELFNN_R_INFO (0, R_LARCH_NONE); + +- loongarch_relax_delete_bytes (abfd, sec, rel_lo->r_offset, 4, info); ++ loongarch_relax_delete_bytes (abfd, rel_lo->r_offset, 4, info); + + return true; + } +@@ -5384,6 +5619,9 @@ loongarch_elf_relax_section (bfd *abfd, asection *sec, + htab->max_alignment = max_alignment; + } + ++ splay_tree pdops = pending_delete_ops_new (abfd); ++ htab->pending_delete_ops = pdops; ++ + for (unsigned int i = 0; i < sec->reloc_count; i++) + { + char symtype; +@@ -5616,6 +5854,10 @@ loongarch_elf_relax_section (bfd *abfd, asection *sec, + info, again, max_alignment); + } + ++ loongarch_relax_perform_deletes (abfd, sec, info); ++ htab->pending_delete_ops = NULL; ++ splay_tree_delete (pdops); ++ + return true; + } + +-- +2.47.3 + diff --git a/0011-LoongArch-Allow-to-relax-instructions-into-NOPs-afte.patch b/0011-LoongArch-Allow-to-relax-instructions-into-NOPs-afte.patch new file mode 100644 index 0000000..685f49b --- /dev/null +++ b/0011-LoongArch-Allow-to-relax-instructions-into-NOPs-afte.patch @@ -0,0 +1,405 @@ +From 9762dd402ca161f1b905dddca3d7bd6f2ec37ee8 Mon Sep 17 00:00:00 2001 +From: WANG Xuerui +Date: Sun, 6 Jul 2025 09:06:20 +0800 +Subject: [PATCH 11/12] LoongArch: Allow to relax instructions into NOPs after + handling alignment + +Right now, LoongArch linker relaxation is 2-pass, since after alignment +is done, byte deletion can no longer happen. However, as the alignment +pass also shrinks text sections, new relaxation chances may well be +created after alignment is done. 
Although at this point we can no longer +delete unused instructions without disturbing alignment, we can still +replace them with NOPs; popular LoongArch micro-architectures can +eliminate NOPs during execution, so we can expect a (very) slight +performance improvement from those late-created relaxation chances. + +To achieve this, the number of relax passes is raised to 3 for +LoongArch, and every relaxation handler except loongarch_relax_align is +migrated to a new helper loongarch_relax_delete_or_nop, that either +deletes bytes or fills the bytes to be "deleted" with NOPs, depending on +whether the containing section already has undergone alignment. Also, +since no byte can be deleted during this relax pass, in the pass the +pending_delete_ops structure is no longer allocated, and +loongarch_calc_relaxed_addr(x) degrades to the trivial "return x" in +this case. + +In addition, previously when calculating distances to symbols, an +extra segment alignment must be considered, because alignment may +increase distance between sites. However in the newly added 3rd pass +code size can no longer increase for "closed" sections, so we can skip +the adjustment for them to allow for a few more relaxation chances. + +A simple way to roughly measure this change's effectiveness is to check +how many pcalau12i + addi.d pairs are relaxed into pcaddi's. Taking a +Firefox 140.0.2 test build of mine as an example: + +Before: 47842 pcaddi's in libxul.so +After: 48089 + +This is a 0.5% increase, which is kind of acceptable for a peephole +optimization like this; of which 9 are due to the "relax"ed symbol +distance treatment. 
+ +Signed-off-by: WANG Xuerui +--- + bfd/elfnn-loongarch.c | 191 ++++++++++++++++++++++++----------- + ld/emultempl/loongarchelf.em | 2 +- + 2 files changed, 131 insertions(+), 62 deletions(-) + +diff --git a/bfd/elfnn-loongarch.c b/bfd/elfnn-loongarch.c +index 05439fde..48de9f8e 100644 +--- a/bfd/elfnn-loongarch.c ++++ b/bfd/elfnn-loongarch.c +@@ -178,6 +178,10 @@ loongarch_elf_new_section_hook (bfd *abfd, asection *sec) + #define loongarch_elf_hash_table(p) \ + ((struct loongarch_elf_link_hash_table *) ((p)->hash)) \ + ++/* During linker relaxation, indicates whether the section has already ++ undergone alignment processing and no more byte deletion is permitted. */ ++#define loongarch_sec_closed_for_deletion(sec) ((sec)->sec_flg0) ++ + #define MINUS_ONE ((bfd_vma) 0 - 1) + + #define sec_addr(sec) ((sec)->output_section->vma + (sec)->output_offset) +@@ -4693,7 +4697,10 @@ loongarch_calc_relaxed_addr (struct bfd_link_info *info, bfd_vma offset) + struct pending_delete_op *op; + splay_tree_node node; + +- BFD_ASSERT (pdops != NULL); ++ if (!pdops) ++ /* Currently this means we are past the stages where byte deletion could ++ possibly happen. */ ++ return offset; + + /* Find the op that starts just before the given address. 
*/ + node = splay_tree_predecessor (pdops, (splay_tree_key)offset); +@@ -4718,9 +4725,9 @@ loongarch_calc_relaxed_addr (struct bfd_link_info *info, bfd_vma offset) + + static void + loongarch_relax_delete_bytes (bfd *abfd, +- bfd_vma addr, +- size_t count, +- struct bfd_link_info *link_info) ++ bfd_vma addr, ++ size_t count, ++ struct bfd_link_info *link_info) + { + struct loongarch_elf_link_hash_table *htab + = loongarch_elf_hash_table (link_info); +@@ -4771,6 +4778,34 @@ loongarch_relax_delete_bytes (bfd *abfd, + } + } + ++static void ++loongarch_relax_delete_or_nop (bfd *abfd, ++ asection *sec, ++ bfd_vma addr, ++ size_t count, ++ struct bfd_link_info *link_info) ++{ ++ struct bfd_elf_section_data *data = elf_section_data (sec); ++ bfd_byte *contents = data->this_hdr.contents; ++ ++ BFD_ASSERT (count % 4 == 0); ++ ++ if (!loongarch_sec_closed_for_deletion (sec)) ++ { ++ /* Deletions are still possible within the section. */ ++ loongarch_relax_delete_bytes (abfd, addr, count, link_info); ++ return; ++ } ++ ++ /* We can no longer delete bytes in the section after enforcing alignment. ++ But as the resulting shrinkage may open up a few more relaxation chances, ++ allowing unnecessary instructions to be replaced with NOPs instead of ++ being removed altogether may still benefit performance to a lesser ++ extent. */ ++ for (; count; addr += 4, count -= 4) ++ bfd_put (32, abfd, LARCH_NOP, contents + addr); ++} ++ + static void + loongarch_relax_perform_deletes (bfd *abfd, asection *sec, + struct bfd_link_info *link_info) +@@ -5039,7 +5074,7 @@ loongarch_tls_perform_trans (bfd *abfd, asection *sec, + bfd_put (32, abfd, LARCH_NOP, contents + rel->r_offset); + /* link with -relax option will delete NOP. 
*/ + if (!info->disable_target_specific_optimizations) +- loongarch_relax_delete_bytes (abfd, rel->r_offset, 4, info); ++ loongarch_relax_delete_or_nop (abfd, sec, rel->r_offset, 4, info); + return true; + + case R_LARCH_TLS_IE_PC_HI20: +@@ -5154,7 +5189,7 @@ loongarch_relax_tls_le (bfd *abfd, asection *sec, asection *sym_sec, + if (symval < 0x800) + { + rel->r_info = ELFNN_R_INFO (0, R_LARCH_NONE); +- loongarch_relax_delete_bytes (abfd, rel->r_offset, ++ loongarch_relax_delete_or_nop (abfd, sec, rel->r_offset, + 4, link_info); + } + break; +@@ -5179,8 +5214,8 @@ loongarch_relax_tls_le (bfd *abfd, asection *sec, asection *sym_sec, + case R_LARCH_TLS_LE64_LO20: + case R_LARCH_TLS_LE64_HI12: + rel->r_info = ELFNN_R_INFO (0, R_LARCH_NONE); +- loongarch_relax_delete_bytes (abfd, rel->r_offset, +- 4, link_info); ++ loongarch_relax_delete_or_nop (abfd, sec, rel->r_offset, ++ 4, link_info); + break; + + case R_LARCH_TLS_LE_LO12: +@@ -5243,17 +5278,22 @@ loongarch_relax_pcala_addi (bfd *abfd, asection *sec, asection *sym_sec, + symval = sec_addr (sec) + + loongarch_calc_relaxed_addr (info, symval - sec_addr (sec)); + +- /* If pc and symbol not in the same segment, add/sub segment alignment. */ +- if (!loongarch_two_sections_in_same_segment (info->output_bfd, +- sec->output_section, +- sym_sec->output_section)) +- max_alignment = info->maxpagesize > max_alignment ? info->maxpagesize +- : max_alignment; +- +- if (symval > pc) +- pc -= (max_alignment > 4 ? max_alignment : 0); +- else if (symval < pc) +- pc += (max_alignment > 4 ? max_alignment : 0); ++ /* If pc and symbol not in the same segment, add/sub segment alignment if the ++ section has not undergone alignment processing because distances may grow ++ after alignment. */ ++ if (!loongarch_sec_closed_for_deletion (sec)) ++ { ++ if (!loongarch_two_sections_in_same_segment (info->output_bfd, ++ sec->output_section, ++ sym_sec->output_section)) ++ max_alignment = info->maxpagesize > max_alignment ? 
info->maxpagesize ++ : max_alignment; ++ ++ if (symval > pc) ++ pc -= (max_alignment > 4 ? max_alignment : 0); ++ else if (symval < pc) ++ pc += (max_alignment > 4 ? max_alignment : 0); ++ } + + const uint32_t pcaddi = LARCH_OP_PCADDI; + +@@ -5280,7 +5320,7 @@ loongarch_relax_pcala_addi (bfd *abfd, asection *sec, asection *sym_sec, + R_LARCH_PCREL20_S2); + rel_lo->r_info = ELFNN_R_INFO (0, R_LARCH_NONE); + +- loongarch_relax_delete_bytes (abfd, rel_lo->r_offset, 4, info); ++ loongarch_relax_delete_or_nop (abfd, sec, rel_lo->r_offset, 4, info); + + return true; + } +@@ -5308,17 +5348,22 @@ loongarch_relax_call36 (bfd *abfd, asection *sec, asection *sym_sec, + symval = sec_addr (sec) + + loongarch_calc_relaxed_addr (info, symval - sec_addr (sec)); + +- /* If pc and symbol not in the same segment, add/sub segment alignment. */ +- if (!loongarch_two_sections_in_same_segment (info->output_bfd, +- sec->output_section, +- sym_sec->output_section)) +- max_alignment = info->maxpagesize > max_alignment ? info->maxpagesize +- : max_alignment; +- +- if (symval > pc) +- pc -= (max_alignment > 4 ? max_alignment : 0); +- else if (symval < pc) +- pc += (max_alignment > 4 ? max_alignment : 0); ++ /* If pc and symbol not in the same segment, add/sub segment alignment if the ++ section has not undergone alignment processing because distances may grow ++ after alignment. */ ++ if (!loongarch_sec_closed_for_deletion (sec)) ++ { ++ if (!loongarch_two_sections_in_same_segment (info->output_bfd, ++ sec->output_section, ++ sym_sec->output_section)) ++ max_alignment = info->maxpagesize > max_alignment ? info->maxpagesize ++ : max_alignment; ++ ++ if (symval > pc) ++ pc -= (max_alignment > 4 ? max_alignment : 0); ++ else if (symval < pc) ++ pc += (max_alignment > 4 ? max_alignment : 0); ++ } + + /* Is pcalau12i + addi.d insns? */ + if (!LARCH_INSN_JIRL (jirl) +@@ -5340,7 +5385,7 @@ loongarch_relax_call36 (bfd *abfd, asection *sec, asection *sym_sec, + /* Adjust relocations. 
*/ + rel->r_info = ELFNN_R_INFO (ELFNN_R_SYM (rel->r_info), R_LARCH_B26); + /* Delete jirl instruction. */ +- loongarch_relax_delete_bytes (abfd, rel->r_offset + 4, 4, info); ++ loongarch_relax_delete_or_nop (abfd, sec, rel->r_offset + 4, 4, info); + return true; + } + +@@ -5389,8 +5434,9 @@ bfd_elfNN_loongarch_set_data_segment_info (struct bfd_link_info *info, + loongarch_elf_hash_table (info)->data_segment_phase = data_segment_phase; + } + +-/* Implement R_LARCH_ALIGN by deleting excess alignment NOPs. +- Once we've handled an R_LARCH_ALIGN, we can't relax anything else. */ ++/* Honor R_LARCH_ALIGN requests by deleting excess alignment NOPs. ++ Once we've handled an R_LARCH_ALIGN, we can't relax anything else by deleting ++ bytes, or alignment will be disrupted. */ + static bool + loongarch_relax_align (bfd *abfd, asection *sec, asection *sym_sec, + Elf_Internal_Rela *rel, +@@ -5431,9 +5477,9 @@ loongarch_relax_align (bfd *abfd, asection *sec, asection *sym_sec, + return false; + } + +- /* Once we've handled an R_LARCH_ALIGN in a section, +- we can't relax anything else in this section. */ +- sec->sec_flg0 = true; ++ /* Once we've handled an R_LARCH_ALIGN in a section, we can't relax anything ++ else by deleting bytes, or alignment will be disrupted. */ ++ loongarch_sec_closed_for_deletion (sec) = true; + rel->r_info = ELFNN_R_INFO (0, R_LARCH_NONE); + + /* If skipping more bytes than the specified maximum, +@@ -5478,17 +5524,22 @@ loongarch_relax_tls_ld_gd_desc (bfd *abfd, asection *sec, asection *sym_sec, + symval = sec_addr (sec) + + loongarch_calc_relaxed_addr (info, symval - sec_addr (sec)); + +- /* If pc and symbol not in the same segment, add/sub segment alignment. */ +- if (!loongarch_two_sections_in_same_segment (info->output_bfd, +- sec->output_section, +- sym_sec->output_section)) +- max_alignment = info->maxpagesize > max_alignment ? info->maxpagesize +- : max_alignment; +- +- if (symval > pc) +- pc -= (max_alignment > 4 ? 
max_alignment : 0); +- else if (symval < pc) +- pc += (max_alignment > 4 ? max_alignment : 0); ++ /* If pc and symbol not in the same segment, add/sub segment alignment if the ++ section has not undergone alignment processing because distances may grow ++ after alignment. */ ++ if (!loongarch_sec_closed_for_deletion (sec)) ++ { ++ if (!loongarch_two_sections_in_same_segment (info->output_bfd, ++ sec->output_section, ++ sym_sec->output_section)) ++ max_alignment = info->maxpagesize > max_alignment ? info->maxpagesize ++ : max_alignment; ++ ++ if (symval > pc) ++ pc -= (max_alignment > 4 ? max_alignment : 0); ++ else if (symval < pc) ++ pc += (max_alignment > 4 ? max_alignment : 0); ++ } + + const uint32_t pcaddi = LARCH_OP_PCADDI; + +@@ -5531,7 +5582,7 @@ loongarch_relax_tls_ld_gd_desc (bfd *abfd, asection *sec, asection *sym_sec, + } + rel_lo->r_info = ELFNN_R_INFO (0, R_LARCH_NONE); + +- loongarch_relax_delete_bytes (abfd, rel_lo->r_offset, 4, info); ++ loongarch_relax_delete_or_nop (abfd, sec, rel_lo->r_offset, 4, info); + + return true; + } +@@ -5575,15 +5626,25 @@ loongarch_elf_relax_section (bfd *abfd, asection *sec, + if (htab->layout_mutating_for_relr) + return true; + ++ /* Definition of LoongArch linker relaxation passes: ++ ++ - Pass 0: relaxes everything except R_LARCH_ALIGN, byte deletions are ++ performed; skipped if disable_target_specific_optimizations. ++ - Pass 1: handles alignment, byte deletions are performed. Sections with ++ R_LARCH_ALIGN relocations are marked closed for further byte ++ deletion in order to not disturb alignment. This pass is NOT ++ skipped even if disable_target_specific_optimizations is true. ++ - Pass 2: identical to Pass 0, but replacing relaxed insns with NOP in case ++ the containing section is closed for deletion; skip condition ++ also same as Pass 0. 
*/ ++ bool is_alignment_pass = info->relax_pass == 1; + if (bfd_link_relocatable (info) +- || sec->sec_flg0 + || sec->reloc_count == 0 + || (sec->flags & SEC_RELOC) == 0 + || (sec->flags & SEC_HAS_CONTENTS) == 0 + /* The exp_seg_relro_adjust is enum phase_enum (0x4). */ + || *(htab->data_segment_phase) == 4 +- || (info->disable_target_specific_optimizations +- && info->relax_pass == 0)) ++ || (info->disable_target_specific_optimizations && !is_alignment_pass)) + return true; + + struct bfd_elf_section_data *data = elf_section_data (sec); +@@ -5619,7 +5680,10 @@ loongarch_elf_relax_section (bfd *abfd, asection *sec, + htab->max_alignment = max_alignment; + } + +- splay_tree pdops = pending_delete_ops_new (abfd); ++ splay_tree pdops = NULL; ++ if (!loongarch_sec_closed_for_deletion (sec)) ++ pdops = pending_delete_ops_new (abfd); ++ + htab->pending_delete_ops = pdops; + + for (unsigned int i = 0; i < sec->reloc_count; i++) +@@ -5661,7 +5725,13 @@ loongarch_elf_relax_section (bfd *abfd, asection *sec, + } + + relax_func_t relax_func = NULL; +- if (info->relax_pass == 0) ++ if (is_alignment_pass) ++ { ++ if (r_type != R_LARCH_ALIGN) ++ continue; ++ relax_func = loongarch_relax_align; ++ } ++ else + { + switch (r_type) + { +@@ -5715,10 +5785,6 @@ loongarch_elf_relax_section (bfd *abfd, asection *sec, + continue; + } + } +- else if (info->relax_pass == 1 && r_type == R_LARCH_ALIGN) +- relax_func = loongarch_relax_align; +- else +- continue; + + /* Four kind of relocations: + Normal: symval is the symbol address. 
+@@ -5854,9 +5920,12 @@ loongarch_elf_relax_section (bfd *abfd, asection *sec, + info, again, max_alignment); + } + +- loongarch_relax_perform_deletes (abfd, sec, info); +- htab->pending_delete_ops = NULL; +- splay_tree_delete (pdops); ++ if (pdops) ++ { ++ loongarch_relax_perform_deletes (abfd, sec, info); ++ htab->pending_delete_ops = NULL; ++ splay_tree_delete (pdops); ++ } + + return true; + } +diff --git a/ld/emultempl/loongarchelf.em b/ld/emultempl/loongarchelf.em +index e50d85d0..7843410d 100644 +--- a/ld/emultempl/loongarchelf.em ++++ b/ld/emultempl/loongarchelf.em +@@ -59,7 +59,7 @@ larch_elf_before_allocation (void) + ENABLE_RELAXATION; + } + +- link_info.relax_pass = 2; ++ link_info.relax_pass = 3; + } + + static void +-- +2.47.3 + diff --git a/0012-LoongArch-Fix-errors-due-to-version-differences.patch b/0012-LoongArch-Fix-errors-due-to-version-differences.patch new file mode 100644 index 0000000..8f000ea --- /dev/null +++ b/0012-LoongArch-Fix-errors-due-to-version-differences.patch @@ -0,0 +1,103 @@ +From 15c6f6873dfaf95cf9becbdcdf4f30a1fa20f562 Mon Sep 17 00:00:00 2001 +From: Peng Fan +Date: Mon, 21 Jul 2025 02:01:17 +0000 +Subject: [PATCH 12/12] LoongArch: Fix errors due to version differences + +Signed-off-by: Peng Fan +--- + bfd/elfnn-loongarch.c | 50 +++++++++++++++++++++++++++++---------- + gas/config/tc-loongarch.c | 2 +- + 2 files changed, 39 insertions(+), 13 deletions(-) + +diff --git a/bfd/elfnn-loongarch.c b/bfd/elfnn-loongarch.c +index 48de9f8e..2d9c6444 100644 +--- a/bfd/elfnn-loongarch.c ++++ b/bfd/elfnn-loongarch.c +@@ -5392,33 +5392,59 @@ loongarch_relax_call36 (bfd *abfd, asection *sec, asection *sym_sec, + /* Relax pcalau12i,ld.d => pcalau12i,addi.d. 
*/ + static bool + loongarch_relax_pcala_ld (bfd *abfd, asection *sec, +- asection *sym_sec ATTRIBUTE_UNUSED, +- Elf_Internal_Rela *rel_hi, +- bfd_vma symval ATTRIBUTE_UNUSED, +- struct bfd_link_info *info ATTRIBUTE_UNUSED, +- bool *again ATTRIBUTE_UNUSED, +- bfd_vma max_alignment ATTRIBUTE_UNUSED) ++ asection *sym_sec, ++ Elf_Internal_Rela *rel_hi, ++ bfd_vma symval, ++ struct bfd_link_info *info, ++ bool *again ATTRIBUTE_UNUSED, ++ bfd_vma max_alignment) + { + bfd_byte *contents = elf_section_data (sec)->this_hdr.contents; + Elf_Internal_Rela *rel_lo = rel_hi + 2; + uint32_t pca = bfd_get (32, abfd, contents + rel_hi->r_offset); + uint32_t ld = bfd_get (32, abfd, contents + rel_lo->r_offset); +- uint32_t rd = LARCH_GET_RD(pca); ++ uint32_t rd = LARCH_GET_RD (pca); + uint32_t addi_d = LARCH_OP_ADDI_D; + ++ /* This section's output_offset need to subtract the bytes of instructions ++ relaxed by the previous sections, so it needs to be updated beforehand. ++ size_input_section already took care of updating it after relaxation, ++ so we additionally update once here. */ ++ sec->output_offset = sec->output_section->size; ++ bfd_vma pc = sec_addr (sec) ++ + loongarch_calc_relaxed_addr (info, rel_hi->r_offset); ++ if (sym_sec == sec) ++ symval = sec_addr (sec) ++ + loongarch_calc_relaxed_addr (info, symval - sec_addr (sec)); ++ ++ /* If pc and symbol not in the same segment, add/sub segment alignment. */ ++ if (!loongarch_two_sections_in_same_segment (info->output_bfd, ++ sec->output_section, ++ sym_sec->output_section)) ++ max_alignment = info->maxpagesize > max_alignment ? info->maxpagesize ++ : max_alignment; ++ ++ if (symval > pc) ++ pc -= (max_alignment > 4 ? max_alignment : 0); ++ else if (symval < pc) ++ pc += (max_alignment > 4 ? 
max_alignment : 0); ++ + if ((ELFNN_R_TYPE (rel_lo->r_info) != R_LARCH_GOT_PC_LO12) +- || (LARCH_GET_RD(ld) != rd) +- || (LARCH_GET_RJ(ld) != rd) +- || !LARCH_INSN_LD_D(ld)) ++ || (LARCH_GET_RD (ld) != rd) ++ || (LARCH_GET_RJ (ld) != rd) ++ || !LARCH_INSN_LD_D (ld) ++ /* Within +-2G addressing range. */ ++ || (bfd_signed_vma)(symval - pc) < (bfd_signed_vma)(int32_t)0x80000000 ++ || (bfd_signed_vma)(symval - pc) > (bfd_signed_vma)(int32_t)0x7fffffff) + return false; + + addi_d = addi_d | (rd << 5) | rd; + bfd_put (32, abfd, addi_d, contents + rel_lo->r_offset); + + rel_hi->r_info = ELFNN_R_INFO (ELFNN_R_SYM (rel_hi->r_info), +- R_LARCH_PCALA_HI20); ++ R_LARCH_PCALA_HI20); + rel_lo->r_info = ELFNN_R_INFO (ELFNN_R_SYM (rel_lo->r_info), +- R_LARCH_PCALA_LO12); ++ R_LARCH_PCALA_LO12); + return true; + } + +diff --git a/gas/config/tc-loongarch.c b/gas/config/tc-loongarch.c +index 1d13cbb6..9be48be8 100644 +--- a/gas/config/tc-loongarch.c ++++ b/gas/config/tc-loongarch.c +@@ -613,7 +613,7 @@ s_loongarch_align (int arg) + + static const pseudo_typeS loongarch_pseudo_table[] = + { +- { "align", s_loongarch_align, -4 }, ++ { "align", s_loongarch_align, 0 }, + { "dword", cons, 8 }, + { "word", cons, 4 }, + { "half", cons, 2 }, +-- +2.47.3 + diff --git a/binutils.spec b/binutils.spec index c4aed6d..6828e06 100644 --- a/binutils.spec +++ b/binutils.spec @@ -2,7 +2,7 @@ Summary: A GNU collection of binary utilities Name: binutils%{?_with_debug:-debug} Version: 2.41 -Release: 17 +Release: 18 License: GPL-3.0-or-later AND (GPL-3.0-or-later WITH Bison-exception-2.2) AND (LGPL-2.0-or-later WITH GCC-exception-2.0) AND BSD-3-Clause AND GFDL-1.3-or-later AND GPL-2.0-or-later AND LGPL-2.1-or-later AND LGPL-2.0-or-later URL: https://sourceware.org/binutils @@ -300,7 +300,19 @@ Patch3131: Modify-test-because-of-readelf-not-update.patch Patch3132: remove-file-produced-by-bison.patch Patch3133: replace-space-with-tab.patch Patch3134: LoongArch-binutils-compatible-with-older-gcc.patch - +#-- 
+Patch3135: 0001-LoongArch-Default-to-a-maximum-page-size-of-64KiB.patch +Patch3136: 0002-LoongArch-Check-PC-relative-relocations-for-shared-l.patch +Patch3137: 0003-LoongArch-Fix-resolution-of-undefined-weak-hidden-pr.patch +Patch3138: 0004-LoongArch-Allow-R_LARCH_PCALA_HI20-or-R_LARCH_PCREL2.patch +Patch3139: 0005-loongson-buffer-overflow.patch +Patch3140: 0006-LoongArch-Fix-disassembly-option-parsing-stopping-at.patch +Patch3141: 0007-loongarch-gas-resolving-constant-expressions.patch +Patch3142: 0008-ubsan-undefined-shift-in-loongarch_elf_add_sub_reloc.patch +Patch3143: 0009-loongarch-gcc-4.5-build-fixes.patch +Patch3144: 0010-LoongArch-Batch-delete-bytes-at-the-end-of-each-rela.patch +Patch3145: 0011-LoongArch-Allow-to-relax-instructions-into-NOPs-afte.patch +Patch3146: 0012-LoongArch-Fix-errors-due-to-version-differences.patch # Part 5000 - # Purpose: Use /lib64 and /usr/lib64 instead of /lib and /usr/lib in the @@ -1389,6 +1401,9 @@ exit 0 #---------------------------------------------------------------------------- %changelog +* Mon Aug 4 2025 Peng Fan - 2.41-18 +- LoongArch: common sync from upstream. + * Thu Jul 24 2025 Huanyu Li -2.41-17 - Fix CVE-2025-7545: heap-based buffer overflow in objcopy -- Gitee