diff --git a/0005-cmd-asm-add-RDTIME-L-H-.W-RDTIME.D-support-for-loong.patch b/0005-cmd-asm-add-RDTIME-L-H-.W-RDTIME.D-support-for-loong.patch new file mode 100644 index 0000000000000000000000000000000000000000..0e70c96a56eff810093c9a50bca7f48bdb26c7e7 --- /dev/null +++ b/0005-cmd-asm-add-RDTIME-L-H-.W-RDTIME.D-support-for-loong.patch @@ -0,0 +1,858 @@ +From eacad4aa5e48cef5fc9ed83a53214c2b28bb10db Mon Sep 17 00:00:00 2001 +From: Guoqi Chen +Date: Thu, 4 Aug 2022 18:13:59 +0800 +Subject: [PATCH 05/62] cmd/asm: add RDTIME{L,H}.W, RDTIME.D support for + loong64 + +Instruction formats: rdtime rd, rj + +The RDTIME family of instructions is used to read constant frequency timer +information: the stable counter value is written into the general register +rd, and the counter ID information is written into the general register rj. +(Note: both of its register operands are outputs.) + +Ref: https://loongson.github.io/LoongArch-Documentation/LoongArch-Vol1-EN.html + +Change-Id: Ida5bbb28316ef70b5f616dac3e6fa6f2e77875b5 +Reviewed-on: https://go-review.googlesource.com/c/go/+/421655 +Reviewed-by: xiaodong liu +Reviewed-by: WANG Xuerui +Reviewed-by: Wayne Zuo +Reviewed-by: Cherry Mui +Reviewed-by: Michael Knyszek +Run-TryBot: Wayne Zuo +TryBot-Result: Gopher Robot +Reviewed-by: Meidan Li +--- + src/cmd/asm/internal/arch/loong64.go | 11 + + src/cmd/asm/internal/asm/asm.go | 12 + + .../asm/internal/asm/testdata/loong64enc1.s | 4 + + src/cmd/internal/obj/loong64/a.out.go | 5 + + src/cmd/internal/obj/loong64/anames.go | 3 + + src/cmd/internal/obj/loong64/asm.go | 647 +++++++++--------- + src/cmd/internal/obj/util.go | 1 + + 7 files changed, 373 insertions(+), 310 deletions(-) + +diff --git a/src/cmd/asm/internal/arch/loong64.go b/src/cmd/asm/internal/arch/loong64.go +index ebf842c1f2..2958ee1a86 100644 +--- a/src/cmd/asm/internal/arch/loong64.go ++++ b/src/cmd/asm/internal/arch/loong64.go +@@ -44,6 +44,17 @@ func IsLoong64MUL(op obj.As) bool { + return false + } + ++// 
IsLoong64RDTIME reports whether the op (as defined by an loong64.A* ++// constant) is one of the RDTIMELW/RDTIMEHW/RDTIMED instructions that ++// require special handling. ++func IsLoong64RDTIME(op obj.As) bool { ++ switch op { ++ case loong64.ARDTIMELW, loong64.ARDTIMEHW, loong64.ARDTIMED: ++ return true ++ } ++ return false ++} ++ + func loong64RegisterNumber(name string, n int16) (int16, bool) { + switch name { + case "F": +diff --git a/src/cmd/asm/internal/asm/asm.go b/src/cmd/asm/internal/asm/asm.go +index 00fb7f417f..4d0eeacc74 100644 +--- a/src/cmd/asm/internal/asm/asm.go ++++ b/src/cmd/asm/internal/asm/asm.go +@@ -642,6 +642,18 @@ func (p *Parser) asmInstruction(op obj.As, cond string, a []obj.Addr) { + prog.Reg = p.getRegister(prog, op, &a[1]) + break + } ++ ++ if arch.IsLoong64RDTIME(op) { ++ // The Loong64 RDTIME family of instructions is a bit special, ++ // in that both its register operands are outputs ++ prog.To = a[0] ++ if a[1].Type != obj.TYPE_REG { ++ p.errorf("invalid addressing modes for 2nd operand to %s instruction, must be register", op) ++ return ++ } ++ prog.RegTo2 = a[1].Reg ++ break ++ } + } + prog.From = a[0] + prog.To = a[1] +diff --git a/src/cmd/asm/internal/asm/testdata/loong64enc1.s b/src/cmd/asm/internal/asm/testdata/loong64enc1.s +index 83bb6ec078..0cc077c091 100644 +--- a/src/cmd/asm/internal/asm/testdata/loong64enc1.s ++++ b/src/cmd/asm/internal/asm/testdata/loong64enc1.s +@@ -218,3 +218,7 @@ lable2: + CMPGEF F4, R5 // a090130c + CMPGED F4, R5 // a090230c + CMPEQD F4, R5 // a010220c ++ ++ RDTIMELW R4, R0 // 80600000 ++ RDTIMEHW R4, R0 // 80640000 ++ RDTIMED R4, R5 // 85680000 +diff --git a/src/cmd/internal/obj/loong64/a.out.go b/src/cmd/internal/obj/loong64/a.out.go +index 88bf714c5f..10cf396669 100644 +--- a/src/cmd/internal/obj/loong64/a.out.go ++++ b/src/cmd/internal/obj/loong64/a.out.go +@@ -391,6 +391,11 @@ const ( + AMOVVF + AMOVVD + ++ // 2.2.10. 
Other Miscellaneous Instructions ++ ARDTIMELW ++ ARDTIMEHW ++ ARDTIMED ++ + ALAST + + // aliases +diff --git a/src/cmd/internal/obj/loong64/anames.go b/src/cmd/internal/obj/loong64/anames.go +index 20e7465556..eb13da20c3 100644 +--- a/src/cmd/internal/obj/loong64/anames.go ++++ b/src/cmd/internal/obj/loong64/anames.go +@@ -130,5 +130,8 @@ var Anames = []string{ + "MOVDV", + "MOVVF", + "MOVVD", ++ "RDTIMELW", ++ "RDTIMEHW", ++ "RDTIMED", + "LAST", + } +diff --git a/src/cmd/internal/obj/loong64/asm.go b/src/cmd/internal/obj/loong64/asm.go +index df3e9bf866..982ddd8103 100644 +--- a/src/cmd/internal/obj/loong64/asm.go ++++ b/src/cmd/internal/obj/loong64/asm.go +@@ -33,9 +33,10 @@ const ( + + type Optab struct { + as obj.As +- a1 uint8 +- a2 uint8 +- a3 uint8 ++ a1 uint8 // first source operand ++ a2 uint8 // 2nd source operand ++ a3 uint8 // first destination operand ++ a4 uint8 // 2nd destination operand + type_ int8 + size int8 + param int16 +@@ -48,308 +49,312 @@ const ( + ) + + var optab = []Optab{ +- {obj.ATEXT, C_ADDR, C_NONE, C_TEXTSIZE, 0, 0, 0, 0, 0}, +- +- {AMOVW, C_REG, C_NONE, C_REG, 1, 4, 0, 0, 0}, +- {AMOVV, C_REG, C_NONE, C_REG, 1, 4, 0, sys.Loong64, 0}, +- {AMOVB, C_REG, C_NONE, C_REG, 12, 8, 0, 0, NOTUSETMP}, +- {AMOVBU, C_REG, C_NONE, C_REG, 13, 4, 0, 0, 0}, +- {AMOVWU, C_REG, C_NONE, C_REG, 14, 8, 0, sys.Loong64, NOTUSETMP}, +- +- {ASUB, C_REG, C_REG, C_REG, 2, 4, 0, 0, 0}, +- {ASUBV, C_REG, C_REG, C_REG, 2, 4, 0, sys.Loong64, 0}, +- {AADD, C_REG, C_REG, C_REG, 2, 4, 0, 0, 0}, +- {AADDV, C_REG, C_REG, C_REG, 2, 4, 0, sys.Loong64, 0}, +- {AAND, C_REG, C_REG, C_REG, 2, 4, 0, 0, 0}, +- {ASUB, C_REG, C_NONE, C_REG, 2, 4, 0, 0, 0}, +- {ASUBV, C_REG, C_NONE, C_REG, 2, 4, 0, sys.Loong64, 0}, +- {AADD, C_REG, C_NONE, C_REG, 2, 4, 0, 0, 0}, +- {AADDV, C_REG, C_NONE, C_REG, 2, 4, 0, sys.Loong64, 0}, +- {AAND, C_REG, C_NONE, C_REG, 2, 4, 0, 0, 0}, +- {ANEGW, C_REG, C_NONE, C_REG, 2, 4, 0, 0, 0}, +- {ANEGV, C_REG, C_NONE, C_REG, 2, 4, 0, sys.Loong64, 0}, +- 
{AMASKEQZ, C_REG, C_REG, C_REG, 2, 4, 0, 0, 0}, +- +- {ASLL, C_REG, C_NONE, C_REG, 9, 4, 0, 0, 0}, +- {ASLL, C_REG, C_REG, C_REG, 9, 4, 0, 0, 0}, +- {ASLLV, C_REG, C_NONE, C_REG, 9, 4, 0, sys.Loong64, 0}, +- {ASLLV, C_REG, C_REG, C_REG, 9, 4, 0, sys.Loong64, 0}, +- {ACLO, C_REG, C_NONE, C_REG, 9, 4, 0, 0, 0}, +- +- {AADDF, C_FREG, C_NONE, C_FREG, 32, 4, 0, 0, 0}, +- {AADDF, C_FREG, C_REG, C_FREG, 32, 4, 0, 0, 0}, +- {ACMPEQF, C_FREG, C_REG, C_NONE, 32, 4, 0, 0, 0}, +- {AABSF, C_FREG, C_NONE, C_FREG, 33, 4, 0, 0, 0}, +- {AMOVVF, C_FREG, C_NONE, C_FREG, 33, 4, 0, sys.Loong64, 0}, +- {AMOVF, C_FREG, C_NONE, C_FREG, 33, 4, 0, 0, 0}, +- {AMOVD, C_FREG, C_NONE, C_FREG, 33, 4, 0, 0, 0}, +- +- {AMOVW, C_REG, C_NONE, C_SEXT, 7, 4, 0, sys.Loong64, 0}, +- {AMOVWU, C_REG, C_NONE, C_SEXT, 7, 4, 0, sys.Loong64, 0}, +- {AMOVV, C_REG, C_NONE, C_SEXT, 7, 4, 0, sys.Loong64, 0}, +- {AMOVB, C_REG, C_NONE, C_SEXT, 7, 4, 0, sys.Loong64, 0}, +- {AMOVBU, C_REG, C_NONE, C_SEXT, 7, 4, 0, sys.Loong64, 0}, +- {AMOVWL, C_REG, C_NONE, C_SEXT, 7, 4, 0, sys.Loong64, 0}, +- {AMOVVL, C_REG, C_NONE, C_SEXT, 7, 4, 0, sys.Loong64, 0}, +- {AMOVW, C_REG, C_NONE, C_SAUTO, 7, 4, REGSP, 0, 0}, +- {AMOVWU, C_REG, C_NONE, C_SAUTO, 7, 4, REGSP, sys.Loong64, 0}, +- {AMOVV, C_REG, C_NONE, C_SAUTO, 7, 4, REGSP, sys.Loong64, 0}, +- {AMOVB, C_REG, C_NONE, C_SAUTO, 7, 4, REGSP, 0, 0}, +- {AMOVBU, C_REG, C_NONE, C_SAUTO, 7, 4, REGSP, 0, 0}, +- {AMOVWL, C_REG, C_NONE, C_SAUTO, 7, 4, REGSP, 0, 0}, +- {AMOVVL, C_REG, C_NONE, C_SAUTO, 7, 4, REGSP, sys.Loong64, 0}, +- {AMOVW, C_REG, C_NONE, C_SOREG, 7, 4, REGZERO, 0, 0}, +- {AMOVWU, C_REG, C_NONE, C_SOREG, 7, 4, REGZERO, sys.Loong64, 0}, +- {AMOVV, C_REG, C_NONE, C_SOREG, 7, 4, REGZERO, sys.Loong64, 0}, +- {AMOVB, C_REG, C_NONE, C_SOREG, 7, 4, REGZERO, 0, 0}, +- {AMOVBU, C_REG, C_NONE, C_SOREG, 7, 4, REGZERO, 0, 0}, +- {AMOVWL, C_REG, C_NONE, C_SOREG, 7, 4, REGZERO, 0, 0}, +- {AMOVVL, C_REG, C_NONE, C_SOREG, 7, 4, REGZERO, sys.Loong64, 0}, +- {ASC, C_REG, C_NONE, 
C_SOREG, 7, 4, REGZERO, 0, 0}, +- {ASCV, C_REG, C_NONE, C_SOREG, 7, 4, REGZERO, sys.Loong64, 0}, +- +- {AMOVW, C_SEXT, C_NONE, C_REG, 8, 4, 0, sys.Loong64, 0}, +- {AMOVWU, C_SEXT, C_NONE, C_REG, 8, 4, 0, sys.Loong64, 0}, +- {AMOVV, C_SEXT, C_NONE, C_REG, 8, 4, 0, sys.Loong64, 0}, +- {AMOVB, C_SEXT, C_NONE, C_REG, 8, 4, 0, sys.Loong64, 0}, +- {AMOVBU, C_SEXT, C_NONE, C_REG, 8, 4, 0, sys.Loong64, 0}, +- {AMOVWL, C_SEXT, C_NONE, C_REG, 8, 4, 0, sys.Loong64, 0}, +- {AMOVVL, C_SEXT, C_NONE, C_REG, 8, 4, 0, sys.Loong64, 0}, +- {AMOVW, C_SAUTO, C_NONE, C_REG, 8, 4, REGSP, 0, 0}, +- {AMOVWU, C_SAUTO, C_NONE, C_REG, 8, 4, REGSP, sys.Loong64, 0}, +- {AMOVV, C_SAUTO, C_NONE, C_REG, 8, 4, REGSP, sys.Loong64, 0}, +- {AMOVB, C_SAUTO, C_NONE, C_REG, 8, 4, REGSP, 0, 0}, +- {AMOVBU, C_SAUTO, C_NONE, C_REG, 8, 4, REGSP, 0, 0}, +- {AMOVWL, C_SAUTO, C_NONE, C_REG, 8, 4, REGSP, 0, 0}, +- {AMOVVL, C_SAUTO, C_NONE, C_REG, 8, 4, REGSP, sys.Loong64, 0}, +- {AMOVW, C_SOREG, C_NONE, C_REG, 8, 4, REGZERO, 0, 0}, +- {AMOVWU, C_SOREG, C_NONE, C_REG, 8, 4, REGZERO, sys.Loong64, 0}, +- {AMOVV, C_SOREG, C_NONE, C_REG, 8, 4, REGZERO, sys.Loong64, 0}, +- {AMOVB, C_SOREG, C_NONE, C_REG, 8, 4, REGZERO, 0, 0}, +- {AMOVBU, C_SOREG, C_NONE, C_REG, 8, 4, REGZERO, 0, 0}, +- {AMOVWL, C_SOREG, C_NONE, C_REG, 8, 4, REGZERO, 0, 0}, +- {AMOVVL, C_SOREG, C_NONE, C_REG, 8, 4, REGZERO, sys.Loong64, 0}, +- {ALL, C_SOREG, C_NONE, C_REG, 8, 4, REGZERO, 0, 0}, +- {ALLV, C_SOREG, C_NONE, C_REG, 8, 4, REGZERO, sys.Loong64, 0}, +- +- {AMOVW, C_REG, C_NONE, C_LEXT, 35, 12, 0, sys.Loong64, 0}, +- {AMOVWU, C_REG, C_NONE, C_LEXT, 35, 12, 0, sys.Loong64, 0}, +- {AMOVV, C_REG, C_NONE, C_LEXT, 35, 12, 0, sys.Loong64, 0}, +- {AMOVB, C_REG, C_NONE, C_LEXT, 35, 12, 0, sys.Loong64, 0}, +- {AMOVBU, C_REG, C_NONE, C_LEXT, 35, 12, 0, sys.Loong64, 0}, +- {AMOVW, C_REG, C_NONE, C_LAUTO, 35, 12, REGSP, 0, 0}, +- {AMOVWU, C_REG, C_NONE, C_LAUTO, 35, 12, REGSP, sys.Loong64, 0}, +- {AMOVV, C_REG, C_NONE, C_LAUTO, 35, 12, REGSP, sys.Loong64, 
0}, +- {AMOVB, C_REG, C_NONE, C_LAUTO, 35, 12, REGSP, 0, 0}, +- {AMOVBU, C_REG, C_NONE, C_LAUTO, 35, 12, REGSP, 0, 0}, +- {AMOVW, C_REG, C_NONE, C_LOREG, 35, 12, REGZERO, 0, 0}, +- {AMOVWU, C_REG, C_NONE, C_LOREG, 35, 12, REGZERO, sys.Loong64, 0}, +- {AMOVV, C_REG, C_NONE, C_LOREG, 35, 12, REGZERO, sys.Loong64, 0}, +- {AMOVB, C_REG, C_NONE, C_LOREG, 35, 12, REGZERO, 0, 0}, +- {AMOVBU, C_REG, C_NONE, C_LOREG, 35, 12, REGZERO, 0, 0}, +- {ASC, C_REG, C_NONE, C_LOREG, 35, 12, REGZERO, 0, 0}, +- {AMOVW, C_REG, C_NONE, C_ADDR, 50, 8, 0, 0, 0}, +- {AMOVW, C_REG, C_NONE, C_ADDR, 50, 8, 0, sys.Loong64, 0}, +- {AMOVWU, C_REG, C_NONE, C_ADDR, 50, 8, 0, sys.Loong64, 0}, +- {AMOVV, C_REG, C_NONE, C_ADDR, 50, 8, 0, sys.Loong64, 0}, +- {AMOVB, C_REG, C_NONE, C_ADDR, 50, 8, 0, 0, 0}, +- {AMOVB, C_REG, C_NONE, C_ADDR, 50, 8, 0, sys.Loong64, 0}, +- {AMOVBU, C_REG, C_NONE, C_ADDR, 50, 8, 0, 0, 0}, +- {AMOVBU, C_REG, C_NONE, C_ADDR, 50, 8, 0, sys.Loong64, 0}, +- {AMOVW, C_REG, C_NONE, C_TLS, 53, 16, 0, 0, 0}, +- {AMOVWU, C_REG, C_NONE, C_TLS, 53, 16, 0, sys.Loong64, 0}, +- {AMOVV, C_REG, C_NONE, C_TLS, 53, 16, 0, sys.Loong64, 0}, +- {AMOVB, C_REG, C_NONE, C_TLS, 53, 16, 0, 0, 0}, +- {AMOVBU, C_REG, C_NONE, C_TLS, 53, 16, 0, 0, 0}, +- +- {AMOVW, C_LEXT, C_NONE, C_REG, 36, 12, 0, sys.Loong64, 0}, +- {AMOVWU, C_LEXT, C_NONE, C_REG, 36, 12, 0, sys.Loong64, 0}, +- {AMOVV, C_LEXT, C_NONE, C_REG, 36, 12, 0, sys.Loong64, 0}, +- {AMOVB, C_LEXT, C_NONE, C_REG, 36, 12, 0, sys.Loong64, 0}, +- {AMOVBU, C_LEXT, C_NONE, C_REG, 36, 12, 0, sys.Loong64, 0}, +- {AMOVW, C_LAUTO, C_NONE, C_REG, 36, 12, REGSP, 0, 0}, +- {AMOVWU, C_LAUTO, C_NONE, C_REG, 36, 12, REGSP, sys.Loong64, 0}, +- {AMOVV, C_LAUTO, C_NONE, C_REG, 36, 12, REGSP, sys.Loong64, 0}, +- {AMOVB, C_LAUTO, C_NONE, C_REG, 36, 12, REGSP, 0, 0}, +- {AMOVBU, C_LAUTO, C_NONE, C_REG, 36, 12, REGSP, 0, 0}, +- {AMOVW, C_LOREG, C_NONE, C_REG, 36, 12, REGZERO, 0, 0}, +- {AMOVWU, C_LOREG, C_NONE, C_REG, 36, 12, REGZERO, sys.Loong64, 0}, +- {AMOVV, 
C_LOREG, C_NONE, C_REG, 36, 12, REGZERO, sys.Loong64, 0}, +- {AMOVB, C_LOREG, C_NONE, C_REG, 36, 12, REGZERO, 0, 0}, +- {AMOVBU, C_LOREG, C_NONE, C_REG, 36, 12, REGZERO, 0, 0}, +- {AMOVW, C_ADDR, C_NONE, C_REG, 51, 8, 0, 0, 0}, +- {AMOVW, C_ADDR, C_NONE, C_REG, 51, 8, 0, sys.Loong64, 0}, +- {AMOVWU, C_ADDR, C_NONE, C_REG, 51, 8, 0, sys.Loong64, 0}, +- {AMOVV, C_ADDR, C_NONE, C_REG, 51, 8, 0, sys.Loong64, 0}, +- {AMOVB, C_ADDR, C_NONE, C_REG, 51, 8, 0, 0, 0}, +- {AMOVB, C_ADDR, C_NONE, C_REG, 51, 8, 0, sys.Loong64, 0}, +- {AMOVBU, C_ADDR, C_NONE, C_REG, 51, 8, 0, 0, 0}, +- {AMOVBU, C_ADDR, C_NONE, C_REG, 51, 8, 0, sys.Loong64, 0}, +- {AMOVW, C_TLS, C_NONE, C_REG, 54, 16, 0, 0, 0}, +- {AMOVWU, C_TLS, C_NONE, C_REG, 54, 16, 0, sys.Loong64, 0}, +- {AMOVV, C_TLS, C_NONE, C_REG, 54, 16, 0, sys.Loong64, 0}, +- {AMOVB, C_TLS, C_NONE, C_REG, 54, 16, 0, 0, 0}, +- {AMOVBU, C_TLS, C_NONE, C_REG, 54, 16, 0, 0, 0}, +- +- {AMOVW, C_SECON, C_NONE, C_REG, 3, 4, 0, sys.Loong64, 0}, +- {AMOVV, C_SECON, C_NONE, C_REG, 3, 4, 0, sys.Loong64, 0}, +- {AMOVW, C_SACON, C_NONE, C_REG, 3, 4, REGSP, 0, 0}, +- {AMOVV, C_SACON, C_NONE, C_REG, 3, 4, REGSP, sys.Loong64, 0}, +- {AMOVW, C_LECON, C_NONE, C_REG, 52, 8, 0, 0, NOTUSETMP}, +- {AMOVW, C_LECON, C_NONE, C_REG, 52, 8, 0, sys.Loong64, NOTUSETMP}, +- {AMOVV, C_LECON, C_NONE, C_REG, 52, 8, 0, sys.Loong64, NOTUSETMP}, +- +- {AMOVW, C_LACON, C_NONE, C_REG, 26, 12, REGSP, 0, 0}, +- {AMOVV, C_LACON, C_NONE, C_REG, 26, 12, REGSP, sys.Loong64, 0}, +- {AMOVW, C_ADDCON, C_NONE, C_REG, 3, 4, REGZERO, 0, 0}, +- {AMOVV, C_ADDCON, C_NONE, C_REG, 3, 4, REGZERO, sys.Loong64, 0}, +- {AMOVW, C_ANDCON, C_NONE, C_REG, 3, 4, REGZERO, 0, 0}, +- {AMOVV, C_ANDCON, C_NONE, C_REG, 3, 4, REGZERO, sys.Loong64, 0}, +- {AMOVW, C_STCON, C_NONE, C_REG, 55, 12, 0, 0, 0}, +- {AMOVV, C_STCON, C_NONE, C_REG, 55, 12, 0, sys.Loong64, 0}, +- +- {AMOVW, C_UCON, C_NONE, C_REG, 24, 4, 0, 0, 0}, +- {AMOVV, C_UCON, C_NONE, C_REG, 24, 4, 0, sys.Loong64, 0}, +- {AMOVW, C_LCON, C_NONE, 
C_REG, 19, 8, 0, 0, NOTUSETMP}, +- {AMOVV, C_LCON, C_NONE, C_REG, 19, 8, 0, sys.Loong64, NOTUSETMP}, +- {AMOVV, C_DCON, C_NONE, C_REG, 59, 16, 0, sys.Loong64, NOTUSETMP}, +- +- {AMUL, C_REG, C_NONE, C_REG, 2, 4, 0, 0, 0}, +- {AMUL, C_REG, C_REG, C_REG, 2, 4, 0, 0, 0}, +- {AMULV, C_REG, C_NONE, C_REG, 2, 4, 0, sys.Loong64, 0}, +- {AMULV, C_REG, C_REG, C_REG, 2, 4, 0, sys.Loong64, 0}, +- +- {AADD, C_ADD0CON, C_REG, C_REG, 4, 4, 0, 0, 0}, +- {AADD, C_ADD0CON, C_NONE, C_REG, 4, 4, 0, 0, 0}, +- {AADD, C_ANDCON, C_REG, C_REG, 10, 8, 0, 0, 0}, +- {AADD, C_ANDCON, C_NONE, C_REG, 10, 8, 0, 0, 0}, +- +- {AADDV, C_ADD0CON, C_REG, C_REG, 4, 4, 0, sys.Loong64, 0}, +- {AADDV, C_ADD0CON, C_NONE, C_REG, 4, 4, 0, sys.Loong64, 0}, +- {AADDV, C_ANDCON, C_REG, C_REG, 10, 8, 0, sys.Loong64, 0}, +- {AADDV, C_ANDCON, C_NONE, C_REG, 10, 8, 0, sys.Loong64, 0}, +- +- {AAND, C_AND0CON, C_REG, C_REG, 4, 4, 0, 0, 0}, +- {AAND, C_AND0CON, C_NONE, C_REG, 4, 4, 0, 0, 0}, +- {AAND, C_ADDCON, C_REG, C_REG, 10, 8, 0, 0, 0}, +- {AAND, C_ADDCON, C_NONE, C_REG, 10, 8, 0, 0, 0}, +- +- {AADD, C_UCON, C_REG, C_REG, 25, 8, 0, 0, 0}, +- {AADD, C_UCON, C_NONE, C_REG, 25, 8, 0, 0, 0}, +- {AADDV, C_UCON, C_REG, C_REG, 25, 8, 0, sys.Loong64, 0}, +- {AADDV, C_UCON, C_NONE, C_REG, 25, 8, 0, sys.Loong64, 0}, +- {AAND, C_UCON, C_REG, C_REG, 25, 8, 0, 0, 0}, +- {AAND, C_UCON, C_NONE, C_REG, 25, 8, 0, 0, 0}, +- +- {AADD, C_LCON, C_NONE, C_REG, 23, 12, 0, 0, 0}, +- {AADDV, C_LCON, C_NONE, C_REG, 23, 12, 0, sys.Loong64, 0}, +- {AAND, C_LCON, C_NONE, C_REG, 23, 12, 0, 0, 0}, +- {AADD, C_LCON, C_REG, C_REG, 23, 12, 0, 0, 0}, +- {AADDV, C_LCON, C_REG, C_REG, 23, 12, 0, sys.Loong64, 0}, +- {AAND, C_LCON, C_REG, C_REG, 23, 12, 0, 0, 0}, +- +- {AADDV, C_DCON, C_NONE, C_REG, 60, 20, 0, sys.Loong64, 0}, +- {AADDV, C_DCON, C_REG, C_REG, 60, 20, 0, sys.Loong64, 0}, +- +- {ASLL, C_SCON, C_REG, C_REG, 16, 4, 0, 0, 0}, +- {ASLL, C_SCON, C_NONE, C_REG, 16, 4, 0, 0, 0}, +- +- {ASLLV, C_SCON, C_REG, C_REG, 16, 4, 0, sys.Loong64, 0}, 
+- {ASLLV, C_SCON, C_NONE, C_REG, 16, 4, 0, sys.Loong64, 0}, +- +- {ASYSCALL, C_NONE, C_NONE, C_NONE, 5, 4, 0, 0, 0}, +- +- {ABEQ, C_REG, C_REG, C_SBRA, 6, 4, 0, 0, 0}, +- {ABEQ, C_REG, C_NONE, C_SBRA, 6, 4, 0, 0, 0}, +- {ABLEZ, C_REG, C_NONE, C_SBRA, 6, 4, 0, 0, 0}, +- {ABFPT, C_NONE, C_NONE, C_SBRA, 6, 4, 0, 0, NOTUSETMP}, +- +- {AJMP, C_NONE, C_NONE, C_LBRA, 11, 4, 0, 0, 0}, // b +- {AJAL, C_NONE, C_NONE, C_LBRA, 11, 4, 0, 0, 0}, // bl +- +- {AJMP, C_NONE, C_NONE, C_ZOREG, 18, 4, REGZERO, 0, 0}, // jirl r0, rj, 0 +- {AJAL, C_NONE, C_NONE, C_ZOREG, 18, 4, REGLINK, 0, 0}, // jirl r1, rj, 0 +- +- {AMOVW, C_SEXT, C_NONE, C_FREG, 27, 4, 0, sys.Loong64, 0}, +- {AMOVF, C_SEXT, C_NONE, C_FREG, 27, 4, 0, sys.Loong64, 0}, +- {AMOVD, C_SEXT, C_NONE, C_FREG, 27, 4, 0, sys.Loong64, 0}, +- {AMOVW, C_SAUTO, C_NONE, C_FREG, 27, 4, REGSP, sys.Loong64, 0}, +- {AMOVF, C_SAUTO, C_NONE, C_FREG, 27, 4, REGSP, 0, 0}, +- {AMOVD, C_SAUTO, C_NONE, C_FREG, 27, 4, REGSP, 0, 0}, +- {AMOVW, C_SOREG, C_NONE, C_FREG, 27, 4, REGZERO, sys.Loong64, 0}, +- {AMOVF, C_SOREG, C_NONE, C_FREG, 27, 4, REGZERO, 0, 0}, +- {AMOVD, C_SOREG, C_NONE, C_FREG, 27, 4, REGZERO, 0, 0}, +- +- {AMOVW, C_LEXT, C_NONE, C_FREG, 27, 12, 0, sys.Loong64, 0}, +- {AMOVF, C_LEXT, C_NONE, C_FREG, 27, 12, 0, sys.Loong64, 0}, +- {AMOVD, C_LEXT, C_NONE, C_FREG, 27, 12, 0, sys.Loong64, 0}, +- {AMOVW, C_LAUTO, C_NONE, C_FREG, 27, 12, REGSP, sys.Loong64, 0}, +- {AMOVF, C_LAUTO, C_NONE, C_FREG, 27, 12, REGSP, 0, 0}, +- {AMOVD, C_LAUTO, C_NONE, C_FREG, 27, 12, REGSP, 0, 0}, +- {AMOVW, C_LOREG, C_NONE, C_FREG, 27, 12, REGZERO, sys.Loong64, 0}, +- {AMOVF, C_LOREG, C_NONE, C_FREG, 27, 12, REGZERO, 0, 0}, +- {AMOVD, C_LOREG, C_NONE, C_FREG, 27, 12, REGZERO, 0, 0}, +- {AMOVF, C_ADDR, C_NONE, C_FREG, 51, 8, 0, 0, 0}, +- {AMOVF, C_ADDR, C_NONE, C_FREG, 51, 8, 0, sys.Loong64, 0}, +- {AMOVD, C_ADDR, C_NONE, C_FREG, 51, 8, 0, 0, 0}, +- {AMOVD, C_ADDR, C_NONE, C_FREG, 51, 8, 0, sys.Loong64, 0}, +- +- {AMOVW, C_FREG, C_NONE, C_SEXT, 28, 4, 0, 
sys.Loong64, 0}, +- {AMOVF, C_FREG, C_NONE, C_SEXT, 28, 4, 0, sys.Loong64, 0}, +- {AMOVD, C_FREG, C_NONE, C_SEXT, 28, 4, 0, sys.Loong64, 0}, +- {AMOVW, C_FREG, C_NONE, C_SAUTO, 28, 4, REGSP, sys.Loong64, 0}, +- {AMOVF, C_FREG, C_NONE, C_SAUTO, 28, 4, REGSP, 0, 0}, +- {AMOVD, C_FREG, C_NONE, C_SAUTO, 28, 4, REGSP, 0, 0}, +- {AMOVW, C_FREG, C_NONE, C_SOREG, 28, 4, REGZERO, sys.Loong64, 0}, +- {AMOVF, C_FREG, C_NONE, C_SOREG, 28, 4, REGZERO, 0, 0}, +- {AMOVD, C_FREG, C_NONE, C_SOREG, 28, 4, REGZERO, 0, 0}, +- +- {AMOVW, C_FREG, C_NONE, C_LEXT, 28, 12, 0, sys.Loong64, 0}, +- {AMOVF, C_FREG, C_NONE, C_LEXT, 28, 12, 0, sys.Loong64, 0}, +- {AMOVD, C_FREG, C_NONE, C_LEXT, 28, 12, 0, sys.Loong64, 0}, +- {AMOVW, C_FREG, C_NONE, C_LAUTO, 28, 12, REGSP, sys.Loong64, 0}, +- {AMOVF, C_FREG, C_NONE, C_LAUTO, 28, 12, REGSP, 0, 0}, +- {AMOVD, C_FREG, C_NONE, C_LAUTO, 28, 12, REGSP, 0, 0}, +- {AMOVW, C_FREG, C_NONE, C_LOREG, 28, 12, REGZERO, sys.Loong64, 0}, +- {AMOVF, C_FREG, C_NONE, C_LOREG, 28, 12, REGZERO, 0, 0}, +- {AMOVD, C_FREG, C_NONE, C_LOREG, 28, 12, REGZERO, 0, 0}, +- {AMOVF, C_FREG, C_NONE, C_ADDR, 50, 8, 0, 0, 0}, +- {AMOVF, C_FREG, C_NONE, C_ADDR, 50, 8, 0, sys.Loong64, 0}, +- {AMOVD, C_FREG, C_NONE, C_ADDR, 50, 8, 0, 0, 0}, +- {AMOVD, C_FREG, C_NONE, C_ADDR, 50, 8, 0, sys.Loong64, 0}, +- +- {AMOVW, C_REG, C_NONE, C_FREG, 30, 4, 0, 0, 0}, +- {AMOVW, C_FREG, C_NONE, C_REG, 31, 4, 0, 0, 0}, +- {AMOVV, C_REG, C_NONE, C_FREG, 47, 4, 0, sys.Loong64, 0}, +- {AMOVV, C_FREG, C_NONE, C_REG, 48, 4, 0, sys.Loong64, 0}, +- +- {AMOVW, C_ADDCON, C_NONE, C_FREG, 34, 8, 0, sys.Loong64, 0}, +- {AMOVW, C_ANDCON, C_NONE, C_FREG, 34, 8, 0, sys.Loong64, 0}, +- +- {AWORD, C_LCON, C_NONE, C_NONE, 40, 4, 0, 0, 0}, +- {AWORD, C_DCON, C_NONE, C_NONE, 61, 4, 0, 0, 0}, +- +- {ATEQ, C_SCON, C_REG, C_REG, 15, 8, 0, 0, 0}, +- {ATEQ, C_SCON, C_NONE, C_REG, 15, 8, 0, 0, 0}, +- +- {ABREAK, C_REG, C_NONE, C_SEXT, 7, 4, 0, sys.Loong64, 0}, // really CACHE instruction +- {ABREAK, C_REG, C_NONE, C_SAUTO, 
7, 4, REGSP, sys.Loong64, 0}, +- {ABREAK, C_REG, C_NONE, C_SOREG, 7, 4, REGZERO, sys.Loong64, 0}, +- {ABREAK, C_NONE, C_NONE, C_NONE, 5, 4, 0, 0, 0}, +- +- {obj.AUNDEF, C_NONE, C_NONE, C_NONE, 49, 4, 0, 0, 0}, +- {obj.APCDATA, C_LCON, C_NONE, C_LCON, 0, 0, 0, 0, 0}, +- {obj.APCDATA, C_DCON, C_NONE, C_DCON, 0, 0, 0, 0, 0}, +- {obj.AFUNCDATA, C_SCON, C_NONE, C_ADDR, 0, 0, 0, 0, 0}, +- {obj.ANOP, C_NONE, C_NONE, C_NONE, 0, 0, 0, 0, 0}, +- {obj.ANOP, C_LCON, C_NONE, C_NONE, 0, 0, 0, 0, 0}, // nop variants, see #40689 +- {obj.ANOP, C_DCON, C_NONE, C_NONE, 0, 0, 0, 0, 0}, // nop variants, see #40689 +- {obj.ANOP, C_REG, C_NONE, C_NONE, 0, 0, 0, 0, 0}, +- {obj.ANOP, C_FREG, C_NONE, C_NONE, 0, 0, 0, 0, 0}, +- {obj.ADUFFZERO, C_NONE, C_NONE, C_LBRA, 11, 4, 0, 0, 0}, // same as AJMP +- {obj.ADUFFCOPY, C_NONE, C_NONE, C_LBRA, 11, 4, 0, 0, 0}, // same as AJMP +- +- {obj.AXXX, C_NONE, C_NONE, C_NONE, 0, 4, 0, 0, 0}, ++ {obj.ATEXT, C_ADDR, C_NONE, C_TEXTSIZE, C_NONE, 0, 0, 0, 0, 0}, ++ ++ {AMOVW, C_REG, C_NONE, C_REG, C_NONE, 1, 4, 0, 0, 0}, ++ {AMOVV, C_REG, C_NONE, C_REG, C_NONE, 1, 4, 0, sys.Loong64, 0}, ++ {AMOVB, C_REG, C_NONE, C_REG, C_NONE, 12, 8, 0, 0, NOTUSETMP}, ++ {AMOVBU, C_REG, C_NONE, C_REG, C_NONE, 13, 4, 0, 0, 0}, ++ {AMOVWU, C_REG, C_NONE, C_REG, C_NONE, 14, 8, 0, sys.Loong64, NOTUSETMP}, ++ ++ {ASUB, C_REG, C_REG, C_REG, C_NONE, 2, 4, 0, 0, 0}, ++ {ASUBV, C_REG, C_REG, C_REG, C_NONE, 2, 4, 0, sys.Loong64, 0}, ++ {AADD, C_REG, C_REG, C_REG, C_NONE, 2, 4, 0, 0, 0}, ++ {AADDV, C_REG, C_REG, C_REG, C_NONE, 2, 4, 0, sys.Loong64, 0}, ++ {AAND, C_REG, C_REG, C_REG, C_NONE, 2, 4, 0, 0, 0}, ++ {ASUB, C_REG, C_NONE, C_REG, C_NONE, 2, 4, 0, 0, 0}, ++ {ASUBV, C_REG, C_NONE, C_REG, C_NONE, 2, 4, 0, sys.Loong64, 0}, ++ {AADD, C_REG, C_NONE, C_REG, C_NONE, 2, 4, 0, 0, 0}, ++ {AADDV, C_REG, C_NONE, C_REG, C_NONE, 2, 4, 0, sys.Loong64, 0}, ++ {AAND, C_REG, C_NONE, C_REG, C_NONE, 2, 4, 0, 0, 0}, ++ {ANEGW, C_REG, C_NONE, C_REG, C_NONE, 2, 4, 0, 0, 0}, ++ {ANEGV, C_REG, C_NONE, 
C_REG, C_NONE, 2, 4, 0, sys.Loong64, 0}, ++ {AMASKEQZ, C_REG, C_REG, C_REG, C_NONE, 2, 4, 0, 0, 0}, ++ ++ {ASLL, C_REG, C_NONE, C_REG, C_NONE, 9, 4, 0, 0, 0}, ++ {ASLL, C_REG, C_REG, C_REG, C_NONE, 9, 4, 0, 0, 0}, ++ {ASLLV, C_REG, C_NONE, C_REG, C_NONE, 9, 4, 0, sys.Loong64, 0}, ++ {ASLLV, C_REG, C_REG, C_REG, C_NONE, 9, 4, 0, sys.Loong64, 0}, ++ {ACLO, C_REG, C_NONE, C_REG, C_NONE, 9, 4, 0, 0, 0}, ++ ++ {AADDF, C_FREG, C_NONE, C_FREG, C_NONE, 32, 4, 0, 0, 0}, ++ {AADDF, C_FREG, C_REG, C_FREG, C_NONE, 32, 4, 0, 0, 0}, ++ {ACMPEQF, C_FREG, C_REG, C_NONE, C_NONE, 32, 4, 0, 0, 0}, ++ {AABSF, C_FREG, C_NONE, C_FREG, C_NONE, 33, 4, 0, 0, 0}, ++ {AMOVVF, C_FREG, C_NONE, C_FREG, C_NONE, 33, 4, 0, sys.Loong64, 0}, ++ {AMOVF, C_FREG, C_NONE, C_FREG, C_NONE, 33, 4, 0, 0, 0}, ++ {AMOVD, C_FREG, C_NONE, C_FREG, C_NONE, 33, 4, 0, 0, 0}, ++ ++ {AMOVW, C_REG, C_NONE, C_SEXT, C_NONE, 7, 4, 0, sys.Loong64, 0}, ++ {AMOVWU, C_REG, C_NONE, C_SEXT, C_NONE, 7, 4, 0, sys.Loong64, 0}, ++ {AMOVV, C_REG, C_NONE, C_SEXT, C_NONE, 7, 4, 0, sys.Loong64, 0}, ++ {AMOVB, C_REG, C_NONE, C_SEXT, C_NONE, 7, 4, 0, sys.Loong64, 0}, ++ {AMOVBU, C_REG, C_NONE, C_SEXT, C_NONE, 7, 4, 0, sys.Loong64, 0}, ++ {AMOVWL, C_REG, C_NONE, C_SEXT, C_NONE, 7, 4, 0, sys.Loong64, 0}, ++ {AMOVVL, C_REG, C_NONE, C_SEXT, C_NONE, 7, 4, 0, sys.Loong64, 0}, ++ {AMOVW, C_REG, C_NONE, C_SAUTO, C_NONE, 7, 4, REGSP, 0, 0}, ++ {AMOVWU, C_REG, C_NONE, C_SAUTO, C_NONE, 7, 4, REGSP, sys.Loong64, 0}, ++ {AMOVV, C_REG, C_NONE, C_SAUTO, C_NONE, 7, 4, REGSP, sys.Loong64, 0}, ++ {AMOVB, C_REG, C_NONE, C_SAUTO, C_NONE, 7, 4, REGSP, 0, 0}, ++ {AMOVBU, C_REG, C_NONE, C_SAUTO, C_NONE, 7, 4, REGSP, 0, 0}, ++ {AMOVWL, C_REG, C_NONE, C_SAUTO, C_NONE, 7, 4, REGSP, 0, 0}, ++ {AMOVVL, C_REG, C_NONE, C_SAUTO, C_NONE, 7, 4, REGSP, sys.Loong64, 0}, ++ {AMOVW, C_REG, C_NONE, C_SOREG, C_NONE, 7, 4, REGZERO, 0, 0}, ++ {AMOVWU, C_REG, C_NONE, C_SOREG, C_NONE, 7, 4, REGZERO, sys.Loong64, 0}, ++ {AMOVV, C_REG, C_NONE, C_SOREG, C_NONE, 7, 4, REGZERO, 
sys.Loong64, 0}, ++ {AMOVB, C_REG, C_NONE, C_SOREG, C_NONE, 7, 4, REGZERO, 0, 0}, ++ {AMOVBU, C_REG, C_NONE, C_SOREG, C_NONE, 7, 4, REGZERO, 0, 0}, ++ {AMOVWL, C_REG, C_NONE, C_SOREG, C_NONE, 7, 4, REGZERO, 0, 0}, ++ {AMOVVL, C_REG, C_NONE, C_SOREG, C_NONE, 7, 4, REGZERO, sys.Loong64, 0}, ++ {ASC, C_REG, C_NONE, C_SOREG, C_NONE, 7, 4, REGZERO, 0, 0}, ++ {ASCV, C_REG, C_NONE, C_SOREG, C_NONE, 7, 4, REGZERO, sys.Loong64, 0}, ++ ++ {AMOVW, C_SEXT, C_NONE, C_REG, C_NONE, 8, 4, 0, sys.Loong64, 0}, ++ {AMOVWU, C_SEXT, C_NONE, C_REG, C_NONE, 8, 4, 0, sys.Loong64, 0}, ++ {AMOVV, C_SEXT, C_NONE, C_REG, C_NONE, 8, 4, 0, sys.Loong64, 0}, ++ {AMOVB, C_SEXT, C_NONE, C_REG, C_NONE, 8, 4, 0, sys.Loong64, 0}, ++ {AMOVBU, C_SEXT, C_NONE, C_REG, C_NONE, 8, 4, 0, sys.Loong64, 0}, ++ {AMOVWL, C_SEXT, C_NONE, C_REG, C_NONE, 8, 4, 0, sys.Loong64, 0}, ++ {AMOVVL, C_SEXT, C_NONE, C_REG, C_NONE, 8, 4, 0, sys.Loong64, 0}, ++ {AMOVW, C_SAUTO, C_NONE, C_REG, C_NONE, 8, 4, REGSP, 0, 0}, ++ {AMOVWU, C_SAUTO, C_NONE, C_REG, C_NONE, 8, 4, REGSP, sys.Loong64, 0}, ++ {AMOVV, C_SAUTO, C_NONE, C_REG, C_NONE, 8, 4, REGSP, sys.Loong64, 0}, ++ {AMOVB, C_SAUTO, C_NONE, C_REG, C_NONE, 8, 4, REGSP, 0, 0}, ++ {AMOVBU, C_SAUTO, C_NONE, C_REG, C_NONE, 8, 4, REGSP, 0, 0}, ++ {AMOVWL, C_SAUTO, C_NONE, C_REG, C_NONE, 8, 4, REGSP, 0, 0}, ++ {AMOVVL, C_SAUTO, C_NONE, C_REG, C_NONE, 8, 4, REGSP, sys.Loong64, 0}, ++ {AMOVW, C_SOREG, C_NONE, C_REG, C_NONE, 8, 4, REGZERO, 0, 0}, ++ {AMOVWU, C_SOREG, C_NONE, C_REG, C_NONE, 8, 4, REGZERO, sys.Loong64, 0}, ++ {AMOVV, C_SOREG, C_NONE, C_REG, C_NONE, 8, 4, REGZERO, sys.Loong64, 0}, ++ {AMOVB, C_SOREG, C_NONE, C_REG, C_NONE, 8, 4, REGZERO, 0, 0}, ++ {AMOVBU, C_SOREG, C_NONE, C_REG, C_NONE, 8, 4, REGZERO, 0, 0}, ++ {AMOVWL, C_SOREG, C_NONE, C_REG, C_NONE, 8, 4, REGZERO, 0, 0}, ++ {AMOVVL, C_SOREG, C_NONE, C_REG, C_NONE, 8, 4, REGZERO, sys.Loong64, 0}, ++ {ALL, C_SOREG, C_NONE, C_REG, C_NONE, 8, 4, REGZERO, 0, 0}, ++ {ALLV, C_SOREG, C_NONE, C_REG, C_NONE, 8, 4, REGZERO, 
sys.Loong64, 0}, ++ ++ {AMOVW, C_REG, C_NONE, C_LEXT, C_NONE, 35, 12, 0, sys.Loong64, 0}, ++ {AMOVWU, C_REG, C_NONE, C_LEXT, C_NONE, 35, 12, 0, sys.Loong64, 0}, ++ {AMOVV, C_REG, C_NONE, C_LEXT, C_NONE, 35, 12, 0, sys.Loong64, 0}, ++ {AMOVB, C_REG, C_NONE, C_LEXT, C_NONE, 35, 12, 0, sys.Loong64, 0}, ++ {AMOVBU, C_REG, C_NONE, C_LEXT, C_NONE, 35, 12, 0, sys.Loong64, 0}, ++ {AMOVW, C_REG, C_NONE, C_LAUTO, C_NONE, 35, 12, REGSP, 0, 0}, ++ {AMOVWU, C_REG, C_NONE, C_LAUTO, C_NONE, 35, 12, REGSP, sys.Loong64, 0}, ++ {AMOVV, C_REG, C_NONE, C_LAUTO, C_NONE, 35, 12, REGSP, sys.Loong64, 0}, ++ {AMOVB, C_REG, C_NONE, C_LAUTO, C_NONE, 35, 12, REGSP, 0, 0}, ++ {AMOVBU, C_REG, C_NONE, C_LAUTO, C_NONE, 35, 12, REGSP, 0, 0}, ++ {AMOVW, C_REG, C_NONE, C_LOREG, C_NONE, 35, 12, REGZERO, 0, 0}, ++ {AMOVWU, C_REG, C_NONE, C_LOREG, C_NONE, 35, 12, REGZERO, sys.Loong64, 0}, ++ {AMOVV, C_REG, C_NONE, C_LOREG, C_NONE, 35, 12, REGZERO, sys.Loong64, 0}, ++ {AMOVB, C_REG, C_NONE, C_LOREG, C_NONE, 35, 12, REGZERO, 0, 0}, ++ {AMOVBU, C_REG, C_NONE, C_LOREG, C_NONE, 35, 12, REGZERO, 0, 0}, ++ {ASC, C_REG, C_NONE, C_LOREG, C_NONE, 35, 12, REGZERO, 0, 0}, ++ {AMOVW, C_REG, C_NONE, C_ADDR, C_NONE, 50, 8, 0, 0, 0}, ++ {AMOVW, C_REG, C_NONE, C_ADDR, C_NONE, 50, 8, 0, sys.Loong64, 0}, ++ {AMOVWU, C_REG, C_NONE, C_ADDR, C_NONE, 50, 8, 0, sys.Loong64, 0}, ++ {AMOVV, C_REG, C_NONE, C_ADDR, C_NONE, 50, 8, 0, sys.Loong64, 0}, ++ {AMOVB, C_REG, C_NONE, C_ADDR, C_NONE, 50, 8, 0, 0, 0}, ++ {AMOVB, C_REG, C_NONE, C_ADDR, C_NONE, 50, 8, 0, sys.Loong64, 0}, ++ {AMOVBU, C_REG, C_NONE, C_ADDR, C_NONE, 50, 8, 0, 0, 0}, ++ {AMOVBU, C_REG, C_NONE, C_ADDR, C_NONE, 50, 8, 0, sys.Loong64, 0}, ++ {AMOVW, C_REG, C_NONE, C_TLS, C_NONE, 53, 16, 0, 0, 0}, ++ {AMOVWU, C_REG, C_NONE, C_TLS, C_NONE, 53, 16, 0, sys.Loong64, 0}, ++ {AMOVV, C_REG, C_NONE, C_TLS, C_NONE, 53, 16, 0, sys.Loong64, 0}, ++ {AMOVB, C_REG, C_NONE, C_TLS, C_NONE, 53, 16, 0, 0, 0}, ++ {AMOVBU, C_REG, C_NONE, C_TLS, C_NONE, 53, 16, 0, 0, 0}, ++ ++ {AMOVW, 
C_LEXT, C_NONE, C_REG, C_NONE, 36, 12, 0, sys.Loong64, 0}, ++ {AMOVWU, C_LEXT, C_NONE, C_REG, C_NONE, 36, 12, 0, sys.Loong64, 0}, ++ {AMOVV, C_LEXT, C_NONE, C_REG, C_NONE, 36, 12, 0, sys.Loong64, 0}, ++ {AMOVB, C_LEXT, C_NONE, C_REG, C_NONE, 36, 12, 0, sys.Loong64, 0}, ++ {AMOVBU, C_LEXT, C_NONE, C_REG, C_NONE, 36, 12, 0, sys.Loong64, 0}, ++ {AMOVW, C_LAUTO, C_NONE, C_REG, C_NONE, 36, 12, REGSP, 0, 0}, ++ {AMOVWU, C_LAUTO, C_NONE, C_REG, C_NONE, 36, 12, REGSP, sys.Loong64, 0}, ++ {AMOVV, C_LAUTO, C_NONE, C_REG, C_NONE, 36, 12, REGSP, sys.Loong64, 0}, ++ {AMOVB, C_LAUTO, C_NONE, C_REG, C_NONE, 36, 12, REGSP, 0, 0}, ++ {AMOVBU, C_LAUTO, C_NONE, C_REG, C_NONE, 36, 12, REGSP, 0, 0}, ++ {AMOVW, C_LOREG, C_NONE, C_REG, C_NONE, 36, 12, REGZERO, 0, 0}, ++ {AMOVWU, C_LOREG, C_NONE, C_REG, C_NONE, 36, 12, REGZERO, sys.Loong64, 0}, ++ {AMOVV, C_LOREG, C_NONE, C_REG, C_NONE, 36, 12, REGZERO, sys.Loong64, 0}, ++ {AMOVB, C_LOREG, C_NONE, C_REG, C_NONE, 36, 12, REGZERO, 0, 0}, ++ {AMOVBU, C_LOREG, C_NONE, C_REG, C_NONE, 36, 12, REGZERO, 0, 0}, ++ {AMOVW, C_ADDR, C_NONE, C_REG, C_NONE, 51, 8, 0, 0, 0}, ++ {AMOVW, C_ADDR, C_NONE, C_REG, C_NONE, 51, 8, 0, sys.Loong64, 0}, ++ {AMOVWU, C_ADDR, C_NONE, C_REG, C_NONE, 51, 8, 0, sys.Loong64, 0}, ++ {AMOVV, C_ADDR, C_NONE, C_REG, C_NONE, 51, 8, 0, sys.Loong64, 0}, ++ {AMOVB, C_ADDR, C_NONE, C_REG, C_NONE, 51, 8, 0, 0, 0}, ++ {AMOVB, C_ADDR, C_NONE, C_REG, C_NONE, 51, 8, 0, sys.Loong64, 0}, ++ {AMOVBU, C_ADDR, C_NONE, C_REG, C_NONE, 51, 8, 0, 0, 0}, ++ {AMOVBU, C_ADDR, C_NONE, C_REG, C_NONE, 51, 8, 0, sys.Loong64, 0}, ++ {AMOVW, C_TLS, C_NONE, C_REG, C_NONE, 54, 16, 0, 0, 0}, ++ {AMOVWU, C_TLS, C_NONE, C_REG, C_NONE, 54, 16, 0, sys.Loong64, 0}, ++ {AMOVV, C_TLS, C_NONE, C_REG, C_NONE, 54, 16, 0, sys.Loong64, 0}, ++ {AMOVB, C_TLS, C_NONE, C_REG, C_NONE, 54, 16, 0, 0, 0}, ++ {AMOVBU, C_TLS, C_NONE, C_REG, C_NONE, 54, 16, 0, 0, 0}, ++ ++ {AMOVW, C_SECON, C_NONE, C_REG, C_NONE, 3, 4, 0, sys.Loong64, 0}, ++ {AMOVV, C_SECON, C_NONE, C_REG, 
C_NONE, 3, 4, 0, sys.Loong64, 0}, ++ {AMOVW, C_SACON, C_NONE, C_REG, C_NONE, 3, 4, REGSP, 0, 0}, ++ {AMOVV, C_SACON, C_NONE, C_REG, C_NONE, 3, 4, REGSP, sys.Loong64, 0}, ++ {AMOVW, C_LECON, C_NONE, C_REG, C_NONE, 52, 8, 0, 0, NOTUSETMP}, ++ {AMOVW, C_LECON, C_NONE, C_REG, C_NONE, 52, 8, 0, sys.Loong64, NOTUSETMP}, ++ {AMOVV, C_LECON, C_NONE, C_REG, C_NONE, 52, 8, 0, sys.Loong64, NOTUSETMP}, ++ ++ {AMOVW, C_LACON, C_NONE, C_REG, C_NONE, 26, 12, REGSP, 0, 0}, ++ {AMOVV, C_LACON, C_NONE, C_REG, C_NONE, 26, 12, REGSP, sys.Loong64, 0}, ++ {AMOVW, C_ADDCON, C_NONE, C_REG, C_NONE, 3, 4, REGZERO, 0, 0}, ++ {AMOVV, C_ADDCON, C_NONE, C_REG, C_NONE, 3, 4, REGZERO, sys.Loong64, 0}, ++ {AMOVW, C_ANDCON, C_NONE, C_REG, C_NONE, 3, 4, REGZERO, 0, 0}, ++ {AMOVV, C_ANDCON, C_NONE, C_REG, C_NONE, 3, 4, REGZERO, sys.Loong64, 0}, ++ {AMOVW, C_STCON, C_NONE, C_REG, C_NONE, 55, 12, 0, 0, 0}, ++ {AMOVV, C_STCON, C_NONE, C_REG, C_NONE, 55, 12, 0, sys.Loong64, 0}, ++ ++ {AMOVW, C_UCON, C_NONE, C_REG, C_NONE, 24, 4, 0, 0, 0}, ++ {AMOVV, C_UCON, C_NONE, C_REG, C_NONE, 24, 4, 0, sys.Loong64, 0}, ++ {AMOVW, C_LCON, C_NONE, C_REG, C_NONE, 19, 8, 0, 0, NOTUSETMP}, ++ {AMOVV, C_LCON, C_NONE, C_REG, C_NONE, 19, 8, 0, sys.Loong64, NOTUSETMP}, ++ {AMOVV, C_DCON, C_NONE, C_REG, C_NONE, 59, 16, 0, sys.Loong64, NOTUSETMP}, ++ ++ {AMUL, C_REG, C_NONE, C_REG, C_NONE, 2, 4, 0, 0, 0}, ++ {AMUL, C_REG, C_REG, C_REG, C_NONE, 2, 4, 0, 0, 0}, ++ {AMULV, C_REG, C_NONE, C_REG, C_NONE, 2, 4, 0, sys.Loong64, 0}, ++ {AMULV, C_REG, C_REG, C_REG, C_NONE, 2, 4, 0, sys.Loong64, 0}, ++ ++ {AADD, C_ADD0CON, C_REG, C_REG, C_NONE, 4, 4, 0, 0, 0}, ++ {AADD, C_ADD0CON, C_NONE, C_REG, C_NONE, 4, 4, 0, 0, 0}, ++ {AADD, C_ANDCON, C_REG, C_REG, C_NONE, 10, 8, 0, 0, 0}, ++ {AADD, C_ANDCON, C_NONE, C_REG, C_NONE, 10, 8, 0, 0, 0}, ++ ++ {AADDV, C_ADD0CON, C_REG, C_REG, C_NONE, 4, 4, 0, sys.Loong64, 0}, ++ {AADDV, C_ADD0CON, C_NONE, C_REG, C_NONE, 4, 4, 0, sys.Loong64, 0}, ++ {AADDV, C_ANDCON, C_REG, C_REG, C_NONE, 10, 8, 0, 
sys.Loong64, 0}, ++ {AADDV, C_ANDCON, C_NONE, C_REG, C_NONE, 10, 8, 0, sys.Loong64, 0}, ++ ++ {AAND, C_AND0CON, C_REG, C_REG, C_NONE, 4, 4, 0, 0, 0}, ++ {AAND, C_AND0CON, C_NONE, C_REG, C_NONE, 4, 4, 0, 0, 0}, ++ {AAND, C_ADDCON, C_REG, C_REG, C_NONE, 10, 8, 0, 0, 0}, ++ {AAND, C_ADDCON, C_NONE, C_REG, C_NONE, 10, 8, 0, 0, 0}, ++ ++ {AADD, C_UCON, C_REG, C_REG, C_NONE, 25, 8, 0, 0, 0}, ++ {AADD, C_UCON, C_NONE, C_REG, C_NONE, 25, 8, 0, 0, 0}, ++ {AADDV, C_UCON, C_REG, C_REG, C_NONE, 25, 8, 0, sys.Loong64, 0}, ++ {AADDV, C_UCON, C_NONE, C_REG, C_NONE, 25, 8, 0, sys.Loong64, 0}, ++ {AAND, C_UCON, C_REG, C_REG, C_NONE, 25, 8, 0, 0, 0}, ++ {AAND, C_UCON, C_NONE, C_REG, C_NONE, 25, 8, 0, 0, 0}, ++ ++ {AADD, C_LCON, C_NONE, C_REG, C_NONE, 23, 12, 0, 0, 0}, ++ {AADDV, C_LCON, C_NONE, C_REG, C_NONE, 23, 12, 0, sys.Loong64, 0}, ++ {AAND, C_LCON, C_NONE, C_REG, C_NONE, 23, 12, 0, 0, 0}, ++ {AADD, C_LCON, C_REG, C_REG, C_NONE, 23, 12, 0, 0, 0}, ++ {AADDV, C_LCON, C_REG, C_REG, C_NONE, 23, 12, 0, sys.Loong64, 0}, ++ {AAND, C_LCON, C_REG, C_REG, C_NONE, 23, 12, 0, 0, 0}, ++ ++ {AADDV, C_DCON, C_NONE, C_REG, C_NONE, 60, 20, 0, sys.Loong64, 0}, ++ {AADDV, C_DCON, C_REG, C_REG, C_NONE, 60, 20, 0, sys.Loong64, 0}, ++ ++ {ASLL, C_SCON, C_REG, C_REG, C_NONE, 16, 4, 0, 0, 0}, ++ {ASLL, C_SCON, C_NONE, C_REG, C_NONE, 16, 4, 0, 0, 0}, ++ ++ {ASLLV, C_SCON, C_REG, C_REG, C_NONE, 16, 4, 0, sys.Loong64, 0}, ++ {ASLLV, C_SCON, C_NONE, C_REG, C_NONE, 16, 4, 0, sys.Loong64, 0}, ++ ++ {ASYSCALL, C_NONE, C_NONE, C_NONE, C_NONE, 5, 4, 0, 0, 0}, ++ ++ {ABEQ, C_REG, C_REG, C_SBRA, C_NONE, 6, 4, 0, 0, 0}, ++ {ABEQ, C_REG, C_NONE, C_SBRA, C_NONE, 6, 4, 0, 0, 0}, ++ {ABLEZ, C_REG, C_NONE, C_SBRA, C_NONE, 6, 4, 0, 0, 0}, ++ {ABFPT, C_NONE, C_NONE, C_SBRA, C_NONE, 6, 4, 0, 0, NOTUSETMP}, ++ ++ {AJMP, C_NONE, C_NONE, C_LBRA, C_NONE, 11, 4, 0, 0, 0}, // b ++ {AJAL, C_NONE, C_NONE, C_LBRA, C_NONE, 11, 4, 0, 0, 0}, // bl ++ ++ {AJMP, C_NONE, C_NONE, C_ZOREG, C_NONE, 18, 4, REGZERO, 0, 0}, // jirl r0, rj, 0 
++ {AJAL, C_NONE, C_NONE, C_ZOREG, C_NONE, 18, 4, REGLINK, 0, 0}, // jirl r1, rj, 0 ++ ++ {AMOVW, C_SEXT, C_NONE, C_FREG, C_NONE, 27, 4, 0, sys.Loong64, 0}, ++ {AMOVF, C_SEXT, C_NONE, C_FREG, C_NONE, 27, 4, 0, sys.Loong64, 0}, ++ {AMOVD, C_SEXT, C_NONE, C_FREG, C_NONE, 27, 4, 0, sys.Loong64, 0}, ++ {AMOVW, C_SAUTO, C_NONE, C_FREG, C_NONE, 27, 4, REGSP, sys.Loong64, 0}, ++ {AMOVF, C_SAUTO, C_NONE, C_FREG, C_NONE, 27, 4, REGSP, 0, 0}, ++ {AMOVD, C_SAUTO, C_NONE, C_FREG, C_NONE, 27, 4, REGSP, 0, 0}, ++ {AMOVW, C_SOREG, C_NONE, C_FREG, C_NONE, 27, 4, REGZERO, sys.Loong64, 0}, ++ {AMOVF, C_SOREG, C_NONE, C_FREG, C_NONE, 27, 4, REGZERO, 0, 0}, ++ {AMOVD, C_SOREG, C_NONE, C_FREG, C_NONE, 27, 4, REGZERO, 0, 0}, ++ ++ {AMOVW, C_LEXT, C_NONE, C_FREG, C_NONE, 27, 12, 0, sys.Loong64, 0}, ++ {AMOVF, C_LEXT, C_NONE, C_FREG, C_NONE, 27, 12, 0, sys.Loong64, 0}, ++ {AMOVD, C_LEXT, C_NONE, C_FREG, C_NONE, 27, 12, 0, sys.Loong64, 0}, ++ {AMOVW, C_LAUTO, C_NONE, C_FREG, C_NONE, 27, 12, REGSP, sys.Loong64, 0}, ++ {AMOVF, C_LAUTO, C_NONE, C_FREG, C_NONE, 27, 12, REGSP, 0, 0}, ++ {AMOVD, C_LAUTO, C_NONE, C_FREG, C_NONE, 27, 12, REGSP, 0, 0}, ++ {AMOVW, C_LOREG, C_NONE, C_FREG, C_NONE, 27, 12, REGZERO, sys.Loong64, 0}, ++ {AMOVF, C_LOREG, C_NONE, C_FREG, C_NONE, 27, 12, REGZERO, 0, 0}, ++ {AMOVD, C_LOREG, C_NONE, C_FREG, C_NONE, 27, 12, REGZERO, 0, 0}, ++ {AMOVF, C_ADDR, C_NONE, C_FREG, C_NONE, 51, 8, 0, 0, 0}, ++ {AMOVF, C_ADDR, C_NONE, C_FREG, C_NONE, 51, 8, 0, sys.Loong64, 0}, ++ {AMOVD, C_ADDR, C_NONE, C_FREG, C_NONE, 51, 8, 0, 0, 0}, ++ {AMOVD, C_ADDR, C_NONE, C_FREG, C_NONE, 51, 8, 0, sys.Loong64, 0}, ++ ++ {AMOVW, C_FREG, C_NONE, C_SEXT, C_NONE, 28, 4, 0, sys.Loong64, 0}, ++ {AMOVF, C_FREG, C_NONE, C_SEXT, C_NONE, 28, 4, 0, sys.Loong64, 0}, ++ {AMOVD, C_FREG, C_NONE, C_SEXT, C_NONE, 28, 4, 0, sys.Loong64, 0}, ++ {AMOVW, C_FREG, C_NONE, C_SAUTO, C_NONE, 28, 4, REGSP, sys.Loong64, 0}, ++ {AMOVF, C_FREG, C_NONE, C_SAUTO, C_NONE, 28, 4, REGSP, 0, 0}, ++ {AMOVD, C_FREG, C_NONE, C_SAUTO, 
C_NONE, 28, 4, REGSP, 0, 0}, ++ {AMOVW, C_FREG, C_NONE, C_SOREG, C_NONE, 28, 4, REGZERO, sys.Loong64, 0}, ++ {AMOVF, C_FREG, C_NONE, C_SOREG, C_NONE, 28, 4, REGZERO, 0, 0}, ++ {AMOVD, C_FREG, C_NONE, C_SOREG, C_NONE, 28, 4, REGZERO, 0, 0}, ++ ++ {AMOVW, C_FREG, C_NONE, C_LEXT, C_NONE, 28, 12, 0, sys.Loong64, 0}, ++ {AMOVF, C_FREG, C_NONE, C_LEXT, C_NONE, 28, 12, 0, sys.Loong64, 0}, ++ {AMOVD, C_FREG, C_NONE, C_LEXT, C_NONE, 28, 12, 0, sys.Loong64, 0}, ++ {AMOVW, C_FREG, C_NONE, C_LAUTO, C_NONE, 28, 12, REGSP, sys.Loong64, 0}, ++ {AMOVF, C_FREG, C_NONE, C_LAUTO, C_NONE, 28, 12, REGSP, 0, 0}, ++ {AMOVD, C_FREG, C_NONE, C_LAUTO, C_NONE, 28, 12, REGSP, 0, 0}, ++ {AMOVW, C_FREG, C_NONE, C_LOREG, C_NONE, 28, 12, REGZERO, sys.Loong64, 0}, ++ {AMOVF, C_FREG, C_NONE, C_LOREG, C_NONE, 28, 12, REGZERO, 0, 0}, ++ {AMOVD, C_FREG, C_NONE, C_LOREG, C_NONE, 28, 12, REGZERO, 0, 0}, ++ {AMOVF, C_FREG, C_NONE, C_ADDR, C_NONE, 50, 8, 0, 0, 0}, ++ {AMOVF, C_FREG, C_NONE, C_ADDR, C_NONE, 50, 8, 0, sys.Loong64, 0}, ++ {AMOVD, C_FREG, C_NONE, C_ADDR, C_NONE, 50, 8, 0, 0, 0}, ++ {AMOVD, C_FREG, C_NONE, C_ADDR, C_NONE, 50, 8, 0, sys.Loong64, 0}, ++ ++ {AMOVW, C_REG, C_NONE, C_FREG, C_NONE, 30, 4, 0, 0, 0}, ++ {AMOVW, C_FREG, C_NONE, C_REG, C_NONE, 31, 4, 0, 0, 0}, ++ {AMOVV, C_REG, C_NONE, C_FREG, C_NONE, 47, 4, 0, sys.Loong64, 0}, ++ {AMOVV, C_FREG, C_NONE, C_REG, C_NONE, 48, 4, 0, sys.Loong64, 0}, ++ ++ {AMOVW, C_ADDCON, C_NONE, C_FREG, C_NONE, 34, 8, 0, sys.Loong64, 0}, ++ {AMOVW, C_ANDCON, C_NONE, C_FREG, C_NONE, 34, 8, 0, sys.Loong64, 0}, ++ ++ {AWORD, C_LCON, C_NONE, C_NONE, C_NONE, 40, 4, 0, 0, 0}, ++ {AWORD, C_DCON, C_NONE, C_NONE, C_NONE, 61, 4, 0, 0, 0}, ++ ++ {ATEQ, C_SCON, C_REG, C_REG, C_NONE, 15, 8, 0, 0, 0}, ++ {ATEQ, C_SCON, C_NONE, C_REG, C_NONE, 15, 8, 0, 0, 0}, ++ ++ {ABREAK, C_REG, C_NONE, C_SEXT, C_NONE, 7, 4, 0, sys.Loong64, 0}, // really CACHE instruction ++ {ABREAK, C_REG, C_NONE, C_SAUTO, C_NONE, 7, 4, REGSP, sys.Loong64, 0}, ++ {ABREAK, C_REG, C_NONE, C_SOREG, 
C_NONE, 7, 4, REGZERO, sys.Loong64, 0}, ++ {ABREAK, C_NONE, C_NONE, C_NONE, C_NONE, 5, 4, 0, 0, 0}, ++ ++ {ARDTIMELW, C_NONE, C_NONE, C_REG, C_REG, 62, 4, 0, 0, 0}, ++ {ARDTIMEHW, C_NONE, C_NONE, C_REG, C_REG, 62, 4, 0, 0, 0}, ++ {ARDTIMED, C_NONE, C_NONE, C_REG, C_REG, 62, 4, 0, 0, 0}, ++ ++ {obj.AUNDEF, C_NONE, C_NONE, C_NONE, C_NONE, 49, 4, 0, 0, 0}, ++ {obj.APCDATA, C_LCON, C_NONE, C_LCON, C_NONE, 0, 0, 0, 0, 0}, ++ {obj.APCDATA, C_DCON, C_NONE, C_DCON, C_NONE, 0, 0, 0, 0, 0}, ++ {obj.AFUNCDATA, C_SCON, C_NONE, C_ADDR, C_NONE, 0, 0, 0, 0, 0}, ++ {obj.ANOP, C_NONE, C_NONE, C_NONE, C_NONE, 0, 0, 0, 0, 0}, ++ {obj.ANOP, C_LCON, C_NONE, C_NONE, C_NONE, 0, 0, 0, 0, 0}, // nop variants, see #40689 ++ {obj.ANOP, C_DCON, C_NONE, C_NONE, C_NONE, 0, 0, 0, 0, 0}, // nop variants, see #40689 ++ {obj.ANOP, C_REG, C_NONE, C_NONE, C_NONE, 0, 0, 0, 0, 0}, ++ {obj.ANOP, C_FREG, C_NONE, C_NONE, C_NONE, 0, 0, 0, 0, 0}, ++ {obj.ADUFFZERO, C_NONE, C_NONE, C_LBRA, C_NONE, 11, 4, 0, 0, 0}, // same as AJMP ++ {obj.ADUFFCOPY, C_NONE, C_NONE, C_LBRA, C_NONE, 11, 4, 0, 0, 0}, // same as AJMP ++ ++ {obj.AXXX, C_NONE, C_NONE, C_NONE, C_NONE, 0, 4, 0, 0, 0}, + } + + var oprange [ALAST & obj.AMask][]Optab +@@ -698,40 +703,50 @@ func (c *ctxt0) oplook(p *obj.Prog) *Optab { + if a1 != 0 { + return &optab[a1-1] + } ++ ++ // first source operand + a1 = int(p.From.Class) + if a1 == 0 { + a1 = c.aclass(&p.From) + 1 + p.From.Class = int8(a1) + } +- + a1-- ++ ++ // first destination operand + a3 := int(p.To.Class) + if a3 == 0 { + a3 = c.aclass(&p.To) + 1 + p.To.Class = int8(a3) + } +- + a3-- ++ ++ // 2nd source operand + a2 := C_NONE + if p.Reg != 0 { + a2 = C_REG + } + ++ // 2nd destination operand ++ a4 := C_NONE ++ if p.RegTo2 != 0 { ++ a4 = C_REG ++ } ++ + ops := oprange[p.As&obj.AMask] + c1 := &xcmp[a1] + c3 := &xcmp[a3] + for i := range ops { + op := &ops[i] +- if int(op.a2) == a2 && c1[op.a1] && c3[op.a3] && (op.family == 0 || c.ctxt.Arch.Family == op.family) { ++ if (int(op.a2) == a2) && 
c1[op.a1] && c3[op.a3] && (int(op.a4) == a4) { + p.Optab = uint16(cap(optab) - cap(ops) + i + 1) + return op + } + } + +- c.ctxt.Diag("illegal combination %v %v %v %v", p.As, DRconv(a1), DRconv(a2), DRconv(a3)) ++ c.ctxt.Diag("illegal combination %v %v %v %v %v", p.As, DRconv(a1), DRconv(a2), DRconv(a3), DRconv(a4)) + prasm(p) + // Turn illegal instruction into an UNDEF, avoid crashing in asmout. +- return &Optab{obj.AUNDEF, C_NONE, C_NONE, C_NONE, 49, 4, 0, 0, 0} ++ return &Optab{obj.AUNDEF, C_NONE, C_NONE, C_NONE, C_NONE, 49, 4, 0, 0, 0} + } + + func cmp(a int, b int) bool { +@@ -1030,6 +1045,9 @@ func buildop(ctxt *obj.Link) { + ANEGW, + ANEGV, + AWORD, ++ ARDTIMELW, ++ ARDTIMEHW, ++ ARDTIMED, + obj.ANOP, + obj.ATEXT, + obj.AUNDEF, +@@ -1604,6 +1622,9 @@ func (c *ctxt0) asmout(p *obj.Prog, o *Optab, out []uint32) { + case 61: // word C_DCON + o1 = uint32(c.vregoff(&p.From)) + o2 = uint32(c.vregoff(&p.From) >> 32) ++ ++ case 62: // rdtimex rd, rj ++ o1 = OP_RR(c.oprr(p.As), uint32(p.To.Reg), uint32(p.RegTo2)) + } + + out[0] = o1 +@@ -1811,6 +1832,12 @@ func (c *ctxt0) oprr(a obj.As) uint32 { + return 0x4 << 10 + case ACLZ: + return 0x5 << 10 ++ case ARDTIMELW: ++ return 0x18 << 10 ++ case ARDTIMEHW: ++ return 0x19 << 10 ++ case ARDTIMED: ++ return 0x1a << 10 + } + + c.ctxt.Diag("bad rr opcode %v", a) +diff --git a/src/cmd/internal/obj/util.go b/src/cmd/internal/obj/util.go +index b219a07063..f0955039c0 100644 +--- a/src/cmd/internal/obj/util.go ++++ b/src/cmd/internal/obj/util.go +@@ -202,6 +202,7 @@ func (p *Prog) WriteInstructionString(w io.Writer) { + if p.To.Type != TYPE_NONE { + io.WriteString(w, sep) + WriteDconv(w, p, &p.To) ++ sep = ", " + } + if p.RegTo2 != REG_NONE { + fmt.Fprintf(w, "%s%v", sep, Rconv(int(p.RegTo2))) +-- +2.38.1 + diff --git a/0006-runtime-implement-cputicks-with-the-stable-counter-o.patch b/0006-runtime-implement-cputicks-with-the-stable-counter-o.patch new file mode 100644 index 
0000000000000000000000000000000000000000..4ff9d2dd65fa8fe5ec4899c69c9920efcba66c1d --- /dev/null +++ b/0006-runtime-implement-cputicks-with-the-stable-counter-o.patch @@ -0,0 +1,65 @@ +From 8e84405201ae877dd2eb1d5ceddb90b076e50b8f Mon Sep 17 00:00:00 2001 +From: Guoqi Chen +Date: Fri, 5 Aug 2022 13:32:08 +0800 +Subject: [PATCH 06/62] runtime: implement cputicks with the stable counter on + loong64 + +The stable counter is described in Section 2.2.10.4, LoongArch Reference Manual Volume 1. + +Ref: https://loongson.github.io/LoongArch-Documentation/LoongArch-Vol1-EN.html + +Change-Id: I160b695a8c0e38ef49b21fb8b41460fd23d9538c +--- + src/runtime/asm_loong64.s | 6 ++++++ + src/runtime/cputicks.go | 2 +- + src/runtime/os_linux_loong64.go | 7 ------- + 3 files changed, 7 insertions(+), 8 deletions(-) + +diff --git a/src/runtime/asm_loong64.s b/src/runtime/asm_loong64.s +index a6ccd196c9..c97371fefd 100644 +--- a/src/runtime/asm_loong64.s ++++ b/src/runtime/asm_loong64.s +@@ -86,6 +86,12 @@ TEXT runtime·mstart(SB),NOSPLIT|TOPFRAME,$0 + JAL runtime·mstart0(SB) + RET // not reached + ++// func cputicks() int64 ++TEXT runtime·cputicks(SB),NOSPLIT,$0-8 ++ RDTIMED R0, R4 ++ MOVV R4, ret+0(FP) ++ RET ++ + /* + * go-routine + */ +diff --git a/src/runtime/cputicks.go b/src/runtime/cputicks.go +index 91270617fc..2cf3240333 100644 +--- a/src/runtime/cputicks.go ++++ b/src/runtime/cputicks.go +@@ -2,7 +2,7 @@ + // Use of this source code is governed by a BSD-style + // license that can be found in the LICENSE file. 
+ +-//go:build !arm && !arm64 && !loong64 && !mips64 && !mips64le && !mips && !mipsle && !wasm ++//go:build !arm && !arm64 && !mips64 && !mips64le && !mips && !mipsle && !wasm + + package runtime + +diff --git a/src/runtime/os_linux_loong64.go b/src/runtime/os_linux_loong64.go +index 3d84e9accb..61213dadf8 100644 +--- a/src/runtime/os_linux_loong64.go ++++ b/src/runtime/os_linux_loong64.go +@@ -9,10 +9,3 @@ package runtime + func archauxv(tag, val uintptr) {} + + func osArchInit() {} +- +-//go:nosplit +-func cputicks() int64 { +- // Currently cputicks() is used in blocking profiler and to seed fastrand(). +- // nanotime() is a poor approximation of CPU ticks that is enough for the profiler. +- return nanotime() +-} +-- +2.38.1 + diff --git a/0007-runtime-remove-the-fake-mstart-caller-in-systemstack.patch b/0007-runtime-remove-the-fake-mstart-caller-in-systemstack.patch new file mode 100644 index 0000000000000000000000000000000000000000..6b83336ab0e3c544c2d555586fefbac47754f9bc --- /dev/null +++ b/0007-runtime-remove-the-fake-mstart-caller-in-systemstack.patch @@ -0,0 +1,43 @@ +From 72ff0fd9e69a0f9a9c17fd5b9320c660e1d59a64 Mon Sep 17 00:00:00 2001 +From: Guoqi Chen +Date: Mon, 4 Jul 2022 14:29:52 +0800 +Subject: [PATCH 07/62] runtime: remove the fake mstart caller in systemstack + on linux/loong64 + +The backtrace knows to stop in the system stack due to writing to the SP, +so here the fake mstart caller in the system stack is no longer needed and +can be removed + +ref. 
CL 288799 + +Change-Id: I0841e75fd515cf6a0d98abe4cffc3f63fc275e0e +--- + src/runtime/asm_loong64.s | 5 ----- + 1 file changed, 5 deletions(-) + +diff --git a/src/runtime/asm_loong64.s b/src/runtime/asm_loong64.s +index c97371fefd..b934fab1ca 100644 +--- a/src/runtime/asm_loong64.s ++++ b/src/runtime/asm_loong64.s +@@ -128,7 +128,6 @@ TEXT runtime·mcall(SB), NOSPLIT|NOFRAME, $0-8 + MOVV R3, (g_sched+gobuf_sp)(g) + MOVV R1, (g_sched+gobuf_pc)(g) + MOVV R0, (g_sched+gobuf_lr)(g) +- MOVV g, (g_sched+gobuf_g)(g) + + // Switch to m->g0 & its stack, call fn. + MOVV g, R19 +@@ -186,10 +185,6 @@ switch: + MOVV R5, g + JAL runtime·save_g(SB) + MOVV (g_sched+gobuf_sp)(g), R19 +- // make it look like mstart called systemstack on g0, to stop traceback +- ADDV $-8, R19 +- MOVV $runtime·mstart(SB), R6 +- MOVV R6, 0(R19) + MOVV R19, R3 + + // call target function +-- +2.38.1 + diff --git a/0008-cmd-internal-obj-loong64-save-LR-after-decrementing-.patch b/0008-cmd-internal-obj-loong64-save-LR-after-decrementing-.patch new file mode 100644 index 0000000000000000000000000000000000000000..925701a80efc5573752653a599a898b29ae8aae7 --- /dev/null +++ b/0008-cmd-internal-obj-loong64-save-LR-after-decrementing-.patch @@ -0,0 +1,59 @@ +From 3f4c5d2a5633cddb6f90fc6024a0b95965a91f79 Mon Sep 17 00:00:00 2001 +From: Guoqi Chen +Date: Tue, 5 Jul 2022 19:41:27 +0800 +Subject: [PATCH 08/62] cmd/internal/obj/loong64: save LR after decrementing SP + +Refer to CL 413428 and 412474, for loong64, like mips, s390x and riscv, there +is no single instruction that saves the LR and decrements the SP, so we also +need to insert an instruction to save the LR after decrementing the SP. + +Fixes #56623. +Updates #53374. 
+ +Change-Id: I3de040792f0a041d3d2a98ea89c23a2dd2f4ad10 +--- + src/cmd/asm/internal/asm/testdata/loong64.s | 4 ++-- + src/cmd/internal/obj/loong64/obj.go | 14 ++++++++++++++ + 2 files changed, 16 insertions(+), 2 deletions(-) + +diff --git a/src/cmd/asm/internal/asm/testdata/loong64.s b/src/cmd/asm/internal/asm/testdata/loong64.s +index 133cf48db4..6c44d2208a 100644 +--- a/src/cmd/asm/internal/asm/testdata/loong64.s ++++ b/src/cmd/asm/internal/asm/testdata/loong64.s +@@ -6,6 +6,6 @@ + // TODO: cover more instruction + + TEXT foo(SB),DUPOK|NOSPLIT,$0 +- JAL 1(PC) //CALL 1(PC) //000c0054 ++ JAL 1(PC) //CALL 1(PC) //00100054 + JAL (R4) //CALL (R4) //8100004c +- JAL foo(SB) //CALL foo(SB) //00100054 ++ JAL foo(SB) //CALL foo(SB) //00140054 +diff --git a/src/cmd/internal/obj/loong64/obj.go b/src/cmd/internal/obj/loong64/obj.go +index dc05e18c7d..0c1f5c029d 100644 +--- a/src/cmd/internal/obj/loong64/obj.go ++++ b/src/cmd/internal/obj/loong64/obj.go +@@ -260,6 +260,20 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { + q.Spadj = +autosize + + q = c.ctxt.EndUnsafePoint(q, c.newprog, -1) ++ ++ // On Linux, in a cgo binary we may get a SIGSETXID signal early on ++ // before the signal stack is set, as glibc doesn't allow us to block ++ // SIGSETXID. So a signal may land on the current stack and clobber ++ // the content below the SP. We store the LR again after the SP is ++ // decremented. 
++ q = obj.Appendp(q, newprog) ++ q.As = mov ++ q.Pos = p.Pos ++ q.From.Type = obj.TYPE_REG ++ q.From.Reg = REGLINK ++ q.To.Type = obj.TYPE_MEM ++ q.To.Offset = 0 ++ q.To.Reg = REGSP + } + + if c.cursym.Func().Text.From.Sym.Wrapper() && c.cursym.Func().Text.Mark&LEAF == 0 { +-- +2.38.1 + diff --git a/0009-runtime-refactor-the-linux-loong64-entrypoint.patch b/0009-runtime-refactor-the-linux-loong64-entrypoint.patch new file mode 100644 index 0000000000000000000000000000000000000000..17fb6a781d2b4ff23b206985955f9e31a74da7ff --- /dev/null +++ b/0009-runtime-refactor-the-linux-loong64-entrypoint.patch @@ -0,0 +1,39 @@ +From 57af8cb85970fade444d787b26dc77e22adfbe0c Mon Sep 17 00:00:00 2001 +From: Guoqi Chen +Date: Mon, 25 Jul 2022 15:30:53 +0800 +Subject: [PATCH 09/62] runtime: refactor the linux/loong64 entrypoint + +Remove the meaningless jump, and add the missing NOFRAME flag to _rt0_loong64_linux. + +Change-Id: I1aec68c556615b42042684bd176dfc2a8af094d1 +--- + src/runtime/rt0_linux_loong64.s | 9 +++------ + 1 file changed, 3 insertions(+), 6 deletions(-) + +diff --git a/src/runtime/rt0_linux_loong64.s b/src/runtime/rt0_linux_loong64.s +index b23ae7837a..b9aaa510e1 100644 +--- a/src/runtime/rt0_linux_loong64.s ++++ b/src/runtime/rt0_linux_loong64.s +@@ -4,16 +4,13 @@ + + #include "textflag.h" + +-TEXT _rt0_loong64_linux(SB),NOSPLIT,$0 +- JMP _main<>(SB) +- +-TEXT _main<>(SB),NOSPLIT|NOFRAME,$0 ++TEXT _rt0_loong64_linux(SB),NOSPLIT|NOFRAME,$0 + // In a statically linked binary, the stack contains argc, + // argv as argc string pointers followed by a NULL, envv as a + // sequence of string pointers followed by a NULL, and auxv. + // There is no TLS base pointer. 
+- MOVW 0(R3), R4 // argc +- ADDV $8, R3, R5 // argv ++ MOVW 0(R3), R4 // argc ++ ADDV $8, R3, R5 // argv + JMP main(SB) + + TEXT main(SB),NOSPLIT|NOFRAME,$0 +-- +2.38.1 + diff --git a/0010-cmd-internal-obj-loong64-remove-invalid-branch-delay.patch b/0010-cmd-internal-obj-loong64-remove-invalid-branch-delay.patch new file mode 100644 index 0000000000000000000000000000000000000000..03e26f285a348baadd58cd397d64899fdc17d70b --- /dev/null +++ b/0010-cmd-internal-obj-loong64-remove-invalid-branch-delay.patch @@ -0,0 +1,28 @@ +From a43da65cdbbe17430534a74a2a27d57fa67c6196 Mon Sep 17 00:00:00 2001 +From: Guoqi Chen +Date: Wed, 3 Aug 2022 17:45:02 +0800 +Subject: [PATCH 10/62] cmd/internal/obj/loong64: remove invalid branch delay + slots + +Change-Id: I222717771019f7aefa547971b2d94ef4677a42c9 +--- + src/cmd/internal/obj/loong64/asm.go | 3 --- + 1 file changed, 3 deletions(-) + +diff --git a/src/cmd/internal/obj/loong64/asm.go b/src/cmd/internal/obj/loong64/asm.go +index 982ddd8103..02e44ee0a1 100644 +--- a/src/cmd/internal/obj/loong64/asm.go ++++ b/src/cmd/internal/obj/loong64/asm.go +@@ -435,9 +435,6 @@ func span0(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { + q.Pos = p.Pos + q.To.Type = obj.TYPE_BRANCH + q.To.SetTarget(q.Link.Link) +- +- c.addnop(p.Link) +- c.addnop(p) + bflag = 1 + } + } +-- +2.38.1 + diff --git a/0011-runtime-calculate-nanoseconds-in-usleep-on-linux-loo.patch b/0011-runtime-calculate-nanoseconds-in-usleep-on-linux-loo.patch new file mode 100644 index 0000000000000000000000000000000000000000..441d2076ccdd60e74d6b96d70398a3dd358ca0bb --- /dev/null +++ b/0011-runtime-calculate-nanoseconds-in-usleep-on-linux-loo.patch @@ -0,0 +1,54 @@ +From 47da5877393a5e1e4eeb7f196099f4a1e3f5939b Mon Sep 17 00:00:00 2001 +From: Guoqi Chen +Date: Wed, 24 Aug 2022 02:44:22 +0800 +Subject: [PATCH 11/62] runtime: calculate nanoseconds in usleep on + linux/loong64 + +Change-Id: Ia4cfdea3df8834e6260527ce8e6e894a0547070f +--- + src/runtime/sys_linux_loong64.s | 27 
+++++++++++++++------------ + 1 file changed, 15 insertions(+), 12 deletions(-) + +diff --git a/src/runtime/sys_linux_loong64.s b/src/runtime/sys_linux_loong64.s +index 9ce5e72256..72eaac3c40 100644 +--- a/src/runtime/sys_linux_loong64.s ++++ b/src/runtime/sys_linux_loong64.s +@@ -113,20 +113,23 @@ TEXT runtime·pipe2(SB),NOSPLIT|NOFRAME,$0-20 + RET + + TEXT runtime·usleep(SB),NOSPLIT,$16-4 +- MOVWU usec+0(FP), R6 +- MOVV R6, R5 +- MOVW $1000000, R4 +- DIVVU R4, R6, R6 +- MOVV R6, 8(R3) +- MOVW $1000, R4 +- MULVU R6, R4, R4 +- SUBVU R4, R5 +- MOVV R5, 16(R3) ++ MOVWU usec+0(FP), R6 ++ MOVV $1000, R4 ++ MULVU R4, R6, R6 ++ MOVV $1000000000, R4 ++ ++ // ts->tv_sec ++ DIVVU R4, R6, R5 ++ MOVV R5, 8(R3) ++ ++ // ts->tv_nsec ++ REMVU R4, R6, R5 ++ MOVV R5, 16(R3) + + // nanosleep(&ts, 0) +- ADDV $8, R3, R4 +- MOVW $0, R5 +- MOVV $SYS_nanosleep, R11 ++ ADDV $8, R3, R4 ++ MOVV R0, R5 ++ MOVV $SYS_nanosleep, R11 + SYSCALL + RET + +-- +2.38.1 + diff --git a/0012-cmd-internal-obj-remove-redundant-cnames-on-loong64.patch b/0012-cmd-internal-obj-remove-redundant-cnames-on-loong64.patch new file mode 100644 index 0000000000000000000000000000000000000000..7eb2b7d8571269d29050169d533660a376d8ddac --- /dev/null +++ b/0012-cmd-internal-obj-remove-redundant-cnames-on-loong64.patch @@ -0,0 +1,30 @@ +From 079c0391caf2b30e9347cadee78c9741001323db Mon Sep 17 00:00:00 2001 +From: Guoqi Chen +Date: Wed, 24 Aug 2022 09:22:36 +0800 +Subject: [PATCH 12/62] cmd/internal/obj: remove redundant cnames on loong64 + +Change-Id: I5aa6328a12e74b2801ab60b5a5bb8571d382d5ef +--- + src/cmd/internal/obj/loong64/cnames.go | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/src/cmd/internal/obj/loong64/cnames.go b/src/cmd/internal/obj/loong64/cnames.go +index f3970777bb..00f6136603 100644 +--- a/src/cmd/internal/obj/loong64/cnames.go ++++ b/src/cmd/internal/obj/loong64/cnames.go +@@ -4,11 +4,11 @@ + + package loong64 + ++// This order should be strictly consistent to that in a.out.go. 
+ var cnames0 = []string{ + "NONE", + "REG", + "FREG", +- "FCREG", + "FCSRREG", + "FCCREG", + "ZCON", +-- +2.38.1 + diff --git a/0013-runtime-save-fetch-g-register-during-VDSO-on-loong64.patch b/0013-runtime-save-fetch-g-register-during-VDSO-on-loong64.patch new file mode 100644 index 0000000000000000000000000000000000000000..e94166d911fba9313b21c8486ca97a2f8cd9b334 --- /dev/null +++ b/0013-runtime-save-fetch-g-register-during-VDSO-on-loong64.patch @@ -0,0 +1,113 @@ +From 8439e407e44770a149a13941341f724760f01a27 Mon Sep 17 00:00:00 2001 +From: Guoqi Chen +Date: Fri, 26 Aug 2022 14:01:27 +0800 +Subject: [PATCH 13/62] runtime: save/fetch g register during VDSO on loong64 + +Change-Id: Iaffa8cce4f0ef8ef74225c355ec3c20ed238025f +--- + src/runtime/signal_unix.go | 2 +- + src/runtime/sys_linux_loong64.s | 44 ++++++++++++++++++++++++++++++--- + 2 files changed, 41 insertions(+), 5 deletions(-) + +diff --git a/src/runtime/signal_unix.go b/src/runtime/signal_unix.go +index c1abe62cb3..d0551021e5 100644 +--- a/src/runtime/signal_unix.go ++++ b/src/runtime/signal_unix.go +@@ -397,7 +397,7 @@ func preemptM(mp *m) { + //go:nosplit + func sigFetchG(c *sigctxt) *g { + switch GOARCH { +- case "arm", "arm64", "ppc64", "ppc64le", "riscv64", "s390x": ++ case "arm", "arm64", "loong64", "ppc64", "ppc64le", "riscv64", "s390x": + if !iscgo && inVDSOPage(c.sigpc()) { + // When using cgo, we save the g on TLS and load it from there + // in sigtramp. Just use that. 
+diff --git a/src/runtime/sys_linux_loong64.s b/src/runtime/sys_linux_loong64.s +index 72eaac3c40..4d00fd0ca3 100644 +--- a/src/runtime/sys_linux_loong64.s ++++ b/src/runtime/sys_linux_loong64.s +@@ -10,7 +10,9 @@ + #include "go_tls.h" + #include "textflag.h" + +-#define AT_FDCWD -100 ++#define AT_FDCWD -100 ++#define CLOCK_REALTIME 0 ++#define CLOCK_MONOTONIC 1 + + #define SYS_exit 93 + #define SYS_read 63 +@@ -219,7 +221,7 @@ TEXT runtime·mincore(SB),NOSPLIT|NOFRAME,$0-28 + RET + + // func walltime() (sec int64, nsec int32) +-TEXT runtime·walltime(SB),NOSPLIT,$16-12 ++TEXT runtime·walltime(SB),NOSPLIT,$24-12 + MOVV R3, R23 // R23 is unchanged by C code + MOVV R3, R25 + +@@ -249,12 +251,29 @@ noswitch: + AND $~15, R25 // Align for C code + MOVV R25, R3 + +- MOVW $0, R4 // CLOCK_REALTIME=0 ++ MOVW $CLOCK_REALTIME, R4 + MOVV $0(R3), R5 + + MOVV runtime·vdsoClockgettimeSym(SB), R20 + BEQ R20, fallback + ++ // Store g on gsignal's stack, see sys_linux_arm64.s for detail ++ MOVBU runtime·iscgo(SB), R25 ++ BNE R0, R25, nosaveg ++ ++ MOVV m_gsignal(R24), R25 // g.m.gsignal ++ BEQ R25, nosaveg ++ BEQ g, R25, nosaveg ++ ++ MOVV (g_stack+stack_lo)(R25), R25 // g.m.gsignal.stack.lo ++ MOVV g, (R25) ++ ++ JAL (R20) ++ ++ MOVV R0, (R25) ++ JMP finish ++ ++nosaveg: + JAL (R20) + + finish: +@@ -311,12 +330,29 @@ noswitch: + AND $~15, R25 // Align for C code + MOVV R25, R3 + +- MOVW $1, R4 // CLOCK_MONOTONIC=1 ++ MOVW $CLOCK_MONOTONIC, R4 + MOVV $0(R3), R5 + + MOVV runtime·vdsoClockgettimeSym(SB), R20 + BEQ R20, fallback + ++ // Store g on gsignal's stack, see sys_linux_arm64.s for detail ++ MOVBU runtime·iscgo(SB), R25 ++ BNE R0, R25, nosaveg ++ ++ MOVV m_gsignal(R24), R25 // g.m.gsignal ++ BEQ R25, nosaveg ++ BEQ g, R25, nosaveg ++ ++ MOVV (g_stack+stack_lo)(R25), R25 // g.m.gsignal.stack.lo ++ MOVV g, (R25) ++ ++ JAL (R20) ++ ++ MOVV R0, (R25) ++ JMP finish ++ ++nosaveg: + JAL (R20) + + finish: +-- +2.38.1 + diff --git 
a/0014-runtime-save-restore-callee-saved-registers-in-loong.patch b/0014-runtime-save-restore-callee-saved-registers-in-loong.patch new file mode 100644 index 0000000000000000000000000000000000000000..46da1dde95cfb3941e38bd0e8a6a709d6434a7be --- /dev/null +++ b/0014-runtime-save-restore-callee-saved-registers-in-loong.patch @@ -0,0 +1,222 @@ +From 1dbd1cf5134a9e357d6752b050e445237e8b333f Mon Sep 17 00:00:00 2001 +From: Guoqi Chen +Date: Fri, 26 Aug 2022 14:05:31 +0800 +Subject: [PATCH 14/62] runtime: save/restore callee-saved registers in + loong64's sigtramp + +Loong64's R22-R31 and F24-F31 are callee saved registers, which +should be saved in the beginning of sigtramp, and restored at +the end. + +In reviewing comments about sigtramp in sys_linux_arm64 it was +noted that a previous issue in arm64 due to missing callee save +registers could also be a problem on loong64, so code was added +to save and restore those. + +Updates #31827 + +Change-Id: I3ae58fe8a64ddb052d0a89b63e82c01ad328dd15 +--- + src/runtime/cgo/abi_loong64.h | 60 +++++++++++++++++++++++++++++++++ + src/runtime/cgo/asm_loong64.s | 55 ++++++++---------------------- + src/runtime/sys_linux_loong64.s | 28 ++++++++++----- + 3 files changed, 94 insertions(+), 49 deletions(-) + create mode 100644 src/runtime/cgo/abi_loong64.h + +diff --git a/src/runtime/cgo/abi_loong64.h b/src/runtime/cgo/abi_loong64.h +new file mode 100644 +index 0000000000..b10d83732f +--- /dev/null ++++ b/src/runtime/cgo/abi_loong64.h +@@ -0,0 +1,60 @@ ++// Copyright 2022 The Go Authors. All rights reserved. ++// Use of this source code is governed by a BSD-style ++// license that can be found in the LICENSE file. ++ ++// Macros for transitioning from the host ABI to Go ABI0. ++// ++// These macros save and restore the callee-saved registers ++// from the stack, but they don't adjust stack pointer, so ++// the user should prepare stack space in advance. 
++// SAVE_R22_TO_R31(offset) saves R22 ~ R31 to the stack space ++// of ((offset)+0*8)(R3) ~ ((offset)+9*8)(R3). ++// ++// SAVE_F24_TO_F31(offset) saves F24 ~ F31 to the stack space ++// of ((offset)+0*8)(R3) ~ ((offset)+7*8)(R3). ++// ++// Note: g is R22 ++ ++#define SAVE_R22_TO_R31(offset) \ ++ MOVV g, ((offset)+(0*8))(R3) \ ++ MOVV R23, ((offset)+(1*8))(R3) \ ++ MOVV R24, ((offset)+(2*8))(R3) \ ++ MOVV R25, ((offset)+(3*8))(R3) \ ++ MOVV R26, ((offset)+(4*8))(R3) \ ++ MOVV R27, ((offset)+(5*8))(R3) \ ++ MOVV R28, ((offset)+(6*8))(R3) \ ++ MOVV R29, ((offset)+(7*8))(R3) \ ++ MOVV R30, ((offset)+(8*8))(R3) \ ++ MOVV R31, ((offset)+(9*8))(R3) ++ ++#define SAVE_F24_TO_F31(offset) \ ++ MOVD F24, ((offset)+(0*8))(R3) \ ++ MOVD F25, ((offset)+(1*8))(R3) \ ++ MOVD F26, ((offset)+(2*8))(R3) \ ++ MOVD F27, ((offset)+(3*8))(R3) \ ++ MOVD F28, ((offset)+(4*8))(R3) \ ++ MOVD F29, ((offset)+(5*8))(R3) \ ++ MOVD F30, ((offset)+(6*8))(R3) \ ++ MOVD F31, ((offset)+(7*8))(R3) ++ ++#define RESTORE_R22_TO_R31(offset) \ ++ MOVV ((offset)+(0*8))(R3), g \ ++ MOVV ((offset)+(1*8))(R3), R23 \ ++ MOVV ((offset)+(2*8))(R3), R24 \ ++ MOVV ((offset)+(3*8))(R3), R25 \ ++ MOVV ((offset)+(4*8))(R3), R26 \ ++ MOVV ((offset)+(5*8))(R3), R27 \ ++ MOVV ((offset)+(6*8))(R3), R28 \ ++ MOVV ((offset)+(7*8))(R3), R29 \ ++ MOVV ((offset)+(8*8))(R3), R30 \ ++ MOVV ((offset)+(9*8))(R3), R31 ++ ++#define RESTORE_F24_TO_F31(offset) \ ++ MOVD ((offset)+(0*8))(R3), F24 \ ++ MOVD ((offset)+(1*8))(R3), F25 \ ++ MOVD ((offset)+(2*8))(R3), F26 \ ++ MOVD ((offset)+(3*8))(R3), F27 \ ++ MOVD ((offset)+(4*8))(R3), F28 \ ++ MOVD ((offset)+(5*8))(R3), F29 \ ++ MOVD ((offset)+(6*8))(R3), F30 \ ++ MOVD ((offset)+(7*8))(R3), F31 +diff --git a/src/runtime/cgo/asm_loong64.s b/src/runtime/cgo/asm_loong64.s +index 961a3dd484..aea4f8e6b9 100644 +--- a/src/runtime/cgo/asm_loong64.s ++++ b/src/runtime/cgo/asm_loong64.s +@@ -3,6 +3,7 @@ + // license that can be found in the LICENSE file. 
+ + #include "textflag.h" ++#include "abi_loong64.h" + + // Called by C code generated by cmd/cgo. + // func crosscall2(fn, a unsafe.Pointer, n int32, ctxt uintptr) +@@ -16,52 +17,24 @@ TEXT crosscall2(SB),NOSPLIT|NOFRAME,$0 + * first arg. + */ + +- ADDV $(-8*22), R3 +- MOVV R4, (8*1)(R3) // fn unsafe.Pointer +- MOVV R5, (8*2)(R3) // a unsafe.Pointer +- MOVV R7, (8*3)(R3) // ctxt uintptr +- MOVV R23, (8*4)(R3) +- MOVV R24, (8*5)(R3) +- MOVV R25, (8*6)(R3) +- MOVV R26, (8*7)(R3) +- MOVV R27, (8*8)(R3) +- MOVV R28, (8*9)(R3) +- MOVV R29, (8*10)(R3) +- MOVV R30, (8*11)(R3) +- MOVV g, (8*12)(R3) +- MOVV R1, (8*13)(R3) +- MOVD F24, (8*14)(R3) +- MOVD F25, (8*15)(R3) +- MOVD F26, (8*16)(R3) +- MOVD F27, (8*17)(R3) +- MOVD F28, (8*18)(R3) +- MOVD F29, (8*19)(R3) +- MOVD F30, (8*20)(R3) +- MOVD F31, (8*21)(R3) ++ ADDV $(-23*8), R3 ++ MOVV R4, (1*8)(R3) // fn unsafe.Pointer ++ MOVV R5, (2*8)(R3) // a unsafe.Pointer ++ MOVV R7, (3*8)(R3) // ctxt uintptr ++ ++ SAVE_R22_TO_R31((4*8)) ++ SAVE_F24_TO_F31((14*8)) ++ MOVV R1, (22*8)(R3) + + // Initialize Go ABI environment + JAL runtime·load_g(SB) + + JAL runtime·cgocallback(SB) + +- MOVV (8*4)(R3), R23 +- MOVV (8*5)(R3), R24 +- MOVV (8*6)(R3), R25 +- MOVV (8*7)(R3), R26 +- MOVV (8*8)(R3), R27 +- MOVV (8*9)(R3), R28 +- MOVV (8*10)(R3), R29 +- MOVV (8*11)(R3), R30 +- MOVV (8*12)(R3), g +- MOVV (8*13)(R3), R1 +- MOVD (8*14)(R3), F24 +- MOVD (8*15)(R3), F25 +- MOVD (8*16)(R3), F26 +- MOVD (8*17)(R3), F27 +- MOVD (8*18)(R3), F28 +- MOVD (8*19)(R3), F29 +- MOVD (8*20)(R3), F30 +- MOVD (8*21)(R3), F31 +- ADDV $(8*22), R3 ++ RESTORE_R22_TO_R31((4*8)) ++ RESTORE_F24_TO_F31((14*8)) ++ MOVV (22*8)(R3), R1 ++ ++ ADDV $(23*8), R3 + + RET +diff --git a/src/runtime/sys_linux_loong64.s b/src/runtime/sys_linux_loong64.s +index 4d00fd0ca3..053bdd7f52 100644 +--- a/src/runtime/sys_linux_loong64.s ++++ b/src/runtime/sys_linux_loong64.s +@@ -9,6 +9,7 @@ + #include "go_asm.h" + #include "go_tls.h" + #include "textflag.h" ++#include "cgo/abi_loong64.h" 
+ + #define AT_FDCWD -100 + #define CLOCK_REALTIME 0 +@@ -413,18 +414,29 @@ TEXT runtime·sigfwd(SB),NOSPLIT,$0-32 + JAL (R20) + RET + +-TEXT runtime·sigtramp(SB),NOSPLIT|TOPFRAME,$64 ++TEXT runtime·sigtramp(SB),NOSPLIT|TOPFRAME,$182 ++ MOVW R4, (1*8)(R3) ++ MOVV R5, (2*8)(R3) ++ MOVV R6, (3*8)(R3) ++ ++ // Save callee-save registers in the case of signal forwarding. ++ // Please refer to https://golang.org/issue/31827 . ++ SAVE_R22_TO_R31((4*8)) ++ SAVE_F24_TO_F31((14*8)) ++ + // this might be called in external code context, + // where g is not set. +- MOVB runtime·iscgo(SB), R19 +- BEQ R19, 2(PC) ++ MOVB runtime·iscgo(SB), R4 ++ BEQ R4, 2(PC) + JAL runtime·load_g(SB) + +- MOVW R4, 8(R3) +- MOVV R5, 16(R3) +- MOVV R6, 24(R3) +- MOVV $runtime·sigtrampgo(SB), R19 +- JAL (R19) ++ MOVV $runtime·sigtrampgo(SB), R4 ++ JAL (R4) ++ ++ // Restore callee-save registers. ++ RESTORE_R22_TO_R31((4*8)) ++ RESTORE_F24_TO_F31((14*8)) ++ + RET + + TEXT runtime·cgoSigtramp(SB),NOSPLIT,$0 +-- +2.38.1 + diff --git a/0015-runtime-add-comment-for-sys_linux_loong64.patch b/0015-runtime-add-comment-for-sys_linux_loong64.patch new file mode 100644 index 0000000000000000000000000000000000000000..783a9a670b4f8045d9dc1c7d8036f4ea1bc1ef11 --- /dev/null +++ b/0015-runtime-add-comment-for-sys_linux_loong64.patch @@ -0,0 +1,243 @@ +From b17915a7a4e8c513641f9e4c27fb613165ec6f73 Mon Sep 17 00:00:00 2001 +From: Guoqi Chen +Date: Fri, 26 Aug 2022 14:19:06 +0800 +Subject: [PATCH 15/62] runtime: add comment for sys_linux_loong64 + +Change-Id: I617d6d788cb213c1405f81d9f689fd6846ee105a +--- + src/runtime/sys_linux_loong64.s | 31 +++++++++++++++++++++++++++++-- + 1 file changed, 29 insertions(+), 2 deletions(-) + +diff --git a/src/runtime/sys_linux_loong64.s b/src/runtime/sys_linux_loong64.s +index 053bdd7f52..2a16b4f01d 100644 +--- a/src/runtime/sys_linux_loong64.s ++++ b/src/runtime/sys_linux_loong64.s +@@ -46,6 +46,7 @@ + #define SYS_timer_settime 110 + #define SYS_timer_delete 111 + ++// func 
exit(code int32) + TEXT runtime·exit(SB),NOSPLIT|NOFRAME,$0-4 + MOVW code+0(FP), R4 + MOVV $SYS_exit_group, R11 +@@ -65,6 +66,7 @@ TEXT runtime·exitThread(SB),NOSPLIT|NOFRAME,$0-8 + SYSCALL + JMP 0(PC) + ++// func open(name *byte, mode, perm int32) int32 + TEXT runtime·open(SB),NOSPLIT|NOFRAME,$0-20 + MOVW $AT_FDCWD, R4 // AT_FDCWD, so this acts like open + MOVV name+0(FP), R5 +@@ -78,6 +80,7 @@ TEXT runtime·open(SB),NOSPLIT|NOFRAME,$0-20 + MOVW R4, ret+16(FP) + RET + ++// func closefd(fd int32) int32 + TEXT runtime·closefd(SB),NOSPLIT|NOFRAME,$0-12 + MOVW fd+0(FP), R4 + MOVV $SYS_close, R11 +@@ -88,6 +91,7 @@ TEXT runtime·closefd(SB),NOSPLIT|NOFRAME,$0-12 + MOVW R4, ret+8(FP) + RET + ++// func write1(fd uintptr, p unsafe.Pointer, n int32) int32 + TEXT runtime·write1(SB),NOSPLIT|NOFRAME,$0-28 + MOVV fd+0(FP), R4 + MOVV p+8(FP), R5 +@@ -97,6 +101,7 @@ TEXT runtime·write1(SB),NOSPLIT|NOFRAME,$0-28 + MOVW R4, ret+24(FP) + RET + ++// func read(fd int32, p unsafe.Pointer, n int32) int32 + TEXT runtime·read(SB),NOSPLIT|NOFRAME,$0-28 + MOVW fd+0(FP), R4 + MOVV p+8(FP), R5 +@@ -115,6 +120,7 @@ TEXT runtime·pipe2(SB),NOSPLIT|NOFRAME,$0-20 + MOVW R4, errno+16(FP) + RET + ++// func usleep(usec uint32) + TEXT runtime·usleep(SB),NOSPLIT,$16-4 + MOVWU usec+0(FP), R6 + MOVV $1000, R4 +@@ -136,12 +142,14 @@ TEXT runtime·usleep(SB),NOSPLIT,$16-4 + SYSCALL + RET + ++// func gettid() uint32 + TEXT runtime·gettid(SB),NOSPLIT,$0-4 + MOVV $SYS_gettid, R11 + SYSCALL + MOVW R4, ret+0(FP) + RET + ++// func raise(sig uint32) + TEXT runtime·raise(SB),NOSPLIT|NOFRAME,$0 + MOVV $SYS_getpid, R11 + SYSCALL +@@ -155,6 +163,7 @@ TEXT runtime·raise(SB),NOSPLIT|NOFRAME,$0 + SYSCALL + RET + ++// func raiseproc(sig uint32) + TEXT runtime·raiseproc(SB),NOSPLIT|NOFRAME,$0 + MOVV $SYS_getpid, R11 + SYSCALL +@@ -164,12 +173,14 @@ TEXT runtime·raiseproc(SB),NOSPLIT|NOFRAME,$0 + SYSCALL + RET + ++// func getpid() int + TEXT ·getpid(SB),NOSPLIT|NOFRAME,$0-8 + MOVV $SYS_getpid, R11 + SYSCALL + MOVV R4, 
ret+0(FP) + RET + ++// func tgkill(tgid, tid, sig int) + TEXT ·tgkill(SB),NOSPLIT|NOFRAME,$0-24 + MOVV tgid+0(FP), R4 + MOVV tid+8(FP), R5 +@@ -178,6 +189,7 @@ TEXT ·tgkill(SB),NOSPLIT|NOFRAME,$0-24 + SYSCALL + RET + ++// func setitimer(mode int32, new, old *itimerval) + TEXT runtime·setitimer(SB),NOSPLIT|NOFRAME,$0-24 + MOVW mode+0(FP), R4 + MOVV new+8(FP), R5 +@@ -186,6 +198,7 @@ TEXT runtime·setitimer(SB),NOSPLIT|NOFRAME,$0-24 + SYSCALL + RET + ++// func timer_create(clockid int32, sevp *sigevent, timerid *int32) int32 + TEXT runtime·timer_create(SB),NOSPLIT,$0-28 + MOVW clockid+0(FP), R4 + MOVV sevp+8(FP), R5 +@@ -195,6 +208,7 @@ TEXT runtime·timer_create(SB),NOSPLIT,$0-28 + MOVW R4, ret+24(FP) + RET + ++// func timer_settime(timerid int32, flags int32, new, old *itimerspec) int32 + TEXT runtime·timer_settime(SB),NOSPLIT,$0-28 + MOVW timerid+0(FP), R4 + MOVW flags+4(FP), R5 +@@ -205,6 +219,7 @@ TEXT runtime·timer_settime(SB),NOSPLIT,$0-28 + MOVW R4, ret+24(FP) + RET + ++// func timer_delete(timerid int32) int32 + TEXT runtime·timer_delete(SB),NOSPLIT,$0-12 + MOVW timerid+0(FP), R4 + MOVV $SYS_timer_delete, R11 +@@ -212,6 +227,7 @@ TEXT runtime·timer_delete(SB),NOSPLIT,$0-12 + MOVW R4, ret+8(FP) + RET + ++// func mincore(addr unsafe.Pointer, n uintptr, dst *byte) int32 + TEXT runtime·mincore(SB),NOSPLIT|NOFRAME,$0-28 + MOVV addr+0(FP), R4 + MOVV n+8(FP), R5 +@@ -301,6 +317,7 @@ fallback: + SYSCALL + JMP finish + ++// func nanotime1() int64 + TEXT runtime·nanotime1(SB),NOSPLIT,$16-8 + MOVV R3, R23 // R23 is unchanged by C code + MOVV R3, R25 +@@ -384,6 +401,7 @@ fallback: + SYSCALL + JMP finish + ++// func rtsigprocmask(how int32, new, old *sigset, size int32) + TEXT runtime·rtsigprocmask(SB),NOSPLIT|NOFRAME,$0-28 + MOVW how+0(FP), R4 + MOVV new+8(FP), R5 +@@ -396,6 +414,7 @@ TEXT runtime·rtsigprocmask(SB),NOSPLIT|NOFRAME,$0-28 + MOVV R0, 0xf1(R0) // crash + RET + ++// func rt_sigaction(sig uintptr, new, old *sigactiont, size uintptr) int32 + TEXT 
runtime·rt_sigaction(SB),NOSPLIT|NOFRAME,$0-36 + MOVV sig+0(FP), R4 + MOVV new+8(FP), R5 +@@ -406,6 +425,7 @@ TEXT runtime·rt_sigaction(SB),NOSPLIT|NOFRAME,$0-36 + MOVW R4, ret+32(FP) + RET + ++// func sigfwd(fn uintptr, sig uint32, info *siginfo, ctx unsafe.Pointer) + TEXT runtime·sigfwd(SB),NOSPLIT,$0-32 + MOVW sig+8(FP), R4 + MOVV info+16(FP), R5 +@@ -414,6 +434,7 @@ TEXT runtime·sigfwd(SB),NOSPLIT,$0-32 + JAL (R20) + RET + ++// func sigtramp(signo, ureg, ctxt unsafe.Pointer) + TEXT runtime·sigtramp(SB),NOSPLIT|TOPFRAME,$182 + MOVW R4, (1*8)(R3) + MOVV R5, (2*8)(R3) +@@ -439,9 +460,11 @@ TEXT runtime·sigtramp(SB),NOSPLIT|TOPFRAME,$182 + + RET + ++// func cgoSigtramp() + TEXT runtime·cgoSigtramp(SB),NOSPLIT,$0 + JMP runtime·sigtramp(SB) + ++// func mmap(addr unsafe.Pointer, n uintptr, prot, flags, fd int32, off uint32) (p unsafe.Pointer, err int) + TEXT runtime·mmap(SB),NOSPLIT|NOFRAME,$0 + MOVV addr+0(FP), R4 + MOVV n+8(FP), R5 +@@ -463,6 +486,7 @@ ok: + MOVV $0, err+40(FP) + RET + ++// func munmap(addr unsafe.Pointer, n uintptr) + TEXT runtime·munmap(SB),NOSPLIT|NOFRAME,$0 + MOVV addr+0(FP), R4 + MOVV n+8(FP), R5 +@@ -473,6 +497,7 @@ TEXT runtime·munmap(SB),NOSPLIT|NOFRAME,$0 + MOVV R0, 0xf3(R0) // crash + RET + ++// func madvise(addr unsafe.Pointer, n uintptr, flags int32) + TEXT runtime·madvise(SB),NOSPLIT|NOFRAME,$0 + MOVV addr+0(FP), R4 + MOVV n+8(FP), R5 +@@ -482,8 +507,7 @@ TEXT runtime·madvise(SB),NOSPLIT|NOFRAME,$0 + MOVW R4, ret+24(FP) + RET + +-// int64 futex(int32 *uaddr, int32 op, int32 val, +-// struct timespec *timeout, int32 *uaddr2, int32 val2); ++// func futex(addr unsafe.Pointer, op int32, val uint32, ts, addr2 unsafe.Pointer, val3 uint32) int32 + TEXT runtime·futex(SB),NOSPLIT|NOFRAME,$0 + MOVV addr+0(FP), R4 + MOVW op+8(FP), R5 +@@ -557,6 +581,7 @@ nog: + SYSCALL + JMP -3(PC) // keep exiting + ++// func sigaltstack(new, old *stackt) + TEXT runtime·sigaltstack(SB),NOSPLIT|NOFRAME,$0 + MOVV new+0(FP), R4 + MOVV old+8(FP), R5 +@@ -567,11 
+592,13 @@ TEXT runtime·sigaltstack(SB),NOSPLIT|NOFRAME,$0 + MOVV R0, 0xf1(R0) // crash + RET + ++// func osyield() + TEXT runtime·osyield(SB),NOSPLIT|NOFRAME,$0 + MOVV $SYS_sched_yield, R11 + SYSCALL + RET + ++// func sched_getaffinity(pid, len uintptr, buf *uintptr) int32 + TEXT runtime·sched_getaffinity(SB),NOSPLIT|NOFRAME,$0 + MOVV pid+0(FP), R4 + MOVV len+8(FP), R5 +-- +2.38.1 + diff --git a/0016-runtime-mark-morestack_noctxt-SPWRITE-for-linux-loon.patch b/0016-runtime-mark-morestack_noctxt-SPWRITE-for-linux-loon.patch new file mode 100644 index 0000000000000000000000000000000000000000..e3170231e55011aab0bd416182f4350034bd51e4 --- /dev/null +++ b/0016-runtime-mark-morestack_noctxt-SPWRITE-for-linux-loon.patch @@ -0,0 +1,36 @@ +From db9cbb5d295846ee7da8f33106e208f4ed90998b Mon Sep 17 00:00:00 2001 +From: Guoqi Chen +Date: Fri, 4 Nov 2022 13:27:23 +0800 +Subject: [PATCH 16/62] runtime: mark morestack_noctxt SPWRITE for + linux/loong64 + +ref. CL 425396 + +Updates #54332. + +Change-Id: I1a235b0cca4dbf79cf61cf5f40b594fc2d940857 +--- + src/runtime/asm_loong64.s | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/src/runtime/asm_loong64.s b/src/runtime/asm_loong64.s +index b934fab1ca..3921091fea 100644 +--- a/src/runtime/asm_loong64.s ++++ b/src/runtime/asm_loong64.s +@@ -261,6 +261,13 @@ TEXT runtime·morestack(SB),NOSPLIT|NOFRAME,$0-0 + UNDEF + + TEXT runtime·morestack_noctxt(SB),NOSPLIT|NOFRAME,$0-0 ++ // Force SPWRITE. This function doesn't actually write SP, ++ // but it is called with a special calling convention where ++ // the caller doesn't save LR on stack but passes it as a ++ // register (R5), and the unwinder currently doesn't understand. ++ // Make it SPWRITE to stop unwinding. 
(See issue 54332) ++ MOVV R3, R3 ++ + MOVV R0, REGCTXT + JMP runtime·morestack(SB) + +-- +2.38.1 + diff --git a/0017-cmd-internal-obj-loong64-add-the-PCALAU12I-instructi.patch b/0017-cmd-internal-obj-loong64-add-the-PCALAU12I-instructi.patch new file mode 100644 index 0000000000000000000000000000000000000000..a20e7073dc6a5227a90fd82368bc198bf2dfb9c4 --- /dev/null +++ b/0017-cmd-internal-obj-loong64-add-the-PCALAU12I-instructi.patch @@ -0,0 +1,62 @@ +From fd0d94c6691299f6abba31407952e6c1e88b30ee Mon Sep 17 00:00:00 2001 +From: WANG Xuerui +Date: Sat, 3 Dec 2022 20:57:52 +0800 +Subject: [PATCH 17/62] cmd/internal/obj/loong64: add the PCALAU12I instruction + for reloc use + +The LoongArch ELF psABI v2.00 revamped the relocation design, largely +moving to using the `pcalau12i + addi/ld/st` pair for PC-relative +addressing within +/- 32 bits. The "pcala" in `pcalau12i` stands for +"PC-aligned add"; the instruction's semantics is actually the same as +arm64's `adrp`. + +Add support for emitting this instruction as part of the relevant +addressing ops, for use with new reloc types later. 
+ +Change-Id: Ic1747cd9745aad0d1abb9bd78400cd5ff5978bc8 +--- + src/cmd/internal/obj/loong64/a.out.go | 1 + + src/cmd/internal/obj/loong64/anames.go | 1 + + src/cmd/internal/obj/loong64/asm.go | 2 ++ + 3 files changed, 4 insertions(+) + +diff --git a/src/cmd/internal/obj/loong64/a.out.go b/src/cmd/internal/obj/loong64/a.out.go +index 10cf396669..e7ac592b8b 100644 +--- a/src/cmd/internal/obj/loong64/a.out.go ++++ b/src/cmd/internal/obj/loong64/a.out.go +@@ -265,6 +265,7 @@ const ( + ALU12IW + ALU32ID + ALU52ID ++ APCALAU12I + APCADDU12I + AJIRL + ABGE +diff --git a/src/cmd/internal/obj/loong64/anames.go b/src/cmd/internal/obj/loong64/anames.go +index eb13da20c3..f61756e7a8 100644 +--- a/src/cmd/internal/obj/loong64/anames.go ++++ b/src/cmd/internal/obj/loong64/anames.go +@@ -33,6 +33,7 @@ var Anames = []string{ + "LU12IW", + "LU32ID", + "LU52ID", ++ "PCALAU12I", + "PCADDU12I", + "JIRL", + "BGE", +diff --git a/src/cmd/internal/obj/loong64/asm.go b/src/cmd/internal/obj/loong64/asm.go +index 02e44ee0a1..f4311c4c07 100644 +--- a/src/cmd/internal/obj/loong64/asm.go ++++ b/src/cmd/internal/obj/loong64/asm.go +@@ -1847,6 +1847,8 @@ func (c *ctxt0) opir(a obj.As) uint32 { + return 0x0a << 25 + case ALU32ID: + return 0x0b << 25 ++ case APCALAU12I: ++ return 0x0d << 25 + case APCADDU12I: + return 0x0e << 25 + } +-- +2.38.1 + diff --git a/0018-cmd-internal-obj-loong64-cmd-link-internal-loong64-s.patch b/0018-cmd-internal-obj-loong64-cmd-link-internal-loong64-s.patch new file mode 100644 index 0000000000000000000000000000000000000000..44c8dd28e5c13e5640005f15b13318ab479de6d4 --- /dev/null +++ b/0018-cmd-internal-obj-loong64-cmd-link-internal-loong64-s.patch @@ -0,0 +1,316 @@ +From e51fac967c418dda93dbc7c8b057252ef4cfa29e Mon Sep 17 00:00:00 2001 +From: WANG Xuerui +Date: Sat, 3 Dec 2022 21:16:49 +0800 +Subject: [PATCH 18/62] cmd/internal/obj/loong64, cmd/link/internal/loong64: + switch to LoongArch ELF psABI v2 relocs + +The LoongArch ELF psABI v2 [1] relocs are vastly 
simplified from the v1 +which involved a stack machine for computing the reloc values, but the +details of PC-relative addressing are changed as well. Specifically, the +`pcaddu12i` instruction is substituted with the `pcalau12i`, which has +the same semantics as the arm64's `adrp` -- meaning the lower bits of a +symbol's address now have to be absolute and not PC-relative. However, +the little bit of added complexity apart, the obvious advantage is that +only 1 reloc needs to be emitted for every kind of external reloc we +care about, and no open-coded stack ops has to remain any more. + +[1]: https://loongson.github.io/LoongArch-Documentation/LoongArch-ELF-ABI-EN.html + +Change-Id: I5c13bc710eaf58293a32e930dd33feff2ef14c28 +--- + .../asm/internal/asm/testdata/loong64enc2.s | 38 ++--- + src/cmd/internal/obj/loong64/asm.go | 10 +- + src/cmd/link/internal/ld/elf.go | 2 +- + src/cmd/link/internal/loong64/asm.go | 136 ++++++------------ + 4 files changed, 68 insertions(+), 118 deletions(-) + +diff --git a/src/cmd/asm/internal/asm/testdata/loong64enc2.s b/src/cmd/asm/internal/asm/testdata/loong64enc2.s +index 3b5e3cb81a..00768365b6 100644 +--- a/src/cmd/asm/internal/asm/testdata/loong64enc2.s ++++ b/src/cmd/asm/internal/asm/testdata/loong64enc2.s +@@ -61,22 +61,22 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0 + XOR $-1, R4 // 1efcbf0284f81500 + MOVH R4, R5 // 85c04000a5c04800 + +- // relocation instructions +- MOVW R4, name(SB) // 1e00001cc4038029 +- MOVWU R4, name(SB) // 1e00001cc4038029 +- MOVV R4, name(SB) // 1e00001cc403c029 +- MOVB R4, name(SB) // 1e00001cc4030029 +- MOVBU R4, name(SB) // 1e00001cc4030029 +- MOVF F4, name(SB) // 1e00001cc403402b +- MOVD F4, name(SB) // 1e00001cc403c02b +- MOVW name(SB), R4 // 1e00001cc4038028 +- MOVWU name(SB), R4 // 1e00001cc403802a +- MOVV name(SB), R4 // 1e00001cc403c028 +- MOVB name(SB), R4 // 1e00001cc4030028 +- MOVBU name(SB), R4 // 1e00001cc403002a +- MOVF name(SB), F4 // 1e00001cc403002b +- MOVD name(SB), F4 // 1e00001cc403802b +- 
MOVH R4, name(SB) // 1e00001cc4034029 +- MOVH name(SB), R4 // 1e00001cc4034028 +- MOVHU R4, name(SB) // 1e00001cc4034029 +- MOVHU name(SB), R4 // 1e00001cc403402a ++ // relocation instructions ++ MOVW R4, name(SB) // 1e00001ac4038029 ++ MOVWU R4, name(SB) // 1e00001ac4038029 ++ MOVV R4, name(SB) // 1e00001ac403c029 ++ MOVB R4, name(SB) // 1e00001ac4030029 ++ MOVBU R4, name(SB) // 1e00001ac4030029 ++ MOVF F4, name(SB) // 1e00001ac403402b ++ MOVD F4, name(SB) // 1e00001ac403c02b ++ MOVW name(SB), R4 // 1e00001ac4038028 ++ MOVWU name(SB), R4 // 1e00001ac403802a ++ MOVV name(SB), R4 // 1e00001ac403c028 ++ MOVB name(SB), R4 // 1e00001ac4030028 ++ MOVBU name(SB), R4 // 1e00001ac403002a ++ MOVF name(SB), F4 // 1e00001ac403002b ++ MOVD name(SB), F4 // 1e00001ac403802b ++ MOVH R4, name(SB) // 1e00001ac4034029 ++ MOVH name(SB), R4 // 1e00001ac4034028 ++ MOVHU R4, name(SB) // 1e00001ac4034029 ++ MOVHU name(SB), R4 // 1e00001ac403402a +diff --git a/src/cmd/internal/obj/loong64/asm.go b/src/cmd/internal/obj/loong64/asm.go +index f4311c4c07..d3c34def73 100644 +--- a/src/cmd/internal/obj/loong64/asm.go ++++ b/src/cmd/internal/obj/loong64/asm.go +@@ -1485,8 +1485,8 @@ func (c *ctxt0) asmout(p *obj.Prog, o *Optab, out []uint32) { + o1 = c.oprrr(ABREAK) + + // relocation operations +- case 50: // mov r,addr ==> pcaddu12i + sw +- o1 = OP_IR(c.opir(APCADDU12I), uint32(0), uint32(REGTMP)) ++ case 50: // mov r,addr ==> pcalau12i + sw ++ o1 = OP_IR(c.opir(APCALAU12I), uint32(0), uint32(REGTMP)) + rel := obj.Addrel(c.cursym) + rel.Off = int32(c.pc) + rel.Siz = 4 +@@ -1502,8 +1502,8 @@ func (c *ctxt0) asmout(p *obj.Prog, o *Optab, out []uint32) { + rel2.Add = p.To.Offset + rel2.Type = objabi.R_ADDRLOONG64 + +- case 51: // mov addr,r ==> pcaddu12i + lw +- o1 = OP_IR(c.opir(APCADDU12I), uint32(0), uint32(REGTMP)) ++ case 51: // mov addr,r ==> pcalau12i + lw ++ o1 = OP_IR(c.opir(APCALAU12I), uint32(0), uint32(REGTMP)) + rel := obj.Addrel(c.cursym) + rel.Off = int32(c.pc) + rel.Siz = 4 +@@ 
-1521,7 +1521,7 @@ func (c *ctxt0) asmout(p *obj.Prog, o *Optab, out []uint32) { + case 52: // mov $lext, r + // NOTE: this case does not use REGTMP. If it ever does, + // remove the NOTUSETMP flag in optab. +- o1 = OP_IR(c.opir(APCADDU12I), uint32(0), uint32(p.To.Reg)) ++ o1 = OP_IR(c.opir(APCALAU12I), uint32(0), uint32(p.To.Reg)) + rel := obj.Addrel(c.cursym) + rel.Off = int32(c.pc) + rel.Siz = 4 +diff --git a/src/cmd/link/internal/ld/elf.go b/src/cmd/link/internal/ld/elf.go +index a1ae7eab57..2931d94c98 100644 +--- a/src/cmd/link/internal/ld/elf.go ++++ b/src/cmd/link/internal/ld/elf.go +@@ -229,7 +229,7 @@ func Elfinit(ctxt *Link) { + ehdr.Flags = 0x20000004 /* MIPS 3 CPIC */ + } + if ctxt.Arch.Family == sys.Loong64 { +- ehdr.Flags = 0x3 /* LoongArch lp64d */ ++ ehdr.Flags = 0x43 /* LoongArch ABI v1, lp64d */ + } + if ctxt.Arch.Family == sys.RISCV64 { + ehdr.Flags = 0x4 /* RISCV Float ABI Double */ +diff --git a/src/cmd/link/internal/loong64/asm.go b/src/cmd/link/internal/loong64/asm.go +index 0eb3a813b2..e9cf07023f 100644 +--- a/src/cmd/link/internal/loong64/asm.go ++++ b/src/cmd/link/internal/loong64/asm.go +@@ -46,100 +46,28 @@ func elfreloc1(ctxt *ld.Link, out *ld.OutBuf, ldr *loader.Loader, s loader.Sym, + } + case objabi.R_ADDRLOONG64TLS: + out.Write64(uint64(sectoff)) +- out.Write64(uint64(elf.R_LARCH_SOP_PUSH_TLS_TPREL) | uint64(elfsym)<<32) ++ out.Write64(uint64(elf.R_LARCH_TLS_LE_LO12) | uint64(elfsym)<<32) + out.Write64(uint64(r.Xadd)) + +- out.Write64(uint64(sectoff)) +- out.Write64(uint64(elf.R_LARCH_SOP_PUSH_ABSOLUTE)) +- out.Write64(uint64(0xfff)) +- +- out.Write64(uint64(sectoff)) +- out.Write64(uint64(elf.R_LARCH_SOP_AND)) +- out.Write64(uint64(0x0)) +- +- out.Write64(uint64(sectoff)) +- out.Write64(uint64(elf.R_LARCH_SOP_POP_32_U_10_12)) +- out.Write64(uint64(0x0)) +- + case objabi.R_ADDRLOONG64TLSU: + out.Write64(uint64(sectoff)) +- out.Write64(uint64(elf.R_LARCH_SOP_PUSH_TLS_TPREL) | uint64(elfsym)<<32) ++ 
out.Write64(uint64(elf.R_LARCH_TLS_LE_HI20) | uint64(elfsym)<<32) + out.Write64(uint64(r.Xadd)) + +- out.Write64(uint64(sectoff)) +- out.Write64(uint64(elf.R_LARCH_SOP_PUSH_ABSOLUTE)) +- out.Write64(uint64(0xc)) +- +- out.Write64(uint64(sectoff)) +- out.Write64(uint64(elf.R_LARCH_SOP_SR)) +- out.Write64(uint64(0x0)) +- +- out.Write64(uint64(sectoff)) +- out.Write64(uint64(elf.R_LARCH_SOP_POP_32_S_5_20) | uint64(0)<<32) +- out.Write64(uint64(0x0)) +- + case objabi.R_CALLLOONG64: + out.Write64(uint64(sectoff)) +- out.Write64(uint64(elf.R_LARCH_SOP_PUSH_PLT_PCREL) | uint64(elfsym)<<32) ++ out.Write64(uint64(elf.R_LARCH_B26) | uint64(elfsym)<<32) + out.Write64(uint64(r.Xadd)) + +- out.Write64(uint64(sectoff)) +- out.Write64(uint64(elf.R_LARCH_SOP_POP_32_S_0_10_10_16_S2)) +- out.Write64(uint64(0x0)) +- // The pcaddu12i + addi.d instructions is used to obtain address of a symbol on Loong64. +- // The low 12-bit of the symbol address need to be added. The addi.d instruction have +- // signed 12-bit immediate operand. The 0x800 (addr+U12 <=> addr+0x800+S12) is introduced +- // to do sign extending from 12 bits. The 0x804 is 0x800 + 4, 4 is instruction bit +- // width on Loong64 and is used to correct the PC of the addi.d instruction. 
+ case objabi.R_ADDRLOONG64: + out.Write64(uint64(sectoff)) +- out.Write64(uint64(elf.R_LARCH_SOP_PUSH_PCREL) | uint64(elfsym)<<32) +- out.Write64(uint64(r.Xadd + 0x4)) +- +- out.Write64(uint64(sectoff)) +- out.Write64(uint64(elf.R_LARCH_SOP_PUSH_PCREL) | uint64(elfsym)<<32) +- out.Write64(uint64(r.Xadd + 0x804)) +- +- out.Write64(uint64(sectoff)) +- out.Write64(uint64(elf.R_LARCH_SOP_PUSH_ABSOLUTE)) +- out.Write64(uint64(0xc)) +- +- out.Write64(uint64(sectoff)) +- out.Write64(uint64(elf.R_LARCH_SOP_SR)) +- out.Write64(uint64(0x0)) +- +- out.Write64(uint64(sectoff)) +- out.Write64(uint64(elf.R_LARCH_SOP_PUSH_ABSOLUTE)) +- out.Write64(uint64(0xc)) +- +- out.Write64(uint64(sectoff)) +- out.Write64(uint64(elf.R_LARCH_SOP_SL)) +- out.Write64(uint64(0x0)) +- +- out.Write64(uint64(sectoff)) +- out.Write64(uint64(elf.R_LARCH_SOP_SUB)) +- out.Write64(uint64(0x0)) +- +- out.Write64(uint64(sectoff)) +- out.Write64(uint64(elf.R_LARCH_SOP_POP_32_S_10_12)) +- out.Write64(uint64(0x0)) ++ out.Write64(uint64(elf.R_LARCH_PCALA_LO12) | uint64(elfsym)<<32) ++ out.Write64(uint64(r.Xadd)) + + case objabi.R_ADDRLOONG64U: + out.Write64(uint64(sectoff)) +- out.Write64(uint64(elf.R_LARCH_SOP_PUSH_PCREL) | uint64(elfsym)<<32) +- out.Write64(uint64(r.Xadd + 0x800)) +- +- out.Write64(uint64(sectoff)) +- out.Write64(uint64(elf.R_LARCH_SOP_PUSH_ABSOLUTE)) +- out.Write64(uint64(0xc)) +- +- out.Write64(uint64(sectoff)) +- out.Write64(uint64(elf.R_LARCH_SOP_SR)) +- out.Write64(uint64(0x0)) +- +- out.Write64(uint64(sectoff)) +- out.Write64(uint64(elf.R_LARCH_SOP_POP_32_S_5_20) | uint64(0)<<32) +- out.Write64(uint64(0x0)) ++ out.Write64(uint64(elf.R_LARCH_PCALA_HI20) | uint64(elfsym)<<32) ++ out.Write64(uint64(r.Xadd)) + } + + return true +@@ -156,7 +84,6 @@ func machoreloc1(*sys.Arch, *ld.OutBuf, *loader.Loader, loader.Sym, loader.ExtRe + func archreloc(target *ld.Target, ldr *loader.Loader, syms *ld.ArchSyms, r loader.Reloc, s loader.Sym, val int64) (o int64, nExtReloc int, ok bool) { + rs := 
r.Sym() + if target.IsExternal() { +- nExtReloc := 0 + switch r.Type() { + default: + return val, 0, false +@@ -168,20 +95,12 @@ func archreloc(target *ld.Target, ldr *loader.Loader, syms *ld.ArchSyms, r loade + if rst != sym.SHOSTOBJ && rst != sym.SDYNIMPORT && ldr.SymSect(rs) == nil { + ldr.Errorf(s, "missing section for %s", ldr.SymName(rs)) + } +- nExtReloc = 8 // need 8 ELF relocations. see elfreloc1 +- if r.Type() == objabi.R_ADDRLOONG64U { +- nExtReloc = 4 +- } +- return val, nExtReloc, true ++ return val, 1, true + case objabi.R_ADDRLOONG64TLS, + objabi.R_ADDRLOONG64TLSU, + objabi.R_CALLLOONG64, + objabi.R_JMPLOONG64: +- nExtReloc = 4 +- if r.Type() == objabi.R_CALLLOONG64 || r.Type() == objabi.R_JMPLOONG64 { +- nExtReloc = 2 +- } +- return val, nExtReloc, true ++ return val, 1, true + } + } + +@@ -196,11 +115,11 @@ func archreloc(target *ld.Target, ldr *loader.Loader, syms *ld.ArchSyms, r loade + case objabi.R_ADDRLOONG64, + objabi.R_ADDRLOONG64U: + pc := ldr.SymValue(s) + int64(r.Off()) +- t := ldr.SymAddr(rs) + r.Add() - pc ++ t := calculatePCAlignedReloc(r.Type(), ldr.SymAddr(rs)+r.Add(), pc) + if r.Type() == objabi.R_ADDRLOONG64 { +- return int64(val&0xffc003ff | (((t + 4 - ((t + 4 + 1<<11) >> 12 << 12)) << 10) & 0x3ffc00)), noExtReloc, isOk ++ return int64(val&0xffc003ff | (t << 10)), noExtReloc, isOk + } +- return int64(val&0xfe00001f | (((t + 1<<11) >> 12 << 5) & 0x1ffffe0)), noExtReloc, isOk ++ return int64(val&0xfe00001f | (t << 5)), noExtReloc, isOk + case objabi.R_ADDRLOONG64TLS, + objabi.R_ADDRLOONG64TLSU: + t := ldr.SymAddr(rs) + r.Add() +@@ -238,3 +157,34 @@ func extreloc(target *ld.Target, ldr *loader.Loader, r loader.Reloc, s loader.Sy + } + return loader.ExtReloc{}, false + } ++ ++func isRequestingLowPageBits(t objabi.RelocType) bool { ++ switch t { ++ case objabi.R_ADDRLOONG64: ++ return true ++ } ++ return false ++} ++ ++// Calculates the value to put into the immediate slot, according to the ++// desired relocation type, target and PC. 
++// The value to use varies based on the reloc type, because of the arm64-like ++// design of the LoongArch ELF psABI v2 relocs. Namely, the absolute low bits of ++// the target are to be used for the low part, while the page-aligned offset is ++// to be used for the higher part. A "page" here is not related to the system's ++// actual page size, but rather a fixed 12-bit range, just like the semantics of ++// arm64's adrp. ++func calculatePCAlignedReloc(t objabi.RelocType, tgt int64, pc int64) int64 { ++ if isRequestingLowPageBits(t) { ++ // corresponding immediate field is 12 bits wide ++ return tgt & 0xfff ++ } ++ ++ pageDelta := (tgt >> 12) - (pc >> 12) ++ if tgt&0xfff >= 0x800 { ++ // adjust for sign-extended addition of the low bits ++ pageDelta += 1 ++ } ++ // corresponding immediate field is 20 bits wide ++ return pageDelta & 0xfffff ++} +-- +2.38.1 + diff --git a/0019-runtime-add-support-for-buildmode-c-shared-on-loong6.patch b/0019-runtime-add-support-for-buildmode-c-shared-on-loong6.patch new file mode 100644 index 0000000000000000000000000000000000000000..192b62b2f426d4f63e5e0b7f98ba47257fcf4ed4 --- /dev/null +++ b/0019-runtime-add-support-for-buildmode-c-shared-on-loong6.patch @@ -0,0 +1,147 @@ +From 17e47970aefce364c926605006a0ba2b15ea7b3b Mon Sep 17 00:00:00 2001 +From: limeidan +Date: Thu, 25 Aug 2022 11:07:25 +0800 +Subject: [PATCH 19/62] runtime: add support for --buildmode=c-shared on + loong64 + +These c-shared related CLs are follow up of CLs 455016, 455017, 455018. Here we +follow the LoongArch ELF psABI v2 standard, which requires the support of the +PCALAU12I instruction. 
+ +Updates #53301 + +Change-Id: I7f1ddbf3b2470d610f12069d147aa9b3a6a96f32 +--- + src/runtime/rt0_linux_loong64.s | 51 +++++++++++++++++++++++++++++++++ + src/runtime/tls_loong64.s | 37 ++++++++++++++++++++++-- + 2 files changed, 86 insertions(+), 2 deletions(-) + +diff --git a/src/runtime/rt0_linux_loong64.s b/src/runtime/rt0_linux_loong64.s +index b9aaa510e1..2075a0b590 100644 +--- a/src/runtime/rt0_linux_loong64.s ++++ b/src/runtime/rt0_linux_loong64.s +@@ -3,6 +3,7 @@ + // license that can be found in the LICENSE file. + + #include "textflag.h" ++#include "cgo/abi_loong64.h" + + TEXT _rt0_loong64_linux(SB),NOSPLIT|NOFRAME,$0 + // In a statically linked binary, the stack contains argc, +@@ -13,6 +14,56 @@ TEXT _rt0_loong64_linux(SB),NOSPLIT|NOFRAME,$0 + ADDV $8, R3, R5 // argv + JMP main(SB) + ++// When building with -buildmode=c-shared, this symbol is called when the shared ++// library is loaded. ++TEXT _rt0_loong64_linux_lib(SB),NOSPLIT,$232 ++ // Preserve callee-save registers. ++ SAVE_R22_TO_R31(24) ++ SAVE_F24_TO_F31(104) ++ ++ // Initialize g as null in case of using g later e.g. sigaction in cgo_sigaction.go ++ MOVV R0, g ++ ++ MOVV R4, _rt0_loong64_linux_lib_argc<>(SB) ++ MOVV R5, _rt0_loong64_linux_lib_argv<>(SB) ++ ++ // Synchronous initialization. ++ MOVV $runtime·libpreinit(SB), R19 ++ JAL (R19) ++ ++ // Create a new thread to do the runtime initialization and return. ++ MOVV _cgo_sys_thread_create(SB), R19 ++ BEQ R19, nocgo ++ MOVV $_rt0_loong64_linux_lib_go(SB), R4 ++ MOVV $0, R5 ++ JAL (R19) ++ JMP restore ++ ++nocgo: ++ MOVV $0x800000, R4 // stacksize = 8192KB ++ MOVV $_rt0_loong64_linux_lib_go(SB), R5 ++ MOVV R4, 8(R3) ++ MOVV R5, 16(R3) ++ MOVV $runtime·newosproc0(SB), R19 ++ JAL (R19) ++ ++restore: ++ // Restore callee-save registers. 
++ RESTORE_R22_TO_R31(24) ++ RESTORE_F24_TO_F31(104) ++ RET ++ ++TEXT _rt0_loong64_linux_lib_go(SB),NOSPLIT,$0 ++ MOVV _rt0_loong64_linux_lib_argc<>(SB), R4 ++ MOVV _rt0_loong64_linux_lib_argv<>(SB), R5 ++ MOVV $runtime·rt0_go(SB),R19 ++ JMP (R19) ++ ++DATA _rt0_loong64_linux_lib_argc<>(SB)/8, $0 ++GLOBL _rt0_loong64_linux_lib_argc<>(SB),NOPTR, $8 ++DATA _rt0_loong64_linux_lib_argv<>(SB)/8, $0 ++GLOBL _rt0_loong64_linux_lib_argv<>(SB),NOPTR, $8 ++ + TEXT main(SB),NOSPLIT|NOFRAME,$0 + // in external linking, glibc jumps to main with argc in R4 + // and argv in R5 +diff --git a/src/runtime/tls_loong64.s b/src/runtime/tls_loong64.s +index bc3be3da1b..100f28b5ca 100644 +--- a/src/runtime/tls_loong64.s ++++ b/src/runtime/tls_loong64.s +@@ -10,17 +10,50 @@ + // If !iscgo, this is a no-op. + // + // NOTE: mcall() assumes this clobbers only R30 (REGTMP). +-TEXT runtime·save_g(SB),NOSPLIT|NOFRAME,$0-0 ++TEXT runtime·save_g(SB),NOSPLIT,$0-0 + MOVB runtime·iscgo(SB), R30 + BEQ R30, nocgo + ++ // here use the func __tls_get_addr to get the address of tls_g, which clobbers these regs below. 
++ ADDV $-56, R3 ++ MOVV R1, 0(R3) ++ MOVV R4, 8(R3) ++ MOVV R5, 16(R3) ++ MOVV R6, 24(R3) ++ MOVV R12, 32(R3) ++ MOVV R13, 40(R3) ++ MOVV R30, 48(R3) + MOVV g, runtime·tls_g(SB) ++ MOVV 0(R3), R1 ++ MOVV 8(R3), R4 ++ MOVV 16(R3), R5 ++ MOVV 24(R3), R6 ++ MOVV 32(R3), R12 ++ MOVV 40(R3), R13 ++ MOVV 48(R3), R30 ++ ADDV $56, R3 + + nocgo: + RET + +-TEXT runtime·load_g(SB),NOSPLIT|NOFRAME,$0-0 ++TEXT runtime·load_g(SB),NOSPLIT,$0-0 ++ ADDV $-56, R3 ++ MOVV R1, 0(R3) ++ MOVV R4, 8(R3) ++ MOVV R5, 16(R3) ++ MOVV R6, 24(R3) ++ MOVV R12, 32(R3) ++ MOVV R13, 40(R3) ++ MOVV R30, 48(R3) + MOVV runtime·tls_g(SB), g ++ MOVV 0(R3), R1 ++ MOVV 8(R3), R4 ++ MOVV 16(R3), R5 ++ MOVV 24(R3), R6 ++ MOVV 32(R3), R12 ++ MOVV 40(R3), R13 ++ MOVV 48(R3), R30 ++ ADDV $56, R3 + RET + + GLOBL runtime·tls_g(SB), TLSBSS, $8 +-- +2.38.1 + diff --git a/0020-cmd-compile-add-support-for-buildmode-c-shared-on-lo.patch b/0020-cmd-compile-add-support-for-buildmode-c-shared-on-lo.patch new file mode 100644 index 0000000000000000000000000000000000000000..37a2edc6b9c6c31d6c480586f0cbc1c32214eda3 --- /dev/null +++ b/0020-cmd-compile-add-support-for-buildmode-c-shared-on-lo.patch @@ -0,0 +1,43 @@ +From 5f4517d443d699a1798917900368d19bdb1fb025 Mon Sep 17 00:00:00 2001 +From: limeidan +Date: Thu, 25 Aug 2022 11:09:22 +0800 +Subject: [PATCH 20/62] cmd/compile: add support for --buildmode=c-shared on + loong64 + +Updates #53301 + +Change-Id: I78a90155b17d7d8be04e8ba5e4d75e27d15b3311 +--- + src/cmd/compile/internal/base/flag.go | 2 +- + src/cmd/compile/internal/liveness/plive.go | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + +diff --git a/src/cmd/compile/internal/base/flag.go b/src/cmd/compile/internal/base/flag.go +index be555c3d06..a09740f736 100644 +--- a/src/cmd/compile/internal/base/flag.go ++++ b/src/cmd/compile/internal/base/flag.go +@@ -204,7 +204,7 @@ func ParseFlags() { + if Flag.Race && !platform.RaceDetectorSupported(buildcfg.GOOS, buildcfg.GOARCH) { + log.Fatalf("%s/%s does not 
support -race", buildcfg.GOOS, buildcfg.GOARCH) + } +- if (*Flag.Shared || *Flag.Dynlink || *Flag.LinkShared) && !Ctxt.Arch.InFamily(sys.AMD64, sys.ARM, sys.ARM64, sys.I386, sys.PPC64, sys.RISCV64, sys.S390X) { ++ if (*Flag.Shared || *Flag.Dynlink || *Flag.LinkShared) && !Ctxt.Arch.InFamily(sys.AMD64, sys.ARM, sys.ARM64, sys.Loong64, sys.I386, sys.PPC64, sys.RISCV64, sys.S390X) { + log.Fatalf("%s/%s does not support -shared", buildcfg.GOOS, buildcfg.GOARCH) + } + parseSpectre(Flag.Spectre) // left as string for RecordFlags +diff --git a/src/cmd/compile/internal/liveness/plive.go b/src/cmd/compile/internal/liveness/plive.go +index 689b5286c6..ab79843ad8 100644 +--- a/src/cmd/compile/internal/liveness/plive.go ++++ b/src/cmd/compile/internal/liveness/plive.go +@@ -513,7 +513,7 @@ func (lv *liveness) markUnsafePoints() { + v = v.Args[0] + continue + } +- case ssa.Op386MOVLload, ssa.OpARM64MOVWUload, ssa.OpPPC64MOVWZload, ssa.OpWasmI64Load32U: ++ case ssa.Op386MOVLload, ssa.OpARM64MOVWUload, ssa.OpLOONG64MOVWUload, ssa.OpPPC64MOVWZload, ssa.OpWasmI64Load32U: + // Args[0] is the address of the write + // barrier control. Ignore Args[1], + // which is the mem operand. 
+-- +2.38.1 + diff --git a/0021-cmd-internal-obj-loong64-cmd-internal-objabi-add-c-s.patch b/0021-cmd-internal-obj-loong64-cmd-internal-objabi-add-c-s.patch new file mode 100644 index 0000000000000000000000000000000000000000..87a98038473f948ba3529a950e0eecd25b5b40f9 --- /dev/null +++ b/0021-cmd-internal-obj-loong64-cmd-internal-objabi-add-c-s.patch @@ -0,0 +1,269 @@ +From d5dd17f6c07f5de36f53bfd50bfb71889166f75b Mon Sep 17 00:00:00 2001 +From: limeidan +Date: Thu, 25 Aug 2022 11:13:10 +0800 +Subject: [PATCH 21/62] cmd/internal/obj/loong64, cmd/internal/objabi: add + c-shared relocations on loong64 + +Updates #53301 + +Change-Id: Ifcb40871f609531dfd8b568db9ac14da9b451742 +--- + src/cmd/internal/obj/loong64/a.out.go | 3 +- + src/cmd/internal/obj/loong64/asm.go | 105 +++++++++++++++++--- + src/cmd/internal/obj/loong64/cnames.go | 3 +- + src/cmd/internal/objabi/reloctype.go | 11 ++ + src/cmd/internal/objabi/reloctype_string.go | 20 ++-- + 5 files changed, 120 insertions(+), 22 deletions(-) + +diff --git a/src/cmd/internal/obj/loong64/a.out.go b/src/cmd/internal/obj/loong64/a.out.go +index e7ac592b8b..29aa746951 100644 +--- a/src/cmd/internal/obj/loong64/a.out.go ++++ b/src/cmd/internal/obj/loong64/a.out.go +@@ -225,7 +225,8 @@ const ( + C_LOREG + C_GOK + C_ADDR +- C_TLS ++ C_TLS_LE ++ C_TLS_GD + C_TEXTSIZE + + C_NCLASS // must be the last +diff --git a/src/cmd/internal/obj/loong64/asm.go b/src/cmd/internal/obj/loong64/asm.go +index d3c34def73..b0a5cd6cec 100644 +--- a/src/cmd/internal/obj/loong64/asm.go ++++ b/src/cmd/internal/obj/loong64/asm.go +@@ -157,11 +157,11 @@ var optab = []Optab{ + {AMOVB, C_REG, C_NONE, C_ADDR, C_NONE, 50, 8, 0, sys.Loong64, 0}, + {AMOVBU, C_REG, C_NONE, C_ADDR, C_NONE, 50, 8, 0, 0, 0}, + {AMOVBU, C_REG, C_NONE, C_ADDR, C_NONE, 50, 8, 0, sys.Loong64, 0}, +- {AMOVW, C_REG, C_NONE, C_TLS, C_NONE, 53, 16, 0, 0, 0}, +- {AMOVWU, C_REG, C_NONE, C_TLS, C_NONE, 53, 16, 0, sys.Loong64, 0}, +- {AMOVV, C_REG, C_NONE, C_TLS, C_NONE, 53, 16, 0, 
sys.Loong64, 0}, +- {AMOVB, C_REG, C_NONE, C_TLS, C_NONE, 53, 16, 0, 0, 0}, +- {AMOVBU, C_REG, C_NONE, C_TLS, C_NONE, 53, 16, 0, 0, 0}, ++ {AMOVW, C_REG, C_NONE, C_TLS_LE, C_NONE, 53, 16, 0, 0, 0}, ++ {AMOVWU, C_REG, C_NONE, C_TLS_LE, C_NONE, 53, 16, 0, sys.Loong64, 0}, ++ {AMOVV, C_REG, C_NONE, C_TLS_LE, C_NONE, 53, 16, 0, sys.Loong64, 0}, ++ {AMOVB, C_REG, C_NONE, C_TLS_LE, C_NONE, 53, 16, 0, 0, 0}, ++ {AMOVBU, C_REG, C_NONE, C_TLS_LE, C_NONE, 53, 16, 0, 0, 0}, + + {AMOVW, C_LEXT, C_NONE, C_REG, C_NONE, 36, 12, 0, sys.Loong64, 0}, + {AMOVWU, C_LEXT, C_NONE, C_REG, C_NONE, 36, 12, 0, sys.Loong64, 0}, +@@ -186,11 +186,11 @@ var optab = []Optab{ + {AMOVB, C_ADDR, C_NONE, C_REG, C_NONE, 51, 8, 0, sys.Loong64, 0}, + {AMOVBU, C_ADDR, C_NONE, C_REG, C_NONE, 51, 8, 0, 0, 0}, + {AMOVBU, C_ADDR, C_NONE, C_REG, C_NONE, 51, 8, 0, sys.Loong64, 0}, +- {AMOVW, C_TLS, C_NONE, C_REG, C_NONE, 54, 16, 0, 0, 0}, +- {AMOVWU, C_TLS, C_NONE, C_REG, C_NONE, 54, 16, 0, sys.Loong64, 0}, +- {AMOVV, C_TLS, C_NONE, C_REG, C_NONE, 54, 16, 0, sys.Loong64, 0}, +- {AMOVB, C_TLS, C_NONE, C_REG, C_NONE, 54, 16, 0, 0, 0}, +- {AMOVBU, C_TLS, C_NONE, C_REG, C_NONE, 54, 16, 0, 0, 0}, ++ {AMOVW, C_TLS_LE, C_NONE, C_REG, C_NONE, 54, 16, 0, 0, 0}, ++ {AMOVWU, C_TLS_LE, C_NONE, C_REG, C_NONE, 54, 16, 0, sys.Loong64, 0}, ++ {AMOVV, C_TLS_LE, C_NONE, C_REG, C_NONE, 54, 16, 0, sys.Loong64, 0}, ++ {AMOVB, C_TLS_LE, C_NONE, C_REG, C_NONE, 54, 16, 0, 0, 0}, ++ {AMOVBU, C_TLS_LE, C_NONE, C_REG, C_NONE, 54, 16, 0, 0, 0}, + + {AMOVW, C_SECON, C_NONE, C_REG, C_NONE, 3, 4, 0, sys.Loong64, 0}, + {AMOVV, C_SECON, C_NONE, C_REG, C_NONE, 3, 4, 0, sys.Loong64, 0}, +@@ -329,6 +329,17 @@ var optab = []Optab{ + + {AWORD, C_LCON, C_NONE, C_NONE, C_NONE, 40, 4, 0, 0, 0}, + {AWORD, C_DCON, C_NONE, C_NONE, C_NONE, 61, 4, 0, 0, 0}, ++ {AMOVB, C_REG, C_NONE, C_TLS_GD, C_NONE, 56, 24, 0, sys.Loong64, 0}, ++ {AMOVW, C_REG, C_NONE, C_TLS_GD, C_NONE, 56, 24, 0, sys.Loong64, 0}, ++ {AMOVV, C_REG, C_NONE, C_TLS_GD, C_NONE, 56, 24, 0, 
sys.Loong64, 0}, ++ {AMOVBU, C_REG, C_NONE, C_TLS_GD, C_NONE, 56, 24, 0, sys.Loong64, 0}, ++ {AMOVWU, C_REG, C_NONE, C_TLS_GD, C_NONE, 56, 24, 0, sys.Loong64, 0}, ++ ++ {AMOVB, C_TLS_GD, C_NONE, C_REG, C_NONE, 57, 24, 0, sys.Loong64, 0}, ++ {AMOVW, C_TLS_GD, C_NONE, C_REG, C_NONE, 57, 24, 0, sys.Loong64, 0}, ++ {AMOVV, C_TLS_GD, C_NONE, C_REG, C_NONE, 57, 24, 0, sys.Loong64, 0}, ++ {AMOVBU, C_TLS_GD, C_NONE, C_REG, C_NONE, 57, 24, 0, sys.Loong64, 0}, ++ {AMOVWU, C_TLS_GD, C_NONE, C_REG, C_NONE, 57, 24, 0, sys.Loong64, 0}, + + {ATEQ, C_SCON, C_REG, C_REG, C_NONE, 15, 8, 0, 0, 0}, + {ATEQ, C_SCON, C_NONE, C_REG, C_NONE, 15, 8, 0, 0, 0}, +@@ -461,7 +472,7 @@ func span0(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { + + bp := c.cursym.P + var i int32 +- var out [5]uint32 ++ var out [6]uint32 + for p := c.cursym.Func().Text.Link; p != nil; p = p.Link { + c.pc = p.Pc + o = c.oplook(p) +@@ -544,7 +555,11 @@ func (c *ctxt0) aclass(a *obj.Addr) int { + c.instoffset = a.Offset + if a.Sym != nil { // use relocation + if a.Sym.Type == objabi.STLSBSS { +- return C_TLS ++ if c.ctxt.Flag_shared { ++ return C_TLS_GD ++ } else { ++ return C_TLS_LE ++ } + } + return C_ADDR + } +@@ -1118,6 +1133,7 @@ func (c *ctxt0) asmout(p *obj.Prog, o *Optab, out []uint32) { + o3 := uint32(0) + o4 := uint32(0) + o5 := uint32(0) ++ o6 := uint32(0) + + add := AADDU + add = AADDVU +@@ -1595,6 +1611,70 @@ func (c *ctxt0) asmout(p *obj.Prog, o *Optab, out []uint32) { + rel2.Type = objabi.R_ADDRLOONG64TLS + o3 = OP_RRR(c.oprrr(AADDV), uint32(REG_R2), uint32(REGTMP), uint32(p.To.Reg)) + ++ case 56: // mov r, tlsvar GD model ==> (pcalau12i + ld.d)__tls_get_addr + (pcalau12i + addi.d)tlsvar@got + jirl + st.d ++ o1 = OP_IR(c.opir(APCALAU12I), uint32(0), uint32(REGTMP)) ++ rel := obj.Addrel(c.cursym) ++ rel.Off = int32(c.pc) ++ rel.Siz = 4 ++ rel.Sym = c.ctxt.Lookup("__tls_get_addr") ++ rel.Add = 0x0 ++ rel.Type = objabi.R_LOONG64_GOTPCREL_HI ++ o2 = OP_12IRR(c.opirr(-p.As), uint32(0), 
uint32(REGTMP), uint32(REGTMP)) ++ rel2 := obj.Addrel(c.cursym) ++ rel2.Off = int32(c.pc + 4) ++ rel2.Siz = 4 ++ rel2.Sym = c.ctxt.Lookup("__tls_get_addr") ++ rel2.Add = 0x0 ++ rel2.Type = objabi.R_LOONG64_GOT_LO ++ o3 = OP_IR(c.opir(APCALAU12I), uint32(0), uint32(REG_R4)) ++ rel3 := obj.Addrel(c.cursym) ++ rel3.Off = int32(c.pc + 8) ++ rel3.Siz = 4 ++ rel3.Sym = p.To.Sym ++ rel3.Add = 0x0 ++ rel3.Type = objabi.R_LOONG64_TLS_GD_PCREL_HI ++ o4 = OP_12IRR(c.opirr(AADDV), uint32(0), uint32(REG_R4), uint32(REG_R4)) ++ rel4 := obj.Addrel(c.cursym) ++ rel4.Off = int32(c.pc + 12) ++ rel4.Siz = 4 ++ rel4.Sym = p.To.Sym ++ rel4.Add = 0x0 ++ rel4.Type = objabi.R_LOONG64_TLS_GD_LO ++ o5 = OP_16IRR(c.opirr(AJIRL), uint32(0), uint32(REGTMP), uint32(REGLINK)) ++ o6 = OP_12IRR(c.opirr(p.As), uint32(0), uint32(REG_R4), uint32(p.From.Reg)) ++ ++ case 57: // mov tlsvar, r GD model ==> (pcalau12i + ld.d)__tls_get_addr + (pcalau12i + addi.d)tlsvar@got + jirl + ld.d ++ o1 = OP_IR(c.opir(APCALAU12I), uint32(0), uint32(REGTMP)) ++ rel := obj.Addrel(c.cursym) ++ rel.Off = int32(c.pc) ++ rel.Siz = 4 ++ rel.Sym = c.ctxt.Lookup("__tls_get_addr") ++ rel.Add = 0x0 ++ rel.Type = objabi.R_LOONG64_GOTPCREL_HI ++ o2 = OP_12IRR(c.opirr(-p.As), uint32(0), uint32(REGTMP), uint32(REGTMP)) ++ rel2 := obj.Addrel(c.cursym) ++ rel2.Off = int32(c.pc + 4) ++ rel2.Siz = 4 ++ rel2.Sym = c.ctxt.Lookup("__tls_get_addr") ++ rel2.Add = 0x0 ++ rel2.Type = objabi.R_LOONG64_GOT_LO ++ o3 = OP_IR(c.opir(APCALAU12I), uint32(0), uint32(REG_R4)) ++ rel3 := obj.Addrel(c.cursym) ++ rel3.Off = int32(c.pc + 8) ++ rel3.Siz = 4 ++ rel3.Sym = p.From.Sym ++ rel3.Type = objabi.R_LOONG64_TLS_GD_PCREL_HI ++ rel3.Add = 0x0 ++ o4 = OP_12IRR(c.opirr(AADDV), uint32(0), uint32(REG_R4), uint32(REG_R4)) ++ rel4 := obj.Addrel(c.cursym) ++ rel4.Off = int32(c.pc + 12) ++ rel4.Siz = 4 ++ rel4.Sym = p.From.Sym ++ rel4.Type = objabi.R_LOONG64_TLS_GD_LO ++ rel4.Add = 0x0 ++ o5 = OP_16IRR(c.opirr(AJIRL), uint32(0), uint32(REGTMP), 
uint32(REGLINK)) ++ o6 = OP_12IRR(c.opirr(-p.As), uint32(0), uint32(REG_R4), uint32(p.To.Reg)) ++ + case 59: // mov $dcon,r + // NOTE: this case does not use REGTMP. If it ever does, + // remove the NOTUSETMP flag in optab. +@@ -1629,6 +1709,7 @@ func (c *ctxt0) asmout(p *obj.Prog, o *Optab, out []uint32) { + out[2] = o3 + out[3] = o4 + out[4] = o5 ++ out[5] = o6 + } + + func (c *ctxt0) vregoff(a *obj.Addr) int64 { +diff --git a/src/cmd/internal/obj/loong64/cnames.go b/src/cmd/internal/obj/loong64/cnames.go +index 00f6136603..393d0a007d 100644 +--- a/src/cmd/internal/obj/loong64/cnames.go ++++ b/src/cmd/internal/obj/loong64/cnames.go +@@ -37,7 +37,8 @@ var cnames0 = []string{ + "LOREG", + "GOK", + "ADDR", +- "TLS", ++ "TLS_LE", ++ "TLS_GD", + "TEXTSIZE", + "NCLASS", + } +diff --git a/src/cmd/internal/objabi/reloctype.go b/src/cmd/internal/objabi/reloctype.go +index 2bc7b2dd7a..f7a2af5ccc 100644 +--- a/src/cmd/internal/objabi/reloctype.go ++++ b/src/cmd/internal/objabi/reloctype.go +@@ -310,6 +310,17 @@ const ( + // instruction, by encoding the address into the instruction. + R_CALLLOONG64 + ++ // R_LOONG64_TLS_GD_PCREL_HI and R_LOONG64_TLS_GD_LO relocates an pcalau12i, addi.d pair to compute ++ // the address of the GOT slot of the tls symbol, the address will be passed to __tls_get_addr to ++ // get the true address of tlsvar. ++ R_LOONG64_TLS_GD_PCREL_HI ++ R_LOONG64_TLS_GD_LO ++ ++ // R_LOONG64_GOTPCREL_HI and R_LOONG64_GOT_LO relocates an pcalau12i, ld.d pair to compute ++ // the address of the GOT slot of the referenced symbol. ++ R_LOONG64_GOTPCREL_HI ++ R_LOONG64_GOT_LO ++ + // R_JMPLOONG64 resolves to non-PC-relative target address of a JMP instruction, + // by encoding the address into the instruction. 
+ R_JMPLOONG64 +diff --git a/src/cmd/internal/objabi/reloctype_string.go b/src/cmd/internal/objabi/reloctype_string.go +index 9ce37d00de..ad4258c4a3 100644 +--- a/src/cmd/internal/objabi/reloctype_string.go ++++ b/src/cmd/internal/objabi/reloctype_string.go +@@ -79,17 +79,21 @@ func _() { + _ = x[R_ADDRLOONG64TLS-69] + _ = x[R_ADDRLOONG64TLSU-70] + _ = x[R_CALLLOONG64-71] +- _ = x[R_JMPLOONG64-72] +- _ = x[R_ADDRMIPSU-73] +- _ = x[R_ADDRMIPSTLS-74] +- _ = x[R_ADDRCUOFF-75] +- _ = x[R_WASMIMPORT-76] +- _ = x[R_XCOFFREF-77] ++ _ = x[R_LOONG64_TLS_GD_PCREL_HI-72] ++ _ = x[R_LOONG64_TLS_GD_LO-73] ++ _ = x[R_LOONG64_GOTPCREL_HI-74] ++ _ = x[R_LOONG64_GOT_LO-75] ++ _ = x[R_JMPLOONG64-76] ++ _ = x[R_ADDRMIPSU-77] ++ _ = x[R_ADDRMIPSTLS-78] ++ _ = x[R_ADDRCUOFF-79] ++ _ = x[R_WASMIMPORT-80] ++ _ = x[R_XCOFFREF-81] + } + +-const _RelocType_name = "R_ADDRR_ADDRPOWERR_ADDRARM64R_ADDRMIPSR_ADDROFFR_SIZER_CALLR_CALLARMR_CALLARM64R_CALLINDR_CALLPOWERR_CALLMIPSR_CONSTR_PCRELR_TLS_LER_TLS_IER_GOTOFFR_PLT0R_PLT1R_PLT2R_USEFIELDR_USETYPER_USEIFACER_USEIFACEMETHODR_USEGENERICIFACEMETHODR_METHODOFFR_KEEPR_POWER_TOCR_GOTPCRELR_JMPMIPSR_DWARFSECREFR_DWARFFILEREFR_ARM64_TLS_LER_ARM64_TLS_IER_ARM64_GOTPCRELR_ARM64_GOTR_ARM64_PCRELR_ARM64_PCREL_LDST8R_ARM64_PCREL_LDST16R_ARM64_PCREL_LDST32R_ARM64_PCREL_LDST64R_ARM64_LDST8R_ARM64_LDST16R_ARM64_LDST32R_ARM64_LDST64R_ARM64_LDST128R_POWER_TLS_LER_POWER_TLS_IER_POWER_TLSR_POWER_TLS_IE_PCREL34R_POWER_TLS_LE_TPREL34R_ADDRPOWER_DSR_ADDRPOWER_GOTR_ADDRPOWER_GOT_PCREL34R_ADDRPOWER_PCRELR_ADDRPOWER_TOCRELR_ADDRPOWER_TOCREL_DSR_ADDRPOWER_D34R_ADDRPOWER_PCREL34R_RISCV_CALLR_RISCV_CALL_TRAMPR_RISCV_PCREL_ITYPER_RISCV_PCREL_STYPER_RISCV_TLS_IE_ITYPER_RISCV_TLS_IE_STYPER_PCRELDBLR_ADDRLOONG64R_ADDRLOONG64UR_ADDRLOONG64TLSR_ADDRLOONG64TLSUR_CALLLOONG64R_JMPLOONG64R_ADDRMIPSUR_ADDRMIPSTLSR_ADDRCUOFFR_WASMIMPORTR_XCOFFREF" ++const _RelocType_name = 
"R_ADDRR_ADDRPOWERR_ADDRARM64R_ADDRMIPSR_ADDROFFR_SIZER_CALLR_CALLARMR_CALLARM64R_CALLINDR_CALLPOWERR_CALLMIPSR_CONSTR_PCRELR_TLS_LER_TLS_IER_GOTOFFR_PLT0R_PLT1R_PLT2R_USEFIELDR_USETYPER_USEIFACER_USEIFACEMETHODR_USEGENERICIFACEMETHODR_METHODOFFR_KEEPR_POWER_TOCR_GOTPCRELR_JMPMIPSR_DWARFSECREFR_DWARFFILEREFR_ARM64_TLS_LER_ARM64_TLS_IER_ARM64_GOTPCRELR_ARM64_GOTR_ARM64_PCRELR_ARM64_PCREL_LDST8R_ARM64_PCREL_LDST16R_ARM64_PCREL_LDST32R_ARM64_PCREL_LDST64R_ARM64_LDST8R_ARM64_LDST16R_ARM64_LDST32R_ARM64_LDST64R_ARM64_LDST128R_POWER_TLS_LER_POWER_TLS_IER_POWER_TLSR_POWER_TLS_IE_PCREL34R_POWER_TLS_LE_TPREL34R_ADDRPOWER_DSR_ADDRPOWER_GOTR_ADDRPOWER_GOT_PCREL34R_ADDRPOWER_PCRELR_ADDRPOWER_TOCRELR_ADDRPOWER_TOCREL_DSR_ADDRPOWER_D34R_ADDRPOWER_PCREL34R_RISCV_CALLR_RISCV_CALL_TRAMPR_RISCV_PCREL_ITYPER_RISCV_PCREL_STYPER_RISCV_TLS_IE_ITYPER_RISCV_TLS_IE_STYPER_PCRELDBLR_ADDRLOONG64R_ADDRLOONG64UR_ADDRLOONG64TLSR_ADDRLOONG64TLSUR_CALLLOONG64R_LOONG64_TLS_GD_PCREL_HIR_LOONG64_TLS_GD_LOR_LOONG64_GOTPCREL_HIR_LOONG64_GOT_LOR_JMPLOONG64R_ADDRMIPSUR_ADDRMIPSTLSR_ADDRCUOFFR_WASMIMPORTR_XCOFFREF" + +-var _RelocType_index = [...]uint16{0, 6, 17, 28, 38, 47, 53, 59, 68, 79, 88, 99, 109, 116, 123, 131, 139, 147, 153, 159, 165, 175, 184, 194, 210, 233, 244, 250, 261, 271, 280, 293, 307, 321, 335, 351, 362, 375, 394, 414, 434, 454, 467, 481, 495, 509, 524, 538, 552, 563, 585, 607, 621, 636, 659, 676, 694, 715, 730, 749, 761, 779, 798, 817, 837, 857, 867, 880, 894, 910, 927, 940, 952, 963, 976, 987, 999, 1009} ++var _RelocType_index = [...]uint16{0, 6, 17, 28, 38, 47, 53, 59, 68, 79, 88, 99, 109, 116, 123, 131, 139, 147, 153, 159, 165, 175, 184, 194, 210, 233, 244, 250, 261, 271, 280, 293, 307, 321, 335, 351, 362, 375, 394, 414, 434, 454, 467, 481, 495, 509, 524, 538, 552, 563, 585, 607, 621, 636, 659, 676, 694, 715, 730, 749, 761, 779, 798, 817, 837, 857, 867, 880, 894, 910, 927, 940, 965, 984, 1005, 1021, 1033, 1044, 1057, 1068, 1080, 1090} + + func (i RelocType) String() string { + i -= 1 
+-- +2.38.1 + diff --git a/0022-cmd-link-add-support-for-buildmode-c-shared-on-loong.patch b/0022-cmd-link-add-support-for-buildmode-c-shared-on-loong.patch new file mode 100644 index 0000000000000000000000000000000000000000..b6c7d95cd7c0c51087933dafead2e0ac1ab031c3 --- /dev/null +++ b/0022-cmd-link-add-support-for-buildmode-c-shared-on-loong.patch @@ -0,0 +1,98 @@ +From e3d3e76b19e44157fbcf1fc3620f59056c6148cf Mon Sep 17 00:00:00 2001 +From: limeidan +Date: Thu, 25 Aug 2022 11:13:41 +0800 +Subject: [PATCH 22/62] cmd/link: add support for --buildmode=c-shared on + loong64 + +Updates #53301 + +Change-Id: I4b726b0cc09e5e008b92b3e0a8a7bdd103b062c4 +--- + src/cmd/link/internal/ld/config.go | 2 +- + src/cmd/link/internal/loong64/asm.go | 36 +++++++++++++++++++++++++++- + 2 files changed, 36 insertions(+), 2 deletions(-) + +diff --git a/src/cmd/link/internal/ld/config.go b/src/cmd/link/internal/ld/config.go +index ba74b6fc96..129d30f35a 100644 +--- a/src/cmd/link/internal/ld/config.go ++++ b/src/cmd/link/internal/ld/config.go +@@ -75,7 +75,7 @@ func (mode *BuildMode) Set(s string) error { + *mode = BuildModeCArchive + case "c-shared": + switch buildcfg.GOARCH { +- case "386", "amd64", "arm", "arm64", "ppc64le", "riscv64", "s390x": ++ case "386", "amd64", "arm", "arm64", "loong64", "ppc64le", "riscv64", "s390x": + default: + return badmode() + } +diff --git a/src/cmd/link/internal/loong64/asm.go b/src/cmd/link/internal/loong64/asm.go +index e9cf07023f..238d77a610 100644 +--- a/src/cmd/link/internal/loong64/asm.go ++++ b/src/cmd/link/internal/loong64/asm.go +@@ -59,6 +59,31 @@ func elfreloc1(ctxt *ld.Link, out *ld.OutBuf, ldr *loader.Loader, s loader.Sym, + out.Write64(uint64(elf.R_LARCH_B26) | uint64(elfsym)<<32) + out.Write64(uint64(r.Xadd)) + ++ case objabi.R_LOONG64_TLS_GD_PCREL_HI: ++ out.Write64(uint64(sectoff)) ++ out.Write64(uint64(elf.R_LARCH_TLS_GD_PC_HI20) | uint64(elfsym)<<32) ++ out.Write64(uint64(0x0)) ++ ++ case objabi.R_LOONG64_TLS_GD_LO: ++ 
out.Write64(uint64(sectoff)) ++ out.Write64(uint64(elf.R_LARCH_GOT_PC_LO12) | uint64(elfsym)<<32) ++ out.Write64(uint64(0x0)) ++ ++ case objabi.R_LOONG64_GOTPCREL_HI: ++ out.Write64(uint64(sectoff)) ++ out.Write64(uint64(elf.R_LARCH_GOT_PC_HI20) | uint64(elfsym)<<32) ++ out.Write64(uint64(0x0)) ++ ++ case objabi.R_LOONG64_GOT_LO: ++ out.Write64(uint64(sectoff)) ++ out.Write64(uint64(elf.R_LARCH_GOT_PC_LO12) | uint64(elfsym)<<32) ++ out.Write64(uint64(0x0)) ++ ++ // The pcaddu12i + addi.d instructions is used to obtain address of a symbol on Loong64. ++ // The low 12-bit of the symbol address need to be added. The addi.d instruction have ++ // signed 12-bit immediate operand. The 0x800 (addr+U12 <=> addr+0x800+S12) is introduced ++ // to do sign extending from 12 bits. The 0x804 is 0x800 + 4, 4 is instruction bit ++ // width on Loong64 and is used to correct the PC of the addi.d instruction. + case objabi.R_ADDRLOONG64: + out.Write64(uint64(sectoff)) + out.Write64(uint64(elf.R_LARCH_PCALA_LO12) | uint64(elfsym)<<32) +@@ -101,6 +126,11 @@ func archreloc(target *ld.Target, ldr *loader.Loader, syms *ld.ArchSyms, r loade + objabi.R_CALLLOONG64, + objabi.R_JMPLOONG64: + return val, 1, true ++ case objabi.R_LOONG64_TLS_GD_PCREL_HI, ++ objabi.R_LOONG64_GOTPCREL_HI, ++ objabi.R_LOONG64_TLS_GD_LO, ++ objabi.R_LOONG64_GOT_LO: ++ return val, 1, true + } + } + +@@ -144,6 +174,8 @@ func archrelocvariant(*ld.Target, *loader.Loader, loader.Reloc, sym.RelocVariant + func extreloc(target *ld.Target, ldr *loader.Loader, r loader.Reloc, s loader.Sym) (loader.ExtReloc, bool) { + switch r.Type() { + case objabi.R_ADDRLOONG64, ++ objabi.R_LOONG64_GOTPCREL_HI, ++ objabi.R_LOONG64_GOT_LO, + objabi.R_ADDRLOONG64U: + return ld.ExtrelocViaOuterSym(ldr, r, s), true + +@@ -152,7 +184,9 @@ func extreloc(target *ld.Target, ldr *loader.Loader, r loader.Reloc, s loader.Sy + objabi.R_CONST, + objabi.R_GOTOFF, + objabi.R_CALLLOONG64, +- objabi.R_JMPLOONG64: ++ objabi.R_JMPLOONG64, ++ 
objabi.R_LOONG64_TLS_GD_PCREL_HI, ++ objabi.R_LOONG64_TLS_GD_LO: + return ld.ExtrelocSimple(ldr, r), true + } + return loader.ExtReloc{}, false +-- +2.38.1 + diff --git a/0023-cmd-internal-sys-enable-c-shared-feature-on-loong64.patch b/0023-cmd-internal-sys-enable-c-shared-feature-on-loong64.patch new file mode 100644 index 0000000000000000000000000000000000000000..9427e24f4edca064480c72f53e968173a741813a --- /dev/null +++ b/0023-cmd-internal-sys-enable-c-shared-feature-on-loong64.patch @@ -0,0 +1,28 @@ +From 9cdedaf304f96f360aef36006218be3fda77c6ea Mon Sep 17 00:00:00 2001 +From: limeidan +Date: Thu, 25 Aug 2022 11:14:15 +0800 +Subject: [PATCH 23/62] cmd/internal/sys: enable c-shared feature on loong64 + +Updates #53301 + +Change-Id: I4e0be140a71b86f4626ed39d76cf3ac78f842018 +--- + src/internal/platform/supported.go | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/src/internal/platform/supported.go b/src/internal/platform/supported.go +index 046352f34c..e6d8adb40e 100644 +--- a/src/internal/platform/supported.go ++++ b/src/internal/platform/supported.go +@@ -143,7 +143,7 @@ func BuildModeSupported(compiler, buildmode, goos, goarch string) bool { + + case "c-shared": + switch platform { +- case "linux/amd64", "linux/arm", "linux/arm64", "linux/386", "linux/ppc64le", "linux/riscv64", "linux/s390x", ++ case "linux/amd64", "linux/arm", "linux/arm64", "linux/386", "linux/loong64", "linux/ppc64le", "linux/riscv64", "linux/s390x", + "android/amd64", "android/arm", "android/arm64", "android/386", + "freebsd/amd64", + "darwin/amd64", "darwin/arm64", +-- +2.38.1 + diff --git a/0024-cmd-dist-misc-cgo-testcshared-enable-c-shared-test-o.patch b/0024-cmd-dist-misc-cgo-testcshared-enable-c-shared-test-o.patch new file mode 100644 index 0000000000000000000000000000000000000000..4541136b9f8d9d35f71e31880635d71323d26a87 --- /dev/null +++ b/0024-cmd-dist-misc-cgo-testcshared-enable-c-shared-test-o.patch @@ -0,0 +1,57 @@ +From 
72247b5bde442deaf8dbd03d6ffa865ed5ccaa7e Mon Sep 17 00:00:00 2001 +From: limeidan +Date: Thu, 25 Aug 2022 11:14:28 +0800 +Subject: [PATCH 24/62] cmd/dist, misc/cgo/testcshared: enable c-shared test on + loong64 + +Updates #53301 + +Change-Id: I68357e420f0920d6609d399cee40cd44af018385 +--- + misc/cgo/testcshared/testdata/libgo2/dup2.go | 2 +- + misc/cgo/testcshared/testdata/libgo2/dup3.go | 2 +- + src/cmd/dist/test.go | 2 +- + 3 files changed, 3 insertions(+), 3 deletions(-) + +diff --git a/misc/cgo/testcshared/testdata/libgo2/dup2.go b/misc/cgo/testcshared/testdata/libgo2/dup2.go +index d343aa54d9..73ef600ecb 100644 +--- a/misc/cgo/testcshared/testdata/libgo2/dup2.go ++++ b/misc/cgo/testcshared/testdata/libgo2/dup2.go +@@ -2,7 +2,7 @@ + // Use of this source code is governed by a BSD-style + // license that can be found in the LICENSE file. + +-// +build darwin dragonfly freebsd linux,!arm64,!riscv64 netbsd openbsd ++// +build darwin dragonfly freebsd linux,!arm64,!riscv64,!loong64 netbsd openbsd + + package main + +diff --git a/misc/cgo/testcshared/testdata/libgo2/dup3.go b/misc/cgo/testcshared/testdata/libgo2/dup3.go +index 459f0dc196..f83b96778b 100644 +--- a/misc/cgo/testcshared/testdata/libgo2/dup3.go ++++ b/misc/cgo/testcshared/testdata/libgo2/dup3.go +@@ -2,7 +2,7 @@ + // Use of this source code is governed by a BSD-style + // license that can be found in the LICENSE file. 
+ +-// +build linux,arm64 linux,riscv64 ++// +build linux,arm64 linux,riscv64 linux,loong64 + + package main + +diff --git a/src/cmd/dist/test.go b/src/cmd/dist/test.go +index 9f2660631d..ac93194165 100644 +--- a/src/cmd/dist/test.go ++++ b/src/cmd/dist/test.go +@@ -1191,7 +1191,7 @@ func (t *tester) supportedBuildmode(mode string) bool { + return false + case "c-shared": + switch pair { +- case "linux-386", "linux-amd64", "linux-arm", "linux-arm64", "linux-ppc64le", "linux-riscv64", "linux-s390x", ++ case "linux-386", "linux-amd64", "linux-arm", "linux-arm64", "linux-loong64", "linux-ppc64le", "linux-riscv64", "linux-s390x", + "darwin-amd64", "darwin-arm64", + "freebsd-amd64", + "android-arm", "android-arm64", "android-386", +-- +2.38.1 + diff --git a/0025-cmd-link-cmd-internal-in-shared-mode-change-to-use-I.patch b/0025-cmd-link-cmd-internal-in-shared-mode-change-to-use-I.patch new file mode 100644 index 0000000000000000000000000000000000000000..6335bc11724c557d7b521c8bdebaa5b076baa87a --- /dev/null +++ b/0025-cmd-link-cmd-internal-in-shared-mode-change-to-use-I.patch @@ -0,0 +1,279 @@ +From 06335c8600a8964468efd090bae7ade0b610d166 Mon Sep 17 00:00:00 2001 +From: limeidan +Date: Wed, 8 Mar 2023 01:56:16 +0800 +Subject: [PATCH 25/62] cmd/link, cmd/internal: in shared mode, change to use + IE mode to access TLS variables + +Change-Id: I097a1b1a48e18c3d517142199b32fb14cca1f590 +--- + src/cmd/internal/obj/loong64/a.out.go | 2 +- + src/cmd/internal/obj/loong64/asm.go | 118 ++++++++------------ + src/cmd/internal/obj/loong64/cnames.go | 2 +- + src/cmd/internal/objabi/reloctype.go | 6 +- + src/cmd/internal/objabi/reloctype_string.go | 6 +- + src/cmd/link/internal/loong64/asm.go | 16 +-- + 6 files changed, 61 insertions(+), 89 deletions(-) + +diff --git a/src/cmd/internal/obj/loong64/a.out.go b/src/cmd/internal/obj/loong64/a.out.go +index 29aa746951..99a7da388f 100644 +--- a/src/cmd/internal/obj/loong64/a.out.go ++++ b/src/cmd/internal/obj/loong64/a.out.go +@@ -226,7 
+226,7 @@ const ( + C_GOK + C_ADDR + C_TLS_LE +- C_TLS_GD ++ C_TLS_IE + C_TEXTSIZE + + C_NCLASS // must be the last +diff --git a/src/cmd/internal/obj/loong64/asm.go b/src/cmd/internal/obj/loong64/asm.go +index b0a5cd6cec..792ed22a02 100644 +--- a/src/cmd/internal/obj/loong64/asm.go ++++ b/src/cmd/internal/obj/loong64/asm.go +@@ -329,17 +329,17 @@ var optab = []Optab{ + + {AWORD, C_LCON, C_NONE, C_NONE, C_NONE, 40, 4, 0, 0, 0}, + {AWORD, C_DCON, C_NONE, C_NONE, C_NONE, 61, 4, 0, 0, 0}, +- {AMOVB, C_REG, C_NONE, C_TLS_GD, C_NONE, 56, 24, 0, sys.Loong64, 0}, +- {AMOVW, C_REG, C_NONE, C_TLS_GD, C_NONE, 56, 24, 0, sys.Loong64, 0}, +- {AMOVV, C_REG, C_NONE, C_TLS_GD, C_NONE, 56, 24, 0, sys.Loong64, 0}, +- {AMOVBU, C_REG, C_NONE, C_TLS_GD, C_NONE, 56, 24, 0, sys.Loong64, 0}, +- {AMOVWU, C_REG, C_NONE, C_TLS_GD, C_NONE, 56, 24, 0, sys.Loong64, 0}, +- +- {AMOVB, C_TLS_GD, C_NONE, C_REG, C_NONE, 57, 24, 0, sys.Loong64, 0}, +- {AMOVW, C_TLS_GD, C_NONE, C_REG, C_NONE, 57, 24, 0, sys.Loong64, 0}, +- {AMOVV, C_TLS_GD, C_NONE, C_REG, C_NONE, 57, 24, 0, sys.Loong64, 0}, +- {AMOVBU, C_TLS_GD, C_NONE, C_REG, C_NONE, 57, 24, 0, sys.Loong64, 0}, +- {AMOVWU, C_TLS_GD, C_NONE, C_REG, C_NONE, 57, 24, 0, sys.Loong64, 0}, ++ {AMOVB, C_REG, C_NONE, C_TLS_IE, C_NONE, 56, 16, 0, sys.Loong64, 0}, ++ {AMOVW, C_REG, C_NONE, C_TLS_IE, C_NONE, 56, 16, 0, sys.Loong64, 0}, ++ {AMOVV, C_REG, C_NONE, C_TLS_IE, C_NONE, 56, 16, 0, sys.Loong64, 0}, ++ {AMOVBU, C_REG, C_NONE, C_TLS_IE, C_NONE, 56, 16, 0, sys.Loong64, 0}, ++ {AMOVWU, C_REG, C_NONE, C_TLS_IE, C_NONE, 56, 16, 0, sys.Loong64, 0}, ++ ++ {AMOVB, C_TLS_IE, C_NONE, C_REG, C_NONE, 57, 16, 0, sys.Loong64, 0}, ++ {AMOVW, C_TLS_IE, C_NONE, C_REG, C_NONE, 57, 16, 0, sys.Loong64, 0}, ++ {AMOVV, C_TLS_IE, C_NONE, C_REG, C_NONE, 57, 16, 0, sys.Loong64, 0}, ++ {AMOVBU, C_TLS_IE, C_NONE, C_REG, C_NONE, 57, 16, 0, sys.Loong64, 0}, ++ {AMOVWU, C_TLS_IE, C_NONE, C_REG, C_NONE, 57, 16, 0, sys.Loong64, 0}, + + {ATEQ, C_SCON, C_REG, C_REG, C_NONE, 15, 8, 0, 0, 
0}, + {ATEQ, C_SCON, C_NONE, C_REG, C_NONE, 15, 8, 0, 0, 0}, +@@ -556,7 +556,7 @@ func (c *ctxt0) aclass(a *obj.Addr) int { + if a.Sym != nil { // use relocation + if a.Sym.Type == objabi.STLSBSS { + if c.ctxt.Flag_shared { +- return C_TLS_GD ++ return C_TLS_IE + } else { + return C_TLS_LE + } +@@ -1611,69 +1611,41 @@ func (c *ctxt0) asmout(p *obj.Prog, o *Optab, out []uint32) { + rel2.Type = objabi.R_ADDRLOONG64TLS + o3 = OP_RRR(c.oprrr(AADDV), uint32(REG_R2), uint32(REGTMP), uint32(p.To.Reg)) + +- case 56: // mov r, tlsvar GD model ==> (pcalau12i + ld.d)__tls_get_addr + (pcalau12i + addi.d)tlsvar@got + jirl + st.d ++ case 56: // mov r, tlsvar IE model ==> (pcalau12i + ld.d)tlsvar@got + add.d + st.d + o1 = OP_IR(c.opir(APCALAU12I), uint32(0), uint32(REGTMP)) +- rel := obj.Addrel(c.cursym) +- rel.Off = int32(c.pc) +- rel.Siz = 4 +- rel.Sym = c.ctxt.Lookup("__tls_get_addr") +- rel.Add = 0x0 +- rel.Type = objabi.R_LOONG64_GOTPCREL_HI ++ rel := obj.Addrel(c.cursym) ++ rel.Off = int32(c.pc) ++ rel.Siz = 4 ++ rel.Sym = p.To.Sym ++ rel.Add = 0x0 ++ rel.Type = objabi.R_LOONG64_TLS_IE_PCREL_HI + o2 = OP_12IRR(c.opirr(-p.As), uint32(0), uint32(REGTMP), uint32(REGTMP)) +- rel2 := obj.Addrel(c.cursym) +- rel2.Off = int32(c.pc + 4) +- rel2.Siz = 4 +- rel2.Sym = c.ctxt.Lookup("__tls_get_addr") +- rel2.Add = 0x0 +- rel2.Type = objabi.R_LOONG64_GOT_LO +- o3 = OP_IR(c.opir(APCALAU12I), uint32(0), uint32(REG_R4)) +- rel3 := obj.Addrel(c.cursym) +- rel3.Off = int32(c.pc + 8) +- rel3.Siz = 4 +- rel3.Sym = p.To.Sym +- rel3.Add = 0x0 +- rel3.Type = objabi.R_LOONG64_TLS_GD_PCREL_HI +- o4 = OP_12IRR(c.opirr(AADDV), uint32(0), uint32(REG_R4), uint32(REG_R4)) +- rel4 := obj.Addrel(c.cursym) +- rel4.Off = int32(c.pc + 12) +- rel4.Siz = 4 +- rel4.Sym = p.To.Sym +- rel4.Add = 0x0 +- rel4.Type = objabi.R_LOONG64_TLS_GD_LO +- o5 = OP_16IRR(c.opirr(AJIRL), uint32(0), uint32(REGTMP), uint32(REGLINK)) +- o6 = OP_12IRR(c.opirr(p.As), uint32(0), uint32(REG_R4), uint32(p.From.Reg)) +- +- case 57: // 
mov tlsvar, r GD model ==> (pcalau12i + ld.d)__tls_get_addr + (pcalau12i + addi.d)tlsvar@got + jirl + ld.d +- o1 = OP_IR(c.opir(APCALAU12I), uint32(0), uint32(REGTMP)) +- rel := obj.Addrel(c.cursym) +- rel.Off = int32(c.pc) +- rel.Siz = 4 +- rel.Sym = c.ctxt.Lookup("__tls_get_addr") +- rel.Add = 0x0 +- rel.Type = objabi.R_LOONG64_GOTPCREL_HI +- o2 = OP_12IRR(c.opirr(-p.As), uint32(0), uint32(REGTMP), uint32(REGTMP)) +- rel2 := obj.Addrel(c.cursym) +- rel2.Off = int32(c.pc + 4) +- rel2.Siz = 4 +- rel2.Sym = c.ctxt.Lookup("__tls_get_addr") +- rel2.Add = 0x0 +- rel2.Type = objabi.R_LOONG64_GOT_LO +- o3 = OP_IR(c.opir(APCALAU12I), uint32(0), uint32(REG_R4)) +- rel3 := obj.Addrel(c.cursym) +- rel3.Off = int32(c.pc + 8) +- rel3.Siz = 4 +- rel3.Sym = p.From.Sym +- rel3.Type = objabi.R_LOONG64_TLS_GD_PCREL_HI +- rel3.Add = 0x0 +- o4 = OP_12IRR(c.opirr(AADDV), uint32(0), uint32(REG_R4), uint32(REG_R4)) +- rel4 := obj.Addrel(c.cursym) +- rel4.Off = int32(c.pc + 12) +- rel4.Siz = 4 +- rel4.Sym = p.From.Sym +- rel4.Type = objabi.R_LOONG64_TLS_GD_LO +- rel4.Add = 0x0 +- o5 = OP_16IRR(c.opirr(AJIRL), uint32(0), uint32(REGTMP), uint32(REGLINK)) +- o6 = OP_12IRR(c.opirr(-p.As), uint32(0), uint32(REG_R4), uint32(p.To.Reg)) ++ rel2 := obj.Addrel(c.cursym) ++ rel2.Off = int32(c.pc + 4) ++ rel2.Siz = 4 ++ rel2.Sym = p.To.Sym ++ rel2.Add = 0x0 ++ rel2.Type = objabi.R_LOONG64_TLS_IE_LO ++ o3 = OP_RRR(c.oprrr(AADDVU), uint32(REGTMP), uint32(REG_R2), uint32(REGTMP)) ++ o4 = OP_12IRR(c.opirr(p.As), uint32(0), uint32(REGTMP), uint32(p.From.Reg)) ++ ++ case 57: // mov tlsvar, r IE model ==> (pcalau12i + ld.d)tlsvar@got + add.d + ld.d ++ o1 = OP_IR(c.opir(APCALAU12I), uint32(0), uint32(REGTMP)) ++ rel := obj.Addrel(c.cursym) ++ rel.Off = int32(c.pc) ++ rel.Siz = 4 ++ rel.Sym = p.From.Sym ++ rel.Add = 0x0 ++ rel.Type = objabi.R_LOONG64_TLS_IE_PCREL_HI ++ o2 = OP_12IRR(c.opirr(-p.As), uint32(0), uint32(REGTMP), uint32(REGTMP)) ++ rel2 := obj.Addrel(c.cursym) ++ rel2.Off = int32(c.pc + 4) ++ 
rel2.Siz = 4 ++ rel2.Sym = p.From.Sym ++ rel2.Add = 0x0 ++ rel2.Type = objabi.R_LOONG64_TLS_IE_LO ++ o3 = OP_RRR(c.oprrr(AADDVU), uint32(REGTMP), uint32(REG_R2), uint32(REGTMP)) ++ o4 = OP_12IRR(c.opirr(-p.As), uint32(0), uint32(REGTMP), uint32(p.To.Reg)) + + case 59: // mov $dcon,r + // NOTE: this case does not use REGTMP. If it ever does, +diff --git a/src/cmd/internal/obj/loong64/cnames.go b/src/cmd/internal/obj/loong64/cnames.go +index 393d0a007d..8b8af6ba31 100644 +--- a/src/cmd/internal/obj/loong64/cnames.go ++++ b/src/cmd/internal/obj/loong64/cnames.go +@@ -38,7 +38,7 @@ var cnames0 = []string{ + "GOK", + "ADDR", + "TLS_LE", +- "TLS_GD", ++ "TLS_IE", + "TEXTSIZE", + "NCLASS", + } +diff --git a/src/cmd/internal/objabi/reloctype.go b/src/cmd/internal/objabi/reloctype.go +index f7a2af5ccc..db061632cf 100644 +--- a/src/cmd/internal/objabi/reloctype.go ++++ b/src/cmd/internal/objabi/reloctype.go +@@ -310,11 +310,11 @@ const ( + // instruction, by encoding the address into the instruction. + R_CALLLOONG64 + +- // R_LOONG64_TLS_GD_PCREL_HI and R_LOONG64_TLS_GD_LO relocates an pcalau12i, addi.d pair to compute ++ // R_LOONG64_TLS_IE_PCREL_HI and R_LOONG64_TLS_IE_LO relocates an pcalau12i, addi.d pair to compute + // the address of the GOT slot of the tls symbol, the address will be passed to __tls_get_addr to + // get the true address of tlsvar. +- R_LOONG64_TLS_GD_PCREL_HI +- R_LOONG64_TLS_GD_LO ++ R_LOONG64_TLS_IE_PCREL_HI ++ R_LOONG64_TLS_IE_LO + + // R_LOONG64_GOTPCREL_HI and R_LOONG64_GOT_LO relocates an pcalau12i, ld.d pair to compute + // the address of the GOT slot of the referenced symbol. 
+diff --git a/src/cmd/internal/objabi/reloctype_string.go b/src/cmd/internal/objabi/reloctype_string.go +index ad4258c4a3..53104c76b3 100644 +--- a/src/cmd/internal/objabi/reloctype_string.go ++++ b/src/cmd/internal/objabi/reloctype_string.go +@@ -79,8 +79,8 @@ func _() { + _ = x[R_ADDRLOONG64TLS-69] + _ = x[R_ADDRLOONG64TLSU-70] + _ = x[R_CALLLOONG64-71] +- _ = x[R_LOONG64_TLS_GD_PCREL_HI-72] +- _ = x[R_LOONG64_TLS_GD_LO-73] ++ _ = x[R_LOONG64_TLS_IE_PCREL_HI-72] ++ _ = x[R_LOONG64_TLS_IE_LO-73] + _ = x[R_LOONG64_GOTPCREL_HI-74] + _ = x[R_LOONG64_GOT_LO-75] + _ = x[R_JMPLOONG64-76] +@@ -91,7 +91,7 @@ func _() { + _ = x[R_XCOFFREF-81] + } + +-const _RelocType_name = "R_ADDRR_ADDRPOWERR_ADDRARM64R_ADDRMIPSR_ADDROFFR_SIZER_CALLR_CALLARMR_CALLARM64R_CALLINDR_CALLPOWERR_CALLMIPSR_CONSTR_PCRELR_TLS_LER_TLS_IER_GOTOFFR_PLT0R_PLT1R_PLT2R_USEFIELDR_USETYPER_USEIFACER_USEIFACEMETHODR_USEGENERICIFACEMETHODR_METHODOFFR_KEEPR_POWER_TOCR_GOTPCRELR_JMPMIPSR_DWARFSECREFR_DWARFFILEREFR_ARM64_TLS_LER_ARM64_TLS_IER_ARM64_GOTPCRELR_ARM64_GOTR_ARM64_PCRELR_ARM64_PCREL_LDST8R_ARM64_PCREL_LDST16R_ARM64_PCREL_LDST32R_ARM64_PCREL_LDST64R_ARM64_LDST8R_ARM64_LDST16R_ARM64_LDST32R_ARM64_LDST64R_ARM64_LDST128R_POWER_TLS_LER_POWER_TLS_IER_POWER_TLSR_POWER_TLS_IE_PCREL34R_POWER_TLS_LE_TPREL34R_ADDRPOWER_DSR_ADDRPOWER_GOTR_ADDRPOWER_GOT_PCREL34R_ADDRPOWER_PCRELR_ADDRPOWER_TOCRELR_ADDRPOWER_TOCREL_DSR_ADDRPOWER_D34R_ADDRPOWER_PCREL34R_RISCV_CALLR_RISCV_CALL_TRAMPR_RISCV_PCREL_ITYPER_RISCV_PCREL_STYPER_RISCV_TLS_IE_ITYPER_RISCV_TLS_IE_STYPER_PCRELDBLR_ADDRLOONG64R_ADDRLOONG64UR_ADDRLOONG64TLSR_ADDRLOONG64TLSUR_CALLLOONG64R_LOONG64_TLS_GD_PCREL_HIR_LOONG64_TLS_GD_LOR_LOONG64_GOTPCREL_HIR_LOONG64_GOT_LOR_JMPLOONG64R_ADDRMIPSUR_ADDRMIPSTLSR_ADDRCUOFFR_WASMIMPORTR_XCOFFREF" ++const _RelocType_name = 
"R_ADDRR_ADDRPOWERR_ADDRARM64R_ADDRMIPSR_ADDROFFR_SIZER_CALLR_CALLARMR_CALLARM64R_CALLINDR_CALLPOWERR_CALLMIPSR_CONSTR_PCRELR_TLS_LER_TLS_IER_GOTOFFR_PLT0R_PLT1R_PLT2R_USEFIELDR_USETYPER_USEIFACER_USEIFACEMETHODR_USEGENERICIFACEMETHODR_METHODOFFR_KEEPR_POWER_TOCR_GOTPCRELR_JMPMIPSR_DWARFSECREFR_DWARFFILEREFR_ARM64_TLS_LER_ARM64_TLS_IER_ARM64_GOTPCRELR_ARM64_GOTR_ARM64_PCRELR_ARM64_PCREL_LDST8R_ARM64_PCREL_LDST16R_ARM64_PCREL_LDST32R_ARM64_PCREL_LDST64R_ARM64_LDST8R_ARM64_LDST16R_ARM64_LDST32R_ARM64_LDST64R_ARM64_LDST128R_POWER_TLS_LER_POWER_TLS_IER_POWER_TLSR_POWER_TLS_IE_PCREL34R_POWER_TLS_LE_TPREL34R_ADDRPOWER_DSR_ADDRPOWER_GOTR_ADDRPOWER_GOT_PCREL34R_ADDRPOWER_PCRELR_ADDRPOWER_TOCRELR_ADDRPOWER_TOCREL_DSR_ADDRPOWER_D34R_ADDRPOWER_PCREL34R_RISCV_CALLR_RISCV_CALL_TRAMPR_RISCV_PCREL_ITYPER_RISCV_PCREL_STYPER_RISCV_TLS_IE_ITYPER_RISCV_TLS_IE_STYPER_PCRELDBLR_ADDRLOONG64R_ADDRLOONG64UR_ADDRLOONG64TLSR_ADDRLOONG64TLSUR_CALLLOONG64R_LOONG64_TLS_IE_PCREL_HIR_LOONG64_TLS_IE_LOR_LOONG64_GOTPCREL_HIR_LOONG64_GOT_LOR_JMPLOONG64R_ADDRMIPSUR_ADDRMIPSTLSR_ADDRCUOFFR_WASMIMPORTR_XCOFFREF" + + var _RelocType_index = [...]uint16{0, 6, 17, 28, 38, 47, 53, 59, 68, 79, 88, 99, 109, 116, 123, 131, 139, 147, 153, 159, 165, 175, 184, 194, 210, 233, 244, 250, 261, 271, 280, 293, 307, 321, 335, 351, 362, 375, 394, 414, 434, 454, 467, 481, 495, 509, 524, 538, 552, 563, 585, 607, 621, 636, 659, 676, 694, 715, 730, 749, 761, 779, 798, 817, 837, 857, 867, 880, 894, 910, 927, 940, 965, 984, 1005, 1021, 1033, 1044, 1057, 1068, 1080, 1090} + +diff --git a/src/cmd/link/internal/loong64/asm.go b/src/cmd/link/internal/loong64/asm.go +index 238d77a610..de2e93f757 100644 +--- a/src/cmd/link/internal/loong64/asm.go ++++ b/src/cmd/link/internal/loong64/asm.go +@@ -59,14 +59,14 @@ func elfreloc1(ctxt *ld.Link, out *ld.OutBuf, ldr *loader.Loader, s loader.Sym, + out.Write64(uint64(elf.R_LARCH_B26) | uint64(elfsym)<<32) + out.Write64(uint64(r.Xadd)) + +- case objabi.R_LOONG64_TLS_GD_PCREL_HI: ++ case 
objabi.R_LOONG64_TLS_IE_PCREL_HI: + out.Write64(uint64(sectoff)) +- out.Write64(uint64(elf.R_LARCH_TLS_GD_PC_HI20) | uint64(elfsym)<<32) ++ out.Write64(uint64(elf.R_LARCH_TLS_IE_PC_HI20) | uint64(elfsym)<<32) + out.Write64(uint64(0x0)) + +- case objabi.R_LOONG64_TLS_GD_LO: ++ case objabi.R_LOONG64_TLS_IE_LO: + out.Write64(uint64(sectoff)) +- out.Write64(uint64(elf.R_LARCH_GOT_PC_LO12) | uint64(elfsym)<<32) ++ out.Write64(uint64(elf.R_LARCH_TLS_IE_PC_LO12) | uint64(elfsym)<<32) + out.Write64(uint64(0x0)) + + case objabi.R_LOONG64_GOTPCREL_HI: +@@ -126,9 +126,9 @@ func archreloc(target *ld.Target, ldr *loader.Loader, syms *ld.ArchSyms, r loade + objabi.R_CALLLOONG64, + objabi.R_JMPLOONG64: + return val, 1, true +- case objabi.R_LOONG64_TLS_GD_PCREL_HI, ++ case objabi.R_LOONG64_TLS_IE_PCREL_HI, + objabi.R_LOONG64_GOTPCREL_HI, +- objabi.R_LOONG64_TLS_GD_LO, ++ objabi.R_LOONG64_TLS_IE_LO, + objabi.R_LOONG64_GOT_LO: + return val, 1, true + } +@@ -185,8 +185,8 @@ func extreloc(target *ld.Target, ldr *loader.Loader, r loader.Reloc, s loader.Sy + objabi.R_GOTOFF, + objabi.R_CALLLOONG64, + objabi.R_JMPLOONG64, +- objabi.R_LOONG64_TLS_GD_PCREL_HI, +- objabi.R_LOONG64_TLS_GD_LO: ++ objabi.R_LOONG64_TLS_IE_PCREL_HI, ++ objabi.R_LOONG64_TLS_IE_LO: + return ld.ExtrelocSimple(ldr, r), true + } + return loader.ExtReloc{}, false +-- +2.38.1 + diff --git a/0026-cmd-compile-cmd-dist-cmd-go-internal-enable-buildmod.patch b/0026-cmd-compile-cmd-dist-cmd-go-internal-enable-buildmod.patch new file mode 100644 index 0000000000000000000000000000000000000000..9c2632ac89557a589ad03ac8bc107a8d4b8523d3 --- /dev/null +++ b/0026-cmd-compile-cmd-dist-cmd-go-internal-enable-buildmod.patch @@ -0,0 +1,72 @@ +From 7ced8f3799641fb3b75f5fe8b7157ff0143d1435 Mon Sep 17 00:00:00 2001 +From: WANG Xuerui +Date: Sun, 4 Dec 2022 15:06:45 +0800 +Subject: [PATCH 26/62] cmd/compile, cmd/dist, cmd/go, internal: enable + buildmode=pie for linux/loong64 + +Only external linking is supported for now, due to missing 
adddynrel +implementation for loong64. Enable the corresponding tests while at it. + +Change-Id: I6906d9eb4bd8655c685b059283e200cb7e210369 +--- + src/cmd/compile/internal/base/flag.go | 2 +- + src/cmd/dist/test.go | 2 +- + src/cmd/go/go_test.go | 2 +- + src/internal/platform/supported.go | 2 +- + 4 files changed, 4 insertions(+), 4 deletions(-) + +diff --git a/src/cmd/compile/internal/base/flag.go b/src/cmd/compile/internal/base/flag.go +index a09740f736..a18da88717 100644 +--- a/src/cmd/compile/internal/base/flag.go ++++ b/src/cmd/compile/internal/base/flag.go +@@ -204,7 +204,7 @@ func ParseFlags() { + if Flag.Race && !platform.RaceDetectorSupported(buildcfg.GOOS, buildcfg.GOARCH) { + log.Fatalf("%s/%s does not support -race", buildcfg.GOOS, buildcfg.GOARCH) + } +- if (*Flag.Shared || *Flag.Dynlink || *Flag.LinkShared) && !Ctxt.Arch.InFamily(sys.AMD64, sys.ARM, sys.ARM64, sys.Loong64, sys.I386, sys.PPC64, sys.RISCV64, sys.S390X) { ++ if (*Flag.Shared || *Flag.Dynlink || *Flag.LinkShared) && !Ctxt.Arch.InFamily(sys.AMD64, sys.ARM, sys.ARM64, sys.I386, sys.Loong64, sys.PPC64, sys.RISCV64, sys.S390X) { + log.Fatalf("%s/%s does not support -shared", buildcfg.GOOS, buildcfg.GOARCH) + } + parseSpectre(Flag.Spectre) // left as string for RecordFlags +diff --git a/src/cmd/dist/test.go b/src/cmd/dist/test.go +index ac93194165..04c7fabcaa 100644 +--- a/src/cmd/dist/test.go ++++ b/src/cmd/dist/test.go +@@ -1218,7 +1218,7 @@ func (t *tester) supportedBuildmode(mode string) bool { + case "pie": + switch pair { + case "aix/ppc64", +- "linux-386", "linux-amd64", "linux-arm", "linux-arm64", "linux-ppc64le", "linux-riscv64", "linux-s390x", ++ "linux-386", "linux-amd64", "linux-arm", "linux-arm64", "linux-loong64", "linux-ppc64le", "linux-riscv64", "linux-s390x", + "android-amd64", "android-arm", "android-arm64", "android-386": + return true + case "darwin-amd64", "darwin-arm64": +diff --git a/src/cmd/go/go_test.go b/src/cmd/go/go_test.go +index 6b6620feeb..182ca02a24 100644 +--- 
a/src/cmd/go/go_test.go ++++ b/src/cmd/go/go_test.go +@@ -2129,7 +2129,7 @@ func TestBuildmodePIE(t *testing.T) { + + platform := fmt.Sprintf("%s/%s", runtime.GOOS, runtime.GOARCH) + switch platform { +- case "linux/386", "linux/amd64", "linux/arm", "linux/arm64", "linux/ppc64le", "linux/riscv64", "linux/s390x", ++ case "linux/386", "linux/amd64", "linux/arm", "linux/arm64", "linux/loong64", "linux/ppc64le", "linux/riscv64", "linux/s390x", + "android/amd64", "android/arm", "android/arm64", "android/386", + "freebsd/amd64", + "windows/386", "windows/amd64", "windows/arm", "windows/arm64": +diff --git a/src/internal/platform/supported.go b/src/internal/platform/supported.go +index e6d8adb40e..f00f978eb7 100644 +--- a/src/internal/platform/supported.go ++++ b/src/internal/platform/supported.go +@@ -160,7 +160,7 @@ func BuildModeSupported(compiler, buildmode, goos, goarch string) bool { + + case "pie": + switch platform { +- case "linux/386", "linux/amd64", "linux/arm", "linux/arm64", "linux/ppc64le", "linux/riscv64", "linux/s390x", ++ case "linux/386", "linux/amd64", "linux/arm", "linux/arm64", "linux/loong64", "linux/ppc64le", "linux/riscv64", "linux/s390x", + "android/amd64", "android/arm", "android/arm64", "android/386", + "freebsd/amd64", + "darwin/amd64", "darwin/arm64", +-- +2.38.1 + diff --git a/0027-net-disable-TestLookupDotsWithRemoteSource-and-TestL.patch b/0027-net-disable-TestLookupDotsWithRemoteSource-and-TestL.patch new file mode 100644 index 0000000000000000000000000000000000000000..2d64381d8ad9cf23eb52739361dcea8ce9837761 --- /dev/null +++ b/0027-net-disable-TestLookupDotsWithRemoteSource-and-TestL.patch @@ -0,0 +1,51 @@ +From c9ef559e66fb064b435e45915829c2a93c7541c0 Mon Sep 17 00:00:00 2001 +From: Michael Anthony Knyszek +Date: Fri, 11 Nov 2022 17:26:45 +0000 +Subject: [PATCH 27/62] net: disable TestLookupDotsWithRemoteSource and + TestLookupGoogleSRV + +These tests fail consistently due to a DNS change causing widespread +trybot outages. 
+ +Fixes #56707. + +Reviewed-on: https://go-review.googlesource.com/c/go/+/449640 +Reviewed-by: Carlos Amedee +Reviewed-by: Bryan Mills +Run-TryBot: Michael Knyszek +TryBot-Result: Gopher Robot +Reviewed-by: Damien Neil +Change-Id: I7ea7ed1f701ce5a5f930885a9c817fdebd6aee4d +--- + src/net/lookup_test.go | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/src/net/lookup_test.go b/src/net/lookup_test.go +index fa1a706c78..cb2b64f0b5 100644 +--- a/src/net/lookup_test.go ++++ b/src/net/lookup_test.go +@@ -71,6 +71,10 @@ var lookupGoogleSRVTests = []struct { + var backoffDuration = [...]time.Duration{time.Second, 5 * time.Second, 30 * time.Second} + + func TestLookupGoogleSRV(t *testing.T) { ++ // TODO(mknyszek): Figure out next steps for this test. This is just ++ // a quick fix. ++ t.Skip("fails consistently due to an upstream DNS change; see #56707.") ++ + t.Parallel() + mustHaveExternalNetwork(t) + +@@ -633,6 +637,10 @@ func TestLookupDotsWithLocalSource(t *testing.T) { + } + + func TestLookupDotsWithRemoteSource(t *testing.T) { ++ // TODO(mknyszek): Figure out next steps for this test. This is just ++ // a quick fix. ++ t.Skip("fails consistently due to an upstream DNS change; see #56707.") ++ + if runtime.GOOS == "darwin" || runtime.GOOS == "ios" { + testenv.SkipFlaky(t, 27992) + } +-- +2.38.1 + diff --git a/0028-enable-c-archive-test-on-linux-loong64.patch b/0028-enable-c-archive-test-on-linux-loong64.patch new file mode 100644 index 0000000000000000000000000000000000000000..f515e441cc79f2802bf0baf6789cda65c2425a4e --- /dev/null +++ b/0028-enable-c-archive-test-on-linux-loong64.patch @@ -0,0 +1,28 @@ +From 580019af2e1c2f6e0bf62aa19115208594e4f7d2 Mon Sep 17 00:00:00 2001 +From: Xiaolin Zhao +Date: Fri, 10 Mar 2023 14:15:30 +0800 +Subject: [PATCH 28/62] enable c-archive test on linux/loong64 + +Since c-shared is already supported, c-archive can be used. 
+ +Change-Id: I34e15ed4206d9b58f3c7768f654aff147342fa07 +--- + src/cmd/dist/test.go | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/src/cmd/dist/test.go b/src/cmd/dist/test.go +index 04c7fabcaa..920018721d 100644 +--- a/src/cmd/dist/test.go ++++ b/src/cmd/dist/test.go +@@ -1183,7 +1183,7 @@ func (t *tester) supportedBuildmode(mode string) bool { + switch pair { + case "aix-ppc64", + "darwin-amd64", "darwin-arm64", "ios-arm64", +- "linux-amd64", "linux-386", "linux-ppc64le", "linux-riscv64", "linux-s390x", ++ "linux-amd64", "linux-386", "linux-loong64", "linux-ppc64le", "linux-riscv64", "linux-s390x", + "freebsd-amd64", + "windows-amd64", "windows-386": + return true +-- +2.38.1 + diff --git a/0029-cmd-internal-cmd-link-remove-invalid-GOT-relative-re.patch b/0029-cmd-internal-cmd-link-remove-invalid-GOT-relative-re.patch new file mode 100644 index 0000000000000000000000000000000000000000..854bffe6190ee9b0fd5ed6b636bf7e4343852f19 --- /dev/null +++ b/0029-cmd-internal-cmd-link-remove-invalid-GOT-relative-re.patch @@ -0,0 +1,145 @@ +From 75f79b2ed8a053edfacb52ca0231f076b48aa943 Mon Sep 17 00:00:00 2001 +From: limeidan +Date: Fri, 10 Mar 2023 10:48:24 +0800 +Subject: [PATCH 29/62] cmd/internal, cmd/link: remove invalid GOT relative + relocations + +Change-Id: I14bf211cb3760d4d1f8e9e2e486cdca45c64889a +--- + src/cmd/internal/obj/loong64/asm.go | 2 -- + src/cmd/internal/objabi/reloctype.go | 10 ++-------- + src/cmd/internal/objabi/reloctype_string.go | 20 +++++++++----------- + src/cmd/link/internal/loong64/asm.go | 21 +++------------------ + 4 files changed, 14 insertions(+), 39 deletions(-) + +diff --git a/src/cmd/internal/obj/loong64/asm.go b/src/cmd/internal/obj/loong64/asm.go +index 792ed22a02..ca82986bb3 100644 +--- a/src/cmd/internal/obj/loong64/asm.go ++++ b/src/cmd/internal/obj/loong64/asm.go +@@ -1133,7 +1133,6 @@ func (c *ctxt0) asmout(p *obj.Prog, o *Optab, out []uint32) { + o3 := uint32(0) + o4 := uint32(0) + o5 := uint32(0) +- o6 := 
uint32(0) + + add := AADDU + add = AADDVU +@@ -1681,7 +1680,6 @@ func (c *ctxt0) asmout(p *obj.Prog, o *Optab, out []uint32) { + out[2] = o3 + out[3] = o4 + out[4] = o5 +- out[5] = o6 + } + + func (c *ctxt0) vregoff(a *obj.Addr) int64 { +diff --git a/src/cmd/internal/objabi/reloctype.go b/src/cmd/internal/objabi/reloctype.go +index db061632cf..1a6a73fe12 100644 +--- a/src/cmd/internal/objabi/reloctype.go ++++ b/src/cmd/internal/objabi/reloctype.go +@@ -310,17 +310,11 @@ const ( + // instruction, by encoding the address into the instruction. + R_CALLLOONG64 + +- // R_LOONG64_TLS_IE_PCREL_HI and R_LOONG64_TLS_IE_LO relocates an pcalau12i, addi.d pair to compute +- // the address of the GOT slot of the tls symbol, the address will be passed to __tls_get_addr to +- // get the true address of tlsvar. ++ // R_LOONG64_TLS_IE_PCREL_HI and R_LOONG64_TLS_IE_LO relocates an pcalau12i, ld.d pair to compute ++ // the address of the GOT slot of the tls symbol. + R_LOONG64_TLS_IE_PCREL_HI + R_LOONG64_TLS_IE_LO + +- // R_LOONG64_GOTPCREL_HI and R_LOONG64_GOT_LO relocates an pcalau12i, ld.d pair to compute +- // the address of the GOT slot of the referenced symbol. +- R_LOONG64_GOTPCREL_HI +- R_LOONG64_GOT_LO +- + // R_JMPLOONG64 resolves to non-PC-relative target address of a JMP instruction, + // by encoding the address into the instruction. + R_JMPLOONG64 +diff --git a/src/cmd/internal/objabi/reloctype_string.go b/src/cmd/internal/objabi/reloctype_string.go +index 53104c76b3..8cfff5ae8c 100644 +--- a/src/cmd/internal/objabi/reloctype_string.go ++++ b/src/cmd/internal/objabi/reloctype_string.go +@@ -1,4 +1,4 @@ +-// Code generated by "stringer -type=RelocType"; DO NOT EDIT. ++// Code generated by "stringer -type=RelocType cmd/internal/objabi/reloctype.go"; DO NOT EDIT. 
+ + package objabi + +@@ -81,19 +81,17 @@ func _() { + _ = x[R_CALLLOONG64-71] + _ = x[R_LOONG64_TLS_IE_PCREL_HI-72] + _ = x[R_LOONG64_TLS_IE_LO-73] +- _ = x[R_LOONG64_GOTPCREL_HI-74] +- _ = x[R_LOONG64_GOT_LO-75] +- _ = x[R_JMPLOONG64-76] +- _ = x[R_ADDRMIPSU-77] +- _ = x[R_ADDRMIPSTLS-78] +- _ = x[R_ADDRCUOFF-79] +- _ = x[R_WASMIMPORT-80] +- _ = x[R_XCOFFREF-81] ++ _ = x[R_JMPLOONG64-74] ++ _ = x[R_ADDRMIPSU-75] ++ _ = x[R_ADDRMIPSTLS-76] ++ _ = x[R_ADDRCUOFF-77] ++ _ = x[R_WASMIMPORT-78] ++ _ = x[R_XCOFFREF-79] + } + +-const _RelocType_name = "R_ADDRR_ADDRPOWERR_ADDRARM64R_ADDRMIPSR_ADDROFFR_SIZER_CALLR_CALLARMR_CALLARM64R_CALLINDR_CALLPOWERR_CALLMIPSR_CONSTR_PCRELR_TLS_LER_TLS_IER_GOTOFFR_PLT0R_PLT1R_PLT2R_USEFIELDR_USETYPER_USEIFACER_USEIFACEMETHODR_USEGENERICIFACEMETHODR_METHODOFFR_KEEPR_POWER_TOCR_GOTPCRELR_JMPMIPSR_DWARFSECREFR_DWARFFILEREFR_ARM64_TLS_LER_ARM64_TLS_IER_ARM64_GOTPCRELR_ARM64_GOTR_ARM64_PCRELR_ARM64_PCREL_LDST8R_ARM64_PCREL_LDST16R_ARM64_PCREL_LDST32R_ARM64_PCREL_LDST64R_ARM64_LDST8R_ARM64_LDST16R_ARM64_LDST32R_ARM64_LDST64R_ARM64_LDST128R_POWER_TLS_LER_POWER_TLS_IER_POWER_TLSR_POWER_TLS_IE_PCREL34R_POWER_TLS_LE_TPREL34R_ADDRPOWER_DSR_ADDRPOWER_GOTR_ADDRPOWER_GOT_PCREL34R_ADDRPOWER_PCRELR_ADDRPOWER_TOCRELR_ADDRPOWER_TOCREL_DSR_ADDRPOWER_D34R_ADDRPOWER_PCREL34R_RISCV_CALLR_RISCV_CALL_TRAMPR_RISCV_PCREL_ITYPER_RISCV_PCREL_STYPER_RISCV_TLS_IE_ITYPER_RISCV_TLS_IE_STYPER_PCRELDBLR_ADDRLOONG64R_ADDRLOONG64UR_ADDRLOONG64TLSR_ADDRLOONG64TLSUR_CALLLOONG64R_LOONG64_TLS_IE_PCREL_HIR_LOONG64_TLS_IE_LOR_LOONG64_GOTPCREL_HIR_LOONG64_GOT_LOR_JMPLOONG64R_ADDRMIPSUR_ADDRMIPSTLSR_ADDRCUOFFR_WASMIMPORTR_XCOFFREF" ++const _RelocType_name = 
"R_ADDRR_ADDRPOWERR_ADDRARM64R_ADDRMIPSR_ADDROFFR_SIZER_CALLR_CALLARMR_CALLARM64R_CALLINDR_CALLPOWERR_CALLMIPSR_CONSTR_PCRELR_TLS_LER_TLS_IER_GOTOFFR_PLT0R_PLT1R_PLT2R_USEFIELDR_USETYPER_USEIFACER_USEIFACEMETHODR_USEGENERICIFACEMETHODR_METHODOFFR_KEEPR_POWER_TOCR_GOTPCRELR_JMPMIPSR_DWARFSECREFR_DWARFFILEREFR_ARM64_TLS_LER_ARM64_TLS_IER_ARM64_GOTPCRELR_ARM64_GOTR_ARM64_PCRELR_ARM64_PCREL_LDST8R_ARM64_PCREL_LDST16R_ARM64_PCREL_LDST32R_ARM64_PCREL_LDST64R_ARM64_LDST8R_ARM64_LDST16R_ARM64_LDST32R_ARM64_LDST64R_ARM64_LDST128R_POWER_TLS_LER_POWER_TLS_IER_POWER_TLSR_POWER_TLS_IE_PCREL34R_POWER_TLS_LE_TPREL34R_ADDRPOWER_DSR_ADDRPOWER_GOTR_ADDRPOWER_GOT_PCREL34R_ADDRPOWER_PCRELR_ADDRPOWER_TOCRELR_ADDRPOWER_TOCREL_DSR_ADDRPOWER_D34R_ADDRPOWER_PCREL34R_RISCV_CALLR_RISCV_CALL_TRAMPR_RISCV_PCREL_ITYPER_RISCV_PCREL_STYPER_RISCV_TLS_IE_ITYPER_RISCV_TLS_IE_STYPER_PCRELDBLR_ADDRLOONG64R_ADDRLOONG64UR_ADDRLOONG64TLSR_ADDRLOONG64TLSUR_CALLLOONG64R_LOONG64_TLS_IE_PCREL_HIR_LOONG64_TLS_IE_LOR_JMPLOONG64R_ADDRMIPSUR_ADDRMIPSTLSR_ADDRCUOFFR_WASMIMPORTR_XCOFFREF" + +-var _RelocType_index = [...]uint16{0, 6, 17, 28, 38, 47, 53, 59, 68, 79, 88, 99, 109, 116, 123, 131, 139, 147, 153, 159, 165, 175, 184, 194, 210, 233, 244, 250, 261, 271, 280, 293, 307, 321, 335, 351, 362, 375, 394, 414, 434, 454, 467, 481, 495, 509, 524, 538, 552, 563, 585, 607, 621, 636, 659, 676, 694, 715, 730, 749, 761, 779, 798, 817, 837, 857, 867, 880, 894, 910, 927, 940, 965, 984, 1005, 1021, 1033, 1044, 1057, 1068, 1080, 1090} ++var _RelocType_index = [...]uint16{0, 6, 17, 28, 38, 47, 53, 59, 68, 79, 88, 99, 109, 116, 123, 131, 139, 147, 153, 159, 165, 175, 184, 194, 210, 233, 244, 250, 261, 271, 280, 293, 307, 321, 335, 351, 362, 375, 394, 414, 434, 454, 467, 481, 495, 509, 524, 538, 552, 563, 585, 607, 621, 636, 659, 676, 694, 715, 730, 749, 761, 779, 798, 817, 837, 857, 867, 880, 894, 910, 927, 940, 965, 984, 996, 1007, 1020, 1031, 1043, 1053} + + func (i RelocType) String() string { + i -= 1 +diff --git 
a/src/cmd/link/internal/loong64/asm.go b/src/cmd/link/internal/loong64/asm.go +index de2e93f757..2f760fd231 100644 +--- a/src/cmd/link/internal/loong64/asm.go ++++ b/src/cmd/link/internal/loong64/asm.go +@@ -69,16 +69,6 @@ func elfreloc1(ctxt *ld.Link, out *ld.OutBuf, ldr *loader.Loader, s loader.Sym, + out.Write64(uint64(elf.R_LARCH_TLS_IE_PC_LO12) | uint64(elfsym)<<32) + out.Write64(uint64(0x0)) + +- case objabi.R_LOONG64_GOTPCREL_HI: +- out.Write64(uint64(sectoff)) +- out.Write64(uint64(elf.R_LARCH_GOT_PC_HI20) | uint64(elfsym)<<32) +- out.Write64(uint64(0x0)) +- +- case objabi.R_LOONG64_GOT_LO: +- out.Write64(uint64(sectoff)) +- out.Write64(uint64(elf.R_LARCH_GOT_PC_LO12) | uint64(elfsym)<<32) +- out.Write64(uint64(0x0)) +- + // The pcaddu12i + addi.d instructions is used to obtain address of a symbol on Loong64. + // The low 12-bit of the symbol address need to be added. The addi.d instruction have + // signed 12-bit immediate operand. The 0x800 (addr+U12 <=> addr+0x800+S12) is introduced +@@ -124,12 +114,9 @@ func archreloc(target *ld.Target, ldr *loader.Loader, syms *ld.ArchSyms, r loade + case objabi.R_ADDRLOONG64TLS, + objabi.R_ADDRLOONG64TLSU, + objabi.R_CALLLOONG64, +- objabi.R_JMPLOONG64: +- return val, 1, true +- case objabi.R_LOONG64_TLS_IE_PCREL_HI, +- objabi.R_LOONG64_GOTPCREL_HI, +- objabi.R_LOONG64_TLS_IE_LO, +- objabi.R_LOONG64_GOT_LO: ++ objabi.R_JMPLOONG64, ++ objabi.R_LOONG64_TLS_IE_PCREL_HI, ++ objabi.R_LOONG64_TLS_IE_LO: + return val, 1, true + } + } +@@ -174,8 +161,6 @@ func archrelocvariant(*ld.Target, *loader.Loader, loader.Reloc, sym.RelocVariant + func extreloc(target *ld.Target, ldr *loader.Loader, r loader.Reloc, s loader.Sym) (loader.ExtReloc, bool) { + switch r.Type() { + case objabi.R_ADDRLOONG64, +- objabi.R_LOONG64_GOTPCREL_HI, +- objabi.R_LOONG64_GOT_LO, + objabi.R_ADDRLOONG64U: + return ld.ExtrelocViaOuterSym(ldr, r, s), true + +-- +2.38.1 + diff --git a/0030-runtime-no-need-to-save-these-registers-in-load_g-sa.patch 
b/0030-runtime-no-need-to-save-these-registers-in-load_g-sa.patch new file mode 100644 index 0000000000000000000000000000000000000000..7d6f0ad69463c30e16cace0222516f5e3277b894 --- /dev/null +++ b/0030-runtime-no-need-to-save-these-registers-in-load_g-sa.patch @@ -0,0 +1,77 @@ +From bc8d803c39c483059572013ce248059737aafe7e Mon Sep 17 00:00:00 2001 +From: limeidan +Date: Fri, 10 Mar 2023 10:49:08 +0800 +Subject: [PATCH 30/62] runtime: no need to save these registers in + load_g&save_g + +Change-Id: I69d7db05010e61e76fbbfb89ee8c4b90393f1367 +--- + src/runtime/tls_loong64.s | 39 --------------------------------------- + 1 file changed, 39 deletions(-) + +diff --git a/src/runtime/tls_loong64.s b/src/runtime/tls_loong64.s +index 100f28b5ca..9d43c60556 100644 +--- a/src/runtime/tls_loong64.s ++++ b/src/runtime/tls_loong64.s +@@ -2,58 +2,19 @@ + // Use of this source code is governed by a BSD-style + // license that can be found in the LICENSE file. + +-#include "go_asm.h" +-#include "go_tls.h" +-#include "funcdata.h" + #include "textflag.h" + + // If !iscgo, this is a no-op. +-// +-// NOTE: mcall() assumes this clobbers only R30 (REGTMP). + TEXT runtime·save_g(SB),NOSPLIT,$0-0 + MOVB runtime·iscgo(SB), R30 + BEQ R30, nocgo +- +- // here use the func __tls_get_addr to get the address of tls_g, which clobbers these regs below. 
+- ADDV $-56, R3 +- MOVV R1, 0(R3) +- MOVV R4, 8(R3) +- MOVV R5, 16(R3) +- MOVV R6, 24(R3) +- MOVV R12, 32(R3) +- MOVV R13, 40(R3) +- MOVV R30, 48(R3) + MOVV g, runtime·tls_g(SB) +- MOVV 0(R3), R1 +- MOVV 8(R3), R4 +- MOVV 16(R3), R5 +- MOVV 24(R3), R6 +- MOVV 32(R3), R12 +- MOVV 40(R3), R13 +- MOVV 48(R3), R30 +- ADDV $56, R3 + + nocgo: + RET + + TEXT runtime·load_g(SB),NOSPLIT,$0-0 +- ADDV $-56, R3 +- MOVV R1, 0(R3) +- MOVV R4, 8(R3) +- MOVV R5, 16(R3) +- MOVV R6, 24(R3) +- MOVV R12, 32(R3) +- MOVV R13, 40(R3) +- MOVV R30, 48(R3) + MOVV runtime·tls_g(SB), g +- MOVV 0(R3), R1 +- MOVV 8(R3), R4 +- MOVV 16(R3), R5 +- MOVV 24(R3), R6 +- MOVV 32(R3), R12 +- MOVV 40(R3), R13 +- MOVV 48(R3), R30 +- ADDV $56, R3 + RET + + GLOBL runtime·tls_g(SB), TLSBSS, $8 +-- +2.38.1 + diff --git a/0031-cmd-internal-obj-loong64-add-support-for-movgr2cf-an.patch b/0031-cmd-internal-obj-loong64-add-support-for-movgr2cf-an.patch new file mode 100644 index 0000000000000000000000000000000000000000..b72d439ad12dbba8d83a33c49d15482de9d680c4 --- /dev/null +++ b/0031-cmd-internal-obj-loong64-add-support-for-movgr2cf-an.patch @@ -0,0 +1,53 @@ +From cc76b5e8c5e10adc8e3a187571ee2f996fb1f8f1 Mon Sep 17 00:00:00 2001 +From: huangqiqi +Date: Thu, 16 Mar 2023 13:16:06 +0800 +Subject: [PATCH 31/62] cmd/internal/obj/loong64: add support for movgr2cf and + movcf2gr instructions + +Change-Id: I223154d86a1034546a72c100125f33754737208e +--- + src/cmd/asm/internal/asm/testdata/loong64enc1.s | 3 +++ + src/cmd/internal/obj/loong64/asm.go | 9 +++++++++ + 2 files changed, 12 insertions(+) + +diff --git a/src/cmd/asm/internal/asm/testdata/loong64enc1.s b/src/cmd/asm/internal/asm/testdata/loong64enc1.s +index 0cc077c091..4f3cb2b2ee 100644 +--- a/src/cmd/asm/internal/asm/testdata/loong64enc1.s ++++ b/src/cmd/asm/internal/asm/testdata/loong64enc1.s +@@ -222,3 +222,6 @@ lable2: + RDTIMELW R4, R0 // 80600000 + RDTIMEHW R4, R0 // 80640000 + RDTIMED R4, R5 // 85680000 ++ ++ MOVV FCC0, R4 // 04dc1401 ++ MOVV R4, FCC0 // 
80d81401 +diff --git a/src/cmd/internal/obj/loong64/asm.go b/src/cmd/internal/obj/loong64/asm.go +index ca82986bb3..eff60104ce 100644 +--- a/src/cmd/internal/obj/loong64/asm.go ++++ b/src/cmd/internal/obj/loong64/asm.go +@@ -324,6 +324,9 @@ var optab = []Optab{ + {AMOVV, C_REG, C_NONE, C_FREG, C_NONE, 47, 4, 0, sys.Loong64, 0}, + {AMOVV, C_FREG, C_NONE, C_REG, C_NONE, 48, 4, 0, sys.Loong64, 0}, + ++ {AMOVV, C_FCCREG, C_NONE, C_REG, C_NONE, 63, 4, 0, sys.Loong64, 0}, ++ {AMOVV, C_REG, C_NONE, C_FCCREG, C_NONE, 64, 4, 0, sys.Loong64, 0}, ++ + {AMOVW, C_ADDCON, C_NONE, C_FREG, C_NONE, 34, 8, 0, sys.Loong64, 0}, + {AMOVW, C_ANDCON, C_NONE, C_FREG, C_NONE, 34, 8, 0, sys.Loong64, 0}, + +@@ -1673,6 +1676,12 @@ func (c *ctxt0) asmout(p *obj.Prog, o *Optab, out []uint32) { + + case 62: // rdtimex rd, rj + o1 = OP_RR(c.oprr(p.As), uint32(p.To.Reg), uint32(p.RegTo2)) ++ case 63: // movv c_fcc0, c_reg ==> movcf2gr rd, cj ++ a := OP_TEN(8, 1335) ++ o1 = OP_RR(a, uint32(p.From.Reg), uint32(p.To.Reg)) ++ case 64: // movv c_reg, c_fcc0 ==> movgr2cf cd, rj ++ a := OP_TEN(8, 1334) ++ o1 = OP_RR(a, uint32(p.From.Reg), uint32(p.To.Reg)) + } + + out[0] = o1 +-- +2.38.1 + diff --git a/0032-runtime-save-and-restore-fcc-registers-in-async-pree.patch b/0032-runtime-save-and-restore-fcc-registers-in-async-pree.patch new file mode 100644 index 0000000000000000000000000000000000000000..e623972a22b12070a89ea08dde251745d3c5ccfb --- /dev/null +++ b/0032-runtime-save-and-restore-fcc-registers-in-async-pree.patch @@ -0,0 +1,72 @@ +From d0b675c714413babaea745164c5d1db0ee6a33bf Mon Sep 17 00:00:00 2001 +From: huangqiqi +Date: Thu, 16 Mar 2023 13:16:38 +0800 +Subject: [PATCH 32/62] runtime: save and restore fcc registers in async + preempt on loong64 + +During the context switch of goroutine scheduling, the value of the +fcc0 register needs to be saved on the stack. 
+ +Change-Id: Id390611cf891ca080187815607127493682fd6e2 +--- + src/runtime/mkpreempt.go | 6 ++++++ + src/runtime/preempt_loong64.s | 12 ++++++++---- + 2 files changed, 14 insertions(+), 4 deletions(-) + +diff --git a/src/runtime/mkpreempt.go b/src/runtime/mkpreempt.go +index 61d2d0247e..70eca7c7e2 100644 +--- a/src/runtime/mkpreempt.go ++++ b/src/runtime/mkpreempt.go +@@ -483,6 +483,12 @@ func genLoong64() { + l.add(movf, reg, regsize) + } + ++ // save and restore FCC0 ++ l.addSpecial( ++ mov+" FCC0, R4\n"+mov+" R4, %d(R3)", ++ mov+" %d(R3), R4\n"+mov+" R4, FCC0", ++ regsize) ++ + // allocate frame, save PC of interrupted instruction (in LR) + p(mov+" R1, -%d(R3)", l.stack) + p(sub+" $%d, R3", l.stack) +diff --git a/src/runtime/preempt_loong64.s b/src/runtime/preempt_loong64.s +index ba59a07b7f..999e72c470 100644 +--- a/src/runtime/preempt_loong64.s ++++ b/src/runtime/preempt_loong64.s +@@ -4,8 +4,8 @@ + #include "textflag.h" + + TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0 +- MOVV R1, -472(R3) +- SUBV $472, R3 ++ MOVV R1, -480(R3) ++ SUBV $480, R3 + MOVV R4, 8(R3) + MOVV R5, 16(R3) + MOVV R6, 24(R3) +@@ -64,7 +64,11 @@ TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0 + MOVD F29, 448(R3) + MOVD F30, 456(R3) + MOVD F31, 464(R3) ++ MOVV FCC0, R4 ++ MOVV R4, 472(R3) + CALL ·asyncPreempt2(SB) ++ MOVV 472(R3), R4 ++ MOVV R4, FCC0 + MOVD 464(R3), F31 + MOVD 456(R3), F30 + MOVD 448(R3), F29 +@@ -123,7 +127,7 @@ TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0 + MOVV 24(R3), R6 + MOVV 16(R3), R5 + MOVV 8(R3), R4 +- MOVV 472(R3), R1 ++ MOVV 480(R3), R1 + MOVV (R3), R30 +- ADDV $480, R3 ++ ADDV $488, R3 + JMP (R30) +-- +2.38.1 + diff --git a/0033-cmd-cmd-vendor-pick-up-updates-for-golang.org-x-arch.patch b/0033-cmd-cmd-vendor-pick-up-updates-for-golang.org-x-arch.patch new file mode 100644 index 0000000000000000000000000000000000000000..f952c3ea6f57d8ba53f5ba43ca8fa7b6171b73e5 --- /dev/null +++ b/0033-cmd-cmd-vendor-pick-up-updates-for-golang.org-x-arch.patch @@ -0,0 +1,2250 @@ +From 
4643f2407c658b965d3b0350f0de66b9fd4b05f6 Mon Sep 17 00:00:00 2001 +From: Xiaodong Liu +Date: Fri, 10 Feb 2023 15:00:12 +0800 +Subject: [PATCH 33/62] cmd,cmd/vendor: pick up updates for + golang.org/x/arch/loong64 + +Bring in updates to golang.org/x/arch/ to support loong64 disassembler +from CL 358854. + + Used the directions found in README.vendor: + + cd $GOROOT/src/cmd + go get -d golang.org/x/arch@latest + go mod tidy + go mod vendor + +Change-Id: I448f653ca8d530303d0cd05fba1c380b3e0cd6f2 +--- + .../x/arch/loong64/loong64asm/arg.go | 93 + + .../x/arch/loong64/loong64asm/decode.go | 269 +++ + .../x/arch/loong64/loong64asm/gnu.go | 16 + + .../x/arch/loong64/loong64asm/inst.go | 296 ++++ + .../x/arch/loong64/loong64asm/tables.go | 1513 +++++++++++++++++ + 5 files changed, 2187 insertions(+) + create mode 100644 src/cmd/vendor/golang.org/x/arch/loong64/loong64asm/arg.go + create mode 100644 src/cmd/vendor/golang.org/x/arch/loong64/loong64asm/decode.go + create mode 100644 src/cmd/vendor/golang.org/x/arch/loong64/loong64asm/gnu.go + create mode 100644 src/cmd/vendor/golang.org/x/arch/loong64/loong64asm/inst.go + create mode 100644 src/cmd/vendor/golang.org/x/arch/loong64/loong64asm/tables.go + +diff --git a/src/cmd/vendor/golang.org/x/arch/loong64/loong64asm/arg.go b/src/cmd/vendor/golang.org/x/arch/loong64/loong64asm/arg.go +new file mode 100644 +index 0000000000..9496e8c34d +--- /dev/null ++++ b/src/cmd/vendor/golang.org/x/arch/loong64/loong64asm/arg.go +@@ -0,0 +1,93 @@ ++// Copyright 2022 The Go Authors. All rights reserved. ++// Use of this source code is governed by a BSD-style ++// license that can be found in the LICENSE file. 
++ ++package loong64asm ++ ++// Naming for Go decoder arguments: ++// ++// - arg_fd: a Floating Point operand register fd encoded in the fd[4:0] field ++// ++// - arg_fj: a Floating Point operand register fj encoded in the fj[9:5] field ++// ++// - arg_fk: a Floating Point operand register fk encoded in the fk[14:10] field ++// ++// - arg_fa: a Floating Point operand register fa encoded in the fa[19:15] field ++// ++// - arg_rd: a general-purpose register rd encoded in the rd[4:0] field ++// ++// - arg_rj: a general-purpose register rj encoded in the rj[9:5] field ++// ++// - arg_rk: a general-purpose register rk encoded in the rk[14:10] field ++// ++// - arg_fcsr_4_0: float control status register encoded in [4:0] field ++// ++// - arg_cd_2_0: condition flag register encoded in [2:0] field ++// ++// - arg_sa2_16_15: shift bits constant encoded in [16:15] field ++// ++// - arg_code_14_0: arg for exception process routine encoded in [14:0] field ++// ++// - arg_ui5_14_10: 5bits unsigned immediate ++// ++// - arg_lsbw: For details, please refer to chapter 2.2.3.8 of instruction manual ++// ++// - arg_msbw: For details, please refer to chapter 2.2.3.9 of instruction manual ++// ++// - arg_hint_4_0: hint field implied the prefetch type and the data should fetch to cache's level ++// 0: load to data cache level 1 ++// 8: store to data cache level 1 ++// other: no define ++// ++// - arg_si12_21_10: 12bits signed immediate ++ ++type instArg uint16 ++ ++const ( ++ _ instArg = iota ++ //1-5 ++ arg_fd ++ arg_fj ++ arg_fk ++ arg_fa ++ arg_rd ++ //6-10 ++ arg_rj ++ arg_rk ++ arg_op_4_0 ++ arg_fcsr_4_0 ++ arg_fcsr_9_5 ++ //11-15 ++ arg_csr_23_10 ++ arg_cd ++ arg_cj ++ arg_ca ++ arg_sa2_16_15 ++ //16-20 ++ arg_sa3_17_15 ++ arg_code_4_0 ++ arg_code_14_0 ++ arg_ui5_14_10 ++ arg_ui6_15_10 ++ //21-25 ++ arg_ui12_21_10 ++ arg_lsbw ++ arg_msbw ++ arg_lsbd ++ arg_msbd ++ //26-30 ++ arg_hint_4_0 ++ arg_hint_14_0 ++ arg_level_14_0 ++ arg_level_17_10 ++ arg_seq_17_10 ++ //31-35 ++ 
arg_si12_21_10 ++ arg_si14_23_10 ++ arg_si16_25_10 ++ arg_si20_24_5 ++ arg_offset_20_0 ++ //36~ ++ arg_offset_25_0 ++ arg_offset_15_0 ++) +diff --git a/src/cmd/vendor/golang.org/x/arch/loong64/loong64asm/decode.go b/src/cmd/vendor/golang.org/x/arch/loong64/loong64asm/decode.go +new file mode 100644 +index 0000000000..ac3448f170 +--- /dev/null ++++ b/src/cmd/vendor/golang.org/x/arch/loong64/loong64asm/decode.go +@@ -0,0 +1,269 @@ ++// Copyright 2022 The Go Authors. All rights reserved. ++// Use of this source code is governed by a BSD-style ++// license that can be found in the LICENSE file. ++ ++package loong64asm ++ ++import ( ++ "encoding/binary" ++ "fmt" ++) ++ ++type instArgs [5]instArg ++ ++// An instFormat describes the format of an instruction encoding. ++type instFormat struct { ++ mask uint32 ++ value uint32 ++ op Op ++ // args describe how to decode the instruction arguments. ++ // args is stored as a fixed-size array. ++ // if there are fewer than len(args) arguments, args[i] == 0 marks ++ // the end of the argument list. ++ args instArgs ++} ++ ++var ( ++ errShort = fmt.Errorf("truncated instruction") ++ errUnknown = fmt.Errorf("unknown instruction") ++) ++ ++var decoderCover []bool ++ ++func init() { ++ decoderCover = make([]bool, len(instFormats)) ++} ++ ++// Decode decodes the 4 bytes in src as a single instruction. ++func Decode(src []byte) (inst Inst, err error) { ++ if len(src) < 4 { ++ return Inst{}, errShort ++ } ++ ++ x := binary.LittleEndian.Uint32(src) ++ ++Search: ++ for i := range instFormats { ++ f := &instFormats[i] ++ ++ if (x & f.mask) != f.value { ++ continue ++ } ++ ++ // Decode args. 
++ var args Args ++ for j, aop := range f.args { ++ if aop == 0 { ++ break ++ } ++ ++ arg := decodeArg(aop, x, i) ++ if arg == nil { ++ // Cannot decode argument ++ continue Search ++ } ++ ++ args[j] = arg ++ } ++ ++ decoderCover[i] = true ++ inst = Inst{ ++ Op: f.op, ++ Args: args, ++ Enc: x, ++ } ++ return inst, nil ++ } ++ ++ return Inst{}, errUnknown ++} ++ ++// decodeArg decodes the arg described by aop from the instruction bits x. ++// It returns nil if x cannot be decoded according to aop. ++func decodeArg(aop instArg, x uint32, index int) Arg { ++ switch aop { ++ case arg_fd: ++ return F0 + Reg(x&((1<<5)-1)) ++ ++ case arg_fj: ++ return F0 + Reg((x>>5)&((1<<5)-1)) ++ ++ case arg_fk: ++ return F0 + Reg((x>>10)&((1<<5)-1)) ++ ++ case arg_fa: ++ return F0 + Reg((x>>15)&((1<<5)-1)) ++ ++ case arg_rd: ++ return R0 + Reg(x&((1<<5)-1)) ++ ++ case arg_rj: ++ return R0 + Reg((x>>5)&((1<<5)-1)) ++ ++ case arg_rk: ++ return R0 + Reg((x>>10)&((1<<5)-1)) ++ ++ case arg_fcsr_4_0: ++ return FCSR0 + Fcsr(x&((1<<5)-1)) ++ ++ case arg_fcsr_9_5: ++ return FCSR0 + Fcsr((x>>5)&((1<<5)-1)) ++ ++ case arg_cd: ++ return FCC0 + Fcc(x&((1<<3)-1)) ++ ++ case arg_cj: ++ return FCC0 + Fcc((x>>5)&((1<<3)-1)) ++ ++ case arg_ca: ++ return FCC0 + Fcc((x>>15)&((1<<3)-1)) ++ ++ case arg_op_4_0: ++ tmp := x & ((1 << 5) - 1) ++ return Uimm{tmp, false} ++ ++ case arg_csr_23_10: ++ tmp := (x >> 10) & ((1 << 14) - 1) ++ return Uimm{tmp, false} ++ ++ case arg_sa2_16_15: ++ f := &instFormats[index] ++ tmp := SaSimm((x >> 15) & ((1 << 2) - 1)) ++ if (f.op == ALSL_D) || (f.op == ALSL_W) || (f.op == ALSL_WU) { ++ return tmp + 1 ++ } else { ++ return tmp + 0 ++ } ++ ++ case arg_sa3_17_15: ++ return SaSimm((x >> 15) & ((1 << 3) - 1)) ++ ++ case arg_code_4_0: ++ return CodeSimm(x & ((1 << 5) - 1)) ++ ++ case arg_code_14_0: ++ return CodeSimm(x & ((1 << 15) - 1)) ++ ++ case arg_ui5_14_10: ++ tmp := (x >> 10) & ((1 << 5) - 1) ++ return Uimm{tmp, false} ++ ++ case arg_ui6_15_10: ++ tmp := (x >> 10) & ((1 << 
6) - 1) ++ return Uimm{tmp, false} ++ ++ case arg_ui12_21_10: ++ tmp := ((x >> 10) & ((1 << 12) - 1) & 0xfff) ++ return Uimm{tmp, false} ++ ++ case arg_lsbw: ++ tmp := (x >> 10) & ((1 << 5) - 1) ++ return Uimm{tmp, false} ++ ++ case arg_msbw: ++ tmp := (x >> 16) & ((1 << 5) - 1) ++ return Uimm{tmp, false} ++ ++ case arg_lsbd: ++ tmp := (x >> 10) & ((1 << 6) - 1) ++ return Uimm{tmp, false} ++ ++ case arg_msbd: ++ tmp := (x >> 16) & ((1 << 6) - 1) ++ return Uimm{tmp, false} ++ ++ case arg_hint_4_0: ++ tmp := int16(x & ((1 << 5) - 1)) ++ return Simm16{tmp, 4} ++ ++ case arg_hint_14_0: ++ tmp := int16(x & ((1 << 15) - 1)) ++ return Simm16{tmp, 15} ++ ++ case arg_level_14_0: ++ tmp := x & ((1 << 15) - 1) ++ return Uimm{tmp, false} ++ ++ case arg_level_17_10: ++ tmp := (x >> 10) & ((1 << 8) - 1) ++ return Uimm{tmp, false} ++ ++ case arg_seq_17_10: ++ tmp := (x >> 10) & ((1 << 8) - 1) ++ return Uimm{tmp, false} ++ ++ case arg_si12_21_10: ++ var tmp int16 ++ ++ //no int12, so sign-extend a 12-bit signed to 16-bit signed ++ if (x & 0x200000) == 0x200000 { ++ tmp = int16(((x >> 10) & ((1 << 12) - 1)) | 0xf000) ++ } else { ++ tmp = int16(((x >> 10) & ((1 << 12) - 1)) | 0x0000) ++ } ++ return Simm16{tmp, 12} ++ ++ case arg_si14_23_10: ++ var tmp int32 ++ if (x & 0x800000) == 0x800000 { ++ tmp = int32((((x >> 10) & ((1 << 14) - 1)) << 2) | 0xffff0000) ++ } else { ++ tmp = int32((((x >> 10) & ((1 << 14) - 1)) << 2) | 0x00000000) ++ } ++ return Simm32{tmp, 16} ++ ++ case arg_si16_25_10: ++ var tmp int32 ++ ++ if (x & 0x2000000) == 0x2000000 { ++ tmp = int32(((x >> 10) & ((1 << 16) - 1)) | 0xffff0000) ++ } else { ++ tmp = int32(((x >> 10) & ((1 << 16) - 1)) | 0x00000000) ++ } ++ ++ return Simm32{tmp, 16} ++ ++ case arg_si20_24_5: ++ var tmp int32 ++ if (x & 0x1000000) == 0x1000000 { ++ tmp = int32(((x >> 5) & ((1 << 20) - 1)) | 0xfff00000) ++ } else { ++ tmp = int32(((x >> 5) & ((1 << 20) - 1)) | 0x00000000) ++ } ++ return Simm32{tmp, 20} ++ ++ case arg_offset_20_0: ++ var tmp 
int32 ++ ++ if (x & 0x1000000) == 0x1000000 { ++ tmp = int32((((x << 16) | ((x >> 10) & ((1 << 16) - 1))) & ((1 << 21) - 1)) << 2) ++ } else { ++ tmp = int32((((x << 16) | ((x >> 10) & ((1 << 16) - 1))) & ((1 << 21) - 1)) << 2) ++ } ++ ++ return OffsetSimm{tmp, 28} ++ ++ case arg_offset_15_0: ++ var tmp int32 ++ if (x & 0x2000000) == 0x2000000 { ++ tmp = int32((((x >> 10) & ((1 << 16) - 1)) << 2) | 0xfffc0000) ++ } else { ++ tmp = int32((((x >> 10) & ((1 << 16) - 1)) << 2) | 0x00000000) ++ } ++ ++ return OffsetSimm{tmp, 18} ++ ++ case arg_offset_25_0: ++ var tmp int32 ++ ++ if (x & 0x200) == 0x200 { ++ tmp = int32(((((x << 16) | ((x >> 10) & ((1 << 16) - 1))) & ((1 << 26) - 1)) << 2) | 0xf0000000) ++ } else { ++ tmp = int32(((((x << 16) | ((x >> 10) & ((1 << 16) - 1))) & ((1 << 26) - 1)) << 2) | 0x00000000) ++ } ++ ++ return OffsetSimm{tmp, 28} ++ default: ++ return nil ++ } ++} +diff --git a/src/cmd/vendor/golang.org/x/arch/loong64/loong64asm/gnu.go b/src/cmd/vendor/golang.org/x/arch/loong64/loong64asm/gnu.go +new file mode 100644 +index 0000000000..fd6bcffde1 +--- /dev/null ++++ b/src/cmd/vendor/golang.org/x/arch/loong64/loong64asm/gnu.go +@@ -0,0 +1,16 @@ ++// Copyright 2022 The Go Authors. All rights reserved. ++// Use of this source code is governed by a BSD-style ++// license that can be found in the LICENSE file. ++ ++package loong64asm ++ ++import ( ++ "strings" ++) ++ ++// GNUSyntax returns the GNU assembler syntax for the instruction, as defined by GNU binutils. ++// This form typically matches the syntax defined in the Loong64 Reference Manual. 
See ++// https://loongson.github.io/LoongArch-Documentation/LoongArch-Vol1-EN.html ++func GNUSyntax(inst Inst) string { ++ return strings.ToLower(inst.String()) ++} +diff --git a/src/cmd/vendor/golang.org/x/arch/loong64/loong64asm/inst.go b/src/cmd/vendor/golang.org/x/arch/loong64/loong64asm/inst.go +new file mode 100644 +index 0000000000..187a46fc01 +--- /dev/null ++++ b/src/cmd/vendor/golang.org/x/arch/loong64/loong64asm/inst.go +@@ -0,0 +1,296 @@ ++// Copyright 2022 The Go Authors. All rights reserved. ++// Use of this source code is governed by a BSD-style ++// license that can be found in the LICENSE file. ++ ++package loong64asm ++ ++import ( ++ "fmt" ++ "strconv" ++ "strings" ++ "unsafe" ++) ++ ++// An Inst is a single instruction. ++type Inst struct { ++ Op Op // Opcode mnemonic ++ Enc uint32 // Raw encoding bits. ++ Args Args // Instruction arguments, in Loong64 manual order. ++} ++ ++func (i Inst) String() string { ++ var args []string ++ ++ for _, arg := range i.Args { ++ if arg == nil { ++ break ++ } ++ ++ args = append(args, arg.String()) ++ } ++ ++ str2 := strings.Join(args, ", ") ++ if str2 == "" { ++ str := i.Op.String() ++ return strings.Replace(str, ", (", "(", -1) ++ } else { ++ str := i.Op.String() + " " + strings.Join(args, ", ") ++ return strings.Replace(str, ", (", "(", -1) ++ } ++} ++ ++// An Op is an Loong64 opcode. ++type Op uint16 ++ ++// NOTE: The actual Op values are defined in tables.go. ++// They are chosen to simplify instruction decoding and ++// are not a dense packing from 0 to N, although the ++// density is high, probably at least 90%. ++func (op Op) String() string { ++ if (op >= Op(len(opstr))) || (opstr[op] == "") { ++ return fmt.Sprintf("Op(%d)", int(op)) ++ } ++ ++ return opstr[op] ++} ++ ++// An Args holds the instruction arguments. ++// If an instruction has fewer than 5 arguments, ++// the final elements in the array are nil. 
++type Args [5]Arg ++ ++// An Arg is a single instruction argument ++type Arg interface { ++ String() string ++} ++ ++// A Reg is a single register. ++// The zero value denotes R0, not the absence of a register. ++type Reg uint16 ++ ++const ( ++ //_ Reg = iota ++ ++ // General-purpose register ++ R0 Reg = iota ++ R1 ++ R2 ++ R3 ++ R4 ++ R5 ++ R6 ++ R7 ++ R8 ++ R9 ++ R10 ++ R11 ++ R12 ++ R13 ++ R14 ++ R15 ++ R16 ++ R17 ++ R18 ++ R19 ++ R20 ++ R21 ++ R22 ++ R23 ++ R24 ++ R25 ++ R26 ++ R27 ++ R28 ++ R29 ++ R30 ++ R31 ++ ++ // Float point register ++ F0 ++ F1 ++ F2 ++ F3 ++ F4 ++ F5 ++ F6 ++ F7 ++ F8 ++ F9 ++ F10 ++ F11 ++ F12 ++ F13 ++ F14 ++ F15 ++ F16 ++ F17 ++ F18 ++ F19 ++ F20 ++ F21 ++ F22 ++ F23 ++ F24 ++ F25 ++ F26 ++ F27 ++ F28 ++ F29 ++ F30 ++ F31 ++) ++ ++func (r Reg) String() string { ++ switch { ++ case r == R0: ++ return "$zero" ++ ++ case r == R1: ++ return "$ra" ++ ++ case r == R2: ++ return "$tp" ++ ++ case r == R3: ++ return "$sp" ++ ++ case (r >= R4) && (r <= R11): ++ return fmt.Sprintf("$a%d", int(r-R4)) ++ ++ case (r >= R12) && (r <= R20): ++ return fmt.Sprintf("$t%d", int(r-R12)) ++ ++ case r == R21: ++ return "$r21" ++ ++ case r == R22: ++ return "$fp" ++ ++ case (r >= R23) && (r <= R31): ++ return fmt.Sprintf("$s%d", int(r-R23)) ++ ++ case (r >= F0) && (r <= F7): ++ return fmt.Sprintf("$fa%d", int(r-F0)) ++ ++ case (r >= F8) && (r <= F23): ++ return fmt.Sprintf("$ft%d", int(r-F8)) ++ ++ case (r >= F24) && (r <= F31): ++ return fmt.Sprintf("$fs%d", int(r-F24)) ++ ++ default: ++ return fmt.Sprintf("Unknown(%d)", int(r)) ++ } ++} ++ ++// float control status register ++type Fcsr uint8 ++ ++const ( ++ //_ Fcsr = iota ++ FCSR0 Fcsr = iota ++ FCSR1 ++ FCSR2 ++ FCSR3 ++) ++ ++func (f Fcsr) String() string { ++ switch f { ++ case FCSR0: ++ return fmt.Sprintf("$zero") ++ case FCSR1, FCSR2, FCSR3: ++ return fmt.Sprintf("$r%d", uint8(f)) ++ } ++ ++ return fmt.Sprintf("$unknow%d", uint8(f)) ++} ++ ++// float condition flags register ++type Fcc uint8 ++ 
++const ( ++ //_ Fcc = iota ++ FCC0 Fcc = iota ++ FCC1 ++ FCC2 ++ FCC3 ++ FCC4 ++ FCC5 ++ FCC6 ++ FCC7 ++) ++ ++func (f Fcc) String() string { ++ return fmt.Sprintf("$fcc%d", uint8(f)) ++} ++ ++// An Imm is an integer constant. ++type Uimm struct { ++ Imm uint32 ++ Decimal bool ++} ++ ++func (i Uimm) String() string { ++ if i.Decimal == true { ++ return fmt.Sprintf("%d", i.Imm) ++ } else { ++ return fmt.Sprintf("%#x", i.Imm) ++ } ++} ++ ++type Simm16 struct { ++ Imm int16 ++ Width uint8 ++} ++ ++func (si Simm16) String() string { ++ if si.Imm == 0 { ++ return fmt.Sprintf("%#x", int(si.Imm)) ++ } else { ++ hex := int16(si.Imm & ((1 << si.Width) - 1)) ++ str := strconv.FormatUint(uint64(*(*int16)(unsafe.Pointer(&hex))), 16) ++ return fmt.Sprintf("%d(0x%s)", int16(si.Imm), str) ++ } ++} ++ ++type Simm32 struct { ++ Imm int32 ++ Width uint8 ++} ++ ++func (si Simm32) String() string { ++ if si.Imm == 0 { ++ return fmt.Sprintf("%#x", int(si.Imm)) ++ } else { ++ hex := int32(si.Imm & ((1 << si.Width) - 1)) ++ str := strconv.FormatUint(uint64(*(*int32)(unsafe.Pointer(&hex))), 16) ++ return fmt.Sprintf("%d(0x%s)", int32(si.Imm), str) ++ } ++} ++ ++type OffsetSimm struct { ++ Imm int32 ++ Width uint8 ++} ++ ++func (o OffsetSimm) String() string { ++ if o.Imm == 0 { ++ return fmt.Sprintf("%#x", int(o.Imm)) ++ } else { ++ hex := int32(o.Imm & ((1 << o.Width) - 1)) ++ str := strconv.FormatUint(uint64(*(*int32)(unsafe.Pointer(&hex))), 16) ++ return fmt.Sprintf("%d(0x%s)", int32(o.Imm), str) ++ } ++} ++ ++type SaSimm int16 ++ ++func (s SaSimm) String() string { ++ return fmt.Sprintf("%#x", int(s)) ++} ++ ++type CodeSimm int16 ++ ++func (c CodeSimm) String() string { ++ return fmt.Sprintf("%#x", int(c)) ++} +diff --git a/src/cmd/vendor/golang.org/x/arch/loong64/loong64asm/tables.go b/src/cmd/vendor/golang.org/x/arch/loong64/loong64asm/tables.go +new file mode 100644 +index 0000000000..f90e9295f1 +--- /dev/null ++++ b/src/cmd/vendor/golang.org/x/arch/loong64/loong64asm/tables.go 
+@@ -0,0 +1,1513 @@ ++// Generated by Loong64 internal tool ++// DO NOT EDIT ++// Copyright 2022 The Go Authors. All rights reserved. ++// Use of this source code is governed by a BSD-style ++// license that can be found in the LICENSE file. ++ ++package loong64asm ++ ++const ( ++ _ Op = iota ++ ADDI_D ++ ADDI_W ++ ADDU16I_D ++ ADD_D ++ ADD_W ++ ALSL_D ++ ALSL_W ++ ALSL_WU ++ AMADD_D ++ AMADD_DB_D ++ AMADD_DB_W ++ AMADD_W ++ AMAND_D ++ AMAND_DB_D ++ AMAND_DB_W ++ AMAND_W ++ AMMAX_D ++ AMMAX_DB_D ++ AMMAX_DB_DU ++ AMMAX_DB_W ++ AMMAX_DB_WU ++ AMMAX_DU ++ AMMAX_W ++ AMMAX_WU ++ AMMIN_D ++ AMMIN_DB_D ++ AMMIN_DB_DU ++ AMMIN_DB_W ++ AMMIN_DB_WU ++ AMMIN_DU ++ AMMIN_W ++ AMMIN_WU ++ AMOR_D ++ AMOR_DB_D ++ AMOR_DB_W ++ AMOR_W ++ AMSWAP_D ++ AMSWAP_DB_D ++ AMSWAP_DB_W ++ AMSWAP_W ++ AMXOR_D ++ AMXOR_DB_D ++ AMXOR_DB_W ++ AMXOR_W ++ AND ++ ANDI ++ ANDN ++ ASRTGT_D ++ ASRTLE_D ++ B ++ BCEQZ ++ BCNEZ ++ BEQ ++ BEQZ ++ BGE ++ BGEU ++ BITREV_4B ++ BITREV_8B ++ BITREV_D ++ BITREV_W ++ BL ++ BLT ++ BLTU ++ BNE ++ BNEZ ++ BREAK ++ BSTRINS_D ++ BSTRINS_W ++ BSTRPICK_D ++ BSTRPICK_W ++ BYTEPICK_D ++ BYTEPICK_W ++ CACOP ++ CLO_D ++ CLO_W ++ CLZ_D ++ CLZ_W ++ CPUCFG ++ CRCC_W_B_W ++ CRCC_W_D_W ++ CRCC_W_H_W ++ CRCC_W_W_W ++ CRC_W_B_W ++ CRC_W_D_W ++ CRC_W_H_W ++ CRC_W_W_W ++ CSRRD ++ CSRWR ++ CSRXCHG ++ CTO_D ++ CTO_W ++ CTZ_D ++ CTZ_W ++ DBAR ++ DBCL ++ DIV_D ++ DIV_DU ++ DIV_W ++ DIV_WU ++ ERTN ++ EXT_W_B ++ EXT_W_H ++ FABS_D ++ FABS_S ++ FADD_D ++ FADD_S ++ FCLASS_D ++ FCLASS_S ++ FCMP_CAF_D ++ FCMP_CAF_S ++ FCMP_CEQ_D ++ FCMP_CEQ_S ++ FCMP_CLE_D ++ FCMP_CLE_S ++ FCMP_CLT_D ++ FCMP_CLT_S ++ FCMP_CNE_D ++ FCMP_CNE_S ++ FCMP_COR_D ++ FCMP_COR_S ++ FCMP_CUEQ_D ++ FCMP_CUEQ_S ++ FCMP_CULE_D ++ FCMP_CULE_S ++ FCMP_CULT_D ++ FCMP_CULT_S ++ FCMP_CUNE_D ++ FCMP_CUNE_S ++ FCMP_CUN_D ++ FCMP_CUN_S ++ FCMP_SAF_D ++ FCMP_SAF_S ++ FCMP_SEQ_D ++ FCMP_SEQ_S ++ FCMP_SLE_D ++ FCMP_SLE_S ++ FCMP_SLT_D ++ FCMP_SLT_S ++ FCMP_SNE_D ++ FCMP_SNE_S ++ FCMP_SOR_D ++ FCMP_SOR_S ++ FCMP_SUEQ_D ++ 
FCMP_SUEQ_S ++ FCMP_SULE_D ++ FCMP_SULE_S ++ FCMP_SULT_D ++ FCMP_SULT_S ++ FCMP_SUNE_D ++ FCMP_SUNE_S ++ FCMP_SUN_D ++ FCMP_SUN_S ++ FCOPYSIGN_D ++ FCOPYSIGN_S ++ FCVT_D_S ++ FCVT_S_D ++ FDIV_D ++ FDIV_S ++ FFINT_D_L ++ FFINT_D_W ++ FFINT_S_L ++ FFINT_S_W ++ FLDGT_D ++ FLDGT_S ++ FLDLE_D ++ FLDLE_S ++ FLDX_D ++ FLDX_S ++ FLD_D ++ FLD_S ++ FLOGB_D ++ FLOGB_S ++ FMADD_D ++ FMADD_S ++ FMAXA_D ++ FMAXA_S ++ FMAX_D ++ FMAX_S ++ FMINA_D ++ FMINA_S ++ FMIN_D ++ FMIN_S ++ FMOV_D ++ FMOV_S ++ FMSUB_D ++ FMSUB_S ++ FMUL_D ++ FMUL_S ++ FNEG_D ++ FNEG_S ++ FNMADD_D ++ FNMADD_S ++ FNMSUB_D ++ FNMSUB_S ++ FRECIP_D ++ FRECIP_S ++ FRINT_D ++ FRINT_S ++ FRSQRT_D ++ FRSQRT_S ++ FSCALEB_D ++ FSCALEB_S ++ FSEL ++ FSQRT_D ++ FSQRT_S ++ FSTGT_D ++ FSTGT_S ++ FSTLE_D ++ FSTLE_S ++ FSTX_D ++ FSTX_S ++ FST_D ++ FST_S ++ FSUB_D ++ FSUB_S ++ FTINTRM_L_D ++ FTINTRM_L_S ++ FTINTRM_W_D ++ FTINTRM_W_S ++ FTINTRNE_L_D ++ FTINTRNE_L_S ++ FTINTRNE_W_D ++ FTINTRNE_W_S ++ FTINTRP_L_D ++ FTINTRP_L_S ++ FTINTRP_W_D ++ FTINTRP_W_S ++ FTINTRZ_L_D ++ FTINTRZ_L_S ++ FTINTRZ_W_D ++ FTINTRZ_W_S ++ FTINT_L_D ++ FTINT_L_S ++ FTINT_W_D ++ FTINT_W_S ++ IBAR ++ IDLE ++ INVTLB ++ IOCSRRD_B ++ IOCSRRD_D ++ IOCSRRD_H ++ IOCSRRD_W ++ IOCSRWR_B ++ IOCSRWR_D ++ IOCSRWR_H ++ IOCSRWR_W ++ JIRL ++ LDDIR ++ LDGT_B ++ LDGT_D ++ LDGT_H ++ LDGT_W ++ LDLE_B ++ LDLE_D ++ LDLE_H ++ LDLE_W ++ LDPTE ++ LDPTR_D ++ LDPTR_W ++ LDX_B ++ LDX_BU ++ LDX_D ++ LDX_H ++ LDX_HU ++ LDX_W ++ LDX_WU ++ LD_B ++ LD_BU ++ LD_D ++ LD_H ++ LD_HU ++ LD_W ++ LD_WU ++ LL_D ++ LL_W ++ LU12I_W ++ LU32I_D ++ LU52I_D ++ MASKEQZ ++ MASKNEZ ++ MOD_D ++ MOD_DU ++ MOD_W ++ MOD_WU ++ MOVCF2FR ++ MOVCF2GR ++ MOVFCSR2GR ++ MOVFR2CF ++ MOVFR2GR_D ++ MOVFR2GR_S ++ MOVFRH2GR_S ++ MOVGR2CF ++ MOVGR2FCSR ++ MOVGR2FRH_W ++ MOVGR2FR_D ++ MOVGR2FR_W ++ MULH_D ++ MULH_DU ++ MULH_W ++ MULH_WU ++ MULW_D_W ++ MULW_D_WU ++ MUL_D ++ MUL_W ++ NOR ++ OR ++ ORI ++ ORN ++ PCADDI ++ PCADDU12I ++ PCADDU18I ++ PCALAU12I ++ PRELD ++ PRELDX ++ RDTIMEH_W ++ RDTIMEL_W ++ RDTIME_D ++ 
REVB_2H ++ REVB_2W ++ REVB_4H ++ REVB_D ++ REVH_2W ++ REVH_D ++ ROTRI_D ++ ROTRI_W ++ ROTR_D ++ ROTR_W ++ SC_D ++ SC_W ++ SLLI_D ++ SLLI_W ++ SLL_D ++ SLL_W ++ SLT ++ SLTI ++ SLTU ++ SLTUI ++ SRAI_D ++ SRAI_W ++ SRA_D ++ SRA_W ++ SRLI_D ++ SRLI_W ++ SRL_D ++ SRL_W ++ STGT_B ++ STGT_D ++ STGT_H ++ STGT_W ++ STLE_B ++ STLE_D ++ STLE_H ++ STLE_W ++ STPTR_D ++ STPTR_W ++ STX_B ++ STX_D ++ STX_H ++ STX_W ++ ST_B ++ ST_D ++ ST_H ++ ST_W ++ SUB_D ++ SUB_W ++ SYSCALL ++ TLBCLR ++ TLBFILL ++ TLBFLUSH ++ TLBRD ++ TLBSRCH ++ TLBWR ++ XOR ++ XORI ++) ++ ++var opstr = [...]string{ ++ ADDI_D: "ADDI.D", ++ ADDI_W: "ADDI.W", ++ ADDU16I_D: "ADDU16I.D", ++ ADD_D: "ADD.D", ++ ADD_W: "ADD.W", ++ ALSL_D: "ALSL.D", ++ ALSL_W: "ALSL.W", ++ ALSL_WU: "ALSL.WU", ++ AMADD_D: "AMADD.D", ++ AMADD_DB_D: "AMADD_DB.D", ++ AMADD_DB_W: "AMADD_DB.W", ++ AMADD_W: "AMADD.W", ++ AMAND_D: "AMAND.D", ++ AMAND_DB_D: "AMAND_DB.D", ++ AMAND_DB_W: "AMAND_DB.W", ++ AMAND_W: "AMAND.W", ++ AMMAX_D: "AMMAX.D", ++ AMMAX_DB_D: "AMMAX_DB.D", ++ AMMAX_DB_DU: "AMMAX_DB.DU", ++ AMMAX_DB_W: "AMMAX_DB.W", ++ AMMAX_DB_WU: "AMMAX_DB.WU", ++ AMMAX_DU: "AMMAX.DU", ++ AMMAX_W: "AMMAX.W", ++ AMMAX_WU: "AMMAX.WU", ++ AMMIN_D: "AMMIN.D", ++ AMMIN_DB_D: "AMMIN_DB.D", ++ AMMIN_DB_DU: "AMMIN_DB.DU", ++ AMMIN_DB_W: "AMMIN_DB.W", ++ AMMIN_DB_WU: "AMMIN_DB.WU", ++ AMMIN_DU: "AMMIN.DU", ++ AMMIN_W: "AMMIN.W", ++ AMMIN_WU: "AMMIN.WU", ++ AMOR_D: "AMOR.D", ++ AMOR_DB_D: "AMOR_DB.D", ++ AMOR_DB_W: "AMOR_DB.W", ++ AMOR_W: "AMOR.W", ++ AMSWAP_D: "AMSWAP.D", ++ AMSWAP_DB_D: "AMSWAP_DB.D", ++ AMSWAP_DB_W: "AMSWAP_DB.W", ++ AMSWAP_W: "AMSWAP.W", ++ AMXOR_D: "AMXOR.D", ++ AMXOR_DB_D: "AMXOR_DB.D", ++ AMXOR_DB_W: "AMXOR_DB.W", ++ AMXOR_W: "AMXOR.W", ++ AND: "AND", ++ ANDI: "ANDI", ++ ANDN: "ANDN", ++ ASRTGT_D: "ASRTGT.D", ++ ASRTLE_D: "ASRTLE.D", ++ B: "B", ++ BCEQZ: "BCEQZ", ++ BCNEZ: "BCNEZ", ++ BEQ: "BEQ", ++ BEQZ: "BEQZ", ++ BGE: "BGE", ++ BGEU: "BGEU", ++ BITREV_4B: "BITREV.4B", ++ BITREV_8B: "BITREV.8B", ++ BITREV_D: "BITREV.D", ++ 
BITREV_W: "BITREV.W", ++ BL: "BL", ++ BLT: "BLT", ++ BLTU: "BLTU", ++ BNE: "BNE", ++ BNEZ: "BNEZ", ++ BREAK: "BREAK", ++ BSTRINS_D: "BSTRINS.D", ++ BSTRINS_W: "BSTRINS.W", ++ BSTRPICK_D: "BSTRPICK.D", ++ BSTRPICK_W: "BSTRPICK.W", ++ BYTEPICK_D: "BYTEPICK.D", ++ BYTEPICK_W: "BYTEPICK.W", ++ CACOP: "CACOP", ++ CLO_D: "CLO.D", ++ CLO_W: "CLO.W", ++ CLZ_D: "CLZ.D", ++ CLZ_W: "CLZ.W", ++ CPUCFG: "CPUCFG", ++ CRCC_W_B_W: "CRCC.W.B.W", ++ CRCC_W_D_W: "CRCC.W.D.W", ++ CRCC_W_H_W: "CRCC.W.H.W", ++ CRCC_W_W_W: "CRCC.W.W.W", ++ CRC_W_B_W: "CRC.W.B.W", ++ CRC_W_D_W: "CRC.W.D.W", ++ CRC_W_H_W: "CRC.W.H.W", ++ CRC_W_W_W: "CRC.W.W.W", ++ CSRRD: "CSRRD", ++ CSRWR: "CSRWR", ++ CSRXCHG: "CSRXCHG", ++ CTO_D: "CTO.D", ++ CTO_W: "CTO.W", ++ CTZ_D: "CTZ.D", ++ CTZ_W: "CTZ.W", ++ DBAR: "DBAR", ++ DBCL: "DBCL", ++ DIV_D: "DIV.D", ++ DIV_DU: "DIV.DU", ++ DIV_W: "DIV.W", ++ DIV_WU: "DIV.WU", ++ ERTN: "ERTN", ++ EXT_W_B: "EXT.W.B", ++ EXT_W_H: "EXT.W.H", ++ FABS_D: "FABS.D", ++ FABS_S: "FABS.S", ++ FADD_D: "FADD.D", ++ FADD_S: "FADD.S", ++ FCLASS_D: "FCLASS.D", ++ FCLASS_S: "FCLASS.S", ++ FCMP_CAF_D: "FCMP.CAF.D", ++ FCMP_CAF_S: "FCMP.CAF.S", ++ FCMP_CEQ_D: "FCMP.CEQ.D", ++ FCMP_CEQ_S: "FCMP.CEQ.S", ++ FCMP_CLE_D: "FCMP.CLE.D", ++ FCMP_CLE_S: "FCMP.CLE.S", ++ FCMP_CLT_D: "FCMP.CLT.D", ++ FCMP_CLT_S: "FCMP.CLT.S", ++ FCMP_CNE_D: "FCMP.CNE.D", ++ FCMP_CNE_S: "FCMP.CNE.S", ++ FCMP_COR_D: "FCMP.COR.D", ++ FCMP_COR_S: "FCMP.COR.S", ++ FCMP_CUEQ_D: "FCMP.CUEQ.D", ++ FCMP_CUEQ_S: "FCMP.CUEQ.S", ++ FCMP_CULE_D: "FCMP.CULE.D", ++ FCMP_CULE_S: "FCMP.CULE.S", ++ FCMP_CULT_D: "FCMP.CULT.D", ++ FCMP_CULT_S: "FCMP.CULT.S", ++ FCMP_CUNE_D: "FCMP.CUNE.D", ++ FCMP_CUNE_S: "FCMP.CUNE.S", ++ FCMP_CUN_D: "FCMP.CUN.D", ++ FCMP_CUN_S: "FCMP.CUN.S", ++ FCMP_SAF_D: "FCMP.SAF.D", ++ FCMP_SAF_S: "FCMP.SAF.S", ++ FCMP_SEQ_D: "FCMP.SEQ.D", ++ FCMP_SEQ_S: "FCMP.SEQ.S", ++ FCMP_SLE_D: "FCMP.SLE.D", ++ FCMP_SLE_S: "FCMP.SLE.S", ++ FCMP_SLT_D: "FCMP.SLT.D", ++ FCMP_SLT_S: "FCMP.SLT.S", ++ FCMP_SNE_D: "FCMP.SNE.D", ++ 
FCMP_SNE_S: "FCMP.SNE.S", ++ FCMP_SOR_D: "FCMP.SOR.D", ++ FCMP_SOR_S: "FCMP.SOR.S", ++ FCMP_SUEQ_D: "FCMP.SUEQ.D", ++ FCMP_SUEQ_S: "FCMP.SUEQ.S", ++ FCMP_SULE_D: "FCMP.SULE.D", ++ FCMP_SULE_S: "FCMP.SULE.S", ++ FCMP_SULT_D: "FCMP.SULT.D", ++ FCMP_SULT_S: "FCMP.SULT.S", ++ FCMP_SUNE_D: "FCMP.SUNE.D", ++ FCMP_SUNE_S: "FCMP.SUNE.S", ++ FCMP_SUN_D: "FCMP.SUN.D", ++ FCMP_SUN_S: "FCMP.SUN.S", ++ FCOPYSIGN_D: "FCOPYSIGN.D", ++ FCOPYSIGN_S: "FCOPYSIGN.S", ++ FCVT_D_S: "FCVT.D.S", ++ FCVT_S_D: "FCVT.S.D", ++ FDIV_D: "FDIV.D", ++ FDIV_S: "FDIV.S", ++ FFINT_D_L: "FFINT.D.L", ++ FFINT_D_W: "FFINT.D.W", ++ FFINT_S_L: "FFINT.S.L", ++ FFINT_S_W: "FFINT.S.W", ++ FLDGT_D: "FLDGT.D", ++ FLDGT_S: "FLDGT.S", ++ FLDLE_D: "FLDLE.D", ++ FLDLE_S: "FLDLE.S", ++ FLDX_D: "FLDX.D", ++ FLDX_S: "FLDX.S", ++ FLD_D: "FLD.D", ++ FLD_S: "FLD.S", ++ FLOGB_D: "FLOGB.D", ++ FLOGB_S: "FLOGB.S", ++ FMADD_D: "FMADD.D", ++ FMADD_S: "FMADD.S", ++ FMAXA_D: "FMAXA.D", ++ FMAXA_S: "FMAXA.S", ++ FMAX_D: "FMAX.D", ++ FMAX_S: "FMAX.S", ++ FMINA_D: "FMINA.D", ++ FMINA_S: "FMINA.S", ++ FMIN_D: "FMIN.D", ++ FMIN_S: "FMIN.S", ++ FMOV_D: "FMOV.D", ++ FMOV_S: "FMOV.S", ++ FMSUB_D: "FMSUB.D", ++ FMSUB_S: "FMSUB.S", ++ FMUL_D: "FMUL.D", ++ FMUL_S: "FMUL.S", ++ FNEG_D: "FNEG.D", ++ FNEG_S: "FNEG.S", ++ FNMADD_D: "FNMADD.D", ++ FNMADD_S: "FNMADD.S", ++ FNMSUB_D: "FNMSUB.D", ++ FNMSUB_S: "FNMSUB.S", ++ FRECIP_D: "FRECIP.D", ++ FRECIP_S: "FRECIP.S", ++ FRINT_D: "FRINT.D", ++ FRINT_S: "FRINT.S", ++ FRSQRT_D: "FRSQRT.D", ++ FRSQRT_S: "FRSQRT.S", ++ FSCALEB_D: "FSCALEB.D", ++ FSCALEB_S: "FSCALEB.S", ++ FSEL: "FSEL", ++ FSQRT_D: "FSQRT.D", ++ FSQRT_S: "FSQRT.S", ++ FSTGT_D: "FSTGT.D", ++ FSTGT_S: "FSTGT.S", ++ FSTLE_D: "FSTLE.D", ++ FSTLE_S: "FSTLE.S", ++ FSTX_D: "FSTX.D", ++ FSTX_S: "FSTX.S", ++ FST_D: "FST.D", ++ FST_S: "FST.S", ++ FSUB_D: "FSUB.D", ++ FSUB_S: "FSUB.S", ++ FTINTRM_L_D: "FTINTRM.L.D", ++ FTINTRM_L_S: "FTINTRM.L.S", ++ FTINTRM_W_D: "FTINTRM.W.D", ++ FTINTRM_W_S: "FTINTRM.W.S", ++ FTINTRNE_L_D: "FTINTRNE.L.D", 
++ FTINTRNE_L_S: "FTINTRNE.L.S", ++ FTINTRNE_W_D: "FTINTRNE.W.D", ++ FTINTRNE_W_S: "FTINTRNE.W.S", ++ FTINTRP_L_D: "FTINTRP.L.D", ++ FTINTRP_L_S: "FTINTRP.L.S", ++ FTINTRP_W_D: "FTINTRP.W.D", ++ FTINTRP_W_S: "FTINTRP.W.S", ++ FTINTRZ_L_D: "FTINTRZ.L.D", ++ FTINTRZ_L_S: "FTINTRZ.L.S", ++ FTINTRZ_W_D: "FTINTRZ.W.D", ++ FTINTRZ_W_S: "FTINTRZ.W.S", ++ FTINT_L_D: "FTINT.L.D", ++ FTINT_L_S: "FTINT.L.S", ++ FTINT_W_D: "FTINT.W.D", ++ FTINT_W_S: "FTINT.W.S", ++ IBAR: "IBAR", ++ IDLE: "IDLE", ++ INVTLB: "INVTLB", ++ IOCSRRD_B: "IOCSRRD.B", ++ IOCSRRD_D: "IOCSRRD.D", ++ IOCSRRD_H: "IOCSRRD.H", ++ IOCSRRD_W: "IOCSRRD.W", ++ IOCSRWR_B: "IOCSRWR.B", ++ IOCSRWR_D: "IOCSRWR.D", ++ IOCSRWR_H: "IOCSRWR.H", ++ IOCSRWR_W: "IOCSRWR.W", ++ JIRL: "JIRL", ++ LDDIR: "LDDIR", ++ LDGT_B: "LDGT.B", ++ LDGT_D: "LDGT.D", ++ LDGT_H: "LDGT.H", ++ LDGT_W: "LDGT.W", ++ LDLE_B: "LDLE.B", ++ LDLE_D: "LDLE.D", ++ LDLE_H: "LDLE.H", ++ LDLE_W: "LDLE.W", ++ LDPTE: "LDPTE", ++ LDPTR_D: "LDPTR.D", ++ LDPTR_W: "LDPTR.W", ++ LDX_B: "LDX.B", ++ LDX_BU: "LDX.BU", ++ LDX_D: "LDX.D", ++ LDX_H: "LDX.H", ++ LDX_HU: "LDX.HU", ++ LDX_W: "LDX.W", ++ LDX_WU: "LDX.WU", ++ LD_B: "LD.B", ++ LD_BU: "LD.BU", ++ LD_D: "LD.D", ++ LD_H: "LD.H", ++ LD_HU: "LD.HU", ++ LD_W: "LD.W", ++ LD_WU: "LD.WU", ++ LL_D: "LL.D", ++ LL_W: "LL.W", ++ LU12I_W: "LU12I.W", ++ LU32I_D: "LU32I.D", ++ LU52I_D: "LU52I.D", ++ MASKEQZ: "MASKEQZ", ++ MASKNEZ: "MASKNEZ", ++ MOD_D: "MOD.D", ++ MOD_DU: "MOD.DU", ++ MOD_W: "MOD.W", ++ MOD_WU: "MOD.WU", ++ MOVCF2FR: "MOVCF2FR", ++ MOVCF2GR: "MOVCF2GR", ++ MOVFCSR2GR: "MOVFCSR2GR", ++ MOVFR2CF: "MOVFR2CF", ++ MOVFR2GR_D: "MOVFR2GR.D", ++ MOVFR2GR_S: "MOVFR2GR.S", ++ MOVFRH2GR_S: "MOVFRH2GR.S", ++ MOVGR2CF: "MOVGR2CF", ++ MOVGR2FCSR: "MOVGR2FCSR", ++ MOVGR2FRH_W: "MOVGR2FRH.W", ++ MOVGR2FR_D: "MOVGR2FR.D", ++ MOVGR2FR_W: "MOVGR2FR.W", ++ MULH_D: "MULH.D", ++ MULH_DU: "MULH.DU", ++ MULH_W: "MULH.W", ++ MULH_WU: "MULH.WU", ++ MULW_D_W: "MULW.D.W", ++ MULW_D_WU: "MULW.D.WU", ++ MUL_D: "MUL.D", ++ MUL_W: 
"MUL.W", ++ NOR: "NOR", ++ OR: "OR", ++ ORI: "ORI", ++ ORN: "ORN", ++ PCADDI: "PCADDI", ++ PCADDU12I: "PCADDU12I", ++ PCADDU18I: "PCADDU18I", ++ PCALAU12I: "PCALAU12I", ++ PRELD: "PRELD", ++ PRELDX: "PRELDX", ++ RDTIMEH_W: "RDTIMEH.W", ++ RDTIMEL_W: "RDTIMEL.W", ++ RDTIME_D: "RDTIME.D", ++ REVB_2H: "REVB.2H", ++ REVB_2W: "REVB.2W", ++ REVB_4H: "REVB.4H", ++ REVB_D: "REVB.D", ++ REVH_2W: "REVH.2W", ++ REVH_D: "REVH.D", ++ ROTRI_D: "ROTRI.D", ++ ROTRI_W: "ROTRI.W", ++ ROTR_D: "ROTR.D", ++ ROTR_W: "ROTR.W", ++ SC_D: "SC.D", ++ SC_W: "SC.W", ++ SLLI_D: "SLLI.D", ++ SLLI_W: "SLLI.W", ++ SLL_D: "SLL.D", ++ SLL_W: "SLL.W", ++ SLT: "SLT", ++ SLTI: "SLTI", ++ SLTU: "SLTU", ++ SLTUI: "SLTUI", ++ SRAI_D: "SRAI.D", ++ SRAI_W: "SRAI.W", ++ SRA_D: "SRA.D", ++ SRA_W: "SRA.W", ++ SRLI_D: "SRLI.D", ++ SRLI_W: "SRLI.W", ++ SRL_D: "SRL.D", ++ SRL_W: "SRL.W", ++ STGT_B: "STGT.B", ++ STGT_D: "STGT.D", ++ STGT_H: "STGT.H", ++ STGT_W: "STGT.W", ++ STLE_B: "STLE.B", ++ STLE_D: "STLE.D", ++ STLE_H: "STLE.H", ++ STLE_W: "STLE.W", ++ STPTR_D: "STPTR.D", ++ STPTR_W: "STPTR.W", ++ STX_B: "STX.B", ++ STX_D: "STX.D", ++ STX_H: "STX.H", ++ STX_W: "STX.W", ++ ST_B: "ST.B", ++ ST_D: "ST.D", ++ ST_H: "ST.H", ++ ST_W: "ST.W", ++ SUB_D: "SUB.D", ++ SUB_W: "SUB.W", ++ SYSCALL: "SYSCALL", ++ TLBCLR: "TLBCLR", ++ TLBFILL: "TLBFILL", ++ TLBFLUSH: "TLBFLUSH", ++ TLBRD: "TLBRD", ++ TLBSRCH: "TLBSRCH", ++ TLBWR: "TLBWR", ++ XOR: "XOR", ++ XORI: "XORI", ++} ++ ++var instFormats = [...]instFormat{ ++ // ADDI.D rd, rj, si12 ++ {mask: 0xffc00000, value: 0x02c00000, op: ADDI_D, args: instArgs{arg_rd, arg_rj, arg_si12_21_10}}, ++ // ADDI.W rd, rj, si12 ++ {mask: 0xffc00000, value: 0x02800000, op: ADDI_W, args: instArgs{arg_rd, arg_rj, arg_si12_21_10}}, ++ // ADDU16I.D rd, rj, si16 ++ {mask: 0xfc000000, value: 0x10000000, op: ADDU16I_D, args: instArgs{arg_rd, arg_rj, arg_si16_25_10}}, ++ // ADD.D rd, rj, rk ++ {mask: 0xffff8000, value: 0x00108000, op: ADD_D, args: instArgs{arg_rd, arg_rj, arg_rk}}, ++ // ADD.W rd, 
rj, rk ++ {mask: 0xffff8000, value: 0x00100000, op: ADD_W, args: instArgs{arg_rd, arg_rj, arg_rk}}, ++ // ALSL.D rd, rj, rk, sa2 ++ {mask: 0xfffe0000, value: 0x002c0000, op: ALSL_D, args: instArgs{arg_rd, arg_rj, arg_rk, arg_sa2_16_15}}, ++ // ALSL.W rd, rj, rk, sa2 ++ {mask: 0xfffe0000, value: 0x00040000, op: ALSL_W, args: instArgs{arg_rd, arg_rj, arg_rk, arg_sa2_16_15}}, ++ // ALSL.WU rd, rj, rk, sa2 ++ {mask: 0xfffe0000, value: 0x00060000, op: ALSL_WU, args: instArgs{arg_rd, arg_rj, arg_rk, arg_sa2_16_15}}, ++ // AMADD.D rd, rk, rj ++ {mask: 0xffff8000, value: 0x38618000, op: AMADD_D, args: instArgs{arg_rd, arg_rk, arg_rj}}, ++ // AMADD_DB.D rd, rk, rj ++ {mask: 0xffff8000, value: 0x386a8000, op: AMADD_DB_D, args: instArgs{arg_rd, arg_rk, arg_rj}}, ++ // AMADD_DB.W rd, rk, rj ++ {mask: 0xffff8000, value: 0x386a0000, op: AMADD_DB_W, args: instArgs{arg_rd, arg_rk, arg_rj}}, ++ // AMADD.W rd, rk, rj ++ {mask: 0xffff8000, value: 0x38610000, op: AMADD_W, args: instArgs{arg_rd, arg_rk, arg_rj}}, ++ // AMAND.D rd, rk, rj ++ {mask: 0xffff8000, value: 0x38628000, op: AMAND_D, args: instArgs{arg_rd, arg_rk, arg_rj}}, ++ // AMAND_DB.D rd, rk, rj ++ {mask: 0xffff8000, value: 0x386b8000, op: AMAND_DB_D, args: instArgs{arg_rd, arg_rk, arg_rj}}, ++ // AMAND_DB.W rd, rk, rj ++ {mask: 0xffff8000, value: 0x386b0000, op: AMAND_DB_W, args: instArgs{arg_rd, arg_rk, arg_rj}}, ++ // AMAND.W rd, rk, rj ++ {mask: 0xffff8000, value: 0x38620000, op: AMAND_W, args: instArgs{arg_rd, arg_rk, arg_rj}}, ++ // AMMAX.D rd, rk, rj ++ {mask: 0xffff8000, value: 0x38658000, op: AMMAX_D, args: instArgs{arg_rd, arg_rk, arg_rj}}, ++ // AMMAX_DB.D rd, rk, rj ++ {mask: 0xffff8000, value: 0x386e8000, op: AMMAX_DB_D, args: instArgs{arg_rd, arg_rk, arg_rj}}, ++ // AMMAX_DB.DU rd, rk, rj ++ {mask: 0xffff8000, value: 0x38708000, op: AMMAX_DB_DU, args: instArgs{arg_rd, arg_rk, arg_rj}}, ++ // AMMAX_DB.W rd, rk, rj ++ {mask: 0xffff8000, value: 0x386e0000, op: AMMAX_DB_W, args: instArgs{arg_rd, arg_rk, arg_rj}}, 
++ // AMMAX_DB.WU rd, rk, rj ++ {mask: 0xffff8000, value: 0x38700000, op: AMMAX_DB_WU, args: instArgs{arg_rd, arg_rk, arg_rj}}, ++ // AMMAX.DU rd, rk, rj ++ {mask: 0xffff8000, value: 0x38678000, op: AMMAX_DU, args: instArgs{arg_rd, arg_rk, arg_rj}}, ++ // AMMAX.W rd, rk, rj ++ {mask: 0xffff8000, value: 0x38650000, op: AMMAX_W, args: instArgs{arg_rd, arg_rk, arg_rj}}, ++ // AMMAX.WU rd, rk, rj ++ {mask: 0xffff8000, value: 0x38670000, op: AMMAX_WU, args: instArgs{arg_rd, arg_rk, arg_rj}}, ++ // AMMIN.D rd, rk, rj ++ {mask: 0xffff8000, value: 0x38668000, op: AMMIN_D, args: instArgs{arg_rd, arg_rk, arg_rj}}, ++ // AMMIN_DB.D rd, rk, rj ++ {mask: 0xffff8000, value: 0x386f8000, op: AMMIN_DB_D, args: instArgs{arg_rd, arg_rk, arg_rj}}, ++ // AMMIN_DB.DU rd, rk, rj ++ {mask: 0xffff8000, value: 0x38718000, op: AMMIN_DB_DU, args: instArgs{arg_rd, arg_rk, arg_rj}}, ++ // AMMIN_DB.W rd, rk, rj ++ {mask: 0xffff8000, value: 0x386f0000, op: AMMIN_DB_W, args: instArgs{arg_rd, arg_rk, arg_rj}}, ++ // AMMIN_DB.WU rd, rk, rj ++ {mask: 0xffff8000, value: 0x38710000, op: AMMIN_DB_WU, args: instArgs{arg_rd, arg_rk, arg_rj}}, ++ // AMMIN.DU rd, rk, rj ++ {mask: 0xffff8000, value: 0x38688000, op: AMMIN_DU, args: instArgs{arg_rd, arg_rk, arg_rj}}, ++ // AMMIN.W rd, rk, rj ++ {mask: 0xffff8000, value: 0x38660000, op: AMMIN_W, args: instArgs{arg_rd, arg_rk, arg_rj}}, ++ // AMMIN.WU rd, rk, rj ++ {mask: 0xffff8000, value: 0x38680000, op: AMMIN_WU, args: instArgs{arg_rd, arg_rk, arg_rj}}, ++ // AMOR.D rd, rk, rj ++ {mask: 0xffff8000, value: 0x38638000, op: AMOR_D, args: instArgs{arg_rd, arg_rk, arg_rj}}, ++ // AMOR_DB.D rd, rk, rj ++ {mask: 0xffff8000, value: 0x386c8000, op: AMOR_DB_D, args: instArgs{arg_rd, arg_rk, arg_rj}}, ++ // AMOR_DB.W rd, rk, rj ++ {mask: 0xffff8000, value: 0x386c0000, op: AMOR_DB_W, args: instArgs{arg_rd, arg_rk, arg_rj}}, ++ // AMOR.W rd, rk, rj ++ {mask: 0xffff8000, value: 0x38630000, op: AMOR_W, args: instArgs{arg_rd, arg_rk, arg_rj}}, ++ // AMSWAP.D rd, rk, rj ++ 
{mask: 0xffff8000, value: 0x38608000, op: AMSWAP_D, args: instArgs{arg_rd, arg_rk, arg_rj}}, ++ // AMSWAP_DB.D rd, rk, rj ++ {mask: 0xffff8000, value: 0x38698000, op: AMSWAP_DB_D, args: instArgs{arg_rd, arg_rk, arg_rj}}, ++ // AMSWAP_DB.W rd, rk, rj ++ {mask: 0xffff8000, value: 0x38690000, op: AMSWAP_DB_W, args: instArgs{arg_rd, arg_rk, arg_rj}}, ++ // AMSWAP.W rd, rk, rj ++ {mask: 0xffff8000, value: 0x38600000, op: AMSWAP_W, args: instArgs{arg_rd, arg_rk, arg_rj}}, ++ // AMXOR.D rd, rk, rj ++ {mask: 0xffff8000, value: 0x38648000, op: AMXOR_D, args: instArgs{arg_rd, arg_rk, arg_rj}}, ++ // AMXOR_DB.D rd, rk, rj ++ {mask: 0xffff8000, value: 0x386d8000, op: AMXOR_DB_D, args: instArgs{arg_rd, arg_rk, arg_rj}}, ++ // AMXOR_DB.W rd, rk, rj ++ {mask: 0xffff8000, value: 0x386d0000, op: AMXOR_DB_W, args: instArgs{arg_rd, arg_rk, arg_rj}}, ++ // AMXOR.W rd, rk, rj ++ {mask: 0xffff8000, value: 0x38640000, op: AMXOR_W, args: instArgs{arg_rd, arg_rk, arg_rj}}, ++ // AND rd, rj, rk ++ {mask: 0xffff8000, value: 0x00148000, op: AND, args: instArgs{arg_rd, arg_rj, arg_rk}}, ++ // ANDI rd, rj, ui12 ++ {mask: 0xffc00000, value: 0x03400000, op: ANDI, args: instArgs{arg_rd, arg_rj, arg_ui12_21_10}}, ++ // ANDN rd, rj, rk ++ {mask: 0xffff8000, value: 0x00168000, op: ANDN, args: instArgs{arg_rd, arg_rj, arg_rk}}, ++ // ASRTGT.D rj, rk ++ {mask: 0xffff801f, value: 0x00018000, op: ASRTGT_D, args: instArgs{arg_rj, arg_rk}}, ++ // ASRTLE.D rj, rk ++ {mask: 0xffff801f, value: 0x00010000, op: ASRTLE_D, args: instArgs{arg_rj, arg_rk}}, ++ // B offs ++ {mask: 0xfc000000, value: 0x50000000, op: B, args: instArgs{arg_offset_25_0}}, ++ // BCEQZ cj, offs ++ {mask: 0xfc000300, value: 0x48000000, op: BCEQZ, args: instArgs{arg_cj, arg_offset_20_0}}, ++ // BCNEZ cj, offs ++ {mask: 0xfc000300, value: 0x48000100, op: BCNEZ, args: instArgs{arg_cj, arg_offset_20_0}}, ++ // BEQ rj, rd, offs ++ {mask: 0xfc000000, value: 0x58000000, op: BEQ, args: instArgs{arg_rj, arg_rd, arg_offset_15_0}}, ++ // BEQZ rj, 
offs ++ {mask: 0xfc000000, value: 0x40000000, op: BEQZ, args: instArgs{arg_rj, arg_offset_20_0}}, ++ // BGE rj, rd, offs ++ {mask: 0xfc000000, value: 0x64000000, op: BGE, args: instArgs{arg_rj, arg_rd, arg_offset_15_0}}, ++ // BGEU rj, rd, offs ++ {mask: 0xfc000000, value: 0x6c000000, op: BGEU, args: instArgs{arg_rj, arg_rd, arg_offset_15_0}}, ++ // BITREV.4B rd, rj ++ {mask: 0xfffffc00, value: 0x00004800, op: BITREV_4B, args: instArgs{arg_rd, arg_rj}}, ++ // BITREV.8B rd, rj ++ {mask: 0xfffffc00, value: 0x00004c00, op: BITREV_8B, args: instArgs{arg_rd, arg_rj}}, ++ // BITREV.D rd, rj ++ {mask: 0xfffffc00, value: 0x00005400, op: BITREV_D, args: instArgs{arg_rd, arg_rj}}, ++ // BITREV.W rd, rj ++ {mask: 0xfffffc00, value: 0x00005000, op: BITREV_W, args: instArgs{arg_rd, arg_rj}}, ++ // BL offs ++ {mask: 0xfc000000, value: 0x54000000, op: BL, args: instArgs{arg_offset_25_0}}, ++ // BLT rj, rd, offs ++ {mask: 0xfc000000, value: 0x60000000, op: BLT, args: instArgs{arg_rj, arg_rd, arg_offset_15_0}}, ++ // BLTU rj, rd, offs ++ {mask: 0xfc000000, value: 0x68000000, op: BLTU, args: instArgs{arg_rj, arg_rd, arg_offset_15_0}}, ++ // BNE rj, rd, offs ++ {mask: 0xfc000000, value: 0x5c000000, op: BNE, args: instArgs{arg_rj, arg_rd, arg_offset_15_0}}, ++ // BNEZ rj, offs ++ {mask: 0xfc000000, value: 0x44000000, op: BNEZ, args: instArgs{arg_rj, arg_offset_20_0}}, ++ // BREAK code ++ {mask: 0xffff8000, value: 0x002a0000, op: BREAK, args: instArgs{arg_code_14_0}}, ++ // BSTRINS.D rd, rj, msbd, lsbd ++ {mask: 0xffc00000, value: 0x00800000, op: BSTRINS_D, args: instArgs{arg_rd, arg_rj, arg_msbd, arg_lsbd}}, ++ // BSTRINS.W rd, rj, msbw, lsbw ++ {mask: 0xffe08000, value: 0x00600000, op: BSTRINS_W, args: instArgs{arg_rd, arg_rj, arg_msbw, arg_lsbw}}, ++ // BSTRPICK.D rd, rj, msbd, lsbd ++ {mask: 0xffc00000, value: 0x00c00000, op: BSTRPICK_D, args: instArgs{arg_rd, arg_rj, arg_msbd, arg_lsbd}}, ++ // BSTRPICK.W rd, rj, msbw, lsbw ++ {mask: 0xffe08000, value: 0x00608000, op: BSTRPICK_W, 
args: instArgs{arg_rd, arg_rj, arg_msbw, arg_lsbw}}, ++ // BYTEPICK.D rd, rj, rk, sa3 ++ {mask: 0xfffc0000, value: 0x000c0000, op: BYTEPICK_D, args: instArgs{arg_rd, arg_rj, arg_rk, arg_sa3_17_15}}, ++ // BYTEPICK.W rd, rj, rk, sa2 ++ {mask: 0xfffe0000, value: 0x00080000, op: BYTEPICK_W, args: instArgs{arg_rd, arg_rj, arg_rk, arg_sa2_16_15}}, ++ // CACOP code, rj, si12 ++ {mask: 0xffc00000, value: 0x06000000, op: CACOP, args: instArgs{arg_code_4_0, arg_rj, arg_si12_21_10}}, ++ // CLO.D rd, rj ++ {mask: 0xfffffc00, value: 0x00002000, op: CLO_D, args: instArgs{arg_rd, arg_rj}}, ++ // CLO.W rd, rj ++ {mask: 0xfffffc00, value: 0x00001000, op: CLO_W, args: instArgs{arg_rd, arg_rj}}, ++ // CLZ.D rd, rj ++ {mask: 0xfffffc00, value: 0x00002400, op: CLZ_D, args: instArgs{arg_rd, arg_rj}}, ++ // CLZ.W rd, rj ++ {mask: 0xfffffc00, value: 0x00001400, op: CLZ_W, args: instArgs{arg_rd, arg_rj}}, ++ // CPUCFG rd, rj ++ {mask: 0xfffffc00, value: 0x00006c00, op: CPUCFG, args: instArgs{arg_rd, arg_rj}}, ++ // CRCC.W.B.W rd, rj, rk ++ {mask: 0xffff8000, value: 0x00260000, op: CRCC_W_B_W, args: instArgs{arg_rd, arg_rj, arg_rk}}, ++ // CRCC.W.D.W rd, rj, rk ++ {mask: 0xffff8000, value: 0x00278000, op: CRCC_W_D_W, args: instArgs{arg_rd, arg_rj, arg_rk}}, ++ // CRCC.W.H.W rd, rj, rk ++ {mask: 0xffff8000, value: 0x00268000, op: CRCC_W_H_W, args: instArgs{arg_rd, arg_rj, arg_rk}}, ++ // CRCC.W.W.W rd, rj, rk ++ {mask: 0xffff8000, value: 0x00270000, op: CRCC_W_W_W, args: instArgs{arg_rd, arg_rj, arg_rk}}, ++ // CRC.W.B.W rd, rj, rk ++ {mask: 0xffff8000, value: 0x00240000, op: CRC_W_B_W, args: instArgs{arg_rd, arg_rj, arg_rk}}, ++ // CRC.W.D.W rd, rj, rk ++ {mask: 0xffff8000, value: 0x00258000, op: CRC_W_D_W, args: instArgs{arg_rd, arg_rj, arg_rk}}, ++ // CRC.W.H.W rd, rj, rk ++ {mask: 0xffff8000, value: 0x00248000, op: CRC_W_H_W, args: instArgs{arg_rd, arg_rj, arg_rk}}, ++ // CRC.W.W.W rd, rj, rk ++ {mask: 0xffff8000, value: 0x00250000, op: CRC_W_W_W, args: instArgs{arg_rd, arg_rj, 
arg_rk}}, ++ // CSRRD rd, csr ++ {mask: 0xff0003e0, value: 0x04000000, op: CSRRD, args: instArgs{arg_rd, arg_csr_23_10}}, ++ // CSRWR rd, csr ++ {mask: 0xff0003e0, value: 0x04000020, op: CSRWR, args: instArgs{arg_rd, arg_csr_23_10}}, ++ // CSRXCHG rd, rj, csr ++ {mask: 0xff000000, value: 0x04000000, op: CSRXCHG, args: instArgs{arg_rd, arg_rj, arg_csr_23_10}}, ++ // CTO.D rd, rj ++ {mask: 0xfffffc00, value: 0x00002800, op: CTO_D, args: instArgs{arg_rd, arg_rj}}, ++ // CTO.W rd, rj ++ {mask: 0xfffffc00, value: 0x00001800, op: CTO_W, args: instArgs{arg_rd, arg_rj}}, ++ // CTZ.D rd, rj ++ {mask: 0xfffffc00, value: 0x00002c00, op: CTZ_D, args: instArgs{arg_rd, arg_rj}}, ++ // CTZ.W rd, rj ++ {mask: 0xfffffc00, value: 0x00001c00, op: CTZ_W, args: instArgs{arg_rd, arg_rj}}, ++ // DBAR hint ++ {mask: 0xffff8000, value: 0x38720000, op: DBAR, args: instArgs{arg_hint_14_0}}, ++ // DBCL code ++ {mask: 0xffff8000, value: 0x002a8000, op: DBCL, args: instArgs{arg_code_14_0}}, ++ // DIV.D rd, rj, rk ++ {mask: 0xffff8000, value: 0x00220000, op: DIV_D, args: instArgs{arg_rd, arg_rj, arg_rk}}, ++ // DIV.DU rd, rj, rk ++ {mask: 0xffff8000, value: 0x00230000, op: DIV_DU, args: instArgs{arg_rd, arg_rj, arg_rk}}, ++ // DIV.W rd, rj, rk ++ {mask: 0xffff8000, value: 0x00200000, op: DIV_W, args: instArgs{arg_rd, arg_rj, arg_rk}}, ++ // DIV.WU rd, rj, rk ++ {mask: 0xffff8000, value: 0x00210000, op: DIV_WU, args: instArgs{arg_rd, arg_rj, arg_rk}}, ++ // ERTN ++ {mask: 0xffffffff, value: 0x06483800, op: ERTN, args: instArgs{}}, ++ // EXT.W.B rd, rj ++ {mask: 0xfffffc00, value: 0x00005c00, op: EXT_W_B, args: instArgs{arg_rd, arg_rj}}, ++ // EXT.W.H rd, rj ++ {mask: 0xfffffc00, value: 0x00005800, op: EXT_W_H, args: instArgs{arg_rd, arg_rj}}, ++ // FABS.D fd, fj ++ {mask: 0xfffffc00, value: 0x01140800, op: FABS_D, args: instArgs{arg_fd, arg_fj}}, ++ // FABS.S fd, fj ++ {mask: 0xfffffc00, value: 0x01140400, op: FABS_S, args: instArgs{arg_fd, arg_fj}}, ++ // FADD.D fd, fj, fk ++ {mask: 0xffff8000, 
value: 0x01010000, op: FADD_D, args: instArgs{arg_fd, arg_fj, arg_fk}}, ++ // FADD.S fd, fj, fk ++ {mask: 0xffff8000, value: 0x01008000, op: FADD_S, args: instArgs{arg_fd, arg_fj, arg_fk}}, ++ // FCLASS.D fd, fj ++ {mask: 0xfffffc00, value: 0x01143800, op: FCLASS_D, args: instArgs{arg_fd, arg_fj}}, ++ // FCLASS.S fd, fj ++ {mask: 0xfffffc00, value: 0x01143400, op: FCLASS_S, args: instArgs{arg_fd, arg_fj}}, ++ // FCMP.CAF.D cd, fj, fk ++ {mask: 0xffff8018, value: 0x0c200000, op: FCMP_CAF_D, args: instArgs{arg_cd, arg_fj, arg_fk}}, ++ // FCMP.CAF.S cd, fj, fk ++ {mask: 0xffff8018, value: 0x0c100000, op: FCMP_CAF_S, args: instArgs{arg_cd, arg_fj, arg_fk}}, ++ // FCMP.CEQ.D cd, fj, fk ++ {mask: 0xffff8018, value: 0x0c220000, op: FCMP_CEQ_D, args: instArgs{arg_cd, arg_fj, arg_fk}}, ++ // FCMP.CEQ.S cd, fj, fk ++ {mask: 0xffff8018, value: 0x0c120000, op: FCMP_CEQ_S, args: instArgs{arg_cd, arg_fj, arg_fk}}, ++ // FCMP.CLE.D cd, fj, fk ++ {mask: 0xffff8018, value: 0x0c230000, op: FCMP_CLE_D, args: instArgs{arg_cd, arg_fj, arg_fk}}, ++ // FCMP.CLE.S cd, fj, fk ++ {mask: 0xffff8018, value: 0x0c130000, op: FCMP_CLE_S, args: instArgs{arg_cd, arg_fj, arg_fk}}, ++ // FCMP.CLT.D cd, fj, fk ++ {mask: 0xffff8018, value: 0x0c210000, op: FCMP_CLT_D, args: instArgs{arg_cd, arg_fj, arg_fk}}, ++ // FCMP.CLT.S cd, fj, fk ++ {mask: 0xffff8018, value: 0x0c110000, op: FCMP_CLT_S, args: instArgs{arg_cd, arg_fj, arg_fk}}, ++ // FCMP.CNE.D cd, fj, fk ++ {mask: 0xffff8018, value: 0x0c280000, op: FCMP_CNE_D, args: instArgs{arg_cd, arg_fj, arg_fk}}, ++ // FCMP.CNE.S cd, fj, fk ++ {mask: 0xffff8018, value: 0x0c180000, op: FCMP_CNE_S, args: instArgs{arg_cd, arg_fj, arg_fk}}, ++ // FCMP.COR.D cd, fj, fk ++ {mask: 0xffff8018, value: 0x0c2a0000, op: FCMP_COR_D, args: instArgs{arg_cd, arg_fj, arg_fk}}, ++ // FCMP.COR.S cd, fj, fk ++ {mask: 0xffff8018, value: 0x0c1a0000, op: FCMP_COR_S, args: instArgs{arg_cd, arg_fj, arg_fk}}, ++ // FCMP.CUEQ.D cd, fj, fk ++ {mask: 0xffff8018, value: 0x0c260000, op: 
FCMP_CUEQ_D, args: instArgs{arg_cd, arg_fj, arg_fk}}, ++ // FCMP.CUEQ.S cd, fj, fk ++ {mask: 0xffff8018, value: 0x0c160000, op: FCMP_CUEQ_S, args: instArgs{arg_cd, arg_fj, arg_fk}}, ++ // FCMP.CULE.D cd, fj, fk ++ {mask: 0xffff8018, value: 0x0c270000, op: FCMP_CULE_D, args: instArgs{arg_cd, arg_fj, arg_fk}}, ++ // FCMP.CULE.S cd, fj, fk ++ {mask: 0xffff8018, value: 0x0c170000, op: FCMP_CULE_S, args: instArgs{arg_cd, arg_fj, arg_fk}}, ++ // FCMP.CULT.D cd, fj, fk ++ {mask: 0xffff8018, value: 0x0c250000, op: FCMP_CULT_D, args: instArgs{arg_cd, arg_fj, arg_fk}}, ++ // FCMP.CULT.S cd, fj, fk ++ {mask: 0xffff8018, value: 0x0c150000, op: FCMP_CULT_S, args: instArgs{arg_cd, arg_fj, arg_fk}}, ++ // FCMP.CUNE.D cd, fj, fk ++ {mask: 0xffff8018, value: 0x0c2c0000, op: FCMP_CUNE_D, args: instArgs{arg_cd, arg_fj, arg_fk}}, ++ // FCMP.CUNE.S cd, fj, fk ++ {mask: 0xffff8018, value: 0x0c1c0000, op: FCMP_CUNE_S, args: instArgs{arg_cd, arg_fj, arg_fk}}, ++ // FCMP.CUN.D cd, fj, fk ++ {mask: 0xffff8018, value: 0x0c240000, op: FCMP_CUN_D, args: instArgs{arg_cd, arg_fj, arg_fk}}, ++ // FCMP.CUN.S cd, fj, fk ++ {mask: 0xffff8018, value: 0x0c140000, op: FCMP_CUN_S, args: instArgs{arg_cd, arg_fj, arg_fk}}, ++ // FCMP.SAF.D cd, fj, fk ++ {mask: 0xffff8018, value: 0x0c208000, op: FCMP_SAF_D, args: instArgs{arg_cd, arg_fj, arg_fk}}, ++ // FCMP.SAF.S cd, fj, fk ++ {mask: 0xffff8018, value: 0x0c108000, op: FCMP_SAF_S, args: instArgs{arg_cd, arg_fj, arg_fk}}, ++ // FCMP.SEQ.D cd, fj, fk ++ {mask: 0xffff8018, value: 0x0c228000, op: FCMP_SEQ_D, args: instArgs{arg_cd, arg_fj, arg_fk}}, ++ // FCMP.SEQ.S cd, fj, fk ++ {mask: 0xffff8018, value: 0x0c128000, op: FCMP_SEQ_S, args: instArgs{arg_cd, arg_fj, arg_fk}}, ++ // FCMP.SLE.D cd, fj, fk ++ {mask: 0xffff8018, value: 0x0c238000, op: FCMP_SLE_D, args: instArgs{arg_cd, arg_fj, arg_fk}}, ++ // FCMP.SLE.S cd, fj, fk ++ {mask: 0xffff8018, value: 0x0c138000, op: FCMP_SLE_S, args: instArgs{arg_cd, arg_fj, arg_fk}}, ++ // FCMP.SLT.D cd, fj, fk ++ {mask: 
0xffff8018, value: 0x0c218000, op: FCMP_SLT_D, args: instArgs{arg_cd, arg_fj, arg_fk}}, ++ // FCMP.SLT.S cd, fj, fk ++ {mask: 0xffff8018, value: 0x0c118000, op: FCMP_SLT_S, args: instArgs{arg_cd, arg_fj, arg_fk}}, ++ // FCMP.SNE.D cd, fj, fk ++ {mask: 0xffff8018, value: 0x0c288000, op: FCMP_SNE_D, args: instArgs{arg_cd, arg_fj, arg_fk}}, ++ // FCMP.SNE.S cd, fj, fk ++ {mask: 0xffff8018, value: 0x0c188000, op: FCMP_SNE_S, args: instArgs{arg_cd, arg_fj, arg_fk}}, ++ // FCMP.SOR.D cd, fj, fk ++ {mask: 0xffff8018, value: 0x0c2a8000, op: FCMP_SOR_D, args: instArgs{arg_cd, arg_fj, arg_fk}}, ++ // FCMP.SOR.S cd, fj, fk ++ {mask: 0xffff8018, value: 0x0c1a8000, op: FCMP_SOR_S, args: instArgs{arg_cd, arg_fj, arg_fk}}, ++ // FCMP.SUEQ.D cd, fj, fk ++ {mask: 0xffff8018, value: 0x0c268000, op: FCMP_SUEQ_D, args: instArgs{arg_cd, arg_fj, arg_fk}}, ++ // FCMP.SUEQ.S cd, fj, fk ++ {mask: 0xffff8018, value: 0x0c168000, op: FCMP_SUEQ_S, args: instArgs{arg_cd, arg_fj, arg_fk}}, ++ // FCMP.SULE.D cd, fj, fk ++ {mask: 0xffff8018, value: 0x0c278000, op: FCMP_SULE_D, args: instArgs{arg_cd, arg_fj, arg_fk}}, ++ // FCMP.SULE.S cd, fj, fk ++ {mask: 0xffff8018, value: 0x0c178000, op: FCMP_SULE_S, args: instArgs{arg_cd, arg_fj, arg_fk}}, ++ // FCMP.SULT.D cd, fj, fk ++ {mask: 0xffff8018, value: 0x0c258000, op: FCMP_SULT_D, args: instArgs{arg_cd, arg_fj, arg_fk}}, ++ // FCMP.SULT.S cd, fj, fk ++ {mask: 0xffff8018, value: 0x0c158000, op: FCMP_SULT_S, args: instArgs{arg_cd, arg_fj, arg_fk}}, ++ // FCMP.SUNE.D cd, fj, fk ++ {mask: 0xffff8018, value: 0x0c2c8000, op: FCMP_SUNE_D, args: instArgs{arg_cd, arg_fj, arg_fk}}, ++ // FCMP.SUNE.S cd, fj, fk ++ {mask: 0xffff8018, value: 0x0c1c8000, op: FCMP_SUNE_S, args: instArgs{arg_cd, arg_fj, arg_fk}}, ++ // FCMP.SUN.D cd, fj, fk ++ {mask: 0xffff8018, value: 0x0c248000, op: FCMP_SUN_D, args: instArgs{arg_cd, arg_fj, arg_fk}}, ++ // FCMP.SUN.S cd, fj, fk ++ {mask: 0xffff8018, value: 0x0c148000, op: FCMP_SUN_S, args: instArgs{arg_cd, arg_fj, arg_fk}}, ++ // 
FCOPYSIGN.D fd, fj, fk ++ {mask: 0xffff8000, value: 0x01130000, op: FCOPYSIGN_D, args: instArgs{arg_fd, arg_fj, arg_fk}}, ++ // FCOPYSIGN.S fd, fj, fk ++ {mask: 0xffff8000, value: 0x01128000, op: FCOPYSIGN_S, args: instArgs{arg_fd, arg_fj, arg_fk}}, ++ // FCVT.D.S fd, fj ++ {mask: 0xfffffc00, value: 0x01192400, op: FCVT_D_S, args: instArgs{arg_fd, arg_fj}}, ++ // FCVT.S.D fd, fj ++ {mask: 0xfffffc00, value: 0x01191800, op: FCVT_S_D, args: instArgs{arg_fd, arg_fj}}, ++ // FDIV.D fd, fj, fk ++ {mask: 0xffff8000, value: 0x01070000, op: FDIV_D, args: instArgs{arg_fd, arg_fj, arg_fk}}, ++ // FDIV.S fd, fj, fk ++ {mask: 0xffff8000, value: 0x01068000, op: FDIV_S, args: instArgs{arg_fd, arg_fj, arg_fk}}, ++ // FFINT.D.L fd, fj ++ {mask: 0xfffffc00, value: 0x011d2800, op: FFINT_D_L, args: instArgs{arg_fd, arg_fj}}, ++ // FFINT.D.W fd, fj ++ {mask: 0xfffffc00, value: 0x011d2000, op: FFINT_D_W, args: instArgs{arg_fd, arg_fj}}, ++ // FFINT.S.L fd, fj ++ {mask: 0xfffffc00, value: 0x011d1800, op: FFINT_S_L, args: instArgs{arg_fd, arg_fj}}, ++ // FFINT.S.W fd, fj ++ {mask: 0xfffffc00, value: 0x011d1000, op: FFINT_S_W, args: instArgs{arg_fd, arg_fj}}, ++ // FLDGT.D fd, rj, rk ++ {mask: 0xffff8000, value: 0x38748000, op: FLDGT_D, args: instArgs{arg_fd, arg_rj, arg_rk}}, ++ // FLDGT.S fd, rj, rk ++ {mask: 0xffff8000, value: 0x38740000, op: FLDGT_S, args: instArgs{arg_fd, arg_rj, arg_rk}}, ++ // FLDLE.D fd, rj, rk ++ {mask: 0xffff8000, value: 0x38758000, op: FLDLE_D, args: instArgs{arg_fd, arg_rj, arg_rk}}, ++ // FLDLE.S fd, rj, rk ++ {mask: 0xffff8000, value: 0x38750000, op: FLDLE_S, args: instArgs{arg_fd, arg_rj, arg_rk}}, ++ // FLDX.D fd, rj, rk ++ {mask: 0xffff8000, value: 0x38340000, op: FLDX_D, args: instArgs{arg_fd, arg_rj, arg_rk}}, ++ // FLDX.S fd, rj, rk ++ {mask: 0xffff8000, value: 0x38300000, op: FLDX_S, args: instArgs{arg_fd, arg_rj, arg_rk}}, ++ // FLD.D fd, rj, si12 ++ {mask: 0xffc00000, value: 0x2b800000, op: FLD_D, args: instArgs{arg_fd, arg_rj, arg_si12_21_10}}, ++ 
// FLD.S fd, rj, si12 ++ {mask: 0xffc00000, value: 0x2b000000, op: FLD_S, args: instArgs{arg_fd, arg_rj, arg_si12_21_10}}, ++ // FLOGB.D fd, fj ++ {mask: 0xfffffc00, value: 0x01142800, op: FLOGB_D, args: instArgs{arg_fd, arg_fj}}, ++ // FLOGB.S fd, fj ++ {mask: 0xfffffc00, value: 0x01142400, op: FLOGB_S, args: instArgs{arg_fd, arg_fj}}, ++ // FMADD.D fd, fj, fk, fa ++ {mask: 0xfff00000, value: 0x08200000, op: FMADD_D, args: instArgs{arg_fd, arg_fj, arg_fk, arg_fa}}, ++ // FMADD.S fd, fj, fk, fa ++ {mask: 0xfff00000, value: 0x08100000, op: FMADD_S, args: instArgs{arg_fd, arg_fj, arg_fk, arg_fa}}, ++ // FMAXA.D fd, fj, fk ++ {mask: 0xffff8000, value: 0x010d0000, op: FMAXA_D, args: instArgs{arg_fd, arg_fj, arg_fk}}, ++ // FMAXA.S fd, fj, fk ++ {mask: 0xffff8000, value: 0x010c8000, op: FMAXA_S, args: instArgs{arg_fd, arg_fj, arg_fk}}, ++ // FMAX.D fd, fj, fk ++ {mask: 0xffff8000, value: 0x01090000, op: FMAX_D, args: instArgs{arg_fd, arg_fj, arg_fk}}, ++ // FMAX.S fd, fj, fk ++ {mask: 0xffff8000, value: 0x01088000, op: FMAX_S, args: instArgs{arg_fd, arg_fj, arg_fk}}, ++ // FMINA.D fd, fj, fk ++ {mask: 0xffff8000, value: 0x010f0000, op: FMINA_D, args: instArgs{arg_fd, arg_fj, arg_fk}}, ++ // FMINA.S fd, fj, fk ++ {mask: 0xffff8000, value: 0x010e8000, op: FMINA_S, args: instArgs{arg_fd, arg_fj, arg_fk}}, ++ // FMIN.D fd, fj, fk ++ {mask: 0xffff8000, value: 0x010b0000, op: FMIN_D, args: instArgs{arg_fd, arg_fj, arg_fk}}, ++ // FMIN.S fd, fj, fk ++ {mask: 0xffff8000, value: 0x010a8000, op: FMIN_S, args: instArgs{arg_fd, arg_fj, arg_fk}}, ++ // FMOV.D fd, fj ++ {mask: 0xfffffc00, value: 0x01149800, op: FMOV_D, args: instArgs{arg_fd, arg_fj}}, ++ // FMOV.S fd, fj ++ {mask: 0xfffffc00, value: 0x01149400, op: FMOV_S, args: instArgs{arg_fd, arg_fj}}, ++ // FMSUB.D fd, fj, fk, fa ++ {mask: 0xfff00000, value: 0x08600000, op: FMSUB_D, args: instArgs{arg_fd, arg_fj, arg_fk, arg_fa}}, ++ // FMSUB.S fd, fj, fk, fa ++ {mask: 0xfff00000, value: 0x08500000, op: FMSUB_S, args: 
instArgs{arg_fd, arg_fj, arg_fk, arg_fa}}, ++ // FMUL.D fd, fj, fk ++ {mask: 0xffff8000, value: 0x01050000, op: FMUL_D, args: instArgs{arg_fd, arg_fj, arg_fk}}, ++ // FMUL.S fd, fj, fk ++ {mask: 0xffff8000, value: 0x01048000, op: FMUL_S, args: instArgs{arg_fd, arg_fj, arg_fk}}, ++ // FNEG.D fd, fj ++ {mask: 0xfffffc00, value: 0x01141800, op: FNEG_D, args: instArgs{arg_fd, arg_fj}}, ++ // FNEG.S fd, fj ++ {mask: 0xfffffc00, value: 0x01141400, op: FNEG_S, args: instArgs{arg_fd, arg_fj}}, ++ // FNMADD.D fd, fj, fk, fa ++ {mask: 0xfff00000, value: 0x08a00000, op: FNMADD_D, args: instArgs{arg_fd, arg_fj, arg_fk, arg_fa}}, ++ // FNMADD.S fd, fj, fk, fa ++ {mask: 0xfff00000, value: 0x08900000, op: FNMADD_S, args: instArgs{arg_fd, arg_fj, arg_fk, arg_fa}}, ++ // FNMSUB.D fd, fj, fk, fa ++ {mask: 0xfff00000, value: 0x08e00000, op: FNMSUB_D, args: instArgs{arg_fd, arg_fj, arg_fk, arg_fa}}, ++ // FNMSUB.S fd, fj, fk, fa ++ {mask: 0xfff00000, value: 0x08d00000, op: FNMSUB_S, args: instArgs{arg_fd, arg_fj, arg_fk, arg_fa}}, ++ // FRECIP.D fd, fj ++ {mask: 0xfffffc00, value: 0x01145800, op: FRECIP_D, args: instArgs{arg_fd, arg_fj}}, ++ // FRECIP.S fd, fj ++ {mask: 0xfffffc00, value: 0x01145400, op: FRECIP_S, args: instArgs{arg_fd, arg_fj}}, ++ // FRINT.D fd, fj ++ {mask: 0xfffffc00, value: 0x011e4800, op: FRINT_D, args: instArgs{arg_fd, arg_fj}}, ++ // FRINT.S fd, fj ++ {mask: 0xfffffc00, value: 0x011e4400, op: FRINT_S, args: instArgs{arg_fd, arg_fj}}, ++ // FRSQRT.D fd, fj ++ {mask: 0xfffffc00, value: 0x01146800, op: FRSQRT_D, args: instArgs{arg_fd, arg_fj}}, ++ // FRSQRT.S fd, fj ++ {mask: 0xfffffc00, value: 0x01146400, op: FRSQRT_S, args: instArgs{arg_fd, arg_fj}}, ++ // FSCALEB.D fd, fj, fk ++ {mask: 0xffff8000, value: 0x01110000, op: FSCALEB_D, args: instArgs{arg_fd, arg_fj, arg_fk}}, ++ // FSCALEB.S fd, fj, fk ++ {mask: 0xffff8000, value: 0x01108000, op: FSCALEB_S, args: instArgs{arg_fd, arg_fj, arg_fk}}, ++ // FSEL fd, fj, fk, ca ++ {mask: 0xfffc0000, value: 0x0d000000, 
op: FSEL, args: instArgs{arg_fd, arg_fj, arg_fk, arg_ca}}, ++ // FSQRT.D fd, fj ++ {mask: 0xfffffc00, value: 0x01144800, op: FSQRT_D, args: instArgs{arg_fd, arg_fj}}, ++ // FSQRT.S fd, fj ++ {mask: 0xfffffc00, value: 0x01144400, op: FSQRT_S, args: instArgs{arg_fd, arg_fj}}, ++ // FSTGT.D fd, rj, rk ++ {mask: 0xffff8000, value: 0x38768000, op: FSTGT_D, args: instArgs{arg_fd, arg_rj, arg_rk}}, ++ // FSTGT.S fd, rj, rk ++ {mask: 0xffff8000, value: 0x38760000, op: FSTGT_S, args: instArgs{arg_fd, arg_rj, arg_rk}}, ++ // FSTLE.D fd, rj, rk ++ {mask: 0xffff8000, value: 0x38778000, op: FSTLE_D, args: instArgs{arg_fd, arg_rj, arg_rk}}, ++ // FSTLE.S fd, rj, rk ++ {mask: 0xffff8000, value: 0x38770000, op: FSTLE_S, args: instArgs{arg_fd, arg_rj, arg_rk}}, ++ // FSTX.D fd, rj, rk ++ {mask: 0xffff8000, value: 0x383c0000, op: FSTX_D, args: instArgs{arg_fd, arg_rj, arg_rk}}, ++ // FSTX.S fd, rj, rk ++ {mask: 0xffff8000, value: 0x38380000, op: FSTX_S, args: instArgs{arg_fd, arg_rj, arg_rk}}, ++ // FST.D fd, rj, si12 ++ {mask: 0xffc00000, value: 0x2bc00000, op: FST_D, args: instArgs{arg_fd, arg_rj, arg_si12_21_10}}, ++ // FST.S fd, rj, si12 ++ {mask: 0xffc00000, value: 0x2b400000, op: FST_S, args: instArgs{arg_fd, arg_rj, arg_si12_21_10}}, ++ // FSUB.D fd, fj, fk ++ {mask: 0xffff8000, value: 0x01030000, op: FSUB_D, args: instArgs{arg_fd, arg_fj, arg_fk}}, ++ // FSUB.S fd, fj, fk ++ {mask: 0xffff8000, value: 0x01028000, op: FSUB_S, args: instArgs{arg_fd, arg_fj, arg_fk}}, ++ // FTINTRM.L.D fd, fj ++ {mask: 0xfffffc00, value: 0x011a2800, op: FTINTRM_L_D, args: instArgs{arg_fd, arg_fj}}, ++ // FTINTRM.L.S fd, fj ++ {mask: 0xfffffc00, value: 0x011a2400, op: FTINTRM_L_S, args: instArgs{arg_fd, arg_fj}}, ++ // FTINTRM.W.D fd, fj ++ {mask: 0xfffffc00, value: 0x011a0800, op: FTINTRM_W_D, args: instArgs{arg_fd, arg_fj}}, ++ // FTINTRM.W.S fd, fj ++ {mask: 0xfffffc00, value: 0x011a0400, op: FTINTRM_W_S, args: instArgs{arg_fd, arg_fj}}, ++ // FTINTRNE.L.D fd, fj ++ {mask: 0xfffffc00, value: 
0x011ae800, op: FTINTRNE_L_D, args: instArgs{arg_fd, arg_fj}}, ++ // FTINTRNE.L.S fd, fj ++ {mask: 0xfffffc00, value: 0x011ae400, op: FTINTRNE_L_S, args: instArgs{arg_fd, arg_fj}}, ++ // FTINTRNE.W.D fd, fj ++ {mask: 0xfffffc00, value: 0x011ac800, op: FTINTRNE_W_D, args: instArgs{arg_fd, arg_fj}}, ++ // FTINTRNE.W.S fd, fj ++ {mask: 0xfffffc00, value: 0x011ac400, op: FTINTRNE_W_S, args: instArgs{arg_fd, arg_fj}}, ++ // FTINTRP.L.D fd, fj ++ {mask: 0xfffffc00, value: 0x011a6800, op: FTINTRP_L_D, args: instArgs{arg_fd, arg_fj}}, ++ // FTINTRP.L.S fd, fj ++ {mask: 0xfffffc00, value: 0x011a6400, op: FTINTRP_L_S, args: instArgs{arg_fd, arg_fj}}, ++ // FTINTRP.W.D fd, fj ++ {mask: 0xfffffc00, value: 0x011a4800, op: FTINTRP_W_D, args: instArgs{arg_fd, arg_fj}}, ++ // FTINTRP.W.S fd, fj ++ {mask: 0xfffffc00, value: 0x011a4400, op: FTINTRP_W_S, args: instArgs{arg_fd, arg_fj}}, ++ // FTINTRZ.L.D fd, fj ++ {mask: 0xfffffc00, value: 0x011aa800, op: FTINTRZ_L_D, args: instArgs{arg_fd, arg_fj}}, ++ // FTINTRZ.L.S fd, fj ++ {mask: 0xfffffc00, value: 0x011aa400, op: FTINTRZ_L_S, args: instArgs{arg_fd, arg_fj}}, ++ // FTINTRZ.W.D fd, fj ++ {mask: 0xfffffc00, value: 0x011a8800, op: FTINTRZ_W_D, args: instArgs{arg_fd, arg_fj}}, ++ // FTINTRZ.W.S fd, fj ++ {mask: 0xfffffc00, value: 0x011a8400, op: FTINTRZ_W_S, args: instArgs{arg_fd, arg_fj}}, ++ // FTINT.L.D fd, fj ++ {mask: 0xfffffc00, value: 0x011b2800, op: FTINT_L_D, args: instArgs{arg_fd, arg_fj}}, ++ // FTINT.L.S fd, fj ++ {mask: 0xfffffc00, value: 0x011b2400, op: FTINT_L_S, args: instArgs{arg_fd, arg_fj}}, ++ // FTINT.W.D fd, fj ++ {mask: 0xfffffc00, value: 0x011b0800, op: FTINT_W_D, args: instArgs{arg_fd, arg_fj}}, ++ // FTINT.W.S fd, fj ++ {mask: 0xfffffc00, value: 0x011b0400, op: FTINT_W_S, args: instArgs{arg_fd, arg_fj}}, ++ // IBAR hint ++ {mask: 0xffff8000, value: 0x38728000, op: IBAR, args: instArgs{arg_hint_14_0}}, ++ // IDLE level ++ {mask: 0xffff8000, value: 0x06488000, op: IDLE, args: instArgs{arg_level_14_0}}, ++ // 
INVTLB op, rj, rk ++ {mask: 0xffff8000, value: 0x06498000, op: INVTLB, args: instArgs{arg_op_4_0, arg_rj, arg_rk}}, ++ // IOCSRRD.B rd, rj ++ {mask: 0xfffffc00, value: 0x06480000, op: IOCSRRD_B, args: instArgs{arg_rd, arg_rj}}, ++ // IOCSRRD.D rd, rj ++ {mask: 0xfffffc00, value: 0x06480c00, op: IOCSRRD_D, args: instArgs{arg_rd, arg_rj}}, ++ // IOCSRRD.H rd, rj ++ {mask: 0xfffffc00, value: 0x06480400, op: IOCSRRD_H, args: instArgs{arg_rd, arg_rj}}, ++ // IOCSRRD.W rd, rj ++ {mask: 0xfffffc00, value: 0x06480800, op: IOCSRRD_W, args: instArgs{arg_rd, arg_rj}}, ++ // IOCSRWR.B rd, rj ++ {mask: 0xfffffc00, value: 0x06481000, op: IOCSRWR_B, args: instArgs{arg_rd, arg_rj}}, ++ // IOCSRWR.D rd, rj ++ {mask: 0xfffffc00, value: 0x06481c00, op: IOCSRWR_D, args: instArgs{arg_rd, arg_rj}}, ++ // IOCSRWR.H rd, rj ++ {mask: 0xfffffc00, value: 0x06481400, op: IOCSRWR_H, args: instArgs{arg_rd, arg_rj}}, ++ // IOCSRWR.W rd, rj ++ {mask: 0xfffffc00, value: 0x06481800, op: IOCSRWR_W, args: instArgs{arg_rd, arg_rj}}, ++ // JIRL rd, rj, offs ++ {mask: 0xfc000000, value: 0x4c000000, op: JIRL, args: instArgs{arg_rd, arg_rj, arg_offset_15_0}}, ++ // LDDIR rd, rj, level ++ {mask: 0xfffc0000, value: 0x06400000, op: LDDIR, args: instArgs{arg_rd, arg_rj, arg_level_17_10}}, ++ // LDGT.B rd, rj, rk ++ {mask: 0xffff8000, value: 0x38780000, op: LDGT_B, args: instArgs{arg_rd, arg_rj, arg_rk}}, ++ // LDGT.D rd, rj, rk ++ {mask: 0xffff8000, value: 0x38798000, op: LDGT_D, args: instArgs{arg_rd, arg_rj, arg_rk}}, ++ // LDGT.H rd, rj, rk ++ {mask: 0xffff8000, value: 0x38788000, op: LDGT_H, args: instArgs{arg_rd, arg_rj, arg_rk}}, ++ // LDGT.W rd, rj, rk ++ {mask: 0xffff8000, value: 0x38790000, op: LDGT_W, args: instArgs{arg_rd, arg_rj, arg_rk}}, ++ // LDLE.B rd, rj, rk ++ {mask: 0xffff8000, value: 0x387a0000, op: LDLE_B, args: instArgs{arg_rd, arg_rj, arg_rk}}, ++ // LDLE.D rd, rj, rk ++ {mask: 0xffff8000, value: 0x387b8000, op: LDLE_D, args: instArgs{arg_rd, arg_rj, arg_rk}}, ++ // LDLE.H rd, rj, rk ++ 
{mask: 0xffff8000, value: 0x387a8000, op: LDLE_H, args: instArgs{arg_rd, arg_rj, arg_rk}}, ++ // LDLE.W rd, rj, rk ++ {mask: 0xffff8000, value: 0x387b0000, op: LDLE_W, args: instArgs{arg_rd, arg_rj, arg_rk}}, ++ // LDPTE rj, seq ++ {mask: 0xfffc001f, value: 0x06440000, op: LDPTE, args: instArgs{arg_rj, arg_seq_17_10}}, ++ // LDPTR.D rd, rj, si14 ++ {mask: 0xff000000, value: 0x26000000, op: LDPTR_D, args: instArgs{arg_rd, arg_rj, arg_si14_23_10}}, ++ // LDPTR.W rd, rj, si14 ++ {mask: 0xff000000, value: 0x24000000, op: LDPTR_W, args: instArgs{arg_rd, arg_rj, arg_si14_23_10}}, ++ // LDX.B rd, rj, rk ++ {mask: 0xffff8000, value: 0x38000000, op: LDX_B, args: instArgs{arg_rd, arg_rj, arg_rk}}, ++ // LDX.BU rd, rj, rk ++ {mask: 0xffff8000, value: 0x38200000, op: LDX_BU, args: instArgs{arg_rd, arg_rj, arg_rk}}, ++ // LDX.D rd, rj, rk ++ {mask: 0xffff8000, value: 0x380c0000, op: LDX_D, args: instArgs{arg_rd, arg_rj, arg_rk}}, ++ // LDX.H rd, rj, rk ++ {mask: 0xffff8000, value: 0x38040000, op: LDX_H, args: instArgs{arg_rd, arg_rj, arg_rk}}, ++ // LDX.HU rd, rj, rk ++ {mask: 0xffff8000, value: 0x38240000, op: LDX_HU, args: instArgs{arg_rd, arg_rj, arg_rk}}, ++ // LDX.W rd, rj, rk ++ {mask: 0xffff8000, value: 0x38080000, op: LDX_W, args: instArgs{arg_rd, arg_rj, arg_rk}}, ++ // LDX.WU rd, rj, rk ++ {mask: 0xffff8000, value: 0x38280000, op: LDX_WU, args: instArgs{arg_rd, arg_rj, arg_rk}}, ++ // LD.B rd, rj, si12 ++ {mask: 0xffc00000, value: 0x28000000, op: LD_B, args: instArgs{arg_rd, arg_rj, arg_si12_21_10}}, ++ // LD.BU rd, rj, si12 ++ {mask: 0xffc00000, value: 0x2a000000, op: LD_BU, args: instArgs{arg_rd, arg_rj, arg_si12_21_10}}, ++ // LD.D rd, rj, si12 ++ {mask: 0xffc00000, value: 0x28c00000, op: LD_D, args: instArgs{arg_rd, arg_rj, arg_si12_21_10}}, ++ // LD.H rd, rj, si12 ++ {mask: 0xffc00000, value: 0x28400000, op: LD_H, args: instArgs{arg_rd, arg_rj, arg_si12_21_10}}, ++ // LD.HU rd, rj, si12 ++ {mask: 0xffc00000, value: 0x2a400000, op: LD_HU, args: instArgs{arg_rd, 
arg_rj, arg_si12_21_10}}, ++ // LD.W rd, rj, si12 ++ {mask: 0xffc00000, value: 0x28800000, op: LD_W, args: instArgs{arg_rd, arg_rj, arg_si12_21_10}}, ++ // LD.WU rd, rj, si12 ++ {mask: 0xffc00000, value: 0x2a800000, op: LD_WU, args: instArgs{arg_rd, arg_rj, arg_si12_21_10}}, ++ // LL.D rd, rj, si14 ++ {mask: 0xff000000, value: 0x22000000, op: LL_D, args: instArgs{arg_rd, arg_rj, arg_si14_23_10}}, ++ // LL.W rd, rj, si14 ++ {mask: 0xff000000, value: 0x20000000, op: LL_W, args: instArgs{arg_rd, arg_rj, arg_si14_23_10}}, ++ // LU12I.W rd, si20 ++ {mask: 0xfe000000, value: 0x14000000, op: LU12I_W, args: instArgs{arg_rd, arg_si20_24_5}}, ++ // LU32I.D rd, si20 ++ {mask: 0xfe000000, value: 0x16000000, op: LU32I_D, args: instArgs{arg_rd, arg_si20_24_5}}, ++ // LU52I.D rd, rj, si12 ++ {mask: 0xffc00000, value: 0x03000000, op: LU52I_D, args: instArgs{arg_rd, arg_rj, arg_si12_21_10}}, ++ // MASKEQZ rd, rj, rk ++ {mask: 0xffff8000, value: 0x00130000, op: MASKEQZ, args: instArgs{arg_rd, arg_rj, arg_rk}}, ++ // MASKNEZ rd, rj, rk ++ {mask: 0xffff8000, value: 0x00138000, op: MASKNEZ, args: instArgs{arg_rd, arg_rj, arg_rk}}, ++ // MOD.D rd, rj, rk ++ {mask: 0xffff8000, value: 0x00228000, op: MOD_D, args: instArgs{arg_rd, arg_rj, arg_rk}}, ++ // MOD.DU rd, rj, rk ++ {mask: 0xffff8000, value: 0x00238000, op: MOD_DU, args: instArgs{arg_rd, arg_rj, arg_rk}}, ++ // MOD.W rd, rj, rk ++ {mask: 0xffff8000, value: 0x00208000, op: MOD_W, args: instArgs{arg_rd, arg_rj, arg_rk}}, ++ // MOD.WU rd, rj, rk ++ {mask: 0xffff8000, value: 0x00218000, op: MOD_WU, args: instArgs{arg_rd, arg_rj, arg_rk}}, ++ // MOVCF2FR fd, cj ++ {mask: 0xffffff00, value: 0x0114d400, op: MOVCF2FR, args: instArgs{arg_fd, arg_cj}}, ++ // MOVCF2GR rd, cj ++ {mask: 0xffffff00, value: 0x0114dc00, op: MOVCF2GR, args: instArgs{arg_rd, arg_cj}}, ++ // MOVFCSR2GR rd, fcsr ++ {mask: 0xfffffc00, value: 0x0114c800, op: MOVFCSR2GR, args: instArgs{arg_rd, arg_fcsr_9_5}}, ++ // MOVFR2CF cd, fj ++ {mask: 0xfffffc18, value: 
0x0114d000, op: MOVFR2CF, args: instArgs{arg_cd, arg_fj}}, ++ // MOVFR2GR.D rd, fj ++ {mask: 0xfffffc00, value: 0x0114b800, op: MOVFR2GR_D, args: instArgs{arg_rd, arg_fj}}, ++ // MOVFR2GR.S rd, fj ++ {mask: 0xfffffc00, value: 0x0114b400, op: MOVFR2GR_S, args: instArgs{arg_rd, arg_fj}}, ++ // MOVFRH2GR.S rd, fj ++ {mask: 0xfffffc00, value: 0x0114bc00, op: MOVFRH2GR_S, args: instArgs{arg_rd, arg_fj}}, ++ // MOVGR2CF cd, rj ++ {mask: 0xfffffc18, value: 0x0114d800, op: MOVGR2CF, args: instArgs{arg_cd, arg_rj}}, ++ // MOVGR2FCSR fcsr, rj ++ {mask: 0xfffffc00, value: 0x0114c000, op: MOVGR2FCSR, args: instArgs{arg_fcsr_4_0, arg_rj}}, ++ // MOVGR2FRH.W fd, rj ++ {mask: 0xfffffc00, value: 0x0114ac00, op: MOVGR2FRH_W, args: instArgs{arg_fd, arg_rj}}, ++ // MOVGR2FR.D fd, rj ++ {mask: 0xfffffc00, value: 0x0114a800, op: MOVGR2FR_D, args: instArgs{arg_fd, arg_rj}}, ++ // MOVGR2FR.W fd, rj ++ {mask: 0xfffffc00, value: 0x0114a400, op: MOVGR2FR_W, args: instArgs{arg_fd, arg_rj}}, ++ // MULH.D rd, rj, rk ++ {mask: 0xffff8000, value: 0x001e0000, op: MULH_D, args: instArgs{arg_rd, arg_rj, arg_rk}}, ++ // MULH.DU rd, rj, rk ++ {mask: 0xffff8000, value: 0x001e8000, op: MULH_DU, args: instArgs{arg_rd, arg_rj, arg_rk}}, ++ // MULH.W rd, rj, rk ++ {mask: 0xffff8000, value: 0x001c8000, op: MULH_W, args: instArgs{arg_rd, arg_rj, arg_rk}}, ++ // MULH.WU rd, rj, rk ++ {mask: 0xffff8000, value: 0x001d0000, op: MULH_WU, args: instArgs{arg_rd, arg_rj, arg_rk}}, ++ // MULW.D.W rd, rj, rk ++ {mask: 0xffff8000, value: 0x001f0000, op: MULW_D_W, args: instArgs{arg_rd, arg_rj, arg_rk}}, ++ // MULW.D.WU rd, rj, rk ++ {mask: 0xffff8000, value: 0x001f8000, op: MULW_D_WU, args: instArgs{arg_rd, arg_rj, arg_rk}}, ++ // MUL.D rd, rj, rk ++ {mask: 0xffff8000, value: 0x001d8000, op: MUL_D, args: instArgs{arg_rd, arg_rj, arg_rk}}, ++ // MUL.W rd, rj, rk ++ {mask: 0xffff8000, value: 0x001c0000, op: MUL_W, args: instArgs{arg_rd, arg_rj, arg_rk}}, ++ // NOR rd, rj, rk ++ {mask: 0xffff8000, value: 0x00140000, op: 
NOR, args: instArgs{arg_rd, arg_rj, arg_rk}}, ++ // OR rd, rj, rk ++ {mask: 0xffff8000, value: 0x00150000, op: OR, args: instArgs{arg_rd, arg_rj, arg_rk}}, ++ // ORI rd, rj, ui12 ++ {mask: 0xffc00000, value: 0x03800000, op: ORI, args: instArgs{arg_rd, arg_rj, arg_ui12_21_10}}, ++ // ORN rd, rj, rk ++ {mask: 0xffff8000, value: 0x00160000, op: ORN, args: instArgs{arg_rd, arg_rj, arg_rk}}, ++ // PCADDI rd, si20 ++ {mask: 0xfe000000, value: 0x18000000, op: PCADDI, args: instArgs{arg_rd, arg_si20_24_5}}, ++ // PCADDU12I rd, si20 ++ {mask: 0xfe000000, value: 0x1c000000, op: PCADDU12I, args: instArgs{arg_rd, arg_si20_24_5}}, ++ // PCADDU18I rd, si20 ++ {mask: 0xfe000000, value: 0x1e000000, op: PCADDU18I, args: instArgs{arg_rd, arg_si20_24_5}}, ++ // PCALAU12I rd, si20 ++ {mask: 0xfe000000, value: 0x1a000000, op: PCALAU12I, args: instArgs{arg_rd, arg_si20_24_5}}, ++ // PRELD hint, rj, si12 ++ {mask: 0xffc00000, value: 0x2ac00000, op: PRELD, args: instArgs{arg_hint_4_0, arg_rj, arg_si12_21_10}}, ++ // PRELDX hint, rj, rk ++ {mask: 0xffff8000, value: 0x382c0000, op: PRELDX, args: instArgs{arg_hint_4_0, arg_rj, arg_rk}}, ++ // RDTIMEH.W rd, rj ++ {mask: 0xfffffc00, value: 0x00006400, op: RDTIMEH_W, args: instArgs{arg_rd, arg_rj}}, ++ // RDTIMEL.W rd, rj ++ {mask: 0xfffffc00, value: 0x00006000, op: RDTIMEL_W, args: instArgs{arg_rd, arg_rj}}, ++ // RDTIME.D rd, rj ++ {mask: 0xfffffc00, value: 0x00006800, op: RDTIME_D, args: instArgs{arg_rd, arg_rj}}, ++ // REVB.2H rd, rj ++ {mask: 0xfffffc00, value: 0x00003000, op: REVB_2H, args: instArgs{arg_rd, arg_rj}}, ++ // REVB.2W rd, rj ++ {mask: 0xfffffc00, value: 0x00003800, op: REVB_2W, args: instArgs{arg_rd, arg_rj}}, ++ // REVB.4H rd, rj ++ {mask: 0xfffffc00, value: 0x00003400, op: REVB_4H, args: instArgs{arg_rd, arg_rj}}, ++ // REVB.D rd, rj ++ {mask: 0xfffffc00, value: 0x00003c00, op: REVB_D, args: instArgs{arg_rd, arg_rj}}, ++ // REVH.2W rd, rj ++ {mask: 0xfffffc00, value: 0x00004000, op: REVH_2W, args: instArgs{arg_rd, arg_rj}}, 
++ // REVH.D rd, rj ++ {mask: 0xfffffc00, value: 0x00004400, op: REVH_D, args: instArgs{arg_rd, arg_rj}}, ++ // ROTRI.D rd, rj, ui6 ++ {mask: 0xffff0000, value: 0x004d0000, op: ROTRI_D, args: instArgs{arg_rd, arg_rj, arg_ui6_15_10}}, ++ // ROTRI.W rd, rj, ui5 ++ {mask: 0xffff8000, value: 0x004c8000, op: ROTRI_W, args: instArgs{arg_rd, arg_rj, arg_ui5_14_10}}, ++ // ROTR.D rd, rj, rk ++ {mask: 0xffff8000, value: 0x001b8000, op: ROTR_D, args: instArgs{arg_rd, arg_rj, arg_rk}}, ++ // ROTR.W rd, rj, rk ++ {mask: 0xffff8000, value: 0x001b0000, op: ROTR_W, args: instArgs{arg_rd, arg_rj, arg_rk}}, ++ // SC.D rd, rj, si14 ++ {mask: 0xff000000, value: 0x23000000, op: SC_D, args: instArgs{arg_rd, arg_rj, arg_si14_23_10}}, ++ // SC.W rd, rj, si14 ++ {mask: 0xff000000, value: 0x21000000, op: SC_W, args: instArgs{arg_rd, arg_rj, arg_si14_23_10}}, ++ // SLLI.D rd, rj, ui6 ++ {mask: 0xffff0000, value: 0x00410000, op: SLLI_D, args: instArgs{arg_rd, arg_rj, arg_ui6_15_10}}, ++ // SLLI.W rd, rj, ui5 ++ {mask: 0xffff8000, value: 0x00408000, op: SLLI_W, args: instArgs{arg_rd, arg_rj, arg_ui5_14_10}}, ++ // SLL.D rd, rj, rk ++ {mask: 0xffff8000, value: 0x00188000, op: SLL_D, args: instArgs{arg_rd, arg_rj, arg_rk}}, ++ // SLL.W rd, rj, rk ++ {mask: 0xffff8000, value: 0x00170000, op: SLL_W, args: instArgs{arg_rd, arg_rj, arg_rk}}, ++ // SLT rd, rj, rk ++ {mask: 0xffff8000, value: 0x00120000, op: SLT, args: instArgs{arg_rd, arg_rj, arg_rk}}, ++ // SLTI rd, rj, si12 ++ {mask: 0xffc00000, value: 0x02000000, op: SLTI, args: instArgs{arg_rd, arg_rj, arg_si12_21_10}}, ++ // SLTU rd, rj, rk ++ {mask: 0xffff8000, value: 0x00128000, op: SLTU, args: instArgs{arg_rd, arg_rj, arg_rk}}, ++ // SLTUI rd, rj, si12 ++ {mask: 0xffc00000, value: 0x02400000, op: SLTUI, args: instArgs{arg_rd, arg_rj, arg_si12_21_10}}, ++ // SRAI.D rd, rj, ui6 ++ {mask: 0xffff0000, value: 0x00490000, op: SRAI_D, args: instArgs{arg_rd, arg_rj, arg_ui6_15_10}}, ++ // SRAI.W rd, rj, ui5 ++ {mask: 0xffff8000, value: 0x00488000, 
op: SRAI_W, args: instArgs{arg_rd, arg_rj, arg_ui5_14_10}}, ++ // SRA.D rd, rj, rk ++ {mask: 0xffff8000, value: 0x00198000, op: SRA_D, args: instArgs{arg_rd, arg_rj, arg_rk}}, ++ // SRA.W rd, rj, rk ++ {mask: 0xffff8000, value: 0x00180000, op: SRA_W, args: instArgs{arg_rd, arg_rj, arg_rk}}, ++ // SRLI.D rd, rj, ui6 ++ {mask: 0xffff0000, value: 0x00450000, op: SRLI_D, args: instArgs{arg_rd, arg_rj, arg_ui6_15_10}}, ++ // SRLI.W rd, rj, ui5 ++ {mask: 0xffff8000, value: 0x00448000, op: SRLI_W, args: instArgs{arg_rd, arg_rj, arg_ui5_14_10}}, ++ // SRL.D rd, rj, rk ++ {mask: 0xffff8000, value: 0x00190000, op: SRL_D, args: instArgs{arg_rd, arg_rj, arg_rk}}, ++ // SRL.W rd, rj, rk ++ {mask: 0xffff8000, value: 0x00178000, op: SRL_W, args: instArgs{arg_rd, arg_rj, arg_rk}}, ++ // STGT.B rd, rj, rk ++ {mask: 0xffff8000, value: 0x387c0000, op: STGT_B, args: instArgs{arg_rd, arg_rj, arg_rk}}, ++ // STGT.D rd, rj, rk ++ {mask: 0xffff8000, value: 0x387d8000, op: STGT_D, args: instArgs{arg_rd, arg_rj, arg_rk}}, ++ // STGT.H rd, rj, rk ++ {mask: 0xffff8000, value: 0x387c8000, op: STGT_H, args: instArgs{arg_rd, arg_rj, arg_rk}}, ++ // STGT.W rd, rj, rk ++ {mask: 0xffff8000, value: 0x387d0000, op: STGT_W, args: instArgs{arg_rd, arg_rj, arg_rk}}, ++ // STLE.B rd, rj, rk ++ {mask: 0xffff8000, value: 0x387e0000, op: STLE_B, args: instArgs{arg_rd, arg_rj, arg_rk}}, ++ // STLE.D rd, rj, rk ++ {mask: 0xffff8000, value: 0x387f8000, op: STLE_D, args: instArgs{arg_rd, arg_rj, arg_rk}}, ++ // STLE.H rd, rj, rk ++ {mask: 0xffff8000, value: 0x387e8000, op: STLE_H, args: instArgs{arg_rd, arg_rj, arg_rk}}, ++ // STLE.W rd, rj, rk ++ {mask: 0xffff8000, value: 0x387f0000, op: STLE_W, args: instArgs{arg_rd, arg_rj, arg_rk}}, ++ // STPTR.D rd, rj, si14 ++ {mask: 0xff000000, value: 0x27000000, op: STPTR_D, args: instArgs{arg_rd, arg_rj, arg_si14_23_10}}, ++ // STPTR.W rd, rj, si14 ++ {mask: 0xff000000, value: 0x25000000, op: STPTR_W, args: instArgs{arg_rd, arg_rj, arg_si14_23_10}}, ++ // STX.B rd, rj, 
rk ++ {mask: 0xffff8000, value: 0x38100000, op: STX_B, args: instArgs{arg_rd, arg_rj, arg_rk}}, ++ // STX.D rd, rj, rk ++ {mask: 0xffff8000, value: 0x381c0000, op: STX_D, args: instArgs{arg_rd, arg_rj, arg_rk}}, ++ // STX.H rd, rj, rk ++ {mask: 0xffff8000, value: 0x38140000, op: STX_H, args: instArgs{arg_rd, arg_rj, arg_rk}}, ++ // STX.W rd, rj, rk ++ {mask: 0xffff8000, value: 0x38180000, op: STX_W, args: instArgs{arg_rd, arg_rj, arg_rk}}, ++ // ST.B rd, rj, si12 ++ {mask: 0xffc00000, value: 0x29000000, op: ST_B, args: instArgs{arg_rd, arg_rj, arg_si12_21_10}}, ++ // ST.D rd, rj, si12 ++ {mask: 0xffc00000, value: 0x29c00000, op: ST_D, args: instArgs{arg_rd, arg_rj, arg_si12_21_10}}, ++ // ST.H rd, rj, si12 ++ {mask: 0xffc00000, value: 0x29400000, op: ST_H, args: instArgs{arg_rd, arg_rj, arg_si12_21_10}}, ++ // ST.W rd, rj, si12 ++ {mask: 0xffc00000, value: 0x29800000, op: ST_W, args: instArgs{arg_rd, arg_rj, arg_si12_21_10}}, ++ // SUB.D rd, rj, rk ++ {mask: 0xffff8000, value: 0x00118000, op: SUB_D, args: instArgs{arg_rd, arg_rj, arg_rk}}, ++ // SUB.W rd, rj, rk ++ {mask: 0xffff8000, value: 0x00110000, op: SUB_W, args: instArgs{arg_rd, arg_rj, arg_rk}}, ++ // SYSCALL code ++ {mask: 0xffff8000, value: 0x002b0000, op: SYSCALL, args: instArgs{arg_code_14_0}}, ++ // TLBCLR ++ {mask: 0xffffffff, value: 0x06482000, op: TLBCLR, args: instArgs{}}, ++ // TLBFILL ++ {mask: 0xffffffff, value: 0x06483400, op: TLBFILL, args: instArgs{}}, ++ // TLBFLUSH ++ {mask: 0xffffffff, value: 0x06482400, op: TLBFLUSH, args: instArgs{}}, ++ // TLBRD ++ {mask: 0xffffffff, value: 0x06482c00, op: TLBRD, args: instArgs{}}, ++ // TLBSRCH ++ {mask: 0xffffffff, value: 0x06482800, op: TLBSRCH, args: instArgs{}}, ++ // TLBWR ++ {mask: 0xffffffff, value: 0x06483000, op: TLBWR, args: instArgs{}}, ++ // XOR rd, rj, rk ++ {mask: 0xffff8000, value: 0x00158000, op: XOR, args: instArgs{arg_rd, arg_rj, arg_rk}}, ++ // XORI rd, rj, ui12 ++ {mask: 0xffc00000, value: 0x03c00000, op: XORI, args: 
instArgs{arg_rd, arg_rj, arg_ui12_21_10}}, ++} +-- +2.38.1 + diff --git a/0034-cmd-internal-objfile-add-loong64-disassembler-suppor.patch b/0034-cmd-internal-objfile-add-loong64-disassembler-suppor.patch new file mode 100644 index 0000000000000000000000000000000000000000..42573eebafc216156901bd2a709452731f3573ae --- /dev/null +++ b/0034-cmd-internal-objfile-add-loong64-disassembler-suppor.patch @@ -0,0 +1,73 @@ +From 96e128b3aa87a849210e7eb5212e3607abf2254b Mon Sep 17 00:00:00 2001 +From: Xiaodong Liu +Date: Fri, 10 Feb 2023 15:10:48 +0800 +Subject: [PATCH 34/62] cmd/internal/objfile: add loong64 disassembler support + +Change-Id: I5628436209aa58f1ba781db15ca6e78b62123065 +--- + src/cmd/internal/objfile/disasm.go | 14 ++++++++++++++ + src/cmd/internal/objfile/elf.go | 2 ++ + 2 files changed, 16 insertions(+) + +diff --git a/src/cmd/internal/objfile/disasm.go b/src/cmd/internal/objfile/disasm.go +index c298d7e1a9..129741fe01 100644 +--- a/src/cmd/internal/objfile/disasm.go ++++ b/src/cmd/internal/objfile/disasm.go +@@ -23,6 +23,7 @@ import ( + + "golang.org/x/arch/arm/armasm" + "golang.org/x/arch/arm64/arm64asm" ++ "golang.org/x/arch/loong64/loong64asm" + "golang.org/x/arch/ppc64/ppc64asm" + "golang.org/x/arch/x86/x86asm" + ) +@@ -366,6 +367,17 @@ func disasm_arm64(code []byte, pc uint64, lookup lookupFunc, byteOrder binary.By + return text, 4 + } + ++func disasm_loong64(code []byte, pc uint64, lookup lookupFunc, byteOrder binary.ByteOrder, gnuAsm bool) (string, int) { ++ inst, err := loong64asm.Decode(code) ++ var text string ++ if err != nil || inst.Op == 0 { ++ text = "?" 
++ } else if gnuAsm { ++ text = fmt.Sprintf("%s %s", "", loong64asm.GNUSyntax(inst)) ++ } ++ return text, 4 ++} ++ + func disasm_ppc64(code []byte, pc uint64, lookup lookupFunc, byteOrder binary.ByteOrder, gnuAsm bool) (string, int) { + inst, err := ppc64asm.Decode(code, byteOrder) + var text string +@@ -388,6 +400,7 @@ var disasms = map[string]disasmFunc{ + "amd64": disasm_amd64, + "arm": disasm_arm, + "arm64": disasm_arm64, ++ "loong64": disasm_loong64, + "ppc64": disasm_ppc64, + "ppc64le": disasm_ppc64, + } +@@ -397,6 +410,7 @@ var byteOrders = map[string]binary.ByteOrder{ + "amd64": binary.LittleEndian, + "arm": binary.LittleEndian, + "arm64": binary.LittleEndian, ++ "loong64": binary.LittleEndian, + "ppc64": binary.BigEndian, + "ppc64le": binary.LittleEndian, + "s390x": binary.BigEndian, +diff --git a/src/cmd/internal/objfile/elf.go b/src/cmd/internal/objfile/elf.go +index c64c2540f4..2ad0465170 100644 +--- a/src/cmd/internal/objfile/elf.go ++++ b/src/cmd/internal/objfile/elf.go +@@ -120,6 +120,8 @@ func (f *elfFile) goarch() string { + return "arm" + case elf.EM_AARCH64: + return "arm64" ++ case elf.EM_LOONGARCH: ++ return "loong64" + case elf.EM_PPC64: + if f.elf.ByteOrder == binary.LittleEndian { + return "ppc64le" +-- +2.38.1 + diff --git a/0035-cmd-compile-link-internal-runtime-support-buildmode-.patch b/0035-cmd-compile-link-internal-runtime-support-buildmode-.patch new file mode 100644 index 0000000000000000000000000000000000000000..b841f5dfe6081c4d9a9ddfda13743e2258fe81ee --- /dev/null +++ b/0035-cmd-compile-link-internal-runtime-support-buildmode-.patch @@ -0,0 +1,1372 @@ +From 87dbb8a1616793bd06dcdc43196ca8f887c0d918 Mon Sep 17 00:00:00 2001 +From: Guoqi Chen +Date: Wed, 22 Mar 2023 06:31:51 +0800 +Subject: [PATCH 35/62] cmd/{compile,link,internal},runtime: support + -buildmode=shared for loong64 + +Signed-off-by: Guoqi Chen +Change-Id: I1c02373496d5f6e13282a4055d294dc76db30518 +--- + .../compile/internal/ssa/_gen/LOONG64.rules | 101 ++++------ + 
src/cmd/compile/internal/ssa/regalloc.go | 2 + + .../compile/internal/ssa/rewriteLOONG64.go | 190 +++++++++++------- + src/cmd/internal/obj/loong64/a.out.go | 1 + + src/cmd/internal/obj/loong64/asm.go | 75 ++++--- + src/cmd/internal/obj/loong64/cnames.go | 1 + + src/cmd/internal/obj/loong64/obj.go | 116 +++++++++++ + src/cmd/internal/objabi/reloctype.go | 5 + + src/cmd/internal/objabi/reloctype_string.go | 20 +- + src/cmd/link/internal/ld/config.go | 2 +- + src/cmd/link/internal/loong64/asm.go | 57 +++++- + src/internal/platform/supported.go | 2 +- + src/runtime/asm_loong64.s | 10 + + src/runtime/internal/atomic/atomic_loong64.s | 6 +- + 14 files changed, 408 insertions(+), 180 deletions(-) + +diff --git a/src/cmd/compile/internal/ssa/_gen/LOONG64.rules b/src/cmd/compile/internal/ssa/_gen/LOONG64.rules +index 1caaf13600..f8c07f3024 100644 +--- a/src/cmd/compile/internal/ssa/_gen/LOONG64.rules ++++ b/src/cmd/compile/internal/ssa/_gen/LOONG64.rules +@@ -453,66 +453,47 @@ + (ADDVconst [off1] (MOVVaddr [off2] {sym} ptr)) && is32Bit(off1+int64(off2)) => (MOVVaddr [int32(off1)+int32(off2)] {sym} ptr) + + // fold address into load/store +-(MOVBload [off1] {sym} (ADDVconst [off2] ptr) mem) && is32Bit(int64(off1)+off2) => (MOVBload [off1+int32(off2)] {sym} ptr mem) +-(MOVBUload [off1] {sym} (ADDVconst [off2] ptr) mem) && is32Bit(int64(off1)+off2) => (MOVBUload [off1+int32(off2)] {sym} ptr mem) +-(MOVHload [off1] {sym} (ADDVconst [off2] ptr) mem) && is32Bit(int64(off1)+off2) => (MOVHload [off1+int32(off2)] {sym} ptr mem) +-(MOVHUload [off1] {sym} (ADDVconst [off2] ptr) mem) && is32Bit(int64(off1)+off2) => (MOVHUload [off1+int32(off2)] {sym} ptr mem) +-(MOVWload [off1] {sym} (ADDVconst [off2] ptr) mem) && is32Bit(int64(off1)+off2) => (MOVWload [off1+int32(off2)] {sym} ptr mem) +-(MOVWUload [off1] {sym} (ADDVconst [off2] ptr) mem) && is32Bit(int64(off1)+off2) => (MOVWUload [off1+int32(off2)] {sym} ptr mem) +-(MOVVload [off1] {sym} (ADDVconst [off2] ptr) mem) && 
is32Bit(int64(off1)+off2) => (MOVVload [off1+int32(off2)] {sym} ptr mem) +-(MOVFload [off1] {sym} (ADDVconst [off2] ptr) mem) && is32Bit(int64(off1)+off2) => (MOVFload [off1+int32(off2)] {sym} ptr mem) +-(MOVDload [off1] {sym} (ADDVconst [off2] ptr) mem) && is32Bit(int64(off1)+off2) => (MOVDload [off1+int32(off2)] {sym} ptr mem) +- +-(MOVBstore [off1] {sym} (ADDVconst [off2] ptr) val mem) && is32Bit(int64(off1)+off2) => (MOVBstore [off1+int32(off2)] {sym} ptr val mem) +-(MOVHstore [off1] {sym} (ADDVconst [off2] ptr) val mem) && is32Bit(int64(off1)+off2) => (MOVHstore [off1+int32(off2)] {sym} ptr val mem) +-(MOVWstore [off1] {sym} (ADDVconst [off2] ptr) val mem) && is32Bit(int64(off1)+off2) => (MOVWstore [off1+int32(off2)] {sym} ptr val mem) +-(MOVVstore [off1] {sym} (ADDVconst [off2] ptr) val mem) && is32Bit(int64(off1)+off2) => (MOVVstore [off1+int32(off2)] {sym} ptr val mem) +-(MOVFstore [off1] {sym} (ADDVconst [off2] ptr) val mem) && is32Bit(int64(off1)+off2) => (MOVFstore [off1+int32(off2)] {sym} ptr val mem) +-(MOVDstore [off1] {sym} (ADDVconst [off2] ptr) val mem) && is32Bit(int64(off1)+off2) => (MOVDstore [off1+int32(off2)] {sym} ptr val mem) +-(MOVBstorezero [off1] {sym} (ADDVconst [off2] ptr) mem) && is32Bit(int64(off1)+off2) => (MOVBstorezero [off1+int32(off2)] {sym} ptr mem) +-(MOVHstorezero [off1] {sym} (ADDVconst [off2] ptr) mem) && is32Bit(int64(off1)+off2) => (MOVHstorezero [off1+int32(off2)] {sym} ptr mem) +-(MOVWstorezero [off1] {sym} (ADDVconst [off2] ptr) mem) && is32Bit(int64(off1)+off2) => (MOVWstorezero [off1+int32(off2)] {sym} ptr mem) +-(MOVVstorezero [off1] {sym} (ADDVconst [off2] ptr) mem) && is32Bit(int64(off1)+off2) => (MOVVstorezero [off1+int32(off2)] {sym} ptr mem) +- +-(MOVBload [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) => +- (MOVBload [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr mem) +-(MOVBUload [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) mem) && 
canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) => +- (MOVBUload [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr mem) +-(MOVHload [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) => +- (MOVHload [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr mem) +-(MOVHUload [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) => +- (MOVHUload [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr mem) +-(MOVWload [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) => +- (MOVWload [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr mem) +-(MOVWUload [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) => +- (MOVWUload [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr mem) +-(MOVVload [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) => +- (MOVVload [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr mem) +-(MOVFload [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) => +- (MOVFload [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr mem) +-(MOVDload [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) => +- (MOVDload [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr mem) +- +-(MOVBstore [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) val mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) => +- (MOVBstore [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr val mem) +-(MOVHstore [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) val mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) => +- (MOVHstore [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr val mem) +-(MOVWstore [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) val mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) => +- (MOVWstore 
[off1+int32(off2)] {mergeSym(sym1,sym2)} ptr val mem) +-(MOVVstore [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) val mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) => +- (MOVVstore [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr val mem) +-(MOVFstore [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) val mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) => +- (MOVFstore [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr val mem) +-(MOVDstore [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) val mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) => +- (MOVDstore [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr val mem) +-(MOVBstorezero [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) => +- (MOVBstorezero [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr mem) +-(MOVHstorezero [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) => +- (MOVHstorezero [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr mem) +-(MOVWstorezero [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) => +- (MOVWstorezero [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr mem) +-(MOVVstorezero [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) => +- (MOVVstorezero [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr mem) ++(MOVBload [off1] {sym} (ADDVconst [off2] ptr) mem) && is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => (MOVBload [off1+int32(off2)] {sym} ptr mem) ++(MOVBUload [off1] {sym} (ADDVconst [off2] ptr) mem) && is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => (MOVBUload [off1+int32(off2)] {sym} ptr mem) ++(MOVHload [off1] {sym} (ADDVconst [off2] ptr) mem) && is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => (MOVHload [off1+int32(off2)] {sym} ptr mem) ++(MOVHUload [off1] 
{sym} (ADDVconst [off2] ptr) mem) && is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => (MOVHUload [off1+int32(off2)] {sym} ptr mem) ++(MOVWload [off1] {sym} (ADDVconst [off2] ptr) mem) && is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => (MOVWload [off1+int32(off2)] {sym} ptr mem) ++(MOVWUload [off1] {sym} (ADDVconst [off2] ptr) mem) && is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => (MOVWUload [off1+int32(off2)] {sym} ptr mem) ++(MOVVload [off1] {sym} (ADDVconst [off2] ptr) mem) && is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => (MOVVload [off1+int32(off2)] {sym} ptr mem) ++(MOVFload [off1] {sym} (ADDVconst [off2] ptr) mem) && is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => (MOVFload [off1+int32(off2)] {sym} ptr mem) ++(MOVDload [off1] {sym} (ADDVconst [off2] ptr) mem) && is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => (MOVDload [off1+int32(off2)] {sym} ptr mem) ++ ++(MOVBstore [off1] {sym} (ADDVconst [off2] ptr) val mem) && is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => (MOVBstore [off1+int32(off2)] {sym} ptr val mem) ++(MOVHstore [off1] {sym} (ADDVconst [off2] ptr) val mem) && is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => (MOVHstore [off1+int32(off2)] {sym} ptr val mem) ++(MOVWstore [off1] {sym} (ADDVconst [off2] ptr) val mem) && is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => (MOVWstore [off1+int32(off2)] {sym} ptr val mem) ++(MOVVstore [off1] {sym} (ADDVconst [off2] ptr) val mem) && is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => (MOVVstore [off1+int32(off2)] {sym} ptr val mem) ++(MOVFstore [off1] {sym} (ADDVconst [off2] ptr) val mem) && is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => (MOVFstore [off1+int32(off2)] {sym} ptr val mem) 
++(MOVDstore [off1] {sym} (ADDVconst [off2] ptr) val mem) && is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => (MOVDstore [off1+int32(off2)] {sym} ptr val mem) ++(MOVBstorezero [off1] {sym} (ADDVconst [off2] ptr) mem) && is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => (MOVBstorezero [off1+int32(off2)] {sym} ptr mem) ++(MOVHstorezero [off1] {sym} (ADDVconst [off2] ptr) mem) && is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => (MOVHstorezero [off1+int32(off2)] {sym} ptr mem) ++(MOVWstorezero [off1] {sym} (ADDVconst [off2] ptr) mem) && is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => (MOVWstorezero [off1+int32(off2)] {sym} ptr mem) ++(MOVVstorezero [off1] {sym} (ADDVconst [off2] ptr) mem) && is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => (MOVVstorezero [off1+int32(off2)] {sym} ptr mem) ++ ++(MOVBload [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => (MOVBload [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr mem) ++(MOVBUload [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => (MOVBUload [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr mem) ++(MOVHload [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => (MOVHload [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr mem) ++(MOVHUload [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => (MOVHUload [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr mem) ++(MOVWload [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) && 
is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => (MOVWload [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr mem) ++(MOVWUload [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => (MOVWUload [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr mem) ++(MOVVload [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => (MOVVload [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr mem) ++(MOVFload [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => (MOVFload [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr mem) ++(MOVDload [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => (MOVDload [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr mem) ++ ++(MOVBstore [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) val mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => (MOVBstore [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr val mem) ++(MOVHstore [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) val mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => (MOVHstore [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr val mem) ++(MOVWstore [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) val mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => (MOVWstore [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr val mem) ++(MOVVstore [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) val mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => 
(MOVVstore [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr val mem) ++(MOVFstore [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) val mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => (MOVFstore [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr val mem) ++(MOVDstore [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) val mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => (MOVDstore [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr val mem) ++(MOVBstorezero [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => (MOVBstorezero [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr mem) ++(MOVHstorezero [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => (MOVHstorezero [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr mem) ++(MOVWstorezero [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => (MOVWstorezero [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr mem) ++(MOVVstorezero [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => (MOVVstorezero [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr mem) + + (LoweredAtomicStore(32|64) ptr (MOVVconst [0]) mem) => (LoweredAtomicStorezero(32|64) ptr mem) + (LoweredAtomicAdd32 ptr (MOVVconst [c]) mem) && is32Bit(c) => (LoweredAtomicAddconst32 [int32(c)] ptr mem) +diff --git a/src/cmd/compile/internal/ssa/regalloc.go b/src/cmd/compile/internal/ssa/regalloc.go +index 294c522a90..db6b5a6eca 100644 +--- a/src/cmd/compile/internal/ssa/regalloc.go ++++ b/src/cmd/compile/internal/ssa/regalloc.go +@@ -640,6 +640,8 @@ func (s 
*regAllocState) init(f *Func) { + s.allocatable &^= 1 << 9 // R9 + case "arm64": + // nothing to do ++ case "loong64": ++ // nothing to do + case "ppc64le": // R2 already reserved. + // nothing to do + case "riscv64": // X3 (aka GP) and X4 (aka TP) already reserved. +diff --git a/src/cmd/compile/internal/ssa/rewriteLOONG64.go b/src/cmd/compile/internal/ssa/rewriteLOONG64.go +index f6da0b7ff0..1a2593ef55 100644 +--- a/src/cmd/compile/internal/ssa/rewriteLOONG64.go ++++ b/src/cmd/compile/internal/ssa/rewriteLOONG64.go +@@ -1674,8 +1674,10 @@ func rewriteValueLOONG64_OpLOONG64MASKNEZ(v *Value) bool { + func rewriteValueLOONG64_OpLOONG64MOVBUload(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] ++ b := v.Block ++ config := b.Func.Config + // match: (MOVBUload [off1] {sym} (ADDVconst [off2] ptr) mem) +- // cond: is32Bit(int64(off1)+off2) ++ // cond: is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) + // result: (MOVBUload [off1+int32(off2)] {sym} ptr mem) + for { + off1 := auxIntToInt32(v.AuxInt) +@@ -1686,7 +1688,7 @@ func rewriteValueLOONG64_OpLOONG64MOVBUload(v *Value) bool { + off2 := auxIntToInt64(v_0.AuxInt) + ptr := v_0.Args[0] + mem := v_1 +- if !(is32Bit(int64(off1) + off2)) { ++ if !(is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) { + break + } + v.reset(OpLOONG64MOVBUload) +@@ -1696,7 +1698,7 @@ func rewriteValueLOONG64_OpLOONG64MOVBUload(v *Value) bool { + return true + } + // match: (MOVBUload [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) mem) +- // cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) ++ // cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) + // result: (MOVBUload [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr mem) + for { + off1 := auxIntToInt32(v.AuxInt) +@@ -1708,7 +1710,7 @@ func rewriteValueLOONG64_OpLOONG64MOVBUload(v *Value) bool { + sym2 := auxToSym(v_0.Aux) + ptr := v_0.Args[0] + mem := v_1 +- if 
!(canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2))) { ++ if !(canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) { + break + } + v.reset(OpLOONG64MOVBUload) +@@ -1759,8 +1761,10 @@ func rewriteValueLOONG64_OpLOONG64MOVBUreg(v *Value) bool { + func rewriteValueLOONG64_OpLOONG64MOVBload(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] ++ b := v.Block ++ config := b.Func.Config + // match: (MOVBload [off1] {sym} (ADDVconst [off2] ptr) mem) +- // cond: is32Bit(int64(off1)+off2) ++ // cond: is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) + // result: (MOVBload [off1+int32(off2)] {sym} ptr mem) + for { + off1 := auxIntToInt32(v.AuxInt) +@@ -1771,7 +1775,7 @@ func rewriteValueLOONG64_OpLOONG64MOVBload(v *Value) bool { + off2 := auxIntToInt64(v_0.AuxInt) + ptr := v_0.Args[0] + mem := v_1 +- if !(is32Bit(int64(off1) + off2)) { ++ if !(is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) { + break + } + v.reset(OpLOONG64MOVBload) +@@ -1781,7 +1785,7 @@ func rewriteValueLOONG64_OpLOONG64MOVBload(v *Value) bool { + return true + } + // match: (MOVBload [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) mem) +- // cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) ++ // cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) + // result: (MOVBload [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr mem) + for { + off1 := auxIntToInt32(v.AuxInt) +@@ -1793,7 +1797,7 @@ func rewriteValueLOONG64_OpLOONG64MOVBload(v *Value) bool { + sym2 := auxToSym(v_0.Aux) + ptr := v_0.Args[0] + mem := v_1 +- if !(canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2))) { ++ if !(canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) { + break + } + v.reset(OpLOONG64MOVBload) +@@ -1845,8 +1849,10 @@ func rewriteValueLOONG64_OpLOONG64MOVBstore(v *Value) bool { 
+ v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] ++ b := v.Block ++ config := b.Func.Config + // match: (MOVBstore [off1] {sym} (ADDVconst [off2] ptr) val mem) +- // cond: is32Bit(int64(off1)+off2) ++ // cond: is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) + // result: (MOVBstore [off1+int32(off2)] {sym} ptr val mem) + for { + off1 := auxIntToInt32(v.AuxInt) +@@ -1858,7 +1864,7 @@ func rewriteValueLOONG64_OpLOONG64MOVBstore(v *Value) bool { + ptr := v_0.Args[0] + val := v_1 + mem := v_2 +- if !(is32Bit(int64(off1) + off2)) { ++ if !(is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) { + break + } + v.reset(OpLOONG64MOVBstore) +@@ -1868,7 +1874,7 @@ func rewriteValueLOONG64_OpLOONG64MOVBstore(v *Value) bool { + return true + } + // match: (MOVBstore [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) val mem) +- // cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) ++ // cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) + // result: (MOVBstore [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr val mem) + for { + off1 := auxIntToInt32(v.AuxInt) +@@ -1881,7 +1887,7 @@ func rewriteValueLOONG64_OpLOONG64MOVBstore(v *Value) bool { + ptr := v_0.Args[0] + val := v_1 + mem := v_2 +- if !(canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2))) { ++ if !(canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) { + break + } + v.reset(OpLOONG64MOVBstore) +@@ -1997,8 +2003,10 @@ func rewriteValueLOONG64_OpLOONG64MOVBstore(v *Value) bool { + func rewriteValueLOONG64_OpLOONG64MOVBstorezero(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] ++ b := v.Block ++ config := b.Func.Config + // match: (MOVBstorezero [off1] {sym} (ADDVconst [off2] ptr) mem) +- // cond: is32Bit(int64(off1)+off2) ++ // cond: is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) + // result: 
(MOVBstorezero [off1+int32(off2)] {sym} ptr mem) + for { + off1 := auxIntToInt32(v.AuxInt) +@@ -2009,7 +2017,7 @@ func rewriteValueLOONG64_OpLOONG64MOVBstorezero(v *Value) bool { + off2 := auxIntToInt64(v_0.AuxInt) + ptr := v_0.Args[0] + mem := v_1 +- if !(is32Bit(int64(off1) + off2)) { ++ if !(is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) { + break + } + v.reset(OpLOONG64MOVBstorezero) +@@ -2019,7 +2027,7 @@ func rewriteValueLOONG64_OpLOONG64MOVBstorezero(v *Value) bool { + return true + } + // match: (MOVBstorezero [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) mem) +- // cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) ++ // cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) + // result: (MOVBstorezero [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr mem) + for { + off1 := auxIntToInt32(v.AuxInt) +@@ -2031,7 +2039,7 @@ func rewriteValueLOONG64_OpLOONG64MOVBstorezero(v *Value) bool { + sym2 := auxToSym(v_0.Aux) + ptr := v_0.Args[0] + mem := v_1 +- if !(canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2))) { ++ if !(canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) { + break + } + v.reset(OpLOONG64MOVBstorezero) +@@ -2045,8 +2053,10 @@ func rewriteValueLOONG64_OpLOONG64MOVBstorezero(v *Value) bool { + func rewriteValueLOONG64_OpLOONG64MOVDload(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] ++ b := v.Block ++ config := b.Func.Config + // match: (MOVDload [off1] {sym} (ADDVconst [off2] ptr) mem) +- // cond: is32Bit(int64(off1)+off2) ++ // cond: is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) + // result: (MOVDload [off1+int32(off2)] {sym} ptr mem) + for { + off1 := auxIntToInt32(v.AuxInt) +@@ -2057,7 +2067,7 @@ func rewriteValueLOONG64_OpLOONG64MOVDload(v *Value) bool { + off2 := auxIntToInt64(v_0.AuxInt) + ptr := v_0.Args[0] + mem := v_1 +- if !(is32Bit(int64(off1) 
+ off2)) { ++ if !(is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) { + break + } + v.reset(OpLOONG64MOVDload) +@@ -2067,7 +2077,7 @@ func rewriteValueLOONG64_OpLOONG64MOVDload(v *Value) bool { + return true + } + // match: (MOVDload [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) mem) +- // cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) ++ // cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) + // result: (MOVDload [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr mem) + for { + off1 := auxIntToInt32(v.AuxInt) +@@ -2079,7 +2089,7 @@ func rewriteValueLOONG64_OpLOONG64MOVDload(v *Value) bool { + sym2 := auxToSym(v_0.Aux) + ptr := v_0.Args[0] + mem := v_1 +- if !(canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2))) { ++ if !(canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) { + break + } + v.reset(OpLOONG64MOVDload) +@@ -2094,8 +2104,10 @@ func rewriteValueLOONG64_OpLOONG64MOVDstore(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] ++ b := v.Block ++ config := b.Func.Config + // match: (MOVDstore [off1] {sym} (ADDVconst [off2] ptr) val mem) +- // cond: is32Bit(int64(off1)+off2) ++ // cond: is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) + // result: (MOVDstore [off1+int32(off2)] {sym} ptr val mem) + for { + off1 := auxIntToInt32(v.AuxInt) +@@ -2107,7 +2119,7 @@ func rewriteValueLOONG64_OpLOONG64MOVDstore(v *Value) bool { + ptr := v_0.Args[0] + val := v_1 + mem := v_2 +- if !(is32Bit(int64(off1) + off2)) { ++ if !(is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) { + break + } + v.reset(OpLOONG64MOVDstore) +@@ -2117,7 +2129,7 @@ func rewriteValueLOONG64_OpLOONG64MOVDstore(v *Value) bool { + return true + } + // match: (MOVDstore [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) val mem) +- // cond: canMergeSym(sym1,sym2) && 
is32Bit(int64(off1)+int64(off2)) ++ // cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) + // result: (MOVDstore [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr val mem) + for { + off1 := auxIntToInt32(v.AuxInt) +@@ -2130,7 +2142,7 @@ func rewriteValueLOONG64_OpLOONG64MOVDstore(v *Value) bool { + ptr := v_0.Args[0] + val := v_1 + mem := v_2 +- if !(canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2))) { ++ if !(canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) { + break + } + v.reset(OpLOONG64MOVDstore) +@@ -2144,8 +2156,10 @@ func rewriteValueLOONG64_OpLOONG64MOVDstore(v *Value) bool { + func rewriteValueLOONG64_OpLOONG64MOVFload(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] ++ b := v.Block ++ config := b.Func.Config + // match: (MOVFload [off1] {sym} (ADDVconst [off2] ptr) mem) +- // cond: is32Bit(int64(off1)+off2) ++ // cond: is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) + // result: (MOVFload [off1+int32(off2)] {sym} ptr mem) + for { + off1 := auxIntToInt32(v.AuxInt) +@@ -2156,7 +2170,7 @@ func rewriteValueLOONG64_OpLOONG64MOVFload(v *Value) bool { + off2 := auxIntToInt64(v_0.AuxInt) + ptr := v_0.Args[0] + mem := v_1 +- if !(is32Bit(int64(off1) + off2)) { ++ if !(is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) { + break + } + v.reset(OpLOONG64MOVFload) +@@ -2166,7 +2180,7 @@ func rewriteValueLOONG64_OpLOONG64MOVFload(v *Value) bool { + return true + } + // match: (MOVFload [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) mem) +- // cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) ++ // cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) + // result: (MOVFload [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr mem) + for { + off1 := auxIntToInt32(v.AuxInt) +@@ -2178,7 +2192,7 @@ func 
rewriteValueLOONG64_OpLOONG64MOVFload(v *Value) bool { + sym2 := auxToSym(v_0.Aux) + ptr := v_0.Args[0] + mem := v_1 +- if !(canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2))) { ++ if !(canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) { + break + } + v.reset(OpLOONG64MOVFload) +@@ -2193,8 +2207,10 @@ func rewriteValueLOONG64_OpLOONG64MOVFstore(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] ++ b := v.Block ++ config := b.Func.Config + // match: (MOVFstore [off1] {sym} (ADDVconst [off2] ptr) val mem) +- // cond: is32Bit(int64(off1)+off2) ++ // cond: is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) + // result: (MOVFstore [off1+int32(off2)] {sym} ptr val mem) + for { + off1 := auxIntToInt32(v.AuxInt) +@@ -2206,7 +2222,7 @@ func rewriteValueLOONG64_OpLOONG64MOVFstore(v *Value) bool { + ptr := v_0.Args[0] + val := v_1 + mem := v_2 +- if !(is32Bit(int64(off1) + off2)) { ++ if !(is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) { + break + } + v.reset(OpLOONG64MOVFstore) +@@ -2216,7 +2232,7 @@ func rewriteValueLOONG64_OpLOONG64MOVFstore(v *Value) bool { + return true + } + // match: (MOVFstore [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) val mem) +- // cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) ++ // cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) + // result: (MOVFstore [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr val mem) + for { + off1 := auxIntToInt32(v.AuxInt) +@@ -2229,7 +2245,7 @@ func rewriteValueLOONG64_OpLOONG64MOVFstore(v *Value) bool { + ptr := v_0.Args[0] + val := v_1 + mem := v_2 +- if !(canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2))) { ++ if !(canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) { + break + } + v.reset(OpLOONG64MOVFstore) +@@ -2243,8 
+2259,10 @@ func rewriteValueLOONG64_OpLOONG64MOVFstore(v *Value) bool { + func rewriteValueLOONG64_OpLOONG64MOVHUload(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] ++ b := v.Block ++ config := b.Func.Config + // match: (MOVHUload [off1] {sym} (ADDVconst [off2] ptr) mem) +- // cond: is32Bit(int64(off1)+off2) ++ // cond: is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) + // result: (MOVHUload [off1+int32(off2)] {sym} ptr mem) + for { + off1 := auxIntToInt32(v.AuxInt) +@@ -2255,7 +2273,7 @@ func rewriteValueLOONG64_OpLOONG64MOVHUload(v *Value) bool { + off2 := auxIntToInt64(v_0.AuxInt) + ptr := v_0.Args[0] + mem := v_1 +- if !(is32Bit(int64(off1) + off2)) { ++ if !(is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) { + break + } + v.reset(OpLOONG64MOVHUload) +@@ -2265,7 +2283,7 @@ func rewriteValueLOONG64_OpLOONG64MOVHUload(v *Value) bool { + return true + } + // match: (MOVHUload [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) mem) +- // cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) ++ // cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) + // result: (MOVHUload [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr mem) + for { + off1 := auxIntToInt32(v.AuxInt) +@@ -2277,7 +2295,7 @@ func rewriteValueLOONG64_OpLOONG64MOVHUload(v *Value) bool { + sym2 := auxToSym(v_0.Aux) + ptr := v_0.Args[0] + mem := v_1 +- if !(canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2))) { ++ if !(canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) { + break + } + v.reset(OpLOONG64MOVHUload) +@@ -2350,8 +2368,10 @@ func rewriteValueLOONG64_OpLOONG64MOVHUreg(v *Value) bool { + func rewriteValueLOONG64_OpLOONG64MOVHload(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] ++ b := v.Block ++ config := b.Func.Config + // match: (MOVHload [off1] {sym} (ADDVconst [off2] ptr) mem) +- // cond: 
is32Bit(int64(off1)+off2) ++ // cond: is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) + // result: (MOVHload [off1+int32(off2)] {sym} ptr mem) + for { + off1 := auxIntToInt32(v.AuxInt) +@@ -2362,7 +2382,7 @@ func rewriteValueLOONG64_OpLOONG64MOVHload(v *Value) bool { + off2 := auxIntToInt64(v_0.AuxInt) + ptr := v_0.Args[0] + mem := v_1 +- if !(is32Bit(int64(off1) + off2)) { ++ if !(is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) { + break + } + v.reset(OpLOONG64MOVHload) +@@ -2372,7 +2392,7 @@ func rewriteValueLOONG64_OpLOONG64MOVHload(v *Value) bool { + return true + } + // match: (MOVHload [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) mem) +- // cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) ++ // cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) + // result: (MOVHload [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr mem) + for { + off1 := auxIntToInt32(v.AuxInt) +@@ -2384,7 +2404,7 @@ func rewriteValueLOONG64_OpLOONG64MOVHload(v *Value) bool { + sym2 := auxToSym(v_0.Aux) + ptr := v_0.Args[0] + mem := v_1 +- if !(canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2))) { ++ if !(canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) { + break + } + v.reset(OpLOONG64MOVHload) +@@ -2480,8 +2500,10 @@ func rewriteValueLOONG64_OpLOONG64MOVHstore(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] ++ b := v.Block ++ config := b.Func.Config + // match: (MOVHstore [off1] {sym} (ADDVconst [off2] ptr) val mem) +- // cond: is32Bit(int64(off1)+off2) ++ // cond: is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) + // result: (MOVHstore [off1+int32(off2)] {sym} ptr val mem) + for { + off1 := auxIntToInt32(v.AuxInt) +@@ -2493,7 +2515,7 @@ func rewriteValueLOONG64_OpLOONG64MOVHstore(v *Value) bool { + ptr := v_0.Args[0] + val := v_1 + mem := v_2 
+- if !(is32Bit(int64(off1) + off2)) { ++ if !(is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) { + break + } + v.reset(OpLOONG64MOVHstore) +@@ -2503,7 +2525,7 @@ func rewriteValueLOONG64_OpLOONG64MOVHstore(v *Value) bool { + return true + } + // match: (MOVHstore [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) val mem) +- // cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) ++ // cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) + // result: (MOVHstore [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr val mem) + for { + off1 := auxIntToInt32(v.AuxInt) +@@ -2516,7 +2538,7 @@ func rewriteValueLOONG64_OpLOONG64MOVHstore(v *Value) bool { + ptr := v_0.Args[0] + val := v_1 + mem := v_2 +- if !(canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2))) { ++ if !(canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) { + break + } + v.reset(OpLOONG64MOVHstore) +@@ -2598,8 +2620,10 @@ func rewriteValueLOONG64_OpLOONG64MOVHstore(v *Value) bool { + func rewriteValueLOONG64_OpLOONG64MOVHstorezero(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] ++ b := v.Block ++ config := b.Func.Config + // match: (MOVHstorezero [off1] {sym} (ADDVconst [off2] ptr) mem) +- // cond: is32Bit(int64(off1)+off2) ++ // cond: is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) + // result: (MOVHstorezero [off1+int32(off2)] {sym} ptr mem) + for { + off1 := auxIntToInt32(v.AuxInt) +@@ -2610,7 +2634,7 @@ func rewriteValueLOONG64_OpLOONG64MOVHstorezero(v *Value) bool { + off2 := auxIntToInt64(v_0.AuxInt) + ptr := v_0.Args[0] + mem := v_1 +- if !(is32Bit(int64(off1) + off2)) { ++ if !(is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) { + break + } + v.reset(OpLOONG64MOVHstorezero) +@@ -2620,7 +2644,7 @@ func rewriteValueLOONG64_OpLOONG64MOVHstorezero(v *Value) bool { + return true + } + // match: 
(MOVHstorezero [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) mem) +- // cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) ++ // cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) + // result: (MOVHstorezero [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr mem) + for { + off1 := auxIntToInt32(v.AuxInt) +@@ -2632,7 +2656,7 @@ func rewriteValueLOONG64_OpLOONG64MOVHstorezero(v *Value) bool { + sym2 := auxToSym(v_0.Aux) + ptr := v_0.Args[0] + mem := v_1 +- if !(canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2))) { ++ if !(canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) { + break + } + v.reset(OpLOONG64MOVHstorezero) +@@ -2646,8 +2670,10 @@ func rewriteValueLOONG64_OpLOONG64MOVHstorezero(v *Value) bool { + func rewriteValueLOONG64_OpLOONG64MOVVload(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] ++ b := v.Block ++ config := b.Func.Config + // match: (MOVVload [off1] {sym} (ADDVconst [off2] ptr) mem) +- // cond: is32Bit(int64(off1)+off2) ++ // cond: is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) + // result: (MOVVload [off1+int32(off2)] {sym} ptr mem) + for { + off1 := auxIntToInt32(v.AuxInt) +@@ -2658,7 +2684,7 @@ func rewriteValueLOONG64_OpLOONG64MOVVload(v *Value) bool { + off2 := auxIntToInt64(v_0.AuxInt) + ptr := v_0.Args[0] + mem := v_1 +- if !(is32Bit(int64(off1) + off2)) { ++ if !(is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) { + break + } + v.reset(OpLOONG64MOVVload) +@@ -2668,7 +2694,7 @@ func rewriteValueLOONG64_OpLOONG64MOVVload(v *Value) bool { + return true + } + // match: (MOVVload [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) mem) +- // cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) ++ // cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) + // result: (MOVVload 
[off1+int32(off2)] {mergeSym(sym1,sym2)} ptr mem) + for { + off1 := auxIntToInt32(v.AuxInt) +@@ -2680,7 +2706,7 @@ func rewriteValueLOONG64_OpLOONG64MOVVload(v *Value) bool { + sym2 := auxToSym(v_0.Aux) + ptr := v_0.Args[0] + mem := v_1 +- if !(canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2))) { ++ if !(canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) { + break + } + v.reset(OpLOONG64MOVVload) +@@ -2722,8 +2748,10 @@ func rewriteValueLOONG64_OpLOONG64MOVVstore(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] ++ b := v.Block ++ config := b.Func.Config + // match: (MOVVstore [off1] {sym} (ADDVconst [off2] ptr) val mem) +- // cond: is32Bit(int64(off1)+off2) ++ // cond: is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) + // result: (MOVVstore [off1+int32(off2)] {sym} ptr val mem) + for { + off1 := auxIntToInt32(v.AuxInt) +@@ -2735,7 +2763,7 @@ func rewriteValueLOONG64_OpLOONG64MOVVstore(v *Value) bool { + ptr := v_0.Args[0] + val := v_1 + mem := v_2 +- if !(is32Bit(int64(off1) + off2)) { ++ if !(is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) { + break + } + v.reset(OpLOONG64MOVVstore) +@@ -2745,7 +2773,7 @@ func rewriteValueLOONG64_OpLOONG64MOVVstore(v *Value) bool { + return true + } + // match: (MOVVstore [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) val mem) +- // cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) ++ // cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) + // result: (MOVVstore [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr val mem) + for { + off1 := auxIntToInt32(v.AuxInt) +@@ -2758,7 +2786,7 @@ func rewriteValueLOONG64_OpLOONG64MOVVstore(v *Value) bool { + ptr := v_0.Args[0] + val := v_1 + mem := v_2 +- if !(canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2))) { ++ if !(canMergeSym(sym1, sym2) && 
is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) { + break + } + v.reset(OpLOONG64MOVVstore) +@@ -2772,8 +2800,10 @@ func rewriteValueLOONG64_OpLOONG64MOVVstore(v *Value) bool { + func rewriteValueLOONG64_OpLOONG64MOVVstorezero(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] ++ b := v.Block ++ config := b.Func.Config + // match: (MOVVstorezero [off1] {sym} (ADDVconst [off2] ptr) mem) +- // cond: is32Bit(int64(off1)+off2) ++ // cond: is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) + // result: (MOVVstorezero [off1+int32(off2)] {sym} ptr mem) + for { + off1 := auxIntToInt32(v.AuxInt) +@@ -2784,7 +2814,7 @@ func rewriteValueLOONG64_OpLOONG64MOVVstorezero(v *Value) bool { + off2 := auxIntToInt64(v_0.AuxInt) + ptr := v_0.Args[0] + mem := v_1 +- if !(is32Bit(int64(off1) + off2)) { ++ if !(is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) { + break + } + v.reset(OpLOONG64MOVVstorezero) +@@ -2794,7 +2824,7 @@ func rewriteValueLOONG64_OpLOONG64MOVVstorezero(v *Value) bool { + return true + } + // match: (MOVVstorezero [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) mem) +- // cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) ++ // cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) + // result: (MOVVstorezero [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr mem) + for { + off1 := auxIntToInt32(v.AuxInt) +@@ -2806,7 +2836,7 @@ func rewriteValueLOONG64_OpLOONG64MOVVstorezero(v *Value) bool { + sym2 := auxToSym(v_0.Aux) + ptr := v_0.Args[0] + mem := v_1 +- if !(canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2))) { ++ if !(canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) { + break + } + v.reset(OpLOONG64MOVVstorezero) +@@ -2820,8 +2850,10 @@ func rewriteValueLOONG64_OpLOONG64MOVVstorezero(v *Value) bool { + func 
rewriteValueLOONG64_OpLOONG64MOVWUload(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] ++ b := v.Block ++ config := b.Func.Config + // match: (MOVWUload [off1] {sym} (ADDVconst [off2] ptr) mem) +- // cond: is32Bit(int64(off1)+off2) ++ // cond: is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) + // result: (MOVWUload [off1+int32(off2)] {sym} ptr mem) + for { + off1 := auxIntToInt32(v.AuxInt) +@@ -2832,7 +2864,7 @@ func rewriteValueLOONG64_OpLOONG64MOVWUload(v *Value) bool { + off2 := auxIntToInt64(v_0.AuxInt) + ptr := v_0.Args[0] + mem := v_1 +- if !(is32Bit(int64(off1) + off2)) { ++ if !(is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) { + break + } + v.reset(OpLOONG64MOVWUload) +@@ -2842,7 +2874,7 @@ func rewriteValueLOONG64_OpLOONG64MOVWUload(v *Value) bool { + return true + } + // match: (MOVWUload [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) mem) +- // cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) ++ // cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) + // result: (MOVWUload [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr mem) + for { + off1 := auxIntToInt32(v.AuxInt) +@@ -2854,7 +2886,7 @@ func rewriteValueLOONG64_OpLOONG64MOVWUload(v *Value) bool { + sym2 := auxToSym(v_0.Aux) + ptr := v_0.Args[0] + mem := v_1 +- if !(canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2))) { ++ if !(canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) { + break + } + v.reset(OpLOONG64MOVWUload) +@@ -2949,8 +2981,10 @@ func rewriteValueLOONG64_OpLOONG64MOVWUreg(v *Value) bool { + func rewriteValueLOONG64_OpLOONG64MOVWload(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] ++ b := v.Block ++ config := b.Func.Config + // match: (MOVWload [off1] {sym} (ADDVconst [off2] ptr) mem) +- // cond: is32Bit(int64(off1)+off2) ++ // cond: is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || 
!config.ctxt.Flag_dynlink) + // result: (MOVWload [off1+int32(off2)] {sym} ptr mem) + for { + off1 := auxIntToInt32(v.AuxInt) +@@ -2961,7 +2995,7 @@ func rewriteValueLOONG64_OpLOONG64MOVWload(v *Value) bool { + off2 := auxIntToInt64(v_0.AuxInt) + ptr := v_0.Args[0] + mem := v_1 +- if !(is32Bit(int64(off1) + off2)) { ++ if !(is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) { + break + } + v.reset(OpLOONG64MOVWload) +@@ -2971,7 +3005,7 @@ func rewriteValueLOONG64_OpLOONG64MOVWload(v *Value) bool { + return true + } + // match: (MOVWload [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) mem) +- // cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) ++ // cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) + // result: (MOVWload [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr mem) + for { + off1 := auxIntToInt32(v.AuxInt) +@@ -2983,7 +3017,7 @@ func rewriteValueLOONG64_OpLOONG64MOVWload(v *Value) bool { + sym2 := auxToSym(v_0.Aux) + ptr := v_0.Args[0] + mem := v_1 +- if !(canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2))) { ++ if !(canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) { + break + } + v.reset(OpLOONG64MOVWload) +@@ -3112,8 +3146,10 @@ func rewriteValueLOONG64_OpLOONG64MOVWstore(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] ++ b := v.Block ++ config := b.Func.Config + // match: (MOVWstore [off1] {sym} (ADDVconst [off2] ptr) val mem) +- // cond: is32Bit(int64(off1)+off2) ++ // cond: is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) + // result: (MOVWstore [off1+int32(off2)] {sym} ptr val mem) + for { + off1 := auxIntToInt32(v.AuxInt) +@@ -3125,7 +3161,7 @@ func rewriteValueLOONG64_OpLOONG64MOVWstore(v *Value) bool { + ptr := v_0.Args[0] + val := v_1 + mem := v_2 +- if !(is32Bit(int64(off1) + off2)) { ++ if !(is32Bit(int64(off1)+off2) && (ptr.Op != 
OpSB || !config.ctxt.Flag_dynlink)) { + break + } + v.reset(OpLOONG64MOVWstore) +@@ -3135,7 +3171,7 @@ func rewriteValueLOONG64_OpLOONG64MOVWstore(v *Value) bool { + return true + } + // match: (MOVWstore [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) val mem) +- // cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) ++ // cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) + // result: (MOVWstore [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr val mem) + for { + off1 := auxIntToInt32(v.AuxInt) +@@ -3148,7 +3184,7 @@ func rewriteValueLOONG64_OpLOONG64MOVWstore(v *Value) bool { + ptr := v_0.Args[0] + val := v_1 + mem := v_2 +- if !(canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2))) { ++ if !(canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) { + break + } + v.reset(OpLOONG64MOVWstore) +@@ -3196,8 +3232,10 @@ func rewriteValueLOONG64_OpLOONG64MOVWstore(v *Value) bool { + func rewriteValueLOONG64_OpLOONG64MOVWstorezero(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] ++ b := v.Block ++ config := b.Func.Config + // match: (MOVWstorezero [off1] {sym} (ADDVconst [off2] ptr) mem) +- // cond: is32Bit(int64(off1)+off2) ++ // cond: is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) + // result: (MOVWstorezero [off1+int32(off2)] {sym} ptr mem) + for { + off1 := auxIntToInt32(v.AuxInt) +@@ -3208,7 +3246,7 @@ func rewriteValueLOONG64_OpLOONG64MOVWstorezero(v *Value) bool { + off2 := auxIntToInt64(v_0.AuxInt) + ptr := v_0.Args[0] + mem := v_1 +- if !(is32Bit(int64(off1) + off2)) { ++ if !(is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) { + break + } + v.reset(OpLOONG64MOVWstorezero) +@@ -3218,7 +3256,7 @@ func rewriteValueLOONG64_OpLOONG64MOVWstorezero(v *Value) bool { + return true + } + // match: (MOVWstorezero [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) mem) +- // cond: 
canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) ++ // cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) + // result: (MOVWstorezero [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr mem) + for { + off1 := auxIntToInt32(v.AuxInt) +@@ -3230,7 +3268,7 @@ func rewriteValueLOONG64_OpLOONG64MOVWstorezero(v *Value) bool { + sym2 := auxToSym(v_0.Aux) + ptr := v_0.Args[0] + mem := v_1 +- if !(canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2))) { ++ if !(canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) { + break + } + v.reset(OpLOONG64MOVWstorezero) +diff --git a/src/cmd/internal/obj/loong64/a.out.go b/src/cmd/internal/obj/loong64/a.out.go +index 99a7da388f..9527e99b56 100644 +--- a/src/cmd/internal/obj/loong64/a.out.go ++++ b/src/cmd/internal/obj/loong64/a.out.go +@@ -227,6 +227,7 @@ const ( + C_ADDR + C_TLS_LE + C_TLS_IE ++ C_GOTADDR + C_TEXTSIZE + + C_NCLASS // must be the last +diff --git a/src/cmd/internal/obj/loong64/asm.go b/src/cmd/internal/obj/loong64/asm.go +index eff60104ce..17c0539972 100644 +--- a/src/cmd/internal/obj/loong64/asm.go ++++ b/src/cmd/internal/obj/loong64/asm.go +@@ -344,6 +344,8 @@ var optab = []Optab{ + {AMOVBU, C_TLS_IE, C_NONE, C_REG, C_NONE, 57, 16, 0, sys.Loong64, 0}, + {AMOVWU, C_TLS_IE, C_NONE, C_REG, C_NONE, 57, 16, 0, sys.Loong64, 0}, + ++ {AMOVV, C_GOTADDR, C_NONE, C_REG, C_NONE, 65, 8, 0, sys.Loong64, 0}, ++ + {ATEQ, C_SCON, C_REG, C_REG, C_NONE, 15, 8, 0, 0, 0}, + {ATEQ, C_SCON, C_NONE, C_REG, C_NONE, 15, 8, 0, 0, 0}, + +@@ -601,6 +603,9 @@ func (c *ctxt0) aclass(a *obj.Addr) int { + return C_SOREG + } + return C_LOREG ++ ++ case obj.NAME_GOTREF: ++ return C_GOTADDR + } + + return C_GOK +@@ -1615,38 +1620,38 @@ func (c *ctxt0) asmout(p *obj.Prog, o *Optab, out []uint32) { + + case 56: // mov r, tlsvar IE model ==> (pcalau12i + ld.d)tlsvar@got + add.d + st.d + o1 = OP_IR(c.opir(APCALAU12I), 
uint32(0), uint32(REGTMP)) +- rel := obj.Addrel(c.cursym) +- rel.Off = int32(c.pc) +- rel.Siz = 4 +- rel.Sym = p.To.Sym +- rel.Add = 0x0 +- rel.Type = objabi.R_LOONG64_TLS_IE_PCREL_HI ++ rel := obj.Addrel(c.cursym) ++ rel.Off = int32(c.pc) ++ rel.Siz = 4 ++ rel.Sym = p.To.Sym ++ rel.Add = 0x0 ++ rel.Type = objabi.R_LOONG64_TLS_IE_PCREL_HI + o2 = OP_12IRR(c.opirr(-p.As), uint32(0), uint32(REGTMP), uint32(REGTMP)) +- rel2 := obj.Addrel(c.cursym) +- rel2.Off = int32(c.pc + 4) +- rel2.Siz = 4 +- rel2.Sym = p.To.Sym +- rel2.Add = 0x0 +- rel2.Type = objabi.R_LOONG64_TLS_IE_LO ++ rel2 := obj.Addrel(c.cursym) ++ rel2.Off = int32(c.pc + 4) ++ rel2.Siz = 4 ++ rel2.Sym = p.To.Sym ++ rel2.Add = 0x0 ++ rel2.Type = objabi.R_LOONG64_TLS_IE_LO + o3 = OP_RRR(c.oprrr(AADDVU), uint32(REGTMP), uint32(REG_R2), uint32(REGTMP)) + o4 = OP_12IRR(c.opirr(p.As), uint32(0), uint32(REGTMP), uint32(p.From.Reg)) + + case 57: // mov tlsvar, r IE model ==> (pcalau12i + ld.d)tlsvar@got + add.d + ld.d +- o1 = OP_IR(c.opir(APCALAU12I), uint32(0), uint32(REGTMP)) +- rel := obj.Addrel(c.cursym) +- rel.Off = int32(c.pc) +- rel.Siz = 4 +- rel.Sym = p.From.Sym +- rel.Add = 0x0 +- rel.Type = objabi.R_LOONG64_TLS_IE_PCREL_HI +- o2 = OP_12IRR(c.opirr(-p.As), uint32(0), uint32(REGTMP), uint32(REGTMP)) +- rel2 := obj.Addrel(c.cursym) +- rel2.Off = int32(c.pc + 4) +- rel2.Siz = 4 +- rel2.Sym = p.From.Sym +- rel2.Add = 0x0 +- rel2.Type = objabi.R_LOONG64_TLS_IE_LO +- o3 = OP_RRR(c.oprrr(AADDVU), uint32(REGTMP), uint32(REG_R2), uint32(REGTMP)) ++ o1 = OP_IR(c.opir(APCALAU12I), uint32(0), uint32(REGTMP)) ++ rel := obj.Addrel(c.cursym) ++ rel.Off = int32(c.pc) ++ rel.Siz = 4 ++ rel.Sym = p.From.Sym ++ rel.Add = 0x0 ++ rel.Type = objabi.R_LOONG64_TLS_IE_PCREL_HI ++ o2 = OP_12IRR(c.opirr(-p.As), uint32(0), uint32(REGTMP), uint32(REGTMP)) ++ rel2 := obj.Addrel(c.cursym) ++ rel2.Off = int32(c.pc + 4) ++ rel2.Siz = 4 ++ rel2.Sym = p.From.Sym ++ rel2.Add = 0x0 ++ rel2.Type = objabi.R_LOONG64_TLS_IE_LO ++ o3 = 
OP_RRR(c.oprrr(AADDVU), uint32(REGTMP), uint32(REG_R2), uint32(REGTMP)) + o4 = OP_12IRR(c.opirr(-p.As), uint32(0), uint32(REGTMP), uint32(p.To.Reg)) + + case 59: // mov $dcon,r +@@ -1682,6 +1687,22 @@ func (c *ctxt0) asmout(p *obj.Prog, o *Optab, out []uint32) { + case 64: // movv c_reg, c_fcc0 ==> movgr2cf cd, rj + a := OP_TEN(8, 1334) + o1 = OP_RR(a, uint32(p.From.Reg), uint32(p.To.Reg)) ++ ++ case 65: // mov sym@GOT, r ==> pcalau12i + ld.d ++ o1 = OP_IR(c.opir(APCALAU12I), uint32(0), uint32(p.To.Reg)) ++ rel := obj.Addrel(c.cursym) ++ rel.Off = int32(c.pc) ++ rel.Siz = 4 ++ rel.Sym = p.From.Sym ++ rel.Type = objabi.R_LOONG64_GOTPCREL_HI ++ rel.Add = 0x0 ++ o2 = OP_12IRR(c.opirr(-p.As), uint32(0), uint32(p.To.Reg), uint32(p.To.Reg)) ++ rel2 := obj.Addrel(c.cursym) ++ rel2.Off = int32(c.pc + 4) ++ rel2.Siz = 4 ++ rel2.Sym = p.From.Sym ++ rel2.Type = objabi.R_LOONG64_GOT_LO ++ rel2.Add = 0x0 + } + + out[0] = o1 +diff --git a/src/cmd/internal/obj/loong64/cnames.go b/src/cmd/internal/obj/loong64/cnames.go +index 8b8af6ba31..94b1b54c93 100644 +--- a/src/cmd/internal/obj/loong64/cnames.go ++++ b/src/cmd/internal/obj/loong64/cnames.go +@@ -39,6 +39,7 @@ var cnames0 = []string{ + "ADDR", + "TLS_LE", + "TLS_IE", ++ "GOTADDR", + "TEXTSIZE", + "NCLASS", + } +diff --git a/src/cmd/internal/obj/loong64/obj.go b/src/cmd/internal/obj/loong64/obj.go +index 0c1f5c029d..5d7213d8c7 100644 +--- a/src/cmd/internal/obj/loong64/obj.go ++++ b/src/cmd/internal/obj/loong64/obj.go +@@ -84,6 +84,122 @@ func progedit(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) { + p.As = AADDVU + } + } ++ ++ if ctxt.Flag_dynlink { ++ rewriteToUseGot(ctxt, p, newprog) ++ } ++} ++ ++func rewriteToUseGot(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) { ++ // ADUFFxxx $offset ++ // becomes ++ // MOVV runtime.duffxxx@GOT, REGTMP ++ // ADD $offset, REGTMP ++ // JAL REGTMP ++ if p.As == obj.ADUFFCOPY || p.As == obj.ADUFFZERO { ++ var sym *obj.LSym ++ if p.As == obj.ADUFFZERO { ++ sym =
ctxt.Lookup("runtime.duffzero") ++ } else { ++ sym = ctxt.Lookup("runtime.duffcopy") ++ } ++ offset := p.To.Offset ++ p.As = AMOVV ++ p.From.Type = obj.TYPE_MEM ++ p.From.Sym = sym ++ p.From.Name = obj.NAME_GOTREF ++ p.To.Type = obj.TYPE_REG ++ p.To.Reg = REGTMP ++ p.To.Name = obj.NAME_NONE ++ p.To.Offset = 0 ++ p.To.Sym = nil ++ p1 := obj.Appendp(p, newprog) ++ p1.As = AADDV ++ p1.From.Type = obj.TYPE_CONST ++ p1.From.Offset = offset ++ p1.To.Type = obj.TYPE_REG ++ p1.To.Reg = REGTMP ++ p2 := obj.Appendp(p1, newprog) ++ p2.As = AJAL ++ p2.To.Type = obj.TYPE_MEM ++ p2.To.Reg = REGTMP ++ } ++ ++ // We only care about global data: NAME_EXTERN means a global ++ // symbol in the Go sense, and p.Sym.Local is true for a few ++ // internally defined symbols. ++ if p.From.Type == obj.TYPE_ADDR && p.From.Name == obj.NAME_EXTERN && !p.From.Sym.Local() { ++ // MOVV $sym, Rx becomes MOVV sym@GOT, Rx ++ // MOVV $sym+<off>, Rx becomes MOVV sym@GOT, Rx; ADD <off>, Rx ++ if p.As != AMOVV { ++ ctxt.Diag("do not know how to handle TYPE_ADDR in %v with -shared", p) ++ } ++ if p.To.Type != obj.TYPE_REG { ++ ctxt.Diag("do not know how to handle LEAQ-type insn to non-register in %v with -shared", p) ++ } ++ p.From.Type = obj.TYPE_MEM ++ p.From.Name = obj.NAME_GOTREF ++ if p.From.Offset != 0 { ++ q := obj.Appendp(p, newprog) ++ q.As = AADDV ++ q.From.Type = obj.TYPE_CONST ++ q.From.Offset = p.From.Offset ++ q.To = p.To ++ p.From.Offset = 0 ++ } ++ } ++ if p.GetFrom3() != nil && p.GetFrom3().Name == obj.NAME_EXTERN { ++ ctxt.Diag("don't know how to handle %v with -shared", p) ++ } ++ ++ var source *obj.Addr ++ // MOVx sym, Ry becomes MOVV sym@GOT, REGTMP; MOVx (REGTMP), Ry ++ // MOVx Ry, sym becomes MOVV sym@GOT, REGTMP; MOVx Ry, (REGTMP) ++ // An addition may be inserted between the two MOVs if there is an offset.
++ if p.From.Name == obj.NAME_EXTERN && !p.From.Sym.Local() { ++ if p.To.Name == obj.NAME_EXTERN && !p.To.Sym.Local() { ++ ctxt.Diag("cannot handle NAME_EXTERN on both sides in %v with -shared", p) ++ } ++ source = &p.From ++ } else if p.To.Name == obj.NAME_EXTERN && !p.To.Sym.Local() { ++ source = &p.To ++ } else { ++ return ++ } ++ if p.As == obj.ATEXT || p.As == obj.AFUNCDATA || p.As == obj.ACALL || p.As == obj.ARET || p.As == obj.AJMP { ++ return ++ } ++ if source.Sym.Type == objabi.STLSBSS { ++ return ++ } ++ if source.Type != obj.TYPE_MEM { ++ ctxt.Diag("don't know how to handle %v with -shared", p) ++ } ++ p1 := obj.Appendp(p, newprog) ++ p2 := obj.Appendp(p1, newprog) ++ p1.As = AMOVV ++ p1.From.Type = obj.TYPE_MEM ++ p1.From.Sym = source.Sym ++ p1.From.Name = obj.NAME_GOTREF ++ p1.To.Type = obj.TYPE_REG ++ p1.To.Reg = REGTMP ++ ++ p2.As = p.As ++ p2.From = p.From ++ p2.To = p.To ++ if p.From.Name == obj.NAME_EXTERN { ++ p2.From.Reg = REGTMP ++ p2.From.Name = obj.NAME_NONE ++ p2.From.Sym = nil ++ } else if p.To.Name == obj.NAME_EXTERN { ++ p2.To.Reg = REGTMP ++ p2.To.Name = obj.NAME_NONE ++ p2.To.Sym = nil ++ } else { ++ return ++ } ++ obj.Nopout(p) ++ + } + + func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { +diff --git a/src/cmd/internal/objabi/reloctype.go b/src/cmd/internal/objabi/reloctype.go +index 1a6a73fe12..aaefd15663 100644 +--- a/src/cmd/internal/objabi/reloctype.go ++++ b/src/cmd/internal/objabi/reloctype.go +@@ -315,6 +315,11 @@ const ( + R_LOONG64_TLS_IE_PCREL_HI + R_LOONG64_TLS_IE_LO + ++ // R_LOONG64_GOTPCREL_HI and R_LOONG64_GOT_LO relocate a pcalau12i, ld.d pair to compute ++ // the address of the GOT slot of the referenced symbol. ++ R_LOONG64_GOTPCREL_HI ++ R_LOONG64_GOT_LO ++ + // R_JMPLOONG64 resolves to non-PC-relative target address of a JMP instruction, + // by encoding the address into the instruction.
+ R_JMPLOONG64 +diff --git a/src/cmd/internal/objabi/reloctype_string.go b/src/cmd/internal/objabi/reloctype_string.go +index 8cfff5ae8c..53104c76b3 100644 +--- a/src/cmd/internal/objabi/reloctype_string.go ++++ b/src/cmd/internal/objabi/reloctype_string.go +@@ -1,4 +1,4 @@ +-// Code generated by "stringer -type=RelocType cmd/internal/objabi/reloctype.go"; DO NOT EDIT. ++// Code generated by "stringer -type=RelocType"; DO NOT EDIT. + + package objabi + +@@ -81,17 +81,19 @@ func _() { + _ = x[R_CALLLOONG64-71] + _ = x[R_LOONG64_TLS_IE_PCREL_HI-72] + _ = x[R_LOONG64_TLS_IE_LO-73] +- _ = x[R_JMPLOONG64-74] +- _ = x[R_ADDRMIPSU-75] +- _ = x[R_ADDRMIPSTLS-76] +- _ = x[R_ADDRCUOFF-77] +- _ = x[R_WASMIMPORT-78] +- _ = x[R_XCOFFREF-79] ++ _ = x[R_LOONG64_GOTPCREL_HI-74] ++ _ = x[R_LOONG64_GOT_LO-75] ++ _ = x[R_JMPLOONG64-76] ++ _ = x[R_ADDRMIPSU-77] ++ _ = x[R_ADDRMIPSTLS-78] ++ _ = x[R_ADDRCUOFF-79] ++ _ = x[R_WASMIMPORT-80] ++ _ = x[R_XCOFFREF-81] + } + +-const _RelocType_name = "R_ADDRR_ADDRPOWERR_ADDRARM64R_ADDRMIPSR_ADDROFFR_SIZER_CALLR_CALLARMR_CALLARM64R_CALLINDR_CALLPOWERR_CALLMIPSR_CONSTR_PCRELR_TLS_LER_TLS_IER_GOTOFFR_PLT0R_PLT1R_PLT2R_USEFIELDR_USETYPER_USEIFACER_USEIFACEMETHODR_USEGENERICIFACEMETHODR_METHODOFFR_KEEPR_POWER_TOCR_GOTPCRELR_JMPMIPSR_DWARFSECREFR_DWARFFILEREFR_ARM64_TLS_LER_ARM64_TLS_IER_ARM64_GOTPCRELR_ARM64_GOTR_ARM64_PCRELR_ARM64_PCREL_LDST8R_ARM64_PCREL_LDST16R_ARM64_PCREL_LDST32R_ARM64_PCREL_LDST64R_ARM64_LDST8R_ARM64_LDST16R_ARM64_LDST32R_ARM64_LDST64R_ARM64_LDST128R_POWER_TLS_LER_POWER_TLS_IER_POWER_TLSR_POWER_TLS_IE_PCREL34R_POWER_TLS_LE_TPREL34R_ADDRPOWER_DSR_ADDRPOWER_GOTR_ADDRPOWER_GOT_PCREL34R_ADDRPOWER_PCRELR_ADDRPOWER_TOCRELR_ADDRPOWER_TOCREL_DSR_ADDRPOWER_D34R_ADDRPOWER_PCREL34R_RISCV_CALLR_RISCV_CALL_TRAMPR_RISCV_PCREL_ITYPER_RISCV_PCREL_STYPER_RISCV_TLS_IE_ITYPER_RISCV_TLS_IE_STYPER_PCRELDBLR_ADDRLOONG64R_ADDRLOONG64UR_ADDRLOONG64TLSR_ADDRLOONG64TLSUR_CALLLOONG64R_LOONG64_TLS_IE_PCREL_HIR_LOONG64_TLS_IE_LOR_JMPLOONG64R_ADDRMIPSUR_AD
DRMIPSTLSR_ADDRCUOFFR_WASMIMPORTR_XCOFFREF" ++const _RelocType_name = "R_ADDRR_ADDRPOWERR_ADDRARM64R_ADDRMIPSR_ADDROFFR_SIZER_CALLR_CALLARMR_CALLARM64R_CALLINDR_CALLPOWERR_CALLMIPSR_CONSTR_PCRELR_TLS_LER_TLS_IER_GOTOFFR_PLT0R_PLT1R_PLT2R_USEFIELDR_USETYPER_USEIFACER_USEIFACEMETHODR_USEGENERICIFACEMETHODR_METHODOFFR_KEEPR_POWER_TOCR_GOTPCRELR_JMPMIPSR_DWARFSECREFR_DWARFFILEREFR_ARM64_TLS_LER_ARM64_TLS_IER_ARM64_GOTPCRELR_ARM64_GOTR_ARM64_PCRELR_ARM64_PCREL_LDST8R_ARM64_PCREL_LDST16R_ARM64_PCREL_LDST32R_ARM64_PCREL_LDST64R_ARM64_LDST8R_ARM64_LDST16R_ARM64_LDST32R_ARM64_LDST64R_ARM64_LDST128R_POWER_TLS_LER_POWER_TLS_IER_POWER_TLSR_POWER_TLS_IE_PCREL34R_POWER_TLS_LE_TPREL34R_ADDRPOWER_DSR_ADDRPOWER_GOTR_ADDRPOWER_GOT_PCREL34R_ADDRPOWER_PCRELR_ADDRPOWER_TOCRELR_ADDRPOWER_TOCREL_DSR_ADDRPOWER_D34R_ADDRPOWER_PCREL34R_RISCV_CALLR_RISCV_CALL_TRAMPR_RISCV_PCREL_ITYPER_RISCV_PCREL_STYPER_RISCV_TLS_IE_ITYPER_RISCV_TLS_IE_STYPER_PCRELDBLR_ADDRLOONG64R_ADDRLOONG64UR_ADDRLOONG64TLSR_ADDRLOONG64TLSUR_CALLLOONG64R_LOONG64_TLS_IE_PCREL_HIR_LOONG64_TLS_IE_LOR_LOONG64_GOTPCREL_HIR_LOONG64_GOT_LOR_JMPLOONG64R_ADDRMIPSUR_ADDRMIPSTLSR_ADDRCUOFFR_WASMIMPORTR_XCOFFREF" + +-var _RelocType_index = [...]uint16{0, 6, 17, 28, 38, 47, 53, 59, 68, 79, 88, 99, 109, 116, 123, 131, 139, 147, 153, 159, 165, 175, 184, 194, 210, 233, 244, 250, 261, 271, 280, 293, 307, 321, 335, 351, 362, 375, 394, 414, 434, 454, 467, 481, 495, 509, 524, 538, 552, 563, 585, 607, 621, 636, 659, 676, 694, 715, 730, 749, 761, 779, 798, 817, 837, 857, 867, 880, 894, 910, 927, 940, 965, 984, 996, 1007, 1020, 1031, 1043, 1053} ++var _RelocType_index = [...]uint16{0, 6, 17, 28, 38, 47, 53, 59, 68, 79, 88, 99, 109, 116, 123, 131, 139, 147, 153, 159, 165, 175, 184, 194, 210, 233, 244, 250, 261, 271, 280, 293, 307, 321, 335, 351, 362, 375, 394, 414, 434, 454, 467, 481, 495, 509, 524, 538, 552, 563, 585, 607, 621, 636, 659, 676, 694, 715, 730, 749, 761, 779, 798, 817, 837, 857, 867, 880, 894, 910, 927, 940, 965, 984, 1005, 1021, 
1033, 1044, 1057, 1068, 1080, 1090} + + func (i RelocType) String() string { + i -= 1 +diff --git a/src/cmd/link/internal/ld/config.go b/src/cmd/link/internal/ld/config.go +index 129d30f35a..5809d7eb93 100644 +--- a/src/cmd/link/internal/ld/config.go ++++ b/src/cmd/link/internal/ld/config.go +@@ -84,7 +84,7 @@ func (mode *BuildMode) Set(s string) error { + switch buildcfg.GOOS { + case "linux": + switch buildcfg.GOARCH { +- case "386", "amd64", "arm", "arm64", "ppc64le", "s390x": ++ case "386", "amd64", "arm", "arm64", "loong64", "ppc64le", "s390x": + default: + return badmode() + } +diff --git a/src/cmd/link/internal/loong64/asm.go b/src/cmd/link/internal/loong64/asm.go +index 2f760fd231..a4c48e64cd 100644 +--- a/src/cmd/link/internal/loong64/asm.go ++++ b/src/cmd/link/internal/loong64/asm.go +@@ -14,7 +14,43 @@ import ( + "log" + ) + +-func gentext(ctxt *ld.Link, ldr *loader.Loader) {} ++func gentext(ctxt *ld.Link, ldr *loader.Loader) { ++ initfunc, addmoduledata := ld.PrepareAddmoduledata(ctxt) ++ if initfunc == nil { ++ return ++ } ++ ++ o := func(op uint32) { ++ initfunc.AddUint32(ctxt.Arch, op) ++ } ++ ++ // 0: pcalau12i r4, $0 ++ // ++ // 0: R_ADDRLOONG64U ++ // ++ // 4: addi.d r4, r4, $0 ++ // ++ // 4: R_ADDRLOONG64 ++ o(0x1a000004) ++ rel, _ := initfunc.AddRel(objabi.R_ADDRLOONG64U) ++ rel.SetOff(0) ++ rel.SetSiz(4) ++ rel.SetSym(ctxt.Moduledata) ++ o(0x02c00084) ++ rel2, _ := initfunc.AddRel(objabi.R_ADDRLOONG64) ++ rel2.SetOff(4) ++ rel2.SetSiz(4) ++ rel2.SetSym(ctxt.Moduledata) ++ ++ // 8: b $0 ++ // ++ // 8: R_CALLLOONG64 ++ o(0x50000000) ++ rel3, _ := initfunc.AddRel(objabi.R_CALLLOONG64) ++ rel3.SetOff(8) ++ rel3.SetSiz(4) ++ rel3.SetSym(addmoduledata) ++} + + func adddynrel(target *ld.Target, ldr *loader.Loader, syms *ld.ArchSyms, s loader.Sym, r loader.Reloc, rIdx int) bool { + log.Fatalf("adddynrel not implemented") +@@ -69,6 +105,16 @@ func elfreloc1(ctxt *ld.Link, out *ld.OutBuf, ldr *loader.Loader, s loader.Sym, + 
out.Write64(uint64(elf.R_LARCH_TLS_IE_PC_LO12) | uint64(elfsym)<<32) + out.Write64(uint64(0x0)) + ++ case objabi.R_LOONG64_GOTPCREL_HI: ++ out.Write64(uint64(sectoff)) ++ out.Write64(uint64(elf.R_LARCH_GOT_PC_HI20) | uint64(elfsym)<<32) ++ out.Write64(uint64(0x0)) ++ ++ case objabi.R_LOONG64_GOT_LO: ++ out.Write64(uint64(sectoff)) ++ out.Write64(uint64(elf.R_LARCH_GOT_PC_LO12) | uint64(elfsym)<<32) ++ out.Write64(uint64(0x0)) ++ + // The pcaddu12i + addi.d instructions is used to obtain address of a symbol on Loong64. + // The low 12-bit of the symbol address need to be added. The addi.d instruction have + // signed 12-bit immediate operand. The 0x800 (addr+U12 <=> addr+0x800+S12) is introduced +@@ -116,7 +162,9 @@ func archreloc(target *ld.Target, ldr *loader.Loader, syms *ld.ArchSyms, r loade + objabi.R_CALLLOONG64, + objabi.R_JMPLOONG64, + objabi.R_LOONG64_TLS_IE_PCREL_HI, +- objabi.R_LOONG64_TLS_IE_LO: ++ objabi.R_LOONG64_GOTPCREL_HI, ++ objabi.R_LOONG64_TLS_IE_LO, ++ objabi.R_LOONG64_GOT_LO: + return val, 1, true + } + } +@@ -161,7 +209,10 @@ func archrelocvariant(*ld.Target, *loader.Loader, loader.Reloc, sym.RelocVariant + func extreloc(target *ld.Target, ldr *loader.Loader, r loader.Reloc, s loader.Sym) (loader.ExtReloc, bool) { + switch r.Type() { + case objabi.R_ADDRLOONG64, +- objabi.R_ADDRLOONG64U: ++ objabi.R_ADDRLOONG64U, ++ objabi.R_LOONG64_GOTPCREL_HI, ++ objabi.R_LOONG64_GOT_LO: ++ + return ld.ExtrelocViaOuterSym(ldr, r, s), true + + case objabi.R_ADDRLOONG64TLS, +diff --git a/src/internal/platform/supported.go b/src/internal/platform/supported.go +index f00f978eb7..8971edad94 100644 +--- a/src/internal/platform/supported.go ++++ b/src/internal/platform/supported.go +@@ -173,7 +173,7 @@ func BuildModeSupported(compiler, buildmode, goos, goarch string) bool { + + case "shared": + switch platform { +- case "linux/386", "linux/amd64", "linux/arm", "linux/arm64", "linux/ppc64le", "linux/s390x": ++ case "linux/386", "linux/amd64", "linux/arm", 
"linux/arm64", "linux/loong64", "linux/ppc64le", "linux/s390x": + return true + } + return false +diff --git a/src/runtime/asm_loong64.s b/src/runtime/asm_loong64.s +index 3921091fea..ba06203fa1 100644 +--- a/src/runtime/asm_loong64.s ++++ b/src/runtime/asm_loong64.s +@@ -618,6 +618,16 @@ TEXT runtime·goexit(SB),NOSPLIT|NOFRAME|TOPFRAME,$0-0 + // traceback from goexit1 must hit code range of goexit + NOR R0, R0 // NOP + ++TEXT runtime·addmoduledata(SB),NOSPLIT,$0-0 ++ ADDV $-16, R3 ++ MOVV R30, 8(R3) ++ MOVV runtime·lastmoduledatap(SB), R5 ++ MOVV R4, moduledata_next(R5) ++ MOVV R4, runtime·lastmoduledatap(SB) ++ MOVV 8(R3), R30 ++ ADDV $16, R3 ++ RET ++ + TEXT ·checkASM(SB),NOSPLIT,$0-1 + MOVW $1, R19 + MOVB R19, ret+0(FP) +diff --git a/src/runtime/internal/atomic/atomic_loong64.s b/src/runtime/internal/atomic/atomic_loong64.s +index 3d802beaa7..80ff980739 100644 +--- a/src/runtime/internal/atomic/atomic_loong64.s ++++ b/src/runtime/internal/atomic/atomic_loong64.s +@@ -294,13 +294,13 @@ TEXT ·Loadp(SB),NOSPLIT|NOFRAME,$0-16 + + // uint32 runtime∕internal∕atomic·LoadAcq(uint32 volatile* ptr) + TEXT ·LoadAcq(SB),NOSPLIT|NOFRAME,$0-12 +- JMP atomic·Load(SB) ++ JMP ·Load(SB) + + // uint64 ·LoadAcq64(uint64 volatile* ptr) + TEXT ·LoadAcq64(SB),NOSPLIT|NOFRAME,$0-16 +- JMP atomic·Load64(SB) ++ JMP ·Load64(SB) + + // uintptr ·LoadAcquintptr(uintptr volatile* ptr) + TEXT ·LoadAcquintptr(SB),NOSPLIT|NOFRAME,$0-16 +- JMP atomic·Load64(SB) ++ JMP ·Load64(SB) + +-- +2.38.1 + diff --git a/0036-cmd-link-internal-support-buildmode-plugin-for-loong.patch b/0036-cmd-link-internal-support-buildmode-plugin-for-loong.patch new file mode 100644 index 0000000000000000000000000000000000000000..79fc0af05853c9e1a87e0c087f09cb0500e33d51 --- /dev/null +++ b/0036-cmd-link-internal-support-buildmode-plugin-for-loong.patch @@ -0,0 +1,42 @@ +From 22b9666742538ee0a074b43ab18fbcd35d5fc673 Mon Sep 17 00:00:00 2001 +From: Guoqi Chen +Date: Wed, 22 Mar 2023 07:03:06 +0800 +Subject: [PATCH 36/62] 
cmd/{link,internal}: support -buildmode=plugin for + loong64 + +Signed-off-by: Guoqi Chen +Change-Id: I889409080d5f9f78d2fe78dd5373c428f031b587 +--- + src/cmd/link/internal/ld/config.go | 2 +- + src/internal/platform/supported.go | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + +diff --git a/src/cmd/link/internal/ld/config.go b/src/cmd/link/internal/ld/config.go +index 5809d7eb93..cf516ad955 100644 +--- a/src/cmd/link/internal/ld/config.go ++++ b/src/cmd/link/internal/ld/config.go +@@ -96,7 +96,7 @@ func (mode *BuildMode) Set(s string) error { + switch buildcfg.GOOS { + case "linux": + switch buildcfg.GOARCH { +- case "386", "amd64", "arm", "arm64", "s390x", "ppc64le": ++ case "386", "amd64", "arm", "arm64", "loong64", "s390x", "ppc64le": + default: + return badmode() + } +diff --git a/src/internal/platform/supported.go b/src/internal/platform/supported.go +index 8971edad94..8a0ff8372e 100644 +--- a/src/internal/platform/supported.go ++++ b/src/internal/platform/supported.go +@@ -180,7 +180,7 @@ func BuildModeSupported(compiler, buildmode, goos, goarch string) bool { + + case "plugin": + switch platform { +- case "linux/amd64", "linux/arm", "linux/arm64", "linux/386", "linux/s390x", "linux/ppc64le", ++ case "linux/amd64", "linux/arm", "linux/arm64", "linux/loong64", "linux/386", "linux/s390x", "linux/ppc64le", + "android/amd64", "android/arm", "android/arm64", "android/386", + "darwin/amd64", "darwin/arm64", + "freebsd/amd64": +-- +2.38.1 + diff --git a/0037-cmd-dist-test-enable-buildmode-shared-plugin-test-fo.patch b/0037-cmd-dist-test-enable-buildmode-shared-plugin-test-fo.patch new file mode 100644 index 0000000000000000000000000000000000000000..53609321be19ec4ede1d39120a29699ea8c3f367 --- /dev/null +++ b/0037-cmd-dist-test-enable-buildmode-shared-plugin-test-fo.patch @@ -0,0 +1,35 @@ +From e615b7408358f7faab8413d883c654b5a3653a95 Mon Sep 17 00:00:00 2001 +From: Guoqi Chen +Date: Wed, 22 Mar 2023 07:03:39 +0800 +Subject: [PATCH 37/62] cmd/dist/test: 
enable buildmode={shared,plugin} test + for loong64 + +Signed-off-by: Guoqi Chen +Change-Id: I6a215459ebbc153f5b0efeaeb3821fa54c24befe +--- + src/cmd/dist/test.go | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/src/cmd/dist/test.go b/src/cmd/dist/test.go +index 920018721d..f31e613711 100644 +--- a/src/cmd/dist/test.go ++++ b/src/cmd/dist/test.go +@@ -1201,13 +1201,13 @@ func (t *tester) supportedBuildmode(mode string) bool { + return false + case "shared": + switch pair { +- case "linux-386", "linux-amd64", "linux-arm", "linux-arm64", "linux-ppc64le", "linux-s390x": ++ case "linux-386", "linux-amd64", "linux-arm", "linux-arm64", "linux-loong64", "linux-ppc64le", "linux-s390x": + return true + } + return false + case "plugin": + switch pair { +- case "linux-386", "linux-amd64", "linux-arm", "linux-arm64", "linux-s390x", "linux-ppc64le": ++ case "linux-386", "linux-amd64", "linux-arm", "linux-arm64", "linux-loong64", "linux-s390x", "linux-ppc64le": + return true + case "darwin-amd64", "darwin-arm64": + return true +-- +2.38.1 + diff --git a/0038-runtime-enable-memory-sanitizer-on-loong64.patch b/0038-runtime-enable-memory-sanitizer-on-loong64.patch new file mode 100644 index 0000000000000000000000000000000000000000..cc992af430b368f85b871380a8fbba2fd8e90f07 --- /dev/null +++ b/0038-runtime-enable-memory-sanitizer-on-loong64.patch @@ -0,0 +1,248 @@ +From ccc075ea51518e51b15e64939d52c14d0bf271d3 Mon Sep 17 00:00:00 2001 +From: Xiaolin Zhao +Date: Wed, 14 Dec 2022 15:44:36 +0800 +Subject: [PATCH 38/62] runtime: enable memory sanitizer on loong64 + +Change-Id: I24364239d3dfe3ea9185bdbd0a525523ae50dfb7 +--- + misc/cgo/testsanitizers/cc_test.go | 2 +- + src/internal/platform/supported.go | 2 +- + src/runtime/cgo/gcc_mmap.c | 2 +- + src/runtime/cgo/mmap.go | 2 +- + src/runtime/cgo_mmap.go | 2 +- + src/runtime/mmap.go | 2 +- + src/runtime/msan/msan.go | 2 +- + src/runtime/msan_loong64.s | 72 ++++++++++++++++++++++++++++++ + 
src/runtime/sys_linux_loong64.s | 31 ++++++++++++- + 9 files changed, 108 insertions(+), 9 deletions(-) + create mode 100644 src/runtime/msan_loong64.s + +diff --git a/misc/cgo/testsanitizers/cc_test.go b/misc/cgo/testsanitizers/cc_test.go +index 8eda1372f6..b9157d5f4a 100644 +--- a/misc/cgo/testsanitizers/cc_test.go ++++ b/misc/cgo/testsanitizers/cc_test.go +@@ -563,7 +563,7 @@ func hangProneCmd(name string, arg ...string) *exec.Cmd { + func mSanSupported(goos, goarch string) bool { + switch goos { + case "linux": +- return goarch == "amd64" || goarch == "arm64" ++ return goarch == "amd64" || goarch == "arm64" || goarch == "loong64" + case "freebsd": + return goarch == "amd64" + default: +diff --git a/src/internal/platform/supported.go b/src/internal/platform/supported.go +index 8a0ff8372e..7aed873d41 100644 +--- a/src/internal/platform/supported.go ++++ b/src/internal/platform/supported.go +@@ -28,7 +28,7 @@ func RaceDetectorSupported(goos, goarch string) bool { + func MSanSupported(goos, goarch string) bool { + switch goos { + case "linux": +- return goarch == "amd64" || goarch == "arm64" ++ return goarch == "amd64" || goarch == "arm64" || goarch == "loong64" + case "freebsd": + return goarch == "amd64" + default: +diff --git a/src/runtime/cgo/gcc_mmap.c b/src/runtime/cgo/gcc_mmap.c +index 83d857f0d1..bda5471340 100644 +--- a/src/runtime/cgo/gcc_mmap.c ++++ b/src/runtime/cgo/gcc_mmap.c +@@ -2,7 +2,7 @@ + // Use of this source code is governed by a BSD-style + // license that can be found in the LICENSE file. + +-// +build linux,amd64 linux,arm64 linux,ppc64le freebsd,amd64 ++// +build linux,amd64 linux,arm64 linux,loong64 linux,ppc64le freebsd,amd64 + + #include + #include +diff --git a/src/runtime/cgo/mmap.go b/src/runtime/cgo/mmap.go +index 2f7e83bcb7..716c1a3c8a 100644 +--- a/src/runtime/cgo/mmap.go ++++ b/src/runtime/cgo/mmap.go +@@ -2,7 +2,7 @@ + // Use of this source code is governed by a BSD-style + // license that can be found in the LICENSE file. 
+ +-//go:build (linux && amd64) || (linux && arm64) || (freebsd && amd64) ++//go:build (linux && amd64) || (linux && arm64) || (linux && loong64) || (freebsd && amd64) + + package cgo + +diff --git a/src/runtime/cgo_mmap.go b/src/runtime/cgo_mmap.go +index 30660f7784..014ce80d68 100644 +--- a/src/runtime/cgo_mmap.go ++++ b/src/runtime/cgo_mmap.go +@@ -4,7 +4,7 @@ + + // Support for memory sanitizer. See runtime/cgo/mmap.go. + +-//go:build (linux && amd64) || (linux && arm64) || (freebsd && amd64) ++//go:build (linux && amd64) || (linux && arm64) || (linux && loong64) || (freebsd && amd64) + + package runtime + +diff --git a/src/runtime/mmap.go b/src/runtime/mmap.go +index f0183f61cf..552aed22b3 100644 +--- a/src/runtime/mmap.go ++++ b/src/runtime/mmap.go +@@ -2,7 +2,7 @@ + // Use of this source code is governed by a BSD-style + // license that can be found in the LICENSE file. + +-//go:build !aix && !darwin && !js && (!linux || !amd64) && (!linux || !arm64) && (!freebsd || !amd64) && !openbsd && !plan9 && !solaris && !windows ++//go:build !aix && !darwin && !js && (!linux || !amd64) && (!linux || !arm64) && (!linux || !loong64) && (!freebsd || !amd64) && !openbsd && !plan9 && !solaris && !windows + + package runtime + +diff --git a/src/runtime/msan/msan.go b/src/runtime/msan/msan.go +index 4e41f8528d..7b3e8e608d 100644 +--- a/src/runtime/msan/msan.go ++++ b/src/runtime/msan/msan.go +@@ -2,7 +2,7 @@ + // Use of this source code is governed by a BSD-style + // license that can be found in the LICENSE file. + +-//go:build msan && ((linux && (amd64 || arm64)) || (freebsd && amd64)) ++//go:build msan && ((linux && (amd64 || arm64 || loong64)) || (freebsd && amd64)) + + package msan + +diff --git a/src/runtime/msan_loong64.s b/src/runtime/msan_loong64.s +new file mode 100644 +index 0000000000..56376a4f3b +--- /dev/null ++++ b/src/runtime/msan_loong64.s +@@ -0,0 +1,72 @@ ++// Copyright 2022 The Go Authors. All rights reserved. 
++// Use of this source code is governed by a BSD-style ++// license that can be found in the LICENSE file. ++ ++//go:build msan ++ ++#include "go_asm.h" ++#include "textflag.h" ++ ++#define RARG0 R4 ++#define RARG1 R5 ++#define RARG2 R6 ++#define FARG R7 ++ ++// func runtime·domsanread(addr unsafe.Pointer, sz uintptr) ++// Called from msanread. ++TEXT runtime·domsanread(SB), NOSPLIT, $0-16 ++ MOVV addr+0(FP), RARG0 ++ MOVV size+8(FP), RARG1 ++ // void __msan_read_go(void *addr, uintptr_t sz); ++ MOVV $__msan_read_go(SB), FARG ++ JMP msancall<>(SB) ++ ++// func runtime·msanwrite(addr unsafe.Pointer, sz uintptr) ++// Called from instrumented code. ++TEXT runtime·msanwrite(SB), NOSPLIT, $0-16 ++ MOVV addr+0(FP), RARG0 ++ MOVV size+8(FP), RARG1 ++ // void __msan_write_go(void *addr, uintptr_t sz); ++ MOVV $__msan_write_go(SB), FARG ++ JMP msancall<>(SB) ++ ++// func runtime·msanmalloc(addr unsafe.Pointer, sz uintptr) ++TEXT runtime·msanmalloc(SB), NOSPLIT, $0-16 ++ MOVV addr+0(FP), RARG0 ++ MOVV size+8(FP), RARG1 ++ // void __msan_malloc_go(void *addr, uintptr_t sz); ++ MOVV $__msan_malloc_go(SB), FARG ++ JMP msancall<>(SB) ++ ++// func runtime·msanfree(addr unsafe.Pointer, sz uintptr) ++TEXT runtime·msanfree(SB), NOSPLIT, $0-16 ++ MOVV addr+0(FP), RARG0 ++ MOVV size+8(FP), RARG1 ++ // void __msan_free_go(void *addr, uintptr_t sz); ++ MOVV $__msan_free_go(SB), FARG ++ JMP msancall<>(SB) ++ ++// func runtime·msanmove(dst, src unsafe.Pointer, sz uintptr) ++TEXT runtime·msanmove(SB), NOSPLIT, $0-24 ++ MOVV dst+0(FP), RARG0 ++ MOVV src+8(FP), RARG1 ++ MOVV size+16(FP), RARG2 ++ // void __msan_memmove(void *dst, void *src, uintptr_t sz); ++ MOVV $__msan_memmove(SB), FARG ++ JMP msancall<>(SB) ++ ++// Switches SP to g0 stack and calls (FARG). Arguments already set. 
++TEXT msancall<>(SB), NOSPLIT, $0-0 ++ MOVV R3, R23 // callee-saved ++ BEQ g, g0stack // no g, still on a system stack ++ MOVV g_m(g), R14 ++ MOVV m_g0(R14), R15 ++ BEQ R15, g, g0stack ++ ++ MOVV (g_sched+gobuf_sp)(R15), R9 ++ MOVV R9, R3 ++ ++g0stack: ++ JAL (FARG) ++ MOVV R23, R3 ++ RET +diff --git a/src/runtime/sys_linux_loong64.s b/src/runtime/sys_linux_loong64.s +index 2a16b4f01d..30a2a03389 100644 +--- a/src/runtime/sys_linux_loong64.s ++++ b/src/runtime/sys_linux_loong64.s +@@ -465,7 +465,7 @@ TEXT runtime·cgoSigtramp(SB),NOSPLIT,$0 + JMP runtime·sigtramp(SB) + + // func mmap(addr unsafe.Pointer, n uintptr, prot, flags, fd int32, off uint32) (p unsafe.Pointer, err int) +-TEXT runtime·mmap(SB),NOSPLIT|NOFRAME,$0 ++TEXT runtime·sysMmap(SB),NOSPLIT|NOFRAME,$0 + MOVV addr+0(FP), R4 + MOVV n+8(FP), R5 + MOVW prot+16(FP), R6 +@@ -486,8 +486,24 @@ ok: + MOVV $0, err+40(FP) + RET + ++// Call the function stored in _cgo_mmap using the GCC calling convention. ++// This must be called on the system stack. ++TEXT runtime·callCgoMmap(SB),NOSPLIT,$0 ++ MOVV addr+0(FP), R4 ++ MOVV n+8(FP), R5 ++ MOVW prot+16(FP), R6 ++ MOVW flags+20(FP), R7 ++ MOVW fd+24(FP), R8 ++ MOVW off+28(FP), R9 ++ MOVV _cgo_mmap(SB), R13 ++ SUBV $16, R3 // reserve 16 bytes for sp-8 where fp may be saved. ++ JAL (R13) ++ ADDV $16, R3 ++ MOVV R4, ret+32(FP) ++ RET ++ + // func munmap(addr unsafe.Pointer, n uintptr) +-TEXT runtime·munmap(SB),NOSPLIT|NOFRAME,$0 ++TEXT runtime·sysMunmap(SB),NOSPLIT|NOFRAME,$0 + MOVV addr+0(FP), R4 + MOVV n+8(FP), R5 + MOVV $SYS_munmap, R11 +@@ -497,6 +513,17 @@ TEXT runtime·munmap(SB),NOSPLIT|NOFRAME,$0 + MOVV R0, 0xf3(R0) // crash + RET + ++// Call the function stored in _cgo_munmap using the GCC calling convention. ++// This must be called on the system stack. ++TEXT runtime·callCgoMunmap(SB),NOSPLIT,$0 ++ MOVV addr+0(FP), R4 ++ MOVV n+8(FP), R5 ++ MOVV _cgo_munmap(SB), R13 ++ SUBV $16, R3 // reserve 16 bytes for sp-8 where fp may be saved. 
++ JAL (R13) ++ ADDV $16, R3 ++ RET ++ + // func madvise(addr unsafe.Pointer, n uintptr, flags int32) + TEXT runtime·madvise(SB),NOSPLIT|NOFRAME,$0 + MOVV addr+0(FP), R4 +-- +2.38.1 + diff --git a/0039-runtime-enable-address-sanitizer-on-loong64.patch b/0039-runtime-enable-address-sanitizer-on-loong64.patch new file mode 100644 index 0000000000000000000000000000000000000000..0549cdb7bf23c7ce8b04ff7dfb81ac96b147e125 --- /dev/null +++ b/0039-runtime-enable-address-sanitizer-on-loong64.patch @@ -0,0 +1,137 @@ +From 2ed9a97b517e6eb6f5e8f4114f7f26d08edf3072 Mon Sep 17 00:00:00 2001 +From: Xiaolin Zhao +Date: Wed, 14 Dec 2022 16:10:24 +0800 +Subject: [PATCH 39/62] runtime: enable address sanitizer on loong64 + +Change-Id: I73e9f1112fdb80b70b0d18880424d055be965fff +--- + misc/cgo/testsanitizers/cc_test.go | 2 +- + src/internal/platform/supported.go | 2 +- + src/runtime/asan/asan.go | 2 +- + src/runtime/asan_loong64.s | 75 ++++++++++++++++++++++++++++++ + 4 files changed, 78 insertions(+), 3 deletions(-) + create mode 100644 src/runtime/asan_loong64.s + +diff --git a/misc/cgo/testsanitizers/cc_test.go b/misc/cgo/testsanitizers/cc_test.go +index b9157d5f4a..275b067345 100644 +--- a/misc/cgo/testsanitizers/cc_test.go ++++ b/misc/cgo/testsanitizers/cc_test.go +@@ -576,7 +576,7 @@ func mSanSupported(goos, goarch string) bool { + func aSanSupported(goos, goarch string) bool { + switch goos { + case "linux": +- return goarch == "amd64" || goarch == "arm64" || goarch == "riscv64" || goarch == "ppc64le" ++ return goarch == "amd64" || goarch == "arm64" || goarch == "loong64" || goarch == "riscv64" || goarch == "ppc64le" + default: + return false + } +diff --git a/src/internal/platform/supported.go b/src/internal/platform/supported.go +index 7aed873d41..d8e64dea78 100644 +--- a/src/internal/platform/supported.go ++++ b/src/internal/platform/supported.go +@@ -42,7 +42,7 @@ func MSanSupported(goos, goarch string) bool { + func ASanSupported(goos, goarch string) bool { + switch goos { 
+ case "linux": +- return goarch == "arm64" || goarch == "amd64" || goarch == "riscv64" || goarch == "ppc64le" ++ return goarch == "arm64" || goarch == "amd64" || goarch == "loong64" || goarch == "riscv64" || goarch == "ppc64le" + default: + return false + } +diff --git a/src/runtime/asan/asan.go b/src/runtime/asan/asan.go +index 25f15ae45b..ef70b0145b 100644 +--- a/src/runtime/asan/asan.go ++++ b/src/runtime/asan/asan.go +@@ -2,7 +2,7 @@ + // Use of this source code is governed by a BSD-style + // license that can be found in the LICENSE file. + +-//go:build asan && linux && (arm64 || amd64 || riscv64 || ppc64le) ++//go:build asan && linux && (arm64 || amd64 || loong64 || riscv64 || ppc64le) + + package asan + +diff --git a/src/runtime/asan_loong64.s b/src/runtime/asan_loong64.s +new file mode 100644 +index 0000000000..fe6742fffc +--- /dev/null ++++ b/src/runtime/asan_loong64.s +@@ -0,0 +1,75 @@ ++// Copyright 2021 The Go Authors. All rights reserved. ++// Use of this source code is governed by a BSD-style ++// license that can be found in the LICENSE file. ++ ++//go:build asan ++ ++#include "go_asm.h" ++#include "textflag.h" ++ ++#define RARG0 R4 ++#define RARG1 R5 ++#define RARG2 R6 ++#define RARG3 R7 ++#define FARG R8 ++ ++// Called from instrumented code. 
++// func runtime·doasanread(addr unsafe.Pointer, sz, sp, pc uintptr) ++TEXT runtime·doasanread(SB), NOSPLIT, $0-32 ++ MOVV addr+0(FP), RARG0 ++ MOVV size+8(FP), RARG1 ++ MOVV sp+16(FP), RARG2 ++ MOVV pc+24(FP), RARG3 ++ // void __asan_read_go(void *addr, uintptr_t sz, void *sp, void *pc); ++ MOVV $__asan_read_go(SB), FARG ++ JMP asancall<>(SB) ++ ++// func runtime·doasanwrite(addr unsafe.Pointer, sz, sp, pc uintptr) ++TEXT runtime·doasanwrite(SB), NOSPLIT, $0-32 ++ MOVV addr+0(FP), RARG0 ++ MOVV size+8(FP), RARG1 ++ MOVV sp+16(FP), RARG2 ++ MOVV pc+24(FP), RARG3 ++ // void __asan_write_go(void *addr, uintptr_t sz, void *sp, void *pc); ++ MOVV $__asan_write_go(SB), FARG ++ JMP asancall<>(SB) ++ ++// func runtime·asanunpoison(addr unsafe.Pointer, sz uintptr) ++TEXT runtime·asanunpoison(SB), NOSPLIT, $0-16 ++ MOVV addr+0(FP), RARG0 ++ MOVV size+8(FP), RARG1 ++ // void __asan_unpoison_go(void *addr, uintptr_t sz); ++ MOVV $__asan_unpoison_go(SB), FARG ++ JMP asancall<>(SB) ++ ++// func runtime·asanpoison(addr unsafe.Pointer, sz uintptr) ++TEXT runtime·asanpoison(SB), NOSPLIT, $0-16 ++ MOVV addr+0(FP), RARG0 ++ MOVV size+8(FP), RARG1 ++ // void __asan_poison_go(void *addr, uintptr_t sz); ++ MOVV $__asan_poison_go(SB), FARG ++ JMP asancall<>(SB) ++ ++// func runtime·asanregisterglobals(addr unsafe.Pointer, n uintptr) ++TEXT runtime·asanregisterglobals(SB), NOSPLIT, $0-16 ++ MOVV addr+0(FP), RARG0 ++ MOVV size+8(FP), RARG1 ++ // void __asan_register_globals_go(void *addr, uintptr_t n); ++ MOVV $__asan_register_globals_go(SB), FARG ++ JMP asancall<>(SB) ++ ++// Switches SP to g0 stack and calls (FARG). Arguments already set. 
++TEXT asancall<>(SB), NOSPLIT, $0-0 ++ MOVV R3, R23 // callee-saved ++ BEQ g, g0stack // no g, still on a system stack ++ MOVV g_m(g), R14 ++ MOVV m_g0(R14), R15 ++ BEQ R15, g, g0stack ++ ++ MOVV (g_sched+gobuf_sp)(R15), R9 ++ MOVV R9, R3 ++ ++g0stack: ++ JAL (FARG) ++ MOVV R23, R3 ++ RET +-- +2.38.1 + diff --git a/0040-cmd-link-workaround-linkshared-test-errors-on-loong6.patch b/0040-cmd-link-workaround-linkshared-test-errors-on-loong6.patch new file mode 100644 index 0000000000000000000000000000000000000000..ad87625b39bfd59f7771cdf8a1fc81eb0d576316 --- /dev/null +++ b/0040-cmd-link-workaround-linkshared-test-errors-on-loong6.patch @@ -0,0 +1,31 @@ +From 0f40e3e826f43d7c8ab54ddf7bb75228b64b2e85 Mon Sep 17 00:00:00 2001 +From: Guoqi Chen +Date: Mon, 27 Mar 2023 19:06:21 +0800 +Subject: [PATCH 40/62] cmd/link: workaround linkshared test errors on loong64. + +Signed-off-by: Guoqi Chen +Change-Id: Iee95ec29c348b1944d79954aba976ec2300ec28c +--- + src/cmd/link/internal/ld/lib.go | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/src/cmd/link/internal/ld/lib.go b/src/cmd/link/internal/ld/lib.go +index 03b9f11608..54672c0686 100644 +--- a/src/cmd/link/internal/ld/lib.go ++++ b/src/cmd/link/internal/ld/lib.go +@@ -1790,6 +1790,12 @@ func (ctxt *Link) hostlink() { + argv = append(argv, peimporteddlls()...) 
+ } + ++ if ctxt.Arch.Family == sys.Loong64 { ++ if (ctxt.BuildMode != BuildModeShared) && ctxt.linkShared { ++ argv = append(argv, "-pie") ++ } ++ } ++ + if ctxt.Debugvlog != 0 { + ctxt.Logf("host link:") + for _, v := range argv { +-- +2.38.1 + diff --git a/0041-runtime-remove-the-meaningless-offset-of-8-for-duffz.patch b/0041-runtime-remove-the-meaningless-offset-of-8-for-duffz.patch new file mode 100644 index 0000000000000000000000000000000000000000..efb728c7751bb63d148f309fef53e26c1fbb9b47 --- /dev/null +++ b/0041-runtime-remove-the-meaningless-offset-of-8-for-duffz.patch @@ -0,0 +1,542 @@ +From ceace0927f4cb2e7a70eee964d0735e505abfb32 Mon Sep 17 00:00:00 2001 +From: Guoqi Chen +Date: Fri, 21 Apr 2023 11:08:09 +0800 +Subject: [PATCH 41/62] runtime: remove the meaningless offset of 8 for + duffzero on loong64 + +Currently we subtract 8 from offset when calling duffzero because 8 +is added to offset in the duffzero implementation. This operation is +meaningless, so remove it. + +Change-Id: I7e451d04d7e98ccafe711645d81d3aadf376766f +--- + src/cmd/compile/internal/loong64/ggen.go | 23 +- + src/cmd/compile/internal/loong64/ssa.go | 10 +- + .../compile/internal/ssa/_gen/LOONG64Ops.go | 2 +- + src/cmd/compile/internal/ssa/opGen.go | 2 +- + src/runtime/duff_loong64.s | 256 +++++++++--------- + src/runtime/mkduff.go | 4 +- + 6 files changed, 148 insertions(+), 149 deletions(-) + +diff --git a/src/cmd/compile/internal/loong64/ggen.go b/src/cmd/compile/internal/loong64/ggen.go +index c6fd1a65a1..2f94aad912 100644 +--- a/src/cmd/compile/internal/loong64/ggen.go ++++ b/src/cmd/compile/internal/loong64/ggen.go +@@ -5,6 +5,7 @@ + package loong64 + + import ( ++ "cmd/compile/internal/base" + "cmd/compile/internal/ir" + "cmd/compile/internal/objw" + "cmd/compile/internal/types" +@@ -16,34 +17,38 @@ func zerorange(pp *objw.Progs, p *obj.Prog, off, cnt int64, _ *uint32) *obj.Prog + if cnt == 0 { + return p + } ++ ++ // Adjust the frame to account for LR. 
++ off += base.Ctxt.Arch.FixedFrameSize ++ + if cnt < int64(4*types.PtrSize) { + for i := int64(0); i < cnt; i += int64(types.PtrSize) { +- p = pp.Append(p, loong64.AMOVV, obj.TYPE_REG, loong64.REGZERO, 0, obj.TYPE_MEM, loong64.REGSP, 8+off+i) ++ p = pp.Append(p, loong64.AMOVV, obj.TYPE_REG, loong64.REGZERO, 0, obj.TYPE_MEM, loong64.REGSP, off+i) + } + } else if cnt <= int64(128*types.PtrSize) { +- p = pp.Append(p, loong64.AADDV, obj.TYPE_CONST, 0, 8+off-8, obj.TYPE_REG, loong64.REGRT1, 0) ++ p = pp.Append(p, loong64.AADDV, obj.TYPE_CONST, 0, off, obj.TYPE_REG, loong64.REGRT1, 0) + p.Reg = loong64.REGSP + p = pp.Append(p, obj.ADUFFZERO, obj.TYPE_NONE, 0, 0, obj.TYPE_MEM, 0, 0) + p.To.Name = obj.NAME_EXTERN + p.To.Sym = ir.Syms.Duffzero + p.To.Offset = 8 * (128 - cnt/int64(types.PtrSize)) + } else { +- // ADDV $(8+frame+lo-8), SP, r1 ++ // ADDV $(off), SP, r1 + // ADDV $cnt, r1, r2 + // loop: +- // MOVV R0, (Widthptr)r1 ++ // MOVV R0, (r1) + // ADDV $Widthptr, r1 +- // BNE r1, r2, loop +- p = pp.Append(p, loong64.AADDV, obj.TYPE_CONST, 0, 8+off-8, obj.TYPE_REG, loong64.REGRT1, 0) ++ // BNE r1, r2, loop ++ p = pp.Append(p, loong64.AADDV, obj.TYPE_CONST, 0, off, obj.TYPE_REG, loong64.REGRT1, 0) + p.Reg = loong64.REGSP + p = pp.Append(p, loong64.AADDV, obj.TYPE_CONST, 0, cnt, obj.TYPE_REG, loong64.REGRT2, 0) + p.Reg = loong64.REGRT1 +- p = pp.Append(p, loong64.AMOVV, obj.TYPE_REG, loong64.REGZERO, 0, obj.TYPE_MEM, loong64.REGRT1, int64(types.PtrSize)) +- p1 := p ++ p = pp.Append(p, loong64.AMOVV, obj.TYPE_REG, loong64.REGZERO, 0, obj.TYPE_MEM, loong64.REGRT1, 0) ++ loop := p + p = pp.Append(p, loong64.AADDV, obj.TYPE_CONST, 0, int64(types.PtrSize), obj.TYPE_REG, loong64.REGRT1, 0) + p = pp.Append(p, loong64.ABNE, obj.TYPE_REG, loong64.REGRT1, 0, obj.TYPE_BRANCH, 0, 0) + p.Reg = loong64.REGRT2 +- p.To.SetTarget(p1) ++ p.To.SetTarget(loop) + } + + return p +diff --git a/src/cmd/compile/internal/loong64/ssa.go b/src/cmd/compile/internal/loong64/ssa.go +index 
59f9e189bd..5726396e4a 100644 +--- a/src/cmd/compile/internal/loong64/ssa.go ++++ b/src/cmd/compile/internal/loong64/ssa.go +@@ -390,14 +390,8 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) { + p.To.Type = obj.TYPE_REG + p.To.Reg = v.Reg() + case ssa.OpLOONG64DUFFZERO: +- // runtime.duffzero expects start address - 8 in R19 +- p := s.Prog(loong64.ASUBVU) +- p.From.Type = obj.TYPE_CONST +- p.From.Offset = 8 +- p.Reg = v.Args[0].Reg() +- p.To.Type = obj.TYPE_REG +- p.To.Reg = loong64.REG_R19 +- p = s.Prog(obj.ADUFFZERO) ++ // runtime.duffzero expects start address in R19 ++ p := s.Prog(obj.ADUFFZERO) + p.To.Type = obj.TYPE_MEM + p.To.Name = obj.NAME_EXTERN + p.To.Sym = ir.Syms.Duffzero +diff --git a/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go b/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go +index 22a83fb300..3a594dd6f7 100644 +--- a/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go ++++ b/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go +@@ -288,7 +288,7 @@ func init() { + aux: "Int64", + argLength: 2, + reg: regInfo{ +- inputs: []regMask{gp}, ++ inputs: []regMask{buildReg("R19")}, + clobbers: buildReg("R19 R1"), + }, + faultOnNilArg0: true, +diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go +index 407ecbb250..8b9df4043e 100644 +--- a/src/cmd/compile/internal/ssa/opGen.go ++++ b/src/cmd/compile/internal/ssa/opGen.go +@@ -24181,7 +24181,7 @@ var opcodeTable = [...]opInfo{ + faultOnNilArg0: true, + reg: regInfo{ + inputs: []inputInfo{ +- {0, 1070596088}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R23 R24 R25 R26 R27 R28 R29 R31 ++ {0, 262144}, // R19 + }, + clobbers: 262146, // R1 R19 + }, +diff --git a/src/runtime/duff_loong64.s b/src/runtime/duff_loong64.s +index 7f78e4fa9f..63fa3bcca1 100644 +--- a/src/runtime/duff_loong64.s ++++ b/src/runtime/duff_loong64.s +@@ -5,261 +5,261 @@ + #include "textflag.h" + + TEXT runtime·duffzero(SB), NOSPLIT|NOFRAME, $0-0 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV 
$8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 
8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, 
(R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- 
MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 +- MOVV R0, 8(R19) ++ MOVV R0, (R19) + ADDV $8, R19 + RET + +diff --git a/src/runtime/mkduff.go b/src/runtime/mkduff.go +index 6b42b8524b..eb2b9c07ba 100644 +--- a/src/runtime/mkduff.go ++++ b/src/runtime/mkduff.go +@@ -179,11 +179,11 @@ func copyARM64(w io.Writer) { + + func zeroLOONG64(w io.Writer) { + // R0: always zero +- // R19 (aka REGRT1): ptr to memory to be zeroed - 8 ++ // R19 (aka REGRT1): ptr to memory to be zeroed + // On return, R19 points to the last zeroed dword. 
+ fmt.Fprintln(w, "TEXT runtime·duffzero(SB), NOSPLIT|NOFRAME, $0-0") + for i := 0; i < 128; i++ { +- fmt.Fprintln(w, "\tMOVV\tR0, 8(R19)") ++ fmt.Fprintln(w, "\tMOVV\tR0, (R19)") + fmt.Fprintln(w, "\tADDV\t$8, R19") + } + fmt.Fprintln(w, "\tRET") +-- +2.38.1 + diff --git a/0042-cmd-compiler-remove-the-meaningless-offset-of-8-for-.patch b/0042-cmd-compiler-remove-the-meaningless-offset-of-8-for-.patch new file mode 100644 index 0000000000000000000000000000000000000000..18fc51ac83e445ae79fe46453fa36e00c1624d54 --- /dev/null +++ b/0042-cmd-compiler-remove-the-meaningless-offset-of-8-for-.patch @@ -0,0 +1,312 @@ +From 7d2ecbb18f7032f634a40c05af8cc46931afb886 Mon Sep 17 00:00:00 2001 +From: Guoqi Chen +Date: Tue, 25 Apr 2023 03:27:23 +0800 +Subject: [PATCH 42/62] cmd/compiler: remove the meaningless offset of 8 for + Lowered{Zero,Move} on loong64 + +Like the CL 487295, remove the meaningless +/- offset operation in the +LoweredZero and LoweredMove implementation. + +Change LoweredMove's Rarg0 register to R20, consistent with duffcopy. + +Change-Id: Ia3f3c8b25e1e93c97ab72328651de78ca9dec016 +--- + src/cmd/compile/internal/loong64/ssa.go | 168 ++++++++---------- + .../compile/internal/ssa/_gen/LOONG64Ops.go | 28 ++- + src/cmd/compile/internal/ssa/opGen.go | 6 +- + 3 files changed, 91 insertions(+), 111 deletions(-) + +diff --git a/src/cmd/compile/internal/loong64/ssa.go b/src/cmd/compile/internal/loong64/ssa.go +index 5726396e4a..75eb732df8 100644 +--- a/src/cmd/compile/internal/loong64/ssa.go ++++ b/src/cmd/compile/internal/loong64/ssa.go +@@ -80,6 +80,28 @@ func storeByType(t *types.Type, r int16) obj.As { + panic("bad store type") + } + ++// largestMove returns the largest move instruction possible and its size, ++// given the alignment of the total size of the move. ++// ++// e.g., a 16-byte move may use MOVV, but an 11-byte move must use MOVB. ++// ++// Note that the moves may not be on naturally aligned addresses depending on ++// the source and destination. 
++// ++// This matches the calculation in ssa.moveSize. ++func largestMove(alignment int64) (obj.As, int64) { ++ switch { ++ case alignment%8 == 0: ++ return loong64.AMOVV, 8 ++ case alignment%4 == 0: ++ return loong64.AMOVW, 4 ++ case alignment%2 == 0: ++ return loong64.AMOVH, 2 ++ default: ++ return loong64.AMOVB, 1 ++ } ++} ++ + func ssaGenValue(s *ssagen.State, v *ssa.Value) { + switch v.Op { + case ssa.OpCopy, ssa.OpLOONG64MOVVreg: +@@ -397,49 +419,29 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) { + p.To.Sym = ir.Syms.Duffzero + p.To.Offset = v.AuxInt + case ssa.OpLOONG64LoweredZero: +- // SUBV $8, R19 +- // MOVV R0, 8(R19) +- // ADDV $8, R19 +- // BNE Rarg1, R19, -2(PC) +- // arg1 is the address of the last element to zero +- var sz int64 +- var mov obj.As +- switch { +- case v.AuxInt%8 == 0: +- sz = 8 +- mov = loong64.AMOVV +- case v.AuxInt%4 == 0: +- sz = 4 +- mov = loong64.AMOVW +- case v.AuxInt%2 == 0: +- sz = 2 +- mov = loong64.AMOVH +- default: +- sz = 1 +- mov = loong64.AMOVB +- } +- p := s.Prog(loong64.ASUBVU) +- p.From.Type = obj.TYPE_CONST +- p.From.Offset = sz +- p.To.Type = obj.TYPE_REG +- p.To.Reg = loong64.REG_R19 +- p2 := s.Prog(mov) +- p2.From.Type = obj.TYPE_REG +- p2.From.Reg = loong64.REGZERO +- p2.To.Type = obj.TYPE_MEM +- p2.To.Reg = loong64.REG_R19 +- p2.To.Offset = sz +- p3 := s.Prog(loong64.AADDVU) +- p3.From.Type = obj.TYPE_CONST +- p3.From.Offset = sz +- p3.To.Type = obj.TYPE_REG +- p3.To.Reg = loong64.REG_R19 +- p4 := s.Prog(loong64.ABNE) +- p4.From.Type = obj.TYPE_REG +- p4.From.Reg = v.Args[1].Reg() +- p4.Reg = loong64.REG_R19 +- p4.To.Type = obj.TYPE_BRANCH +- p4.To.SetTarget(p2) ++ // MOVx R0, (Rarg0) ++ // ADDV $sz, Rarg0 ++ // BGEU Rarg1, Rarg0, -2(PC) ++ mov, sz := largestMove(v.AuxInt) ++ p := s.Prog(mov) ++ p.From.Type = obj.TYPE_REG ++ p.From.Reg = loong64.REGZERO ++ p.To.Type = obj.TYPE_MEM ++ p.To.Reg = v.Args[0].Reg() ++ ++ p2 := s.Prog(loong64.AADDVU) ++ p2.From.Type = obj.TYPE_CONST ++ p2.From.Offset = sz ++ 
p2.To.Type = obj.TYPE_REG ++ p2.To.Reg = v.Args[0].Reg() ++ ++ p3 := s.Prog(loong64.ABGEU) ++ p3.From.Type = obj.TYPE_REG ++ p3.From.Reg = v.Args[1].Reg() ++ p3.Reg = v.Args[0].Reg() ++ p3.To.Type = obj.TYPE_BRANCH ++ p3.To.SetTarget(p) ++ + case ssa.OpLOONG64DUFFCOPY: + p := s.Prog(obj.ADUFFCOPY) + p.To.Type = obj.TYPE_MEM +@@ -447,61 +449,43 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) { + p.To.Sym = ir.Syms.Duffcopy + p.To.Offset = v.AuxInt + case ssa.OpLOONG64LoweredMove: +- // SUBV $8, R19 +- // MOVV 8(R19), Rtmp +- // MOVV Rtmp, (R4) +- // ADDV $8, R19 +- // ADDV $8, R4 +- // BNE Rarg2, R19, -4(PC) +- // arg2 is the address of the last element of src +- var sz int64 +- var mov obj.As +- switch { +- case v.AuxInt%8 == 0: +- sz = 8 +- mov = loong64.AMOVV +- case v.AuxInt%4 == 0: +- sz = 4 +- mov = loong64.AMOVW +- case v.AuxInt%2 == 0: +- sz = 2 +- mov = loong64.AMOVH +- default: +- sz = 1 +- mov = loong64.AMOVB +- } +- p := s.Prog(loong64.ASUBVU) +- p.From.Type = obj.TYPE_CONST +- p.From.Offset = sz ++ // MOVx (Rarg1), Rtmp ++ // MOVx Rtmp, (Rarg0) ++ // ADDV $sz, Rarg1 ++ // ADDV $sz, Rarg0 ++ // BGEU Rarg2, Rarg0, -4(PC) ++ mov, sz := largestMove(v.AuxInt) ++ p := s.Prog(mov) ++ p.From.Type = obj.TYPE_MEM ++ p.From.Reg = v.Args[1].Reg() + p.To.Type = obj.TYPE_REG +- p.To.Reg = loong64.REG_R19 ++ p.To.Reg = loong64.REGTMP ++ + p2 := s.Prog(mov) +- p2.From.Type = obj.TYPE_MEM +- p2.From.Reg = loong64.REG_R19 +- p2.From.Offset = sz +- p2.To.Type = obj.TYPE_REG +- p2.To.Reg = loong64.REGTMP +- p3 := s.Prog(mov) +- p3.From.Type = obj.TYPE_REG +- p3.From.Reg = loong64.REGTMP +- p3.To.Type = obj.TYPE_MEM +- p3.To.Reg = loong64.REG_R4 ++ p2.From.Type = obj.TYPE_REG ++ p2.From.Reg = loong64.REGTMP ++ p2.To.Type = obj.TYPE_MEM ++ p2.To.Reg = v.Args[0].Reg() ++ ++ p3 := s.Prog(loong64.AADDVU) ++ p3.From.Type = obj.TYPE_CONST ++ p3.From.Offset = sz ++ p3.To.Type = obj.TYPE_REG ++ p3.To.Reg = v.Args[1].Reg() ++ + p4 := s.Prog(loong64.AADDVU) + p4.From.Type = 
obj.TYPE_CONST + p4.From.Offset = sz + p4.To.Type = obj.TYPE_REG +- p4.To.Reg = loong64.REG_R19 +- p5 := s.Prog(loong64.AADDVU) +- p5.From.Type = obj.TYPE_CONST +- p5.From.Offset = sz +- p5.To.Type = obj.TYPE_REG +- p5.To.Reg = loong64.REG_R4 +- p6 := s.Prog(loong64.ABNE) +- p6.From.Type = obj.TYPE_REG +- p6.From.Reg = v.Args[2].Reg() +- p6.Reg = loong64.REG_R19 +- p6.To.Type = obj.TYPE_BRANCH +- p6.To.SetTarget(p2) ++ p4.To.Reg = v.Args[0].Reg() ++ ++ p5 := s.Prog(loong64.ABGEU) ++ p5.From.Type = obj.TYPE_REG ++ p5.From.Reg = v.Args[2].Reg() ++ p5.Reg = v.Args[1].Reg() ++ p5.To.Type = obj.TYPE_BRANCH ++ p5.To.SetTarget(p) ++ + case ssa.OpLOONG64CALLstatic, ssa.OpLOONG64CALLclosure, ssa.OpLOONG64CALLinter: + s.Call(v) + case ssa.OpLOONG64CALLtail: +diff --git a/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go b/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go +index 3a594dd6f7..aca1bd7358 100644 +--- a/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go ++++ b/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go +@@ -318,10 +318,9 @@ func init() { + // arg2 = mem + // auxint = alignment + // returns mem +- // SUBV $8, R19 +- // MOVV R0, 8(R19) +- // ADDV $8, R19 +- // BNE Rarg1, R19, -2(PC) ++ // MOVx R0, (R19) ++ // ADDV $sz, R19 ++ // BGEU Rarg1, R19, -2(PC) + { + name: "LoweredZero", + aux: "Int64", +@@ -330,32 +329,31 @@ func init() { + inputs: []regMask{buildReg("R19"), gp}, + clobbers: buildReg("R19"), + }, +- clobberFlags: true, ++ typ: "Mem", + faultOnNilArg0: true, + }, + + // large or unaligned move +- // arg0 = address of dst memory (in R4, changed as side effect) ++ // arg0 = address of dst memory (in R20, changed as side effect) + // arg1 = address of src memory (in R19, changed as side effect) + // arg2 = address of the last element of src + // arg3 = mem + // auxint = alignment + // returns mem +- // SUBV $8, R19 +- // MOVV 8(R19), Rtmp +- // MOVV Rtmp, (R4) +- // ADDV $8, R19 +- // ADDV $8, R4 +- // BNE Rarg2, R19, -4(PC) ++ // MOVx (R19), Rtmp ++ // MOVx 
Rtmp, (R20) ++ // ADDV $sz, R19 ++ // ADDV $sz, R20 ++ // BGEU Rarg2, R19, -4(PC) + { + name: "LoweredMove", + aux: "Int64", + argLength: 4, + reg: regInfo{ +- inputs: []regMask{buildReg("R4"), buildReg("R19"), gp}, +- clobbers: buildReg("R19 R4"), ++ inputs: []regMask{buildReg("R20"), buildReg("R19"), gp}, ++ clobbers: buildReg("R19 R20"), + }, +- clobberFlags: true, ++ typ: "Mem", + faultOnNilArg0: true, + faultOnNilArg1: true, + }, +diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go +index 8b9df4043e..57cd6d6931 100644 +--- a/src/cmd/compile/internal/ssa/opGen.go ++++ b/src/cmd/compile/internal/ssa/opGen.go +@@ -24204,7 +24204,6 @@ var opcodeTable = [...]opInfo{ + name: "LoweredZero", + auxType: auxInt64, + argLen: 3, +- clobberFlags: true, + faultOnNilArg0: true, + reg: regInfo{ + inputs: []inputInfo{ +@@ -24218,16 +24217,15 @@ var opcodeTable = [...]opInfo{ + name: "LoweredMove", + auxType: auxInt64, + argLen: 4, +- clobberFlags: true, + faultOnNilArg0: true, + faultOnNilArg1: true, + reg: regInfo{ + inputs: []inputInfo{ +- {0, 8}, // R4 ++ {0, 524288}, // R20 + {1, 262144}, // R19 + {2, 1070596088}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R23 R24 R25 R26 R27 R28 R29 R31 + }, +- clobbers: 262152, // R4 R19 ++ clobbers: 786432, // R19 R20 + }, + }, + { +-- +2.38.1 + diff --git a/0043-cmd-compile-internal-runtime-use-NOOP-for-hardware-N.patch b/0043-cmd-compile-internal-runtime-use-NOOP-for-hardware-N.patch new file mode 100644 index 0000000000000000000000000000000000000000..d2f59cc1b85bf3f0ab8bed61e001342244dda0de --- /dev/null +++ b/0043-cmd-compile-internal-runtime-use-NOOP-for-hardware-N.patch @@ -0,0 +1,61 @@ +From b125d0fc1d38e775218666f88cb4a8b2b9cf3b1e Mon Sep 17 00:00:00 2001 +From: WANG Xuerui +Date: Sat, 11 Mar 2023 22:26:39 +0800 +Subject: [PATCH 43/62] cmd/compile/internal, runtime: use NOOP for hardware + NOPs on loong64 + +The canonical LoongArch NOP instruction form is "andi r0, r0, 
0", as +described in the LoongArch Reference Manual Volume 1, Section 2.2.1.10. +We currently use NOR instead, which may or may not change anything (e.g. +performance on less capable micro-architectures) but is deviation from +upstream standards nevertheless. Fix them to use the explicit hardware +NOP which happens to be supported as `NOOP`. + +Change-Id: I0a799a1da959e9c3b582feb88202df2bab0ab23a +Reviewed-on: https://go-review.googlesource.com/c/go/+/475615 +Reviewed-by: abner chenc +TryBot-Result: Gopher Robot +Run-TryBot: Wayne Zuo +Reviewed-by: Ian Lance Taylor +Reviewed-by: Cherry Mui +--- + src/cmd/compile/internal/loong64/ggen.go | 6 +----- + src/runtime/asm_loong64.s | 4 ++-- + 2 files changed, 3 insertions(+), 7 deletions(-) + +diff --git a/src/cmd/compile/internal/loong64/ggen.go b/src/cmd/compile/internal/loong64/ggen.go +index 2f94aad912..27d318a8bb 100644 +--- a/src/cmd/compile/internal/loong64/ggen.go ++++ b/src/cmd/compile/internal/loong64/ggen.go +@@ -55,10 +55,6 @@ func zerorange(pp *objw.Progs, p *obj.Prog, off, cnt int64, _ *uint32) *obj.Prog + } + + func ginsnop(pp *objw.Progs) *obj.Prog { +- p := pp.Prog(loong64.ANOR) +- p.From.Type = obj.TYPE_REG +- p.From.Reg = loong64.REG_R0 +- p.To.Type = obj.TYPE_REG +- p.To.Reg = loong64.REG_R0 ++ p := pp.Prog(loong64.ANOOP) + return p + } +diff --git a/src/runtime/asm_loong64.s b/src/runtime/asm_loong64.s +index ba06203fa1..d82c688925 100644 +--- a/src/runtime/asm_loong64.s ++++ b/src/runtime/asm_loong64.s +@@ -613,10 +613,10 @@ TEXT _cgo_topofstack(SB),NOSPLIT,$16 + // The top-most function running on a goroutine + // returns to goexit+PCQuantum. 
+ TEXT runtime·goexit(SB),NOSPLIT|NOFRAME|TOPFRAME,$0-0 +- NOR R0, R0 // NOP ++ NOOP + JAL runtime·goexit1(SB) // does not return + // traceback from goexit1 must hit code range of goexit +- NOR R0, R0 // NOP ++ NOOP + + TEXT runtime·addmoduledata(SB),NOSPLIT,$0-0 + ADDV $-16, R3 +-- +2.38.1 + diff --git a/0044-cmd-link-internal-loong64-use-BREAK-0-as-the-code-pa.patch b/0044-cmd-link-internal-loong64-use-BREAK-0-as-the-code-pa.patch new file mode 100644 index 0000000000000000000000000000000000000000..0e59b1ce60f611424daa893dc3d51298bcc0606a --- /dev/null +++ b/0044-cmd-link-internal-loong64-use-BREAK-0-as-the-code-pa.patch @@ -0,0 +1,41 @@ +From 9a232dba55a847ef5c19667e083af9f19a160e7c Mon Sep 17 00:00:00 2001 +From: WANG Xuerui +Date: Sat, 11 Mar 2023 22:38:01 +0800 +Subject: [PATCH 44/62] cmd/link/internal/loong64: use BREAK 0 as the code pad + sequence + +As the comment on CodePad goes, we "might want to pad with a trap +instruction to catch wayward programs". The current behavior of +zero-padding is equivalent to padding with an instruction of 0x00000000, +which is invalid according to the LoongArch manuals nevertheless, but +rumor has it that some early and/or engineering samples of Loongson +3A5000 recognized it (maybe behaving like NOP). It is better to avoid +undocumented behavior and ensure execution flow would not overflow the +pads. 
+ +Change-Id: I531b1eabeb355e9ad4a2d5340e61f2fe71349297 +Reviewed-on: https://go-review.googlesource.com/c/go/+/475616 +Reviewed-by: abner chenc +Reviewed-by: Ian Lance Taylor +Run-TryBot: Cherry Mui +TryBot-Result: Gopher Robot +Reviewed-by: Cherry Mui +--- + src/cmd/link/internal/loong64/obj.go | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/src/cmd/link/internal/loong64/obj.go b/src/cmd/link/internal/loong64/obj.go +index 0a5bb0ac6d..4865c695b9 100644 +--- a/src/cmd/link/internal/loong64/obj.go ++++ b/src/cmd/link/internal/loong64/obj.go +@@ -19,6 +19,7 @@ func Init() (*sys.Arch, ld.Arch) { + Minalign: minAlign, + Dwarfregsp: dwarfRegSP, + Dwarfreglr: dwarfRegLR, ++ CodePad: []byte{0x00, 0x00, 0x2a, 0x00}, // BREAK 0 + Adddynrel: adddynrel, + Archinit: archinit, + Archreloc: archreloc, +-- +2.38.1 + diff --git a/0045-cmd-asm-runtime-remove-the-RSB-register-from-loong64.patch b/0045-cmd-asm-runtime-remove-the-RSB-register-from-loong64.patch new file mode 100644 index 0000000000000000000000000000000000000000..2c96f25505c9f1d4ac1f8c47ef244d55187ae4fc --- /dev/null +++ b/0045-cmd-asm-runtime-remove-the-RSB-register-from-loong64.patch @@ -0,0 +1,121 @@ +From 9e07f2b06c05648aa551ea1967dac9ddaf9fd1db Mon Sep 17 00:00:00 2001 +From: WANG Xuerui +Date: Mon, 13 Mar 2023 16:20:50 +0800 +Subject: [PATCH 45/62] cmd/asm, runtime: remove the RSB register from loong64 + +It was carryover from the mips64 port (where it represented the platform +GP register) but LoongArch platform ABI doesn't have the GP concept. 
+ +Change-Id: Iea326ae13676e95b040b52aaadc08d311b507bd3 +--- + src/cmd/asm/internal/arch/arch.go | 1 - + src/cmd/asm/internal/asm/operand_test.go | 4 ++-- + src/runtime/mkpreempt.go | 8 +++----- + src/runtime/preempt_loong64.s | 4 ++-- + src/runtime/signal_loong64.go | 4 +--- + 5 files changed, 8 insertions(+), 13 deletions(-) + +diff --git a/src/cmd/asm/internal/arch/arch.go b/src/cmd/asm/internal/arch/arch.go +index 740711c40c..11bb7af899 100644 +--- a/src/cmd/asm/internal/arch/arch.go ++++ b/src/cmd/asm/internal/arch/arch.go +@@ -536,7 +536,6 @@ func archLoong64(linkArch *obj.LinkArch) *Arch { + // Avoid unintentionally clobbering g using R22. + delete(register, "R22") + register["g"] = loong64.REG_R22 +- register["RSB"] = loong64.REG_R31 + registerPrefix := map[string]bool{ + "F": true, + "FCSR": true, +diff --git a/src/cmd/asm/internal/asm/operand_test.go b/src/cmd/asm/internal/asm/operand_test.go +index 29371d6199..c7e251f50f 100644 +--- a/src/cmd/asm/internal/asm/operand_test.go ++++ b/src/cmd/asm/internal/asm/operand_test.go +@@ -915,8 +915,9 @@ var loong64OperandTests = []operandTest{ + {"R27", "R27"}, + {"R28", "R28"}, + {"R29", "R29"}, +- {"R30", "R30"}, + {"R3", "R3"}, ++ {"R30", "R30"}, ++ {"R31", "R31"}, + {"R4", "R4"}, + {"R5", "R5"}, + {"R6", "R6"}, +@@ -925,7 +926,6 @@ var loong64OperandTests = []operandTest{ + {"R9", "R9"}, + {"a(FP)", "a(FP)"}, + {"g", "g"}, +- {"RSB", "R31"}, + {"ret+8(FP)", "ret+8(FP)"}, + {"runtime·abort(SB)", "runtime.abort(SB)"}, + {"·AddUint32(SB)", "\"\".AddUint32(SB)"}, +diff --git a/src/runtime/mkpreempt.go b/src/runtime/mkpreempt.go +index 70eca7c7e2..043fb3e4ef 100644 +--- a/src/runtime/mkpreempt.go ++++ b/src/runtime/mkpreempt.go +@@ -462,20 +462,18 @@ func genLoong64() { + movf := "MOVD" + add := "ADDV" + sub := "SUBV" +- r31 := "RSB" + regsize := 8 + + // Add integer registers r4-r21 r23-r29 r31 + // R0 (zero), R30 (REGTMP), R2 (tp), R3 (SP), R22 (g), R1 (LR) are special, + var l = layout{sp: "R3", stack: regsize} 
// add slot to save PC of interrupted instruction (in LR) +- for i := 4; i <= 29; i++ { +- if i == 22 { +- continue // R3 is REGSP R22 is g ++ for i := 4; i <= 31; i++ { ++ if i == 22 || i == 30 { ++ continue + } + reg := fmt.Sprintf("R%d", i) + l.add(mov, reg, regsize) + } +- l.add(mov, r31, regsize) + + // Add floating point registers F0-F31. + for i := 0; i <= 31; i++ { +diff --git a/src/runtime/preempt_loong64.s b/src/runtime/preempt_loong64.s +index 999e72c470..bb9c948365 100644 +--- a/src/runtime/preempt_loong64.s ++++ b/src/runtime/preempt_loong64.s +@@ -31,7 +31,7 @@ TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0 + MOVV R27, 184(R3) + MOVV R28, 192(R3) + MOVV R29, 200(R3) +- MOVV RSB, 208(R3) ++ MOVV R31, 208(R3) + MOVD F0, 216(R3) + MOVD F1, 224(R3) + MOVD F2, 232(R3) +@@ -101,7 +101,7 @@ TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0 + MOVD 232(R3), F2 + MOVD 224(R3), F1 + MOVD 216(R3), F0 +- MOVV 208(R3), RSB ++ MOVV 208(R3), R31 + MOVV 200(R3), R29 + MOVV 192(R3), R28 + MOVV 184(R3), R27 +diff --git a/src/runtime/signal_loong64.go b/src/runtime/signal_loong64.go +index 26717a6e59..ac842c0c94 100644 +--- a/src/runtime/signal_loong64.go ++++ b/src/runtime/signal_loong64.go +@@ -77,10 +77,8 @@ func (c *sigctxt) preparePanic(sig uint32, gp *g) { + } + + // In case we are panicking from external C code +- sigpanicPC := uint64(abi.FuncPCABIInternal(sigpanic)) +- c.set_r31(sigpanicPC >> 32 << 32) // RSB register + c.set_r22(uint64(uintptr(unsafe.Pointer(gp)))) +- c.set_pc(sigpanicPC) ++ c.set_pc(uint64(abi.FuncPCABIInternal(sigpanic))) + } + + func (c *sigctxt) pushCall(targetPC, resumePC uintptr) { +-- +2.38.1 + diff --git a/0046-cmd-internal-obj-loong64-realize-all-unconditional-j.patch b/0046-cmd-internal-obj-loong64-realize-all-unconditional-j.patch new file mode 100644 index 0000000000000000000000000000000000000000..b1c9b9d53d929cd3078294c73e0d98e9479e3eca --- /dev/null +++ b/0046-cmd-internal-obj-loong64-realize-all-unconditional-j.patch @@ -0,0 +1,169 @@ 
+From 3c6467474e6a4fd8596af462f38b6631eb511baf Mon Sep 17 00:00:00 2001 +From: WANG Xuerui +Date: Tue, 21 Mar 2023 18:23:44 +0800 +Subject: [PATCH 46/62] cmd/internal/obj/loong64: realize all unconditional + jumps with B/BL +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +The current practice of using the "PC-relative" `BEQ ZERO, ZERO` for +short jumps is inherited from the MIPS port, where the pre-R6 long +jumps are PC-regional instead of PC-relative. This quirk is not +present in LoongArch from the very beginning so there is no reason to +keep the behavior any more. + +While at it, simplify the code to not place anything in the jump offset +field if a relocation is to take place. (It may be relic of a previous +REL-era treatment where the addend is to be stored in the instruction +word, but again, loong64 is exclusively RELA from day 1 so no point in +doing so either.) + +Benchmark shows very slight improvement on a 3A5000 box, indicating the +LA464 micro-architecture presumably *not* seeing the always-true BEQs as +equivalent to B: + +goos: linux +goarch: loong64 +pkg: test/bench/go1 + │ 2ef70d9d0f │ this CL │ + │ sec/op │ sec/op vs base │ +BinaryTree17 14.57 ± 4% 14.54 ± 1% ~ (p=0.353 n=10) +Fannkuch11 3.570 ± 0% 3.570 ± 0% ~ (p=0.529 n=10) +FmtFprintfEmpty 92.84n ± 0% 92.84n ± 0% ~ (p=0.970 n=10) +FmtFprintfString 150.0n ± 0% 149.9n ± 0% ~ (p=0.350 n=10) +FmtFprintfInt 153.3n ± 0% 153.3n ± 0% ~ (p=1.000 n=10) ¹ +FmtFprintfIntInt 235.8n ± 0% 235.8n ± 0% ~ (p=0.963 n=10) +FmtFprintfPrefixedInt 318.5n ± 0% 318.5n ± 0% ~ (p=0.474 n=10) +FmtFprintfFloat 410.4n ± 0% 410.4n ± 0% ~ (p=0.628 n=10) +FmtManyArgs 944.9n ± 0% 945.0n ± 0% ~ (p=0.240 n=10) +GobDecode 13.97m ± 12% 12.83m ± 21% ~ (p=0.165 n=10) +GobEncode 17.84m ± 5% 18.60m ± 4% ~ (p=0.123 n=10) +Gzip 421.0m ± 0% 421.0m ± 0% ~ (p=0.579 n=10) +Gunzip 89.80m ± 0% 89.77m ± 0% ~ (p=0.529 n=10) +HTTPClientServer 86.54µ ± 1% 86.25µ ± 0% -0.33% (p=0.003 n=10) +JSONEncode 
18.57m ± 0% 18.57m ± 0% ~ (p=0.353 n=10) +JSONDecode 77.48m ± 0% 77.30m ± 0% -0.23% (p=0.035 n=10) +Mandelbrot200 7.217m ± 0% 7.217m ± 0% ~ (p=0.436 n=10) +GoParse 7.599m ± 2% 7.632m ± 1% ~ (p=0.353 n=10) +RegexpMatchEasy0_32 140.1n ± 0% 140.1n ± 0% ~ (p=0.582 n=10) +RegexpMatchEasy0_1K 1.538µ ± 0% 1.538µ ± 0% ~ (p=1.000 n=10) ¹ +RegexpMatchEasy1_32 161.7n ± 0% 161.7n ± 0% ~ (p=1.000 n=10) ¹ +RegexpMatchEasy1_1K 1.632µ ± 0% 1.632µ ± 0% ~ (p=1.000 n=10) ¹ +RegexpMatchMedium_32 1.369µ ± 0% 1.369µ ± 0% ~ (p=1.000 n=10) +RegexpMatchMedium_1K 39.96µ ± 0% 39.96µ ± 0% +0.01% (p=0.010 n=10) +RegexpMatchHard_32 2.099µ ± 0% 2.099µ ± 0% ~ (p=1.000 n=10) ¹ +RegexpMatchHard_1K 62.50µ ± 0% 62.50µ ± 0% ~ (p=0.099 n=10) +Revcomp 1.349 ± 0% 1.347 ± 0% -0.14% (p=0.001 n=10) +Template 118.4m ± 0% 118.0m ± 0% -0.36% (p=0.023 n=10) +TimeParse 407.8n ± 0% 407.9n ± 0% +0.02% (p=0.000 n=10) +TimeFormat 508.0n ± 0% 507.9n ± 0% ~ (p=0.421 n=10) +geomean 103.5µ 103.3µ -0.17% +¹ all samples are equal + + │ 2ef70d9d0f │ this CL │ + │ B/s │ B/s vs base │ +GobDecode 52.67Mi ± 11% 57.04Mi ± 17% ~ (p=0.149 n=10) +GobEncode 41.03Mi ± 4% 39.35Mi ± 4% ~ (p=0.118 n=10) +Gzip 43.95Mi ± 0% 43.95Mi ± 0% ~ (p=0.428 n=10) +Gunzip 206.1Mi ± 0% 206.1Mi ± 0% ~ (p=0.399 n=10) +JSONEncode 99.64Mi ± 0% 99.66Mi ± 0% ~ (p=0.304 n=10) +JSONDecode 23.88Mi ± 0% 23.94Mi ± 0% +0.22% (p=0.030 n=10) +GoParse 7.267Mi ± 2% 7.238Mi ± 1% ~ (p=0.360 n=10) +RegexpMatchEasy0_32 217.8Mi ± 0% 217.8Mi ± 0% -0.00% (p=0.006 n=10) +RegexpMatchEasy0_1K 635.0Mi ± 0% 635.0Mi ± 0% ~ (p=0.194 n=10) +RegexpMatchEasy1_32 188.7Mi ± 0% 188.7Mi ± 0% ~ (p=0.338 n=10) +RegexpMatchEasy1_1K 598.5Mi ± 0% 598.5Mi ± 0% -0.00% (p=0.000 n=10) +RegexpMatchMedium_32 22.30Mi ± 0% 22.30Mi ± 0% ~ (p=0.211 n=10) +RegexpMatchMedium_1K 24.43Mi ± 0% 24.43Mi ± 0% ~ (p=1.000 n=10) +RegexpMatchHard_32 14.54Mi ± 0% 14.54Mi ± 0% ~ (p=0.474 n=10) +RegexpMatchHard_1K 15.62Mi ± 0% 15.62Mi ± 0% ~ (p=1.000 n=10) ¹ +Revcomp 179.7Mi ± 0% 180.0Mi ± 0% +0.14% (p=0.001 n=10) 
+Template 15.63Mi ± 0% 15.68Mi ± 0% +0.34% (p=0.022 n=10) +geomean 60.29Mi 60.44Mi +0.24% +¹ all samples are equal + +Change-Id: I112dd663c49567386ea75dd4966a9f8127ffb90e +Reviewed-on: https://go-review.googlesource.com/c/go/+/478075 +Run-TryBot: Ian Lance Taylor +Reviewed-by: Ian Lance Taylor +Run-TryBot: Cherry Mui +Auto-Submit: Ian Lance Taylor +Reviewed-by: Cherry Mui +Reviewed-by: Heschi Kreinick +TryBot-Result: Gopher Robot +--- + src/cmd/asm/internal/asm/testdata/loong64.s | 5 +++-- + .../asm/internal/asm/testdata/loong64enc1.s | 6 ++++-- + src/cmd/internal/obj/loong64/asm.go | 20 +++++-------------- + 3 files changed, 12 insertions(+), 19 deletions(-) + +diff --git a/src/cmd/asm/internal/asm/testdata/loong64.s b/src/cmd/asm/internal/asm/testdata/loong64.s +index 6c44d2208a..51b195b4b0 100644 +--- a/src/cmd/asm/internal/asm/testdata/loong64.s ++++ b/src/cmd/asm/internal/asm/testdata/loong64.s +@@ -6,6 +6,7 @@ + // TODO: cover more instruction + + TEXT foo(SB),DUPOK|NOSPLIT,$0 +- JAL 1(PC) //CALL 1(PC) //00100054 ++ JAL 1(PC) //CALL 1(PC) //00040054 + JAL (R4) //CALL (R4) //8100004c +- JAL foo(SB) //CALL foo(SB) //00140054 ++ // relocation in play so the assembled offset should be 0 ++ JAL foo(SB) //CALL foo(SB) //00000054 +diff --git a/src/cmd/asm/internal/asm/testdata/loong64enc1.s b/src/cmd/asm/internal/asm/testdata/loong64enc1.s +index 4f3cb2b2ee..f5a80d5d17 100644 +--- a/src/cmd/asm/internal/asm/testdata/loong64enc1.s ++++ b/src/cmd/asm/internal/asm/testdata/loong64enc1.s +@@ -13,9 +13,11 @@ lable2: + BFPF 1(PC) // 00040048 + BFPF lable2 // BFPF 4 // 1ffcff4b + +- JMP foo(SB) // 00100050 ++ // relocation in play so the assembled offset should be 0 ++ JMP foo(SB) // 00000050 ++ + JMP (R4) // 8000004c +- JMP 1(PC) // 00040058 ++ JMP 1(PC) // 00040050 + MOVW $65536, R4 // 04020014 + MOVW $4096, R4 // 24000014 + MOVV $65536, R4 // 04020014 +diff --git a/src/cmd/internal/obj/loong64/asm.go b/src/cmd/internal/obj/loong64/asm.go +index 17c0539972..a2e25dfa71 
100644 +--- a/src/cmd/internal/obj/loong64/asm.go ++++ b/src/cmd/internal/obj/loong64/asm.go +@@ -1263,24 +1263,14 @@ func (c *ctxt0) asmout(p *obj.Prog, o *Optab, out []uint32) { + + case 11: // jmp lbra + v := int32(0) +- if c.aclass(&p.To) == C_SBRA && p.To.Sym == nil && p.As == AJMP { +- // use PC-relative branch for short branches +- // BEQ R0, R0, sbra +- if p.To.Target() != nil { +- v = int32(p.To.Target().Pc-p.Pc) >> 2 +- } +- if (v<<16)>>16 == v { +- o1 = OP_16IRR(c.opirr(ABEQ), uint32(v), uint32(REGZERO), uint32(REGZERO)) +- break +- } +- } +- if p.To.Target() == nil { +- v = int32(p.Pc) >> 2 +- } else { +- v = int32(p.To.Target().Pc) >> 2 ++ if p.To.Target() != nil { ++ v = int32(p.To.Target().Pc-p.Pc) >> 2 + } + o1 = OP_B_BL(c.opirr(p.As), uint32(v)) + if p.To.Sym == nil { ++ if p.As == AJMP { ++ break ++ } + p.To.Sym = c.cursym.Func().Text.From.Sym + p.To.Offset = p.To.Target().Pc + } +-- +2.38.1 + diff --git a/0047-cmd-internal-obj-loong64-clean-up-code-for-short-con.patch b/0047-cmd-internal-obj-loong64-clean-up-code-for-short-con.patch new file mode 100644 index 0000000000000000000000000000000000000000..fdf888b1d13a6b13b14b7d9a7ce03cc05c77f815 --- /dev/null +++ b/0047-cmd-internal-obj-loong64-clean-up-code-for-short-con.patch @@ -0,0 +1,64 @@ +From 6ad6916ddb2a1e374c66f78464b0368389fe843a Mon Sep 17 00:00:00 2001 +From: WANG Xuerui +Date: Wed, 22 Mar 2023 13:28:08 +0800 +Subject: [PATCH 47/62] cmd/internal/obj/loong64: clean up code for short + conditional branches + +Untangle the logic so the preparation of operands and actual assembling +(branch range checking included) are properly separated, making future +changes easier to review and maintain. No functional change intended. 
+ +Change-Id: I1f73282f9d92ff23d84846453d3597ba66d207d1 +--- + src/cmd/internal/obj/loong64/asm.go | 30 ++++++++++++++--------------- + 1 file changed, 15 insertions(+), 15 deletions(-) + +diff --git a/src/cmd/internal/obj/loong64/asm.go b/src/cmd/internal/obj/loong64/asm.go +index a2e25dfa71..7d40b927f8 100644 +--- a/src/cmd/internal/obj/loong64/asm.go ++++ b/src/cmd/internal/obj/loong64/asm.go +@@ -1199,26 +1199,26 @@ func (c *ctxt0) asmout(p *obj.Prog, o *Optab, out []uint32) { + + case 6: // beq r1,[r2],sbra + v := int32(0) +- vcmp := int32(0) + if p.To.Target() != nil { + v = int32(p.To.Target().Pc-p.Pc) >> 2 + } +- if v < 0 { +- vcmp = -v +- } +- if (p.As == ABFPT || p.As == ABFPF) && ((uint32(vcmp))>>21)&0x7FF != 0 { +- c.ctxt.Diag("21 bit-width, short branch too far\n%v", p) +- } else if p.As != ABFPT && p.As != ABFPF && (v<<16)>>16 != v { +- c.ctxt.Diag("16 bit-width, short branch too far\n%v", p) +- } ++ rd, rj := p.Reg, p.From.Reg + if p.As == ABGTZ || p.As == ABLEZ { +- o1 = OP_16IRR(c.opirr(p.As), uint32(v), uint32(p.Reg), uint32(p.From.Reg)) +- } else if p.As == ABFPT || p.As == ABFPF { +- // BCNEZ cj offset21 ,cj = fcc0 +- // BCEQZ cj offset21 ,cj = fcc0 ++ rd, rj = rj, rd ++ } ++ switch p.As { ++ case ABFPT, ABFPF: ++ if (v<<11)>>11 != v { ++ c.ctxt.Diag("21 bit-width, short branch too far\n%v", p) ++ } ++ // FCC0 is the implicit source operand, now that we ++ // don't register-allocate from the FCC bank. 
+ o1 = OP_16IR_5I(c.opirr(p.As), uint32(v), uint32(REG_FCC0)) +- } else { +- o1 = OP_16IRR(c.opirr(p.As), uint32(v), uint32(p.From.Reg), uint32(p.Reg)) ++ default: ++ if (v<<16)>>16 != v { ++ c.ctxt.Diag("16 bit-width, short branch too far\n%v", p) ++ } ++ o1 = OP_16IRR(c.opirr(p.As), uint32(v), uint32(rj), uint32(rd)) + } + + case 7: // mov r, soreg +-- +2.38.1 + diff --git a/0048-cmd-internal-obj-loong64-assemble-BEQ-BNEs-comparing.patch b/0048-cmd-internal-obj-loong64-assemble-BEQ-BNEs-comparing.patch new file mode 100644 index 0000000000000000000000000000000000000000..b732fc2dab2853c5c20b0b06ca052d2eff8cdd75 --- /dev/null +++ b/0048-cmd-internal-obj-loong64-assemble-BEQ-BNEs-comparing.patch @@ -0,0 +1,115 @@ +From 08108cdba9826583a1b6f7c32d7c378de1d336ad Mon Sep 17 00:00:00 2001 +From: WANG Xuerui +Date: Wed, 22 Mar 2023 13:56:38 +0800 +Subject: [PATCH 48/62] cmd/internal/obj/loong64: assemble BEQ/BNEs comparing + with 0 as beqz/bnez + +LoongArch (except for the extremely reduced LA32 Primary subset) has +dedicated beqz/bnez instructions as alternative encodings for beq/bne +with one of the source registers being R0, that allow the offset field +to occupy 5 more bits, giving 21 bits in total (equal to the FP +branches). Make use of them instead of beq/bne if one source operand is +omitted in asm, or if one of the registers being compared is R0. + +Multiple go1 benchmark runs indicate the change is not perf-sensitive. 
+ +Change-Id: If6267623c82092e81d75578091fb4e013658b9f3 +--- + .../asm/internal/asm/testdata/loong64enc1.s | 8 +++- + src/cmd/internal/obj/loong64/asm.go | 39 ++++++++++++++----- + 2 files changed, 36 insertions(+), 11 deletions(-) + +diff --git a/src/cmd/asm/internal/asm/testdata/loong64enc1.s b/src/cmd/asm/internal/asm/testdata/loong64enc1.s +index f5a80d5d17..ea6c569f9d 100644 +--- a/src/cmd/asm/internal/asm/testdata/loong64enc1.s ++++ b/src/cmd/asm/internal/asm/testdata/loong64enc1.s +@@ -116,7 +116,13 @@ lable2: + ROTRV $4, R4 // 84104d00 + SYSCALL // 00002b00 + BEQ R4, R5, 1(PC) // 85040058 +- BEQ R4, 1(PC) // 80040058 ++ BEQ R4, 1(PC) // 80040040 ++ BEQ R4, R0, 1(PC) // 80040040 ++ BEQ R0, R4, 1(PC) // 80040040 ++ BNE R4, R5, 1(PC) // 8504005c ++ BNE R4, 1(PC) // 80040044 ++ BNE R4, R0, 1(PC) // 80040044 ++ BNE R0, R4, 1(PC) // 80040044 + BLTU R4, 1(PC) // 80040068 + MOVW y+8(FP), F4 // 6440002b + MOVF y+8(FP), F4 // 6440002b +diff --git a/src/cmd/internal/obj/loong64/asm.go b/src/cmd/internal/obj/loong64/asm.go +index 7d40b927f8..3973674998 100644 +--- a/src/cmd/internal/obj/loong64/asm.go ++++ b/src/cmd/internal/obj/loong64/asm.go +@@ -1115,7 +1115,7 @@ func OP_RR(op uint32, r2 uint32, r3 uint32) uint32 { + } + + func OP_16IR_5I(op uint32, i uint32, r2 uint32) uint32 { +- return op | (i&0xFFFF)<<10 | (r2&0x7)<<5 | ((i >> 16) & 0x1F) ++ return op | (i&0xFFFF)<<10 | (r2&0x1F)<<5 | ((i >> 16) & 0x1F) + } + + func OP_16IRR(op uint32, i uint32, r2 uint32, r3 uint32) uint32 { +@@ -1202,23 +1202,38 @@ func (c *ctxt0) asmout(p *obj.Prog, o *Optab, out []uint32) { + if p.To.Target() != nil { + v = int32(p.To.Target().Pc-p.Pc) >> 2 + } +- rd, rj := p.Reg, p.From.Reg +- if p.As == ABGTZ || p.As == ABLEZ { ++ as, rd, rj, width := p.As, p.Reg, p.From.Reg, 16 ++ switch as { ++ case ABGTZ, ABLEZ: + rd, rj = rj, rd +- } +- switch p.As { + case ABFPT, ABFPF: ++ width = 21 ++ // FCC0 is the implicit source operand, now that we ++ // don't register-allocate from the FCC 
bank. ++ rd = REG_FCC0 ++ case ABEQ, ABNE: ++ if rd == 0 || rd == REGZERO || rj == REGZERO { ++ // BEQZ/BNEZ can be encoded with 21-bit offsets. ++ width = 21 ++ as = -as ++ if rj == 0 || rj == REGZERO { ++ rj = rd ++ } ++ } ++ } ++ switch width { ++ case 21: + if (v<<11)>>11 != v { + c.ctxt.Diag("21 bit-width, short branch too far\n%v", p) + } +- // FCC0 is the implicit source operand, now that we +- // don't register-allocate from the FCC bank. +- o1 = OP_16IR_5I(c.opirr(p.As), uint32(v), uint32(REG_FCC0)) +- default: ++ o1 = OP_16IR_5I(c.opirr(as), uint32(v), uint32(rj)) ++ case 16: + if (v<<16)>>16 != v { + c.ctxt.Diag("16 bit-width, short branch too far\n%v", p) + } +- o1 = OP_16IRR(c.opirr(p.As), uint32(v), uint32(rj), uint32(rd)) ++ o1 = OP_16IRR(c.opirr(as), uint32(v), uint32(rj), uint32(rd)) ++ default: ++ c.ctxt.Diag("unexpected branch encoding\n%v", p) + } + + case 7: // mov r, soreg +@@ -1972,6 +1987,10 @@ func (c *ctxt0) opirr(a obj.As) uint32 { + return 0x1b << 26 + case ABGE, ABGEZ, ABLEZ: + return 0x19 << 26 ++ case -ABEQ: // beqz ++ return 0x10 << 26 ++ case -ABNE: // bnez ++ return 0x11 << 26 + case ABEQ: + return 0x16 << 26 + case ABNE: +-- +2.38.1 + diff --git a/0049-cmd-internal-obj-loong64-remove-Optab.family-and-reo.patch b/0049-cmd-internal-obj-loong64-remove-Optab.family-and-reo.patch new file mode 100644 index 0000000000000000000000000000000000000000..0ffb7149a68a13371a4a2cccd441048561e118ea --- /dev/null +++ b/0049-cmd-internal-obj-loong64-remove-Optab.family-and-reo.patch @@ -0,0 +1,822 @@ +From 8119503dbdd4d444b9c89cd21d3e67bcf9750c80 Mon Sep 17 00:00:00 2001 +From: WANG Xuerui +Date: Thu, 16 Mar 2023 18:18:04 +0800 +Subject: [PATCH 49/62] cmd/internal/obj/loong64: remove Optab.family and + reorganize operand class fields + +There is currently no support for GOARCH=loong32, so the Optab.family +field is unused so far. 
Remove it to simplify the optab; the loong +assembler backend would likely already be overhauled into a sufficiently +different shape by the time we start to care for loong32, that the data +we have today would be useless anyway. + +While at it, add a operand class slot for the 3rd source operand +(support for which will arrive in later commits), and rename the other +operand class fields to be self-documenting. The changes are being +merged into this patch for sake of reducing code churn. + +Change-Id: Icf0988e34ff1c0f762c8e0708cfcef2e7954760c +Reviewed-on: https://go-review.googlesource.com/c/go/+/477715 +Reviewed-by: abner chenc +Run-TryBot: Ben Shi +Reviewed-by: Matthew Dempsky +TryBot-Result: Gopher Robot +Reviewed-by: Cherry Mui +Auto-Submit: Wayne Zuo +--- + src/cmd/internal/obj/loong64/asm.go | 709 ++++++++++++++-------------- + 1 file changed, 360 insertions(+), 349 deletions(-) + +diff --git a/src/cmd/internal/obj/loong64/asm.go b/src/cmd/internal/obj/loong64/asm.go +index 3973674998..0bc3f9c75e 100644 +--- a/src/cmd/internal/obj/loong64/asm.go ++++ b/src/cmd/internal/obj/loong64/asm.go +@@ -7,7 +7,6 @@ package loong64 + import ( + "cmd/internal/obj" + "cmd/internal/objabi" +- "cmd/internal/sys" + "fmt" + "log" + "sort" +@@ -32,16 +31,16 @@ const ( + ) + + type Optab struct { +- as obj.As +- a1 uint8 // first source operand +- a2 uint8 // 2nd source operand +- a3 uint8 // first destination operand +- a4 uint8 // 2nd destination operand +- type_ int8 +- size int8 +- param int16 +- family sys.ArchFamily +- flag uint8 ++ as obj.As ++ from1 uint8 ++ reg uint8 ++ from3 uint8 ++ to1 uint8 ++ to2 uint8 ++ type_ int8 ++ size int8 ++ param int16 ++ flag uint8 + } + + const ( +@@ -49,328 +48,329 @@ const ( + ) + + var optab = []Optab{ +- {obj.ATEXT, C_ADDR, C_NONE, C_TEXTSIZE, C_NONE, 0, 0, 0, 0, 0}, +- +- {AMOVW, C_REG, C_NONE, C_REG, C_NONE, 1, 4, 0, 0, 0}, +- {AMOVV, C_REG, C_NONE, C_REG, C_NONE, 1, 4, 0, sys.Loong64, 0}, +- {AMOVB, C_REG, C_NONE, C_REG, C_NONE, 
12, 8, 0, 0, NOTUSETMP}, +- {AMOVBU, C_REG, C_NONE, C_REG, C_NONE, 13, 4, 0, 0, 0}, +- {AMOVWU, C_REG, C_NONE, C_REG, C_NONE, 14, 8, 0, sys.Loong64, NOTUSETMP}, +- +- {ASUB, C_REG, C_REG, C_REG, C_NONE, 2, 4, 0, 0, 0}, +- {ASUBV, C_REG, C_REG, C_REG, C_NONE, 2, 4, 0, sys.Loong64, 0}, +- {AADD, C_REG, C_REG, C_REG, C_NONE, 2, 4, 0, 0, 0}, +- {AADDV, C_REG, C_REG, C_REG, C_NONE, 2, 4, 0, sys.Loong64, 0}, +- {AAND, C_REG, C_REG, C_REG, C_NONE, 2, 4, 0, 0, 0}, +- {ASUB, C_REG, C_NONE, C_REG, C_NONE, 2, 4, 0, 0, 0}, +- {ASUBV, C_REG, C_NONE, C_REG, C_NONE, 2, 4, 0, sys.Loong64, 0}, +- {AADD, C_REG, C_NONE, C_REG, C_NONE, 2, 4, 0, 0, 0}, +- {AADDV, C_REG, C_NONE, C_REG, C_NONE, 2, 4, 0, sys.Loong64, 0}, +- {AAND, C_REG, C_NONE, C_REG, C_NONE, 2, 4, 0, 0, 0}, +- {ANEGW, C_REG, C_NONE, C_REG, C_NONE, 2, 4, 0, 0, 0}, +- {ANEGV, C_REG, C_NONE, C_REG, C_NONE, 2, 4, 0, sys.Loong64, 0}, +- {AMASKEQZ, C_REG, C_REG, C_REG, C_NONE, 2, 4, 0, 0, 0}, +- +- {ASLL, C_REG, C_NONE, C_REG, C_NONE, 9, 4, 0, 0, 0}, +- {ASLL, C_REG, C_REG, C_REG, C_NONE, 9, 4, 0, 0, 0}, +- {ASLLV, C_REG, C_NONE, C_REG, C_NONE, 9, 4, 0, sys.Loong64, 0}, +- {ASLLV, C_REG, C_REG, C_REG, C_NONE, 9, 4, 0, sys.Loong64, 0}, +- {ACLO, C_REG, C_NONE, C_REG, C_NONE, 9, 4, 0, 0, 0}, +- +- {AADDF, C_FREG, C_NONE, C_FREG, C_NONE, 32, 4, 0, 0, 0}, +- {AADDF, C_FREG, C_REG, C_FREG, C_NONE, 32, 4, 0, 0, 0}, +- {ACMPEQF, C_FREG, C_REG, C_NONE, C_NONE, 32, 4, 0, 0, 0}, +- {AABSF, C_FREG, C_NONE, C_FREG, C_NONE, 33, 4, 0, 0, 0}, +- {AMOVVF, C_FREG, C_NONE, C_FREG, C_NONE, 33, 4, 0, sys.Loong64, 0}, +- {AMOVF, C_FREG, C_NONE, C_FREG, C_NONE, 33, 4, 0, 0, 0}, +- {AMOVD, C_FREG, C_NONE, C_FREG, C_NONE, 33, 4, 0, 0, 0}, +- +- {AMOVW, C_REG, C_NONE, C_SEXT, C_NONE, 7, 4, 0, sys.Loong64, 0}, +- {AMOVWU, C_REG, C_NONE, C_SEXT, C_NONE, 7, 4, 0, sys.Loong64, 0}, +- {AMOVV, C_REG, C_NONE, C_SEXT, C_NONE, 7, 4, 0, sys.Loong64, 0}, +- {AMOVB, C_REG, C_NONE, C_SEXT, C_NONE, 7, 4, 0, sys.Loong64, 0}, +- {AMOVBU, C_REG, C_NONE, C_SEXT, 
C_NONE, 7, 4, 0, sys.Loong64, 0}, +- {AMOVWL, C_REG, C_NONE, C_SEXT, C_NONE, 7, 4, 0, sys.Loong64, 0}, +- {AMOVVL, C_REG, C_NONE, C_SEXT, C_NONE, 7, 4, 0, sys.Loong64, 0}, +- {AMOVW, C_REG, C_NONE, C_SAUTO, C_NONE, 7, 4, REGSP, 0, 0}, +- {AMOVWU, C_REG, C_NONE, C_SAUTO, C_NONE, 7, 4, REGSP, sys.Loong64, 0}, +- {AMOVV, C_REG, C_NONE, C_SAUTO, C_NONE, 7, 4, REGSP, sys.Loong64, 0}, +- {AMOVB, C_REG, C_NONE, C_SAUTO, C_NONE, 7, 4, REGSP, 0, 0}, +- {AMOVBU, C_REG, C_NONE, C_SAUTO, C_NONE, 7, 4, REGSP, 0, 0}, +- {AMOVWL, C_REG, C_NONE, C_SAUTO, C_NONE, 7, 4, REGSP, 0, 0}, +- {AMOVVL, C_REG, C_NONE, C_SAUTO, C_NONE, 7, 4, REGSP, sys.Loong64, 0}, +- {AMOVW, C_REG, C_NONE, C_SOREG, C_NONE, 7, 4, REGZERO, 0, 0}, +- {AMOVWU, C_REG, C_NONE, C_SOREG, C_NONE, 7, 4, REGZERO, sys.Loong64, 0}, +- {AMOVV, C_REG, C_NONE, C_SOREG, C_NONE, 7, 4, REGZERO, sys.Loong64, 0}, +- {AMOVB, C_REG, C_NONE, C_SOREG, C_NONE, 7, 4, REGZERO, 0, 0}, +- {AMOVBU, C_REG, C_NONE, C_SOREG, C_NONE, 7, 4, REGZERO, 0, 0}, +- {AMOVWL, C_REG, C_NONE, C_SOREG, C_NONE, 7, 4, REGZERO, 0, 0}, +- {AMOVVL, C_REG, C_NONE, C_SOREG, C_NONE, 7, 4, REGZERO, sys.Loong64, 0}, +- {ASC, C_REG, C_NONE, C_SOREG, C_NONE, 7, 4, REGZERO, 0, 0}, +- {ASCV, C_REG, C_NONE, C_SOREG, C_NONE, 7, 4, REGZERO, sys.Loong64, 0}, +- +- {AMOVW, C_SEXT, C_NONE, C_REG, C_NONE, 8, 4, 0, sys.Loong64, 0}, +- {AMOVWU, C_SEXT, C_NONE, C_REG, C_NONE, 8, 4, 0, sys.Loong64, 0}, +- {AMOVV, C_SEXT, C_NONE, C_REG, C_NONE, 8, 4, 0, sys.Loong64, 0}, +- {AMOVB, C_SEXT, C_NONE, C_REG, C_NONE, 8, 4, 0, sys.Loong64, 0}, +- {AMOVBU, C_SEXT, C_NONE, C_REG, C_NONE, 8, 4, 0, sys.Loong64, 0}, +- {AMOVWL, C_SEXT, C_NONE, C_REG, C_NONE, 8, 4, 0, sys.Loong64, 0}, +- {AMOVVL, C_SEXT, C_NONE, C_REG, C_NONE, 8, 4, 0, sys.Loong64, 0}, +- {AMOVW, C_SAUTO, C_NONE, C_REG, C_NONE, 8, 4, REGSP, 0, 0}, +- {AMOVWU, C_SAUTO, C_NONE, C_REG, C_NONE, 8, 4, REGSP, sys.Loong64, 0}, +- {AMOVV, C_SAUTO, C_NONE, C_REG, C_NONE, 8, 4, REGSP, sys.Loong64, 0}, +- {AMOVB, C_SAUTO, C_NONE, 
C_REG, C_NONE, 8, 4, REGSP, 0, 0}, +- {AMOVBU, C_SAUTO, C_NONE, C_REG, C_NONE, 8, 4, REGSP, 0, 0}, +- {AMOVWL, C_SAUTO, C_NONE, C_REG, C_NONE, 8, 4, REGSP, 0, 0}, +- {AMOVVL, C_SAUTO, C_NONE, C_REG, C_NONE, 8, 4, REGSP, sys.Loong64, 0}, +- {AMOVW, C_SOREG, C_NONE, C_REG, C_NONE, 8, 4, REGZERO, 0, 0}, +- {AMOVWU, C_SOREG, C_NONE, C_REG, C_NONE, 8, 4, REGZERO, sys.Loong64, 0}, +- {AMOVV, C_SOREG, C_NONE, C_REG, C_NONE, 8, 4, REGZERO, sys.Loong64, 0}, +- {AMOVB, C_SOREG, C_NONE, C_REG, C_NONE, 8, 4, REGZERO, 0, 0}, +- {AMOVBU, C_SOREG, C_NONE, C_REG, C_NONE, 8, 4, REGZERO, 0, 0}, +- {AMOVWL, C_SOREG, C_NONE, C_REG, C_NONE, 8, 4, REGZERO, 0, 0}, +- {AMOVVL, C_SOREG, C_NONE, C_REG, C_NONE, 8, 4, REGZERO, sys.Loong64, 0}, +- {ALL, C_SOREG, C_NONE, C_REG, C_NONE, 8, 4, REGZERO, 0, 0}, +- {ALLV, C_SOREG, C_NONE, C_REG, C_NONE, 8, 4, REGZERO, sys.Loong64, 0}, +- +- {AMOVW, C_REG, C_NONE, C_LEXT, C_NONE, 35, 12, 0, sys.Loong64, 0}, +- {AMOVWU, C_REG, C_NONE, C_LEXT, C_NONE, 35, 12, 0, sys.Loong64, 0}, +- {AMOVV, C_REG, C_NONE, C_LEXT, C_NONE, 35, 12, 0, sys.Loong64, 0}, +- {AMOVB, C_REG, C_NONE, C_LEXT, C_NONE, 35, 12, 0, sys.Loong64, 0}, +- {AMOVBU, C_REG, C_NONE, C_LEXT, C_NONE, 35, 12, 0, sys.Loong64, 0}, +- {AMOVW, C_REG, C_NONE, C_LAUTO, C_NONE, 35, 12, REGSP, 0, 0}, +- {AMOVWU, C_REG, C_NONE, C_LAUTO, C_NONE, 35, 12, REGSP, sys.Loong64, 0}, +- {AMOVV, C_REG, C_NONE, C_LAUTO, C_NONE, 35, 12, REGSP, sys.Loong64, 0}, +- {AMOVB, C_REG, C_NONE, C_LAUTO, C_NONE, 35, 12, REGSP, 0, 0}, +- {AMOVBU, C_REG, C_NONE, C_LAUTO, C_NONE, 35, 12, REGSP, 0, 0}, +- {AMOVW, C_REG, C_NONE, C_LOREG, C_NONE, 35, 12, REGZERO, 0, 0}, +- {AMOVWU, C_REG, C_NONE, C_LOREG, C_NONE, 35, 12, REGZERO, sys.Loong64, 0}, +- {AMOVV, C_REG, C_NONE, C_LOREG, C_NONE, 35, 12, REGZERO, sys.Loong64, 0}, +- {AMOVB, C_REG, C_NONE, C_LOREG, C_NONE, 35, 12, REGZERO, 0, 0}, +- {AMOVBU, C_REG, C_NONE, C_LOREG, C_NONE, 35, 12, REGZERO, 0, 0}, +- {ASC, C_REG, C_NONE, C_LOREG, C_NONE, 35, 12, REGZERO, 0, 0}, +- {AMOVW, 
C_REG, C_NONE, C_ADDR, C_NONE, 50, 8, 0, 0, 0}, +- {AMOVW, C_REG, C_NONE, C_ADDR, C_NONE, 50, 8, 0, sys.Loong64, 0}, +- {AMOVWU, C_REG, C_NONE, C_ADDR, C_NONE, 50, 8, 0, sys.Loong64, 0}, +- {AMOVV, C_REG, C_NONE, C_ADDR, C_NONE, 50, 8, 0, sys.Loong64, 0}, +- {AMOVB, C_REG, C_NONE, C_ADDR, C_NONE, 50, 8, 0, 0, 0}, +- {AMOVB, C_REG, C_NONE, C_ADDR, C_NONE, 50, 8, 0, sys.Loong64, 0}, +- {AMOVBU, C_REG, C_NONE, C_ADDR, C_NONE, 50, 8, 0, 0, 0}, +- {AMOVBU, C_REG, C_NONE, C_ADDR, C_NONE, 50, 8, 0, sys.Loong64, 0}, +- {AMOVW, C_REG, C_NONE, C_TLS_LE, C_NONE, 53, 16, 0, 0, 0}, +- {AMOVWU, C_REG, C_NONE, C_TLS_LE, C_NONE, 53, 16, 0, sys.Loong64, 0}, +- {AMOVV, C_REG, C_NONE, C_TLS_LE, C_NONE, 53, 16, 0, sys.Loong64, 0}, +- {AMOVB, C_REG, C_NONE, C_TLS_LE, C_NONE, 53, 16, 0, 0, 0}, +- {AMOVBU, C_REG, C_NONE, C_TLS_LE, C_NONE, 53, 16, 0, 0, 0}, +- +- {AMOVW, C_LEXT, C_NONE, C_REG, C_NONE, 36, 12, 0, sys.Loong64, 0}, +- {AMOVWU, C_LEXT, C_NONE, C_REG, C_NONE, 36, 12, 0, sys.Loong64, 0}, +- {AMOVV, C_LEXT, C_NONE, C_REG, C_NONE, 36, 12, 0, sys.Loong64, 0}, +- {AMOVB, C_LEXT, C_NONE, C_REG, C_NONE, 36, 12, 0, sys.Loong64, 0}, +- {AMOVBU, C_LEXT, C_NONE, C_REG, C_NONE, 36, 12, 0, sys.Loong64, 0}, +- {AMOVW, C_LAUTO, C_NONE, C_REG, C_NONE, 36, 12, REGSP, 0, 0}, +- {AMOVWU, C_LAUTO, C_NONE, C_REG, C_NONE, 36, 12, REGSP, sys.Loong64, 0}, +- {AMOVV, C_LAUTO, C_NONE, C_REG, C_NONE, 36, 12, REGSP, sys.Loong64, 0}, +- {AMOVB, C_LAUTO, C_NONE, C_REG, C_NONE, 36, 12, REGSP, 0, 0}, +- {AMOVBU, C_LAUTO, C_NONE, C_REG, C_NONE, 36, 12, REGSP, 0, 0}, +- {AMOVW, C_LOREG, C_NONE, C_REG, C_NONE, 36, 12, REGZERO, 0, 0}, +- {AMOVWU, C_LOREG, C_NONE, C_REG, C_NONE, 36, 12, REGZERO, sys.Loong64, 0}, +- {AMOVV, C_LOREG, C_NONE, C_REG, C_NONE, 36, 12, REGZERO, sys.Loong64, 0}, +- {AMOVB, C_LOREG, C_NONE, C_REG, C_NONE, 36, 12, REGZERO, 0, 0}, +- {AMOVBU, C_LOREG, C_NONE, C_REG, C_NONE, 36, 12, REGZERO, 0, 0}, +- {AMOVW, C_ADDR, C_NONE, C_REG, C_NONE, 51, 8, 0, 0, 0}, +- {AMOVW, C_ADDR, C_NONE, C_REG, 
C_NONE, 51, 8, 0, sys.Loong64, 0}, +- {AMOVWU, C_ADDR, C_NONE, C_REG, C_NONE, 51, 8, 0, sys.Loong64, 0}, +- {AMOVV, C_ADDR, C_NONE, C_REG, C_NONE, 51, 8, 0, sys.Loong64, 0}, +- {AMOVB, C_ADDR, C_NONE, C_REG, C_NONE, 51, 8, 0, 0, 0}, +- {AMOVB, C_ADDR, C_NONE, C_REG, C_NONE, 51, 8, 0, sys.Loong64, 0}, +- {AMOVBU, C_ADDR, C_NONE, C_REG, C_NONE, 51, 8, 0, 0, 0}, +- {AMOVBU, C_ADDR, C_NONE, C_REG, C_NONE, 51, 8, 0, sys.Loong64, 0}, +- {AMOVW, C_TLS_LE, C_NONE, C_REG, C_NONE, 54, 16, 0, 0, 0}, +- {AMOVWU, C_TLS_LE, C_NONE, C_REG, C_NONE, 54, 16, 0, sys.Loong64, 0}, +- {AMOVV, C_TLS_LE, C_NONE, C_REG, C_NONE, 54, 16, 0, sys.Loong64, 0}, +- {AMOVB, C_TLS_LE, C_NONE, C_REG, C_NONE, 54, 16, 0, 0, 0}, +- {AMOVBU, C_TLS_LE, C_NONE, C_REG, C_NONE, 54, 16, 0, 0, 0}, +- +- {AMOVW, C_SECON, C_NONE, C_REG, C_NONE, 3, 4, 0, sys.Loong64, 0}, +- {AMOVV, C_SECON, C_NONE, C_REG, C_NONE, 3, 4, 0, sys.Loong64, 0}, +- {AMOVW, C_SACON, C_NONE, C_REG, C_NONE, 3, 4, REGSP, 0, 0}, +- {AMOVV, C_SACON, C_NONE, C_REG, C_NONE, 3, 4, REGSP, sys.Loong64, 0}, +- {AMOVW, C_LECON, C_NONE, C_REG, C_NONE, 52, 8, 0, 0, NOTUSETMP}, +- {AMOVW, C_LECON, C_NONE, C_REG, C_NONE, 52, 8, 0, sys.Loong64, NOTUSETMP}, +- {AMOVV, C_LECON, C_NONE, C_REG, C_NONE, 52, 8, 0, sys.Loong64, NOTUSETMP}, +- +- {AMOVW, C_LACON, C_NONE, C_REG, C_NONE, 26, 12, REGSP, 0, 0}, +- {AMOVV, C_LACON, C_NONE, C_REG, C_NONE, 26, 12, REGSP, sys.Loong64, 0}, +- {AMOVW, C_ADDCON, C_NONE, C_REG, C_NONE, 3, 4, REGZERO, 0, 0}, +- {AMOVV, C_ADDCON, C_NONE, C_REG, C_NONE, 3, 4, REGZERO, sys.Loong64, 0}, +- {AMOVW, C_ANDCON, C_NONE, C_REG, C_NONE, 3, 4, REGZERO, 0, 0}, +- {AMOVV, C_ANDCON, C_NONE, C_REG, C_NONE, 3, 4, REGZERO, sys.Loong64, 0}, +- {AMOVW, C_STCON, C_NONE, C_REG, C_NONE, 55, 12, 0, 0, 0}, +- {AMOVV, C_STCON, C_NONE, C_REG, C_NONE, 55, 12, 0, sys.Loong64, 0}, +- +- {AMOVW, C_UCON, C_NONE, C_REG, C_NONE, 24, 4, 0, 0, 0}, +- {AMOVV, C_UCON, C_NONE, C_REG, C_NONE, 24, 4, 0, sys.Loong64, 0}, +- {AMOVW, C_LCON, C_NONE, C_REG, C_NONE, 
19, 8, 0, 0, NOTUSETMP}, +- {AMOVV, C_LCON, C_NONE, C_REG, C_NONE, 19, 8, 0, sys.Loong64, NOTUSETMP}, +- {AMOVV, C_DCON, C_NONE, C_REG, C_NONE, 59, 16, 0, sys.Loong64, NOTUSETMP}, +- +- {AMUL, C_REG, C_NONE, C_REG, C_NONE, 2, 4, 0, 0, 0}, +- {AMUL, C_REG, C_REG, C_REG, C_NONE, 2, 4, 0, 0, 0}, +- {AMULV, C_REG, C_NONE, C_REG, C_NONE, 2, 4, 0, sys.Loong64, 0}, +- {AMULV, C_REG, C_REG, C_REG, C_NONE, 2, 4, 0, sys.Loong64, 0}, +- +- {AADD, C_ADD0CON, C_REG, C_REG, C_NONE, 4, 4, 0, 0, 0}, +- {AADD, C_ADD0CON, C_NONE, C_REG, C_NONE, 4, 4, 0, 0, 0}, +- {AADD, C_ANDCON, C_REG, C_REG, C_NONE, 10, 8, 0, 0, 0}, +- {AADD, C_ANDCON, C_NONE, C_REG, C_NONE, 10, 8, 0, 0, 0}, +- +- {AADDV, C_ADD0CON, C_REG, C_REG, C_NONE, 4, 4, 0, sys.Loong64, 0}, +- {AADDV, C_ADD0CON, C_NONE, C_REG, C_NONE, 4, 4, 0, sys.Loong64, 0}, +- {AADDV, C_ANDCON, C_REG, C_REG, C_NONE, 10, 8, 0, sys.Loong64, 0}, +- {AADDV, C_ANDCON, C_NONE, C_REG, C_NONE, 10, 8, 0, sys.Loong64, 0}, +- +- {AAND, C_AND0CON, C_REG, C_REG, C_NONE, 4, 4, 0, 0, 0}, +- {AAND, C_AND0CON, C_NONE, C_REG, C_NONE, 4, 4, 0, 0, 0}, +- {AAND, C_ADDCON, C_REG, C_REG, C_NONE, 10, 8, 0, 0, 0}, +- {AAND, C_ADDCON, C_NONE, C_REG, C_NONE, 10, 8, 0, 0, 0}, +- +- {AADD, C_UCON, C_REG, C_REG, C_NONE, 25, 8, 0, 0, 0}, +- {AADD, C_UCON, C_NONE, C_REG, C_NONE, 25, 8, 0, 0, 0}, +- {AADDV, C_UCON, C_REG, C_REG, C_NONE, 25, 8, 0, sys.Loong64, 0}, +- {AADDV, C_UCON, C_NONE, C_REG, C_NONE, 25, 8, 0, sys.Loong64, 0}, +- {AAND, C_UCON, C_REG, C_REG, C_NONE, 25, 8, 0, 0, 0}, +- {AAND, C_UCON, C_NONE, C_REG, C_NONE, 25, 8, 0, 0, 0}, +- +- {AADD, C_LCON, C_NONE, C_REG, C_NONE, 23, 12, 0, 0, 0}, +- {AADDV, C_LCON, C_NONE, C_REG, C_NONE, 23, 12, 0, sys.Loong64, 0}, +- {AAND, C_LCON, C_NONE, C_REG, C_NONE, 23, 12, 0, 0, 0}, +- {AADD, C_LCON, C_REG, C_REG, C_NONE, 23, 12, 0, 0, 0}, +- {AADDV, C_LCON, C_REG, C_REG, C_NONE, 23, 12, 0, sys.Loong64, 0}, +- {AAND, C_LCON, C_REG, C_REG, C_NONE, 23, 12, 0, 0, 0}, +- +- {AADDV, C_DCON, C_NONE, C_REG, C_NONE, 60, 20, 0, 
sys.Loong64, 0}, +- {AADDV, C_DCON, C_REG, C_REG, C_NONE, 60, 20, 0, sys.Loong64, 0}, +- +- {ASLL, C_SCON, C_REG, C_REG, C_NONE, 16, 4, 0, 0, 0}, +- {ASLL, C_SCON, C_NONE, C_REG, C_NONE, 16, 4, 0, 0, 0}, +- +- {ASLLV, C_SCON, C_REG, C_REG, C_NONE, 16, 4, 0, sys.Loong64, 0}, +- {ASLLV, C_SCON, C_NONE, C_REG, C_NONE, 16, 4, 0, sys.Loong64, 0}, +- +- {ASYSCALL, C_NONE, C_NONE, C_NONE, C_NONE, 5, 4, 0, 0, 0}, +- +- {ABEQ, C_REG, C_REG, C_SBRA, C_NONE, 6, 4, 0, 0, 0}, +- {ABEQ, C_REG, C_NONE, C_SBRA, C_NONE, 6, 4, 0, 0, 0}, +- {ABLEZ, C_REG, C_NONE, C_SBRA, C_NONE, 6, 4, 0, 0, 0}, +- {ABFPT, C_NONE, C_NONE, C_SBRA, C_NONE, 6, 4, 0, 0, NOTUSETMP}, +- +- {AJMP, C_NONE, C_NONE, C_LBRA, C_NONE, 11, 4, 0, 0, 0}, // b +- {AJAL, C_NONE, C_NONE, C_LBRA, C_NONE, 11, 4, 0, 0, 0}, // bl +- +- {AJMP, C_NONE, C_NONE, C_ZOREG, C_NONE, 18, 4, REGZERO, 0, 0}, // jirl r0, rj, 0 +- {AJAL, C_NONE, C_NONE, C_ZOREG, C_NONE, 18, 4, REGLINK, 0, 0}, // jirl r1, rj, 0 +- +- {AMOVW, C_SEXT, C_NONE, C_FREG, C_NONE, 27, 4, 0, sys.Loong64, 0}, +- {AMOVF, C_SEXT, C_NONE, C_FREG, C_NONE, 27, 4, 0, sys.Loong64, 0}, +- {AMOVD, C_SEXT, C_NONE, C_FREG, C_NONE, 27, 4, 0, sys.Loong64, 0}, +- {AMOVW, C_SAUTO, C_NONE, C_FREG, C_NONE, 27, 4, REGSP, sys.Loong64, 0}, +- {AMOVF, C_SAUTO, C_NONE, C_FREG, C_NONE, 27, 4, REGSP, 0, 0}, +- {AMOVD, C_SAUTO, C_NONE, C_FREG, C_NONE, 27, 4, REGSP, 0, 0}, +- {AMOVW, C_SOREG, C_NONE, C_FREG, C_NONE, 27, 4, REGZERO, sys.Loong64, 0}, +- {AMOVF, C_SOREG, C_NONE, C_FREG, C_NONE, 27, 4, REGZERO, 0, 0}, +- {AMOVD, C_SOREG, C_NONE, C_FREG, C_NONE, 27, 4, REGZERO, 0, 0}, +- +- {AMOVW, C_LEXT, C_NONE, C_FREG, C_NONE, 27, 12, 0, sys.Loong64, 0}, +- {AMOVF, C_LEXT, C_NONE, C_FREG, C_NONE, 27, 12, 0, sys.Loong64, 0}, +- {AMOVD, C_LEXT, C_NONE, C_FREG, C_NONE, 27, 12, 0, sys.Loong64, 0}, +- {AMOVW, C_LAUTO, C_NONE, C_FREG, C_NONE, 27, 12, REGSP, sys.Loong64, 0}, +- {AMOVF, C_LAUTO, C_NONE, C_FREG, C_NONE, 27, 12, REGSP, 0, 0}, +- {AMOVD, C_LAUTO, C_NONE, C_FREG, C_NONE, 27, 12, REGSP, 
0, 0}, +- {AMOVW, C_LOREG, C_NONE, C_FREG, C_NONE, 27, 12, REGZERO, sys.Loong64, 0}, +- {AMOVF, C_LOREG, C_NONE, C_FREG, C_NONE, 27, 12, REGZERO, 0, 0}, +- {AMOVD, C_LOREG, C_NONE, C_FREG, C_NONE, 27, 12, REGZERO, 0, 0}, +- {AMOVF, C_ADDR, C_NONE, C_FREG, C_NONE, 51, 8, 0, 0, 0}, +- {AMOVF, C_ADDR, C_NONE, C_FREG, C_NONE, 51, 8, 0, sys.Loong64, 0}, +- {AMOVD, C_ADDR, C_NONE, C_FREG, C_NONE, 51, 8, 0, 0, 0}, +- {AMOVD, C_ADDR, C_NONE, C_FREG, C_NONE, 51, 8, 0, sys.Loong64, 0}, +- +- {AMOVW, C_FREG, C_NONE, C_SEXT, C_NONE, 28, 4, 0, sys.Loong64, 0}, +- {AMOVF, C_FREG, C_NONE, C_SEXT, C_NONE, 28, 4, 0, sys.Loong64, 0}, +- {AMOVD, C_FREG, C_NONE, C_SEXT, C_NONE, 28, 4, 0, sys.Loong64, 0}, +- {AMOVW, C_FREG, C_NONE, C_SAUTO, C_NONE, 28, 4, REGSP, sys.Loong64, 0}, +- {AMOVF, C_FREG, C_NONE, C_SAUTO, C_NONE, 28, 4, REGSP, 0, 0}, +- {AMOVD, C_FREG, C_NONE, C_SAUTO, C_NONE, 28, 4, REGSP, 0, 0}, +- {AMOVW, C_FREG, C_NONE, C_SOREG, C_NONE, 28, 4, REGZERO, sys.Loong64, 0}, +- {AMOVF, C_FREG, C_NONE, C_SOREG, C_NONE, 28, 4, REGZERO, 0, 0}, +- {AMOVD, C_FREG, C_NONE, C_SOREG, C_NONE, 28, 4, REGZERO, 0, 0}, +- +- {AMOVW, C_FREG, C_NONE, C_LEXT, C_NONE, 28, 12, 0, sys.Loong64, 0}, +- {AMOVF, C_FREG, C_NONE, C_LEXT, C_NONE, 28, 12, 0, sys.Loong64, 0}, +- {AMOVD, C_FREG, C_NONE, C_LEXT, C_NONE, 28, 12, 0, sys.Loong64, 0}, +- {AMOVW, C_FREG, C_NONE, C_LAUTO, C_NONE, 28, 12, REGSP, sys.Loong64, 0}, +- {AMOVF, C_FREG, C_NONE, C_LAUTO, C_NONE, 28, 12, REGSP, 0, 0}, +- {AMOVD, C_FREG, C_NONE, C_LAUTO, C_NONE, 28, 12, REGSP, 0, 0}, +- {AMOVW, C_FREG, C_NONE, C_LOREG, C_NONE, 28, 12, REGZERO, sys.Loong64, 0}, +- {AMOVF, C_FREG, C_NONE, C_LOREG, C_NONE, 28, 12, REGZERO, 0, 0}, +- {AMOVD, C_FREG, C_NONE, C_LOREG, C_NONE, 28, 12, REGZERO, 0, 0}, +- {AMOVF, C_FREG, C_NONE, C_ADDR, C_NONE, 50, 8, 0, 0, 0}, +- {AMOVF, C_FREG, C_NONE, C_ADDR, C_NONE, 50, 8, 0, sys.Loong64, 0}, +- {AMOVD, C_FREG, C_NONE, C_ADDR, C_NONE, 50, 8, 0, 0, 0}, +- {AMOVD, C_FREG, C_NONE, C_ADDR, C_NONE, 50, 8, 0, 
sys.Loong64, 0}, +- +- {AMOVW, C_REG, C_NONE, C_FREG, C_NONE, 30, 4, 0, 0, 0}, +- {AMOVW, C_FREG, C_NONE, C_REG, C_NONE, 31, 4, 0, 0, 0}, +- {AMOVV, C_REG, C_NONE, C_FREG, C_NONE, 47, 4, 0, sys.Loong64, 0}, +- {AMOVV, C_FREG, C_NONE, C_REG, C_NONE, 48, 4, 0, sys.Loong64, 0}, +- +- {AMOVV, C_FCCREG, C_NONE, C_REG, C_NONE, 63, 4, 0, sys.Loong64, 0}, +- {AMOVV, C_REG, C_NONE, C_FCCREG, C_NONE, 64, 4, 0, sys.Loong64, 0}, +- +- {AMOVW, C_ADDCON, C_NONE, C_FREG, C_NONE, 34, 8, 0, sys.Loong64, 0}, +- {AMOVW, C_ANDCON, C_NONE, C_FREG, C_NONE, 34, 8, 0, sys.Loong64, 0}, +- +- {AWORD, C_LCON, C_NONE, C_NONE, C_NONE, 40, 4, 0, 0, 0}, +- {AWORD, C_DCON, C_NONE, C_NONE, C_NONE, 61, 4, 0, 0, 0}, +- {AMOVB, C_REG, C_NONE, C_TLS_IE, C_NONE, 56, 16, 0, sys.Loong64, 0}, +- {AMOVW, C_REG, C_NONE, C_TLS_IE, C_NONE, 56, 16, 0, sys.Loong64, 0}, +- {AMOVV, C_REG, C_NONE, C_TLS_IE, C_NONE, 56, 16, 0, sys.Loong64, 0}, +- {AMOVBU, C_REG, C_NONE, C_TLS_IE, C_NONE, 56, 16, 0, sys.Loong64, 0}, +- {AMOVWU, C_REG, C_NONE, C_TLS_IE, C_NONE, 56, 16, 0, sys.Loong64, 0}, +- +- {AMOVB, C_TLS_IE, C_NONE, C_REG, C_NONE, 57, 16, 0, sys.Loong64, 0}, +- {AMOVW, C_TLS_IE, C_NONE, C_REG, C_NONE, 57, 16, 0, sys.Loong64, 0}, +- {AMOVV, C_TLS_IE, C_NONE, C_REG, C_NONE, 57, 16, 0, sys.Loong64, 0}, +- {AMOVBU, C_TLS_IE, C_NONE, C_REG, C_NONE, 57, 16, 0, sys.Loong64, 0}, +- {AMOVWU, C_TLS_IE, C_NONE, C_REG, C_NONE, 57, 16, 0, sys.Loong64, 0}, +- +- {AMOVV, C_GOTADDR, C_NONE, C_REG, C_NONE, 65, 8, 0, sys.Loong64, 0}, +- +- {ATEQ, C_SCON, C_REG, C_REG, C_NONE, 15, 8, 0, 0, 0}, +- {ATEQ, C_SCON, C_NONE, C_REG, C_NONE, 15, 8, 0, 0, 0}, +- +- {ABREAK, C_REG, C_NONE, C_SEXT, C_NONE, 7, 4, 0, sys.Loong64, 0}, // really CACHE instruction +- {ABREAK, C_REG, C_NONE, C_SAUTO, C_NONE, 7, 4, REGSP, sys.Loong64, 0}, +- {ABREAK, C_REG, C_NONE, C_SOREG, C_NONE, 7, 4, REGZERO, sys.Loong64, 0}, +- {ABREAK, C_NONE, C_NONE, C_NONE, C_NONE, 5, 4, 0, 0, 0}, +- +- {ARDTIMELW, C_NONE, C_NONE, C_REG, C_REG, 62, 4, 0, 0, 0}, +- 
{ARDTIMEHW, C_NONE, C_NONE, C_REG, C_REG, 62, 4, 0, 0, 0}, +- {ARDTIMED, C_NONE, C_NONE, C_REG, C_REG, 62, 4, 0, 0, 0}, +- +- {obj.AUNDEF, C_NONE, C_NONE, C_NONE, C_NONE, 49, 4, 0, 0, 0}, +- {obj.APCDATA, C_LCON, C_NONE, C_LCON, C_NONE, 0, 0, 0, 0, 0}, +- {obj.APCDATA, C_DCON, C_NONE, C_DCON, C_NONE, 0, 0, 0, 0, 0}, +- {obj.AFUNCDATA, C_SCON, C_NONE, C_ADDR, C_NONE, 0, 0, 0, 0, 0}, +- {obj.ANOP, C_NONE, C_NONE, C_NONE, C_NONE, 0, 0, 0, 0, 0}, +- {obj.ANOP, C_LCON, C_NONE, C_NONE, C_NONE, 0, 0, 0, 0, 0}, // nop variants, see #40689 +- {obj.ANOP, C_DCON, C_NONE, C_NONE, C_NONE, 0, 0, 0, 0, 0}, // nop variants, see #40689 +- {obj.ANOP, C_REG, C_NONE, C_NONE, C_NONE, 0, 0, 0, 0, 0}, +- {obj.ANOP, C_FREG, C_NONE, C_NONE, C_NONE, 0, 0, 0, 0, 0}, +- {obj.ADUFFZERO, C_NONE, C_NONE, C_LBRA, C_NONE, 11, 4, 0, 0, 0}, // same as AJMP +- {obj.ADUFFCOPY, C_NONE, C_NONE, C_LBRA, C_NONE, 11, 4, 0, 0, 0}, // same as AJMP +- +- {obj.AXXX, C_NONE, C_NONE, C_NONE, C_NONE, 0, 4, 0, 0, 0}, ++ {obj.ATEXT, C_ADDR, C_NONE, C_NONE, C_TEXTSIZE, C_NONE, 0, 0, 0, 0}, ++ ++ {AMOVW, C_REG, C_NONE, C_NONE, C_REG, C_NONE, 1, 4, 0, 0}, ++ {AMOVV, C_REG, C_NONE, C_NONE, C_REG, C_NONE, 1, 4, 0, 0}, ++ {AMOVB, C_REG, C_NONE, C_NONE, C_REG, C_NONE, 12, 8, 0, NOTUSETMP}, ++ {AMOVBU, C_REG, C_NONE, C_NONE, C_REG, C_NONE, 13, 4, 0, 0}, ++ {AMOVWU, C_REG, C_NONE, C_NONE, C_REG, C_NONE, 14, 8, 0, NOTUSETMP}, ++ ++ {ASUB, C_REG, C_REG, C_NONE, C_REG, C_NONE, 2, 4, 0, 0}, ++ {ASUBV, C_REG, C_REG, C_NONE, C_REG, C_NONE, 2, 4, 0, 0}, ++ {AADD, C_REG, C_REG, C_NONE, C_REG, C_NONE, 2, 4, 0, 0}, ++ {AADDV, C_REG, C_REG, C_NONE, C_REG, C_NONE, 2, 4, 0, 0}, ++ {AAND, C_REG, C_REG, C_NONE, C_REG, C_NONE, 2, 4, 0, 0}, ++ {ASUB, C_REG, C_NONE, C_NONE, C_REG, C_NONE, 2, 4, 0, 0}, ++ {ASUBV, C_REG, C_NONE, C_NONE, C_REG, C_NONE, 2, 4, 0, 0}, ++ {AADD, C_REG, C_NONE, C_NONE, C_REG, C_NONE, 2, 4, 0, 0}, ++ {AADDV, C_REG, C_NONE, C_NONE, C_REG, C_NONE, 2, 4, 0, 0}, ++ {AAND, C_REG, C_NONE, C_NONE, C_REG, C_NONE, 2, 4, 0, 
0}, ++ {ANEGW, C_REG, C_NONE, C_NONE, C_REG, C_NONE, 2, 4, 0, 0}, ++ {ANEGV, C_REG, C_NONE, C_NONE, C_REG, C_NONE, 2, 4, 0, 0}, ++ {AMASKEQZ, C_REG, C_REG, C_NONE, C_REG, C_NONE, 2, 4, 0, 0}, ++ ++ {ASLL, C_REG, C_NONE, C_NONE, C_REG, C_NONE, 9, 4, 0, 0}, ++ {ASLL, C_REG, C_REG, C_NONE, C_REG, C_NONE, 9, 4, 0, 0}, ++ {ASLLV, C_REG, C_NONE, C_NONE, C_REG, C_NONE, 9, 4, 0, 0}, ++ {ASLLV, C_REG, C_REG, C_NONE, C_REG, C_NONE, 9, 4, 0, 0}, ++ {ACLO, C_REG, C_NONE, C_NONE, C_REG, C_NONE, 9, 4, 0, 0}, ++ ++ {AADDF, C_FREG, C_NONE, C_NONE, C_FREG, C_NONE, 32, 4, 0, 0}, ++ {AADDF, C_FREG, C_REG, C_NONE, C_FREG, C_NONE, 32, 4, 0, 0}, ++ {ACMPEQF, C_FREG, C_REG, C_NONE, C_NONE, C_NONE, 32, 4, 0, 0}, ++ {AABSF, C_FREG, C_NONE, C_NONE, C_FREG, C_NONE, 33, 4, 0, 0}, ++ {AMOVVF, C_FREG, C_NONE, C_NONE, C_FREG, C_NONE, 33, 4, 0, 0}, ++ {AMOVF, C_FREG, C_NONE, C_NONE, C_FREG, C_NONE, 33, 4, 0, 0}, ++ {AMOVD, C_FREG, C_NONE, C_NONE, C_FREG, C_NONE, 33, 4, 0, 0}, ++ ++ {AMOVW, C_REG, C_NONE, C_NONE, C_SEXT, C_NONE, 7, 4, 0, 0}, ++ {AMOVWU, C_REG, C_NONE, C_NONE, C_SEXT, C_NONE, 7, 4, 0, 0}, ++ {AMOVV, C_REG, C_NONE, C_NONE, C_SEXT, C_NONE, 7, 4, 0, 0}, ++ {AMOVB, C_REG, C_NONE, C_NONE, C_SEXT, C_NONE, 7, 4, 0, 0}, ++ {AMOVBU, C_REG, C_NONE, C_NONE, C_SEXT, C_NONE, 7, 4, 0, 0}, ++ {AMOVWL, C_REG, C_NONE, C_NONE, C_SEXT, C_NONE, 7, 4, 0, 0}, ++ {AMOVVL, C_REG, C_NONE, C_NONE, C_SEXT, C_NONE, 7, 4, 0, 0}, ++ {AMOVW, C_REG, C_NONE, C_NONE, C_SAUTO, C_NONE, 7, 4, REGSP, 0}, ++ {AMOVWU, C_REG, C_NONE, C_NONE, C_SAUTO, C_NONE, 7, 4, REGSP, 0}, ++ {AMOVV, C_REG, C_NONE, C_NONE, C_SAUTO, C_NONE, 7, 4, REGSP, 0}, ++ {AMOVB, C_REG, C_NONE, C_NONE, C_SAUTO, C_NONE, 7, 4, REGSP, 0}, ++ {AMOVBU, C_REG, C_NONE, C_NONE, C_SAUTO, C_NONE, 7, 4, REGSP, 0}, ++ {AMOVWL, C_REG, C_NONE, C_NONE, C_SAUTO, C_NONE, 7, 4, REGSP, 0}, ++ {AMOVVL, C_REG, C_NONE, C_NONE, C_SAUTO, C_NONE, 7, 4, REGSP, 0}, ++ {AMOVW, C_REG, C_NONE, C_NONE, C_SOREG, C_NONE, 7, 4, REGZERO, 0}, ++ {AMOVWU, C_REG, C_NONE, C_NONE, 
C_SOREG, C_NONE, 7, 4, REGZERO, 0}, ++ {AMOVV, C_REG, C_NONE, C_NONE, C_SOREG, C_NONE, 7, 4, REGZERO, 0}, ++ {AMOVB, C_REG, C_NONE, C_NONE, C_SOREG, C_NONE, 7, 4, REGZERO, 0}, ++ {AMOVBU, C_REG, C_NONE, C_NONE, C_SOREG, C_NONE, 7, 4, REGZERO, 0}, ++ {AMOVWL, C_REG, C_NONE, C_NONE, C_SOREG, C_NONE, 7, 4, REGZERO, 0}, ++ {AMOVVL, C_REG, C_NONE, C_NONE, C_SOREG, C_NONE, 7, 4, REGZERO, 0}, ++ {ASC, C_REG, C_NONE, C_NONE, C_SOREG, C_NONE, 7, 4, REGZERO, 0}, ++ {ASCV, C_REG, C_NONE, C_NONE, C_SOREG, C_NONE, 7, 4, REGZERO, 0}, ++ ++ {AMOVW, C_SEXT, C_NONE, C_NONE, C_REG, C_NONE, 8, 4, 0, 0}, ++ {AMOVWU, C_SEXT, C_NONE, C_NONE, C_REG, C_NONE, 8, 4, 0, 0}, ++ {AMOVV, C_SEXT, C_NONE, C_NONE, C_REG, C_NONE, 8, 4, 0, 0}, ++ {AMOVB, C_SEXT, C_NONE, C_NONE, C_REG, C_NONE, 8, 4, 0, 0}, ++ {AMOVBU, C_SEXT, C_NONE, C_NONE, C_REG, C_NONE, 8, 4, 0, 0}, ++ {AMOVWL, C_SEXT, C_NONE, C_NONE, C_REG, C_NONE, 8, 4, 0, 0}, ++ {AMOVVL, C_SEXT, C_NONE, C_NONE, C_REG, C_NONE, 8, 4, 0, 0}, ++ {AMOVW, C_SAUTO, C_NONE, C_NONE, C_REG, C_NONE, 8, 4, REGSP, 0}, ++ {AMOVWU, C_SAUTO, C_NONE, C_NONE, C_REG, C_NONE, 8, 4, REGSP, 0}, ++ {AMOVV, C_SAUTO, C_NONE, C_NONE, C_REG, C_NONE, 8, 4, REGSP, 0}, ++ {AMOVB, C_SAUTO, C_NONE, C_NONE, C_REG, C_NONE, 8, 4, REGSP, 0}, ++ {AMOVBU, C_SAUTO, C_NONE, C_NONE, C_REG, C_NONE, 8, 4, REGSP, 0}, ++ {AMOVWL, C_SAUTO, C_NONE, C_NONE, C_REG, C_NONE, 8, 4, REGSP, 0}, ++ {AMOVVL, C_SAUTO, C_NONE, C_NONE, C_REG, C_NONE, 8, 4, REGSP, 0}, ++ {AMOVW, C_SOREG, C_NONE, C_NONE, C_REG, C_NONE, 8, 4, REGZERO, 0}, ++ {AMOVWU, C_SOREG, C_NONE, C_NONE, C_REG, C_NONE, 8, 4, REGZERO, 0}, ++ {AMOVV, C_SOREG, C_NONE, C_NONE, C_REG, C_NONE, 8, 4, REGZERO, 0}, ++ {AMOVB, C_SOREG, C_NONE, C_NONE, C_REG, C_NONE, 8, 4, REGZERO, 0}, ++ {AMOVBU, C_SOREG, C_NONE, C_NONE, C_REG, C_NONE, 8, 4, REGZERO, 0}, ++ {AMOVWL, C_SOREG, C_NONE, C_NONE, C_REG, C_NONE, 8, 4, REGZERO, 0}, ++ {AMOVVL, C_SOREG, C_NONE, C_NONE, C_REG, C_NONE, 8, 4, REGZERO, 0}, ++ {ALL, C_SOREG, C_NONE, C_NONE, C_REG, C_NONE, 8, 
4, REGZERO, 0}, ++ {ALLV, C_SOREG, C_NONE, C_NONE, C_REG, C_NONE, 8, 4, REGZERO, 0}, ++ ++ {AMOVW, C_REG, C_NONE, C_NONE, C_LEXT, C_NONE, 35, 12, 0, 0}, ++ {AMOVWU, C_REG, C_NONE, C_NONE, C_LEXT, C_NONE, 35, 12, 0, 0}, ++ {AMOVV, C_REG, C_NONE, C_NONE, C_LEXT, C_NONE, 35, 12, 0, 0}, ++ {AMOVB, C_REG, C_NONE, C_NONE, C_LEXT, C_NONE, 35, 12, 0, 0}, ++ {AMOVBU, C_REG, C_NONE, C_NONE, C_LEXT, C_NONE, 35, 12, 0, 0}, ++ {AMOVW, C_REG, C_NONE, C_NONE, C_LAUTO, C_NONE, 35, 12, REGSP, 0}, ++ {AMOVWU, C_REG, C_NONE, C_NONE, C_LAUTO, C_NONE, 35, 12, REGSP, 0}, ++ {AMOVV, C_REG, C_NONE, C_NONE, C_LAUTO, C_NONE, 35, 12, REGSP, 0}, ++ {AMOVB, C_REG, C_NONE, C_NONE, C_LAUTO, C_NONE, 35, 12, REGSP, 0}, ++ {AMOVBU, C_REG, C_NONE, C_NONE, C_LAUTO, C_NONE, 35, 12, REGSP, 0}, ++ {AMOVW, C_REG, C_NONE, C_NONE, C_LOREG, C_NONE, 35, 12, REGZERO, 0}, ++ {AMOVWU, C_REG, C_NONE, C_NONE, C_LOREG, C_NONE, 35, 12, REGZERO, 0}, ++ {AMOVV, C_REG, C_NONE, C_NONE, C_LOREG, C_NONE, 35, 12, REGZERO, 0}, ++ {AMOVB, C_REG, C_NONE, C_NONE, C_LOREG, C_NONE, 35, 12, REGZERO, 0}, ++ {AMOVBU, C_REG, C_NONE, C_NONE, C_LOREG, C_NONE, 35, 12, REGZERO, 0}, ++ {ASC, C_REG, C_NONE, C_NONE, C_LOREG, C_NONE, 35, 12, REGZERO, 0}, ++ {AMOVW, C_REG, C_NONE, C_NONE, C_ADDR, C_NONE, 50, 8, 0, 0}, ++ {AMOVW, C_REG, C_NONE, C_NONE, C_ADDR, C_NONE, 50, 8, 0, 0}, ++ {AMOVWU, C_REG, C_NONE, C_NONE, C_ADDR, C_NONE, 50, 8, 0, 0}, ++ {AMOVV, C_REG, C_NONE, C_NONE, C_ADDR, C_NONE, 50, 8, 0, 0}, ++ {AMOVB, C_REG, C_NONE, C_NONE, C_ADDR, C_NONE, 50, 8, 0, 0}, ++ {AMOVB, C_REG, C_NONE, C_NONE, C_ADDR, C_NONE, 50, 8, 0, 0}, ++ {AMOVBU, C_REG, C_NONE, C_NONE, C_ADDR, C_NONE, 50, 8, 0, 0}, ++ {AMOVBU, C_REG, C_NONE, C_NONE, C_ADDR, C_NONE, 50, 8, 0, 0}, ++ {AMOVW, C_REG, C_NONE, C_NONE, C_TLS_LE, C_NONE, 53, 16, 0, 0}, ++ {AMOVWU, C_REG, C_NONE, C_NONE, C_TLS_LE, C_NONE, 53, 16, 0, 0}, ++ {AMOVV, C_REG, C_NONE, C_NONE, C_TLS_LE, C_NONE, 53, 16, 0, 0}, ++ {AMOVB, C_REG, C_NONE, C_NONE, C_TLS_LE, C_NONE, 53, 16, 0, 0}, ++ {AMOVBU, 
C_REG, C_NONE, C_NONE, C_TLS_LE, C_NONE, 53, 16, 0, 0}, ++ ++ {AMOVW, C_LEXT, C_NONE, C_NONE, C_REG, C_NONE, 36, 12, 0, 0}, ++ {AMOVWU, C_LEXT, C_NONE, C_NONE, C_REG, C_NONE, 36, 12, 0, 0}, ++ {AMOVV, C_LEXT, C_NONE, C_NONE, C_REG, C_NONE, 36, 12, 0, 0}, ++ {AMOVB, C_LEXT, C_NONE, C_NONE, C_REG, C_NONE, 36, 12, 0, 0}, ++ {AMOVBU, C_LEXT, C_NONE, C_NONE, C_REG, C_NONE, 36, 12, 0, 0}, ++ {AMOVW, C_LAUTO, C_NONE, C_NONE, C_REG, C_NONE, 36, 12, REGSP, 0}, ++ {AMOVWU, C_LAUTO, C_NONE, C_NONE, C_REG, C_NONE, 36, 12, REGSP, 0}, ++ {AMOVV, C_LAUTO, C_NONE, C_NONE, C_REG, C_NONE, 36, 12, REGSP, 0}, ++ {AMOVB, C_LAUTO, C_NONE, C_NONE, C_REG, C_NONE, 36, 12, REGSP, 0}, ++ {AMOVBU, C_LAUTO, C_NONE, C_NONE, C_REG, C_NONE, 36, 12, REGSP, 0}, ++ {AMOVW, C_LOREG, C_NONE, C_NONE, C_REG, C_NONE, 36, 12, REGZERO, 0}, ++ {AMOVWU, C_LOREG, C_NONE, C_NONE, C_REG, C_NONE, 36, 12, REGZERO, 0}, ++ {AMOVV, C_LOREG, C_NONE, C_NONE, C_REG, C_NONE, 36, 12, REGZERO, 0}, ++ {AMOVB, C_LOREG, C_NONE, C_NONE, C_REG, C_NONE, 36, 12, REGZERO, 0}, ++ {AMOVBU, C_LOREG, C_NONE, C_NONE, C_REG, C_NONE, 36, 12, REGZERO, 0}, ++ {AMOVW, C_ADDR, C_NONE, C_NONE, C_REG, C_NONE, 51, 8, 0, 0}, ++ {AMOVW, C_ADDR, C_NONE, C_NONE, C_REG, C_NONE, 51, 8, 0, 0}, ++ {AMOVWU, C_ADDR, C_NONE, C_NONE, C_REG, C_NONE, 51, 8, 0, 0}, ++ {AMOVV, C_ADDR, C_NONE, C_NONE, C_REG, C_NONE, 51, 8, 0, 0}, ++ {AMOVB, C_ADDR, C_NONE, C_NONE, C_REG, C_NONE, 51, 8, 0, 0}, ++ {AMOVB, C_ADDR, C_NONE, C_NONE, C_REG, C_NONE, 51, 8, 0, 0}, ++ {AMOVBU, C_ADDR, C_NONE, C_NONE, C_REG, C_NONE, 51, 8, 0, 0}, ++ {AMOVBU, C_ADDR, C_NONE, C_NONE, C_REG, C_NONE, 51, 8, 0, 0}, ++ {AMOVW, C_TLS_LE, C_NONE, C_NONE, C_REG, C_NONE, 54, 16, 0, 0}, ++ {AMOVWU, C_TLS_LE, C_NONE, C_NONE, C_REG, C_NONE, 54, 16, 0, 0}, ++ {AMOVV, C_TLS_LE, C_NONE, C_NONE, C_REG, C_NONE, 54, 16, 0, 0}, ++ {AMOVB, C_TLS_LE, C_NONE, C_NONE, C_REG, C_NONE, 54, 16, 0, 0}, ++ {AMOVBU, C_TLS_LE, C_NONE, C_NONE, C_REG, C_NONE, 54, 16, 0, 0}, ++ ++ {AMOVW, C_SECON, C_NONE, C_NONE, C_REG, 
C_NONE, 3, 4, 0, 0}, ++ {AMOVV, C_SECON, C_NONE, C_NONE, C_REG, C_NONE, 3, 4, 0, 0}, ++ {AMOVW, C_SACON, C_NONE, C_NONE, C_REG, C_NONE, 3, 4, REGSP, 0}, ++ {AMOVV, C_SACON, C_NONE, C_NONE, C_REG, C_NONE, 3, 4, REGSP, 0}, ++ {AMOVW, C_LECON, C_NONE, C_NONE, C_REG, C_NONE, 52, 8, 0, NOTUSETMP}, ++ {AMOVW, C_LECON, C_NONE, C_NONE, C_REG, C_NONE, 52, 8, 0, NOTUSETMP}, ++ {AMOVV, C_LECON, C_NONE, C_NONE, C_REG, C_NONE, 52, 8, 0, NOTUSETMP}, ++ ++ {AMOVW, C_LACON, C_NONE, C_NONE, C_REG, C_NONE, 26, 12, REGSP, 0}, ++ {AMOVV, C_LACON, C_NONE, C_NONE, C_REG, C_NONE, 26, 12, REGSP, 0}, ++ {AMOVW, C_ADDCON, C_NONE, C_NONE, C_REG, C_NONE, 3, 4, REGZERO, 0}, ++ {AMOVV, C_ADDCON, C_NONE, C_NONE, C_REG, C_NONE, 3, 4, REGZERO, 0}, ++ {AMOVW, C_ANDCON, C_NONE, C_NONE, C_REG, C_NONE, 3, 4, REGZERO, 0}, ++ {AMOVV, C_ANDCON, C_NONE, C_NONE, C_REG, C_NONE, 3, 4, REGZERO, 0}, ++ {AMOVW, C_STCON, C_NONE, C_NONE, C_REG, C_NONE, 55, 12, 0, 0}, ++ {AMOVV, C_STCON, C_NONE, C_NONE, C_REG, C_NONE, 55, 12, 0, 0}, ++ ++ {AMOVW, C_UCON, C_NONE, C_NONE, C_REG, C_NONE, 24, 4, 0, 0}, ++ {AMOVV, C_UCON, C_NONE, C_NONE, C_REG, C_NONE, 24, 4, 0, 0}, ++ {AMOVW, C_LCON, C_NONE, C_NONE, C_REG, C_NONE, 19, 8, 0, NOTUSETMP}, ++ {AMOVV, C_LCON, C_NONE, C_NONE, C_REG, C_NONE, 19, 8, 0, NOTUSETMP}, ++ {AMOVV, C_DCON, C_NONE, C_NONE, C_REG, C_NONE, 59, 16, 0, NOTUSETMP}, ++ ++ {AMUL, C_REG, C_NONE, C_NONE, C_REG, C_NONE, 2, 4, 0, 0}, ++ {AMUL, C_REG, C_REG, C_NONE, C_REG, C_NONE, 2, 4, 0, 0}, ++ {AMULV, C_REG, C_NONE, C_NONE, C_REG, C_NONE, 2, 4, 0, 0}, ++ {AMULV, C_REG, C_REG, C_NONE, C_REG, C_NONE, 2, 4, 0, 0}, ++ ++ {AADD, C_ADD0CON, C_REG, C_NONE, C_REG, C_NONE, 4, 4, 0, 0}, ++ {AADD, C_ADD0CON, C_NONE, C_NONE, C_REG, C_NONE, 4, 4, 0, 0}, ++ {AADD, C_ANDCON, C_REG, C_NONE, C_REG, C_NONE, 10, 8, 0, 0}, ++ {AADD, C_ANDCON, C_NONE, C_NONE, C_REG, C_NONE, 10, 8, 0, 0}, ++ ++ {AADDV, C_ADD0CON, C_REG, C_NONE, C_REG, C_NONE, 4, 4, 0, 0}, ++ {AADDV, C_ADD0CON, C_NONE, C_NONE, C_REG, C_NONE, 4, 4, 0, 0}, ++ {AADDV, 
C_ANDCON, C_REG, C_NONE, C_REG, C_NONE, 10, 8, 0, 0}, ++ {AADDV, C_ANDCON, C_NONE, C_NONE, C_REG, C_NONE, 10, 8, 0, 0}, ++ ++ {AAND, C_AND0CON, C_REG, C_NONE, C_REG, C_NONE, 4, 4, 0, 0}, ++ {AAND, C_AND0CON, C_NONE, C_NONE, C_REG, C_NONE, 4, 4, 0, 0}, ++ {AAND, C_ADDCON, C_REG, C_NONE, C_REG, C_NONE, 10, 8, 0, 0}, ++ {AAND, C_ADDCON, C_NONE, C_NONE, C_REG, C_NONE, 10, 8, 0, 0}, ++ ++ {AADD, C_UCON, C_REG, C_NONE, C_REG, C_NONE, 25, 8, 0, 0}, ++ {AADD, C_UCON, C_NONE, C_NONE, C_REG, C_NONE, 25, 8, 0, 0}, ++ {AADDV, C_UCON, C_REG, C_NONE, C_REG, C_NONE, 25, 8, 0, 0}, ++ {AADDV, C_UCON, C_NONE, C_NONE, C_REG, C_NONE, 25, 8, 0, 0}, ++ {AAND, C_UCON, C_REG, C_NONE, C_REG, C_NONE, 25, 8, 0, 0}, ++ {AAND, C_UCON, C_NONE, C_NONE, C_REG, C_NONE, 25, 8, 0, 0}, ++ ++ {AADD, C_LCON, C_NONE, C_NONE, C_REG, C_NONE, 23, 12, 0, 0}, ++ {AADDV, C_LCON, C_NONE, C_NONE, C_REG, C_NONE, 23, 12, 0, 0}, ++ {AAND, C_LCON, C_NONE, C_NONE, C_REG, C_NONE, 23, 12, 0, 0}, ++ {AADD, C_LCON, C_REG, C_NONE, C_REG, C_NONE, 23, 12, 0, 0}, ++ {AADDV, C_LCON, C_REG, C_NONE, C_REG, C_NONE, 23, 12, 0, 0}, ++ {AAND, C_LCON, C_REG, C_NONE, C_REG, C_NONE, 23, 12, 0, 0}, ++ ++ {AADDV, C_DCON, C_NONE, C_NONE, C_REG, C_NONE, 60, 20, 0, 0}, ++ {AADDV, C_DCON, C_REG, C_NONE, C_REG, C_NONE, 60, 20, 0, 0}, ++ ++ {ASLL, C_SCON, C_REG, C_NONE, C_REG, C_NONE, 16, 4, 0, 0}, ++ {ASLL, C_SCON, C_NONE, C_NONE, C_REG, C_NONE, 16, 4, 0, 0}, ++ ++ {ASLLV, C_SCON, C_REG, C_NONE, C_REG, C_NONE, 16, 4, 0, 0}, ++ {ASLLV, C_SCON, C_NONE, C_NONE, C_REG, C_NONE, 16, 4, 0, 0}, ++ ++ {ASYSCALL, C_NONE, C_NONE, C_NONE, C_NONE, C_NONE, 5, 4, 0, 0}, ++ ++ {ABEQ, C_REG, C_REG, C_NONE, C_SBRA, C_NONE, 6, 4, 0, 0}, ++ {ABEQ, C_REG, C_NONE, C_NONE, C_SBRA, C_NONE, 6, 4, 0, 0}, ++ {ABLEZ, C_REG, C_NONE, C_NONE, C_SBRA, C_NONE, 6, 4, 0, 0}, ++ {ABFPT, C_NONE, C_NONE, C_NONE, C_SBRA, C_NONE, 6, 4, 0, NOTUSETMP}, ++ ++ {AJMP, C_NONE, C_NONE, C_NONE, C_LBRA, C_NONE, 11, 4, 0, 0}, // b ++ {AJAL, C_NONE, C_NONE, C_NONE, C_LBRA, C_NONE, 11, 4, 0, 
0}, // bl ++ ++ {AJMP, C_NONE, C_NONE, C_NONE, C_ZOREG, C_NONE, 18, 4, REGZERO, 0}, // jirl r0, rj, 0 ++ {AJAL, C_NONE, C_NONE, C_NONE, C_ZOREG, C_NONE, 18, 4, REGLINK, 0}, // jirl r1, rj, 0 ++ ++ {AMOVW, C_SEXT, C_NONE, C_NONE, C_FREG, C_NONE, 27, 4, 0, 0}, ++ {AMOVF, C_SEXT, C_NONE, C_NONE, C_FREG, C_NONE, 27, 4, 0, 0}, ++ {AMOVD, C_SEXT, C_NONE, C_NONE, C_FREG, C_NONE, 27, 4, 0, 0}, ++ {AMOVW, C_SAUTO, C_NONE, C_NONE, C_FREG, C_NONE, 27, 4, REGSP, 0}, ++ {AMOVF, C_SAUTO, C_NONE, C_NONE, C_FREG, C_NONE, 27, 4, REGSP, 0}, ++ {AMOVD, C_SAUTO, C_NONE, C_NONE, C_FREG, C_NONE, 27, 4, REGSP, 0}, ++ {AMOVW, C_SOREG, C_NONE, C_NONE, C_FREG, C_NONE, 27, 4, REGZERO, 0}, ++ {AMOVF, C_SOREG, C_NONE, C_NONE, C_FREG, C_NONE, 27, 4, REGZERO, 0}, ++ {AMOVD, C_SOREG, C_NONE, C_NONE, C_FREG, C_NONE, 27, 4, REGZERO, 0}, ++ ++ {AMOVW, C_LEXT, C_NONE, C_NONE, C_FREG, C_NONE, 27, 12, 0, 0}, ++ {AMOVF, C_LEXT, C_NONE, C_NONE, C_FREG, C_NONE, 27, 12, 0, 0}, ++ {AMOVD, C_LEXT, C_NONE, C_NONE, C_FREG, C_NONE, 27, 12, 0, 0}, ++ {AMOVW, C_LAUTO, C_NONE, C_NONE, C_FREG, C_NONE, 27, 12, REGSP, 0}, ++ {AMOVF, C_LAUTO, C_NONE, C_NONE, C_FREG, C_NONE, 27, 12, REGSP, 0}, ++ {AMOVD, C_LAUTO, C_NONE, C_NONE, C_FREG, C_NONE, 27, 12, REGSP, 0}, ++ {AMOVW, C_LOREG, C_NONE, C_NONE, C_FREG, C_NONE, 27, 12, REGZERO, 0}, ++ {AMOVF, C_LOREG, C_NONE, C_NONE, C_FREG, C_NONE, 27, 12, REGZERO, 0}, ++ {AMOVD, C_LOREG, C_NONE, C_NONE, C_FREG, C_NONE, 27, 12, REGZERO, 0}, ++ {AMOVF, C_ADDR, C_NONE, C_NONE, C_FREG, C_NONE, 51, 8, 0, 0}, ++ {AMOVF, C_ADDR, C_NONE, C_NONE, C_FREG, C_NONE, 51, 8, 0, 0}, ++ {AMOVD, C_ADDR, C_NONE, C_NONE, C_FREG, C_NONE, 51, 8, 0, 0}, ++ {AMOVD, C_ADDR, C_NONE, C_NONE, C_FREG, C_NONE, 51, 8, 0, 0}, ++ ++ {AMOVW, C_FREG, C_NONE, C_NONE, C_SEXT, C_NONE, 28, 4, 0, 0}, ++ {AMOVF, C_FREG, C_NONE, C_NONE, C_SEXT, C_NONE, 28, 4, 0, 0}, ++ {AMOVD, C_FREG, C_NONE, C_NONE, C_SEXT, C_NONE, 28, 4, 0, 0}, ++ {AMOVW, C_FREG, C_NONE, C_NONE, C_SAUTO, C_NONE, 28, 4, REGSP, 0}, ++ {AMOVF, C_FREG, 
C_NONE, C_NONE, C_SAUTO, C_NONE, 28, 4, REGSP, 0}, ++ {AMOVD, C_FREG, C_NONE, C_NONE, C_SAUTO, C_NONE, 28, 4, REGSP, 0}, ++ {AMOVW, C_FREG, C_NONE, C_NONE, C_SOREG, C_NONE, 28, 4, REGZERO, 0}, ++ {AMOVF, C_FREG, C_NONE, C_NONE, C_SOREG, C_NONE, 28, 4, REGZERO, 0}, ++ {AMOVD, C_FREG, C_NONE, C_NONE, C_SOREG, C_NONE, 28, 4, REGZERO, 0}, ++ ++ {AMOVW, C_FREG, C_NONE, C_NONE, C_LEXT, C_NONE, 28, 12, 0, 0}, ++ {AMOVF, C_FREG, C_NONE, C_NONE, C_LEXT, C_NONE, 28, 12, 0, 0}, ++ {AMOVD, C_FREG, C_NONE, C_NONE, C_LEXT, C_NONE, 28, 12, 0, 0}, ++ {AMOVW, C_FREG, C_NONE, C_NONE, C_LAUTO, C_NONE, 28, 12, REGSP, 0}, ++ {AMOVF, C_FREG, C_NONE, C_NONE, C_LAUTO, C_NONE, 28, 12, REGSP, 0}, ++ {AMOVD, C_FREG, C_NONE, C_NONE, C_LAUTO, C_NONE, 28, 12, REGSP, 0}, ++ {AMOVW, C_FREG, C_NONE, C_NONE, C_LOREG, C_NONE, 28, 12, REGZERO, 0}, ++ {AMOVF, C_FREG, C_NONE, C_NONE, C_LOREG, C_NONE, 28, 12, REGZERO, 0}, ++ {AMOVD, C_FREG, C_NONE, C_NONE, C_LOREG, C_NONE, 28, 12, REGZERO, 0}, ++ {AMOVF, C_FREG, C_NONE, C_NONE, C_ADDR, C_NONE, 50, 8, 0, 0}, ++ {AMOVF, C_FREG, C_NONE, C_NONE, C_ADDR, C_NONE, 50, 8, 0, 0}, ++ {AMOVD, C_FREG, C_NONE, C_NONE, C_ADDR, C_NONE, 50, 8, 0, 0}, ++ {AMOVD, C_FREG, C_NONE, C_NONE, C_ADDR, C_NONE, 50, 8, 0, 0}, ++ ++ {AMOVW, C_REG, C_NONE, C_NONE, C_FREG, C_NONE, 30, 4, 0, 0}, ++ {AMOVW, C_FREG, C_NONE, C_NONE, C_REG, C_NONE, 31, 4, 0, 0}, ++ {AMOVV, C_REG, C_NONE, C_NONE, C_FREG, C_NONE, 47, 4, 0, 0}, ++ {AMOVV, C_FREG, C_NONE, C_NONE, C_REG, C_NONE, 48, 4, 0, 0}, ++ ++ {AMOVV, C_FCCREG, C_NONE, C_NONE, C_REG, C_NONE, 63, 4, 0, 0}, ++ {AMOVV, C_REG, C_NONE, C_NONE, C_FCCREG, C_NONE, 64, 4, 0, 0}, ++ ++ {AMOVW, C_ADDCON, C_NONE, C_NONE, C_FREG, C_NONE, 34, 8, 0, 0}, ++ {AMOVW, C_ANDCON, C_NONE, C_NONE, C_FREG, C_NONE, 34, 8, 0, 0}, ++ ++ {AMOVB, C_REG, C_NONE, C_NONE, C_TLS_IE, C_NONE, 56, 16, 0, 0}, ++ {AMOVW, C_REG, C_NONE, C_NONE, C_TLS_IE, C_NONE, 56, 16, 0, 0}, ++ {AMOVV, C_REG, C_NONE, C_NONE, C_TLS_IE, C_NONE, 56, 16, 0, 0}, ++ {AMOVBU, C_REG, C_NONE, C_NONE, 
C_TLS_IE, C_NONE, 56, 16, 0, 0}, ++ {AMOVWU, C_REG, C_NONE, C_NONE, C_TLS_IE, C_NONE, 56, 16, 0, 0}, ++ ++ {AMOVB, C_TLS_IE, C_NONE, C_NONE, C_REG, C_NONE, 57, 16, 0, 0}, ++ {AMOVW, C_TLS_IE, C_NONE, C_NONE, C_REG, C_NONE, 57, 16, 0, 0}, ++ {AMOVV, C_TLS_IE, C_NONE, C_NONE, C_REG, C_NONE, 57, 16, 0, 0}, ++ {AMOVBU, C_TLS_IE, C_NONE, C_NONE, C_REG, C_NONE, 57, 16, 0, 0}, ++ {AMOVWU, C_TLS_IE, C_NONE, C_NONE, C_REG, C_NONE, 57, 16, 0, 0}, ++ ++ {AWORD, C_LCON, C_NONE, C_NONE, C_NONE, C_NONE, 40, 4, 0, 0}, ++ {AWORD, C_DCON, C_NONE, C_NONE, C_NONE, C_NONE, 61, 4, 0, 0}, ++ ++ {AMOVV, C_GOTADDR, C_NONE, C_NONE, C_REG, C_NONE, 65, 8, 0, 0}, ++ ++ {ATEQ, C_SCON, C_REG, C_NONE, C_REG, C_NONE, 15, 8, 0, 0}, ++ {ATEQ, C_SCON, C_NONE, C_NONE, C_REG, C_NONE, 15, 8, 0, 0}, ++ ++ {ABREAK, C_REG, C_NONE, C_NONE, C_SEXT, C_NONE, 7, 4, 0, 0}, // really CACHE instruction ++ {ABREAK, C_REG, C_NONE, C_NONE, C_SAUTO, C_NONE, 7, 4, REGSP, 0}, ++ {ABREAK, C_REG, C_NONE, C_NONE, C_SOREG, C_NONE, 7, 4, REGZERO, 0}, ++ {ABREAK, C_NONE, C_NONE, C_NONE, C_NONE, C_NONE, 5, 4, 0, 0}, ++ ++ {ARDTIMELW, C_NONE, C_NONE, C_NONE, C_REG, C_REG, 62, 4, 0, 0}, ++ {ARDTIMEHW, C_NONE, C_NONE, C_NONE, C_REG, C_REG, 62, 4, 0, 0}, ++ {ARDTIMED, C_NONE, C_NONE, C_NONE, C_REG, C_REG, 62, 4, 0, 0}, ++ ++ {obj.AUNDEF, C_NONE, C_NONE, C_NONE, C_NONE, C_NONE, 49, 4, 0, 0}, ++ {obj.APCDATA, C_LCON, C_NONE, C_NONE, C_LCON, C_NONE, 0, 0, 0, 0}, ++ {obj.APCDATA, C_DCON, C_NONE, C_NONE, C_DCON, C_NONE, 0, 0, 0, 0}, ++ {obj.AFUNCDATA, C_SCON, C_NONE, C_NONE, C_ADDR, C_NONE, 0, 0, 0, 0}, ++ {obj.ANOP, C_NONE, C_NONE, C_NONE, C_NONE, C_NONE, 0, 0, 0, 0}, ++ {obj.ANOP, C_LCON, C_NONE, C_NONE, C_NONE, C_NONE, 0, 0, 0, 0}, // nop variants, see #40689 ++ {obj.ANOP, C_DCON, C_NONE, C_NONE, C_NONE, C_NONE, 0, 0, 0, 0}, // nop variants, see #40689 ++ {obj.ANOP, C_REG, C_NONE, C_NONE, C_NONE, C_NONE, 0, 0, 0, 0}, ++ {obj.ANOP, C_FREG, C_NONE, C_NONE, C_NONE, C_NONE, 0, 0, 0, 0}, ++ {obj.ADUFFZERO, C_NONE, C_NONE, C_NONE, 
C_LBRA, C_NONE, 11, 4, 0, 0}, // same as AJMP ++ {obj.ADUFFCOPY, C_NONE, C_NONE, C_NONE, C_LBRA, C_NONE, 11, 4, 0, 0}, // same as AJMP ++ ++ {obj.AXXX, C_NONE, C_NONE, C_NONE, C_NONE, C_NONE, 0, 4, 0, 0}, + } + + var oprange [ALAST & obj.AMask][]Optab +@@ -733,12 +733,12 @@ func (c *ctxt0) oplook(p *obj.Prog) *Optab { + a1-- + + // first destination operand +- a3 := int(p.To.Class) +- if a3 == 0 { +- a3 = c.aclass(&p.To) + 1 +- p.To.Class = int8(a3) ++ a4 := int(p.To.Class) ++ if a4 == 0 { ++ a4 = c.aclass(&p.To) + 1 ++ p.To.Class = int8(a4) + } +- a3-- ++ a4-- + + // 2nd source operand + a2 := C_NONE +@@ -747,26 +747,37 @@ func (c *ctxt0) oplook(p *obj.Prog) *Optab { + } + + // 2nd destination operand +- a4 := C_NONE ++ a5 := C_NONE + if p.RegTo2 != 0 { +- a4 = C_REG ++ a5 = C_REG ++ } ++ ++ // 3rd source operand ++ a3 := C_NONE ++ if len(p.RestArgs) > 0 { ++ a3 = int(p.RestArgs[0].Class) ++ if a3 == 0 { ++ a3 = c.aclass(&p.RestArgs[0].Addr) + 1 ++ p.RestArgs[0].Class = int8(a3) ++ } ++ a3-- + } + + ops := oprange[p.As&obj.AMask] + c1 := &xcmp[a1] +- c3 := &xcmp[a3] ++ c4 := &xcmp[a4] + for i := range ops { + op := &ops[i] +- if (int(op.a2) == a2) && c1[op.a1] && c3[op.a3] && (int(op.a4) == a4) { ++ if (int(op.reg) == a2) && int(op.from3) == a3 && c1[op.from1] && c4[op.to1] && (int(op.to2) == a5) { + p.Optab = uint16(cap(optab) - cap(ops) + i + 1) + return op + } + } + +- c.ctxt.Diag("illegal combination %v %v %v %v %v", p.As, DRconv(a1), DRconv(a2), DRconv(a3), DRconv(a4)) ++ c.ctxt.Diag("illegal combination %v %v %v %v %v %v", p.As, DRconv(a1), DRconv(a2), DRconv(a3), DRconv(a4), DRconv(a5)) + prasm(p) + // Turn illegal instruction into an UNDEF, avoid crashing in asmout. 
+- return &Optab{obj.AUNDEF, C_NONE, C_NONE, C_NONE, C_NONE, 49, 4, 0, 0, 0} ++ return &Optab{obj.AUNDEF, C_NONE, C_NONE, C_NONE, C_NONE, C_NONE, 49, 4, 0, 0} + } + + func cmp(a int, b int) bool { +@@ -872,15 +883,15 @@ func (x ocmp) Less(i, j int) bool { + if n != 0 { + return n < 0 + } +- n = int(p1.a1) - int(p2.a1) ++ n = int(p1.from1) - int(p2.from1) + if n != 0 { + return n < 0 + } +- n = int(p1.a2) - int(p2.a2) ++ n = int(p1.reg) - int(p2.reg) + if n != 0 { + return n < 0 + } +- n = int(p1.a3) - int(p2.a3) ++ n = int(p1.to1) - int(p2.to1) + if n != 0 { + return n < 0 + } +@@ -1178,7 +1189,7 @@ func (c *ctxt0) asmout(p *obj.Prog, o *Optab, out []uint32) { + r = int(o.param) + } + a := add +- if o.a1 == C_ANDCON { ++ if o.from1 == C_ANDCON { + a = AOR + } + +@@ -1472,7 +1483,7 @@ func (c *ctxt0) asmout(p *obj.Prog, o *Optab, out []uint32) { + case 34: // mov $con,fr + v := c.regoff(&p.From) + a := AADDU +- if o.a1 == C_ANDCON { ++ if o.from1 == C_ANDCON { + a = AOR + } + o1 = OP_12IRR(c.opirr(a), uint32(v), uint32(0), uint32(REGTMP)) +-- +2.38.1 + diff --git a/0050-cmd-asm-support-the-PCALIGN-directive-on-loong64.patch b/0050-cmd-asm-support-the-PCALIGN-directive-on-loong64.patch new file mode 100644 index 0000000000000000000000000000000000000000..a3373d6461174099da36c9906735421157ce3011 --- /dev/null +++ b/0050-cmd-asm-support-the-PCALIGN-directive-on-loong64.patch @@ -0,0 +1,157 @@ +From 60c85687ed353267fb33684275edc9fd0152f8fe Mon Sep 17 00:00:00 2001 +From: WANG Xuerui +Date: Tue, 28 Mar 2023 19:30:04 +0800 +Subject: [PATCH 50/62] cmd/asm: support the PCALIGN directive on loong64 + +This could be useful for both asm performance hand-tuning, and future +scenarios where a certain bigger alignment might be required. 
+ +Change-Id: Iad6244669a3d5adea88eceb0dc7be1af4f0d4fc9 +--- + src/cmd/internal/obj/loong64/asm.go | 44 ++++++++++++++++++++++++++--- + src/cmd/link/link_test.go | 25 ++++++++++++---- + 2 files changed, 60 insertions(+), 9 deletions(-) + +diff --git a/src/cmd/internal/obj/loong64/asm.go b/src/cmd/internal/obj/loong64/asm.go +index 0bc3f9c75e..fa1a877eab 100644 +--- a/src/cmd/internal/obj/loong64/asm.go ++++ b/src/cmd/internal/obj/loong64/asm.go +@@ -359,6 +359,7 @@ var optab = []Optab{ + {ARDTIMED, C_NONE, C_NONE, C_NONE, C_REG, C_REG, 62, 4, 0, 0}, + + {obj.AUNDEF, C_NONE, C_NONE, C_NONE, C_NONE, C_NONE, 49, 4, 0, 0}, ++ {obj.APCALIGN, C_SCON, C_NONE, C_NONE, C_NONE, C_NONE, 0, 0, 0, 0}, + {obj.APCDATA, C_LCON, C_NONE, C_NONE, C_LCON, C_NONE, 0, 0, 0, 0}, + {obj.APCDATA, C_DCON, C_NONE, C_NONE, C_DCON, C_NONE, 0, 0, 0, 0}, + {obj.AFUNCDATA, C_SCON, C_NONE, C_NONE, C_ADDR, C_NONE, 0, 0, 0, 0}, +@@ -373,6 +374,14 @@ var optab = []Optab{ + {obj.AXXX, C_NONE, C_NONE, C_NONE, C_NONE, C_NONE, 0, 4, 0, 0}, + } + ++// align code to a certain length by padding bytes. ++func pcAlignPadLength(pc int64, alignedValue int64, ctxt *obj.Link) int { ++ if !((alignedValue&(alignedValue-1) == 0) && 8 <= alignedValue && alignedValue <= 2048) { ++ ctxt.Diag("alignment value of an instruction must be a power of two and in the range [8, 2048], got %d\n", alignedValue) ++ } ++ return int(-pc & (alignedValue - 1)) ++} ++ + var oprange [ALAST & obj.AMask][]Optab + + var xcmp [C_NCLASS][C_NCLASS]bool +@@ -404,10 +413,20 @@ func span0(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { + o = c.oplook(p) + m = int(o.size) + if m == 0 { +- if p.As != obj.ANOP && p.As != obj.AFUNCDATA && p.As != obj.APCDATA { ++ switch p.As { ++ case obj.APCALIGN: ++ alignedValue := p.From.Offset ++ m = pcAlignPadLength(pc, alignedValue, ctxt) ++ // Update the current text symbol alignment value. 
++ if int32(alignedValue) > cursym.Func().Align { ++ cursym.Func().Align = int32(alignedValue) ++ } ++ break ++ case obj.ANOP, obj.AFUNCDATA, obj.APCDATA: ++ continue ++ default: + c.ctxt.Diag("zero-width instruction\n%v", p) + } +- continue + } + + pc += int64(m) +@@ -457,10 +476,16 @@ func span0(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { + + m = int(o.size) + if m == 0 { +- if p.As != obj.ANOP && p.As != obj.AFUNCDATA && p.As != obj.APCDATA { ++ switch p.As { ++ case obj.APCALIGN: ++ alignedValue := p.From.Offset ++ m = pcAlignPadLength(pc, alignedValue, ctxt) ++ break ++ case obj.ANOP, obj.AFUNCDATA, obj.APCDATA: ++ continue ++ default: + c.ctxt.Diag("zero-width instruction\n%v", p) + } +- continue + } + + pc += int64(m) +@@ -484,6 +509,16 @@ func span0(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { + if int(o.size) > 4*len(out) { + log.Fatalf("out array in span0 is too small, need at least %d for %v", o.size/4, p) + } ++ if p.As == obj.APCALIGN { ++ alignedValue := p.From.Offset ++ v := pcAlignPadLength(p.Pc, alignedValue, c.ctxt) ++ for i = 0; i < int32(v/4); i++ { ++ // emit ANOOP instruction by the padding size ++ c.ctxt.Arch.ByteOrder.PutUint32(bp, c.oprrr(ANOOP)) ++ bp = bp[4:] ++ } ++ continue ++ } + c.asmout(p, o, out[:]) + for i = 0; i < int32(o.size/4); i++ { + c.ctxt.Arch.ByteOrder.PutUint32(bp, out[i]) +@@ -1083,6 +1118,7 @@ func buildop(ctxt *obj.Link) { + obj.ATEXT, + obj.AUNDEF, + obj.AFUNCDATA, ++ obj.APCALIGN, + obj.APCDATA, + obj.ADUFFZERO, + obj.ADUFFCOPY: +diff --git a/src/cmd/link/link_test.go b/src/cmd/link/link_test.go +index a770c91936..1e02a3dfa3 100644 +--- a/src/cmd/link/link_test.go ++++ b/src/cmd/link/link_test.go +@@ -570,7 +570,8 @@ func main() { + } + ` + +-const testFuncAlignAsmSrc = ` ++var testFuncAlignAsmSources = map[string]string{ ++ "arm64": ` + #include "textflag.h" + + TEXT ·alignPc(SB),NOSPLIT, $0-0 +@@ -581,13 +582,27 @@ TEXT ·alignPc(SB),NOSPLIT, $0-0 + + GLOBL ·alignPcFnAddr(SB),RODATA,$8 + 
DATA ·alignPcFnAddr(SB)/8,$·alignPc(SB) +-` ++`, ++ "loong64": ` ++#include "textflag.h" ++ ++TEXT ·alignPc(SB),NOSPLIT, $0-0 ++ MOVV $2, R4 ++ PCALIGN $512 ++ MOVV $3, R5 ++ RET ++ ++GLOBL ·alignPcFnAddr(SB),RODATA,$8 ++DATA ·alignPcFnAddr(SB)/8,$·alignPc(SB) ++`, ++} + + // TestFuncAlign verifies that the address of a function can be aligned +-// with a specific value on arm64. ++// with a specific value on arm64 and loong64. + func TestFuncAlign(t *testing.T) { +- if runtime.GOARCH != "arm64" || runtime.GOOS != "linux" { +- t.Skip("skipping on non-linux/arm64 platform") ++ testFuncAlignAsmSrc := testFuncAlignAsmSources[runtime.GOARCH] ++ if len(testFuncAlignAsmSrc) == 0 || runtime.GOOS != "linux" { ++ t.Skip("skipping on non-linux/{arm64,loong64} platform") + } + testenv.MustHaveGoBuild(t) + +-- +2.38.1 + diff --git a/0051-internal-bytealg-runtime-align-some-loong64-asm-loop.patch b/0051-internal-bytealg-runtime-align-some-loong64-asm-loop.patch new file mode 100644 index 0000000000000000000000000000000000000000..1ddffed06d01edd4a3b9c9b675b149ea6c940d80 --- /dev/null +++ b/0051-internal-bytealg-runtime-align-some-loong64-asm-loop.patch @@ -0,0 +1,175 @@ +From d25610dc2fdae0eb9af71d47a50685b7ecbc971f Mon Sep 17 00:00:00 2001 +From: WANG Xuerui +Date: Tue, 28 Mar 2023 19:58:17 +0800 +Subject: [PATCH 51/62] internal/bytealg, runtime: align some loong64 asm loops + to 16-byte boundaries +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +The LA464 micro-architecture is very sensitive to alignment of loops, +so the final performance of linked binaries can vary wildly due to +uncontrolled alignment of certain performance-critical loops. Now that +PCALIGN is available on loong64, let's make use of it and manually align +some assembly loops. The functions are identified based on perf records +of some easily regressed go1 benchmark cases (e.g. 
FmtFprintfPrefixedInt, +RegexpMatchEasy0_1K and Revcomp are particularly sensitive; even those +optimizations purely reducing dynamic instruction counts can regress +those cases by 6~12%, making the numbers almost useless). + +Benchmark results on Loongson 3A5000 (which is an LA464 implementation): + +goos: linux +goarch: loong64 +pkg: test/bench/go1 + │ CL 416154 │ this CL │ + │ sec/op │ sec/op vs base │ +BinaryTree17 14.10 ± 1% 14.10 ± 1% ~ (p=1.000 n=10) +Fannkuch11 3.672 ± 0% 3.579 ± 0% -2.53% (p=0.000 n=10) +FmtFprintfEmpty 94.72n ± 0% 94.73n ± 0% +0.01% (p=0.000 n=10) +FmtFprintfString 149.9n ± 0% 151.9n ± 0% +1.33% (p=0.000 n=10) +FmtFprintfInt 154.1n ± 0% 158.3n ± 0% +2.73% (p=0.000 n=10) +FmtFprintfIntInt 236.2n ± 0% 241.4n ± 0% +2.20% (p=0.000 n=10) +FmtFprintfPrefixedInt 314.2n ± 0% 320.2n ± 0% +1.91% (p=0.000 n=10) +FmtFprintfFloat 405.0n ± 0% 414.3n ± 0% +2.30% (p=0.000 n=10) +FmtManyArgs 933.6n ± 0% 949.9n ± 0% +1.75% (p=0.000 n=10) +GobDecode 15.51m ± 1% 15.24m ± 0% -1.77% (p=0.000 n=10) +GobEncode 18.42m ± 4% 18.10m ± 2% ~ (p=0.631 n=10) +Gzip 423.6m ± 0% 429.9m ± 0% +1.49% (p=0.000 n=10) +Gunzip 88.75m ± 0% 88.31m ± 0% -0.50% (p=0.000 n=10) +HTTPClientServer 85.44µ ± 0% 85.71µ ± 0% +0.31% (p=0.035 n=10) +JSONEncode 18.65m ± 0% 19.74m ± 0% +5.81% (p=0.000 n=10) +JSONDecode 77.75m ± 0% 78.60m ± 1% +1.09% (p=0.000 n=10) +Mandelbrot200 7.214m ± 0% 7.208m ± 0% ~ (p=0.481 n=10) +GoParse 7.616m ± 2% 7.616m ± 1% ~ (p=0.739 n=10) +RegexpMatchEasy0_32 142.9n ± 0% 133.0n ± 0% -6.93% (p=0.000 n=10) +RegexpMatchEasy0_1K 1.535µ ± 0% 1.362µ ± 0% -11.27% (p=0.000 n=10) +RegexpMatchEasy1_32 161.8n ± 0% 161.8n ± 0% ~ (p=0.628 n=10) +RegexpMatchEasy1_1K 1.635µ ± 0% 1.497µ ± 0% -8.41% (p=0.000 n=10) +RegexpMatchMedium_32 1.429µ ± 0% 1.420µ ± 0% -0.63% (p=0.000 n=10) +RegexpMatchMedium_1K 41.86µ ± 0% 42.25µ ± 0% +0.93% (p=0.000 n=10) +RegexpMatchHard_32 2.144µ ± 0% 2.108µ ± 0% -1.68% (p=0.000 n=10) +RegexpMatchHard_1K 63.83µ ± 0% 62.65µ ± 0% -1.86% (p=0.000 n=10) 
+Revcomp 1.337 ± 0% 1.192 ± 0% -10.89% (p=0.000 n=10) +Template 116.4m ± 1% 115.6m ± 2% ~ (p=0.579 n=10) +TimeParse 421.4n ± 2% 418.1n ± 1% -0.78% (p=0.001 n=10) +TimeFormat 515.1n ± 0% 517.9n ± 0% +0.54% (p=0.001 n=10) +geomean 104.5µ 103.5µ -0.99% + + │ CL 416154 │ this CL │ + │ B/s │ B/s vs base │ +GobDecode 47.19Mi ± 1% 48.04Mi ± 0% +1.80% (p=0.000 n=10) +GobEncode 39.73Mi ± 4% 40.44Mi ± 2% ~ (p=0.631 n=10) +Gzip 43.68Mi ± 0% 43.04Mi ± 0% -1.47% (p=0.000 n=10) +Gunzip 208.5Mi ± 0% 209.6Mi ± 0% +0.50% (p=0.000 n=10) +JSONEncode 99.21Mi ± 0% 93.76Mi ± 0% -5.49% (p=0.000 n=10) +JSONDecode 23.80Mi ± 0% 23.55Mi ± 1% -1.08% (p=0.000 n=10) +GoParse 7.253Mi ± 2% 7.253Mi ± 1% ~ (p=0.810 n=10) +RegexpMatchEasy0_32 213.6Mi ± 0% 229.4Mi ± 0% +7.41% (p=0.000 n=10) +RegexpMatchEasy0_1K 636.3Mi ± 0% 717.3Mi ± 0% +12.73% (p=0.000 n=10) +RegexpMatchEasy1_32 188.6Mi ± 0% 188.6Mi ± 0% ~ (p=0.810 n=10) +RegexpMatchEasy1_1K 597.4Mi ± 0% 652.2Mi ± 0% +9.17% (p=0.000 n=10) +RegexpMatchMedium_32 21.35Mi ± 0% 21.49Mi ± 0% +0.63% (p=0.000 n=10) +RegexpMatchMedium_1K 23.33Mi ± 0% 23.11Mi ± 0% -0.94% (p=0.000 n=10) +RegexpMatchHard_32 14.24Mi ± 0% 14.48Mi ± 0% +1.67% (p=0.000 n=10) +RegexpMatchHard_1K 15.30Mi ± 0% 15.59Mi ± 0% +1.93% (p=0.000 n=10) +Revcomp 181.3Mi ± 0% 203.4Mi ± 0% +12.21% (p=0.000 n=10) +Template 15.89Mi ± 1% 16.00Mi ± 2% ~ (p=0.542 n=10) +geomean 59.33Mi 60.72Mi +2.33% + +Change-Id: I9ac28d936e03d21c46bb19fa100018f61ace6b42 +Reviewed-on: https://go-review.googlesource.com/c/go/+/479816 +TryBot-Result: Gopher Robot +Reviewed-by: Ian Lance Taylor +Auto-Submit: Ian Lance Taylor +Run-TryBot: WANG Xuerui +Reviewed-by: Keith Randall +Run-TryBot: Ian Lance Taylor +Reviewed-by: Keith Randall +--- + src/internal/bytealg/compare_loong64.s | 1 + + src/internal/bytealg/equal_loong64.s | 1 + + src/internal/bytealg/indexbyte_loong64.s | 2 ++ + src/runtime/memclr_loong64.s | 1 + + src/runtime/memmove_loong64.s | 2 ++ + 5 files changed, 7 insertions(+) + +diff --git 
a/src/internal/bytealg/compare_loong64.s b/src/internal/bytealg/compare_loong64.s +index 54c2daba69..c89c5a9256 100644 +--- a/src/internal/bytealg/compare_loong64.s ++++ b/src/internal/bytealg/compare_loong64.s +@@ -48,6 +48,7 @@ entry: + AND $7, R15 + BNE R0, R15, byte_loop + ++ PCALIGN $16 + chunk16_loop: + BEQ R0, R14, byte_loop + MOVV (R6), R8 +diff --git a/src/internal/bytealg/equal_loong64.s b/src/internal/bytealg/equal_loong64.s +index dcdde89b25..ba2a5578c3 100644 +--- a/src/internal/bytealg/equal_loong64.s ++++ b/src/internal/bytealg/equal_loong64.s +@@ -14,6 +14,7 @@ TEXT runtime·memequal(SB),NOSPLIT|NOFRAME,$0-25 + BEQ R4, R5, eq + MOVV size+16(FP), R6 + ADDV R4, R6, R7 ++ PCALIGN $16 + loop: + BNE R4, R7, test + MOVV $1, R4 +diff --git a/src/internal/bytealg/indexbyte_loong64.s b/src/internal/bytealg/indexbyte_loong64.s +index baa9c86be2..604970549f 100644 +--- a/src/internal/bytealg/indexbyte_loong64.s ++++ b/src/internal/bytealg/indexbyte_loong64.s +@@ -13,6 +13,7 @@ TEXT ·IndexByte(SB),NOSPLIT,$0-40 + ADDV R4, R5 // end + ADDV $-1, R4 + ++ PCALIGN $16 + loop: + ADDV $1, R4 + BEQ R4, R5, notfound +@@ -36,6 +37,7 @@ TEXT ·IndexByteString(SB),NOSPLIT,$0-32 + ADDV R4, R5 // end + ADDV $-1, R4 + ++ PCALIGN $16 + loop: + ADDV $1, R4 + BEQ R4, R5, notfound +diff --git a/src/runtime/memclr_loong64.s b/src/runtime/memclr_loong64.s +index e4f20587b7..7bb6f3dfc9 100644 +--- a/src/runtime/memclr_loong64.s ++++ b/src/runtime/memclr_loong64.s +@@ -26,6 +26,7 @@ words: + // do 8 bytes at a time if there is room + ADDV $-7, R4, R7 + ++ PCALIGN $16 + SGTU R7, R6, R8 + BEQ R8, out + MOVV R0, (R6) +diff --git a/src/runtime/memmove_loong64.s b/src/runtime/memmove_loong64.s +index b7b9c56627..0f139bcc13 100644 +--- a/src/runtime/memmove_loong64.s ++++ b/src/runtime/memmove_loong64.s +@@ -42,6 +42,7 @@ words: + // do 8 bytes at a time if there is room + ADDV $-7, R9, R6 // R6 is end pointer-7 + ++ PCALIGN $16 + SGTU R6, R4, R8 + BEQ R8, out + MOVV (R5), R7 +@@ -86,6 +87,7 
@@ words1: + // do 8 bytes at a time if there is room + ADDV $7, R4, R6 // R6 is start pointer+7 + ++ PCALIGN $16 + SGTU R9, R6, R8 + BEQ R8, out1 + ADDV $-8, R5 +-- +2.38.1 + diff --git a/0052-cmd-link-bump-loong64-function-alignment-to-16-bytes.patch b/0052-cmd-link-bump-loong64-function-alignment-to-16-bytes.patch new file mode 100644 index 0000000000000000000000000000000000000000..19d90f4ed3e664a5e212ff7f47d7384818dbabbb --- /dev/null +++ b/0052-cmd-link-bump-loong64-function-alignment-to-16-bytes.patch @@ -0,0 +1,48 @@ +From ec655da9ecb0d1f016b1d385a397de28116f07be Mon Sep 17 00:00:00 2001 +From: WANG Xuerui +Date: Tue, 4 Apr 2023 16:35:06 +0800 +Subject: [PATCH 52/62] cmd/link: bump loong64 function alignment to 16 bytes + +The loong64 PCALIGN directive works with PCs relative to beginning of +functions. So if the function alignment is less than that requested by +PCALIGN, the following code may in fact not be aligned as such, leading +to unexpected performance. + +The current function alignment on loong64 is 8 bytes, which seems to +stem from mips64 or riscv64. In order to make performance more +predictable on loong64, it is raised to 16 bytes to ensure that at +least `PCALIGN $16` works. + +As alignment of loops written in Go is yet to be tackled, and the +codegen is not otherwise touched, benchmark numbers for this change are +not going to be meaningful, and not included. 
+ +Change-Id: I2120ef3746ce067e274920c82091810073bfa3be +Reviewed-on: https://go-review.googlesource.com/c/go/+/481936 +Auto-Submit: Ian Lance Taylor +TryBot-Result: Gopher Robot +Reviewed-by: Ian Lance Taylor +Run-TryBot: Ian Lance Taylor +Run-TryBot: WANG Xuerui +Reviewed-by: Keith Randall +Reviewed-by: Keith Randall +--- + src/cmd/link/internal/loong64/l.go | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/src/cmd/link/internal/loong64/l.go b/src/cmd/link/internal/loong64/l.go +index e97a8686bf..a6309f1a3a 100644 +--- a/src/cmd/link/internal/loong64/l.go ++++ b/src/cmd/link/internal/loong64/l.go +@@ -7,7 +7,7 @@ package loong64 + const ( + maxAlign = 32 // max data alignment + minAlign = 1 // min data alignment +- funcAlign = 8 ++ funcAlign = 16 + ) + + /* Used by ../../internal/ld/dwarf.go */ +-- +2.38.1 + diff --git a/0053-cmd-compile-optimize-multiplication-on-loong64.patch b/0053-cmd-compile-optimize-multiplication-on-loong64.patch new file mode 100644 index 0000000000000000000000000000000000000000..00843e896dc4d3657b8b8fcc50f2a4d839d1eb49 --- /dev/null +++ b/0053-cmd-compile-optimize-multiplication-on-loong64.patch @@ -0,0 +1,761 @@ +From c7370fa1f2fa5440c706872923cff4f68408193f Mon Sep 17 00:00:00 2001 +From: Wayne Zuo +Date: Thu, 2 Mar 2023 13:33:21 +0800 +Subject: [PATCH 53/62] cmd/compile: optimize multiplication on loong64 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Previously, multiplication on loong64 architecture was performed using +MULV and MULHVU instructions to calculate the low 64-bit and high +64-bit of a multiplication respectively. However, in most cases, only +the low 64-bits are needed. This commit enables only computing the low +64-bit result with the MULV instruction. + +Reduce the binary size slightly.
+ +file before after Δ % +addr2line 2833777 2833849 +72 +0.003% +asm 5267499 5266963 -536 -0.010% +buildid 2579706 2579402 -304 -0.012% +cgo 4798260 4797444 -816 -0.017% +compile 25247419 25175030 -72389 -0.287% +cover 4973091 4972027 -1064 -0.021% +dist 3631013 3565653 -65360 -1.800% +doc 4076036 4074004 -2032 -0.050% +fix 3496378 3496066 -312 -0.009% +link 6984102 6983214 -888 -0.013% +nm 2743820 2743516 -304 -0.011% +objdump 4277171 4277035 -136 -0.003% +pack 2379248 2378872 -376 -0.016% +pprof 14419090 14419874 +784 +0.005% +test2json 2684386 2684018 -368 -0.014% +trace 13640018 13631034 -8984 -0.066% +vet 7748918 7752630 +3712 +0.048% +go 15643850 15638098 -5752 -0.037% +total 127423782 127268729 -155053 -0.122% + +Change-Id: Ifce4a9a3ed1d03c170681e39cb6f3541db9882dc +Reviewed-on: https://go-review.googlesource.com/c/go/+/472775 +TryBot-Result: Gopher Robot +Reviewed-by: Dmitri Shuralyov +Run-TryBot: Wayne Zuo +Reviewed-by: David Chase +--- + src/cmd/compile/internal/loong64/ssa.go | 29 +- + .../compile/internal/ssa/_gen/LOONG64.rules | 27 +- + .../compile/internal/ssa/_gen/LOONG64Ops.go | 9 +- + src/cmd/compile/internal/ssa/opGen.go | 44 +- + .../compile/internal/ssa/rewriteLOONG64.go | 392 +++++++----------- + 5 files changed, 192 insertions(+), 309 deletions(-) + +diff --git a/src/cmd/compile/internal/loong64/ssa.go b/src/cmd/compile/internal/loong64/ssa.go +index 75eb732df8..1c84dccb11 100644 +--- a/src/cmd/compile/internal/loong64/ssa.go ++++ b/src/cmd/compile/internal/loong64/ssa.go +@@ -162,7 +162,8 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) { + ssa.OpLOONG64MULF, + ssa.OpLOONG64MULD, + ssa.OpLOONG64DIVF, +- ssa.OpLOONG64DIVD: ++ ssa.OpLOONG64DIVD, ++ ssa.OpLOONG64MULV, ssa.OpLOONG64MULHV, ssa.OpLOONG64MULHVU: + p := s.Prog(v.Op.Asm()) + p.From.Type = obj.TYPE_REG + p.From.Reg = v.Args[1].Reg() +@@ -196,32 +197,6 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) { + p.Reg = v.Args[0].Reg() + p.To.Type = obj.TYPE_REG + p.To.Reg = v.Reg() +- case 
ssa.OpLOONG64MULV: +- p := s.Prog(loong64.AMULV) +- p.From.Type = obj.TYPE_REG +- p.From.Reg = v.Args[1].Reg() +- p.Reg = v.Args[0].Reg() +- p.To.Type = obj.TYPE_REG +- p.To.Reg = v.Reg1() +- p1 := s.Prog(loong64.AMULHV) +- p1.From.Type = obj.TYPE_REG +- p1.From.Reg = v.Args[1].Reg() +- p1.Reg = v.Args[0].Reg() +- p1.To.Type = obj.TYPE_REG +- p1.To.Reg = v.Reg0() +- case ssa.OpLOONG64MULVU: +- p := s.Prog(loong64.AMULV) +- p.From.Type = obj.TYPE_REG +- p.From.Reg = v.Args[1].Reg() +- p.Reg = v.Args[0].Reg() +- p.To.Type = obj.TYPE_REG +- p.To.Reg = v.Reg1() +- p1 := s.Prog(loong64.AMULHVU) +- p1.From.Type = obj.TYPE_REG +- p1.From.Reg = v.Args[1].Reg() +- p1.Reg = v.Args[0].Reg() +- p1.To.Type = obj.TYPE_REG +- p1.To.Reg = v.Reg0() + case ssa.OpLOONG64DIVV: + p := s.Prog(loong64.ADIVV) + p.From.Type = obj.TYPE_REG +diff --git a/src/cmd/compile/internal/ssa/_gen/LOONG64.rules b/src/cmd/compile/internal/ssa/_gen/LOONG64.rules +index f8c07f3024..8c81b7a3f6 100644 +--- a/src/cmd/compile/internal/ssa/_gen/LOONG64.rules ++++ b/src/cmd/compile/internal/ssa/_gen/LOONG64.rules +@@ -8,16 +8,17 @@ + (Sub(Ptr|64|32|16|8) ...) => (SUBV ...) + (Sub(32|64)F ...) => (SUB(F|D) ...) + +-(Mul(64|32|16|8) x y) => (Select1 (MULVU x y)) ++(Mul(64|32|16|8) ...) => (MULV ...) + (Mul(32|64)F ...) => (MUL(F|D) ...) +-(Mul64uhilo ...) => (MULVU ...) +-(Select0 (Mul64uover x y)) => (Select1 (MULVU x y)) +-(Select1 (Mul64uover x y)) => (SGTU (Select0 (MULVU x y)) (MOVVconst [0])) ++(Select0 (Mul64uhilo x y)) => (MULHVU x y) ++(Select1 (Mul64uhilo x y)) => (MULV x y) ++(Select0 (Mul64uover x y)) => (MULV x y) ++(Select1 (Mul64uover x y)) => (SGTU (MULHVU x y) (MOVVconst [0])) + +-(Hmul64 x y) => (Select0 (MULV x y)) +-(Hmul64u x y) => (Select0 (MULVU x y)) +-(Hmul32 x y) => (SRAVconst (Select1 (MULV (SignExt32to64 x) (SignExt32to64 y))) [32]) +-(Hmul32u x y) => (SRLVconst (Select1 (MULVU (ZeroExt32to64 x) (ZeroExt32to64 y))) [32]) ++(Hmul64 ...) => (MULHV ...) ++(Hmul64u ...) => (MULHVU ...) 
++(Hmul32 x y) => (SRAVconst (MULV (SignExt32to64 x) (SignExt32to64 y)) [32]) ++(Hmul32u x y) => (SRLVconst (MULV (ZeroExt32to64 x) (ZeroExt32to64 y)) [32]) + + (Div64 x y) => (Select1 (DIVV x y)) + (Div64u x y) => (Select1 (DIVVU x y)) +@@ -571,10 +572,10 @@ + (SGTU (MOVVconst [c]) x) && is32Bit(c) => (SGTUconst [c] x) + + // mul by constant +-(Select1 (MULVU x (MOVVconst [-1]))) => (NEGV x) +-(Select1 (MULVU _ (MOVVconst [0]))) => (MOVVconst [0]) +-(Select1 (MULVU x (MOVVconst [1]))) => x +-(Select1 (MULVU x (MOVVconst [c]))) && isPowerOfTwo64(c) => (SLLVconst [log64(c)] x) ++(MULV x (MOVVconst [-1])) => (NEGV x) ++(MULV _ (MOVVconst [0])) => (MOVVconst [0]) ++(MULV x (MOVVconst [1])) => x ++(MULV x (MOVVconst [c])) && isPowerOfTwo64(c) => (SLLVconst [log64(c)] x) + + // div by constant + (Select1 (DIVVU x (MOVVconst [1]))) => x +@@ -612,7 +613,7 @@ + (SLLVconst [c] (MOVVconst [d])) => (MOVVconst [d< (MOVVconst [int64(uint64(d)>>uint64(c))]) + (SRAVconst [c] (MOVVconst [d])) => (MOVVconst [d>>uint64(c)]) +-(Select1 (MULVU (MOVVconst [c]) (MOVVconst [d]))) => (MOVVconst [c*d]) ++(MULV (MOVVconst [c]) (MOVVconst [d])) => (MOVVconst [c*d]) + (Select1 (DIVV (MOVVconst [c]) (MOVVconst [d]))) && d != 0 => (MOVVconst [c/d]) + (Select1 (DIVVU (MOVVconst [c]) (MOVVconst [d]))) && d != 0 => (MOVVconst [int64(uint64(c)/uint64(d))]) + (Select0 (DIVV (MOVVconst [c]) (MOVVconst [d]))) && d != 0 => (MOVVconst [c%d]) // mod +diff --git a/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go b/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go +index aca1bd7358..e40354e526 100644 +--- a/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go ++++ b/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go +@@ -163,10 +163,11 @@ func init() { + {name: "SUBV", argLength: 2, reg: gp21, asm: "SUBVU"}, // arg0 - arg1 + {name: "SUBVconst", argLength: 1, reg: gp11, asm: "SUBVU", aux: "Int64"}, // arg0 - auxInt + +- {name: "MULV", argLength: 2, reg: gp22, resultNotInArgs: true, commutative: true, typ: 
"(Int64,Int64)"}, // arg0 * arg1, signed +- {name: "MULVU", argLength: 2, reg: gp22, resultNotInArgs: true, commutative: true, typ: "(UInt64,UInt64)"}, // arg0 * arg1, unsigned +- {name: "DIVV", argLength: 2, reg: gp22, resultNotInArgs: true, typ: "(Int64,Int64)"}, // arg0 / arg1, signed +- {name: "DIVVU", argLength: 2, reg: gp22, resultNotInArgs: true, typ: "(UInt64,UInt64)"}, // arg0 / arg1, unsigned ++ {name: "MULV", argLength: 2, reg: gp21, asm: "MULV", commutative: true, typ: "Int64"}, // arg0 * arg1 ++ {name: "MULHV", argLength: 2, reg: gp21, asm: "MULHV", commutative: true, typ: "Int64"}, // (arg0 * arg1) >> 64, signed ++ {name: "MULHVU", argLength: 2, reg: gp21, asm: "MULHVU", commutative: true, typ: "UInt64"}, // (arg0 * arg1) >> 64, unsigned ++ {name: "DIVV", argLength: 2, reg: gp22, resultNotInArgs: true, typ: "(Int64,Int64)"}, // arg0 / arg1, signed ++ {name: "DIVVU", argLength: 2, reg: gp22, resultNotInArgs: true, typ: "(UInt64,UInt64)"}, // arg0 / arg1, unsigned + + {name: "ADDF", argLength: 2, reg: fp21, asm: "ADDF", commutative: true}, // arg0 + arg1 + {name: "ADDD", argLength: 2, reg: fp21, asm: "ADDD", commutative: true}, // arg0 + arg1 +diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go +index 57cd6d6931..33b05a09bc 100644 +--- a/src/cmd/compile/internal/ssa/opGen.go ++++ b/src/cmd/compile/internal/ssa/opGen.go +@@ -1716,7 +1716,8 @@ const ( + OpLOONG64SUBV + OpLOONG64SUBVconst + OpLOONG64MULV +- OpLOONG64MULVU ++ OpLOONG64MULHV ++ OpLOONG64MULHVU + OpLOONG64DIVV + OpLOONG64DIVVU + OpLOONG64ADDF +@@ -22913,34 +22914,47 @@ var opcodeTable = [...]opInfo{ + }, + }, + { +- name: "MULV", +- argLen: 2, +- commutative: true, +- resultNotInArgs: true, ++ name: "MULV", ++ argLen: 2, ++ commutative: true, ++ asm: loong64.AMULV, + reg: regInfo{ + inputs: []inputInfo{ +- {0, 1072496632}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 +- {1, 1072496632}, // R4 R5 R6 R7 R8 R9 
R10 R11 R12 R13 R14 R15 R16 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 ++ {0, 1072693240}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 ++ {1, 1072693240}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 + }, + outputs: []outputInfo{ + {0, 1070596088}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R23 R24 R25 R26 R27 R28 R29 R31 +- {1, 1070596088}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R23 R24 R25 R26 R27 R28 R29 R31 + }, + }, + }, + { +- name: "MULVU", +- argLen: 2, +- commutative: true, +- resultNotInArgs: true, ++ name: "MULHV", ++ argLen: 2, ++ commutative: true, ++ asm: loong64.AMULHV, + reg: regInfo{ + inputs: []inputInfo{ +- {0, 1072496632}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 +- {1, 1072496632}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 ++ {0, 1072693240}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 ++ {1, 1072693240}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 ++ }, ++ outputs: []outputInfo{ ++ {0, 1070596088}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R23 R24 R25 R26 R27 R28 R29 R31 ++ }, ++ }, ++ }, ++ { ++ name: "MULHVU", ++ argLen: 2, ++ commutative: true, ++ asm: loong64.AMULHVU, ++ reg: regInfo{ ++ inputs: []inputInfo{ ++ {0, 1072693240}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 ++ {1, 1072693240}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 + }, + outputs: []outputInfo{ + {0, 1070596088}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R23 R24 R25 R26 R27 R28 R29 R31 +- {1, 1070596088}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 
R18 R19 R20 R23 R24 R25 R26 R27 R28 R29 R31 + }, + }, + }, +diff --git a/src/cmd/compile/internal/ssa/rewriteLOONG64.go b/src/cmd/compile/internal/ssa/rewriteLOONG64.go +index 1a2593ef55..49122c5feb 100644 +--- a/src/cmd/compile/internal/ssa/rewriteLOONG64.go ++++ b/src/cmd/compile/internal/ssa/rewriteLOONG64.go +@@ -202,9 +202,11 @@ func rewriteValueLOONG64(v *Value) bool { + case OpHmul32u: + return rewriteValueLOONG64_OpHmul32u(v) + case OpHmul64: +- return rewriteValueLOONG64_OpHmul64(v) ++ v.Op = OpLOONG64MULHV ++ return true + case OpHmul64u: +- return rewriteValueLOONG64_OpHmul64u(v) ++ v.Op = OpLOONG64MULHVU ++ return true + case OpInterCall: + v.Op = OpLOONG64CALLinter + return true +@@ -286,6 +288,8 @@ func rewriteValueLOONG64(v *Value) bool { + return rewriteValueLOONG64_OpLOONG64MOVWstore(v) + case OpLOONG64MOVWstorezero: + return rewriteValueLOONG64_OpLOONG64MOVWstorezero(v) ++ case OpLOONG64MULV: ++ return rewriteValueLOONG64_OpLOONG64MULV(v) + case OpLOONG64NEGV: + return rewriteValueLOONG64_OpLOONG64NEGV(v) + case OpLOONG64NOR: +@@ -423,22 +427,23 @@ func rewriteValueLOONG64(v *Value) bool { + case OpMove: + return rewriteValueLOONG64_OpMove(v) + case OpMul16: +- return rewriteValueLOONG64_OpMul16(v) ++ v.Op = OpLOONG64MULV ++ return true + case OpMul32: +- return rewriteValueLOONG64_OpMul32(v) ++ v.Op = OpLOONG64MULV ++ return true + case OpMul32F: + v.Op = OpLOONG64MULF + return true + case OpMul64: +- return rewriteValueLOONG64_OpMul64(v) ++ v.Op = OpLOONG64MULV ++ return true + case OpMul64F: + v.Op = OpLOONG64MULD + return true +- case OpMul64uhilo: +- v.Op = OpLOONG64MULVU +- return true + case OpMul8: +- return rewriteValueLOONG64_OpMul8(v) ++ v.Op = OpLOONG64MULV ++ return true + case OpNeg16: + v.Op = OpLOONG64NEGV + return true +@@ -1229,20 +1234,18 @@ func rewriteValueLOONG64_OpHmul32(v *Value) bool { + b := v.Block + typ := &b.Func.Config.Types + // match: (Hmul32 x y) +- // result: (SRAVconst (Select1 (MULV (SignExt32to64 x) 
(SignExt32to64 y))) [32]) ++ // result: (SRAVconst (MULV (SignExt32to64 x) (SignExt32to64 y)) [32]) + for { + x := v_0 + y := v_1 + v.reset(OpLOONG64SRAVconst) + v.AuxInt = int64ToAuxInt(32) +- v0 := b.NewValue0(v.Pos, OpSelect1, typ.Int64) +- v1 := b.NewValue0(v.Pos, OpLOONG64MULV, types.NewTuple(typ.Int64, typ.Int64)) ++ v0 := b.NewValue0(v.Pos, OpLOONG64MULV, typ.Int64) ++ v1 := b.NewValue0(v.Pos, OpSignExt32to64, typ.Int64) ++ v1.AddArg(x) + v2 := b.NewValue0(v.Pos, OpSignExt32to64, typ.Int64) +- v2.AddArg(x) +- v3 := b.NewValue0(v.Pos, OpSignExt32to64, typ.Int64) +- v3.AddArg(y) +- v1.AddArg2(v2, v3) +- v0.AddArg(v1) ++ v2.AddArg(y) ++ v0.AddArg2(v1, v2) + v.AddArg(v0) + return true + } +@@ -1253,54 +1256,18 @@ func rewriteValueLOONG64_OpHmul32u(v *Value) bool { + b := v.Block + typ := &b.Func.Config.Types + // match: (Hmul32u x y) +- // result: (SRLVconst (Select1 (MULVU (ZeroExt32to64 x) (ZeroExt32to64 y))) [32]) ++ // result: (SRLVconst (MULV (ZeroExt32to64 x) (ZeroExt32to64 y)) [32]) + for { + x := v_0 + y := v_1 + v.reset(OpLOONG64SRLVconst) + v.AuxInt = int64ToAuxInt(32) +- v0 := b.NewValue0(v.Pos, OpSelect1, typ.UInt64) +- v1 := b.NewValue0(v.Pos, OpLOONG64MULVU, types.NewTuple(typ.UInt64, typ.UInt64)) ++ v0 := b.NewValue0(v.Pos, OpLOONG64MULV, typ.Int64) ++ v1 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64) ++ v1.AddArg(x) + v2 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64) +- v2.AddArg(x) +- v3 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64) +- v3.AddArg(y) +- v1.AddArg2(v2, v3) +- v0.AddArg(v1) +- v.AddArg(v0) +- return true +- } +-} +-func rewriteValueLOONG64_OpHmul64(v *Value) bool { +- v_1 := v.Args[1] +- v_0 := v.Args[0] +- b := v.Block +- typ := &b.Func.Config.Types +- // match: (Hmul64 x y) +- // result: (Select0 (MULV x y)) +- for { +- x := v_0 +- y := v_1 +- v.reset(OpSelect0) +- v0 := b.NewValue0(v.Pos, OpLOONG64MULV, types.NewTuple(typ.Int64, typ.Int64)) +- v0.AddArg2(x, y) +- v.AddArg(v0) +- return true +- } +-} +-func 
rewriteValueLOONG64_OpHmul64u(v *Value) bool { +- v_1 := v.Args[1] +- v_0 := v.Args[0] +- b := v.Block +- typ := &b.Func.Config.Types +- // match: (Hmul64u x y) +- // result: (Select0 (MULVU x y)) +- for { +- x := v_0 +- y := v_1 +- v.reset(OpSelect0) +- v0 := b.NewValue0(v.Pos, OpLOONG64MULVU, types.NewTuple(typ.UInt64, typ.UInt64)) +- v0.AddArg2(x, y) ++ v2.AddArg(y) ++ v0.AddArg2(v1, v2) + v.AddArg(v0) + return true + } +@@ -3279,6 +3246,89 @@ func rewriteValueLOONG64_OpLOONG64MOVWstorezero(v *Value) bool { + } + return false + } ++func rewriteValueLOONG64_OpLOONG64MULV(v *Value) bool { ++ v_1 := v.Args[1] ++ v_0 := v.Args[0] ++ // match: (MULV x (MOVVconst [-1])) ++ // result: (NEGV x) ++ for { ++ for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { ++ x := v_0 ++ if v_1.Op != OpLOONG64MOVVconst || auxIntToInt64(v_1.AuxInt) != -1 { ++ continue ++ } ++ v.reset(OpLOONG64NEGV) ++ v.AddArg(x) ++ return true ++ } ++ break ++ } ++ // match: (MULV _ (MOVVconst [0])) ++ // result: (MOVVconst [0]) ++ for { ++ for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { ++ if v_1.Op != OpLOONG64MOVVconst || auxIntToInt64(v_1.AuxInt) != 0 { ++ continue ++ } ++ v.reset(OpLOONG64MOVVconst) ++ v.AuxInt = int64ToAuxInt(0) ++ return true ++ } ++ break ++ } ++ // match: (MULV x (MOVVconst [1])) ++ // result: x ++ for { ++ for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { ++ x := v_0 ++ if v_1.Op != OpLOONG64MOVVconst || auxIntToInt64(v_1.AuxInt) != 1 { ++ continue ++ } ++ v.copyOf(x) ++ return true ++ } ++ break ++ } ++ // match: (MULV x (MOVVconst [c])) ++ // cond: isPowerOfTwo64(c) ++ // result: (SLLVconst [log64(c)] x) ++ for { ++ for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { ++ x := v_0 ++ if v_1.Op != OpLOONG64MOVVconst { ++ continue ++ } ++ c := auxIntToInt64(v_1.AuxInt) ++ if !(isPowerOfTwo64(c)) { ++ continue ++ } ++ v.reset(OpLOONG64SLLVconst) ++ v.AuxInt = int64ToAuxInt(log64(c)) ++ v.AddArg(x) ++ return true ++ } ++ break ++ } ++ // match: (MULV 
(MOVVconst [c]) (MOVVconst [d])) ++ // result: (MOVVconst [c*d]) ++ for { ++ for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { ++ if v_0.Op != OpLOONG64MOVVconst { ++ continue ++ } ++ c := auxIntToInt64(v_0.AuxInt) ++ if v_1.Op != OpLOONG64MOVVconst { ++ continue ++ } ++ d := auxIntToInt64(v_1.AuxInt) ++ v.reset(OpLOONG64MOVVconst) ++ v.AuxInt = int64ToAuxInt(c * d) ++ return true ++ } ++ break ++ } ++ return false ++} + func rewriteValueLOONG64_OpLOONG64NEGV(v *Value) bool { + v_0 := v.Args[0] + // match: (NEGV (MOVVconst [c])) +@@ -5671,74 +5721,6 @@ func rewriteValueLOONG64_OpMove(v *Value) bool { + } + return false + } +-func rewriteValueLOONG64_OpMul16(v *Value) bool { +- v_1 := v.Args[1] +- v_0 := v.Args[0] +- b := v.Block +- typ := &b.Func.Config.Types +- // match: (Mul16 x y) +- // result: (Select1 (MULVU x y)) +- for { +- x := v_0 +- y := v_1 +- v.reset(OpSelect1) +- v0 := b.NewValue0(v.Pos, OpLOONG64MULVU, types.NewTuple(typ.UInt64, typ.UInt64)) +- v0.AddArg2(x, y) +- v.AddArg(v0) +- return true +- } +-} +-func rewriteValueLOONG64_OpMul32(v *Value) bool { +- v_1 := v.Args[1] +- v_0 := v.Args[0] +- b := v.Block +- typ := &b.Func.Config.Types +- // match: (Mul32 x y) +- // result: (Select1 (MULVU x y)) +- for { +- x := v_0 +- y := v_1 +- v.reset(OpSelect1) +- v0 := b.NewValue0(v.Pos, OpLOONG64MULVU, types.NewTuple(typ.UInt64, typ.UInt64)) +- v0.AddArg2(x, y) +- v.AddArg(v0) +- return true +- } +-} +-func rewriteValueLOONG64_OpMul64(v *Value) bool { +- v_1 := v.Args[1] +- v_0 := v.Args[0] +- b := v.Block +- typ := &b.Func.Config.Types +- // match: (Mul64 x y) +- // result: (Select1 (MULVU x y)) +- for { +- x := v_0 +- y := v_1 +- v.reset(OpSelect1) +- v0 := b.NewValue0(v.Pos, OpLOONG64MULVU, types.NewTuple(typ.UInt64, typ.UInt64)) +- v0.AddArg2(x, y) +- v.AddArg(v0) +- return true +- } +-} +-func rewriteValueLOONG64_OpMul8(v *Value) bool { +- v_1 := v.Args[1] +- v_0 := v.Args[0] +- b := v.Block +- typ := &b.Func.Config.Types +- // match: (Mul8 x y) 
+- // result: (Select1 (MULVU x y)) +- for { +- x := v_0 +- y := v_1 +- v.reset(OpSelect1) +- v0 := b.NewValue0(v.Pos, OpLOONG64MULVU, types.NewTuple(typ.UInt64, typ.UInt64)) +- v0.AddArg2(x, y) +- v.AddArg(v0) +- return true +- } +-} + func rewriteValueLOONG64_OpNeq16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] +@@ -6925,20 +6907,28 @@ func rewriteValueLOONG64_OpRsh8x8(v *Value) bool { + func rewriteValueLOONG64_OpSelect0(v *Value) bool { + v_0 := v.Args[0] + b := v.Block +- typ := &b.Func.Config.Types ++ // match: (Select0 (Mul64uhilo x y)) ++ // result: (MULHVU x y) ++ for { ++ if v_0.Op != OpMul64uhilo { ++ break ++ } ++ y := v_0.Args[1] ++ x := v_0.Args[0] ++ v.reset(OpLOONG64MULHVU) ++ v.AddArg2(x, y) ++ return true ++ } + // match: (Select0 (Mul64uover x y)) +- // result: (Select1 (MULVU x y)) ++ // result: (MULV x y) + for { + if v_0.Op != OpMul64uover { + break + } + y := v_0.Args[1] + x := v_0.Args[0] +- v.reset(OpSelect1) +- v.Type = typ.UInt64 +- v0 := b.NewValue0(v.Pos, OpLOONG64MULVU, types.NewTuple(typ.UInt64, typ.UInt64)) +- v0.AddArg2(x, y) +- v.AddArg(v0) ++ v.reset(OpLOONG64MULV) ++ v.AddArg2(x, y) + return true + } + // match: (Select0 (Add64carry x y c)) +@@ -7066,8 +7056,20 @@ func rewriteValueLOONG64_OpSelect1(v *Value) bool { + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types ++ // match: (Select1 (Mul64uhilo x y)) ++ // result: (MULV x y) ++ for { ++ if v_0.Op != OpMul64uhilo { ++ break ++ } ++ y := v_0.Args[1] ++ x := v_0.Args[0] ++ v.reset(OpLOONG64MULV) ++ v.AddArg2(x, y) ++ return true ++ } + // match: (Select1 (Mul64uover x y)) +- // result: (SGTU (Select0 (MULVU x y)) (MOVVconst [0])) ++ // result: (SGTU (MULHVU x y) (MOVVconst [0])) + for { + if v_0.Op != OpMul64uover { + break +@@ -7076,13 +7078,11 @@ func rewriteValueLOONG64_OpSelect1(v *Value) bool { + x := v_0.Args[0] + v.reset(OpLOONG64SGTU) + v.Type = typ.Bool +- v0 := b.NewValue0(v.Pos, OpSelect0, typ.UInt64) +- v1 := b.NewValue0(v.Pos, OpLOONG64MULVU, 
types.NewTuple(typ.UInt64, typ.UInt64)) +- v1.AddArg2(x, y) +- v0.AddArg(v1) +- v2 := b.NewValue0(v.Pos, OpLOONG64MOVVconst, typ.UInt64) +- v2.AuxInt = int64ToAuxInt(0) +- v.AddArg2(v0, v2) ++ v0 := b.NewValue0(v.Pos, OpLOONG64MULHVU, typ.UInt64) ++ v0.AddArg2(x, y) ++ v1 := b.NewValue0(v.Pos, OpLOONG64MOVVconst, typ.UInt64) ++ v1.AuxInt = int64ToAuxInt(0) ++ v.AddArg2(v0, v1) + return true + } + // match: (Select1 (Add64carry x y c)) +@@ -7129,90 +7129,6 @@ func rewriteValueLOONG64_OpSelect1(v *Value) bool { + v.AddArg2(v0, v2) + return true + } +- // match: (Select1 (MULVU x (MOVVconst [-1]))) +- // result: (NEGV x) +- for { +- if v_0.Op != OpLOONG64MULVU { +- break +- } +- _ = v_0.Args[1] +- v_0_0 := v_0.Args[0] +- v_0_1 := v_0.Args[1] +- for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { +- x := v_0_0 +- if v_0_1.Op != OpLOONG64MOVVconst || auxIntToInt64(v_0_1.AuxInt) != -1 { +- continue +- } +- v.reset(OpLOONG64NEGV) +- v.AddArg(x) +- return true +- } +- break +- } +- // match: (Select1 (MULVU _ (MOVVconst [0]))) +- // result: (MOVVconst [0]) +- for { +- if v_0.Op != OpLOONG64MULVU { +- break +- } +- _ = v_0.Args[1] +- v_0_0 := v_0.Args[0] +- v_0_1 := v_0.Args[1] +- for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { +- if v_0_1.Op != OpLOONG64MOVVconst || auxIntToInt64(v_0_1.AuxInt) != 0 { +- continue +- } +- v.reset(OpLOONG64MOVVconst) +- v.AuxInt = int64ToAuxInt(0) +- return true +- } +- break +- } +- // match: (Select1 (MULVU x (MOVVconst [1]))) +- // result: x +- for { +- if v_0.Op != OpLOONG64MULVU { +- break +- } +- _ = v_0.Args[1] +- v_0_0 := v_0.Args[0] +- v_0_1 := v_0.Args[1] +- for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { +- x := v_0_0 +- if v_0_1.Op != OpLOONG64MOVVconst || auxIntToInt64(v_0_1.AuxInt) != 1 { +- continue +- } +- v.copyOf(x) +- return true +- } +- break +- } +- // match: (Select1 (MULVU x (MOVVconst [c]))) +- // cond: isPowerOfTwo64(c) +- // result: (SLLVconst [log64(c)] x) +- for { 
+- if v_0.Op != OpLOONG64MULVU { +- break +- } +- _ = v_0.Args[1] +- v_0_0 := v_0.Args[0] +- v_0_1 := v_0.Args[1] +- for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { +- x := v_0_0 +- if v_0_1.Op != OpLOONG64MOVVconst { +- continue +- } +- c := auxIntToInt64(v_0_1.AuxInt) +- if !(isPowerOfTwo64(c)) { +- continue +- } +- v.reset(OpLOONG64SLLVconst) +- v.AuxInt = int64ToAuxInt(log64(c)) +- v.AddArg(x) +- return true +- } +- break +- } + // match: (Select1 (DIVVU x (MOVVconst [1]))) + // result: x + for { +@@ -7250,30 +7166,6 @@ func rewriteValueLOONG64_OpSelect1(v *Value) bool { + v.AddArg(x) + return true + } +- // match: (Select1 (MULVU (MOVVconst [c]) (MOVVconst [d]))) +- // result: (MOVVconst [c*d]) +- for { +- if v_0.Op != OpLOONG64MULVU { +- break +- } +- _ = v_0.Args[1] +- v_0_0 := v_0.Args[0] +- v_0_1 := v_0.Args[1] +- for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { +- if v_0_0.Op != OpLOONG64MOVVconst { +- continue +- } +- c := auxIntToInt64(v_0_0.AuxInt) +- if v_0_1.Op != OpLOONG64MOVVconst { +- continue +- } +- d := auxIntToInt64(v_0_1.AuxInt) +- v.reset(OpLOONG64MOVVconst) +- v.AuxInt = int64ToAuxInt(c * d) +- return true +- } +- break +- } + // match: (Select1 (DIVV (MOVVconst [c]) (MOVVconst [d]))) + // cond: d != 0 + // result: (MOVVconst [c/d]) +-- +2.38.1 + diff --git a/0054-cmd-compile-split-DIVV-DIVVU-op-on-loong64.patch b/0054-cmd-compile-split-DIVV-DIVVU-op-on-loong64.patch new file mode 100644 index 0000000000000000000000000000000000000000..b31b3e9cf0bdd1d9be95f3bdc4db5120cea408cc --- /dev/null +++ b/0054-cmd-compile-split-DIVV-DIVVU-op-on-loong64.patch @@ -0,0 +1,1038 @@ +From a4ae935430449401f42427818790e9d2e2398518 Mon Sep 17 00:00:00 2001 +From: Wayne Zuo +Date: Thu, 2 Mar 2023 15:55:03 +0800 +Subject: [PATCH 54/62] cmd/compile: split DIVV/DIVVU op on loong64 + +Previously, we need calculate both quotient and remainder together. +However, in most cases, only one result is needed. 
By separating these +instructions, we can save one instruction in most cases. + +Change-Id: I0a2d4167cda68ab606783ba1aa2720ede19d6b53 +Reviewed-on: https://go-review.googlesource.com/c/go/+/475315 +Reviewed-by: Than McIntosh +Run-TryBot: Wayne Zuo +Reviewed-by: abner chenc +TryBot-Result: Gopher Robot +Reviewed-by: David Chase +--- + src/cmd/compile/internal/loong64/ssa.go | 29 +- + .../compile/internal/ssa/_gen/LOONG64.rules | 48 +- + .../compile/internal/ssa/_gen/LOONG64Ops.go | 8 +- + src/cmd/compile/internal/ssa/opGen.go | 52 +- + .../compile/internal/ssa/rewriteLOONG64.go | 560 ++++++++---------- + 5 files changed, 310 insertions(+), 387 deletions(-) + +diff --git a/src/cmd/compile/internal/loong64/ssa.go b/src/cmd/compile/internal/loong64/ssa.go +index 1c84dccb11..18abe06966 100644 +--- a/src/cmd/compile/internal/loong64/ssa.go ++++ b/src/cmd/compile/internal/loong64/ssa.go +@@ -163,7 +163,8 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) { + ssa.OpLOONG64MULD, + ssa.OpLOONG64DIVF, + ssa.OpLOONG64DIVD, +- ssa.OpLOONG64MULV, ssa.OpLOONG64MULHV, ssa.OpLOONG64MULHVU: ++ ssa.OpLOONG64MULV, ssa.OpLOONG64MULHV, ssa.OpLOONG64MULHVU, ++ ssa.OpLOONG64DIVV, ssa.OpLOONG64REMV, ssa.OpLOONG64DIVVU, ssa.OpLOONG64REMVU: + p := s.Prog(v.Op.Asm()) + p.From.Type = obj.TYPE_REG + p.From.Reg = v.Args[1].Reg() +@@ -197,32 +198,6 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) { + p.Reg = v.Args[0].Reg() + p.To.Type = obj.TYPE_REG + p.To.Reg = v.Reg() +- case ssa.OpLOONG64DIVV: +- p := s.Prog(loong64.ADIVV) +- p.From.Type = obj.TYPE_REG +- p.From.Reg = v.Args[1].Reg() +- p.Reg = v.Args[0].Reg() +- p.To.Type = obj.TYPE_REG +- p.To.Reg = v.Reg1() +- p1 := s.Prog(loong64.AREMV) +- p1.From.Type = obj.TYPE_REG +- p1.From.Reg = v.Args[1].Reg() +- p1.Reg = v.Args[0].Reg() +- p1.To.Type = obj.TYPE_REG +- p1.To.Reg = v.Reg0() +- case ssa.OpLOONG64DIVVU: +- p := s.Prog(loong64.ADIVVU) +- p.From.Type = obj.TYPE_REG +- p.From.Reg = v.Args[1].Reg() +- p.Reg = v.Args[0].Reg() +- p.To.Type 
= obj.TYPE_REG +- p.To.Reg = v.Reg1() +- p1 := s.Prog(loong64.AREMVU) +- p1.From.Type = obj.TYPE_REG +- p1.From.Reg = v.Args[1].Reg() +- p1.Reg = v.Args[0].Reg() +- p1.To.Type = obj.TYPE_REG +- p1.To.Reg = v.Reg0() + case ssa.OpLOONG64MOVVconst: + r := v.Reg() + p := s.Prog(v.Op.Asm()) +diff --git a/src/cmd/compile/internal/ssa/_gen/LOONG64.rules b/src/cmd/compile/internal/ssa/_gen/LOONG64.rules +index 8c81b7a3f6..dc1a76dc93 100644 +--- a/src/cmd/compile/internal/ssa/_gen/LOONG64.rules ++++ b/src/cmd/compile/internal/ssa/_gen/LOONG64.rules +@@ -20,24 +20,24 @@ + (Hmul32 x y) => (SRAVconst (MULV (SignExt32to64 x) (SignExt32to64 y)) [32]) + (Hmul32u x y) => (SRLVconst (MULV (ZeroExt32to64 x) (ZeroExt32to64 y)) [32]) + +-(Div64 x y) => (Select1 (DIVV x y)) +-(Div64u x y) => (Select1 (DIVVU x y)) +-(Div32 x y) => (Select1 (DIVV (SignExt32to64 x) (SignExt32to64 y))) +-(Div32u x y) => (Select1 (DIVVU (ZeroExt32to64 x) (ZeroExt32to64 y))) +-(Div16 x y) => (Select1 (DIVV (SignExt16to64 x) (SignExt16to64 y))) +-(Div16u x y) => (Select1 (DIVVU (ZeroExt16to64 x) (ZeroExt16to64 y))) +-(Div8 x y) => (Select1 (DIVV (SignExt8to64 x) (SignExt8to64 y))) +-(Div8u x y) => (Select1 (DIVVU (ZeroExt8to64 x) (ZeroExt8to64 y))) ++(Div64 x y) => (DIVV x y) ++(Div64u ...) => (DIVVU ...) ++(Div32 x y) => (DIVV (SignExt32to64 x) (SignExt32to64 y)) ++(Div32u x y) => (DIVVU (ZeroExt32to64 x) (ZeroExt32to64 y)) ++(Div16 x y) => (DIVV (SignExt16to64 x) (SignExt16to64 y)) ++(Div16u x y) => (DIVVU (ZeroExt16to64 x) (ZeroExt16to64 y)) ++(Div8 x y) => (DIVV (SignExt8to64 x) (SignExt8to64 y)) ++(Div8u x y) => (DIVVU (ZeroExt8to64 x) (ZeroExt8to64 y)) + (Div(32|64)F ...) => (DIV(F|D) ...) 
+ +-(Mod64 x y) => (Select0 (DIVV x y)) +-(Mod64u x y) => (Select0 (DIVVU x y)) +-(Mod32 x y) => (Select0 (DIVV (SignExt32to64 x) (SignExt32to64 y))) +-(Mod32u x y) => (Select0 (DIVVU (ZeroExt32to64 x) (ZeroExt32to64 y))) +-(Mod16 x y) => (Select0 (DIVV (SignExt16to64 x) (SignExt16to64 y))) +-(Mod16u x y) => (Select0 (DIVVU (ZeroExt16to64 x) (ZeroExt16to64 y))) +-(Mod8 x y) => (Select0 (DIVV (SignExt8to64 x) (SignExt8to64 y))) +-(Mod8u x y) => (Select0 (DIVVU (ZeroExt8to64 x) (ZeroExt8to64 y))) ++(Mod64 x y) => (REMV x y) ++(Mod64u ...) => (REMVU ...) ++(Mod32 x y) => (REMV (SignExt32to64 x) (SignExt32to64 y)) ++(Mod32u x y) => (REMVU (ZeroExt32to64 x) (ZeroExt32to64 y)) ++(Mod16 x y) => (REMV (SignExt16to64 x) (SignExt16to64 y)) ++(Mod16u x y) => (REMVU (ZeroExt16to64 x) (ZeroExt16to64 y)) ++(Mod8 x y) => (REMV (SignExt8to64 x) (SignExt8to64 y)) ++(Mod8u x y) => (REMVU (ZeroExt8to64 x) (ZeroExt8to64 y)) + + (Select0 (Add64carry x y c)) => (ADDV (ADDV x y) c) + (Select1 (Add64carry x y c)) => +@@ -578,10 +578,10 @@ + (MULV x (MOVVconst [c])) && isPowerOfTwo64(c) => (SLLVconst [log64(c)] x) + + // div by constant +-(Select1 (DIVVU x (MOVVconst [1]))) => x +-(Select1 (DIVVU x (MOVVconst [c]))) && isPowerOfTwo64(c) => (SRLVconst [log64(c)] x) +-(Select0 (DIVVU _ (MOVVconst [1]))) => (MOVVconst [0]) // mod +-(Select0 (DIVVU x (MOVVconst [c]))) && isPowerOfTwo64(c) => (ANDconst [c-1] x) // mod ++(DIVVU x (MOVVconst [1])) => x ++(DIVVU x (MOVVconst [c])) && isPowerOfTwo64(c) => (SRLVconst [log64(c)] x) ++(REMVU _ (MOVVconst [1])) => (MOVVconst [0]) // mod ++(REMVU x (MOVVconst [c])) && isPowerOfTwo64(c) => (ANDconst [c-1] x) // mod + + // generic simplifications + (ADDV x (NEGV y)) => (SUBV x y) +@@ -614,10 +614,10 @@ + (SRLVconst [c] (MOVVconst [d])) => (MOVVconst [int64(uint64(d)>>uint64(c))]) + (SRAVconst [c] (MOVVconst [d])) => (MOVVconst [d>>uint64(c)]) + (MULV (MOVVconst [c]) (MOVVconst [d])) => (MOVVconst [c*d]) +-(Select1 (DIVV (MOVVconst [c]) (MOVVconst [d]))) 
&& d != 0 => (MOVVconst [c/d]) +-(Select1 (DIVVU (MOVVconst [c]) (MOVVconst [d]))) && d != 0 => (MOVVconst [int64(uint64(c)/uint64(d))]) +-(Select0 (DIVV (MOVVconst [c]) (MOVVconst [d]))) && d != 0 => (MOVVconst [c%d]) // mod +-(Select0 (DIVVU (MOVVconst [c]) (MOVVconst [d]))) && d != 0 => (MOVVconst [int64(uint64(c)%uint64(d))]) // mod ++(DIVV (MOVVconst [c]) (MOVVconst [d])) && d != 0 => (MOVVconst [c/d]) ++(DIVVU (MOVVconst [c]) (MOVVconst [d])) && d != 0 => (MOVVconst [int64(uint64(c)/uint64(d))]) ++(REMV (MOVVconst [c]) (MOVVconst [d])) && d != 0 => (MOVVconst [c%d]) // mod ++(REMVU (MOVVconst [c]) (MOVVconst [d])) && d != 0 => (MOVVconst [int64(uint64(c)%uint64(d))]) // mod + (ANDconst [c] (MOVVconst [d])) => (MOVVconst [c&d]) + (ANDconst [c] (ANDconst [d] x)) => (ANDconst [c&d] x) + (ORconst [c] (MOVVconst [d])) => (MOVVconst [c|d]) +diff --git a/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go b/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go +index e40354e526..f2b9ac9ccd 100644 +--- a/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go ++++ b/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go +@@ -124,7 +124,6 @@ func init() { + // Common individual register masks + var ( + gp = buildReg("R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R23 R24 R25 R26 R27 R28 R29 R31") // R1 is LR, R2 is thread pointer, R3 is stack pointer, R21-unused, R22 is g, R30 is REGTMP +- gps = buildReg("R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R19 R20 R23 R24 R25 R26 R27 R28 R29 R31") | buildReg("g") + gpg = gp | buildReg("g") + gpsp = gp | buildReg("SP") + gpspg = gpg | buildReg("SP") +@@ -142,7 +141,6 @@ func init() { + gp11 = regInfo{inputs: []regMask{gpg}, outputs: []regMask{gp}} + gp11sp = regInfo{inputs: []regMask{gpspg}, outputs: []regMask{gp}} + gp21 = regInfo{inputs: []regMask{gpg, gpg}, outputs: []regMask{gp}} +- gp22 = regInfo{inputs: []regMask{gps, gps}, outputs: []regMask{gp, gp}} + gpload = regInfo{inputs: []regMask{gpspsbg}, outputs: []regMask{gp}} + 
gpstore = regInfo{inputs: []regMask{gpspsbg, gpg}} + gpstore0 = regInfo{inputs: []regMask{gpspsbg}} +@@ -166,8 +164,10 @@ func init() { + {name: "MULV", argLength: 2, reg: gp21, asm: "MULV", commutative: true, typ: "Int64"}, // arg0 * arg1 + {name: "MULHV", argLength: 2, reg: gp21, asm: "MULHV", commutative: true, typ: "Int64"}, // (arg0 * arg1) >> 64, signed + {name: "MULHVU", argLength: 2, reg: gp21, asm: "MULHVU", commutative: true, typ: "UInt64"}, // (arg0 * arg1) >> 64, unsigned +- {name: "DIVV", argLength: 2, reg: gp22, resultNotInArgs: true, typ: "(Int64,Int64)"}, // arg0 / arg1, signed +- {name: "DIVVU", argLength: 2, reg: gp22, resultNotInArgs: true, typ: "(UInt64,UInt64)"}, // arg0 / arg1, unsigned ++ {name: "DIVV", argLength: 2, reg: gp21, asm: "DIVV", typ: "Int64"}, // arg0 / arg1, signed ++ {name: "DIVVU", argLength: 2, reg: gp21, asm: "DIVVU", typ: "UInt64"}, // arg0 / arg1, unsigned ++ {name: "REMV", argLength: 2, reg: gp21, asm: "REMV", typ: "Int64"}, // arg0 / arg1, signed ++ {name: "REMVU", argLength: 2, reg: gp21, asm: "REMVU", typ: "UInt64"}, // arg0 / arg1, unsigned + + {name: "ADDF", argLength: 2, reg: fp21, asm: "ADDF", commutative: true}, // arg0 + arg1 + {name: "ADDD", argLength: 2, reg: fp21, asm: "ADDD", commutative: true}, // arg0 + arg1 +diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go +index 33b05a09bc..5a001573ea 100644 +--- a/src/cmd/compile/internal/ssa/opGen.go ++++ b/src/cmd/compile/internal/ssa/opGen.go +@@ -1720,6 +1720,8 @@ const ( + OpLOONG64MULHVU + OpLOONG64DIVV + OpLOONG64DIVVU ++ OpLOONG64REMV ++ OpLOONG64REMVU + OpLOONG64ADDF + OpLOONG64ADDD + OpLOONG64SUBF +@@ -22959,32 +22961,58 @@ var opcodeTable = [...]opInfo{ + }, + }, + { +- name: "DIVV", +- argLen: 2, +- resultNotInArgs: true, ++ name: "DIVV", ++ argLen: 2, ++ asm: loong64.ADIVV, + reg: regInfo{ + inputs: []inputInfo{ +- {0, 1072496632}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R19 R20 g R23 R24 R25 R26 R27 R28 R29 
R31 +- {1, 1072496632}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 ++ {0, 1072693240}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 ++ {1, 1072693240}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 + }, + outputs: []outputInfo{ + {0, 1070596088}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R23 R24 R25 R26 R27 R28 R29 R31 +- {1, 1070596088}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R23 R24 R25 R26 R27 R28 R29 R31 + }, + }, + }, + { +- name: "DIVVU", +- argLen: 2, +- resultNotInArgs: true, ++ name: "DIVVU", ++ argLen: 2, ++ asm: loong64.ADIVVU, + reg: regInfo{ + inputs: []inputInfo{ +- {0, 1072496632}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 +- {1, 1072496632}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 ++ {0, 1072693240}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 ++ {1, 1072693240}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 ++ }, ++ outputs: []outputInfo{ ++ {0, 1070596088}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R23 R24 R25 R26 R27 R28 R29 R31 ++ }, ++ }, ++ }, ++ { ++ name: "REMV", ++ argLen: 2, ++ asm: loong64.AREMV, ++ reg: regInfo{ ++ inputs: []inputInfo{ ++ {0, 1072693240}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 ++ {1, 1072693240}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 ++ }, ++ outputs: []outputInfo{ ++ {0, 1070596088}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R23 R24 R25 R26 R27 R28 R29 R31 ++ }, ++ }, ++ }, ++ { ++ name: "REMVU", ++ argLen: 2, ++ asm: loong64.AREMVU, ++ reg: regInfo{ 
++ inputs: []inputInfo{ ++ {0, 1072693240}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 ++ {1, 1072693240}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 + }, + outputs: []outputInfo{ + {0, 1070596088}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R23 R24 R25 R26 R27 R28 R29 R31 +- {1, 1070596088}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R23 R24 R25 R26 R27 R28 R29 R31 + }, + }, + }, +diff --git a/src/cmd/compile/internal/ssa/rewriteLOONG64.go b/src/cmd/compile/internal/ssa/rewriteLOONG64.go +index 49122c5feb..08e0d6f6c2 100644 +--- a/src/cmd/compile/internal/ssa/rewriteLOONG64.go ++++ b/src/cmd/compile/internal/ssa/rewriteLOONG64.go +@@ -167,7 +167,8 @@ func rewriteValueLOONG64(v *Value) bool { + v.Op = OpLOONG64DIVD + return true + case OpDiv64u: +- return rewriteValueLOONG64_OpDiv64u(v) ++ v.Op = OpLOONG64DIVVU ++ return true + case OpDiv8: + return rewriteValueLOONG64_OpDiv8(v) + case OpDiv8u: +@@ -224,6 +225,10 @@ func rewriteValueLOONG64(v *Value) bool { + return rewriteValueLOONG64_OpLOONG64AND(v) + case OpLOONG64ANDconst: + return rewriteValueLOONG64_OpLOONG64ANDconst(v) ++ case OpLOONG64DIVV: ++ return rewriteValueLOONG64_OpLOONG64DIVV(v) ++ case OpLOONG64DIVVU: ++ return rewriteValueLOONG64_OpLOONG64DIVVU(v) + case OpLOONG64LoweredAtomicAdd32: + return rewriteValueLOONG64_OpLOONG64LoweredAtomicAdd32(v) + case OpLOONG64LoweredAtomicAdd64: +@@ -300,6 +305,10 @@ func rewriteValueLOONG64(v *Value) bool { + return rewriteValueLOONG64_OpLOONG64OR(v) + case OpLOONG64ORconst: + return rewriteValueLOONG64_OpLOONG64ORconst(v) ++ case OpLOONG64REMV: ++ return rewriteValueLOONG64_OpLOONG64REMV(v) ++ case OpLOONG64REMVU: ++ return rewriteValueLOONG64_OpLOONG64REMVU(v) + case OpLOONG64ROTR: + return rewriteValueLOONG64_OpLOONG64ROTR(v) + case OpLOONG64ROTRV: +@@ -419,7 +428,8 @@ func rewriteValueLOONG64(v *Value) bool 
{ + case OpMod64: + return rewriteValueLOONG64_OpMod64(v) + case OpMod64u: +- return rewriteValueLOONG64_OpMod64u(v) ++ v.Op = OpLOONG64REMVU ++ return true + case OpMod8: + return rewriteValueLOONG64_OpMod8(v) + case OpMod8u: +@@ -916,18 +926,16 @@ func rewriteValueLOONG64_OpDiv16(v *Value) bool { + b := v.Block + typ := &b.Func.Config.Types + // match: (Div16 x y) +- // result: (Select1 (DIVV (SignExt16to64 x) (SignExt16to64 y))) ++ // result: (DIVV (SignExt16to64 x) (SignExt16to64 y)) + for { + x := v_0 + y := v_1 +- v.reset(OpSelect1) +- v0 := b.NewValue0(v.Pos, OpLOONG64DIVV, types.NewTuple(typ.Int64, typ.Int64)) ++ v.reset(OpLOONG64DIVV) ++ v0 := b.NewValue0(v.Pos, OpSignExt16to64, typ.Int64) ++ v0.AddArg(x) + v1 := b.NewValue0(v.Pos, OpSignExt16to64, typ.Int64) +- v1.AddArg(x) +- v2 := b.NewValue0(v.Pos, OpSignExt16to64, typ.Int64) +- v2.AddArg(y) +- v0.AddArg2(v1, v2) +- v.AddArg(v0) ++ v1.AddArg(y) ++ v.AddArg2(v0, v1) + return true + } + } +@@ -937,18 +945,16 @@ func rewriteValueLOONG64_OpDiv16u(v *Value) bool { + b := v.Block + typ := &b.Func.Config.Types + // match: (Div16u x y) +- // result: (Select1 (DIVVU (ZeroExt16to64 x) (ZeroExt16to64 y))) ++ // result: (DIVVU (ZeroExt16to64 x) (ZeroExt16to64 y)) + for { + x := v_0 + y := v_1 +- v.reset(OpSelect1) +- v0 := b.NewValue0(v.Pos, OpLOONG64DIVVU, types.NewTuple(typ.UInt64, typ.UInt64)) ++ v.reset(OpLOONG64DIVVU) ++ v0 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64) ++ v0.AddArg(x) + v1 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64) +- v1.AddArg(x) +- v2 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64) +- v2.AddArg(y) +- v0.AddArg2(v1, v2) +- v.AddArg(v0) ++ v1.AddArg(y) ++ v.AddArg2(v0, v1) + return true + } + } +@@ -958,18 +964,16 @@ func rewriteValueLOONG64_OpDiv32(v *Value) bool { + b := v.Block + typ := &b.Func.Config.Types + // match: (Div32 x y) +- // result: (Select1 (DIVV (SignExt32to64 x) (SignExt32to64 y))) ++ // result: (DIVV (SignExt32to64 x) (SignExt32to64 y)) + for { + x := v_0 + 
y := v_1 +- v.reset(OpSelect1) +- v0 := b.NewValue0(v.Pos, OpLOONG64DIVV, types.NewTuple(typ.Int64, typ.Int64)) ++ v.reset(OpLOONG64DIVV) ++ v0 := b.NewValue0(v.Pos, OpSignExt32to64, typ.Int64) ++ v0.AddArg(x) + v1 := b.NewValue0(v.Pos, OpSignExt32to64, typ.Int64) +- v1.AddArg(x) +- v2 := b.NewValue0(v.Pos, OpSignExt32to64, typ.Int64) +- v2.AddArg(y) +- v0.AddArg2(v1, v2) +- v.AddArg(v0) ++ v1.AddArg(y) ++ v.AddArg2(v0, v1) + return true + } + } +@@ -979,52 +983,29 @@ func rewriteValueLOONG64_OpDiv32u(v *Value) bool { + b := v.Block + typ := &b.Func.Config.Types + // match: (Div32u x y) +- // result: (Select1 (DIVVU (ZeroExt32to64 x) (ZeroExt32to64 y))) ++ // result: (DIVVU (ZeroExt32to64 x) (ZeroExt32to64 y)) + for { + x := v_0 + y := v_1 +- v.reset(OpSelect1) +- v0 := b.NewValue0(v.Pos, OpLOONG64DIVVU, types.NewTuple(typ.UInt64, typ.UInt64)) ++ v.reset(OpLOONG64DIVVU) ++ v0 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64) ++ v0.AddArg(x) + v1 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64) +- v1.AddArg(x) +- v2 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64) +- v2.AddArg(y) +- v0.AddArg2(v1, v2) +- v.AddArg(v0) ++ v1.AddArg(y) ++ v.AddArg2(v0, v1) + return true + } + } + func rewriteValueLOONG64_OpDiv64(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] +- b := v.Block +- typ := &b.Func.Config.Types + // match: (Div64 x y) +- // result: (Select1 (DIVV x y)) +- for { +- x := v_0 +- y := v_1 +- v.reset(OpSelect1) +- v0 := b.NewValue0(v.Pos, OpLOONG64DIVV, types.NewTuple(typ.Int64, typ.Int64)) +- v0.AddArg2(x, y) +- v.AddArg(v0) +- return true +- } +-} +-func rewriteValueLOONG64_OpDiv64u(v *Value) bool { +- v_1 := v.Args[1] +- v_0 := v.Args[0] +- b := v.Block +- typ := &b.Func.Config.Types +- // match: (Div64u x y) +- // result: (Select1 (DIVVU x y)) ++ // result: (DIVV x y) + for { + x := v_0 + y := v_1 +- v.reset(OpSelect1) +- v0 := b.NewValue0(v.Pos, OpLOONG64DIVVU, types.NewTuple(typ.UInt64, typ.UInt64)) +- v0.AddArg2(x, y) +- v.AddArg(v0) ++ 
v.reset(OpLOONG64DIVV) ++ v.AddArg2(x, y) + return true + } + } +@@ -1034,18 +1015,16 @@ func rewriteValueLOONG64_OpDiv8(v *Value) bool { + b := v.Block + typ := &b.Func.Config.Types + // match: (Div8 x y) +- // result: (Select1 (DIVV (SignExt8to64 x) (SignExt8to64 y))) ++ // result: (DIVV (SignExt8to64 x) (SignExt8to64 y)) + for { + x := v_0 + y := v_1 +- v.reset(OpSelect1) +- v0 := b.NewValue0(v.Pos, OpLOONG64DIVV, types.NewTuple(typ.Int64, typ.Int64)) ++ v.reset(OpLOONG64DIVV) ++ v0 := b.NewValue0(v.Pos, OpSignExt8to64, typ.Int64) ++ v0.AddArg(x) + v1 := b.NewValue0(v.Pos, OpSignExt8to64, typ.Int64) +- v1.AddArg(x) +- v2 := b.NewValue0(v.Pos, OpSignExt8to64, typ.Int64) +- v2.AddArg(y) +- v0.AddArg2(v1, v2) +- v.AddArg(v0) ++ v1.AddArg(y) ++ v.AddArg2(v0, v1) + return true + } + } +@@ -1055,18 +1034,16 @@ func rewriteValueLOONG64_OpDiv8u(v *Value) bool { + b := v.Block + typ := &b.Func.Config.Types + // match: (Div8u x y) +- // result: (Select1 (DIVVU (ZeroExt8to64 x) (ZeroExt8to64 y))) ++ // result: (DIVVU (ZeroExt8to64 x) (ZeroExt8to64 y)) + for { + x := v_0 + y := v_1 +- v.reset(OpSelect1) +- v0 := b.NewValue0(v.Pos, OpLOONG64DIVVU, types.NewTuple(typ.UInt64, typ.UInt64)) ++ v.reset(OpLOONG64DIVVU) ++ v0 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64) ++ v0.AddArg(x) + v1 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64) +- v1.AddArg(x) +- v2 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64) +- v2.AddArg(y) +- v0.AddArg2(v1, v2) +- v.AddArg(v0) ++ v1.AddArg(y) ++ v.AddArg2(v0, v1) + return true + } + } +@@ -1526,6 +1503,81 @@ func rewriteValueLOONG64_OpLOONG64ANDconst(v *Value) bool { + } + return false + } ++func rewriteValueLOONG64_OpLOONG64DIVV(v *Value) bool { ++ v_1 := v.Args[1] ++ v_0 := v.Args[0] ++ // match: (DIVV (MOVVconst [c]) (MOVVconst [d])) ++ // cond: d != 0 ++ // result: (MOVVconst [c/d]) ++ for { ++ if v_0.Op != OpLOONG64MOVVconst { ++ break ++ } ++ c := auxIntToInt64(v_0.AuxInt) ++ if v_1.Op != OpLOONG64MOVVconst { ++ break ++ } ++ d := 
auxIntToInt64(v_1.AuxInt) ++ if !(d != 0) { ++ break ++ } ++ v.reset(OpLOONG64MOVVconst) ++ v.AuxInt = int64ToAuxInt(c / d) ++ return true ++ } ++ return false ++} ++func rewriteValueLOONG64_OpLOONG64DIVVU(v *Value) bool { ++ v_1 := v.Args[1] ++ v_0 := v.Args[0] ++ // match: (DIVVU x (MOVVconst [1])) ++ // result: x ++ for { ++ x := v_0 ++ if v_1.Op != OpLOONG64MOVVconst || auxIntToInt64(v_1.AuxInt) != 1 { ++ break ++ } ++ v.copyOf(x) ++ return true ++ } ++ // match: (DIVVU x (MOVVconst [c])) ++ // cond: isPowerOfTwo64(c) ++ // result: (SRLVconst [log64(c)] x) ++ for { ++ x := v_0 ++ if v_1.Op != OpLOONG64MOVVconst { ++ break ++ } ++ c := auxIntToInt64(v_1.AuxInt) ++ if !(isPowerOfTwo64(c)) { ++ break ++ } ++ v.reset(OpLOONG64SRLVconst) ++ v.AuxInt = int64ToAuxInt(log64(c)) ++ v.AddArg(x) ++ return true ++ } ++ // match: (DIVVU (MOVVconst [c]) (MOVVconst [d])) ++ // cond: d != 0 ++ // result: (MOVVconst [int64(uint64(c)/uint64(d))]) ++ for { ++ if v_0.Op != OpLOONG64MOVVconst { ++ break ++ } ++ c := auxIntToInt64(v_0.AuxInt) ++ if v_1.Op != OpLOONG64MOVVconst { ++ break ++ } ++ d := auxIntToInt64(v_1.AuxInt) ++ if !(d != 0) { ++ break ++ } ++ v.reset(OpLOONG64MOVVconst) ++ v.AuxInt = int64ToAuxInt(int64(uint64(c) / uint64(d))) ++ return true ++ } ++ return false ++} + func rewriteValueLOONG64_OpLOONG64LoweredAtomicAdd32(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] +@@ -3474,6 +3526,81 @@ func rewriteValueLOONG64_OpLOONG64ORconst(v *Value) bool { + } + return false + } ++func rewriteValueLOONG64_OpLOONG64REMV(v *Value) bool { ++ v_1 := v.Args[1] ++ v_0 := v.Args[0] ++ // match: (REMV (MOVVconst [c]) (MOVVconst [d])) ++ // cond: d != 0 ++ // result: (MOVVconst [c%d]) ++ for { ++ if v_0.Op != OpLOONG64MOVVconst { ++ break ++ } ++ c := auxIntToInt64(v_0.AuxInt) ++ if v_1.Op != OpLOONG64MOVVconst { ++ break ++ } ++ d := auxIntToInt64(v_1.AuxInt) ++ if !(d != 0) { ++ break ++ } ++ v.reset(OpLOONG64MOVVconst) ++ v.AuxInt = int64ToAuxInt(c % d) ++ return true ++ 
} ++ return false ++} ++func rewriteValueLOONG64_OpLOONG64REMVU(v *Value) bool { ++ v_1 := v.Args[1] ++ v_0 := v.Args[0] ++ // match: (REMVU _ (MOVVconst [1])) ++ // result: (MOVVconst [0]) ++ for { ++ if v_1.Op != OpLOONG64MOVVconst || auxIntToInt64(v_1.AuxInt) != 1 { ++ break ++ } ++ v.reset(OpLOONG64MOVVconst) ++ v.AuxInt = int64ToAuxInt(0) ++ return true ++ } ++ // match: (REMVU x (MOVVconst [c])) ++ // cond: isPowerOfTwo64(c) ++ // result: (ANDconst [c-1] x) ++ for { ++ x := v_0 ++ if v_1.Op != OpLOONG64MOVVconst { ++ break ++ } ++ c := auxIntToInt64(v_1.AuxInt) ++ if !(isPowerOfTwo64(c)) { ++ break ++ } ++ v.reset(OpLOONG64ANDconst) ++ v.AuxInt = int64ToAuxInt(c - 1) ++ v.AddArg(x) ++ return true ++ } ++ // match: (REMVU (MOVVconst [c]) (MOVVconst [d])) ++ // cond: d != 0 ++ // result: (MOVVconst [int64(uint64(c)%uint64(d))]) ++ for { ++ if v_0.Op != OpLOONG64MOVVconst { ++ break ++ } ++ c := auxIntToInt64(v_0.AuxInt) ++ if v_1.Op != OpLOONG64MOVVconst { ++ break ++ } ++ d := auxIntToInt64(v_1.AuxInt) ++ if !(d != 0) { ++ break ++ } ++ v.reset(OpLOONG64MOVVconst) ++ v.AuxInt = int64ToAuxInt(int64(uint64(c) % uint64(d))) ++ return true ++ } ++ return false ++} + func rewriteValueLOONG64_OpLOONG64ROTR(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] +@@ -5143,18 +5270,16 @@ func rewriteValueLOONG64_OpMod16(v *Value) bool { + b := v.Block + typ := &b.Func.Config.Types + // match: (Mod16 x y) +- // result: (Select0 (DIVV (SignExt16to64 x) (SignExt16to64 y))) ++ // result: (REMV (SignExt16to64 x) (SignExt16to64 y)) + for { + x := v_0 + y := v_1 +- v.reset(OpSelect0) +- v0 := b.NewValue0(v.Pos, OpLOONG64DIVV, types.NewTuple(typ.Int64, typ.Int64)) ++ v.reset(OpLOONG64REMV) ++ v0 := b.NewValue0(v.Pos, OpSignExt16to64, typ.Int64) ++ v0.AddArg(x) + v1 := b.NewValue0(v.Pos, OpSignExt16to64, typ.Int64) +- v1.AddArg(x) +- v2 := b.NewValue0(v.Pos, OpSignExt16to64, typ.Int64) +- v2.AddArg(y) +- v0.AddArg2(v1, v2) +- v.AddArg(v0) ++ v1.AddArg(y) ++ v.AddArg2(v0, v1) + 
return true + } + } +@@ -5164,18 +5289,16 @@ func rewriteValueLOONG64_OpMod16u(v *Value) bool { + b := v.Block + typ := &b.Func.Config.Types + // match: (Mod16u x y) +- // result: (Select0 (DIVVU (ZeroExt16to64 x) (ZeroExt16to64 y))) ++ // result: (REMVU (ZeroExt16to64 x) (ZeroExt16to64 y)) + for { + x := v_0 + y := v_1 +- v.reset(OpSelect0) +- v0 := b.NewValue0(v.Pos, OpLOONG64DIVVU, types.NewTuple(typ.UInt64, typ.UInt64)) ++ v.reset(OpLOONG64REMVU) ++ v0 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64) ++ v0.AddArg(x) + v1 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64) +- v1.AddArg(x) +- v2 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64) +- v2.AddArg(y) +- v0.AddArg2(v1, v2) +- v.AddArg(v0) ++ v1.AddArg(y) ++ v.AddArg2(v0, v1) + return true + } + } +@@ -5185,18 +5308,16 @@ func rewriteValueLOONG64_OpMod32(v *Value) bool { + b := v.Block + typ := &b.Func.Config.Types + // match: (Mod32 x y) +- // result: (Select0 (DIVV (SignExt32to64 x) (SignExt32to64 y))) ++ // result: (REMV (SignExt32to64 x) (SignExt32to64 y)) + for { + x := v_0 + y := v_1 +- v.reset(OpSelect0) +- v0 := b.NewValue0(v.Pos, OpLOONG64DIVV, types.NewTuple(typ.Int64, typ.Int64)) ++ v.reset(OpLOONG64REMV) ++ v0 := b.NewValue0(v.Pos, OpSignExt32to64, typ.Int64) ++ v0.AddArg(x) + v1 := b.NewValue0(v.Pos, OpSignExt32to64, typ.Int64) +- v1.AddArg(x) +- v2 := b.NewValue0(v.Pos, OpSignExt32to64, typ.Int64) +- v2.AddArg(y) +- v0.AddArg2(v1, v2) +- v.AddArg(v0) ++ v1.AddArg(y) ++ v.AddArg2(v0, v1) + return true + } + } +@@ -5206,52 +5327,29 @@ func rewriteValueLOONG64_OpMod32u(v *Value) bool { + b := v.Block + typ := &b.Func.Config.Types + // match: (Mod32u x y) +- // result: (Select0 (DIVVU (ZeroExt32to64 x) (ZeroExt32to64 y))) ++ // result: (REMVU (ZeroExt32to64 x) (ZeroExt32to64 y)) + for { + x := v_0 + y := v_1 +- v.reset(OpSelect0) +- v0 := b.NewValue0(v.Pos, OpLOONG64DIVVU, types.NewTuple(typ.UInt64, typ.UInt64)) ++ v.reset(OpLOONG64REMVU) ++ v0 := b.NewValue0(v.Pos, OpZeroExt32to64, 
typ.UInt64) ++ v0.AddArg(x) + v1 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64) +- v1.AddArg(x) +- v2 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64) +- v2.AddArg(y) +- v0.AddArg2(v1, v2) +- v.AddArg(v0) ++ v1.AddArg(y) ++ v.AddArg2(v0, v1) + return true + } + } + func rewriteValueLOONG64_OpMod64(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] +- b := v.Block +- typ := &b.Func.Config.Types + // match: (Mod64 x y) +- // result: (Select0 (DIVV x y)) ++ // result: (REMV x y) + for { + x := v_0 + y := v_1 +- v.reset(OpSelect0) +- v0 := b.NewValue0(v.Pos, OpLOONG64DIVV, types.NewTuple(typ.Int64, typ.Int64)) +- v0.AddArg2(x, y) +- v.AddArg(v0) +- return true +- } +-} +-func rewriteValueLOONG64_OpMod64u(v *Value) bool { +- v_1 := v.Args[1] +- v_0 := v.Args[0] +- b := v.Block +- typ := &b.Func.Config.Types +- // match: (Mod64u x y) +- // result: (Select0 (DIVVU x y)) +- for { +- x := v_0 +- y := v_1 +- v.reset(OpSelect0) +- v0 := b.NewValue0(v.Pos, OpLOONG64DIVVU, types.NewTuple(typ.UInt64, typ.UInt64)) +- v0.AddArg2(x, y) +- v.AddArg(v0) ++ v.reset(OpLOONG64REMV) ++ v.AddArg2(x, y) + return true + } + } +@@ -5261,18 +5359,16 @@ func rewriteValueLOONG64_OpMod8(v *Value) bool { + b := v.Block + typ := &b.Func.Config.Types + // match: (Mod8 x y) +- // result: (Select0 (DIVV (SignExt8to64 x) (SignExt8to64 y))) ++ // result: (REMV (SignExt8to64 x) (SignExt8to64 y)) + for { + x := v_0 + y := v_1 +- v.reset(OpSelect0) +- v0 := b.NewValue0(v.Pos, OpLOONG64DIVV, types.NewTuple(typ.Int64, typ.Int64)) ++ v.reset(OpLOONG64REMV) ++ v0 := b.NewValue0(v.Pos, OpSignExt8to64, typ.Int64) ++ v0.AddArg(x) + v1 := b.NewValue0(v.Pos, OpSignExt8to64, typ.Int64) +- v1.AddArg(x) +- v2 := b.NewValue0(v.Pos, OpSignExt8to64, typ.Int64) +- v2.AddArg(y) +- v0.AddArg2(v1, v2) +- v.AddArg(v0) ++ v1.AddArg(y) ++ v.AddArg2(v0, v1) + return true + } + } +@@ -5282,18 +5378,16 @@ func rewriteValueLOONG64_OpMod8u(v *Value) bool { + b := v.Block + typ := &b.Func.Config.Types + // match: (Mod8u x 
y) +- // result: (Select0 (DIVVU (ZeroExt8to64 x) (ZeroExt8to64 y))) ++ // result: (REMVU (ZeroExt8to64 x) (ZeroExt8to64 y)) + for { + x := v_0 + y := v_1 +- v.reset(OpSelect0) +- v0 := b.NewValue0(v.Pos, OpLOONG64DIVVU, types.NewTuple(typ.UInt64, typ.UInt64)) ++ v.reset(OpLOONG64REMVU) ++ v0 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64) ++ v0.AddArg(x) + v1 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64) +- v1.AddArg(x) +- v2 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64) +- v2.AddArg(y) +- v0.AddArg2(v1, v2) +- v.AddArg(v0) ++ v1.AddArg(y) ++ v.AddArg2(v0, v1) + return true + } + } +@@ -6963,93 +7057,6 @@ func rewriteValueLOONG64_OpSelect0(v *Value) bool { + v.AddArg2(v0, c) + return true + } +- // match: (Select0 (DIVVU _ (MOVVconst [1]))) +- // result: (MOVVconst [0]) +- for { +- if v_0.Op != OpLOONG64DIVVU { +- break +- } +- _ = v_0.Args[1] +- v_0_1 := v_0.Args[1] +- if v_0_1.Op != OpLOONG64MOVVconst || auxIntToInt64(v_0_1.AuxInt) != 1 { +- break +- } +- v.reset(OpLOONG64MOVVconst) +- v.AuxInt = int64ToAuxInt(0) +- return true +- } +- // match: (Select0 (DIVVU x (MOVVconst [c]))) +- // cond: isPowerOfTwo64(c) +- // result: (ANDconst [c-1] x) +- for { +- if v_0.Op != OpLOONG64DIVVU { +- break +- } +- _ = v_0.Args[1] +- x := v_0.Args[0] +- v_0_1 := v_0.Args[1] +- if v_0_1.Op != OpLOONG64MOVVconst { +- break +- } +- c := auxIntToInt64(v_0_1.AuxInt) +- if !(isPowerOfTwo64(c)) { +- break +- } +- v.reset(OpLOONG64ANDconst) +- v.AuxInt = int64ToAuxInt(c - 1) +- v.AddArg(x) +- return true +- } +- // match: (Select0 (DIVV (MOVVconst [c]) (MOVVconst [d]))) +- // cond: d != 0 +- // result: (MOVVconst [c%d]) +- for { +- if v_0.Op != OpLOONG64DIVV { +- break +- } +- _ = v_0.Args[1] +- v_0_0 := v_0.Args[0] +- if v_0_0.Op != OpLOONG64MOVVconst { +- break +- } +- c := auxIntToInt64(v_0_0.AuxInt) +- v_0_1 := v_0.Args[1] +- if v_0_1.Op != OpLOONG64MOVVconst { +- break +- } +- d := auxIntToInt64(v_0_1.AuxInt) +- if !(d != 0) { +- break +- } +- 
v.reset(OpLOONG64MOVVconst) +- v.AuxInt = int64ToAuxInt(c % d) +- return true +- } +- // match: (Select0 (DIVVU (MOVVconst [c]) (MOVVconst [d]))) +- // cond: d != 0 +- // result: (MOVVconst [int64(uint64(c)%uint64(d))]) +- for { +- if v_0.Op != OpLOONG64DIVVU { +- break +- } +- _ = v_0.Args[1] +- v_0_0 := v_0.Args[0] +- if v_0_0.Op != OpLOONG64MOVVconst { +- break +- } +- c := auxIntToInt64(v_0_0.AuxInt) +- v_0_1 := v_0.Args[1] +- if v_0_1.Op != OpLOONG64MOVVconst { +- break +- } +- d := auxIntToInt64(v_0_1.AuxInt) +- if !(d != 0) { +- break +- } +- v.reset(OpLOONG64MOVVconst) +- v.AuxInt = int64ToAuxInt(int64(uint64(c) % uint64(d))) +- return true +- } + return false + } + func rewriteValueLOONG64_OpSelect1(v *Value) bool { +@@ -7129,93 +7136,6 @@ func rewriteValueLOONG64_OpSelect1(v *Value) bool { + v.AddArg2(v0, v2) + return true + } +- // match: (Select1 (DIVVU x (MOVVconst [1]))) +- // result: x +- for { +- if v_0.Op != OpLOONG64DIVVU { +- break +- } +- _ = v_0.Args[1] +- x := v_0.Args[0] +- v_0_1 := v_0.Args[1] +- if v_0_1.Op != OpLOONG64MOVVconst || auxIntToInt64(v_0_1.AuxInt) != 1 { +- break +- } +- v.copyOf(x) +- return true +- } +- // match: (Select1 (DIVVU x (MOVVconst [c]))) +- // cond: isPowerOfTwo64(c) +- // result: (SRLVconst [log64(c)] x) +- for { +- if v_0.Op != OpLOONG64DIVVU { +- break +- } +- _ = v_0.Args[1] +- x := v_0.Args[0] +- v_0_1 := v_0.Args[1] +- if v_0_1.Op != OpLOONG64MOVVconst { +- break +- } +- c := auxIntToInt64(v_0_1.AuxInt) +- if !(isPowerOfTwo64(c)) { +- break +- } +- v.reset(OpLOONG64SRLVconst) +- v.AuxInt = int64ToAuxInt(log64(c)) +- v.AddArg(x) +- return true +- } +- // match: (Select1 (DIVV (MOVVconst [c]) (MOVVconst [d]))) +- // cond: d != 0 +- // result: (MOVVconst [c/d]) +- for { +- if v_0.Op != OpLOONG64DIVV { +- break +- } +- _ = v_0.Args[1] +- v_0_0 := v_0.Args[0] +- if v_0_0.Op != OpLOONG64MOVVconst { +- break +- } +- c := auxIntToInt64(v_0_0.AuxInt) +- v_0_1 := v_0.Args[1] +- if v_0_1.Op != OpLOONG64MOVVconst { +- 
break +- } +- d := auxIntToInt64(v_0_1.AuxInt) +- if !(d != 0) { +- break +- } +- v.reset(OpLOONG64MOVVconst) +- v.AuxInt = int64ToAuxInt(c / d) +- return true +- } +- // match: (Select1 (DIVVU (MOVVconst [c]) (MOVVconst [d]))) +- // cond: d != 0 +- // result: (MOVVconst [int64(uint64(c)/uint64(d))]) +- for { +- if v_0.Op != OpLOONG64DIVVU { +- break +- } +- _ = v_0.Args[1] +- v_0_0 := v_0.Args[0] +- if v_0_0.Op != OpLOONG64MOVVconst { +- break +- } +- c := auxIntToInt64(v_0_0.AuxInt) +- v_0_1 := v_0.Args[1] +- if v_0_1.Op != OpLOONG64MOVVconst { +- break +- } +- d := auxIntToInt64(v_0_1.AuxInt) +- if !(d != 0) { +- break +- } +- v.reset(OpLOONG64MOVVconst) +- v.AuxInt = int64ToAuxInt(int64(uint64(c) / uint64(d))) +- return true +- } + return false + } + func rewriteValueLOONG64_OpSlicemask(v *Value) bool { +-- +2.38.1 + diff --git a/0055-cmd-internal-obj-loong64-auto-align-loop-heads-to-16.patch b/0055-cmd-internal-obj-loong64-auto-align-loop-heads-to-16.patch new file mode 100644 index 0000000000000000000000000000000000000000..45efd4a1532b36aad90ab27784f144e2f289e46b --- /dev/null +++ b/0055-cmd-internal-obj-loong64-auto-align-loop-heads-to-16.patch @@ -0,0 +1,175 @@ +From 6b65697069b76b77604d810d0c486d169c043603 Mon Sep 17 00:00:00 2001 +From: WANG Xuerui +Date: Tue, 28 Mar 2023 21:10:16 +0800 +Subject: [PATCH 55/62] cmd/internal/obj/loong64: auto-align loop heads to + 16-byte boundaries +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +CL 479816 took care of loops in hand-written assembly, but did not +account for those written in Go, that may become performance-sensitive +as well. + +In this patch, all loop heads are automatically identified and aligned +to 16-byte boundaries, by inserting a synthetic `PCALIGN $16` before +them. "Loop heads" are defined as targets of backward branches. + +Because LoongArch instructions are all 32 bits long, at most 3 NOOPs +can be inserted for each target Prog. 
This may sound excessive, but +benchmark results indicate the current approach is overall profitable +anyway. + +Benchmark results on Loongson 3A5000 (LA464): + +goos: linux +goarch: loong64 +pkg: test/bench/go1 + │ CL 479816 │ this CL │ + │ sec/op │ sec/op vs base │ +BinaryTree17 14.10 ± 1% 14.06 ± 1% ~ (p=0.280 n=10) +Fannkuch11 3.579 ± 0% 3.419 ± 0% -4.45% (p=0.000 n=10) +FmtFprintfEmpty 94.73n ± 0% 94.44n ± 0% -0.31% (p=0.000 n=10) +FmtFprintfString 151.9n ± 0% 149.1n ± 0% -1.84% (p=0.000 n=10) +FmtFprintfInt 158.3n ± 0% 155.2n ± 0% -1.96% (p=0.000 n=10) +FmtFprintfIntInt 241.4n ± 0% 235.4n ± 0% -2.49% (p=0.000 n=10) +FmtFprintfPrefixedInt 320.2n ± 0% 314.7n ± 0% -1.73% (p=0.000 n=10) +FmtFprintfFloat 414.3n ± 0% 398.7n ± 0% -3.77% (p=0.000 n=10) +FmtManyArgs 949.9n ± 0% 929.8n ± 0% -2.12% (p=0.000 n=10) +GobDecode 15.24m ± 0% 15.30m ± 0% +0.38% (p=0.035 n=10) +GobEncode 18.10m ± 2% 17.59m ± 1% -2.81% (p=0.002 n=10) +Gzip 429.9m ± 0% 421.5m ± 0% -1.97% (p=0.000 n=10) +Gunzip 88.31m ± 0% 87.39m ± 0% -1.04% (p=0.000 n=10) +HTTPClientServer 85.71µ ± 0% 87.24µ ± 0% +1.79% (p=0.000 n=10) +JSONEncode 19.74m ± 0% 18.55m ± 0% -6.00% (p=0.000 n=10) +JSONDecode 78.60m ± 1% 77.93m ± 0% -0.84% (p=0.000 n=10) +Mandelbrot200 7.208m ± 0% 7.217m ± 0% ~ (p=0.481 n=10) +GoParse 7.616m ± 1% 7.630m ± 2% ~ (p=0.796 n=10) +RegexpMatchEasy0_32 133.0n ± 0% 134.1n ± 0% +0.83% (p=0.000 n=10) +RegexpMatchEasy0_1K 1.362µ ± 0% 1.364µ ± 0% +0.15% (p=0.000 n=10) +RegexpMatchEasy1_32 161.8n ± 0% 163.7n ± 0% +1.17% (p=0.000 n=10) +RegexpMatchEasy1_1K 1.497µ ± 0% 1.497µ ± 0% ~ (p=1.000 n=10) +RegexpMatchMedium_32 1.420µ ± 0% 1.446µ ± 0% +1.83% (p=0.000 n=10) +RegexpMatchMedium_1K 42.25µ ± 0% 42.53µ ± 0% +0.65% (p=0.000 n=10) +RegexpMatchHard_32 2.108µ ± 0% 2.116µ ± 0% +0.38% (p=0.000 n=10) +RegexpMatchHard_1K 62.65µ ± 0% 63.23µ ± 0% +0.93% (p=0.000 n=10) +Revcomp 1.192 ± 0% 1.198 ± 0% +0.55% (p=0.000 n=10) +Template 115.6m ± 2% 116.9m ± 1% ~ (p=0.075 n=10) +TimeParse 418.1n ± 1% 414.7n ± 0% 
-0.81% (p=0.000 n=10) +TimeFormat 517.9n ± 0% 513.7n ± 0% -0.81% (p=0.000 n=10) +geomean 103.5µ 102.6µ -0.79% + + │ CL 479816 │ this CL │ + │ B/s │ B/s vs base │ +GobDecode 48.04Mi ± 0% 47.86Mi ± 0% -0.38% (p=0.035 n=10) +GobEncode 40.44Mi ± 2% 41.61Mi ± 1% +2.89% (p=0.001 n=10) +Gzip 43.04Mi ± 0% 43.91Mi ± 0% +2.02% (p=0.000 n=10) +Gunzip 209.6Mi ± 0% 211.8Mi ± 0% +1.05% (p=0.000 n=10) +JSONEncode 93.76Mi ± 0% 99.75Mi ± 0% +6.39% (p=0.000 n=10) +JSONDecode 23.55Mi ± 1% 23.75Mi ± 0% +0.85% (p=0.000 n=10) +GoParse 7.253Mi ± 1% 7.238Mi ± 2% ~ (p=0.698 n=10) +RegexpMatchEasy0_32 229.4Mi ± 0% 227.6Mi ± 0% -0.82% (p=0.000 n=10) +RegexpMatchEasy0_1K 717.3Mi ± 0% 716.2Mi ± 0% -0.15% (p=0.000 n=10) +RegexpMatchEasy1_32 188.6Mi ± 0% 186.4Mi ± 0% -1.13% (p=0.000 n=10) +RegexpMatchEasy1_1K 652.2Mi ± 0% 652.3Mi ± 0% +0.01% (p=0.005 n=10) +RegexpMatchMedium_32 21.49Mi ± 0% 21.11Mi ± 0% -1.73% (p=0.000 n=10) +RegexpMatchMedium_1K 23.11Mi ± 0% 22.96Mi ± 0% -0.62% (p=0.000 n=10) +RegexpMatchHard_32 14.48Mi ± 0% 14.42Mi ± 0% -0.40% (p=0.000 n=10) +RegexpMatchHard_1K 15.59Mi ± 0% 15.44Mi ± 0% -0.98% (p=0.000 n=10) +Revcomp 203.4Mi ± 0% 202.3Mi ± 0% -0.55% (p=0.000 n=10) +Template 16.00Mi ± 2% 15.83Mi ± 1% ~ (p=0.078 n=10) +geomean 60.72Mi 60.89Mi +0.29% + +The slight regression on the Regexp cases is likely because the previous +numbers are just coincidental: indeed, large regressions or improvements +(of roughly ±10%) happen with definitely irrelevant changes. This CL +should (hopefully) bring such random performance fluctuations down a +bit. 
+ +Change-Id: I8bdda6e65336da00d4ad79650937b3eeb9db0e7c +--- + src/cmd/internal/obj/loong64/asm.go | 37 ++++++++++++++++++++++++++++- + 1 file changed, 36 insertions(+), 1 deletion(-) + +diff --git a/src/cmd/internal/obj/loong64/asm.go b/src/cmd/internal/obj/loong64/asm.go +index fa1a877eab..c785adf8af 100644 +--- a/src/cmd/internal/obj/loong64/asm.go ++++ b/src/cmd/internal/obj/loong64/asm.go +@@ -28,6 +28,7 @@ type ctxt0 struct { + + const ( + FuncAlign = 4 ++ loopAlign = 16 + ) + + type Optab struct { +@@ -45,6 +46,10 @@ type Optab struct { + + const ( + NOTUSETMP = 1 << iota // p expands to multiple instructions, but does NOT use REGTMP ++ ++ // branchLoopHead marks loop entry. ++ // Used to insert padding for misaligned loops. ++ branchLoopHead + ) + + var optab = []Optab{ +@@ -434,6 +439,14 @@ func span0(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { + + c.cursym.Size = pc + ++ // mark loop entry instructions for padding ++ // loop entrances are defined as targets of backward branches ++ for p = c.cursym.Func().Text.Link; p != nil; p = p.Link { ++ if q := p.To.Target(); q != nil && q.Pc < p.Pc { ++ q.Mark |= branchLoopHead ++ } ++ } ++ + /* + * if any procedure is large enough to + * generate a large SBRA branch, then +@@ -444,10 +457,17 @@ func span0(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { + + var otxt int64 + var q *obj.Prog ++ iters := 0 + for bflag != 0 { ++ iters++ ++ if iters > 200 { ++ ctxt.Diag("layout pass doesn't converge") ++ break ++ } + bflag = 0 + pc = 0 +- for p = c.cursym.Func().Text.Link; p != nil; p = p.Link { ++ prev := c.cursym.Func().Text ++ for p = prev.Link; p != nil; prev, p = p, p.Link { + p.Pc = pc + o = c.oplook(p) + +@@ -474,6 +494,21 @@ func span0(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { + } + } + ++ // loop heads that need padding ++ // prepend a PCALIGN $16 to such progs ++ if p.Mark&branchLoopHead != 0 && pc&(loopAlign-1) != 0 { ++ q = c.newprog() ++ prev.Link = q ++ q.Link = p 
++ q.As = obj.APCALIGN ++ q.From.Type = obj.TYPE_CONST ++ q.From.Offset = loopAlign ++ // don't associate the synthesized PCALIGN ++ // with the original source position ++ // q.Pos = p.Pos ++ bflag = 1 ++ } ++ + m = int(o.size) + if m == 0 { + switch p.As { +-- +2.38.1 + diff --git a/0056-cmd-internal-obj-loong64-add-atomic-memory-access-in.patch b/0056-cmd-internal-obj-loong64-add-atomic-memory-access-in.patch new file mode 100644 index 0000000000000000000000000000000000000000..d02f3dca9bc3640a471de77bfe395c0446a09804 --- /dev/null +++ b/0056-cmd-internal-obj-loong64-add-atomic-memory-access-in.patch @@ -0,0 +1,320 @@ +From 964026d0bfad09ee9d54290e3140aae97d8821dc Mon Sep 17 00:00:00 2001 +From: Guoqi Chen +Date: Sat, 1 Apr 2023 03:43:20 +0800 +Subject: [PATCH 56/62] cmd/internal/obj/loong64: add atomic memory access + instructions support +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +The AM* atomic access instruction performs a sequence of “read-modify-write” +operations on a memory cell atomically. Specifically, it retrieves the old +value at the specified address in memory and writes it to the general register +rd, performs some simple operations on the old value in memory and the value +in the general register rk, and then write the result of the operation back +to the memory address pointed to by general register rj. 
+ +Go asm syntax: + AM{SWAP/ADD/AND/OR/XOR/MAX/MIN}[DB]{W/V} RK, (RJ), RD + AM{MAX/MIN}[DB]{WU/VU} RK, (RJ), RD + +Equivalent platform assembler syntax: + am{swap/add/and/or/xor/max/min}[_db].{w/d} rd, rk, rj + am{max/min}[_db].{wu/du} rd, rk, rj + +Ref: https://loongson.github.io/LoongArch-Documentation/LoongArch-Vol1-EN.html + +Change-Id: I99ea4553ae731675180d63691c19ef334e7e7817 +--- + src/cmd/asm/internal/arch/loong64.go | 4 ++ + src/cmd/asm/internal/asm/asm.go | 14 +++- + .../asm/internal/asm/testdata/loong64enc1.s | 38 +++++++++++ + src/cmd/internal/obj/loong64/a.out.go | 38 +++++++++++ + src/cmd/internal/obj/loong64/anames.go | 36 ++++++++++ + src/cmd/internal/obj/loong64/asm.go | 67 +++++++++++++++++++ + 6 files changed, 194 insertions(+), 3 deletions(-) + +diff --git a/src/cmd/asm/internal/arch/loong64.go b/src/cmd/asm/internal/arch/loong64.go +index 2958ee1a86..bf34a94f07 100644 +--- a/src/cmd/asm/internal/arch/loong64.go ++++ b/src/cmd/asm/internal/arch/loong64.go +@@ -55,6 +55,10 @@ func IsLoong64RDTIME(op obj.As) bool { + return false + } + ++func IsLoong64AMO(op obj.As) bool { ++ return loong64.IsAtomicInst(op) ++} ++ + func loong64RegisterNumber(name string, n int16) (int16, bool) { + switch name { + case "F": +diff --git a/src/cmd/asm/internal/asm/asm.go b/src/cmd/asm/internal/asm/asm.go +index 4d0eeacc74..ee988734f0 100644 +--- a/src/cmd/asm/internal/asm/asm.go ++++ b/src/cmd/asm/internal/asm/asm.go +@@ -664,9 +664,17 @@ func (p *Parser) asmInstruction(op obj.As, cond string, a []obj.Addr) { + prog.Reg = p.getRegister(prog, op, &a[1]) + prog.To = a[2] + case sys.Loong64: +- prog.From = a[0] +- prog.Reg = p.getRegister(prog, op, &a[1]) +- prog.To = a[2] ++ switch { ++ // Loong64 atomic instructions with one input and two outputs. 
++ case arch.IsLoong64AMO(op): ++ prog.From = a[0] ++ prog.To = a[1] ++ prog.RegTo2 = a[2].Reg ++ default: ++ prog.From = a[0] ++ prog.Reg = p.getRegister(prog, op, &a[1]) ++ prog.To = a[2] ++ } + case sys.ARM: + // Special cases. + if arch.IsARMSTREX(op) { +diff --git a/src/cmd/asm/internal/asm/testdata/loong64enc1.s b/src/cmd/asm/internal/asm/testdata/loong64enc1.s +index ea6c569f9d..288408b010 100644 +--- a/src/cmd/asm/internal/asm/testdata/loong64enc1.s ++++ b/src/cmd/asm/internal/asm/testdata/loong64enc1.s +@@ -233,3 +233,41 @@ lable2: + + MOVV FCC0, R4 // 04dc1401 + MOVV R4, FCC0 // 80d81401 ++ ++ // Loong64 atomic memory access instructions ++ AMSWAPW R14, (R13), R12 // ac396038 ++ AMSWAPV R14, (R13), R12 // acb96038 ++ AMADDW R14, (R13), R12 // ac396138 ++ AMADDV R14, (R13), R12 // acb96138 ++ AMANDW R14, (R13), R12 // ac396238 ++ AMANDV R14, (R13), R12 // acb96238 ++ AMORW R14, (R13), R12 // ac396338 ++ AMORV R14, (R13), R12 // acb96338 ++ AMXORW R14, (R13), R12 // ac396438 ++ AMXORV R14, (R13), R12 // acb96438 ++ AMMAXW R14, (R13), R12 // ac396538 ++ AMMAXV R14, (R13), R12 // acb96538 ++ AMMINW R14, (R13), R12 // ac396638 ++ AMMINV R14, (R13), R12 // acb96638 ++ AMMAXWU R14, (R13), R12 // ac396738 ++ AMMAXVU R14, (R13), R12 // acb96738 ++ AMMINWU R14, (R13), R12 // ac396838 ++ AMMINVU R14, (R13), R12 // acb96838 ++ AMSWAPDBW R14, (R13), R12 // ac396938 ++ AMSWAPDBV R14, (R13), R12 // acb96938 ++ AMADDDBW R14, (R13), R12 // ac396a38 ++ AMADDDBV R14, (R13), R12 // acb96a38 ++ AMANDDBW R14, (R13), R12 // ac396b38 ++ AMANDDBV R14, (R13), R12 // acb96b38 ++ AMORDBW R14, (R13), R12 // ac396c38 ++ AMORDBV R14, (R13), R12 // acb96c38 ++ AMXORDBW R14, (R13), R12 // ac396d38 ++ AMXORDBV R14, (R13), R12 // acb96d38 ++ AMMAXDBW R14, (R13), R12 // ac396e38 ++ AMMAXDBV R14, (R13), R12 // acb96e38 ++ AMMINDBW R14, (R13), R12 // ac396f38 ++ AMMINDBV R14, (R13), R12 // acb96f38 ++ AMMAXDBWU R14, (R13), R12 // ac397038 ++ AMMAXDBVU R14, (R13), R12 // acb97038 ++ AMMINDBWU 
R14, (R13), R12 // ac397138 ++ AMMINDBVU R14, (R13), R12 // acb97138 +diff --git a/src/cmd/internal/obj/loong64/a.out.go b/src/cmd/internal/obj/loong64/a.out.go +index 9527e99b56..3ed15fc7e7 100644 +--- a/src/cmd/internal/obj/loong64/a.out.go ++++ b/src/cmd/internal/obj/loong64/a.out.go +@@ -394,6 +394,44 @@ const ( + AMOVVF + AMOVVD + ++ // 2.2.7. Atomic Memory Access Instructions ++ AAMSWAPW ++ AAMSWAPV ++ AAMADDW ++ AAMADDV ++ AAMANDW ++ AAMANDV ++ AAMORW ++ AAMORV ++ AAMXORW ++ AAMXORV ++ AAMMAXW ++ AAMMAXV ++ AAMMINW ++ AAMMINV ++ AAMMAXWU ++ AAMMAXVU ++ AAMMINWU ++ AAMMINVU ++ AAMSWAPDBW ++ AAMSWAPDBV ++ AAMADDDBW ++ AAMADDDBV ++ AAMANDDBW ++ AAMANDDBV ++ AAMORDBW ++ AAMORDBV ++ AAMXORDBW ++ AAMXORDBV ++ AAMMAXDBW ++ AAMMAXDBV ++ AAMMINDBW ++ AAMMINDBV ++ AAMMAXDBWU ++ AAMMAXDBVU ++ AAMMINDBWU ++ AAMMINDBVU ++ + // 2.2.10. Other Miscellaneous Instructions + ARDTIMELW + ARDTIMEHW +diff --git a/src/cmd/internal/obj/loong64/anames.go b/src/cmd/internal/obj/loong64/anames.go +index f61756e7a8..d48ff8a281 100644 +--- a/src/cmd/internal/obj/loong64/anames.go ++++ b/src/cmd/internal/obj/loong64/anames.go +@@ -131,6 +131,42 @@ var Anames = []string{ + "MOVDV", + "MOVVF", + "MOVVD", ++ "AMSWAPW", ++ "AMSWAPV", ++ "AMADDW", ++ "AMADDV", ++ "AMANDW", ++ "AMANDV", ++ "AMORW", ++ "AMORV", ++ "AMXORW", ++ "AMXORV", ++ "AMMAXW", ++ "AMMAXV", ++ "AMMINW", ++ "AMMINV", ++ "AMMAXWU", ++ "AMMAXVU", ++ "AMMINWU", ++ "AMMINVU", ++ "AMSWAPDBW", ++ "AMSWAPDBV", ++ "AMADDDBW", ++ "AMADDDBV", ++ "AMANDDBW", ++ "AMANDDBV", ++ "AMORDBW", ++ "AMORDBV", ++ "AMXORDBW", ++ "AMXORDBV", ++ "AMMAXDBW", ++ "AMMAXDBV", ++ "AMMINDBW", ++ "AMMINDBV", ++ "AMMAXDBWU", ++ "AMMAXDBVU", ++ "AMMINDBWU", ++ "AMMINDBVU", + "RDTIMELW", + "RDTIMEHW", + "RDTIMED", +diff --git a/src/cmd/internal/obj/loong64/asm.go b/src/cmd/internal/obj/loong64/asm.go +index c785adf8af..2b505ad749 100644 +--- a/src/cmd/internal/obj/loong64/asm.go ++++ b/src/cmd/internal/obj/loong64/asm.go +@@ -363,6 +363,8 @@ var optab = 
[]Optab{ + {ARDTIMEHW, C_NONE, C_NONE, C_NONE, C_REG, C_REG, 62, 4, 0, 0}, + {ARDTIMED, C_NONE, C_NONE, C_NONE, C_REG, C_REG, 62, 4, 0, 0}, + ++ {AAMSWAPW, C_REG, C_NONE, C_NONE, C_ZOREG, C_REG, 66, 4, 0, 0}, ++ + {obj.AUNDEF, C_NONE, C_NONE, C_NONE, C_NONE, C_NONE, 49, 4, 0, 0}, + {obj.APCALIGN, C_SCON, C_NONE, C_NONE, C_NONE, C_NONE, 0, 0, 0, 0}, + {obj.APCDATA, C_LCON, C_NONE, C_NONE, C_LCON, C_NONE, 0, 0, 0, 0}, +@@ -379,6 +381,51 @@ var optab = []Optab{ + {obj.AXXX, C_NONE, C_NONE, C_NONE, C_NONE, C_NONE, 0, 4, 0, 0}, + } + ++var atomicInst = map[obj.As]uint32{ ++ AAMSWAPW: 0x070C0 << 15, // amswap.w ++ AAMSWAPV: 0x070C1 << 15, // amswap.d ++ AAMADDW: 0x070C2 << 15, // amadd.w ++ AAMADDV: 0x070C3 << 15, // amadd.d ++ AAMANDW: 0x070C4 << 15, // amand.w ++ AAMANDV: 0x070C5 << 15, // amand.d ++ AAMORW: 0x070C6 << 15, // amor.w ++ AAMORV: 0x070C7 << 15, // amor.d ++ AAMXORW: 0x070C8 << 15, // amxor.w ++ AAMXORV: 0x070C9 << 15, // amxor.d ++ AAMMAXW: 0x070CA << 15, // ammax.w ++ AAMMAXV: 0x070CB << 15, // ammax.d ++ AAMMINW: 0x070CC << 15, // ammin.w ++ AAMMINV: 0x070CD << 15, // ammin.d ++ AAMMAXWU: 0x070CE << 15, // ammax.wu ++ AAMMAXVU: 0x070CF << 15, // ammax.du ++ AAMMINWU: 0x070D0 << 15, // ammin.wu ++ AAMMINVU: 0x070D1 << 15, // ammin.du ++ AAMSWAPDBW: 0x070D2 << 15, // amswap_db.w ++ AAMSWAPDBV: 0x070D3 << 15, // amswap_db.d ++ AAMADDDBW: 0x070D4 << 15, // amadd_db.w ++ AAMADDDBV: 0x070D5 << 15, // amadd_db.d ++ AAMANDDBW: 0x070D6 << 15, // amand_db.w ++ AAMANDDBV: 0x070D7 << 15, // amand_db.d ++ AAMORDBW: 0x070D8 << 15, // amor_db.w ++ AAMORDBV: 0x070D9 << 15, // amor_db.d ++ AAMXORDBW: 0x070DA << 15, // amxor_db.w ++ AAMXORDBV: 0x070DB << 15, // amxor_db.d ++ AAMMAXDBW: 0x070DC << 15, // ammax_db.w ++ AAMMAXDBV: 0x070DD << 15, // ammax_db.d ++ AAMMINDBW: 0x070DE << 15, // ammin_db.w ++ AAMMINDBV: 0x070DF << 15, // ammin_db.d ++ AAMMAXDBWU: 0x070E0 << 15, // ammax_db.wu ++ AAMMAXDBVU: 0x070E1 << 15, // ammax_db.du ++ AAMMINDBWU: 0x070E2 << 15, // 
ammin_db.wu ++ AAMMINDBVU: 0x070E3 << 15, // ammin_db.du ++} ++ ++func IsAtomicInst(as obj.As) bool { ++ _, ok := atomicInst[as] ++ ++ return ok ++} ++ + // align code to a certain length by padding bytes. + func pcAlignPadLength(pc int64, alignedValue int64, ctxt *obj.Link) int { + if !((alignedValue&(alignedValue-1) == 0) && 8 <= alignedValue && alignedValue <= 2048) { +@@ -1167,6 +1214,14 @@ func buildop(ctxt *obj.Link) { + + case AMASKEQZ: + opset(AMASKNEZ, r0) ++ ++ case AAMSWAPW: ++ for i := range atomicInst { ++ if i == AAMSWAPW { ++ continue ++ } ++ opset(i, r0) ++ } + } + } + } +@@ -1790,6 +1845,18 @@ func (c *ctxt0) asmout(p *obj.Prog, o *Optab, out []uint32) { + rel2.Sym = p.From.Sym + rel2.Type = objabi.R_LOONG64_GOT_LO + rel2.Add = 0x0 ++ ++ case 66: // am* From, To, RegTo2 ==> am* RegTo2, From, To ++ rk := p.From.Reg ++ rj := p.To.Reg ++ rd := p.RegTo2 ++ ++ // See section 2.2.7.1 of https://loongson.github.io/LoongArch-Documentation/LoongArch-Vol1-EN.html ++ // for the register usage constraints. 
++ if rd == rj || rd == rk { ++ c.ctxt.Diag("illegal register combination: %v\n", p) ++ } ++ o1 = OP_RRR(atomicInst[p.As], uint32(rk), uint32(rj), uint32(rd)) + } + + out[0] = o1 +-- +2.38.1 + diff --git a/0057-cmd-compiler-runtime-internal-atomic-optimize-xchg-a.patch b/0057-cmd-compiler-runtime-internal-atomic-optimize-xchg-a.patch new file mode 100644 index 0000000000000000000000000000000000000000..85dec4e1c0e608d4d3a95f3058559466e8f021cb --- /dev/null +++ b/0057-cmd-compiler-runtime-internal-atomic-optimize-xchg-a.patch @@ -0,0 +1,164 @@ +From 73d1e0ee8acb2a3d4eb8030fa62473a8df7c43e8 Mon Sep 17 00:00:00 2001 +From: Guoqi Chen +Date: Sat, 1 Apr 2023 08:49:58 +0800 +Subject: [PATCH 57/62] cmd/compiler,runtime/internal/atomic: optimize xchg and + xchg64 on loong64 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Use Loong64's atomic operation instruction AMSWAPx to implement Xchg and Xchg64 + +goos: linux +goarch: loong64 +pkg: runtime/internal/atomic + │ bench.old │ bench.new │ + │ sec/op │ sec/op vs base │ +Xchg 30.06n ± 0% 13.66n ± 0% -54.56% (p=0.000 n=20) +Xchg-2 37.43n ± 6% 23.55n ± 1% -37.06% (p=0.000 n=20) +Xchg-4 37.16n ± 5% 33.25n ± 2% -10.55% (p=0.000 n=20) +Xchg-8 37.81n ± 3% 32.12n ± 1% -15.04% (p=0.000 n=20) +Xchg-16 37.55n ± 0% 33.70n ± 0% -10.25% (p=0.000 n=20) +Xchg64 30.05n ± 0% 14.13n ± 0% -52.96% (p=0.000 n=20) +Xchg64-2 37.42n ± 1% 21.80n ± 0% -41.74% (p=0.000 n=20) +Xchg64-4 38.17n ± 6% 31.95n ± 1% -16.30% (p=0.000 n=20) +Xchg64-8 37.44n ± 1% 32.12n ± 2% -14.18% (p=0.000 n=20) +Xchg64-16 37.56n ± 0% 33.65n ± 0% -10.41% (p=0.000 n=20) +geomean 36.54n 25.65n -28.61% + +Updates #59120. 
+ +Change-Id: Ied74fc20338b63799c6d6eeb122c31b42cff0f7e +--- + src/cmd/compile/internal/loong64/ssa.go | 38 ++++--------------- + .../compile/internal/ssa/_gen/LOONG64Ops.go | 6 --- + src/runtime/internal/atomic/atomic_loong64.s | 30 +++++++-------- + 3 files changed, 22 insertions(+), 52 deletions(-) + +diff --git a/src/cmd/compile/internal/loong64/ssa.go b/src/cmd/compile/internal/loong64/ssa.go +index 18abe06966..26534ccf40 100644 +--- a/src/cmd/compile/internal/loong64/ssa.go ++++ b/src/cmd/compile/internal/loong64/ssa.go +@@ -494,40 +494,18 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) { + p.To.Reg = v.Args[0].Reg() + s.Prog(loong64.ADBAR) + case ssa.OpLOONG64LoweredAtomicExchange32, ssa.OpLOONG64LoweredAtomicExchange64: +- // DBAR +- // MOVV Rarg1, Rtmp +- // LL (Rarg0), Rout +- // SC Rtmp, (Rarg0) +- // BEQ Rtmp, -3(PC) +- // DBAR +- ll := loong64.ALLV +- sc := loong64.ASCV ++ // AMSWAPx Rarg1, (Rarg0), Rout ++ amswapx := loong64.AAMSWAPV + if v.Op == ssa.OpLOONG64LoweredAtomicExchange32 { +- ll = loong64.ALL +- sc = loong64.ASC ++ amswapx = loong64.AAMSWAPW + } +- s.Prog(loong64.ADBAR) +- p := s.Prog(loong64.AMOVV) ++ p := s.Prog(amswapx) + p.From.Type = obj.TYPE_REG + p.From.Reg = v.Args[1].Reg() +- p.To.Type = obj.TYPE_REG +- p.To.Reg = loong64.REGTMP +- p1 := s.Prog(ll) +- p1.From.Type = obj.TYPE_MEM +- p1.From.Reg = v.Args[0].Reg() +- p1.To.Type = obj.TYPE_REG +- p1.To.Reg = v.Reg0() +- p2 := s.Prog(sc) +- p2.From.Type = obj.TYPE_REG +- p2.From.Reg = loong64.REGTMP +- p2.To.Type = obj.TYPE_MEM +- p2.To.Reg = v.Args[0].Reg() +- p3 := s.Prog(loong64.ABEQ) +- p3.From.Type = obj.TYPE_REG +- p3.From.Reg = loong64.REGTMP +- p3.To.Type = obj.TYPE_BRANCH +- p3.To.SetTarget(p) +- s.Prog(loong64.ADBAR) ++ p.To.Type = obj.TYPE_MEM ++ p.To.Reg = v.Args[0].Reg() ++ p.RegTo2 = v.Reg0() ++ + case ssa.OpLOONG64LoweredAtomicAdd32, ssa.OpLOONG64LoweredAtomicAdd64: + // DBAR + // LL (Rarg0), Rout +diff --git a/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go 
b/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go +index f2b9ac9ccd..350e624adc 100644 +--- a/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go ++++ b/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go +@@ -377,12 +377,6 @@ func init() { + + // atomic exchange. + // store arg1 to arg0. arg2=mem. returns . +- // DBAR +- // LL (Rarg0), Rout +- // MOVV Rarg1, Rtmp +- // SC Rtmp, (Rarg0) +- // BEQ Rtmp, -3(PC) +- // DBAR + {name: "LoweredAtomicExchange32", argLength: 3, reg: gpxchg, resultNotInArgs: true, faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true}, + {name: "LoweredAtomicExchange64", argLength: 3, reg: gpxchg, resultNotInArgs: true, faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true}, + +diff --git a/src/runtime/internal/atomic/atomic_loong64.s b/src/runtime/internal/atomic/atomic_loong64.s +index 80ff980739..e8ffdee70e 100644 +--- a/src/runtime/internal/atomic/atomic_loong64.s ++++ b/src/runtime/internal/atomic/atomic_loong64.s +@@ -121,35 +121,33 @@ TEXT ·Xadd64(SB), NOSPLIT, $0-24 + DBAR + RET + ++// func Xchg(ptr *uint32, new uint32) uint32 + TEXT ·Xchg(SB), NOSPLIT, $0-20 + MOVV ptr+0(FP), R4 + MOVW new+8(FP), R5 +- +- DBAR +- MOVV R5, R6 +- LL (R4), R7 +- SC R6, (R4) +- BEQ R6, -3(PC) +- MOVW R7, ret+16(FP) +- DBAR ++ AMSWAPW R5, (R4), R6 ++ MOVW R6, ret+16(FP) + RET + ++// func Xchg64(ptr *uint64, new uint64) uint64 + TEXT ·Xchg64(SB), NOSPLIT, $0-24 + MOVV ptr+0(FP), R4 + MOVV new+8(FP), R5 +- +- DBAR +- MOVV R5, R6 +- LLV (R4), R7 +- SCV R6, (R4) +- BEQ R6, -3(PC) +- MOVV R7, ret+16(FP) +- DBAR ++ AMSWAPV R5, (R4), R6 ++ MOVV R6, ret+16(FP) + RET + + TEXT ·Xchguintptr(SB), NOSPLIT, $0-24 + JMP ·Xchg64(SB) + ++// func Xchgint32(ptr *int32, new int32) int32 ++TEXT ·Xchgint32(SB), NOSPLIT, $0-20 ++ JMP ·Xchg(SB) ++ ++// func Xchgint64(ptr *int64, new int64) int64 ++TEXT ·Xchgint64(SB), NOSPLIT, $0-24 ++ JMP ·Xchg64(SB) ++ + TEXT ·StorepNoWB(SB), NOSPLIT, $0-16 + JMP ·Store64(SB) + +-- +2.38.1 + diff --git 
a/0058-cmd-compiler-runtime-internal-atomic-optimize-xadd-a.patch b/0058-cmd-compiler-runtime-internal-atomic-optimize-xadd-a.patch new file mode 100644 index 0000000000000000000000000000000000000000..ea75e610b2e6ebf640a4e5a71c74d6e0ca080db4 --- /dev/null +++ b/0058-cmd-compiler-runtime-internal-atomic-optimize-xadd-a.patch @@ -0,0 +1,182 @@ +From 6e2b9551333ec2dfd66c294f09c9aedbaea23be2 Mon Sep 17 00:00:00 2001 +From: Guoqi Chen +Date: Mon, 3 Apr 2023 12:11:46 +0800 +Subject: [PATCH 58/62] cmd/compiler,runtime/internal/atomic: optimize xadd and + xadd64 on loong64 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Use Loong64's atomic operation instruction AMADDx to implement Xadd and Xadd64 + +goos: linux +goarch: loong64 +pkg: runtime/internal/atomic + │ bench.old │ bench.new │ + │ sec/op │ sec/op vs base │ +Xadd 27.24n ± 0% 27.23n ± 0% -0.04% (p=0.000 n=35) +Xadd-2 31.90n ± 0% 31.91n ± 0% ~ (p=0.765 n=35) +Xadd-4 31.90n ± 0% 31.90n ± 0% ~ (p=0.636 n=35) +Xadd-8 32.61n ± 3% 32.50n ± 4% ~ (p=0.883 n=35) +Xadd-16 32.36n ± 1% 32.33n ± 1% ~ (p=0.266 n=35) +Xadd64 27.24n ± 0% 27.23n ± 0% -0.04% (p=0.000 n=35) +Xadd64-2 31.92n ± 0% 31.92n ± 0% ~ (p=0.617 n=35) +Xadd64-4 31.90n ± 0% 31.90n ± 0% 0.00% (p=0.011 n=35) +Xadd64-8 32.95n ± 1% 32.89n ± 3% ~ (p=1.000 n=35) +Xadd64-16 32.16n ± 1% 32.31n ± 1% ~ (p=0.057 n=35) +geomean 31.15n 31.14n -0.02% + +Change-Id: I982539c2aa04680e9dd11b099ba8d5f215bf9b32 +--- + src/cmd/compile/internal/loong64/ssa.go | 51 ++++++------------- + .../compile/internal/ssa/_gen/LOONG64Ops.go | 7 --- + src/runtime/internal/atomic/atomic_loong64.s | 29 +++++------ + 3 files changed, 27 insertions(+), 60 deletions(-) + +diff --git a/src/cmd/compile/internal/loong64/ssa.go b/src/cmd/compile/internal/loong64/ssa.go +index 26534ccf40..b49a140e03 100644 +--- a/src/cmd/compile/internal/loong64/ssa.go ++++ b/src/cmd/compile/internal/loong64/ssa.go +@@ -507,48 +507,27 @@ func ssaGenValue(s *ssagen.State, v 
*ssa.Value) { + p.RegTo2 = v.Reg0() + + case ssa.OpLOONG64LoweredAtomicAdd32, ssa.OpLOONG64LoweredAtomicAdd64: +- // DBAR +- // LL (Rarg0), Rout +- // ADDV Rarg1, Rout, Rtmp +- // SC Rtmp, (Rarg0) +- // BEQ Rtmp, -3(PC) +- // DBAR +- // ADDV Rarg1, Rout +- ll := loong64.ALLV +- sc := loong64.ASCV ++ // AMADDx Rarg1, (Rarg0), Rout ++ // ADDxU Rarg1, Rout, Rout ++ amaddx := loong64.AAMADDV ++ addx := loong64.AADDVU + if v.Op == ssa.OpLOONG64LoweredAtomicAdd32 { +- ll = loong64.ALL +- sc = loong64.ASC ++ amaddx = loong64.AAMADDW ++ addx = loong64.AADDU + } +- s.Prog(loong64.ADBAR) +- p := s.Prog(ll) +- p.From.Type = obj.TYPE_MEM +- p.From.Reg = v.Args[0].Reg() +- p.To.Type = obj.TYPE_REG +- p.To.Reg = v.Reg0() +- p1 := s.Prog(loong64.AADDVU) ++ p := s.Prog(amaddx) ++ p.From.Type = obj.TYPE_REG ++ p.From.Reg = v.Args[1].Reg() ++ p.To.Type = obj.TYPE_MEM ++ p.To.Reg = v.Args[0].Reg() ++ p.RegTo2 = v.Reg0() ++ p1 := s.Prog(addx) + p1.From.Type = obj.TYPE_REG + p1.From.Reg = v.Args[1].Reg() + p1.Reg = v.Reg0() + p1.To.Type = obj.TYPE_REG +- p1.To.Reg = loong64.REGTMP +- p2 := s.Prog(sc) +- p2.From.Type = obj.TYPE_REG +- p2.From.Reg = loong64.REGTMP +- p2.To.Type = obj.TYPE_MEM +- p2.To.Reg = v.Args[0].Reg() +- p3 := s.Prog(loong64.ABEQ) +- p3.From.Type = obj.TYPE_REG +- p3.From.Reg = loong64.REGTMP +- p3.To.Type = obj.TYPE_BRANCH +- p3.To.SetTarget(p) +- s.Prog(loong64.ADBAR) +- p4 := s.Prog(loong64.AADDVU) +- p4.From.Type = obj.TYPE_REG +- p4.From.Reg = v.Args[1].Reg() +- p4.Reg = v.Reg0() +- p4.To.Type = obj.TYPE_REG +- p4.To.Reg = v.Reg0() ++ p1.To.Reg = v.Reg0() ++ + case ssa.OpLOONG64LoweredAtomicAddconst32, ssa.OpLOONG64LoweredAtomicAddconst64: + // DBAR + // LL (Rarg0), Rout +diff --git a/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go b/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go +index 350e624adc..effd02b9e7 100644 +--- a/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go ++++ b/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go +@@ -382,13 +382,6 @@ func init() { 
+ + // atomic add. + // *arg0 += arg1. arg2=mem. returns . +- // DBAR +- // LL (Rarg0), Rout +- // ADDV Rarg1, Rout, Rtmp +- // SC Rtmp, (Rarg0) +- // BEQ Rtmp, -3(PC) +- // DBAR +- // ADDV Rarg1, Rout + {name: "LoweredAtomicAdd32", argLength: 3, reg: gpxchg, resultNotInArgs: true, faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true}, + {name: "LoweredAtomicAdd64", argLength: 3, reg: gpxchg, resultNotInArgs: true, faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true}, + // *arg0 += auxint. arg1=mem. returns . auxint is 32-bit. +diff --git a/src/runtime/internal/atomic/atomic_loong64.s b/src/runtime/internal/atomic/atomic_loong64.s +index e8ffdee70e..c36e549d33 100644 +--- a/src/runtime/internal/atomic/atomic_loong64.s ++++ b/src/runtime/internal/atomic/atomic_loong64.s +@@ -78,6 +78,9 @@ TEXT ·Xadduintptr(SB), NOSPLIT, $0-24 + TEXT ·Loadint64(SB), NOSPLIT, $0-16 + JMP ·Load64(SB) + ++TEXT ·Xaddint32(SB),NOSPLIT,$0-20 ++ JMP ·Xadd(SB) ++ + TEXT ·Xaddint64(SB), NOSPLIT, $0-24 + JMP ·Xadd64(SB) + +@@ -91,34 +94,26 @@ TEXT ·Xaddint64(SB), NOSPLIT, $0-24 + TEXT ·Casp1(SB), NOSPLIT, $0-25 + JMP runtime∕internal∕atomic·Cas64(SB) + +-// uint32 xadd(uint32 volatile *ptr, int32 delta) + // Atomically: + // *val += delta; + // return *val; ++// ++//func Xadd(ptr *uint32, delta int32) uint32 + TEXT ·Xadd(SB), NOSPLIT, $0-20 + MOVV ptr+0(FP), R4 + MOVW delta+8(FP), R5 +- DBAR +- LL (R4), R6 +- ADDU R6, R5, R7 +- MOVV R7, R6 +- SC R7, (R4) +- BEQ R7, -4(PC) +- MOVW R6, ret+16(FP) +- DBAR ++ AMADDW R5, (R4), R6 ++ ADDU R6, R5, R4 ++ MOVW R4, ret+16(FP) + RET + ++// func Xadd64(ptr *uint64, delta int64) uint64 + TEXT ·Xadd64(SB), NOSPLIT, $0-24 + MOVV ptr+0(FP), R4 + MOVV delta+8(FP), R5 +- DBAR +- LLV (R4), R6 +- ADDVU R6, R5, R7 +- MOVV R7, R6 +- SCV R7, (R4) +- BEQ R7, -4(PC) +- MOVV R6, ret+16(FP) +- DBAR ++ AMADDV R5, (R4), R6 ++ ADDVU R6, R5, R4 ++ MOVV R4, ret+16(FP) + RET + + // func Xchg(ptr *uint32, new uint32) uint32 +-- +2.38.1 + diff --git 
a/0059-cmd-compiler-runtime-internal-atomic-optimize-And-32.patch b/0059-cmd-compiler-runtime-internal-atomic-optimize-And-32.patch new file mode 100644 index 0000000000000000000000000000000000000000..d5840fdd0fc0d17c638f80fa76aa8f66d2b14620 --- /dev/null +++ b/0059-cmd-compiler-runtime-internal-atomic-optimize-And-32.patch @@ -0,0 +1,399 @@ +From 4acb0344ea4db6528c59b61a69f96f270a5d2b20 Mon Sep 17 00:00:00 2001 +From: Guoqi Chen +Date: Wed, 5 Apr 2023 22:15:46 +0800 +Subject: [PATCH 59/62] cmd/compiler,runtime/internal/atomic: optimize + And{32,8} and Or{32,8} on loong64 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Use Loong64's atomic operation instruction AMANDW to implement And{32,8}, +AMORW to implement Or{32,8}, and intrinsify them. + +goos: linux +goarch: loong64 +pkg: runtime/internal/atomic + │ bench.old │ bench.new │ + │ sec/op │ sec/op vs base │ +And8 36.25n ± 0% 17.14n ± 0% -52.72% (p=0.000 n=25) +And8-2 36.19n ± 0% 17.16n ± 0% -52.58% (p=0.000 n=25) +And8-4 36.12n ± 0% 17.15n ± 0% -52.52% (p=0.000 n=25) +And8-8 36.15n ± 0% 17.15n ± 0% -52.56% (p=0.000 n=25) +And8-16 36.15n ± 0% 17.15n ± 0% -52.56% (p=0.000 n=25) +And 35.84n ± 0% 16.15n ± 0% -54.94% (p=0.000 n=25) +And-2 35.79n ± 0% 16.19n ± 0% -54.76% (p=0.000 n=25) +And-4 35.78n ± 0% 16.18n ± 0% -54.78% (p=0.000 n=25) +And-8 35.79n ± 0% 16.19n ± 0% -54.76% (p=0.000 n=25) +And-16 35.79n ± 0% 16.19n ± 0% -54.76% (p=0.000 n=25) +And8Parallel 35.43n ± 0% 17.31n ± 0% -51.14% (p=0.000 n=25) +And8Parallel-2 42.19n ± 3% 26.85n ± 2% -36.36% (p=0.000 n=25) +And8Parallel-4 46.59n ± 2% 33.31n ± 1% -28.50% (p=0.000 n=25) +And8Parallel-8 45.36n ± 1% 32.64n ± 2% -28.04% (p=0.000 n=25) +And8Parallel-16 45.24n ± 0% 33.63n ± 0% -25.66% (p=0.000 n=25) +AndParallel 34.20n ± 0% 16.39n ± 0% -52.08% (p=0.000 n=25) +AndParallel-2 42.30n ± 3% 24.54n ± 4% -41.99% (p=0.000 n=25) +AndParallel-4 44.58n ± 2% 33.64n ± 7% -24.54% (p=0.000 n=25) +AndParallel-8 45.27n ± 1% 34.14n ± 1% 
-24.59% (p=0.000 n=25) +AndParallel-16 44.77n ± 1% 33.68n ± 0% -24.77% (p=0.000 n=25) +Or8 35.84n ± 0% 16.67n ± 0% -53.49% (p=0.000 n=25) +Or8-2 35.78n ± 0% 16.67n ± 0% -53.41% (p=0.000 n=25) +Or8-4 35.82n ± 0% 16.69n ± 0% -53.41% (p=0.000 n=25) +Or8-8 35.84n ± 0% 16.68n ± 0% -53.46% (p=0.000 n=25) +Or8-16 35.76n ± 0% 16.68n ± 0% -53.36% (p=0.000 n=25) +Or 35.71n ± 0% 16.17n ± 0% -54.72% (p=0.000 n=25) +Or-2 35.78n ± 0% 16.19n ± 0% -54.75% (p=0.000 n=25) +Or-4 35.81n ± 0% 16.19n ± 0% -54.79% (p=0.000 n=25) +Or-8 35.81n ± 0% 16.19n ± 0% -54.79% (p=0.000 n=25) +Or-16 35.81n ± 0% 16.19n ± 0% -54.79% (p=0.000 n=25) +Or8Parallel 34.56n ± 0% 16.89n ± 0% -51.13% (p=0.000 n=25) +Or8Parallel-2 43.00n ± 0% 33.59n ± 19% -21.88% (p=0.000 n=25) +Or8Parallel-4 45.62n ± 0% 30.48n ± 0% -33.19% (p=0.000 n=25) +Or8Parallel-8 44.58n ± 1% 33.16n ± 2% -25.62% (p=0.000 n=25) +Or8Parallel-16 44.80n ± 0% 33.67n ± 0% -24.84% (p=0.000 n=25) +OrParallel 34.16n ± 0% 16.39n ± 0% -52.02% (p=0.000 n=25) +OrParallel-2 42.44n ± 0% 25.44n ± 1% -40.06% (p=0.000 n=25) +OrParallel-4 45.91n ± 2% 32.47n ± 4% -29.27% (p=0.000 n=25) +OrParallel-8 45.37n ± 2% 33.08n ± 2% -27.09% (p=0.000 n=25) +OrParallel-16 44.60n ± 1% 33.67n ± 0% -24.51% (p=0.000 n=25) +geomean 38.98n 21.45n -44.96% + +Updates #59120. 
+ +Change-Id: If008ff6a08b51905076f8ddb6e92f8e214d3f7b3 +--- + src/cmd/compile/internal/loong64/ssa.go | 8 ++ + .../compile/internal/ssa/_gen/LOONG64.rules | 19 +++++ + .../compile/internal/ssa/_gen/LOONG64Ops.go | 5 ++ + src/cmd/compile/internal/ssa/opGen.go | 34 ++++++++ + .../compile/internal/ssa/rewriteLOONG64.go | 81 +++++++++++++++++++ + src/cmd/compile/internal/ssagen/ssa.go | 8 +- + src/runtime/internal/atomic/atomic_loong64.s | 30 +------ + 7 files changed, 155 insertions(+), 30 deletions(-) + +diff --git a/src/cmd/compile/internal/loong64/ssa.go b/src/cmd/compile/internal/loong64/ssa.go +index b49a140e03..b992024bca 100644 +--- a/src/cmd/compile/internal/loong64/ssa.go ++++ b/src/cmd/compile/internal/loong64/ssa.go +@@ -506,6 +506,14 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) { + p.To.Reg = v.Args[0].Reg() + p.RegTo2 = v.Reg0() + ++ case ssa.OpLOONG64LoweredAtomicAnd32, ssa.OpLOONG64LoweredAtomicOr32: ++ p := s.Prog(v.Op.Asm()) ++ p.From.Type = obj.TYPE_REG ++ p.From.Reg = v.Args[1].Reg() ++ p.To.Type = obj.TYPE_MEM ++ p.To.Reg = v.Args[0].Reg() ++ p.RegTo2 = loong64.REGZERO ++ + case ssa.OpLOONG64LoweredAtomicAdd32, ssa.OpLOONG64LoweredAtomicAdd64: + // AMADDx Rarg1, (Rarg0), Rout + // ADDxU Rarg1, Rout, Rout +diff --git a/src/cmd/compile/internal/ssa/_gen/LOONG64.rules b/src/cmd/compile/internal/ssa/_gen/LOONG64.rules +index dc1a76dc93..b4bce73637 100644 +--- a/src/cmd/compile/internal/ssa/_gen/LOONG64.rules ++++ b/src/cmd/compile/internal/ssa/_gen/LOONG64.rules +@@ -404,6 +404,25 @@ + (AtomicCompareAndSwap32 ptr old new mem) => (LoweredAtomicCas32 ptr (SignExt32to64 old) new mem) + (AtomicCompareAndSwap64 ...) => (LoweredAtomicCas64 ...) 
+ ++// AtomicAnd8(ptr,val) => LoweredAtomicAnd32(ptr&^3, ^((uint8(val) ^ 0xff) << ((ptr & 3) * 8))) ++(AtomicAnd8 ptr val mem) => ++ (LoweredAtomicAnd32 (AND (MOVVconst [^3]) ptr) ++ (OR (SLLV (ZeroExt8to32 val) ++ (SLLVconst [3] ++ (ANDconst [3] ptr))) ++ (NORconst [0] (SLLV ++ (MOVVconst [0xff]) (SLLVconst [3] ++ (ANDconst [3] ptr))))) mem) ++(AtomicAnd32 ...) => (LoweredAtomicAnd32 ...) ++ ++// AtomicOr8(ptr,val) => LoweredAtomicOr32(ptr&^3, uint32(val) << ((ptr & 3) * 8)) ++(AtomicOr8 ptr val mem) => ++ (LoweredAtomicOr32 (AND (MOVVconst [^3]) ptr) ++ (SLLV (ZeroExt8to32 val) ++ (SLLVconst [3] ++ (ANDconst [3] ptr))) mem) ++(AtomicOr32 ...) => (LoweredAtomicOr32 ...) ++ + // checks + (NilCheck ...) => (LoweredNilCheck ...) + (IsNonNil ptr) => (SGTU ptr (MOVVconst [0])) +diff --git a/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go b/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go +index effd02b9e7..8043ad2aa7 100644 +--- a/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go ++++ b/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go +@@ -380,6 +380,11 @@ func init() { + {name: "LoweredAtomicExchange32", argLength: 3, reg: gpxchg, resultNotInArgs: true, faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true}, + {name: "LoweredAtomicExchange64", argLength: 3, reg: gpxchg, resultNotInArgs: true, faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true}, + ++ // Atomic 32 bit AND/OR. ++ // *arg0 &= (|=) arg1. arg2=mem. returns nil. ++ {name: "LoweredAtomicAnd32", argLength: 3, reg: gpxchg, asm: "AMANDW", faultOnNilArg0: true, hasSideEffects: true}, ++ {name: "LoweredAtomicOr32", argLength: 3, reg: gpxchg, asm: "AMORW", faultOnNilArg0: true, hasSideEffects: true}, ++ + // atomic add. + // *arg0 += arg1. arg2=mem. returns . 
+ {name: "LoweredAtomicAdd32", argLength: 3, reg: gpxchg, resultNotInArgs: true, faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true}, +diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go +index 5a001573ea..6ba5fe891a 100644 +--- a/src/cmd/compile/internal/ssa/opGen.go ++++ b/src/cmd/compile/internal/ssa/opGen.go +@@ -1824,6 +1824,8 @@ const ( + OpLOONG64LoweredAtomicStorezero64 + OpLOONG64LoweredAtomicExchange32 + OpLOONG64LoweredAtomicExchange64 ++ OpLOONG64LoweredAtomicAnd32 ++ OpLOONG64LoweredAtomicOr32 + OpLOONG64LoweredAtomicAdd32 + OpLOONG64LoweredAtomicAdd64 + OpLOONG64LoweredAtomicAddconst32 +@@ -24401,6 +24403,38 @@ var opcodeTable = [...]opInfo{ + }, + }, + }, ++ { ++ name: "LoweredAtomicAnd32", ++ argLen: 3, ++ faultOnNilArg0: true, ++ hasSideEffects: true, ++ asm: loong64.AAMANDW, ++ reg: regInfo{ ++ inputs: []inputInfo{ ++ {1, 1072693240}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 ++ {0, 4611686019500081148}, // SP R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 SB ++ }, ++ outputs: []outputInfo{ ++ {0, 1070596088}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R23 R24 R25 R26 R27 R28 R29 R31 ++ }, ++ }, ++ }, ++ { ++ name: "LoweredAtomicOr32", ++ argLen: 3, ++ faultOnNilArg0: true, ++ hasSideEffects: true, ++ asm: loong64.AAMORW, ++ reg: regInfo{ ++ inputs: []inputInfo{ ++ {1, 1072693240}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 ++ {0, 4611686019500081148}, // SP R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 SB ++ }, ++ outputs: []outputInfo{ ++ {0, 1070596088}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R23 R24 R25 R26 R27 R28 R29 R31 ++ }, ++ }, ++ }, + { + name: "LoweredAtomicAdd32", + argLen: 3, +diff --git 
a/src/cmd/compile/internal/ssa/rewriteLOONG64.go b/src/cmd/compile/internal/ssa/rewriteLOONG64.go +index 08e0d6f6c2..5c1d30b0bb 100644 +--- a/src/cmd/compile/internal/ssa/rewriteLOONG64.go ++++ b/src/cmd/compile/internal/ssa/rewriteLOONG64.go +@@ -51,6 +51,11 @@ func rewriteValueLOONG64(v *Value) bool { + case OpAtomicAdd64: + v.Op = OpLOONG64LoweredAtomicAdd64 + return true ++ case OpAtomicAnd32: ++ v.Op = OpLOONG64LoweredAtomicAnd32 ++ return true ++ case OpAtomicAnd8: ++ return rewriteValueLOONG64_OpAtomicAnd8(v) + case OpAtomicCompareAndSwap32: + return rewriteValueLOONG64_OpAtomicCompareAndSwap32(v) + case OpAtomicCompareAndSwap64: +@@ -74,6 +79,11 @@ func rewriteValueLOONG64(v *Value) bool { + case OpAtomicLoadPtr: + v.Op = OpLOONG64LoweredAtomicLoad64 + return true ++ case OpAtomicOr32: ++ v.Op = OpLOONG64LoweredAtomicOr32 ++ return true ++ case OpAtomicOr8: ++ return rewriteValueLOONG64_OpAtomicOr8(v) + case OpAtomicStore32: + v.Op = OpLOONG64LoweredAtomicStore32 + return true +@@ -719,6 +729,46 @@ func rewriteValueLOONG64_OpAddr(v *Value) bool { + return true + } + } ++func rewriteValueLOONG64_OpAtomicAnd8(v *Value) bool { ++ v_2 := v.Args[2] ++ v_1 := v.Args[1] ++ v_0 := v.Args[0] ++ b := v.Block ++ typ := &b.Func.Config.Types ++ // match: (AtomicAnd8 ptr val mem) ++ // result: (LoweredAtomicAnd32 (AND (MOVVconst [^3]) ptr) (OR (SLLV (ZeroExt8to32 val) (SLLVconst [3] (ANDconst [3] ptr))) (NORconst [0] (SLLV (MOVVconst [0xff]) (SLLVconst [3] (ANDconst [3] ptr))))) mem) ++ for { ++ ptr := v_0 ++ val := v_1 ++ mem := v_2 ++ v.reset(OpLOONG64LoweredAtomicAnd32) ++ v0 := b.NewValue0(v.Pos, OpLOONG64AND, typ.Uintptr) ++ v1 := b.NewValue0(v.Pos, OpLOONG64MOVVconst, typ.UInt64) ++ v1.AuxInt = int64ToAuxInt(^3) ++ v0.AddArg2(v1, ptr) ++ v2 := b.NewValue0(v.Pos, OpLOONG64OR, typ.UInt64) ++ v3 := b.NewValue0(v.Pos, OpLOONG64SLLV, typ.UInt32) ++ v4 := b.NewValue0(v.Pos, OpZeroExt8to32, typ.UInt32) ++ v4.AddArg(val) ++ v5 := b.NewValue0(v.Pos, OpLOONG64SLLVconst, 
typ.UInt64) ++ v5.AuxInt = int64ToAuxInt(3) ++ v6 := b.NewValue0(v.Pos, OpLOONG64ANDconst, typ.UInt64) ++ v6.AuxInt = int64ToAuxInt(3) ++ v6.AddArg(ptr) ++ v5.AddArg(v6) ++ v3.AddArg2(v4, v5) ++ v7 := b.NewValue0(v.Pos, OpLOONG64NORconst, typ.UInt64) ++ v7.AuxInt = int64ToAuxInt(0) ++ v8 := b.NewValue0(v.Pos, OpLOONG64SLLV, typ.UInt64) ++ v9 := b.NewValue0(v.Pos, OpLOONG64MOVVconst, typ.UInt64) ++ v9.AuxInt = int64ToAuxInt(0xff) ++ v8.AddArg2(v9, v5) ++ v7.AddArg(v8) ++ v2.AddArg2(v3, v7) ++ v.AddArg3(v0, v2, mem) ++ return true ++ } ++} + func rewriteValueLOONG64_OpAtomicCompareAndSwap32(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] +@@ -740,6 +790,37 @@ func rewriteValueLOONG64_OpAtomicCompareAndSwap32(v *Value) bool { + return true + } + } ++func rewriteValueLOONG64_OpAtomicOr8(v *Value) bool { ++ v_2 := v.Args[2] ++ v_1 := v.Args[1] ++ v_0 := v.Args[0] ++ b := v.Block ++ typ := &b.Func.Config.Types ++ // match: (AtomicOr8 ptr val mem) ++ // result: (LoweredAtomicOr32 (AND (MOVVconst [^3]) ptr) (SLLV (ZeroExt8to32 val) (SLLVconst [3] (ANDconst [3] ptr))) mem) ++ for { ++ ptr := v_0 ++ val := v_1 ++ mem := v_2 ++ v.reset(OpLOONG64LoweredAtomicOr32) ++ v0 := b.NewValue0(v.Pos, OpLOONG64AND, typ.Uintptr) ++ v1 := b.NewValue0(v.Pos, OpLOONG64MOVVconst, typ.UInt64) ++ v1.AuxInt = int64ToAuxInt(^3) ++ v0.AddArg2(v1, ptr) ++ v2 := b.NewValue0(v.Pos, OpLOONG64SLLV, typ.UInt32) ++ v3 := b.NewValue0(v.Pos, OpZeroExt8to32, typ.UInt32) ++ v3.AddArg(val) ++ v4 := b.NewValue0(v.Pos, OpLOONG64SLLVconst, typ.UInt64) ++ v4.AuxInt = int64ToAuxInt(3) ++ v5 := b.NewValue0(v.Pos, OpLOONG64ANDconst, typ.UInt64) ++ v5.AuxInt = int64ToAuxInt(3) ++ v5.AddArg(ptr) ++ v4.AddArg(v5) ++ v2.AddArg2(v3, v4) ++ v.AddArg3(v0, v2, mem) ++ return true ++ } ++} + func rewriteValueLOONG64_OpAvg64u(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] +diff --git a/src/cmd/compile/internal/ssagen/ssa.go b/src/cmd/compile/internal/ssagen/ssa.go +index 526332294c..6bdb4a9ea5 100644 +--- 
a/src/cmd/compile/internal/ssagen/ssa.go ++++ b/src/cmd/compile/internal/ssagen/ssa.go +@@ -4238,25 +4238,25 @@ func InitTables() { + s.vars[memVar] = s.newValue3(ssa.OpAtomicAnd8, types.TypeMem, args[0], args[1], s.mem()) + return nil + }, +- sys.AMD64, sys.MIPS, sys.PPC64, sys.RISCV64, sys.S390X) ++ sys.AMD64, sys.Loong64, sys.MIPS, sys.PPC64, sys.RISCV64, sys.S390X) + addF("runtime/internal/atomic", "And", + func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + s.vars[memVar] = s.newValue3(ssa.OpAtomicAnd32, types.TypeMem, args[0], args[1], s.mem()) + return nil + }, +- sys.AMD64, sys.MIPS, sys.PPC64, sys.RISCV64, sys.S390X) ++ sys.AMD64, sys.Loong64, sys.MIPS, sys.PPC64, sys.RISCV64, sys.S390X) + addF("runtime/internal/atomic", "Or8", + func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + s.vars[memVar] = s.newValue3(ssa.OpAtomicOr8, types.TypeMem, args[0], args[1], s.mem()) + return nil + }, +- sys.AMD64, sys.ARM64, sys.MIPS, sys.PPC64, sys.RISCV64, sys.S390X) ++ sys.AMD64, sys.ARM64, sys.Loong64, sys.MIPS, sys.PPC64, sys.RISCV64, sys.S390X) + addF("runtime/internal/atomic", "Or", + func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + s.vars[memVar] = s.newValue3(ssa.OpAtomicOr32, types.TypeMem, args[0], args[1], s.mem()) + return nil + }, +- sys.AMD64, sys.MIPS, sys.PPC64, sys.RISCV64, sys.S390X) ++ sys.AMD64, sys.Loong64, sys.MIPS, sys.PPC64, sys.RISCV64, sys.S390X) + + atomicAndOrEmitterARM64 := func(s *state, n *ir.CallExpr, args []*ssa.Value, op ssa.Op, typ types.Kind) { + s.vars[memVar] = s.newValue3(op, types.TypeMem, args[0], args[1], s.mem()) +diff --git a/src/runtime/internal/atomic/atomic_loong64.s b/src/runtime/internal/atomic/atomic_loong64.s +index c36e549d33..1d4d4b4b37 100644 +--- a/src/runtime/internal/atomic/atomic_loong64.s ++++ b/src/runtime/internal/atomic/atomic_loong64.s +@@ -191,13 +191,7 @@ TEXT ·Or8(SB), NOSPLIT, $0-9 + SLLV $3, R7 + // Shift val for aligned ptr. 
R5 = val << R4 + SLLV R7, R5 +- +- DBAR +- LL (R6), R7 +- OR R5, R7 +- SC R7, (R6) +- BEQ R7, -4(PC) +- DBAR ++ AMORW R5, (R6), R0 + RET + + // void And8(byte volatile*, byte); +@@ -216,37 +210,21 @@ TEXT ·And8(SB), NOSPLIT, $0-9 + SLLV R7, R8 + NOR R0, R8 + OR R8, R5 +- +- DBAR +- LL (R6), R7 +- AND R5, R7 +- SC R7, (R6) +- BEQ R7, -4(PC) +- DBAR ++ AMANDW R5, (R6), R0 + RET + + // func Or(addr *uint32, v uint32) + TEXT ·Or(SB), NOSPLIT, $0-12 + MOVV ptr+0(FP), R4 + MOVW val+8(FP), R5 +- DBAR +- LL (R4), R6 +- OR R5, R6 +- SC R6, (R4) +- BEQ R6, -4(PC) +- DBAR ++ AMORW R5, (R4), R0 + RET + + // func And(addr *uint32, v uint32) + TEXT ·And(SB), NOSPLIT, $0-12 + MOVV ptr+0(FP), R4 + MOVW val+8(FP), R5 +- DBAR +- LL (R4), R6 +- AND R5, R6 +- SC R6, (R4) +- BEQ R6, -4(PC) +- DBAR ++ AMANDW R5, (R4), R0 + RET + + // uint32 runtime∕internal∕atomic·Load(uint32 volatile* ptr) +-- +2.38.1 + diff --git a/0060-Revert-cmd-link-workaround-linkshared-test-errors-on.patch b/0060-Revert-cmd-link-workaround-linkshared-test-errors-on.patch new file mode 100644 index 0000000000000000000000000000000000000000..5913c2986fc19180c0a37ca52581078b79ec2b37 --- /dev/null +++ b/0060-Revert-cmd-link-workaround-linkshared-test-errors-on.patch @@ -0,0 +1,36 @@ +From e69ffe14ac9a460ca960ef608f4002f438397756 Mon Sep 17 00:00:00 2001 +From: Guoqi Chen +Date: Thu, 11 May 2023 12:26:21 +0800 +Subject: [PATCH 60/62] Revert "cmd/link: workaround linkshared test errors on + loong64." + +This reverts commit 2f13efe6c594446137ecec51688d029fa2df438b. 
+ +binutils has fixed the bug: + https://sourceware.org/pipermail/binutils/2023-May/127394.html + +Change-Id: I94ffb9ee3fa7c1e0c66280185e32b83977830f5d +--- + src/cmd/link/internal/ld/lib.go | 6 ------ + 1 file changed, 6 deletions(-) + +diff --git a/src/cmd/link/internal/ld/lib.go b/src/cmd/link/internal/ld/lib.go +index 54672c0686..03b9f11608 100644 +--- a/src/cmd/link/internal/ld/lib.go ++++ b/src/cmd/link/internal/ld/lib.go +@@ -1790,12 +1790,6 @@ func (ctxt *Link) hostlink() { + argv = append(argv, peimporteddlls()...) + } + +- if ctxt.Arch.Family == sys.Loong64 { +- if (ctxt.BuildMode != BuildModeShared) && ctxt.linkShared { +- argv = append(argv, "-pie") +- } +- } +- + if ctxt.Debugvlog != 0 { + ctxt.Logf("host link:") + for _, v := range argv { +-- +2.38.1 + diff --git a/0061-syscall-implement-Ptrace-Set-Get-Regs-using-PTRACE_-.patch b/0061-syscall-implement-Ptrace-Set-Get-Regs-using-PTRACE_-.patch new file mode 100644 index 0000000000000000000000000000000000000000..fff86314e833923d45d1dbb1b280d3940bd436d0 --- /dev/null +++ b/0061-syscall-implement-Ptrace-Set-Get-Regs-using-PTRACE_-.patch @@ -0,0 +1,91 @@ +From 84e0a4152e35fd2eaf818593fcffdf344f19644a Mon Sep 17 00:00:00 2001 +From: chenguoqi +Date: Fri, 9 Jun 2023 09:54:28 +0800 +Subject: [PATCH 61/62] syscall: implement Ptrace{Set,Get}Regs using + PTRACE_{GET,SET}REGSET on all linux platforms + +In the ptrace system call, most of the newer architectures (e.g. arm64,riscv64,loong64) +do not provide support for the command PTRACE_{GET, SET}REGS. + +The Linux kernel 2.6.33-rc7[1] introduces support for the command PTRACE_{GET,SET}REGSET, +which exports different types of register sets depending on the NT_* types, completely +overriding the functionality provided by PTRACE_{GET,SET}REGS. + +[1] https://lore.kernel.org/all/20100211195614.886724710@sbs-t61.sc.intel.com/ + +Fixes #60679. 
+ +Change-Id: I8c2671d64a7ecd654834740f4f1e1e50c00edcae + +Signed-off-by: chenguoqi +Change-Id: I3839003de99d8ea2e346e17184c309e17a0685ac +(cherry picked from commit cadb91e0ca34c9b77ee0c9e1a6594131bd636c77) +--- + .../vendor/golang.org/x/sys/unix/syscall_linux.go | 11 +++++++++-- + src/syscall/syscall_linux.go | 14 ++++++++++++-- + 2 files changed, 21 insertions(+), 4 deletions(-) + +diff --git a/src/cmd/vendor/golang.org/x/sys/unix/syscall_linux.go b/src/cmd/vendor/golang.org/x/sys/unix/syscall_linux.go +index c5a98440ec..d20ea9de72 100644 +--- a/src/cmd/vendor/golang.org/x/sys/unix/syscall_linux.go ++++ b/src/cmd/vendor/golang.org/x/sys/unix/syscall_linux.go +@@ -12,6 +12,7 @@ + package unix + + import ( ++ "debug/elf" + "encoding/binary" + "strconv" + "syscall" +@@ -1696,11 +1697,17 @@ func PtracePokeUser(pid int, addr uintptr, data []byte) (count int, err error) { + } + + func PtraceGetRegs(pid int, regsout *PtraceRegs) (err error) { +- return ptrace(PTRACE_GETREGS, pid, 0, uintptr(unsafe.Pointer(regsout))) ++ var iov Iovec ++ iov.Base = (*byte)(unsafe.Pointer(regsout)) ++ iov.SetLen(int(unsafe.Sizeof(*regsout))) ++ return ptrace(PTRACE_GETREGSET, pid, uintptr(elf.NT_PRSTATUS), uintptr(unsafe.Pointer(&iov))) + } + + func PtraceSetRegs(pid int, regs *PtraceRegs) (err error) { +- return ptrace(PTRACE_SETREGS, pid, 0, uintptr(unsafe.Pointer(regs))) ++ var iov Iovec ++ iov.Base = (*byte)(unsafe.Pointer(regs)) ++ iov.SetLen(int(unsafe.Sizeof(*regs))) ++ return ptrace(PTRACE_SETREGSET, pid, uintptr(elf.NT_PRSTATUS), uintptr(unsafe.Pointer(&iov))) + } + + func PtraceSetOptions(pid int, options int) (err error) { +diff --git a/src/syscall/syscall_linux.go b/src/syscall/syscall_linux.go +index d1c981a9b3..1c3ca39dd4 100644 +--- a/src/syscall/syscall_linux.go ++++ b/src/syscall/syscall_linux.go +@@ -936,12 +936,22 @@ func PtracePokeData(pid int, addr uintptr, data []byte) (count int, err error) { + return ptracePoke(PTRACE_POKEDATA, PTRACE_PEEKDATA, pid, addr, data) + } 
+ ++const ( ++ _NT_PRSTATUS = 1 ++) ++ + func PtraceGetRegs(pid int, regsout *PtraceRegs) (err error) { +- return ptrace(PTRACE_GETREGS, pid, 0, uintptr(unsafe.Pointer(regsout))) ++ var iov Iovec ++ iov.Base = (*byte)(unsafe.Pointer(regsout)) ++ iov.SetLen(int(unsafe.Sizeof(*regsout))) ++ return ptrace(PTRACE_GETREGSET, pid, uintptr(_NT_PRSTATUS), uintptr(unsafe.Pointer(&iov))) + } + + func PtraceSetRegs(pid int, regs *PtraceRegs) (err error) { +- return ptrace(PTRACE_SETREGS, pid, 0, uintptr(unsafe.Pointer(regs))) ++ var iov Iovec ++ iov.Base = (*byte)(unsafe.Pointer(regs)) ++ iov.SetLen(int(unsafe.Sizeof(*regs))) ++ return ptrace(PTRACE_SETREGSET, pid, uintptr(_NT_PRSTATUS), uintptr(unsafe.Pointer(&iov))) + } + + func PtraceSetOptions(pid int, options int) (err error) { +-- +2.38.1 + diff --git a/0062-cmd-internal-obj-loong64-remove-the-invalid-plan9-fo.patch b/0062-cmd-internal-obj-loong64-remove-the-invalid-plan9-fo.patch new file mode 100644 index 0000000000000000000000000000000000000000..5e99a2acb1fac6c253db187609022d07b93a0496 --- /dev/null +++ b/0062-cmd-internal-obj-loong64-remove-the-invalid-plan9-fo.patch @@ -0,0 +1,177 @@ +From 55c430fa956299a1056b79a0fe7da893f05a6364 Mon Sep 17 00:00:00 2001 +From: Guoqi Chen +Date: Mon, 8 May 2023 06:20:21 +0800 +Subject: [PATCH 62/62] cmd/internal/obj/loong64: remove the invalid plan9 + format of the BREAK instruction + +In the three formats corresponding to case 7 of the function asmout, BREAK actually +corresponds to the cacop instruction of Loong64, refer to the loong64 instruction +manual volume 1 [1], the cacop instruction is a privileged instruction used to +maintain the cache, and the user mode does not have permission to execute. + +Referring to the loong64 instruction manual volume 1 [1], the format of SYSCALL, +BREAK, DBAR and NOOP instructions is similar and can be grouped into one category. 
+ +[1]: https://loongson.github.io/LoongArch-Documentation/LoongArch-Vol1-EN.html + +Change-Id: I0b8998270102d1557fc2b2410cf8c0b078bd0c2e +--- + .../asm/internal/asm/testdata/loong64enc1.s | 2 - + src/cmd/internal/obj/loong64/asm.go | 51 ++++++++++--------- + 2 files changed, 27 insertions(+), 26 deletions(-) + +diff --git a/src/cmd/asm/internal/asm/testdata/loong64enc1.s b/src/cmd/asm/internal/asm/testdata/loong64enc1.s +index 288408b010..701515cf4c 100644 +--- a/src/cmd/asm/internal/asm/testdata/loong64enc1.s ++++ b/src/cmd/asm/internal/asm/testdata/loong64enc1.s +@@ -141,8 +141,6 @@ lable2: + MOVV R4, F5 // 85a81401 + MOVV F4, R5 // 85b81401 + WORD $74565 // 45230100 +- BREAK R4, result+16(FP) // 64600006 +- BREAK R4, 1(R5) // a4040006 + BREAK // 00002a00 + UNDEF // 00002a00 + +diff --git a/src/cmd/internal/obj/loong64/asm.go b/src/cmd/internal/obj/loong64/asm.go +index 2b505ad749..c2efe0eef1 100644 +--- a/src/cmd/internal/obj/loong64/asm.go ++++ b/src/cmd/internal/obj/loong64/asm.go +@@ -354,11 +354,6 @@ var optab = []Optab{ + {ATEQ, C_SCON, C_REG, C_NONE, C_REG, C_NONE, 15, 8, 0, 0}, + {ATEQ, C_SCON, C_NONE, C_NONE, C_REG, C_NONE, 15, 8, 0, 0}, + +- {ABREAK, C_REG, C_NONE, C_NONE, C_SEXT, C_NONE, 7, 4, 0, 0}, // really CACHE instruction +- {ABREAK, C_REG, C_NONE, C_NONE, C_SAUTO, C_NONE, 7, 4, REGSP, 0}, +- {ABREAK, C_REG, C_NONE, C_NONE, C_SOREG, C_NONE, 7, 4, REGZERO, 0}, +- {ABREAK, C_NONE, C_NONE, C_NONE, C_NONE, C_NONE, 5, 4, 0, 0}, +- + {ARDTIMELW, C_NONE, C_NONE, C_NONE, C_REG, C_REG, 62, 4, 0, 0}, + {ARDTIMEHW, C_NONE, C_NONE, C_NONE, C_REG, C_REG, 62, 4, 0, 0}, + {ARDTIMED, C_NONE, C_NONE, C_NONE, C_REG, C_REG, 62, 4, 0, 0}, +@@ -596,7 +591,7 @@ func span0(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { + v := pcAlignPadLength(p.Pc, alignedValue, c.ctxt) + for i = 0; i < int32(v/4); i++ { + // emit ANOOP instruction by the padding size +- c.ctxt.Arch.ByteOrder.PutUint32(bp, c.oprrr(ANOOP)) ++ c.ctxt.Arch.ByteOrder.PutUint32(bp, 
c.opi(ANOOP)) + bp = bp[4:] + } + continue +@@ -1160,6 +1155,7 @@ func buildop(ctxt *obj.Link) { + case ASYSCALL: + opset(ADBAR, r0) + opset(ANOOP, r0) ++ opset(ABREAK, r0) + + case ACMPEQF: + opset(ACMPGTF, r0) +@@ -1181,7 +1177,6 @@ func buildop(ctxt *obj.Link) { + AMOVD, + AMOVF, + AMOVV, +- ABREAK, + ARFE, + AJAL, + AJMP, +@@ -1267,6 +1262,10 @@ func OP_IR(op uint32, i uint32, r2 uint32) uint32 { + return op | (i&0xFFFFF)<<5 | (r2&0x1F)<<0 // ui20, rd5 + } + ++func OP_I(op uint32, i uint32) uint32 { ++ return op | (i&0x7FFF)<<0 ++} ++ + // Encoding for the 'b' or 'bl' instruction. + func OP_B_BL(op uint32, i uint32) uint32 { + return op | ((i & 0xFFFF) << 10) | ((i >> 16) & 0x3FF) +@@ -1332,7 +1331,7 @@ func (c *ctxt0) asmout(p *obj.Prog, o *Optab, out []uint32) { + o1 = OP_12IRR(c.opirr(p.As), uint32(v), uint32(r), uint32(p.To.Reg)) + + case 5: // syscall +- o1 = c.oprrr(p.As) ++ o1 = c.opi(p.As) + + case 6: // beq r1,[r2],sbra + v := int32(0) +@@ -1476,7 +1475,7 @@ func (c *ctxt0) asmout(p *obj.Prog, o *Optab, out []uint32) { + } else { // ATNE + o1 = OP_16IRR(c.opirr(ABEQ), uint32(2), uint32(r), uint32(p.To.Reg)) + } +- o2 = c.oprrr(ABREAK) | (uint32(v) & 0x7FFF) ++ o2 = OP_I(c.opi(ABREAK), uint32(v)) + + case 16: // sll $c,[r1],r2 + v := c.regoff(&p.From) +@@ -1647,7 +1646,7 @@ func (c *ctxt0) asmout(p *obj.Prog, o *Optab, out []uint32) { + o1 = OP_RR(a, uint32(p.From.Reg), uint32(p.To.Reg)) + + case 49: // undef +- o1 = c.oprrr(ABREAK) ++ o1 = c.opi(ABREAK) + + // relocation operations + case 50: // mov r,addr ==> pcalau12i + sw +@@ -1965,10 +1964,6 @@ func (c *ctxt0) oprrr(a obj.As) uint32 { + case AJAL: + return (0x13 << 26) | 1 // jirl r1, rj, 0 + +- case ABREAK: +- return 0x54 << 15 +- case ASYSCALL: +- return 0x56 << 15 + case ADIVF: + return 0x20d << 15 + case ADIVD: +@@ -2042,12 +2037,6 @@ func (c *ctxt0) oprrr(a obj.As) uint32 { + return 0x4511 << 10 + case ASQRTD: + return 0x4512 << 10 +- +- case ADBAR: +- return 0x70e4 << 15 +- case ANOOP: +- // 
andi r0, r0, 0 +- return 0x03400000 + } + + if a < 0 { +@@ -2076,6 +2065,24 @@ func (c *ctxt0) oprr(a obj.As) uint32 { + return 0 + } + ++func (c *ctxt0) opi(a obj.As) uint32 { ++ switch a { ++ case ASYSCALL: ++ return 0x56 << 15 ++ case ABREAK: ++ return 0x54 << 15 ++ case ADBAR: ++ return 0x70e4 << 15 ++ case ANOOP: ++ // andi r0, r0, 0 ++ return 0x03400000 ++ } ++ ++ c.ctxt.Diag("bad ic opcode %v", a) ++ ++ return 0 ++} ++ + func (c *ctxt0) opir(a obj.As) uint32 { + switch a { + case ALU12IW: +@@ -2172,10 +2179,6 @@ func (c *ctxt0) opirr(a obj.As) uint32 { + return 0x0be << 22 + case AMOVVR: + return 0x0bf << 22 +- +- case ABREAK: +- return 0x018 << 22 +- + case -AMOVWL: + return 0x0b8 << 22 + case -AMOVWR: +-- +2.38.1 + diff --git a/golang.spec b/golang.spec index 5f56f8c01b7d7e478abab3fa5419e3d23998e44d..0a0a77e165e712820a260b3718d00163e2f5a110 100644 --- a/golang.spec +++ b/golang.spec @@ -1,4 +1,4 @@ -%define anolis_release 1 +%define anolis_release 2 # Disable debuginfo packages %global debug_package %{nil} @@ -107,6 +107,64 @@ Requires: %{name}-src = %{version}-%{release} Patch3: 0001-cmd-go-use-aliyun-proxy-and-local-sumdb.patch Patch4: 0004-cmd-link-use-gold-on-ARM-ARM64-only-if-gold-is-avail.patch +Patch5: 0005-cmd-asm-add-RDTIME-L-H-.W-RDTIME.D-support-for-loong.patch +Patch6: 0006-runtime-implement-cputicks-with-the-stable-counter-o.patch +Patch7: 0007-runtime-remove-the-fake-mstart-caller-in-systemstack.patch +Patch8: 0008-cmd-internal-obj-loong64-save-LR-after-decrementing-.patch +Patch9: 0009-runtime-refactor-the-linux-loong64-entrypoint.patch +Patch10: 0010-cmd-internal-obj-loong64-remove-invalid-branch-delay.patch +Patch11: 0011-runtime-calculate-nanoseconds-in-usleep-on-linux-loo.patch +Patch12: 0012-cmd-internal-obj-remove-redundant-cnames-on-loong64.patch +Patch13: 0013-runtime-save-fetch-g-register-during-VDSO-on-loong64.patch +Patch14: 0014-runtime-save-restore-callee-saved-registers-in-loong.patch +Patch15: 
0015-runtime-add-comment-for-sys_linux_loong64.patch +Patch16: 0016-runtime-mark-morestack_noctxt-SPWRITE-for-linux-loon.patch +Patch17: 0017-cmd-internal-obj-loong64-add-the-PCALAU12I-instructi.patch +Patch18: 0018-cmd-internal-obj-loong64-cmd-link-internal-loong64-s.patch +Patch19: 0019-runtime-add-support-for-buildmode-c-shared-on-loong6.patch +Patch20: 0020-cmd-compile-add-support-for-buildmode-c-shared-on-lo.patch +Patch21: 0021-cmd-internal-obj-loong64-cmd-internal-objabi-add-c-s.patch +Patch22: 0022-cmd-link-add-support-for-buildmode-c-shared-on-loong.patch +Patch23: 0023-cmd-internal-sys-enable-c-shared-feature-on-loong64.patch +Patch24: 0024-cmd-dist-misc-cgo-testcshared-enable-c-shared-test-o.patch +Patch25: 0025-cmd-link-cmd-internal-in-shared-mode-change-to-use-I.patch +Patch26: 0026-cmd-compile-cmd-dist-cmd-go-internal-enable-buildmod.patch +Patch27: 0027-net-disable-TestLookupDotsWithRemoteSource-and-TestL.patch +Patch28: 0028-enable-c-archive-test-on-linux-loong64.patch +Patch29: 0029-cmd-internal-cmd-link-remove-invalid-GOT-relative-re.patch +Patch30: 0030-runtime-no-need-to-save-these-registers-in-load_g-sa.patch +Patch31: 0031-cmd-internal-obj-loong64-add-support-for-movgr2cf-an.patch +Patch32: 0032-runtime-save-and-restore-fcc-registers-in-async-pree.patch +Patch33: 0033-cmd-cmd-vendor-pick-up-updates-for-golang.org-x-arch.patch +Patch34: 0034-cmd-internal-objfile-add-loong64-disassembler-suppor.patch +Patch35: 0035-cmd-compile-link-internal-runtime-support-buildmode-.patch +Patch36: 0036-cmd-link-internal-support-buildmode-plugin-for-loong.patch +Patch37: 0037-cmd-dist-test-enable-buildmode-shared-plugin-test-fo.patch +Patch38: 0038-runtime-enable-memory-sanitizer-on-loong64.patch +Patch39: 0039-runtime-enable-address-sanitizer-on-loong64.patch +Patch40: 0040-cmd-link-workaround-linkshared-test-errors-on-loong6.patch +Patch41: 0041-runtime-remove-the-meaningless-offset-of-8-for-duffz.patch +Patch42: 
0042-cmd-compiler-remove-the-meaningless-offset-of-8-for-.patch +Patch43: 0043-cmd-compile-internal-runtime-use-NOOP-for-hardware-N.patch +Patch44: 0044-cmd-link-internal-loong64-use-BREAK-0-as-the-code-pa.patch +Patch45: 0045-cmd-asm-runtime-remove-the-RSB-register-from-loong64.patch +Patch46: 0046-cmd-internal-obj-loong64-realize-all-unconditional-j.patch +Patch47: 0047-cmd-internal-obj-loong64-clean-up-code-for-short-con.patch +Patch48: 0048-cmd-internal-obj-loong64-assemble-BEQ-BNEs-comparing.patch +Patch49: 0049-cmd-internal-obj-loong64-remove-Optab.family-and-reo.patch +Patch50: 0050-cmd-asm-support-the-PCALIGN-directive-on-loong64.patch +Patch51: 0051-internal-bytealg-runtime-align-some-loong64-asm-loop.patch +Patch52: 0052-cmd-link-bump-loong64-function-alignment-to-16-bytes.patch +Patch53: 0053-cmd-compile-optimize-multiplication-on-loong64.patch +Patch54: 0054-cmd-compile-split-DIVV-DIVVU-op-on-loong64.patch +Patch55: 0055-cmd-internal-obj-loong64-auto-align-loop-heads-to-16.patch +Patch56: 0056-cmd-internal-obj-loong64-add-atomic-memory-access-in.patch +Patch57: 0057-cmd-compiler-runtime-internal-atomic-optimize-xchg-a.patch +Patch58: 0058-cmd-compiler-runtime-internal-atomic-optimize-xadd-a.patch +Patch59: 0059-cmd-compiler-runtime-internal-atomic-optimize-And-32.patch +Patch60: 0060-Revert-cmd-link-workaround-linkshared-test-errors-on.patch +Patch61: 0061-syscall-implement-Ptrace-Set-Get-Regs-using-PTRACE_-.patch +Patch62: 0062-cmd-internal-obj-loong64-remove-the-invalid-plan9-fo.patch Source100: golang-gdbinit @@ -539,6 +597,66 @@ fi %files docs -f go-docs.list %changelog +* Wed Oct 18 2023 Meidan Li - 1.20.9-2 +- cmd/asm: add RDTIME{L,H}.W, RDTIME.D support for loong64 +- runtime: implement cputicks with the stable counter on loong64 +- runtime: remove the fake mstart caller in systemstack on linux/loong64 +- cmd/internal/obj/loong64: save LR after decrementing SP +- runtime: refactor the linux/loong64 entrypoint +- cmd/internal/obj/loong64: remove 
invalid branch delay slots +- runtime: calculate nanoseconds in usleep on linux/loong64 +- cmd/internal/obj: remove redundant cnames on loong64 +- runtime: save/fetch g register during VDSO on loong64 +- runtime: save/restore callee-saved registers in loong64's sigtramp +- runtime: add comment for sys_linux_loong64 +- runtime: mark morestack_noctxt SPWRITE for linux/loong64 +- cmd/internal/obj/loong64: add the PCALAU12I instruction for reloc use +- cmd/internal/obj/loong64, cmd/link/internal/loong64: switch to LoongArch ELF psABI v2 relocs +- runtime: add support for --buildmode=c-shared on loong64 +- cmd/compile: add support for --buildmode=c-shared on loong64 +- cmd/internal/obj/loong64, cmd/internal/objabi: add c-shared relocations on loong64 +- cmd/link: add support for --buildmode=c-shared on loong64 +- cmd/internal/sys: enable c-shared feature on loong64 +- cmd/dist, misc/cgo/testcshared: enable c-shared test on loong64 +- cmd/link, cmd/internal: in shared mode, change to use IE mode to access TLS variables +- cmd/compile, cmd/dist, cmd/go, internal: enable buildmode=pie for linux/loong64 +- net: disable TestLookupDotsWithRemoteSource and TestLookupGoogleSRV +- enable c-archive test on linux/loong64 +- cmd/internal, cmd/link: remove invalid GOT relative relocations +- runtime: no need to save these registers in load_g&save_g +- cmd/internal/obj/loong64: add support for movgr2cf and movcf2gr instructions +- runtime: save and restore fcc registers in async preempt on loong64 +- cmd,cmd/vendor: pick up updates for golang.org/x/arch/loong64 +- cmd/internal/objfile: add loong64 disassembler support +- cmd/{compile,link,internal},runtime: support -buildmode=shared for loong64 +- cmd/{link,internal}: support -buildmode=plugin for loong64 +- cmd/dist/test: enable buildmode={shared,plugin} test for loong64 +- runtime: enable memory sanitizer on loong64 +- runtime: enable address sanitizer on loong64 +- cmd/link: workaround linkshared test errors on loong64. 
+- runtime: remove the meaningless offset of 8 for duffzero on loong64 +- cmd/compiler: remove the meaningless offset of 8 for Lowered{Zero,Move} on loong64 +- cmd/compile/internal, runtime: use NOOP for hardware NOPs on loong64 +- cmd/link/internal/loong64: use BREAK 0 as the code pad sequence +- cmd/asm, runtime: remove the RSB register from loong64 +- cmd/internal/obj/loong64: realize all unconditional jumps with B/BL +- cmd/internal/obj/loong64: clean up code for short conditional branches +- cmd/internal/obj/loong64: assemble BEQ/BNEs comparing with 0 as beqz/bnez +- cmd/internal/obj/loong64: remove Optab.family and reorganize operand class fields +- cmd/asm: support the PCALIGN directive on loong64 +- internal/bytealg, runtime: align some loong64 asm loops to 16-byte boundaries +- cmd/link: bump loong64 function alignment to 16 bytes +- cmd/compile: optimize multiplication on loong64 +- cmd/compile: split DIVV/DIVVU op on loong64 +- cmd/internal/obj/loong64: auto-align loop heads to 16-byte boundaries +- cmd/internal/obj/loong64: add atomic memory access instructions support +- cmd/compiler,runtime/internal/atomic: optimize xchg and xchg64 on loong64 +- cmd/compiler,runtime/internal/atomic: optimize xadd and xadd64 on loong64 +- cmd/compiler,runtime/internal/atomic: optimize And{32,8} and Or{32,8} on loong64 +- Revert "cmd/link: workaround linkshared test errors on loong64." +- syscall: implement Ptrace{Set,Get}Regs using PTRACE_{GET,SET}REGSET on all linux platforms +- cmd/internal/obj/loong64: remove the invalid plan9 format of the BREAK instruction + * Mon Oct 9 2023 Funda Wang - 1.20.9-1 - New version 1.20.9 - Use local sumdb rather than disable it