diff --git a/0001-cmd-dist-cmd-link-internal-runtime-add-buildmode-plu.patch b/0001-cmd-dist-cmd-link-internal-runtime-add-buildmode-plu.patch deleted file mode 100644 index 9931dd7b7568db314b07d6a285757d8118ab8f50..0000000000000000000000000000000000000000 --- a/0001-cmd-dist-cmd-link-internal-runtime-add-buildmode-plu.patch +++ /dev/null @@ -1,1318 +0,0 @@ -From 01cba1b97a827118d3e3e7171e7457d4e036f78c Mon Sep 17 00:00:00 2001 -From: Guoqi Chen -Date: Fri, 7 Jul 2023 07:18:59 +0800 -Subject: [PATCH 01/51] cmd/dist, cmd/link, internal, runtime: add - buildmode={plugin,shared} support for linux/loong64. - -Signed-off-by: Guoqi Chen -Change-Id: I0e7843c7d61420af1c59778ad1e0ecc8dc3bca57 ---- - src/cmd/compile/internal/liveness/plive.go | 7 +- - .../compile/internal/ssa/_gen/LOONG64.rules | 85 +++----- - src/cmd/compile/internal/ssa/regalloc.go | 2 + - .../compile/internal/ssa/rewriteLOONG64.go | 190 +++++++++++------- - src/cmd/dist/test.go | 4 +- - src/cmd/internal/obj/loong64/a.out.go | 1 + - src/cmd/internal/obj/loong64/asm.go | 21 ++ - src/cmd/internal/obj/loong64/cnames.go | 1 + - src/cmd/internal/obj/loong64/obj.go | 123 ++++++++++++ - src/cmd/internal/objabi/reloctype.go | 5 + - src/cmd/internal/objabi/reloctype_string.go | 22 +- - src/cmd/link/internal/loong64/asm.go | 59 +++++- - src/internal/platform/supported.go | 4 +- - src/runtime/asm_loong64.s | 11 + - 14 files changed, 382 insertions(+), 153 deletions(-) - -diff --git a/src/cmd/compile/internal/liveness/plive.go b/src/cmd/compile/internal/liveness/plive.go -index 169467e6f5..38273db07d 100644 ---- a/src/cmd/compile/internal/liveness/plive.go -+++ b/src/cmd/compile/internal/liveness/plive.go -@@ -546,7 +546,12 @@ func (lv *liveness) markUnsafePoints() { - v = v.Args[0] - continue - } -- case ssa.Op386MOVLload, ssa.OpARM64MOVWUload, ssa.OpMIPS64MOVWUload, ssa.OpPPC64MOVWZload, ssa.OpWasmI64Load32U: -+ case ssa.Op386MOVLload, -+ ssa.OpARM64MOVWUload, -+ ssa.OpLOONG64MOVWUload, -+ ssa.OpMIPS64MOVWUload, -+ 
ssa.OpPPC64MOVWZload, -+ ssa.OpWasmI64Load32U: - // Args[0] is the address of the write - // barrier control. Ignore Args[1], - // which is the mem operand. -diff --git a/src/cmd/compile/internal/ssa/_gen/LOONG64.rules b/src/cmd/compile/internal/ssa/_gen/LOONG64.rules -index 4a47c4cd47..b9aaa3ff7f 100644 ---- a/src/cmd/compile/internal/ssa/_gen/LOONG64.rules -+++ b/src/cmd/compile/internal/ssa/_gen/LOONG64.rules -@@ -455,66 +455,31 @@ - (ADDVconst [off1] (MOVVaddr [off2] {sym} ptr)) && is32Bit(off1+int64(off2)) => (MOVVaddr [int32(off1)+int32(off2)] {sym} ptr) - - // fold address into load/store --(MOVBload [off1] {sym} (ADDVconst [off2] ptr) mem) && is32Bit(int64(off1)+off2) => (MOVBload [off1+int32(off2)] {sym} ptr mem) --(MOVBUload [off1] {sym} (ADDVconst [off2] ptr) mem) && is32Bit(int64(off1)+off2) => (MOVBUload [off1+int32(off2)] {sym} ptr mem) --(MOVHload [off1] {sym} (ADDVconst [off2] ptr) mem) && is32Bit(int64(off1)+off2) => (MOVHload [off1+int32(off2)] {sym} ptr mem) --(MOVHUload [off1] {sym} (ADDVconst [off2] ptr) mem) && is32Bit(int64(off1)+off2) => (MOVHUload [off1+int32(off2)] {sym} ptr mem) --(MOVWload [off1] {sym} (ADDVconst [off2] ptr) mem) && is32Bit(int64(off1)+off2) => (MOVWload [off1+int32(off2)] {sym} ptr mem) --(MOVWUload [off1] {sym} (ADDVconst [off2] ptr) mem) && is32Bit(int64(off1)+off2) => (MOVWUload [off1+int32(off2)] {sym} ptr mem) --(MOVVload [off1] {sym} (ADDVconst [off2] ptr) mem) && is32Bit(int64(off1)+off2) => (MOVVload [off1+int32(off2)] {sym} ptr mem) --(MOVFload [off1] {sym} (ADDVconst [off2] ptr) mem) && is32Bit(int64(off1)+off2) => (MOVFload [off1+int32(off2)] {sym} ptr mem) --(MOVDload [off1] {sym} (ADDVconst [off2] ptr) mem) && is32Bit(int64(off1)+off2) => (MOVDload [off1+int32(off2)] {sym} ptr mem) -- --(MOVBstore [off1] {sym} (ADDVconst [off2] ptr) val mem) && is32Bit(int64(off1)+off2) => (MOVBstore [off1+int32(off2)] {sym} ptr val mem) --(MOVHstore [off1] {sym} (ADDVconst [off2] ptr) val mem) && is32Bit(int64(off1)+off2) 
=> (MOVHstore [off1+int32(off2)] {sym} ptr val mem) --(MOVWstore [off1] {sym} (ADDVconst [off2] ptr) val mem) && is32Bit(int64(off1)+off2) => (MOVWstore [off1+int32(off2)] {sym} ptr val mem) --(MOVVstore [off1] {sym} (ADDVconst [off2] ptr) val mem) && is32Bit(int64(off1)+off2) => (MOVVstore [off1+int32(off2)] {sym} ptr val mem) --(MOVFstore [off1] {sym} (ADDVconst [off2] ptr) val mem) && is32Bit(int64(off1)+off2) => (MOVFstore [off1+int32(off2)] {sym} ptr val mem) --(MOVDstore [off1] {sym} (ADDVconst [off2] ptr) val mem) && is32Bit(int64(off1)+off2) => (MOVDstore [off1+int32(off2)] {sym} ptr val mem) --(MOVBstorezero [off1] {sym} (ADDVconst [off2] ptr) mem) && is32Bit(int64(off1)+off2) => (MOVBstorezero [off1+int32(off2)] {sym} ptr mem) --(MOVHstorezero [off1] {sym} (ADDVconst [off2] ptr) mem) && is32Bit(int64(off1)+off2) => (MOVHstorezero [off1+int32(off2)] {sym} ptr mem) --(MOVWstorezero [off1] {sym} (ADDVconst [off2] ptr) mem) && is32Bit(int64(off1)+off2) => (MOVWstorezero [off1+int32(off2)] {sym} ptr mem) --(MOVVstorezero [off1] {sym} (ADDVconst [off2] ptr) mem) && is32Bit(int64(off1)+off2) => (MOVVstorezero [off1+int32(off2)] {sym} ptr mem) -- --(MOVBload [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) => -- (MOVBload [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr mem) --(MOVBUload [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) => -- (MOVBUload [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr mem) --(MOVHload [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) => -- (MOVHload [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr mem) --(MOVHUload [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) => -- (MOVHUload [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr mem) --(MOVWload [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) mem) && 
canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) => -- (MOVWload [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr mem) --(MOVWUload [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) => -- (MOVWUload [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr mem) --(MOVVload [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) => -- (MOVVload [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr mem) --(MOVFload [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) => -- (MOVFload [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr mem) --(MOVDload [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) => -- (MOVDload [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr mem) -- --(MOVBstore [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) val mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) => -- (MOVBstore [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr val mem) --(MOVHstore [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) val mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) => -- (MOVHstore [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr val mem) --(MOVWstore [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) val mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) => -- (MOVWstore [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr val mem) --(MOVVstore [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) val mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) => -- (MOVVstore [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr val mem) --(MOVFstore [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) val mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) => -- (MOVFstore [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr val mem) --(MOVDstore [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) val mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) 
=> -- (MOVDstore [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr val mem) --(MOVBstorezero [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) => -- (MOVBstorezero [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr mem) --(MOVHstorezero [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) => -- (MOVHstorezero [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr mem) --(MOVWstorezero [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) => -- (MOVWstorezero [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr mem) --(MOVVstorezero [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) => -- (MOVVstorezero [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr mem) -+// Do not fold global variable access in -dynlink mode, where it will be rewritten -+// to use the GOT via REGTMP, which currently cannot handle large offset. 
-+(MOV(B|BU|H|HU|W|WU|V|F|D)load [off1] {sym} (ADDVconst [off2] ptr) mem) && is32Bit(int64(off1)+off2) -+ && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => -+ (MOV(B|BU|H|HU|W|WU|V|F|D)load [off1+int32(off2)] {sym} ptr mem) -+ -+(MOV(B|H|W|V|F|D)store [off1] {sym} (ADDVconst [off2] ptr) val mem) && is32Bit(int64(off1)+off2) -+ && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => -+ (MOV(B|H|W|V|F|D)store [off1+int32(off2)] {sym} ptr val mem) -+ -+(MOV(B|H|W|V)storezero [off1] {sym} (ADDVconst [off2] ptr) mem) && is32Bit(int64(off1)+off2) -+ && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => -+ (MOV(B|H|W|V)storezero [off1+int32(off2)] {sym} ptr mem) -+ -+(MOV(B|BU|H|HU|W|WU|V|F|D)load [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) -+ && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => -+ (MOV(B|BU|H|HU|W|WU|V|F|D)load [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr mem) -+ -+(MOV(B|H|W|V|F|D)store [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) val mem) && canMergeSym(sym1,sym2) -+ && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => -+ (MOV(B|H|W|V|F|D)store [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr val mem) -+ -+(MOV(B|H|W|V)storezero [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) -+ && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => -+ (MOV(B|H|W|V)storezero [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr mem) - - (LoweredAtomicStore(32|64) ptr (MOVVconst [0]) mem) => (LoweredAtomicStorezero(32|64) ptr mem) - (LoweredAtomicAdd32 ptr (MOVVconst [c]) mem) && is32Bit(c) => (LoweredAtomicAddconst32 [int32(c)] ptr mem) -diff --git a/src/cmd/compile/internal/ssa/regalloc.go b/src/cmd/compile/internal/ssa/regalloc.go -index c4d6e48cad..71fedd52cf 100644 ---- a/src/cmd/compile/internal/ssa/regalloc.go -+++ b/src/cmd/compile/internal/ssa/regalloc.go -@@ -672,6 +672,8 @@ func (s *regAllocState) init(f *Func) { - 
s.allocatable &^= 1 << 9 // R9 - case "arm64": - // nothing to do -+ case "loong64": // R2 (aka TP) already reserved. -+ // nothing to do - case "ppc64le": // R2 already reserved. - // nothing to do - case "riscv64": // X3 (aka GP) and X4 (aka TP) already reserved. -diff --git a/src/cmd/compile/internal/ssa/rewriteLOONG64.go b/src/cmd/compile/internal/ssa/rewriteLOONG64.go -index e88b74cb22..757524bdbb 100644 ---- a/src/cmd/compile/internal/ssa/rewriteLOONG64.go -+++ b/src/cmd/compile/internal/ssa/rewriteLOONG64.go -@@ -1724,8 +1724,10 @@ func rewriteValueLOONG64_OpLOONG64MASKNEZ(v *Value) bool { - func rewriteValueLOONG64_OpLOONG64MOVBUload(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] -+ b := v.Block -+ config := b.Func.Config - // match: (MOVBUload [off1] {sym} (ADDVconst [off2] ptr) mem) -- // cond: is32Bit(int64(off1)+off2) -+ // cond: is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) - // result: (MOVBUload [off1+int32(off2)] {sym} ptr mem) - for { - off1 := auxIntToInt32(v.AuxInt) -@@ -1736,7 +1738,7 @@ func rewriteValueLOONG64_OpLOONG64MOVBUload(v *Value) bool { - off2 := auxIntToInt64(v_0.AuxInt) - ptr := v_0.Args[0] - mem := v_1 -- if !(is32Bit(int64(off1) + off2)) { -+ if !(is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) { - break - } - v.reset(OpLOONG64MOVBUload) -@@ -1746,7 +1748,7 @@ func rewriteValueLOONG64_OpLOONG64MOVBUload(v *Value) bool { - return true - } - // match: (MOVBUload [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) mem) -- // cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) -+ // cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) - // result: (MOVBUload [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr mem) - for { - off1 := auxIntToInt32(v.AuxInt) -@@ -1758,7 +1760,7 @@ func rewriteValueLOONG64_OpLOONG64MOVBUload(v *Value) bool { - sym2 := auxToSym(v_0.Aux) - ptr := v_0.Args[0] - mem := v_1 -- if 
!(canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2))) { -+ if !(canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) { - break - } - v.reset(OpLOONG64MOVBUload) -@@ -1809,8 +1811,10 @@ func rewriteValueLOONG64_OpLOONG64MOVBUreg(v *Value) bool { - func rewriteValueLOONG64_OpLOONG64MOVBload(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] -+ b := v.Block -+ config := b.Func.Config - // match: (MOVBload [off1] {sym} (ADDVconst [off2] ptr) mem) -- // cond: is32Bit(int64(off1)+off2) -+ // cond: is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) - // result: (MOVBload [off1+int32(off2)] {sym} ptr mem) - for { - off1 := auxIntToInt32(v.AuxInt) -@@ -1821,7 +1825,7 @@ func rewriteValueLOONG64_OpLOONG64MOVBload(v *Value) bool { - off2 := auxIntToInt64(v_0.AuxInt) - ptr := v_0.Args[0] - mem := v_1 -- if !(is32Bit(int64(off1) + off2)) { -+ if !(is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) { - break - } - v.reset(OpLOONG64MOVBload) -@@ -1831,7 +1835,7 @@ func rewriteValueLOONG64_OpLOONG64MOVBload(v *Value) bool { - return true - } - // match: (MOVBload [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) mem) -- // cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) -+ // cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) - // result: (MOVBload [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr mem) - for { - off1 := auxIntToInt32(v.AuxInt) -@@ -1843,7 +1847,7 @@ func rewriteValueLOONG64_OpLOONG64MOVBload(v *Value) bool { - sym2 := auxToSym(v_0.Aux) - ptr := v_0.Args[0] - mem := v_1 -- if !(canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2))) { -+ if !(canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) { - break - } - v.reset(OpLOONG64MOVBload) -@@ -1895,8 +1899,10 @@ func rewriteValueLOONG64_OpLOONG64MOVBstore(v *Value) bool { 
- v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] -+ b := v.Block -+ config := b.Func.Config - // match: (MOVBstore [off1] {sym} (ADDVconst [off2] ptr) val mem) -- // cond: is32Bit(int64(off1)+off2) -+ // cond: is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) - // result: (MOVBstore [off1+int32(off2)] {sym} ptr val mem) - for { - off1 := auxIntToInt32(v.AuxInt) -@@ -1908,7 +1914,7 @@ func rewriteValueLOONG64_OpLOONG64MOVBstore(v *Value) bool { - ptr := v_0.Args[0] - val := v_1 - mem := v_2 -- if !(is32Bit(int64(off1) + off2)) { -+ if !(is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) { - break - } - v.reset(OpLOONG64MOVBstore) -@@ -1918,7 +1924,7 @@ func rewriteValueLOONG64_OpLOONG64MOVBstore(v *Value) bool { - return true - } - // match: (MOVBstore [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) val mem) -- // cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) -+ // cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) - // result: (MOVBstore [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr val mem) - for { - off1 := auxIntToInt32(v.AuxInt) -@@ -1931,7 +1937,7 @@ func rewriteValueLOONG64_OpLOONG64MOVBstore(v *Value) bool { - ptr := v_0.Args[0] - val := v_1 - mem := v_2 -- if !(canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2))) { -+ if !(canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) { - break - } - v.reset(OpLOONG64MOVBstore) -@@ -2047,8 +2053,10 @@ func rewriteValueLOONG64_OpLOONG64MOVBstore(v *Value) bool { - func rewriteValueLOONG64_OpLOONG64MOVBstorezero(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] -+ b := v.Block -+ config := b.Func.Config - // match: (MOVBstorezero [off1] {sym} (ADDVconst [off2] ptr) mem) -- // cond: is32Bit(int64(off1)+off2) -+ // cond: is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) - // result: 
(MOVBstorezero [off1+int32(off2)] {sym} ptr mem) - for { - off1 := auxIntToInt32(v.AuxInt) -@@ -2059,7 +2067,7 @@ func rewriteValueLOONG64_OpLOONG64MOVBstorezero(v *Value) bool { - off2 := auxIntToInt64(v_0.AuxInt) - ptr := v_0.Args[0] - mem := v_1 -- if !(is32Bit(int64(off1) + off2)) { -+ if !(is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) { - break - } - v.reset(OpLOONG64MOVBstorezero) -@@ -2069,7 +2077,7 @@ func rewriteValueLOONG64_OpLOONG64MOVBstorezero(v *Value) bool { - return true - } - // match: (MOVBstorezero [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) mem) -- // cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) -+ // cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) - // result: (MOVBstorezero [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr mem) - for { - off1 := auxIntToInt32(v.AuxInt) -@@ -2081,7 +2089,7 @@ func rewriteValueLOONG64_OpLOONG64MOVBstorezero(v *Value) bool { - sym2 := auxToSym(v_0.Aux) - ptr := v_0.Args[0] - mem := v_1 -- if !(canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2))) { -+ if !(canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) { - break - } - v.reset(OpLOONG64MOVBstorezero) -@@ -2095,8 +2103,10 @@ func rewriteValueLOONG64_OpLOONG64MOVBstorezero(v *Value) bool { - func rewriteValueLOONG64_OpLOONG64MOVDload(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] -+ b := v.Block -+ config := b.Func.Config - // match: (MOVDload [off1] {sym} (ADDVconst [off2] ptr) mem) -- // cond: is32Bit(int64(off1)+off2) -+ // cond: is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) - // result: (MOVDload [off1+int32(off2)] {sym} ptr mem) - for { - off1 := auxIntToInt32(v.AuxInt) -@@ -2107,7 +2117,7 @@ func rewriteValueLOONG64_OpLOONG64MOVDload(v *Value) bool { - off2 := auxIntToInt64(v_0.AuxInt) - ptr := v_0.Args[0] - mem := v_1 -- if !(is32Bit(int64(off1) 
+ off2)) { -+ if !(is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) { - break - } - v.reset(OpLOONG64MOVDload) -@@ -2117,7 +2127,7 @@ func rewriteValueLOONG64_OpLOONG64MOVDload(v *Value) bool { - return true - } - // match: (MOVDload [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) mem) -- // cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) -+ // cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) - // result: (MOVDload [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr mem) - for { - off1 := auxIntToInt32(v.AuxInt) -@@ -2129,7 +2139,7 @@ func rewriteValueLOONG64_OpLOONG64MOVDload(v *Value) bool { - sym2 := auxToSym(v_0.Aux) - ptr := v_0.Args[0] - mem := v_1 -- if !(canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2))) { -+ if !(canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) { - break - } - v.reset(OpLOONG64MOVDload) -@@ -2144,8 +2154,10 @@ func rewriteValueLOONG64_OpLOONG64MOVDstore(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] -+ b := v.Block -+ config := b.Func.Config - // match: (MOVDstore [off1] {sym} (ADDVconst [off2] ptr) val mem) -- // cond: is32Bit(int64(off1)+off2) -+ // cond: is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) - // result: (MOVDstore [off1+int32(off2)] {sym} ptr val mem) - for { - off1 := auxIntToInt32(v.AuxInt) -@@ -2157,7 +2169,7 @@ func rewriteValueLOONG64_OpLOONG64MOVDstore(v *Value) bool { - ptr := v_0.Args[0] - val := v_1 - mem := v_2 -- if !(is32Bit(int64(off1) + off2)) { -+ if !(is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) { - break - } - v.reset(OpLOONG64MOVDstore) -@@ -2167,7 +2179,7 @@ func rewriteValueLOONG64_OpLOONG64MOVDstore(v *Value) bool { - return true - } - // match: (MOVDstore [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) val mem) -- // cond: canMergeSym(sym1,sym2) && 
is32Bit(int64(off1)+int64(off2)) -+ // cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) - // result: (MOVDstore [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr val mem) - for { - off1 := auxIntToInt32(v.AuxInt) -@@ -2180,7 +2192,7 @@ func rewriteValueLOONG64_OpLOONG64MOVDstore(v *Value) bool { - ptr := v_0.Args[0] - val := v_1 - mem := v_2 -- if !(canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2))) { -+ if !(canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) { - break - } - v.reset(OpLOONG64MOVDstore) -@@ -2194,8 +2206,10 @@ func rewriteValueLOONG64_OpLOONG64MOVDstore(v *Value) bool { - func rewriteValueLOONG64_OpLOONG64MOVFload(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] -+ b := v.Block -+ config := b.Func.Config - // match: (MOVFload [off1] {sym} (ADDVconst [off2] ptr) mem) -- // cond: is32Bit(int64(off1)+off2) -+ // cond: is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) - // result: (MOVFload [off1+int32(off2)] {sym} ptr mem) - for { - off1 := auxIntToInt32(v.AuxInt) -@@ -2206,7 +2220,7 @@ func rewriteValueLOONG64_OpLOONG64MOVFload(v *Value) bool { - off2 := auxIntToInt64(v_0.AuxInt) - ptr := v_0.Args[0] - mem := v_1 -- if !(is32Bit(int64(off1) + off2)) { -+ if !(is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) { - break - } - v.reset(OpLOONG64MOVFload) -@@ -2216,7 +2230,7 @@ func rewriteValueLOONG64_OpLOONG64MOVFload(v *Value) bool { - return true - } - // match: (MOVFload [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) mem) -- // cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) -+ // cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) - // result: (MOVFload [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr mem) - for { - off1 := auxIntToInt32(v.AuxInt) -@@ -2228,7 +2242,7 @@ func 
rewriteValueLOONG64_OpLOONG64MOVFload(v *Value) bool { - sym2 := auxToSym(v_0.Aux) - ptr := v_0.Args[0] - mem := v_1 -- if !(canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2))) { -+ if !(canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) { - break - } - v.reset(OpLOONG64MOVFload) -@@ -2243,8 +2257,10 @@ func rewriteValueLOONG64_OpLOONG64MOVFstore(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] -+ b := v.Block -+ config := b.Func.Config - // match: (MOVFstore [off1] {sym} (ADDVconst [off2] ptr) val mem) -- // cond: is32Bit(int64(off1)+off2) -+ // cond: is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) - // result: (MOVFstore [off1+int32(off2)] {sym} ptr val mem) - for { - off1 := auxIntToInt32(v.AuxInt) -@@ -2256,7 +2272,7 @@ func rewriteValueLOONG64_OpLOONG64MOVFstore(v *Value) bool { - ptr := v_0.Args[0] - val := v_1 - mem := v_2 -- if !(is32Bit(int64(off1) + off2)) { -+ if !(is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) { - break - } - v.reset(OpLOONG64MOVFstore) -@@ -2266,7 +2282,7 @@ func rewriteValueLOONG64_OpLOONG64MOVFstore(v *Value) bool { - return true - } - // match: (MOVFstore [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) val mem) -- // cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) -+ // cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) - // result: (MOVFstore [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr val mem) - for { - off1 := auxIntToInt32(v.AuxInt) -@@ -2279,7 +2295,7 @@ func rewriteValueLOONG64_OpLOONG64MOVFstore(v *Value) bool { - ptr := v_0.Args[0] - val := v_1 - mem := v_2 -- if !(canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2))) { -+ if !(canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) { - break - } - v.reset(OpLOONG64MOVFstore) -@@ -2293,8 
+2309,10 @@ func rewriteValueLOONG64_OpLOONG64MOVFstore(v *Value) bool { - func rewriteValueLOONG64_OpLOONG64MOVHUload(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] -+ b := v.Block -+ config := b.Func.Config - // match: (MOVHUload [off1] {sym} (ADDVconst [off2] ptr) mem) -- // cond: is32Bit(int64(off1)+off2) -+ // cond: is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) - // result: (MOVHUload [off1+int32(off2)] {sym} ptr mem) - for { - off1 := auxIntToInt32(v.AuxInt) -@@ -2305,7 +2323,7 @@ func rewriteValueLOONG64_OpLOONG64MOVHUload(v *Value) bool { - off2 := auxIntToInt64(v_0.AuxInt) - ptr := v_0.Args[0] - mem := v_1 -- if !(is32Bit(int64(off1) + off2)) { -+ if !(is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) { - break - } - v.reset(OpLOONG64MOVHUload) -@@ -2315,7 +2333,7 @@ func rewriteValueLOONG64_OpLOONG64MOVHUload(v *Value) bool { - return true - } - // match: (MOVHUload [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) mem) -- // cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) -+ // cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) - // result: (MOVHUload [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr mem) - for { - off1 := auxIntToInt32(v.AuxInt) -@@ -2327,7 +2345,7 @@ func rewriteValueLOONG64_OpLOONG64MOVHUload(v *Value) bool { - sym2 := auxToSym(v_0.Aux) - ptr := v_0.Args[0] - mem := v_1 -- if !(canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2))) { -+ if !(canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) { - break - } - v.reset(OpLOONG64MOVHUload) -@@ -2400,8 +2418,10 @@ func rewriteValueLOONG64_OpLOONG64MOVHUreg(v *Value) bool { - func rewriteValueLOONG64_OpLOONG64MOVHload(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] -+ b := v.Block -+ config := b.Func.Config - // match: (MOVHload [off1] {sym} (ADDVconst [off2] ptr) mem) -- // cond: 
is32Bit(int64(off1)+off2) -+ // cond: is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) - // result: (MOVHload [off1+int32(off2)] {sym} ptr mem) - for { - off1 := auxIntToInt32(v.AuxInt) -@@ -2412,7 +2432,7 @@ func rewriteValueLOONG64_OpLOONG64MOVHload(v *Value) bool { - off2 := auxIntToInt64(v_0.AuxInt) - ptr := v_0.Args[0] - mem := v_1 -- if !(is32Bit(int64(off1) + off2)) { -+ if !(is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) { - break - } - v.reset(OpLOONG64MOVHload) -@@ -2422,7 +2442,7 @@ func rewriteValueLOONG64_OpLOONG64MOVHload(v *Value) bool { - return true - } - // match: (MOVHload [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) mem) -- // cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) -+ // cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) - // result: (MOVHload [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr mem) - for { - off1 := auxIntToInt32(v.AuxInt) -@@ -2434,7 +2454,7 @@ func rewriteValueLOONG64_OpLOONG64MOVHload(v *Value) bool { - sym2 := auxToSym(v_0.Aux) - ptr := v_0.Args[0] - mem := v_1 -- if !(canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2))) { -+ if !(canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) { - break - } - v.reset(OpLOONG64MOVHload) -@@ -2530,8 +2550,10 @@ func rewriteValueLOONG64_OpLOONG64MOVHstore(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] -+ b := v.Block -+ config := b.Func.Config - // match: (MOVHstore [off1] {sym} (ADDVconst [off2] ptr) val mem) -- // cond: is32Bit(int64(off1)+off2) -+ // cond: is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) - // result: (MOVHstore [off1+int32(off2)] {sym} ptr val mem) - for { - off1 := auxIntToInt32(v.AuxInt) -@@ -2543,7 +2565,7 @@ func rewriteValueLOONG64_OpLOONG64MOVHstore(v *Value) bool { - ptr := v_0.Args[0] - val := v_1 - mem := v_2 
-- if !(is32Bit(int64(off1) + off2)) { -+ if !(is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) { - break - } - v.reset(OpLOONG64MOVHstore) -@@ -2553,7 +2575,7 @@ func rewriteValueLOONG64_OpLOONG64MOVHstore(v *Value) bool { - return true - } - // match: (MOVHstore [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) val mem) -- // cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) -+ // cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) - // result: (MOVHstore [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr val mem) - for { - off1 := auxIntToInt32(v.AuxInt) -@@ -2566,7 +2588,7 @@ func rewriteValueLOONG64_OpLOONG64MOVHstore(v *Value) bool { - ptr := v_0.Args[0] - val := v_1 - mem := v_2 -- if !(canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2))) { -+ if !(canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) { - break - } - v.reset(OpLOONG64MOVHstore) -@@ -2648,8 +2670,10 @@ func rewriteValueLOONG64_OpLOONG64MOVHstore(v *Value) bool { - func rewriteValueLOONG64_OpLOONG64MOVHstorezero(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] -+ b := v.Block -+ config := b.Func.Config - // match: (MOVHstorezero [off1] {sym} (ADDVconst [off2] ptr) mem) -- // cond: is32Bit(int64(off1)+off2) -+ // cond: is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) - // result: (MOVHstorezero [off1+int32(off2)] {sym} ptr mem) - for { - off1 := auxIntToInt32(v.AuxInt) -@@ -2660,7 +2684,7 @@ func rewriteValueLOONG64_OpLOONG64MOVHstorezero(v *Value) bool { - off2 := auxIntToInt64(v_0.AuxInt) - ptr := v_0.Args[0] - mem := v_1 -- if !(is32Bit(int64(off1) + off2)) { -+ if !(is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) { - break - } - v.reset(OpLOONG64MOVHstorezero) -@@ -2670,7 +2694,7 @@ func rewriteValueLOONG64_OpLOONG64MOVHstorezero(v *Value) bool { - return true - } - // match: 
(MOVHstorezero [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) mem) -- // cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) -+ // cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) - // result: (MOVHstorezero [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr mem) - for { - off1 := auxIntToInt32(v.AuxInt) -@@ -2682,7 +2706,7 @@ func rewriteValueLOONG64_OpLOONG64MOVHstorezero(v *Value) bool { - sym2 := auxToSym(v_0.Aux) - ptr := v_0.Args[0] - mem := v_1 -- if !(canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2))) { -+ if !(canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) { - break - } - v.reset(OpLOONG64MOVHstorezero) -@@ -2696,8 +2720,10 @@ func rewriteValueLOONG64_OpLOONG64MOVHstorezero(v *Value) bool { - func rewriteValueLOONG64_OpLOONG64MOVVload(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] -+ b := v.Block -+ config := b.Func.Config - // match: (MOVVload [off1] {sym} (ADDVconst [off2] ptr) mem) -- // cond: is32Bit(int64(off1)+off2) -+ // cond: is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) - // result: (MOVVload [off1+int32(off2)] {sym} ptr mem) - for { - off1 := auxIntToInt32(v.AuxInt) -@@ -2708,7 +2734,7 @@ func rewriteValueLOONG64_OpLOONG64MOVVload(v *Value) bool { - off2 := auxIntToInt64(v_0.AuxInt) - ptr := v_0.Args[0] - mem := v_1 -- if !(is32Bit(int64(off1) + off2)) { -+ if !(is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) { - break - } - v.reset(OpLOONG64MOVVload) -@@ -2718,7 +2744,7 @@ func rewriteValueLOONG64_OpLOONG64MOVVload(v *Value) bool { - return true - } - // match: (MOVVload [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) mem) -- // cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) -+ // cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) - // result: (MOVVload 
[off1+int32(off2)] {mergeSym(sym1,sym2)} ptr mem) - for { - off1 := auxIntToInt32(v.AuxInt) -@@ -2730,7 +2756,7 @@ func rewriteValueLOONG64_OpLOONG64MOVVload(v *Value) bool { - sym2 := auxToSym(v_0.Aux) - ptr := v_0.Args[0] - mem := v_1 -- if !(canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2))) { -+ if !(canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) { - break - } - v.reset(OpLOONG64MOVVload) -@@ -2772,8 +2798,10 @@ func rewriteValueLOONG64_OpLOONG64MOVVstore(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] -+ b := v.Block -+ config := b.Func.Config - // match: (MOVVstore [off1] {sym} (ADDVconst [off2] ptr) val mem) -- // cond: is32Bit(int64(off1)+off2) -+ // cond: is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) - // result: (MOVVstore [off1+int32(off2)] {sym} ptr val mem) - for { - off1 := auxIntToInt32(v.AuxInt) -@@ -2785,7 +2813,7 @@ func rewriteValueLOONG64_OpLOONG64MOVVstore(v *Value) bool { - ptr := v_0.Args[0] - val := v_1 - mem := v_2 -- if !(is32Bit(int64(off1) + off2)) { -+ if !(is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) { - break - } - v.reset(OpLOONG64MOVVstore) -@@ -2795,7 +2823,7 @@ func rewriteValueLOONG64_OpLOONG64MOVVstore(v *Value) bool { - return true - } - // match: (MOVVstore [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) val mem) -- // cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) -+ // cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) - // result: (MOVVstore [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr val mem) - for { - off1 := auxIntToInt32(v.AuxInt) -@@ -2808,7 +2836,7 @@ func rewriteValueLOONG64_OpLOONG64MOVVstore(v *Value) bool { - ptr := v_0.Args[0] - val := v_1 - mem := v_2 -- if !(canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2))) { -+ if !(canMergeSym(sym1, sym2) && 
is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) { - break - } - v.reset(OpLOONG64MOVVstore) -@@ -2822,8 +2850,10 @@ func rewriteValueLOONG64_OpLOONG64MOVVstore(v *Value) bool { - func rewriteValueLOONG64_OpLOONG64MOVVstorezero(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] -+ b := v.Block -+ config := b.Func.Config - // match: (MOVVstorezero [off1] {sym} (ADDVconst [off2] ptr) mem) -- // cond: is32Bit(int64(off1)+off2) -+ // cond: is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) - // result: (MOVVstorezero [off1+int32(off2)] {sym} ptr mem) - for { - off1 := auxIntToInt32(v.AuxInt) -@@ -2834,7 +2864,7 @@ func rewriteValueLOONG64_OpLOONG64MOVVstorezero(v *Value) bool { - off2 := auxIntToInt64(v_0.AuxInt) - ptr := v_0.Args[0] - mem := v_1 -- if !(is32Bit(int64(off1) + off2)) { -+ if !(is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) { - break - } - v.reset(OpLOONG64MOVVstorezero) -@@ -2844,7 +2874,7 @@ func rewriteValueLOONG64_OpLOONG64MOVVstorezero(v *Value) bool { - return true - } - // match: (MOVVstorezero [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) mem) -- // cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) -+ // cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) - // result: (MOVVstorezero [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr mem) - for { - off1 := auxIntToInt32(v.AuxInt) -@@ -2856,7 +2886,7 @@ func rewriteValueLOONG64_OpLOONG64MOVVstorezero(v *Value) bool { - sym2 := auxToSym(v_0.Aux) - ptr := v_0.Args[0] - mem := v_1 -- if !(canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2))) { -+ if !(canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) { - break - } - v.reset(OpLOONG64MOVVstorezero) -@@ -2870,8 +2900,10 @@ func rewriteValueLOONG64_OpLOONG64MOVVstorezero(v *Value) bool { - func 
rewriteValueLOONG64_OpLOONG64MOVWUload(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] -+ b := v.Block -+ config := b.Func.Config - // match: (MOVWUload [off1] {sym} (ADDVconst [off2] ptr) mem) -- // cond: is32Bit(int64(off1)+off2) -+ // cond: is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) - // result: (MOVWUload [off1+int32(off2)] {sym} ptr mem) - for { - off1 := auxIntToInt32(v.AuxInt) -@@ -2882,7 +2914,7 @@ func rewriteValueLOONG64_OpLOONG64MOVWUload(v *Value) bool { - off2 := auxIntToInt64(v_0.AuxInt) - ptr := v_0.Args[0] - mem := v_1 -- if !(is32Bit(int64(off1) + off2)) { -+ if !(is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) { - break - } - v.reset(OpLOONG64MOVWUload) -@@ -2892,7 +2924,7 @@ func rewriteValueLOONG64_OpLOONG64MOVWUload(v *Value) bool { - return true - } - // match: (MOVWUload [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) mem) -- // cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) -+ // cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) - // result: (MOVWUload [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr mem) - for { - off1 := auxIntToInt32(v.AuxInt) -@@ -2904,7 +2936,7 @@ func rewriteValueLOONG64_OpLOONG64MOVWUload(v *Value) bool { - sym2 := auxToSym(v_0.Aux) - ptr := v_0.Args[0] - mem := v_1 -- if !(canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2))) { -+ if !(canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) { - break - } - v.reset(OpLOONG64MOVWUload) -@@ -2999,8 +3031,10 @@ func rewriteValueLOONG64_OpLOONG64MOVWUreg(v *Value) bool { - func rewriteValueLOONG64_OpLOONG64MOVWload(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] -+ b := v.Block -+ config := b.Func.Config - // match: (MOVWload [off1] {sym} (ADDVconst [off2] ptr) mem) -- // cond: is32Bit(int64(off1)+off2) -+ // cond: is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || 
!config.ctxt.Flag_dynlink) - // result: (MOVWload [off1+int32(off2)] {sym} ptr mem) - for { - off1 := auxIntToInt32(v.AuxInt) -@@ -3011,7 +3045,7 @@ func rewriteValueLOONG64_OpLOONG64MOVWload(v *Value) bool { - off2 := auxIntToInt64(v_0.AuxInt) - ptr := v_0.Args[0] - mem := v_1 -- if !(is32Bit(int64(off1) + off2)) { -+ if !(is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) { - break - } - v.reset(OpLOONG64MOVWload) -@@ -3021,7 +3055,7 @@ func rewriteValueLOONG64_OpLOONG64MOVWload(v *Value) bool { - return true - } - // match: (MOVWload [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) mem) -- // cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) -+ // cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) - // result: (MOVWload [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr mem) - for { - off1 := auxIntToInt32(v.AuxInt) -@@ -3033,7 +3067,7 @@ func rewriteValueLOONG64_OpLOONG64MOVWload(v *Value) bool { - sym2 := auxToSym(v_0.Aux) - ptr := v_0.Args[0] - mem := v_1 -- if !(canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2))) { -+ if !(canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) { - break - } - v.reset(OpLOONG64MOVWload) -@@ -3162,8 +3196,10 @@ func rewriteValueLOONG64_OpLOONG64MOVWstore(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] -+ b := v.Block -+ config := b.Func.Config - // match: (MOVWstore [off1] {sym} (ADDVconst [off2] ptr) val mem) -- // cond: is32Bit(int64(off1)+off2) -+ // cond: is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) - // result: (MOVWstore [off1+int32(off2)] {sym} ptr val mem) - for { - off1 := auxIntToInt32(v.AuxInt) -@@ -3175,7 +3211,7 @@ func rewriteValueLOONG64_OpLOONG64MOVWstore(v *Value) bool { - ptr := v_0.Args[0] - val := v_1 - mem := v_2 -- if !(is32Bit(int64(off1) + off2)) { -+ if !(is32Bit(int64(off1)+off2) && (ptr.Op != 
OpSB || !config.ctxt.Flag_dynlink)) { - break - } - v.reset(OpLOONG64MOVWstore) -@@ -3185,7 +3221,7 @@ func rewriteValueLOONG64_OpLOONG64MOVWstore(v *Value) bool { - return true - } - // match: (MOVWstore [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) val mem) -- // cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) -+ // cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) - // result: (MOVWstore [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr val mem) - for { - off1 := auxIntToInt32(v.AuxInt) -@@ -3198,7 +3234,7 @@ func rewriteValueLOONG64_OpLOONG64MOVWstore(v *Value) bool { - ptr := v_0.Args[0] - val := v_1 - mem := v_2 -- if !(canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2))) { -+ if !(canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) { - break - } - v.reset(OpLOONG64MOVWstore) -@@ -3246,8 +3282,10 @@ func rewriteValueLOONG64_OpLOONG64MOVWstore(v *Value) bool { - func rewriteValueLOONG64_OpLOONG64MOVWstorezero(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] -+ b := v.Block -+ config := b.Func.Config - // match: (MOVWstorezero [off1] {sym} (ADDVconst [off2] ptr) mem) -- // cond: is32Bit(int64(off1)+off2) -+ // cond: is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) - // result: (MOVWstorezero [off1+int32(off2)] {sym} ptr mem) - for { - off1 := auxIntToInt32(v.AuxInt) -@@ -3258,7 +3296,7 @@ func rewriteValueLOONG64_OpLOONG64MOVWstorezero(v *Value) bool { - off2 := auxIntToInt64(v_0.AuxInt) - ptr := v_0.Args[0] - mem := v_1 -- if !(is32Bit(int64(off1) + off2)) { -+ if !(is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) { - break - } - v.reset(OpLOONG64MOVWstorezero) -@@ -3268,7 +3306,7 @@ func rewriteValueLOONG64_OpLOONG64MOVWstorezero(v *Value) bool { - return true - } - // match: (MOVWstorezero [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) mem) -- // cond: 
canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) -+ // cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) - // result: (MOVWstorezero [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr mem) - for { - off1 := auxIntToInt32(v.AuxInt) -@@ -3280,7 +3318,7 @@ func rewriteValueLOONG64_OpLOONG64MOVWstorezero(v *Value) bool { - sym2 := auxToSym(v_0.Aux) - ptr := v_0.Args[0] - mem := v_1 -- if !(canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2))) { -+ if !(canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) { - break - } - v.reset(OpLOONG64MOVWstorezero) -diff --git a/src/cmd/dist/test.go b/src/cmd/dist/test.go -index 36a20e8b2a..864060cbb2 100644 ---- a/src/cmd/dist/test.go -+++ b/src/cmd/dist/test.go -@@ -1613,14 +1613,14 @@ func buildModeSupported(compiler, buildmode, goos, goarch string) bool { - - case "shared": - switch platform { -- case "linux/386", "linux/amd64", "linux/arm", "linux/arm64", "linux/ppc64le", "linux/s390x": -+ case "linux/386", "linux/amd64", "linux/arm", "linux/arm64", "linux/loong64", "linux/ppc64le", "linux/s390x": - return true - } - return false - - case "plugin": - switch platform { -- case "linux/amd64", "linux/arm", "linux/arm64", "linux/386", "linux/s390x", "linux/ppc64le", -+ case "linux/amd64", "linux/arm", "linux/arm64", "linux/386", "linux/loong64", "linux/s390x", "linux/ppc64le", - "android/amd64", "android/386", - "darwin/amd64", "darwin/arm64", - "freebsd/amd64": -diff --git a/src/cmd/internal/obj/loong64/a.out.go b/src/cmd/internal/obj/loong64/a.out.go -index 99a7da388f..9527e99b56 100644 ---- a/src/cmd/internal/obj/loong64/a.out.go -+++ b/src/cmd/internal/obj/loong64/a.out.go -@@ -227,6 +227,7 @@ const ( - C_ADDR - C_TLS_LE - C_TLS_IE -+ C_GOTADDR - C_TEXTSIZE - - C_NCLASS // must be the last -diff --git a/src/cmd/internal/obj/loong64/asm.go b/src/cmd/internal/obj/loong64/asm.go -index 
0ab0caafae..c8d00413a0 100644 ---- a/src/cmd/internal/obj/loong64/asm.go -+++ b/src/cmd/internal/obj/loong64/asm.go -@@ -349,6 +349,8 @@ var optab = []Optab{ - {AWORD, C_LCON, C_NONE, C_NONE, C_NONE, C_NONE, 40, 4, 0, 0}, - {AWORD, C_DCON, C_NONE, C_NONE, C_NONE, C_NONE, 61, 4, 0, 0}, - -+ {AMOVV, C_GOTADDR, C_NONE, C_NONE, C_REG, C_NONE, 65, 8, 0, 0}, -+ - {ATEQ, C_SCON, C_REG, C_NONE, C_REG, C_NONE, 15, 8, 0, 0}, - {ATEQ, C_SCON, C_NONE, C_NONE, C_REG, C_NONE, 15, 8, 0, 0}, - -@@ -676,6 +678,9 @@ func (c *ctxt0) aclass(a *obj.Addr) int { - return C_SOREG - } - return C_LOREG -+ -+ case obj.NAME_GOTREF: -+ return C_GOTADDR - } - - return C_GOK -@@ -1776,6 +1781,22 @@ func (c *ctxt0) asmout(p *obj.Prog, o *Optab, out []uint32) { - case 64: // movv c_reg, c_fcc0 ==> movgr2cf cd, rj - a := OP_TEN(8, 1334) - o1 = OP_RR(a, uint32(p.From.Reg), uint32(p.To.Reg)) -+ -+ case 65: // mov sym@GOT, r ==> pcalau12i + ld.d -+ o1 = OP_IR(c.opir(APCALAU12I), uint32(0), uint32(p.To.Reg)) -+ rel := obj.Addrel(c.cursym) -+ rel.Off = int32(c.pc) -+ rel.Siz = 4 -+ rel.Sym = p.From.Sym -+ rel.Type = objabi.R_LOONG64_GOTPCREL_HI -+ rel.Add = 0x0 -+ o2 = OP_12IRR(c.opirr(-p.As), uint32(0), uint32(p.To.Reg), uint32(p.To.Reg)) -+ rel2 := obj.Addrel(c.cursym) -+ rel2.Off = int32(c.pc + 4) -+ rel2.Siz = 4 -+ rel2.Sym = p.From.Sym -+ rel2.Type = objabi.R_LOONG64_GOT_LO -+ rel2.Add = 0x0 - } - - out[0] = o1 -diff --git a/src/cmd/internal/obj/loong64/cnames.go b/src/cmd/internal/obj/loong64/cnames.go -index 8b8af6ba31..94b1b54c93 100644 ---- a/src/cmd/internal/obj/loong64/cnames.go -+++ b/src/cmd/internal/obj/loong64/cnames.go -@@ -39,6 +39,7 @@ var cnames0 = []string{ - "ADDR", - "TLS_LE", - "TLS_IE", -+ "GOTADDR", - "TEXTSIZE", - "NCLASS", - } -diff --git a/src/cmd/internal/obj/loong64/obj.go b/src/cmd/internal/obj/loong64/obj.go -index 1eedd46c69..38ab66b819 100644 ---- a/src/cmd/internal/obj/loong64/obj.go -+++ b/src/cmd/internal/obj/loong64/obj.go -@@ -6,6 +6,7 @@ package loong64 - - import 
( - "cmd/internal/obj" -+ "cmd/internal/objabi" - "cmd/internal/sys" - "internal/abi" - "log" -@@ -84,6 +85,128 @@ func progedit(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) { - p.As = AADDVU - } - } -+ -+ if ctxt.Flag_dynlink { -+ rewriteToUseGot(ctxt, p, newprog) -+ } -+} -+ -+func rewriteToUseGot(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) { -+ // ADUFFxxx $offset -+ // becomes -+ // MOVV runtime.duffxxx@GOT, REGTMP -+ // ADD $offset, REGTMP -+ // JAL REGTMP -+ if p.As == obj.ADUFFCOPY || p.As == obj.ADUFFZERO { -+ var sym *obj.LSym -+ if p.As == obj.ADUFFZERO { -+ sym = ctxt.Lookup("runtime.duffzero") -+ } else { -+ sym = ctxt.Lookup("runtime.duffcopy") -+ } -+ offset := p.To.Offset -+ p.As = AMOVV -+ p.From.Type = obj.TYPE_MEM -+ p.From.Sym = sym -+ p.From.Name = obj.NAME_GOTREF -+ p.To.Type = obj.TYPE_REG -+ p.To.Reg = REGTMP -+ p.To.Name = obj.NAME_NONE -+ p.To.Offset = 0 -+ p.To.Sym = nil -+ p1 := obj.Appendp(p, newprog) -+ p1.As = AADDV -+ p1.From.Type = obj.TYPE_CONST -+ p1.From.Offset = offset -+ p1.To.Type = obj.TYPE_REG -+ p1.To.Reg = REGTMP -+ p2 := obj.Appendp(p1, newprog) -+ p2.As = obj.ACALL -+ p2.To.Type = obj.TYPE_MEM -+ p2.To.Reg = REGTMP -+ return -+ } -+ -+ // We only care about global data: NAME_EXTERN means a global symbol in the -+ // Go sense, and p.Sym.Local is true for a few internally defined symbols. 
-+ if p.From.Type == obj.TYPE_ADDR && p.From.Name == obj.NAME_EXTERN && !p.From.Sym.Local() { -+ // MOVV $sym, Rx becomes MOVV sym@GOT, Rx -+ // MOVV $sym+, Rx becomes MOVV sym@GOT, Rx; ADD , Rx -+ if p.As != AMOVV { -+ ctxt.Diag("do not know how to handle TYPE_ADDR in %v with -shared", p) -+ } -+ if p.To.Type != obj.TYPE_REG { -+ ctxt.Diag("do not know how to handle LEAQ-type insn to non-register in %v with -shared", p) -+ } -+ p.From.Type = obj.TYPE_MEM -+ p.From.Name = obj.NAME_GOTREF -+ if p.From.Offset != 0 { -+ q := obj.Appendp(p, newprog) -+ q.As = AADDV -+ q.From.Type = obj.TYPE_CONST -+ q.From.Offset = p.From.Offset -+ q.To = p.To -+ p.From.Offset = 0 -+ } -+ return -+ } -+ -+ if p.GetFrom3() != nil && p.GetFrom3().Name == obj.NAME_EXTERN { -+ ctxt.Diag("don't know how to handle %v with -shared", p) -+ } -+ -+ // MOVx sym, Ry becomes MOVV sym@GOT, REGTMP; MOVx (REGTMP), Ry -+ // MOVx Ry, sym becomes MOVV sym@GOT, REGTMP; MOVx Ry, (REGTMP) -+ // An addition may be inserted between the two MOVs if there is an offset. 
-+ var source *obj.Addr -+ if p.From.Name == obj.NAME_EXTERN && !p.From.Sym.Local() { -+ if p.To.Name == obj.NAME_EXTERN && !p.To.Sym.Local() { -+ ctxt.Diag("cannot handle NAME_EXTERN on both sides in %v with -shared", p) -+ } -+ source = &p.From -+ } else if p.To.Name == obj.NAME_EXTERN && !p.To.Sym.Local() { -+ source = &p.To -+ } else { -+ return -+ } -+ -+ if p.As == obj.ATEXT || p.As == obj.AFUNCDATA || p.As == obj.ACALL || p.As == obj.ARET || p.As == obj.AJMP { -+ return -+ } -+ -+ if source.Sym.Type == objabi.STLSBSS { -+ return -+ } -+ -+ if source.Type != obj.TYPE_MEM { -+ ctxt.Diag("don't know how to handle %v with -shared", p) -+ } -+ -+ p1 := obj.Appendp(p, newprog) -+ p1.As = AMOVV -+ p1.From.Type = obj.TYPE_MEM -+ p1.From.Sym = source.Sym -+ p1.From.Name = obj.NAME_GOTREF -+ p1.To.Type = obj.TYPE_REG -+ p1.To.Reg = REGTMP -+ -+ p2 := obj.Appendp(p1, newprog) -+ p2.As = p.As -+ p2.From = p.From -+ p2.To = p.To -+ if p.From.Name == obj.NAME_EXTERN { -+ p2.From.Reg = REGTMP -+ p2.From.Name = obj.NAME_NONE -+ p2.From.Sym = nil -+ } else if p.To.Name == obj.NAME_EXTERN { -+ p2.To.Reg = REGTMP -+ p2.To.Name = obj.NAME_NONE -+ p2.To.Sym = nil -+ } else { -+ return -+ } -+ -+ obj.Nopout(p) - } - - func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { -diff --git a/src/cmd/internal/objabi/reloctype.go b/src/cmd/internal/objabi/reloctype.go -index 996c300d95..241a79817c 100644 ---- a/src/cmd/internal/objabi/reloctype.go -+++ b/src/cmd/internal/objabi/reloctype.go -@@ -316,6 +316,11 @@ const ( - R_LOONG64_TLS_IE_PCREL_HI - R_LOONG64_TLS_IE_LO - -+ // R_LOONG64_GOTPCREL_HI and R_LOONG64_GOT_LO relocates an pcalau12i, ld.d pair to compute -+ // the address of the GOT slot of the referenced symbol. -+ R_LOONG64_GOTPCREL_HI -+ R_LOONG64_GOT_LO -+ - // R_JMPLOONG64 resolves to non-PC-relative target address of a JMP instruction, - // by encoding the address into the instruction. 
- R_JMPLOONG64 -diff --git a/src/cmd/internal/objabi/reloctype_string.go b/src/cmd/internal/objabi/reloctype_string.go -index c7441efa28..e0649a5b0a 100644 ---- a/src/cmd/internal/objabi/reloctype_string.go -+++ b/src/cmd/internal/objabi/reloctype_string.go -@@ -81,19 +81,21 @@ func _() { - _ = x[R_CALLLOONG64-71] - _ = x[R_LOONG64_TLS_IE_PCREL_HI-72] - _ = x[R_LOONG64_TLS_IE_LO-73] -- _ = x[R_JMPLOONG64-74] -- _ = x[R_ADDRMIPSU-75] -- _ = x[R_ADDRMIPSTLS-76] -- _ = x[R_ADDRCUOFF-77] -- _ = x[R_WASMIMPORT-78] -- _ = x[R_XCOFFREF-79] -- _ = x[R_PEIMAGEOFF-80] -- _ = x[R_INITORDER-81] -+ _ = x[R_LOONG64_GOTPCREL_HI-74] -+ _ = x[R_LOONG64_GOT_LO-75] -+ _ = x[R_JMPLOONG64-76] -+ _ = x[R_ADDRMIPSU-77] -+ _ = x[R_ADDRMIPSTLS-78] -+ _ = x[R_ADDRCUOFF-79] -+ _ = x[R_WASMIMPORT-80] -+ _ = x[R_XCOFFREF-81] -+ _ = x[R_PEIMAGEOFF-82] -+ _ = x[R_INITORDER-83] - } - --const _RelocType_name = "R_ADDRR_ADDRPOWERR_ADDRARM64R_ADDRMIPSR_ADDROFFR_SIZER_CALLR_CALLARMR_CALLARM64R_CALLINDR_CALLPOWERR_CALLMIPSR_CONSTR_PCRELR_TLS_LER_TLS_IER_GOTOFFR_PLT0R_PLT1R_PLT2R_USEFIELDR_USETYPER_USEIFACER_USEIFACEMETHODR_USEGENERICIFACEMETHODR_METHODOFFR_KEEPR_POWER_TOCR_GOTPCRELR_JMPMIPSR_DWARFSECREFR_DWARFFILEREFR_ARM64_TLS_LER_ARM64_TLS_IER_ARM64_GOTPCRELR_ARM64_GOTR_ARM64_PCRELR_ARM64_PCREL_LDST8R_ARM64_PCREL_LDST16R_ARM64_PCREL_LDST32R_ARM64_PCREL_LDST64R_ARM64_LDST8R_ARM64_LDST16R_ARM64_LDST32R_ARM64_LDST64R_ARM64_LDST128R_POWER_TLS_LER_POWER_TLS_IER_POWER_TLSR_POWER_TLS_IE_PCREL34R_POWER_TLS_LE_TPREL34R_ADDRPOWER_DSR_ADDRPOWER_GOTR_ADDRPOWER_GOT_PCREL34R_ADDRPOWER_PCRELR_ADDRPOWER_TOCRELR_ADDRPOWER_TOCREL_DSR_ADDRPOWER_D34R_ADDRPOWER_PCREL34R_RISCV_CALLR_RISCV_CALL_TRAMPR_RISCV_PCREL_ITYPER_RISCV_PCREL_STYPER_RISCV_TLS_IE_ITYPER_RISCV_TLS_IE_STYPER_PCRELDBLR_ADDRLOONG64R_ADDRLOONG64UR_ADDRLOONG64TLSR_ADDRLOONG64TLSUR_CALLLOONG64R_LOONG64_TLS_IE_PCREL_HIR_LOONG64_TLS_IE_LOR_JMPLOONG64R_ADDRMIPSUR_ADDRMIPSTLSR_ADDRCUOFFR_WASMIMPORTR_XCOFFREFR_PEIMAGEOFFR_INITORDER" -+const _RelocType_name = 
"R_ADDRR_ADDRPOWERR_ADDRARM64R_ADDRMIPSR_ADDROFFR_SIZER_CALLR_CALLARMR_CALLARM64R_CALLINDR_CALLPOWERR_CALLMIPSR_CONSTR_PCRELR_TLS_LER_TLS_IER_GOTOFFR_PLT0R_PLT1R_PLT2R_USEFIELDR_USETYPER_USEIFACER_USEIFACEMETHODR_USEGENERICIFACEMETHODR_METHODOFFR_KEEPR_POWER_TOCR_GOTPCRELR_JMPMIPSR_DWARFSECREFR_DWARFFILEREFR_ARM64_TLS_LER_ARM64_TLS_IER_ARM64_GOTPCRELR_ARM64_GOTR_ARM64_PCRELR_ARM64_PCREL_LDST8R_ARM64_PCREL_LDST16R_ARM64_PCREL_LDST32R_ARM64_PCREL_LDST64R_ARM64_LDST8R_ARM64_LDST16R_ARM64_LDST32R_ARM64_LDST64R_ARM64_LDST128R_POWER_TLS_LER_POWER_TLS_IER_POWER_TLSR_POWER_TLS_IE_PCREL34R_POWER_TLS_LE_TPREL34R_ADDRPOWER_DSR_ADDRPOWER_GOTR_ADDRPOWER_GOT_PCREL34R_ADDRPOWER_PCRELR_ADDRPOWER_TOCRELR_ADDRPOWER_TOCREL_DSR_ADDRPOWER_D34R_ADDRPOWER_PCREL34R_RISCV_CALLR_RISCV_CALL_TRAMPR_RISCV_PCREL_ITYPER_RISCV_PCREL_STYPER_RISCV_TLS_IE_ITYPER_RISCV_TLS_IE_STYPER_PCRELDBLR_ADDRLOONG64R_ADDRLOONG64UR_ADDRLOONG64TLSR_ADDRLOONG64TLSUR_CALLLOONG64R_LOONG64_TLS_IE_PCREL_HIR_LOONG64_TLS_IE_LOR_LOONG64_GOTPCREL_HIR_LOONG64_GOT_LOR_JMPLOONG64R_ADDRMIPSUR_ADDRMIPSTLSR_ADDRCUOFFR_WASMIMPORTR_XCOFFREFR_PEIMAGEOFFR_INITORDER" - --var _RelocType_index = [...]uint16{0, 6, 17, 28, 38, 47, 53, 59, 68, 79, 88, 99, 109, 116, 123, 131, 139, 147, 153, 159, 165, 175, 184, 194, 210, 233, 244, 250, 261, 271, 280, 293, 307, 321, 335, 351, 362, 375, 394, 414, 434, 454, 467, 481, 495, 509, 524, 538, 552, 563, 585, 607, 621, 636, 659, 676, 694, 715, 730, 749, 761, 779, 798, 817, 837, 857, 867, 880, 894, 910, 927, 940, 965, 984, 996, 1007, 1020, 1031, 1043, 1053, 1065, 1076} -+var _RelocType_index = [...]uint16{0, 6, 17, 28, 38, 47, 53, 59, 68, 79, 88, 99, 109, 116, 123, 131, 139, 147, 153, 159, 165, 175, 184, 194, 210, 233, 244, 250, 261, 271, 280, 293, 307, 321, 335, 351, 362, 375, 394, 414, 434, 454, 467, 481, 495, 509, 524, 538, 552, 563, 585, 607, 621, 636, 659, 676, 694, 715, 730, 749, 761, 779, 798, 817, 837, 857, 867, 880, 894, 910, 927, 940, 965, 984, 1005, 1021, 1033, 1044, 1057, 1068, 1080, 1090, 
1102, 1113} - - func (i RelocType) String() string { - i -= 1 -diff --git a/src/cmd/link/internal/loong64/asm.go b/src/cmd/link/internal/loong64/asm.go -index 8f06068d78..d1296c3309 100644 ---- a/src/cmd/link/internal/loong64/asm.go -+++ b/src/cmd/link/internal/loong64/asm.go -@@ -14,7 +14,47 @@ import ( - "log" - ) - --func gentext(ctxt *ld.Link, ldr *loader.Loader) {} -+func gentext(ctxt *ld.Link, ldr *loader.Loader) { -+ initfunc, addmoduledata := ld.PrepareAddmoduledata(ctxt) -+ if initfunc == nil { -+ return -+ } -+ -+ o := func(op uint32) { -+ initfunc.AddUint32(ctxt.Arch, op) -+ } -+ -+ // Emit the following function: -+ // -+ // local.dso_init: -+ // la.pcrel $a0, local.moduledata -+ // b runtime.addmoduledata -+ -+ // 0000000000000000 : -+ // 0: 1a000004 pcalau12i $a0, 0 -+ // 0: R_LARCH_PCALA_HI20 local.moduledata -+ o(0x1a000004) -+ rel, _ := initfunc.AddRel(objabi.R_ADDRLOONG64U) -+ rel.SetOff(0) -+ rel.SetSiz(4) -+ rel.SetSym(ctxt.Moduledata) -+ -+ // 4: 02c00084 addi.d $a0, $a0, 0 -+ // 4: R_LARCH_PCALA_LO12 local.moduledata -+ o(0x02c00084) -+ rel2, _ := initfunc.AddRel(objabi.R_ADDRLOONG64) -+ rel2.SetOff(4) -+ rel2.SetSiz(4) -+ rel2.SetSym(ctxt.Moduledata) -+ -+ // 8: 50000000 b 0 -+ // 8: R_LARCH_B26 runtime.addmoduledata -+ o(0x50000000) -+ rel3, _ := initfunc.AddRel(objabi.R_CALLLOONG64) -+ rel3.SetOff(8) -+ rel3.SetSiz(4) -+ rel3.SetSym(addmoduledata) -+} - - func adddynrel(target *ld.Target, ldr *loader.Loader, syms *ld.ArchSyms, s loader.Sym, r loader.Reloc, rIdx int) bool { - log.Fatalf("adddynrel not implemented") -@@ -78,6 +118,16 @@ func elfreloc1(ctxt *ld.Link, out *ld.OutBuf, ldr *loader.Loader, s loader.Sym, - out.Write64(uint64(sectoff)) - out.Write64(uint64(elf.R_LARCH_PCALA_HI20) | uint64(elfsym)<<32) - out.Write64(uint64(r.Xadd)) -+ -+ case objabi.R_LOONG64_GOTPCREL_HI: -+ out.Write64(uint64(sectoff)) -+ out.Write64(uint64(elf.R_LARCH_GOT_PC_HI20) | uint64(elfsym)<<32) -+ out.Write64(uint64(0x0)) -+ -+ case objabi.R_LOONG64_GOT_LO: 
-+ out.Write64(uint64(sectoff)) -+ out.Write64(uint64(elf.R_LARCH_GOT_PC_LO12) | uint64(elfsym)<<32) -+ out.Write64(uint64(0x0)) - } - - return true -@@ -98,6 +148,8 @@ func archreloc(target *ld.Target, ldr *loader.Loader, syms *ld.ArchSyms, r loade - default: - return val, 0, false - case objabi.R_ADDRLOONG64, -+ objabi.R_LOONG64_GOTPCREL_HI, -+ objabi.R_LOONG64_GOT_LO, - objabi.R_ADDRLOONG64U: - // set up addend for eventual relocation via outer symbol. - rs, _ := ld.FoldSubSymbolOffset(ldr, rs) -@@ -156,7 +208,10 @@ func archrelocvariant(*ld.Target, *loader.Loader, loader.Reloc, sym.RelocVariant - func extreloc(target *ld.Target, ldr *loader.Loader, r loader.Reloc, s loader.Sym) (loader.ExtReloc, bool) { - switch r.Type() { - case objabi.R_ADDRLOONG64, -- objabi.R_ADDRLOONG64U: -+ objabi.R_ADDRLOONG64U, -+ objabi.R_LOONG64_GOTPCREL_HI, -+ objabi.R_LOONG64_GOT_LO: -+ - return ld.ExtrelocViaOuterSym(ldr, r, s), true - - case objabi.R_ADDRLOONG64TLS, -diff --git a/src/internal/platform/supported.go b/src/internal/platform/supported.go -index 230a952d2d..abab4b0541 100644 ---- a/src/internal/platform/supported.go -+++ b/src/internal/platform/supported.go -@@ -199,14 +199,14 @@ func BuildModeSupported(compiler, buildmode, goos, goarch string) bool { - - case "shared": - switch platform { -- case "linux/386", "linux/amd64", "linux/arm", "linux/arm64", "linux/ppc64le", "linux/s390x": -+ case "linux/386", "linux/amd64", "linux/arm", "linux/arm64", "linux/loong64", "linux/ppc64le", "linux/s390x": - return true - } - return false - - case "plugin": - switch platform { -- case "linux/amd64", "linux/arm", "linux/arm64", "linux/386", "linux/s390x", "linux/ppc64le", -+ case "linux/amd64", "linux/arm", "linux/arm64", "linux/386", "linux/loong64", "linux/s390x", "linux/ppc64le", - "android/amd64", "android/386", - "darwin/amd64", "darwin/arm64", - "freebsd/amd64": -diff --git a/src/runtime/asm_loong64.s b/src/runtime/asm_loong64.s -index 6ffa1392c4..78a1a4d358 100644 ---- 
a/src/runtime/asm_loong64.s -+++ b/src/runtime/asm_loong64.s -@@ -642,6 +642,17 @@ TEXT runtime·goexit(SB),NOSPLIT|NOFRAME|TOPFRAME,$0-0 - // traceback from goexit1 must hit code range of goexit - NOOP - -+// This is called from .init_array and follows the platform, not Go, ABI. -+TEXT runtime·addmoduledata(SB),NOSPLIT,$0-0 -+ ADDV $-0x10, R3 -+ MOVV R30, 8(R3) // The access to global variables below implicitly uses R30, which is callee-save -+ MOVV runtime·lastmoduledatap(SB), R12 -+ MOVV R4, moduledata_next(R12) -+ MOVV R4, runtime·lastmoduledatap(SB) -+ MOVV 8(R3), R30 -+ ADDV $0x10, R3 -+ RET -+ - TEXT ·checkASM(SB),NOSPLIT,$0-1 - MOVW $1, R19 - MOVB R19, ret+0(FP) --- -2.38.1 - diff --git a/0001-cmd-go-use-aliyun-proxy-and-local-sumdb.patch b/0001-cmd-go-use-aliyun-proxy-and-local-sumdb.patch deleted file mode 100644 index fd474c77bba2a9781577866697035df4f72b41e3..0000000000000000000000000000000000000000 --- a/0001-cmd-go-use-aliyun-proxy-and-local-sumdb.patch +++ /dev/null @@ -1,48 +0,0 @@ -From fa250374b727439159bc9f203b854bb5df00186f Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Jakub=20=C4=8Cajka?= -Date: Mon, 27 May 2019 15:12:53 +0200 -Subject: [PATCH 3/3] cmd/go: disable Google's proxy and sumdb - ---- - src/cmd/go/internal/cfg/cfg.go | 4 ++-- - src/cmd/go/testdata/script/mod_sumdb_golang.txt | 6 +++--- - 2 files changed, 5 insertions(+), 5 deletions(-) - -diff --git a/src/cmd/go/internal/cfg/cfg.go b/src/cmd/go/internal/cfg/cfg.go -index 57a3c1ff6f..e56c60e591 100644 ---- a/src/cmd/go/internal/cfg/cfg.go -+++ b/src/cmd/go/internal/cfg/cfg.go -@@ -417,8 +417,8 @@ var ( - GOPPC64 = envOr("GOPPC64", fmt.Sprintf("%s%d", "power", buildcfg.GOPPC64)) - GOWASM = envOr("GOWASM", fmt.Sprint(buildcfg.GOWASM)) - -- GOPROXY = envOr("GOPROXY", "") -- GOSUMDB = envOr("GOSUMDB", "") -+ GOPROXY = envOr("GOPROXY", "https://mirrors.aliyun.com/goproxy/,direct") -+ GOSUMDB = envOr("GOSUMDB", "sum.golang.google.cn") - GOPRIVATE = Getenv("GOPRIVATE") - GONOPROXY = 
envOr("GONOPROXY", GOPRIVATE) - GONOSUMDB = envOr("GONOSUMDB", GOPRIVATE) -diff --git a/src/cmd/go/testdata/script/mod_sumdb_golang.txt b/src/cmd/go/testdata/script/mod_sumdb_golang.txt -index becd88b52e..b2a1250372 100644 ---- a/src/cmd/go/testdata/script/mod_sumdb_golang.txt -+++ b/src/cmd/go/testdata/script/mod_sumdb_golang.txt -@@ -2,12 +2,12 @@ - [go-builder] env GOPROXY= - [go-builder] env GOSUMDB= - [go-builder] go env GOPROXY --[go-builder] stdout '^https://proxy.golang.org,direct$' -+[go-builder] stdout '^https://mirrors.aliyun.com/goproxy/,direct$' - [go-builder] go env GOSUMDB --[go-builder] stdout '^sum.golang.org$' -+[go-builder] stdout '^sum.golang.google.cn$' - [go-builder] env GOPROXY=https://proxy.golang.org - [go-builder] go env GOSUMDB --[go-builder] stdout '^sum.golang.org$' -+[go-builder] stdout '^sum.golang.google.cn$' - - # Download direct from github. - --- -2.31.1 - diff --git a/0001-cmd-link-internal-add-support-for-internal-linking-o.patch b/0001-cmd-link-internal-add-support-for-internal-linking-o.patch new file mode 100644 index 0000000000000000000000000000000000000000..8f49bc5378a4e86ad82b6008e3e5688252365a39 --- /dev/null +++ b/0001-cmd-link-internal-add-support-for-internal-linking-o.patch @@ -0,0 +1,457 @@ +From 2730907e506ac1fdcc25fbb263df89a03c12b309 Mon Sep 17 00:00:00 2001 +From: limeidan +Date: Mon, 9 Oct 2023 17:31:14 +0800 +Subject: [PATCH 01/44] cmd/link/internal: add support for internal linking on + loong64 + +Change-Id: Ic0d36f27481ac707d04aaf7001f26061e510dd8f +--- + src/cmd/link/internal/loadelf/ldelf.go | 24 ++ + src/cmd/link/internal/loong64/asm.go | 356 ++++++++++++++++++++++++- + 2 files changed, 375 insertions(+), 5 deletions(-) + +diff --git a/src/cmd/link/internal/loadelf/ldelf.go b/src/cmd/link/internal/loadelf/ldelf.go +index e0363b5535..be14cc3bb2 100644 +--- a/src/cmd/link/internal/loadelf/ldelf.go ++++ b/src/cmd/link/internal/loadelf/ldelf.go +@@ -602,6 +602,11 @@ func Load(l *loader.Loader, arch *sys.Arch, 
localSymVersion int, f *bio.Reader, + // See https://sourceware.org/bugzilla/show_bug.cgi?id=21809 + continue + } ++ ++ if arch.Family == sys.Loong64 && (strings.HasPrefix(elfsym.name, ".L") || elfsym.name == "L0\001") { ++ // Symbols generated by the relax feature of gcc and binutils on loong64. ++ continue ++ } + } + + if strings.HasPrefix(elfsym.name, ".Linfo_string") { +@@ -682,6 +687,12 @@ func Load(l *loader.Loader, arch *sys.Arch, localSymVersion int, f *bio.Reader, + l.SetAttrOnList(s, true) + textp = append(textp, s) + for ss := l.SubSym(s); ss != 0; ss = l.SubSym(ss) { ++ if arch.Family == sys.Loong64 && (strings.HasPrefix(l.SymName(ss), ".L") || l.SymName(ss) == "L0\001") { ++ // Symbols generated by the relax feature of gcc and binutils on loong64. ++ // We ignore them here because there are too many symbols of this type, ++ // resulting in insufficient space in findfunctable. ++ continue ++ } + if l.AttrOnList(ss) { + return errorf("symbol %s listed multiple times", + l.SymName(ss)) +@@ -1018,7 +1029,14 @@ func relSize(arch *sys.Arch, pn string, elftype uint32) (uint8, uint8, error) { + MIPS64 | uint32(elf.R_MIPS_PC32)<<16: + return 4, 4, nil + ++ // These are informational annotations to assist linker optimizations. 
++ case LOONG64 | uint32(elf.R_LARCH_ALIGN)<<16, ++ LOONG64 | uint32(elf.R_LARCH_RELAX)<<16: ++ return 0, 0, nil ++ + case LOONG64 | uint32(elf.R_LARCH_ADD8)<<16, ++ LOONG64 | uint32(elf.R_LARCH_ADD6)<<16, ++ LOONG64 | uint32(elf.R_LARCH_SUB6)<<16, + LOONG64 | uint32(elf.R_LARCH_SUB8)<<16: + return 1, 1, nil + +@@ -1032,7 +1050,13 @@ func relSize(arch *sys.Arch, pn string, elftype uint32) (uint8, uint8, error) { + LOONG64 | uint32(elf.R_LARCH_ADD32)<<16, + LOONG64 | uint32(elf.R_LARCH_SUB24)<<16, + LOONG64 | uint32(elf.R_LARCH_SUB32)<<16, ++ LOONG64 | uint32(elf.R_LARCH_B16)<<16, ++ LOONG64 | uint32(elf.R_LARCH_B21)<<16, + LOONG64 | uint32(elf.R_LARCH_B26)<<16, ++ LOONG64 | uint32(elf.R_LARCH_PCALA_HI20)<<16, ++ LOONG64 | uint32(elf.R_LARCH_PCALA_LO12)<<16, ++ LOONG64 | uint32(elf.R_LARCH_GOT_PC_HI20)<<16, ++ LOONG64 | uint32(elf.R_LARCH_GOT_PC_LO12)<<16, + LOONG64 | uint32(elf.R_LARCH_32_PCREL)<<16: + return 4, 4, nil + +diff --git a/src/cmd/link/internal/loong64/asm.go b/src/cmd/link/internal/loong64/asm.go +index 2e69594f92..3a83f1a5ad 100644 +--- a/src/cmd/link/internal/loong64/asm.go ++++ b/src/cmd/link/internal/loong64/asm.go +@@ -58,10 +58,328 @@ func gentext(ctxt *ld.Link, ldr *loader.Loader) { + } + + func adddynrel(target *ld.Target, ldr *loader.Loader, syms *ld.ArchSyms, s loader.Sym, r loader.Reloc, rIdx int) bool { +- log.Fatalf("adddynrel not implemented") ++ targ := r.Sym() ++ var targType sym.SymKind ++ if targ != 0 { ++ targType = ldr.SymType(targ) ++ } ++ ++ switch r.Type() { ++ default: ++ if r.Type() >= objabi.ElfRelocOffset { ++ ldr.Errorf(s, "adddynrel: unexpected reloction type %d (%s)", r.Type(), sym.RelocName(target.Arch, r.Type())) ++ return false ++ } ++ ++ case objabi.ElfRelocOffset + objabi.RelocType(elf.R_LARCH_64): ++ if targType == sym.SDYNIMPORT { ++ ldr.Errorf(s, "unexpected R_LARCH_64 relocation for dynamic symbol %s", ldr.SymName(targ)) ++ } ++ su := ldr.MakeSymbolUpdater(s) ++ su.SetRelocType(rIdx, objabi.R_ADDR) ++ if 
target.IsPIE() && target.IsInternal() { ++ // For internal linking PIE, this R_ADDR relocation cannot ++ // be resolved statically. We need to generate a dynamic ++ // relocation. Let the code below handle it. ++ break ++ } ++ return true ++ ++ case objabi.ElfRelocOffset + objabi.RelocType(elf.R_LARCH_B26): ++ if targType == sym.SDYNIMPORT { ++ addpltsym(target, ldr, syms, targ) ++ su := ldr.MakeSymbolUpdater(s) ++ su.SetRelocSym(rIdx, syms.PLT) ++ su.SetRelocAdd(rIdx, r.Add()+int64(ldr.SymPlt(targ))) ++ } ++ if targType == 0 || targType == sym.SXREF { ++ ldr.Errorf(s, "unknown symbol %s in callloong64", ldr.SymName(targ)) ++ } ++ su := ldr.MakeSymbolUpdater(s) ++ su.SetRelocType(rIdx, objabi.R_CALLLOONG64) ++ return true ++ ++ case objabi.ElfRelocOffset + objabi.RelocType(elf.R_LARCH_GOT_PC_HI20), ++ objabi.ElfRelocOffset + objabi.RelocType(elf.R_LARCH_GOT_PC_LO12): ++ if targType != sym.SDYNIMPORT { ++ // TODO: turn LDR of GOT entry into ADR of symbol itself ++ } ++ ++ ld.AddGotSym(target, ldr, syms, targ, uint32(elf.R_LARCH_64)) ++ su := ldr.MakeSymbolUpdater(s) ++ if r.Type() == objabi.ElfRelocOffset+objabi.RelocType(elf.R_LARCH_GOT_PC_HI20) { ++ su.SetRelocType(rIdx, objabi.R_LOONG64_ADDR_HI) ++ } else { ++ su.SetRelocType(rIdx, objabi.R_LOONG64_ADDR_LO) ++ } ++ su.SetRelocSym(rIdx, syms.GOT) ++ su.SetRelocAdd(rIdx, r.Add()+int64(ldr.SymGot(targ))) ++ return true ++ ++ case objabi.ElfRelocOffset + objabi.RelocType(elf.R_LARCH_PCALA_HI20), ++ objabi.ElfRelocOffset + objabi.RelocType(elf.R_LARCH_PCALA_LO12): ++ if targType == sym.SDYNIMPORT { ++ ldr.Errorf(s, "unexpected relocation for dynamic symbol %s", ldr.SymName(targ)) ++ } ++ if targType == 0 || targType == sym.SXREF { ++ ldr.Errorf(s, "unknown symbol %s", ldr.SymName(targ)) ++ } ++ ++ su := ldr.MakeSymbolUpdater(s) ++ if r.Type() == objabi.ElfRelocOffset+objabi.RelocType(elf.R_LARCH_PCALA_HI20) { ++ su.SetRelocType(rIdx, objabi.R_LOONG64_ADDR_HI) ++ } else { ++ su.SetRelocType(rIdx, 
objabi.R_LOONG64_ADDR_LO) ++ } ++ return true ++ ++ case objabi.ElfRelocOffset + objabi.RelocType(elf.R_LARCH_ADD64), ++ objabi.ElfRelocOffset + objabi.RelocType(elf.R_LARCH_SUB64): ++ su := ldr.MakeSymbolUpdater(s) ++ if r.Type() == objabi.ElfRelocOffset+objabi.RelocType(elf.R_LARCH_ADD64) { ++ su.SetRelocType(rIdx, objabi.R_LOONG64_ADD64) ++ } else { ++ su.SetRelocType(rIdx, objabi.R_LOONG64_SUB64) ++ } ++ return true ++ ++ case objabi.ElfRelocOffset + objabi.RelocType(elf.R_LARCH_B16), ++ objabi.ElfRelocOffset + objabi.RelocType(elf.R_LARCH_B21): ++ if targType == sym.SDYNIMPORT { ++ addpltsym(target, ldr, syms, targ) ++ su := ldr.MakeSymbolUpdater(s) ++ su.SetRelocSym(rIdx, syms.PLT) ++ su.SetRelocAdd(rIdx, r.Add()+int64(ldr.SymPlt(targ))) ++ } ++ if targType == 0 || targType == sym.SXREF { ++ ldr.Errorf(s, "unknown symbol %s in R_JMPxxLOONG64", ldr.SymName(targ)) ++ } ++ su := ldr.MakeSymbolUpdater(s) ++ if r.Type() == objabi.ElfRelocOffset+objabi.RelocType(elf.R_LARCH_B16) { ++ su.SetRelocType(rIdx, objabi.R_JMP16LOONG64) ++ } else { ++ su.SetRelocType(rIdx, objabi.R_JMP21LOONG64) ++ } ++ return true ++ } ++ ++ relocs := ldr.Relocs(s) ++ r = relocs.At(rIdx) ++ ++ switch r.Type() { ++ case objabi.R_CALLLOONG64: ++ if targType != sym.SDYNIMPORT { ++ return true ++ } ++ if target.IsExternal() { ++ return true ++ } ++ ++ // Internal linking. ++ if r.Add() != 0 { ++ ldr.Errorf(s, "PLT call with no-zero addend (%v)", r.Add()) ++ } ++ ++ // Build a PLT entry and change the relocation target to that entry. ++ addpltsym(target, ldr, syms, targ) ++ su := ldr.MakeSymbolUpdater(s) ++ su.SetRelocSym(rIdx, syms.PLT) ++ su.SetRelocAdd(rIdx, int64(ldr.SymPlt(targ))) ++ return true ++ ++ case objabi.R_ADDR: ++ if ldr.SymType(s) == sym.STEXT && target.IsElf() { ++ // The code is asking for the address of an external ++ // function. We provide it with the address of the ++ // correspondent GOT symbol. 
++ ld.AddGotSym(target, ldr, syms, targ, uint32(elf.R_LARCH_64)) ++ su := ldr.MakeSymbolUpdater(s) ++ su.SetRelocSym(rIdx, syms.GOT) ++ su.SetRelocAdd(rIdx, r.Add()+int64(ldr.SymGot(targ))) ++ return true ++ } ++ ++ // Process dynamic relocations for the data sections. ++ if target.IsPIE() && target.IsInternal() { ++ // When internally linking, generate dynamic relocations ++ // for all typical R_ADDR relocations. The exception ++ // are those R_ADDR that are created as part of generating ++ // the dynamic relocations and must be resolved statically. ++ // ++ // There are three phases relevant to understanding this: ++ // ++ // dodata() // we are here ++ // address() // symbol address assignment ++ // reloc() // resolution of static R_ADDR relocs ++ // ++ // At this point symbol addresses have not been ++ // assigned yet (as the final size of the .rela section ++ // will affect the addresses), and so we cannot write ++ // the Elf64_Rela.r_offset now. Instead we delay it ++ // until after the 'address' phase of the linker is ++ // complete. We do this via Addaddrplus, which creates ++ // a new R_ADDR relocation which will be resolved in ++ // the 'reloc' phase. ++ // ++ // These synthetic static R_ADDR relocs must be skipped ++ // now, or else we will be caught in an infinite loop ++ // of generating synthetic relocs for our synthetic ++ // relocs. ++ // ++ // Furthermore, the rela sections contain dynamic ++ // relocations with R_ADDR relocations on ++ // Elf64_Rela.r_offset. This field should contain the ++ // symbol offset as determined by reloc(), not the ++ // final dynamically linked address as a dynamic ++ // relocation would provide. 
++ switch ldr.SymName(s) { ++ case ".dynsym", ".rela", ".rela.plt", ".got.plt", ".dynamic": ++ return false ++ } ++ } else { ++ // Either internally linking a static executable, ++ // in which case we can resolve these relocations ++ // statically in the 'reloc' phase, or externally ++ // linking, in which case the relocation will be ++ // prepared in the 'reloc' phase and passed to the ++ // external linker in the 'asmb' phase. ++ if ldr.SymType(s) != sym.SDATA && ldr.SymType(s) != sym.SRODATA { ++ break ++ } ++ } ++ ++ if target.IsElf() { ++ // Generate R_LARCH_RELATIVE relocations for best ++ // efficiency in the dynamic linker. ++ // ++ // As noted above, symbol addresses have not been ++ // assigned yet, so we can't generate the final reloc ++ // entry yet. We ultimately want: ++ // ++ // r_offset = s + r.Off ++ // r_info = R_LARCH_RELATIVE ++ // r_addend = targ + r.Add ++ // ++ // The dynamic linker will set *offset = base address + ++ // addend. ++ // ++ // AddAddrPlus is used for r_offset and r_addend to ++ // generate new R_ADDR relocations that will update ++ // these fields in the 'reloc' phase. 
++ rela := ldr.MakeSymbolUpdater(syms.Rela) ++ rela.AddAddrPlus(target.Arch, s, int64(r.Off())) ++ if r.Siz() == 8 { ++ rela.AddUint64(target.Arch, elf.R_INFO(0, uint32(elf.R_LARCH_RELATIVE))) ++ } else { ++ ldr.Errorf(s, "unexpected relocation for dynamic symbol %s", ldr.SymName(targ)) ++ } ++ rela.AddAddrPlus(target.Arch, targ, int64(r.Add())) ++ return true ++ } ++ ++ case objabi.R_LOONG64_GOT_HI, ++ objabi.R_LOONG64_GOT_LO: ++ ld.AddGotSym(target, ldr, syms, targ, uint32(elf.R_LARCH_64)) ++ su := ldr.MakeSymbolUpdater(s) ++ if r.Type() == objabi.R_LOONG64_GOT_HI { ++ su.SetRelocType(rIdx, objabi.R_LOONG64_ADDR_HI) ++ } else { ++ su.SetRelocType(rIdx, objabi.R_LOONG64_ADDR_LO) ++ } ++ su.SetRelocSym(rIdx, syms.GOT) ++ su.SetRelocAdd(rIdx, r.Add()+int64(ldr.SymGot(targ))) ++ return true ++ } + return false + } + ++func elfsetupplt(ctxt *ld.Link, ldr *loader.Loader, plt, gotplt *loader.SymbolBuilder, dynamic loader.Sym) { ++ if plt.Size() == 0 { ++ // pcalau12i $r14, imm ++ plt.AddSymRef(ctxt.Arch, gotplt.Sym(), 0, objabi.R_LOONG64_ADDR_HI, 4) ++ plt.SetUint32(ctxt.Arch, plt.Size()-4, 0x1a00000e) ++ ++ // sub.d $r13, $r13, $r15 ++ plt.AddUint32(ctxt.Arch, 0x0011bdad) ++ ++ // ld.d $r15, $r14, imm ++ plt.AddSymRef(ctxt.Arch, gotplt.Sym(), 0, objabi.R_LOONG64_ADDR_LO, 4) ++ plt.SetUint32(ctxt.Arch, plt.Size()-4, 0x28c001cf) ++ ++ // addi.d $r13, $r13, -40 ++ plt.AddUint32(ctxt.Arch, 0x02ff61ad) ++ ++ // addi.d $r12, $r14, imm ++ plt.AddSymRef(ctxt.Arch, gotplt.Sym(), 0, objabi.R_LOONG64_ADDR_LO, 4) ++ plt.SetUint32(ctxt.Arch, plt.Size()-4, 0x2c001cc) ++ ++ // srli.d $r13, $r13, 1 ++ plt.AddUint32(ctxt.Arch, 0x004505ad) ++ ++ // ld.d $r12, $r12, 8 ++ plt.AddUint32(ctxt.Arch, 0x28c0218c) ++ ++ // jirl $r0, $r15, 0 ++ plt.AddUint32(ctxt.Arch, 0x4c0001e0) ++ ++ // check gotplt.size == 0 ++ if gotplt.Size() != 0 { ++ ctxt.Errorf(gotplt.Sym(), "got.plt is not empty at the very beginning") ++ } ++ ++ gotplt.AddUint64(ctxt.Arch, 0) ++ gotplt.AddUint64(ctxt.Arch, 0) ++ } ++} 
++ ++func addpltsym(target *ld.Target, ldr *loader.Loader, syms *ld.ArchSyms, s loader.Sym) { ++ if ldr.SymPlt(s) >= 0 { ++ return ++ } ++ ++ ld.Adddynsym(ldr, target, syms, s) ++ ++ if target.IsElf() { ++ plt := ldr.MakeSymbolUpdater(syms.PLT) ++ gotplt := ldr.MakeSymbolUpdater(syms.GOTPLT) ++ rela := ldr.MakeSymbolUpdater(syms.RelaPLT) ++ if plt.Size() == 0 { ++ panic("plt is not set up") ++ } ++ ++ // pcalau12i $r15, imm ++ plt.AddAddrPlus4(target.Arch, gotplt.Sym(), gotplt.Size()) ++ plt.SetUint32(target.Arch, plt.Size()-4, 0x1a00000f) ++ relocs := plt.Relocs() ++ plt.SetRelocType(relocs.Count()-1, objabi.R_LOONG64_ADDR_HI) ++ ++ // ld.d $r15, $r15, imm ++ plt.AddAddrPlus4(target.Arch, gotplt.Sym(), gotplt.Size()) ++ plt.SetUint32(target.Arch, plt.Size()-4, 0x28c001ef) ++ relocs = plt.Relocs() ++ plt.SetRelocType(relocs.Count()-1, objabi.R_LOONG64_ADDR_LO) ++ ++ // pcaddu12i $r13, 0 ++ plt.AddUint32(target.Arch, 0x1c00000d) ++ ++ // jirl r0, r15, 0 ++ plt.AddUint32(target.Arch, 0x4c0001e0) ++ ++ // add to got.plt: pointer to plt[0] ++ gotplt.AddAddrPlus(target.Arch, plt.Sym(), 0) ++ ++ // rela ++ rela.AddAddrPlus(target.Arch, gotplt.Sym(), gotplt.Size()-8) ++ sDynid := ldr.SymDynid(s) ++ rela.AddUint64(target.Arch, elf.R_INFO(uint32(sDynid), uint32(elf.R_LARCH_JUMP_SLOT))) ++ rela.AddUint64(target.Arch, 0) ++ ++ ldr.SetPlt(s, int32(plt.Size()-16)) ++ } else { ++ ldr.Errorf(s, "addpltsym: unsupport binary format") ++ } ++} ++ + func elfreloc1(ctxt *ld.Link, out *ld.OutBuf, ldr *loader.Loader, s loader.Sym, r loader.ExtReloc, ri int, sectoff int64) bool { + // loong64 ELF relocation (endian neutral) + // offset uint64 +@@ -134,10 +452,6 @@ func elfreloc1(ctxt *ld.Link, out *ld.OutBuf, ldr *loader.Loader, s loader.Sym, + return true + } + +-func elfsetupplt(ctxt *ld.Link, ldr *loader.Loader, plt, gotplt *loader.SymbolBuilder, dynamic loader.Sym) { +- return +-} +- + func machoreloc1(*sys.Arch, *ld.OutBuf, *loader.Loader, loader.Sym, loader.ExtReloc, int64) bool { 
+ return false + } +@@ -197,6 +511,38 @@ func archreloc(target *ld.Target, ldr *loader.Loader, syms *ld.ArchSyms, r loade + pc := ldr.SymValue(s) + int64(r.Off()) + t := ldr.SymAddr(rs) + r.Add() - pc + return int64(val&0xfc000000 | (((t >> 2) & 0xffff) << 10) | (((t >> 2) & 0x3ff0000) >> 16)), noExtReloc, isOk ++ ++ case objabi.R_JMP16LOONG64, ++ objabi.R_JMP21LOONG64: ++ pc := ldr.SymValue(s) + int64(r.Off()) ++ t := ldr.SymAddr(rs) + r.Add() - pc ++ if r.Type() == objabi.R_JMP16LOONG64 { ++ return int64(val&0xfc0003ff | (((t >> 2) & 0xffff) << 10)), noExtReloc, isOk ++ } ++ return int64(val&0xfc0003e0 | (((t >> 2) & 0xffff) << 10) | (((t >> 2) & 0x1f0000) >> 16)), noExtReloc, isOk ++ ++ case objabi.R_LOONG64_TLS_IE_HI, ++ objabi.R_LOONG64_TLS_IE_LO: ++ if target.IsPIE() && target.IsElf() { ++ if !target.IsLinux() { ++ ldr.Errorf(s, "TLS reloc on unsupported OS %v", target.HeadType) ++ } ++ t := ldr.SymAddr(rs) + r.Add() ++ if r.Type() == objabi.R_LOONG64_TLS_IE_HI { ++ // pcalau12i -> lu12i.w ++ return (0x14000000 | (val & 0x1f) | ((t >> 12) << 5)), noExtReloc, isOk ++ } ++ // ld.d -> ori ++ return (0x03800000 | (val & 0x3ff) | ((t & 0xfff) << 10)), noExtReloc, isOk ++ } else { ++ log.Fatalf("cannot handle R_LOONG64_TLS_IE_x (sym %s) when linking internally", ldr.SymName(rs)) ++ } ++ ++ case objabi.R_LOONG64_ADD64, objabi.R_LOONG64_SUB64: ++ if r.Type() == objabi.R_LOONG64_ADD64 { ++ return int64(val + ldr.SymAddr(rs) + r.Add()), noExtReloc, isOk ++ } ++ return int64(val - (ldr.SymAddr(rs) + r.Add())), noExtReloc, isOk + } + + return val, 0, false +-- +2.38.1 + diff --git a/0002-cmd-dist-internal-platform-enable-internal-linking-f.patch b/0002-cmd-dist-internal-platform-enable-internal-linking-f.patch new file mode 100644 index 0000000000000000000000000000000000000000..8f2e4c171a8fe6328bc2ecfa7c3888b65e22339b --- /dev/null +++ b/0002-cmd-dist-internal-platform-enable-internal-linking-f.patch @@ -0,0 +1,83 @@ +From d404dccc7f089ddbd81b95c3d97f19acc6cb0329 Mon Sep 
17 00:00:00 2001 +From: limeidan +Date: Mon, 9 Oct 2023 17:32:03 +0800 +Subject: [PATCH 02/44] cmd/dist, internal/platform: enable internal linking + feature and test on loong64 + +Change-Id: Ifea676e9eb44281465832fc4050f6286e50f4543 +--- + src/cmd/dist/build.go | 4 +++- + src/cmd/dist/test.go | 4 ++-- + src/internal/platform/supported.go | 6 ++++-- + 3 files changed, 9 insertions(+), 5 deletions(-) + +diff --git a/src/cmd/dist/build.go b/src/cmd/dist/build.go +index 1f467647f5..b71d6c393e 100644 +--- a/src/cmd/dist/build.go ++++ b/src/cmd/dist/build.go +@@ -624,10 +624,12 @@ func setup() { + func mustLinkExternal(goos, goarch string, cgoEnabled bool) bool { + if cgoEnabled { + switch goarch { +- case "loong64", "mips", "mipsle", "mips64", "mips64le": ++ case "mips", "mipsle", "mips64", "mips64le": + // Internally linking cgo is incomplete on some architectures. + // https://golang.org/issue/14449 + return true ++ case "loong64": ++ return false + case "arm64": + if goos == "windows" { + // windows/arm64 internal linking is not implemented. +diff --git a/src/cmd/dist/test.go b/src/cmd/dist/test.go +index 0c992118f4..9728ef29cb 100644 +--- a/src/cmd/dist/test.go ++++ b/src/cmd/dist/test.go +@@ -1164,7 +1164,7 @@ func (t *tester) internalLink() bool { + // Internally linking cgo is incomplete on some architectures. 
+ // https://golang.org/issue/10373 + // https://golang.org/issue/14449 +- if goarch == "loong64" || goarch == "mips64" || goarch == "mips64le" || goarch == "mips" || goarch == "mipsle" || goarch == "riscv64" { ++ if goarch == "mips64" || goarch == "mips64le" || goarch == "mips" || goarch == "mipsle" || goarch == "riscv64" { + return false + } + if goos == "aix" { +@@ -1185,7 +1185,7 @@ func (t *tester) internalLinkPIE() bool { + } + switch goos + "-" + goarch { + case "darwin-amd64", "darwin-arm64", +- "linux-amd64", "linux-arm64", "linux-ppc64le", ++ "linux-amd64", "linux-arm64", "linux-loong64", "linux-ppc64le", + "android-arm64", + "windows-amd64", "windows-386", "windows-arm": + return true +diff --git a/src/internal/platform/supported.go b/src/internal/platform/supported.go +index e864c37d68..79ed6d4b1c 100644 +--- a/src/internal/platform/supported.go ++++ b/src/internal/platform/supported.go +@@ -85,10 +85,12 @@ func FuzzInstrumented(goos, goarch string) bool { + func MustLinkExternal(goos, goarch string, withCgo bool) bool { + if withCgo { + switch goarch { +- case "loong64", "mips", "mipsle", "mips64", "mips64le": ++ case "mips", "mipsle", "mips64", "mips64le": + // Internally linking cgo is incomplete on some architectures. + // https://go.dev/issue/14449 + return true ++ case "loong64": ++ return false + case "arm64": + if goos == "windows" { + // windows/arm64 internal linking is not implemented. 
+@@ -225,7 +227,7 @@ func InternalLinkPIESupported(goos, goarch string) bool { + switch goos + "/" + goarch { + case "android/arm64", + "darwin/amd64", "darwin/arm64", +- "linux/amd64", "linux/arm64", "linux/ppc64le", ++ "linux/amd64", "linux/arm64", "linux/loong64", "linux/ppc64le", + "windows/386", "windows/amd64", "windows/arm", "windows/arm64": + return true + } +-- +2.38.1 + diff --git a/0002-runtime-cmd-go-enable-memory-sanitizer-on-linux-loon.patch b/0002-runtime-cmd-go-enable-memory-sanitizer-on-linux-loon.patch deleted file mode 100644 index 1dd74447e04d911936448b966fb271c43e206fb6..0000000000000000000000000000000000000000 --- a/0002-runtime-cmd-go-enable-memory-sanitizer-on-linux-loon.patch +++ /dev/null @@ -1,268 +0,0 @@ -From 0ee3e3ece0f02f18211190d31a75118d236635e4 Mon Sep 17 00:00:00 2001 -From: Xiaolin Zhao -Date: Fri, 31 Mar 2023 15:36:59 +0800 -Subject: [PATCH 02/51] runtime, cmd/go: enable memory sanitizer on - linux/loong64 - -Change-Id: If537c5ffb1c9d4b3316b9b3794d411953bc5764b ---- - src/cmd/go/alldocs.go | 2 +- - src/cmd/go/internal/work/build.go | 2 +- - src/internal/platform/supported.go | 2 +- - src/runtime/cgo/gcc_mmap.c | 2 +- - src/runtime/cgo/mmap.go | 2 +- - src/runtime/cgo_mmap.go | 2 +- - src/runtime/mmap.go | 2 +- - src/runtime/msan/msan.go | 2 +- - src/runtime/msan_loong64.s | 72 ++++++++++++++++++++++++++++++ - src/runtime/sys_linux_loong64.s | 37 +++++++++++++-- - 10 files changed, 113 insertions(+), 12 deletions(-) - create mode 100644 src/runtime/msan_loong64.s - -diff --git a/src/cmd/go/alldocs.go b/src/cmd/go/alldocs.go -index bb28756133..d8377d1fd6 100644 ---- a/src/cmd/go/alldocs.go -+++ b/src/cmd/go/alldocs.go -@@ -119,7 +119,7 @@ - // linux/ppc64le and linux/arm64 (only for 48-bit VMA). - // -msan - // enable interoperation with memory sanitizer. 
--// Supported only on linux/amd64, linux/arm64, freebsd/amd64 -+// Supported only on linux/amd64, linux/arm64, linux/loong64, freebsd/amd64 - // and only with Clang/LLVM as the host C compiler. - // PIE build mode will be used on all platforms except linux/amd64. - // -asan -diff --git a/src/cmd/go/internal/work/build.go b/src/cmd/go/internal/work/build.go -index e2e0e07299..05e300581c 100644 ---- a/src/cmd/go/internal/work/build.go -+++ b/src/cmd/go/internal/work/build.go -@@ -76,7 +76,7 @@ and test commands: - linux/ppc64le and linux/arm64 (only for 48-bit VMA). - -msan - enable interoperation with memory sanitizer. -- Supported only on linux/amd64, linux/arm64, freebsd/amd64 -+ Supported only on linux/amd64, linux/arm64, linux/loong64, freebsd/amd64 - and only with Clang/LLVM as the host C compiler. - PIE build mode will be used on all platforms except linux/amd64. - -asan -diff --git a/src/internal/platform/supported.go b/src/internal/platform/supported.go -index abab4b0541..b4f8bb5496 100644 ---- a/src/internal/platform/supported.go -+++ b/src/internal/platform/supported.go -@@ -38,7 +38,7 @@ func RaceDetectorSupported(goos, goarch string) bool { - func MSanSupported(goos, goarch string) bool { - switch goos { - case "linux": -- return goarch == "amd64" || goarch == "arm64" -+ return goarch == "amd64" || goarch == "arm64" || goarch == "loong64" - case "freebsd": - return goarch == "amd64" - default: -diff --git a/src/runtime/cgo/gcc_mmap.c b/src/runtime/cgo/gcc_mmap.c -index 1fbd5e82a4..eb710a039d 100644 ---- a/src/runtime/cgo/gcc_mmap.c -+++ b/src/runtime/cgo/gcc_mmap.c -@@ -2,7 +2,7 @@ - // Use of this source code is governed by a BSD-style - // license that can be found in the LICENSE file. 
- --//go:build (linux && (amd64 || arm64 || ppc64le)) || (freebsd && amd64) -+//go:build (linux && (amd64 || arm64 || loong64 || ppc64le)) || (freebsd && amd64) - - #include - #include -diff --git a/src/runtime/cgo/mmap.go b/src/runtime/cgo/mmap.go -index 2f7e83bcb7..144af2b2ca 100644 ---- a/src/runtime/cgo/mmap.go -+++ b/src/runtime/cgo/mmap.go -@@ -2,7 +2,7 @@ - // Use of this source code is governed by a BSD-style - // license that can be found in the LICENSE file. - --//go:build (linux && amd64) || (linux && arm64) || (freebsd && amd64) -+//go:build (linux && (amd64 || arm64 || loong64)) || (freebsd && amd64) - - package cgo - -diff --git a/src/runtime/cgo_mmap.go b/src/runtime/cgo_mmap.go -index 30660f7784..36d776e628 100644 ---- a/src/runtime/cgo_mmap.go -+++ b/src/runtime/cgo_mmap.go -@@ -4,7 +4,7 @@ - - // Support for memory sanitizer. See runtime/cgo/mmap.go. - --//go:build (linux && amd64) || (linux && arm64) || (freebsd && amd64) -+//go:build (linux && (amd64 || arm64 || loong64)) || (freebsd && amd64) - - package runtime - -diff --git a/src/runtime/mmap.go b/src/runtime/mmap.go -index f0183f61cf..9a7b298562 100644 ---- a/src/runtime/mmap.go -+++ b/src/runtime/mmap.go -@@ -2,7 +2,7 @@ - // Use of this source code is governed by a BSD-style - // license that can be found in the LICENSE file. - --//go:build !aix && !darwin && !js && (!linux || !amd64) && (!linux || !arm64) && (!freebsd || !amd64) && !openbsd && !plan9 && !solaris && !windows -+//go:build !aix && !darwin && !js && !((linux && (amd64 || arm64 || loong64)) || (freebsd && amd64)) && !openbsd && !plan9 && !solaris && !windows - - package runtime - -diff --git a/src/runtime/msan/msan.go b/src/runtime/msan/msan.go -index 4e41f8528d..7b3e8e608d 100644 ---- a/src/runtime/msan/msan.go -+++ b/src/runtime/msan/msan.go -@@ -2,7 +2,7 @@ - // Use of this source code is governed by a BSD-style - // license that can be found in the LICENSE file. 
- --//go:build msan && ((linux && (amd64 || arm64)) || (freebsd && amd64)) -+//go:build msan && ((linux && (amd64 || arm64 || loong64)) || (freebsd && amd64)) - - package msan - -diff --git a/src/runtime/msan_loong64.s b/src/runtime/msan_loong64.s -new file mode 100644 -index 0000000000..dcd7940ed8 ---- /dev/null -+++ b/src/runtime/msan_loong64.s -@@ -0,0 +1,72 @@ -+// Copyright 2023 The Go Authors. All rights reserved. -+// Use of this source code is governed by a BSD-style -+// license that can be found in the LICENSE file. -+ -+//go:build msan -+ -+#include "go_asm.h" -+#include "textflag.h" -+ -+#define RARG0 R4 -+#define RARG1 R5 -+#define RARG2 R6 -+#define FARG R7 -+ -+// func runtime·domsanread(addr unsafe.Pointer, sz uintptr) -+// Called from msanread. -+TEXT runtime·domsanread(SB), NOSPLIT, $0-16 -+ MOVV addr+0(FP), RARG0 -+ MOVV size+8(FP), RARG1 -+ // void __msan_read_go(void *addr, uintptr_t sz); -+ MOVV $__msan_read_go(SB), FARG -+ JMP msancall<>(SB) -+ -+// func runtime·msanwrite(addr unsafe.Pointer, sz uintptr) -+// Called from instrumented code. 
-+TEXT runtime·msanwrite(SB), NOSPLIT, $0-16 -+ MOVV addr+0(FP), RARG0 -+ MOVV size+8(FP), RARG1 -+ // void __msan_write_go(void *addr, uintptr_t sz); -+ MOVV $__msan_write_go(SB), FARG -+ JMP msancall<>(SB) -+ -+// func runtime·msanmalloc(addr unsafe.Pointer, sz uintptr) -+TEXT runtime·msanmalloc(SB), NOSPLIT, $0-16 -+ MOVV addr+0(FP), RARG0 -+ MOVV size+8(FP), RARG1 -+ // void __msan_malloc_go(void *addr, uintptr_t sz); -+ MOVV $__msan_malloc_go(SB), FARG -+ JMP msancall<>(SB) -+ -+// func runtime·msanfree(addr unsafe.Pointer, sz uintptr) -+TEXT runtime·msanfree(SB), NOSPLIT, $0-16 -+ MOVV addr+0(FP), RARG0 -+ MOVV size+8(FP), RARG1 -+ // void __msan_free_go(void *addr, uintptr_t sz); -+ MOVV $__msan_free_go(SB), FARG -+ JMP msancall<>(SB) -+ -+// func runtime·msanmove(dst, src unsafe.Pointer, sz uintptr) -+TEXT runtime·msanmove(SB), NOSPLIT, $0-24 -+ MOVV dst+0(FP), RARG0 -+ MOVV src+8(FP), RARG1 -+ MOVV size+16(FP), RARG2 -+ // void __msan_memmove(void *dst, void *src, uintptr_t sz); -+ MOVV $__msan_memmove(SB), FARG -+ JMP msancall<>(SB) -+ -+// Switches SP to g0 stack and calls (FARG). Arguments already set. 
-+TEXT msancall<>(SB), NOSPLIT, $0-0 -+ MOVV R3, R23 // callee-saved -+ BEQ g, g0stack // no g, still on a system stack -+ MOVV g_m(g), R14 -+ MOVV m_g0(R14), R15 -+ BEQ R15, g, g0stack -+ -+ MOVV (g_sched+gobuf_sp)(R15), R9 -+ MOVV R9, R3 -+ -+g0stack: -+ JAL (FARG) -+ MOVV R23, R3 -+ RET -diff --git a/src/runtime/sys_linux_loong64.s b/src/runtime/sys_linux_loong64.s -index 12e5455345..eba8e1f24c 100644 ---- a/src/runtime/sys_linux_loong64.s -+++ b/src/runtime/sys_linux_loong64.s -@@ -461,8 +461,8 @@ TEXT runtime·sigtramp(SB),NOSPLIT|TOPFRAME,$168 - TEXT runtime·cgoSigtramp(SB),NOSPLIT,$0 - JMP runtime·sigtramp(SB) - --// func mmap(addr unsafe.Pointer, n uintptr, prot, flags, fd int32, off uint32) (p unsafe.Pointer, err int) --TEXT runtime·mmap(SB),NOSPLIT|NOFRAME,$0 -+// func sysMmap(addr unsafe.Pointer, n uintptr, prot, flags, fd int32, off uint32) (p unsafe.Pointer, err int) -+TEXT runtime·sysMmap(SB),NOSPLIT|NOFRAME,$0 - MOVV addr+0(FP), R4 - MOVV n+8(FP), R5 - MOVW prot+16(FP), R6 -@@ -483,8 +483,25 @@ ok: - MOVV $0, err+40(FP) - RET - --// func munmap(addr unsafe.Pointer, n uintptr) --TEXT runtime·munmap(SB),NOSPLIT|NOFRAME,$0 -+// Call the function stored in _cgo_mmap using the GCC calling convention. -+// This must be called on the system stack. -+// func callCgoMmap(addr unsafe.Pointer, n uintptr, prot, flags, fd int32, off uint32) uintptr -+TEXT runtime·callCgoMmap(SB),NOSPLIT,$0 -+ MOVV addr+0(FP), R4 -+ MOVV n+8(FP), R5 -+ MOVW prot+16(FP), R6 -+ MOVW flags+20(FP), R7 -+ MOVW fd+24(FP), R8 -+ MOVW off+28(FP), R9 -+ MOVV _cgo_mmap(SB), R13 -+ SUBV $16, R3 // reserve 16 bytes for sp-8 where fp may be saved. 
-+ JAL (R13) -+ ADDV $16, R3 -+ MOVV R4, ret+32(FP) -+ RET -+ -+// func sysMunmap(addr unsafe.Pointer, n uintptr) -+TEXT runtime·sysMunmap(SB),NOSPLIT|NOFRAME,$0 - MOVV addr+0(FP), R4 - MOVV n+8(FP), R5 - MOVV $SYS_munmap, R11 -@@ -494,6 +511,18 @@ TEXT runtime·munmap(SB),NOSPLIT|NOFRAME,$0 - MOVV R0, 0xf3(R0) // crash - RET - -+// Call the function stored in _cgo_munmap using the GCC calling convention. -+// This must be called on the system stack. -+// func callCgoMunmap(addr unsafe.Pointer, n uintptr) -+TEXT runtime·callCgoMunmap(SB),NOSPLIT,$0 -+ MOVV addr+0(FP), R4 -+ MOVV n+8(FP), R5 -+ MOVV _cgo_munmap(SB), R13 -+ SUBV $16, R3 // reserve 16 bytes for sp-8 where fp may be saved. -+ JAL (R13) -+ ADDV $16, R3 -+ RET -+ - // func madvise(addr unsafe.Pointer, n uintptr, flags int32) - TEXT runtime·madvise(SB),NOSPLIT|NOFRAME,$0 - MOVV addr+0(FP), R4 --- -2.38.1 - diff --git a/0047-cmd-runtime-enable-race-detector-on-loong64.patch b/0003-cmd-runtime-enable-race-detector-on-loong64.patch similarity index 86% rename from 0047-cmd-runtime-enable-race-detector-on-loong64.patch rename to 0003-cmd-runtime-enable-race-detector-on-loong64.patch index fe80f1f910ad836e5866d2ca6cd7bd358467d379..0d61dcc5fcb6091e4e881d6ebe0a579b2ba30792 100644 --- a/0047-cmd-runtime-enable-race-detector-on-loong64.patch +++ b/0003-cmd-runtime-enable-race-detector-on-loong64.patch @@ -1,25 +1,32 @@ -From ee2afe75e0c3197818eb861dd8db85eeeef31de7 Mon Sep 17 00:00:00 2001 -From: Xiaolin Zhao +From f84142ce620b086cc90f728861a76e5066c22ed9 Mon Sep 17 00:00:00 2001 +From: Guoqi Chen Date: Sat, 19 Aug 2023 09:22:34 +0800 -Subject: [PATCH 47/51] cmd,runtime: enable race detector on loong64 +Subject: [PATCH 03/44] cmd,runtime: enable race detector on loong64 +The race feature depends on llvm. And support for building the tsan library on +linux/loong64 has been added in this patch [1], which has been merged into the +branch main and will be supported in the upcoming llvm18. 
+ +[1]: https://github.com/llvm/llvm-project/pull/72819 + +Co-authored-by: Xiaolin Zhao Change-Id: If389318215476890295ed771297c6c088cfc84b3 --- src/cmd/dist/test.go | 2 +- src/internal/platform/supported.go | 2 +- src/race.bash | 3 +- src/runtime/asm_loong64.s | 1 + - src/runtime/race/README | 1 + + src/runtime/race/README | 3 +- src/runtime/race/race.go | 2 +- - src/runtime/race_loong64.s | 480 +++++++++++++++++++++++ - 8 files changed, 487 insertions(+), 4 deletions(-) + src/runtime/race_loong64.s | 509 +++++++++++++++++++++++ + 8 files changed, 517 insertions(+), 5 deletions(-) create mode 100644 src/runtime/race_loong64.s diff --git a/src/cmd/dist/test.go b/src/cmd/dist/test.go -index 864060cbb2..8a1568c068 100644 +index 9728ef29cb..044268ada0 100644 --- a/src/cmd/dist/test.go +++ b/src/cmd/dist/test.go -@@ -1533,7 +1533,7 @@ func (t *tester) makeGOROOTUnwritable() (undo func()) { +@@ -1674,7 +1674,7 @@ func (t *tester) makeGOROOTUnwritable() (undo func()) { func raceDetectorSupported(goos, goarch string) bool { switch goos { case "linux": @@ -27,9 +34,9 @@ index 864060cbb2..8a1568c068 100644 + return goarch == "amd64" || goarch == "ppc64le" || goarch == "arm64" || goarch == "s390x" || goarch == "loong64" case "darwin": return goarch == "amd64" || goarch == "arm64" - case "freebsd", "netbsd", "openbsd", "windows": + case "freebsd", "netbsd", "windows": diff --git a/src/internal/platform/supported.go b/src/internal/platform/supported.go -index 715bfb5e48..1589a1ebd5 100644 +index 79ed6d4b1c..52cad096cb 100644 --- a/src/internal/platform/supported.go +++ b/src/internal/platform/supported.go @@ -23,7 +23,7 @@ func (p OSArch) String() string { @@ -40,7 +47,7 @@ index 715bfb5e48..1589a1ebd5 100644 + return goarch == "amd64" || goarch == "ppc64le" || goarch == "arm64" || goarch == "s390x" || goarch == "loong64" case "darwin": return goarch == "amd64" || goarch == "arm64" - case "freebsd", "netbsd", "openbsd", "windows": + case "freebsd", "netbsd", "windows": diff 
--git a/src/race.bash b/src/race.bash index f1a168bfbb..ae9f57ffd7 100755 --- a/src/race.bash @@ -63,29 +70,29 @@ index f1a168bfbb..ae9f57ffd7 100755 "FreeBSD amd64") ;; "NetBSD amd64") ;; diff --git a/src/runtime/asm_loong64.s b/src/runtime/asm_loong64.s -index af41d57553..e36bc10a2c 100644 +index 1c5ced4512..1bd8276835 100644 --- a/src/runtime/asm_loong64.s +++ b/src/runtime/asm_loong64.s @@ -37,6 +37,7 @@ TEXT runtime·rt0_go(SB),NOSPLIT|TOPFRAME,$0 JAL (R25) nocgo: -+ //JAL runtime·save_g(SB) ++ JAL runtime·save_g(SB) // update stackguard after _cgo_init MOVV (g_stack+stack_lo)(g), R19 ADDV $const_stackGuard, R19 diff --git a/src/runtime/race/README b/src/runtime/race/README -index acd8b84838..fc0b56ec69 100644 +index 47c51ca9c1..06865d2b34 100644 --- a/src/runtime/race/README +++ b/src/runtime/race/README -@@ -12,6 +12,7 @@ race_windows_amd64.syso built with LLVM b6374437af39af66896da74a1dc1b8a0ece26bee - race_linux_arm64.syso built with LLVM 41cb504b7c4b18ac15830107431a0c1eec73a6b2 and Go 851ecea4cc99ab276109493477b2c7e30c253ea8. - race_darwin_arm64.syso built with LLVM 41cb504b7c4b18ac15830107431a0c1eec73a6b2 and Go 851ecea4cc99ab276109493477b2c7e30c253ea8. - race_openbsd_amd64.syso built with LLVM fcf6ae2f070eba73074b6ec8d8281e54d29dbeeb and Go 8f2db14cd35bbd674cb2988a508306de6655e425. -+//TODO race_linux_loong64.syso built with ... - race_linux_s390x.syso built with LLVM 41cb504b7c4b18ac15830107431a0c1eec73a6b2 and Go 851ecea4cc99ab276109493477b2c7e30c253ea8. - internal/amd64v3/race_linux.syso built with LLVM 74c2d4f6024c8f160871a2baa928d0b42415f183 and Go c0f27eb3d580c8b9efd73802678eba4c6c9461be. - internal/amd64v1/race_linux.syso built with LLVM 74c2d4f6024c8f160871a2baa928d0b42415f183 and Go c0f27eb3d580c8b9efd73802678eba4c6c9461be. 
+@@ -13,5 +13,6 @@ internal/amd64v1/race_windows.syso built with LLVM 51bfeff0e4b0757ff773da6882f4d + internal/amd64v3/race_linux.syso built with LLVM 51bfeff0e4b0757ff773da6882f4d538996c9b04 and Go e7d582b55dda36e76ce4d0ce770139ca0915b7c5. + race_darwin_arm64.syso built with LLVM 51bfeff0e4b0757ff773da6882f4d538996c9b04 and Go e7d582b55dda36e76ce4d0ce770139ca0915b7c5. + race_linux_arm64.syso built with LLVM 51bfeff0e4b0757ff773da6882f4d538996c9b04 and Go e7d582b55dda36e76ce4d0ce770139ca0915b7c5. +-race_linux_ppc64le.syso built with LLVM 51bfeff0e4b0757ff773da6882f4d538996c9b04 and Go e7d582b55dda36e76ce4d0ce770139ca0915b7c5. ++race_linux_loong64.syso built with LLVM 9d3fbf97bef3f19da4e0a047f017b8142f59b3fd and Go 988b718f4130ab5b3ce5a5774e1a58e83c92a163. ++race_linux_ppc64le.syso built with LLVM 41cb504b7c4b18ac15830107431a0c1eec73a6b2 and Go 851ecea4cc99ab276109493477b2c7e30c253ea8. + race_linux_s390x.syso built with LLVM 51bfeff0e4b0757ff773da6882f4d538996c9b04 and Go e7d582b55dda36e76ce4d0ce770139ca0915b7c5. diff --git a/src/runtime/race/race.go b/src/runtime/race/race.go index 9c508ebc2b..9fd75424ca 100644 --- a/src/runtime/race/race.go @@ -101,10 +108,10 @@ index 9c508ebc2b..9fd75424ca 100644 diff --git a/src/runtime/race_loong64.s b/src/runtime/race_loong64.s new file mode 100644 -index 0000000000..5e7bd6d716 +index 0000000000..0512efc045 --- /dev/null +++ b/src/runtime/race_loong64.s -@@ -0,0 +1,480 @@ +@@ -0,0 +1,509 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. @@ -146,8 +153,12 @@ index 0000000000..5e7bd6d716 +// Called from instrumented code. +// Defined as ABIInternal so as to avoid introducing a wrapper, +// which would make caller's PC ineffective. 
-+TEXT runtime·raceread(SB), NOSPLIT, $0-8 ++TEXT runtime·raceread(SB), NOSPLIT, $0-8 ++#ifdef GOEXPERIMENT_regabiargs ++ MOVV R4, RARG1 ++#else + MOVV addr+0(FP), RARG1 ++#endif + MOVV R1, RARG2 + // void __tsan_read(ThreadState *thr, void *addr, void *pc); + MOVV $__tsan_read(SB), RCALL @@ -171,8 +182,12 @@ index 0000000000..5e7bd6d716 +// Called from instrumented code. +// Defined as ABIInternal so as to avoid introducing a wrapper, +// which would make caller's PC ineffective. -+TEXT runtime·racewrite(SB), NOSPLIT, $0-8 ++TEXT runtime·racewrite(SB), NOSPLIT, $0-8 ++#ifdef GOEXPERIMENT_regabiargs ++ MOVV R4, RARG1 ++#else + MOVV addr+0(FP), RARG1 ++#endif + MOVV R1, RARG2 + // void __tsan_write(ThreadState *thr, void *addr, void *pc); + MOVV $__tsan_write(SB), RCALL @@ -196,9 +211,14 @@ index 0000000000..5e7bd6d716 +// Called from instrumented code. +// Defined as ABIInternal so as to avoid introducing a wrapper, +// which would make caller's PC ineffective. -+TEXT runtime·racereadrange(SB), NOSPLIT, $0-16 ++TEXT runtime·racereadrange(SB), NOSPLIT, $0-16 ++#ifdef GOEXPERIMENT_regabiargs ++ MOVV R5, RARG2 ++ MOVV R4, RARG1 ++#else + MOVV addr+0(FP), RARG1 + MOVV size+8(FP), RARG2 ++#endif + MOVV R1, RARG3 + // void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc); + MOVV $__tsan_read_range(SB), RCALL @@ -223,9 +243,14 @@ index 0000000000..5e7bd6d716 +// Called from instrumented code. +// Defined as ABIInternal so as to avoid introducing a wrapper, +// which would make caller's PC ineffective. 
-+TEXT runtime·racewriterange(SB), NOSPLIT, $0-16 ++TEXT runtime·racewriterange(SB), NOSPLIT, $0-16 ++#ifdef GOEXPERIMENT_regabiargs ++ MOVV R5, RARG2 ++ MOVV R4, RARG1 ++#else + MOVV addr+0(FP), RARG1 + MOVV size+8(FP), RARG2 ++#endif + MOVV R1, RARG3 + // void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc); + MOVV $__tsan_write_range(SB), RCALL @@ -274,8 +299,12 @@ index 0000000000..5e7bd6d716 + +// func runtime·racefuncenter(pc uintptr) +// Called from instrumented code. -+TEXT runtime·racefuncenter(SB), NOSPLIT, $0-8 ++TEXT runtime·racefuncenter(SB), NOSPLIT, $0-8 ++#ifdef GOEXPERIMENT_regabiargs ++ MOVV R4, RCALL ++#else + MOVV callpc+0(FP), RCALL ++#endif + JMP racefuncenter<>(SB) + +// Common code for racefuncenter @@ -291,7 +320,7 @@ index 0000000000..5e7bd6d716 + +// func runtime·racefuncexit() +// Called from instrumented code. -+TEXT runtime·racefuncexit(SB), NOSPLIT, $0-0 ++TEXT runtime·racefuncexit(SB), NOSPLIT, $0-0 + load_g + MOVV g_racectx(g), RARG0 // race context + // void __tsan_func_exit(ThreadState *thr); @@ -561,10 +590,13 @@ index 0000000000..5e7bd6d716 + BEQ R14, g, noswitch // branch if already on g0 + MOVV R14, g + ++#ifdef GOEXPERIMENT_regabiargs ++ JAL runtime·racecallback(SB) ++#else + MOVV RARG0, 8(R3) // func arg + MOVV RARG1, 16(R3) // func arg + JAL runtime·racecallback(SB) -+ ++#endif + // All registers are smashed after Go code, reload. 
+ MOVV g_m(g), R15 + MOVV m_curg(R15), g // g = m->curg @@ -578,9 +610,13 @@ index 0000000000..5e7bd6d716 + +noswitch: + // already on g0 ++#ifdef GOEXPERIMENT_regabiargs ++ JAL runtime·racecallback(SB) ++#else + MOVV RARG0, 8(R3) // func arg + MOVV RARG1, 16(R3) // func arg + JAL runtime·racecallback(SB) ++#endif + JMP ret + +// tls_g, g value for each thread in TLS diff --git a/0003-runtime-cmd-go-enable-address-sanitizer-on-linux-loo.patch b/0003-runtime-cmd-go-enable-address-sanitizer-on-linux-loo.patch deleted file mode 100644 index 9b08485e90084e583d89bbe87801aeb9bd70c905..0000000000000000000000000000000000000000 --- a/0003-runtime-cmd-go-enable-address-sanitizer-on-linux-loo.patch +++ /dev/null @@ -1,160 +0,0 @@ -From bab2e6e8ee28692e81f9397b4c6fc92df6e3ae43 Mon Sep 17 00:00:00 2001 -From: Xiaolin Zhao -Date: Fri, 31 Mar 2023 17:08:44 +0800 -Subject: [PATCH 03/51] runtime, cmd/go: enable address sanitizer on - linux/loong64 - -Change-Id: I48a65f2f10e3dc488acd9c02ea1a1f37de192be0 ---- - src/cmd/go/alldocs.go | 5 +- - src/cmd/go/internal/work/build.go | 5 +- - src/internal/platform/supported.go | 2 +- - src/runtime/asan/asan.go | 2 +- - src/runtime/asan_loong64.s | 75 ++++++++++++++++++++++++++++++ - 5 files changed, 83 insertions(+), 6 deletions(-) - create mode 100644 src/runtime/asan_loong64.s - -diff --git a/src/cmd/go/alldocs.go b/src/cmd/go/alldocs.go -index d8377d1fd6..7de5a066dd 100644 ---- a/src/cmd/go/alldocs.go -+++ b/src/cmd/go/alldocs.go -@@ -124,9 +124,10 @@ - // PIE build mode will be used on all platforms except linux/amd64. - // -asan - // enable interoperation with address sanitizer. --// Supported only on linux/arm64, linux/amd64. --// Supported only on linux/amd64 or linux/arm64 and only with GCC 7 and higher -+// Supported only on linux/arm64, linux/amd64, linux/loong64. -+// Supported on linux/amd64 or linux/arm64 and only with GCC 7 and higher - // or Clang/LLVM 9 and higher. 
-+// And supported on linux/loong64 only with Clang/LLVM 16 and higher. - // -cover - // enable code coverage instrumentation. - // -covermode set,count,atomic -diff --git a/src/cmd/go/internal/work/build.go b/src/cmd/go/internal/work/build.go -index 05e300581c..8cb53b95d9 100644 ---- a/src/cmd/go/internal/work/build.go -+++ b/src/cmd/go/internal/work/build.go -@@ -81,9 +81,10 @@ and test commands: - PIE build mode will be used on all platforms except linux/amd64. - -asan - enable interoperation with address sanitizer. -- Supported only on linux/arm64, linux/amd64. -- Supported only on linux/amd64 or linux/arm64 and only with GCC 7 and higher -+ Supported only on linux/arm64, linux/amd64, linux/loong64. -+ Supported on linux/amd64 or linux/arm64 and only with GCC 7 and higher - or Clang/LLVM 9 and higher. -+ And supported on linux/loong64 only with Clang/LLVM 16 and higher. - -cover - enable code coverage instrumentation. - -covermode set,count,atomic -diff --git a/src/internal/platform/supported.go b/src/internal/platform/supported.go -index b4f8bb5496..715bfb5e48 100644 ---- a/src/internal/platform/supported.go -+++ b/src/internal/platform/supported.go -@@ -51,7 +51,7 @@ func MSanSupported(goos, goarch string) bool { - func ASanSupported(goos, goarch string) bool { - switch goos { - case "linux": -- return goarch == "arm64" || goarch == "amd64" || goarch == "riscv64" || goarch == "ppc64le" -+ return goarch == "arm64" || goarch == "amd64" || goarch == "loong64" || goarch == "riscv64" || goarch == "ppc64le" - default: - return false - } -diff --git a/src/runtime/asan/asan.go b/src/runtime/asan/asan.go -index 25f15ae45b..ef70b0145b 100644 ---- a/src/runtime/asan/asan.go -+++ b/src/runtime/asan/asan.go -@@ -2,7 +2,7 @@ - // Use of this source code is governed by a BSD-style - // license that can be found in the LICENSE file. 
- --//go:build asan && linux && (arm64 || amd64 || riscv64 || ppc64le) -+//go:build asan && linux && (arm64 || amd64 || loong64 || riscv64 || ppc64le) - - package asan - -diff --git a/src/runtime/asan_loong64.s b/src/runtime/asan_loong64.s -new file mode 100644 -index 0000000000..8cef686217 ---- /dev/null -+++ b/src/runtime/asan_loong64.s -@@ -0,0 +1,75 @@ -+// Copyright 2023 The Go Authors. All rights reserved. -+// Use of this source code is governed by a BSD-style -+// license that can be found in the LICENSE file. -+ -+//go:build asan -+ -+#include "go_asm.h" -+#include "textflag.h" -+ -+#define RARG0 R4 -+#define RARG1 R5 -+#define RARG2 R6 -+#define RARG3 R7 -+#define FARG R8 -+ -+// Called from instrumented code. -+// func runtime·doasanread(addr unsafe.Pointer, sz, sp, pc uintptr) -+TEXT runtime·doasanread(SB), NOSPLIT, $0-32 -+ MOVV addr+0(FP), RARG0 -+ MOVV size+8(FP), RARG1 -+ MOVV sp+16(FP), RARG2 -+ MOVV pc+24(FP), RARG3 -+ // void __asan_read_go(void *addr, uintptr_t sz, void *sp, void *pc); -+ MOVV $__asan_read_go(SB), FARG -+ JMP asancall<>(SB) -+ -+// func runtime·doasanwrite(addr unsafe.Pointer, sz, sp, pc uintptr) -+TEXT runtime·doasanwrite(SB), NOSPLIT, $0-32 -+ MOVV addr+0(FP), RARG0 -+ MOVV size+8(FP), RARG1 -+ MOVV sp+16(FP), RARG2 -+ MOVV pc+24(FP), RARG3 -+ // void __asan_write_go(void *addr, uintptr_t sz, void *sp, void *pc); -+ MOVV $__asan_write_go(SB), FARG -+ JMP asancall<>(SB) -+ -+// func runtime·asanunpoison(addr unsafe.Pointer, sz uintptr) -+TEXT runtime·asanunpoison(SB), NOSPLIT, $0-16 -+ MOVV addr+0(FP), RARG0 -+ MOVV size+8(FP), RARG1 -+ // void __asan_unpoison_go(void *addr, uintptr_t sz); -+ MOVV $__asan_unpoison_go(SB), FARG -+ JMP asancall<>(SB) -+ -+// func runtime·asanpoison(addr unsafe.Pointer, sz uintptr) -+TEXT runtime·asanpoison(SB), NOSPLIT, $0-16 -+ MOVV addr+0(FP), RARG0 -+ MOVV size+8(FP), RARG1 -+ // void __asan_poison_go(void *addr, uintptr_t sz); -+ MOVV $__asan_poison_go(SB), FARG -+ JMP asancall<>(SB) -+ -+// 
func runtime·asanregisterglobals(addr unsafe.Pointer, n uintptr) -+TEXT runtime·asanregisterglobals(SB), NOSPLIT, $0-16 -+ MOVV addr+0(FP), RARG0 -+ MOVV size+8(FP), RARG1 -+ // void __asan_register_globals_go(void *addr, uintptr_t n); -+ MOVV $__asan_register_globals_go(SB), FARG -+ JMP asancall<>(SB) -+ -+// Switches SP to g0 stack and calls (FARG). Arguments already set. -+TEXT asancall<>(SB), NOSPLIT, $0-0 -+ MOVV R3, R23 // callee-saved -+ BEQ g, g0stack // no g, still on a system stack -+ MOVV g_m(g), R14 -+ MOVV m_g0(R14), R15 -+ BEQ R15, g, g0stack -+ -+ MOVV (g_sched+gobuf_sp)(R15), R9 -+ MOVV R9, R3 -+ -+g0stack: -+ JAL (FARG) -+ MOVV R23, R3 -+ RET --- -2.38.1 - diff --git a/0004-cmd-link-use-gold-on-ARM-ARM64-only-if-gold-is-avail.patch b/0004-cmd-link-use-gold-on-ARM-ARM64-only-if-gold-is-avail.patch deleted file mode 100644 index 5341d6d6a87bfcee82bb3bf80fc0512bbfdb3577..0000000000000000000000000000000000000000 --- a/0004-cmd-link-use-gold-on-ARM-ARM64-only-if-gold-is-avail.patch +++ /dev/null @@ -1,35 +0,0 @@ -From 7506da0af38aa307f45664f0c787b5767cc7a87f Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Alejandro=20S=C3=A1ez?= -Date: Thu, 22 Jun 2023 17:19:00 +0200 -Subject: [PATCH] Force gold in aarch64 until binutils 2.41 is on Fedora - ---- - src/cmd/link/internal/ld/lib.go | 8 +++----- - 1 file changed, 3 insertions(+), 5 deletions(-) - -diff --git a/src/cmd/link/internal/ld/lib.go b/src/cmd/link/internal/ld/lib.go -index 91e2d5149c..99c305530b 100644 ---- a/src/cmd/link/internal/ld/lib.go -+++ b/src/cmd/link/internal/ld/lib.go -@@ -1605,15 +1605,13 @@ func (ctxt *Link) hostlink() { - // https://go.dev/issue/22040 - altLinker = "gold" - -- // If gold is not installed, gcc will silently switch -- // back to ld.bfd. So we parse the version information -- // and provide a useful error if gold is missing. -+ // In both cases, switch to gold if gold is available. 
- name, args := flagExtld[0], flagExtld[1:] - args = append(args, "-fuse-ld=gold", "-Wl,--version") - cmd := exec.Command(name, args...) - if out, err := cmd.CombinedOutput(); err == nil { -- if !bytes.Contains(out, []byte("GNU gold")) { -- log.Fatalf("ARM64 external linker must be gold (issue #15696, 22040), but is not: %s", out) -+ if bytes.Contains(out, []byte("GNU gold")) { -+ altLinker = "gold" - } - } - } --- -2.40.1 - diff --git a/0004-internal-sysinfo-print-cpu-type-from-cpuinfo-when-in.patch b/0004-internal-sysinfo-print-cpu-type-from-cpuinfo-when-in.patch deleted file mode 100644 index ed506bd76e2e616d6a31254dc6e54aaade147af7..0000000000000000000000000000000000000000 --- a/0004-internal-sysinfo-print-cpu-type-from-cpuinfo-when-in.patch +++ /dev/null @@ -1,194 +0,0 @@ -From 51a11cb75d60f68f4e8edc7a244c10feaf26b33a Mon Sep 17 00:00:00 2001 -From: Guoqi Chen -Date: Tue, 11 Jul 2023 05:11:26 +0800 -Subject: [PATCH 04/51] internal/sysinfo: print cpu type from cpuinfo when - internal cpu name is empty on Linux - -Supports 386,amd64 and loong64 architectures on linux operating systems. - -Example output: -$ go test -bench=.* -goos: linux -goarch: loong64 -pkg: runtime -cpu: Loongson-3A5000-HV @ 2500.00MHz -BenchmarkSemTable/OneAddrCollision/n=1000 19261 62302 ns/op - ... 
- -Change-Id: I02db12d70c11327e4625bb6e59f30dfaf37c2db0 ---- - src/go/build/deps_test.go | 2 +- - src/internal/sysinfo/generic_os_cpuinfo.go | 11 +++++ - src/internal/sysinfo/proc_cpuinfo_linux.go | 41 +++++++++++++++++++ - .../sysinfo/proc_cpuinfo_linux_loong64.go | 33 +++++++++++++++ - .../sysinfo/proc_cpuinfo_linux_x84.go | 22 ++++++++++ - src/internal/sysinfo/sysinfo.go | 5 +++ - 6 files changed, 113 insertions(+), 1 deletion(-) - create mode 100644 src/internal/sysinfo/generic_os_cpuinfo.go - create mode 100644 src/internal/sysinfo/proc_cpuinfo_linux.go - create mode 100644 src/internal/sysinfo/proc_cpuinfo_linux_loong64.go - create mode 100644 src/internal/sysinfo/proc_cpuinfo_linux_x84.go - -diff --git a/src/go/build/deps_test.go b/src/go/build/deps_test.go -index 592f2fd72a..cd99bd2a63 100644 ---- a/src/go/build/deps_test.go -+++ b/src/go/build/deps_test.go -@@ -562,7 +562,7 @@ var depsRules = ` - < net/rpc/jsonrpc; - - # System Information -- internal/cpu, sync -+ internal/cpu, io, os, strings, sync - < internal/sysinfo; - - # Test-only -diff --git a/src/internal/sysinfo/generic_os_cpuinfo.go b/src/internal/sysinfo/generic_os_cpuinfo.go -new file mode 100644 -index 0000000000..ec3047bd3d ---- /dev/null -+++ b/src/internal/sysinfo/generic_os_cpuinfo.go -@@ -0,0 +1,11 @@ -+// Copyright 2023 The Go Authors. All rights reserved. -+// Use of this source code is governed by a BSD-style -+// license that can be found in the LICENSE file. -+ -+//go:build !(linux && (386 || amd64 || loong64)) -+ -+package sysinfo -+ -+func osCpuInfoName() string { -+ return "" -+} -diff --git a/src/internal/sysinfo/proc_cpuinfo_linux.go b/src/internal/sysinfo/proc_cpuinfo_linux.go -new file mode 100644 -index 0000000000..cbe56fdae0 ---- /dev/null -+++ b/src/internal/sysinfo/proc_cpuinfo_linux.go -@@ -0,0 +1,41 @@ -+// Copyright 2023 The Go Authors. All rights reserved. -+// Use of this source code is governed by a BSD-style -+// license that can be found in the LICENSE file. 
-+ -+package sysinfo -+ -+import ( -+ "io" -+ "os" -+ "strings" -+) -+ -+func readLinuxProcCPUInfo(buf []byte) error { -+ f, err := os.Open("/proc/cpuinfo") -+ if err != nil { -+ return err -+ } -+ defer f.Close() -+ -+ _, err = io.ReadFull(f, buf) -+ if err != nil && err != io.ErrUnexpectedEOF { -+ return err -+ } -+ -+ return nil -+} -+ -+func findCPUInfoField(buf []byte, fieldName string) string { -+ filedValue := string(buf[:len(buf)]) -+ n := strings.Index(filedValue, fieldName) -+ if n == -1 { -+ return "" -+ } -+ -+ filedValue = filedValue[n+len(fieldName):] -+ if n := strings.Index(filedValue, "\n"); n != -1 { -+ filedValue = filedValue[:n] -+ } -+ -+ return filedValue -+} -diff --git a/src/internal/sysinfo/proc_cpuinfo_linux_loong64.go b/src/internal/sysinfo/proc_cpuinfo_linux_loong64.go -new file mode 100644 -index 0000000000..3592998de6 ---- /dev/null -+++ b/src/internal/sysinfo/proc_cpuinfo_linux_loong64.go -@@ -0,0 +1,33 @@ -+// Copyright 2023 The Go Authors. All rights reserved. -+// Use of this source code is governed by a BSD-style -+// license that can be found in the LICENSE file. -+ -+package sysinfo -+ -+const ( -+ // cpuinfo filed name -+ ModelName = "\nModel Name\t\t: " -+ CPUMHz = "\nCPU MHz\t\t\t: " -+) -+ -+func osCpuInfoName() string { -+ // The 512-byte buffer is enough to hold the contents of CPU0 -+ buf := make([]byte, 512) -+ err := readLinuxProcCPUInfo(buf) -+ if err != nil { -+ return "" -+ } -+ -+ modelName := findCPUInfoField(buf, ModelName) -+ cpuMHz := findCPUInfoField(buf, CPUMHz) -+ -+ if modelName == "" { -+ return "" -+ } -+ -+ if cpuMHz == "" { -+ return modelName -+ } -+ -+ return modelName + " @ " + cpuMHz + "MHz" -+} -diff --git a/src/internal/sysinfo/proc_cpuinfo_linux_x84.go b/src/internal/sysinfo/proc_cpuinfo_linux_x84.go -new file mode 100644 -index 0000000000..7b5d5b88f2 ---- /dev/null -+++ b/src/internal/sysinfo/proc_cpuinfo_linux_x84.go -@@ -0,0 +1,22 @@ -+// Copyright 2023 The Go Authors. All rights reserved. 
-+// Use of this source code is governed by a BSD-style -+// license that can be found in the LICENSE file. -+ -+//go:build linux && (386 || amd64) -+ -+package sysinfo -+ -+const ( -+ // cpuinfo filed name -+ ModelName = "model name\t: " -+) -+ -+func osCpuInfoName() string { -+ buf := make([]byte, 512) -+ err := readLinuxProcCPUInfo(buf) -+ if err != nil { -+ return "" -+ } -+ -+ return findCPUInfoField(buf, ModelName) -+} -diff --git a/src/internal/sysinfo/sysinfo.go b/src/internal/sysinfo/sysinfo.go -index 961be7abae..19e841fb16 100644 ---- a/src/internal/sysinfo/sysinfo.go -+++ b/src/internal/sysinfo/sysinfo.go -@@ -26,6 +26,11 @@ func (cpu *cpuInfo) Name() string { - return - } - // TODO(martisch): use /proc/cpuinfo and /sys/devices/system/cpu/ on Linux as fallback. -+ if name := osCpuInfoName(); name != "" { -+ cpu.name = name -+ return -+ } - }) -+ - return cpu.name - } --- -2.38.1 - diff --git a/0049-runtime-delete-on-register-ABI-fallback-path-for-rac.patch b/0004-runtime-delete-on-register-ABI-fallback-path-for-rac.patch similarity index 93% rename from 0049-runtime-delete-on-register-ABI-fallback-path-for-rac.patch rename to 0004-runtime-delete-on-register-ABI-fallback-path-for-rac.patch index e03716fef55a32d013d149ff8d8de3992ab90877..54922d9071f7d3b9635639e6d87b4e67ab1d5546 100644 --- a/0049-runtime-delete-on-register-ABI-fallback-path-for-rac.patch +++ b/0004-runtime-delete-on-register-ABI-fallback-path-for-rac.patch @@ -1,10 +1,11 @@ -From 0ec91508644b2900fc51dd9f9ac7f122e1bd467e Mon Sep 17 00:00:00 2001 +From 5623cd585fd5891d1f6d6d93256e4252b95b9dae Mon Sep 17 00:00:00 2001 From: Guoqi Chen Date: Mon, 6 Nov 2023 17:13:43 +0800 -Subject: [PATCH 49/51] runtime: delete on-register ABI fallback path for race +Subject: [PATCH 04/44] runtime: delete on-register ABI fallback path for race of loong64 -Change-Id: I0769bdd12c8c458870a4bc6bbf731de4c3bbd997 +Co-authored-by: Xiaolin Zhao +Change-Id: Ie8c4a137205e29dd7dc63825f502b1f6b2f1c205 --- 
src/runtime/race_loong64.s | 34 ---------------------------------- 1 file changed, 34 deletions(-) diff --git a/0005-cmd-cmd-vendor-pick-up-updates-for-golang.org-x-arch.patch b/0005-cmd-cmd-vendor-pick-up-updates-for-golang.org-x-arch.patch deleted file mode 100644 index 5870609b2a7ed6678c489cec39b72d622b5fe2c7..0000000000000000000000000000000000000000 --- a/0005-cmd-cmd-vendor-pick-up-updates-for-golang.org-x-arch.patch +++ /dev/null @@ -1,2250 +0,0 @@ -From fe1fa12ff48cf3abd0fe78812b8df4bb86860a41 Mon Sep 17 00:00:00 2001 -From: chenguoqi -Date: Fri, 10 Feb 2023 15:00:12 +0800 -Subject: [PATCH 05/51] cmd,cmd/vendor: pick up updates for - golang.org/x/arch/loong64 - -Bring in updates to golang.org/x/arch/ to support loong64 disassembler -from CL 358854. - - Used the directions found in README.vendor: - - cd $GOROOT/src/cmd - go get -d golang.org/x/arch@latest - go mod tidy - go mod vendor - -Change-Id: Ida0721ebd11caf4f116bb54a277606eaa38dc8ba ---- - .../x/arch/loong64/loong64asm/arg.go | 93 + - .../x/arch/loong64/loong64asm/decode.go | 269 +++ - .../x/arch/loong64/loong64asm/gnu.go | 16 + - .../x/arch/loong64/loong64asm/inst.go | 296 ++++ - .../x/arch/loong64/loong64asm/tables.go | 1513 +++++++++++++++++ - 5 files changed, 2187 insertions(+) - create mode 100644 src/cmd/vendor/golang.org/x/arch/loong64/loong64asm/arg.go - create mode 100644 src/cmd/vendor/golang.org/x/arch/loong64/loong64asm/decode.go - create mode 100644 src/cmd/vendor/golang.org/x/arch/loong64/loong64asm/gnu.go - create mode 100644 src/cmd/vendor/golang.org/x/arch/loong64/loong64asm/inst.go - create mode 100644 src/cmd/vendor/golang.org/x/arch/loong64/loong64asm/tables.go - -diff --git a/src/cmd/vendor/golang.org/x/arch/loong64/loong64asm/arg.go b/src/cmd/vendor/golang.org/x/arch/loong64/loong64asm/arg.go -new file mode 100644 -index 0000000000..9496e8c34d ---- /dev/null -+++ b/src/cmd/vendor/golang.org/x/arch/loong64/loong64asm/arg.go -@@ -0,0 +1,93 @@ -+// Copyright 2022 The Go Authors. 
All rights reserved. -+// Use of this source code is governed by a BSD-style -+// license that can be found in the LICENSE file. -+ -+package loong64asm -+ -+// Naming for Go decoder arguments: -+// -+// - arg_fd: a Floating Point operand register fd encoded in the fd[4:0] field -+// -+// - arg_fj: a Floating Point operand register fj encoded in the fj[9:5] field -+// -+// - arg_fk: a Floating Point operand register fk encoded in the fk[14:10] field -+// -+// - arg_fa: a Floating Point operand register fa encoded in the fa[19:15] field -+// -+// - arg_rd: a general-purpose register rd encoded in the rd[4:0] field -+// -+// - arg_rj: a general-purpose register rj encoded in the rj[9:5] field -+// -+// - arg_rk: a general-purpose register rk encoded in the rk[14:10] field -+// -+// - arg_fcsr_4_0: float control status register encoded in [4:0] field -+// -+// - arg_cd_2_0: condition flag register encoded in [2:0] field -+// -+// - arg_sa2_16_15: shift bits constant encoded in [16:15] field -+// -+// - arg_code_14_0: arg for exception process routine encoded in [14:0] field -+// -+// - arg_ui5_14_10: 5bits unsigned immediate -+// -+// - arg_lsbw: For details, please refer to chapter 2.2.3.8 of instruction manual -+// -+// - arg_msbw: For details, please refer to chapter 2.2.3.9 of instruction manual -+// -+// - arg_hint_4_0: hint field implied the prefetch type and the data should fetch to cache's level -+// 0: load to data cache level 1 -+// 8: store to data cache level 1 -+// other: no define -+// -+// - arg_si12_21_10: 12bits signed immediate -+ -+type instArg uint16 -+ -+const ( -+ _ instArg = iota -+ //1-5 -+ arg_fd -+ arg_fj -+ arg_fk -+ arg_fa -+ arg_rd -+ //6-10 -+ arg_rj -+ arg_rk -+ arg_op_4_0 -+ arg_fcsr_4_0 -+ arg_fcsr_9_5 -+ //11-15 -+ arg_csr_23_10 -+ arg_cd -+ arg_cj -+ arg_ca -+ arg_sa2_16_15 -+ //16-20 -+ arg_sa3_17_15 -+ arg_code_4_0 -+ arg_code_14_0 -+ arg_ui5_14_10 -+ arg_ui6_15_10 -+ //21-25 -+ arg_ui12_21_10 -+ arg_lsbw -+ arg_msbw -+ arg_lsbd -+ 
arg_msbd -+ //26-30 -+ arg_hint_4_0 -+ arg_hint_14_0 -+ arg_level_14_0 -+ arg_level_17_10 -+ arg_seq_17_10 -+ //31-35 -+ arg_si12_21_10 -+ arg_si14_23_10 -+ arg_si16_25_10 -+ arg_si20_24_5 -+ arg_offset_20_0 -+ //36~ -+ arg_offset_25_0 -+ arg_offset_15_0 -+) -diff --git a/src/cmd/vendor/golang.org/x/arch/loong64/loong64asm/decode.go b/src/cmd/vendor/golang.org/x/arch/loong64/loong64asm/decode.go -new file mode 100644 -index 0000000000..ac3448f170 ---- /dev/null -+++ b/src/cmd/vendor/golang.org/x/arch/loong64/loong64asm/decode.go -@@ -0,0 +1,269 @@ -+// Copyright 2022 The Go Authors. All rights reserved. -+// Use of this source code is governed by a BSD-style -+// license that can be found in the LICENSE file. -+ -+package loong64asm -+ -+import ( -+ "encoding/binary" -+ "fmt" -+) -+ -+type instArgs [5]instArg -+ -+// An instFormat describes the format of an instruction encoding. -+type instFormat struct { -+ mask uint32 -+ value uint32 -+ op Op -+ // args describe how to decode the instruction arguments. -+ // args is stored as a fixed-size array. -+ // if there are fewer than len(args) arguments, args[i] == 0 marks -+ // the end of the argument list. -+ args instArgs -+} -+ -+var ( -+ errShort = fmt.Errorf("truncated instruction") -+ errUnknown = fmt.Errorf("unknown instruction") -+) -+ -+var decoderCover []bool -+ -+func init() { -+ decoderCover = make([]bool, len(instFormats)) -+} -+ -+// Decode decodes the 4 bytes in src as a single instruction. -+func Decode(src []byte) (inst Inst, err error) { -+ if len(src) < 4 { -+ return Inst{}, errShort -+ } -+ -+ x := binary.LittleEndian.Uint32(src) -+ -+Search: -+ for i := range instFormats { -+ f := &instFormats[i] -+ -+ if (x & f.mask) != f.value { -+ continue -+ } -+ -+ // Decode args. 
-+ var args Args -+ for j, aop := range f.args { -+ if aop == 0 { -+ break -+ } -+ -+ arg := decodeArg(aop, x, i) -+ if arg == nil { -+ // Cannot decode argument -+ continue Search -+ } -+ -+ args[j] = arg -+ } -+ -+ decoderCover[i] = true -+ inst = Inst{ -+ Op: f.op, -+ Args: args, -+ Enc: x, -+ } -+ return inst, nil -+ } -+ -+ return Inst{}, errUnknown -+} -+ -+// decodeArg decodes the arg described by aop from the instruction bits x. -+// It returns nil if x cannot be decoded according to aop. -+func decodeArg(aop instArg, x uint32, index int) Arg { -+ switch aop { -+ case arg_fd: -+ return F0 + Reg(x&((1<<5)-1)) -+ -+ case arg_fj: -+ return F0 + Reg((x>>5)&((1<<5)-1)) -+ -+ case arg_fk: -+ return F0 + Reg((x>>10)&((1<<5)-1)) -+ -+ case arg_fa: -+ return F0 + Reg((x>>15)&((1<<5)-1)) -+ -+ case arg_rd: -+ return R0 + Reg(x&((1<<5)-1)) -+ -+ case arg_rj: -+ return R0 + Reg((x>>5)&((1<<5)-1)) -+ -+ case arg_rk: -+ return R0 + Reg((x>>10)&((1<<5)-1)) -+ -+ case arg_fcsr_4_0: -+ return FCSR0 + Fcsr(x&((1<<5)-1)) -+ -+ case arg_fcsr_9_5: -+ return FCSR0 + Fcsr((x>>5)&((1<<5)-1)) -+ -+ case arg_cd: -+ return FCC0 + Fcc(x&((1<<3)-1)) -+ -+ case arg_cj: -+ return FCC0 + Fcc((x>>5)&((1<<3)-1)) -+ -+ case arg_ca: -+ return FCC0 + Fcc((x>>15)&((1<<3)-1)) -+ -+ case arg_op_4_0: -+ tmp := x & ((1 << 5) - 1) -+ return Uimm{tmp, false} -+ -+ case arg_csr_23_10: -+ tmp := (x >> 10) & ((1 << 14) - 1) -+ return Uimm{tmp, false} -+ -+ case arg_sa2_16_15: -+ f := &instFormats[index] -+ tmp := SaSimm((x >> 15) & ((1 << 2) - 1)) -+ if (f.op == ALSL_D) || (f.op == ALSL_W) || (f.op == ALSL_WU) { -+ return tmp + 1 -+ } else { -+ return tmp + 0 -+ } -+ -+ case arg_sa3_17_15: -+ return SaSimm((x >> 15) & ((1 << 3) - 1)) -+ -+ case arg_code_4_0: -+ return CodeSimm(x & ((1 << 5) - 1)) -+ -+ case arg_code_14_0: -+ return CodeSimm(x & ((1 << 15) - 1)) -+ -+ case arg_ui5_14_10: -+ tmp := (x >> 10) & ((1 << 5) - 1) -+ return Uimm{tmp, false} -+ -+ case arg_ui6_15_10: -+ tmp := (x >> 10) & ((1 << 
6) - 1) -+ return Uimm{tmp, false} -+ -+ case arg_ui12_21_10: -+ tmp := ((x >> 10) & ((1 << 12) - 1) & 0xfff) -+ return Uimm{tmp, false} -+ -+ case arg_lsbw: -+ tmp := (x >> 10) & ((1 << 5) - 1) -+ return Uimm{tmp, false} -+ -+ case arg_msbw: -+ tmp := (x >> 16) & ((1 << 5) - 1) -+ return Uimm{tmp, false} -+ -+ case arg_lsbd: -+ tmp := (x >> 10) & ((1 << 6) - 1) -+ return Uimm{tmp, false} -+ -+ case arg_msbd: -+ tmp := (x >> 16) & ((1 << 6) - 1) -+ return Uimm{tmp, false} -+ -+ case arg_hint_4_0: -+ tmp := int16(x & ((1 << 5) - 1)) -+ return Simm16{tmp, 4} -+ -+ case arg_hint_14_0: -+ tmp := int16(x & ((1 << 15) - 1)) -+ return Simm16{tmp, 15} -+ -+ case arg_level_14_0: -+ tmp := x & ((1 << 15) - 1) -+ return Uimm{tmp, false} -+ -+ case arg_level_17_10: -+ tmp := (x >> 10) & ((1 << 8) - 1) -+ return Uimm{tmp, false} -+ -+ case arg_seq_17_10: -+ tmp := (x >> 10) & ((1 << 8) - 1) -+ return Uimm{tmp, false} -+ -+ case arg_si12_21_10: -+ var tmp int16 -+ -+ //no int12, so sign-extend a 12-bit signed to 16-bit signed -+ if (x & 0x200000) == 0x200000 { -+ tmp = int16(((x >> 10) & ((1 << 12) - 1)) | 0xf000) -+ } else { -+ tmp = int16(((x >> 10) & ((1 << 12) - 1)) | 0x0000) -+ } -+ return Simm16{tmp, 12} -+ -+ case arg_si14_23_10: -+ var tmp int32 -+ if (x & 0x800000) == 0x800000 { -+ tmp = int32((((x >> 10) & ((1 << 14) - 1)) << 2) | 0xffff0000) -+ } else { -+ tmp = int32((((x >> 10) & ((1 << 14) - 1)) << 2) | 0x00000000) -+ } -+ return Simm32{tmp, 16} -+ -+ case arg_si16_25_10: -+ var tmp int32 -+ -+ if (x & 0x2000000) == 0x2000000 { -+ tmp = int32(((x >> 10) & ((1 << 16) - 1)) | 0xffff0000) -+ } else { -+ tmp = int32(((x >> 10) & ((1 << 16) - 1)) | 0x00000000) -+ } -+ -+ return Simm32{tmp, 16} -+ -+ case arg_si20_24_5: -+ var tmp int32 -+ if (x & 0x1000000) == 0x1000000 { -+ tmp = int32(((x >> 5) & ((1 << 20) - 1)) | 0xfff00000) -+ } else { -+ tmp = int32(((x >> 5) & ((1 << 20) - 1)) | 0x00000000) -+ } -+ return Simm32{tmp, 20} -+ -+ case arg_offset_20_0: -+ var tmp 
int32 -+ -+ if (x & 0x1000000) == 0x1000000 { -+ tmp = int32((((x << 16) | ((x >> 10) & ((1 << 16) - 1))) & ((1 << 21) - 1)) << 2) -+ } else { -+ tmp = int32((((x << 16) | ((x >> 10) & ((1 << 16) - 1))) & ((1 << 21) - 1)) << 2) -+ } -+ -+ return OffsetSimm{tmp, 28} -+ -+ case arg_offset_15_0: -+ var tmp int32 -+ if (x & 0x2000000) == 0x2000000 { -+ tmp = int32((((x >> 10) & ((1 << 16) - 1)) << 2) | 0xfffc0000) -+ } else { -+ tmp = int32((((x >> 10) & ((1 << 16) - 1)) << 2) | 0x00000000) -+ } -+ -+ return OffsetSimm{tmp, 18} -+ -+ case arg_offset_25_0: -+ var tmp int32 -+ -+ if (x & 0x200) == 0x200 { -+ tmp = int32(((((x << 16) | ((x >> 10) & ((1 << 16) - 1))) & ((1 << 26) - 1)) << 2) | 0xf0000000) -+ } else { -+ tmp = int32(((((x << 16) | ((x >> 10) & ((1 << 16) - 1))) & ((1 << 26) - 1)) << 2) | 0x00000000) -+ } -+ -+ return OffsetSimm{tmp, 28} -+ default: -+ return nil -+ } -+} -diff --git a/src/cmd/vendor/golang.org/x/arch/loong64/loong64asm/gnu.go b/src/cmd/vendor/golang.org/x/arch/loong64/loong64asm/gnu.go -new file mode 100644 -index 0000000000..fd6bcffde1 ---- /dev/null -+++ b/src/cmd/vendor/golang.org/x/arch/loong64/loong64asm/gnu.go -@@ -0,0 +1,16 @@ -+// Copyright 2022 The Go Authors. All rights reserved. -+// Use of this source code is governed by a BSD-style -+// license that can be found in the LICENSE file. -+ -+package loong64asm -+ -+import ( -+ "strings" -+) -+ -+// GNUSyntax returns the GNU assembler syntax for the instruction, as defined by GNU binutils. -+// This form typically matches the syntax defined in the Loong64 Reference Manual. 
See -+// https://loongson.github.io/LoongArch-Documentation/LoongArch-Vol1-EN.html -+func GNUSyntax(inst Inst) string { -+ return strings.ToLower(inst.String()) -+} -diff --git a/src/cmd/vendor/golang.org/x/arch/loong64/loong64asm/inst.go b/src/cmd/vendor/golang.org/x/arch/loong64/loong64asm/inst.go -new file mode 100644 -index 0000000000..187a46fc01 ---- /dev/null -+++ b/src/cmd/vendor/golang.org/x/arch/loong64/loong64asm/inst.go -@@ -0,0 +1,296 @@ -+// Copyright 2022 The Go Authors. All rights reserved. -+// Use of this source code is governed by a BSD-style -+// license that can be found in the LICENSE file. -+ -+package loong64asm -+ -+import ( -+ "fmt" -+ "strconv" -+ "strings" -+ "unsafe" -+) -+ -+// An Inst is a single instruction. -+type Inst struct { -+ Op Op // Opcode mnemonic -+ Enc uint32 // Raw encoding bits. -+ Args Args // Instruction arguments, in Loong64 manual order. -+} -+ -+func (i Inst) String() string { -+ var args []string -+ -+ for _, arg := range i.Args { -+ if arg == nil { -+ break -+ } -+ -+ args = append(args, arg.String()) -+ } -+ -+ str2 := strings.Join(args, ", ") -+ if str2 == "" { -+ str := i.Op.String() -+ return strings.Replace(str, ", (", "(", -1) -+ } else { -+ str := i.Op.String() + " " + strings.Join(args, ", ") -+ return strings.Replace(str, ", (", "(", -1) -+ } -+} -+ -+// An Op is an Loong64 opcode. -+type Op uint16 -+ -+// NOTE: The actual Op values are defined in tables.go. -+// They are chosen to simplify instruction decoding and -+// are not a dense packing from 0 to N, although the -+// density is high, probably at least 90%. -+func (op Op) String() string { -+ if (op >= Op(len(opstr))) || (opstr[op] == "") { -+ return fmt.Sprintf("Op(%d)", int(op)) -+ } -+ -+ return opstr[op] -+} -+ -+// An Args holds the instruction arguments. -+// If an instruction has fewer than 5 arguments, -+// the final elements in the array are nil. 
-+type Args [5]Arg -+ -+// An Arg is a single instruction argument -+type Arg interface { -+ String() string -+} -+ -+// A Reg is a single register. -+// The zero value denotes R0, not the absence of a register. -+type Reg uint16 -+ -+const ( -+ //_ Reg = iota -+ -+ // General-purpose register -+ R0 Reg = iota -+ R1 -+ R2 -+ R3 -+ R4 -+ R5 -+ R6 -+ R7 -+ R8 -+ R9 -+ R10 -+ R11 -+ R12 -+ R13 -+ R14 -+ R15 -+ R16 -+ R17 -+ R18 -+ R19 -+ R20 -+ R21 -+ R22 -+ R23 -+ R24 -+ R25 -+ R26 -+ R27 -+ R28 -+ R29 -+ R30 -+ R31 -+ -+ // Float point register -+ F0 -+ F1 -+ F2 -+ F3 -+ F4 -+ F5 -+ F6 -+ F7 -+ F8 -+ F9 -+ F10 -+ F11 -+ F12 -+ F13 -+ F14 -+ F15 -+ F16 -+ F17 -+ F18 -+ F19 -+ F20 -+ F21 -+ F22 -+ F23 -+ F24 -+ F25 -+ F26 -+ F27 -+ F28 -+ F29 -+ F30 -+ F31 -+) -+ -+func (r Reg) String() string { -+ switch { -+ case r == R0: -+ return "$zero" -+ -+ case r == R1: -+ return "$ra" -+ -+ case r == R2: -+ return "$tp" -+ -+ case r == R3: -+ return "$sp" -+ -+ case (r >= R4) && (r <= R11): -+ return fmt.Sprintf("$a%d", int(r-R4)) -+ -+ case (r >= R12) && (r <= R20): -+ return fmt.Sprintf("$t%d", int(r-R12)) -+ -+ case r == R21: -+ return "$r21" -+ -+ case r == R22: -+ return "$fp" -+ -+ case (r >= R23) && (r <= R31): -+ return fmt.Sprintf("$s%d", int(r-R23)) -+ -+ case (r >= F0) && (r <= F7): -+ return fmt.Sprintf("$fa%d", int(r-F0)) -+ -+ case (r >= F8) && (r <= F23): -+ return fmt.Sprintf("$ft%d", int(r-F8)) -+ -+ case (r >= F24) && (r <= F31): -+ return fmt.Sprintf("$fs%d", int(r-F24)) -+ -+ default: -+ return fmt.Sprintf("Unknown(%d)", int(r)) -+ } -+} -+ -+// float control status register -+type Fcsr uint8 -+ -+const ( -+ //_ Fcsr = iota -+ FCSR0 Fcsr = iota -+ FCSR1 -+ FCSR2 -+ FCSR3 -+) -+ -+func (f Fcsr) String() string { -+ switch f { -+ case FCSR0: -+ return fmt.Sprintf("$zero") -+ case FCSR1, FCSR2, FCSR3: -+ return fmt.Sprintf("$r%d", uint8(f)) -+ } -+ -+ return fmt.Sprintf("$unknow%d", uint8(f)) -+} -+ -+// float condition flags register -+type Fcc uint8 -+ 
-+const ( -+ //_ Fcc = iota -+ FCC0 Fcc = iota -+ FCC1 -+ FCC2 -+ FCC3 -+ FCC4 -+ FCC5 -+ FCC6 -+ FCC7 -+) -+ -+func (f Fcc) String() string { -+ return fmt.Sprintf("$fcc%d", uint8(f)) -+} -+ -+// An Imm is an integer constant. -+type Uimm struct { -+ Imm uint32 -+ Decimal bool -+} -+ -+func (i Uimm) String() string { -+ if i.Decimal == true { -+ return fmt.Sprintf("%d", i.Imm) -+ } else { -+ return fmt.Sprintf("%#x", i.Imm) -+ } -+} -+ -+type Simm16 struct { -+ Imm int16 -+ Width uint8 -+} -+ -+func (si Simm16) String() string { -+ if si.Imm == 0 { -+ return fmt.Sprintf("%#x", int(si.Imm)) -+ } else { -+ hex := int16(si.Imm & ((1 << si.Width) - 1)) -+ str := strconv.FormatUint(uint64(*(*int16)(unsafe.Pointer(&hex))), 16) -+ return fmt.Sprintf("%d(0x%s)", int16(si.Imm), str) -+ } -+} -+ -+type Simm32 struct { -+ Imm int32 -+ Width uint8 -+} -+ -+func (si Simm32) String() string { -+ if si.Imm == 0 { -+ return fmt.Sprintf("%#x", int(si.Imm)) -+ } else { -+ hex := int32(si.Imm & ((1 << si.Width) - 1)) -+ str := strconv.FormatUint(uint64(*(*int32)(unsafe.Pointer(&hex))), 16) -+ return fmt.Sprintf("%d(0x%s)", int32(si.Imm), str) -+ } -+} -+ -+type OffsetSimm struct { -+ Imm int32 -+ Width uint8 -+} -+ -+func (o OffsetSimm) String() string { -+ if o.Imm == 0 { -+ return fmt.Sprintf("%#x", int(o.Imm)) -+ } else { -+ hex := int32(o.Imm & ((1 << o.Width) - 1)) -+ str := strconv.FormatUint(uint64(*(*int32)(unsafe.Pointer(&hex))), 16) -+ return fmt.Sprintf("%d(0x%s)", int32(o.Imm), str) -+ } -+} -+ -+type SaSimm int16 -+ -+func (s SaSimm) String() string { -+ return fmt.Sprintf("%#x", int(s)) -+} -+ -+type CodeSimm int16 -+ -+func (c CodeSimm) String() string { -+ return fmt.Sprintf("%#x", int(c)) -+} -diff --git a/src/cmd/vendor/golang.org/x/arch/loong64/loong64asm/tables.go b/src/cmd/vendor/golang.org/x/arch/loong64/loong64asm/tables.go -new file mode 100644 -index 0000000000..f90e9295f1 ---- /dev/null -+++ b/src/cmd/vendor/golang.org/x/arch/loong64/loong64asm/tables.go 
-@@ -0,0 +1,1513 @@ -+// Generated by Loong64 internal tool -+// DO NOT EDIT -+// Copyright 2022 The Go Authors. All rights reserved. -+// Use of this source code is governed by a BSD-style -+// license that can be found in the LICENSE file. -+ -+package loong64asm -+ -+const ( -+ _ Op = iota -+ ADDI_D -+ ADDI_W -+ ADDU16I_D -+ ADD_D -+ ADD_W -+ ALSL_D -+ ALSL_W -+ ALSL_WU -+ AMADD_D -+ AMADD_DB_D -+ AMADD_DB_W -+ AMADD_W -+ AMAND_D -+ AMAND_DB_D -+ AMAND_DB_W -+ AMAND_W -+ AMMAX_D -+ AMMAX_DB_D -+ AMMAX_DB_DU -+ AMMAX_DB_W -+ AMMAX_DB_WU -+ AMMAX_DU -+ AMMAX_W -+ AMMAX_WU -+ AMMIN_D -+ AMMIN_DB_D -+ AMMIN_DB_DU -+ AMMIN_DB_W -+ AMMIN_DB_WU -+ AMMIN_DU -+ AMMIN_W -+ AMMIN_WU -+ AMOR_D -+ AMOR_DB_D -+ AMOR_DB_W -+ AMOR_W -+ AMSWAP_D -+ AMSWAP_DB_D -+ AMSWAP_DB_W -+ AMSWAP_W -+ AMXOR_D -+ AMXOR_DB_D -+ AMXOR_DB_W -+ AMXOR_W -+ AND -+ ANDI -+ ANDN -+ ASRTGT_D -+ ASRTLE_D -+ B -+ BCEQZ -+ BCNEZ -+ BEQ -+ BEQZ -+ BGE -+ BGEU -+ BITREV_4B -+ BITREV_8B -+ BITREV_D -+ BITREV_W -+ BL -+ BLT -+ BLTU -+ BNE -+ BNEZ -+ BREAK -+ BSTRINS_D -+ BSTRINS_W -+ BSTRPICK_D -+ BSTRPICK_W -+ BYTEPICK_D -+ BYTEPICK_W -+ CACOP -+ CLO_D -+ CLO_W -+ CLZ_D -+ CLZ_W -+ CPUCFG -+ CRCC_W_B_W -+ CRCC_W_D_W -+ CRCC_W_H_W -+ CRCC_W_W_W -+ CRC_W_B_W -+ CRC_W_D_W -+ CRC_W_H_W -+ CRC_W_W_W -+ CSRRD -+ CSRWR -+ CSRXCHG -+ CTO_D -+ CTO_W -+ CTZ_D -+ CTZ_W -+ DBAR -+ DBCL -+ DIV_D -+ DIV_DU -+ DIV_W -+ DIV_WU -+ ERTN -+ EXT_W_B -+ EXT_W_H -+ FABS_D -+ FABS_S -+ FADD_D -+ FADD_S -+ FCLASS_D -+ FCLASS_S -+ FCMP_CAF_D -+ FCMP_CAF_S -+ FCMP_CEQ_D -+ FCMP_CEQ_S -+ FCMP_CLE_D -+ FCMP_CLE_S -+ FCMP_CLT_D -+ FCMP_CLT_S -+ FCMP_CNE_D -+ FCMP_CNE_S -+ FCMP_COR_D -+ FCMP_COR_S -+ FCMP_CUEQ_D -+ FCMP_CUEQ_S -+ FCMP_CULE_D -+ FCMP_CULE_S -+ FCMP_CULT_D -+ FCMP_CULT_S -+ FCMP_CUNE_D -+ FCMP_CUNE_S -+ FCMP_CUN_D -+ FCMP_CUN_S -+ FCMP_SAF_D -+ FCMP_SAF_S -+ FCMP_SEQ_D -+ FCMP_SEQ_S -+ FCMP_SLE_D -+ FCMP_SLE_S -+ FCMP_SLT_D -+ FCMP_SLT_S -+ FCMP_SNE_D -+ FCMP_SNE_S -+ FCMP_SOR_D -+ FCMP_SOR_S -+ FCMP_SUEQ_D -+ 
FCMP_SUEQ_S -+ FCMP_SULE_D -+ FCMP_SULE_S -+ FCMP_SULT_D -+ FCMP_SULT_S -+ FCMP_SUNE_D -+ FCMP_SUNE_S -+ FCMP_SUN_D -+ FCMP_SUN_S -+ FCOPYSIGN_D -+ FCOPYSIGN_S -+ FCVT_D_S -+ FCVT_S_D -+ FDIV_D -+ FDIV_S -+ FFINT_D_L -+ FFINT_D_W -+ FFINT_S_L -+ FFINT_S_W -+ FLDGT_D -+ FLDGT_S -+ FLDLE_D -+ FLDLE_S -+ FLDX_D -+ FLDX_S -+ FLD_D -+ FLD_S -+ FLOGB_D -+ FLOGB_S -+ FMADD_D -+ FMADD_S -+ FMAXA_D -+ FMAXA_S -+ FMAX_D -+ FMAX_S -+ FMINA_D -+ FMINA_S -+ FMIN_D -+ FMIN_S -+ FMOV_D -+ FMOV_S -+ FMSUB_D -+ FMSUB_S -+ FMUL_D -+ FMUL_S -+ FNEG_D -+ FNEG_S -+ FNMADD_D -+ FNMADD_S -+ FNMSUB_D -+ FNMSUB_S -+ FRECIP_D -+ FRECIP_S -+ FRINT_D -+ FRINT_S -+ FRSQRT_D -+ FRSQRT_S -+ FSCALEB_D -+ FSCALEB_S -+ FSEL -+ FSQRT_D -+ FSQRT_S -+ FSTGT_D -+ FSTGT_S -+ FSTLE_D -+ FSTLE_S -+ FSTX_D -+ FSTX_S -+ FST_D -+ FST_S -+ FSUB_D -+ FSUB_S -+ FTINTRM_L_D -+ FTINTRM_L_S -+ FTINTRM_W_D -+ FTINTRM_W_S -+ FTINTRNE_L_D -+ FTINTRNE_L_S -+ FTINTRNE_W_D -+ FTINTRNE_W_S -+ FTINTRP_L_D -+ FTINTRP_L_S -+ FTINTRP_W_D -+ FTINTRP_W_S -+ FTINTRZ_L_D -+ FTINTRZ_L_S -+ FTINTRZ_W_D -+ FTINTRZ_W_S -+ FTINT_L_D -+ FTINT_L_S -+ FTINT_W_D -+ FTINT_W_S -+ IBAR -+ IDLE -+ INVTLB -+ IOCSRRD_B -+ IOCSRRD_D -+ IOCSRRD_H -+ IOCSRRD_W -+ IOCSRWR_B -+ IOCSRWR_D -+ IOCSRWR_H -+ IOCSRWR_W -+ JIRL -+ LDDIR -+ LDGT_B -+ LDGT_D -+ LDGT_H -+ LDGT_W -+ LDLE_B -+ LDLE_D -+ LDLE_H -+ LDLE_W -+ LDPTE -+ LDPTR_D -+ LDPTR_W -+ LDX_B -+ LDX_BU -+ LDX_D -+ LDX_H -+ LDX_HU -+ LDX_W -+ LDX_WU -+ LD_B -+ LD_BU -+ LD_D -+ LD_H -+ LD_HU -+ LD_W -+ LD_WU -+ LL_D -+ LL_W -+ LU12I_W -+ LU32I_D -+ LU52I_D -+ MASKEQZ -+ MASKNEZ -+ MOD_D -+ MOD_DU -+ MOD_W -+ MOD_WU -+ MOVCF2FR -+ MOVCF2GR -+ MOVFCSR2GR -+ MOVFR2CF -+ MOVFR2GR_D -+ MOVFR2GR_S -+ MOVFRH2GR_S -+ MOVGR2CF -+ MOVGR2FCSR -+ MOVGR2FRH_W -+ MOVGR2FR_D -+ MOVGR2FR_W -+ MULH_D -+ MULH_DU -+ MULH_W -+ MULH_WU -+ MULW_D_W -+ MULW_D_WU -+ MUL_D -+ MUL_W -+ NOR -+ OR -+ ORI -+ ORN -+ PCADDI -+ PCADDU12I -+ PCADDU18I -+ PCALAU12I -+ PRELD -+ PRELDX -+ RDTIMEH_W -+ RDTIMEL_W -+ RDTIME_D -+ 
REVB_2H -+ REVB_2W -+ REVB_4H -+ REVB_D -+ REVH_2W -+ REVH_D -+ ROTRI_D -+ ROTRI_W -+ ROTR_D -+ ROTR_W -+ SC_D -+ SC_W -+ SLLI_D -+ SLLI_W -+ SLL_D -+ SLL_W -+ SLT -+ SLTI -+ SLTU -+ SLTUI -+ SRAI_D -+ SRAI_W -+ SRA_D -+ SRA_W -+ SRLI_D -+ SRLI_W -+ SRL_D -+ SRL_W -+ STGT_B -+ STGT_D -+ STGT_H -+ STGT_W -+ STLE_B -+ STLE_D -+ STLE_H -+ STLE_W -+ STPTR_D -+ STPTR_W -+ STX_B -+ STX_D -+ STX_H -+ STX_W -+ ST_B -+ ST_D -+ ST_H -+ ST_W -+ SUB_D -+ SUB_W -+ SYSCALL -+ TLBCLR -+ TLBFILL -+ TLBFLUSH -+ TLBRD -+ TLBSRCH -+ TLBWR -+ XOR -+ XORI -+) -+ -+var opstr = [...]string{ -+ ADDI_D: "ADDI.D", -+ ADDI_W: "ADDI.W", -+ ADDU16I_D: "ADDU16I.D", -+ ADD_D: "ADD.D", -+ ADD_W: "ADD.W", -+ ALSL_D: "ALSL.D", -+ ALSL_W: "ALSL.W", -+ ALSL_WU: "ALSL.WU", -+ AMADD_D: "AMADD.D", -+ AMADD_DB_D: "AMADD_DB.D", -+ AMADD_DB_W: "AMADD_DB.W", -+ AMADD_W: "AMADD.W", -+ AMAND_D: "AMAND.D", -+ AMAND_DB_D: "AMAND_DB.D", -+ AMAND_DB_W: "AMAND_DB.W", -+ AMAND_W: "AMAND.W", -+ AMMAX_D: "AMMAX.D", -+ AMMAX_DB_D: "AMMAX_DB.D", -+ AMMAX_DB_DU: "AMMAX_DB.DU", -+ AMMAX_DB_W: "AMMAX_DB.W", -+ AMMAX_DB_WU: "AMMAX_DB.WU", -+ AMMAX_DU: "AMMAX.DU", -+ AMMAX_W: "AMMAX.W", -+ AMMAX_WU: "AMMAX.WU", -+ AMMIN_D: "AMMIN.D", -+ AMMIN_DB_D: "AMMIN_DB.D", -+ AMMIN_DB_DU: "AMMIN_DB.DU", -+ AMMIN_DB_W: "AMMIN_DB.W", -+ AMMIN_DB_WU: "AMMIN_DB.WU", -+ AMMIN_DU: "AMMIN.DU", -+ AMMIN_W: "AMMIN.W", -+ AMMIN_WU: "AMMIN.WU", -+ AMOR_D: "AMOR.D", -+ AMOR_DB_D: "AMOR_DB.D", -+ AMOR_DB_W: "AMOR_DB.W", -+ AMOR_W: "AMOR.W", -+ AMSWAP_D: "AMSWAP.D", -+ AMSWAP_DB_D: "AMSWAP_DB.D", -+ AMSWAP_DB_W: "AMSWAP_DB.W", -+ AMSWAP_W: "AMSWAP.W", -+ AMXOR_D: "AMXOR.D", -+ AMXOR_DB_D: "AMXOR_DB.D", -+ AMXOR_DB_W: "AMXOR_DB.W", -+ AMXOR_W: "AMXOR.W", -+ AND: "AND", -+ ANDI: "ANDI", -+ ANDN: "ANDN", -+ ASRTGT_D: "ASRTGT.D", -+ ASRTLE_D: "ASRTLE.D", -+ B: "B", -+ BCEQZ: "BCEQZ", -+ BCNEZ: "BCNEZ", -+ BEQ: "BEQ", -+ BEQZ: "BEQZ", -+ BGE: "BGE", -+ BGEU: "BGEU", -+ BITREV_4B: "BITREV.4B", -+ BITREV_8B: "BITREV.8B", -+ BITREV_D: "BITREV.D", -+ 
BITREV_W: "BITREV.W", -+ BL: "BL", -+ BLT: "BLT", -+ BLTU: "BLTU", -+ BNE: "BNE", -+ BNEZ: "BNEZ", -+ BREAK: "BREAK", -+ BSTRINS_D: "BSTRINS.D", -+ BSTRINS_W: "BSTRINS.W", -+ BSTRPICK_D: "BSTRPICK.D", -+ BSTRPICK_W: "BSTRPICK.W", -+ BYTEPICK_D: "BYTEPICK.D", -+ BYTEPICK_W: "BYTEPICK.W", -+ CACOP: "CACOP", -+ CLO_D: "CLO.D", -+ CLO_W: "CLO.W", -+ CLZ_D: "CLZ.D", -+ CLZ_W: "CLZ.W", -+ CPUCFG: "CPUCFG", -+ CRCC_W_B_W: "CRCC.W.B.W", -+ CRCC_W_D_W: "CRCC.W.D.W", -+ CRCC_W_H_W: "CRCC.W.H.W", -+ CRCC_W_W_W: "CRCC.W.W.W", -+ CRC_W_B_W: "CRC.W.B.W", -+ CRC_W_D_W: "CRC.W.D.W", -+ CRC_W_H_W: "CRC.W.H.W", -+ CRC_W_W_W: "CRC.W.W.W", -+ CSRRD: "CSRRD", -+ CSRWR: "CSRWR", -+ CSRXCHG: "CSRXCHG", -+ CTO_D: "CTO.D", -+ CTO_W: "CTO.W", -+ CTZ_D: "CTZ.D", -+ CTZ_W: "CTZ.W", -+ DBAR: "DBAR", -+ DBCL: "DBCL", -+ DIV_D: "DIV.D", -+ DIV_DU: "DIV.DU", -+ DIV_W: "DIV.W", -+ DIV_WU: "DIV.WU", -+ ERTN: "ERTN", -+ EXT_W_B: "EXT.W.B", -+ EXT_W_H: "EXT.W.H", -+ FABS_D: "FABS.D", -+ FABS_S: "FABS.S", -+ FADD_D: "FADD.D", -+ FADD_S: "FADD.S", -+ FCLASS_D: "FCLASS.D", -+ FCLASS_S: "FCLASS.S", -+ FCMP_CAF_D: "FCMP.CAF.D", -+ FCMP_CAF_S: "FCMP.CAF.S", -+ FCMP_CEQ_D: "FCMP.CEQ.D", -+ FCMP_CEQ_S: "FCMP.CEQ.S", -+ FCMP_CLE_D: "FCMP.CLE.D", -+ FCMP_CLE_S: "FCMP.CLE.S", -+ FCMP_CLT_D: "FCMP.CLT.D", -+ FCMP_CLT_S: "FCMP.CLT.S", -+ FCMP_CNE_D: "FCMP.CNE.D", -+ FCMP_CNE_S: "FCMP.CNE.S", -+ FCMP_COR_D: "FCMP.COR.D", -+ FCMP_COR_S: "FCMP.COR.S", -+ FCMP_CUEQ_D: "FCMP.CUEQ.D", -+ FCMP_CUEQ_S: "FCMP.CUEQ.S", -+ FCMP_CULE_D: "FCMP.CULE.D", -+ FCMP_CULE_S: "FCMP.CULE.S", -+ FCMP_CULT_D: "FCMP.CULT.D", -+ FCMP_CULT_S: "FCMP.CULT.S", -+ FCMP_CUNE_D: "FCMP.CUNE.D", -+ FCMP_CUNE_S: "FCMP.CUNE.S", -+ FCMP_CUN_D: "FCMP.CUN.D", -+ FCMP_CUN_S: "FCMP.CUN.S", -+ FCMP_SAF_D: "FCMP.SAF.D", -+ FCMP_SAF_S: "FCMP.SAF.S", -+ FCMP_SEQ_D: "FCMP.SEQ.D", -+ FCMP_SEQ_S: "FCMP.SEQ.S", -+ FCMP_SLE_D: "FCMP.SLE.D", -+ FCMP_SLE_S: "FCMP.SLE.S", -+ FCMP_SLT_D: "FCMP.SLT.D", -+ FCMP_SLT_S: "FCMP.SLT.S", -+ FCMP_SNE_D: "FCMP.SNE.D", -+ 
FCMP_SNE_S: "FCMP.SNE.S", -+ FCMP_SOR_D: "FCMP.SOR.D", -+ FCMP_SOR_S: "FCMP.SOR.S", -+ FCMP_SUEQ_D: "FCMP.SUEQ.D", -+ FCMP_SUEQ_S: "FCMP.SUEQ.S", -+ FCMP_SULE_D: "FCMP.SULE.D", -+ FCMP_SULE_S: "FCMP.SULE.S", -+ FCMP_SULT_D: "FCMP.SULT.D", -+ FCMP_SULT_S: "FCMP.SULT.S", -+ FCMP_SUNE_D: "FCMP.SUNE.D", -+ FCMP_SUNE_S: "FCMP.SUNE.S", -+ FCMP_SUN_D: "FCMP.SUN.D", -+ FCMP_SUN_S: "FCMP.SUN.S", -+ FCOPYSIGN_D: "FCOPYSIGN.D", -+ FCOPYSIGN_S: "FCOPYSIGN.S", -+ FCVT_D_S: "FCVT.D.S", -+ FCVT_S_D: "FCVT.S.D", -+ FDIV_D: "FDIV.D", -+ FDIV_S: "FDIV.S", -+ FFINT_D_L: "FFINT.D.L", -+ FFINT_D_W: "FFINT.D.W", -+ FFINT_S_L: "FFINT.S.L", -+ FFINT_S_W: "FFINT.S.W", -+ FLDGT_D: "FLDGT.D", -+ FLDGT_S: "FLDGT.S", -+ FLDLE_D: "FLDLE.D", -+ FLDLE_S: "FLDLE.S", -+ FLDX_D: "FLDX.D", -+ FLDX_S: "FLDX.S", -+ FLD_D: "FLD.D", -+ FLD_S: "FLD.S", -+ FLOGB_D: "FLOGB.D", -+ FLOGB_S: "FLOGB.S", -+ FMADD_D: "FMADD.D", -+ FMADD_S: "FMADD.S", -+ FMAXA_D: "FMAXA.D", -+ FMAXA_S: "FMAXA.S", -+ FMAX_D: "FMAX.D", -+ FMAX_S: "FMAX.S", -+ FMINA_D: "FMINA.D", -+ FMINA_S: "FMINA.S", -+ FMIN_D: "FMIN.D", -+ FMIN_S: "FMIN.S", -+ FMOV_D: "FMOV.D", -+ FMOV_S: "FMOV.S", -+ FMSUB_D: "FMSUB.D", -+ FMSUB_S: "FMSUB.S", -+ FMUL_D: "FMUL.D", -+ FMUL_S: "FMUL.S", -+ FNEG_D: "FNEG.D", -+ FNEG_S: "FNEG.S", -+ FNMADD_D: "FNMADD.D", -+ FNMADD_S: "FNMADD.S", -+ FNMSUB_D: "FNMSUB.D", -+ FNMSUB_S: "FNMSUB.S", -+ FRECIP_D: "FRECIP.D", -+ FRECIP_S: "FRECIP.S", -+ FRINT_D: "FRINT.D", -+ FRINT_S: "FRINT.S", -+ FRSQRT_D: "FRSQRT.D", -+ FRSQRT_S: "FRSQRT.S", -+ FSCALEB_D: "FSCALEB.D", -+ FSCALEB_S: "FSCALEB.S", -+ FSEL: "FSEL", -+ FSQRT_D: "FSQRT.D", -+ FSQRT_S: "FSQRT.S", -+ FSTGT_D: "FSTGT.D", -+ FSTGT_S: "FSTGT.S", -+ FSTLE_D: "FSTLE.D", -+ FSTLE_S: "FSTLE.S", -+ FSTX_D: "FSTX.D", -+ FSTX_S: "FSTX.S", -+ FST_D: "FST.D", -+ FST_S: "FST.S", -+ FSUB_D: "FSUB.D", -+ FSUB_S: "FSUB.S", -+ FTINTRM_L_D: "FTINTRM.L.D", -+ FTINTRM_L_S: "FTINTRM.L.S", -+ FTINTRM_W_D: "FTINTRM.W.D", -+ FTINTRM_W_S: "FTINTRM.W.S", -+ FTINTRNE_L_D: "FTINTRNE.L.D", 
-+ FTINTRNE_L_S: "FTINTRNE.L.S", -+ FTINTRNE_W_D: "FTINTRNE.W.D", -+ FTINTRNE_W_S: "FTINTRNE.W.S", -+ FTINTRP_L_D: "FTINTRP.L.D", -+ FTINTRP_L_S: "FTINTRP.L.S", -+ FTINTRP_W_D: "FTINTRP.W.D", -+ FTINTRP_W_S: "FTINTRP.W.S", -+ FTINTRZ_L_D: "FTINTRZ.L.D", -+ FTINTRZ_L_S: "FTINTRZ.L.S", -+ FTINTRZ_W_D: "FTINTRZ.W.D", -+ FTINTRZ_W_S: "FTINTRZ.W.S", -+ FTINT_L_D: "FTINT.L.D", -+ FTINT_L_S: "FTINT.L.S", -+ FTINT_W_D: "FTINT.W.D", -+ FTINT_W_S: "FTINT.W.S", -+ IBAR: "IBAR", -+ IDLE: "IDLE", -+ INVTLB: "INVTLB", -+ IOCSRRD_B: "IOCSRRD.B", -+ IOCSRRD_D: "IOCSRRD.D", -+ IOCSRRD_H: "IOCSRRD.H", -+ IOCSRRD_W: "IOCSRRD.W", -+ IOCSRWR_B: "IOCSRWR.B", -+ IOCSRWR_D: "IOCSRWR.D", -+ IOCSRWR_H: "IOCSRWR.H", -+ IOCSRWR_W: "IOCSRWR.W", -+ JIRL: "JIRL", -+ LDDIR: "LDDIR", -+ LDGT_B: "LDGT.B", -+ LDGT_D: "LDGT.D", -+ LDGT_H: "LDGT.H", -+ LDGT_W: "LDGT.W", -+ LDLE_B: "LDLE.B", -+ LDLE_D: "LDLE.D", -+ LDLE_H: "LDLE.H", -+ LDLE_W: "LDLE.W", -+ LDPTE: "LDPTE", -+ LDPTR_D: "LDPTR.D", -+ LDPTR_W: "LDPTR.W", -+ LDX_B: "LDX.B", -+ LDX_BU: "LDX.BU", -+ LDX_D: "LDX.D", -+ LDX_H: "LDX.H", -+ LDX_HU: "LDX.HU", -+ LDX_W: "LDX.W", -+ LDX_WU: "LDX.WU", -+ LD_B: "LD.B", -+ LD_BU: "LD.BU", -+ LD_D: "LD.D", -+ LD_H: "LD.H", -+ LD_HU: "LD.HU", -+ LD_W: "LD.W", -+ LD_WU: "LD.WU", -+ LL_D: "LL.D", -+ LL_W: "LL.W", -+ LU12I_W: "LU12I.W", -+ LU32I_D: "LU32I.D", -+ LU52I_D: "LU52I.D", -+ MASKEQZ: "MASKEQZ", -+ MASKNEZ: "MASKNEZ", -+ MOD_D: "MOD.D", -+ MOD_DU: "MOD.DU", -+ MOD_W: "MOD.W", -+ MOD_WU: "MOD.WU", -+ MOVCF2FR: "MOVCF2FR", -+ MOVCF2GR: "MOVCF2GR", -+ MOVFCSR2GR: "MOVFCSR2GR", -+ MOVFR2CF: "MOVFR2CF", -+ MOVFR2GR_D: "MOVFR2GR.D", -+ MOVFR2GR_S: "MOVFR2GR.S", -+ MOVFRH2GR_S: "MOVFRH2GR.S", -+ MOVGR2CF: "MOVGR2CF", -+ MOVGR2FCSR: "MOVGR2FCSR", -+ MOVGR2FRH_W: "MOVGR2FRH.W", -+ MOVGR2FR_D: "MOVGR2FR.D", -+ MOVGR2FR_W: "MOVGR2FR.W", -+ MULH_D: "MULH.D", -+ MULH_DU: "MULH.DU", -+ MULH_W: "MULH.W", -+ MULH_WU: "MULH.WU", -+ MULW_D_W: "MULW.D.W", -+ MULW_D_WU: "MULW.D.WU", -+ MUL_D: "MUL.D", -+ MUL_W: 
"MUL.W", -+ NOR: "NOR", -+ OR: "OR", -+ ORI: "ORI", -+ ORN: "ORN", -+ PCADDI: "PCADDI", -+ PCADDU12I: "PCADDU12I", -+ PCADDU18I: "PCADDU18I", -+ PCALAU12I: "PCALAU12I", -+ PRELD: "PRELD", -+ PRELDX: "PRELDX", -+ RDTIMEH_W: "RDTIMEH.W", -+ RDTIMEL_W: "RDTIMEL.W", -+ RDTIME_D: "RDTIME.D", -+ REVB_2H: "REVB.2H", -+ REVB_2W: "REVB.2W", -+ REVB_4H: "REVB.4H", -+ REVB_D: "REVB.D", -+ REVH_2W: "REVH.2W", -+ REVH_D: "REVH.D", -+ ROTRI_D: "ROTRI.D", -+ ROTRI_W: "ROTRI.W", -+ ROTR_D: "ROTR.D", -+ ROTR_W: "ROTR.W", -+ SC_D: "SC.D", -+ SC_W: "SC.W", -+ SLLI_D: "SLLI.D", -+ SLLI_W: "SLLI.W", -+ SLL_D: "SLL.D", -+ SLL_W: "SLL.W", -+ SLT: "SLT", -+ SLTI: "SLTI", -+ SLTU: "SLTU", -+ SLTUI: "SLTUI", -+ SRAI_D: "SRAI.D", -+ SRAI_W: "SRAI.W", -+ SRA_D: "SRA.D", -+ SRA_W: "SRA.W", -+ SRLI_D: "SRLI.D", -+ SRLI_W: "SRLI.W", -+ SRL_D: "SRL.D", -+ SRL_W: "SRL.W", -+ STGT_B: "STGT.B", -+ STGT_D: "STGT.D", -+ STGT_H: "STGT.H", -+ STGT_W: "STGT.W", -+ STLE_B: "STLE.B", -+ STLE_D: "STLE.D", -+ STLE_H: "STLE.H", -+ STLE_W: "STLE.W", -+ STPTR_D: "STPTR.D", -+ STPTR_W: "STPTR.W", -+ STX_B: "STX.B", -+ STX_D: "STX.D", -+ STX_H: "STX.H", -+ STX_W: "STX.W", -+ ST_B: "ST.B", -+ ST_D: "ST.D", -+ ST_H: "ST.H", -+ ST_W: "ST.W", -+ SUB_D: "SUB.D", -+ SUB_W: "SUB.W", -+ SYSCALL: "SYSCALL", -+ TLBCLR: "TLBCLR", -+ TLBFILL: "TLBFILL", -+ TLBFLUSH: "TLBFLUSH", -+ TLBRD: "TLBRD", -+ TLBSRCH: "TLBSRCH", -+ TLBWR: "TLBWR", -+ XOR: "XOR", -+ XORI: "XORI", -+} -+ -+var instFormats = [...]instFormat{ -+ // ADDI.D rd, rj, si12 -+ {mask: 0xffc00000, value: 0x02c00000, op: ADDI_D, args: instArgs{arg_rd, arg_rj, arg_si12_21_10}}, -+ // ADDI.W rd, rj, si12 -+ {mask: 0xffc00000, value: 0x02800000, op: ADDI_W, args: instArgs{arg_rd, arg_rj, arg_si12_21_10}}, -+ // ADDU16I.D rd, rj, si16 -+ {mask: 0xfc000000, value: 0x10000000, op: ADDU16I_D, args: instArgs{arg_rd, arg_rj, arg_si16_25_10}}, -+ // ADD.D rd, rj, rk -+ {mask: 0xffff8000, value: 0x00108000, op: ADD_D, args: instArgs{arg_rd, arg_rj, arg_rk}}, -+ // ADD.W rd, 
rj, rk -+ {mask: 0xffff8000, value: 0x00100000, op: ADD_W, args: instArgs{arg_rd, arg_rj, arg_rk}}, -+ // ALSL.D rd, rj, rk, sa2 -+ {mask: 0xfffe0000, value: 0x002c0000, op: ALSL_D, args: instArgs{arg_rd, arg_rj, arg_rk, arg_sa2_16_15}}, -+ // ALSL.W rd, rj, rk, sa2 -+ {mask: 0xfffe0000, value: 0x00040000, op: ALSL_W, args: instArgs{arg_rd, arg_rj, arg_rk, arg_sa2_16_15}}, -+ // ALSL.WU rd, rj, rk, sa2 -+ {mask: 0xfffe0000, value: 0x00060000, op: ALSL_WU, args: instArgs{arg_rd, arg_rj, arg_rk, arg_sa2_16_15}}, -+ // AMADD.D rd, rk, rj -+ {mask: 0xffff8000, value: 0x38618000, op: AMADD_D, args: instArgs{arg_rd, arg_rk, arg_rj}}, -+ // AMADD_DB.D rd, rk, rj -+ {mask: 0xffff8000, value: 0x386a8000, op: AMADD_DB_D, args: instArgs{arg_rd, arg_rk, arg_rj}}, -+ // AMADD_DB.W rd, rk, rj -+ {mask: 0xffff8000, value: 0x386a0000, op: AMADD_DB_W, args: instArgs{arg_rd, arg_rk, arg_rj}}, -+ // AMADD.W rd, rk, rj -+ {mask: 0xffff8000, value: 0x38610000, op: AMADD_W, args: instArgs{arg_rd, arg_rk, arg_rj}}, -+ // AMAND.D rd, rk, rj -+ {mask: 0xffff8000, value: 0x38628000, op: AMAND_D, args: instArgs{arg_rd, arg_rk, arg_rj}}, -+ // AMAND_DB.D rd, rk, rj -+ {mask: 0xffff8000, value: 0x386b8000, op: AMAND_DB_D, args: instArgs{arg_rd, arg_rk, arg_rj}}, -+ // AMAND_DB.W rd, rk, rj -+ {mask: 0xffff8000, value: 0x386b0000, op: AMAND_DB_W, args: instArgs{arg_rd, arg_rk, arg_rj}}, -+ // AMAND.W rd, rk, rj -+ {mask: 0xffff8000, value: 0x38620000, op: AMAND_W, args: instArgs{arg_rd, arg_rk, arg_rj}}, -+ // AMMAX.D rd, rk, rj -+ {mask: 0xffff8000, value: 0x38658000, op: AMMAX_D, args: instArgs{arg_rd, arg_rk, arg_rj}}, -+ // AMMAX_DB.D rd, rk, rj -+ {mask: 0xffff8000, value: 0x386e8000, op: AMMAX_DB_D, args: instArgs{arg_rd, arg_rk, arg_rj}}, -+ // AMMAX_DB.DU rd, rk, rj -+ {mask: 0xffff8000, value: 0x38708000, op: AMMAX_DB_DU, args: instArgs{arg_rd, arg_rk, arg_rj}}, -+ // AMMAX_DB.W rd, rk, rj -+ {mask: 0xffff8000, value: 0x386e0000, op: AMMAX_DB_W, args: instArgs{arg_rd, arg_rk, arg_rj}}, 
-+ // AMMAX_DB.WU rd, rk, rj -+ {mask: 0xffff8000, value: 0x38700000, op: AMMAX_DB_WU, args: instArgs{arg_rd, arg_rk, arg_rj}}, -+ // AMMAX.DU rd, rk, rj -+ {mask: 0xffff8000, value: 0x38678000, op: AMMAX_DU, args: instArgs{arg_rd, arg_rk, arg_rj}}, -+ // AMMAX.W rd, rk, rj -+ {mask: 0xffff8000, value: 0x38650000, op: AMMAX_W, args: instArgs{arg_rd, arg_rk, arg_rj}}, -+ // AMMAX.WU rd, rk, rj -+ {mask: 0xffff8000, value: 0x38670000, op: AMMAX_WU, args: instArgs{arg_rd, arg_rk, arg_rj}}, -+ // AMMIN.D rd, rk, rj -+ {mask: 0xffff8000, value: 0x38668000, op: AMMIN_D, args: instArgs{arg_rd, arg_rk, arg_rj}}, -+ // AMMIN_DB.D rd, rk, rj -+ {mask: 0xffff8000, value: 0x386f8000, op: AMMIN_DB_D, args: instArgs{arg_rd, arg_rk, arg_rj}}, -+ // AMMIN_DB.DU rd, rk, rj -+ {mask: 0xffff8000, value: 0x38718000, op: AMMIN_DB_DU, args: instArgs{arg_rd, arg_rk, arg_rj}}, -+ // AMMIN_DB.W rd, rk, rj -+ {mask: 0xffff8000, value: 0x386f0000, op: AMMIN_DB_W, args: instArgs{arg_rd, arg_rk, arg_rj}}, -+ // AMMIN_DB.WU rd, rk, rj -+ {mask: 0xffff8000, value: 0x38710000, op: AMMIN_DB_WU, args: instArgs{arg_rd, arg_rk, arg_rj}}, -+ // AMMIN.DU rd, rk, rj -+ {mask: 0xffff8000, value: 0x38688000, op: AMMIN_DU, args: instArgs{arg_rd, arg_rk, arg_rj}}, -+ // AMMIN.W rd, rk, rj -+ {mask: 0xffff8000, value: 0x38660000, op: AMMIN_W, args: instArgs{arg_rd, arg_rk, arg_rj}}, -+ // AMMIN.WU rd, rk, rj -+ {mask: 0xffff8000, value: 0x38680000, op: AMMIN_WU, args: instArgs{arg_rd, arg_rk, arg_rj}}, -+ // AMOR.D rd, rk, rj -+ {mask: 0xffff8000, value: 0x38638000, op: AMOR_D, args: instArgs{arg_rd, arg_rk, arg_rj}}, -+ // AMOR_DB.D rd, rk, rj -+ {mask: 0xffff8000, value: 0x386c8000, op: AMOR_DB_D, args: instArgs{arg_rd, arg_rk, arg_rj}}, -+ // AMOR_DB.W rd, rk, rj -+ {mask: 0xffff8000, value: 0x386c0000, op: AMOR_DB_W, args: instArgs{arg_rd, arg_rk, arg_rj}}, -+ // AMOR.W rd, rk, rj -+ {mask: 0xffff8000, value: 0x38630000, op: AMOR_W, args: instArgs{arg_rd, arg_rk, arg_rj}}, -+ // AMSWAP.D rd, rk, rj -+ 
{mask: 0xffff8000, value: 0x38608000, op: AMSWAP_D, args: instArgs{arg_rd, arg_rk, arg_rj}}, -+ // AMSWAP_DB.D rd, rk, rj -+ {mask: 0xffff8000, value: 0x38698000, op: AMSWAP_DB_D, args: instArgs{arg_rd, arg_rk, arg_rj}}, -+ // AMSWAP_DB.W rd, rk, rj -+ {mask: 0xffff8000, value: 0x38690000, op: AMSWAP_DB_W, args: instArgs{arg_rd, arg_rk, arg_rj}}, -+ // AMSWAP.W rd, rk, rj -+ {mask: 0xffff8000, value: 0x38600000, op: AMSWAP_W, args: instArgs{arg_rd, arg_rk, arg_rj}}, -+ // AMXOR.D rd, rk, rj -+ {mask: 0xffff8000, value: 0x38648000, op: AMXOR_D, args: instArgs{arg_rd, arg_rk, arg_rj}}, -+ // AMXOR_DB.D rd, rk, rj -+ {mask: 0xffff8000, value: 0x386d8000, op: AMXOR_DB_D, args: instArgs{arg_rd, arg_rk, arg_rj}}, -+ // AMXOR_DB.W rd, rk, rj -+ {mask: 0xffff8000, value: 0x386d0000, op: AMXOR_DB_W, args: instArgs{arg_rd, arg_rk, arg_rj}}, -+ // AMXOR.W rd, rk, rj -+ {mask: 0xffff8000, value: 0x38640000, op: AMXOR_W, args: instArgs{arg_rd, arg_rk, arg_rj}}, -+ // AND rd, rj, rk -+ {mask: 0xffff8000, value: 0x00148000, op: AND, args: instArgs{arg_rd, arg_rj, arg_rk}}, -+ // ANDI rd, rj, ui12 -+ {mask: 0xffc00000, value: 0x03400000, op: ANDI, args: instArgs{arg_rd, arg_rj, arg_ui12_21_10}}, -+ // ANDN rd, rj, rk -+ {mask: 0xffff8000, value: 0x00168000, op: ANDN, args: instArgs{arg_rd, arg_rj, arg_rk}}, -+ // ASRTGT.D rj, rk -+ {mask: 0xffff801f, value: 0x00018000, op: ASRTGT_D, args: instArgs{arg_rj, arg_rk}}, -+ // ASRTLE.D rj, rk -+ {mask: 0xffff801f, value: 0x00010000, op: ASRTLE_D, args: instArgs{arg_rj, arg_rk}}, -+ // B offs -+ {mask: 0xfc000000, value: 0x50000000, op: B, args: instArgs{arg_offset_25_0}}, -+ // BCEQZ cj, offs -+ {mask: 0xfc000300, value: 0x48000000, op: BCEQZ, args: instArgs{arg_cj, arg_offset_20_0}}, -+ // BCNEZ cj, offs -+ {mask: 0xfc000300, value: 0x48000100, op: BCNEZ, args: instArgs{arg_cj, arg_offset_20_0}}, -+ // BEQ rj, rd, offs -+ {mask: 0xfc000000, value: 0x58000000, op: BEQ, args: instArgs{arg_rj, arg_rd, arg_offset_15_0}}, -+ // BEQZ rj, 
offs -+ {mask: 0xfc000000, value: 0x40000000, op: BEQZ, args: instArgs{arg_rj, arg_offset_20_0}}, -+ // BGE rj, rd, offs -+ {mask: 0xfc000000, value: 0x64000000, op: BGE, args: instArgs{arg_rj, arg_rd, arg_offset_15_0}}, -+ // BGEU rj, rd, offs -+ {mask: 0xfc000000, value: 0x6c000000, op: BGEU, args: instArgs{arg_rj, arg_rd, arg_offset_15_0}}, -+ // BITREV.4B rd, rj -+ {mask: 0xfffffc00, value: 0x00004800, op: BITREV_4B, args: instArgs{arg_rd, arg_rj}}, -+ // BITREV.8B rd, rj -+ {mask: 0xfffffc00, value: 0x00004c00, op: BITREV_8B, args: instArgs{arg_rd, arg_rj}}, -+ // BITREV.D rd, rj -+ {mask: 0xfffffc00, value: 0x00005400, op: BITREV_D, args: instArgs{arg_rd, arg_rj}}, -+ // BITREV.W rd, rj -+ {mask: 0xfffffc00, value: 0x00005000, op: BITREV_W, args: instArgs{arg_rd, arg_rj}}, -+ // BL offs -+ {mask: 0xfc000000, value: 0x54000000, op: BL, args: instArgs{arg_offset_25_0}}, -+ // BLT rj, rd, offs -+ {mask: 0xfc000000, value: 0x60000000, op: BLT, args: instArgs{arg_rj, arg_rd, arg_offset_15_0}}, -+ // BLTU rj, rd, offs -+ {mask: 0xfc000000, value: 0x68000000, op: BLTU, args: instArgs{arg_rj, arg_rd, arg_offset_15_0}}, -+ // BNE rj, rd, offs -+ {mask: 0xfc000000, value: 0x5c000000, op: BNE, args: instArgs{arg_rj, arg_rd, arg_offset_15_0}}, -+ // BNEZ rj, offs -+ {mask: 0xfc000000, value: 0x44000000, op: BNEZ, args: instArgs{arg_rj, arg_offset_20_0}}, -+ // BREAK code -+ {mask: 0xffff8000, value: 0x002a0000, op: BREAK, args: instArgs{arg_code_14_0}}, -+ // BSTRINS.D rd, rj, msbd, lsbd -+ {mask: 0xffc00000, value: 0x00800000, op: BSTRINS_D, args: instArgs{arg_rd, arg_rj, arg_msbd, arg_lsbd}}, -+ // BSTRINS.W rd, rj, msbw, lsbw -+ {mask: 0xffe08000, value: 0x00600000, op: BSTRINS_W, args: instArgs{arg_rd, arg_rj, arg_msbw, arg_lsbw}}, -+ // BSTRPICK.D rd, rj, msbd, lsbd -+ {mask: 0xffc00000, value: 0x00c00000, op: BSTRPICK_D, args: instArgs{arg_rd, arg_rj, arg_msbd, arg_lsbd}}, -+ // BSTRPICK.W rd, rj, msbw, lsbw -+ {mask: 0xffe08000, value: 0x00608000, op: BSTRPICK_W, 
args: instArgs{arg_rd, arg_rj, arg_msbw, arg_lsbw}}, -+ // BYTEPICK.D rd, rj, rk, sa3 -+ {mask: 0xfffc0000, value: 0x000c0000, op: BYTEPICK_D, args: instArgs{arg_rd, arg_rj, arg_rk, arg_sa3_17_15}}, -+ // BYTEPICK.W rd, rj, rk, sa2 -+ {mask: 0xfffe0000, value: 0x00080000, op: BYTEPICK_W, args: instArgs{arg_rd, arg_rj, arg_rk, arg_sa2_16_15}}, -+ // CACOP code, rj, si12 -+ {mask: 0xffc00000, value: 0x06000000, op: CACOP, args: instArgs{arg_code_4_0, arg_rj, arg_si12_21_10}}, -+ // CLO.D rd, rj -+ {mask: 0xfffffc00, value: 0x00002000, op: CLO_D, args: instArgs{arg_rd, arg_rj}}, -+ // CLO.W rd, rj -+ {mask: 0xfffffc00, value: 0x00001000, op: CLO_W, args: instArgs{arg_rd, arg_rj}}, -+ // CLZ.D rd, rj -+ {mask: 0xfffffc00, value: 0x00002400, op: CLZ_D, args: instArgs{arg_rd, arg_rj}}, -+ // CLZ.W rd, rj -+ {mask: 0xfffffc00, value: 0x00001400, op: CLZ_W, args: instArgs{arg_rd, arg_rj}}, -+ // CPUCFG rd, rj -+ {mask: 0xfffffc00, value: 0x00006c00, op: CPUCFG, args: instArgs{arg_rd, arg_rj}}, -+ // CRCC.W.B.W rd, rj, rk -+ {mask: 0xffff8000, value: 0x00260000, op: CRCC_W_B_W, args: instArgs{arg_rd, arg_rj, arg_rk}}, -+ // CRCC.W.D.W rd, rj, rk -+ {mask: 0xffff8000, value: 0x00278000, op: CRCC_W_D_W, args: instArgs{arg_rd, arg_rj, arg_rk}}, -+ // CRCC.W.H.W rd, rj, rk -+ {mask: 0xffff8000, value: 0x00268000, op: CRCC_W_H_W, args: instArgs{arg_rd, arg_rj, arg_rk}}, -+ // CRCC.W.W.W rd, rj, rk -+ {mask: 0xffff8000, value: 0x00270000, op: CRCC_W_W_W, args: instArgs{arg_rd, arg_rj, arg_rk}}, -+ // CRC.W.B.W rd, rj, rk -+ {mask: 0xffff8000, value: 0x00240000, op: CRC_W_B_W, args: instArgs{arg_rd, arg_rj, arg_rk}}, -+ // CRC.W.D.W rd, rj, rk -+ {mask: 0xffff8000, value: 0x00258000, op: CRC_W_D_W, args: instArgs{arg_rd, arg_rj, arg_rk}}, -+ // CRC.W.H.W rd, rj, rk -+ {mask: 0xffff8000, value: 0x00248000, op: CRC_W_H_W, args: instArgs{arg_rd, arg_rj, arg_rk}}, -+ // CRC.W.W.W rd, rj, rk -+ {mask: 0xffff8000, value: 0x00250000, op: CRC_W_W_W, args: instArgs{arg_rd, arg_rj, 
arg_rk}}, -+ // CSRRD rd, csr -+ {mask: 0xff0003e0, value: 0x04000000, op: CSRRD, args: instArgs{arg_rd, arg_csr_23_10}}, -+ // CSRWR rd, csr -+ {mask: 0xff0003e0, value: 0x04000020, op: CSRWR, args: instArgs{arg_rd, arg_csr_23_10}}, -+ // CSRXCHG rd, rj, csr -+ {mask: 0xff000000, value: 0x04000000, op: CSRXCHG, args: instArgs{arg_rd, arg_rj, arg_csr_23_10}}, -+ // CTO.D rd, rj -+ {mask: 0xfffffc00, value: 0x00002800, op: CTO_D, args: instArgs{arg_rd, arg_rj}}, -+ // CTO.W rd, rj -+ {mask: 0xfffffc00, value: 0x00001800, op: CTO_W, args: instArgs{arg_rd, arg_rj}}, -+ // CTZ.D rd, rj -+ {mask: 0xfffffc00, value: 0x00002c00, op: CTZ_D, args: instArgs{arg_rd, arg_rj}}, -+ // CTZ.W rd, rj -+ {mask: 0xfffffc00, value: 0x00001c00, op: CTZ_W, args: instArgs{arg_rd, arg_rj}}, -+ // DBAR hint -+ {mask: 0xffff8000, value: 0x38720000, op: DBAR, args: instArgs{arg_hint_14_0}}, -+ // DBCL code -+ {mask: 0xffff8000, value: 0x002a8000, op: DBCL, args: instArgs{arg_code_14_0}}, -+ // DIV.D rd, rj, rk -+ {mask: 0xffff8000, value: 0x00220000, op: DIV_D, args: instArgs{arg_rd, arg_rj, arg_rk}}, -+ // DIV.DU rd, rj, rk -+ {mask: 0xffff8000, value: 0x00230000, op: DIV_DU, args: instArgs{arg_rd, arg_rj, arg_rk}}, -+ // DIV.W rd, rj, rk -+ {mask: 0xffff8000, value: 0x00200000, op: DIV_W, args: instArgs{arg_rd, arg_rj, arg_rk}}, -+ // DIV.WU rd, rj, rk -+ {mask: 0xffff8000, value: 0x00210000, op: DIV_WU, args: instArgs{arg_rd, arg_rj, arg_rk}}, -+ // ERTN -+ {mask: 0xffffffff, value: 0x06483800, op: ERTN, args: instArgs{}}, -+ // EXT.W.B rd, rj -+ {mask: 0xfffffc00, value: 0x00005c00, op: EXT_W_B, args: instArgs{arg_rd, arg_rj}}, -+ // EXT.W.H rd, rj -+ {mask: 0xfffffc00, value: 0x00005800, op: EXT_W_H, args: instArgs{arg_rd, arg_rj}}, -+ // FABS.D fd, fj -+ {mask: 0xfffffc00, value: 0x01140800, op: FABS_D, args: instArgs{arg_fd, arg_fj}}, -+ // FABS.S fd, fj -+ {mask: 0xfffffc00, value: 0x01140400, op: FABS_S, args: instArgs{arg_fd, arg_fj}}, -+ // FADD.D fd, fj, fk -+ {mask: 0xffff8000, 
value: 0x01010000, op: FADD_D, args: instArgs{arg_fd, arg_fj, arg_fk}}, -+ // FADD.S fd, fj, fk -+ {mask: 0xffff8000, value: 0x01008000, op: FADD_S, args: instArgs{arg_fd, arg_fj, arg_fk}}, -+ // FCLASS.D fd, fj -+ {mask: 0xfffffc00, value: 0x01143800, op: FCLASS_D, args: instArgs{arg_fd, arg_fj}}, -+ // FCLASS.S fd, fj -+ {mask: 0xfffffc00, value: 0x01143400, op: FCLASS_S, args: instArgs{arg_fd, arg_fj}}, -+ // FCMP.CAF.D cd, fj, fk -+ {mask: 0xffff8018, value: 0x0c200000, op: FCMP_CAF_D, args: instArgs{arg_cd, arg_fj, arg_fk}}, -+ // FCMP.CAF.S cd, fj, fk -+ {mask: 0xffff8018, value: 0x0c100000, op: FCMP_CAF_S, args: instArgs{arg_cd, arg_fj, arg_fk}}, -+ // FCMP.CEQ.D cd, fj, fk -+ {mask: 0xffff8018, value: 0x0c220000, op: FCMP_CEQ_D, args: instArgs{arg_cd, arg_fj, arg_fk}}, -+ // FCMP.CEQ.S cd, fj, fk -+ {mask: 0xffff8018, value: 0x0c120000, op: FCMP_CEQ_S, args: instArgs{arg_cd, arg_fj, arg_fk}}, -+ // FCMP.CLE.D cd, fj, fk -+ {mask: 0xffff8018, value: 0x0c230000, op: FCMP_CLE_D, args: instArgs{arg_cd, arg_fj, arg_fk}}, -+ // FCMP.CLE.S cd, fj, fk -+ {mask: 0xffff8018, value: 0x0c130000, op: FCMP_CLE_S, args: instArgs{arg_cd, arg_fj, arg_fk}}, -+ // FCMP.CLT.D cd, fj, fk -+ {mask: 0xffff8018, value: 0x0c210000, op: FCMP_CLT_D, args: instArgs{arg_cd, arg_fj, arg_fk}}, -+ // FCMP.CLT.S cd, fj, fk -+ {mask: 0xffff8018, value: 0x0c110000, op: FCMP_CLT_S, args: instArgs{arg_cd, arg_fj, arg_fk}}, -+ // FCMP.CNE.D cd, fj, fk -+ {mask: 0xffff8018, value: 0x0c280000, op: FCMP_CNE_D, args: instArgs{arg_cd, arg_fj, arg_fk}}, -+ // FCMP.CNE.S cd, fj, fk -+ {mask: 0xffff8018, value: 0x0c180000, op: FCMP_CNE_S, args: instArgs{arg_cd, arg_fj, arg_fk}}, -+ // FCMP.COR.D cd, fj, fk -+ {mask: 0xffff8018, value: 0x0c2a0000, op: FCMP_COR_D, args: instArgs{arg_cd, arg_fj, arg_fk}}, -+ // FCMP.COR.S cd, fj, fk -+ {mask: 0xffff8018, value: 0x0c1a0000, op: FCMP_COR_S, args: instArgs{arg_cd, arg_fj, arg_fk}}, -+ // FCMP.CUEQ.D cd, fj, fk -+ {mask: 0xffff8018, value: 0x0c260000, op: 
FCMP_CUEQ_D, args: instArgs{arg_cd, arg_fj, arg_fk}}, -+ // FCMP.CUEQ.S cd, fj, fk -+ {mask: 0xffff8018, value: 0x0c160000, op: FCMP_CUEQ_S, args: instArgs{arg_cd, arg_fj, arg_fk}}, -+ // FCMP.CULE.D cd, fj, fk -+ {mask: 0xffff8018, value: 0x0c270000, op: FCMP_CULE_D, args: instArgs{arg_cd, arg_fj, arg_fk}}, -+ // FCMP.CULE.S cd, fj, fk -+ {mask: 0xffff8018, value: 0x0c170000, op: FCMP_CULE_S, args: instArgs{arg_cd, arg_fj, arg_fk}}, -+ // FCMP.CULT.D cd, fj, fk -+ {mask: 0xffff8018, value: 0x0c250000, op: FCMP_CULT_D, args: instArgs{arg_cd, arg_fj, arg_fk}}, -+ // FCMP.CULT.S cd, fj, fk -+ {mask: 0xffff8018, value: 0x0c150000, op: FCMP_CULT_S, args: instArgs{arg_cd, arg_fj, arg_fk}}, -+ // FCMP.CUNE.D cd, fj, fk -+ {mask: 0xffff8018, value: 0x0c2c0000, op: FCMP_CUNE_D, args: instArgs{arg_cd, arg_fj, arg_fk}}, -+ // FCMP.CUNE.S cd, fj, fk -+ {mask: 0xffff8018, value: 0x0c1c0000, op: FCMP_CUNE_S, args: instArgs{arg_cd, arg_fj, arg_fk}}, -+ // FCMP.CUN.D cd, fj, fk -+ {mask: 0xffff8018, value: 0x0c240000, op: FCMP_CUN_D, args: instArgs{arg_cd, arg_fj, arg_fk}}, -+ // FCMP.CUN.S cd, fj, fk -+ {mask: 0xffff8018, value: 0x0c140000, op: FCMP_CUN_S, args: instArgs{arg_cd, arg_fj, arg_fk}}, -+ // FCMP.SAF.D cd, fj, fk -+ {mask: 0xffff8018, value: 0x0c208000, op: FCMP_SAF_D, args: instArgs{arg_cd, arg_fj, arg_fk}}, -+ // FCMP.SAF.S cd, fj, fk -+ {mask: 0xffff8018, value: 0x0c108000, op: FCMP_SAF_S, args: instArgs{arg_cd, arg_fj, arg_fk}}, -+ // FCMP.SEQ.D cd, fj, fk -+ {mask: 0xffff8018, value: 0x0c228000, op: FCMP_SEQ_D, args: instArgs{arg_cd, arg_fj, arg_fk}}, -+ // FCMP.SEQ.S cd, fj, fk -+ {mask: 0xffff8018, value: 0x0c128000, op: FCMP_SEQ_S, args: instArgs{arg_cd, arg_fj, arg_fk}}, -+ // FCMP.SLE.D cd, fj, fk -+ {mask: 0xffff8018, value: 0x0c238000, op: FCMP_SLE_D, args: instArgs{arg_cd, arg_fj, arg_fk}}, -+ // FCMP.SLE.S cd, fj, fk -+ {mask: 0xffff8018, value: 0x0c138000, op: FCMP_SLE_S, args: instArgs{arg_cd, arg_fj, arg_fk}}, -+ // FCMP.SLT.D cd, fj, fk -+ {mask: 
0xffff8018, value: 0x0c218000, op: FCMP_SLT_D, args: instArgs{arg_cd, arg_fj, arg_fk}}, -+ // FCMP.SLT.S cd, fj, fk -+ {mask: 0xffff8018, value: 0x0c118000, op: FCMP_SLT_S, args: instArgs{arg_cd, arg_fj, arg_fk}}, -+ // FCMP.SNE.D cd, fj, fk -+ {mask: 0xffff8018, value: 0x0c288000, op: FCMP_SNE_D, args: instArgs{arg_cd, arg_fj, arg_fk}}, -+ // FCMP.SNE.S cd, fj, fk -+ {mask: 0xffff8018, value: 0x0c188000, op: FCMP_SNE_S, args: instArgs{arg_cd, arg_fj, arg_fk}}, -+ // FCMP.SOR.D cd, fj, fk -+ {mask: 0xffff8018, value: 0x0c2a8000, op: FCMP_SOR_D, args: instArgs{arg_cd, arg_fj, arg_fk}}, -+ // FCMP.SOR.S cd, fj, fk -+ {mask: 0xffff8018, value: 0x0c1a8000, op: FCMP_SOR_S, args: instArgs{arg_cd, arg_fj, arg_fk}}, -+ // FCMP.SUEQ.D cd, fj, fk -+ {mask: 0xffff8018, value: 0x0c268000, op: FCMP_SUEQ_D, args: instArgs{arg_cd, arg_fj, arg_fk}}, -+ // FCMP.SUEQ.S cd, fj, fk -+ {mask: 0xffff8018, value: 0x0c168000, op: FCMP_SUEQ_S, args: instArgs{arg_cd, arg_fj, arg_fk}}, -+ // FCMP.SULE.D cd, fj, fk -+ {mask: 0xffff8018, value: 0x0c278000, op: FCMP_SULE_D, args: instArgs{arg_cd, arg_fj, arg_fk}}, -+ // FCMP.SULE.S cd, fj, fk -+ {mask: 0xffff8018, value: 0x0c178000, op: FCMP_SULE_S, args: instArgs{arg_cd, arg_fj, arg_fk}}, -+ // FCMP.SULT.D cd, fj, fk -+ {mask: 0xffff8018, value: 0x0c258000, op: FCMP_SULT_D, args: instArgs{arg_cd, arg_fj, arg_fk}}, -+ // FCMP.SULT.S cd, fj, fk -+ {mask: 0xffff8018, value: 0x0c158000, op: FCMP_SULT_S, args: instArgs{arg_cd, arg_fj, arg_fk}}, -+ // FCMP.SUNE.D cd, fj, fk -+ {mask: 0xffff8018, value: 0x0c2c8000, op: FCMP_SUNE_D, args: instArgs{arg_cd, arg_fj, arg_fk}}, -+ // FCMP.SUNE.S cd, fj, fk -+ {mask: 0xffff8018, value: 0x0c1c8000, op: FCMP_SUNE_S, args: instArgs{arg_cd, arg_fj, arg_fk}}, -+ // FCMP.SUN.D cd, fj, fk -+ {mask: 0xffff8018, value: 0x0c248000, op: FCMP_SUN_D, args: instArgs{arg_cd, arg_fj, arg_fk}}, -+ // FCMP.SUN.S cd, fj, fk -+ {mask: 0xffff8018, value: 0x0c148000, op: FCMP_SUN_S, args: instArgs{arg_cd, arg_fj, arg_fk}}, -+ // 
FCOPYSIGN.D fd, fj, fk -+ {mask: 0xffff8000, value: 0x01130000, op: FCOPYSIGN_D, args: instArgs{arg_fd, arg_fj, arg_fk}}, -+ // FCOPYSIGN.S fd, fj, fk -+ {mask: 0xffff8000, value: 0x01128000, op: FCOPYSIGN_S, args: instArgs{arg_fd, arg_fj, arg_fk}}, -+ // FCVT.D.S fd, fj -+ {mask: 0xfffffc00, value: 0x01192400, op: FCVT_D_S, args: instArgs{arg_fd, arg_fj}}, -+ // FCVT.S.D fd, fj -+ {mask: 0xfffffc00, value: 0x01191800, op: FCVT_S_D, args: instArgs{arg_fd, arg_fj}}, -+ // FDIV.D fd, fj, fk -+ {mask: 0xffff8000, value: 0x01070000, op: FDIV_D, args: instArgs{arg_fd, arg_fj, arg_fk}}, -+ // FDIV.S fd, fj, fk -+ {mask: 0xffff8000, value: 0x01068000, op: FDIV_S, args: instArgs{arg_fd, arg_fj, arg_fk}}, -+ // FFINT.D.L fd, fj -+ {mask: 0xfffffc00, value: 0x011d2800, op: FFINT_D_L, args: instArgs{arg_fd, arg_fj}}, -+ // FFINT.D.W fd, fj -+ {mask: 0xfffffc00, value: 0x011d2000, op: FFINT_D_W, args: instArgs{arg_fd, arg_fj}}, -+ // FFINT.S.L fd, fj -+ {mask: 0xfffffc00, value: 0x011d1800, op: FFINT_S_L, args: instArgs{arg_fd, arg_fj}}, -+ // FFINT.S.W fd, fj -+ {mask: 0xfffffc00, value: 0x011d1000, op: FFINT_S_W, args: instArgs{arg_fd, arg_fj}}, -+ // FLDGT.D fd, rj, rk -+ {mask: 0xffff8000, value: 0x38748000, op: FLDGT_D, args: instArgs{arg_fd, arg_rj, arg_rk}}, -+ // FLDGT.S fd, rj, rk -+ {mask: 0xffff8000, value: 0x38740000, op: FLDGT_S, args: instArgs{arg_fd, arg_rj, arg_rk}}, -+ // FLDLE.D fd, rj, rk -+ {mask: 0xffff8000, value: 0x38758000, op: FLDLE_D, args: instArgs{arg_fd, arg_rj, arg_rk}}, -+ // FLDLE.S fd, rj, rk -+ {mask: 0xffff8000, value: 0x38750000, op: FLDLE_S, args: instArgs{arg_fd, arg_rj, arg_rk}}, -+ // FLDX.D fd, rj, rk -+ {mask: 0xffff8000, value: 0x38340000, op: FLDX_D, args: instArgs{arg_fd, arg_rj, arg_rk}}, -+ // FLDX.S fd, rj, rk -+ {mask: 0xffff8000, value: 0x38300000, op: FLDX_S, args: instArgs{arg_fd, arg_rj, arg_rk}}, -+ // FLD.D fd, rj, si12 -+ {mask: 0xffc00000, value: 0x2b800000, op: FLD_D, args: instArgs{arg_fd, arg_rj, arg_si12_21_10}}, -+ 
// FLD.S fd, rj, si12 -+ {mask: 0xffc00000, value: 0x2b000000, op: FLD_S, args: instArgs{arg_fd, arg_rj, arg_si12_21_10}}, -+ // FLOGB.D fd, fj -+ {mask: 0xfffffc00, value: 0x01142800, op: FLOGB_D, args: instArgs{arg_fd, arg_fj}}, -+ // FLOGB.S fd, fj -+ {mask: 0xfffffc00, value: 0x01142400, op: FLOGB_S, args: instArgs{arg_fd, arg_fj}}, -+ // FMADD.D fd, fj, fk, fa -+ {mask: 0xfff00000, value: 0x08200000, op: FMADD_D, args: instArgs{arg_fd, arg_fj, arg_fk, arg_fa}}, -+ // FMADD.S fd, fj, fk, fa -+ {mask: 0xfff00000, value: 0x08100000, op: FMADD_S, args: instArgs{arg_fd, arg_fj, arg_fk, arg_fa}}, -+ // FMAXA.D fd, fj, fk -+ {mask: 0xffff8000, value: 0x010d0000, op: FMAXA_D, args: instArgs{arg_fd, arg_fj, arg_fk}}, -+ // FMAXA.S fd, fj, fk -+ {mask: 0xffff8000, value: 0x010c8000, op: FMAXA_S, args: instArgs{arg_fd, arg_fj, arg_fk}}, -+ // FMAX.D fd, fj, fk -+ {mask: 0xffff8000, value: 0x01090000, op: FMAX_D, args: instArgs{arg_fd, arg_fj, arg_fk}}, -+ // FMAX.S fd, fj, fk -+ {mask: 0xffff8000, value: 0x01088000, op: FMAX_S, args: instArgs{arg_fd, arg_fj, arg_fk}}, -+ // FMINA.D fd, fj, fk -+ {mask: 0xffff8000, value: 0x010f0000, op: FMINA_D, args: instArgs{arg_fd, arg_fj, arg_fk}}, -+ // FMINA.S fd, fj, fk -+ {mask: 0xffff8000, value: 0x010e8000, op: FMINA_S, args: instArgs{arg_fd, arg_fj, arg_fk}}, -+ // FMIN.D fd, fj, fk -+ {mask: 0xffff8000, value: 0x010b0000, op: FMIN_D, args: instArgs{arg_fd, arg_fj, arg_fk}}, -+ // FMIN.S fd, fj, fk -+ {mask: 0xffff8000, value: 0x010a8000, op: FMIN_S, args: instArgs{arg_fd, arg_fj, arg_fk}}, -+ // FMOV.D fd, fj -+ {mask: 0xfffffc00, value: 0x01149800, op: FMOV_D, args: instArgs{arg_fd, arg_fj}}, -+ // FMOV.S fd, fj -+ {mask: 0xfffffc00, value: 0x01149400, op: FMOV_S, args: instArgs{arg_fd, arg_fj}}, -+ // FMSUB.D fd, fj, fk, fa -+ {mask: 0xfff00000, value: 0x08600000, op: FMSUB_D, args: instArgs{arg_fd, arg_fj, arg_fk, arg_fa}}, -+ // FMSUB.S fd, fj, fk, fa -+ {mask: 0xfff00000, value: 0x08500000, op: FMSUB_S, args: 
instArgs{arg_fd, arg_fj, arg_fk, arg_fa}}, -+ // FMUL.D fd, fj, fk -+ {mask: 0xffff8000, value: 0x01050000, op: FMUL_D, args: instArgs{arg_fd, arg_fj, arg_fk}}, -+ // FMUL.S fd, fj, fk -+ {mask: 0xffff8000, value: 0x01048000, op: FMUL_S, args: instArgs{arg_fd, arg_fj, arg_fk}}, -+ // FNEG.D fd, fj -+ {mask: 0xfffffc00, value: 0x01141800, op: FNEG_D, args: instArgs{arg_fd, arg_fj}}, -+ // FNEG.S fd, fj -+ {mask: 0xfffffc00, value: 0x01141400, op: FNEG_S, args: instArgs{arg_fd, arg_fj}}, -+ // FNMADD.D fd, fj, fk, fa -+ {mask: 0xfff00000, value: 0x08a00000, op: FNMADD_D, args: instArgs{arg_fd, arg_fj, arg_fk, arg_fa}}, -+ // FNMADD.S fd, fj, fk, fa -+ {mask: 0xfff00000, value: 0x08900000, op: FNMADD_S, args: instArgs{arg_fd, arg_fj, arg_fk, arg_fa}}, -+ // FNMSUB.D fd, fj, fk, fa -+ {mask: 0xfff00000, value: 0x08e00000, op: FNMSUB_D, args: instArgs{arg_fd, arg_fj, arg_fk, arg_fa}}, -+ // FNMSUB.S fd, fj, fk, fa -+ {mask: 0xfff00000, value: 0x08d00000, op: FNMSUB_S, args: instArgs{arg_fd, arg_fj, arg_fk, arg_fa}}, -+ // FRECIP.D fd, fj -+ {mask: 0xfffffc00, value: 0x01145800, op: FRECIP_D, args: instArgs{arg_fd, arg_fj}}, -+ // FRECIP.S fd, fj -+ {mask: 0xfffffc00, value: 0x01145400, op: FRECIP_S, args: instArgs{arg_fd, arg_fj}}, -+ // FRINT.D fd, fj -+ {mask: 0xfffffc00, value: 0x011e4800, op: FRINT_D, args: instArgs{arg_fd, arg_fj}}, -+ // FRINT.S fd, fj -+ {mask: 0xfffffc00, value: 0x011e4400, op: FRINT_S, args: instArgs{arg_fd, arg_fj}}, -+ // FRSQRT.D fd, fj -+ {mask: 0xfffffc00, value: 0x01146800, op: FRSQRT_D, args: instArgs{arg_fd, arg_fj}}, -+ // FRSQRT.S fd, fj -+ {mask: 0xfffffc00, value: 0x01146400, op: FRSQRT_S, args: instArgs{arg_fd, arg_fj}}, -+ // FSCALEB.D fd, fj, fk -+ {mask: 0xffff8000, value: 0x01110000, op: FSCALEB_D, args: instArgs{arg_fd, arg_fj, arg_fk}}, -+ // FSCALEB.S fd, fj, fk -+ {mask: 0xffff8000, value: 0x01108000, op: FSCALEB_S, args: instArgs{arg_fd, arg_fj, arg_fk}}, -+ // FSEL fd, fj, fk, ca -+ {mask: 0xfffc0000, value: 0x0d000000, 
op: FSEL, args: instArgs{arg_fd, arg_fj, arg_fk, arg_ca}}, -+ // FSQRT.D fd, fj -+ {mask: 0xfffffc00, value: 0x01144800, op: FSQRT_D, args: instArgs{arg_fd, arg_fj}}, -+ // FSQRT.S fd, fj -+ {mask: 0xfffffc00, value: 0x01144400, op: FSQRT_S, args: instArgs{arg_fd, arg_fj}}, -+ // FSTGT.D fd, rj, rk -+ {mask: 0xffff8000, value: 0x38768000, op: FSTGT_D, args: instArgs{arg_fd, arg_rj, arg_rk}}, -+ // FSTGT.S fd, rj, rk -+ {mask: 0xffff8000, value: 0x38760000, op: FSTGT_S, args: instArgs{arg_fd, arg_rj, arg_rk}}, -+ // FSTLE.D fd, rj, rk -+ {mask: 0xffff8000, value: 0x38778000, op: FSTLE_D, args: instArgs{arg_fd, arg_rj, arg_rk}}, -+ // FSTLE.S fd, rj, rk -+ {mask: 0xffff8000, value: 0x38770000, op: FSTLE_S, args: instArgs{arg_fd, arg_rj, arg_rk}}, -+ // FSTX.D fd, rj, rk -+ {mask: 0xffff8000, value: 0x383c0000, op: FSTX_D, args: instArgs{arg_fd, arg_rj, arg_rk}}, -+ // FSTX.S fd, rj, rk -+ {mask: 0xffff8000, value: 0x38380000, op: FSTX_S, args: instArgs{arg_fd, arg_rj, arg_rk}}, -+ // FST.D fd, rj, si12 -+ {mask: 0xffc00000, value: 0x2bc00000, op: FST_D, args: instArgs{arg_fd, arg_rj, arg_si12_21_10}}, -+ // FST.S fd, rj, si12 -+ {mask: 0xffc00000, value: 0x2b400000, op: FST_S, args: instArgs{arg_fd, arg_rj, arg_si12_21_10}}, -+ // FSUB.D fd, fj, fk -+ {mask: 0xffff8000, value: 0x01030000, op: FSUB_D, args: instArgs{arg_fd, arg_fj, arg_fk}}, -+ // FSUB.S fd, fj, fk -+ {mask: 0xffff8000, value: 0x01028000, op: FSUB_S, args: instArgs{arg_fd, arg_fj, arg_fk}}, -+ // FTINTRM.L.D fd, fj -+ {mask: 0xfffffc00, value: 0x011a2800, op: FTINTRM_L_D, args: instArgs{arg_fd, arg_fj}}, -+ // FTINTRM.L.S fd, fj -+ {mask: 0xfffffc00, value: 0x011a2400, op: FTINTRM_L_S, args: instArgs{arg_fd, arg_fj}}, -+ // FTINTRM.W.D fd, fj -+ {mask: 0xfffffc00, value: 0x011a0800, op: FTINTRM_W_D, args: instArgs{arg_fd, arg_fj}}, -+ // FTINTRM.W.S fd, fj -+ {mask: 0xfffffc00, value: 0x011a0400, op: FTINTRM_W_S, args: instArgs{arg_fd, arg_fj}}, -+ // FTINTRNE.L.D fd, fj -+ {mask: 0xfffffc00, value: 
0x011ae800, op: FTINTRNE_L_D, args: instArgs{arg_fd, arg_fj}}, -+ // FTINTRNE.L.S fd, fj -+ {mask: 0xfffffc00, value: 0x011ae400, op: FTINTRNE_L_S, args: instArgs{arg_fd, arg_fj}}, -+ // FTINTRNE.W.D fd, fj -+ {mask: 0xfffffc00, value: 0x011ac800, op: FTINTRNE_W_D, args: instArgs{arg_fd, arg_fj}}, -+ // FTINTRNE.W.S fd, fj -+ {mask: 0xfffffc00, value: 0x011ac400, op: FTINTRNE_W_S, args: instArgs{arg_fd, arg_fj}}, -+ // FTINTRP.L.D fd, fj -+ {mask: 0xfffffc00, value: 0x011a6800, op: FTINTRP_L_D, args: instArgs{arg_fd, arg_fj}}, -+ // FTINTRP.L.S fd, fj -+ {mask: 0xfffffc00, value: 0x011a6400, op: FTINTRP_L_S, args: instArgs{arg_fd, arg_fj}}, -+ // FTINTRP.W.D fd, fj -+ {mask: 0xfffffc00, value: 0x011a4800, op: FTINTRP_W_D, args: instArgs{arg_fd, arg_fj}}, -+ // FTINTRP.W.S fd, fj -+ {mask: 0xfffffc00, value: 0x011a4400, op: FTINTRP_W_S, args: instArgs{arg_fd, arg_fj}}, -+ // FTINTRZ.L.D fd, fj -+ {mask: 0xfffffc00, value: 0x011aa800, op: FTINTRZ_L_D, args: instArgs{arg_fd, arg_fj}}, -+ // FTINTRZ.L.S fd, fj -+ {mask: 0xfffffc00, value: 0x011aa400, op: FTINTRZ_L_S, args: instArgs{arg_fd, arg_fj}}, -+ // FTINTRZ.W.D fd, fj -+ {mask: 0xfffffc00, value: 0x011a8800, op: FTINTRZ_W_D, args: instArgs{arg_fd, arg_fj}}, -+ // FTINTRZ.W.S fd, fj -+ {mask: 0xfffffc00, value: 0x011a8400, op: FTINTRZ_W_S, args: instArgs{arg_fd, arg_fj}}, -+ // FTINT.L.D fd, fj -+ {mask: 0xfffffc00, value: 0x011b2800, op: FTINT_L_D, args: instArgs{arg_fd, arg_fj}}, -+ // FTINT.L.S fd, fj -+ {mask: 0xfffffc00, value: 0x011b2400, op: FTINT_L_S, args: instArgs{arg_fd, arg_fj}}, -+ // FTINT.W.D fd, fj -+ {mask: 0xfffffc00, value: 0x011b0800, op: FTINT_W_D, args: instArgs{arg_fd, arg_fj}}, -+ // FTINT.W.S fd, fj -+ {mask: 0xfffffc00, value: 0x011b0400, op: FTINT_W_S, args: instArgs{arg_fd, arg_fj}}, -+ // IBAR hint -+ {mask: 0xffff8000, value: 0x38728000, op: IBAR, args: instArgs{arg_hint_14_0}}, -+ // IDLE level -+ {mask: 0xffff8000, value: 0x06488000, op: IDLE, args: instArgs{arg_level_14_0}}, -+ // 
INVTLB op, rj, rk -+ {mask: 0xffff8000, value: 0x06498000, op: INVTLB, args: instArgs{arg_op_4_0, arg_rj, arg_rk}}, -+ // IOCSRRD.B rd, rj -+ {mask: 0xfffffc00, value: 0x06480000, op: IOCSRRD_B, args: instArgs{arg_rd, arg_rj}}, -+ // IOCSRRD.D rd, rj -+ {mask: 0xfffffc00, value: 0x06480c00, op: IOCSRRD_D, args: instArgs{arg_rd, arg_rj}}, -+ // IOCSRRD.H rd, rj -+ {mask: 0xfffffc00, value: 0x06480400, op: IOCSRRD_H, args: instArgs{arg_rd, arg_rj}}, -+ // IOCSRRD.W rd, rj -+ {mask: 0xfffffc00, value: 0x06480800, op: IOCSRRD_W, args: instArgs{arg_rd, arg_rj}}, -+ // IOCSRWR.B rd, rj -+ {mask: 0xfffffc00, value: 0x06481000, op: IOCSRWR_B, args: instArgs{arg_rd, arg_rj}}, -+ // IOCSRWR.D rd, rj -+ {mask: 0xfffffc00, value: 0x06481c00, op: IOCSRWR_D, args: instArgs{arg_rd, arg_rj}}, -+ // IOCSRWR.H rd, rj -+ {mask: 0xfffffc00, value: 0x06481400, op: IOCSRWR_H, args: instArgs{arg_rd, arg_rj}}, -+ // IOCSRWR.W rd, rj -+ {mask: 0xfffffc00, value: 0x06481800, op: IOCSRWR_W, args: instArgs{arg_rd, arg_rj}}, -+ // JIRL rd, rj, offs -+ {mask: 0xfc000000, value: 0x4c000000, op: JIRL, args: instArgs{arg_rd, arg_rj, arg_offset_15_0}}, -+ // LDDIR rd, rj, level -+ {mask: 0xfffc0000, value: 0x06400000, op: LDDIR, args: instArgs{arg_rd, arg_rj, arg_level_17_10}}, -+ // LDGT.B rd, rj, rk -+ {mask: 0xffff8000, value: 0x38780000, op: LDGT_B, args: instArgs{arg_rd, arg_rj, arg_rk}}, -+ // LDGT.D rd, rj, rk -+ {mask: 0xffff8000, value: 0x38798000, op: LDGT_D, args: instArgs{arg_rd, arg_rj, arg_rk}}, -+ // LDGT.H rd, rj, rk -+ {mask: 0xffff8000, value: 0x38788000, op: LDGT_H, args: instArgs{arg_rd, arg_rj, arg_rk}}, -+ // LDGT.W rd, rj, rk -+ {mask: 0xffff8000, value: 0x38790000, op: LDGT_W, args: instArgs{arg_rd, arg_rj, arg_rk}}, -+ // LDLE.B rd, rj, rk -+ {mask: 0xffff8000, value: 0x387a0000, op: LDLE_B, args: instArgs{arg_rd, arg_rj, arg_rk}}, -+ // LDLE.D rd, rj, rk -+ {mask: 0xffff8000, value: 0x387b8000, op: LDLE_D, args: instArgs{arg_rd, arg_rj, arg_rk}}, -+ // LDLE.H rd, rj, rk -+ 
{mask: 0xffff8000, value: 0x387a8000, op: LDLE_H, args: instArgs{arg_rd, arg_rj, arg_rk}}, -+ // LDLE.W rd, rj, rk -+ {mask: 0xffff8000, value: 0x387b0000, op: LDLE_W, args: instArgs{arg_rd, arg_rj, arg_rk}}, -+ // LDPTE rj, seq -+ {mask: 0xfffc001f, value: 0x06440000, op: LDPTE, args: instArgs{arg_rj, arg_seq_17_10}}, -+ // LDPTR.D rd, rj, si14 -+ {mask: 0xff000000, value: 0x26000000, op: LDPTR_D, args: instArgs{arg_rd, arg_rj, arg_si14_23_10}}, -+ // LDPTR.W rd, rj, si14 -+ {mask: 0xff000000, value: 0x24000000, op: LDPTR_W, args: instArgs{arg_rd, arg_rj, arg_si14_23_10}}, -+ // LDX.B rd, rj, rk -+ {mask: 0xffff8000, value: 0x38000000, op: LDX_B, args: instArgs{arg_rd, arg_rj, arg_rk}}, -+ // LDX.BU rd, rj, rk -+ {mask: 0xffff8000, value: 0x38200000, op: LDX_BU, args: instArgs{arg_rd, arg_rj, arg_rk}}, -+ // LDX.D rd, rj, rk -+ {mask: 0xffff8000, value: 0x380c0000, op: LDX_D, args: instArgs{arg_rd, arg_rj, arg_rk}}, -+ // LDX.H rd, rj, rk -+ {mask: 0xffff8000, value: 0x38040000, op: LDX_H, args: instArgs{arg_rd, arg_rj, arg_rk}}, -+ // LDX.HU rd, rj, rk -+ {mask: 0xffff8000, value: 0x38240000, op: LDX_HU, args: instArgs{arg_rd, arg_rj, arg_rk}}, -+ // LDX.W rd, rj, rk -+ {mask: 0xffff8000, value: 0x38080000, op: LDX_W, args: instArgs{arg_rd, arg_rj, arg_rk}}, -+ // LDX.WU rd, rj, rk -+ {mask: 0xffff8000, value: 0x38280000, op: LDX_WU, args: instArgs{arg_rd, arg_rj, arg_rk}}, -+ // LD.B rd, rj, si12 -+ {mask: 0xffc00000, value: 0x28000000, op: LD_B, args: instArgs{arg_rd, arg_rj, arg_si12_21_10}}, -+ // LD.BU rd, rj, si12 -+ {mask: 0xffc00000, value: 0x2a000000, op: LD_BU, args: instArgs{arg_rd, arg_rj, arg_si12_21_10}}, -+ // LD.D rd, rj, si12 -+ {mask: 0xffc00000, value: 0x28c00000, op: LD_D, args: instArgs{arg_rd, arg_rj, arg_si12_21_10}}, -+ // LD.H rd, rj, si12 -+ {mask: 0xffc00000, value: 0x28400000, op: LD_H, args: instArgs{arg_rd, arg_rj, arg_si12_21_10}}, -+ // LD.HU rd, rj, si12 -+ {mask: 0xffc00000, value: 0x2a400000, op: LD_HU, args: instArgs{arg_rd, 
arg_rj, arg_si12_21_10}}, -+ // LD.W rd, rj, si12 -+ {mask: 0xffc00000, value: 0x28800000, op: LD_W, args: instArgs{arg_rd, arg_rj, arg_si12_21_10}}, -+ // LD.WU rd, rj, si12 -+ {mask: 0xffc00000, value: 0x2a800000, op: LD_WU, args: instArgs{arg_rd, arg_rj, arg_si12_21_10}}, -+ // LL.D rd, rj, si14 -+ {mask: 0xff000000, value: 0x22000000, op: LL_D, args: instArgs{arg_rd, arg_rj, arg_si14_23_10}}, -+ // LL.W rd, rj, si14 -+ {mask: 0xff000000, value: 0x20000000, op: LL_W, args: instArgs{arg_rd, arg_rj, arg_si14_23_10}}, -+ // LU12I.W rd, si20 -+ {mask: 0xfe000000, value: 0x14000000, op: LU12I_W, args: instArgs{arg_rd, arg_si20_24_5}}, -+ // LU32I.D rd, si20 -+ {mask: 0xfe000000, value: 0x16000000, op: LU32I_D, args: instArgs{arg_rd, arg_si20_24_5}}, -+ // LU52I.D rd, rj, si12 -+ {mask: 0xffc00000, value: 0x03000000, op: LU52I_D, args: instArgs{arg_rd, arg_rj, arg_si12_21_10}}, -+ // MASKEQZ rd, rj, rk -+ {mask: 0xffff8000, value: 0x00130000, op: MASKEQZ, args: instArgs{arg_rd, arg_rj, arg_rk}}, -+ // MASKNEZ rd, rj, rk -+ {mask: 0xffff8000, value: 0x00138000, op: MASKNEZ, args: instArgs{arg_rd, arg_rj, arg_rk}}, -+ // MOD.D rd, rj, rk -+ {mask: 0xffff8000, value: 0x00228000, op: MOD_D, args: instArgs{arg_rd, arg_rj, arg_rk}}, -+ // MOD.DU rd, rj, rk -+ {mask: 0xffff8000, value: 0x00238000, op: MOD_DU, args: instArgs{arg_rd, arg_rj, arg_rk}}, -+ // MOD.W rd, rj, rk -+ {mask: 0xffff8000, value: 0x00208000, op: MOD_W, args: instArgs{arg_rd, arg_rj, arg_rk}}, -+ // MOD.WU rd, rj, rk -+ {mask: 0xffff8000, value: 0x00218000, op: MOD_WU, args: instArgs{arg_rd, arg_rj, arg_rk}}, -+ // MOVCF2FR fd, cj -+ {mask: 0xffffff00, value: 0x0114d400, op: MOVCF2FR, args: instArgs{arg_fd, arg_cj}}, -+ // MOVCF2GR rd, cj -+ {mask: 0xffffff00, value: 0x0114dc00, op: MOVCF2GR, args: instArgs{arg_rd, arg_cj}}, -+ // MOVFCSR2GR rd, fcsr -+ {mask: 0xfffffc00, value: 0x0114c800, op: MOVFCSR2GR, args: instArgs{arg_rd, arg_fcsr_9_5}}, -+ // MOVFR2CF cd, fj -+ {mask: 0xfffffc18, value: 
0x0114d000, op: MOVFR2CF, args: instArgs{arg_cd, arg_fj}}, -+ // MOVFR2GR.D rd, fj -+ {mask: 0xfffffc00, value: 0x0114b800, op: MOVFR2GR_D, args: instArgs{arg_rd, arg_fj}}, -+ // MOVFR2GR.S rd, fj -+ {mask: 0xfffffc00, value: 0x0114b400, op: MOVFR2GR_S, args: instArgs{arg_rd, arg_fj}}, -+ // MOVFRH2GR.S rd, fj -+ {mask: 0xfffffc00, value: 0x0114bc00, op: MOVFRH2GR_S, args: instArgs{arg_rd, arg_fj}}, -+ // MOVGR2CF cd, rj -+ {mask: 0xfffffc18, value: 0x0114d800, op: MOVGR2CF, args: instArgs{arg_cd, arg_rj}}, -+ // MOVGR2FCSR fcsr, rj -+ {mask: 0xfffffc00, value: 0x0114c000, op: MOVGR2FCSR, args: instArgs{arg_fcsr_4_0, arg_rj}}, -+ // MOVGR2FRH.W fd, rj -+ {mask: 0xfffffc00, value: 0x0114ac00, op: MOVGR2FRH_W, args: instArgs{arg_fd, arg_rj}}, -+ // MOVGR2FR.D fd, rj -+ {mask: 0xfffffc00, value: 0x0114a800, op: MOVGR2FR_D, args: instArgs{arg_fd, arg_rj}}, -+ // MOVGR2FR.W fd, rj -+ {mask: 0xfffffc00, value: 0x0114a400, op: MOVGR2FR_W, args: instArgs{arg_fd, arg_rj}}, -+ // MULH.D rd, rj, rk -+ {mask: 0xffff8000, value: 0x001e0000, op: MULH_D, args: instArgs{arg_rd, arg_rj, arg_rk}}, -+ // MULH.DU rd, rj, rk -+ {mask: 0xffff8000, value: 0x001e8000, op: MULH_DU, args: instArgs{arg_rd, arg_rj, arg_rk}}, -+ // MULH.W rd, rj, rk -+ {mask: 0xffff8000, value: 0x001c8000, op: MULH_W, args: instArgs{arg_rd, arg_rj, arg_rk}}, -+ // MULH.WU rd, rj, rk -+ {mask: 0xffff8000, value: 0x001d0000, op: MULH_WU, args: instArgs{arg_rd, arg_rj, arg_rk}}, -+ // MULW.D.W rd, rj, rk -+ {mask: 0xffff8000, value: 0x001f0000, op: MULW_D_W, args: instArgs{arg_rd, arg_rj, arg_rk}}, -+ // MULW.D.WU rd, rj, rk -+ {mask: 0xffff8000, value: 0x001f8000, op: MULW_D_WU, args: instArgs{arg_rd, arg_rj, arg_rk}}, -+ // MUL.D rd, rj, rk -+ {mask: 0xffff8000, value: 0x001d8000, op: MUL_D, args: instArgs{arg_rd, arg_rj, arg_rk}}, -+ // MUL.W rd, rj, rk -+ {mask: 0xffff8000, value: 0x001c0000, op: MUL_W, args: instArgs{arg_rd, arg_rj, arg_rk}}, -+ // NOR rd, rj, rk -+ {mask: 0xffff8000, value: 0x00140000, op: 
NOR, args: instArgs{arg_rd, arg_rj, arg_rk}}, -+ // OR rd, rj, rk -+ {mask: 0xffff8000, value: 0x00150000, op: OR, args: instArgs{arg_rd, arg_rj, arg_rk}}, -+ // ORI rd, rj, ui12 -+ {mask: 0xffc00000, value: 0x03800000, op: ORI, args: instArgs{arg_rd, arg_rj, arg_ui12_21_10}}, -+ // ORN rd, rj, rk -+ {mask: 0xffff8000, value: 0x00160000, op: ORN, args: instArgs{arg_rd, arg_rj, arg_rk}}, -+ // PCADDI rd, si20 -+ {mask: 0xfe000000, value: 0x18000000, op: PCADDI, args: instArgs{arg_rd, arg_si20_24_5}}, -+ // PCADDU12I rd, si20 -+ {mask: 0xfe000000, value: 0x1c000000, op: PCADDU12I, args: instArgs{arg_rd, arg_si20_24_5}}, -+ // PCADDU18I rd, si20 -+ {mask: 0xfe000000, value: 0x1e000000, op: PCADDU18I, args: instArgs{arg_rd, arg_si20_24_5}}, -+ // PCALAU12I rd, si20 -+ {mask: 0xfe000000, value: 0x1a000000, op: PCALAU12I, args: instArgs{arg_rd, arg_si20_24_5}}, -+ // PRELD hint, rj, si12 -+ {mask: 0xffc00000, value: 0x2ac00000, op: PRELD, args: instArgs{arg_hint_4_0, arg_rj, arg_si12_21_10}}, -+ // PRELDX hint, rj, rk -+ {mask: 0xffff8000, value: 0x382c0000, op: PRELDX, args: instArgs{arg_hint_4_0, arg_rj, arg_rk}}, -+ // RDTIMEH.W rd, rj -+ {mask: 0xfffffc00, value: 0x00006400, op: RDTIMEH_W, args: instArgs{arg_rd, arg_rj}}, -+ // RDTIMEL.W rd, rj -+ {mask: 0xfffffc00, value: 0x00006000, op: RDTIMEL_W, args: instArgs{arg_rd, arg_rj}}, -+ // RDTIME.D rd, rj -+ {mask: 0xfffffc00, value: 0x00006800, op: RDTIME_D, args: instArgs{arg_rd, arg_rj}}, -+ // REVB.2H rd, rj -+ {mask: 0xfffffc00, value: 0x00003000, op: REVB_2H, args: instArgs{arg_rd, arg_rj}}, -+ // REVB.2W rd, rj -+ {mask: 0xfffffc00, value: 0x00003800, op: REVB_2W, args: instArgs{arg_rd, arg_rj}}, -+ // REVB.4H rd, rj -+ {mask: 0xfffffc00, value: 0x00003400, op: REVB_4H, args: instArgs{arg_rd, arg_rj}}, -+ // REVB.D rd, rj -+ {mask: 0xfffffc00, value: 0x00003c00, op: REVB_D, args: instArgs{arg_rd, arg_rj}}, -+ // REVH.2W rd, rj -+ {mask: 0xfffffc00, value: 0x00004000, op: REVH_2W, args: instArgs{arg_rd, arg_rj}}, 
-+ // REVH.D rd, rj -+ {mask: 0xfffffc00, value: 0x00004400, op: REVH_D, args: instArgs{arg_rd, arg_rj}}, -+ // ROTRI.D rd, rj, ui6 -+ {mask: 0xffff0000, value: 0x004d0000, op: ROTRI_D, args: instArgs{arg_rd, arg_rj, arg_ui6_15_10}}, -+ // ROTRI.W rd, rj, ui5 -+ {mask: 0xffff8000, value: 0x004c8000, op: ROTRI_W, args: instArgs{arg_rd, arg_rj, arg_ui5_14_10}}, -+ // ROTR.D rd, rj, rk -+ {mask: 0xffff8000, value: 0x001b8000, op: ROTR_D, args: instArgs{arg_rd, arg_rj, arg_rk}}, -+ // ROTR.W rd, rj, rk -+ {mask: 0xffff8000, value: 0x001b0000, op: ROTR_W, args: instArgs{arg_rd, arg_rj, arg_rk}}, -+ // SC.D rd, rj, si14 -+ {mask: 0xff000000, value: 0x23000000, op: SC_D, args: instArgs{arg_rd, arg_rj, arg_si14_23_10}}, -+ // SC.W rd, rj, si14 -+ {mask: 0xff000000, value: 0x21000000, op: SC_W, args: instArgs{arg_rd, arg_rj, arg_si14_23_10}}, -+ // SLLI.D rd, rj, ui6 -+ {mask: 0xffff0000, value: 0x00410000, op: SLLI_D, args: instArgs{arg_rd, arg_rj, arg_ui6_15_10}}, -+ // SLLI.W rd, rj, ui5 -+ {mask: 0xffff8000, value: 0x00408000, op: SLLI_W, args: instArgs{arg_rd, arg_rj, arg_ui5_14_10}}, -+ // SLL.D rd, rj, rk -+ {mask: 0xffff8000, value: 0x00188000, op: SLL_D, args: instArgs{arg_rd, arg_rj, arg_rk}}, -+ // SLL.W rd, rj, rk -+ {mask: 0xffff8000, value: 0x00170000, op: SLL_W, args: instArgs{arg_rd, arg_rj, arg_rk}}, -+ // SLT rd, rj, rk -+ {mask: 0xffff8000, value: 0x00120000, op: SLT, args: instArgs{arg_rd, arg_rj, arg_rk}}, -+ // SLTI rd, rj, si12 -+ {mask: 0xffc00000, value: 0x02000000, op: SLTI, args: instArgs{arg_rd, arg_rj, arg_si12_21_10}}, -+ // SLTU rd, rj, rk -+ {mask: 0xffff8000, value: 0x00128000, op: SLTU, args: instArgs{arg_rd, arg_rj, arg_rk}}, -+ // SLTUI rd, rj, si12 -+ {mask: 0xffc00000, value: 0x02400000, op: SLTUI, args: instArgs{arg_rd, arg_rj, arg_si12_21_10}}, -+ // SRAI.D rd, rj, ui6 -+ {mask: 0xffff0000, value: 0x00490000, op: SRAI_D, args: instArgs{arg_rd, arg_rj, arg_ui6_15_10}}, -+ // SRAI.W rd, rj, ui5 -+ {mask: 0xffff8000, value: 0x00488000, 
op: SRAI_W, args: instArgs{arg_rd, arg_rj, arg_ui5_14_10}}, -+ // SRA.D rd, rj, rk -+ {mask: 0xffff8000, value: 0x00198000, op: SRA_D, args: instArgs{arg_rd, arg_rj, arg_rk}}, -+ // SRA.W rd, rj, rk -+ {mask: 0xffff8000, value: 0x00180000, op: SRA_W, args: instArgs{arg_rd, arg_rj, arg_rk}}, -+ // SRLI.D rd, rj, ui6 -+ {mask: 0xffff0000, value: 0x00450000, op: SRLI_D, args: instArgs{arg_rd, arg_rj, arg_ui6_15_10}}, -+ // SRLI.W rd, rj, ui5 -+ {mask: 0xffff8000, value: 0x00448000, op: SRLI_W, args: instArgs{arg_rd, arg_rj, arg_ui5_14_10}}, -+ // SRL.D rd, rj, rk -+ {mask: 0xffff8000, value: 0x00190000, op: SRL_D, args: instArgs{arg_rd, arg_rj, arg_rk}}, -+ // SRL.W rd, rj, rk -+ {mask: 0xffff8000, value: 0x00178000, op: SRL_W, args: instArgs{arg_rd, arg_rj, arg_rk}}, -+ // STGT.B rd, rj, rk -+ {mask: 0xffff8000, value: 0x387c0000, op: STGT_B, args: instArgs{arg_rd, arg_rj, arg_rk}}, -+ // STGT.D rd, rj, rk -+ {mask: 0xffff8000, value: 0x387d8000, op: STGT_D, args: instArgs{arg_rd, arg_rj, arg_rk}}, -+ // STGT.H rd, rj, rk -+ {mask: 0xffff8000, value: 0x387c8000, op: STGT_H, args: instArgs{arg_rd, arg_rj, arg_rk}}, -+ // STGT.W rd, rj, rk -+ {mask: 0xffff8000, value: 0x387d0000, op: STGT_W, args: instArgs{arg_rd, arg_rj, arg_rk}}, -+ // STLE.B rd, rj, rk -+ {mask: 0xffff8000, value: 0x387e0000, op: STLE_B, args: instArgs{arg_rd, arg_rj, arg_rk}}, -+ // STLE.D rd, rj, rk -+ {mask: 0xffff8000, value: 0x387f8000, op: STLE_D, args: instArgs{arg_rd, arg_rj, arg_rk}}, -+ // STLE.H rd, rj, rk -+ {mask: 0xffff8000, value: 0x387e8000, op: STLE_H, args: instArgs{arg_rd, arg_rj, arg_rk}}, -+ // STLE.W rd, rj, rk -+ {mask: 0xffff8000, value: 0x387f0000, op: STLE_W, args: instArgs{arg_rd, arg_rj, arg_rk}}, -+ // STPTR.D rd, rj, si14 -+ {mask: 0xff000000, value: 0x27000000, op: STPTR_D, args: instArgs{arg_rd, arg_rj, arg_si14_23_10}}, -+ // STPTR.W rd, rj, si14 -+ {mask: 0xff000000, value: 0x25000000, op: STPTR_W, args: instArgs{arg_rd, arg_rj, arg_si14_23_10}}, -+ // STX.B rd, rj, 
rk -+ {mask: 0xffff8000, value: 0x38100000, op: STX_B, args: instArgs{arg_rd, arg_rj, arg_rk}}, -+ // STX.D rd, rj, rk -+ {mask: 0xffff8000, value: 0x381c0000, op: STX_D, args: instArgs{arg_rd, arg_rj, arg_rk}}, -+ // STX.H rd, rj, rk -+ {mask: 0xffff8000, value: 0x38140000, op: STX_H, args: instArgs{arg_rd, arg_rj, arg_rk}}, -+ // STX.W rd, rj, rk -+ {mask: 0xffff8000, value: 0x38180000, op: STX_W, args: instArgs{arg_rd, arg_rj, arg_rk}}, -+ // ST.B rd, rj, si12 -+ {mask: 0xffc00000, value: 0x29000000, op: ST_B, args: instArgs{arg_rd, arg_rj, arg_si12_21_10}}, -+ // ST.D rd, rj, si12 -+ {mask: 0xffc00000, value: 0x29c00000, op: ST_D, args: instArgs{arg_rd, arg_rj, arg_si12_21_10}}, -+ // ST.H rd, rj, si12 -+ {mask: 0xffc00000, value: 0x29400000, op: ST_H, args: instArgs{arg_rd, arg_rj, arg_si12_21_10}}, -+ // ST.W rd, rj, si12 -+ {mask: 0xffc00000, value: 0x29800000, op: ST_W, args: instArgs{arg_rd, arg_rj, arg_si12_21_10}}, -+ // SUB.D rd, rj, rk -+ {mask: 0xffff8000, value: 0x00118000, op: SUB_D, args: instArgs{arg_rd, arg_rj, arg_rk}}, -+ // SUB.W rd, rj, rk -+ {mask: 0xffff8000, value: 0x00110000, op: SUB_W, args: instArgs{arg_rd, arg_rj, arg_rk}}, -+ // SYSCALL code -+ {mask: 0xffff8000, value: 0x002b0000, op: SYSCALL, args: instArgs{arg_code_14_0}}, -+ // TLBCLR -+ {mask: 0xffffffff, value: 0x06482000, op: TLBCLR, args: instArgs{}}, -+ // TLBFILL -+ {mask: 0xffffffff, value: 0x06483400, op: TLBFILL, args: instArgs{}}, -+ // TLBFLUSH -+ {mask: 0xffffffff, value: 0x06482400, op: TLBFLUSH, args: instArgs{}}, -+ // TLBRD -+ {mask: 0xffffffff, value: 0x06482c00, op: TLBRD, args: instArgs{}}, -+ // TLBSRCH -+ {mask: 0xffffffff, value: 0x06482800, op: TLBSRCH, args: instArgs{}}, -+ // TLBWR -+ {mask: 0xffffffff, value: 0x06483000, op: TLBWR, args: instArgs{}}, -+ // XOR rd, rj, rk -+ {mask: 0xffff8000, value: 0x00158000, op: XOR, args: instArgs{arg_rd, arg_rj, arg_rk}}, -+ // XORI rd, rj, ui12 -+ {mask: 0xffc00000, value: 0x03c00000, op: XORI, args: 
instArgs{arg_rd, arg_rj, arg_ui12_21_10}}, -+} --- -2.38.1 - diff --git a/0045-cmd-internal-obj-loong64-remove-unused-register-alia.patch b/0005-cmd-internal-obj-loong64-remove-unused-register-alia.patch similarity index 69% rename from 0045-cmd-internal-obj-loong64-remove-unused-register-alia.patch rename to 0005-cmd-internal-obj-loong64-remove-unused-register-alia.patch index d413f1ffbf5ac68a1cbf77ec11df64faa0f66a29..34a43a06ff29f3746fc70132c426bacdfed426e9 100644 --- a/0045-cmd-internal-obj-loong64-remove-unused-register-alia.patch +++ b/0005-cmd-internal-obj-loong64-remove-unused-register-alia.patch @@ -1,18 +1,19 @@ -From 26000403c06ba6d7d7cd3ad9719b8bde71886dd3 Mon Sep 17 00:00:00 2001 +From 2ecb3ca09093ce12b2e47d97cbff223a950de0bb Mon Sep 17 00:00:00 2001 From: Guoqi Chen -Date: Thu, 16 Nov 2023 19:55:47 +0800 -Subject: [PATCH 45/51] cmd/internal/obj/loong64: remove unused register alias +Date: Thu, 16 Nov 2023 17:28:46 +0800 +Subject: [PATCH 05/44] cmd/internal/obj/loong64: remove unused register alias + definitions -Change-Id: Id6447437ba5492f22417231badac4805fcac4474 +Change-Id: Ie788747372cd47cb3780e75b35750bb08bd166fc --- src/cmd/internal/obj/loong64/a.out.go | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/cmd/internal/obj/loong64/a.out.go b/src/cmd/internal/obj/loong64/a.out.go -index 8df48a1e01..156dfda8b6 100644 +index e6984dcba7..53b005af4d 100644 --- a/src/cmd/internal/obj/loong64/a.out.go +++ b/src/cmd/internal/obj/loong64/a.out.go -@@ -157,8 +157,6 @@ const ( +@@ -225,8 +225,6 @@ const ( REGZERO = REG_R0 // set to zero REGLINK = REG_R1 REGSP = REG_R3 diff --git a/0006-cmd-internal-objfile-add-loong64-disassembler-suppor.patch b/0006-cmd-internal-objfile-add-loong64-disassembler-suppor.patch deleted file mode 100644 index d62d49627f55ceb471c8a4db4d134cbc3dcd3a77..0000000000000000000000000000000000000000 --- a/0006-cmd-internal-objfile-add-loong64-disassembler-suppor.patch +++ /dev/null @@ -1,73 +0,0 @@ -From 
7941fee1ffb6c3560ec3507514c254bd98eeda28 Mon Sep 17 00:00:00 2001 -From: chenguoqi -Date: Fri, 10 Feb 2023 15:10:48 +0800 -Subject: [PATCH 06/51] cmd/internal/objfile: add loong64 disassembler support - -Change-Id: Id29c2de9e592a07a9f932a4aa3718c6b25788082 ---- - src/cmd/internal/objfile/disasm.go | 14 ++++++++++++++ - src/cmd/internal/objfile/elf.go | 2 ++ - 2 files changed, 16 insertions(+) - -diff --git a/src/cmd/internal/objfile/disasm.go b/src/cmd/internal/objfile/disasm.go -index c298d7e1a9..129741fe01 100644 ---- a/src/cmd/internal/objfile/disasm.go -+++ b/src/cmd/internal/objfile/disasm.go -@@ -23,6 +23,7 @@ import ( - - "golang.org/x/arch/arm/armasm" - "golang.org/x/arch/arm64/arm64asm" -+ "golang.org/x/arch/loong64/loong64asm" - "golang.org/x/arch/ppc64/ppc64asm" - "golang.org/x/arch/x86/x86asm" - ) -@@ -366,6 +367,17 @@ func disasm_arm64(code []byte, pc uint64, lookup lookupFunc, byteOrder binary.By - return text, 4 - } - -+func disasm_loong64(code []byte, pc uint64, lookup lookupFunc, byteOrder binary.ByteOrder, gnuAsm bool) (string, int) { -+ inst, err := loong64asm.Decode(code) -+ var text string -+ if err != nil || inst.Op == 0 { -+ text = "?" 
-+ } else if gnuAsm { -+ text = fmt.Sprintf("%s %s", "", loong64asm.GNUSyntax(inst)) -+ } -+ return text, 4 -+} -+ - func disasm_ppc64(code []byte, pc uint64, lookup lookupFunc, byteOrder binary.ByteOrder, gnuAsm bool) (string, int) { - inst, err := ppc64asm.Decode(code, byteOrder) - var text string -@@ -388,6 +400,7 @@ var disasms = map[string]disasmFunc{ - "amd64": disasm_amd64, - "arm": disasm_arm, - "arm64": disasm_arm64, -+ "loong64": disasm_loong64, - "ppc64": disasm_ppc64, - "ppc64le": disasm_ppc64, - } -@@ -397,6 +410,7 @@ var byteOrders = map[string]binary.ByteOrder{ - "amd64": binary.LittleEndian, - "arm": binary.LittleEndian, - "arm64": binary.LittleEndian, -+ "loong64": binary.LittleEndian, - "ppc64": binary.BigEndian, - "ppc64le": binary.LittleEndian, - "s390x": binary.BigEndian, -diff --git a/src/cmd/internal/objfile/elf.go b/src/cmd/internal/objfile/elf.go -index f25e4a65d6..9048be7d73 100644 ---- a/src/cmd/internal/objfile/elf.go -+++ b/src/cmd/internal/objfile/elf.go -@@ -120,6 +120,8 @@ func (f *elfFile) goarch() string { - return "arm" - case elf.EM_AARCH64: - return "arm64" -+ case elf.EM_LOONGARCH: -+ return "loong64" - case elf.EM_PPC64: - if f.elf.ByteOrder == binary.LittleEndian { - return "ppc64le" --- -2.38.1 - diff --git a/0006-internal-bytealg-optimize-IndexByte-and-IndexByteStr.patch b/0006-internal-bytealg-optimize-IndexByte-and-IndexByteStr.patch new file mode 100644 index 0000000000000000000000000000000000000000..b295cb6d9be8b060f1b9ce2adf084cc77b189724 --- /dev/null +++ b/0006-internal-bytealg-optimize-IndexByte-and-IndexByteStr.patch @@ -0,0 +1,160 @@ +From 0b580e45412ffc11f3a1c7ed7165f7a81e51adec Mon Sep 17 00:00:00 2001 +From: Huang Qiqi +Date: Fri, 17 May 2024 17:10:59 +0800 +Subject: [PATCH 06/44] internal/bytealg: optimize IndexByte and + IndexByteString function for loong64 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Benchmark results on Loongson 3C5000 (which is an LA464 
implementation): + +goos: linux +goarch: loong64 +pkg: bytes +cpu: Loongson-3C5000 @ 2200.00MHz + │ test/old_3c5000_indexbyte.log │ test/new_3c5000_indexbyte.log │ + │ sec/op │ sec/op vs base │ +IndexByte/10 19.72n ± 0% 13.72n ± 0% -30.44% (p=0.000 n=20) +IndexByte/32 58.27n ± 0% 21.54n ± 0% -63.04% (p=0.000 n=20) +IndexByte/4K 5.609µ ± 0% 2.349µ ± 0% -58.13% (p=0.000 n=20) +IndexByte/4M 3.844m ± 2% 2.408m ± 1% -37.36% (p=0.000 n=20) +IndexByte/64M 62.38m ± 0% 41.83m ± 2% -32.94% (p=0.000 n=20) +geomean 17.29µ 9.309µ -46.17% + +Change-Id: I9d60af0196a0078e829669ccd88f93b5f7a5db0a +--- + src/internal/bytealg/indexbyte_loong64.s | 105 ++++++++++++++++++----- + 1 file changed, 82 insertions(+), 23 deletions(-) + +diff --git a/src/internal/bytealg/indexbyte_loong64.s b/src/internal/bytealg/indexbyte_loong64.s +index c9591b3cda..7811741423 100644 +--- a/src/internal/bytealg/indexbyte_loong64.s ++++ b/src/internal/bytealg/indexbyte_loong64.s +@@ -10,41 +10,100 @@ TEXT ·IndexByte(SB),NOSPLIT,$0-40 + // R5 = b_len + // R6 = b_cap (unused) + // R7 = byte to find +- AND $0xff, R7 ++ ADDV R4, R5 // end + MOVV R4, R6 // store base for later ++ AND $0xff, R7 ++ JMP indexbytebody<>(SB) ++ ++TEXT ·IndexByteString(SB),NOSPLIT,$0-32 ++ // R4 = s_base ++ // R5 = s_len ++ // R6 = byte to find ++ AND $0xff, R6, R7 + ADDV R4, R5 // end +- ADDV $-1, R4 ++ MOVV R4, R6 // store base for later ++ JMP indexbytebody<>(SB) + +- PCALIGN $16 ++// input: ++// R4: b_base ++// R5: end ++// R6: store base for later ++// R7: byte to find ++TEXT indexbytebody<>(SB),NOSPLIT,$0 + loop: ++ ADDV $8, R4, R10 ++ BLT R5, R10, tail ++ MOVV (R4), R8 ++ ++ AND $0xff, R8, R9 ++ BEQ R7, R9, found ++ ++ WORD $0xcf2109 // bstrpick.w r9, r8, 15, 8 ++ BEQ R7, R9, byte_1th ++ ++ WORD $0xd74109 // bstrpick.w r9, r8, 23, 16 ++ BEQ R7, R9, byte_2th ++ ++ WORD $0xdf6109 // bstrpick.w r9, r8, 31, 24 ++ BEQ R7, R9, byte_3th ++ ++ WORD $0xe78109 // bstrpick.w r9, r8, 39, 32 ++ BEQ R7, R9, byte_4th ++ ++ WORD $0xefa109 // 
bstrpick.w r9, r8, 47, 40 ++ BEQ R7, R9, byte_5th ++ ++ WORD $0xf7c109 // bstrpick.w r9, r8, 55, 48 ++ BEQ R7, R9, byte_6th ++ ++ WORD $0xffe109 // bstrpick.w r9, r8, 63, 56 ++ BEQ R7, R9, byte_7th ++ ++ MOVV R10, R4 ++ JMP loop ++ ++tail: ++ BEQ R4, R5, notfound ++ MOVBU (R4), R8 ++ BEQ R7, R8, found + ADDV $1, R4 +- BEQ R4, R5, notfound +- MOVBU (R4), R8 +- BNE R7, R8, loop ++ JMP tail + +- SUBV R6, R4 // remove base ++byte_1th: ++ ADDV $1, R4 ++ SUBV R6, R4 + RET + +-notfound: +- MOVV $-1, R4 ++byte_2th: ++ ADDV $2, R4 ++ SUBV R6, R4 + RET + +-TEXT ·IndexByteString(SB),NOSPLIT,$0-32 +- // R4 = s_base +- // R5 = s_len +- // R6 = byte to find +- MOVV R4, R7 // store base for later +- ADDV R4, R5 // end +- ADDV $-1, R4 ++byte_3th: ++ ADDV $3, R4 ++ SUBV R6, R4 ++ RET + +- PCALIGN $16 +-loop: +- ADDV $1, R4 +- BEQ R4, R5, notfound +- MOVBU (R4), R8 +- BNE R6, R8, loop ++byte_4th: ++ ADDV $4, R4 ++ SUBV R6, R4 ++ RET ++ ++byte_5th: ++ ADDV $5, R4 ++ SUBV R6, R4 ++ RET + +- SUBV R7, R4 // remove base ++byte_6th: ++ ADDV $6, R4 ++ SUBV R6, R4 ++ RET ++ ++byte_7th: ++ ADDV $7, R4 ++ ++found: ++ SUBV R6, R4 // remove base + RET + + notfound: +-- +2.38.1 + diff --git a/0007-internal-bytealg-optimize-memequal-and-memequal_varl.patch b/0007-internal-bytealg-optimize-memequal-and-memequal_varl.patch new file mode 100644 index 0000000000000000000000000000000000000000..7e97b4ae39bffab8dbc8df201de06e31b71fee74 --- /dev/null +++ b/0007-internal-bytealg-optimize-memequal-and-memequal_varl.patch @@ -0,0 +1,142 @@ +From 83f497423050707a8cd27152256699ccd7819456 Mon Sep 17 00:00:00 2001 +From: Huang Qiqi +Date: Sat, 18 May 2024 11:00:57 +0800 +Subject: [PATCH 07/44] internal/bytealg: optimize memequal and memequal_varlen + function for loong64 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +goos: linux +goarch: loong64 +pkg: bytes +cpu: Loongson-3C5000 @ 2200.00MHz + │ test/old_3c5000_equal.log │ test/new_3c5000_equal.log │ + │ sec/op │ 
sec/op vs base │ +Equal/0 0.6824n ± 0% 0.6837n ± 0% +0.20% (p=0.000 n=20) +Equal/1 10.46n ± 0% 12.71n ± 0% +21.46% (p=0.000 n=20) +Equal/6 17.29n ± 0% 19.57n ± 0% +13.22% (p=0.000 n=20) +Equal/9 21.38n ± 0% 13.19n ± 0% -38.31% (p=0.000 n=20) +Equal/15 29.57n ± 0% 21.39n ± 0% -27.68% (p=0.000 n=20) +Equal/16 30.94n ± 0% 10.46n ± 0% -66.19% (p=0.000 n=20) +Equal/20 36.40n ± 0% 16.83n ± 0% -53.76% (p=0.000 n=20) +Equal/32 52.78n ± 0% 12.28n ± 0% -76.73% (p=0.000 n=20) +Equal/4K 5606.0n ± 0% 385.8n ± 0% -93.12% (p=0.000 n=20) +Equal/4M 5728.9µ ± 0% 746.4µ ± 0% -86.97% (p=0.000 n=20) +Equal/64M 92.02m ± 0% 14.13m ± 5% -84.65% (p=0.000 n=20) +EqualBothUnaligned/64_0 98.73n ± 0% 10.04n ± 0% -89.83% (p=0.000 n=20) +EqualBothUnaligned/64_1 98.73n ± 0% 10.29n ± 0% -89.58% (p=0.000 n=20) +EqualBothUnaligned/64_4 98.73n ± 0% 10.29n ± 0% -89.58% (p=0.000 n=20) +EqualBothUnaligned/64_7 98.73n ± 0% 10.28n ± 0% -89.59% (p=0.000 n=20) +EqualBothUnaligned/4096_0 5602.0n ± 0% 365.8n ± 0% -93.47% (p=0.000 n=20) +EqualBothUnaligned/4096_1 5602.0n ± 0% 437.2n ± 0% -92.19% (p=0.000 n=20) +EqualBothUnaligned/4096_4 5602.0n ± 0% 436.4n ± 0% -92.21% (p=0.000 n=20) +EqualBothUnaligned/4096_7 5602.0n ± 0% 439.2n ± 0% -92.16% (p=0.000 n=20) +EqualBothUnaligned/4194304_0 5729.0µ ± 0% 732.4µ ± 0% -87.22% (p=0.000 n=20) +EqualBothUnaligned/4194304_1 5729.2µ ± 0% 781.8µ ± 1% -86.35% (p=0.000 n=20) +EqualBothUnaligned/4194304_4 5729.3µ ± 0% 773.9µ ± 0% -86.49% (p=0.000 n=20) +EqualBothUnaligned/4194304_7 5729.3µ ± 0% 773.9µ ± 5% -86.49% (p=0.000 n=20) +EqualBothUnaligned/67108864_0 92.38m ± 0% 34.61m ± 38% -62.53% (p=0.000 n=20) +EqualBothUnaligned/67108864_1 92.38m ± 0% 33.07m ± 23% -64.20% (p=0.000 n=20) +EqualBothUnaligned/67108864_4 92.38m ± 0% 82.09m ± 32% -11.14% (p=0.000 n=20) +EqualBothUnaligned/67108864_7 92.39m ± 0% 61.47m ± 16% -33.46% (p=0.000 n=20) +geomean 11.86µ 2.654µ -77.62% + +Change-Id: Ib181f532238e6f6d82a3e9e6987abe121688b6eb +--- + src/internal/bytealg/equal_loong64.s | 72 
+++++++++++++++++++--------- + 1 file changed, 49 insertions(+), 23 deletions(-) + +diff --git a/src/internal/bytealg/equal_loong64.s b/src/internal/bytealg/equal_loong64.s +index 830b09bd2c..4cc31d5e46 100644 +--- a/src/internal/bytealg/equal_loong64.s ++++ b/src/internal/bytealg/equal_loong64.s +@@ -9,36 +9,62 @@ + + // memequal(a, b unsafe.Pointer, size uintptr) bool + TEXT runtime·memequal(SB),NOSPLIT|NOFRAME,$0-25 +- BEQ R4, R5, eq +- ADDV R4, R6, R7 +- PCALIGN $16 +-loop: +- BNE R4, R7, test +- MOVV $1, R4 ++ // R4 = a_base ++ // R5 = b_base ++ // R6 = size ++ JMP equalbody<>(SB) ++ ++// memequal_varlen(a, b unsafe.Pointer) bool ++TEXT runtime·memequal_varlen(SB),NOSPLIT,$0-17 ++ // R4 = a_base ++ // R5 = b_base ++ MOVV 8(REGCTXT), R6 // compiler stores size at offset 8 in the closure ++ JMP equalbody<>(SB) ++ ++TEXT equalbody<>(SB),NOSPLIT|NOFRAME,$0 ++ BEQ R4, R5, eq ++ ADDV R4, R6, R6 // end ++ ++loop_16byte: ++ ADDV $16, R4, R9 ++ BLT R6, R9, load8byte ++ MOVV (R4), R7 ++ MOVV (R5), R8 ++ MOVV 8(R4), R10 ++ MOVV 8(R5), R11 ++ MOVV R9, R4 ++ XOR R7, R8, R7 ++ XOR R10, R11, R10 ++ OR R10, R7, R7 ++ ADDV $16, R5 ++ BEQ R7, loop_16byte ++ ++ MOVB R0, R4 + RET +-test: +- MOVBU (R4), R9 ++ ++load8byte: ++ ADDV $8, R4, R9 ++ BLT R6, R9, tail ++ MOVV (R4), R7 ++ MOVV (R5), R8 ++ MOVV R9, R4 ++ ADDV $8, R5 ++ BEQ R7, R8, tail ++ ++ MOVB R0, R4 ++ RET ++ ++tail: ++ BEQ R4, R6, eq ++ MOVBU (R4), R7 ++ MOVBU (R5), R8 + ADDV $1, R4 +- MOVBU (R5), R10 + ADDV $1, R5 +- BEQ R9, R10, loop ++ BEQ R7, R8, tail + + MOVB R0, R4 + RET +-eq: +- MOVV $1, R4 +- RET + +-// memequal_varlen(a, b unsafe.Pointer) bool +-TEXT runtime·memequal_varlen(SB),NOSPLIT,$40-17 +- BEQ R4, R5, eq +- MOVV 8(REGCTXT), R6 // compiler stores size at offset 8 in the closure +- MOVV R4, 8(R3) +- MOVV R5, 16(R3) +- MOVV R6, 24(R3) +- JAL runtime·memequal(SB) +- MOVBU 32(R3), R4 +- RET + eq: + MOVV $1, R4 + RET +-- +2.38.1 + diff --git a/0007-runtime-remove-the-meaningless-offset-of-8-for-duffz.patch 
b/0007-runtime-remove-the-meaningless-offset-of-8-for-duffz.patch deleted file mode 100644 index ebb0d9c756cc273848b70d738b87965c4891c980..0000000000000000000000000000000000000000 --- a/0007-runtime-remove-the-meaningless-offset-of-8-for-duffz.patch +++ /dev/null @@ -1,542 +0,0 @@ -From a65d14af4e335b9b51b1c7bfd6536f68b9d62c1a Mon Sep 17 00:00:00 2001 -From: Guoqi Chen -Date: Fri, 21 Apr 2023 11:08:09 +0800 -Subject: [PATCH 07/51] runtime: remove the meaningless offset of 8 for - duffzero on loong64 - -Currently we subtract 8 from offset when calling duffzero because 8 -is added to offset in the duffzero implementation. This operation is -meaningless, so remove it. - -Change-Id: I22da26e19353275a9bfae523a9e37f8e4ec26041 ---- - src/cmd/compile/internal/loong64/ggen.go | 23 +- - src/cmd/compile/internal/loong64/ssa.go | 10 +- - .../compile/internal/ssa/_gen/LOONG64Ops.go | 2 +- - src/cmd/compile/internal/ssa/opGen.go | 2 +- - src/runtime/duff_loong64.s | 256 +++++++++--------- - src/runtime/mkduff.go | 4 +- - 6 files changed, 148 insertions(+), 149 deletions(-) - -diff --git a/src/cmd/compile/internal/loong64/ggen.go b/src/cmd/compile/internal/loong64/ggen.go -index 8a24d2f295..27d318a8bb 100644 ---- a/src/cmd/compile/internal/loong64/ggen.go -+++ b/src/cmd/compile/internal/loong64/ggen.go -@@ -5,6 +5,7 @@ - package loong64 - - import ( -+ "cmd/compile/internal/base" - "cmd/compile/internal/ir" - "cmd/compile/internal/objw" - "cmd/compile/internal/types" -@@ -16,34 +17,38 @@ func zerorange(pp *objw.Progs, p *obj.Prog, off, cnt int64, _ *uint32) *obj.Prog - if cnt == 0 { - return p - } -+ -+ // Adjust the frame to account for LR. 
-+ off += base.Ctxt.Arch.FixedFrameSize -+ - if cnt < int64(4*types.PtrSize) { - for i := int64(0); i < cnt; i += int64(types.PtrSize) { -- p = pp.Append(p, loong64.AMOVV, obj.TYPE_REG, loong64.REGZERO, 0, obj.TYPE_MEM, loong64.REGSP, 8+off+i) -+ p = pp.Append(p, loong64.AMOVV, obj.TYPE_REG, loong64.REGZERO, 0, obj.TYPE_MEM, loong64.REGSP, off+i) - } - } else if cnt <= int64(128*types.PtrSize) { -- p = pp.Append(p, loong64.AADDV, obj.TYPE_CONST, 0, 8+off-8, obj.TYPE_REG, loong64.REGRT1, 0) -+ p = pp.Append(p, loong64.AADDV, obj.TYPE_CONST, 0, off, obj.TYPE_REG, loong64.REGRT1, 0) - p.Reg = loong64.REGSP - p = pp.Append(p, obj.ADUFFZERO, obj.TYPE_NONE, 0, 0, obj.TYPE_MEM, 0, 0) - p.To.Name = obj.NAME_EXTERN - p.To.Sym = ir.Syms.Duffzero - p.To.Offset = 8 * (128 - cnt/int64(types.PtrSize)) - } else { -- // ADDV $(8+frame+lo-8), SP, r1 -+ // ADDV $(off), SP, r1 - // ADDV $cnt, r1, r2 - // loop: -- // MOVV R0, (Widthptr)r1 -+ // MOVV R0, (r1) - // ADDV $Widthptr, r1 -- // BNE r1, r2, loop -- p = pp.Append(p, loong64.AADDV, obj.TYPE_CONST, 0, 8+off-8, obj.TYPE_REG, loong64.REGRT1, 0) -+ // BNE r1, r2, loop -+ p = pp.Append(p, loong64.AADDV, obj.TYPE_CONST, 0, off, obj.TYPE_REG, loong64.REGRT1, 0) - p.Reg = loong64.REGSP - p = pp.Append(p, loong64.AADDV, obj.TYPE_CONST, 0, cnt, obj.TYPE_REG, loong64.REGRT2, 0) - p.Reg = loong64.REGRT1 -- p = pp.Append(p, loong64.AMOVV, obj.TYPE_REG, loong64.REGZERO, 0, obj.TYPE_MEM, loong64.REGRT1, int64(types.PtrSize)) -- p1 := p -+ p = pp.Append(p, loong64.AMOVV, obj.TYPE_REG, loong64.REGZERO, 0, obj.TYPE_MEM, loong64.REGRT1, 0) -+ loop := p - p = pp.Append(p, loong64.AADDV, obj.TYPE_CONST, 0, int64(types.PtrSize), obj.TYPE_REG, loong64.REGRT1, 0) - p = pp.Append(p, loong64.ABNE, obj.TYPE_REG, loong64.REGRT1, 0, obj.TYPE_BRANCH, 0, 0) - p.Reg = loong64.REGRT2 -- p.To.SetTarget(p1) -+ p.To.SetTarget(loop) - } - - return p -diff --git a/src/cmd/compile/internal/loong64/ssa.go b/src/cmd/compile/internal/loong64/ssa.go -index 
8193b4e321..574217fd92 100644 ---- a/src/cmd/compile/internal/loong64/ssa.go -+++ b/src/cmd/compile/internal/loong64/ssa.go -@@ -340,14 +340,8 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) { - p.To.Type = obj.TYPE_REG - p.To.Reg = v.Reg() - case ssa.OpLOONG64DUFFZERO: -- // runtime.duffzero expects start address - 8 in R19 -- p := s.Prog(loong64.ASUBVU) -- p.From.Type = obj.TYPE_CONST -- p.From.Offset = 8 -- p.Reg = v.Args[0].Reg() -- p.To.Type = obj.TYPE_REG -- p.To.Reg = loong64.REG_R19 -- p = s.Prog(obj.ADUFFZERO) -+ // runtime.duffzero expects start address in R19 -+ p := s.Prog(obj.ADUFFZERO) - p.To.Type = obj.TYPE_MEM - p.To.Name = obj.NAME_EXTERN - p.To.Sym = ir.Syms.Duffzero -diff --git a/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go b/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go -index 23f20fddeb..b715b36542 100644 ---- a/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go -+++ b/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go -@@ -289,7 +289,7 @@ func init() { - aux: "Int64", - argLength: 2, - reg: regInfo{ -- inputs: []regMask{gp}, -+ inputs: []regMask{buildReg("R19")}, - clobbers: buildReg("R19 R1"), - }, - faultOnNilArg0: true, -diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go -index e7caf9050c..e95cb250d9 100644 ---- a/src/cmd/compile/internal/ssa/opGen.go -+++ b/src/cmd/compile/internal/ssa/opGen.go -@@ -24266,7 +24266,7 @@ var opcodeTable = [...]opInfo{ - faultOnNilArg0: true, - reg: regInfo{ - inputs: []inputInfo{ -- {0, 1070596088}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 262144}, // R19 - }, - clobbers: 262146, // R1 R19 - }, -diff --git a/src/runtime/duff_loong64.s b/src/runtime/duff_loong64.s -index 7f78e4fa9f..63fa3bcca1 100644 ---- a/src/runtime/duff_loong64.s -+++ b/src/runtime/duff_loong64.s -@@ -5,261 +5,261 @@ - #include "textflag.h" - - TEXT runtime·duffzero(SB), NOSPLIT|NOFRAME, $0-0 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV 
$8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 
8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, 
(R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- 
MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 -- MOVV R0, 8(R19) -+ MOVV R0, (R19) - ADDV $8, R19 - RET - -diff --git a/src/runtime/mkduff.go b/src/runtime/mkduff.go -index cc58558a56..e8d4fcc93e 100644 ---- a/src/runtime/mkduff.go -+++ b/src/runtime/mkduff.go -@@ -179,11 +179,11 @@ func copyARM64(w io.Writer) { - - func zeroLOONG64(w io.Writer) { - // R0: always zero -- // R19 (aka REGRT1): ptr to memory to be zeroed - 8 -+ // R19 (aka REGRT1): ptr to memory to be zeroed - // On return, R19 points to the last zeroed dword. 
- fmt.Fprintln(w, "TEXT runtime·duffzero(SB), NOSPLIT|NOFRAME, $0-0") - for i := 0; i < 128; i++ { -- fmt.Fprintln(w, "\tMOVV\tR0, 8(R19)") -+ fmt.Fprintln(w, "\tMOVV\tR0, (R19)") - fmt.Fprintln(w, "\tADDV\t$8, R19") - } - fmt.Fprintln(w, "\tRET") --- -2.38.1 - diff --git a/0008-cmd-compiler-remove-the-meaningless-offset-of-8-for-.patch b/0008-cmd-compiler-remove-the-meaningless-offset-of-8-for-.patch deleted file mode 100644 index 8808734541bda774264ee97cce035b29a66f72ca..0000000000000000000000000000000000000000 --- a/0008-cmd-compiler-remove-the-meaningless-offset-of-8-for-.patch +++ /dev/null @@ -1,312 +0,0 @@ -From d954f762b52f269fcc009334bb5209e854696dab Mon Sep 17 00:00:00 2001 -From: Guoqi Chen -Date: Tue, 25 Apr 2023 03:27:23 +0800 -Subject: [PATCH 08/51] cmd/compiler: remove the meaningless offset of 8 for - Lowered{Zero,Move} on loong64 - -Like the CL 487295, remove the meaningless +/- offset operation in the -LoweredZero and LoweredMove implementation. - -Change LoweredMove's Rarg0 register to R20, consistent with duffcopy. - -Change-Id: I19203c428ab25e7ecf657e2ea8ebb0bca77f49ee ---- - src/cmd/compile/internal/loong64/ssa.go | 168 ++++++++---------- - .../compile/internal/ssa/_gen/LOONG64Ops.go | 28 ++- - src/cmd/compile/internal/ssa/opGen.go | 6 +- - 3 files changed, 91 insertions(+), 111 deletions(-) - -diff --git a/src/cmd/compile/internal/loong64/ssa.go b/src/cmd/compile/internal/loong64/ssa.go -index 574217fd92..f74f90fb5f 100644 ---- a/src/cmd/compile/internal/loong64/ssa.go -+++ b/src/cmd/compile/internal/loong64/ssa.go -@@ -80,6 +80,28 @@ func storeByType(t *types.Type, r int16) obj.As { - panic("bad store type") - } - -+// largestMove returns the largest move instruction possible and its size, -+// given the alignment of the total size of the move. -+// -+// e.g., a 16-byte move may use MOVV, but an 11-byte move must use MOVB. -+// -+// Note that the moves may not be on naturally aligned addresses depending on -+// the source and destination. 
-+// -+// This matches the calculation in ssa.moveSize. -+func largestMove(alignment int64) (obj.As, int64) { -+ switch { -+ case alignment%8 == 0: -+ return loong64.AMOVV, 8 -+ case alignment%4 == 0: -+ return loong64.AMOVW, 4 -+ case alignment%2 == 0: -+ return loong64.AMOVH, 2 -+ default: -+ return loong64.AMOVB, 1 -+ } -+} -+ - func ssaGenValue(s *ssagen.State, v *ssa.Value) { - switch v.Op { - case ssa.OpCopy, ssa.OpLOONG64MOVVreg: -@@ -347,49 +369,29 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) { - p.To.Sym = ir.Syms.Duffzero - p.To.Offset = v.AuxInt - case ssa.OpLOONG64LoweredZero: -- // SUBV $8, R19 -- // MOVV R0, 8(R19) -- // ADDV $8, R19 -- // BNE Rarg1, R19, -2(PC) -- // arg1 is the address of the last element to zero -- var sz int64 -- var mov obj.As -- switch { -- case v.AuxInt%8 == 0: -- sz = 8 -- mov = loong64.AMOVV -- case v.AuxInt%4 == 0: -- sz = 4 -- mov = loong64.AMOVW -- case v.AuxInt%2 == 0: -- sz = 2 -- mov = loong64.AMOVH -- default: -- sz = 1 -- mov = loong64.AMOVB -- } -- p := s.Prog(loong64.ASUBVU) -- p.From.Type = obj.TYPE_CONST -- p.From.Offset = sz -- p.To.Type = obj.TYPE_REG -- p.To.Reg = loong64.REG_R19 -- p2 := s.Prog(mov) -- p2.From.Type = obj.TYPE_REG -- p2.From.Reg = loong64.REGZERO -- p2.To.Type = obj.TYPE_MEM -- p2.To.Reg = loong64.REG_R19 -- p2.To.Offset = sz -- p3 := s.Prog(loong64.AADDVU) -- p3.From.Type = obj.TYPE_CONST -- p3.From.Offset = sz -- p3.To.Type = obj.TYPE_REG -- p3.To.Reg = loong64.REG_R19 -- p4 := s.Prog(loong64.ABNE) -- p4.From.Type = obj.TYPE_REG -- p4.From.Reg = v.Args[1].Reg() -- p4.Reg = loong64.REG_R19 -- p4.To.Type = obj.TYPE_BRANCH -- p4.To.SetTarget(p2) -+ // MOVx R0, (Rarg0) -+ // ADDV $sz, Rarg0 -+ // BGEU Rarg1, Rarg0, -2(PC) -+ mov, sz := largestMove(v.AuxInt) -+ p := s.Prog(mov) -+ p.From.Type = obj.TYPE_REG -+ p.From.Reg = loong64.REGZERO -+ p.To.Type = obj.TYPE_MEM -+ p.To.Reg = v.Args[0].Reg() -+ -+ p2 := s.Prog(loong64.AADDVU) -+ p2.From.Type = obj.TYPE_CONST -+ p2.From.Offset = sz -+ 
p2.To.Type = obj.TYPE_REG -+ p2.To.Reg = v.Args[0].Reg() -+ -+ p3 := s.Prog(loong64.ABGEU) -+ p3.From.Type = obj.TYPE_REG -+ p3.From.Reg = v.Args[1].Reg() -+ p3.Reg = v.Args[0].Reg() -+ p3.To.Type = obj.TYPE_BRANCH -+ p3.To.SetTarget(p) -+ - case ssa.OpLOONG64DUFFCOPY: - p := s.Prog(obj.ADUFFCOPY) - p.To.Type = obj.TYPE_MEM -@@ -397,61 +399,43 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) { - p.To.Sym = ir.Syms.Duffcopy - p.To.Offset = v.AuxInt - case ssa.OpLOONG64LoweredMove: -- // SUBV $8, R19 -- // MOVV 8(R19), Rtmp -- // MOVV Rtmp, (R4) -- // ADDV $8, R19 -- // ADDV $8, R4 -- // BNE Rarg2, R19, -4(PC) -- // arg2 is the address of the last element of src -- var sz int64 -- var mov obj.As -- switch { -- case v.AuxInt%8 == 0: -- sz = 8 -- mov = loong64.AMOVV -- case v.AuxInt%4 == 0: -- sz = 4 -- mov = loong64.AMOVW -- case v.AuxInt%2 == 0: -- sz = 2 -- mov = loong64.AMOVH -- default: -- sz = 1 -- mov = loong64.AMOVB -- } -- p := s.Prog(loong64.ASUBVU) -- p.From.Type = obj.TYPE_CONST -- p.From.Offset = sz -+ // MOVx (Rarg1), Rtmp -+ // MOVx Rtmp, (Rarg0) -+ // ADDV $sz, Rarg1 -+ // ADDV $sz, Rarg0 -+ // BGEU Rarg2, Rarg0, -4(PC) -+ mov, sz := largestMove(v.AuxInt) -+ p := s.Prog(mov) -+ p.From.Type = obj.TYPE_MEM -+ p.From.Reg = v.Args[1].Reg() - p.To.Type = obj.TYPE_REG -- p.To.Reg = loong64.REG_R19 -+ p.To.Reg = loong64.REGTMP -+ - p2 := s.Prog(mov) -- p2.From.Type = obj.TYPE_MEM -- p2.From.Reg = loong64.REG_R19 -- p2.From.Offset = sz -- p2.To.Type = obj.TYPE_REG -- p2.To.Reg = loong64.REGTMP -- p3 := s.Prog(mov) -- p3.From.Type = obj.TYPE_REG -- p3.From.Reg = loong64.REGTMP -- p3.To.Type = obj.TYPE_MEM -- p3.To.Reg = loong64.REG_R4 -+ p2.From.Type = obj.TYPE_REG -+ p2.From.Reg = loong64.REGTMP -+ p2.To.Type = obj.TYPE_MEM -+ p2.To.Reg = v.Args[0].Reg() -+ -+ p3 := s.Prog(loong64.AADDVU) -+ p3.From.Type = obj.TYPE_CONST -+ p3.From.Offset = sz -+ p3.To.Type = obj.TYPE_REG -+ p3.To.Reg = v.Args[1].Reg() -+ - p4 := s.Prog(loong64.AADDVU) - p4.From.Type = 
obj.TYPE_CONST - p4.From.Offset = sz - p4.To.Type = obj.TYPE_REG -- p4.To.Reg = loong64.REG_R19 -- p5 := s.Prog(loong64.AADDVU) -- p5.From.Type = obj.TYPE_CONST -- p5.From.Offset = sz -- p5.To.Type = obj.TYPE_REG -- p5.To.Reg = loong64.REG_R4 -- p6 := s.Prog(loong64.ABNE) -- p6.From.Type = obj.TYPE_REG -- p6.From.Reg = v.Args[2].Reg() -- p6.Reg = loong64.REG_R19 -- p6.To.Type = obj.TYPE_BRANCH -- p6.To.SetTarget(p2) -+ p4.To.Reg = v.Args[0].Reg() -+ -+ p5 := s.Prog(loong64.ABGEU) -+ p5.From.Type = obj.TYPE_REG -+ p5.From.Reg = v.Args[2].Reg() -+ p5.Reg = v.Args[1].Reg() -+ p5.To.Type = obj.TYPE_BRANCH -+ p5.To.SetTarget(p) -+ - case ssa.OpLOONG64CALLstatic, ssa.OpLOONG64CALLclosure, ssa.OpLOONG64CALLinter: - s.Call(v) - case ssa.OpLOONG64CALLtail: -diff --git a/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go b/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go -index b715b36542..cb058f45c0 100644 ---- a/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go -+++ b/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go -@@ -319,10 +319,9 @@ func init() { - // arg2 = mem - // auxint = alignment - // returns mem -- // SUBV $8, R19 -- // MOVV R0, 8(R19) -- // ADDV $8, R19 -- // BNE Rarg1, R19, -2(PC) -+ // MOVx R0, (R19) -+ // ADDV $sz, R19 -+ // BGEU Rarg1, R19, -2(PC) - { - name: "LoweredZero", - aux: "Int64", -@@ -331,32 +330,31 @@ func init() { - inputs: []regMask{buildReg("R19"), gp}, - clobbers: buildReg("R19"), - }, -- clobberFlags: true, -+ typ: "Mem", - faultOnNilArg0: true, - }, - - // large or unaligned move -- // arg0 = address of dst memory (in R4, changed as side effect) -+ // arg0 = address of dst memory (in R20, changed as side effect) - // arg1 = address of src memory (in R19, changed as side effect) - // arg2 = address of the last element of src - // arg3 = mem - // auxint = alignment - // returns mem -- // SUBV $8, R19 -- // MOVV 8(R19), Rtmp -- // MOVV Rtmp, (R4) -- // ADDV $8, R19 -- // ADDV $8, R4 -- // BNE Rarg2, R19, -4(PC) -+ // MOVx (R19), Rtmp -+ // MOVx 
Rtmp, (R20) -+ // ADDV $sz, R19 -+ // ADDV $sz, R20 -+ // BGEU Rarg2, R19, -4(PC) - { - name: "LoweredMove", - aux: "Int64", - argLength: 4, - reg: regInfo{ -- inputs: []regMask{buildReg("R4"), buildReg("R19"), gp}, -- clobbers: buildReg("R19 R4"), -+ inputs: []regMask{buildReg("R20"), buildReg("R19"), gp}, -+ clobbers: buildReg("R19 R20"), - }, -- clobberFlags: true, -+ typ: "Mem", - faultOnNilArg0: true, - faultOnNilArg1: true, - }, -diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go -index e95cb250d9..2b712a1189 100644 ---- a/src/cmd/compile/internal/ssa/opGen.go -+++ b/src/cmd/compile/internal/ssa/opGen.go -@@ -24289,7 +24289,6 @@ var opcodeTable = [...]opInfo{ - name: "LoweredZero", - auxType: auxInt64, - argLen: 3, -- clobberFlags: true, - faultOnNilArg0: true, - reg: regInfo{ - inputs: []inputInfo{ -@@ -24303,16 +24302,15 @@ var opcodeTable = [...]opInfo{ - name: "LoweredMove", - auxType: auxInt64, - argLen: 4, -- clobberFlags: true, - faultOnNilArg0: true, - faultOnNilArg1: true, - reg: regInfo{ - inputs: []inputInfo{ -- {0, 8}, // R4 -+ {0, 524288}, // R20 - {1, 262144}, // R19 - {2, 1070596088}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R23 R24 R25 R26 R27 R28 R29 R31 - }, -- clobbers: 262152, // R4 R19 -+ clobbers: 786432, // R19 R20 - }, - }, - { --- -2.38.1 - diff --git a/0008-internal-bytealg-optimize-Index-and-IndexString-func.patch b/0008-internal-bytealg-optimize-Index-and-IndexString-func.patch new file mode 100644 index 0000000000000000000000000000000000000000..4fb2113d1dc87b193fc7af7c42f2ab6d9a4a6047 --- /dev/null +++ b/0008-internal-bytealg-optimize-Index-and-IndexString-func.patch @@ -0,0 +1,299 @@ +From 89d740fe5889c558dbb69b6ac3a80ec38cd5765c Mon Sep 17 00:00:00 2001 +From: Huang Qiqi +Date: Thu, 23 May 2024 16:25:06 +0800 +Subject: [PATCH 08/44] internal/bytealg: optimize Index and IndexString + function for loong64 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 
+Content-Transfer-Encoding: 8bit + +goos: linux +goarch: loong64 +pkg: bytes +cpu: Loongson-3C5000 @ 2200.00MHz + │ test/old_3c5000_index.log │ test/new_3c5000_index.log │ + │ sec/op │ sec/op vs base │ +Index/10 66.42n ± 0% 20.47n ± 0% -69.18% (p=0.000 n=20) +Index/32 196.1n ± 0% 105.7n ± 0% -46.12% (p=0.000 n=20) +Index/4K 13.622µ ± 0% 5.673µ ± 0% -58.35% (p=0.000 n=20) +Index/4M 14.005m ± 0% 5.734m ± 0% -59.06% (p=0.000 n=20) +Index/64M 224.50m ± 0% 91.94m ± 0% -59.05% (p=0.000 n=20) +IndexEasy/10 21.30n ± 0% 18.66n ± 0% -12.41% (p=0.000 n=20) +IndexEasy/32 41.40n ± 0% 33.91n ± 1% -18.09% (p=0.000 n=20) +IndexEasy/4K 4.141µ ± 4% 2.373µ ± 1% -42.70% (p=0.000 n=20) +IndexEasy/4M 3.830m ± 0% 2.392m ± 0% -37.55% (p=0.000 n=20) +IndexEasy/64M 62.54m ± 1% 39.86m ± 0% -36.26% (p=0.000 n=20) +geomean 29.43µ 15.73µ -46.57% + +goos: linux +goarch: loong64 +pkg: strings +cpu: Loongson-3C5000 @ 2200.00MHz + │ test/old_3c5000_indexstring.log │ test/new_3c5000_indexstring.log │ + │ sec/op │ sec/op vs base │ +Index 30.54n ± 0% 16.91n ± 0% -44.64% (p=0.000 n=20) + +Change-Id: I92739ada1637356c6d42761a8a596b0bffec405d +--- + src/internal/bytealg/index_generic.go | 2 +- + src/internal/bytealg/index_loong64.go | 23 ++++ + src/internal/bytealg/index_loong64.s | 190 ++++++++++++++++++++++++++ + src/internal/bytealg/index_native.go | 2 +- + 4 files changed, 215 insertions(+), 2 deletions(-) + create mode 100644 src/internal/bytealg/index_loong64.go + create mode 100644 src/internal/bytealg/index_loong64.s + +diff --git a/src/internal/bytealg/index_generic.go b/src/internal/bytealg/index_generic.go +index a59e32938e..2d89c41825 100644 +--- a/src/internal/bytealg/index_generic.go ++++ b/src/internal/bytealg/index_generic.go +@@ -2,7 +2,7 @@ + // Use of this source code is governed by a BSD-style + // license that can be found in the LICENSE file. 
+ +-//go:build !amd64 && !arm64 && !s390x && !ppc64le && !ppc64 ++//go:build !amd64 && !arm64 && !s390x && !ppc64le && !ppc64 && !loong64 + + package bytealg + +diff --git a/src/internal/bytealg/index_loong64.go b/src/internal/bytealg/index_loong64.go +new file mode 100644 +index 0000000000..d6f43eb32c +--- /dev/null ++++ b/src/internal/bytealg/index_loong64.go +@@ -0,0 +1,23 @@ ++// Copyright 2018 The Go Authors. All rights reserved. ++// Use of this source code is governed by a BSD-style ++// license that can be found in the LICENSE file. ++ ++package bytealg ++ ++// Empirical data shows that using Index can get better ++// performance when len(s) <= 16. ++const MaxBruteForce = 16 ++ ++func init() { ++ // Optimize cases where the length of the substring is less than 32 bytes ++ MaxLen = 32 ++} ++ ++// Cutover reports the number of failures of IndexByte we should tolerate ++// before switching over to Index. ++// n is the number of bytes processed so far. ++// See the bytes.Index implementation for details. ++func Cutover(n int) int { ++ // 1 error per 8 characters, plus a few slop to start. ++ return (n + 16) / 8 ++} +diff --git a/src/internal/bytealg/index_loong64.s b/src/internal/bytealg/index_loong64.s +new file mode 100644 +index 0000000000..221d0332a4 +--- /dev/null ++++ b/src/internal/bytealg/index_loong64.s +@@ -0,0 +1,190 @@ ++// Copyright 2018 The Go Authors. All rights reserved. ++// Use of this source code is governed by a BSD-style ++// license that can be found in the LICENSE file. 
++ ++#include "go_asm.h" ++#include "textflag.h" ++ ++TEXT ·Index(SB),NOSPLIT,$0-56 ++ MOVV R7, R6 // R6 = separator pointer ++ MOVV R8, R7 // R7 = separator length ++ JMP indexbody<>(SB) ++ ++TEXT ·IndexString(SB),NOSPLIT,$0-40 ++ JMP indexbody<>(SB) ++ ++// input: ++// R4 = string ++// R5 = length ++// R6 = separator pointer ++// R7 = separator length (2 <= len <= 32) ++TEXT indexbody<>(SB),NOSPLIT,$0 ++ // main idea is to load 'sep' into separate register(s) ++ // to avoid repeatedly re-load it again and again ++ // for sebsequent substring comparisons ++ SUBV R7, R5, R8 ++ ADDV $1, R4, R9 // store base for later ++ MOVV $8, R5 ++ ADDV R4, R8 // end ++ BLT R5, R7, len_gt_8 ++ ++len_le_8: ++ AND $0x8, R7, R5 ++ BNE R5, len_8 ++ AND $0x4, R7, R5 ++ BNE R5, len_4_7 ++ ++len_2_3: ++ AND $0x1, R7, R5 ++ BNE R5, len_3 ++ ++len_2: ++ MOVHU (R6), R10 ++loop_2: ++ BLT R8, R4, not_found ++ MOVHU (R4), R11 ++ ADDV $1, R4 ++ BNE R10, R11, loop_2 ++ JMP found ++ ++len_3: ++ MOVHU (R6), R10 ++ MOVBU 2(R6), R11 ++loop_3: ++ BLT R8, R4, not_found ++ MOVHU (R4), R12 ++ ADDV $1, R4 ++ BNE R10, R12, loop_3 ++ MOVBU 1(R4), R12 ++ BNE R11, R12, loop_3 ++ JMP found ++ ++len_4_7: ++ AND $0x2, R7, R5 ++ BNE R5, len_6_7 ++ AND $0x1, R7, R5 ++ BNE R5, len_5 ++ ++len_4: ++ MOVWU (R6), R10 ++loop_4: ++ BLT R8, R4, not_found ++ MOVWU (R4), R11 ++ ADDV $1, R4 ++ BNE R10, R11, loop_4 ++ JMP found ++len_5: ++ MOVWU (R6), R10 ++ MOVBU 4(R6), R11 ++loop_5: ++ BLT R8, R4, not_found ++ MOVWU (R4), R12 ++ ADDV $1, R4 ++ BNE R10, R12, loop_5 ++ MOVBU 3(R4), R12 ++ BNE R11, R12, loop_5 ++ JMP found ++ ++len_6_7: ++ AND $0x1, R7, R5 ++ BNE R5, len_7 ++ ++len_6: ++ MOVWU (R6), R10 ++ MOVHU 4(R6), R11 ++loop_6: ++ BLT R8, R4, not_found ++ MOVWU (R4), R12 ++ ADDV $1, R4 ++ BNE R10, R12, loop_6 ++ MOVHU 3(R4), R12 ++ BNE R11, R12, loop_6 ++ JMP found ++ ++len_7: ++ MOVWU (R6), R10 ++ MOVWU 3(R6), R11 ++loop_7: ++ BLT R8, R4, not_found ++ MOVWU (R4), R12 ++ ADDV $1, R4 ++ BNE R10, R12, loop_7 ++ MOVWU 
2(R4), R12 ++ BNE R11, R12, loop_7 ++ JMP found ++ ++len_8: ++ MOVV (R6), R10 ++loop_8: ++ BLT R8, R4, not_found ++ MOVV (R4), R11 ++ ADDV $1, R4 ++ BNE R10, R11, loop_8 ++ JMP found ++ ++len_gt_8: ++ MOVV $16, R5 ++ BLT R5, R7, len_gt_16 ++ ++len_9_16: ++ MOVV (R6), R10 ++ SUBV $8, R7 ++ MOVV (R6)(R7), R11 ++ SUBV $1, R7 ++loop_9_16: ++ BLT R8, R4, not_found ++ MOVV (R4), R12 ++ ADDV $1, R4 ++ BNE R10, R12, loop_9_16 ++ MOVV (R4)(R7), R12 ++ BNE R11, R12, loop_9_16 ++ JMP found ++ ++len_gt_16: ++ MOVV $24, R5 ++ BLT R5, R7, len_25_32 ++ ++len_17_24: ++ MOVV (R6), R10 ++ SUBV $8, R7 ++ MOVV 8(R6), R11 ++ MOVV (R6)(R7), R12 ++ SUBV $1, R7 ++loop_17_24: ++ BLT R8, R4, not_found ++ MOVV (R4), R13 ++ ADDV $1, R4 ++ BNE R10, R13, loop_17_24 ++ MOVV 7(R4), R13 ++ BNE R11, R13, loop_17_24 ++ MOVV (R4)(R7), R13 ++ BNE R12, R13, loop_17_24 ++ JMP found ++ ++len_25_32: ++ MOVV (R6), R10 ++ SUBV $8, R7 ++ MOVV 8(R6), R11 ++ MOVV 16(R6), R12 ++ MOVV (R6)(R7), R13 ++ SUBV $1, R7 ++loop_25_32: ++ BLT R8, R4, not_found ++ MOVV (R4), R14 ++ ADDV $1, R4 ++ BNE R10, R14, loop_25_32 ++ MOVV 7(R4), R14 ++ BNE R11, R14, loop_25_32 ++ MOVV 15(R4), R14 ++ BNE R12, R14, loop_25_32 ++ MOVV (R4)(R7), R14 ++ BNE R13, R14, loop_25_32 ++ JMP found ++ ++found: ++ SUBV R9, R4 ++ RET ++ ++not_found: ++ MOVV $-1, R4 ++ RET +diff --git a/src/internal/bytealg/index_native.go b/src/internal/bytealg/index_native.go +index 59c93f9d12..7aadaabe4e 100644 +--- a/src/internal/bytealg/index_native.go ++++ b/src/internal/bytealg/index_native.go +@@ -2,7 +2,7 @@ + // Use of this source code is governed by a BSD-style + // license that can be found in the LICENSE file. 
+ +-//go:build amd64 || arm64 || s390x || ppc64le || ppc64 ++//go:build amd64 || arm64 || s390x || ppc64le || ppc64 || loong64 + + package bytealg + +-- +2.38.1 + diff --git a/0009-cmd-internal-obj-loong64-add-atomic-memory-access-in.patch b/0009-cmd-internal-obj-loong64-add-atomic-memory-access-in.patch deleted file mode 100644 index 89ac785d99132693a331e0daa5dd706dee171e55..0000000000000000000000000000000000000000 --- a/0009-cmd-internal-obj-loong64-add-atomic-memory-access-in.patch +++ /dev/null @@ -1,319 +0,0 @@ -From 0d50a1538d5834f9e94d8c781727d97ce1b3af5f Mon Sep 17 00:00:00 2001 -From: Guoqi Chen -Date: Sat, 1 Apr 2023 03:43:20 +0800 -Subject: [PATCH 09/51] cmd/internal/obj/loong64: add atomic memory access - instructions support -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -The AM* atomic access instruction performs a sequence of “read-modify-write” -operations on a memory cell atomically. Specifically, it retrieves the old -value at the specified address in memory and writes it to the general register -rd, performs some simple operations on the old value in memory and the value -in the general register rk, and then write the result of the operation back -to the memory address pointed to by general register rj. 
- -Go asm syntax: - AM{SWAP/ADD/AND/OR/XOR/MAX/MIN}[DB]{W/V} RK, (RJ), RD - AM{MAX/MIN}[DB]{WU/VU} RK, (RJ), RD - -Equivalent platform assembler syntax: - am{swap/add/and/or/xor/max/min}[_db].{w/d} rd, rk, rj - am{max/min}[_db].{wu/du} rd, rk, rj - -Ref: https://loongson.github.io/LoongArch-Documentation/LoongArch-Vol1-EN.html -Change-Id: I1707c484c3b2a0cf523255c80f566480a77432fb ---- - src/cmd/asm/internal/arch/loong64.go | 4 ++ - src/cmd/asm/internal/asm/asm.go | 14 +++- - .../asm/internal/asm/testdata/loong64enc1.s | 38 +++++++++++ - src/cmd/internal/obj/loong64/a.out.go | 38 +++++++++++ - src/cmd/internal/obj/loong64/anames.go | 36 ++++++++++ - src/cmd/internal/obj/loong64/asm.go | 67 +++++++++++++++++++ - 6 files changed, 194 insertions(+), 3 deletions(-) - -diff --git a/src/cmd/asm/internal/arch/loong64.go b/src/cmd/asm/internal/arch/loong64.go -index 2958ee1a86..bf34a94f07 100644 ---- a/src/cmd/asm/internal/arch/loong64.go -+++ b/src/cmd/asm/internal/arch/loong64.go -@@ -55,6 +55,10 @@ func IsLoong64RDTIME(op obj.As) bool { - return false - } - -+func IsLoong64AMO(op obj.As) bool { -+ return loong64.IsAtomicInst(op) -+} -+ - func loong64RegisterNumber(name string, n int16) (int16, bool) { - switch name { - case "F": -diff --git a/src/cmd/asm/internal/asm/asm.go b/src/cmd/asm/internal/asm/asm.go -index 563e794706..c8fff8e574 100644 ---- a/src/cmd/asm/internal/asm/asm.go -+++ b/src/cmd/asm/internal/asm/asm.go -@@ -664,9 +664,17 @@ func (p *Parser) asmInstruction(op obj.As, cond string, a []obj.Addr) { - prog.Reg = p.getRegister(prog, op, &a[1]) - prog.To = a[2] - case sys.Loong64: -- prog.From = a[0] -- prog.Reg = p.getRegister(prog, op, &a[1]) -- prog.To = a[2] -+ switch { -+ // Loong64 atomic instructions with one input and two outputs. 
-+ case arch.IsLoong64AMO(op): -+ prog.From = a[0] -+ prog.To = a[1] -+ prog.RegTo2 = a[2].Reg -+ default: -+ prog.From = a[0] -+ prog.Reg = p.getRegister(prog, op, &a[1]) -+ prog.To = a[2] -+ } - case sys.ARM: - // Special cases. - if arch.IsARMSTREX(op) { -diff --git a/src/cmd/asm/internal/asm/testdata/loong64enc1.s b/src/cmd/asm/internal/asm/testdata/loong64enc1.s -index ea6c569f9d..288408b010 100644 ---- a/src/cmd/asm/internal/asm/testdata/loong64enc1.s -+++ b/src/cmd/asm/internal/asm/testdata/loong64enc1.s -@@ -233,3 +233,41 @@ lable2: - - MOVV FCC0, R4 // 04dc1401 - MOVV R4, FCC0 // 80d81401 -+ -+ // Loong64 atomic memory access instructions -+ AMSWAPW R14, (R13), R12 // ac396038 -+ AMSWAPV R14, (R13), R12 // acb96038 -+ AMADDW R14, (R13), R12 // ac396138 -+ AMADDV R14, (R13), R12 // acb96138 -+ AMANDW R14, (R13), R12 // ac396238 -+ AMANDV R14, (R13), R12 // acb96238 -+ AMORW R14, (R13), R12 // ac396338 -+ AMORV R14, (R13), R12 // acb96338 -+ AMXORW R14, (R13), R12 // ac396438 -+ AMXORV R14, (R13), R12 // acb96438 -+ AMMAXW R14, (R13), R12 // ac396538 -+ AMMAXV R14, (R13), R12 // acb96538 -+ AMMINW R14, (R13), R12 // ac396638 -+ AMMINV R14, (R13), R12 // acb96638 -+ AMMAXWU R14, (R13), R12 // ac396738 -+ AMMAXVU R14, (R13), R12 // acb96738 -+ AMMINWU R14, (R13), R12 // ac396838 -+ AMMINVU R14, (R13), R12 // acb96838 -+ AMSWAPDBW R14, (R13), R12 // ac396938 -+ AMSWAPDBV R14, (R13), R12 // acb96938 -+ AMADDDBW R14, (R13), R12 // ac396a38 -+ AMADDDBV R14, (R13), R12 // acb96a38 -+ AMANDDBW R14, (R13), R12 // ac396b38 -+ AMANDDBV R14, (R13), R12 // acb96b38 -+ AMORDBW R14, (R13), R12 // ac396c38 -+ AMORDBV R14, (R13), R12 // acb96c38 -+ AMXORDBW R14, (R13), R12 // ac396d38 -+ AMXORDBV R14, (R13), R12 // acb96d38 -+ AMMAXDBW R14, (R13), R12 // ac396e38 -+ AMMAXDBV R14, (R13), R12 // acb96e38 -+ AMMINDBW R14, (R13), R12 // ac396f38 -+ AMMINDBV R14, (R13), R12 // acb96f38 -+ AMMAXDBWU R14, (R13), R12 // ac397038 -+ AMMAXDBVU R14, (R13), R12 // acb97038 -+ AMMINDBWU 
R14, (R13), R12 // ac397138 -+ AMMINDBVU R14, (R13), R12 // acb97138 -diff --git a/src/cmd/internal/obj/loong64/a.out.go b/src/cmd/internal/obj/loong64/a.out.go -index 9527e99b56..3ed15fc7e7 100644 ---- a/src/cmd/internal/obj/loong64/a.out.go -+++ b/src/cmd/internal/obj/loong64/a.out.go -@@ -394,6 +394,44 @@ const ( - AMOVVF - AMOVVD - -+ // 2.2.7. Atomic Memory Access Instructions -+ AAMSWAPW -+ AAMSWAPV -+ AAMADDW -+ AAMADDV -+ AAMANDW -+ AAMANDV -+ AAMORW -+ AAMORV -+ AAMXORW -+ AAMXORV -+ AAMMAXW -+ AAMMAXV -+ AAMMINW -+ AAMMINV -+ AAMMAXWU -+ AAMMAXVU -+ AAMMINWU -+ AAMMINVU -+ AAMSWAPDBW -+ AAMSWAPDBV -+ AAMADDDBW -+ AAMADDDBV -+ AAMANDDBW -+ AAMANDDBV -+ AAMORDBW -+ AAMORDBV -+ AAMXORDBW -+ AAMXORDBV -+ AAMMAXDBW -+ AAMMAXDBV -+ AAMMINDBW -+ AAMMINDBV -+ AAMMAXDBWU -+ AAMMAXDBVU -+ AAMMINDBWU -+ AAMMINDBVU -+ - // 2.2.10. Other Miscellaneous Instructions - ARDTIMELW - ARDTIMEHW -diff --git a/src/cmd/internal/obj/loong64/anames.go b/src/cmd/internal/obj/loong64/anames.go -index f61756e7a8..d48ff8a281 100644 ---- a/src/cmd/internal/obj/loong64/anames.go -+++ b/src/cmd/internal/obj/loong64/anames.go -@@ -131,6 +131,42 @@ var Anames = []string{ - "MOVDV", - "MOVVF", - "MOVVD", -+ "AMSWAPW", -+ "AMSWAPV", -+ "AMADDW", -+ "AMADDV", -+ "AMANDW", -+ "AMANDV", -+ "AMORW", -+ "AMORV", -+ "AMXORW", -+ "AMXORV", -+ "AMMAXW", -+ "AMMAXV", -+ "AMMINW", -+ "AMMINV", -+ "AMMAXWU", -+ "AMMAXVU", -+ "AMMINWU", -+ "AMMINVU", -+ "AMSWAPDBW", -+ "AMSWAPDBV", -+ "AMADDDBW", -+ "AMADDDBV", -+ "AMANDDBW", -+ "AMANDDBV", -+ "AMORDBW", -+ "AMORDBV", -+ "AMXORDBW", -+ "AMXORDBV", -+ "AMMAXDBW", -+ "AMMAXDBV", -+ "AMMINDBW", -+ "AMMINDBV", -+ "AMMAXDBWU", -+ "AMMAXDBVU", -+ "AMMINDBWU", -+ "AMMINDBVU", - "RDTIMELW", - "RDTIMEHW", - "RDTIMED", -diff --git a/src/cmd/internal/obj/loong64/asm.go b/src/cmd/internal/obj/loong64/asm.go -index c8d00413a0..638bd1139f 100644 ---- a/src/cmd/internal/obj/loong64/asm.go -+++ b/src/cmd/internal/obj/loong64/asm.go -@@ -363,6 +363,8 @@ var optab = 
[]Optab{ - {ARDTIMEHW, C_NONE, C_NONE, C_NONE, C_REG, C_REG, 62, 4, 0, 0}, - {ARDTIMED, C_NONE, C_NONE, C_NONE, C_REG, C_REG, 62, 4, 0, 0}, - -+ {AAMSWAPW, C_REG, C_NONE, C_NONE, C_ZOREG, C_REG, 66, 4, 0, 0}, -+ - {obj.AUNDEF, C_NONE, C_NONE, C_NONE, C_NONE, C_NONE, 49, 4, 0, 0}, - {obj.APCALIGN, C_SCON, C_NONE, C_NONE, C_NONE, C_NONE, 0, 0, 0, 0}, - {obj.APCDATA, C_LCON, C_NONE, C_NONE, C_LCON, C_NONE, 0, 0, 0, 0}, -@@ -379,6 +381,51 @@ var optab = []Optab{ - {obj.AXXX, C_NONE, C_NONE, C_NONE, C_NONE, C_NONE, 0, 4, 0, 0}, - } - -+var atomicInst = map[obj.As]uint32{ -+ AAMSWAPW: 0x070C0 << 15, // amswap.w -+ AAMSWAPV: 0x070C1 << 15, // amswap.d -+ AAMADDW: 0x070C2 << 15, // amadd.w -+ AAMADDV: 0x070C3 << 15, // amadd.d -+ AAMANDW: 0x070C4 << 15, // amand.w -+ AAMANDV: 0x070C5 << 15, // amand.d -+ AAMORW: 0x070C6 << 15, // amor.w -+ AAMORV: 0x070C7 << 15, // amor.d -+ AAMXORW: 0x070C8 << 15, // amxor.w -+ AAMXORV: 0x070C9 << 15, // amxor.d -+ AAMMAXW: 0x070CA << 15, // ammax.w -+ AAMMAXV: 0x070CB << 15, // ammax.d -+ AAMMINW: 0x070CC << 15, // ammin.w -+ AAMMINV: 0x070CD << 15, // ammin.d -+ AAMMAXWU: 0x070CE << 15, // ammax.wu -+ AAMMAXVU: 0x070CF << 15, // ammax.du -+ AAMMINWU: 0x070D0 << 15, // ammin.wu -+ AAMMINVU: 0x070D1 << 15, // ammin.du -+ AAMSWAPDBW: 0x070D2 << 15, // amswap_db.w -+ AAMSWAPDBV: 0x070D3 << 15, // amswap_db.d -+ AAMADDDBW: 0x070D4 << 15, // amadd_db.w -+ AAMADDDBV: 0x070D5 << 15, // amadd_db.d -+ AAMANDDBW: 0x070D6 << 15, // amand_db.w -+ AAMANDDBV: 0x070D7 << 15, // amand_db.d -+ AAMORDBW: 0x070D8 << 15, // amor_db.w -+ AAMORDBV: 0x070D9 << 15, // amor_db.d -+ AAMXORDBW: 0x070DA << 15, // amxor_db.w -+ AAMXORDBV: 0x070DB << 15, // amxor_db.d -+ AAMMAXDBW: 0x070DC << 15, // ammax_db.w -+ AAMMAXDBV: 0x070DD << 15, // ammax_db.d -+ AAMMINDBW: 0x070DE << 15, // ammin_db.w -+ AAMMINDBV: 0x070DF << 15, // ammin_db.d -+ AAMMAXDBWU: 0x070E0 << 15, // ammax_db.wu -+ AAMMAXDBVU: 0x070E1 << 15, // ammax_db.du -+ AAMMINDBWU: 0x070E2 << 15, // 
ammin_db.wu -+ AAMMINDBVU: 0x070E3 << 15, // ammin_db.du -+} -+ -+func IsAtomicInst(as obj.As) bool { -+ _, ok := atomicInst[as] -+ -+ return ok -+} -+ - // pcAlignPadLength returns the number of bytes required to align pc to alignedValue, - // reporting an error if alignedValue is not a power of two or is out of range. - func pcAlignPadLength(ctxt *obj.Link, pc int64, alignedValue int64) int { -@@ -1172,6 +1219,14 @@ func buildop(ctxt *obj.Link) { - - case AMASKEQZ: - opset(AMASKNEZ, r0) -+ -+ case AAMSWAPW: -+ for i := range atomicInst { -+ if i == AAMSWAPW { -+ continue -+ } -+ opset(i, r0) -+ } - } - } - } -@@ -1797,6 +1852,18 @@ func (c *ctxt0) asmout(p *obj.Prog, o *Optab, out []uint32) { - rel2.Sym = p.From.Sym - rel2.Type = objabi.R_LOONG64_GOT_LO - rel2.Add = 0x0 -+ -+ case 66: // am* From, To, RegTo2 ==> am* RegTo2, From, To -+ rk := p.From.Reg -+ rj := p.To.Reg -+ rd := p.RegTo2 -+ -+ // See section 2.2.7.1 of https://loongson.github.io/LoongArch-Documentation/LoongArch-Vol1-EN.html -+ // for the register usage constraints. 
-+ if rd == rj || rd == rk { -+ c.ctxt.Diag("illegal register combination: %v\n", p) -+ } -+ o1 = OP_RRR(atomicInst[p.As], uint32(rk), uint32(rj), uint32(rd)) - } - - out[0] = o1 --- -2.38.1 - diff --git a/0009-internal-bytealg-optimize-Count-and-CountString-func.patch b/0009-internal-bytealg-optimize-Count-and-CountString-func.patch new file mode 100644 index 0000000000000000000000000000000000000000..9b99d37cca346311046f3085e4133441b5f321ae --- /dev/null +++ b/0009-internal-bytealg-optimize-Count-and-CountString-func.patch @@ -0,0 +1,153 @@ +From 37c73e45ea537b7e8662b968b630a2566b25ae59 Mon Sep 17 00:00:00 2001 +From: Huang Qiqi +Date: Wed, 29 May 2024 10:49:41 +0800 +Subject: [PATCH 09/44] internal/bytealg: optimize Count and CountString + function for loong64 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Benchmark results on Loongson 3C5000 (which is an LA464 implementation): + +goos: linux +goarch: loong64 +pkg: bytes +cpu: Loongson-3C5000 @ 2200.00MHz + │ test/old_3c5000_count.log │ test/new_3c5000_count.log │ + │ sec/op │ sec/op vs base │ +CountSingle/10 16.26n ± 0% 16.26n ± 0% ~ (p=0.653 n=20) +CountSingle/32 41.48n ± 0% 27.48n ± 0% -33.75% (p=0.000 n=20) +CountSingle/4K 4.998µ ± 0% 2.961µ ± 0% -40.76% (p=0.000 n=20) +CountSingle/4M 5.076m ± 0% 3.510m ± 8% -30.84% (p=0.000 n=20) +CountSingle/64M 88.70m ± 0% 58.15m ± 1% -34.45% (p=0.000 n=20) +geomean 17.23µ 12.20µ -29.19% + +Change-Id: Ic60d49fea83c9cf4f9b02bae3ce69b81206c7017 +--- + src/internal/bytealg/count_generic.go | 2 +- + src/internal/bytealg/count_loong64.s | 86 +++++++++++++++++++++++++++ + src/internal/bytealg/count_native.go | 2 +- + 3 files changed, 88 insertions(+), 2 deletions(-) + create mode 100644 src/internal/bytealg/count_loong64.s + +diff --git a/src/internal/bytealg/count_generic.go b/src/internal/bytealg/count_generic.go +index 932a7c584c..16f974539c 100644 +--- a/src/internal/bytealg/count_generic.go ++++ 
b/src/internal/bytealg/count_generic.go +@@ -2,7 +2,7 @@ + // Use of this source code is governed by a BSD-style + // license that can be found in the LICENSE file. + +-//go:build !amd64 && !arm && !arm64 && !ppc64le && !ppc64 && !riscv64 && !s390x ++//go:build !amd64 && !arm && !arm64 && !loong64 && !ppc64le && !ppc64 && !riscv64 && !s390x + + package bytealg + +diff --git a/src/internal/bytealg/count_loong64.s b/src/internal/bytealg/count_loong64.s +new file mode 100644 +index 0000000000..ca19c5f343 +--- /dev/null ++++ b/src/internal/bytealg/count_loong64.s +@@ -0,0 +1,86 @@ ++// Copyright 2020 The Go Authors. All rights reserved. ++// Use of this source code is governed by a BSD-style ++// license that can be found in the LICENSE file. ++ ++#include "go_asm.h" ++#include "textflag.h" ++ ++TEXT ·Count(SB),NOSPLIT,$0-40 ++ // R4 = b_base ++ // R5 = b_len ++ // R6 = b_cap (unused) ++ // R7 = byte to count (want in R6) ++ AND $0xff, R7, R6 ++ JMP countbody<>(SB) ++ ++TEXT ·CountString(SB),NOSPLIT,$0-32 ++ // R4 = s_base ++ // R5 = s_len ++ // R6 = byte to count ++ AND $0xff, R6 ++ JMP countbody<>(SB) ++ ++// input: ++// R4 = s_base ++// R5 = s_len ++// R6 = byte to count ++TEXT countbody<>(SB),NOSPLIT,$0 ++ MOVV R0, R7 // count ++ ADDV R4, R5 // end ++ MOVV $1, R17 ++ ++loop: ++ ADDV $8, R4, R9 ++ BLT R5, R9, tail ++ MOVV (R4), R8 ++ ++ AND $0xff, R8, R10 ++ WORD $0xcf210b // bstrpick.w r11, r8, 15, 8 ++ XOR R6, R10, R10 ++ XOR R6, R11, R11 ++ MASKNEZ R10, R17, R12 ++ MASKNEZ R11, R17, R13 ++ ADDV R7, R12, R7 ++ ADDV R7, R13, R7 ++ ++ WORD $0xd7410a // bstrpick.w r10, r8, 23, 16 ++ WORD $0xdf610b // bstrpick.w r11, r8, 31, 24 ++ XOR R6, R10, R10 ++ XOR R6, R11, R11 ++ MASKNEZ R10, R17, R12 ++ MASKNEZ R11, R17, R13 ++ ADDV R7, R12, R7 ++ ADDV R7, R13, R7 ++ ++ WORD $0xe7810a // bstrpick.w r10, r8, 39, 32 ++ WORD $0xefa10b // bstrpick.w r11, r8, 47, 40 ++ XOR R6, R10, R10 ++ XOR R6, R11, R11 ++ MASKNEZ R10, R17, R12 ++ MASKNEZ R11, R17, R13 ++ ADDV R7, R12, R7 ++ ADDV 
R7, R13, R7 ++ ++ WORD $0xf7c10a // bstrpick.w r10, r8, 55, 48 ++ WORD $0xffe10b // bstrpick.w r11, r8, 63, 56 ++ XOR R6, R10, R10 ++ XOR R6, R11, R11 ++ MASKNEZ R10, R17, R12 ++ MASKNEZ R11, R17, R13 ++ ADDV R7, R12, R7 ++ ADDV R7, R13, R7 ++ ++ MOVV R9, R4 ++ JMP loop ++ ++tail: ++ BEQ R4, R5, done ++ MOVBU (R4), R8 ++ ADDV $1, R4 ++ BNE R6, R8, tail ++ ADDV $1, R7 ++ JMP tail ++ ++done: ++ MOVV R7, R4 ++ RET +diff --git a/src/internal/bytealg/count_native.go b/src/internal/bytealg/count_native.go +index 90189c9fe0..eab64e8950 100644 +--- a/src/internal/bytealg/count_native.go ++++ b/src/internal/bytealg/count_native.go +@@ -2,7 +2,7 @@ + // Use of this source code is governed by a BSD-style + // license that can be found in the LICENSE file. + +-//go:build amd64 || arm || arm64 || ppc64le || ppc64 || riscv64 || s390x ++//go:build amd64 || arm || arm64 || loong64 || ppc64le || ppc64 || riscv64 || s390x + + package bytealg + +-- +2.38.1 + diff --git a/0010-cmd-compiler-runtime-internal-atomic-optimize-xchg-a.patch b/0010-cmd-compiler-runtime-internal-atomic-optimize-xchg-a.patch deleted file mode 100644 index 01abe54f915f7a919380eae1f68ec2a4a6e7abd3..0000000000000000000000000000000000000000 --- a/0010-cmd-compiler-runtime-internal-atomic-optimize-xchg-a.patch +++ /dev/null @@ -1,164 +0,0 @@ -From 88bc9d7c907c3312d6ff6079aca176471f6d3e5d Mon Sep 17 00:00:00 2001 -From: Guoqi Chen -Date: Sat, 1 Apr 2023 08:49:58 +0800 -Subject: [PATCH 10/51] cmd/compiler,runtime/internal/atomic: optimize xchg and - xchg64 on loong64 -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Use Loong64's atomic operation instruction AMSWAPx to implement Xchg and Xchg64 - -goos: linux -goarch: loong64 -pkg: runtime/internal/atomic - │ bench.old │ bench.new │ - │ sec/op │ sec/op vs base │ -Xchg 30.06n ± 0% 13.66n ± 0% -54.56% (p=0.000 n=20) -Xchg-2 37.43n ± 6% 23.55n ± 1% -37.06% (p=0.000 n=20) -Xchg-4 37.16n ± 5% 33.25n ± 2% -10.55% (p=0.000 n=20) 
-Xchg-8 37.81n ± 3% 32.12n ± 1% -15.04% (p=0.000 n=20) -Xchg-16 37.55n ± 0% 33.70n ± 0% -10.25% (p=0.000 n=20) -Xchg64 30.05n ± 0% 14.13n ± 0% -52.96% (p=0.000 n=20) -Xchg64-2 37.42n ± 1% 21.80n ± 0% -41.74% (p=0.000 n=20) -Xchg64-4 38.17n ± 6% 31.95n ± 1% -16.30% (p=0.000 n=20) -Xchg64-8 37.44n ± 1% 32.12n ± 2% -14.18% (p=0.000 n=20) -Xchg64-16 37.56n ± 0% 33.65n ± 0% -10.41% (p=0.000 n=20) -geomean 36.54n 25.65n -28.61% - -Updates #59120. - -Change-Id: I1f78f547b818ea6ead9480254c05745e2eee4d68 ---- - src/cmd/compile/internal/loong64/ssa.go | 38 ++++--------------- - .../compile/internal/ssa/_gen/LOONG64Ops.go | 6 --- - src/runtime/internal/atomic/atomic_loong64.s | 30 +++++++-------- - 3 files changed, 22 insertions(+), 52 deletions(-) - -diff --git a/src/cmd/compile/internal/loong64/ssa.go b/src/cmd/compile/internal/loong64/ssa.go -index f74f90fb5f..e103c896e6 100644 ---- a/src/cmd/compile/internal/loong64/ssa.go -+++ b/src/cmd/compile/internal/loong64/ssa.go -@@ -495,40 +495,18 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) { - p.To.Reg = v.Args[0].Reg() - s.Prog(loong64.ADBAR) - case ssa.OpLOONG64LoweredAtomicExchange32, ssa.OpLOONG64LoweredAtomicExchange64: -- // DBAR -- // MOVV Rarg1, Rtmp -- // LL (Rarg0), Rout -- // SC Rtmp, (Rarg0) -- // BEQ Rtmp, -3(PC) -- // DBAR -- ll := loong64.ALLV -- sc := loong64.ASCV -+ // AMSWAPx Rarg1, (Rarg0), Rout -+ amswapx := loong64.AAMSWAPV - if v.Op == ssa.OpLOONG64LoweredAtomicExchange32 { -- ll = loong64.ALL -- sc = loong64.ASC -+ amswapx = loong64.AAMSWAPW - } -- s.Prog(loong64.ADBAR) -- p := s.Prog(loong64.AMOVV) -+ p := s.Prog(amswapx) - p.From.Type = obj.TYPE_REG - p.From.Reg = v.Args[1].Reg() -- p.To.Type = obj.TYPE_REG -- p.To.Reg = loong64.REGTMP -- p1 := s.Prog(ll) -- p1.From.Type = obj.TYPE_MEM -- p1.From.Reg = v.Args[0].Reg() -- p1.To.Type = obj.TYPE_REG -- p1.To.Reg = v.Reg0() -- p2 := s.Prog(sc) -- p2.From.Type = obj.TYPE_REG -- p2.From.Reg = loong64.REGTMP -- p2.To.Type = obj.TYPE_MEM -- p2.To.Reg = 
v.Args[0].Reg() -- p3 := s.Prog(loong64.ABEQ) -- p3.From.Type = obj.TYPE_REG -- p3.From.Reg = loong64.REGTMP -- p3.To.Type = obj.TYPE_BRANCH -- p3.To.SetTarget(p) -- s.Prog(loong64.ADBAR) -+ p.To.Type = obj.TYPE_MEM -+ p.To.Reg = v.Args[0].Reg() -+ p.RegTo2 = v.Reg0() -+ - case ssa.OpLOONG64LoweredAtomicAdd32, ssa.OpLOONG64LoweredAtomicAdd64: - // DBAR - // LL (Rarg0), Rout -diff --git a/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go b/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go -index cb058f45c0..09c3df6af2 100644 ---- a/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go -+++ b/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go -@@ -377,12 +377,6 @@ func init() { - - // atomic exchange. - // store arg1 to arg0. arg2=mem. returns . -- // DBAR -- // LL (Rarg0), Rout -- // MOVV Rarg1, Rtmp -- // SC Rtmp, (Rarg0) -- // BEQ Rtmp, -3(PC) -- // DBAR - {name: "LoweredAtomicExchange32", argLength: 3, reg: gpxchg, resultNotInArgs: true, faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true}, - {name: "LoweredAtomicExchange64", argLength: 3, reg: gpxchg, resultNotInArgs: true, faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true}, - -diff --git a/src/runtime/internal/atomic/atomic_loong64.s b/src/runtime/internal/atomic/atomic_loong64.s -index 34193add3e..ec34d254fc 100644 ---- a/src/runtime/internal/atomic/atomic_loong64.s -+++ b/src/runtime/internal/atomic/atomic_loong64.s -@@ -121,35 +121,33 @@ TEXT ·Xadd64(SB), NOSPLIT, $0-24 - DBAR - RET - -+// func Xchg(ptr *uint32, new uint32) uint32 - TEXT ·Xchg(SB), NOSPLIT, $0-20 - MOVV ptr+0(FP), R4 - MOVW new+8(FP), R5 -- -- DBAR -- MOVV R5, R6 -- LL (R4), R7 -- SC R6, (R4) -- BEQ R6, -3(PC) -- MOVW R7, ret+16(FP) -- DBAR -+ AMSWAPW R5, (R4), R6 -+ MOVW R6, ret+16(FP) - RET - -+// func Xchg64(ptr *uint64, new uint64) uint64 - TEXT ·Xchg64(SB), NOSPLIT, $0-24 - MOVV ptr+0(FP), R4 - MOVV new+8(FP), R5 -- -- DBAR -- MOVV R5, R6 -- LLV (R4), R7 -- SCV R6, (R4) -- BEQ R6, -3(PC) -- MOVV R7, ret+16(FP) -- DBAR -+ 
AMSWAPV R5, (R4), R6 -+ MOVV R6, ret+16(FP) - RET - - TEXT ·Xchguintptr(SB), NOSPLIT, $0-24 - JMP ·Xchg64(SB) - -+// func Xchgint32(ptr *int32, new int32) int32 -+TEXT ·Xchgint32(SB), NOSPLIT, $0-20 -+ JMP ·Xchg(SB) -+ -+// func Xchgint64(ptr *int64, new int64) int64 -+TEXT ·Xchgint64(SB), NOSPLIT, $0-24 -+ JMP ·Xchg64(SB) -+ - TEXT ·StorepNoWB(SB), NOSPLIT, $0-16 - JMP ·Store64(SB) - --- -2.38.1 - diff --git a/0010-internal-bytealg-adjust-the-format-of-assembly-files.patch b/0010-internal-bytealg-adjust-the-format-of-assembly-files.patch new file mode 100644 index 0000000000000000000000000000000000000000..85a21d39280a660722421eccf9ed74d018375699 --- /dev/null +++ b/0010-internal-bytealg-adjust-the-format-of-assembly-files.patch @@ -0,0 +1,583 @@ +From 14ffec301d84da6bcd5ef5757d6cd6445351225e Mon Sep 17 00:00:00 2001 +From: Huang Qiqi +Date: Mon, 3 Jun 2024 15:43:32 +0800 +Subject: [PATCH 10/44] internal/bytealg: adjust the format of assembly files + {count, equal, index, indexbyte}_loong64.s + +Change-Id: I19e6650e6595148e449da7a82be6e735c6f01ab6 +--- + src/internal/bytealg/count_loong64.s | 92 +++++++------- + src/internal/bytealg/equal_loong64.s | 42 ++++--- + src/internal/bytealg/index_loong64.s | 148 +++++++++++------------ + src/internal/bytealg/indexbyte_loong64.s | 52 ++++---- + 4 files changed, 169 insertions(+), 165 deletions(-) + +diff --git a/src/internal/bytealg/count_loong64.s b/src/internal/bytealg/count_loong64.s +index ca19c5f343..db8ba2cb24 100644 +--- a/src/internal/bytealg/count_loong64.s ++++ b/src/internal/bytealg/count_loong64.s +@@ -1,4 +1,4 @@ +-// Copyright 2020 The Go Authors. All rights reserved. ++// Copyright 2024 The Go Authors. All rights reserved. + // Use of this source code is governed by a BSD-style + // license that can be found in the LICENSE file. 
+ +@@ -9,77 +9,77 @@ TEXT ·Count(SB),NOSPLIT,$0-40 + // R4 = b_base + // R5 = b_len + // R6 = b_cap (unused) +- // R7 = byte to count (want in R6) +- AND $0xff, R7, R6 +- JMP countbody<>(SB) ++ // R7 = byte to count ++ AND $0xff, R7, R6 ++ JMP countbody<>(SB) + + TEXT ·CountString(SB),NOSPLIT,$0-32 + // R4 = s_base + // R5 = s_len + // R6 = byte to count +- AND $0xff, R6 +- JMP countbody<>(SB) ++ AND $0xff, R6 ++ JMP countbody<>(SB) + + // input: + // R4 = s_base + // R5 = s_len + // R6 = byte to count + TEXT countbody<>(SB),NOSPLIT,$0 +- MOVV R0, R7 // count +- ADDV R4, R5 // end +- MOVV $1, R17 ++ MOVV R0, R7 // count ++ ADDV R4, R5 // end ++ MOVV $1, R17 + + loop: + ADDV $8, R4, R9 +- BLT R5, R9, tail ++ BLT R5, R9, tail + MOVV (R4), R8 + +- AND $0xff, R8, R10 +- WORD $0xcf210b // bstrpick.w r11, r8, 15, 8 +- XOR R6, R10, R10 +- XOR R6, R11, R11 +- MASKNEZ R10, R17, R12 +- MASKNEZ R11, R17, R13 +- ADDV R7, R12, R7 +- ADDV R7, R13, R7 ++ AND $0xff, R8, R10 ++ WORD $0xcf210b // bstrpick.w r11, r8, 15, 8 ++ XOR R6, R10, R10 ++ XOR R6, R11, R11 ++ MASKNEZ R10, R17, R12 ++ MASKNEZ R11, R17, R13 ++ ADDV R7, R12, R7 ++ ADDV R7, R13, R7 + +- WORD $0xd7410a // bstrpick.w r10, r8, 23, 16 +- WORD $0xdf610b // bstrpick.w r11, r8, 31, 24 +- XOR R6, R10, R10 +- XOR R6, R11, R11 +- MASKNEZ R10, R17, R12 +- MASKNEZ R11, R17, R13 +- ADDV R7, R12, R7 +- ADDV R7, R13, R7 ++ WORD $0xd7410a // bstrpick.w r10, r8, 23, 16 ++ WORD $0xdf610b // bstrpick.w r11, r8, 31, 24 ++ XOR R6, R10, R10 ++ XOR R6, R11, R11 ++ MASKNEZ R10, R17, R12 ++ MASKNEZ R11, R17, R13 ++ ADDV R7, R12, R7 ++ ADDV R7, R13, R7 + +- WORD $0xe7810a // bstrpick.w r10, r8, 39, 32 +- WORD $0xefa10b // bstrpick.w r11, r8, 47, 40 +- XOR R6, R10, R10 +- XOR R6, R11, R11 +- MASKNEZ R10, R17, R12 +- MASKNEZ R11, R17, R13 +- ADDV R7, R12, R7 +- ADDV R7, R13, R7 ++ WORD $0xe7810a // bstrpick.w r10, r8, 39, 32 ++ WORD $0xefa10b // bstrpick.w r11, r8, 47, 40 ++ XOR R6, R10, R10 ++ XOR R6, R11, R11 ++ MASKNEZ R10, R17, R12 ++ 
MASKNEZ R11, R17, R13 ++ ADDV R7, R12, R7 ++ ADDV R7, R13, R7 + +- WORD $0xf7c10a // bstrpick.w r10, r8, 55, 48 +- WORD $0xffe10b // bstrpick.w r11, r8, 63, 56 +- XOR R6, R10, R10 +- XOR R6, R11, R11 +- MASKNEZ R10, R17, R12 +- MASKNEZ R11, R17, R13 +- ADDV R7, R12, R7 +- ADDV R7, R13, R7 ++ WORD $0xf7c10a // bstrpick.w r10, r8, 55, 48 ++ WORD $0xffe10b // bstrpick.w r11, r8, 63, 56 ++ XOR R6, R10, R10 ++ XOR R6, R11, R11 ++ MASKNEZ R10, R17, R12 ++ MASKNEZ R11, R17, R13 ++ ADDV R7, R12, R7 ++ ADDV R7, R13, R7 + + MOVV R9, R4 +- JMP loop ++ JMP loop + + tail: +- BEQ R4, R5, done ++ BEQ R4, R5, done + MOVBU (R4), R8 + ADDV $1, R4 +- BNE R6, R8, tail ++ BNE R6, R8, tail + ADDV $1, R7 +- JMP tail ++ JMP tail + + done: + MOVV R7, R4 +diff --git a/src/internal/bytealg/equal_loong64.s b/src/internal/bytealg/equal_loong64.s +index 4cc31d5e46..5d5d591a2c 100644 +--- a/src/internal/bytealg/equal_loong64.s ++++ b/src/internal/bytealg/equal_loong64.s +@@ -12,57 +12,61 @@ TEXT runtime·memequal(SB),NOSPLIT|NOFRAME,$0-25 + // R4 = a_base + // R5 = b_base + // R6 = size +- JMP equalbody<>(SB) ++ JMP equalbody<>(SB) + + // memequal_varlen(a, b unsafe.Pointer) bool + TEXT runtime·memequal_varlen(SB),NOSPLIT,$0-17 + // R4 = a_base + // R5 = b_base +- MOVV 8(REGCTXT), R6 // compiler stores size at offset 8 in the closure +- JMP equalbody<>(SB) ++ MOVV 8(REGCTXT), R6 // compiler stores size at offset 8 in the closure ++ JMP equalbody<>(SB) + ++// input: ++// R4 = a_base ++// R5 = b_base ++// R6 = size + TEXT equalbody<>(SB),NOSPLIT|NOFRAME,$0 +- BEQ R4, R5, eq +- ADDV R4, R6, R6 // end ++ BEQ R4, R5, eq ++ ADDV R4, R6, R6 // end + + loop_16byte: +- ADDV $16, R4, R9 +- BLT R6, R9, load8byte ++ ADDV $16, R4, R9 ++ BLT R6, R9, load8byte + MOVV (R4), R7 + MOVV (R5), R8 + MOVV 8(R4), R10 + MOVV 8(R5), R11 + MOVV R9, R4 +- XOR R7, R8, R7 +- XOR R10, R11, R10 +- OR R10, R7, R7 ++ XOR R7, R8, R7 ++ XOR R10, R11, R10 ++ OR R10, R7, R7 + ADDV $16, R5 +- BEQ R7, loop_16byte ++ BEQ R7, 
loop_16byte + +- MOVB R0, R4 ++ MOVB R0, R4 + RET + + load8byte: +- ADDV $8, R4, R9 +- BLT R6, R9, tail ++ ADDV $8, R4, R9 ++ BLT R6, R9, tail + MOVV (R4), R7 + MOVV (R5), R8 + MOVV R9, R4 + ADDV $8, R5 +- BEQ R7, R8, tail ++ BEQ R7, R8, tail + +- MOVB R0, R4 ++ MOVB R0, R4 + RET + + tail: +- BEQ R4, R6, eq ++ BEQ R4, R6, eq + MOVBU (R4), R7 + MOVBU (R5), R8 + ADDV $1, R4 + ADDV $1, R5 +- BEQ R7, R8, tail ++ BEQ R7, R8, tail + +- MOVB R0, R4 ++ MOVB R0, R4 + RET + + eq: +diff --git a/src/internal/bytealg/index_loong64.s b/src/internal/bytealg/index_loong64.s +index 221d0332a4..7f7190b3be 100644 +--- a/src/internal/bytealg/index_loong64.s ++++ b/src/internal/bytealg/index_loong64.s +@@ -1,4 +1,4 @@ +-// Copyright 2018 The Go Authors. All rights reserved. ++// Copyright 2024 The Go Authors. All rights reserved. + // Use of this source code is governed by a BSD-style + // license that can be found in the LICENSE file. + +@@ -6,12 +6,12 @@ + #include "textflag.h" + + TEXT ·Index(SB),NOSPLIT,$0-56 +- MOVV R7, R6 // R6 = separator pointer +- MOVV R8, R7 // R7 = separator length +- JMP indexbody<>(SB) ++ MOVV R7, R6 // R6 = separator pointer ++ MOVV R8, R7 // R7 = separator length ++ JMP indexbody<>(SB) + + TEXT ·IndexString(SB),NOSPLIT,$0-40 +- JMP indexbody<>(SB) ++ JMP indexbody<>(SB) + + // input: + // R4 = string +@@ -23,108 +23,108 @@ TEXT indexbody<>(SB),NOSPLIT,$0 + // to avoid repeatedly re-load it again and again + // for sebsequent substring comparisons + SUBV R7, R5, R8 +- ADDV $1, R4, R9 // store base for later ++ ADDV $1, R4, R9 // store base for later + MOVV $8, R5 +- ADDV R4, R8 // end +- BLT R5, R7, len_gt_8 ++ ADDV R4, R8 // end ++ BLT R5, R7, len_gt_8 + + len_le_8: +- AND $0x8, R7, R5 +- BNE R5, len_8 +- AND $0x4, R7, R5 +- BNE R5, len_4_7 ++ AND $0x8, R7, R5 ++ BNE R5, len_8 ++ AND $0x4, R7, R5 ++ BNE R5, len_4_7 + + len_2_3: +- AND $0x1, R7, R5 +- BNE R5, len_3 ++ AND $0x1, R7, R5 ++ BNE R5, len_3 + + len_2: +- MOVHU (R6), R10 ++ MOVHU (R6), R10 + 
loop_2: +- BLT R8, R4, not_found +- MOVHU (R4), R11 ++ BLT R8, R4, not_found ++ MOVHU (R4), R11 + ADDV $1, R4 +- BNE R10, R11, loop_2 +- JMP found ++ BNE R10, R11, loop_2 ++ JMP found + + len_3: + MOVHU (R6), R10 + MOVBU 2(R6), R11 + loop_3: +- BLT R8, R4, not_found +- MOVHU (R4), R12 ++ BLT R8, R4, not_found ++ MOVHU (R4), R12 + ADDV $1, R4 +- BNE R10, R12, loop_3 +- MOVBU 1(R4), R12 +- BNE R11, R12, loop_3 +- JMP found ++ BNE R10, R12, loop_3 ++ MOVBU 1(R4), R12 ++ BNE R11, R12, loop_3 ++ JMP found + + len_4_7: +- AND $0x2, R7, R5 +- BNE R5, len_6_7 +- AND $0x1, R7, R5 +- BNE R5, len_5 ++ AND $0x2, R7, R5 ++ BNE R5, len_6_7 ++ AND $0x1, R7, R5 ++ BNE R5, len_5 + + len_4: +- MOVWU (R6), R10 ++ MOVWU (R6), R10 + loop_4: +- BLT R8, R4, not_found +- MOVWU (R4), R11 ++ BLT R8, R4, not_found ++ MOVWU (R4), R11 + ADDV $1, R4 +- BNE R10, R11, loop_4 +- JMP found ++ BNE R10, R11, loop_4 ++ JMP found + len_5: + MOVWU (R6), R10 + MOVBU 4(R6), R11 + loop_5: +- BLT R8, R4, not_found +- MOVWU (R4), R12 ++ BLT R8, R4, not_found ++ MOVWU (R4), R12 + ADDV $1, R4 +- BNE R10, R12, loop_5 +- MOVBU 3(R4), R12 +- BNE R11, R12, loop_5 +- JMP found ++ BNE R10, R12, loop_5 ++ MOVBU 3(R4), R12 ++ BNE R11, R12, loop_5 ++ JMP found + + len_6_7: +- AND $0x1, R7, R5 +- BNE R5, len_7 ++ AND $0x1, R7, R5 ++ BNE R5, len_7 + + len_6: + MOVWU (R6), R10 + MOVHU 4(R6), R11 + loop_6: +- BLT R8, R4, not_found +- MOVWU (R4), R12 ++ BLT R8, R4, not_found ++ MOVWU (R4), R12 + ADDV $1, R4 +- BNE R10, R12, loop_6 +- MOVHU 3(R4), R12 +- BNE R11, R12, loop_6 +- JMP found ++ BNE R10, R12, loop_6 ++ MOVHU 3(R4), R12 ++ BNE R11, R12, loop_6 ++ JMP found + + len_7: + MOVWU (R6), R10 + MOVWU 3(R6), R11 + loop_7: +- BLT R8, R4, not_found +- MOVWU (R4), R12 ++ BLT R8, R4, not_found ++ MOVWU (R4), R12 + ADDV $1, R4 +- BNE R10, R12, loop_7 +- MOVWU 2(R4), R12 +- BNE R11, R12, loop_7 +- JMP found ++ BNE R10, R12, loop_7 ++ MOVWU 2(R4), R12 ++ BNE R11, R12, loop_7 ++ JMP found + + len_8: + MOVV (R6), R10 + loop_8: +- 
BLT R8, R4, not_found ++ BLT R8, R4, not_found + MOVV (R4), R11 + ADDV $1, R4 +- BNE R10, R11, loop_8 +- JMP found ++ BNE R10, R11, loop_8 ++ JMP found + + len_gt_8: + MOVV $16, R5 +- BLT R5, R7, len_gt_16 ++ BLT R5, R7, len_gt_16 + + len_9_16: + MOVV (R6), R10 +@@ -132,17 +132,17 @@ len_9_16: + MOVV (R6)(R7), R11 + SUBV $1, R7 + loop_9_16: +- BLT R8, R4, not_found ++ BLT R8, R4, not_found + MOVV (R4), R12 + ADDV $1, R4 +- BNE R10, R12, loop_9_16 ++ BNE R10, R12, loop_9_16 + MOVV (R4)(R7), R12 +- BNE R11, R12, loop_9_16 +- JMP found ++ BNE R11, R12, loop_9_16 ++ JMP found + + len_gt_16: + MOVV $24, R5 +- BLT R5, R7, len_25_32 ++ BLT R5, R7, len_25_32 + + len_17_24: + MOVV (R6), R10 +@@ -151,15 +151,15 @@ len_17_24: + MOVV (R6)(R7), R12 + SUBV $1, R7 + loop_17_24: +- BLT R8, R4, not_found ++ BLT R8, R4, not_found + MOVV (R4), R13 + ADDV $1, R4 +- BNE R10, R13, loop_17_24 ++ BNE R10, R13, loop_17_24 + MOVV 7(R4), R13 +- BNE R11, R13, loop_17_24 ++ BNE R11, R13, loop_17_24 + MOVV (R4)(R7), R13 +- BNE R12, R13, loop_17_24 +- JMP found ++ BNE R12, R13, loop_17_24 ++ JMP found + + len_25_32: + MOVV (R6), R10 +@@ -169,17 +169,17 @@ len_25_32: + MOVV (R6)(R7), R13 + SUBV $1, R7 + loop_25_32: +- BLT R8, R4, not_found ++ BLT R8, R4, not_found + MOVV (R4), R14 + ADDV $1, R4 +- BNE R10, R14, loop_25_32 ++ BNE R10, R14, loop_25_32 + MOVV 7(R4), R14 +- BNE R11, R14, loop_25_32 ++ BNE R11, R14, loop_25_32 + MOVV 15(R4), R14 +- BNE R12, R14, loop_25_32 ++ BNE R12, R14, loop_25_32 + MOVV (R4)(R7), R14 +- BNE R13, R14, loop_25_32 +- JMP found ++ BNE R13, R14, loop_25_32 ++ JMP found + + found: + SUBV R9, R4 +diff --git a/src/internal/bytealg/indexbyte_loong64.s b/src/internal/bytealg/indexbyte_loong64.s +index 7811741423..b5f8f9cdbc 100644 +--- a/src/internal/bytealg/indexbyte_loong64.s ++++ b/src/internal/bytealg/indexbyte_loong64.s +@@ -12,17 +12,17 @@ TEXT ·IndexByte(SB),NOSPLIT,$0-40 + // R7 = byte to find + ADDV R4, R5 // end + MOVV R4, R6 // store base for later +- AND $0xff, 
R7 +- JMP indexbytebody<>(SB) ++ AND $0xff, R7 ++ JMP indexbytebody<>(SB) + + TEXT ·IndexByteString(SB),NOSPLIT,$0-32 + // R4 = s_base + // R5 = s_len + // R6 = byte to find +- AND $0xff, R6, R7 ++ AND $0xff, R6, R7 + ADDV R4, R5 // end + MOVV R4, R6 // store base for later +- JMP indexbytebody<>(SB) ++ JMP indexbytebody<>(SB) + + // input: + // R4: b_base +@@ -32,42 +32,42 @@ TEXT ·IndexByteString(SB),NOSPLIT,$0-32 + TEXT indexbytebody<>(SB),NOSPLIT,$0 + loop: + ADDV $8, R4, R10 +- BLT R5, R10, tail ++ BLT R5, R10, tail + MOVV (R4), R8 + +- AND $0xff, R8, R9 +- BEQ R7, R9, found ++ AND $0xff, R8, R9 ++ BEQ R7, R9, found + +- WORD $0xcf2109 // bstrpick.w r9, r8, 15, 8 +- BEQ R7, R9, byte_1th ++ WORD $0xcf2109 // bstrpick.w r9, r8, 15, 8 ++ BEQ R7, R9, byte_1th + +- WORD $0xd74109 // bstrpick.w r9, r8, 23, 16 +- BEQ R7, R9, byte_2th ++ WORD $0xd74109 // bstrpick.w r9, r8, 23, 16 ++ BEQ R7, R9, byte_2th + +- WORD $0xdf6109 // bstrpick.w r9, r8, 31, 24 +- BEQ R7, R9, byte_3th ++ WORD $0xdf6109 // bstrpick.w r9, r8, 31, 24 ++ BEQ R7, R9, byte_3th + +- WORD $0xe78109 // bstrpick.w r9, r8, 39, 32 +- BEQ R7, R9, byte_4th ++ WORD $0xe78109 // bstrpick.w r9, r8, 39, 32 ++ BEQ R7, R9, byte_4th + +- WORD $0xefa109 // bstrpick.w r9, r8, 47, 40 +- BEQ R7, R9, byte_5th ++ WORD $0xefa109 // bstrpick.w r9, r8, 47, 40 ++ BEQ R7, R9, byte_5th + +- WORD $0xf7c109 // bstrpick.w r9, r8, 55, 48 +- BEQ R7, R9, byte_6th ++ WORD $0xf7c109 // bstrpick.w r9, r8, 55, 48 ++ BEQ R7, R9, byte_6th + +- WORD $0xffe109 // bstrpick.w r9, r8, 63, 56 +- BEQ R7, R9, byte_7th ++ WORD $0xffe109 // bstrpick.w r9, r8, 63, 56 ++ BEQ R7, R9, byte_7th + + MOVV R10, R4 +- JMP loop ++ JMP loop + + tail: +- BEQ R4, R5, notfound +- MOVBU (R4), R8 +- BEQ R7, R8, found ++ BEQ R4, R5, notfound ++ MOVBU (R4), R8 ++ BEQ R7, R8, found + ADDV $1, R4 +- JMP tail ++ JMP tail + + byte_1th: + ADDV $1, R4 +-- +2.38.1 + diff --git a/0011-cmd-compiler-runtime-internal-atomic-optimize-xadd-a.patch 
b/0011-cmd-compiler-runtime-internal-atomic-optimize-xadd-a.patch deleted file mode 100644 index f6e1c91220fee855c3d3c55ac666b15d9fed7ed7..0000000000000000000000000000000000000000 --- a/0011-cmd-compiler-runtime-internal-atomic-optimize-xadd-a.patch +++ /dev/null @@ -1,182 +0,0 @@ -From 3cff40986c3a3f46a0f5bd8ed6b40638e6702226 Mon Sep 17 00:00:00 2001 -From: Guoqi Chen -Date: Mon, 3 Apr 2023 12:11:46 +0800 -Subject: [PATCH 11/51] cmd/compiler,runtime/internal/atomic: optimize xadd and - xadd64 on loong64 -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Use Loong64's atomic operation instruction AMADDx to implement Xadd and Xadd64 - -goos: linux -goarch: loong64 -pkg: runtime/internal/atomic - │ bench.old │ bench.new │ - │ sec/op │ sec/op vs base │ -Xadd 27.24n ± 0% 27.23n ± 0% -0.04% (p=0.000 n=35) -Xadd-2 31.90n ± 0% 31.91n ± 0% ~ (p=0.765 n=35) -Xadd-4 31.90n ± 0% 31.90n ± 0% ~ (p=0.636 n=35) -Xadd-8 32.61n ± 3% 32.50n ± 4% ~ (p=0.883 n=35) -Xadd-16 32.36n ± 1% 32.33n ± 1% ~ (p=0.266 n=35) -Xadd64 27.24n ± 0% 27.23n ± 0% -0.04% (p=0.000 n=35) -Xadd64-2 31.92n ± 0% 31.92n ± 0% ~ (p=0.617 n=35) -Xadd64-4 31.90n ± 0% 31.90n ± 0% 0.00% (p=0.011 n=35) -Xadd64-8 32.95n ± 1% 32.89n ± 3% ~ (p=1.000 n=35) -Xadd64-16 32.16n ± 1% 32.31n ± 1% ~ (p=0.057 n=35) -geomean 31.15n 31.14n -0.02% - -Change-Id: I982539c2aa04680e9dd11b099ba8d5f215bf9b32 ---- - src/cmd/compile/internal/loong64/ssa.go | 51 ++++++------------- - .../compile/internal/ssa/_gen/LOONG64Ops.go | 7 --- - src/runtime/internal/atomic/atomic_loong64.s | 29 +++++------ - 3 files changed, 27 insertions(+), 60 deletions(-) - -diff --git a/src/cmd/compile/internal/loong64/ssa.go b/src/cmd/compile/internal/loong64/ssa.go -index e103c896e6..f809fbac5f 100644 ---- a/src/cmd/compile/internal/loong64/ssa.go -+++ b/src/cmd/compile/internal/loong64/ssa.go -@@ -508,48 +508,27 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) { - p.RegTo2 = v.Reg0() - - case 
ssa.OpLOONG64LoweredAtomicAdd32, ssa.OpLOONG64LoweredAtomicAdd64: -- // DBAR -- // LL (Rarg0), Rout -- // ADDV Rarg1, Rout, Rtmp -- // SC Rtmp, (Rarg0) -- // BEQ Rtmp, -3(PC) -- // DBAR -- // ADDV Rarg1, Rout -- ll := loong64.ALLV -- sc := loong64.ASCV -+ // AMADDx Rarg1, (Rarg0), Rout -+ // ADDxU Rarg1, Rout, Rout -+ amaddx := loong64.AAMADDV -+ addx := loong64.AADDVU - if v.Op == ssa.OpLOONG64LoweredAtomicAdd32 { -- ll = loong64.ALL -- sc = loong64.ASC -+ amaddx = loong64.AAMADDW -+ addx = loong64.AADDU - } -- s.Prog(loong64.ADBAR) -- p := s.Prog(ll) -- p.From.Type = obj.TYPE_MEM -- p.From.Reg = v.Args[0].Reg() -- p.To.Type = obj.TYPE_REG -- p.To.Reg = v.Reg0() -- p1 := s.Prog(loong64.AADDVU) -+ p := s.Prog(amaddx) -+ p.From.Type = obj.TYPE_REG -+ p.From.Reg = v.Args[1].Reg() -+ p.To.Type = obj.TYPE_MEM -+ p.To.Reg = v.Args[0].Reg() -+ p.RegTo2 = v.Reg0() -+ p1 := s.Prog(addx) - p1.From.Type = obj.TYPE_REG - p1.From.Reg = v.Args[1].Reg() - p1.Reg = v.Reg0() - p1.To.Type = obj.TYPE_REG -- p1.To.Reg = loong64.REGTMP -- p2 := s.Prog(sc) -- p2.From.Type = obj.TYPE_REG -- p2.From.Reg = loong64.REGTMP -- p2.To.Type = obj.TYPE_MEM -- p2.To.Reg = v.Args[0].Reg() -- p3 := s.Prog(loong64.ABEQ) -- p3.From.Type = obj.TYPE_REG -- p3.From.Reg = loong64.REGTMP -- p3.To.Type = obj.TYPE_BRANCH -- p3.To.SetTarget(p) -- s.Prog(loong64.ADBAR) -- p4 := s.Prog(loong64.AADDVU) -- p4.From.Type = obj.TYPE_REG -- p4.From.Reg = v.Args[1].Reg() -- p4.Reg = v.Reg0() -- p4.To.Type = obj.TYPE_REG -- p4.To.Reg = v.Reg0() -+ p1.To.Reg = v.Reg0() -+ - case ssa.OpLOONG64LoweredAtomicAddconst32, ssa.OpLOONG64LoweredAtomicAddconst64: - // DBAR - // LL (Rarg0), Rout -diff --git a/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go b/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go -index 09c3df6af2..b83a7b0128 100644 ---- a/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go -+++ b/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go -@@ -382,13 +382,6 @@ func init() { - - // atomic add. - // *arg0 += arg1. 
arg2=mem. returns . -- // DBAR -- // LL (Rarg0), Rout -- // ADDV Rarg1, Rout, Rtmp -- // SC Rtmp, (Rarg0) -- // BEQ Rtmp, -3(PC) -- // DBAR -- // ADDV Rarg1, Rout - {name: "LoweredAtomicAdd32", argLength: 3, reg: gpxchg, resultNotInArgs: true, faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true}, - {name: "LoweredAtomicAdd64", argLength: 3, reg: gpxchg, resultNotInArgs: true, faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true}, - // *arg0 += auxint. arg1=mem. returns . auxint is 32-bit. -diff --git a/src/runtime/internal/atomic/atomic_loong64.s b/src/runtime/internal/atomic/atomic_loong64.s -index ec34d254fc..eadd031553 100644 ---- a/src/runtime/internal/atomic/atomic_loong64.s -+++ b/src/runtime/internal/atomic/atomic_loong64.s -@@ -78,6 +78,9 @@ TEXT ·Xadduintptr(SB), NOSPLIT, $0-24 - TEXT ·Loadint64(SB), NOSPLIT, $0-16 - JMP ·Load64(SB) - -+TEXT ·Xaddint32(SB),NOSPLIT,$0-20 -+ JMP ·Xadd(SB) -+ - TEXT ·Xaddint64(SB), NOSPLIT, $0-24 - JMP ·Xadd64(SB) - -@@ -91,34 +94,26 @@ TEXT ·Xaddint64(SB), NOSPLIT, $0-24 - TEXT ·Casp1(SB), NOSPLIT, $0-25 - JMP ·Cas64(SB) - --// uint32 xadd(uint32 volatile *ptr, int32 delta) - // Atomically: - // *val += delta; - // return *val; -+// -+// func Xadd(ptr *uint32, delta int32) uint32 - TEXT ·Xadd(SB), NOSPLIT, $0-20 - MOVV ptr+0(FP), R4 - MOVW delta+8(FP), R5 -- DBAR -- LL (R4), R6 -- ADDU R6, R5, R7 -- MOVV R7, R6 -- SC R7, (R4) -- BEQ R7, -4(PC) -- MOVW R6, ret+16(FP) -- DBAR -+ AMADDW R5, (R4), R6 -+ ADDU R6, R5, R4 -+ MOVW R4, ret+16(FP) - RET - -+// func Xadd64(ptr *uint64, delta int64) uint64 - TEXT ·Xadd64(SB), NOSPLIT, $0-24 - MOVV ptr+0(FP), R4 - MOVV delta+8(FP), R5 -- DBAR -- LLV (R4), R6 -- ADDVU R6, R5, R7 -- MOVV R7, R6 -- SCV R7, (R4) -- BEQ R7, -4(PC) -- MOVV R6, ret+16(FP) -- DBAR -+ AMADDV R5, (R4), R6 -+ ADDVU R6, R5, R4 -+ MOVV R4, ret+16(FP) - RET - - // func Xchg(ptr *uint32, new uint32) uint32 --- -2.38.1 - diff --git a/0011-cmd-internal-obj-loong64-optimize-immediate-loading.patch 
b/0011-cmd-internal-obj-loong64-optimize-immediate-loading.patch new file mode 100644 index 0000000000000000000000000000000000000000..6136b63a2a511b369295c0585254d7237caab0fc --- /dev/null +++ b/0011-cmd-internal-obj-loong64-optimize-immediate-loading.patch @@ -0,0 +1,776 @@ +From a08a479c526bcc63bf24e69ff7fa1d37a1179e1f Mon Sep 17 00:00:00 2001 +From: limeidan +Date: Thu, 11 Jul 2024 21:03:45 +0800 +Subject: [PATCH 11/44] cmd/internal/obj/loong64: optimize immediate loading +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + + | old | new | + | sec/op | sec/op vs base | +BinaryTree17 11.08 ± 2% 11.16 ± 1% ~ (p=0.529 n=10) +Fannkuch11 2.716 ± 0% 2.737 ± 0% +0.79% (p=0.000 n=10) +FmtFprintfEmpty 67.37n ± 0% 66.42n ± 0% -1.41% (p=0.000 n=10) +FmtFprintfString 95.28n ± 0% 90.85n ± 0% -4.64% (p=0.000 n=10) +FmtFprintfInt 97.69n ± 0% 98.06n ± 0% +0.38% (p=0.000 n=10) +FmtFprintfIntInt 149.1n ± 0% 147.4n ± 0% -1.14% (p=0.000 n=10) +FmtFprintfPrefixedInt 223.6n ± 0% 196.5n ± 0% -12.10% (p=0.000 n=10) +FmtFprintfFloat 290.9n ± 0% 281.6n ± 1% -3.21% (p=0.000 n=10) +FmtManyArgs 670.6n ± 0% 642.6n ± 0% -4.18% (p=0.000 n=10) +GobDecode 10.26m ± 1% 10.23m ± 1% ~ (p=0.105 n=10) +GobEncode 12.09m ± 1% 11.94m ± 1% -1.24% (p=0.000 n=10) +Gzip 316.9m ± 0% 315.9m ± 0% -0.32% (p=0.001 n=10) +Gunzip 65.48m ± 0% 59.77m ± 0% -8.72% (p=0.000 n=10) +HTTPClientServer 70.36µ ± 0% 68.72µ ± 0% -2.34% (p=0.000 n=10) +JSONEncode 13.61m ± 1% 13.19m ± 1% -3.13% (p=0.000 n=10) +JSONDecode 57.52m ± 1% 54.15m ± 1% -5.86% (p=0.000 n=10) +Mandelbrot200 4.577m ± 0% 4.572m ± 0% -0.10% (p=0.002 n=10) +GoParse 6.466m ± 0% 6.363m ± 0% -1.58% (p=0.000 n=10) +RegexpMatchEasy0_32 89.20n ± 0% 87.72n ± 0% -1.65% (p=0.000 n=10) +RegexpMatchEasy0_1K 748.6n ± 0% 907.6n ± 0% +21.22% (p=0.000 n=10) +RegexpMatchEasy1_32 94.14n ± 0% 93.81n ± 0% -0.35% (p=0.000 n=10) +RegexpMatchEasy1_1K 832.1n ± 0% 953.6n ± 0% +14.59% (p=0.000 n=10) +RegexpMatchMedium_32 982.7n ± 0% 1018.0n ± 
0% +3.59% (p=0.000 n=10) +RegexpMatchMedium_1K 30.51µ ± 0% 30.00µ ± 0% -1.65% (p=0.000 n=10) +RegexpMatchHard_32 1.721µ ± 0% 1.664µ ± 0% -3.34% (p=0.000 n=10) +RegexpMatchHard_1K 50.76µ ± 0% 50.92µ ± 0% +0.32% (p=0.000 n=10) +Revcomp 870.5m ± 0% 710.5m ± 0% -18.38% (p=0.000 n=10) +Template 93.18m ± 1% 93.67m ± 1% ~ (p=0.123 n=10) +TimeParse 309.2n ± 0% 307.8n ± 0% -0.45% (p=0.000 n=10) +TimeFormat 401.5n ± 0% 394.2n ± 0% -1.82% (p=0.000 n=10) +geomean 72.73µ 71.70µ -1.41% + +Change-Id: Id8d342ef3bb82a420434b2b841674683efef67be +--- + src/cmd/asm/internal/asm/endtoend_test.go | 2 + + .../asm/internal/asm/testdata/loong64enc1.s | 24 ++ + .../asm/internal/asm/testdata/loong64enc2.s | 46 +++ + .../asm/internal/asm/testdata/loong64enc3.s | 65 ++++ + .../asm/internal/asm/testdata/loong64enc4.s | 42 +++ + .../asm/internal/asm/testdata/loong64enc5.s | 17 + + src/cmd/internal/obj/loong64/a.out.go | 54 ++- + src/cmd/internal/obj/loong64/asm.go | 321 +++++++++++++++++- + src/cmd/internal/obj/loong64/cnames.go | 14 + + 9 files changed, 579 insertions(+), 6 deletions(-) + create mode 100644 src/cmd/asm/internal/asm/testdata/loong64enc4.s + create mode 100644 src/cmd/asm/internal/asm/testdata/loong64enc5.s + +diff --git a/src/cmd/asm/internal/asm/endtoend_test.go b/src/cmd/asm/internal/asm/endtoend_test.go +index 6e1aa1cd95..3760b77625 100644 +--- a/src/cmd/asm/internal/asm/endtoend_test.go ++++ b/src/cmd/asm/internal/asm/endtoend_test.go +@@ -465,6 +465,8 @@ func TestLOONG64Encoder(t *testing.T) { + testEndToEnd(t, "loong64", "loong64enc1") + testEndToEnd(t, "loong64", "loong64enc2") + testEndToEnd(t, "loong64", "loong64enc3") ++ testEndToEnd(t, "loong64", "loong64enc4") ++ testEndToEnd(t, "loong64", "loong64enc5") + testEndToEnd(t, "loong64", "loong64") + } + +diff --git a/src/cmd/asm/internal/asm/testdata/loong64enc1.s b/src/cmd/asm/internal/asm/testdata/loong64enc1.s +index 4a88aca031..3a3eb10a74 100644 +--- a/src/cmd/asm/internal/asm/testdata/loong64enc1.s ++++ 
b/src/cmd/asm/internal/asm/testdata/loong64enc1.s +@@ -516,3 +516,27 @@ lable2: + XVPCNTH X3, X2 // 62249c76 + XVPCNTW X3, X2 // 62289c76 + XVPCNTV X3, X2 // 622c9c76 ++ ++ // MOVV C_DCON12_0, r ++ MOVV $0x7a90000000000000, R4 // MOVV $8831558869273542656, R4 // 04a41e03 ++ MOVV $0xea90000000000000, R4 // MOVV $-1544734672188080128, R4 // 04a43a03 ++ ++ // MOVV C_UCON, r ++ MOVV $0x54321000, R4 // MOVV $1412567040, R4 // 2464a814 ++ MOVV $0xffffffff8432f000, R4 // MOVV $-2077036544, R4 // e4650815 ++ ++ // MOVV C_ADDCON, r ++ MOVV $0xfffffffffffff821, R4 // MOVV $-2015, R4 // 0484e002 ++ ++ // MOVV C_ANDCON, r ++ MOVV $0x821, R4 // MOVV $2081, R4 // 0484a003 ++ ++ // ADDV C_SCON, [r1], r2 ++ ADDV $0x321, R4 // ADDV $801, R4 // 8484cc02 ++ ADDV $0x321, R5, R4 // ADDV $801, R5, R4 // a484cc02 ++ ADDV $0xfffffffffffffc21, R4 // ADDV $-991, R4 // 8484f002 ++ ADDV $0xfffffffffffffc21, R5, R4 // ADDV $-991, R5, R4 // a484f002 ++ ++ // AND C_SCON, [r1], r2 ++ AND $0x321, R4 // AND $801, R4 // 84844c03 ++ AND $0x321, R5, R4 // AND $801, R5, R4 // a4844c03 +diff --git a/src/cmd/asm/internal/asm/testdata/loong64enc2.s b/src/cmd/asm/internal/asm/testdata/loong64enc2.s +index e497b83627..ee3bad74b1 100644 +--- a/src/cmd/asm/internal/asm/testdata/loong64enc2.s ++++ b/src/cmd/asm/internal/asm/testdata/loong64enc2.s +@@ -77,3 +77,49 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0 + MOVH name(SB), R4 // 1e00001ac4034028 + MOVHU R4, name(SB) // 1e00001ac4034029 + MOVHU name(SB), R4 // 1e00001ac403402a ++ ++ // MOVV C_DCON12_20S, r ++ MOVV $0x273fffff80000000, R4 // MOVV $2828260563841187840, R4 // 0400001584cc0903 ++ MOVV $0xf73fffff80000000, R4 // MOVV $-630503949979353088, R4 // 0400001584cc3d03 ++ ++ // MOVV C_DCON20S_20, r ++ MOVV $0xfff800000f000000, R4 // MOVV $-2251799562027008, R4 // 04001e1404000017 ++ ++ // MOVV C_DCON12_12S, r ++ MOVV $0x273ffffffffff800, R4 // MOVV $2828260565988669440, R4 // 0400e00284cc0903 ++ MOVV $0xf73ffffffffff800, R4 // MOVV $-630503947831871488, R4 // 
0400e00284cc3d03 ++ ++ // MOVV C_DCON20S_12S, r ++ MOVV $0xfff80000fffff800, R4 // MOVV $-2251795518720000, R4 // 0400a00204000017 ++ MOVV $0xfff8000000000000, R4 // MOVV $-2251799813685248, R4 // 0400800204000017 ++ ++ // MOVV C_DCON12_12U, r ++ MOVV $0x2730000000000800, R4 // MOVV $2823756966361303040, R4 // 0400a00384cc0903 ++ MOVV $0xf730000000000800, R4 // MOVV $-635007547459237888, R4 // 0400a00384cc3d03 ++ ++ // MOVV C_DCON20S_12U, r ++ MOVV $0xfff8000000000800, R4 // MOVV $-2251799813683200, R4 // 0400a00304000017 ++ ++ // ADDV/AND C_DCON12_0, [r1], r2 ++ ADDV $0x3210000000000000, R4 // ADDV $3607383301523767296, R4 // 1e840c0384f81000 ++ ADDV $0x3210000000000000, R5, R4 // ADDV $3607383301523767296, R5, R4 // 1e840c03a4f81000 ++ ADDV $0xc210000000000000, R4 // ADDV $-4463067230724161536, R4 // 1e84300384f81000 ++ ADDV $0xc210000000000000, R5, R4 // ADDV $-4463067230724161536, R5, R4 // 1e843003a4f81000 ++ AND $0x3210000000000000, R4 // AND $3607383301523767296, R4 // 1e840c0384f81400 ++ AND $0x3210000000000000, R5, R4 // AND $3607383301523767296, R5, R4 // 1e840c03a4f81400 ++ AND $0xc210000000000000, R4 // AND $-4463067230724161536, R4 // 1e84300384f81400 ++ AND $0xc210000000000000, R5, R4 // AND $-4463067230724161536, R5, R4 // 1e843003a4f81400 ++ ++ // ADDV/AND C_UCON, [r1], r2 ++ ADDV $0x43210000, R4 // ADDV $1126236160, R4 // 1e42861484f81000 ++ ADDV $0x43210000, R5, R4 // ADDV $1126236160, R5, R4 // 1e428614a4f81000 ++ ADDV $0xffffffffc3210000, R4 // ADDV $-1021247488, R4 // 1e42861584f81000 ++ ADDV $0xffffffffc3210000, R5, R4 // ADDV $-1021247488, R5, R4 // 1e428615a4f81000 ++ AND $0x43210000, R4 // AND $1126236160, R4 // 1e42861484f81400 ++ AND $0x43210000, R5, R4 // AND $1126236160, R5, R4 // 1e428614a4f81400 ++ AND $0xffffffffc3210000, R4 // AND $-1021247488, R4 // 1e42861584f81400 ++ AND $0xffffffffc3210000, R5, R4 // AND $-1021247488, R5, R4 // 1e428615a4f81400 ++ ++ // AND C_ADDCON, [r1], r2 ++ AND $0xfffffffffffffc21, R4 // AND $-991, R4 // 
1e84b00284f81400 ++ AND $0xfffffffffffffc21, R5, R4 // AND $-991, R5, R4 // 1e84b002a4f81400 +diff --git a/src/cmd/asm/internal/asm/testdata/loong64enc3.s b/src/cmd/asm/internal/asm/testdata/loong64enc3.s +index 2600884309..2d83bd719a 100644 +--- a/src/cmd/asm/internal/asm/testdata/loong64enc3.s ++++ b/src/cmd/asm/internal/asm/testdata/loong64enc3.s +@@ -121,3 +121,68 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0 + XOR $74565, R4, R5 // 5e020014de178d0385f81500 + XOR $4097, R4 // 3e000014de07800384f81500 + XOR $4097, R4, R5 // 3e000014de07800385f81500 ++ ++ // MOVV C_DCON32_12S, r ++ MOVV $0x27312345fffff800, R4 // MOVV $2824077224892692480, R4 // 0400a002a468241684cc0903 ++ MOVV $0xf7312345fffff800, R4 // MOVV $-634687288927848448, R4 // 0400a002a468241684cc3d03 ++ ++ // MOVV C_DCON32_0, r ++ MOVV $0x2731234500000000, R4 // MOVV $2824077220597727232, R4 // 04008002a468241684cc0903 ++ MOVV $0xf731234500000000, R4 // MOVV $-634687293222813696, R4 // 04008002a468241684cc3d03 ++ ++ // MOVV C_DCON32_20, r ++ MOVV $0x2731234512345000, R4 // MOVV $2824077220903145472, R4 // a4682414a468241684cc0903 ++ MOVV $0xf731234512345000, R4 // MOVV $-634687292917395456, R4 // a4682414a468241684cc3d03 ++ ++ // MOVV C_DCON12_32S, r ++ MOVV $0x273fffff80000800, R4 // MOVV $2828260563841189888, R4 // 040000158400a00384cc0903 ++ MOVV $0xf73fffff80000800, R4 // MOVV $-630503949979351040, R4 // 040000158400a00384cc3d03 ++ ++ // MOVV C_DCON20S_32, r ++ MOVV $0xfff8000080000800, R4 // MOVV $-2251797666199552, R4 // 040000158400a00304000017 ++ ++ // MOVV C_DCON32_12U, r ++ MOVV $0x2731234500000800, R4 // MOVV $2824077220597729280, R4 // 0400a003a468241684cc0903 ++ MOVV $0xf731234500000800, R4 // MOVV $-634687293222811648, R4 // 0400a003a468241684cc3d03 ++ ++ // ADDV/AND C_DCON12_20S, [r1], r2 ++ ADDV $0x273fffff80000000, R4 // ADDV $2828260563841187840, R4 // 1e000015decf090384f81000 ++ ADDV $0x273fffff80000000, R4, R5 // ADDV $2828260563841187840, R4, R5 // 1e000015decf090385f81000 ++ AND 
$0x273fffff80000000, R4 // AND $2828260563841187840, R4 // 1e000015decf090384f81400 ++ AND $0x273fffff80000000, R4, R5 // AND $2828260563841187840, R4, R5 // 1e000015decf090385f81400 ++ ++ // ADDV/AND C_DCON20S_20, [r1], r2 ++ ADDV $0xfff800000f000000, R4 // ADDV $-2251799562027008, R4 // 1e001e141e00001784f81000 ++ ADDV $0xfff800000f000000, R4, R5 // ADDV $-2251799562027008, R4, R5 // 1e001e141e00001785f81000 ++ AND $0xfff800000f000000, R4 // AND $-2251799562027008, R4 // 1e001e141e00001784f81400 ++ AND $0xfff800000f000000, R4, R5 // AND $-2251799562027008, R4, R5 // 1e001e141e00001785f81400 ++ ++ // ADDV/AND C_DCON12_12S, [r1], r2 ++ ADDV $0x273ffffffffff800, R4 // ADDV $2828260565988669440, R4 // 1e00e002decf090384f81000 ++ ADDV $0x273ffffffffff800, R4, R5 // ADDV $2828260565988669440, R4, R5 // 1e00e002decf090385f81000 ++ AND $0x273ffffffffff800, R4 // AND $2828260565988669440, R4 // 1e00e002decf090384f81400 ++ AND $0x273ffffffffff800, R4, R5 // AND $2828260565988669440, R4, R5 // 1e00e002decf090385f81400 ++ ++ // ADDV/AND C_DCON20S_12S, [r1], r2 ++ ADDV $0xfff80000fffff800, R4 // ADDV $-2251795518720000, R4 // 1e00a0021e00001784f81000 ++ ADDV $0xfff80000fffff800, R4, R5 // ADDV $-2251795518720000, R4, R5 // 1e00a0021e00001785f81000 ++ AND $0xfff80000fffff800, R4 // AND $-2251795518720000, R4 // 1e00a0021e00001784f81400 ++ AND $0xfff80000fffff800, R4, R5 // AND $-2251795518720000, R4, R5 // 1e00a0021e00001785f81400 ++ ++ // ADDV/AND C_DCON20S_0, [r1], r2 ++ ADDV $0xfff8000000000000, R4 // ADDV $-2251799813685248, R4 // 1e0080021e00001784f81000 ++ ADDV $0xfff8000000000000, R4, R5 // ADDV $-2251799813685248, R4, R5 // 1e0080021e00001785f81000 ++ AND $0xfff8000000000000, R4 // AND $-2251799813685248, R4 // 1e0080021e00001784f81400 ++ AND $0xfff8000000000000, R4, R5 // AND $-2251799813685248, R4, R5 // 1e0080021e00001785f81400 ++ ++ // ADDV/AND C_DCON12_12U, [r1], r2 ++ ADDV $0x2730000000000800, R4 // ADDV $2823756966361303040, R4 // 1e00a003decf090384f81000 ++ 
ADDV $0x2730000000000800, R4, R5 // ADDV $2823756966361303040, R4, R5 // 1e00a003decf090385f81000 ++ AND $0x2730000000000800, R4 // AND $2823756966361303040, R4 // 1e00a003decf090384f81400 ++ AND $0x2730000000000800, R4, R5 // AND $2823756966361303040, R4, R5 // 1e00a003decf090385f81400 ++ ++ // ADDV/AND C_DCON20S_12U, [r1], r2 ++ ADDV $0xfff8000000000800, R4 // ADDV $-2251799813683200, R4 // 1e00a0031e00001784f81000 ++ ADDV $0xfff8000000000800, R4, R5 // ADDV $-2251799813683200, R4, R5 // 1e00a0031e00001785f81000 ++ AND $0xfff8000000000800, R4 // AND $-2251799813683200, R4 // 1e00a0031e00001784f81400 ++ AND $0xfff8000000000800, R4, R5 // AND $-2251799813683200, R4, R5 // 1e00a0031e00001785f81400 +diff --git a/src/cmd/asm/internal/asm/testdata/loong64enc4.s b/src/cmd/asm/internal/asm/testdata/loong64enc4.s +new file mode 100644 +index 0000000000..16c06a3501 +--- /dev/null ++++ b/src/cmd/asm/internal/asm/testdata/loong64enc4.s +@@ -0,0 +1,42 @@ ++// Copyright 2024 The Go Authors. All rights reserved. ++// Use of this source code is governed by a BSD-style ++// license that can be found in the LICENSE file. 
++ ++#include "../../../../../runtime/textflag.h" ++ ++TEXT asmtest(SB),DUPOK|NOSPLIT,$0 ++ // ADDV/AND C_DCON32_12S, [r1], r2 ++ ADDV $0x27312345fffff800, R4 // ADDV $2824077224892692480, R4 // 1e00a002be682416decf090384f81000 ++ ADDV $0x27312345fffff800, R4, R5 // ADDV $2824077224892692480, R4, R5 // 1e00a002be682416decf090385f81000 ++ AND $0x27312345fffff800, R4 // AND $2824077224892692480, R4 // 1e00a002be682416decf090384f81400 ++ AND $0x27312345fffff800, R4, R5 // AND $2824077224892692480, R4, R5 // 1e00a002be682416decf090385f81400 ++ ++ // ADDV/AND C_DCON32_0, [r1], r2 ++ ADDV $0x2731234500000000, R4 // ADDV $2824077220597727232, R4 // 1e008002be682416decf090384f81000 ++ ADDV $0x2731234500000000, R4, R5 // ADDV $2824077220597727232, R4, R5 // 1e008002be682416decf090385f81000 ++ AND $0x2731234500000000, R4 // AND $2824077220597727232, R4 // 1e008002be682416decf090384f81400 ++ AND $0x2731234500000000, R4, R5 // AND $2824077220597727232, R4, R5 // 1e008002be682416decf090385f81400 ++ ++ // ADDV/AND C_DCON32_20, [r1], r2 ++ ADDV $0x2731234512345000, R4 // ADDV $2824077220903145472, R4 // be682414be682416decf090384f81000 ++ ADDV $0x2731234512345000, R4, R5 // ADDV $2824077220903145472, R4, R5 // be682414be682416decf090385f81000 ++ AND $0x2731234512345000, R4 // AND $2824077220903145472, R4 // be682414be682416decf090384f81400 ++ AND $0x2731234512345000, R4, R5 // AND $2824077220903145472, R4, R5 // be682414be682416decf090385f81400 ++ ++ // ADDV/AND C_DCON12_32S, [r1], r2 ++ ADDV $0x273fffff80000800, R4 // ADDV $2828260563841189888, R4 // 1e000015de03a003decf090384f81000 ++ ADDV $0x273fffff80000800, R4, R5 // ADDV $2828260563841189888, R4, R5 // 1e000015de03a003decf090385f81000 ++ AND $0x273fffff80000800, R4 // AND $2828260563841189888, R4 // 1e000015de03a003decf090384f81400 ++ AND $0x273fffff80000800, R4, R5 // AND $2828260563841189888, R4, R5 // 1e000015de03a003decf090385f81400 ++ ++ // ADDV/AND C_DCON20S_32, [r1], r2 ++ ADDV $0xfff8000080000800, R4 // ADDV 
$-2251797666199552, R4 // 1e000015de03a0031e00001784f81000 ++ ADDV $0xfff8000080000800, R4, R5 // ADDV $-2251797666199552, R4, R5 // 1e000015de03a0031e00001785f81000 ++ AND $0xfff8000080000800, R4 // AND $-2251797666199552, R4 // 1e000015de03a0031e00001784f81400 ++ AND $0xfff8000080000800, R4, R5 // AND $-2251797666199552, R4, R5 // 1e000015de03a0031e00001785f81400 ++ ++ // ADDV/AND C_DCON32_12U, [r1], r2 ++ ADDV $0x2731234500000800, R4 // ADDV $2824077220597729280, R4 // 1e00a003be682416decf090384f81000 ++ ADDV $0x2731234500000800, R4, R5 // ADDV $2824077220597729280, R4, R5 // 1e00a003be682416decf090385f81000 ++ AND $0x2731234500000800, R4 // AND $2824077220597729280, R4 // 1e00a003be682416decf090384f81400 ++ AND $0x2731234500000800, R4, R5 // AND $2824077220597729280, R4, R5 // 1e00a003be682416decf090385f81400 +diff --git a/src/cmd/asm/internal/asm/testdata/loong64enc5.s b/src/cmd/asm/internal/asm/testdata/loong64enc5.s +new file mode 100644 +index 0000000000..423e5c3b01 +--- /dev/null ++++ b/src/cmd/asm/internal/asm/testdata/loong64enc5.s +@@ -0,0 +1,17 @@ ++// Copyright 2024 The Go Authors. All rights reserved. ++// Use of this source code is governed by a BSD-style ++// license that can be found in the LICENSE file. 
++ ++#include "../../../../../runtime/textflag.h" ++ ++TEXT asmtest(SB),DUPOK|NOSPLIT,$0 ++ // ADDV/AND C_DCON, [r1], r2 ++ ADDV $0xfedcba9876543210, R4 // ADDV $-81985529216486896, R4 // 7ea8ec14de4388031e539717deb73f0384f81000 ++ ADDV $0xfedcba9876543210, R5, R4 // ADDV $-81985529216486896, R5, R4 // 7ea8ec14de4388031e539717deb73f03a4f81000 ++ ADDV $0x4edcba9876543210, R4 // ADDV $5682621993817747984, R4 // 7ea8ec14de4388031e539717deb7130384f81000 ++ ADDV $0x4edcba9876543210, R5, R4 // ADDV $5682621993817747984, R5, R4 // 7ea8ec14de4388031e539717deb71303a4f81000 ++ AND $0x4edcba9876543210, R4 // AND $5682621993817747984, R4 // 7ea8ec14de4388031e539717deb7130384f81400 ++ AND $0x4edcba9876543210, R5, R4 // AND $5682621993817747984, R5, R4 // 7ea8ec14de4388031e539717deb71303a4f81400 ++ AND $0xfedcba9876543210, R4 // AND $-81985529216486896, R4 // 7ea8ec14de4388031e539717deb73f0384f81400 ++ AND $0xfedcba9876543210, R5, R4 // AND $-81985529216486896, R5, R4 // 7ea8ec14de4388031e539717deb73f03a4f81400 ++ +diff --git a/src/cmd/internal/obj/loong64/a.out.go b/src/cmd/internal/obj/loong64/a.out.go +index 53b005af4d..b2207c2523 100644 +--- a/src/cmd/internal/obj/loong64/a.out.go ++++ b/src/cmd/internal/obj/loong64/a.out.go +@@ -328,12 +328,58 @@ const ( + C_ZCON + C_SCON // 12 bit signed + C_UCON // 32 bit signed, low 12 bits 0 ++ ++ // When the immediate value is SCON, it can choose either the ADDCON implementation ++ // or the ANDCON implementation, using ADD0CON/AND0CON to distinguish them, so that ++ // the program can choose the implementation with fewer instructions. + C_ADD0CON + C_AND0CON +- C_ADDCON // -0x800 <= v < 0 +- C_ANDCON // 0 < v <= 0xFFF +- C_LCON // other 32 +- C_DCON // other 64 (could subdivide further) ++ ++ C_ADDCON // -0x800 <= v < 0 ++ C_ANDCON // 0 < v <= 0xFFF ++ C_LCON // other 32 ++ ++ // 64 bit signed, lo32 bits 0, hi20 bits are not 0, hi12 bits can ++ // be obtained by sign extension of the hi20 bits. 
++ C_DCON20S_0 ++ // 64 bit signed, lo52 bits 0, hi12 bits are not 0. ++ C_DCON12_0 ++ // 64 bit signed, lo32 bits 0, hi32 bits are not 0. ++ C_DCON32_0 ++ // 64 bit signed, lo12 bits 0, lo20 bits are not 0, hi20 bits can be ++ // obtained by sign extension of the lo20 bits, other bits are not 0. ++ C_DCON12_20S ++ // 64 bit signed, lo12 bits 0, hi20 bits are not 0, hi12 bits can be ++ // obtained by sign extension of the hi20 bits, other bits are not 0. ++ C_DCON20S_20 ++ // 64 bit signed, lo12 bits 0, other bits are not 0. ++ C_DCON32_20 ++ // 64 bit signed, lo12 bits are not 0, 12~51 bits can be obtained ++ // by sign extension of the lo12 bits, other bits are not 0. ++ C_DCON12_12S ++ // 64 bit signed, hi20 bits and lo12 bits are not 0, hi12 bits can ++ // be obtained by sign extension of the hi20 bits, lo20 bits can ++ // be obtained by sign extension of the lo12 bits. ++ C_DCON20S_12S ++ // 64 bit signed, lo12 bits are not 0, lo20 bits can be obtained by sign ++ // extension of the lo12 bits, other bits are not 0. ++ C_DCON32_12S ++ // 64 bit signed, lo20 and lo12 bits are not 0, hi20 bits can be obtained by sign ++ // extension of the lo20 bits. other bits are not 0. ++ C_DCON12_32S ++ // 64 bit signed, hi20 bits are not 0, hi12 bits can be obtained by sign ++ // extension of the hi20 bits, lo32 bits are not 0. ++ C_DCON20S_32 ++ // 64 bit signed, 12~51 bits 0, other bits are not 0. ++ C_DCON12_12U ++ // 64 bit signed, lo20 bits 0, hi20 bits are not 0, hi12 bits can be ++ // obtained by sign extension of the hi20 bits, lo12 bits are not 0. ++ C_DCON20S_12U ++ // 64 bit signed, lo20 bits 0, other bits are not 0. 
++ C_DCON32_12U ++ // other 64 ++ C_DCON ++ + C_SACON // $n(REG) where n <= int12 + C_LACON // $n(REG) where int12 < n <= int32 + C_DACON // $n(REG) where int32 < n +diff --git a/src/cmd/internal/obj/loong64/asm.go b/src/cmd/internal/obj/loong64/asm.go +index 9024c5e53e..5757c3c452 100644 +--- a/src/cmd/internal/obj/loong64/asm.go ++++ b/src/cmd/internal/obj/loong64/asm.go +@@ -9,6 +9,7 @@ import ( + "cmd/internal/objabi" + "fmt" + "log" ++ "math/bits" + "slices" + ) + +@@ -192,6 +193,9 @@ var optab = []Optab{ + {AMOVV, C_UCON, C_NONE, C_NONE, C_REG, C_NONE, 24, 4, 0, 0}, + {AMOVW, C_LCON, C_NONE, C_NONE, C_REG, C_NONE, 19, 8, 0, NOTUSETMP}, + {AMOVV, C_LCON, C_NONE, C_NONE, C_REG, C_NONE, 19, 8, 0, NOTUSETMP}, ++ {AMOVV, C_DCON12_0, C_NONE, C_NONE, C_REG, C_NONE, 67, 4, 0, NOTUSETMP}, ++ {AMOVV, C_DCON12_20S, C_NONE, C_NONE, C_REG, C_NONE, 68, 8, 0, NOTUSETMP}, ++ {AMOVV, C_DCON32_12S, C_NONE, C_NONE, C_REG, C_NONE, 69, 12, 0, NOTUSETMP}, + {AMOVV, C_DCON, C_NONE, C_NONE, C_REG, C_NONE, 59, 16, 0, NOTUSETMP}, + + {AADD, C_ADD0CON, C_REG, C_NONE, C_REG, C_NONE, 4, 4, 0, 0}, +@@ -225,6 +229,20 @@ var optab = []Optab{ + + {AADDV, C_DCON, C_NONE, C_NONE, C_REG, C_NONE, 60, 20, 0, 0}, + {AADDV, C_DCON, C_REG, C_NONE, C_REG, C_NONE, 60, 20, 0, 0}, ++ {AAND, C_DCON, C_NONE, C_NONE, C_REG, C_NONE, 60, 20, 0, 0}, ++ {AAND, C_DCON, C_REG, C_NONE, C_REG, C_NONE, 60, 20, 0, 0}, ++ {AADDV, C_DCON12_0, C_NONE, C_NONE, C_REG, C_NONE, 70, 8, 0, 0}, ++ {AADDV, C_DCON12_0, C_REG, C_NONE, C_REG, C_NONE, 70, 8, 0, 0}, ++ {AAND, C_DCON12_0, C_NONE, C_NONE, C_REG, C_NONE, 70, 8, 0, 0}, ++ {AAND, C_DCON12_0, C_REG, C_NONE, C_REG, C_NONE, 70, 8, 0, 0}, ++ {AADDV, C_DCON12_20S, C_NONE, C_NONE, C_REG, C_NONE, 71, 12, 0, 0}, ++ {AADDV, C_DCON12_20S, C_REG, C_NONE, C_REG, C_NONE, 71, 12, 0, 0}, ++ {AAND, C_DCON12_20S, C_NONE, C_NONE, C_REG, C_NONE, 71, 12, 0, 0}, ++ {AAND, C_DCON12_20S, C_REG, C_NONE, C_REG, C_NONE, 71, 12, 0, 0}, ++ {AADDV, C_DCON32_12S, C_NONE, C_NONE, C_REG, C_NONE, 72, 
16, 0, 0}, ++ {AADDV, C_DCON32_12S, C_REG, C_NONE, C_REG, C_NONE, 72, 16, 0, 0}, ++ {AAND, C_DCON32_12S, C_NONE, C_NONE, C_REG, C_NONE, 72, 16, 0, 0}, ++ {AAND, C_DCON32_12S, C_REG, C_NONE, C_REG, C_NONE, 72, 16, 0, 0}, + + {ASLL, C_SCON, C_REG, C_NONE, C_REG, C_NONE, 16, 4, 0, 0}, + {ASLL, C_SCON, C_NONE, C_NONE, C_REG, C_NONE, 16, 4, 0, 0}, +@@ -790,7 +808,7 @@ func (c *ctxt0) aclass(a *obj.Addr) int { + } + + if c.instoffset != int64(int32(c.instoffset)) { +- return C_DCON ++ return dconClass(c.instoffset) + } + + if c.instoffset >= 0 { +@@ -830,6 +848,159 @@ func (c *ctxt0) aclass(a *obj.Addr) int { + return C_GOK + } + ++// The constants here define the data characteristics within the bit field range. ++// ++// ALL1: The data in the bit field is all 1 ++// ALL0: The data in the bit field is all 0 ++// ST1: The data in the bit field starts with 1, but not all 1 ++// ST0: The data in the bit field starts with 0, but not all 0 ++const ( ++ ALL1 = iota ++ ALL0 ++ ST1 ++ ST0 ++) ++ ++// mask returns the mask of the specified bit field, which is used to help determine ++// the data characteristics of the immediate value at the specified bit. 
++func mask(suf int8, len int8) (uint64, uint64) { ++ if len == 12 { ++ if suf == 0 { ++ return 0xfff, 0x800 ++ } else { // suf == 52 ++ return 0xfff0000000000000, 0x8000000000000000 ++ } ++ } else { // len == 20 ++ if suf == 12 { ++ return 0xfffff000, 0x80000000 ++ } else { // suf == 32 ++ return 0xfffff00000000, 0x8000000000000 ++ } ++ } ++} ++ ++// bitField return a number represent status of val in bit field ++// ++// suf: The starting bit of the bit field ++// len: The length of the bit field ++func bitField(val int64, suf int8, len int8) int8 { ++ mask1, mask2 := mask(suf, len) ++ if uint64(val)&mask1 == mask1 { ++ return ALL1 ++ } else if uint64(val)&mask1 == 0x0 { ++ return ALL0 ++ } else if uint64(val)&mask2 == mask2 { ++ return ST1 ++ } else { ++ return ST0 ++ } ++} ++ ++// Loading an immediate value larger than 32 bits requires four instructions ++// on loong64 (lu12i.w + ori + lu32i.d + lu52i.d), but in some special cases, ++// we can use the sign extension and zero extension features of the instruction ++// to fill in the high-order data (all 0 or all 1), which can save one to ++// three instructions. 
++// ++// | 63 ~ 52 | 51 ~ 32 | 31 ~ 12 | 11 ~ 0 | ++// | lu52i.d | lu32i.d | lu12i.w | ori | ++func dconClass(offset int64) int { ++ tzb := bits.TrailingZeros64(uint64(offset)) ++ hi12 := bitField(offset, 52, 12) ++ hi20 := bitField(offset, 32, 20) ++ lo20 := bitField(offset, 12, 20) ++ lo12 := bitField(offset, 0, 12) ++ if tzb >= 52 { ++ return C_DCON12_0 // lu52i.d ++ } ++ if tzb >= 32 { ++ if ((hi20 == ALL1 || hi20 == ST1) && hi12 == ALL1) || ((hi20 == ALL0 || hi20 == ST0) && hi12 == ALL0) { ++ return C_DCON20S_0 // addi.w + lu32i.d ++ } ++ return C_DCON32_0 // addi.w + lu32i.d + lu52i.d ++ } ++ if tzb >= 12 { ++ if lo20 == ST1 || lo20 == ALL1 { ++ if hi20 == ALL1 { ++ return C_DCON12_20S // lu12i.w + lu52i.d ++ } ++ if (hi20 == ST1 && hi12 == ALL1) || ((hi20 == ST0 || hi20 == ALL0) && hi12 == ALL0) { ++ return C_DCON20S_20 // lu12i.w + lu32i.d ++ } ++ return C_DCON32_20 // lu12i.w + lu32i.d + lu52i.d ++ } ++ if hi20 == ALL0 { ++ return C_DCON12_20S // lu12i.w + lu52i.d ++ } ++ if (hi20 == ST0 && hi12 == ALL0) || ((hi20 == ST1 || hi20 == ALL1) && hi12 == ALL1) { ++ return C_DCON20S_20 // lu12i.w + lu32i.d ++ } ++ return C_DCON32_20 // lu12i.w + lu32i.d + lu52i.d ++ } ++ if lo12 == ST1 || lo12 == ALL1 { ++ if lo20 == ALL1 { ++ if hi20 == ALL1 { ++ return C_DCON12_12S // addi.d + lu52i.d ++ } ++ if (hi20 == ST1 && hi12 == ALL1) || ((hi20 == ST0 || hi20 == ALL0) && hi12 == ALL0) { ++ return C_DCON20S_12S // addi.w + lu32i.d ++ } ++ return C_DCON32_12S // addi.w + lu32i.d + lu52i.d ++ } ++ if lo20 == ST1 { ++ if hi20 == ALL1 { ++ ++ return C_DCON12_32S // lu12i.w + ori + lu52i.d ++ } ++ if (hi20 == ST1 && hi12 == ALL1) || ((hi20 == ST0 || hi20 == ALL0) && hi12 == ALL0) { ++ return C_DCON20S_32 // lu12i.w + ori + lu32i.d ++ } ++ return C_DCON // lu12i.w + ori + lu32i.d + lu52i.d ++ } ++ if lo20 == ALL0 { ++ if hi20 == ALL0 { ++ return C_DCON12_12U // ori + lu52i.d ++ } ++ if ((hi20 == ST1 || hi20 == ALL1) && hi12 == ALL1) || (hi20 == ST0 && hi12 == ALL0) { ++ return 
C_DCON20S_12U // ori + lu32i.d ++ } ++ return C_DCON32_12U // ori + lu32i.d + lu52i.d ++ } ++ if hi20 == ALL0 { ++ return C_DCON12_32S // lu12i.w + ori + lu52i.d ++ } ++ if ((hi20 == ST1 || hi20 == ALL1) && hi12 == ALL1) || (hi20 == ST0 && hi12 == ALL0) { ++ return C_DCON20S_32 // lu12i.w + ori + lu32i.d ++ } ++ return C_DCON // lu12i.w + ori + lu32i.d + lu52i.d ++ } ++ if lo20 == ALL0 { ++ if hi20 == ALL0 { ++ return C_DCON12_12U // ori + lu52i.d ++ } ++ if ((hi20 == ST1 || hi20 == ALL1) && hi12 == ALL1) || (hi20 == ST0 && hi12 == ALL0) { ++ return C_DCON20S_12U // ori + lu32i.d ++ } ++ return C_DCON32_12U // ori + lu32i.d + lu52i.d ++ } ++ if lo20 == ST1 || lo20 == ALL1 { ++ if hi20 == ALL1 { ++ return C_DCON12_32S // lu12i.w + ori + lu52i.d ++ } ++ if (hi20 == ST1 && hi12 == ALL1) || ((hi20 == ST0 || hi20 == ALL0) && hi12 == ALL0) { ++ return C_DCON20S_32 // lu12i.w + ori + lu32i.d ++ } ++ return C_DCON // lu12i.w + ori + lu32i.d + lu52i.d ++ } ++ if hi20 == ALL0 { ++ return C_DCON12_32S // lu12i.w + ori + lu52i.d ++ } ++ if ((hi20 == ST1 || hi20 == ALL1) && hi12 == ALL1) || (hi20 == ST0 && hi12 == ALL0) { ++ return C_DCON20S_32 // lu12i.w + ori + lu32i.d ++ } ++ return C_DCON // lu12i.w + ori + lu32i.d + lu52i.d ++} ++ + // In Loong64,there are 8 CFRs, denoted as fcc0-fcc7. + // There are 4 FCSRs, denoted as fcsr0-fcsr3. 
+ func (c *ctxt0) rclass(r int16) int { +@@ -935,7 +1106,14 @@ func cmp(a int, b int) bool { + } + switch a { + case C_DCON: +- if b == C_LCON { ++ if b == C_LCON || b == C_DCON32_0 || ++ b == C_DCON12_0 || b == C_DCON20S_0 || ++ b == C_DCON12_20S || b == C_DCON12_12S || ++ b == C_DCON20S_20 || b == C_DCON32_20 || ++ b == C_DCON20S_12S || b == C_DCON32_12S || ++ b == C_DCON12_32S || b == C_DCON20S_32 || ++ b == C_DCON12_12U || b == C_DCON20S_12U || ++ b == C_DCON32_12U { + return true + } + fallthrough +@@ -944,6 +1122,22 @@ func cmp(a int, b int) bool { + return true + } + ++ case C_DCON12_0: ++ ++ case C_DCON12_20S: ++ if b == C_DCON20S_20 || b == C_DCON12_12S || ++ b == C_DCON20S_12S || b == C_DCON12_12U || ++ b == C_DCON20S_12U || b == C_DCON20S_0 { ++ return true ++ } ++ ++ case C_DCON32_12S: ++ if b == C_DCON32_20 || b == C_DCON12_32S || ++ b == C_DCON20S_32 || b == C_DCON32_12U || ++ b == C_DCON32_0 { ++ return true ++ } ++ + case C_ADD0CON: + if b == C_ADDCON { + return true +@@ -2015,6 +2209,129 @@ func (c *ctxt0) asmout(p *obj.Prog, o *Optab, out []uint32) { + c.ctxt.Diag("illegal register combination: %v\n", p) + } + o1 = OP_RRR(atomicInst[p.As], uint32(rk), uint32(rj), uint32(rd)) ++ ++ case 67: // mov $dcon12_0, r ++ v := c.vregoff(&p.From) ++ o1 = OP_12IRR(c.opirr(ALU52ID), uint32(v>>52), uint32(0), uint32(p.To.Reg)) ++ ++ case 68: // mov $dcon12_20S, r ++ v := c.vregoff(&p.From) ++ contype := c.aclass(&p.From) ++ switch contype { ++ default: // C_DCON12_20S ++ o1 = OP_IR(c.opir(ALU12IW), uint32(v>>12), uint32(p.To.Reg)) ++ o2 = OP_12IRR(c.opirr(ALU52ID), uint32(v>>52), uint32(p.To.Reg), uint32(p.To.Reg)) ++ case C_DCON20S_20: ++ o1 = OP_IR(c.opir(ALU12IW), uint32(v>>12), uint32(p.To.Reg)) ++ o2 = OP_IR(c.opir(ALU32ID), uint32(v>>32), uint32(p.To.Reg)) ++ case C_DCON12_12S: ++ o1 = OP_12IRR(c.opirr(AADDV), uint32(v), uint32(0), uint32(p.To.Reg)) ++ o2 = OP_12IRR(c.opirr(ALU52ID), uint32(v>>52), uint32(p.To.Reg), uint32(p.To.Reg)) ++ case 
C_DCON20S_12S, C_DCON20S_0: ++ o1 = OP_12IRR(c.opirr(AADD), uint32(v), uint32(0), uint32(p.To.Reg)) ++ o2 = OP_IR(c.opir(ALU32ID), uint32(v>>32), uint32(p.To.Reg)) ++ case C_DCON12_12U: ++ o1 = OP_12IRR(c.opirr(AOR), uint32(v), uint32(0), uint32(p.To.Reg)) ++ o2 = OP_12IRR(c.opirr(ALU52ID), uint32(v>>52), uint32(p.To.Reg), uint32(p.To.Reg)) ++ case C_DCON20S_12U: ++ o1 = OP_12IRR(c.opirr(AOR), uint32(v), uint32(0), uint32(p.To.Reg)) ++ o2 = OP_IR(c.opir(ALU32ID), uint32(v>>32), uint32(p.To.Reg)) ++ } ++ ++ case 69: // mov $dcon32_12S, r ++ v := c.vregoff(&p.From) ++ contype := c.aclass(&p.From) ++ switch contype { ++ default: // C_DCON32_12S, C_DCON32_0 ++ o1 = OP_12IRR(c.opirr(AADD), uint32(v), uint32(0), uint32(p.To.Reg)) ++ o2 = OP_IR(c.opir(ALU32ID), uint32(v>>32), uint32(p.To.Reg)) ++ o3 = OP_12IRR(c.opirr(ALU52ID), uint32(v>>52), uint32(p.To.Reg), uint32(p.To.Reg)) ++ case C_DCON32_20: ++ o1 = OP_IR(c.opir(ALU12IW), uint32(v>>12), uint32(p.To.Reg)) ++ o2 = OP_IR(c.opir(ALU32ID), uint32(v>>32), uint32(p.To.Reg)) ++ o3 = OP_12IRR(c.opirr(ALU52ID), uint32(v>>52), uint32(p.To.Reg), uint32(p.To.Reg)) ++ case C_DCON12_32S: ++ o1 = OP_IR(c.opir(ALU12IW), uint32(v>>12), uint32(p.To.Reg)) ++ o2 = OP_12IRR(c.opirr(AOR), uint32(v), uint32(p.To.Reg), uint32(p.To.Reg)) ++ o3 = OP_12IRR(c.opirr(ALU52ID), uint32(v>>52), uint32(p.To.Reg), uint32(p.To.Reg)) ++ case C_DCON20S_32: ++ o1 = OP_IR(c.opir(ALU12IW), uint32(v>>12), uint32(p.To.Reg)) ++ o2 = OP_12IRR(c.opirr(AOR), uint32(v), uint32(p.To.Reg), uint32(p.To.Reg)) ++ o3 = OP_IR(c.opir(ALU32ID), uint32(v>>32), uint32(p.To.Reg)) ++ case C_DCON32_12U: ++ o1 = OP_12IRR(c.opirr(AOR), uint32(v), uint32(0), uint32(p.To.Reg)) ++ o2 = OP_IR(c.opir(ALU32ID), uint32(v>>32), uint32(p.To.Reg)) ++ o3 = OP_12IRR(c.opirr(ALU52ID), uint32(v>>52), uint32(p.To.Reg), uint32(p.To.Reg)) ++ } ++ ++ case 70: // add $dcon12_0,[r1],r2 ++ v := c.vregoff(&p.From) ++ r := int(p.Reg) ++ if r == 0 { ++ r = int(p.To.Reg) ++ } ++ o1 = 
OP_12IRR(c.opirr(ALU52ID), uint32(v>>52), uint32(0), uint32(REGTMP)) ++ o2 = OP_RRR(c.oprrr(p.As), uint32(REGTMP), uint32(r), uint32(p.To.Reg)) ++ ++ case 71: // add $dcon12_20S,[r1],r2 ++ v := c.vregoff(&p.From) ++ r := int(p.Reg) ++ if r == 0 { ++ r = int(p.To.Reg) ++ } ++ contype := c.aclass(&p.From) ++ switch contype { ++ default: // C_DCON12_20S ++ o1 = OP_IR(c.opir(ALU12IW), uint32(v>>12), uint32(REGTMP)) ++ o2 = OP_12IRR(c.opirr(ALU52ID), uint32(v>>52), uint32(REGTMP), uint32(REGTMP)) ++ case C_DCON20S_20: ++ o1 = OP_IR(c.opir(ALU12IW), uint32(v>>12), uint32(REGTMP)) ++ o2 = OP_IR(c.opir(ALU32ID), uint32(v>>32), uint32(REGTMP)) ++ case C_DCON12_12S: ++ o1 = OP_12IRR(c.opirr(AADDV), uint32(v), uint32(0), uint32(REGTMP)) ++ o2 = OP_12IRR(c.opirr(ALU52ID), uint32(v>>52), uint32(REGTMP), uint32(REGTMP)) ++ case C_DCON20S_12S, C_DCON20S_0: ++ o1 = OP_12IRR(c.opirr(AADD), uint32(v), uint32(0), uint32(REGTMP)) ++ o2 = OP_IR(c.opir(ALU32ID), uint32(v>>32), uint32(REGTMP)) ++ case C_DCON12_12U: ++ o1 = OP_12IRR(c.opirr(AOR), uint32(v), uint32(0), uint32(REGTMP)) ++ o2 = OP_12IRR(c.opirr(ALU52ID), uint32(v>>52), uint32(REGTMP), uint32(REGTMP)) ++ case C_DCON20S_12U: ++ o1 = OP_12IRR(c.opirr(AOR), uint32(v), uint32(0), uint32(REGTMP)) ++ o2 = OP_IR(c.opir(ALU32ID), uint32(v>>32), uint32(REGTMP)) ++ } ++ o3 = OP_RRR(c.oprrr(p.As), uint32(REGTMP), uint32(r), uint32(p.To.Reg)) ++ ++ case 72: // add $dcon32_12S,[r1],r2 ++ v := c.vregoff(&p.From) ++ r := int(p.Reg) ++ if r == 0 { ++ r = int(p.To.Reg) ++ } ++ contype := c.aclass(&p.From) ++ switch contype { ++ default: // C_DCON32_12S, C_DCON32_0 ++ o1 = OP_12IRR(c.opirr(AADD), uint32(v), uint32(0), uint32(REGTMP)) ++ o2 = OP_IR(c.opir(ALU32ID), uint32(v>>32), uint32(REGTMP)) ++ o3 = OP_12IRR(c.opirr(ALU52ID), uint32(v>>52), uint32(REGTMP), uint32(REGTMP)) ++ case C_DCON32_20: ++ o1 = OP_IR(c.opir(ALU12IW), uint32(v>>12), uint32(REGTMP)) ++ o2 = OP_IR(c.opir(ALU32ID), uint32(v>>32), uint32(REGTMP)) ++ o3 = 
OP_12IRR(c.opirr(ALU52ID), uint32(v>>52), uint32(REGTMP), uint32(REGTMP)) ++ case C_DCON12_32S: ++ o1 = OP_IR(c.opir(ALU12IW), uint32(v>>12), uint32(REGTMP)) ++ o2 = OP_12IRR(c.opirr(AOR), uint32(v), uint32(REGTMP), uint32(REGTMP)) ++ o3 = OP_12IRR(c.opirr(ALU52ID), uint32(v>>52), uint32(REGTMP), uint32(REGTMP)) ++ case C_DCON20S_32: ++ o1 = OP_IR(c.opir(ALU12IW), uint32(v>>12), uint32(REGTMP)) ++ o2 = OP_12IRR(c.opirr(AOR), uint32(v), uint32(REGTMP), uint32(REGTMP)) ++ o3 = OP_IR(c.opir(ALU32ID), uint32(v>>32), uint32(REGTMP)) ++ case C_DCON32_12U: ++ o1 = OP_12IRR(c.opirr(AOR), uint32(v), uint32(0), uint32(REGTMP)) ++ o2 = OP_IR(c.opir(ALU32ID), uint32(v>>32), uint32(REGTMP)) ++ o3 = OP_12IRR(c.opirr(ALU52ID), uint32(v>>52), uint32(REGTMP), uint32(REGTMP)) ++ } ++ o4 = OP_RRR(c.oprrr(p.As), uint32(REGTMP), uint32(r), uint32(p.To.Reg)) + } + + out[0] = o1 +diff --git a/src/cmd/internal/obj/loong64/cnames.go b/src/cmd/internal/obj/loong64/cnames.go +index ce76109d2a..a2f04a22ee 100644 +--- a/src/cmd/internal/obj/loong64/cnames.go ++++ b/src/cmd/internal/obj/loong64/cnames.go +@@ -21,6 +21,20 @@ var cnames0 = []string{ + "ADDCON", + "ANDCON", + "LCON", ++ "DCON20S_0", ++ "DCON12_0", ++ "DCON32_0", ++ "DCON12_20S", ++ "DCON20S_20", ++ "DCON32_20", ++ "DCON12_12S", ++ "DCON20S_12S", ++ "DCON32_12S", ++ "DCON12_32S", ++ "DCON20S_32", ++ "DCON12_12U", ++ "DCON20S_12U", ++ "DCON32_12U", + "DCON", + "SACON", + "LACON", +-- +2.38.1 + diff --git a/0012-cmd-compiler-runtime-internal-atomic-optimize-And-32.patch b/0012-cmd-compiler-runtime-internal-atomic-optimize-And-32.patch deleted file mode 100644 index 2a37ca83f98a19b152d96e4464b29f9d32c0923a..0000000000000000000000000000000000000000 --- a/0012-cmd-compiler-runtime-internal-atomic-optimize-And-32.patch +++ /dev/null @@ -1,399 +0,0 @@ -From fe590f50bffe5cc7a2f99d558e34a02f2ede6c8c Mon Sep 17 00:00:00 2001 -From: Guoqi Chen -Date: Wed, 5 Apr 2023 22:15:46 +0800 -Subject: [PATCH 12/51] cmd/compiler,runtime/internal/atomic: 
optimize - And{32,8} and Or{32,8} on loong64 -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Use Loong64's atomic operation instruction AMANDW to implement And{32,8}, -AMORW to implement Or{32,8}, and intrinsify them. - -goos: linux -goarch: loong64 -pkg: runtime/internal/atomic - │ bench.old │ bench.new │ - │ sec/op │ sec/op vs base │ -And8 36.25n ± 0% 17.14n ± 0% -52.72% (p=0.000 n=25) -And8-2 36.19n ± 0% 17.16n ± 0% -52.58% (p=0.000 n=25) -And8-4 36.12n ± 0% 17.15n ± 0% -52.52% (p=0.000 n=25) -And8-8 36.15n ± 0% 17.15n ± 0% -52.56% (p=0.000 n=25) -And8-16 36.15n ± 0% 17.15n ± 0% -52.56% (p=0.000 n=25) -And 35.84n ± 0% 16.15n ± 0% -54.94% (p=0.000 n=25) -And-2 35.79n ± 0% 16.19n ± 0% -54.76% (p=0.000 n=25) -And-4 35.78n ± 0% 16.18n ± 0% -54.78% (p=0.000 n=25) -And-8 35.79n ± 0% 16.19n ± 0% -54.76% (p=0.000 n=25) -And-16 35.79n ± 0% 16.19n ± 0% -54.76% (p=0.000 n=25) -And8Parallel 35.43n ± 0% 17.31n ± 0% -51.14% (p=0.000 n=25) -And8Parallel-2 42.19n ± 3% 26.85n ± 2% -36.36% (p=0.000 n=25) -And8Parallel-4 46.59n ± 2% 33.31n ± 1% -28.50% (p=0.000 n=25) -And8Parallel-8 45.36n ± 1% 32.64n ± 2% -28.04% (p=0.000 n=25) -And8Parallel-16 45.24n ± 0% 33.63n ± 0% -25.66% (p=0.000 n=25) -AndParallel 34.20n ± 0% 16.39n ± 0% -52.08% (p=0.000 n=25) -AndParallel-2 42.30n ± 3% 24.54n ± 4% -41.99% (p=0.000 n=25) -AndParallel-4 44.58n ± 2% 33.64n ± 7% -24.54% (p=0.000 n=25) -AndParallel-8 45.27n ± 1% 34.14n ± 1% -24.59% (p=0.000 n=25) -AndParallel-16 44.77n ± 1% 33.68n ± 0% -24.77% (p=0.000 n=25) -Or8 35.84n ± 0% 16.67n ± 0% -53.49% (p=0.000 n=25) -Or8-2 35.78n ± 0% 16.67n ± 0% -53.41% (p=0.000 n=25) -Or8-4 35.82n ± 0% 16.69n ± 0% -53.41% (p=0.000 n=25) -Or8-8 35.84n ± 0% 16.68n ± 0% -53.46% (p=0.000 n=25) -Or8-16 35.76n ± 0% 16.68n ± 0% -53.36% (p=0.000 n=25) -Or 35.71n ± 0% 16.17n ± 0% -54.72% (p=0.000 n=25) -Or-2 35.78n ± 0% 16.19n ± 0% -54.75% (p=0.000 n=25) -Or-4 35.81n ± 0% 16.19n ± 0% -54.79% (p=0.000 n=25) -Or-8 35.81n ± 0% 
16.19n ± 0% -54.79% (p=0.000 n=25) -Or-16 35.81n ± 0% 16.19n ± 0% -54.79% (p=0.000 n=25) -Or8Parallel 34.56n ± 0% 16.89n ± 0% -51.13% (p=0.000 n=25) -Or8Parallel-2 43.00n ± 0% 33.59n ± 19% -21.88% (p=0.000 n=25) -Or8Parallel-4 45.62n ± 0% 30.48n ± 0% -33.19% (p=0.000 n=25) -Or8Parallel-8 44.58n ± 1% 33.16n ± 2% -25.62% (p=0.000 n=25) -Or8Parallel-16 44.80n ± 0% 33.67n ± 0% -24.84% (p=0.000 n=25) -OrParallel 34.16n ± 0% 16.39n ± 0% -52.02% (p=0.000 n=25) -OrParallel-2 42.44n ± 0% 25.44n ± 1% -40.06% (p=0.000 n=25) -OrParallel-4 45.91n ± 2% 32.47n ± 4% -29.27% (p=0.000 n=25) -OrParallel-8 45.37n ± 2% 33.08n ± 2% -27.09% (p=0.000 n=25) -OrParallel-16 44.60n ± 1% 33.67n ± 0% -24.51% (p=0.000 n=25) -geomean 38.98n 21.45n -44.96% - -Updates #59120. - -Change-Id: Ib998a26613adaa5ed2c23ed528245a4e83d10eca ---- - src/cmd/compile/internal/loong64/ssa.go | 8 ++ - .../compile/internal/ssa/_gen/LOONG64.rules | 19 +++++ - .../compile/internal/ssa/_gen/LOONG64Ops.go | 5 ++ - src/cmd/compile/internal/ssa/opGen.go | 34 ++++++++ - .../compile/internal/ssa/rewriteLOONG64.go | 81 +++++++++++++++++++ - src/cmd/compile/internal/ssagen/ssa.go | 8 +- - src/runtime/internal/atomic/atomic_loong64.s | 30 +------ - 7 files changed, 155 insertions(+), 30 deletions(-) - -diff --git a/src/cmd/compile/internal/loong64/ssa.go b/src/cmd/compile/internal/loong64/ssa.go -index f809fbac5f..8d305c4f57 100644 ---- a/src/cmd/compile/internal/loong64/ssa.go -+++ b/src/cmd/compile/internal/loong64/ssa.go -@@ -507,6 +507,14 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) { - p.To.Reg = v.Args[0].Reg() - p.RegTo2 = v.Reg0() - -+ case ssa.OpLOONG64LoweredAtomicAnd32, ssa.OpLOONG64LoweredAtomicOr32: -+ p := s.Prog(v.Op.Asm()) -+ p.From.Type = obj.TYPE_REG -+ p.From.Reg = v.Args[1].Reg() -+ p.To.Type = obj.TYPE_MEM -+ p.To.Reg = v.Args[0].Reg() -+ p.RegTo2 = loong64.REGZERO -+ - case ssa.OpLOONG64LoweredAtomicAdd32, ssa.OpLOONG64LoweredAtomicAdd64: - // AMADDx Rarg1, (Rarg0), Rout - // ADDxU Rarg1, Rout, 
Rout -diff --git a/src/cmd/compile/internal/ssa/_gen/LOONG64.rules b/src/cmd/compile/internal/ssa/_gen/LOONG64.rules -index b9aaa3ff7f..25caad4406 100644 ---- a/src/cmd/compile/internal/ssa/_gen/LOONG64.rules -+++ b/src/cmd/compile/internal/ssa/_gen/LOONG64.rules -@@ -405,6 +405,25 @@ - (AtomicCompareAndSwap32 ptr old new mem) => (LoweredAtomicCas32 ptr (SignExt32to64 old) new mem) - (AtomicCompareAndSwap64 ...) => (LoweredAtomicCas64 ...) - -+// AtomicAnd8(ptr,val) => LoweredAtomicAnd32(ptr&^3, ^((uint8(val) ^ 0xff) << ((ptr & 3) * 8))) -+(AtomicAnd8 ptr val mem) => -+ (LoweredAtomicAnd32 (AND (MOVVconst [^3]) ptr) -+ (OR (SLLV (ZeroExt8to32 val) -+ (SLLVconst [3] -+ (ANDconst [3] ptr))) -+ (NORconst [0] (SLLV -+ (MOVVconst [0xff]) (SLLVconst [3] -+ (ANDconst [3] ptr))))) mem) -+(AtomicAnd32 ...) => (LoweredAtomicAnd32 ...) -+ -+// AtomicOr8(ptr,val) => LoweredAtomicOr32(ptr&^3, uint32(val) << ((ptr & 3) * 8)) -+(AtomicOr8 ptr val mem) => -+ (LoweredAtomicOr32 (AND (MOVVconst [^3]) ptr) -+ (SLLV (ZeroExt8to32 val) -+ (SLLVconst [3] -+ (ANDconst [3] ptr))) mem) -+(AtomicOr32 ...) => (LoweredAtomicOr32 ...) -+ - // checks - (NilCheck ...) => (LoweredNilCheck ...) - (IsNonNil ptr) => (SGTU ptr (MOVVconst [0])) -diff --git a/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go b/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go -index b83a7b0128..41cc431e8b 100644 ---- a/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go -+++ b/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go -@@ -380,6 +380,11 @@ func init() { - {name: "LoweredAtomicExchange32", argLength: 3, reg: gpxchg, resultNotInArgs: true, faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true}, - {name: "LoweredAtomicExchange64", argLength: 3, reg: gpxchg, resultNotInArgs: true, faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true}, - -+ // Atomic 32 bit AND/OR. -+ // *arg0 &= (|=) arg1. arg2=mem. returns nil. 
-+ {name: "LoweredAtomicAnd32", argLength: 3, reg: gpxchg, asm: "AMANDW", faultOnNilArg0: true, hasSideEffects: true}, -+ {name: "LoweredAtomicOr32", argLength: 3, reg: gpxchg, asm: "AMORW", faultOnNilArg0: true, hasSideEffects: true}, -+ - // atomic add. - // *arg0 += arg1. arg2=mem. returns . - {name: "LoweredAtomicAdd32", argLength: 3, reg: gpxchg, resultNotInArgs: true, faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true}, -diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go -index 2b712a1189..55a42ae782 100644 ---- a/src/cmd/compile/internal/ssa/opGen.go -+++ b/src/cmd/compile/internal/ssa/opGen.go -@@ -1827,6 +1827,8 @@ const ( - OpLOONG64LoweredAtomicStorezero64 - OpLOONG64LoweredAtomicExchange32 - OpLOONG64LoweredAtomicExchange64 -+ OpLOONG64LoweredAtomicAnd32 -+ OpLOONG64LoweredAtomicOr32 - OpLOONG64LoweredAtomicAdd32 - OpLOONG64LoweredAtomicAdd64 - OpLOONG64LoweredAtomicAddconst32 -@@ -24444,6 +24446,38 @@ var opcodeTable = [...]opInfo{ - }, - }, - }, -+ { -+ name: "LoweredAtomicAnd32", -+ argLen: 3, -+ faultOnNilArg0: true, -+ hasSideEffects: true, -+ asm: loong64.AAMANDW, -+ reg: regInfo{ -+ inputs: []inputInfo{ -+ {1, 1072693240}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 4611686019500081148}, // SP R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 SB -+ }, -+ outputs: []outputInfo{ -+ {0, 1070596088}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R23 R24 R25 R26 R27 R28 R29 R31 -+ }, -+ }, -+ }, -+ { -+ name: "LoweredAtomicOr32", -+ argLen: 3, -+ faultOnNilArg0: true, -+ hasSideEffects: true, -+ asm: loong64.AAMORW, -+ reg: regInfo{ -+ inputs: []inputInfo{ -+ {1, 1072693240}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 4611686019500081148}, // SP R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 
R24 R25 R26 R27 R28 R29 R31 SB -+ }, -+ outputs: []outputInfo{ -+ {0, 1070596088}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R23 R24 R25 R26 R27 R28 R29 R31 -+ }, -+ }, -+ }, - { - name: "LoweredAtomicAdd32", - argLen: 3, -diff --git a/src/cmd/compile/internal/ssa/rewriteLOONG64.go b/src/cmd/compile/internal/ssa/rewriteLOONG64.go -index 757524bdbb..bb09bd2e58 100644 ---- a/src/cmd/compile/internal/ssa/rewriteLOONG64.go -+++ b/src/cmd/compile/internal/ssa/rewriteLOONG64.go -@@ -50,6 +50,11 @@ func rewriteValueLOONG64(v *Value) bool { - case OpAtomicAdd64: - v.Op = OpLOONG64LoweredAtomicAdd64 - return true -+ case OpAtomicAnd32: -+ v.Op = OpLOONG64LoweredAtomicAnd32 -+ return true -+ case OpAtomicAnd8: -+ return rewriteValueLOONG64_OpAtomicAnd8(v) - case OpAtomicCompareAndSwap32: - return rewriteValueLOONG64_OpAtomicCompareAndSwap32(v) - case OpAtomicCompareAndSwap64: -@@ -73,6 +78,11 @@ func rewriteValueLOONG64(v *Value) bool { - case OpAtomicLoadPtr: - v.Op = OpLOONG64LoweredAtomicLoad64 - return true -+ case OpAtomicOr32: -+ v.Op = OpLOONG64LoweredAtomicOr32 -+ return true -+ case OpAtomicOr8: -+ return rewriteValueLOONG64_OpAtomicOr8(v) - case OpAtomicStore32: - v.Op = OpLOONG64LoweredAtomicStore32 - return true -@@ -718,6 +728,46 @@ func rewriteValueLOONG64_OpAddr(v *Value) bool { - return true - } - } -+func rewriteValueLOONG64_OpAtomicAnd8(v *Value) bool { -+ v_2 := v.Args[2] -+ v_1 := v.Args[1] -+ v_0 := v.Args[0] -+ b := v.Block -+ typ := &b.Func.Config.Types -+ // match: (AtomicAnd8 ptr val mem) -+ // result: (LoweredAtomicAnd32 (AND (MOVVconst [^3]) ptr) (OR (SLLV (ZeroExt8to32 val) (SLLVconst [3] (ANDconst [3] ptr))) (NORconst [0] (SLLV (MOVVconst [0xff]) (SLLVconst [3] (ANDconst [3] ptr))))) mem) -+ for { -+ ptr := v_0 -+ val := v_1 -+ mem := v_2 -+ v.reset(OpLOONG64LoweredAtomicAnd32) -+ v0 := b.NewValue0(v.Pos, OpLOONG64AND, typ.Uintptr) -+ v1 := b.NewValue0(v.Pos, OpLOONG64MOVVconst, typ.UInt64) -+ v1.AuxInt = int64ToAuxInt(^3) 
-+ v0.AddArg2(v1, ptr) -+ v2 := b.NewValue0(v.Pos, OpLOONG64OR, typ.UInt64) -+ v3 := b.NewValue0(v.Pos, OpLOONG64SLLV, typ.UInt32) -+ v4 := b.NewValue0(v.Pos, OpZeroExt8to32, typ.UInt32) -+ v4.AddArg(val) -+ v5 := b.NewValue0(v.Pos, OpLOONG64SLLVconst, typ.UInt64) -+ v5.AuxInt = int64ToAuxInt(3) -+ v6 := b.NewValue0(v.Pos, OpLOONG64ANDconst, typ.UInt64) -+ v6.AuxInt = int64ToAuxInt(3) -+ v6.AddArg(ptr) -+ v5.AddArg(v6) -+ v3.AddArg2(v4, v5) -+ v7 := b.NewValue0(v.Pos, OpLOONG64NORconst, typ.UInt64) -+ v7.AuxInt = int64ToAuxInt(0) -+ v8 := b.NewValue0(v.Pos, OpLOONG64SLLV, typ.UInt64) -+ v9 := b.NewValue0(v.Pos, OpLOONG64MOVVconst, typ.UInt64) -+ v9.AuxInt = int64ToAuxInt(0xff) -+ v8.AddArg2(v9, v5) -+ v7.AddArg(v8) -+ v2.AddArg2(v3, v7) -+ v.AddArg3(v0, v2, mem) -+ return true -+ } -+} - func rewriteValueLOONG64_OpAtomicCompareAndSwap32(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] -@@ -739,6 +789,37 @@ func rewriteValueLOONG64_OpAtomicCompareAndSwap32(v *Value) bool { - return true - } - } -+func rewriteValueLOONG64_OpAtomicOr8(v *Value) bool { -+ v_2 := v.Args[2] -+ v_1 := v.Args[1] -+ v_0 := v.Args[0] -+ b := v.Block -+ typ := &b.Func.Config.Types -+ // match: (AtomicOr8 ptr val mem) -+ // result: (LoweredAtomicOr32 (AND (MOVVconst [^3]) ptr) (SLLV (ZeroExt8to32 val) (SLLVconst [3] (ANDconst [3] ptr))) mem) -+ for { -+ ptr := v_0 -+ val := v_1 -+ mem := v_2 -+ v.reset(OpLOONG64LoweredAtomicOr32) -+ v0 := b.NewValue0(v.Pos, OpLOONG64AND, typ.Uintptr) -+ v1 := b.NewValue0(v.Pos, OpLOONG64MOVVconst, typ.UInt64) -+ v1.AuxInt = int64ToAuxInt(^3) -+ v0.AddArg2(v1, ptr) -+ v2 := b.NewValue0(v.Pos, OpLOONG64SLLV, typ.UInt32) -+ v3 := b.NewValue0(v.Pos, OpZeroExt8to32, typ.UInt32) -+ v3.AddArg(val) -+ v4 := b.NewValue0(v.Pos, OpLOONG64SLLVconst, typ.UInt64) -+ v4.AuxInt = int64ToAuxInt(3) -+ v5 := b.NewValue0(v.Pos, OpLOONG64ANDconst, typ.UInt64) -+ v5.AuxInt = int64ToAuxInt(3) -+ v5.AddArg(ptr) -+ v4.AddArg(v5) -+ v2.AddArg2(v3, v4) -+ v.AddArg3(v0, v2, mem) -+ 
return true -+ } -+} - func rewriteValueLOONG64_OpAvg64u(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] -diff --git a/src/cmd/compile/internal/ssagen/ssa.go b/src/cmd/compile/internal/ssagen/ssa.go -index e994577c64..02a69cc44a 100644 ---- a/src/cmd/compile/internal/ssagen/ssa.go -+++ b/src/cmd/compile/internal/ssagen/ssa.go -@@ -4352,25 +4352,25 @@ func InitTables() { - s.vars[memVar] = s.newValue3(ssa.OpAtomicAnd8, types.TypeMem, args[0], args[1], s.mem()) - return nil - }, -- sys.AMD64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X) -+ sys.AMD64, sys.Loong64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X) - addF("runtime/internal/atomic", "And", - func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { - s.vars[memVar] = s.newValue3(ssa.OpAtomicAnd32, types.TypeMem, args[0], args[1], s.mem()) - return nil - }, -- sys.AMD64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X) -+ sys.AMD64, sys.Loong64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X) - addF("runtime/internal/atomic", "Or8", - func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { - s.vars[memVar] = s.newValue3(ssa.OpAtomicOr8, types.TypeMem, args[0], args[1], s.mem()) - return nil - }, -- sys.AMD64, sys.ARM64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X) -+ sys.AMD64, sys.ARM64, sys.Loong64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X) - addF("runtime/internal/atomic", "Or", - func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { - s.vars[memVar] = s.newValue3(ssa.OpAtomicOr32, types.TypeMem, args[0], args[1], s.mem()) - return nil - }, -- sys.AMD64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X) -+ sys.AMD64, sys.Loong64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X) - - atomicAndOrEmitterARM64 := func(s *state, n *ir.CallExpr, args []*ssa.Value, op ssa.Op, typ types.Kind) { - s.vars[memVar] = s.newValue3(op, types.TypeMem, args[0], args[1], s.mem()) -diff --git 
a/src/runtime/internal/atomic/atomic_loong64.s b/src/runtime/internal/atomic/atomic_loong64.s -index eadd031553..092eb70c06 100644 ---- a/src/runtime/internal/atomic/atomic_loong64.s -+++ b/src/runtime/internal/atomic/atomic_loong64.s -@@ -191,13 +191,7 @@ TEXT ·Or8(SB), NOSPLIT, $0-9 - SLLV $3, R7 - // Shift val for aligned ptr. R5 = val << R4 - SLLV R7, R5 -- -- DBAR -- LL (R6), R7 -- OR R5, R7 -- SC R7, (R6) -- BEQ R7, -4(PC) -- DBAR -+ AMORW R5, (R6), R0 - RET - - // void And8(byte volatile*, byte); -@@ -216,37 +210,21 @@ TEXT ·And8(SB), NOSPLIT, $0-9 - SLLV R7, R8 - NOR R0, R8 - OR R8, R5 -- -- DBAR -- LL (R6), R7 -- AND R5, R7 -- SC R7, (R6) -- BEQ R7, -4(PC) -- DBAR -+ AMANDW R5, (R6), R0 - RET - - // func Or(addr *uint32, v uint32) - TEXT ·Or(SB), NOSPLIT, $0-12 - MOVV ptr+0(FP), R4 - MOVW val+8(FP), R5 -- DBAR -- LL (R4), R6 -- OR R5, R6 -- SC R6, (R4) -- BEQ R6, -4(PC) -- DBAR -+ AMORW R5, (R4), R0 - RET - - // func And(addr *uint32, v uint32) - TEXT ·And(SB), NOSPLIT, $0-12 - MOVV ptr+0(FP), R4 - MOVW val+8(FP), R5 -- DBAR -- LL (R4), R6 -- AND R5, R6 -- SC R6, (R4) -- BEQ R6, -4(PC) -- DBAR -+ AMANDW R5, (R4), R0 - RET - - // uint32 runtime∕internal∕atomic·Load(uint32 volatile* ptr) --- -2.38.1 - diff --git a/0012-math-big-optimize-addVV-function-for-loong64.patch b/0012-math-big-optimize-addVV-function-for-loong64.patch new file mode 100644 index 0000000000000000000000000000000000000000..8d91ab457176ba0293acd53bab799992c13f77ce --- /dev/null +++ b/0012-math-big-optimize-addVV-function-for-loong64.patch @@ -0,0 +1,85 @@ +From a7a4eb8120aaf7d5f8d2146f190c64118c7e1235 Mon Sep 17 00:00:00 2001 +From: Huang Qiqi +Date: Thu, 6 Jun 2024 15:30:20 +0800 +Subject: [PATCH 12/44] math/big: optimize addVV function for loong64 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Benchmark results on Loongson 3C5000 (which is an LA464 implementation): + +goos: linux +goarch: loong64 +pkg: math/big +cpu: Loongson-3C5000 @ 
2200.00MHz + │ test/old_3c5000_addvv.log │ test/new_3c5000_addvv.log │ + │ sec/op │ sec/op vs base │ +AddVV/1 10.920n ± 0% 7.671n ± 0% -29.75% (p=0.000 n=20) +AddVV/2 14.100n ± 0% 8.849n ± 0% -37.24% (p=0.000 n=20) +AddVV/3 16.38n ± 0% 11.07n ± 0% -32.42% (p=0.000 n=20) +AddVV/4 18.65n ± 0% 12.86n ± 0% -31.05% (p=0.000 n=20) +AddVV/5 20.93n ± 0% 15.01n ± 0% -28.28% (p=0.000 n=20) +AddVV/10 31.84n ± 0% 22.75n ± 0% -28.53% (p=0.000 n=20) +AddVV/100 242.4n ± 0% 149.7n ± 0% -38.24% (p=0.000 n=20) +AddVV/1000 2.290µ ± 0% 1.378µ ± 0% -39.83% (p=0.000 n=20) +AddVV/10000 32.73µ ± 0% 19.36µ ± 0% -40.84% (p=0.000 n=20) +AddVV/100000 340.9µ ± 0% 238.5µ ± 0% -30.03% (p=0.000 n=20) +geomean 213.2n 141.2n -33.79% + +Change-Id: I7983a93d9b97d4e9ebe96a49107ec6db9194b013 +--- + src/math/big/arith_loong64.s | 31 +++++++++++++++++++++++++++++-- + 1 file changed, 29 insertions(+), 2 deletions(-) + +diff --git a/src/math/big/arith_loong64.s b/src/math/big/arith_loong64.s +index 847e3127fb..bd7204cf06 100644 +--- a/src/math/big/arith_loong64.s ++++ b/src/math/big/arith_loong64.s +@@ -2,15 +2,42 @@ + // Use of this source code is governed by a BSD-style + // license that can be found in the LICENSE file. + +-//go:build !math_big_pure_go && loong64 ++//go:build !math_big_pure_go + + #include "textflag.h" + + // This file provides fast assembly versions for the elementary + // arithmetic operations on vectors implemented in arith.go. 
+ ++// func addVV(z, x, y []Word) (c Word) + TEXT ·addVV(SB),NOSPLIT,$0 +- JMP ·addVV_g(SB) ++ // input: ++ // R4: z ++ // R5: z_len ++ // R7: x ++ // R10: y ++ MOVV z+0(FP), R4 ++ MOVV z_len+8(FP), R5 ++ MOVV x+24(FP), R7 ++ MOVV y+48(FP), R10 ++ MOVV $0, R6 ++ SLLV $3, R5 ++ MOVV $0, R8 ++loop: ++ BEQ R5, R6, done ++ MOVV (R6)(R7), R9 ++ MOVV (R6)(R10), R11 ++ ADDV R9, R11, R11 // x1 + y1 = z1', if z1' < x1 then z1' overflow ++ ADDV R8, R11, R12 // z1' + c0 = z1, if z1 < z1' then z1 overflow ++ SGTU R9, R11, R9 ++ SGTU R11, R12, R11 ++ MOVV R12, (R6)(R4) ++ OR R9, R11, R8 ++ ADDV $8, R6 ++ JMP loop ++done: ++ MOVV R8, c+72(FP) ++ RET + + TEXT ·subVV(SB),NOSPLIT,$0 + JMP ·subVV_g(SB) +-- +2.38.1 + diff --git a/0013-cmd-compiler-runtime-internal-atomic-Implementing-xc.patch b/0013-cmd-compiler-runtime-internal-atomic-Implementing-xc.patch deleted file mode 100644 index c66bb247dd3df12b623dec152b5f780c433d6eef..0000000000000000000000000000000000000000 --- a/0013-cmd-compiler-runtime-internal-atomic-Implementing-xc.patch +++ /dev/null @@ -1,53 +0,0 @@ -From 005a1a3c1fc5a668f1311cb27d0be6257983063f Mon Sep 17 00:00:00 2001 -From: Guoqi Chen -Date: Thu, 15 Jun 2023 12:34:55 +0800 -Subject: [PATCH 13/51] cmd/compiler,runtime/internal/atomic: Implementing - xchg{,64} using amswapdb{w,d} on loong64 - -Change-Id: Ib8acb6f0f1a91e50c67064dae19c085f01341e08 ---- - src/cmd/compile/internal/loong64/ssa.go | 4 ++-- - src/runtime/internal/atomic/atomic_loong64.s | 4 ++-- - 2 files changed, 4 insertions(+), 4 deletions(-) - -diff --git a/src/cmd/compile/internal/loong64/ssa.go b/src/cmd/compile/internal/loong64/ssa.go -index 8d305c4f57..74489fe13a 100644 ---- a/src/cmd/compile/internal/loong64/ssa.go -+++ b/src/cmd/compile/internal/loong64/ssa.go -@@ -496,9 +496,9 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) { - s.Prog(loong64.ADBAR) - case ssa.OpLOONG64LoweredAtomicExchange32, ssa.OpLOONG64LoweredAtomicExchange64: - // AMSWAPx Rarg1, (Rarg0), Rout -- amswapx := 
loong64.AAMSWAPV -+ amswapx := loong64.AAMSWAPDBV - if v.Op == ssa.OpLOONG64LoweredAtomicExchange32 { -- amswapx = loong64.AAMSWAPW -+ amswapx = loong64.AAMSWAPDBW - } - p := s.Prog(amswapx) - p.From.Type = obj.TYPE_REG -diff --git a/src/runtime/internal/atomic/atomic_loong64.s b/src/runtime/internal/atomic/atomic_loong64.s -index 092eb70c06..215bb0382c 100644 ---- a/src/runtime/internal/atomic/atomic_loong64.s -+++ b/src/runtime/internal/atomic/atomic_loong64.s -@@ -120,7 +120,7 @@ TEXT ·Xadd64(SB), NOSPLIT, $0-24 - TEXT ·Xchg(SB), NOSPLIT, $0-20 - MOVV ptr+0(FP), R4 - MOVW new+8(FP), R5 -- AMSWAPW R5, (R4), R6 -+ AMSWAPDBW R5, (R4), R6 - MOVW R6, ret+16(FP) - RET - -@@ -128,7 +128,7 @@ TEXT ·Xchg(SB), NOSPLIT, $0-20 - TEXT ·Xchg64(SB), NOSPLIT, $0-24 - MOVV ptr+0(FP), R4 - MOVV new+8(FP), R5 -- AMSWAPV R5, (R4), R6 -+ AMSWAPDBV R5, (R4), R6 - MOVV R6, ret+16(FP) - RET - --- -2.38.1 - diff --git a/0013-math-big-optimize-addVW-function-for-loong64.patch b/0013-math-big-optimize-addVW-function-for-loong64.patch new file mode 100644 index 0000000000000000000000000000000000000000..853a0d43914daf3d1d20664bb8ea8f31f137c34e --- /dev/null +++ b/0013-math-big-optimize-addVW-function-for-loong64.patch @@ -0,0 +1,82 @@ +From 94a6bdcacffb17b8adf57ce0919a3d31ac70b646 Mon Sep 17 00:00:00 2001 +From: Huang Qiqi +Date: Tue, 11 Jun 2024 16:09:10 +0800 +Subject: [PATCH 13/44] math/big: optimize addVW function for loong64 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Benchmark results on Loongson 3C5000 (which is an LA464 implementation): + +goos: linux +goarch: loong64 +pkg: math/big +cpu: Loongson-3C5000 @ 2200.00MHz + │ test/old_3c5000_addvw.log │ test/new_3c5000_addvw.log │ + │ sec/op │ sec/op vs base │ +AddVW/1 9.555n ± 0% 5.915n ± 0% -38.09% (p=0.000 n=20) +AddVW/2 11.370n ± 0% 6.825n ± 0% -39.97% (p=0.000 n=20) +AddVW/3 12.485n ± 0% 7.970n ± 0% -36.16% (p=0.000 n=20) +AddVW/4 14.980n ± 0% 9.718n ± 0% -35.13% (p=0.000 n=20) 
+AddVW/5 16.73n ± 0% 10.63n ± 0% -36.46% (p=0.000 n=20) +AddVW/10 24.57n ± 0% 15.18n ± 0% -38.23% (p=0.000 n=20) +AddVW/100 184.9n ± 0% 102.4n ± 0% -44.62% (p=0.000 n=20) +AddVW/1000 1721.0n ± 0% 921.4n ± 0% -46.46% (p=0.000 n=20) +AddVW/10000 16.83µ ± 0% 11.68µ ± 0% -30.58% (p=0.000 n=20) +AddVW/100000 184.7µ ± 0% 131.3µ ± 0% -28.93% (p=0.000 n=20) +AddVWext/1 9.554n ± 0% 5.915n ± 0% -38.09% (p=0.000 n=20) +AddVWext/2 11.370n ± 0% 6.825n ± 0% -39.97% (p=0.000 n=20) +AddVWext/3 12.505n ± 0% 7.969n ± 0% -36.27% (p=0.000 n=20) +AddVWext/4 14.980n ± 0% 9.718n ± 0% -35.13% (p=0.000 n=20) +AddVWext/5 16.70n ± 0% 10.63n ± 0% -36.33% (p=0.000 n=20) +AddVWext/10 24.54n ± 0% 15.18n ± 0% -38.13% (p=0.000 n=20) +AddVWext/100 185.0n ± 0% 102.4n ± 0% -44.65% (p=0.000 n=20) +AddVWext/1000 1721.0n ± 0% 921.4n ± 0% -46.46% (p=0.000 n=20) +AddVWext/10000 16.83µ ± 0% 11.68µ ± 0% -30.60% (p=0.000 n=20) +AddVWext/100000 184.9µ ± 0% 130.4µ ± 0% -29.51% (p=0.000 n=20) +geomean 155.5n 96.87n -37.70% + +Change-Id: I824a90cb365e09d7d0d4a2c53ff4b30cf057a75e +--- + src/math/big/arith_loong64.s | 24 +++++++++++++++++++++++- + 1 file changed, 23 insertions(+), 1 deletion(-) + +diff --git a/src/math/big/arith_loong64.s b/src/math/big/arith_loong64.s +index bd7204cf06..bd6fec1b8d 100644 +--- a/src/math/big/arith_loong64.s ++++ b/src/math/big/arith_loong64.s +@@ -42,8 +42,30 @@ done: + TEXT ·subVV(SB),NOSPLIT,$0 + JMP ·subVV_g(SB) + ++// func addVW(z, x []Word, y Word) (c Word) + TEXT ·addVW(SB),NOSPLIT,$0 +- JMP ·addVW_g(SB) ++ // input: ++ // R4: z ++ // R5: z_len ++ // R7: x ++ // R10: y ++ MOVV z+0(FP), R4 ++ MOVV z_len+8(FP), R5 ++ MOVV x+24(FP), R7 ++ MOVV y+48(FP), R10 ++ MOVV $0, R6 ++ SLLV $3, R5 ++loop: ++ BEQ R5, R6, done ++ MOVV (R6)(R7), R8 ++ ADDV R8, R10, R9 // x1 + c = z1, if z1 < x1 then z1 overflow ++ SGTU R8, R9, R10 ++ MOVV R9, (R6)(R4) ++ ADDV $8, R6 ++ JMP loop ++done: ++ MOVV R10, c+56(FP) ++ RET + + TEXT ·subVW(SB),NOSPLIT,$0 + JMP ·subVW_g(SB) +-- +2.38.1 + diff --git 
a/0014-cmd-compiler-runtime-internal-atomic-Implementing-xa.patch b/0014-cmd-compiler-runtime-internal-atomic-Implementing-xa.patch deleted file mode 100644 index 8dac5fde681bb55919814947b12864e86e193984..0000000000000000000000000000000000000000 --- a/0014-cmd-compiler-runtime-internal-atomic-Implementing-xa.patch +++ /dev/null @@ -1,54 +0,0 @@ -From c9167e5fddcc9c01dce504382df32c33765dcb25 Mon Sep 17 00:00:00 2001 -From: Guoqi Chen -Date: Thu, 15 Jun 2023 12:37:00 +0800 -Subject: [PATCH 14/51] cmd/compiler,runtime/internal/atomic: Implementing - xadd{,64} using amadddb{w,d} on loong64 - -Change-Id: If58d4bcc1b367af5daf9f753bde61de4a7f690f1 ---- - src/cmd/compile/internal/loong64/ssa.go | 4 ++-- - src/runtime/internal/atomic/atomic_loong64.s | 4 ++-- - 2 files changed, 4 insertions(+), 4 deletions(-) - -diff --git a/src/cmd/compile/internal/loong64/ssa.go b/src/cmd/compile/internal/loong64/ssa.go -index 74489fe13a..0e8683ba81 100644 ---- a/src/cmd/compile/internal/loong64/ssa.go -+++ b/src/cmd/compile/internal/loong64/ssa.go -@@ -518,10 +518,10 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) { - case ssa.OpLOONG64LoweredAtomicAdd32, ssa.OpLOONG64LoweredAtomicAdd64: - // AMADDx Rarg1, (Rarg0), Rout - // ADDxU Rarg1, Rout, Rout -- amaddx := loong64.AAMADDV -+ amaddx := loong64.AAMADDDBV - addx := loong64.AADDVU - if v.Op == ssa.OpLOONG64LoweredAtomicAdd32 { -- amaddx = loong64.AAMADDW -+ amaddx = loong64.AAMADDDBW - addx = loong64.AADDU - } - p := s.Prog(amaddx) -diff --git a/src/runtime/internal/atomic/atomic_loong64.s b/src/runtime/internal/atomic/atomic_loong64.s -index 215bb0382c..1ea85b1944 100644 ---- a/src/runtime/internal/atomic/atomic_loong64.s -+++ b/src/runtime/internal/atomic/atomic_loong64.s -@@ -102,7 +102,7 @@ TEXT ·Casp1(SB), NOSPLIT, $0-25 - TEXT ·Xadd(SB), NOSPLIT, $0-20 - MOVV ptr+0(FP), R4 - MOVW delta+8(FP), R5 -- AMADDW R5, (R4), R6 -+ AMADDDBW R5, (R4), R6 - ADDU R6, R5, R4 - MOVW R4, ret+16(FP) - RET -@@ -111,7 +111,7 @@ TEXT ·Xadd(SB), 
NOSPLIT, $0-20 - TEXT ·Xadd64(SB), NOSPLIT, $0-24 - MOVV ptr+0(FP), R4 - MOVV delta+8(FP), R5 -- AMADDV R5, (R4), R6 -+ AMADDDBV R5, (R4), R6 - ADDVU R6, R5, R4 - MOVV R4, ret+16(FP) - RET --- -2.38.1 - diff --git a/0014-math-big-optimize-subVV-function-for-loong64.patch b/0014-math-big-optimize-subVV-function-for-loong64.patch new file mode 100644 index 0000000000000000000000000000000000000000..1a9c5dd8394467656a88cc5f1288e506562532a8 --- /dev/null +++ b/0014-math-big-optimize-subVV-function-for-loong64.patch @@ -0,0 +1,77 @@ +From 7939ebdcaa1156ef4e9d8f896f4877df88d7636c Mon Sep 17 00:00:00 2001 +From: Huang Qiqi +Date: Tue, 11 Jun 2024 19:06:29 +0800 +Subject: [PATCH 14/44] math/big: optimize subVV function for loong64 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Benchmark results on Loongson 3C5000 (which is an LA464 implementation): + +goos: linux +goarch: loong64 +pkg: math/big +cpu: Loongson-3C5000 @ 2200.00MHz + │ test/old_3c5000_subvv.log │ test/new_3c5000_subvv.log │ + │ sec/op │ sec/op vs base │ +SubVV/1 10.920n ± 0% 7.657n ± 0% -29.88% (p=0.000 n=20) +SubVV/2 14.100n ± 0% 8.841n ± 0% -37.30% (p=0.000 n=20) +SubVV/3 16.38n ± 0% 11.06n ± 0% -32.48% (p=0.000 n=20) +SubVV/4 18.65n ± 0% 12.85n ± 0% -31.10% (p=0.000 n=20) +SubVV/5 20.93n ± 0% 14.79n ± 0% -29.34% (p=0.000 n=20) +SubVV/10 32.30n ± 0% 22.29n ± 0% -30.99% (p=0.000 n=20) +SubVV/100 244.3n ± 0% 149.2n ± 0% -38.93% (p=0.000 n=20) +SubVV/1000 2.292µ ± 0% 1.378µ ± 0% -39.88% (p=0.000 n=20) +SubVV/10000 26.26µ ± 0% 25.64µ ± 0% -2.33% (p=0.000 n=20) +SubVV/100000 341.3µ ± 0% 238.0µ ± 0% -30.26% (p=0.000 n=20) +geomean 209.1n 144.5n -30.86% + +Change-Id: I3863c2c6728f1b0f8fecbf77de13254299c5b1cb +--- + src/math/big/arith_loong64.s | 29 ++++++++++++++++++++++++++++- + 1 file changed, 28 insertions(+), 1 deletion(-) + +diff --git a/src/math/big/arith_loong64.s b/src/math/big/arith_loong64.s +index bd6fec1b8d..8016c25207 100644 +--- 
a/src/math/big/arith_loong64.s ++++ b/src/math/big/arith_loong64.s +@@ -39,8 +39,35 @@ done: + MOVV R8, c+72(FP) + RET + ++// func subVV(z, x, y []Word) (c Word) + TEXT ·subVV(SB),NOSPLIT,$0 +- JMP ·subVV_g(SB) ++ // input: ++ // R4: z ++ // R5: z_len ++ // R7: x ++ // R10: y ++ MOVV z+0(FP), R4 ++ MOVV z_len+8(FP), R5 ++ MOVV x+24(FP), R7 ++ MOVV y+48(FP), R10 ++ MOVV $0, R6 ++ SLLV $3, R5 ++ MOVV $0, R8 ++loop: ++ BEQ R5, R6, done ++ MOVV (R6)(R7), R9 ++ MOVV (R6)(R10), R11 ++ SUBV R11, R9, R11 // x1 - y1 = z1', if z1' > x1 then overflow ++ SUBV R8, R11, R12 // z1' - c0 = z1, if z1 > z1' then overflow ++ SGTU R11, R9, R9 ++ SGTU R12, R11, R11 ++ MOVV R12, (R6)(R4) ++ OR R9, R11, R8 ++ ADDV $8, R6 ++ JMP loop ++done: ++ MOVV R8, c+72(FP) ++ RET + + // func addVW(z, x []Word, y Word) (c Word) + TEXT ·addVW(SB),NOSPLIT,$0 +-- +2.38.1 + diff --git a/0015-cmd-compiler-runtime-internal-atomic-Implementing-An.patch b/0015-cmd-compiler-runtime-internal-atomic-Implementing-An.patch deleted file mode 100644 index ccb452f0fec10db3d2cd7a0f364a3265d1f50f81..0000000000000000000000000000000000000000 --- a/0015-cmd-compiler-runtime-internal-atomic-Implementing-An.patch +++ /dev/null @@ -1,91 +0,0 @@ -From b9a8c32cd18ace66c2e3dda750cab21842231a37 Mon Sep 17 00:00:00 2001 -From: Guoqi Chen -Date: Thu, 15 Jun 2023 12:40:55 +0800 -Subject: [PATCH 15/51] cmd/compiler,runtime/internal/atomic: Implementing - {And,Or}{32,8} using am{and,or}dbw on loong64 - -Change-Id: Ic5ce31d240ef04f09e9c00623b0a7aa799cd7bf4 ---- - src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go | 4 ++-- - src/cmd/compile/internal/ssa/opGen.go | 4 ++-- - src/runtime/internal/atomic/atomic_loong64.s | 8 ++++---- - 3 files changed, 8 insertions(+), 8 deletions(-) - -diff --git a/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go b/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go -index 41cc431e8b..ce08346a4a 100644 ---- a/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go -+++ b/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go 
-@@ -382,8 +382,8 @@ func init() { - - // Atomic 32 bit AND/OR. - // *arg0 &= (|=) arg1. arg2=mem. returns nil. -- {name: "LoweredAtomicAnd32", argLength: 3, reg: gpxchg, asm: "AMANDW", faultOnNilArg0: true, hasSideEffects: true}, -- {name: "LoweredAtomicOr32", argLength: 3, reg: gpxchg, asm: "AMORW", faultOnNilArg0: true, hasSideEffects: true}, -+ {name: "LoweredAtomicAnd32", argLength: 3, reg: gpxchg, asm: "AMANDDBW", faultOnNilArg0: true, hasSideEffects: true}, -+ {name: "LoweredAtomicOr32", argLength: 3, reg: gpxchg, asm: "AMORDBW", faultOnNilArg0: true, hasSideEffects: true}, - - // atomic add. - // *arg0 += arg1. arg2=mem. returns . -diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go -index 55a42ae782..fc735c48b5 100644 ---- a/src/cmd/compile/internal/ssa/opGen.go -+++ b/src/cmd/compile/internal/ssa/opGen.go -@@ -24451,7 +24451,7 @@ var opcodeTable = [...]opInfo{ - argLen: 3, - faultOnNilArg0: true, - hasSideEffects: true, -- asm: loong64.AAMANDW, -+ asm: loong64.AAMANDDBW, - reg: regInfo{ - inputs: []inputInfo{ - {1, 1072693240}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 -@@ -24467,7 +24467,7 @@ var opcodeTable = [...]opInfo{ - argLen: 3, - faultOnNilArg0: true, - hasSideEffects: true, -- asm: loong64.AAMORW, -+ asm: loong64.AAMORDBW, - reg: regInfo{ - inputs: []inputInfo{ - {1, 1072693240}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 -diff --git a/src/runtime/internal/atomic/atomic_loong64.s b/src/runtime/internal/atomic/atomic_loong64.s -index 1ea85b1944..7eee52ddb8 100644 ---- a/src/runtime/internal/atomic/atomic_loong64.s -+++ b/src/runtime/internal/atomic/atomic_loong64.s -@@ -191,7 +191,7 @@ TEXT ·Or8(SB), NOSPLIT, $0-9 - SLLV $3, R7 - // Shift val for aligned ptr. 
R5 = val << R4 - SLLV R7, R5 -- AMORW R5, (R6), R0 -+ AMORDBW R5, (R6), R0 - RET - - // void And8(byte volatile*, byte); -@@ -210,21 +210,21 @@ TEXT ·And8(SB), NOSPLIT, $0-9 - SLLV R7, R8 - NOR R0, R8 - OR R8, R5 -- AMANDW R5, (R6), R0 -+ AMANDDBW R5, (R6), R0 - RET - - // func Or(addr *uint32, v uint32) - TEXT ·Or(SB), NOSPLIT, $0-12 - MOVV ptr+0(FP), R4 - MOVW val+8(FP), R5 -- AMORW R5, (R4), R0 -+ AMORDBW R5, (R4), R0 - RET - - // func And(addr *uint32, v uint32) - TEXT ·And(SB), NOSPLIT, $0-12 - MOVV ptr+0(FP), R4 - MOVW val+8(FP), R5 -- AMANDW R5, (R4), R0 -+ AMANDDBW R5, (R4), R0 - RET - - // uint32 runtime∕internal∕atomic·Load(uint32 volatile* ptr) --- -2.38.1 - diff --git a/0015-math-big-optimize-subVW-function-for-loong64.patch b/0015-math-big-optimize-subVW-function-for-loong64.patch new file mode 100644 index 0000000000000000000000000000000000000000..82c8a1c43ae0fc4c6ba1edd1e62c907c855baf55 --- /dev/null +++ b/0015-math-big-optimize-subVW-function-for-loong64.patch @@ -0,0 +1,82 @@ +From b8516483f552400ef8708645b8a10bed5f666dba Mon Sep 17 00:00:00 2001 +From: Huang Qiqi +Date: Tue, 11 Jun 2024 20:33:50 +0800 +Subject: [PATCH 15/44] math/big: optimize subVW function for loong64 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Benchmark results on Loongson 3C5000 (which is an LA464 implementation): + +goos: linux +goarch: loong64 +pkg: math/big +cpu: Loongson-3C5000 @ 2200.00MHz + │ test/old_3c5000_subvw.log │ test/new_3c5000_subvw.log │ + │ sec/op │ sec/op vs base │ +SubVW/1 8.564n ± 0% 5.915n ± 0% -30.93% (p=0.000 n=20) +SubVW/2 11.675n ± 0% 6.825n ± 0% -41.54% (p=0.000 n=20) +SubVW/3 13.410n ± 0% 7.969n ± 0% -40.57% (p=0.000 n=20) +SubVW/4 15.300n ± 0% 9.740n ± 0% -36.34% (p=0.000 n=20) +SubVW/5 17.34n ± 1% 10.66n ± 0% -38.55% (p=0.000 n=20) +SubVW/10 26.55n ± 0% 15.21n ± 0% -42.70% (p=0.000 n=20) +SubVW/100 199.2n ± 0% 102.5n ± 0% -48.52% (p=0.000 n=20) +SubVW/1000 1866.5n ± 1% 924.6n ± 0% -50.46% (p=0.000 
n=20) +SubVW/10000 17.67µ ± 2% 12.04µ ± 2% -31.83% (p=0.000 n=20) +SubVW/100000 186.4µ ± 0% 132.0µ ± 0% -29.17% (p=0.000 n=20) +SubVWext/1 8.616n ± 0% 5.949n ± 0% -30.95% (p=0.000 n=20) +SubVWext/2 11.410n ± 0% 7.008n ± 1% -38.58% (p=0.000 n=20) +SubVWext/3 13.255n ± 1% 8.073n ± 0% -39.09% (p=0.000 n=20) +SubVWext/4 15.095n ± 0% 9.893n ± 0% -34.47% (p=0.000 n=20) +SubVWext/5 16.87n ± 0% 10.86n ± 0% -35.63% (p=0.000 n=20) +SubVWext/10 26.00n ± 0% 15.54n ± 0% -40.22% (p=0.000 n=20) +SubVWext/100 196.0n ± 0% 104.3n ± 1% -46.76% (p=0.000 n=20) +SubVWext/1000 1847.0n ± 0% 923.7n ± 0% -49.99% (p=0.000 n=20) +SubVWext/10000 17.30µ ± 1% 11.71µ ± 1% -32.31% (p=0.000 n=20) +SubVWext/100000 187.5µ ± 0% 131.6µ ± 0% -29.82% (p=0.000 n=20) +geomean 159.7n 97.79n -38.79% + +Change-Id: I21a6903e79b02cb22282e80c9bfe2ae9f1a87589 +--- + src/math/big/arith_loong64.s | 24 +++++++++++++++++++++++- + 1 file changed, 23 insertions(+), 1 deletion(-) + +diff --git a/src/math/big/arith_loong64.s b/src/math/big/arith_loong64.s +index 8016c25207..02d8262129 100644 +--- a/src/math/big/arith_loong64.s ++++ b/src/math/big/arith_loong64.s +@@ -94,8 +94,30 @@ done: + MOVV R10, c+56(FP) + RET + ++// func subVW(z, x []Word, y Word) (c Word) + TEXT ·subVW(SB),NOSPLIT,$0 +- JMP ·subVW_g(SB) ++ // input: ++ // R4: z ++ // R5: z_len ++ // R7: x ++ // R10: y ++ MOVV z+0(FP), R4 ++ MOVV z_len+8(FP), R5 ++ MOVV x+24(FP), R7 ++ MOVV y+48(FP), R10 ++ MOVV $0, R6 ++ SLLV $3, R5 ++loop: ++ BEQ R5, R6, done ++ MOVV (R6)(R7), R8 ++ SUBV R10, R8, R11 // x1 - c = z1, if z1 > x1 then overflow ++ SGTU R11, R8, R10 ++ MOVV R11, (R6)(R4) ++ ADDV $8, R6 ++ JMP loop ++done: ++ MOVV R10, c+56(FP) ++ RET + + TEXT ·shlVU(SB),NOSPLIT,$0 + JMP ·shlVU_g(SB) +-- +2.38.1 + diff --git a/0016-cmd-internal-obj-loong64-remove-the-invalid-plan9-fo.patch b/0016-cmd-internal-obj-loong64-remove-the-invalid-plan9-fo.patch deleted file mode 100644 index 38bc86b36f88dd4bbad33a82294f7dffdb82f6ea..0000000000000000000000000000000000000000 --- 
a/0016-cmd-internal-obj-loong64-remove-the-invalid-plan9-fo.patch +++ /dev/null @@ -1,177 +0,0 @@ -From bc400fff755f7c3fe853cf23a7e070a15b6259a9 Mon Sep 17 00:00:00 2001 -From: Guoqi Chen -Date: Mon, 8 May 2023 06:20:21 +0800 -Subject: [PATCH 16/51] cmd/internal/obj/loong64: remove the invalid plan9 - format of the BREAK instruction - -In the three formats corresponding to case 7 of the function asmout, BREAK actually -corresponds to the cacop instruction of Loong64, refer to the loong64 instruction -manual volume 1 [1], the cacop instruction is a privileged instruction used to -maintain the cache, and the user mode does not have permission to execute. - -Referring to the loong64 instruction manual volume 1 [1], the format of SYSCALL, -BREAK, DBAR and NOOP instructions is similar and can be grouped into one category. - -[1]: https://loongson.github.io/LoongArch-Documentation/LoongArch-Vol1-EN.html - -Change-Id: I0b8998270102d1557fc2b2410cf8c0b078bd0c2e ---- - .../asm/internal/asm/testdata/loong64enc1.s | 2 - - src/cmd/internal/obj/loong64/asm.go | 51 ++++++++++--------- - 2 files changed, 27 insertions(+), 26 deletions(-) - -diff --git a/src/cmd/asm/internal/asm/testdata/loong64enc1.s b/src/cmd/asm/internal/asm/testdata/loong64enc1.s -index 288408b010..701515cf4c 100644 ---- a/src/cmd/asm/internal/asm/testdata/loong64enc1.s -+++ b/src/cmd/asm/internal/asm/testdata/loong64enc1.s -@@ -141,8 +141,6 @@ lable2: - MOVV R4, F5 // 85a81401 - MOVV F4, R5 // 85b81401 - WORD $74565 // 45230100 -- BREAK R4, result+16(FP) // 64600006 -- BREAK R4, 1(R5) // a4040006 - BREAK // 00002a00 - UNDEF // 00002a00 - -diff --git a/src/cmd/internal/obj/loong64/asm.go b/src/cmd/internal/obj/loong64/asm.go -index 638bd1139f..77eaa628e8 100644 ---- a/src/cmd/internal/obj/loong64/asm.go -+++ b/src/cmd/internal/obj/loong64/asm.go -@@ -354,11 +354,6 @@ var optab = []Optab{ - {ATEQ, C_SCON, C_REG, C_NONE, C_REG, C_NONE, 15, 8, 0, 0}, - {ATEQ, C_SCON, C_NONE, C_NONE, C_REG, C_NONE, 15, 8, 0, 0}, - 
-- {ABREAK, C_REG, C_NONE, C_NONE, C_SEXT, C_NONE, 7, 4, 0, 0}, // really CACHE instruction -- {ABREAK, C_REG, C_NONE, C_NONE, C_SAUTO, C_NONE, 7, 4, REGSP, 0}, -- {ABREAK, C_REG, C_NONE, C_NONE, C_SOREG, C_NONE, 7, 4, REGZERO, 0}, -- {ABREAK, C_NONE, C_NONE, C_NONE, C_NONE, C_NONE, 5, 4, 0, 0}, -- - {ARDTIMELW, C_NONE, C_NONE, C_NONE, C_REG, C_REG, 62, 4, 0, 0}, - {ARDTIMEHW, C_NONE, C_NONE, C_NONE, C_REG, C_REG, 62, 4, 0, 0}, - {ARDTIMED, C_NONE, C_NONE, C_NONE, C_REG, C_REG, 62, 4, 0, 0}, -@@ -601,7 +596,7 @@ func span0(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { - v := pcAlignPadLength(c.ctxt, p.Pc, alignedValue) - for i = 0; i < int32(v/4); i++ { - // emit ANOOP instruction by the padding size -- c.ctxt.Arch.ByteOrder.PutUint32(bp, c.oprrr(ANOOP)) -+ c.ctxt.Arch.ByteOrder.PutUint32(bp, c.opi(ANOOP)) - bp = bp[4:] - } - continue -@@ -1165,6 +1160,7 @@ func buildop(ctxt *obj.Link) { - case ASYSCALL: - opset(ADBAR, r0) - opset(ANOOP, r0) -+ opset(ABREAK, r0) - - case ACMPEQF: - opset(ACMPGTF, r0) -@@ -1186,7 +1182,6 @@ func buildop(ctxt *obj.Link) { - AMOVD, - AMOVF, - AMOVV, -- ABREAK, - ARFE, - AJAL, - AJMP, -@@ -1272,6 +1267,10 @@ func OP_IR(op uint32, i uint32, r2 uint32) uint32 { - return op | (i&0xFFFFF)<<5 | (r2&0x1F)<<0 // ui20, rd5 - } - -+func OP_I(op uint32, i uint32) uint32 { -+ return op | (i&0x7FFF)<<0 -+} -+ - // Encoding for the 'b' or 'bl' instruction. 
- func OP_B_BL(op uint32, i uint32) uint32 { - return op | ((i & 0xFFFF) << 10) | ((i >> 16) & 0x3FF) -@@ -1337,7 +1336,7 @@ func (c *ctxt0) asmout(p *obj.Prog, o *Optab, out []uint32) { - o1 = OP_12IRR(c.opirr(p.As), uint32(v), uint32(r), uint32(p.To.Reg)) - - case 5: // syscall -- o1 = c.oprrr(p.As) -+ o1 = c.opi(p.As) - - case 6: // beq r1,[r2],sbra - v := int32(0) -@@ -1481,7 +1480,7 @@ func (c *ctxt0) asmout(p *obj.Prog, o *Optab, out []uint32) { - } else { // ATNE - o1 = OP_16IRR(c.opirr(ABEQ), uint32(2), uint32(r), uint32(p.To.Reg)) - } -- o2 = c.oprrr(ABREAK) | (uint32(v) & 0x7FFF) -+ o2 = OP_I(c.opi(ABREAK), uint32(v)) - - case 16: // sll $c,[r1],r2 - v := c.regoff(&p.From) -@@ -1652,7 +1651,7 @@ func (c *ctxt0) asmout(p *obj.Prog, o *Optab, out []uint32) { - o1 = OP_RR(a, uint32(p.From.Reg), uint32(p.To.Reg)) - - case 49: // undef -- o1 = c.oprrr(ABREAK) -+ o1 = c.opi(ABREAK) - - // relocation operations - case 50: // mov r,addr ==> pcalau12i + sw -@@ -1972,10 +1971,6 @@ func (c *ctxt0) oprrr(a obj.As) uint32 { - case AJAL: - return (0x13 << 26) | 1 // jirl r1, rj, 0 - -- case ABREAK: -- return 0x54 << 15 -- case ASYSCALL: -- return 0x56 << 15 - case ADIVF: - return 0x20d << 15 - case ADIVD: -@@ -2049,12 +2044,6 @@ func (c *ctxt0) oprrr(a obj.As) uint32 { - return 0x4511 << 10 - case ASQRTD: - return 0x4512 << 10 -- -- case ADBAR: -- return 0x70e4 << 15 -- case ANOOP: -- // andi r0, r0, 0 -- return 0x03400000 - } - - if a < 0 { -@@ -2083,6 +2072,24 @@ func (c *ctxt0) oprr(a obj.As) uint32 { - return 0 - } - -+func (c *ctxt0) opi(a obj.As) uint32 { -+ switch a { -+ case ASYSCALL: -+ return 0x56 << 15 -+ case ABREAK: -+ return 0x54 << 15 -+ case ADBAR: -+ return 0x70e4 << 15 -+ case ANOOP: -+ // andi r0, r0, 0 -+ return 0x03400000 -+ } -+ -+ c.ctxt.Diag("bad ic opcode %v", a) -+ -+ return 0 -+} -+ - func (c *ctxt0) opir(a obj.As) uint32 { - switch a { - case ALU12IW: -@@ -2179,10 +2186,6 @@ func (c *ctxt0) opirr(a obj.As) uint32 { - return 0x0be << 22 - 
case AMOVVR: - return 0x0bf << 22 -- -- case ABREAK: -- return 0x018 << 22 -- - case -AMOVWL: - return 0x0b8 << 22 - case -AMOVWR: --- -2.38.1 - diff --git a/0016-math-big-optimize-shlVU-function-for-loong64.patch b/0016-math-big-optimize-shlVU-function-for-loong64.patch new file mode 100644 index 0000000000000000000000000000000000000000..a7fb046e70251547d0934928b6f9000eb7ac700a --- /dev/null +++ b/0016-math-big-optimize-shlVU-function-for-loong64.patch @@ -0,0 +1,92 @@ +From 3d520765bbff022132512b918379fe1a5e788f2e Mon Sep 17 00:00:00 2001 +From: Huang Qiqi +Date: Thu, 13 Jun 2024 11:36:30 +0800 +Subject: [PATCH 16/44] math/big: optimize shlVU function for loong64 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Benchmark results on Loongson 3A5000 (which is an LA464 implementation): + +goos: linux +goarch: loong64 +pkg: math/big +cpu: Loongson-3A5000-HV @ 2500.00MHz + │ old_3a5000_shlvu.log │ new_3a5000_shlvu_1st.log │ + │ sec/op │ sec/op vs base │ +NonZeroShifts/1/shlVU 7.606n ± 0% 5.304n ± 0% -30.27% (p=0.000 n=20) +NonZeroShifts/2/shlVU 9.608n ± 0% 6.164n ± 0% -35.85% (p=0.000 n=20) +NonZeroShifts/3/shlVU 11.610n ± 0% 6.984n ± 0% -39.84% (p=0.000 n=20) +NonZeroShifts/4/shlVU 12.210n ± 0% 8.869n ± 0% -27.36% (p=0.000 n=20) +NonZeroShifts/5/shlVU 14.11n ± 0% 10.41n ± 0% -26.22% (p=0.000 n=20) +NonZeroShifts/10/shlVU 22.02n ± 0% 14.77n ± 0% -32.92% (p=0.000 n=20) +NonZeroShifts/100/shlVU 161.30n ± 0% 91.15n ± 0% -43.49% (p=0.000 n=20) +NonZeroShifts/1000/shlVU 1514.0n ± 0% 811.7n ± 0% -46.39% (p=0.000 n=20) +NonZeroShifts/10000/shlVU 21.53µ ± 0% 10.54µ ± 0% -51.04% (p=0.000 n=20) +NonZeroShifts/100000/shlVU 208.1µ ± 0% 113.0µ ± 0% -45.69% (p=0.000 n=20) +geomean 142.8n 87.87n -38.46% + +Change-Id: I8e13eb0af27ac3d6846e559cdb61d2b544b05353 +--- + src/math/big/arith_loong64.s | 44 +++++++++++++++++++++++++++++++++++- + 1 file changed, 43 insertions(+), 1 deletion(-) + +diff --git a/src/math/big/arith_loong64.s 
b/src/math/big/arith_loong64.s +index 02d8262129..1820988d3f 100644 +--- a/src/math/big/arith_loong64.s ++++ b/src/math/big/arith_loong64.s +@@ -119,8 +119,50 @@ done: + MOVV R10, c+56(FP) + RET + ++// func shlVU(z, x []Word, s uint) (c Word) + TEXT ·shlVU(SB),NOSPLIT,$0 +- JMP ·shlVU_g(SB) ++ // input: ++ // R4: z ++ // R5: z_len ++ // R7: x ++ // R10: s ++ MOVV z_len+8(FP), R5 ++ MOVV s+48(FP), R10 ++ MOVV z+0(FP), R4 ++ MOVV x+24(FP), R7 ++ BEQ R5, len0 ++ SLLV $3, R5 ++ BEQ R10, copy ++ MOVV $64, R9 ++ ADDV $-8, R7 // &x[-1] ++ SUB R10, R9 // ŝ = 64 - s ++ MOVV (R5)(R7), R6 ++ SRLV R9, R6, R8 // c = x[len(z)-1] >> ŝ ++loop: ++ ADDV $-8, R5 ++ BEQ R5, done ++ SLLV R10, R6, R12 ++ MOVV (R5)(R7), R6 ++ SRLV R9, R6, R11 ++ OR R11, R12 ++ MOVV R12, (R5)(R4) // z[i] = x[i]<<s | x[i-1]>>ŝ ++ JMP loop ++done: ++ SLLV R10, R6 ++ MOVV R8, c+56(FP) ++ MOVV R6, 0(R4) // z[0] = x[0] << s ++ RET ++copy: ++ BEQ R7, R4, len0 ++copyloop: ++ ADDV $-8, R5 ++ BLT R5, R0, len0 ++ MOVV (R5)(R7), R9 ++ MOVV R9, (R5)(R4) ++ JMP copyloop ++len0: ++ MOVV R0, c+56(FP) ++ RET + + TEXT ·shrVU(SB),NOSPLIT,$0 + JMP ·shrVU_g(SB) +-- +2.38.1 + diff --git a/0017-cmd-internal-obj-loong64-correct-the-instruction-for.patch b/0017-cmd-internal-obj-loong64-correct-the-instruction-for.patch deleted file mode 100644 index 11e10591a3d7ef0d1a65f8e4e31097d0207cfae6..0000000000000000000000000000000000000000 --- a/0017-cmd-internal-obj-loong64-correct-the-instruction-for.patch +++ /dev/null @@ -1,75 +0,0 @@ -From 3e403151f2f8aeec8dfa69b49f0e5380194ada87 Mon Sep 17 00:00:00 2001 -From: chenguoqi -Date: Thu, 27 Jul 2023 11:01:16 +0800 -Subject: [PATCH 17/51] cmd/internal/obj/loong64: correct the instruction - format of plan9 assembly NOOP - -Change-Id: Icbaa925775d8fb8978a6e0cf7caa1f4be8ebf7f4 ---- - src/cmd/internal/obj/loong64/asm.go | 27 +++++++++++++++++++++------ - 1 file changed, 21 insertions(+), 6 deletions(-) - -diff --git a/src/cmd/internal/obj/loong64/asm.go b/src/cmd/internal/obj/loong64/asm.go -index
77eaa628e8..c6c28002d1 100644 ---- a/src/cmd/internal/obj/loong64/asm.go -+++ b/src/cmd/internal/obj/loong64/asm.go -@@ -596,7 +596,7 @@ func span0(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { - v := pcAlignPadLength(c.ctxt, p.Pc, alignedValue) - for i = 0; i < int32(v/4); i++ { - // emit ANOOP instruction by the padding size -- c.ctxt.Arch.ByteOrder.PutUint32(bp, c.opi(ANOOP)) -+ c.ctxt.Arch.ByteOrder.PutUint32(bp, c.op0(ANOOP)) - bp = bp[4:] - } - continue -@@ -1336,7 +1336,13 @@ func (c *ctxt0) asmout(p *obj.Prog, o *Optab, out []uint32) { - o1 = OP_12IRR(c.opirr(p.As), uint32(v), uint32(r), uint32(p.To.Reg)) - - case 5: // syscall -- o1 = c.opi(p.As) -+ switch p.As { -+ case ANOOP: -+ o1 = c.op0(p.As) -+ default: -+ v := c.regoff(&p.From) -+ o1 = OP_I(c.opi(p.As), uint32(v)) -+ } - - case 6: // beq r1,[r2],sbra - v := int32(0) -@@ -2072,6 +2078,18 @@ func (c *ctxt0) oprr(a obj.As) uint32 { - return 0 - } - -+func (c *ctxt0) op0(a obj.As) uint32 { -+ switch a { -+ case ANOOP: -+ // andi r0, r0, 0 -+ return 0x03400000 -+ } -+ -+ c.ctxt.Diag("bad op0 opcode %v", a) -+ -+ return 0 -+} -+ - func (c *ctxt0) opi(a obj.As) uint32 { - switch a { - case ASYSCALL: -@@ -2080,12 +2098,9 @@ func (c *ctxt0) opi(a obj.As) uint32 { - return 0x54 << 15 - case ADBAR: - return 0x70e4 << 15 -- case ANOOP: -- // andi r0, r0, 0 -- return 0x03400000 - } - -- c.ctxt.Diag("bad ic opcode %v", a) -+ c.ctxt.Diag("bad opi opcode %v", a) - - return 0 - } --- -2.38.1 - diff --git a/0017-math-big-optimize-shrVU-function-for-loong64.patch b/0017-math-big-optimize-shrVU-function-for-loong64.patch new file mode 100644 index 0000000000000000000000000000000000000000..504501900a094f24b89d68fa1e33f51664a5575e --- /dev/null +++ b/0017-math-big-optimize-shrVU-function-for-loong64.patch @@ -0,0 +1,92 @@ +From 14d44d92f1d59c42e85bd89797a3730f48699dc6 Mon Sep 17 00:00:00 2001 +From: Huang Qiqi +Date: Tue, 18 Jun 2024 02:00:38 +0000 +Subject: [PATCH 17/44] math/big: optimize shrVU function 
for loong64 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Benchmark results on Loongson 3A5000 (which is an LA464 implementation): + +goos: linux +goarch: loong64 +pkg: math/big +cpu: Loongson-3A5000-HV @ 2500.00MHz + │ test/old_3a5000_shrvu.log │ test/new_3a5000_shrvu.log │ + │ sec/op │ sec/op vs base │ +NonZeroShifts/1/shrVU 7.968n ± 0% 5.210n ± 0% -34.62% (p=0.000 n=20) +NonZeroShifts/2/shrVU 9.608n ± 0% 6.178n ± 0% -35.70% (p=0.000 n=20) +NonZeroShifts/3/shrVU 11.400n ± 0% 7.419n ± 0% -34.92% (p=0.000 n=20) +NonZeroShifts/4/shrVU 13.350n ± 0% 9.159n ± 0% -31.39% (p=0.000 n=20) +NonZeroShifts/5/shrVU 15.93n ± 0% 10.58n ± 0% -33.58% (p=0.000 n=20) +NonZeroShifts/10/shrVU 24.42n ± 0% 15.70n ± 0% -35.71% (p=0.000 n=20) +NonZeroShifts/100/shrVU 190.60n ± 0% 90.87n ± 0% -52.32% (p=0.000 n=20) +NonZeroShifts/1000/shrVU 1782.0n ± 0% 811.5n ± 0% -54.46% (p=0.000 n=20) +NonZeroShifts/10000/shrVU 21.54µ ± 0% 12.55µ ± 0% -41.76% (p=0.000 n=20) +NonZeroShifts/100000/shrVU 224.1µ ± 0% 126.2µ ± 0% -43.71% (p=0.000 n=20) +geomean 153.9n 91.78n -40.35% + +Change-Id: I86f1f3ac44d60ad8dc2e77bdb9b541f55eb18e74 +--- + src/math/big/arith_loong64.s | 45 +++++++++++++++++++++++++++++++++++- + 1 file changed, 44 insertions(+), 1 deletion(-) + +diff --git a/src/math/big/arith_loong64.s b/src/math/big/arith_loong64.s +index 1820988d3f..bdaaf14821 100644 +--- a/src/math/big/arith_loong64.s ++++ b/src/math/big/arith_loong64.s +@@ -165,7 +165,50 @@ len0: + RET + + TEXT ·shrVU(SB),NOSPLIT,$0 +- JMP ·shrVU_g(SB) ++ // input: ++ // R4: z ++ // R5: z_len ++ // R7: x ++ // R10: s ++ MOVV z_len+8(FP), R5 ++ MOVV s+48(FP), R10 ++ MOVV z+0(FP), R4 ++ MOVV x+24(FP), R7 ++ BEQ R5, len0 ++ SLLV $3, R5 ++ BEQ R10, copy ++ MOVV 0(R7), R6 ++ MOVV $64, R9 ++ MOVV $8, R8 ++ SUB R10, R9 // ŝ = 64 - s ++ ADDV $-8, R4 // &z[-1] ++ SLLV R9, R6, R13 // c = x[0] << ŝ ++loop: ++ BEQ R5, R8, done ++ SRLV R10, R6, R12 ++ MOVV (R8)(R7), R6 ++ SLLV R9, R6, R11 ++ OR 
R11, R12 ++ MOVV R12, (R8)(R4) // z[i-1] = x[i-1]>>s | x[i]<<ŝ ++ ADDV $8, R8 ++ JMP loop ++done: ++ SRLV R10, R6 ++ MOVV R13, c+56(FP) ++ MOVV R6, (R8)(R4) // z[len(z)-1] = x[len(z)-1] >> s ++ RET ++copy: ++ MOVV $0, R8 ++ BEQ R7, R4, len0 ++copyloop: ++ BEQ R5, R8, len0 ++ MOVV (R8)(R7), R9 ++ MOVV R9, (R8)(R4) ++ ADDV $8, R8 ++ JMP copyloop ++len0: ++ MOVV R0, c+56(FP) ++ RET + + TEXT ·mulAddVWW(SB),NOSPLIT,$0 + JMP ·mulAddVWW_g(SB) +-- +2.38.1 + diff --git a/0018-cmd-internal-obj-loong64-recheck-jump-offset-boundar.patch b/0018-cmd-internal-obj-loong64-recheck-jump-offset-boundar.patch deleted file mode 100644 index ee279081714536578fb3db4c867cf6fda08855e2..0000000000000000000000000000000000000000 --- a/0018-cmd-internal-obj-loong64-recheck-jump-offset-boundar.patch +++ /dev/null @@ -1,101 +0,0 @@ -From 17adab69b89265c9d24cb454afb66430e58b6533 Mon Sep 17 00:00:00 2001 -From: Guoqi Chen -Date: Wed, 16 Aug 2023 02:09:49 +0800 -Subject: [PATCH 18/51] cmd/internal/obj/loong64: recheck jump offset boundary - after auto-aligning loop heads - -After the alignment of the loop header is performed, the offset of the checked -conditional branch instruction may overflow, so it needs to be checked again. - -When checking whether the offset of the branch jump instruction overflows, it -can be classified and processed according to the range of the immediate field -of the specific instruction, which can reduce the introduction of unnecessary -jump instructions. 
- -Fixes #61819 - -Change-Id: Ica4c4ade43bf106c7035a1c02b89d3347a414b41 ---- - src/cmd/internal/obj/loong64/asm.go | 30 +++++++++++++++++++---------- - 1 file changed, 20 insertions(+), 10 deletions(-) - -diff --git a/src/cmd/internal/obj/loong64/asm.go b/src/cmd/internal/obj/loong64/asm.go -index c6c28002d1..74ee2b6cea 100644 ---- a/src/cmd/internal/obj/loong64/asm.go -+++ b/src/cmd/internal/obj/loong64/asm.go -@@ -491,11 +491,8 @@ func span0(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { - } - - // Run these passes until convergence. -- bflag := 1 -- var otxt int64 -- var q *obj.Prog -- for bflag != 0 { -- bflag = 0 -+ for { -+ rescan := false - pc = 0 - prev := c.cursym.Func().Text - for p = prev.Link; p != nil; prev, p = p, p.Link { -@@ -510,7 +507,7 @@ func span0(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { - // because pc will be adjusted if padding happens. - if p.Mark&branchLoopHead != 0 && pc&(loopAlign-1) != 0 && - !(prev.As == obj.APCALIGN && prev.From.Offset >= loopAlign) { -- q = c.newprog() -+ q := c.newprog() - prev.Link = q - q.Link = p - q.Pc = pc -@@ -526,6 +523,7 @@ func span0(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { - // since this loop iteration is for p. - pc += int64(pcAlignPadLength(ctxt, pc, loopAlign)) - p.Pc = pc -+ rescan = true - } - - // very large conditional branches -@@ -535,9 +533,16 @@ func span0(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { - // generate extra passes putting branches - // around jmps to fix. this is rare. 
- if o.type_ == 6 && p.To.Target() != nil { -- otxt = p.To.Target().Pc - pc -- if otxt < -(1<<17)+10 || otxt >= (1<<17)-10 { -- q = c.newprog() -+ otxt := p.To.Target().Pc - pc -+ bound := int64(1 << (18 - 1)) -+ -+ switch p.As { -+ case -ABEQ, -ABNE, ABFPT, ABFPF: -+ bound = int64(1 << (23 - 1)) -+ } -+ -+ if otxt < -bound || otxt >= bound { -+ q := c.newprog() - q.Link = p.Link - p.Link = q - q.As = AJMP -@@ -552,7 +557,7 @@ func span0(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { - q.Pos = p.Pos - q.To.Type = obj.TYPE_BRANCH - q.To.SetTarget(q.Link.Link) -- bflag = 1 -+ rescan = true - } - } - -@@ -574,7 +579,12 @@ func span0(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { - } - - c.cursym.Size = pc -+ -+ if !rescan { -+ break -+ } - } -+ - pc += -pc & (FuncAlign - 1) - c.cursym.Size = pc - --- -2.38.1 - diff --git a/0018-math-big-optimize-mulAddVWW-function-for-loong64.patch b/0018-math-big-optimize-mulAddVWW-function-for-loong64.patch new file mode 100644 index 0000000000000000000000000000000000000000..0ad375fa32d09ecea23d0d692da2b3adc95add16 --- /dev/null +++ b/0018-math-big-optimize-mulAddVWW-function-for-loong64.patch @@ -0,0 +1,77 @@ +From b956f69c885cd7fdf5305fd4047fd939000c9745 Mon Sep 17 00:00:00 2001 +From: Huang Qiqi +Date: Wed, 19 Jun 2024 06:31:00 +0000 +Subject: [PATCH 18/44] math/big: optimize mulAddVWW function for loong64 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Benchmark results on Loongson 3A5000 (which is an LA464 implementation): + +goos: linux +goarch: loong64 +pkg: math/big +cpu: Loongson-3A5000-HV @ 2500.00MHz + │ test/old_3a5000_muladdvww.log │ test/new_3a5000_muladdvww.log │ + │ sec/op │ sec/op vs base │ +MulAddVWW/1 7.606n ± 0% 6.987n ± 0% -8.14% (p=0.000 n=20) +MulAddVWW/2 9.207n ± 0% 8.567n ± 0% -6.95% (p=0.000 n=20) +MulAddVWW/3 10.810n ± 0% 9.223n ± 0% -14.68% (p=0.000 n=20) +MulAddVWW/4 13.01n ± 0% 12.41n ± 0% -4.61% (p=0.000 n=20) +MulAddVWW/5 15.79n ± 
0% 12.99n ± 0% -17.73% (p=0.000 n=20) +MulAddVWW/10 25.62n ± 0% 20.02n ± 0% -21.86% (p=0.000 n=20) +MulAddVWW/100 217.0n ± 0% 170.9n ± 0% -21.24% (p=0.000 n=20) +MulAddVWW/1000 2.064µ ± 0% 1.612µ ± 0% -21.90% (p=0.000 n=20) +MulAddVWW/10000 24.50µ ± 0% 16.74µ ± 0% -31.66% (p=0.000 n=20) +MulAddVWW/100000 239.1µ ± 0% 171.1µ ± 0% -28.45% (p=0.000 n=20) +geomean 159.2n 130.3n -18.18% + +Change-Id: I063434bc382f4f1234f879172ab671a3d6f2eb80 +--- + src/math/big/arith_loong64.s | 29 ++++++++++++++++++++++++++++- + 1 file changed, 28 insertions(+), 1 deletion(-) + +diff --git a/src/math/big/arith_loong64.s b/src/math/big/arith_loong64.s +index bdaaf14821..fe7c971120 100644 +--- a/src/math/big/arith_loong64.s ++++ b/src/math/big/arith_loong64.s +@@ -210,8 +210,35 @@ len0: + MOVV R0, c+56(FP) + RET + ++// func mulAddVWW(z, x []Word, y, r Word) (c Word) + TEXT ·mulAddVWW(SB),NOSPLIT,$0 +- JMP ·mulAddVWW_g(SB) ++ // input: ++ // R4: z ++ // R5: z_len ++ // R7: x ++ // R10: y ++ // R11: r ++ MOVV z+0(FP), R4 ++ MOVV z_len+8(FP), R5 ++ MOVV x+24(FP), R7 ++ MOVV y+48(FP), R10 ++ MOVV r+56(FP), R11 ++ SLLV $3, R5 ++ MOVV $0, R6 ++loop: ++ BEQ R5, R6, done ++ MOVV (R6)(R7), R8 ++ MULV R8, R10, R9 ++ MULHVU R8, R10, R12 ++ ADDV R9, R11, R8 ++ SGTU R9, R8, R11 // if (c' = lo + c) < lo then overflow ++ MOVV R8, (R6)(R4) ++ ADDV R12, R11 ++ ADDV $8, R6 ++ JMP loop ++done: ++ MOVV R11, c+64(FP) ++ RET + + TEXT ·addMulVVW(SB),NOSPLIT,$0 + JMP ·addMulVVW_g(SB) +-- +2.38.1 + diff --git a/0019-cmd-link-internal-loong64-correct-the-glibc-dynamic-.patch b/0019-cmd-link-internal-loong64-correct-the-glibc-dynamic-.patch deleted file mode 100644 index ab43c8d1b9d30e6d04ade9360fbd20afc423a37f..0000000000000000000000000000000000000000 --- a/0019-cmd-link-internal-loong64-correct-the-glibc-dynamic-.patch +++ /dev/null @@ -1,29 +0,0 @@ -From f55a403e2b1195958d950cda6111982d3c474f9e Mon Sep 17 00:00:00 2001 -From: limeidan -Date: Wed, 6 Sep 2023 17:09:35 +0800 -Subject: [PATCH 19/51] 
cmd/link/internal/loong64: correct the glibc dynamic - linker path. - -Ref: https://loongson.github.io/LoongArch-Documentation/LoongArch-ELF-ABI-EN.html#_program_interpreter_path - -Change-Id: Ic2598110cc091362cb09f877b6b86433cacf32c6 ---- - src/cmd/link/internal/loong64/obj.go | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/src/cmd/link/internal/loong64/obj.go b/src/cmd/link/internal/loong64/obj.go -index c3f6ed9386..fd193a2445 100644 ---- a/src/cmd/link/internal/loong64/obj.go -+++ b/src/cmd/link/internal/loong64/obj.go -@@ -29,7 +29,7 @@ func Init() (*sys.Arch, ld.Arch) { - Gentext: gentext, - - ELF: ld.ELFArch{ -- Linuxdynld: "/lib64/ld.so.1", -+ Linuxdynld: "/lib64/ld-linux-loongarch-lp64d.so.1", - LinuxdynldMusl: "/lib64/ld-musl-loongarch.so.1", - Freebsddynld: "XXX", - Openbsddynld: "XXX", --- -2.38.1 - diff --git a/0019-math-big-optimize-addMulVVW-function-for-loong64.patch b/0019-math-big-optimize-addMulVVW-function-for-loong64.patch new file mode 100644 index 0000000000000000000000000000000000000000..965a89c81ebc046ba70e60c87a94fc970c1005c4 --- /dev/null +++ b/0019-math-big-optimize-addMulVVW-function-for-loong64.patch @@ -0,0 +1,77 @@ +From e7a6135d5c0fc4685ad18a82e770acf9f226b08e Mon Sep 17 00:00:00 2001 +From: Huang Qiqi +Date: Wed, 19 Jun 2024 08:05:24 +0000 +Subject: [PATCH 19/44] math/big: optimize addMulVVW function for loong64 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Benchmark results on Loongson 3A5000 (which is an LA464 implementation): + +goos: linux +goarch: loong64 +pkg: math/big +cpu: Loongson-3A5000-HV @ 2500.00MHz + │ test/old_3a5000_addmulvvw.log │ test/new_3a5000_addmulvvw.log │ + │ sec/op │ sec/op vs base │ +AddMulVVW/1 9.208n ± 0% 5.777n ± 0% -37.26% (p=0.000 n=20) +AddMulVVW/2 11.950n ± 0% 7.763n ± 0% -35.04% (p=0.000 n=20) +AddMulVVW/3 14.01n ± 0% 10.41n ± 0% -25.70% (p=0.000 n=20) +AddMulVVW/4 16.01n ± 0% 13.21n ± 0% -17.49% (p=0.000 n=20) +AddMulVVW/5 
18.01n ± 0% 14.12n ± 0% -21.57% (p=0.000 n=20) +AddMulVVW/10 29.60n ± 0% 23.35n ± 0% -21.11% (p=0.000 n=20) +AddMulVVW/100 273.4n ± 0% 173.8n ± 0% -36.43% (p=0.000 n=20) +AddMulVVW/1000 2.516µ ± 0% 1.615µ ± 0% -35.81% (p=0.000 n=20) +AddMulVVW/10000 30.31µ ± 0% 21.54µ ± 0% -28.93% (p=0.000 n=20) +AddMulVVW/100000 322.5µ ± 0% 234.1µ ± 0% -27.41% (p=0.000 n=20) +geomean 197.1n 139.9n -29.00% + +Change-Id: Ib7e95b50f7af893abee72ec26948a65115455692 +--- + src/math/big/arith_loong64.s | 32 +++++++++++++++++++++++++++++++- + 1 file changed, 31 insertions(+), 1 deletion(-) + +diff --git a/src/math/big/arith_loong64.s b/src/math/big/arith_loong64.s +index fe7c971120..012af94f5c 100644 +--- a/src/math/big/arith_loong64.s ++++ b/src/math/big/arith_loong64.s +@@ -240,5 +240,35 @@ done: + MOVV R11, c+64(FP) + RET + ++// func addMulVVW(z, x []Word, y Word) (c Word) + TEXT ·addMulVVW(SB),NOSPLIT,$0 +- JMP ·addMulVVW_g(SB) ++ // input: ++ // R4: z ++ // R5: z_len ++ // R7: x ++ // R10: y ++ MOVV z_len+8(FP), R5 ++ MOVV x+24(FP), R7 ++ MOVV z+0(FP), R4 ++ MOVV y+48(FP), R10 ++ MOVV $0, R6 ++ SLLV $3, R5 ++ MOVV $0, R11 ++loop: ++ BEQ R5, R6, done ++ MOVV (R6)(R7), R8 ++ MOVV (R6)(R4), R9 ++ MULV R8, R10, R12 ++ MULHVU R8, R10, R13 ++ ADDV R12, R9, R8 ++ SGTU R12, R8, R9 ++ ADDV R13, R9 ++ ADDV R8, R11, R12 ++ SGTU R8, R12, R11 ++ MOVV R12, (R6)(R4) ++ ADDV $8, R6 ++ ADDV R9, R11 ++ JMP loop ++done: ++ MOVV R11, c+56(FP) ++ RET +-- +2.38.1 + diff --git a/0020-cmd-compile-fold-constant-shift-with-extension-on-lo.patch b/0020-cmd-compile-fold-constant-shift-with-extension-on-lo.patch new file mode 100644 index 0000000000000000000000000000000000000000..48553defe786efdc60d7eafbccac34ccc458148f --- /dev/null +++ b/0020-cmd-compile-fold-constant-shift-with-extension-on-lo.patch @@ -0,0 +1,376 @@ +From f10d1a3db9650a738d0254a58aadb62ec89eaca9 Mon Sep 17 00:00:00 2001 +From: Xiaolin Zhao +Date: Tue, 24 Sep 2024 16:59:06 +0800 +Subject: [PATCH 20/44] cmd/compile: fold constant shift with 
extension on + loong64 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +goos: linux +goarch: loong64 +pkg: test/bench/go1 +cpu: Loongson-3A6000 @ 2500.00MHz + │ bench.old │ bench.new │ + │ sec/op │ sec/op vs base │ +BinaryTree17 7.775 ± 1% 7.747 ± 1% ~ (p=0.713 n=15) +Fannkuch11 2.645 ± 0% 2.646 ± 0% +0.05% (p=0.002 n=15) +FmtFprintfEmpty 35.87n ± 0% 35.85n ± 0% -0.06% (p=0.000 n=15) +FmtFprintfString 59.50n ± 0% 59.17n ± 0% -0.55% (p=0.000 n=15) +FmtFprintfInt 62.03n ± 0% 62.38n ± 0% +0.56% (p=0.000 n=15) +FmtFprintfIntInt 97.73n ± 0% 96.51n ± 0% -1.25% (p=0.000 n=15) +FmtFprintfPrefixedInt 116.6n ± 0% 118.8n ± 0% +1.89% (p=0.000 n=15) +FmtFprintfFloat 204.1n ± 0% 200.3n ± 0% -1.86% (p=0.000 n=15) +FmtManyArgs 455.1n ± 0% 464.8n ± 0% +2.13% (p=0.000 n=15) +GobDecode 7.127m ± 1% 7.063m ± 1% -0.89% (p=0.033 n=15) +GobEncode 8.061m ± 1% 8.069m ± 5% ~ (p=0.870 n=15) +Gzip 279.8m ± 0% 271.4m ± 0% -3.00% (p=0.000 n=15) +Gunzip 32.63m ± 0% 31.68m ± 0% -2.93% (p=0.000 n=15) +HTTPClientServer 53.39µ ± 0% 53.12µ ± 0% -0.51% (p=0.000 n=15) +JSONEncode 9.323m ± 0% 8.990m ± 1% -3.57% (p=0.000 n=15) +JSONDecode 46.65m ± 1% 46.58m ± 0% ~ (p=0.050 n=15) +Mandelbrot200 4.600m ± 0% 4.603m ± 0% +0.06% (p=0.000 n=15) +GoParse 4.651m ± 0% 4.765m ± 1% +2.45% (p=0.000 n=15) +RegexpMatchEasy0_32 59.64n ± 0% 58.26n ± 0% -2.31% (p=0.000 n=15) +RegexpMatchEasy0_1K 457.3n ± 0% 458.0n ± 0% +0.15% (p=0.002 n=15) +RegexpMatchEasy1_32 59.24n ± 0% 60.12n ± 0% +1.49% (p=0.000 n=15) +RegexpMatchEasy1_1K 556.6n ± 0% 556.9n ± 0% +0.05% (p=0.002 n=15) +RegexpMatchMedium_32 801.5n ± 0% 799.5n ± 0% -0.25% (p=0.000 n=15) +RegexpMatchMedium_1K 27.25µ ± 0% 27.21µ ± 0% -0.15% (p=0.001 n=15) +RegexpMatchHard_32 1.382µ ± 0% 1.412µ ± 0% +2.17% (p=0.000 n=15) +RegexpMatchHard_1K 40.84µ ± 0% 40.91µ ± 0% +0.18% (p=0.000 n=15) +Revcomp 474.5m ± 0% 473.9m ± 0% ~ (p=0.081 n=15) +Template 76.85m ± 1% 74.71m ± 1% -2.79% (p=0.000 n=15) +TimeParse 271.1n ± 0% 269.1n ± 0% 
-0.74% (p=0.000 n=15) +TimeFormat 289.5n ± 0% 287.5n ± 0% -0.69% (p=0.000 n=15) +geomean 51.59µ 51.40µ -0.38% + +Change-Id: I721e930c30b3d1cb88a79306ec51990505d850f1 +--- + .../internal/ssa/_gen/LOONG64latelower.rules | 19 ++ + src/cmd/compile/internal/ssa/config.go | 2 + + .../internal/ssa/rewriteLOONG64latelower.go | 246 ++++++++++++++++++ + test/codegen/shift.go | 3 + + 4 files changed, 270 insertions(+) + create mode 100644 src/cmd/compile/internal/ssa/_gen/LOONG64latelower.rules + create mode 100644 src/cmd/compile/internal/ssa/rewriteLOONG64latelower.go + +diff --git a/src/cmd/compile/internal/ssa/_gen/LOONG64latelower.rules b/src/cmd/compile/internal/ssa/_gen/LOONG64latelower.rules +new file mode 100644 +index 0000000000..1158f84422 +--- /dev/null ++++ b/src/cmd/compile/internal/ssa/_gen/LOONG64latelower.rules +@@ -0,0 +1,19 @@ ++// Copyright 2024 The Go Authors. All rights reserved. ++// Use of this source code is governed by a BSD-style ++// license that can be found in the LICENSE file. ++ ++// Fold constant shift with extension. ++(SRAVconst (MOVBreg x) [c]) && c < 8 => (SRAVconst (SLLVconst x [56]) [56+c]) ++(SRAVconst (MOVHreg x) [c]) && c < 16 => (SRAVconst (SLLVconst x [48]) [48+c]) ++(SRAVconst (MOVWreg x) [c]) && c < 32 => (SRAVconst (SLLVconst x [32]) [32+c]) ++(SRLVconst (MOVBUreg x) [c]) && c < 8 => (SRLVconst (SLLVconst x [56]) [56+c]) ++(SRLVconst (MOVHUreg x) [c]) && c < 16 => (SRLVconst (SLLVconst x [48]) [48+c]) ++(SRLVconst (MOVWUreg x) [c]) && c < 32 => (SRLVconst (SLLVconst x [32]) [32+c]) ++(SLLVconst (MOVBUreg x) [c]) && c <= 56 => (SRLVconst (SLLVconst x [56]) [56-c]) ++(SLLVconst (MOVHUreg x) [c]) && c <= 48 => (SRLVconst (SLLVconst x [48]) [48-c]) ++(SLLVconst (MOVWUreg x) [c]) && c <= 32 => (SRLVconst (SLLVconst x [32]) [32-c]) ++ ++// Shift by zero. 
++(SRAVconst x [0]) => x ++(SRLVconst x [0]) => x ++(SLLVconst x [0]) => x +diff --git a/src/cmd/compile/internal/ssa/config.go b/src/cmd/compile/internal/ssa/config.go +index d674cca009..9c4f60f613 100644 +--- a/src/cmd/compile/internal/ssa/config.go ++++ b/src/cmd/compile/internal/ssa/config.go +@@ -280,6 +280,8 @@ func NewConfig(arch string, types Types, ctxt *obj.Link, optimize, softfloat boo + c.RegSize = 8 + c.lowerBlock = rewriteBlockLOONG64 + c.lowerValue = rewriteValueLOONG64 ++ c.lateLowerBlock = rewriteBlockLOONG64latelower ++ c.lateLowerValue = rewriteValueLOONG64latelower + c.registers = registersLOONG64[:] + c.gpRegMask = gpRegMaskLOONG64 + c.fpRegMask = fpRegMaskLOONG64 +diff --git a/src/cmd/compile/internal/ssa/rewriteLOONG64latelower.go b/src/cmd/compile/internal/ssa/rewriteLOONG64latelower.go +new file mode 100644 +index 0000000000..f092b0a1ef +--- /dev/null ++++ b/src/cmd/compile/internal/ssa/rewriteLOONG64latelower.go +@@ -0,0 +1,246 @@ ++// Code generated from _gen/LOONG64latelower.rules using 'go generate'; DO NOT EDIT. 
++ ++package ssa ++ ++func rewriteValueLOONG64latelower(v *Value) bool { ++ switch v.Op { ++ case OpLOONG64SLLVconst: ++ return rewriteValueLOONG64latelower_OpLOONG64SLLVconst(v) ++ case OpLOONG64SRAVconst: ++ return rewriteValueLOONG64latelower_OpLOONG64SRAVconst(v) ++ case OpLOONG64SRLVconst: ++ return rewriteValueLOONG64latelower_OpLOONG64SRLVconst(v) ++ } ++ return false ++} ++func rewriteValueLOONG64latelower_OpLOONG64SLLVconst(v *Value) bool { ++ v_0 := v.Args[0] ++ b := v.Block ++ typ := &b.Func.Config.Types ++ // match: (SLLVconst (MOVBUreg x) [c]) ++ // cond: c <= 56 ++ // result: (SRLVconst (SLLVconst x [56]) [56-c]) ++ for { ++ c := auxIntToInt64(v.AuxInt) ++ if v_0.Op != OpLOONG64MOVBUreg { ++ break ++ } ++ x := v_0.Args[0] ++ if !(c <= 56) { ++ break ++ } ++ v.reset(OpLOONG64SRLVconst) ++ v.AuxInt = int64ToAuxInt(56 - c) ++ v0 := b.NewValue0(v.Pos, OpLOONG64SLLVconst, typ.UInt64) ++ v0.AuxInt = int64ToAuxInt(56) ++ v0.AddArg(x) ++ v.AddArg(v0) ++ return true ++ } ++ // match: (SLLVconst (MOVHUreg x) [c]) ++ // cond: c <= 48 ++ // result: (SRLVconst (SLLVconst x [48]) [48-c]) ++ for { ++ c := auxIntToInt64(v.AuxInt) ++ if v_0.Op != OpLOONG64MOVHUreg { ++ break ++ } ++ x := v_0.Args[0] ++ if !(c <= 48) { ++ break ++ } ++ v.reset(OpLOONG64SRLVconst) ++ v.AuxInt = int64ToAuxInt(48 - c) ++ v0 := b.NewValue0(v.Pos, OpLOONG64SLLVconst, typ.UInt64) ++ v0.AuxInt = int64ToAuxInt(48) ++ v0.AddArg(x) ++ v.AddArg(v0) ++ return true ++ } ++ // match: (SLLVconst (MOVWUreg x) [c]) ++ // cond: c <= 32 ++ // result: (SRLVconst (SLLVconst x [32]) [32-c]) ++ for { ++ c := auxIntToInt64(v.AuxInt) ++ if v_0.Op != OpLOONG64MOVWUreg { ++ break ++ } ++ x := v_0.Args[0] ++ if !(c <= 32) { ++ break ++ } ++ v.reset(OpLOONG64SRLVconst) ++ v.AuxInt = int64ToAuxInt(32 - c) ++ v0 := b.NewValue0(v.Pos, OpLOONG64SLLVconst, typ.UInt64) ++ v0.AuxInt = int64ToAuxInt(32) ++ v0.AddArg(x) ++ v.AddArg(v0) ++ return true ++ } ++ // match: (SLLVconst x [0]) ++ // result: x ++ for { ++ if 
auxIntToInt64(v.AuxInt) != 0 { ++ break ++ } ++ x := v_0 ++ v.copyOf(x) ++ return true ++ } ++ return false ++} ++func rewriteValueLOONG64latelower_OpLOONG64SRAVconst(v *Value) bool { ++ v_0 := v.Args[0] ++ b := v.Block ++ typ := &b.Func.Config.Types ++ // match: (SRAVconst (MOVBreg x) [c]) ++ // cond: c < 8 ++ // result: (SRAVconst (SLLVconst x [56]) [56+c]) ++ for { ++ c := auxIntToInt64(v.AuxInt) ++ if v_0.Op != OpLOONG64MOVBreg { ++ break ++ } ++ x := v_0.Args[0] ++ if !(c < 8) { ++ break ++ } ++ v.reset(OpLOONG64SRAVconst) ++ v.AuxInt = int64ToAuxInt(56 + c) ++ v0 := b.NewValue0(v.Pos, OpLOONG64SLLVconst, typ.Int64) ++ v0.AuxInt = int64ToAuxInt(56) ++ v0.AddArg(x) ++ v.AddArg(v0) ++ return true ++ } ++ // match: (SRAVconst (MOVHreg x) [c]) ++ // cond: c < 16 ++ // result: (SRAVconst (SLLVconst x [48]) [48+c]) ++ for { ++ c := auxIntToInt64(v.AuxInt) ++ if v_0.Op != OpLOONG64MOVHreg { ++ break ++ } ++ x := v_0.Args[0] ++ if !(c < 16) { ++ break ++ } ++ v.reset(OpLOONG64SRAVconst) ++ v.AuxInt = int64ToAuxInt(48 + c) ++ v0 := b.NewValue0(v.Pos, OpLOONG64SLLVconst, typ.Int64) ++ v0.AuxInt = int64ToAuxInt(48) ++ v0.AddArg(x) ++ v.AddArg(v0) ++ return true ++ } ++ // match: (SRAVconst (MOVWreg x) [c]) ++ // cond: c < 32 ++ // result: (SRAVconst (SLLVconst x [32]) [32+c]) ++ for { ++ c := auxIntToInt64(v.AuxInt) ++ if v_0.Op != OpLOONG64MOVWreg { ++ break ++ } ++ x := v_0.Args[0] ++ if !(c < 32) { ++ break ++ } ++ v.reset(OpLOONG64SRAVconst) ++ v.AuxInt = int64ToAuxInt(32 + c) ++ v0 := b.NewValue0(v.Pos, OpLOONG64SLLVconst, typ.Int64) ++ v0.AuxInt = int64ToAuxInt(32) ++ v0.AddArg(x) ++ v.AddArg(v0) ++ return true ++ } ++ // match: (SRAVconst x [0]) ++ // result: x ++ for { ++ if auxIntToInt64(v.AuxInt) != 0 { ++ break ++ } ++ x := v_0 ++ v.copyOf(x) ++ return true ++ } ++ return false ++} ++func rewriteValueLOONG64latelower_OpLOONG64SRLVconst(v *Value) bool { ++ v_0 := v.Args[0] ++ b := v.Block ++ typ := &b.Func.Config.Types ++ // match: (SRLVconst (MOVBUreg x) [c]) 
++ // cond: c < 8 ++ // result: (SRLVconst (SLLVconst x [56]) [56+c]) ++ for { ++ c := auxIntToInt64(v.AuxInt) ++ if v_0.Op != OpLOONG64MOVBUreg { ++ break ++ } ++ x := v_0.Args[0] ++ if !(c < 8) { ++ break ++ } ++ v.reset(OpLOONG64SRLVconst) ++ v.AuxInt = int64ToAuxInt(56 + c) ++ v0 := b.NewValue0(v.Pos, OpLOONG64SLLVconst, typ.UInt64) ++ v0.AuxInt = int64ToAuxInt(56) ++ v0.AddArg(x) ++ v.AddArg(v0) ++ return true ++ } ++ // match: (SRLVconst (MOVHUreg x) [c]) ++ // cond: c < 16 ++ // result: (SRLVconst (SLLVconst x [48]) [48+c]) ++ for { ++ c := auxIntToInt64(v.AuxInt) ++ if v_0.Op != OpLOONG64MOVHUreg { ++ break ++ } ++ x := v_0.Args[0] ++ if !(c < 16) { ++ break ++ } ++ v.reset(OpLOONG64SRLVconst) ++ v.AuxInt = int64ToAuxInt(48 + c) ++ v0 := b.NewValue0(v.Pos, OpLOONG64SLLVconst, typ.UInt64) ++ v0.AuxInt = int64ToAuxInt(48) ++ v0.AddArg(x) ++ v.AddArg(v0) ++ return true ++ } ++ // match: (SRLVconst (MOVWUreg x) [c]) ++ // cond: c < 32 ++ // result: (SRLVconst (SLLVconst x [32]) [32+c]) ++ for { ++ c := auxIntToInt64(v.AuxInt) ++ if v_0.Op != OpLOONG64MOVWUreg { ++ break ++ } ++ x := v_0.Args[0] ++ if !(c < 32) { ++ break ++ } ++ v.reset(OpLOONG64SRLVconst) ++ v.AuxInt = int64ToAuxInt(32 + c) ++ v0 := b.NewValue0(v.Pos, OpLOONG64SLLVconst, typ.UInt64) ++ v0.AuxInt = int64ToAuxInt(32) ++ v0.AddArg(x) ++ v.AddArg(v0) ++ return true ++ } ++ // match: (SRLVconst x [0]) ++ // result: x ++ for { ++ if auxIntToInt64(v.AuxInt) != 0 { ++ break ++ } ++ x := v_0 ++ v.copyOf(x) ++ return true ++ } ++ return false ++} ++func rewriteBlockLOONG64latelower(b *Block) bool { ++ return false ++} +diff --git a/test/codegen/shift.go b/test/codegen/shift.go +index 2d8cf86857..ad69d69aa5 100644 +--- a/test/codegen/shift.go ++++ b/test/codegen/shift.go +@@ -61,18 +61,21 @@ func rshConst64x64Overflow8(v int8) int64 { + func lshConst32x64(v int32) int32 { + // ppc64x:"SLW" + // riscv64:"SLLI",-"AND",-"SLTIU", -"MOVW" ++ // loong64:"SLLV" + return v << uint64(29) + } + + func 
rshConst32Ux64(v uint32) uint32 { + // ppc64x:"SRW" + // riscv64:"SRLIW",-"AND",-"SLTIU", -"MOVW" ++ // loong64:"SLLV","SRLV",-"MOVWU" + return v >> uint64(29) + } + + func rshConst32x64(v int32) int32 { + // ppc64x:"SRAW" + // riscv64:"SRAIW",-"OR",-"SLTIU", -"MOVW" ++ // loong64:"SLLV","SRAV",-"MOVW" + return v >> uint64(29) + } + +-- +2.38.1 + diff --git a/0020-cmd-link-internal-loadelf-correct-the-relocation-siz.patch b/0020-cmd-link-internal-loadelf-correct-the-relocation-siz.patch deleted file mode 100644 index 34f5d29d7cd64c94fc18f6a4c72be331238c9b07..0000000000000000000000000000000000000000 --- a/0020-cmd-link-internal-loadelf-correct-the-relocation-siz.patch +++ /dev/null @@ -1,33 +0,0 @@ -From 236eadf7de8e5fcb81bdf03cba85dcf8e7c00d41 Mon Sep 17 00:00:00 2001 -From: limeidan -Date: Thu, 14 Sep 2023 20:11:07 +0800 -Subject: [PATCH 20/51] cmd/link/internal/loadelf: correct the relocation size - of R_LARCH_64 - -Change-Id: If3eaca8b92e8f5265c7763d13021a6353b9df9b6 ---- - src/cmd/link/internal/loadelf/ldelf.go | 4 +++- - 1 file changed, 3 insertions(+), 1 deletion(-) - -diff --git a/src/cmd/link/internal/loadelf/ldelf.go b/src/cmd/link/internal/loadelf/ldelf.go -index 942d54c06c..5ab7cf2204 100644 ---- a/src/cmd/link/internal/loadelf/ldelf.go -+++ b/src/cmd/link/internal/loadelf/ldelf.go -@@ -1012,11 +1012,13 @@ func relSize(arch *sys.Arch, pn string, elftype uint32) (uint8, uint8, error) { - LOONG64 | uint32(elf.R_LARCH_SOP_PUSH_ABSOLUTE)<<16, - LOONG64 | uint32(elf.R_LARCH_MARK_LA)<<16, - LOONG64 | uint32(elf.R_LARCH_SOP_POP_32_S_0_10_10_16_S2)<<16, -- LOONG64 | uint32(elf.R_LARCH_64)<<16, - LOONG64 | uint32(elf.R_LARCH_MARK_PCREL)<<16, - LOONG64 | uint32(elf.R_LARCH_32_PCREL)<<16: - return 4, 4, nil - -+ case LOONG64 | uint32(elf.R_LARCH_64)<<16: -+ return 8, 8, nil -+ - case S390X | uint32(elf.R_390_8)<<16: - return 1, 1, nil - --- -2.38.1 - diff --git a/0021-cmd-compile-cmd-internal-runtime-change-the-register.patch 
b/0021-cmd-compile-cmd-internal-runtime-change-the-register.patch deleted file mode 100644 index 35059918c02944fc5aff8e3173c57d698545df6e..0000000000000000000000000000000000000000 --- a/0021-cmd-compile-cmd-internal-runtime-change-the-register.patch +++ /dev/null @@ -1,3011 +0,0 @@ -From 3ba838091015878918265cd0c8d224588a875b17 Mon Sep 17 00:00:00 2001 -From: Guoqi Chen -Date: Thu, 17 Aug 2023 03:58:10 +0800 -Subject: [PATCH 21/51] cmd/compile, cmd/internal, runtime: change the - registers used by the duff device for loong64 - -Add R21 to the allocatable registers, use R20 and R21 in duff -device. This CL is in preparation for subsequent regABI support. - -Updates #40724 - -Co-authored-by: Xiaolin Zhao -Change-Id: I6d18e94a96e7598f0700855fd07ae7c3ad86737d ---- - src/cmd/compile/internal/loong64/ssa.go | 2 +- - .../compile/internal/ssa/_gen/LOONG64Ops.go | 44 +- - src/cmd/compile/internal/ssa/opGen.go | 438 +++--- - src/cmd/internal/obj/loong64/a.out.go | 8 +- - src/runtime/duff_loong64.s | 1280 ++++++++--------- - src/runtime/mkduff.go | 10 +- - 6 files changed, 891 insertions(+), 891 deletions(-) - -diff --git a/src/cmd/compile/internal/loong64/ssa.go b/src/cmd/compile/internal/loong64/ssa.go -index 0e8683ba81..199fd4ce33 100644 ---- a/src/cmd/compile/internal/loong64/ssa.go -+++ b/src/cmd/compile/internal/loong64/ssa.go -@@ -362,7 +362,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) { - p.To.Type = obj.TYPE_REG - p.To.Reg = v.Reg() - case ssa.OpLOONG64DUFFZERO: -- // runtime.duffzero expects start address in R19 -+ // runtime.duffzero expects start address in R20 - p := s.Prog(obj.ADUFFZERO) - p.To.Type = obj.TYPE_MEM - p.To.Name = obj.NAME_EXTERN -diff --git a/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go b/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go -index ce08346a4a..9950619baf 100644 ---- a/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go -+++ b/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go -@@ -123,7 +123,7 @@ func init() { - - // Common 
individual register masks - var ( -- gp = buildReg("R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R23 R24 R25 R26 R27 R28 R29 R31") // R1 is LR, R2 is thread pointer, R3 is stack pointer, R21-unused, R22 is g, R30 is REGTMP -+ gp = buildReg("R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31") // R1 is LR, R2 is thread pointer, R3 is stack pointer, R22 is g, R30 is REGTMP - gpg = gp | buildReg("g") - gpsp = gp | buildReg("SP") - gpspg = gpg | buildReg("SP") -@@ -283,21 +283,21 @@ func init() { - // arg1 = mem - // auxint = offset into duffzero code to start executing - // returns mem -- // R19 aka loong64.REGRT1 changed as side effect -+ // R20 aka loong64.REGRT1 changed as side effect - { - name: "DUFFZERO", - aux: "Int64", - argLength: 2, - reg: regInfo{ -- inputs: []regMask{buildReg("R19")}, -- clobbers: buildReg("R19 R1"), -+ inputs: []regMask{buildReg("R20")}, -+ clobbers: buildReg("R20 R1"), - }, - faultOnNilArg0: true, - }, - - // duffcopy -- // arg0 = address of dst memory (in R20, changed as side effect) REGRT2 -- // arg1 = address of src memory (in R19, changed as side effect) REGRT1 -+ // arg0 = address of dst memory (in R21, changed as side effect) REGRT2 -+ // arg1 = address of src memory (in R20, changed as side effect) REGRT1 - // arg2 = mem - // auxint = offset into duffcopy code to start executing - // returns mem -@@ -306,53 +306,53 @@ func init() { - aux: "Int64", - argLength: 3, - reg: regInfo{ -- inputs: []regMask{buildReg("R20"), buildReg("R19")}, -- clobbers: buildReg("R19 R20 R1"), -+ inputs: []regMask{buildReg("R21"), buildReg("R20")}, -+ clobbers: buildReg("R20 R21 R1"), - }, - faultOnNilArg0: true, - faultOnNilArg1: true, - }, - - // large or unaligned zeroing -- // arg0 = address of memory to zero (in R19, changed as side effect) -+ // arg0 = address of memory to zero (in R20, changed as side effect) - // arg1 = address of the last element to zero - // arg2 = mem - // auxint 
= alignment - // returns mem -- // MOVx R0, (R19) -- // ADDV $sz, R19 -- // BGEU Rarg1, R19, -2(PC) -+ // MOVx R0, (R20) -+ // ADDV $sz, R20 -+ // BGEU Rarg1, R20, -2(PC) - { - name: "LoweredZero", - aux: "Int64", - argLength: 3, - reg: regInfo{ -- inputs: []regMask{buildReg("R19"), gp}, -- clobbers: buildReg("R19"), -+ inputs: []regMask{buildReg("R20"), gp}, -+ clobbers: buildReg("R20"), - }, - typ: "Mem", - faultOnNilArg0: true, - }, - - // large or unaligned move -- // arg0 = address of dst memory (in R20, changed as side effect) -- // arg1 = address of src memory (in R19, changed as side effect) -+ // arg0 = address of dst memory (in R21, changed as side effect) -+ // arg1 = address of src memory (in R20, changed as side effect) - // arg2 = address of the last element of src - // arg3 = mem - // auxint = alignment - // returns mem -- // MOVx (R19), Rtmp -- // MOVx Rtmp, (R20) -- // ADDV $sz, R19 -+ // MOVx (R20), Rtmp -+ // MOVx Rtmp, (R21) - // ADDV $sz, R20 -- // BGEU Rarg2, R19, -4(PC) -+ // ADDV $sz, R21 -+ // BGEU Rarg2, R20, -4(PC) - { - name: "LoweredMove", - aux: "Int64", - argLength: 4, - reg: regInfo{ -- inputs: []regMask{buildReg("R20"), buildReg("R19"), gp}, -- clobbers: buildReg("R19 R20"), -+ inputs: []regMask{buildReg("R21"), buildReg("R20"), gp}, -+ clobbers: buildReg("R20 R21"), - }, - typ: "Mem", - faultOnNilArg0: true, -diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go -index fc735c48b5..2c17801ea4 100644 ---- a/src/cmd/compile/internal/ssa/opGen.go -+++ b/src/cmd/compile/internal/ssa/opGen.go -@@ -22910,11 +22910,11 @@ var opcodeTable = [...]opInfo{ - asm: loong64.AADDVU, - reg: regInfo{ - inputs: []inputInfo{ -- {0, 1072693240}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 -- {1, 1072693240}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 
R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 -+ {1, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 - }, - outputs: []outputInfo{ -- {0, 1070596088}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31 - }, - }, - }, -@@ -22925,10 +22925,10 @@ var opcodeTable = [...]opInfo{ - asm: loong64.AADDVU, - reg: regInfo{ - inputs: []inputInfo{ -- {0, 1072693244}, // SP R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 1073741820}, // SP R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 - }, - outputs: []outputInfo{ -- {0, 1070596088}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31 - }, - }, - }, -@@ -22938,11 +22938,11 @@ var opcodeTable = [...]opInfo{ - asm: loong64.ASUBVU, - reg: regInfo{ - inputs: []inputInfo{ -- {0, 1072693240}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 -- {1, 1072693240}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 -+ {1, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 - }, - outputs: []outputInfo{ -- {0, 1070596088}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31 - }, 
- }, - }, -@@ -22953,10 +22953,10 @@ var opcodeTable = [...]opInfo{ - asm: loong64.ASUBVU, - reg: regInfo{ - inputs: []inputInfo{ -- {0, 1072693240}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 - }, - outputs: []outputInfo{ -- {0, 1070596088}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31 - }, - }, - }, -@@ -22967,11 +22967,11 @@ var opcodeTable = [...]opInfo{ - asm: loong64.AMULV, - reg: regInfo{ - inputs: []inputInfo{ -- {0, 1072693240}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 -- {1, 1072693240}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 -+ {1, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 - }, - outputs: []outputInfo{ -- {0, 1070596088}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31 - }, - }, - }, -@@ -22982,11 +22982,11 @@ var opcodeTable = [...]opInfo{ - asm: loong64.AMULHV, - reg: regInfo{ - inputs: []inputInfo{ -- {0, 1072693240}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 -- {1, 1072693240}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 
R25 R26 R27 R28 R29 R31 -+ {1, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 - }, - outputs: []outputInfo{ -- {0, 1070596088}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31 - }, - }, - }, -@@ -22997,11 +22997,11 @@ var opcodeTable = [...]opInfo{ - asm: loong64.AMULHVU, - reg: regInfo{ - inputs: []inputInfo{ -- {0, 1072693240}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 -- {1, 1072693240}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 -+ {1, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 - }, - outputs: []outputInfo{ -- {0, 1070596088}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31 - }, - }, - }, -@@ -23011,11 +23011,11 @@ var opcodeTable = [...]opInfo{ - asm: loong64.ADIVV, - reg: regInfo{ - inputs: []inputInfo{ -- {0, 1072693240}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 -- {1, 1072693240}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 -+ {1, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 - }, - outputs: []outputInfo{ -- {0, 1070596088}, // R4 R5 R6 R7 R8 R9 R10 R11 
R12 R13 R14 R15 R16 R17 R18 R19 R20 R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31 - }, - }, - }, -@@ -23025,11 +23025,11 @@ var opcodeTable = [...]opInfo{ - asm: loong64.ADIVVU, - reg: regInfo{ - inputs: []inputInfo{ -- {0, 1072693240}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 -- {1, 1072693240}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 -+ {1, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 - }, - outputs: []outputInfo{ -- {0, 1070596088}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31 - }, - }, - }, -@@ -23039,11 +23039,11 @@ var opcodeTable = [...]opInfo{ - asm: loong64.AREMV, - reg: regInfo{ - inputs: []inputInfo{ -- {0, 1072693240}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 -- {1, 1072693240}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 -+ {1, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 - }, - outputs: []outputInfo{ -- {0, 1070596088}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31 - }, - }, - }, -@@ -23053,11 
+23053,11 @@ var opcodeTable = [...]opInfo{ - asm: loong64.AREMVU, - reg: regInfo{ - inputs: []inputInfo{ -- {0, 1072693240}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 -- {1, 1072693240}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 -+ {1, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 - }, - outputs: []outputInfo{ -- {0, 1070596088}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31 - }, - }, - }, -@@ -23184,11 +23184,11 @@ var opcodeTable = [...]opInfo{ - asm: loong64.AAND, - reg: regInfo{ - inputs: []inputInfo{ -- {0, 1072693240}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 -- {1, 1072693240}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 -+ {1, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 - }, - outputs: []outputInfo{ -- {0, 1070596088}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31 - }, - }, - }, -@@ -23199,10 +23199,10 @@ var opcodeTable = [...]opInfo{ - asm: loong64.AAND, - reg: regInfo{ - inputs: []inputInfo{ -- {0, 1072693240}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 -+ 
{0, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 - }, - outputs: []outputInfo{ -- {0, 1070596088}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31 - }, - }, - }, -@@ -23213,11 +23213,11 @@ var opcodeTable = [...]opInfo{ - asm: loong64.AOR, - reg: regInfo{ - inputs: []inputInfo{ -- {0, 1072693240}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 -- {1, 1072693240}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 -+ {1, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 - }, - outputs: []outputInfo{ -- {0, 1070596088}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31 - }, - }, - }, -@@ -23228,10 +23228,10 @@ var opcodeTable = [...]opInfo{ - asm: loong64.AOR, - reg: regInfo{ - inputs: []inputInfo{ -- {0, 1072693240}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 - }, - outputs: []outputInfo{ -- {0, 1070596088}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31 - }, - }, - }, -@@ -23242,11 +23242,11 @@ var opcodeTable = [...]opInfo{ - asm: 
loong64.AXOR, - reg: regInfo{ - inputs: []inputInfo{ -- {0, 1072693240}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 -- {1, 1072693240}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 -+ {1, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 - }, - outputs: []outputInfo{ -- {0, 1070596088}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31 - }, - }, - }, -@@ -23257,10 +23257,10 @@ var opcodeTable = [...]opInfo{ - asm: loong64.AXOR, - reg: regInfo{ - inputs: []inputInfo{ -- {0, 1072693240}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 - }, - outputs: []outputInfo{ -- {0, 1070596088}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31 - }, - }, - }, -@@ -23271,11 +23271,11 @@ var opcodeTable = [...]opInfo{ - asm: loong64.ANOR, - reg: regInfo{ - inputs: []inputInfo{ -- {0, 1072693240}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 -- {1, 1072693240}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 -+ {1, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 
R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 - }, - outputs: []outputInfo{ -- {0, 1070596088}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31 - }, - }, - }, -@@ -23286,10 +23286,10 @@ var opcodeTable = [...]opInfo{ - asm: loong64.ANOR, - reg: regInfo{ - inputs: []inputInfo{ -- {0, 1072693240}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 - }, - outputs: []outputInfo{ -- {0, 1070596088}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31 - }, - }, - }, -@@ -23298,10 +23298,10 @@ var opcodeTable = [...]opInfo{ - argLen: 1, - reg: regInfo{ - inputs: []inputInfo{ -- {0, 1072693240}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 - }, - outputs: []outputInfo{ -- {0, 1070596088}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31 - }, - }, - }, -@@ -23363,11 +23363,11 @@ var opcodeTable = [...]opInfo{ - asm: loong64.AMASKEQZ, - reg: regInfo{ - inputs: []inputInfo{ -- {0, 1072693240}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 -- {1, 1072693240}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 
1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 -+ {1, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 - }, - outputs: []outputInfo{ -- {0, 1070596088}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31 - }, - }, - }, -@@ -23377,11 +23377,11 @@ var opcodeTable = [...]opInfo{ - asm: loong64.AMASKNEZ, - reg: regInfo{ - inputs: []inputInfo{ -- {0, 1072693240}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 -- {1, 1072693240}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 -+ {1, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 - }, - outputs: []outputInfo{ -- {0, 1070596088}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31 - }, - }, - }, -@@ -23391,11 +23391,11 @@ var opcodeTable = [...]opInfo{ - asm: loong64.ASLLV, - reg: regInfo{ - inputs: []inputInfo{ -- {0, 1072693240}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 -- {1, 1072693240}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 -+ {1, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 
R27 R28 R29 R31 - }, - outputs: []outputInfo{ -- {0, 1070596088}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31 - }, - }, - }, -@@ -23406,10 +23406,10 @@ var opcodeTable = [...]opInfo{ - asm: loong64.ASLLV, - reg: regInfo{ - inputs: []inputInfo{ -- {0, 1072693240}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 - }, - outputs: []outputInfo{ -- {0, 1070596088}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31 - }, - }, - }, -@@ -23419,11 +23419,11 @@ var opcodeTable = [...]opInfo{ - asm: loong64.ASRLV, - reg: regInfo{ - inputs: []inputInfo{ -- {0, 1072693240}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 -- {1, 1072693240}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 -+ {1, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 - }, - outputs: []outputInfo{ -- {0, 1070596088}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31 - }, - }, - }, -@@ -23434,10 +23434,10 @@ var opcodeTable = [...]opInfo{ - asm: loong64.ASRLV, - reg: regInfo{ - inputs: []inputInfo{ -- {0, 1072693240}, // R4 R5 R6 R7 R8 R9 R10 R11 
R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 - }, - outputs: []outputInfo{ -- {0, 1070596088}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31 - }, - }, - }, -@@ -23447,11 +23447,11 @@ var opcodeTable = [...]opInfo{ - asm: loong64.ASRAV, - reg: regInfo{ - inputs: []inputInfo{ -- {0, 1072693240}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 -- {1, 1072693240}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 -+ {1, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 - }, - outputs: []outputInfo{ -- {0, 1070596088}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31 - }, - }, - }, -@@ -23462,10 +23462,10 @@ var opcodeTable = [...]opInfo{ - asm: loong64.ASRAV, - reg: regInfo{ - inputs: []inputInfo{ -- {0, 1072693240}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 - }, - outputs: []outputInfo{ -- {0, 1070596088}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31 - }, - 
}, - }, -@@ -23475,11 +23475,11 @@ var opcodeTable = [...]opInfo{ - asm: loong64.AROTR, - reg: regInfo{ - inputs: []inputInfo{ -- {0, 1072693240}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 -- {1, 1072693240}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 -+ {1, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 - }, - outputs: []outputInfo{ -- {0, 1070596088}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31 - }, - }, - }, -@@ -23489,11 +23489,11 @@ var opcodeTable = [...]opInfo{ - asm: loong64.AROTRV, - reg: regInfo{ - inputs: []inputInfo{ -- {0, 1072693240}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 -- {1, 1072693240}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 -+ {1, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 - }, - outputs: []outputInfo{ -- {0, 1070596088}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31 - }, - }, - }, -@@ -23504,10 +23504,10 @@ var opcodeTable = [...]opInfo{ - asm: loong64.AROTR, - reg: regInfo{ - inputs: []inputInfo{ -- {0, 1072693240}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 
R26 R27 R28 R29 R31 -+ {0, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 - }, - outputs: []outputInfo{ -- {0, 1070596088}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31 - }, - }, - }, -@@ -23518,10 +23518,10 @@ var opcodeTable = [...]opInfo{ - asm: loong64.AROTRV, - reg: regInfo{ - inputs: []inputInfo{ -- {0, 1072693240}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 - }, - outputs: []outputInfo{ -- {0, 1070596088}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31 - }, - }, - }, -@@ -23531,11 +23531,11 @@ var opcodeTable = [...]opInfo{ - asm: loong64.ASGT, - reg: regInfo{ - inputs: []inputInfo{ -- {0, 1072693240}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 -- {1, 1072693240}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 -+ {1, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 - }, - outputs: []outputInfo{ -- {0, 1070596088}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31 - }, - }, - }, -@@ -23546,10 +23546,10 @@ var opcodeTable 
= [...]opInfo{ - asm: loong64.ASGT, - reg: regInfo{ - inputs: []inputInfo{ -- {0, 1072693240}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 - }, - outputs: []outputInfo{ -- {0, 1070596088}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31 - }, - }, - }, -@@ -23559,11 +23559,11 @@ var opcodeTable = [...]opInfo{ - asm: loong64.ASGTU, - reg: regInfo{ - inputs: []inputInfo{ -- {0, 1072693240}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 -- {1, 1072693240}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 -+ {1, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 - }, - outputs: []outputInfo{ -- {0, 1070596088}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31 - }, - }, - }, -@@ -23574,10 +23574,10 @@ var opcodeTable = [...]opInfo{ - asm: loong64.ASGTU, - reg: regInfo{ - inputs: []inputInfo{ -- {0, 1072693240}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 - }, - outputs: []outputInfo{ -- {0, 1070596088}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R23 R24 R25 R26 R27 R28 R29 R31 -+ 
{0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31 - }, - }, - }, -@@ -23655,7 +23655,7 @@ var opcodeTable = [...]opInfo{ - asm: loong64.AMOVV, - reg: regInfo{ - outputs: []outputInfo{ -- {0, 1070596088}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31 - }, - }, - }, -@@ -23695,7 +23695,7 @@ var opcodeTable = [...]opInfo{ - {0, 4611686018427387908}, // SP SB - }, - outputs: []outputInfo{ -- {0, 1070596088}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31 - }, - }, - }, -@@ -23708,10 +23708,10 @@ var opcodeTable = [...]opInfo{ - asm: loong64.AMOVB, - reg: regInfo{ - inputs: []inputInfo{ -- {0, 4611686019500081148}, // SP R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 SB -+ {0, 4611686019501129724}, // SP R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 SB - }, - outputs: []outputInfo{ -- {0, 1070596088}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31 - }, - }, - }, -@@ -23724,10 +23724,10 @@ var opcodeTable = [...]opInfo{ - asm: loong64.AMOVBU, - reg: regInfo{ - inputs: []inputInfo{ -- {0, 4611686019500081148}, // SP R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 SB -+ {0, 4611686019501129724}, // SP R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 SB - }, - outputs: []outputInfo{ -- {0, 1070596088}, // 
R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31 - }, - }, - }, -@@ -23740,10 +23740,10 @@ var opcodeTable = [...]opInfo{ - asm: loong64.AMOVH, - reg: regInfo{ - inputs: []inputInfo{ -- {0, 4611686019500081148}, // SP R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 SB -+ {0, 4611686019501129724}, // SP R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 SB - }, - outputs: []outputInfo{ -- {0, 1070596088}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31 - }, - }, - }, -@@ -23756,10 +23756,10 @@ var opcodeTable = [...]opInfo{ - asm: loong64.AMOVHU, - reg: regInfo{ - inputs: []inputInfo{ -- {0, 4611686019500081148}, // SP R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 SB -+ {0, 4611686019501129724}, // SP R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 SB - }, - outputs: []outputInfo{ -- {0, 1070596088}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31 - }, - }, - }, -@@ -23772,10 +23772,10 @@ var opcodeTable = [...]opInfo{ - asm: loong64.AMOVW, - reg: regInfo{ - inputs: []inputInfo{ -- {0, 4611686019500081148}, // SP R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 SB -+ {0, 4611686019501129724}, // SP R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 SB - }, - outputs: []outputInfo{ 
-- {0, 1070596088}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31 - }, - }, - }, -@@ -23788,10 +23788,10 @@ var opcodeTable = [...]opInfo{ - asm: loong64.AMOVWU, - reg: regInfo{ - inputs: []inputInfo{ -- {0, 4611686019500081148}, // SP R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 SB -+ {0, 4611686019501129724}, // SP R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 SB - }, - outputs: []outputInfo{ -- {0, 1070596088}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31 - }, - }, - }, -@@ -23804,10 +23804,10 @@ var opcodeTable = [...]opInfo{ - asm: loong64.AMOVV, - reg: regInfo{ - inputs: []inputInfo{ -- {0, 4611686019500081148}, // SP R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 SB -+ {0, 4611686019501129724}, // SP R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 SB - }, - outputs: []outputInfo{ -- {0, 1070596088}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31 - }, - }, - }, -@@ -23820,7 +23820,7 @@ var opcodeTable = [...]opInfo{ - asm: loong64.AMOVF, - reg: regInfo{ - inputs: []inputInfo{ -- {0, 4611686019500081148}, // SP R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 SB -+ {0, 4611686019501129724}, // SP R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 SB - }, - 
outputs: []outputInfo{ - {0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31 -@@ -23836,7 +23836,7 @@ var opcodeTable = [...]opInfo{ - asm: loong64.AMOVD, - reg: regInfo{ - inputs: []inputInfo{ -- {0, 4611686019500081148}, // SP R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 SB -+ {0, 4611686019501129724}, // SP R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 SB - }, - outputs: []outputInfo{ - {0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31 -@@ -23852,8 +23852,8 @@ var opcodeTable = [...]opInfo{ - asm: loong64.AMOVB, - reg: regInfo{ - inputs: []inputInfo{ -- {1, 1072693240}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 -- {0, 4611686019500081148}, // SP R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 SB -+ {1, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 4611686019501129724}, // SP R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 SB - }, - }, - }, -@@ -23866,8 +23866,8 @@ var opcodeTable = [...]opInfo{ - asm: loong64.AMOVH, - reg: regInfo{ - inputs: []inputInfo{ -- {1, 1072693240}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 -- {0, 4611686019500081148}, // SP R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 SB -+ {1, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 4611686019501129724}, // SP R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 
R27 R28 R29 R31 SB - }, - }, - }, -@@ -23880,8 +23880,8 @@ var opcodeTable = [...]opInfo{ - asm: loong64.AMOVW, - reg: regInfo{ - inputs: []inputInfo{ -- {1, 1072693240}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 -- {0, 4611686019500081148}, // SP R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 SB -+ {1, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 4611686019501129724}, // SP R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 SB - }, - }, - }, -@@ -23894,8 +23894,8 @@ var opcodeTable = [...]opInfo{ - asm: loong64.AMOVV, - reg: regInfo{ - inputs: []inputInfo{ -- {1, 1072693240}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 -- {0, 4611686019500081148}, // SP R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 SB -+ {1, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 4611686019501129724}, // SP R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 SB - }, - }, - }, -@@ -23908,7 +23908,7 @@ var opcodeTable = [...]opInfo{ - asm: loong64.AMOVF, - reg: regInfo{ - inputs: []inputInfo{ -- {0, 4611686019500081148}, // SP R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 SB -+ {0, 4611686019501129724}, // SP R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 SB - {1, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31 - }, - }, -@@ -23922,7 +23922,7 @@ var opcodeTable = [...]opInfo{ - asm: loong64.AMOVD, - reg: regInfo{ - inputs: []inputInfo{ -- {0, 
4611686019500081148}, // SP R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 SB -+ {0, 4611686019501129724}, // SP R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 SB - {1, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31 - }, - }, -@@ -23936,7 +23936,7 @@ var opcodeTable = [...]opInfo{ - asm: loong64.AMOVB, - reg: regInfo{ - inputs: []inputInfo{ -- {0, 4611686019500081148}, // SP R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 SB -+ {0, 4611686019501129724}, // SP R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 SB - }, - }, - }, -@@ -23949,7 +23949,7 @@ var opcodeTable = [...]opInfo{ - asm: loong64.AMOVH, - reg: regInfo{ - inputs: []inputInfo{ -- {0, 4611686019500081148}, // SP R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 SB -+ {0, 4611686019501129724}, // SP R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 SB - }, - }, - }, -@@ -23962,7 +23962,7 @@ var opcodeTable = [...]opInfo{ - asm: loong64.AMOVW, - reg: regInfo{ - inputs: []inputInfo{ -- {0, 4611686019500081148}, // SP R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 SB -+ {0, 4611686019501129724}, // SP R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 SB - }, - }, - }, -@@ -23975,7 +23975,7 @@ var opcodeTable = [...]opInfo{ - asm: loong64.AMOVV, - reg: regInfo{ - inputs: []inputInfo{ -- {0, 4611686019500081148}, // SP R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 SB -+ {0, 4611686019501129724}, // SP R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 
R25 R26 R27 R28 R29 R31 SB - }, - }, - }, -@@ -23985,10 +23985,10 @@ var opcodeTable = [...]opInfo{ - asm: loong64.AMOVB, - reg: regInfo{ - inputs: []inputInfo{ -- {0, 1072693240}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 - }, - outputs: []outputInfo{ -- {0, 1070596088}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31 - }, - }, - }, -@@ -23998,10 +23998,10 @@ var opcodeTable = [...]opInfo{ - asm: loong64.AMOVBU, - reg: regInfo{ - inputs: []inputInfo{ -- {0, 1072693240}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 - }, - outputs: []outputInfo{ -- {0, 1070596088}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31 - }, - }, - }, -@@ -24011,10 +24011,10 @@ var opcodeTable = [...]opInfo{ - asm: loong64.AMOVH, - reg: regInfo{ - inputs: []inputInfo{ -- {0, 1072693240}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 - }, - outputs: []outputInfo{ -- {0, 1070596088}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31 - }, - }, - }, -@@ -24024,10 +24024,10 
@@ var opcodeTable = [...]opInfo{ - asm: loong64.AMOVHU, - reg: regInfo{ - inputs: []inputInfo{ -- {0, 1072693240}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 - }, - outputs: []outputInfo{ -- {0, 1070596088}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31 - }, - }, - }, -@@ -24037,10 +24037,10 @@ var opcodeTable = [...]opInfo{ - asm: loong64.AMOVW, - reg: regInfo{ - inputs: []inputInfo{ -- {0, 1072693240}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 - }, - outputs: []outputInfo{ -- {0, 1070596088}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31 - }, - }, - }, -@@ -24050,10 +24050,10 @@ var opcodeTable = [...]opInfo{ - asm: loong64.AMOVWU, - reg: regInfo{ - inputs: []inputInfo{ -- {0, 1072693240}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 - }, - outputs: []outputInfo{ -- {0, 1070596088}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31 - }, - }, - }, -@@ -24063,10 +24063,10 @@ var opcodeTable = [...]opInfo{ - asm: loong64.AMOVV, - reg: 
regInfo{ - inputs: []inputInfo{ -- {0, 1072693240}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 - }, - outputs: []outputInfo{ -- {0, 1070596088}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31 - }, - }, - }, -@@ -24076,10 +24076,10 @@ var opcodeTable = [...]opInfo{ - resultInArg0: true, - reg: regInfo{ - inputs: []inputInfo{ -- {0, 1070596088}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31 - }, - outputs: []outputInfo{ -- {0, 1070596088}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31 - }, - }, - }, -@@ -24220,7 +24220,7 @@ var opcodeTable = [...]opInfo{ - clobberFlags: true, - call: true, - reg: regInfo{ -- clobbers: 4611686018426339320, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31 -+ clobbers: 4611686018427387896, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31 - }, - }, - { -@@ -24231,7 +24231,7 @@ var opcodeTable = [...]opInfo{ - call: true, - tailCall: true, - reg: regInfo{ -- clobbers: 4611686018426339320, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 
R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31 -+ clobbers: 4611686018427387896, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31 - }, - }, - { -@@ -24243,9 +24243,9 @@ var opcodeTable = [...]opInfo{ - reg: regInfo{ - inputs: []inputInfo{ - {1, 268435456}, // R29 -- {0, 1070596092}, // SP R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 1071644668}, // SP R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31 - }, -- clobbers: 4611686018426339320, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31 -+ clobbers: 4611686018427387896, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31 - }, - }, - { -@@ -24256,9 +24256,9 @@ var opcodeTable = [...]opInfo{ - call: true, - reg: regInfo{ - inputs: []inputInfo{ -- {0, 1070596088}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31 - }, -- clobbers: 4611686018426339320, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31 -+ clobbers: 4611686018427387896, // R4 R5 R6 R7 R8 R9 R10 
R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31 - }, - }, - { -@@ -24268,9 +24268,9 @@ var opcodeTable = [...]opInfo{ - faultOnNilArg0: true, - reg: regInfo{ - inputs: []inputInfo{ -- {0, 262144}, // R19 -+ {0, 524288}, // R20 - }, -- clobbers: 262146, // R1 R19 -+ clobbers: 524290, // R1 R20 - }, - }, - { -@@ -24281,10 +24281,10 @@ var opcodeTable = [...]opInfo{ - faultOnNilArg1: true, - reg: regInfo{ - inputs: []inputInfo{ -- {0, 524288}, // R20 -- {1, 262144}, // R19 -+ {0, 1048576}, // R21 -+ {1, 524288}, // R20 - }, -- clobbers: 786434, // R1 R19 R20 -+ clobbers: 1572866, // R1 R20 R21 - }, - }, - { -@@ -24294,10 +24294,10 @@ var opcodeTable = [...]opInfo{ - faultOnNilArg0: true, - reg: regInfo{ - inputs: []inputInfo{ -- {0, 262144}, // R19 -- {1, 1070596088}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 524288}, // R20 -+ {1, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31 - }, -- clobbers: 262144, // R19 -+ clobbers: 524288, // R20 - }, - }, - { -@@ -24308,11 +24308,11 @@ var opcodeTable = [...]opInfo{ - faultOnNilArg1: true, - reg: regInfo{ - inputs: []inputInfo{ -- {0, 524288}, // R20 -- {1, 262144}, // R19 -- {2, 1070596088}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 1048576}, // R21 -+ {1, 524288}, // R20 -+ {2, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31 - }, -- clobbers: 786432, // R19 R20 -+ clobbers: 1572864, // R20 R21 - }, - }, - { -@@ -24321,10 +24321,10 @@ var opcodeTable = [...]opInfo{ - faultOnNilArg0: true, - reg: regInfo{ - inputs: []inputInfo{ -- {0, 4611686019500081148}, // SP R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 
R24 R25 R26 R27 R28 R29 R31 SB -+ {0, 4611686019501129724}, // SP R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 SB - }, - outputs: []outputInfo{ -- {0, 1070596088}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31 - }, - }, - }, -@@ -24334,10 +24334,10 @@ var opcodeTable = [...]opInfo{ - faultOnNilArg0: true, - reg: regInfo{ - inputs: []inputInfo{ -- {0, 4611686019500081148}, // SP R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 SB -+ {0, 4611686019501129724}, // SP R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 SB - }, - outputs: []outputInfo{ -- {0, 1070596088}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31 - }, - }, - }, -@@ -24347,10 +24347,10 @@ var opcodeTable = [...]opInfo{ - faultOnNilArg0: true, - reg: regInfo{ - inputs: []inputInfo{ -- {0, 4611686019500081148}, // SP R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 SB -+ {0, 4611686019501129724}, // SP R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 SB - }, - outputs: []outputInfo{ -- {0, 1070596088}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31 - }, - }, - }, -@@ -24361,8 +24361,8 @@ var opcodeTable = [...]opInfo{ - hasSideEffects: true, - reg: regInfo{ - inputs: []inputInfo{ -- {1, 1072693240}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 
R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 -- {0, 4611686019500081148}, // SP R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 SB -+ {1, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 4611686019501129724}, // SP R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 SB - }, - }, - }, -@@ -24373,8 +24373,8 @@ var opcodeTable = [...]opInfo{ - hasSideEffects: true, - reg: regInfo{ - inputs: []inputInfo{ -- {1, 1072693240}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 -- {0, 4611686019500081148}, // SP R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 SB -+ {1, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 4611686019501129724}, // SP R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 SB - }, - }, - }, -@@ -24385,8 +24385,8 @@ var opcodeTable = [...]opInfo{ - hasSideEffects: true, - reg: regInfo{ - inputs: []inputInfo{ -- {1, 1072693240}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 -- {0, 4611686019500081148}, // SP R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 SB -+ {1, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 4611686019501129724}, // SP R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 SB - }, - }, - }, -@@ -24397,7 +24397,7 @@ var opcodeTable = [...]opInfo{ - hasSideEffects: true, - reg: regInfo{ - inputs: []inputInfo{ -- {0, 4611686019500081148}, // SP R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 
SB -+ {0, 4611686019501129724}, // SP R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 SB - }, - }, - }, -@@ -24408,7 +24408,7 @@ var opcodeTable = [...]opInfo{ - hasSideEffects: true, - reg: regInfo{ - inputs: []inputInfo{ -- {0, 4611686019500081148}, // SP R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 SB -+ {0, 4611686019501129724}, // SP R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 SB - }, - }, - }, -@@ -24421,11 +24421,11 @@ var opcodeTable = [...]opInfo{ - unsafePoint: true, - reg: regInfo{ - inputs: []inputInfo{ -- {1, 1072693240}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 -- {0, 4611686019500081148}, // SP R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 SB -+ {1, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 4611686019501129724}, // SP R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 SB - }, - outputs: []outputInfo{ -- {0, 1070596088}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31 - }, - }, - }, -@@ -24438,11 +24438,11 @@ var opcodeTable = [...]opInfo{ - unsafePoint: true, - reg: regInfo{ - inputs: []inputInfo{ -- {1, 1072693240}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 -- {0, 4611686019500081148}, // SP R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 SB -+ {1, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 4611686019501129724}, 
// SP R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 SB - }, - outputs: []outputInfo{ -- {0, 1070596088}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31 - }, - }, - }, -@@ -24454,11 +24454,11 @@ var opcodeTable = [...]opInfo{ - asm: loong64.AAMANDDBW, - reg: regInfo{ - inputs: []inputInfo{ -- {1, 1072693240}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 -- {0, 4611686019500081148}, // SP R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 SB -+ {1, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 4611686019501129724}, // SP R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 SB - }, - outputs: []outputInfo{ -- {0, 1070596088}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31 - }, - }, - }, -@@ -24470,11 +24470,11 @@ var opcodeTable = [...]opInfo{ - asm: loong64.AAMORDBW, - reg: regInfo{ - inputs: []inputInfo{ -- {1, 1072693240}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 -- {0, 4611686019500081148}, // SP R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 SB -+ {1, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 4611686019501129724}, // SP R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 SB - }, - outputs: []outputInfo{ -- {0, 1070596088}, 
// R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31 - }, - }, - }, -@@ -24487,11 +24487,11 @@ var opcodeTable = [...]opInfo{ - unsafePoint: true, - reg: regInfo{ - inputs: []inputInfo{ -- {1, 1072693240}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 -- {0, 4611686019500081148}, // SP R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 SB -+ {1, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 4611686019501129724}, // SP R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 SB - }, - outputs: []outputInfo{ -- {0, 1070596088}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31 - }, - }, - }, -@@ -24504,11 +24504,11 @@ var opcodeTable = [...]opInfo{ - unsafePoint: true, - reg: regInfo{ - inputs: []inputInfo{ -- {1, 1072693240}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 -- {0, 4611686019500081148}, // SP R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 SB -+ {1, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 4611686019501129724}, // SP R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 SB - }, - outputs: []outputInfo{ -- {0, 1070596088}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 
R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31 - }, - }, - }, -@@ -24522,10 +24522,10 @@ var opcodeTable = [...]opInfo{ - unsafePoint: true, - reg: regInfo{ - inputs: []inputInfo{ -- {0, 4611686019500081148}, // SP R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 SB -+ {0, 4611686019501129724}, // SP R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 SB - }, - outputs: []outputInfo{ -- {0, 1070596088}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31 - }, - }, - }, -@@ -24539,10 +24539,10 @@ var opcodeTable = [...]opInfo{ - unsafePoint: true, - reg: regInfo{ - inputs: []inputInfo{ -- {0, 4611686019500081148}, // SP R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 SB -+ {0, 4611686019501129724}, // SP R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 SB - }, - outputs: []outputInfo{ -- {0, 1070596088}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31 - }, - }, - }, -@@ -24555,12 +24555,12 @@ var opcodeTable = [...]opInfo{ - unsafePoint: true, - reg: regInfo{ - inputs: []inputInfo{ -- {1, 1072693240}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 -- {2, 1072693240}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 -- {0, 4611686019500081148}, // SP R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 SB -+ {1, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 
R25 R26 R27 R28 R29 R31 -+ {2, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 4611686019501129724}, // SP R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 SB - }, - outputs: []outputInfo{ -- {0, 1070596088}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31 - }, - }, - }, -@@ -24573,12 +24573,12 @@ var opcodeTable = [...]opInfo{ - unsafePoint: true, - reg: regInfo{ - inputs: []inputInfo{ -- {1, 1072693240}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 -- {2, 1072693240}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 -- {0, 4611686019500081148}, // SP R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 SB -+ {1, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 -+ {2, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 4611686019501129724}, // SP R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 SB - }, - outputs: []outputInfo{ -- {0, 1070596088}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31 - }, - }, - }, -@@ -24589,7 +24589,7 @@ var opcodeTable = [...]opInfo{ - faultOnNilArg0: true, - reg: regInfo{ - inputs: []inputInfo{ -- {0, 1072693240}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 g R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 1073741816}, // R4 R5 R6 R7 
R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 - }, - }, - }, -@@ -24598,7 +24598,7 @@ var opcodeTable = [...]opInfo{ - argLen: 1, - reg: regInfo{ - outputs: []outputInfo{ -- {0, 1070596088}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31 - }, - }, - }, -@@ -24607,7 +24607,7 @@ var opcodeTable = [...]opInfo{ - argLen: 1, - reg: regInfo{ - outputs: []outputInfo{ -- {0, 1070596088}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31 - }, - }, - }, -@@ -24627,7 +24627,7 @@ var opcodeTable = [...]opInfo{ - rematerializeable: true, - reg: regInfo{ - outputs: []outputInfo{ -- {0, 1070596088}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31 - }, - }, - }, -@@ -24637,7 +24637,7 @@ var opcodeTable = [...]opInfo{ - rematerializeable: true, - reg: regInfo{ - outputs: []outputInfo{ -- {0, 1070596088}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R23 R24 R25 R26 R27 R28 R29 R31 -+ {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31 - }, - }, - }, -@@ -40189,16 +40189,16 @@ var registersLOONG64 = [...]Register{ - {17, loong64.REG_R18, 14, "R18"}, - {18, loong64.REG_R19, 15, "R19"}, - {19, loong64.REG_R20, 16, "R20"}, -- {20, loong64.REG_R21, -1, "R21"}, -+ {20, loong64.REG_R21, 17, "R21"}, - {21, loong64.REGG, -1, "g"}, -- {22, loong64.REG_R23, 17, "R23"}, -- {23, loong64.REG_R24, 18, "R24"}, -- {24, loong64.REG_R25, 19, "R25"}, -- {25, loong64.REG_R26, 
20, "R26"}, -- {26, loong64.REG_R27, 21, "R27"}, -- {27, loong64.REG_R28, 22, "R28"}, -- {28, loong64.REG_R29, 23, "R29"}, -- {29, loong64.REG_R31, 24, "R31"}, -+ {22, loong64.REG_R23, 18, "R23"}, -+ {23, loong64.REG_R24, 19, "R24"}, -+ {24, loong64.REG_R25, 20, "R25"}, -+ {25, loong64.REG_R26, 21, "R26"}, -+ {26, loong64.REG_R27, 22, "R27"}, -+ {27, loong64.REG_R28, 23, "R28"}, -+ {28, loong64.REG_R29, 24, "R29"}, -+ {29, loong64.REG_R31, 25, "R31"}, - {30, loong64.REG_F0, -1, "F0"}, - {31, loong64.REG_F1, -1, "F1"}, - {32, loong64.REG_F2, -1, "F2"}, -@@ -40235,7 +40235,7 @@ var registersLOONG64 = [...]Register{ - } - var paramIntRegLOONG64 = []int8{3, 4, 5, 6, 7, 8, 9, 10} - var paramFloatRegLOONG64 = []int8{30, 31, 32, 33, 34, 35, 36, 37} --var gpRegMaskLOONG64 = regMask(1070596088) -+var gpRegMaskLOONG64 = regMask(1071644664) - var fpRegMaskLOONG64 = regMask(4611686017353646080) - var specialRegMaskLOONG64 = regMask(0) - var framepointerRegLOONG64 = int8(-1) -diff --git a/src/cmd/internal/obj/loong64/a.out.go b/src/cmd/internal/obj/loong64/a.out.go -index 3ed15fc7e7..8df48a1e01 100644 ---- a/src/cmd/internal/obj/loong64/a.out.go -+++ b/src/cmd/internal/obj/loong64/a.out.go -@@ -157,14 +157,14 @@ const ( - REGZERO = REG_R0 // set to zero - REGLINK = REG_R1 - REGSP = REG_R3 -- REGRET = REG_R19 -+ REGRET = REG_R20 // not use - REGARG = -1 // -1 disables passing the first argument in register -- REGRT1 = REG_R19 // reserved for runtime, duffzero and duffcopy -- REGRT2 = REG_R20 // reserved for runtime, duffcopy -+ REGRT1 = REG_R20 // reserved for runtime, duffzero and duffcopy -+ REGRT2 = REG_R21 // reserved for runtime, duffcopy - REGCTXT = REG_R29 // context for closures - REGG = REG_R22 // G in loong64 - REGTMP = REG_R30 // used by the assembler -- FREGRET = REG_F0 -+ FREGRET = REG_F0 // not use - ) - - var LOONG64DWARFRegisters = map[int16]int16{} -diff --git a/src/runtime/duff_loong64.s b/src/runtime/duff_loong64.s -index 63fa3bcca1..df8b653965 100644 ---- 
a/src/runtime/duff_loong64.s -+++ b/src/runtime/duff_loong64.s -@@ -5,903 +5,903 @@ - #include "textflag.h" - - TEXT runtime·duffzero(SB), NOSPLIT|NOFRAME, $0-0 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- 
MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, 
R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -- MOVV R0, (R19) -- ADDV $8, R19 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ 
ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, 
(R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 -+ MOVV R0, (R20) -+ ADDV $8, R20 - RET - - TEXT runtime·duffcopy(SB), NOSPLIT|NOFRAME, $0-0 -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), 
R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), 
R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), 
R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), 
R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), 
R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), 
R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), 
R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), 
R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - -- MOVV (R19), R30 -- ADDV $8, R19 -- MOVV R30, (R20) -+ MOVV (R20), R30 - ADDV $8, R20 -+ MOVV R30, (R21) -+ ADDV $8, R21 - - RET -diff --git a/src/runtime/mkduff.go b/src/runtime/mkduff.go -index e8d4fcc93e..77674254d4 100644 ---- a/src/runtime/mkduff.go -+++ b/src/runtime/mkduff.go -@@ -183,8 +183,8 @@ func zeroLOONG64(w io.Writer) { - // On return, R19 points to the last zeroed dword. 
- fmt.Fprintln(w, "TEXT runtime·duffzero(SB), NOSPLIT|NOFRAME, $0-0") - for i := 0; i < 128; i++ { -- fmt.Fprintln(w, "\tMOVV\tR0, (R19)") -- fmt.Fprintln(w, "\tADDV\t$8, R19") -+ fmt.Fprintln(w, "\tMOVV\tR0, (R20)") -+ fmt.Fprintln(w, "\tADDV\t$8, R20") - } - fmt.Fprintln(w, "\tRET") - } -@@ -192,10 +192,10 @@ func zeroLOONG64(w io.Writer) { - func copyLOONG64(w io.Writer) { - fmt.Fprintln(w, "TEXT runtime·duffcopy(SB), NOSPLIT|NOFRAME, $0-0") - for i := 0; i < 128; i++ { -- fmt.Fprintln(w, "\tMOVV\t(R19), R30") -- fmt.Fprintln(w, "\tADDV\t$8, R19") -- fmt.Fprintln(w, "\tMOVV\tR30, (R20)") -+ fmt.Fprintln(w, "\tMOVV\t(R20), R30") - fmt.Fprintln(w, "\tADDV\t$8, R20") -+ fmt.Fprintln(w, "\tMOVV\tR30, (R21)") -+ fmt.Fprintln(w, "\tADDV\t$8, R21") - fmt.Fprintln(w) - } - fmt.Fprintln(w, "\tRET") --- -2.38.1 - diff --git a/0021-test-codegen-fix-the-matching-instructions-inside-pl.patch b/0021-test-codegen-fix-the-matching-instructions-inside-pl.patch new file mode 100644 index 0000000000000000000000000000000000000000..f9e0b7be338df01040c2c024600d5efd4d90c443 --- /dev/null +++ b/0021-test-codegen-fix-the-matching-instructions-inside-pl.patch @@ -0,0 +1,31 @@ +From 53fc992fd2ba2f64eb436c5cf210e31e70282fc0 Mon Sep 17 00:00:00 2001 +From: Xiaolin Zhao +Date: Tue, 8 Oct 2024 16:23:56 +0800 +Subject: [PATCH 21/44] test/codegen: fix the matching instructions inside + plain comments for func rshConst32Ux64 on loong64 + +after add rules for (x << lc) >> rc in commit "cmd/compile: add patterns +for bitfield opcodes on loong64", the generated assembly from func +rshConst32Ux64 matches BSTRPICKV, not SLLV and SRLV. 
+ +Change-Id: I4348716156abc3410134495edb977a88727139f8 +--- + test/codegen/shift.go | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/test/codegen/shift.go b/test/codegen/shift.go +index ad69d69aa5..6112a989b9 100644 +--- a/test/codegen/shift.go ++++ b/test/codegen/shift.go +@@ -68,7 +68,7 @@ func lshConst32x64(v int32) int32 { + func rshConst32Ux64(v uint32) uint32 { + // ppc64x:"SRW" + // riscv64:"SRLIW",-"AND",-"SLTIU", -"MOVW" +- // loong64:"SLLV","SRLV",-"MOVWU" ++ // loong64:"BSTRPICKV",-"SLLV",-"SRLV",-"MOVWU" + return v >> uint64(29) + } + +-- +2.38.1 + diff --git a/0022-cmd-compile-add-ABI-register-definations-for-loong64.patch b/0022-cmd-compile-add-ABI-register-definations-for-loong64.patch deleted file mode 100644 index d1bc59e30d1e2f43f0a5960366e1423b9ec06b13..0000000000000000000000000000000000000000 --- a/0022-cmd-compile-add-ABI-register-definations-for-loong64.patch +++ /dev/null @@ -1,77 +0,0 @@ -From b7e8567db8b4f939176c3ef9fcff4772cec291df Mon Sep 17 00:00:00 2001 -From: Guoqi Chen -Date: Tue, 15 Aug 2023 17:11:19 +0800 -Subject: [PATCH 22/51] cmd/compile: add ABI register definations for loong64 - -Updates #40724 - -Co-authored-by: Xiaolin Zhao -Change-Id: Idc30393487077e5c2bdffab447e3d8c33d4c8925 ---- - src/cmd/compile/abi-internal.md | 50 +++++++++++++++++++++++++++++++++ - 1 file changed, 50 insertions(+) - -diff --git a/src/cmd/compile/abi-internal.md b/src/cmd/compile/abi-internal.md -index 14464ed904..db5197fb72 100644 ---- a/src/cmd/compile/abi-internal.md -+++ b/src/cmd/compile/abi-internal.md -@@ -633,6 +633,56 @@ modifying or saving the FPCR. - Functions are allowed to modify it between calls (as long as they - restore it), but as of this writing Go code never does. - -+### loong64 architecture -+ -+The loong64 architecture uses R4 – R19 for integer arguments and integer results. -+ -+It uses F0 – F15 for floating-point arguments and results. 
-+ -+Registers R20 - R21, R23 – R28, R30, F16 – F31 are permanent scratch registers. -+ -+Register R2 is reserved and never used. -+ -+Register R20, R21 is Used by runtime.duffcopy, runtime.duffzero. -+ -+Special-purpose registers used within Go generated code and Go assembly code -+are as follows: -+ -+| Register | Call meaning | Return meaning | Body meaning | -+| --- | --- | --- | --- | -+| R0 | Zero value | Same | Same | -+| R1 | Link register | Link register | Scratch | -+| R3 | Stack pointer | Same | Same | -+| R20,R21 | Scratch | Scratch | Used by duffcopy, duffzero | -+| R22 | Current goroutine | Same | Same | -+| R29 | Closure context pointer | Same | Same | -+| R30 | used by the assembler | Same | Same | -+ -+*Rationale*: These register meanings are compatible with Go’s stack-based -+calling convention. -+ -+#### Stack layout -+ -+The stack pointer, R3, grows down and is aligned to 8 bytes. -+ -+A function's stack frame, after the frame is created, is laid out as -+follows: -+ -+ +------------------------------+ -+ | ... locals ... | -+ | ... outgoing arguments ... | -+ | return PC | ← R3 points to -+ +------------------------------+ ↓ lower addresses -+ -+This stack layout is used by both register-based (ABIInternal) and -+stack-based (ABI0) calling conventions. -+ -+The "return PC" is loaded to the link register, R1, as part of the -+loong64 `JAL` operation. -+ -+#### Flags -+All bits in CSR are system flags and are not modified by Go. 
-+ - ### ppc64 architecture - - The ppc64 architecture uses R3 – R10 and R14 – R17 for integer arguments --- -2.38.1 - diff --git a/0022-cmd-compile-optimize-shifts-of-int32-and-uint32-on-l.patch b/0022-cmd-compile-optimize-shifts-of-int32-and-uint32-on-l.patch new file mode 100644 index 0000000000000000000000000000000000000000..c119f2e94194934fc073159898e1795f4b99b70a --- /dev/null +++ b/0022-cmd-compile-optimize-shifts-of-int32-and-uint32-on-l.patch @@ -0,0 +1,1064 @@ +From 2ab1123adf4a080d91ef549b76572bf4b22f907f Mon Sep 17 00:00:00 2001 +From: Xiaolin Zhao +Date: Thu, 24 Oct 2024 17:41:01 +0800 +Subject: [PATCH 22/44] cmd/compile: optimize shifts of int32 and uint32 on + loong64 + +Change-Id: I6b8d110cfed8d55e2b753259a45f55e09b8f759d +--- + src/cmd/compile/internal/loong64/ssa.go | 6 + + .../compile/internal/ssa/_gen/LOONG64.rules | 39 +- + .../compile/internal/ssa/_gen/LOONG64Ops.go | 6 + + src/cmd/compile/internal/ssa/opGen.go | 90 ++++ + .../compile/internal/ssa/rewriteLOONG64.go | 431 +++++++++++++----- + test/codegen/shift.go | 20 +- + 6 files changed, 462 insertions(+), 130 deletions(-) + +diff --git a/src/cmd/compile/internal/loong64/ssa.go b/src/cmd/compile/internal/loong64/ssa.go +index 0ba9efa1d3..bd761c407e 100644 +--- a/src/cmd/compile/internal/loong64/ssa.go ++++ b/src/cmd/compile/internal/loong64/ssa.go +@@ -165,8 +165,11 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) { + ssa.OpLOONG64OR, + ssa.OpLOONG64XOR, + ssa.OpLOONG64NOR, ++ ssa.OpLOONG64SLL, + ssa.OpLOONG64SLLV, ++ ssa.OpLOONG64SRL, + ssa.OpLOONG64SRLV, ++ ssa.OpLOONG64SRA, + ssa.OpLOONG64SRAV, + ssa.OpLOONG64ROTR, + ssa.OpLOONG64ROTRV, +@@ -274,8 +277,11 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) { + ssa.OpLOONG64ORconst, + ssa.OpLOONG64XORconst, + ssa.OpLOONG64NORconst, ++ ssa.OpLOONG64SLLconst, + ssa.OpLOONG64SLLVconst, ++ ssa.OpLOONG64SRLconst, + ssa.OpLOONG64SRLVconst, ++ ssa.OpLOONG64SRAconst, + ssa.OpLOONG64SRAVconst, + ssa.OpLOONG64ROTRconst, + ssa.OpLOONG64ROTRVconst, 
+diff --git a/src/cmd/compile/internal/ssa/_gen/LOONG64.rules b/src/cmd/compile/internal/ssa/_gen/LOONG64.rules +index 00a0a84f33..014cd6fb05 100644 +--- a/src/cmd/compile/internal/ssa/_gen/LOONG64.rules ++++ b/src/cmd/compile/internal/ssa/_gen/LOONG64.rules +@@ -62,10 +62,10 @@ + (Lsh64x16 x y) => (MASKEQZ (SLLV x (ZeroExt16to64 y)) (SGTU (MOVVconst [64]) (ZeroExt16to64 y))) + (Lsh64x8 x y) => (MASKEQZ (SLLV x (ZeroExt8to64 y)) (SGTU (MOVVconst [64]) (ZeroExt8to64 y))) + +-(Lsh32x64 x y) => (MASKEQZ (SLLV x y) (SGTU (MOVVconst [64]) y)) +-(Lsh32x32 x y) => (MASKEQZ (SLLV x (ZeroExt32to64 y)) (SGTU (MOVVconst [64]) (ZeroExt32to64 y))) +-(Lsh32x16 x y) => (MASKEQZ (SLLV x (ZeroExt16to64 y)) (SGTU (MOVVconst [64]) (ZeroExt16to64 y))) +-(Lsh32x8 x y) => (MASKEQZ (SLLV x (ZeroExt8to64 y)) (SGTU (MOVVconst [64]) (ZeroExt8to64 y))) ++(Lsh32x64 x y) => (MASKEQZ (SLL x y) (SGTU (MOVVconst [32]) y)) ++(Lsh32x32 x y) => (MASKEQZ (SLL x (ZeroExt32to64 y)) (SGTU (MOVVconst [32]) (ZeroExt32to64 y))) ++(Lsh32x16 x y) => (MASKEQZ (SLL x (ZeroExt16to64 y)) (SGTU (MOVVconst [32]) (ZeroExt16to64 y))) ++(Lsh32x8 x y) => (MASKEQZ (SLL x (ZeroExt8to64 y)) (SGTU (MOVVconst [32]) (ZeroExt8to64 y))) + + (Lsh16x64 x y) => (MASKEQZ (SLLV x y) (SGTU (MOVVconst [64]) y)) + (Lsh16x32 x y) => (MASKEQZ (SLLV x (ZeroExt32to64 y)) (SGTU (MOVVconst [64]) (ZeroExt32to64 y))) +@@ -82,10 +82,10 @@ + (Rsh64Ux16 x y) => (MASKEQZ (SRLV x (ZeroExt16to64 y)) (SGTU (MOVVconst [64]) (ZeroExt16to64 y))) + (Rsh64Ux8 x y) => (MASKEQZ (SRLV x (ZeroExt8to64 y)) (SGTU (MOVVconst [64]) (ZeroExt8to64 y))) + +-(Rsh32Ux64 x y) => (MASKEQZ (SRLV (ZeroExt32to64 x) y) (SGTU (MOVVconst [64]) y)) +-(Rsh32Ux32 x y) => (MASKEQZ (SRLV (ZeroExt32to64 x) (ZeroExt32to64 y)) (SGTU (MOVVconst [64]) (ZeroExt32to64 y))) +-(Rsh32Ux16 x y) => (MASKEQZ (SRLV (ZeroExt32to64 x) (ZeroExt16to64 y)) (SGTU (MOVVconst [64]) (ZeroExt16to64 y))) +-(Rsh32Ux8 x y) => (MASKEQZ (SRLV (ZeroExt32to64 x) (ZeroExt8to64 y)) (SGTU (MOVVconst [64]) 
(ZeroExt8to64 y))) ++(Rsh32Ux64 x y) => (MASKEQZ (SRL x y) (SGTU (MOVVconst [32]) y)) ++(Rsh32Ux32 x y) => (MASKEQZ (SRL x (ZeroExt32to64 y)) (SGTU (MOVVconst [32]) (ZeroExt32to64 y))) ++(Rsh32Ux16 x y) => (MASKEQZ (SRL x (ZeroExt16to64 y)) (SGTU (MOVVconst [32]) (ZeroExt16to64 y))) ++(Rsh32Ux8 x y) => (MASKEQZ (SRL x (ZeroExt8to64 y)) (SGTU (MOVVconst [32]) (ZeroExt8to64 y))) + + (Rsh16Ux64 x y) => (MASKEQZ (SRLV (ZeroExt16to64 x) y) (SGTU (MOVVconst [64]) y)) + (Rsh16Ux32 x y) => (MASKEQZ (SRLV (ZeroExt16to64 x) (ZeroExt32to64 y)) (SGTU (MOVVconst [64]) (ZeroExt32to64 y))) +@@ -102,10 +102,10 @@ + (Rsh64x16 x y) => (SRAV x (OR (NEGV (SGTU (ZeroExt16to64 y) (MOVVconst [63]))) (ZeroExt16to64 y))) + (Rsh64x8 x y) => (SRAV x (OR (NEGV (SGTU (ZeroExt8to64 y) (MOVVconst [63]))) (ZeroExt8to64 y))) + +-(Rsh32x64 x y) => (SRAV (SignExt32to64 x) (OR (NEGV (SGTU y (MOVVconst [63]))) y)) +-(Rsh32x32 x y) => (SRAV (SignExt32to64 x) (OR (NEGV (SGTU (ZeroExt32to64 y) (MOVVconst [63]))) (ZeroExt32to64 y))) +-(Rsh32x16 x y) => (SRAV (SignExt32to64 x) (OR (NEGV (SGTU (ZeroExt16to64 y) (MOVVconst [63]))) (ZeroExt16to64 y))) +-(Rsh32x8 x y) => (SRAV (SignExt32to64 x) (OR (NEGV (SGTU (ZeroExt8to64 y) (MOVVconst [63]))) (ZeroExt8to64 y))) ++(Rsh32x64 x y) => (SRA x (OR (NEGV (SGTU y (MOVVconst [31]))) y)) ++(Rsh32x32 x y) => (SRA x (OR (NEGV (SGTU (ZeroExt32to64 y) (MOVVconst [31]))) (ZeroExt32to64 y))) ++(Rsh32x16 x y) => (SRA x (OR (NEGV (SGTU (ZeroExt16to64 y) (MOVVconst [31]))) (ZeroExt16to64 y))) ++(Rsh32x8 x y) => (SRA x (OR (NEGV (SGTU (ZeroExt8to64 y) (MOVVconst [31]))) (ZeroExt8to64 y))) + + (Rsh16x64 x y) => (SRAV (SignExt16to64 x) (OR (NEGV (SGTU y (MOVVconst [63]))) y)) + (Rsh16x32 x y) => (SRAV (SignExt16to64 x) (OR (NEGV (SGTU (ZeroExt32to64 y) (MOVVconst [63]))) (ZeroExt32to64 y))) +@@ -683,15 +683,30 @@ + (XOR x (MOVVconst [c])) && is32Bit(c) => (XORconst [c] x) + (NOR x (MOVVconst [c])) && is32Bit(c) => (NORconst [c] x) + ++(SLL _ (MOVVconst [c])) && uint64(c)>=32 => 
(MOVVconst [0]) + (SLLV _ (MOVVconst [c])) && uint64(c)>=64 => (MOVVconst [0]) ++(SRL _ (MOVVconst [c])) && uint64(c)>=32 => (MOVVconst [0]) + (SRLV _ (MOVVconst [c])) && uint64(c)>=64 => (MOVVconst [0]) ++(SRA x (MOVVconst [c])) && uint64(c)>=32 => (SRAconst x [31]) + (SRAV x (MOVVconst [c])) && uint64(c)>=64 => (SRAVconst x [63]) ++(SLL x (MOVVconst [c])) && uint64(c) >=0 && uint64(c) <= 31 => (SLLconst x [c]) + (SLLV x (MOVVconst [c])) => (SLLVconst x [c]) ++(SRL x (MOVVconst [c])) && uint64(c) >=0 && uint64(c) <= 31 => (SRLconst x [c]) + (SRLV x (MOVVconst [c])) => (SRLVconst x [c]) ++(SRA x (MOVVconst [c])) && uint64(c) >=0 && uint64(c) <= 31 => (SRAconst x [c]) + (SRAV x (MOVVconst [c])) => (SRAVconst x [c]) + (ROTR x (MOVVconst [c])) => (ROTRconst x [c&31]) + (ROTRV x (MOVVconst [c])) => (ROTRVconst x [c&63]) + ++// Avoid unnecessary zero and sign extension when right shifting. ++(SRLVconst [rc] (MOVWUreg y)) && rc >= 0 && rc <= 31 => (SRLconst [int64(rc)] y) ++(SRAVconst [rc] (MOVWreg y)) && rc >= 0 && rc <= 31 => (SRAconst [int64(rc)] y) ++ ++// Replace right shifts that exceed size of signed type. ++(SRAVconst [rc] (MOVBreg y)) && rc >= 8 => (SRAVconst [63] (SLLVconst [56] y)) ++(SRAVconst [rc] (MOVHreg y)) && rc >= 16 => (SRAVconst [63] (SLLVconst [48] y)) ++(SRAVconst [rc] (MOVWreg y)) && rc >= 32 => (SRAconst [31] y) ++ + // If the shift amount is larger than the datasize(32, 16, 8), we can optimize to constant 0. 
+ (MOVWUreg (SLLVconst [lc] x)) && lc >= 32 => (MOVVconst [0]) + (MOVHUreg (SLLVconst [lc] x)) && lc >= 16 => (MOVVconst [0]) +diff --git a/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go b/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go +index 8f17158b64..4b3f1fd689 100644 +--- a/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go ++++ b/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go +@@ -240,11 +240,17 @@ func init() { + {name: "FCOPYSGD", argLength: 2, reg: fp21, asm: "FCOPYSGD"}, // float64 + + // shifts ++ {name: "SLL", argLength: 2, reg: gp21, asm: "SLL"}, // arg0 << arg1, shift amount is mod 32 + {name: "SLLV", argLength: 2, reg: gp21, asm: "SLLV"}, // arg0 << arg1, shift amount is mod 64 ++ {name: "SLLconst", argLength: 1, reg: gp11, asm: "SLL", aux: "Int64"}, // arg0 << auxInt + {name: "SLLVconst", argLength: 1, reg: gp11, asm: "SLLV", aux: "Int64"}, // arg0 << auxInt ++ {name: "SRL", argLength: 2, reg: gp21, asm: "SRL"}, // arg0 >> arg1, unsigned, shift amount is mod 32 + {name: "SRLV", argLength: 2, reg: gp21, asm: "SRLV"}, // arg0 >> arg1, unsigned, shift amount is mod 64 ++ {name: "SRLconst", argLength: 1, reg: gp11, asm: "SRL", aux: "Int64"}, // arg0 >> auxInt, unsigned + {name: "SRLVconst", argLength: 1, reg: gp11, asm: "SRLV", aux: "Int64"}, // arg0 >> auxInt, unsigned ++ {name: "SRA", argLength: 2, reg: gp21, asm: "SRA"}, // arg0 >> arg1, signed, shift amount is mod 32 + {name: "SRAV", argLength: 2, reg: gp21, asm: "SRAV"}, // arg0 >> arg1, signed, shift amount is mod 64 ++ {name: "SRAconst", argLength: 1, reg: gp11, asm: "SRA", aux: "Int64"}, // arg0 >> auxInt, signed + {name: "SRAVconst", argLength: 1, reg: gp11, asm: "SRAV", aux: "Int64"}, // arg0 >> auxInt, signed + {name: "ROTR", argLength: 2, reg: gp21, asm: "ROTR"}, // arg0 right rotate by (arg1 mod 32) bits + {name: "ROTRV", argLength: 2, reg: gp21, asm: "ROTRV"}, // arg0 right rotate by (arg1 mod 64) bits +diff --git a/src/cmd/compile/internal/ssa/opGen.go 
b/src/cmd/compile/internal/ssa/opGen.go +index df1ddfa69e..643d012ca1 100644 +--- a/src/cmd/compile/internal/ssa/opGen.go ++++ b/src/cmd/compile/internal/ssa/opGen.go +@@ -1824,11 +1824,17 @@ const ( + OpLOONG64MASKEQZ + OpLOONG64MASKNEZ + OpLOONG64FCOPYSGD ++ OpLOONG64SLL + OpLOONG64SLLV ++ OpLOONG64SLLconst + OpLOONG64SLLVconst ++ OpLOONG64SRL + OpLOONG64SRLV ++ OpLOONG64SRLconst + OpLOONG64SRLVconst ++ OpLOONG64SRA + OpLOONG64SRAV ++ OpLOONG64SRAconst + OpLOONG64SRAVconst + OpLOONG64ROTR + OpLOONG64ROTRV +@@ -24541,6 +24547,20 @@ var opcodeTable = [...]opInfo{ + }, + }, + }, ++ { ++ name: "SLL", ++ argLen: 2, ++ asm: loong64.ASLL, ++ reg: regInfo{ ++ inputs: []inputInfo{ ++ {0, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 ++ {1, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 ++ }, ++ outputs: []outputInfo{ ++ {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31 ++ }, ++ }, ++ }, + { + name: "SLLV", + argLen: 2, +@@ -24555,6 +24575,20 @@ var opcodeTable = [...]opInfo{ + }, + }, + }, ++ { ++ name: "SLLconst", ++ auxType: auxInt64, ++ argLen: 1, ++ asm: loong64.ASLL, ++ reg: regInfo{ ++ inputs: []inputInfo{ ++ {0, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 ++ }, ++ outputs: []outputInfo{ ++ {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31 ++ }, ++ }, ++ }, + { + name: "SLLVconst", + auxType: auxInt64, +@@ -24569,6 +24603,20 @@ var opcodeTable = [...]opInfo{ + }, + }, + }, ++ { ++ name: "SRL", ++ argLen: 2, ++ asm: loong64.ASRL, ++ reg: regInfo{ ++ inputs: []inputInfo{ ++ {0, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 ++ {1, 1073741816}, // R4 R5 R6 R7 
R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 ++ }, ++ outputs: []outputInfo{ ++ {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31 ++ }, ++ }, ++ }, + { + name: "SRLV", + argLen: 2, +@@ -24583,6 +24631,20 @@ var opcodeTable = [...]opInfo{ + }, + }, + }, ++ { ++ name: "SRLconst", ++ auxType: auxInt64, ++ argLen: 1, ++ asm: loong64.ASRL, ++ reg: regInfo{ ++ inputs: []inputInfo{ ++ {0, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 ++ }, ++ outputs: []outputInfo{ ++ {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31 ++ }, ++ }, ++ }, + { + name: "SRLVconst", + auxType: auxInt64, +@@ -24597,6 +24659,20 @@ var opcodeTable = [...]opInfo{ + }, + }, + }, ++ { ++ name: "SRA", ++ argLen: 2, ++ asm: loong64.ASRA, ++ reg: regInfo{ ++ inputs: []inputInfo{ ++ {0, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 ++ {1, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 ++ }, ++ outputs: []outputInfo{ ++ {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31 ++ }, ++ }, ++ }, + { + name: "SRAV", + argLen: 2, +@@ -24611,6 +24687,20 @@ var opcodeTable = [...]opInfo{ + }, + }, + }, ++ { ++ name: "SRAconst", ++ auxType: auxInt64, ++ argLen: 1, ++ asm: loong64.ASRA, ++ reg: regInfo{ ++ inputs: []inputInfo{ ++ {0, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 ++ }, ++ outputs: []outputInfo{ ++ {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31 ++ }, ++ }, ++ }, + { + name: "SRAVconst", + auxType: auxInt64, +diff --git 
a/src/cmd/compile/internal/ssa/rewriteLOONG64.go b/src/cmd/compile/internal/ssa/rewriteLOONG64.go +index ab39040de1..93bf95eb51 100644 +--- a/src/cmd/compile/internal/ssa/rewriteLOONG64.go ++++ b/src/cmd/compile/internal/ssa/rewriteLOONG64.go +@@ -440,14 +440,20 @@ func rewriteValueLOONG64(v *Value) bool { + return rewriteValueLOONG64_OpLOONG64SGTUconst(v) + case OpLOONG64SGTconst: + return rewriteValueLOONG64_OpLOONG64SGTconst(v) ++ case OpLOONG64SLL: ++ return rewriteValueLOONG64_OpLOONG64SLL(v) + case OpLOONG64SLLV: + return rewriteValueLOONG64_OpLOONG64SLLV(v) + case OpLOONG64SLLVconst: + return rewriteValueLOONG64_OpLOONG64SLLVconst(v) ++ case OpLOONG64SRA: ++ return rewriteValueLOONG64_OpLOONG64SRA(v) + case OpLOONG64SRAV: + return rewriteValueLOONG64_OpLOONG64SRAV(v) + case OpLOONG64SRAVconst: + return rewriteValueLOONG64_OpLOONG64SRAVconst(v) ++ case OpLOONG64SRL: ++ return rewriteValueLOONG64_OpLOONG64SRL(v) + case OpLOONG64SRLV: + return rewriteValueLOONG64_OpLOONG64SRLV(v) + case OpLOONG64SRLVconst: +@@ -5953,6 +5959,43 @@ func rewriteValueLOONG64_OpLOONG64SGTconst(v *Value) bool { + } + return false + } ++func rewriteValueLOONG64_OpLOONG64SLL(v *Value) bool { ++ v_1 := v.Args[1] ++ v_0 := v.Args[0] ++ // match: (SLL _ (MOVVconst [c])) ++ // cond: uint64(c)>=32 ++ // result: (MOVVconst [0]) ++ for { ++ if v_1.Op != OpLOONG64MOVVconst { ++ break ++ } ++ c := auxIntToInt64(v_1.AuxInt) ++ if !(uint64(c) >= 32) { ++ break ++ } ++ v.reset(OpLOONG64MOVVconst) ++ v.AuxInt = int64ToAuxInt(0) ++ return true ++ } ++ // match: (SLL x (MOVVconst [c])) ++ // cond: uint64(c) >=0 && uint64(c) <= 31 ++ // result: (SLLconst x [c]) ++ for { ++ x := v_0 ++ if v_1.Op != OpLOONG64MOVVconst { ++ break ++ } ++ c := auxIntToInt64(v_1.AuxInt) ++ if !(uint64(c) >= 0 && uint64(c) <= 31) { ++ break ++ } ++ v.reset(OpLOONG64SLLconst) ++ v.AuxInt = int64ToAuxInt(c) ++ v.AddArg(x) ++ return true ++ } ++ return false ++} + func rewriteValueLOONG64_OpLOONG64SLLV(v *Value) bool { + v_1 
:= v.Args[1] + v_0 := v.Args[0] +@@ -6002,6 +6045,45 @@ func rewriteValueLOONG64_OpLOONG64SLLVconst(v *Value) bool { + } + return false + } ++func rewriteValueLOONG64_OpLOONG64SRA(v *Value) bool { ++ v_1 := v.Args[1] ++ v_0 := v.Args[0] ++ // match: (SRA x (MOVVconst [c])) ++ // cond: uint64(c)>=32 ++ // result: (SRAconst x [31]) ++ for { ++ x := v_0 ++ if v_1.Op != OpLOONG64MOVVconst { ++ break ++ } ++ c := auxIntToInt64(v_1.AuxInt) ++ if !(uint64(c) >= 32) { ++ break ++ } ++ v.reset(OpLOONG64SRAconst) ++ v.AuxInt = int64ToAuxInt(31) ++ v.AddArg(x) ++ return true ++ } ++ // match: (SRA x (MOVVconst [c])) ++ // cond: uint64(c) >=0 && uint64(c) <= 31 ++ // result: (SRAconst x [c]) ++ for { ++ x := v_0 ++ if v_1.Op != OpLOONG64MOVVconst { ++ break ++ } ++ c := auxIntToInt64(v_1.AuxInt) ++ if !(uint64(c) >= 0 && uint64(c) <= 31) { ++ break ++ } ++ v.reset(OpLOONG64SRAconst) ++ v.AuxInt = int64ToAuxInt(c) ++ v.AddArg(x) ++ return true ++ } ++ return false ++} + func rewriteValueLOONG64_OpLOONG64SRAV(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] +@@ -6039,6 +6121,85 @@ func rewriteValueLOONG64_OpLOONG64SRAV(v *Value) bool { + } + func rewriteValueLOONG64_OpLOONG64SRAVconst(v *Value) bool { + v_0 := v.Args[0] ++ b := v.Block ++ // match: (SRAVconst [rc] (MOVWreg y)) ++ // cond: rc >= 0 && rc <= 31 ++ // result: (SRAconst [int64(rc)] y) ++ for { ++ t := v.Type ++ rc := auxIntToInt64(v.AuxInt) ++ if v_0.Op != OpLOONG64MOVWreg { ++ break ++ } ++ y := v_0.Args[0] ++ if !(rc >= 0 && rc <= 31) { ++ break ++ } ++ v.reset(OpLOONG64SRAconst) ++ v.Type = t ++ v.AuxInt = int64ToAuxInt(int64(rc)) ++ v.AddArg(y) ++ return true ++ } ++ // match: (SRAVconst [rc] (MOVBreg y)) ++ // cond: rc >= 8 ++ // result: (SRAVconst [63] (SLLVconst [56] y)) ++ for { ++ t := v.Type ++ rc := auxIntToInt64(v.AuxInt) ++ if v_0.Op != OpLOONG64MOVBreg { ++ break ++ } ++ y := v_0.Args[0] ++ if !(rc >= 8) { ++ break ++ } ++ v.reset(OpLOONG64SRAVconst) ++ v.AuxInt = int64ToAuxInt(63) ++ v0 := 
b.NewValue0(v.Pos, OpLOONG64SLLVconst, t) ++ v0.AuxInt = int64ToAuxInt(56) ++ v0.AddArg(y) ++ v.AddArg(v0) ++ return true ++ } ++ // match: (SRAVconst [rc] (MOVHreg y)) ++ // cond: rc >= 16 ++ // result: (SRAVconst [63] (SLLVconst [48] y)) ++ for { ++ t := v.Type ++ rc := auxIntToInt64(v.AuxInt) ++ if v_0.Op != OpLOONG64MOVHreg { ++ break ++ } ++ y := v_0.Args[0] ++ if !(rc >= 16) { ++ break ++ } ++ v.reset(OpLOONG64SRAVconst) ++ v.AuxInt = int64ToAuxInt(63) ++ v0 := b.NewValue0(v.Pos, OpLOONG64SLLVconst, t) ++ v0.AuxInt = int64ToAuxInt(48) ++ v0.AddArg(y) ++ v.AddArg(v0) ++ return true ++ } ++ // match: (SRAVconst [rc] (MOVWreg y)) ++ // cond: rc >= 32 ++ // result: (SRAconst [31] y) ++ for { ++ rc := auxIntToInt64(v.AuxInt) ++ if v_0.Op != OpLOONG64MOVWreg { ++ break ++ } ++ y := v_0.Args[0] ++ if !(rc >= 32) { ++ break ++ } ++ v.reset(OpLOONG64SRAconst) ++ v.AuxInt = int64ToAuxInt(31) ++ v.AddArg(y) ++ return true ++ } + // match: (SRAVconst [c] (MOVVconst [d])) + // result: (MOVVconst [d>>uint64(c)]) + for { +@@ -6053,6 +6214,43 @@ func rewriteValueLOONG64_OpLOONG64SRAVconst(v *Value) bool { + } + return false + } ++func rewriteValueLOONG64_OpLOONG64SRL(v *Value) bool { ++ v_1 := v.Args[1] ++ v_0 := v.Args[0] ++ // match: (SRL _ (MOVVconst [c])) ++ // cond: uint64(c)>=32 ++ // result: (MOVVconst [0]) ++ for { ++ if v_1.Op != OpLOONG64MOVVconst { ++ break ++ } ++ c := auxIntToInt64(v_1.AuxInt) ++ if !(uint64(c) >= 32) { ++ break ++ } ++ v.reset(OpLOONG64MOVVconst) ++ v.AuxInt = int64ToAuxInt(0) ++ return true ++ } ++ // match: (SRL x (MOVVconst [c])) ++ // cond: uint64(c) >=0 && uint64(c) <= 31 ++ // result: (SRLconst x [c]) ++ for { ++ x := v_0 ++ if v_1.Op != OpLOONG64MOVVconst { ++ break ++ } ++ c := auxIntToInt64(v_1.AuxInt) ++ if !(uint64(c) >= 0 && uint64(c) <= 31) { ++ break ++ } ++ v.reset(OpLOONG64SRLconst) ++ v.AuxInt = int64ToAuxInt(c) ++ v.AddArg(x) ++ return true ++ } ++ return false ++} + func rewriteValueLOONG64_OpLOONG64SRLV(v *Value) bool { + 
v_1 := v.Args[1] + v_0 := v.Args[0] +@@ -6157,6 +6355,25 @@ func rewriteValueLOONG64_OpLOONG64SRLVconst(v *Value) bool { + v.AddArg(x) + return true + } ++ // match: (SRLVconst [rc] (MOVWUreg y)) ++ // cond: rc >= 0 && rc <= 31 ++ // result: (SRLconst [int64(rc)] y) ++ for { ++ t := v.Type ++ rc := auxIntToInt64(v.AuxInt) ++ if v_0.Op != OpLOONG64MOVWUreg { ++ break ++ } ++ y := v_0.Args[0] ++ if !(rc >= 0 && rc <= 31) { ++ break ++ } ++ v.reset(OpLOONG64SRLconst) ++ v.Type = t ++ v.AuxInt = int64ToAuxInt(int64(rc)) ++ v.AddArg(y) ++ return true ++ } + // match: (SRLVconst [rc] (MOVWUreg x)) + // cond: rc >= 32 + // result: (MOVVconst [0]) +@@ -7262,19 +7479,19 @@ func rewriteValueLOONG64_OpLsh32x16(v *Value) bool { + b := v.Block + typ := &b.Func.Config.Types + // match: (Lsh32x16 x y) +- // result: (MASKEQZ (SLLV x (ZeroExt16to64 y)) (SGTU (MOVVconst [64]) (ZeroExt16to64 y))) ++ // result: (MASKEQZ (SLL x (ZeroExt16to64 y)) (SGTU (MOVVconst [32]) (ZeroExt16to64 y))) + for { + t := v.Type + x := v_0 + y := v_1 + v.reset(OpLOONG64MASKEQZ) +- v0 := b.NewValue0(v.Pos, OpLOONG64SLLV, t) ++ v0 := b.NewValue0(v.Pos, OpLOONG64SLL, t) + v1 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64) + v1.AddArg(y) + v0.AddArg2(x, v1) + v2 := b.NewValue0(v.Pos, OpLOONG64SGTU, typ.Bool) + v3 := b.NewValue0(v.Pos, OpLOONG64MOVVconst, typ.UInt64) +- v3.AuxInt = int64ToAuxInt(64) ++ v3.AuxInt = int64ToAuxInt(32) + v2.AddArg2(v3, v1) + v.AddArg2(v0, v2) + return true +@@ -7286,19 +7503,19 @@ func rewriteValueLOONG64_OpLsh32x32(v *Value) bool { + b := v.Block + typ := &b.Func.Config.Types + // match: (Lsh32x32 x y) +- // result: (MASKEQZ (SLLV x (ZeroExt32to64 y)) (SGTU (MOVVconst [64]) (ZeroExt32to64 y))) ++ // result: (MASKEQZ (SLL x (ZeroExt32to64 y)) (SGTU (MOVVconst [32]) (ZeroExt32to64 y))) + for { + t := v.Type + x := v_0 + y := v_1 + v.reset(OpLOONG64MASKEQZ) +- v0 := b.NewValue0(v.Pos, OpLOONG64SLLV, t) ++ v0 := b.NewValue0(v.Pos, OpLOONG64SLL, t) + v1 := b.NewValue0(v.Pos, 
OpZeroExt32to64, typ.UInt64) + v1.AddArg(y) + v0.AddArg2(x, v1) + v2 := b.NewValue0(v.Pos, OpLOONG64SGTU, typ.Bool) + v3 := b.NewValue0(v.Pos, OpLOONG64MOVVconst, typ.UInt64) +- v3.AuxInt = int64ToAuxInt(64) ++ v3.AuxInt = int64ToAuxInt(32) + v2.AddArg2(v3, v1) + v.AddArg2(v0, v2) + return true +@@ -7310,17 +7527,17 @@ func rewriteValueLOONG64_OpLsh32x64(v *Value) bool { + b := v.Block + typ := &b.Func.Config.Types + // match: (Lsh32x64 x y) +- // result: (MASKEQZ (SLLV x y) (SGTU (MOVVconst [64]) y)) ++ // result: (MASKEQZ (SLL x y) (SGTU (MOVVconst [32]) y)) + for { + t := v.Type + x := v_0 + y := v_1 + v.reset(OpLOONG64MASKEQZ) +- v0 := b.NewValue0(v.Pos, OpLOONG64SLLV, t) ++ v0 := b.NewValue0(v.Pos, OpLOONG64SLL, t) + v0.AddArg2(x, y) + v1 := b.NewValue0(v.Pos, OpLOONG64SGTU, typ.Bool) + v2 := b.NewValue0(v.Pos, OpLOONG64MOVVconst, typ.UInt64) +- v2.AuxInt = int64ToAuxInt(64) ++ v2.AuxInt = int64ToAuxInt(32) + v1.AddArg2(v2, y) + v.AddArg2(v0, v1) + return true +@@ -7332,19 +7549,19 @@ func rewriteValueLOONG64_OpLsh32x8(v *Value) bool { + b := v.Block + typ := &b.Func.Config.Types + // match: (Lsh32x8 x y) +- // result: (MASKEQZ (SLLV x (ZeroExt8to64 y)) (SGTU (MOVVconst [64]) (ZeroExt8to64 y))) ++ // result: (MASKEQZ (SLL x (ZeroExt8to64 y)) (SGTU (MOVVconst [32]) (ZeroExt8to64 y))) + for { + t := v.Type + x := v_0 + y := v_1 + v.reset(OpLOONG64MASKEQZ) +- v0 := b.NewValue0(v.Pos, OpLOONG64SLLV, t) ++ v0 := b.NewValue0(v.Pos, OpLOONG64SLL, t) + v1 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64) + v1.AddArg(y) + v0.AddArg2(x, v1) + v2 := b.NewValue0(v.Pos, OpLOONG64SGTU, typ.Bool) + v3 := b.NewValue0(v.Pos, OpLOONG64MOVVconst, typ.UInt64) +- v3.AuxInt = int64ToAuxInt(64) ++ v3.AuxInt = int64ToAuxInt(32) + v2.AddArg2(v3, v1) + v.AddArg2(v0, v2) + return true +@@ -8694,23 +8911,21 @@ func rewriteValueLOONG64_OpRsh32Ux16(v *Value) bool { + b := v.Block + typ := &b.Func.Config.Types + // match: (Rsh32Ux16 x y) +- // result: (MASKEQZ (SRLV (ZeroExt32to64 x) 
(ZeroExt16to64 y)) (SGTU (MOVVconst [64]) (ZeroExt16to64 y))) ++ // result: (MASKEQZ (SRL x (ZeroExt16to64 y)) (SGTU (MOVVconst [32]) (ZeroExt16to64 y))) + for { + t := v.Type + x := v_0 + y := v_1 + v.reset(OpLOONG64MASKEQZ) +- v0 := b.NewValue0(v.Pos, OpLOONG64SRLV, t) +- v1 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64) +- v1.AddArg(x) +- v2 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64) +- v2.AddArg(y) +- v0.AddArg2(v1, v2) +- v3 := b.NewValue0(v.Pos, OpLOONG64SGTU, typ.Bool) +- v4 := b.NewValue0(v.Pos, OpLOONG64MOVVconst, typ.UInt64) +- v4.AuxInt = int64ToAuxInt(64) +- v3.AddArg2(v4, v2) +- v.AddArg2(v0, v3) ++ v0 := b.NewValue0(v.Pos, OpLOONG64SRL, t) ++ v1 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64) ++ v1.AddArg(y) ++ v0.AddArg2(x, v1) ++ v2 := b.NewValue0(v.Pos, OpLOONG64SGTU, typ.Bool) ++ v3 := b.NewValue0(v.Pos, OpLOONG64MOVVconst, typ.UInt64) ++ v3.AuxInt = int64ToAuxInt(32) ++ v2.AddArg2(v3, v1) ++ v.AddArg2(v0, v2) + return true + } + } +@@ -8720,23 +8935,21 @@ func rewriteValueLOONG64_OpRsh32Ux32(v *Value) bool { + b := v.Block + typ := &b.Func.Config.Types + // match: (Rsh32Ux32 x y) +- // result: (MASKEQZ (SRLV (ZeroExt32to64 x) (ZeroExt32to64 y)) (SGTU (MOVVconst [64]) (ZeroExt32to64 y))) ++ // result: (MASKEQZ (SRL x (ZeroExt32to64 y)) (SGTU (MOVVconst [32]) (ZeroExt32to64 y))) + for { + t := v.Type + x := v_0 + y := v_1 + v.reset(OpLOONG64MASKEQZ) +- v0 := b.NewValue0(v.Pos, OpLOONG64SRLV, t) ++ v0 := b.NewValue0(v.Pos, OpLOONG64SRL, t) + v1 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64) +- v1.AddArg(x) +- v2 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64) +- v2.AddArg(y) +- v0.AddArg2(v1, v2) +- v3 := b.NewValue0(v.Pos, OpLOONG64SGTU, typ.Bool) +- v4 := b.NewValue0(v.Pos, OpLOONG64MOVVconst, typ.UInt64) +- v4.AuxInt = int64ToAuxInt(64) +- v3.AddArg2(v4, v2) +- v.AddArg2(v0, v3) ++ v1.AddArg(y) ++ v0.AddArg2(x, v1) ++ v2 := b.NewValue0(v.Pos, OpLOONG64SGTU, typ.Bool) ++ v3 := b.NewValue0(v.Pos, OpLOONG64MOVVconst, 
typ.UInt64) ++ v3.AuxInt = int64ToAuxInt(32) ++ v2.AddArg2(v3, v1) ++ v.AddArg2(v0, v2) + return true + } + } +@@ -8746,21 +8959,19 @@ func rewriteValueLOONG64_OpRsh32Ux64(v *Value) bool { + b := v.Block + typ := &b.Func.Config.Types + // match: (Rsh32Ux64 x y) +- // result: (MASKEQZ (SRLV (ZeroExt32to64 x) y) (SGTU (MOVVconst [64]) y)) ++ // result: (MASKEQZ (SRL x y) (SGTU (MOVVconst [32]) y)) + for { + t := v.Type + x := v_0 + y := v_1 + v.reset(OpLOONG64MASKEQZ) +- v0 := b.NewValue0(v.Pos, OpLOONG64SRLV, t) +- v1 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64) +- v1.AddArg(x) +- v0.AddArg2(v1, y) +- v2 := b.NewValue0(v.Pos, OpLOONG64SGTU, typ.Bool) +- v3 := b.NewValue0(v.Pos, OpLOONG64MOVVconst, typ.UInt64) +- v3.AuxInt = int64ToAuxInt(64) +- v2.AddArg2(v3, y) +- v.AddArg2(v0, v2) ++ v0 := b.NewValue0(v.Pos, OpLOONG64SRL, t) ++ v0.AddArg2(x, y) ++ v1 := b.NewValue0(v.Pos, OpLOONG64SGTU, typ.Bool) ++ v2 := b.NewValue0(v.Pos, OpLOONG64MOVVconst, typ.UInt64) ++ v2.AuxInt = int64ToAuxInt(32) ++ v1.AddArg2(v2, y) ++ v.AddArg2(v0, v1) + return true + } + } +@@ -8770,23 +8981,21 @@ func rewriteValueLOONG64_OpRsh32Ux8(v *Value) bool { + b := v.Block + typ := &b.Func.Config.Types + // match: (Rsh32Ux8 x y) +- // result: (MASKEQZ (SRLV (ZeroExt32to64 x) (ZeroExt8to64 y)) (SGTU (MOVVconst [64]) (ZeroExt8to64 y))) ++ // result: (MASKEQZ (SRL x (ZeroExt8to64 y)) (SGTU (MOVVconst [32]) (ZeroExt8to64 y))) + for { + t := v.Type + x := v_0 + y := v_1 + v.reset(OpLOONG64MASKEQZ) +- v0 := b.NewValue0(v.Pos, OpLOONG64SRLV, t) +- v1 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64) +- v1.AddArg(x) +- v2 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64) +- v2.AddArg(y) +- v0.AddArg2(v1, v2) +- v3 := b.NewValue0(v.Pos, OpLOONG64SGTU, typ.Bool) +- v4 := b.NewValue0(v.Pos, OpLOONG64MOVVconst, typ.UInt64) +- v4.AuxInt = int64ToAuxInt(64) +- v3.AddArg2(v4, v2) +- v.AddArg2(v0, v3) ++ v0 := b.NewValue0(v.Pos, OpLOONG64SRL, t) ++ v1 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64) ++ 
v1.AddArg(y) ++ v0.AddArg2(x, v1) ++ v2 := b.NewValue0(v.Pos, OpLOONG64SGTU, typ.Bool) ++ v3 := b.NewValue0(v.Pos, OpLOONG64MOVVconst, typ.UInt64) ++ v3.AuxInt = int64ToAuxInt(32) ++ v2.AddArg2(v3, v1) ++ v.AddArg2(v0, v2) + return true + } + } +@@ -8796,25 +9005,23 @@ func rewriteValueLOONG64_OpRsh32x16(v *Value) bool { + b := v.Block + typ := &b.Func.Config.Types + // match: (Rsh32x16 x y) +- // result: (SRAV (SignExt32to64 x) (OR (NEGV (SGTU (ZeroExt16to64 y) (MOVVconst [63]))) (ZeroExt16to64 y))) ++ // result: (SRA x (OR (NEGV (SGTU (ZeroExt16to64 y) (MOVVconst [31]))) (ZeroExt16to64 y))) + for { + t := v.Type + x := v_0 + y := v_1 +- v.reset(OpLOONG64SRAV) +- v0 := b.NewValue0(v.Pos, OpSignExt32to64, typ.Int64) +- v0.AddArg(x) +- v1 := b.NewValue0(v.Pos, OpLOONG64OR, t) +- v2 := b.NewValue0(v.Pos, OpLOONG64NEGV, t) +- v3 := b.NewValue0(v.Pos, OpLOONG64SGTU, typ.Bool) +- v4 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64) +- v4.AddArg(y) +- v5 := b.NewValue0(v.Pos, OpLOONG64MOVVconst, typ.UInt64) +- v5.AuxInt = int64ToAuxInt(63) +- v3.AddArg2(v4, v5) +- v2.AddArg(v3) +- v1.AddArg2(v2, v4) +- v.AddArg2(v0, v1) ++ v.reset(OpLOONG64SRA) ++ v0 := b.NewValue0(v.Pos, OpLOONG64OR, t) ++ v1 := b.NewValue0(v.Pos, OpLOONG64NEGV, t) ++ v2 := b.NewValue0(v.Pos, OpLOONG64SGTU, typ.Bool) ++ v3 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64) ++ v3.AddArg(y) ++ v4 := b.NewValue0(v.Pos, OpLOONG64MOVVconst, typ.UInt64) ++ v4.AuxInt = int64ToAuxInt(31) ++ v2.AddArg2(v3, v4) ++ v1.AddArg(v2) ++ v0.AddArg2(v1, v3) ++ v.AddArg2(x, v0) + return true + } + } +@@ -8824,25 +9031,23 @@ func rewriteValueLOONG64_OpRsh32x32(v *Value) bool { + b := v.Block + typ := &b.Func.Config.Types + // match: (Rsh32x32 x y) +- // result: (SRAV (SignExt32to64 x) (OR (NEGV (SGTU (ZeroExt32to64 y) (MOVVconst [63]))) (ZeroExt32to64 y))) ++ // result: (SRA x (OR (NEGV (SGTU (ZeroExt32to64 y) (MOVVconst [31]))) (ZeroExt32to64 y))) + for { + t := v.Type + x := v_0 + y := v_1 +- v.reset(OpLOONG64SRAV) +- 
v0 := b.NewValue0(v.Pos, OpSignExt32to64, typ.Int64) +- v0.AddArg(x) +- v1 := b.NewValue0(v.Pos, OpLOONG64OR, t) +- v2 := b.NewValue0(v.Pos, OpLOONG64NEGV, t) +- v3 := b.NewValue0(v.Pos, OpLOONG64SGTU, typ.Bool) +- v4 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64) +- v4.AddArg(y) +- v5 := b.NewValue0(v.Pos, OpLOONG64MOVVconst, typ.UInt64) +- v5.AuxInt = int64ToAuxInt(63) +- v3.AddArg2(v4, v5) +- v2.AddArg(v3) +- v1.AddArg2(v2, v4) +- v.AddArg2(v0, v1) ++ v.reset(OpLOONG64SRA) ++ v0 := b.NewValue0(v.Pos, OpLOONG64OR, t) ++ v1 := b.NewValue0(v.Pos, OpLOONG64NEGV, t) ++ v2 := b.NewValue0(v.Pos, OpLOONG64SGTU, typ.Bool) ++ v3 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64) ++ v3.AddArg(y) ++ v4 := b.NewValue0(v.Pos, OpLOONG64MOVVconst, typ.UInt64) ++ v4.AuxInt = int64ToAuxInt(31) ++ v2.AddArg2(v3, v4) ++ v1.AddArg(v2) ++ v0.AddArg2(v1, v3) ++ v.AddArg2(x, v0) + return true + } + } +@@ -8852,23 +9057,21 @@ func rewriteValueLOONG64_OpRsh32x64(v *Value) bool { + b := v.Block + typ := &b.Func.Config.Types + // match: (Rsh32x64 x y) +- // result: (SRAV (SignExt32to64 x) (OR (NEGV (SGTU y (MOVVconst [63]))) y)) ++ // result: (SRA x (OR (NEGV (SGTU y (MOVVconst [31]))) y)) + for { + t := v.Type + x := v_0 + y := v_1 +- v.reset(OpLOONG64SRAV) +- v0 := b.NewValue0(v.Pos, OpSignExt32to64, typ.Int64) +- v0.AddArg(x) +- v1 := b.NewValue0(v.Pos, OpLOONG64OR, t) +- v2 := b.NewValue0(v.Pos, OpLOONG64NEGV, t) +- v3 := b.NewValue0(v.Pos, OpLOONG64SGTU, typ.Bool) +- v4 := b.NewValue0(v.Pos, OpLOONG64MOVVconst, typ.UInt64) +- v4.AuxInt = int64ToAuxInt(63) +- v3.AddArg2(y, v4) +- v2.AddArg(v3) +- v1.AddArg2(v2, y) +- v.AddArg2(v0, v1) ++ v.reset(OpLOONG64SRA) ++ v0 := b.NewValue0(v.Pos, OpLOONG64OR, t) ++ v1 := b.NewValue0(v.Pos, OpLOONG64NEGV, t) ++ v2 := b.NewValue0(v.Pos, OpLOONG64SGTU, typ.Bool) ++ v3 := b.NewValue0(v.Pos, OpLOONG64MOVVconst, typ.UInt64) ++ v3.AuxInt = int64ToAuxInt(31) ++ v2.AddArg2(y, v3) ++ v1.AddArg(v2) ++ v0.AddArg2(v1, y) ++ v.AddArg2(x, v0) + return true 
+ } + } +@@ -8878,25 +9081,23 @@ func rewriteValueLOONG64_OpRsh32x8(v *Value) bool { + b := v.Block + typ := &b.Func.Config.Types + // match: (Rsh32x8 x y) +- // result: (SRAV (SignExt32to64 x) (OR (NEGV (SGTU (ZeroExt8to64 y) (MOVVconst [63]))) (ZeroExt8to64 y))) ++ // result: (SRA x (OR (NEGV (SGTU (ZeroExt8to64 y) (MOVVconst [31]))) (ZeroExt8to64 y))) + for { + t := v.Type + x := v_0 + y := v_1 +- v.reset(OpLOONG64SRAV) +- v0 := b.NewValue0(v.Pos, OpSignExt32to64, typ.Int64) +- v0.AddArg(x) +- v1 := b.NewValue0(v.Pos, OpLOONG64OR, t) +- v2 := b.NewValue0(v.Pos, OpLOONG64NEGV, t) +- v3 := b.NewValue0(v.Pos, OpLOONG64SGTU, typ.Bool) +- v4 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64) +- v4.AddArg(y) +- v5 := b.NewValue0(v.Pos, OpLOONG64MOVVconst, typ.UInt64) +- v5.AuxInt = int64ToAuxInt(63) +- v3.AddArg2(v4, v5) +- v2.AddArg(v3) +- v1.AddArg2(v2, v4) +- v.AddArg2(v0, v1) ++ v.reset(OpLOONG64SRA) ++ v0 := b.NewValue0(v.Pos, OpLOONG64OR, t) ++ v1 := b.NewValue0(v.Pos, OpLOONG64NEGV, t) ++ v2 := b.NewValue0(v.Pos, OpLOONG64SGTU, typ.Bool) ++ v3 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64) ++ v3.AddArg(y) ++ v4 := b.NewValue0(v.Pos, OpLOONG64MOVVconst, typ.UInt64) ++ v4.AuxInt = int64ToAuxInt(31) ++ v2.AddArg2(v3, v4) ++ v1.AddArg(v2) ++ v0.AddArg2(v1, v3) ++ v.AddArg2(x, v0) + return true + } + } +diff --git a/test/codegen/shift.go b/test/codegen/shift.go +index 6112a989b9..3c669edcb2 100644 +--- a/test/codegen/shift.go ++++ b/test/codegen/shift.go +@@ -11,87 +11,99 @@ package codegen + // ------------------ // + + func lshConst64x64(v int64) int64 { ++ // loong64:"SLLV" + // ppc64x:"SLD" + // riscv64:"SLLI",-"AND",-"SLTIU" + return v << uint64(33) + } + + func rshConst64Ux64(v uint64) uint64 { ++ // loong64:"SRLV" + // ppc64x:"SRD" + // riscv64:"SRLI\t",-"AND",-"SLTIU" + return v >> uint64(33) + } + + func rshConst64Ux64Overflow32(v uint32) uint64 { ++ // loong64:"MOVV\t\\$0,",-"SRL\t" + // riscv64:"MOV\t\\$0,",-"SRL" + return uint64(v) >> 32 + } + + func 
rshConst64Ux64Overflow16(v uint16) uint64 { ++ // loong64:"MOVV\t\\$0,",-"SRLV" + // riscv64:"MOV\t\\$0,",-"SRL" + return uint64(v) >> 16 + } + + func rshConst64Ux64Overflow8(v uint8) uint64 { ++ // loong64:"MOVV\t\\$0,",-"SRLV" + // riscv64:"MOV\t\\$0,",-"SRL" + return uint64(v) >> 8 + } + + func rshConst64x64(v int64) int64 { ++ // loong64:"SRAV" + // ppc64x:"SRAD" + // riscv64:"SRAI\t",-"OR",-"SLTIU" + return v >> uint64(33) + } + + func rshConst64x64Overflow32(v int32) int64 { ++ // loong64:"SRA\t\\$31" + // riscv64:"SRAIW",-"SLLI",-"SRAI\t" + return int64(v) >> 32 + } + + func rshConst64x64Overflow16(v int16) int64 { ++ // loong64:"SLLV\t\\$48","SRAV\t\\$63" + // riscv64:"SLLI","SRAI",-"SRAIW" + return int64(v) >> 16 + } + + func rshConst64x64Overflow8(v int8) int64 { ++ // loong64:"SLLV\t\\$56","SRAV\t\\$63" + // riscv64:"SLLI","SRAI",-"SRAIW" + return int64(v) >> 8 + } + + func lshConst32x64(v int32) int32 { ++ // loong64:"SLL\t" + // ppc64x:"SLW" + // riscv64:"SLLI",-"AND",-"SLTIU", -"MOVW" +- // loong64:"SLLV" + return v << uint64(29) + } + + func rshConst32Ux64(v uint32) uint32 { ++ // loong64:"SRL\t" + // ppc64x:"SRW" + // riscv64:"SRLIW",-"AND",-"SLTIU", -"MOVW" +- // loong64:"BSTRPICKV",-"SLLV",-"SRLV",-"MOVWU" + return v >> uint64(29) + } + + func rshConst32x64(v int32) int32 { ++ // loong64:"SRA\t" + // ppc64x:"SRAW" + // riscv64:"SRAIW",-"OR",-"SLTIU", -"MOVW" +- // loong64:"SLLV","SRAV",-"MOVW" + return v >> uint64(29) + } + + func lshConst64x32(v int64) int64 { ++ // loong64:"SLLV" + // ppc64x:"SLD" + // riscv64:"SLLI",-"AND",-"SLTIU" + return v << uint32(33) + } + + func rshConst64Ux32(v uint64) uint64 { ++ // loong64:"SRLV" + // ppc64x:"SRD" + // riscv64:"SRLI\t",-"AND",-"SLTIU" + return v >> uint32(33) + } + + func rshConst64x32(v int64) int64 { ++ // loong64:"SRAV" + // ppc64x:"SRAD" + // riscv64:"SRAI\t",-"OR",-"SLTIU" + return v >> uint32(33) +@@ -253,6 +265,7 @@ func rshGuarded64U(v uint64, s uint) uint64 { + // 
s390x:-"RISBGZ",-"AND",-"LOCGR" + // wasm:-"Select",-".*LtU" + // arm64:"LSR",-"CSEL" ++ // loong64:"SRLV" + return v >> s + } + panic("shift too large") +@@ -264,6 +277,7 @@ func rshGuarded64(v int64, s uint) int64 { + // s390x:-"RISBGZ",-"AND",-"LOCGR" + // wasm:-"Select",-".*LtU" + // arm64:"ASR",-"CSEL" ++ // loong64:"SRAV" + return v >> s + } + panic("shift too large") +-- +2.38.1 + diff --git a/0023-cmd-compile-cmd-internal-runtime-change-registers-on.patch b/0023-cmd-compile-cmd-internal-runtime-change-registers-on.patch deleted file mode 100644 index 37cecfd4fed02112427c9964e2fd969bee1ea8f5..0000000000000000000000000000000000000000 --- a/0023-cmd-compile-cmd-internal-runtime-change-registers-on.patch +++ /dev/null @@ -1,405 +0,0 @@ -From eff24b2f636fe822a7e4c7d382bb4c8e7fe5bee6 Mon Sep 17 00:00:00 2001 -From: Guoqi Chen -Date: Tue, 15 Aug 2023 19:23:51 +0800 -Subject: [PATCH 23/51] cmd/compile,cmd/internal,runtime: change registers on - loong64 to avoid regABI arguments - -Update #40724 - -Co-authored-by: Xiaolin Zhao -Change-Id: I2be59a7300aa4f0ee9af509e2cc7201a968f7228 ---- - .../compile/internal/ssa/_gen/LOONG64Ops.go | 8 +- - src/cmd/compile/internal/ssa/opGen.go | 12 +-- - src/cmd/internal/obj/loong64/obj.go | 68 ++++++++--------- - src/runtime/asm_loong64.s | 74 +++++++++---------- - 4 files changed, 81 insertions(+), 81 deletions(-) - -diff --git a/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go b/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go -index 9950619baf..0d0f475a5b 100644 ---- a/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go -+++ b/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go -@@ -130,10 +130,10 @@ func init() { - gpspsbg = gpspg | buildReg("SB") - fp = buildReg("F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31") - callerSave = gp | fp | buildReg("g") // runtime.setg (and anything calling it) may clobber g -- r1 = buildReg("R19") -- r2 = buildReg("R18") -- r3 = 
buildReg("R17") -- r4 = buildReg("R4") -+ r1 = buildReg("R20") -+ r2 = buildReg("R21") -+ r3 = buildReg("R23") -+ r4 = buildReg("R24") - ) - // Common regInfo - var ( -diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go -index 2c17801ea4..6643aef21a 100644 ---- a/src/cmd/compile/internal/ssa/opGen.go -+++ b/src/cmd/compile/internal/ssa/opGen.go -@@ -24660,8 +24660,8 @@ var opcodeTable = [...]opInfo{ - call: true, - reg: regInfo{ - inputs: []inputInfo{ -- {0, 65536}, // R17 -- {1, 8}, // R4 -+ {0, 4194304}, // R23 -+ {1, 8388608}, // R24 - }, - }, - }, -@@ -24672,8 +24672,8 @@ var opcodeTable = [...]opInfo{ - call: true, - reg: regInfo{ - inputs: []inputInfo{ -- {0, 131072}, // R18 -- {1, 65536}, // R17 -+ {0, 1048576}, // R21 -+ {1, 4194304}, // R23 - }, - }, - }, -@@ -24684,8 +24684,8 @@ var opcodeTable = [...]opInfo{ - call: true, - reg: regInfo{ - inputs: []inputInfo{ -- {0, 262144}, // R19 -- {1, 131072}, // R18 -+ {0, 524288}, // R20 -+ {1, 1048576}, // R21 - }, - }, - }, -diff --git a/src/cmd/internal/obj/loong64/obj.go b/src/cmd/internal/obj/loong64/obj.go -index 38ab66b819..b0f5ac3087 100644 ---- a/src/cmd/internal/obj/loong64/obj.go -+++ b/src/cmd/internal/obj/loong64/obj.go -@@ -402,13 +402,13 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { - if c.cursym.Func().Text.From.Sym.Wrapper() && c.cursym.Func().Text.Mark&LEAF == 0 { - // if(g->panic != nil && g->panic->argp == FP) g->panic->argp = bottom-of-frame - // -- // MOV g_panic(g), R1 -- // BEQ R1, end -- // MOV panic_argp(R1), R2 -- // ADD $(autosize+FIXED_FRAME), R29, R3 -- // BNE R2, R3, end -- // ADD $FIXED_FRAME, R29, R2 -- // MOV R2, panic_argp(R1) -+ // MOV g_panic(g), R20 -+ // BEQ R20, end -+ // MOV panic_argp(R20), R24 -+ // ADD $(autosize+FIXED_FRAME), R3, R30 -+ // BNE R24, R30, end -+ // ADD $FIXED_FRAME, R3, R24 -+ // MOV R24, panic_argp(R20) - // end: - // NOP - // -@@ -425,12 +425,12 @@ func preprocess(ctxt *obj.Link, 
cursym *obj.LSym, newprog obj.ProgAlloc) { - q.From.Reg = REGG - q.From.Offset = 4 * int64(c.ctxt.Arch.PtrSize) // G.panic - q.To.Type = obj.TYPE_REG -- q.To.Reg = REG_R19 -+ q.To.Reg = REG_R20 - - q = obj.Appendp(q, newprog) - q.As = ABEQ - q.From.Type = obj.TYPE_REG -- q.From.Reg = REG_R19 -+ q.From.Reg = REG_R20 - q.To.Type = obj.TYPE_BRANCH - q.Mark |= BRANCH - p1 = q -@@ -438,10 +438,10 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { - q = obj.Appendp(q, newprog) - q.As = mov - q.From.Type = obj.TYPE_MEM -- q.From.Reg = REG_R19 -+ q.From.Reg = REG_R20 - q.From.Offset = 0 // Panic.argp - q.To.Type = obj.TYPE_REG -- q.To.Reg = REG_R4 -+ q.To.Reg = REG_R24 - - q = obj.Appendp(q, newprog) - q.As = add -@@ -449,13 +449,13 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { - q.From.Offset = int64(autosize) + ctxt.Arch.FixedFrameSize - q.Reg = REGSP - q.To.Type = obj.TYPE_REG -- q.To.Reg = REG_R5 -+ q.To.Reg = REG_R30 - - q = obj.Appendp(q, newprog) - q.As = ABNE - q.From.Type = obj.TYPE_REG -- q.From.Reg = REG_R4 -- q.Reg = REG_R5 -+ q.From.Reg = REG_R24 -+ q.Reg = REG_R30 - q.To.Type = obj.TYPE_BRANCH - q.Mark |= BRANCH - p2 = q -@@ -466,14 +466,14 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { - q.From.Offset = ctxt.Arch.FixedFrameSize - q.Reg = REGSP - q.To.Type = obj.TYPE_REG -- q.To.Reg = REG_R4 -+ q.To.Reg = REG_R24 - - q = obj.Appendp(q, newprog) - q.As = mov - q.From.Type = obj.TYPE_REG -- q.From.Reg = REG_R4 -+ q.From.Reg = REG_R24 - q.To.Type = obj.TYPE_MEM -- q.To.Reg = REG_R19 -+ q.To.Reg = REG_R20 - q.To.Offset = 0 // Panic.argp - - q = obj.Appendp(q, newprog) -@@ -696,7 +696,7 @@ func (c *ctxt0) stacksplit(p *obj.Prog, framesize int32) *obj.Prog { - // Jump back to here after morestack returns. 
- startPred := p - -- // MOV g_stackguard(g), R19 -+ // MOV g_stackguard(g), R20 - p = obj.Appendp(p, c.newprog) - - p.As = mov -@@ -707,7 +707,7 @@ func (c *ctxt0) stacksplit(p *obj.Prog, framesize int32) *obj.Prog { - p.From.Offset = 3 * int64(c.ctxt.Arch.PtrSize) // G.stackguard1 - } - p.To.Type = obj.TYPE_REG -- p.To.Reg = REG_R19 -+ p.To.Reg = REG_R20 - - // Mark the stack bound check and morestack call async nonpreemptible. - // If we get preempted here, when resumed the preemption request is -@@ -718,15 +718,15 @@ func (c *ctxt0) stacksplit(p *obj.Prog, framesize int32) *obj.Prog { - var q *obj.Prog - if framesize <= abi.StackSmall { - // small stack: SP < stackguard -- // AGTU SP, stackguard, R19 -+ // AGTU SP, stackguard, R20 - p = obj.Appendp(p, c.newprog) - - p.As = ASGTU - p.From.Type = obj.TYPE_REG - p.From.Reg = REGSP -- p.Reg = REG_R19 -+ p.Reg = REG_R20 - p.To.Type = obj.TYPE_REG -- p.To.Reg = REG_R19 -+ p.To.Reg = REG_R20 - } else { - // large stack: SP-framesize < stackguard-StackSmall - offset := int64(framesize) - abi.StackSmall -@@ -738,8 +738,8 @@ func (c *ctxt0) stacksplit(p *obj.Prog, framesize int32) *obj.Prog { - // stack guard to incorrectly succeed. We explicitly - // guard against underflow. 
- // -- // SGTU $(framesize-StackSmall), SP, R4 -- // BNE R4, label-of-call-to-morestack -+ // SGTU $(framesize-StackSmall), SP, R24 -+ // BNE R24, label-of-call-to-morestack - - p = obj.Appendp(p, c.newprog) - p.As = ASGTU -@@ -747,13 +747,13 @@ func (c *ctxt0) stacksplit(p *obj.Prog, framesize int32) *obj.Prog { - p.From.Offset = offset - p.Reg = REGSP - p.To.Type = obj.TYPE_REG -- p.To.Reg = REG_R4 -+ p.To.Reg = REG_R24 - - p = obj.Appendp(p, c.newprog) - q = p - p.As = ABNE - p.From.Type = obj.TYPE_REG -- p.From.Reg = REG_R4 -+ p.From.Reg = REG_R24 - p.To.Type = obj.TYPE_BRANCH - p.Mark |= BRANCH - } -@@ -765,35 +765,35 @@ func (c *ctxt0) stacksplit(p *obj.Prog, framesize int32) *obj.Prog { - p.From.Offset = -offset - p.Reg = REGSP - p.To.Type = obj.TYPE_REG -- p.To.Reg = REG_R4 -+ p.To.Reg = REG_R24 - - p = obj.Appendp(p, c.newprog) - p.As = ASGTU - p.From.Type = obj.TYPE_REG -- p.From.Reg = REG_R4 -- p.Reg = REG_R19 -+ p.From.Reg = REG_R24 -+ p.Reg = REG_R20 - p.To.Type = obj.TYPE_REG -- p.To.Reg = REG_R19 -+ p.To.Reg = REG_R20 - } - -- // q1: BNE R19, done -+ // q1: BNE R20, done - p = obj.Appendp(p, c.newprog) - q1 := p - - p.As = ABNE - p.From.Type = obj.TYPE_REG -- p.From.Reg = REG_R19 -+ p.From.Reg = REG_R20 - p.To.Type = obj.TYPE_BRANCH - p.Mark |= BRANCH - -- // MOV LINK, R5 -+ // MOV LINK, R30 - p = obj.Appendp(p, c.newprog) - - p.As = mov - p.From.Type = obj.TYPE_REG - p.From.Reg = REGLINK - p.To.Type = obj.TYPE_REG -- p.To.Reg = REG_R5 -+ p.To.Reg = REG_R30 - if q != nil { - q.To.SetTarget(p) - p.Mark |= LABEL -diff --git a/src/runtime/asm_loong64.s b/src/runtime/asm_loong64.s -index 78a1a4d358..23cbd09947 100644 ---- a/src/runtime/asm_loong64.s -+++ b/src/runtime/asm_loong64.s -@@ -214,7 +214,7 @@ noswitch: - - // Called during function prolog when more stack is needed. 
- // Caller has already loaded: --// loong64: R5: LR -+// loong64: R30: LR - // - // The traceback routines see morestack on a g0 as being - // the top of a stack (for example, morestack calling newstack -@@ -238,12 +238,12 @@ TEXT runtime·morestack(SB),NOSPLIT|NOFRAME,$0-0 - // Set g->sched to context in f. - MOVV R3, (g_sched+gobuf_sp)(g) - MOVV R1, (g_sched+gobuf_pc)(g) -- MOVV R5, (g_sched+gobuf_lr)(g) -+ MOVV R30, (g_sched+gobuf_lr)(g) - MOVV REGCTXT, (g_sched+gobuf_ctxt)(g) - - // Called from f. - // Set m->morebuf to f's caller. -- MOVV R5, (m_morebuf+gobuf_pc)(R7) // f's caller's PC -+ MOVV R30, (m_morebuf+gobuf_pc)(R7) // f's caller's PC - MOVV R3, (m_morebuf+gobuf_sp)(R7) // f's caller's SP - MOVV g, (m_morebuf+gobuf_g)(R7) - -@@ -786,70 +786,70 @@ TEXT runtime·gcWriteBarrier8(SB),NOSPLIT,$0 - // then tail call to the corresponding runtime handler. - // The tail call makes these stubs disappear in backtraces. - TEXT runtime·panicIndex(SB),NOSPLIT,$0-16 -- MOVV R19, x+0(FP) -- MOVV R18, y+8(FP) -+ MOVV R20, x+0(FP) -+ MOVV R21, y+8(FP) - JMP runtime·goPanicIndex(SB) - TEXT runtime·panicIndexU(SB),NOSPLIT,$0-16 -- MOVV R19, x+0(FP) -- MOVV R18, y+8(FP) -+ MOVV R20, x+0(FP) -+ MOVV R21, y+8(FP) - JMP runtime·goPanicIndexU(SB) - TEXT runtime·panicSliceAlen(SB),NOSPLIT,$0-16 -- MOVV R18, x+0(FP) -- MOVV R17, y+8(FP) -+ MOVV R21, x+0(FP) -+ MOVV R23, y+8(FP) - JMP runtime·goPanicSliceAlen(SB) - TEXT runtime·panicSliceAlenU(SB),NOSPLIT,$0-16 -- MOVV R18, x+0(FP) -- MOVV R17, y+8(FP) -+ MOVV R21, x+0(FP) -+ MOVV R23, y+8(FP) - JMP runtime·goPanicSliceAlenU(SB) - TEXT runtime·panicSliceAcap(SB),NOSPLIT,$0-16 -- MOVV R18, x+0(FP) -- MOVV R17, y+8(FP) -+ MOVV R21, x+0(FP) -+ MOVV R23, y+8(FP) - JMP runtime·goPanicSliceAcap(SB) - TEXT runtime·panicSliceAcapU(SB),NOSPLIT,$0-16 -- MOVV R18, x+0(FP) -- MOVV R17, y+8(FP) -+ MOVV R21, x+0(FP) -+ MOVV R23, y+8(FP) - JMP runtime·goPanicSliceAcapU(SB) - TEXT runtime·panicSliceB(SB),NOSPLIT,$0-16 -- MOVV R19, x+0(FP) -- MOVV 
R18, y+8(FP) -+ MOVV R20, x+0(FP) -+ MOVV R21, y+8(FP) - JMP runtime·goPanicSliceB(SB) - TEXT runtime·panicSliceBU(SB),NOSPLIT,$0-16 -- MOVV R19, x+0(FP) -- MOVV R18, y+8(FP) -+ MOVV R20, x+0(FP) -+ MOVV R21, y+8(FP) - JMP runtime·goPanicSliceBU(SB) - TEXT runtime·panicSlice3Alen(SB),NOSPLIT,$0-16 -- MOVV R17, x+0(FP) -- MOVV R4, y+8(FP) -+ MOVV R23, x+0(FP) -+ MOVV R24, y+8(FP) - JMP runtime·goPanicSlice3Alen(SB) - TEXT runtime·panicSlice3AlenU(SB),NOSPLIT,$0-16 -- MOVV R17, x+0(FP) -- MOVV R4, y+8(FP) -+ MOVV R23, x+0(FP) -+ MOVV R24, y+8(FP) - JMP runtime·goPanicSlice3AlenU(SB) - TEXT runtime·panicSlice3Acap(SB),NOSPLIT,$0-16 -- MOVV R17, x+0(FP) -- MOVV R4, y+8(FP) -+ MOVV R23, x+0(FP) -+ MOVV R24, y+8(FP) - JMP runtime·goPanicSlice3Acap(SB) - TEXT runtime·panicSlice3AcapU(SB),NOSPLIT,$0-16 -- MOVV R17, x+0(FP) -- MOVV R4, y+8(FP) -+ MOVV R23, x+0(FP) -+ MOVV R24, y+8(FP) - JMP runtime·goPanicSlice3AcapU(SB) - TEXT runtime·panicSlice3B(SB),NOSPLIT,$0-16 -- MOVV R18, x+0(FP) -- MOVV R17, y+8(FP) -+ MOVV R21, x+0(FP) -+ MOVV R23, y+8(FP) - JMP runtime·goPanicSlice3B(SB) - TEXT runtime·panicSlice3BU(SB),NOSPLIT,$0-16 -- MOVV R18, x+0(FP) -- MOVV R17, y+8(FP) -+ MOVV R21, x+0(FP) -+ MOVV R23, y+8(FP) - JMP runtime·goPanicSlice3BU(SB) - TEXT runtime·panicSlice3C(SB),NOSPLIT,$0-16 -- MOVV R19, x+0(FP) -- MOVV R18, y+8(FP) -+ MOVV R20, x+0(FP) -+ MOVV R21, y+8(FP) - JMP runtime·goPanicSlice3C(SB) - TEXT runtime·panicSlice3CU(SB),NOSPLIT,$0-16 -- MOVV R19, x+0(FP) -- MOVV R18, y+8(FP) -+ MOVV R20, x+0(FP) -+ MOVV R21, y+8(FP) - JMP runtime·goPanicSlice3CU(SB) - TEXT runtime·panicSliceConvert(SB),NOSPLIT,$0-16 -- MOVV R17, x+0(FP) -- MOVV R4, y+8(FP) -+ MOVV R23, x+0(FP) -+ MOVV R24, y+8(FP) - JMP runtime·goPanicSliceConvert(SB) --- -2.38.1 - diff --git a/0023-cmd-compile-simplify-bounded-shift-on-loong64.patch b/0023-cmd-compile-simplify-bounded-shift-on-loong64.patch new file mode 100644 index 
0000000000000000000000000000000000000000..c4ba508b2422f8ec4217d0e36810d86d60d1d763 --- /dev/null +++ b/0023-cmd-compile-simplify-bounded-shift-on-loong64.patch @@ -0,0 +1,2206 @@ +From 03f91ceb084274b0840d7c2cf7a7cb83a7fb2ed0 Mon Sep 17 00:00:00 2001 +From: Xiaolin Zhao +Date: Fri, 15 Nov 2024 17:28:07 +0800 +Subject: [PATCH 23/44] cmd/compile: simplify bounded shift on loong64 + +Use the shiftIsBounded function to generate more efficient shift instructions. +Also optimize shift ops when the shift value is v&63 and v&31. + +Change-Id: I12548101a7cea6bca7f5fef2b12c4b8af8a20bb3 +--- + .../compile/internal/ssa/_gen/LOONG64.rules | 146 +-- + .../compile/internal/ssa/rewriteLOONG64.go | 968 ++++++++++++++++++ + test/codegen/shift.go | 16 + + 3 files changed, 1071 insertions(+), 59 deletions(-) + +diff --git a/src/cmd/compile/internal/ssa/_gen/LOONG64.rules b/src/cmd/compile/internal/ssa/_gen/LOONG64.rules +index 014cd6fb05..9d0435f434 100644 +--- a/src/cmd/compile/internal/ssa/_gen/LOONG64.rules ++++ b/src/cmd/compile/internal/ssa/_gen/LOONG64.rules +@@ -57,65 +57,84 @@ + // shifts + // hardware instruction uses only the low 6 bits of the shift + // we compare to 64 to ensure Go semantics for large shifts +-(Lsh64x64 x y) => (MASKEQZ (SLLV x y) (SGTU (MOVVconst [64]) y)) +-(Lsh64x32 x y) => (MASKEQZ (SLLV x (ZeroExt32to64 y)) (SGTU (MOVVconst [64]) (ZeroExt32to64 y))) +-(Lsh64x16 x y) => (MASKEQZ (SLLV x (ZeroExt16to64 y)) (SGTU (MOVVconst [64]) (ZeroExt16to64 y))) +-(Lsh64x8 x y) => (MASKEQZ (SLLV x (ZeroExt8to64 y)) (SGTU (MOVVconst [64]) (ZeroExt8to64 y))) +- +-(Lsh32x64 x y) => (MASKEQZ (SLL x y) (SGTU (MOVVconst [32]) y)) +-(Lsh32x32 x y) => (MASKEQZ (SLL x (ZeroExt32to64 y)) (SGTU (MOVVconst [32]) (ZeroExt32to64 y))) +-(Lsh32x16 x y) => (MASKEQZ (SLL x (ZeroExt16to64 y)) (SGTU (MOVVconst [32]) (ZeroExt16to64 y))) +-(Lsh32x8 x y) => (MASKEQZ (SLL x (ZeroExt8to64 y)) (SGTU (MOVVconst [32]) (ZeroExt8to64 y))) +- +-(Lsh16x64 x y) => (MASKEQZ (SLLV x y) (SGTU 
(MOVVconst [64]) y)) +-(Lsh16x32 x y) => (MASKEQZ (SLLV x (ZeroExt32to64 y)) (SGTU (MOVVconst [64]) (ZeroExt32to64 y))) +-(Lsh16x16 x y) => (MASKEQZ (SLLV x (ZeroExt16to64 y)) (SGTU (MOVVconst [64]) (ZeroExt16to64 y))) +-(Lsh16x8 x y) => (MASKEQZ (SLLV x (ZeroExt8to64 y)) (SGTU (MOVVconst [64]) (ZeroExt8to64 y))) +- +-(Lsh8x64 x y) => (MASKEQZ (SLLV x y) (SGTU (MOVVconst [64]) y)) +-(Lsh8x32 x y) => (MASKEQZ (SLLV x (ZeroExt32to64 y)) (SGTU (MOVVconst [64]) (ZeroExt32to64 y))) +-(Lsh8x16 x y) => (MASKEQZ (SLLV x (ZeroExt16to64 y)) (SGTU (MOVVconst [64]) (ZeroExt16to64 y))) +-(Lsh8x8 x y) => (MASKEQZ (SLLV x (ZeroExt8to64 y)) (SGTU (MOVVconst [64]) (ZeroExt8to64 y))) +- +-(Rsh64Ux64 x y) => (MASKEQZ (SRLV x y) (SGTU (MOVVconst [64]) y)) +-(Rsh64Ux32 x y) => (MASKEQZ (SRLV x (ZeroExt32to64 y)) (SGTU (MOVVconst [64]) (ZeroExt32to64 y))) +-(Rsh64Ux16 x y) => (MASKEQZ (SRLV x (ZeroExt16to64 y)) (SGTU (MOVVconst [64]) (ZeroExt16to64 y))) +-(Rsh64Ux8 x y) => (MASKEQZ (SRLV x (ZeroExt8to64 y)) (SGTU (MOVVconst [64]) (ZeroExt8to64 y))) +- +-(Rsh32Ux64 x y) => (MASKEQZ (SRL x y) (SGTU (MOVVconst [32]) y)) +-(Rsh32Ux32 x y) => (MASKEQZ (SRL x (ZeroExt32to64 y)) (SGTU (MOVVconst [32]) (ZeroExt32to64 y))) +-(Rsh32Ux16 x y) => (MASKEQZ (SRL x (ZeroExt16to64 y)) (SGTU (MOVVconst [32]) (ZeroExt16to64 y))) +-(Rsh32Ux8 x y) => (MASKEQZ (SRL x (ZeroExt8to64 y)) (SGTU (MOVVconst [32]) (ZeroExt8to64 y))) +- +-(Rsh16Ux64 x y) => (MASKEQZ (SRLV (ZeroExt16to64 x) y) (SGTU (MOVVconst [64]) y)) +-(Rsh16Ux32 x y) => (MASKEQZ (SRLV (ZeroExt16to64 x) (ZeroExt32to64 y)) (SGTU (MOVVconst [64]) (ZeroExt32to64 y))) +-(Rsh16Ux16 x y) => (MASKEQZ (SRLV (ZeroExt16to64 x) (ZeroExt16to64 y)) (SGTU (MOVVconst [64]) (ZeroExt16to64 y))) +-(Rsh16Ux8 x y) => (MASKEQZ (SRLV (ZeroExt16to64 x) (ZeroExt8to64 y)) (SGTU (MOVVconst [64]) (ZeroExt8to64 y))) +- +-(Rsh8Ux64 x y) => (MASKEQZ (SRLV (ZeroExt8to64 x) y) (SGTU (MOVVconst [64]) y)) +-(Rsh8Ux32 x y) => (MASKEQZ (SRLV (ZeroExt8to64 x) (ZeroExt32to64 y)) 
(SGTU (MOVVconst [64]) (ZeroExt32to64 y))) +-(Rsh8Ux16 x y) => (MASKEQZ (SRLV (ZeroExt8to64 x) (ZeroExt16to64 y)) (SGTU (MOVVconst [64]) (ZeroExt16to64 y))) +-(Rsh8Ux8 x y) => (MASKEQZ (SRLV (ZeroExt8to64 x) (ZeroExt8to64 y)) (SGTU (MOVVconst [64]) (ZeroExt8to64 y))) +- +-(Rsh64x64 x y) => (SRAV x (OR (NEGV (SGTU y (MOVVconst [63]))) y)) +-(Rsh64x32 x y) => (SRAV x (OR (NEGV (SGTU (ZeroExt32to64 y) (MOVVconst [63]))) (ZeroExt32to64 y))) +-(Rsh64x16 x y) => (SRAV x (OR (NEGV (SGTU (ZeroExt16to64 y) (MOVVconst [63]))) (ZeroExt16to64 y))) +-(Rsh64x8 x y) => (SRAV x (OR (NEGV (SGTU (ZeroExt8to64 y) (MOVVconst [63]))) (ZeroExt8to64 y))) +- +-(Rsh32x64 x y) => (SRA x (OR (NEGV (SGTU y (MOVVconst [31]))) y)) +-(Rsh32x32 x y) => (SRA x (OR (NEGV (SGTU (ZeroExt32to64 y) (MOVVconst [31]))) (ZeroExt32to64 y))) +-(Rsh32x16 x y) => (SRA x (OR (NEGV (SGTU (ZeroExt16to64 y) (MOVVconst [31]))) (ZeroExt16to64 y))) +-(Rsh32x8 x y) => (SRA x (OR (NEGV (SGTU (ZeroExt8to64 y) (MOVVconst [31]))) (ZeroExt8to64 y))) +- +-(Rsh16x64 x y) => (SRAV (SignExt16to64 x) (OR (NEGV (SGTU y (MOVVconst [63]))) y)) +-(Rsh16x32 x y) => (SRAV (SignExt16to64 x) (OR (NEGV (SGTU (ZeroExt32to64 y) (MOVVconst [63]))) (ZeroExt32to64 y))) +-(Rsh16x16 x y) => (SRAV (SignExt16to64 x) (OR (NEGV (SGTU (ZeroExt16to64 y) (MOVVconst [63]))) (ZeroExt16to64 y))) +-(Rsh16x8 x y) => (SRAV (SignExt16to64 x) (OR (NEGV (SGTU (ZeroExt8to64 y) (MOVVconst [63]))) (ZeroExt8to64 y))) +- +-(Rsh8x64 x y) => (SRAV (SignExt8to64 x) (OR (NEGV (SGTU y (MOVVconst [63]))) y)) +-(Rsh8x32 x y) => (SRAV (SignExt8to64 x) (OR (NEGV (SGTU (ZeroExt32to64 y) (MOVVconst [63]))) (ZeroExt32to64 y))) +-(Rsh8x16 x y) => (SRAV (SignExt8to64 x) (OR (NEGV (SGTU (ZeroExt16to64 y) (MOVVconst [63]))) (ZeroExt16to64 y))) +-(Rsh8x8 x y) => (SRAV (SignExt8to64 x) (OR (NEGV (SGTU (ZeroExt8to64 y) (MOVVconst [63]))) (ZeroExt8to64 y))) ++ ++// left shift ++(Lsh64x(64|32|16|8) x y) && shiftIsBounded(v) => (SLLV x y) ++(Lsh32x(64|32|16|8) x y) && 
shiftIsBounded(v) => (SLL x y) ++(Lsh16x(64|32|16|8) x y) && shiftIsBounded(v) => (SLLV x y) ++(Lsh8x(64|32|16|8) x y) && shiftIsBounded(v) => (SLLV x y) ++ ++(Lsh64x64 x y) && !shiftIsBounded(v) => (MASKEQZ (SLLV x y) (SGTU (MOVVconst [64]) y)) ++(Lsh64x32 x y) && !shiftIsBounded(v) => (MASKEQZ (SLLV x (ZeroExt32to64 y)) (SGTU (MOVVconst [64]) (ZeroExt32to64 y))) ++(Lsh64x16 x y) && !shiftIsBounded(v) => (MASKEQZ (SLLV x (ZeroExt16to64 y)) (SGTU (MOVVconst [64]) (ZeroExt16to64 y))) ++(Lsh64x8 x y) && !shiftIsBounded(v) => (MASKEQZ (SLLV x (ZeroExt8to64 y)) (SGTU (MOVVconst [64]) (ZeroExt8to64 y))) ++ ++(Lsh32x64 x y) && !shiftIsBounded(v) => (MASKEQZ (SLL x y) (SGTU (MOVVconst [32]) y)) ++(Lsh32x32 x y) && !shiftIsBounded(v) => (MASKEQZ (SLL x (ZeroExt32to64 y)) (SGTU (MOVVconst [32]) (ZeroExt32to64 y))) ++(Lsh32x16 x y) && !shiftIsBounded(v) => (MASKEQZ (SLL x (ZeroExt16to64 y)) (SGTU (MOVVconst [32]) (ZeroExt16to64 y))) ++(Lsh32x8 x y) && !shiftIsBounded(v) => (MASKEQZ (SLL x (ZeroExt8to64 y)) (SGTU (MOVVconst [32]) (ZeroExt8to64 y))) ++ ++(Lsh16x64 x y) && !shiftIsBounded(v) => (MASKEQZ (SLLV x y) (SGTU (MOVVconst [64]) y)) ++(Lsh16x32 x y) && !shiftIsBounded(v) => (MASKEQZ (SLLV x (ZeroExt32to64 y)) (SGTU (MOVVconst [64]) (ZeroExt32to64 y))) ++(Lsh16x16 x y) && !shiftIsBounded(v) => (MASKEQZ (SLLV x (ZeroExt16to64 y)) (SGTU (MOVVconst [64]) (ZeroExt16to64 y))) ++(Lsh16x8 x y) && !shiftIsBounded(v) => (MASKEQZ (SLLV x (ZeroExt8to64 y)) (SGTU (MOVVconst [64]) (ZeroExt8to64 y))) ++ ++(Lsh8x64 x y) && !shiftIsBounded(v) => (MASKEQZ (SLLV x y) (SGTU (MOVVconst [64]) y)) ++(Lsh8x32 x y) && !shiftIsBounded(v) => (MASKEQZ (SLLV x (ZeroExt32to64 y)) (SGTU (MOVVconst [64]) (ZeroExt32to64 y))) ++(Lsh8x16 x y) && !shiftIsBounded(v) => (MASKEQZ (SLLV x (ZeroExt16to64 y)) (SGTU (MOVVconst [64]) (ZeroExt16to64 y))) ++(Lsh8x8 x y) && !shiftIsBounded(v) => (MASKEQZ (SLLV x (ZeroExt8to64 y)) (SGTU (MOVVconst [64]) (ZeroExt8to64 y))) ++ ++// unsigned right shift 
++(Rsh64Ux(64|32|16|8) x y) && shiftIsBounded(v) => (SRLV x y) ++(Rsh32Ux(64|32|16|8) x y) && shiftIsBounded(v) => (SRL x y) ++(Rsh16Ux(64|32|16|8) x y) && shiftIsBounded(v) => (SRLV (ZeroExt16to64 x) y) ++(Rsh8Ux(64|32|16|8) x y) && shiftIsBounded(v) => (SRLV (ZeroExt8to64 x) y) ++ ++(Rsh64Ux64 x y) && !shiftIsBounded(v) => (MASKEQZ (SRLV x y) (SGTU (MOVVconst [64]) y)) ++(Rsh64Ux32 x y) && !shiftIsBounded(v) => (MASKEQZ (SRLV x (ZeroExt32to64 y)) (SGTU (MOVVconst [64]) (ZeroExt32to64 y))) ++(Rsh64Ux16 x y) && !shiftIsBounded(v) => (MASKEQZ (SRLV x (ZeroExt16to64 y)) (SGTU (MOVVconst [64]) (ZeroExt16to64 y))) ++(Rsh64Ux8 x y) && !shiftIsBounded(v) => (MASKEQZ (SRLV x (ZeroExt8to64 y)) (SGTU (MOVVconst [64]) (ZeroExt8to64 y))) ++ ++(Rsh32Ux64 x y) && !shiftIsBounded(v) => (MASKEQZ (SRL x y) (SGTU (MOVVconst [32]) y)) ++(Rsh32Ux32 x y) && !shiftIsBounded(v) => (MASKEQZ (SRL x (ZeroExt32to64 y)) (SGTU (MOVVconst [32]) (ZeroExt32to64 y))) ++(Rsh32Ux16 x y) && !shiftIsBounded(v) => (MASKEQZ (SRL x (ZeroExt16to64 y)) (SGTU (MOVVconst [32]) (ZeroExt16to64 y))) ++(Rsh32Ux8 x y) && !shiftIsBounded(v) => (MASKEQZ (SRL x (ZeroExt8to64 y)) (SGTU (MOVVconst [32]) (ZeroExt8to64 y))) ++ ++(Rsh16Ux64 x y) && !shiftIsBounded(v) => (MASKEQZ (SRLV (ZeroExt16to64 x) y) (SGTU (MOVVconst [64]) y)) ++(Rsh16Ux32 x y) && !shiftIsBounded(v) => (MASKEQZ (SRLV (ZeroExt16to64 x) (ZeroExt32to64 y)) (SGTU (MOVVconst [64]) (ZeroExt32to64 y))) ++(Rsh16Ux16 x y) && !shiftIsBounded(v) => (MASKEQZ (SRLV (ZeroExt16to64 x) (ZeroExt16to64 y)) (SGTU (MOVVconst [64]) (ZeroExt16to64 y))) ++(Rsh16Ux8 x y) && !shiftIsBounded(v) => (MASKEQZ (SRLV (ZeroExt16to64 x) (ZeroExt8to64 y)) (SGTU (MOVVconst [64]) (ZeroExt8to64 y))) ++ ++(Rsh8Ux64 x y) && !shiftIsBounded(v) => (MASKEQZ (SRLV (ZeroExt8to64 x) y) (SGTU (MOVVconst [64]) y)) ++(Rsh8Ux32 x y) && !shiftIsBounded(v) => (MASKEQZ (SRLV (ZeroExt8to64 x) (ZeroExt32to64 y)) (SGTU (MOVVconst [64]) (ZeroExt32to64 y))) ++(Rsh8Ux16 x y) && !shiftIsBounded(v) => 
(MASKEQZ (SRLV (ZeroExt8to64 x) (ZeroExt16to64 y)) (SGTU (MOVVconst [64]) (ZeroExt16to64 y))) ++(Rsh8Ux8 x y) && !shiftIsBounded(v) => (MASKEQZ (SRLV (ZeroExt8to64 x) (ZeroExt8to64 y)) (SGTU (MOVVconst [64]) (ZeroExt8to64 y))) ++ ++// signed right shift ++(Rsh64x(64|32|16|8) x y) && shiftIsBounded(v) => (SRAV x y) ++(Rsh32x(64|32|16|8) x y) && shiftIsBounded(v) => (SRA x y) ++(Rsh16x(64|32|16|8) x y) && shiftIsBounded(v) => (SRAV (SignExt16to64 x) y) ++(Rsh8x(64|32|16|8) x y) && shiftIsBounded(v) => (SRAV (SignExt8to64 x) y) ++ ++(Rsh64x64 x y) && !shiftIsBounded(v) => (SRAV x (OR (NEGV (SGTU y (MOVVconst [63]))) y)) ++(Rsh64x32 x y) && !shiftIsBounded(v) => (SRAV x (OR (NEGV (SGTU (ZeroExt32to64 y) (MOVVconst [63]))) (ZeroExt32to64 y))) ++(Rsh64x16 x y) && !shiftIsBounded(v) => (SRAV x (OR (NEGV (SGTU (ZeroExt16to64 y) (MOVVconst [63]))) (ZeroExt16to64 y))) ++(Rsh64x8 x y) && !shiftIsBounded(v) => (SRAV x (OR (NEGV (SGTU (ZeroExt8to64 y) (MOVVconst [63]))) (ZeroExt8to64 y))) ++ ++(Rsh32x64 x y) && !shiftIsBounded(v) => (SRA x (OR (NEGV (SGTU y (MOVVconst [31]))) y)) ++(Rsh32x32 x y) && !shiftIsBounded(v) => (SRA x (OR (NEGV (SGTU (ZeroExt32to64 y) (MOVVconst [31]))) (ZeroExt32to64 y))) ++(Rsh32x16 x y) && !shiftIsBounded(v) => (SRA x (OR (NEGV (SGTU (ZeroExt16to64 y) (MOVVconst [31]))) (ZeroExt16to64 y))) ++(Rsh32x8 x y) && !shiftIsBounded(v) => (SRA x (OR (NEGV (SGTU (ZeroExt8to64 y) (MOVVconst [31]))) (ZeroExt8to64 y))) ++ ++(Rsh16x64 x y) && !shiftIsBounded(v) => (SRAV (SignExt16to64 x) (OR (NEGV (SGTU y (MOVVconst [63]))) y)) ++(Rsh16x32 x y) && !shiftIsBounded(v) => (SRAV (SignExt16to64 x) (OR (NEGV (SGTU (ZeroExt32to64 y) (MOVVconst [63]))) (ZeroExt32to64 y))) ++(Rsh16x16 x y) && !shiftIsBounded(v) => (SRAV (SignExt16to64 x) (OR (NEGV (SGTU (ZeroExt16to64 y) (MOVVconst [63]))) (ZeroExt16to64 y))) ++(Rsh16x8 x y) && !shiftIsBounded(v) => (SRAV (SignExt16to64 x) (OR (NEGV (SGTU (ZeroExt8to64 y) (MOVVconst [63]))) (ZeroExt8to64 y))) ++ ++(Rsh8x64 x y) && 
!shiftIsBounded(v) => (SRAV (SignExt8to64 x) (OR (NEGV (SGTU y (MOVVconst [63]))) y)) ++(Rsh8x32 x y) && !shiftIsBounded(v) => (SRAV (SignExt8to64 x) (OR (NEGV (SGTU (ZeroExt32to64 y) (MOVVconst [63]))) (ZeroExt32to64 y))) ++(Rsh8x16 x y) && !shiftIsBounded(v) => (SRAV (SignExt8to64 x) (OR (NEGV (SGTU (ZeroExt16to64 y) (MOVVconst [63]))) (ZeroExt16to64 y))) ++(Rsh8x8 x y) && !shiftIsBounded(v) => (SRAV (SignExt8to64 x) (OR (NEGV (SGTU (ZeroExt8to64 y) (MOVVconst [63]))) (ZeroExt8to64 y))) + + // bitfield ops + +@@ -698,6 +717,15 @@ + (ROTR x (MOVVconst [c])) => (ROTRconst x [c&31]) + (ROTRV x (MOVVconst [c])) => (ROTRVconst x [c&63]) + ++// SLLV/SRLV/SRAV only considers the bottom 6 bits of y, similarly SLL/SRL/SRA only considers the ++// bottom 5 bits of y. ++(SLL x (ANDconst [31] y)) => (SLL x y) ++(SRL x (ANDconst [31] y)) => (SRL x y) ++(SRA x (ANDconst [31] y)) => (SRA x y) ++(SLLV x (ANDconst [63] y)) => (SLLV x y) ++(SRLV x (ANDconst [63] y)) => (SRLV x y) ++(SRAV x (ANDconst [63] y)) => (SRAV x y) ++ + // Avoid unnecessary zero and sign extension when right shifting. 
+ (SRLVconst [rc] (MOVWUreg y)) && rc >= 0 && rc <= 31 => (SRLconst [int64(rc)] y) + (SRAVconst [rc] (MOVWreg y)) && rc >= 0 && rc <= 31 => (SRAconst [int64(rc)] y) +diff --git a/src/cmd/compile/internal/ssa/rewriteLOONG64.go b/src/cmd/compile/internal/ssa/rewriteLOONG64.go +index 93bf95eb51..9efdca9c9c 100644 +--- a/src/cmd/compile/internal/ssa/rewriteLOONG64.go ++++ b/src/cmd/compile/internal/ssa/rewriteLOONG64.go +@@ -5994,6 +5994,18 @@ func rewriteValueLOONG64_OpLOONG64SLL(v *Value) bool { + v.AddArg(x) + return true + } ++ // match: (SLL x (ANDconst [31] y)) ++ // result: (SLL x y) ++ for { ++ x := v_0 ++ if v_1.Op != OpLOONG64ANDconst || auxIntToInt64(v_1.AuxInt) != 31 { ++ break ++ } ++ y := v_1.Args[0] ++ v.reset(OpLOONG64SLL) ++ v.AddArg2(x, y) ++ return true ++ } + return false + } + func rewriteValueLOONG64_OpLOONG64SLLV(v *Value) bool { +@@ -6027,6 +6039,18 @@ func rewriteValueLOONG64_OpLOONG64SLLV(v *Value) bool { + v.AddArg(x) + return true + } ++ // match: (SLLV x (ANDconst [63] y)) ++ // result: (SLLV x y) ++ for { ++ x := v_0 ++ if v_1.Op != OpLOONG64ANDconst || auxIntToInt64(v_1.AuxInt) != 63 { ++ break ++ } ++ y := v_1.Args[0] ++ v.reset(OpLOONG64SLLV) ++ v.AddArg2(x, y) ++ return true ++ } + return false + } + func rewriteValueLOONG64_OpLOONG64SLLVconst(v *Value) bool { +@@ -6082,6 +6106,18 @@ func rewriteValueLOONG64_OpLOONG64SRA(v *Value) bool { + v.AddArg(x) + return true + } ++ // match: (SRA x (ANDconst [31] y)) ++ // result: (SRA x y) ++ for { ++ x := v_0 ++ if v_1.Op != OpLOONG64ANDconst || auxIntToInt64(v_1.AuxInt) != 31 { ++ break ++ } ++ y := v_1.Args[0] ++ v.reset(OpLOONG64SRA) ++ v.AddArg2(x, y) ++ return true ++ } + return false + } + func rewriteValueLOONG64_OpLOONG64SRAV(v *Value) bool { +@@ -6117,6 +6153,18 @@ func rewriteValueLOONG64_OpLOONG64SRAV(v *Value) bool { + v.AddArg(x) + return true + } ++ // match: (SRAV x (ANDconst [63] y)) ++ // result: (SRAV x y) ++ for { ++ x := v_0 ++ if v_1.Op != OpLOONG64ANDconst || 
auxIntToInt64(v_1.AuxInt) != 63 { ++ break ++ } ++ y := v_1.Args[0] ++ v.reset(OpLOONG64SRAV) ++ v.AddArg2(x, y) ++ return true ++ } + return false + } + func rewriteValueLOONG64_OpLOONG64SRAVconst(v *Value) bool { +@@ -6249,6 +6297,18 @@ func rewriteValueLOONG64_OpLOONG64SRL(v *Value) bool { + v.AddArg(x) + return true + } ++ // match: (SRL x (ANDconst [31] y)) ++ // result: (SRL x y) ++ for { ++ x := v_0 ++ if v_1.Op != OpLOONG64ANDconst || auxIntToInt64(v_1.AuxInt) != 31 { ++ break ++ } ++ y := v_1.Args[0] ++ v.reset(OpLOONG64SRL) ++ v.AddArg2(x, y) ++ return true ++ } + return false + } + func rewriteValueLOONG64_OpLOONG64SRLV(v *Value) bool { +@@ -6282,6 +6342,18 @@ func rewriteValueLOONG64_OpLOONG64SRLV(v *Value) bool { + v.AddArg(x) + return true + } ++ // match: (SRLV x (ANDconst [63] y)) ++ // result: (SRLV x y) ++ for { ++ x := v_0 ++ if v_1.Op != OpLOONG64ANDconst || auxIntToInt64(v_1.AuxInt) != 63 { ++ break ++ } ++ y := v_1.Args[0] ++ v.reset(OpLOONG64SRLV) ++ v.AddArg2(x, y) ++ return true ++ } + return false + } + func rewriteValueLOONG64_OpLOONG64SRLVconst(v *Value) bool { +@@ -7384,12 +7456,29 @@ func rewriteValueLOONG64_OpLsh16x16(v *Value) bool { + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types ++ // match: (Lsh16x16 x y) ++ // cond: shiftIsBounded(v) ++ // result: (SLLV x y) ++ for { ++ x := v_0 ++ y := v_1 ++ if !(shiftIsBounded(v)) { ++ break ++ } ++ v.reset(OpLOONG64SLLV) ++ v.AddArg2(x, y) ++ return true ++ } + // match: (Lsh16x16 x y) ++ // cond: !shiftIsBounded(v) + // result: (MASKEQZ (SLLV x (ZeroExt16to64 y)) (SGTU (MOVVconst [64]) (ZeroExt16to64 y))) + for { + t := v.Type + x := v_0 + y := v_1 ++ if !(!shiftIsBounded(v)) { ++ break ++ } + v.reset(OpLOONG64MASKEQZ) + v0 := b.NewValue0(v.Pos, OpLOONG64SLLV, t) + v1 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64) +@@ -7402,18 +7491,36 @@ func rewriteValueLOONG64_OpLsh16x16(v *Value) bool { + v.AddArg2(v0, v2) + return true + } ++ return false + } + func 
rewriteValueLOONG64_OpLsh16x32(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types ++ // match: (Lsh16x32 x y) ++ // cond: shiftIsBounded(v) ++ // result: (SLLV x y) ++ for { ++ x := v_0 ++ y := v_1 ++ if !(shiftIsBounded(v)) { ++ break ++ } ++ v.reset(OpLOONG64SLLV) ++ v.AddArg2(x, y) ++ return true ++ } + // match: (Lsh16x32 x y) ++ // cond: !shiftIsBounded(v) + // result: (MASKEQZ (SLLV x (ZeroExt32to64 y)) (SGTU (MOVVconst [64]) (ZeroExt32to64 y))) + for { + t := v.Type + x := v_0 + y := v_1 ++ if !(!shiftIsBounded(v)) { ++ break ++ } + v.reset(OpLOONG64MASKEQZ) + v0 := b.NewValue0(v.Pos, OpLOONG64SLLV, t) + v1 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64) +@@ -7426,18 +7533,36 @@ func rewriteValueLOONG64_OpLsh16x32(v *Value) bool { + v.AddArg2(v0, v2) + return true + } ++ return false + } + func rewriteValueLOONG64_OpLsh16x64(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types ++ // match: (Lsh16x64 x y) ++ // cond: shiftIsBounded(v) ++ // result: (SLLV x y) ++ for { ++ x := v_0 ++ y := v_1 ++ if !(shiftIsBounded(v)) { ++ break ++ } ++ v.reset(OpLOONG64SLLV) ++ v.AddArg2(x, y) ++ return true ++ } + // match: (Lsh16x64 x y) ++ // cond: !shiftIsBounded(v) + // result: (MASKEQZ (SLLV x y) (SGTU (MOVVconst [64]) y)) + for { + t := v.Type + x := v_0 + y := v_1 ++ if !(!shiftIsBounded(v)) { ++ break ++ } + v.reset(OpLOONG64MASKEQZ) + v0 := b.NewValue0(v.Pos, OpLOONG64SLLV, t) + v0.AddArg2(x, y) +@@ -7448,18 +7573,36 @@ func rewriteValueLOONG64_OpLsh16x64(v *Value) bool { + v.AddArg2(v0, v1) + return true + } ++ return false + } + func rewriteValueLOONG64_OpLsh16x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types ++ // match: (Lsh16x8 x y) ++ // cond: shiftIsBounded(v) ++ // result: (SLLV x y) ++ for { ++ x := v_0 ++ y := v_1 ++ if !(shiftIsBounded(v)) { ++ break ++ } ++ v.reset(OpLOONG64SLLV) ++ v.AddArg2(x, y) ++ 
return true ++ } + // match: (Lsh16x8 x y) ++ // cond: !shiftIsBounded(v) + // result: (MASKEQZ (SLLV x (ZeroExt8to64 y)) (SGTU (MOVVconst [64]) (ZeroExt8to64 y))) + for { + t := v.Type + x := v_0 + y := v_1 ++ if !(!shiftIsBounded(v)) { ++ break ++ } + v.reset(OpLOONG64MASKEQZ) + v0 := b.NewValue0(v.Pos, OpLOONG64SLLV, t) + v1 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64) +@@ -7472,18 +7615,36 @@ func rewriteValueLOONG64_OpLsh16x8(v *Value) bool { + v.AddArg2(v0, v2) + return true + } ++ return false + } + func rewriteValueLOONG64_OpLsh32x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types ++ // match: (Lsh32x16 x y) ++ // cond: shiftIsBounded(v) ++ // result: (SLL x y) ++ for { ++ x := v_0 ++ y := v_1 ++ if !(shiftIsBounded(v)) { ++ break ++ } ++ v.reset(OpLOONG64SLL) ++ v.AddArg2(x, y) ++ return true ++ } + // match: (Lsh32x16 x y) ++ // cond: !shiftIsBounded(v) + // result: (MASKEQZ (SLL x (ZeroExt16to64 y)) (SGTU (MOVVconst [32]) (ZeroExt16to64 y))) + for { + t := v.Type + x := v_0 + y := v_1 ++ if !(!shiftIsBounded(v)) { ++ break ++ } + v.reset(OpLOONG64MASKEQZ) + v0 := b.NewValue0(v.Pos, OpLOONG64SLL, t) + v1 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64) +@@ -7496,18 +7657,36 @@ func rewriteValueLOONG64_OpLsh32x16(v *Value) bool { + v.AddArg2(v0, v2) + return true + } ++ return false + } + func rewriteValueLOONG64_OpLsh32x32(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types ++ // match: (Lsh32x32 x y) ++ // cond: shiftIsBounded(v) ++ // result: (SLL x y) ++ for { ++ x := v_0 ++ y := v_1 ++ if !(shiftIsBounded(v)) { ++ break ++ } ++ v.reset(OpLOONG64SLL) ++ v.AddArg2(x, y) ++ return true ++ } + // match: (Lsh32x32 x y) ++ // cond: !shiftIsBounded(v) + // result: (MASKEQZ (SLL x (ZeroExt32to64 y)) (SGTU (MOVVconst [32]) (ZeroExt32to64 y))) + for { + t := v.Type + x := v_0 + y := v_1 ++ if !(!shiftIsBounded(v)) { ++ break ++ } + 
v.reset(OpLOONG64MASKEQZ) + v0 := b.NewValue0(v.Pos, OpLOONG64SLL, t) + v1 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64) +@@ -7520,18 +7699,36 @@ func rewriteValueLOONG64_OpLsh32x32(v *Value) bool { + v.AddArg2(v0, v2) + return true + } ++ return false + } + func rewriteValueLOONG64_OpLsh32x64(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types ++ // match: (Lsh32x64 x y) ++ // cond: shiftIsBounded(v) ++ // result: (SLL x y) ++ for { ++ x := v_0 ++ y := v_1 ++ if !(shiftIsBounded(v)) { ++ break ++ } ++ v.reset(OpLOONG64SLL) ++ v.AddArg2(x, y) ++ return true ++ } + // match: (Lsh32x64 x y) ++ // cond: !shiftIsBounded(v) + // result: (MASKEQZ (SLL x y) (SGTU (MOVVconst [32]) y)) + for { + t := v.Type + x := v_0 + y := v_1 ++ if !(!shiftIsBounded(v)) { ++ break ++ } + v.reset(OpLOONG64MASKEQZ) + v0 := b.NewValue0(v.Pos, OpLOONG64SLL, t) + v0.AddArg2(x, y) +@@ -7542,18 +7739,36 @@ func rewriteValueLOONG64_OpLsh32x64(v *Value) bool { + v.AddArg2(v0, v1) + return true + } ++ return false + } + func rewriteValueLOONG64_OpLsh32x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types ++ // match: (Lsh32x8 x y) ++ // cond: shiftIsBounded(v) ++ // result: (SLL x y) ++ for { ++ x := v_0 ++ y := v_1 ++ if !(shiftIsBounded(v)) { ++ break ++ } ++ v.reset(OpLOONG64SLL) ++ v.AddArg2(x, y) ++ return true ++ } + // match: (Lsh32x8 x y) ++ // cond: !shiftIsBounded(v) + // result: (MASKEQZ (SLL x (ZeroExt8to64 y)) (SGTU (MOVVconst [32]) (ZeroExt8to64 y))) + for { + t := v.Type + x := v_0 + y := v_1 ++ if !(!shiftIsBounded(v)) { ++ break ++ } + v.reset(OpLOONG64MASKEQZ) + v0 := b.NewValue0(v.Pos, OpLOONG64SLL, t) + v1 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64) +@@ -7566,18 +7781,36 @@ func rewriteValueLOONG64_OpLsh32x8(v *Value) bool { + v.AddArg2(v0, v2) + return true + } ++ return false + } + func rewriteValueLOONG64_OpLsh64x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := 
v.Args[0] + b := v.Block + typ := &b.Func.Config.Types ++ // match: (Lsh64x16 x y) ++ // cond: shiftIsBounded(v) ++ // result: (SLLV x y) ++ for { ++ x := v_0 ++ y := v_1 ++ if !(shiftIsBounded(v)) { ++ break ++ } ++ v.reset(OpLOONG64SLLV) ++ v.AddArg2(x, y) ++ return true ++ } + // match: (Lsh64x16 x y) ++ // cond: !shiftIsBounded(v) + // result: (MASKEQZ (SLLV x (ZeroExt16to64 y)) (SGTU (MOVVconst [64]) (ZeroExt16to64 y))) + for { + t := v.Type + x := v_0 + y := v_1 ++ if !(!shiftIsBounded(v)) { ++ break ++ } + v.reset(OpLOONG64MASKEQZ) + v0 := b.NewValue0(v.Pos, OpLOONG64SLLV, t) + v1 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64) +@@ -7590,18 +7823,36 @@ func rewriteValueLOONG64_OpLsh64x16(v *Value) bool { + v.AddArg2(v0, v2) + return true + } ++ return false + } + func rewriteValueLOONG64_OpLsh64x32(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types ++ // match: (Lsh64x32 x y) ++ // cond: shiftIsBounded(v) ++ // result: (SLLV x y) ++ for { ++ x := v_0 ++ y := v_1 ++ if !(shiftIsBounded(v)) { ++ break ++ } ++ v.reset(OpLOONG64SLLV) ++ v.AddArg2(x, y) ++ return true ++ } + // match: (Lsh64x32 x y) ++ // cond: !shiftIsBounded(v) + // result: (MASKEQZ (SLLV x (ZeroExt32to64 y)) (SGTU (MOVVconst [64]) (ZeroExt32to64 y))) + for { + t := v.Type + x := v_0 + y := v_1 ++ if !(!shiftIsBounded(v)) { ++ break ++ } + v.reset(OpLOONG64MASKEQZ) + v0 := b.NewValue0(v.Pos, OpLOONG64SLLV, t) + v1 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64) +@@ -7614,18 +7865,36 @@ func rewriteValueLOONG64_OpLsh64x32(v *Value) bool { + v.AddArg2(v0, v2) + return true + } ++ return false + } + func rewriteValueLOONG64_OpLsh64x64(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types ++ // match: (Lsh64x64 x y) ++ // cond: shiftIsBounded(v) ++ // result: (SLLV x y) ++ for { ++ x := v_0 ++ y := v_1 ++ if !(shiftIsBounded(v)) { ++ break ++ } ++ v.reset(OpLOONG64SLLV) ++ v.AddArg2(x, y) ++ return 
true ++ } + // match: (Lsh64x64 x y) ++ // cond: !shiftIsBounded(v) + // result: (MASKEQZ (SLLV x y) (SGTU (MOVVconst [64]) y)) + for { + t := v.Type + x := v_0 + y := v_1 ++ if !(!shiftIsBounded(v)) { ++ break ++ } + v.reset(OpLOONG64MASKEQZ) + v0 := b.NewValue0(v.Pos, OpLOONG64SLLV, t) + v0.AddArg2(x, y) +@@ -7636,18 +7905,36 @@ func rewriteValueLOONG64_OpLsh64x64(v *Value) bool { + v.AddArg2(v0, v1) + return true + } ++ return false + } + func rewriteValueLOONG64_OpLsh64x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types ++ // match: (Lsh64x8 x y) ++ // cond: shiftIsBounded(v) ++ // result: (SLLV x y) ++ for { ++ x := v_0 ++ y := v_1 ++ if !(shiftIsBounded(v)) { ++ break ++ } ++ v.reset(OpLOONG64SLLV) ++ v.AddArg2(x, y) ++ return true ++ } + // match: (Lsh64x8 x y) ++ // cond: !shiftIsBounded(v) + // result: (MASKEQZ (SLLV x (ZeroExt8to64 y)) (SGTU (MOVVconst [64]) (ZeroExt8to64 y))) + for { + t := v.Type + x := v_0 + y := v_1 ++ if !(!shiftIsBounded(v)) { ++ break ++ } + v.reset(OpLOONG64MASKEQZ) + v0 := b.NewValue0(v.Pos, OpLOONG64SLLV, t) + v1 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64) +@@ -7660,18 +7947,36 @@ func rewriteValueLOONG64_OpLsh64x8(v *Value) bool { + v.AddArg2(v0, v2) + return true + } ++ return false + } + func rewriteValueLOONG64_OpLsh8x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types ++ // match: (Lsh8x16 x y) ++ // cond: shiftIsBounded(v) ++ // result: (SLLV x y) ++ for { ++ x := v_0 ++ y := v_1 ++ if !(shiftIsBounded(v)) { ++ break ++ } ++ v.reset(OpLOONG64SLLV) ++ v.AddArg2(x, y) ++ return true ++ } + // match: (Lsh8x16 x y) ++ // cond: !shiftIsBounded(v) + // result: (MASKEQZ (SLLV x (ZeroExt16to64 y)) (SGTU (MOVVconst [64]) (ZeroExt16to64 y))) + for { + t := v.Type + x := v_0 + y := v_1 ++ if !(!shiftIsBounded(v)) { ++ break ++ } + v.reset(OpLOONG64MASKEQZ) + v0 := b.NewValue0(v.Pos, OpLOONG64SLLV, t) + v1 := b.NewValue0(v.Pos, 
OpZeroExt16to64, typ.UInt64) +@@ -7684,18 +7989,36 @@ func rewriteValueLOONG64_OpLsh8x16(v *Value) bool { + v.AddArg2(v0, v2) + return true + } ++ return false + } + func rewriteValueLOONG64_OpLsh8x32(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types ++ // match: (Lsh8x32 x y) ++ // cond: shiftIsBounded(v) ++ // result: (SLLV x y) ++ for { ++ x := v_0 ++ y := v_1 ++ if !(shiftIsBounded(v)) { ++ break ++ } ++ v.reset(OpLOONG64SLLV) ++ v.AddArg2(x, y) ++ return true ++ } + // match: (Lsh8x32 x y) ++ // cond: !shiftIsBounded(v) + // result: (MASKEQZ (SLLV x (ZeroExt32to64 y)) (SGTU (MOVVconst [64]) (ZeroExt32to64 y))) + for { + t := v.Type + x := v_0 + y := v_1 ++ if !(!shiftIsBounded(v)) { ++ break ++ } + v.reset(OpLOONG64MASKEQZ) + v0 := b.NewValue0(v.Pos, OpLOONG64SLLV, t) + v1 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64) +@@ -7708,18 +8031,36 @@ func rewriteValueLOONG64_OpLsh8x32(v *Value) bool { + v.AddArg2(v0, v2) + return true + } ++ return false + } + func rewriteValueLOONG64_OpLsh8x64(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types ++ // match: (Lsh8x64 x y) ++ // cond: shiftIsBounded(v) ++ // result: (SLLV x y) ++ for { ++ x := v_0 ++ y := v_1 ++ if !(shiftIsBounded(v)) { ++ break ++ } ++ v.reset(OpLOONG64SLLV) ++ v.AddArg2(x, y) ++ return true ++ } + // match: (Lsh8x64 x y) ++ // cond: !shiftIsBounded(v) + // result: (MASKEQZ (SLLV x y) (SGTU (MOVVconst [64]) y)) + for { + t := v.Type + x := v_0 + y := v_1 ++ if !(!shiftIsBounded(v)) { ++ break ++ } + v.reset(OpLOONG64MASKEQZ) + v0 := b.NewValue0(v.Pos, OpLOONG64SLLV, t) + v0.AddArg2(x, y) +@@ -7730,18 +8071,36 @@ func rewriteValueLOONG64_OpLsh8x64(v *Value) bool { + v.AddArg2(v0, v1) + return true + } ++ return false + } + func rewriteValueLOONG64_OpLsh8x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types ++ // match: (Lsh8x8 x y) ++ // cond: 
shiftIsBounded(v) ++ // result: (SLLV x y) ++ for { ++ x := v_0 ++ y := v_1 ++ if !(shiftIsBounded(v)) { ++ break ++ } ++ v.reset(OpLOONG64SLLV) ++ v.AddArg2(x, y) ++ return true ++ } + // match: (Lsh8x8 x y) ++ // cond: !shiftIsBounded(v) + // result: (MASKEQZ (SLLV x (ZeroExt8to64 y)) (SGTU (MOVVconst [64]) (ZeroExt8to64 y))) + for { + t := v.Type + x := v_0 + y := v_1 ++ if !(!shiftIsBounded(v)) { ++ break ++ } + v.reset(OpLOONG64MASKEQZ) + v0 := b.NewValue0(v.Pos, OpLOONG64SLLV, t) + v1 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64) +@@ -7754,6 +8113,7 @@ func rewriteValueLOONG64_OpLsh8x8(v *Value) bool { + v.AddArg2(v0, v2) + return true + } ++ return false + } + func rewriteValueLOONG64_OpMod16(v *Value) bool { + v_1 := v.Args[1] +@@ -8698,12 +9058,31 @@ func rewriteValueLOONG64_OpRsh16Ux16(v *Value) bool { + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types ++ // match: (Rsh16Ux16 x y) ++ // cond: shiftIsBounded(v) ++ // result: (SRLV (ZeroExt16to64 x) y) ++ for { ++ x := v_0 ++ y := v_1 ++ if !(shiftIsBounded(v)) { ++ break ++ } ++ v.reset(OpLOONG64SRLV) ++ v0 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64) ++ v0.AddArg(x) ++ v.AddArg2(v0, y) ++ return true ++ } + // match: (Rsh16Ux16 x y) ++ // cond: !shiftIsBounded(v) + // result: (MASKEQZ (SRLV (ZeroExt16to64 x) (ZeroExt16to64 y)) (SGTU (MOVVconst [64]) (ZeroExt16to64 y))) + for { + t := v.Type + x := v_0 + y := v_1 ++ if !(!shiftIsBounded(v)) { ++ break ++ } + v.reset(OpLOONG64MASKEQZ) + v0 := b.NewValue0(v.Pos, OpLOONG64SRLV, t) + v1 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64) +@@ -8718,18 +9097,38 @@ func rewriteValueLOONG64_OpRsh16Ux16(v *Value) bool { + v.AddArg2(v0, v3) + return true + } ++ return false + } + func rewriteValueLOONG64_OpRsh16Ux32(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types ++ // match: (Rsh16Ux32 x y) ++ // cond: shiftIsBounded(v) ++ // result: (SRLV (ZeroExt16to64 x) y) ++ for { ++ x := v_0 ++ y := v_1 
++ if !(shiftIsBounded(v)) { ++ break ++ } ++ v.reset(OpLOONG64SRLV) ++ v0 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64) ++ v0.AddArg(x) ++ v.AddArg2(v0, y) ++ return true ++ } + // match: (Rsh16Ux32 x y) ++ // cond: !shiftIsBounded(v) + // result: (MASKEQZ (SRLV (ZeroExt16to64 x) (ZeroExt32to64 y)) (SGTU (MOVVconst [64]) (ZeroExt32to64 y))) + for { + t := v.Type + x := v_0 + y := v_1 ++ if !(!shiftIsBounded(v)) { ++ break ++ } + v.reset(OpLOONG64MASKEQZ) + v0 := b.NewValue0(v.Pos, OpLOONG64SRLV, t) + v1 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64) +@@ -8744,18 +9143,38 @@ func rewriteValueLOONG64_OpRsh16Ux32(v *Value) bool { + v.AddArg2(v0, v3) + return true + } ++ return false + } + func rewriteValueLOONG64_OpRsh16Ux64(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types ++ // match: (Rsh16Ux64 x y) ++ // cond: shiftIsBounded(v) ++ // result: (SRLV (ZeroExt16to64 x) y) ++ for { ++ x := v_0 ++ y := v_1 ++ if !(shiftIsBounded(v)) { ++ break ++ } ++ v.reset(OpLOONG64SRLV) ++ v0 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64) ++ v0.AddArg(x) ++ v.AddArg2(v0, y) ++ return true ++ } + // match: (Rsh16Ux64 x y) ++ // cond: !shiftIsBounded(v) + // result: (MASKEQZ (SRLV (ZeroExt16to64 x) y) (SGTU (MOVVconst [64]) y)) + for { + t := v.Type + x := v_0 + y := v_1 ++ if !(!shiftIsBounded(v)) { ++ break ++ } + v.reset(OpLOONG64MASKEQZ) + v0 := b.NewValue0(v.Pos, OpLOONG64SRLV, t) + v1 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64) +@@ -8768,18 +9187,38 @@ func rewriteValueLOONG64_OpRsh16Ux64(v *Value) bool { + v.AddArg2(v0, v2) + return true + } ++ return false + } + func rewriteValueLOONG64_OpRsh16Ux8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types ++ // match: (Rsh16Ux8 x y) ++ // cond: shiftIsBounded(v) ++ // result: (SRLV (ZeroExt16to64 x) y) ++ for { ++ x := v_0 ++ y := v_1 ++ if !(shiftIsBounded(v)) { ++ break ++ } ++ v.reset(OpLOONG64SRLV) ++ v0 := 
b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64) ++ v0.AddArg(x) ++ v.AddArg2(v0, y) ++ return true ++ } + // match: (Rsh16Ux8 x y) ++ // cond: !shiftIsBounded(v) + // result: (MASKEQZ (SRLV (ZeroExt16to64 x) (ZeroExt8to64 y)) (SGTU (MOVVconst [64]) (ZeroExt8to64 y))) + for { + t := v.Type + x := v_0 + y := v_1 ++ if !(!shiftIsBounded(v)) { ++ break ++ } + v.reset(OpLOONG64MASKEQZ) + v0 := b.NewValue0(v.Pos, OpLOONG64SRLV, t) + v1 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64) +@@ -8794,18 +9233,38 @@ func rewriteValueLOONG64_OpRsh16Ux8(v *Value) bool { + v.AddArg2(v0, v3) + return true + } ++ return false + } + func rewriteValueLOONG64_OpRsh16x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types ++ // match: (Rsh16x16 x y) ++ // cond: shiftIsBounded(v) ++ // result: (SRAV (SignExt16to64 x) y) ++ for { ++ x := v_0 ++ y := v_1 ++ if !(shiftIsBounded(v)) { ++ break ++ } ++ v.reset(OpLOONG64SRAV) ++ v0 := b.NewValue0(v.Pos, OpSignExt16to64, typ.Int64) ++ v0.AddArg(x) ++ v.AddArg2(v0, y) ++ return true ++ } + // match: (Rsh16x16 x y) ++ // cond: !shiftIsBounded(v) + // result: (SRAV (SignExt16to64 x) (OR (NEGV (SGTU (ZeroExt16to64 y) (MOVVconst [63]))) (ZeroExt16to64 y))) + for { + t := v.Type + x := v_0 + y := v_1 ++ if !(!shiftIsBounded(v)) { ++ break ++ } + v.reset(OpLOONG64SRAV) + v0 := b.NewValue0(v.Pos, OpSignExt16to64, typ.Int64) + v0.AddArg(x) +@@ -8822,18 +9281,38 @@ func rewriteValueLOONG64_OpRsh16x16(v *Value) bool { + v.AddArg2(v0, v1) + return true + } ++ return false + } + func rewriteValueLOONG64_OpRsh16x32(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types ++ // match: (Rsh16x32 x y) ++ // cond: shiftIsBounded(v) ++ // result: (SRAV (SignExt16to64 x) y) ++ for { ++ x := v_0 ++ y := v_1 ++ if !(shiftIsBounded(v)) { ++ break ++ } ++ v.reset(OpLOONG64SRAV) ++ v0 := b.NewValue0(v.Pos, OpSignExt16to64, typ.Int64) ++ v0.AddArg(x) ++ v.AddArg2(v0, y) ++ return 
true ++ } + // match: (Rsh16x32 x y) ++ // cond: !shiftIsBounded(v) + // result: (SRAV (SignExt16to64 x) (OR (NEGV (SGTU (ZeroExt32to64 y) (MOVVconst [63]))) (ZeroExt32to64 y))) + for { + t := v.Type + x := v_0 + y := v_1 ++ if !(!shiftIsBounded(v)) { ++ break ++ } + v.reset(OpLOONG64SRAV) + v0 := b.NewValue0(v.Pos, OpSignExt16to64, typ.Int64) + v0.AddArg(x) +@@ -8850,18 +9329,38 @@ func rewriteValueLOONG64_OpRsh16x32(v *Value) bool { + v.AddArg2(v0, v1) + return true + } ++ return false + } + func rewriteValueLOONG64_OpRsh16x64(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types ++ // match: (Rsh16x64 x y) ++ // cond: shiftIsBounded(v) ++ // result: (SRAV (SignExt16to64 x) y) ++ for { ++ x := v_0 ++ y := v_1 ++ if !(shiftIsBounded(v)) { ++ break ++ } ++ v.reset(OpLOONG64SRAV) ++ v0 := b.NewValue0(v.Pos, OpSignExt16to64, typ.Int64) ++ v0.AddArg(x) ++ v.AddArg2(v0, y) ++ return true ++ } + // match: (Rsh16x64 x y) ++ // cond: !shiftIsBounded(v) + // result: (SRAV (SignExt16to64 x) (OR (NEGV (SGTU y (MOVVconst [63]))) y)) + for { + t := v.Type + x := v_0 + y := v_1 ++ if !(!shiftIsBounded(v)) { ++ break ++ } + v.reset(OpLOONG64SRAV) + v0 := b.NewValue0(v.Pos, OpSignExt16to64, typ.Int64) + v0.AddArg(x) +@@ -8876,18 +9375,38 @@ func rewriteValueLOONG64_OpRsh16x64(v *Value) bool { + v.AddArg2(v0, v1) + return true + } ++ return false + } + func rewriteValueLOONG64_OpRsh16x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types ++ // match: (Rsh16x8 x y) ++ // cond: shiftIsBounded(v) ++ // result: (SRAV (SignExt16to64 x) y) ++ for { ++ x := v_0 ++ y := v_1 ++ if !(shiftIsBounded(v)) { ++ break ++ } ++ v.reset(OpLOONG64SRAV) ++ v0 := b.NewValue0(v.Pos, OpSignExt16to64, typ.Int64) ++ v0.AddArg(x) ++ v.AddArg2(v0, y) ++ return true ++ } + // match: (Rsh16x8 x y) ++ // cond: !shiftIsBounded(v) + // result: (SRAV (SignExt16to64 x) (OR (NEGV (SGTU (ZeroExt8to64 y) (MOVVconst [63]))) 
(ZeroExt8to64 y))) + for { + t := v.Type + x := v_0 + y := v_1 ++ if !(!shiftIsBounded(v)) { ++ break ++ } + v.reset(OpLOONG64SRAV) + v0 := b.NewValue0(v.Pos, OpSignExt16to64, typ.Int64) + v0.AddArg(x) +@@ -8904,18 +9423,36 @@ func rewriteValueLOONG64_OpRsh16x8(v *Value) bool { + v.AddArg2(v0, v1) + return true + } ++ return false + } + func rewriteValueLOONG64_OpRsh32Ux16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types ++ // match: (Rsh32Ux16 x y) ++ // cond: shiftIsBounded(v) ++ // result: (SRL x y) ++ for { ++ x := v_0 ++ y := v_1 ++ if !(shiftIsBounded(v)) { ++ break ++ } ++ v.reset(OpLOONG64SRL) ++ v.AddArg2(x, y) ++ return true ++ } + // match: (Rsh32Ux16 x y) ++ // cond: !shiftIsBounded(v) + // result: (MASKEQZ (SRL x (ZeroExt16to64 y)) (SGTU (MOVVconst [32]) (ZeroExt16to64 y))) + for { + t := v.Type + x := v_0 + y := v_1 ++ if !(!shiftIsBounded(v)) { ++ break ++ } + v.reset(OpLOONG64MASKEQZ) + v0 := b.NewValue0(v.Pos, OpLOONG64SRL, t) + v1 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64) +@@ -8928,18 +9465,36 @@ func rewriteValueLOONG64_OpRsh32Ux16(v *Value) bool { + v.AddArg2(v0, v2) + return true + } ++ return false + } + func rewriteValueLOONG64_OpRsh32Ux32(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types ++ // match: (Rsh32Ux32 x y) ++ // cond: shiftIsBounded(v) ++ // result: (SRL x y) ++ for { ++ x := v_0 ++ y := v_1 ++ if !(shiftIsBounded(v)) { ++ break ++ } ++ v.reset(OpLOONG64SRL) ++ v.AddArg2(x, y) ++ return true ++ } + // match: (Rsh32Ux32 x y) ++ // cond: !shiftIsBounded(v) + // result: (MASKEQZ (SRL x (ZeroExt32to64 y)) (SGTU (MOVVconst [32]) (ZeroExt32to64 y))) + for { + t := v.Type + x := v_0 + y := v_1 ++ if !(!shiftIsBounded(v)) { ++ break ++ } + v.reset(OpLOONG64MASKEQZ) + v0 := b.NewValue0(v.Pos, OpLOONG64SRL, t) + v1 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64) +@@ -8952,18 +9507,36 @@ func rewriteValueLOONG64_OpRsh32Ux32(v 
*Value) bool { + v.AddArg2(v0, v2) + return true + } ++ return false + } + func rewriteValueLOONG64_OpRsh32Ux64(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types ++ // match: (Rsh32Ux64 x y) ++ // cond: shiftIsBounded(v) ++ // result: (SRL x y) ++ for { ++ x := v_0 ++ y := v_1 ++ if !(shiftIsBounded(v)) { ++ break ++ } ++ v.reset(OpLOONG64SRL) ++ v.AddArg2(x, y) ++ return true ++ } + // match: (Rsh32Ux64 x y) ++ // cond: !shiftIsBounded(v) + // result: (MASKEQZ (SRL x y) (SGTU (MOVVconst [32]) y)) + for { + t := v.Type + x := v_0 + y := v_1 ++ if !(!shiftIsBounded(v)) { ++ break ++ } + v.reset(OpLOONG64MASKEQZ) + v0 := b.NewValue0(v.Pos, OpLOONG64SRL, t) + v0.AddArg2(x, y) +@@ -8974,18 +9547,36 @@ func rewriteValueLOONG64_OpRsh32Ux64(v *Value) bool { + v.AddArg2(v0, v1) + return true + } ++ return false + } + func rewriteValueLOONG64_OpRsh32Ux8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types ++ // match: (Rsh32Ux8 x y) ++ // cond: shiftIsBounded(v) ++ // result: (SRL x y) ++ for { ++ x := v_0 ++ y := v_1 ++ if !(shiftIsBounded(v)) { ++ break ++ } ++ v.reset(OpLOONG64SRL) ++ v.AddArg2(x, y) ++ return true ++ } + // match: (Rsh32Ux8 x y) ++ // cond: !shiftIsBounded(v) + // result: (MASKEQZ (SRL x (ZeroExt8to64 y)) (SGTU (MOVVconst [32]) (ZeroExt8to64 y))) + for { + t := v.Type + x := v_0 + y := v_1 ++ if !(!shiftIsBounded(v)) { ++ break ++ } + v.reset(OpLOONG64MASKEQZ) + v0 := b.NewValue0(v.Pos, OpLOONG64SRL, t) + v1 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64) +@@ -8998,18 +9589,36 @@ func rewriteValueLOONG64_OpRsh32Ux8(v *Value) bool { + v.AddArg2(v0, v2) + return true + } ++ return false + } + func rewriteValueLOONG64_OpRsh32x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types ++ // match: (Rsh32x16 x y) ++ // cond: shiftIsBounded(v) ++ // result: (SRA x y) ++ for { ++ x := v_0 ++ y := v_1 ++ if 
!(shiftIsBounded(v)) { ++ break ++ } ++ v.reset(OpLOONG64SRA) ++ v.AddArg2(x, y) ++ return true ++ } + // match: (Rsh32x16 x y) ++ // cond: !shiftIsBounded(v) + // result: (SRA x (OR (NEGV (SGTU (ZeroExt16to64 y) (MOVVconst [31]))) (ZeroExt16to64 y))) + for { + t := v.Type + x := v_0 + y := v_1 ++ if !(!shiftIsBounded(v)) { ++ break ++ } + v.reset(OpLOONG64SRA) + v0 := b.NewValue0(v.Pos, OpLOONG64OR, t) + v1 := b.NewValue0(v.Pos, OpLOONG64NEGV, t) +@@ -9024,18 +9633,36 @@ func rewriteValueLOONG64_OpRsh32x16(v *Value) bool { + v.AddArg2(x, v0) + return true + } ++ return false + } + func rewriteValueLOONG64_OpRsh32x32(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types ++ // match: (Rsh32x32 x y) ++ // cond: shiftIsBounded(v) ++ // result: (SRA x y) ++ for { ++ x := v_0 ++ y := v_1 ++ if !(shiftIsBounded(v)) { ++ break ++ } ++ v.reset(OpLOONG64SRA) ++ v.AddArg2(x, y) ++ return true ++ } + // match: (Rsh32x32 x y) ++ // cond: !shiftIsBounded(v) + // result: (SRA x (OR (NEGV (SGTU (ZeroExt32to64 y) (MOVVconst [31]))) (ZeroExt32to64 y))) + for { + t := v.Type + x := v_0 + y := v_1 ++ if !(!shiftIsBounded(v)) { ++ break ++ } + v.reset(OpLOONG64SRA) + v0 := b.NewValue0(v.Pos, OpLOONG64OR, t) + v1 := b.NewValue0(v.Pos, OpLOONG64NEGV, t) +@@ -9050,18 +9677,36 @@ func rewriteValueLOONG64_OpRsh32x32(v *Value) bool { + v.AddArg2(x, v0) + return true + } ++ return false + } + func rewriteValueLOONG64_OpRsh32x64(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types ++ // match: (Rsh32x64 x y) ++ // cond: shiftIsBounded(v) ++ // result: (SRA x y) ++ for { ++ x := v_0 ++ y := v_1 ++ if !(shiftIsBounded(v)) { ++ break ++ } ++ v.reset(OpLOONG64SRA) ++ v.AddArg2(x, y) ++ return true ++ } + // match: (Rsh32x64 x y) ++ // cond: !shiftIsBounded(v) + // result: (SRA x (OR (NEGV (SGTU y (MOVVconst [31]))) y)) + for { + t := v.Type + x := v_0 + y := v_1 ++ if !(!shiftIsBounded(v)) { ++ break ++ } 
+ v.reset(OpLOONG64SRA) + v0 := b.NewValue0(v.Pos, OpLOONG64OR, t) + v1 := b.NewValue0(v.Pos, OpLOONG64NEGV, t) +@@ -9074,18 +9719,36 @@ func rewriteValueLOONG64_OpRsh32x64(v *Value) bool { + v.AddArg2(x, v0) + return true + } ++ return false + } + func rewriteValueLOONG64_OpRsh32x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types ++ // match: (Rsh32x8 x y) ++ // cond: shiftIsBounded(v) ++ // result: (SRA x y) ++ for { ++ x := v_0 ++ y := v_1 ++ if !(shiftIsBounded(v)) { ++ break ++ } ++ v.reset(OpLOONG64SRA) ++ v.AddArg2(x, y) ++ return true ++ } + // match: (Rsh32x8 x y) ++ // cond: !shiftIsBounded(v) + // result: (SRA x (OR (NEGV (SGTU (ZeroExt8to64 y) (MOVVconst [31]))) (ZeroExt8to64 y))) + for { + t := v.Type + x := v_0 + y := v_1 ++ if !(!shiftIsBounded(v)) { ++ break ++ } + v.reset(OpLOONG64SRA) + v0 := b.NewValue0(v.Pos, OpLOONG64OR, t) + v1 := b.NewValue0(v.Pos, OpLOONG64NEGV, t) +@@ -9100,18 +9763,36 @@ func rewriteValueLOONG64_OpRsh32x8(v *Value) bool { + v.AddArg2(x, v0) + return true + } ++ return false + } + func rewriteValueLOONG64_OpRsh64Ux16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types ++ // match: (Rsh64Ux16 x y) ++ // cond: shiftIsBounded(v) ++ // result: (SRLV x y) ++ for { ++ x := v_0 ++ y := v_1 ++ if !(shiftIsBounded(v)) { ++ break ++ } ++ v.reset(OpLOONG64SRLV) ++ v.AddArg2(x, y) ++ return true ++ } + // match: (Rsh64Ux16 x y) ++ // cond: !shiftIsBounded(v) + // result: (MASKEQZ (SRLV x (ZeroExt16to64 y)) (SGTU (MOVVconst [64]) (ZeroExt16to64 y))) + for { + t := v.Type + x := v_0 + y := v_1 ++ if !(!shiftIsBounded(v)) { ++ break ++ } + v.reset(OpLOONG64MASKEQZ) + v0 := b.NewValue0(v.Pos, OpLOONG64SRLV, t) + v1 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64) +@@ -9124,18 +9805,36 @@ func rewriteValueLOONG64_OpRsh64Ux16(v *Value) bool { + v.AddArg2(v0, v2) + return true + } ++ return false + } + func rewriteValueLOONG64_OpRsh64Ux32(v 
*Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types ++ // match: (Rsh64Ux32 x y) ++ // cond: shiftIsBounded(v) ++ // result: (SRLV x y) ++ for { ++ x := v_0 ++ y := v_1 ++ if !(shiftIsBounded(v)) { ++ break ++ } ++ v.reset(OpLOONG64SRLV) ++ v.AddArg2(x, y) ++ return true ++ } + // match: (Rsh64Ux32 x y) ++ // cond: !shiftIsBounded(v) + // result: (MASKEQZ (SRLV x (ZeroExt32to64 y)) (SGTU (MOVVconst [64]) (ZeroExt32to64 y))) + for { + t := v.Type + x := v_0 + y := v_1 ++ if !(!shiftIsBounded(v)) { ++ break ++ } + v.reset(OpLOONG64MASKEQZ) + v0 := b.NewValue0(v.Pos, OpLOONG64SRLV, t) + v1 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64) +@@ -9148,18 +9847,36 @@ func rewriteValueLOONG64_OpRsh64Ux32(v *Value) bool { + v.AddArg2(v0, v2) + return true + } ++ return false + } + func rewriteValueLOONG64_OpRsh64Ux64(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types ++ // match: (Rsh64Ux64 x y) ++ // cond: shiftIsBounded(v) ++ // result: (SRLV x y) ++ for { ++ x := v_0 ++ y := v_1 ++ if !(shiftIsBounded(v)) { ++ break ++ } ++ v.reset(OpLOONG64SRLV) ++ v.AddArg2(x, y) ++ return true ++ } + // match: (Rsh64Ux64 x y) ++ // cond: !shiftIsBounded(v) + // result: (MASKEQZ (SRLV x y) (SGTU (MOVVconst [64]) y)) + for { + t := v.Type + x := v_0 + y := v_1 ++ if !(!shiftIsBounded(v)) { ++ break ++ } + v.reset(OpLOONG64MASKEQZ) + v0 := b.NewValue0(v.Pos, OpLOONG64SRLV, t) + v0.AddArg2(x, y) +@@ -9170,18 +9887,36 @@ func rewriteValueLOONG64_OpRsh64Ux64(v *Value) bool { + v.AddArg2(v0, v1) + return true + } ++ return false + } + func rewriteValueLOONG64_OpRsh64Ux8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types ++ // match: (Rsh64Ux8 x y) ++ // cond: shiftIsBounded(v) ++ // result: (SRLV x y) ++ for { ++ x := v_0 ++ y := v_1 ++ if !(shiftIsBounded(v)) { ++ break ++ } ++ v.reset(OpLOONG64SRLV) ++ v.AddArg2(x, y) ++ return true ++ } + // 
match: (Rsh64Ux8 x y) ++ // cond: !shiftIsBounded(v) + // result: (MASKEQZ (SRLV x (ZeroExt8to64 y)) (SGTU (MOVVconst [64]) (ZeroExt8to64 y))) + for { + t := v.Type + x := v_0 + y := v_1 ++ if !(!shiftIsBounded(v)) { ++ break ++ } + v.reset(OpLOONG64MASKEQZ) + v0 := b.NewValue0(v.Pos, OpLOONG64SRLV, t) + v1 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64) +@@ -9194,18 +9929,36 @@ func rewriteValueLOONG64_OpRsh64Ux8(v *Value) bool { + v.AddArg2(v0, v2) + return true + } ++ return false + } + func rewriteValueLOONG64_OpRsh64x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types ++ // match: (Rsh64x16 x y) ++ // cond: shiftIsBounded(v) ++ // result: (SRAV x y) ++ for { ++ x := v_0 ++ y := v_1 ++ if !(shiftIsBounded(v)) { ++ break ++ } ++ v.reset(OpLOONG64SRAV) ++ v.AddArg2(x, y) ++ return true ++ } + // match: (Rsh64x16 x y) ++ // cond: !shiftIsBounded(v) + // result: (SRAV x (OR (NEGV (SGTU (ZeroExt16to64 y) (MOVVconst [63]))) (ZeroExt16to64 y))) + for { + t := v.Type + x := v_0 + y := v_1 ++ if !(!shiftIsBounded(v)) { ++ break ++ } + v.reset(OpLOONG64SRAV) + v0 := b.NewValue0(v.Pos, OpLOONG64OR, t) + v1 := b.NewValue0(v.Pos, OpLOONG64NEGV, t) +@@ -9220,18 +9973,36 @@ func rewriteValueLOONG64_OpRsh64x16(v *Value) bool { + v.AddArg2(x, v0) + return true + } ++ return false + } + func rewriteValueLOONG64_OpRsh64x32(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types ++ // match: (Rsh64x32 x y) ++ // cond: shiftIsBounded(v) ++ // result: (SRAV x y) ++ for { ++ x := v_0 ++ y := v_1 ++ if !(shiftIsBounded(v)) { ++ break ++ } ++ v.reset(OpLOONG64SRAV) ++ v.AddArg2(x, y) ++ return true ++ } + // match: (Rsh64x32 x y) ++ // cond: !shiftIsBounded(v) + // result: (SRAV x (OR (NEGV (SGTU (ZeroExt32to64 y) (MOVVconst [63]))) (ZeroExt32to64 y))) + for { + t := v.Type + x := v_0 + y := v_1 ++ if !(!shiftIsBounded(v)) { ++ break ++ } + v.reset(OpLOONG64SRAV) + v0 := b.NewValue0(v.Pos, 
OpLOONG64OR, t) + v1 := b.NewValue0(v.Pos, OpLOONG64NEGV, t) +@@ -9246,18 +10017,36 @@ func rewriteValueLOONG64_OpRsh64x32(v *Value) bool { + v.AddArg2(x, v0) + return true + } ++ return false + } + func rewriteValueLOONG64_OpRsh64x64(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types ++ // match: (Rsh64x64 x y) ++ // cond: shiftIsBounded(v) ++ // result: (SRAV x y) ++ for { ++ x := v_0 ++ y := v_1 ++ if !(shiftIsBounded(v)) { ++ break ++ } ++ v.reset(OpLOONG64SRAV) ++ v.AddArg2(x, y) ++ return true ++ } + // match: (Rsh64x64 x y) ++ // cond: !shiftIsBounded(v) + // result: (SRAV x (OR (NEGV (SGTU y (MOVVconst [63]))) y)) + for { + t := v.Type + x := v_0 + y := v_1 ++ if !(!shiftIsBounded(v)) { ++ break ++ } + v.reset(OpLOONG64SRAV) + v0 := b.NewValue0(v.Pos, OpLOONG64OR, t) + v1 := b.NewValue0(v.Pos, OpLOONG64NEGV, t) +@@ -9270,18 +10059,36 @@ func rewriteValueLOONG64_OpRsh64x64(v *Value) bool { + v.AddArg2(x, v0) + return true + } ++ return false + } + func rewriteValueLOONG64_OpRsh64x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types ++ // match: (Rsh64x8 x y) ++ // cond: shiftIsBounded(v) ++ // result: (SRAV x y) ++ for { ++ x := v_0 ++ y := v_1 ++ if !(shiftIsBounded(v)) { ++ break ++ } ++ v.reset(OpLOONG64SRAV) ++ v.AddArg2(x, y) ++ return true ++ } + // match: (Rsh64x8 x y) ++ // cond: !shiftIsBounded(v) + // result: (SRAV x (OR (NEGV (SGTU (ZeroExt8to64 y) (MOVVconst [63]))) (ZeroExt8to64 y))) + for { + t := v.Type + x := v_0 + y := v_1 ++ if !(!shiftIsBounded(v)) { ++ break ++ } + v.reset(OpLOONG64SRAV) + v0 := b.NewValue0(v.Pos, OpLOONG64OR, t) + v1 := b.NewValue0(v.Pos, OpLOONG64NEGV, t) +@@ -9296,18 +10103,38 @@ func rewriteValueLOONG64_OpRsh64x8(v *Value) bool { + v.AddArg2(x, v0) + return true + } ++ return false + } + func rewriteValueLOONG64_OpRsh8Ux16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types 
++ // match: (Rsh8Ux16 x y) ++ // cond: shiftIsBounded(v) ++ // result: (SRLV (ZeroExt8to64 x) y) ++ for { ++ x := v_0 ++ y := v_1 ++ if !(shiftIsBounded(v)) { ++ break ++ } ++ v.reset(OpLOONG64SRLV) ++ v0 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64) ++ v0.AddArg(x) ++ v.AddArg2(v0, y) ++ return true ++ } + // match: (Rsh8Ux16 x y) ++ // cond: !shiftIsBounded(v) + // result: (MASKEQZ (SRLV (ZeroExt8to64 x) (ZeroExt16to64 y)) (SGTU (MOVVconst [64]) (ZeroExt16to64 y))) + for { + t := v.Type + x := v_0 + y := v_1 ++ if !(!shiftIsBounded(v)) { ++ break ++ } + v.reset(OpLOONG64MASKEQZ) + v0 := b.NewValue0(v.Pos, OpLOONG64SRLV, t) + v1 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64) +@@ -9322,18 +10149,38 @@ func rewriteValueLOONG64_OpRsh8Ux16(v *Value) bool { + v.AddArg2(v0, v3) + return true + } ++ return false + } + func rewriteValueLOONG64_OpRsh8Ux32(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types ++ // match: (Rsh8Ux32 x y) ++ // cond: shiftIsBounded(v) ++ // result: (SRLV (ZeroExt8to64 x) y) ++ for { ++ x := v_0 ++ y := v_1 ++ if !(shiftIsBounded(v)) { ++ break ++ } ++ v.reset(OpLOONG64SRLV) ++ v0 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64) ++ v0.AddArg(x) ++ v.AddArg2(v0, y) ++ return true ++ } + // match: (Rsh8Ux32 x y) ++ // cond: !shiftIsBounded(v) + // result: (MASKEQZ (SRLV (ZeroExt8to64 x) (ZeroExt32to64 y)) (SGTU (MOVVconst [64]) (ZeroExt32to64 y))) + for { + t := v.Type + x := v_0 + y := v_1 ++ if !(!shiftIsBounded(v)) { ++ break ++ } + v.reset(OpLOONG64MASKEQZ) + v0 := b.NewValue0(v.Pos, OpLOONG64SRLV, t) + v1 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64) +@@ -9348,18 +10195,38 @@ func rewriteValueLOONG64_OpRsh8Ux32(v *Value) bool { + v.AddArg2(v0, v3) + return true + } ++ return false + } + func rewriteValueLOONG64_OpRsh8Ux64(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types ++ // match: (Rsh8Ux64 x y) ++ // cond: shiftIsBounded(v) ++ // 
result: (SRLV (ZeroExt8to64 x) y) ++ for { ++ x := v_0 ++ y := v_1 ++ if !(shiftIsBounded(v)) { ++ break ++ } ++ v.reset(OpLOONG64SRLV) ++ v0 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64) ++ v0.AddArg(x) ++ v.AddArg2(v0, y) ++ return true ++ } + // match: (Rsh8Ux64 x y) ++ // cond: !shiftIsBounded(v) + // result: (MASKEQZ (SRLV (ZeroExt8to64 x) y) (SGTU (MOVVconst [64]) y)) + for { + t := v.Type + x := v_0 + y := v_1 ++ if !(!shiftIsBounded(v)) { ++ break ++ } + v.reset(OpLOONG64MASKEQZ) + v0 := b.NewValue0(v.Pos, OpLOONG64SRLV, t) + v1 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64) +@@ -9372,18 +10239,38 @@ func rewriteValueLOONG64_OpRsh8Ux64(v *Value) bool { + v.AddArg2(v0, v2) + return true + } ++ return false + } + func rewriteValueLOONG64_OpRsh8Ux8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types ++ // match: (Rsh8Ux8 x y) ++ // cond: shiftIsBounded(v) ++ // result: (SRLV (ZeroExt8to64 x) y) ++ for { ++ x := v_0 ++ y := v_1 ++ if !(shiftIsBounded(v)) { ++ break ++ } ++ v.reset(OpLOONG64SRLV) ++ v0 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64) ++ v0.AddArg(x) ++ v.AddArg2(v0, y) ++ return true ++ } + // match: (Rsh8Ux8 x y) ++ // cond: !shiftIsBounded(v) + // result: (MASKEQZ (SRLV (ZeroExt8to64 x) (ZeroExt8to64 y)) (SGTU (MOVVconst [64]) (ZeroExt8to64 y))) + for { + t := v.Type + x := v_0 + y := v_1 ++ if !(!shiftIsBounded(v)) { ++ break ++ } + v.reset(OpLOONG64MASKEQZ) + v0 := b.NewValue0(v.Pos, OpLOONG64SRLV, t) + v1 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64) +@@ -9398,18 +10285,38 @@ func rewriteValueLOONG64_OpRsh8Ux8(v *Value) bool { + v.AddArg2(v0, v3) + return true + } ++ return false + } + func rewriteValueLOONG64_OpRsh8x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types ++ // match: (Rsh8x16 x y) ++ // cond: shiftIsBounded(v) ++ // result: (SRAV (SignExt8to64 x) y) ++ for { ++ x := v_0 ++ y := v_1 ++ if !(shiftIsBounded(v)) { ++ 
break ++ } ++ v.reset(OpLOONG64SRAV) ++ v0 := b.NewValue0(v.Pos, OpSignExt8to64, typ.Int64) ++ v0.AddArg(x) ++ v.AddArg2(v0, y) ++ return true ++ } + // match: (Rsh8x16 x y) ++ // cond: !shiftIsBounded(v) + // result: (SRAV (SignExt8to64 x) (OR (NEGV (SGTU (ZeroExt16to64 y) (MOVVconst [63]))) (ZeroExt16to64 y))) + for { + t := v.Type + x := v_0 + y := v_1 ++ if !(!shiftIsBounded(v)) { ++ break ++ } + v.reset(OpLOONG64SRAV) + v0 := b.NewValue0(v.Pos, OpSignExt8to64, typ.Int64) + v0.AddArg(x) +@@ -9426,18 +10333,38 @@ func rewriteValueLOONG64_OpRsh8x16(v *Value) bool { + v.AddArg2(v0, v1) + return true + } ++ return false + } + func rewriteValueLOONG64_OpRsh8x32(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types ++ // match: (Rsh8x32 x y) ++ // cond: shiftIsBounded(v) ++ // result: (SRAV (SignExt8to64 x) y) ++ for { ++ x := v_0 ++ y := v_1 ++ if !(shiftIsBounded(v)) { ++ break ++ } ++ v.reset(OpLOONG64SRAV) ++ v0 := b.NewValue0(v.Pos, OpSignExt8to64, typ.Int64) ++ v0.AddArg(x) ++ v.AddArg2(v0, y) ++ return true ++ } + // match: (Rsh8x32 x y) ++ // cond: !shiftIsBounded(v) + // result: (SRAV (SignExt8to64 x) (OR (NEGV (SGTU (ZeroExt32to64 y) (MOVVconst [63]))) (ZeroExt32to64 y))) + for { + t := v.Type + x := v_0 + y := v_1 ++ if !(!shiftIsBounded(v)) { ++ break ++ } + v.reset(OpLOONG64SRAV) + v0 := b.NewValue0(v.Pos, OpSignExt8to64, typ.Int64) + v0.AddArg(x) +@@ -9454,18 +10381,38 @@ func rewriteValueLOONG64_OpRsh8x32(v *Value) bool { + v.AddArg2(v0, v1) + return true + } ++ return false + } + func rewriteValueLOONG64_OpRsh8x64(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types ++ // match: (Rsh8x64 x y) ++ // cond: shiftIsBounded(v) ++ // result: (SRAV (SignExt8to64 x) y) ++ for { ++ x := v_0 ++ y := v_1 ++ if !(shiftIsBounded(v)) { ++ break ++ } ++ v.reset(OpLOONG64SRAV) ++ v0 := b.NewValue0(v.Pos, OpSignExt8to64, typ.Int64) ++ v0.AddArg(x) ++ v.AddArg2(v0, y) ++ return 
true ++ } + // match: (Rsh8x64 x y) ++ // cond: !shiftIsBounded(v) + // result: (SRAV (SignExt8to64 x) (OR (NEGV (SGTU y (MOVVconst [63]))) y)) + for { + t := v.Type + x := v_0 + y := v_1 ++ if !(!shiftIsBounded(v)) { ++ break ++ } + v.reset(OpLOONG64SRAV) + v0 := b.NewValue0(v.Pos, OpSignExt8to64, typ.Int64) + v0.AddArg(x) +@@ -9480,18 +10427,38 @@ func rewriteValueLOONG64_OpRsh8x64(v *Value) bool { + v.AddArg2(v0, v1) + return true + } ++ return false + } + func rewriteValueLOONG64_OpRsh8x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types ++ // match: (Rsh8x8 x y) ++ // cond: shiftIsBounded(v) ++ // result: (SRAV (SignExt8to64 x) y) ++ for { ++ x := v_0 ++ y := v_1 ++ if !(shiftIsBounded(v)) { ++ break ++ } ++ v.reset(OpLOONG64SRAV) ++ v0 := b.NewValue0(v.Pos, OpSignExt8to64, typ.Int64) ++ v0.AddArg(x) ++ v.AddArg2(v0, y) ++ return true ++ } + // match: (Rsh8x8 x y) ++ // cond: !shiftIsBounded(v) + // result: (SRAV (SignExt8to64 x) (OR (NEGV (SGTU (ZeroExt8to64 y) (MOVVconst [63]))) (ZeroExt8to64 y))) + for { + t := v.Type + x := v_0 + y := v_1 ++ if !(!shiftIsBounded(v)) { ++ break ++ } + v.reset(OpLOONG64SRAV) + v0 := b.NewValue0(v.Pos, OpSignExt8to64, typ.Int64) + v0.AddArg(x) +@@ -9508,6 +10475,7 @@ func rewriteValueLOONG64_OpRsh8x8(v *Value) bool { + v.AddArg2(v0, v1) + return true + } ++ return false + } + func rewriteValueLOONG64_OpSelect0(v *Value) bool { + v_0 := v.Args[0] +diff --git a/test/codegen/shift.go b/test/codegen/shift.go +index 3c669edcb2..db4e6409a8 100644 +--- a/test/codegen/shift.go ++++ b/test/codegen/shift.go +@@ -115,6 +115,7 @@ func rshConst64x32(v int64) int64 { + + func lshMask64x64(v int64, s uint64) int64 { + // arm64:"LSL",-"AND" ++ // loong64:"SLLV",-"AND" + // ppc64x:"RLDICL",-"ORN",-"ISEL" + // riscv64:"SLL",-"AND\t",-"SLTIU" + // s390x:-"RISBGZ",-"AND",-"LOCGR" +@@ -123,6 +124,7 @@ func lshMask64x64(v int64, s uint64) int64 { + + func rshMask64Ux64(v uint64, s uint64) uint64 { 
+ // arm64:"LSR",-"AND",-"CSEL" ++ // loong64:"SRLV",-"AND" + // ppc64x:"RLDICL",-"ORN",-"ISEL" + // riscv64:"SRL\t",-"AND\t",-"SLTIU" + // s390x:-"RISBGZ",-"AND",-"LOCGR" +@@ -131,6 +133,7 @@ func rshMask64Ux64(v uint64, s uint64) uint64 { + + func rshMask64x64(v int64, s uint64) int64 { + // arm64:"ASR",-"AND",-"CSEL" ++ // loong64:"SRAV",-"AND" + // ppc64x:"RLDICL",-"ORN",-"ISEL" + // riscv64:"SRA\t",-"OR",-"SLTIU" + // s390x:-"RISBGZ",-"AND",-"LOCGR" +@@ -139,14 +142,21 @@ func rshMask64x64(v int64, s uint64) int64 { + + func lshMask32x64(v int32, s uint64) int32 { + // arm64:"LSL",-"AND" ++ // loong64:"SLL\t","AND","SGTU","MASKEQZ" + // ppc64x:"ISEL",-"ORN" + // riscv64:"SLL",-"AND\t",-"SLTIU" + // s390x:-"RISBGZ",-"AND",-"LOCGR" + return v << (s & 63) + } + ++func lsh5Mask32x64(v int32, s uint64) int32 { ++ // loong64:"SLL\t",-"AND" ++ return v << (s & 31) ++} ++ + func rshMask32Ux64(v uint32, s uint64) uint32 { + // arm64:"LSR",-"AND" ++ // loong64:"SRL\t","AND","SGTU","MASKEQZ" + // ppc64x:"ISEL",-"ORN" + // riscv64:"SRLW","SLTIU","NEG","AND\t",-"SRL\t" + // s390x:-"RISBGZ",-"AND",-"LOCGR" +@@ -154,12 +164,14 @@ func rshMask32Ux64(v uint32, s uint64) uint32 { + } + + func rsh5Mask32Ux64(v uint32, s uint64) uint32 { ++ // loong64:"SRL\t",-"AND" + // riscv64:"SRLW",-"AND\t",-"SLTIU",-"SRL\t" + return v >> (s & 31) + } + + func rshMask32x64(v int32, s uint64) int32 { + // arm64:"ASR",-"AND" ++ // loong64:"SRA\t","AND","SGTU","SUBVU","OR" + // ppc64x:"ISEL",-"ORN" + // riscv64:"SRAW","OR","SLTIU" + // s390x:-"RISBGZ",-"AND",-"LOCGR" +@@ -167,12 +179,14 @@ func rshMask32x64(v int32, s uint64) int32 { + } + + func rsh5Mask32x64(v int32, s uint64) int32 { ++ // loong64:"SRA\t",-"AND" + // riscv64:"SRAW",-"OR",-"SLTIU" + return v >> (s & 31) + } + + func lshMask64x32(v int64, s uint32) int64 { + // arm64:"LSL",-"AND" ++ // loong64:"SLLV",-"AND" + // ppc64x:"RLDICL",-"ORN" + // riscv64:"SLL",-"AND\t",-"SLTIU" + // s390x:-"RISBGZ",-"AND",-"LOCGR" +@@ -181,6 +195,7 @@ 
func lshMask64x32(v int64, s uint32) int64 { + + func rshMask64Ux32(v uint64, s uint32) uint64 { + // arm64:"LSR",-"AND",-"CSEL" ++ // loong64:"SRLV",-"AND" + // ppc64x:"RLDICL",-"ORN" + // riscv64:"SRL\t",-"AND\t",-"SLTIU" + // s390x:-"RISBGZ",-"AND",-"LOCGR" +@@ -189,6 +204,7 @@ func rshMask64Ux32(v uint64, s uint32) uint64 { + + func rshMask64x32(v int64, s uint32) int64 { + // arm64:"ASR",-"AND",-"CSEL" ++ // loong64:"SRAV",-"AND" + // ppc64x:"RLDICL",-"ORN",-"ISEL" + // riscv64:"SRA\t",-"OR",-"SLTIU" + // s390x:-"RISBGZ",-"AND",-"LOCGR" +-- +2.38.1 + diff --git a/0024-internal-abi-define-loong64-regABI-constants.patch b/0024-internal-abi-define-loong64-regABI-constants.patch deleted file mode 100644 index 45686971d2b9c4e245da2c3db5d9d2189e1d3b75..0000000000000000000000000000000000000000 --- a/0024-internal-abi-define-loong64-regABI-constants.patch +++ /dev/null @@ -1,42 +0,0 @@ -From 8a07acf3eefab6311d0b38ef560ae3e7d500ac09 Mon Sep 17 00:00:00 2001 -From: Guoqi Chen -Date: Tue, 15 Aug 2023 19:38:33 +0800 -Subject: [PATCH 24/51] internal/abi: define loong64 regABI constants - -Update #40724 - -Co-authored-by: Xiaolin Zhao -Change-Id: Id580d9e22a562adee2ae02a467ac38a54949e737 ---- - src/internal/abi/abi_loong64.go | 19 +++++++++++++++++++ - 1 file changed, 19 insertions(+) - create mode 100644 src/internal/abi/abi_loong64.go - -diff --git a/src/internal/abi/abi_loong64.go b/src/internal/abi/abi_loong64.go -new file mode 100644 -index 0000000000..c2306ae8d8 ---- /dev/null -+++ b/src/internal/abi/abi_loong64.go -@@ -0,0 +1,19 @@ -+// Copyright 2023 The Go Authors. All rights reserved. -+// Use of this source code is governed by a BSD-style -+// license that can be found in the LICENSE file. -+ -+//go:build goexperiment.regabiargs -+ -+package abi -+ -+const ( -+ // See abi_generic.go. 
-+ -+ // R4 - R19 -+ IntArgRegs = 16 -+ -+ // F0 - F15 -+ FloatArgRegs = 16 -+ -+ EffectiveFloatRegSize = 8 -+) --- -2.38.1 - diff --git a/0024-runtime-use-ABIInternal-on-syscall-and-other-sys.stu.patch b/0024-runtime-use-ABIInternal-on-syscall-and-other-sys.stu.patch new file mode 100644 index 0000000000000000000000000000000000000000..bc81e40b22cf501e51b804355e9f185b57a9dd79 --- /dev/null +++ b/0024-runtime-use-ABIInternal-on-syscall-and-other-sys.stu.patch @@ -0,0 +1,505 @@ +From 7e54d3bbc1af00ca94819f9c1bbb61f822d37439 Mon Sep 17 00:00:00 2001 +From: Guoqi Chen +Date: Tue, 26 Nov 2024 15:44:28 +0800 +Subject: [PATCH 24/44] runtime: use ABIInternal on syscall and other sys.stuff + for loong64 + +Change-Id: Ieeb3f2af02c55a9ad62a19d0085b0e082a182db4 +--- + src/runtime/sys_linux_loong64.s | 227 +++++++++++--------------------- + 1 file changed, 79 insertions(+), 148 deletions(-) + +diff --git a/src/runtime/sys_linux_loong64.s b/src/runtime/sys_linux_loong64.s +index 57cee99da7..b4e9930755 100644 +--- a/src/runtime/sys_linux_loong64.s ++++ b/src/runtime/sys_linux_loong64.s +@@ -47,8 +47,7 @@ + #define SYS_timer_delete 111 + + // func exit(code int32) +-TEXT runtime·exit(SB),NOSPLIT|NOFRAME,$0-4 +- MOVW code+0(FP), R4 ++TEXT runtime·exit(SB),NOSPLIT,$0 + MOVV $SYS_exit_group, R11 + SYSCALL + RET +@@ -67,48 +66,49 @@ TEXT runtime·exitThread(SB),NOSPLIT|NOFRAME,$0-8 + JMP 0(PC) + + // func open(name *byte, mode, perm int32) int32 +-TEXT runtime·open(SB),NOSPLIT|NOFRAME,$0-20 ++TEXT runtime·open(SB),NOSPLIT,$0 ++ // before: ++ // R4: name ++ // R5: mode ++ // R6: perm ++ ++ // after: ++ // R4: AT_FDCWD ++ // R5: name ++ // R6: mode ++ // R7: perm ++ ++ MOVW R6, R7 ++ MOVW R5, R6 ++ MOVV R4, R5 + MOVW $AT_FDCWD, R4 // AT_FDCWD, so this acts like open +- MOVV name+0(FP), R5 +- MOVW mode+8(FP), R6 +- MOVW perm+12(FP), R7 ++ + MOVV $SYS_openat, R11 + SYSCALL + MOVW $-4096, R5 + BGEU R5, R4, 2(PC) + MOVW $-1, R4 +- MOVW R4, ret+16(FP) + RET + + // func closefd(fd int32) int32 
+-TEXT runtime·closefd(SB),NOSPLIT|NOFRAME,$0-12 +- MOVW fd+0(FP), R4 ++TEXT runtime·closefd(SB),NOSPLIT,$0 + MOVV $SYS_close, R11 + SYSCALL + MOVW $-4096, R5 + BGEU R5, R4, 2(PC) + MOVW $-1, R4 +- MOVW R4, ret+8(FP) + RET + + // func write1(fd uintptr, p unsafe.Pointer, n int32) int32 +-TEXT runtime·write1(SB),NOSPLIT|NOFRAME,$0-28 +- MOVV fd+0(FP), R4 +- MOVV p+8(FP), R5 +- MOVW n+16(FP), R6 ++TEXT runtime·write1(SB),NOSPLIT,$0 + MOVV $SYS_write, R11 + SYSCALL +- MOVW R4, ret+24(FP) + RET + + // func read(fd int32, p unsafe.Pointer, n int32) int32 +-TEXT runtime·read(SB),NOSPLIT|NOFRAME,$0-28 +- MOVW fd+0(FP), R4 +- MOVV p+8(FP), R5 +- MOVW n+16(FP), R6 ++TEXT runtime·read(SB),NOSPLIT,$0 + MOVV $SYS_read, R11 + SYSCALL +- MOVW R4, ret+24(FP) + RET + + // func pipe2(flags int32) (r, w int32, errno int32) +@@ -121,16 +121,15 @@ TEXT runtime·pipe2(SB),NOSPLIT|NOFRAME,$0-20 + RET + + // func usleep(usec uint32) +-TEXT runtime·usleep(SB),NOSPLIT,$16-4 +- MOVWU usec+0(FP), R7 ++TEXT runtime·usleep(SB),NOSPLIT,$16 + MOVV $1000, R6 +- MULVU R6, R7, R7 ++ MULVU R6, R4, R4 + MOVV $1000000000, R6 + +- DIVVU R6, R7, R5 // ts->tv_sec +- REMVU R6, R7, R4 // ts->tv_nsec ++ DIVVU R6, R4, R5 // ts->tv_sec ++ REMVU R6, R4, R8 // ts->tv_nsec + MOVV R5, 8(R3) +- MOVV R4, 16(R3) ++ MOVV R8, 16(R3) + + // nanosleep(&ts, 0) + ADDV $8, R3, R4 +@@ -140,14 +139,14 @@ TEXT runtime·usleep(SB),NOSPLIT,$16-4 + RET + + // func gettid() uint32 +-TEXT runtime·gettid(SB),NOSPLIT,$0-4 ++TEXT runtime·gettid(SB),NOSPLIT,$0 + MOVV $SYS_gettid, R11 + SYSCALL +- MOVW R4, ret+0(FP) + RET + + // func raise(sig uint32) +-TEXT runtime·raise(SB),NOSPLIT|NOFRAME,$0 ++TEXT runtime·raise(SB),NOSPLIT,$0 ++ MOVW R4, R24 // backup sig + MOVV $SYS_getpid, R11 + SYSCALL + MOVW R4, R23 +@@ -155,87 +154,66 @@ TEXT runtime·raise(SB),NOSPLIT|NOFRAME,$0 + SYSCALL + MOVW R4, R5 // arg 2 tid + MOVW R23, R4 // arg 1 pid +- MOVW sig+0(FP), R6 // arg 3 ++ MOVW R24, R6 // arg 3 + MOVV $SYS_tgkill, R11 + SYSCALL + RET + + // 
func raiseproc(sig uint32) +-TEXT runtime·raiseproc(SB),NOSPLIT|NOFRAME,$0 ++TEXT runtime·raiseproc(SB),NOSPLIT,$0 ++ MOVW R4, R24 // backup sig + MOVV $SYS_getpid, R11 + SYSCALL + //MOVW R4, R4 // arg 1 pid +- MOVW sig+0(FP), R5 // arg 2 ++ MOVW R24, R5 // arg 2 + MOVV $SYS_kill, R11 + SYSCALL + RET + + // func getpid() int +-TEXT ·getpid(SB),NOSPLIT|NOFRAME,$0-8 ++TEXT ·getpid(SB),NOSPLIT,$0 + MOVV $SYS_getpid, R11 + SYSCALL +- MOVV R4, ret+0(FP) + RET + + // func tgkill(tgid, tid, sig int) +-TEXT ·tgkill(SB),NOSPLIT|NOFRAME,$0-24 +- MOVV tgid+0(FP), R4 +- MOVV tid+8(FP), R5 +- MOVV sig+16(FP), R6 ++TEXT ·tgkill(SB),NOSPLIT,$0 + MOVV $SYS_tgkill, R11 + SYSCALL + RET + + // func setitimer(mode int32, new, old *itimerval) +-TEXT runtime·setitimer(SB),NOSPLIT|NOFRAME,$0-24 +- MOVW mode+0(FP), R4 +- MOVV new+8(FP), R5 +- MOVV old+16(FP), R6 ++TEXT runtime·setitimer(SB),NOSPLIT,$0 + MOVV $SYS_setitimer, R11 + SYSCALL + RET + + // func timer_create(clockid int32, sevp *sigevent, timerid *int32) int32 +-TEXT runtime·timer_create(SB),NOSPLIT,$0-28 +- MOVW clockid+0(FP), R4 +- MOVV sevp+8(FP), R5 +- MOVV timerid+16(FP), R6 ++TEXT runtime·timer_create(SB),NOSPLIT,$0 + MOVV $SYS_timer_create, R11 + SYSCALL +- MOVW R4, ret+24(FP) + RET + + // func timer_settime(timerid int32, flags int32, new, old *itimerspec) int32 +-TEXT runtime·timer_settime(SB),NOSPLIT,$0-28 +- MOVW timerid+0(FP), R4 +- MOVW flags+4(FP), R5 +- MOVV new+8(FP), R6 +- MOVV old+16(FP), R7 ++TEXT runtime·timer_settime(SB),NOSPLIT,$0 + MOVV $SYS_timer_settime, R11 + SYSCALL +- MOVW R4, ret+24(FP) + RET + + // func timer_delete(timerid int32) int32 +-TEXT runtime·timer_delete(SB),NOSPLIT,$0-12 +- MOVW timerid+0(FP), R4 ++TEXT runtime·timer_delete(SB),NOSPLIT,$0 + MOVV $SYS_timer_delete, R11 + SYSCALL +- MOVW R4, ret+8(FP) + RET + + // func mincore(addr unsafe.Pointer, n uintptr, dst *byte) int32 +-TEXT runtime·mincore(SB),NOSPLIT|NOFRAME,$0-28 +- MOVV addr+0(FP), R4 +- MOVV n+8(FP), R5 +- MOVV dst+16(FP), R6 
++TEXT runtime·mincore(SB),NOSPLIT,$0 + MOVV $SYS_mincore, R11 + SYSCALL +- MOVW R4, ret+24(FP) + RET + + // func walltime() (sec int64, nsec int32) +-TEXT runtime·walltime(SB),NOSPLIT,$24-12 ++TEXT runtime·walltime(SB),NOSPLIT,$24 + MOVV R3, R23 // R23 is unchanged by C code + MOVV R3, R25 + +@@ -291,7 +269,7 @@ nosaveg: + JAL (R20) + + finish: +- MOVV 0(R3), R7 // sec ++ MOVV 0(R3), R4 // sec + MOVV 8(R3), R5 // nsec + + MOVV R23, R3 // restore SP +@@ -304,9 +282,6 @@ finish: + MOVV R25, m_vdsoSP(R24) + MOVV 8(R3), R25 + MOVV R25, m_vdsoPC(R24) +- +- MOVV R7, sec+0(FP) +- MOVW R5, nsec+8(FP) + RET + + fallback: +@@ -315,7 +290,7 @@ fallback: + JMP finish + + // func nanotime1() int64 +-TEXT runtime·nanotime1(SB),NOSPLIT,$16-8 ++TEXT runtime·nanotime1(SB),NOSPLIT,$24 + MOVV R3, R23 // R23 is unchanged by C code + MOVV R3, R25 + +@@ -389,8 +364,7 @@ finish: + // return nsec in R7 + MOVV $1000000000, R4 + MULVU R4, R7, R7 +- ADDVU R5, R7 +- MOVV R7, ret+0(FP) ++ ADDVU R5, R7, R4 + RET + + fallback: +@@ -399,11 +373,7 @@ fallback: + JMP finish + + // func rtsigprocmask(how int32, new, old *sigset, size int32) +-TEXT runtime·rtsigprocmask(SB),NOSPLIT|NOFRAME,$0-28 +- MOVW how+0(FP), R4 +- MOVV new+8(FP), R5 +- MOVV old+16(FP), R6 +- MOVW size+24(FP), R7 ++TEXT runtime·rtsigprocmask(SB),NOSPLIT,$0 + MOVV $SYS_rt_sigprocmask, R11 + SYSCALL + MOVW $-4096, R5 +@@ -412,22 +382,21 @@ TEXT runtime·rtsigprocmask(SB),NOSPLIT|NOFRAME,$0-28 + RET + + // func rt_sigaction(sig uintptr, new, old *sigactiont, size uintptr) int32 +-TEXT runtime·rt_sigaction(SB),NOSPLIT|NOFRAME,$0-36 +- MOVV sig+0(FP), R4 +- MOVV new+8(FP), R5 +- MOVV old+16(FP), R6 +- MOVV size+24(FP), R7 ++TEXT runtime·rt_sigaction(SB),NOSPLIT,$0 + MOVV $SYS_rt_sigaction, R11 + SYSCALL +- MOVW R4, ret+32(FP) + RET + + // func sigfwd(fn uintptr, sig uint32, info *siginfo, ctx unsafe.Pointer) +-TEXT runtime·sigfwd(SB),NOSPLIT,$0-32 +- MOVW sig+8(FP), R4 +- MOVV info+16(FP), R5 +- MOVV ctx+24(FP), R6 +- MOVV fn+0(FP), 
R20 ++TEXT runtime·sigfwd(SB),NOSPLIT,$0 ++ // before: ++ // R4: fn, R5: sig, R6: info, R7: ctx ++ // after: ++ // R20: fn, R4: sig, R5: info, R6: ctx ++ MOVV R4, R20 ++ MOVV R5, R4 ++ MOVV R6, R5 ++ MOVV R7, R6 + JAL (R20) + RET + +@@ -460,48 +429,31 @@ TEXT runtime·cgoSigtramp(SB),NOSPLIT,$0 + JMP runtime·sigtramp(SB) + + // func sysMmap(addr unsafe.Pointer, n uintptr, prot, flags, fd int32, off uint32) (p unsafe.Pointer, err int) +-TEXT runtime·sysMmap(SB),NOSPLIT|NOFRAME,$0 +- MOVV addr+0(FP), R4 +- MOVV n+8(FP), R5 +- MOVW prot+16(FP), R6 +- MOVW flags+20(FP), R7 +- MOVW fd+24(FP), R8 +- MOVW off+28(FP), R9 +- ++TEXT runtime·sysMmap(SB),NOSPLIT,$0 + MOVV $SYS_mmap, R11 + SYSCALL + MOVW $-4096, R5 + BGEU R5, R4, ok +- MOVV $0, p+32(FP) +- SUBVU R4, R0, R4 +- MOVV R4, err+40(FP) ++ SUBVU R4, R0, R5 ++ MOVV $0, R4 + RET + ok: +- MOVV R4, p+32(FP) +- MOVV $0, err+40(FP) ++ MOVV $0, R5 + RET + + // Call the function stored in _cgo_mmap using the GCC calling convention. + // This must be called on the system stack. + // func callCgoMmap(addr unsafe.Pointer, n uintptr, prot, flags, fd int32, off uint32) uintptr +-TEXT runtime·callCgoMmap(SB),NOSPLIT,$0 +- MOVV addr+0(FP), R4 +- MOVV n+8(FP), R5 +- MOVW prot+16(FP), R6 +- MOVW flags+20(FP), R7 +- MOVW fd+24(FP), R8 +- MOVW off+28(FP), R9 ++TEXT runtime·callCgoMmap(SB),NOSPLIT,$0 + MOVV _cgo_mmap(SB), R13 + SUBV $16, R3 // reserve 16 bytes for sp-8 where fp may be saved. + JAL (R13) + ADDV $16, R3 +- MOVV R4, ret+32(FP) ++ MOVV R4, R4 + RET + + // func sysMunmap(addr unsafe.Pointer, n uintptr) +-TEXT runtime·sysMunmap(SB),NOSPLIT|NOFRAME,$0 +- MOVV addr+0(FP), R4 +- MOVV n+8(FP), R5 ++TEXT runtime·sysMunmap(SB),NOSPLIT,$0 + MOVV $SYS_munmap, R11 + SYSCALL + MOVW $-4096, R5 +@@ -512,9 +464,7 @@ TEXT runtime·sysMunmap(SB),NOSPLIT|NOFRAME,$0 + // Call the function stored in _cgo_munmap using the GCC calling convention. + // This must be called on the system stack. 
+ // func callCgoMunmap(addr unsafe.Pointer, n uintptr) +-TEXT runtime·callCgoMunmap(SB),NOSPLIT,$0 +- MOVV addr+0(FP), R4 +- MOVV n+8(FP), R5 ++TEXT runtime·callCgoMunmap(SB),NOSPLIT,$0 + MOVV _cgo_munmap(SB), R13 + SUBV $16, R3 // reserve 16 bytes for sp-8 where fp may be saved. + JAL (R13) +@@ -522,38 +472,24 @@ TEXT runtime·callCgoMunmap(SB),NOSPLIT,$0 + RET + + // func madvise(addr unsafe.Pointer, n uintptr, flags int32) +-TEXT runtime·madvise(SB),NOSPLIT|NOFRAME,$0 +- MOVV addr+0(FP), R4 +- MOVV n+8(FP), R5 +- MOVW flags+16(FP), R6 ++TEXT runtime·madvise(SB),NOSPLIT,$0 + MOVV $SYS_madvise, R11 + SYSCALL +- MOVW R4, ret+24(FP) + RET + + // func futex(addr unsafe.Pointer, op int32, val uint32, ts, addr2 unsafe.Pointer, val3 uint32) int32 +-TEXT runtime·futex(SB),NOSPLIT|NOFRAME,$0 +- MOVV addr+0(FP), R4 +- MOVW op+8(FP), R5 +- MOVW val+12(FP), R6 +- MOVV ts+16(FP), R7 +- MOVV addr2+24(FP), R8 +- MOVW val3+32(FP), R9 ++TEXT runtime·futex(SB),NOSPLIT,$0 + MOVV $SYS_futex, R11 + SYSCALL +- MOVW R4, ret+40(FP) + RET + + // int64 clone(int32 flags, void *stk, M *mp, G *gp, void (*fn)(void)); +-TEXT runtime·clone(SB),NOSPLIT|NOFRAME,$0 +- MOVW flags+0(FP), R4 +- MOVV stk+8(FP), R5 +- ++TEXT runtime·clone(SB),NOSPLIT,$0 + // Copy mp, gp, fn off parent stack for use by child. + // Careful: Linux system call clobbers ???. +- MOVV mp+16(FP), R23 +- MOVV gp+24(FP), R24 +- MOVV fn+32(FP), R25 ++ MOVV R6, R23 ++ MOVV R7, R24 ++ MOVV R8, R25 + + MOVV R23, -8(R5) + MOVV R24, -16(R5) +@@ -565,8 +501,7 @@ TEXT runtime·clone(SB),NOSPLIT|NOFRAME,$0 + SYSCALL + + // In parent, return. +- BEQ R4, 3(PC) +- MOVW R4, ret+40(FP) ++ BEQ R4, 2(PC) + RET + + // In child, on new stack. 
+@@ -606,9 +541,7 @@ nog: + JMP -3(PC) // keep exiting + + // func sigaltstack(new, old *stackt) +-TEXT runtime·sigaltstack(SB),NOSPLIT|NOFRAME,$0 +- MOVV new+0(FP), R4 +- MOVV old+8(FP), R5 ++TEXT runtime·sigaltstack(SB),NOSPLIT,$0 + MOVV $SYS_sigaltstack, R11 + SYSCALL + MOVW $-4096, R5 +@@ -617,42 +550,40 @@ TEXT runtime·sigaltstack(SB),NOSPLIT|NOFRAME,$0 + RET + + // func osyield() +-TEXT runtime·osyield(SB),NOSPLIT|NOFRAME,$0 ++TEXT runtime·osyield(SB),NOSPLIT,$0 + MOVV $SYS_sched_yield, R11 + SYSCALL + RET + + // func sched_getaffinity(pid, len uintptr, buf *uintptr) int32 +-TEXT runtime·sched_getaffinity(SB),NOSPLIT|NOFRAME,$0 +- MOVV pid+0(FP), R4 +- MOVV len+8(FP), R5 +- MOVV buf+16(FP), R6 ++TEXT runtime·sched_getaffinity(SB),NOSPLIT,$0 + MOVV $SYS_sched_getaffinity, R11 + SYSCALL +- MOVW R4, ret+24(FP) + RET + + // func sbrk0() uintptr +-TEXT runtime·sbrk0(SB),NOSPLIT|NOFRAME,$0-8 ++TEXT runtime·sbrk0(SB),NOSPLIT,$0 + // Implemented as brk(NULL). + MOVV $0, R4 + MOVV $SYS_brk, R11 + SYSCALL +- MOVV R4, ret+0(FP) + RET + ++// unimplemented, only needed for android; declared in stubs_linux.go + TEXT runtime·access(SB),$0-20 +- MOVV R0, 2(R0) // unimplemented, only needed for android; declared in stubs_linux.go ++ MOVV R0, 2(R0) + MOVW R0, ret+16(FP) // for vet + RET + ++// unimplemented, only needed for android; declared in stubs_linux.go + TEXT runtime·connect(SB),$0-28 +- MOVV R0, 2(R0) // unimplemented, only needed for android; declared in stubs_linux.go ++ MOVV R0, 2(R0) + MOVW R0, ret+24(FP) // for vet + RET + ++// unimplemented, only needed for android; declared in stubs_linux.go + TEXT runtime·socket(SB),$0-20 +- MOVV R0, 2(R0) // unimplemented, only needed for android; declared in stubs_linux.go ++ MOVV R0, 2(R0) + MOVW R0, ret+16(FP) // for vet + RET + +-- +2.38.1 + diff --git a/0025-cmd-compile-internal-add-register-info-for-loong64-r.patch b/0025-cmd-compile-internal-add-register-info-for-loong64-r.patch deleted file mode 100644 index 
f4a705663e6f2a918c06396049d33d6c85ce5a0b..0000000000000000000000000000000000000000 --- a/0025-cmd-compile-internal-add-register-info-for-loong64-r.patch +++ /dev/null @@ -1,76 +0,0 @@ -From f49b0e4d8eb7b6c0afbe7f3d4fa104564a52999a Mon Sep 17 00:00:00 2001 -From: Guoqi Chen -Date: Tue, 15 Aug 2023 19:54:51 +0800 -Subject: [PATCH 25/51] cmd/compile/internal: add register info for loong64 - regABI - -Update #40724 - -Co-authored-by: Xiaolin Zhao -Change-Id: I4b40d0c17c479392ceaef65a8fd40a9117b87b4f ---- - src/cmd/compile/internal/loong64/ssa.go | 2 ++ - src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go | 4 ++-- - src/cmd/compile/internal/ssa/config.go | 2 ++ - src/cmd/compile/internal/ssa/opGen.go | 4 ++-- - 4 files changed, 8 insertions(+), 4 deletions(-) - -diff --git a/src/cmd/compile/internal/loong64/ssa.go b/src/cmd/compile/internal/loong64/ssa.go -index 199fd4ce33..f348f396b8 100644 ---- a/src/cmd/compile/internal/loong64/ssa.go -+++ b/src/cmd/compile/internal/loong64/ssa.go -@@ -144,6 +144,8 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) { - p.From.Type = obj.TYPE_REG - p.From.Reg = r - ssagen.AddrAuto(&p.To, v) -+ case ssa.OpArgIntReg, ssa.OpArgFloatReg: -+ ssagen.CheckArgReg(v) - case ssa.OpLOONG64ADDV, - ssa.OpLOONG64SUBV, - ssa.OpLOONG64AND, -diff --git a/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go b/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go -index 0d0f475a5b..8e3f3ce720 100644 ---- a/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go -+++ b/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go -@@ -466,8 +466,8 @@ func init() { - blocks: blocks, - regnames: regNamesLOONG64, - // TODO: support register ABI on loong64 -- ParamIntRegNames: "R4 R5 R6 R7 R8 R9 R10 R11", -- ParamFloatRegNames: "F0 F1 F2 F3 F4 F5 F6 F7", -+ ParamIntRegNames: "R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19", -+ ParamFloatRegNames: "F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15", - gpregmask: gp, - fpregmask: fp, - framepointerreg: -1, // not used -diff --git 
a/src/cmd/compile/internal/ssa/config.go b/src/cmd/compile/internal/ssa/config.go -index 43f9f0affc..31a6ee1af8 100644 ---- a/src/cmd/compile/internal/ssa/config.go -+++ b/src/cmd/compile/internal/ssa/config.go -@@ -296,6 +296,8 @@ func NewConfig(arch string, types Types, ctxt *obj.Link, optimize, softfloat boo - c.registers = registersLOONG64[:] - c.gpRegMask = gpRegMaskLOONG64 - c.fpRegMask = fpRegMaskLOONG64 -+ // c.intParamRegs = paramIntRegLOONG64 -+ // c.floatParamRegs = paramFloatRegLOONG64 - c.FPReg = framepointerRegLOONG64 - c.LinkReg = linkRegLOONG64 - c.hasGReg = true -diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go -index 6643aef21a..482046f016 100644 ---- a/src/cmd/compile/internal/ssa/opGen.go -+++ b/src/cmd/compile/internal/ssa/opGen.go -@@ -40233,8 +40233,8 @@ var registersLOONG64 = [...]Register{ - {61, loong64.REG_F31, -1, "F31"}, - {62, 0, -1, "SB"}, - } --var paramIntRegLOONG64 = []int8{3, 4, 5, 6, 7, 8, 9, 10} --var paramFloatRegLOONG64 = []int8{30, 31, 32, 33, 34, 35, 36, 37} -+var paramIntRegLOONG64 = []int8{3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18} -+var paramFloatRegLOONG64 = []int8{30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45} - var gpRegMaskLOONG64 = regMask(1071644664) - var fpRegMaskLOONG64 = regMask(4611686017353646080) - var specialRegMaskLOONG64 = regMask(0) --- -2.38.1 - diff --git a/0025-runtime-use-correct-memory-barrier-in-exitThread-fun.patch b/0025-runtime-use-correct-memory-barrier-in-exitThread-fun.patch new file mode 100644 index 0000000000000000000000000000000000000000..0b81cb190f22ff450cd050c9bcc0f3cfc1827f41 --- /dev/null +++ b/0025-runtime-use-correct-memory-barrier-in-exitThread-fun.patch @@ -0,0 +1,34 @@ +From 5bb6b8ebb22faf46a01ff292c45a7dc72f2b5022 Mon Sep 17 00:00:00 2001 +From: Guoqi Chen +Date: Tue, 26 Nov 2024 17:10:32 +0800 +Subject: [PATCH 25/44] runtime: use correct memory barrier in exitThread + function on loong64 + +In the 
runtime.exitThread function, a storeRelease barrier +is required instead of a full barrier. + +Change-Id: I614c6f74e8c9fd56c3badf3bf450b3314e3f377c +--- + src/runtime/sys_linux_loong64.s | 6 ++---- + 1 file changed, 2 insertions(+), 4 deletions(-) + +diff --git a/src/runtime/sys_linux_loong64.s b/src/runtime/sys_linux_loong64.s +index b4e9930755..830eb9d099 100644 +--- a/src/runtime/sys_linux_loong64.s ++++ b/src/runtime/sys_linux_loong64.s +@@ -56,10 +56,8 @@ TEXT runtime·exit(SB),NOSPLIT,$0 + TEXT runtime·exitThread(SB),NOSPLIT|NOFRAME,$0-8 + MOVV wait+0(FP), R19 + // We're done using the stack. +- MOVW $0, R11 +- DBAR +- MOVW R11, (R19) +- DBAR ++ DBAR $0x12 // StoreRelease barrier ++ MOVW R0, (R19) + MOVW $0, R4 // exit code + MOVV $SYS_exit, R11 + SYSCALL +-- +2.38.1 + diff --git a/0026-cmd-compile-internal-add-spill-support-for-loong64-r.patch b/0026-cmd-compile-internal-add-spill-support-for-loong64-r.patch deleted file mode 100644 index e2bce1f458eca5581343a427cd7242d35a5c810a..0000000000000000000000000000000000000000 --- a/0026-cmd-compile-internal-add-spill-support-for-loong64-r.patch +++ /dev/null @@ -1,81 +0,0 @@ -From 6220821537490f266d3fbb3bdd36271aaeddc87c Mon Sep 17 00:00:00 2001 -From: Guoqi Chen -Date: Tue, 15 Aug 2023 20:10:33 +0800 -Subject: [PATCH 26/51] cmd/compile/internal: add spill support for loong64 - regABI - -Update #40724 - -Co-authored-by: Xiaolin Zhao -Change-Id: I4a194177562dd1f15add5c32696bd69b027d88d7 ---- - src/cmd/compile/internal/loong64/galign.go | 2 ++ - src/cmd/compile/internal/loong64/ssa.go | 30 ++++++++++++++++++++++ - 2 files changed, 32 insertions(+) - -diff --git a/src/cmd/compile/internal/loong64/galign.go b/src/cmd/compile/internal/loong64/galign.go -index 99ab7bdfb5..a613165054 100644 ---- a/src/cmd/compile/internal/loong64/galign.go -+++ b/src/cmd/compile/internal/loong64/galign.go -@@ -20,4 +20,6 @@ func Init(arch *ssagen.ArchInfo) { - arch.SSAMarkMoves = func(s *ssagen.State, b *ssa.Block) {} - arch.SSAGenValue = 
ssaGenValue - arch.SSAGenBlock = ssaGenBlock -+ arch.LoadRegResult = loadRegResult -+ arch.SpillArgReg = spillArgReg - } -diff --git a/src/cmd/compile/internal/loong64/ssa.go b/src/cmd/compile/internal/loong64/ssa.go -index f348f396b8..278f30649f 100644 ---- a/src/cmd/compile/internal/loong64/ssa.go -+++ b/src/cmd/compile/internal/loong64/ssa.go -@@ -10,6 +10,7 @@ import ( - "cmd/compile/internal/base" - "cmd/compile/internal/ir" - "cmd/compile/internal/logopt" -+ "cmd/compile/internal/objw" - "cmd/compile/internal/ssa" - "cmd/compile/internal/ssagen" - "cmd/compile/internal/types" -@@ -145,6 +146,16 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) { - p.From.Reg = r - ssagen.AddrAuto(&p.To, v) - case ssa.OpArgIntReg, ssa.OpArgFloatReg: -+ // The assembler needs to wrap the entry safepoint/stack growth code with spill/unspill -+ // The loop only runs once. -+ for _, a := range v.Block.Func.RegArgs { -+ // Pass the spill/unspill information along to the assembler, offset by size of -+ // the saved LR slot. 
-+ addr := ssagen.SpillSlotAddr(a, loong64.REGSP, base.Ctxt.Arch.FixedFrameSize) -+ s.FuncInfo().AddSpill( -+ obj.RegSpill{Reg: a.Reg, Addr: addr, Unspill: loadByType(a.Type, a.Reg), Spill: storeByType(a.Type, a.Reg)}) -+ } -+ v.Block.Func.RegArgs = nil - ssagen.CheckArgReg(v) - case ssa.OpLOONG64ADDV, - ssa.OpLOONG64SUBV, -@@ -763,3 +774,22 @@ func ssaGenBlock(s *ssagen.State, b, next *ssa.Block) { - b.Fatalf("branch not implemented: %s", b.LongString()) - } - } -+ -+func loadRegResult(s *ssagen.State, f *ssa.Func, t *types.Type, reg int16, n *ir.Name, off int64) *obj.Prog { -+ p := s.Prog(loadByType(t, reg)) -+ p.From.Type = obj.TYPE_MEM -+ p.From.Name = obj.NAME_AUTO -+ p.From.Sym = n.Linksym() -+ p.From.Offset = n.FrameOffset() + off -+ p.To.Type = obj.TYPE_REG -+ p.To.Reg = reg -+ return p -+} -+ -+func spillArgReg(pp *objw.Progs, p *obj.Prog, f *ssa.Func, t *types.Type, reg int16, n *ir.Name, off int64) *obj.Prog { -+ p = pp.Append(p, storeByType(t, reg), obj.TYPE_REG, reg, 0, obj.TYPE_MEM, 0, n.FrameOffset()+off) -+ p.To.Name = obj.NAME_PARAM -+ p.To.Sym = n.Linksym() -+ p.Pos = p.Pos.WithNotStmt() -+ return p -+} --- -2.38.1 - diff --git a/0026-cmd-internal-obj-loong64-add-V-XV-SEQI-V-XV-.-AND-OR.patch b/0026-cmd-internal-obj-loong64-add-V-XV-SEQI-V-XV-.-AND-OR.patch new file mode 100644 index 0000000000000000000000000000000000000000..d73cc6515c410c16cd99184bc242d0799d477852 --- /dev/null +++ b/0026-cmd-internal-obj-loong64-add-V-XV-SEQI-V-XV-.-AND-OR.patch @@ -0,0 +1,410 @@ +From 38ab8bc5eb69cb2746b32fd4a6ca7931adb7722b Mon Sep 17 00:00:00 2001 +From: Guoqi Chen +Date: Fri, 29 Nov 2024 15:41:33 +0800 +Subject: [PATCH 26/44] cmd/internal/obj/loong64: add {V,XV}SEQI, + {V,XV}.{AND,OR,XOR,NOR} instructions support + +Go asm syntax: + VSEQB $1, V2, V3 + XVSEQB $2, X2, X3 + V{AND,OR,XOR,NOR}B $1, V2, V3 + XV{AND,OR,XOR,NOR}B $1, V2, V3 + V{AND,OR,XOR,NOR,ANDN,ORN}V V1, V2, V3 + XV{AND,OR,XOR,NOR,ANDN,ORN}V V1, V2, V3 + +Equivalent platform assembler syntax: + 
vseqi.b v3, v2, $1 + xvseqi.b x3, x2 ,$2 + v{and,or,xor,nor}.b v3, v2, $1 + xv{and,or,xor,nor}.b x3, x2, $1 + v{and,or,xor,nor,andn,orn}v v3, v2, v1 + xv{and,or,xor,nor,andn,orn}v x3, x2, x1 + +Change-Id: I56ae0db72c7f473755cbdc7f7171c1058a9def97 +--- + .../asm/internal/asm/testdata/loong64enc1.s | 38 ++++ + src/cmd/internal/obj/loong64/a.out.go | 21 +++ + src/cmd/internal/obj/loong64/anames.go | 20 ++ + src/cmd/internal/obj/loong64/asm.go | 173 ++++++++++++++++-- + 4 files changed, 238 insertions(+), 14 deletions(-) + +diff --git a/src/cmd/asm/internal/asm/testdata/loong64enc1.s b/src/cmd/asm/internal/asm/testdata/loong64enc1.s +index 3a3eb10a74..2418412a3a 100644 +--- a/src/cmd/asm/internal/asm/testdata/loong64enc1.s ++++ b/src/cmd/asm/internal/asm/testdata/loong64enc1.s +@@ -506,6 +506,16 @@ lable2: + XVSEQH X3, X2, X4 // 448c0074 + XVSEQW X3, X2, X4 // 440c0174 + XVSEQV X3, X2, X4 // 448c0174 ++ VSEQB $0, V2, V3 // 43008072 ++ VSEQH $1, V2, V3 // 43848072 ++ VSEQW $8, V2, V3 // 43208172 ++ VSEQV $15, V2, V3 // 43bc8172 ++ VSEQV $-15, V2, V3 // 43c48172 ++ XVSEQB $0, X2, X4 // 44008076 ++ XVSEQH $3, X2, X4 // 448c8076 ++ XVSEQW $12, X2, X4 // 44308176 ++ XVSEQV $15, X2, X4 // 44bc8176 ++ XVSEQV $-15, X2, X4 // 44c48176 + + // VPCNT{B,H,W,V}, XVPCNT{B,H,W,V} instruction + VPCNTB V1, V2 // 22209c72 +@@ -517,6 +527,34 @@ lable2: + XVPCNTW X3, X2 // 62289c76 + XVPCNTV X3, X2 // 622c9c76 + ++ // VANDV,VORV,VXORV,VNORV,VANDNV,VORNV ++ VANDV V1, V2, V3 // 43042671 ++ VORV V1, V2, V3 // 43842671 ++ VXORV V1, V2, V3 // 43042771 ++ VNORV V1, V2, V3 // 43842771 ++ VANDNV V1, V2, V3 // 43042871 ++ VORNV V1, V2, V3 // 43842871 ++ ++ // VANDB,VORB,VXORB,VNORB ++ VANDB $0, V2, V3 // 4300d073 ++ VORB $64, V2, V3 // 4300d573 ++ VXORB $128, V2, V3 // 4300da73 ++ VNORB $255, V2, V3 // 43fcdf73 ++ ++ // XVANDV,XVORV,XVXORV,XVNORV,XVANDNV,XVORNV ++ XVANDV X1, X2, X3 // 43042675 ++ XVORV X1, X2, X3 // 43842675 ++ XVXORV X1, X2, X3 // 43042775 ++ XVNORV X1, X2, X3 // 43842775 ++ 
XVANDNV X1, X2, X3 // 43042875 ++ XVORNV X1, X2, X3 // 43842875 ++ ++ // XVANDB,XVORB,XVXORB,XVNORB ++ XVANDB $0, X2, X3 // 4300d077 ++ XVORB $1, X2, X3 // 4304d477 ++ XVXORB $127, X2, X3 // 43fcd977 ++ XVNORB $255, X2, X3 // 43fcdf77 ++ + // MOVV C_DCON12_0, r + MOVV $0x7a90000000000000, R4 // MOVV $8831558869273542656, R4 // 04a41e03 + MOVV $0xea90000000000000, R4 // MOVV $-1544734672188080128, R4 // 04a43a03 +diff --git a/src/cmd/internal/obj/loong64/a.out.go b/src/cmd/internal/obj/loong64/a.out.go +index b2207c2523..bd3ce61826 100644 +--- a/src/cmd/internal/obj/loong64/a.out.go ++++ b/src/cmd/internal/obj/loong64/a.out.go +@@ -726,6 +726,27 @@ const ( + AXVMOVQ + + // LSX and LASX Bit-manipulation Instructions ++ AVANDB ++ AVORB ++ AVXORB ++ AVNORB ++ AXVANDB ++ AXVORB ++ AXVXORB ++ AXVNORB ++ AVANDV ++ AVORV ++ AVXORV ++ AVNORV ++ AVANDNV ++ AVORNV ++ AXVANDV ++ AXVORV ++ AXVXORV ++ AXVNORV ++ AXVANDNV ++ AXVORNV ++ + AVPCNTB + AVPCNTH + AVPCNTW +diff --git a/src/cmd/internal/obj/loong64/anames.go b/src/cmd/internal/obj/loong64/anames.go +index 3d2f329917..6c1537d123 100644 +--- a/src/cmd/internal/obj/loong64/anames.go ++++ b/src/cmd/internal/obj/loong64/anames.go +@@ -257,6 +257,26 @@ var Anames = []string{ + "FTINTRNEVD", + "VMOVQ", + "XVMOVQ", ++ "VANDB", ++ "VORB", ++ "VXORB", ++ "VNORB", ++ "XVANDB", ++ "XVORB", ++ "XVXORB", ++ "XVNORB", ++ "VANDV", ++ "VORV", ++ "VXORV", ++ "VNORV", ++ "VANDNV", ++ "VORNV", ++ "XVANDV", ++ "XVORV", ++ "XVXORV", ++ "XVNORV", ++ "XVANDNV", ++ "XVORNV", + "VPCNTB", + "VPCNTH", + "VPCNTW", +diff --git a/src/cmd/internal/obj/loong64/asm.go b/src/cmd/internal/obj/loong64/asm.go +index 5757c3c452..7247193c95 100644 +--- a/src/cmd/internal/obj/loong64/asm.go ++++ b/src/cmd/internal/obj/loong64/asm.go +@@ -51,6 +51,8 @@ const ( + // branchLoopHead marks loop entry. + // Used to insert padding for under-aligned loops. 
+ branchLoopHead ++ immFiledSi5 // The encoding of the immediate field in the instruction is 5-bits ++ immFiledUi8 // The encoding of the immediate field in the instruction is 8-bits + ) + + var optab = []Optab{ +@@ -88,6 +90,17 @@ var optab = []Optab{ + {ACMPEQF, C_FREG, C_FREG, C_NONE, C_FCCREG, C_NONE, 2, 4, 0, 0}, + {AVSEQB, C_VREG, C_VREG, C_NONE, C_VREG, C_NONE, 2, 4, 0, 0}, + {AXVSEQB, C_XREG, C_XREG, C_NONE, C_XREG, C_NONE, 2, 4, 0, 0}, ++ {AVSEQB, C_SCON, C_VREG, C_NONE, C_VREG, C_NONE, 13, 4, 0, immFiledSi5}, ++ {AXVSEQB, C_SCON, C_XREG, C_NONE, C_XREG, C_NONE, 13, 4, 0, immFiledSi5}, ++ {AVSEQB, C_ADDCON, C_VREG, C_NONE, C_VREG, C_NONE, 13, 4, 0, immFiledSi5}, ++ {AXVSEQB, C_ADDCON, C_XREG, C_NONE, C_XREG, C_NONE, 13, 4, 0, immFiledSi5}, ++ ++ {AVANDV, C_VREG, C_VREG, C_NONE, C_VREG, C_NONE, 2, 4, 0, 0}, ++ {AXVANDV, C_XREG, C_XREG, C_NONE, C_XREG, C_NONE, 2, 4, 0, 0}, ++ {AVANDB, C_SCON, C_VREG, C_NONE, C_VREG, C_NONE, 14, 4, 0, immFiledUi8}, ++ {AXVANDB, C_SCON, C_XREG, C_NONE, C_XREG, C_NONE, 14, 4, 0, immFiledUi8}, ++ {AVANDB, C_ADDCON, C_VREG, C_NONE, C_VREG, C_NONE, 14, 4, 0, immFiledUi8}, ++ {AXVANDB, C_ADDCON, C_XREG, C_NONE, C_XREG, C_NONE, 14, 4, 0, immFiledUi8}, + + {ACLOW, C_REG, C_NONE, C_NONE, C_REG, C_NONE, 9, 4, 0, 0}, + {AABSF, C_FREG, C_NONE, C_NONE, C_FREG, C_NONE, 9, 4, 0, 0}, +@@ -1499,6 +1512,7 @@ func buildop(ctxt *obj.Link) { + } + opset(i, r0) + } ++ + case AVSEQB: + opset(AVSEQH, r0) + opset(AVSEQW, r0) +@@ -1509,6 +1523,30 @@ func buildop(ctxt *obj.Link) { + opset(AXVSEQW, r0) + opset(AXVSEQV, r0) + ++ case AVANDB: ++ opset(AVORB, r0) ++ opset(AVXORB, r0) ++ opset(AVNORB, r0) ++ ++ case AXVANDB: ++ opset(AXVORB, r0) ++ opset(AXVXORB, r0) ++ opset(AXVNORB, r0) ++ ++ case AVANDV: ++ opset(AVORV, r0) ++ opset(AVXORV, r0) ++ opset(AVNORV, r0) ++ opset(AVANDNV, r0) ++ opset(AVORNV, r0) ++ ++ case AXVANDV: ++ opset(AXVORV, r0) ++ opset(AXVXORV, r0) ++ opset(AXVNORV, r0) ++ opset(AXVANDNV, r0) ++ opset(AXVORNV, r0) ++ + case AVPCNTB: 
+ opset(AVPCNTH, r0) + opset(AVPCNTW, r0) +@@ -1551,6 +1589,14 @@ func OP_12IRR(op uint32, i uint32, r2 uint32, r3 uint32) uint32 { + return op | (i&0xFFF)<<10 | (r2&0x1F)<<5 | (r3&0x1F)<<0 + } + ++func OP_8IRR(op uint32, i uint32, r2 uint32, r3 uint32) uint32 { ++ return op | (i&0xFF)<<10 | (r2&0x1F)<<5 | (r3&0x1F)<<0 ++} ++ ++func OP_5IRR(op uint32, i uint32, r2 uint32, r3 uint32) uint32 { ++ return op | (i&0x1F)<<10 | (r2&0x1F)<<5 | (r3&0x1F)<<0 ++} ++ + func OP_IR(op uint32, i uint32, r2 uint32) uint32 { + return op | (i&0xFFFFF)<<5 | (r2&0x1F)<<0 // ui20, rd5 + } +@@ -1623,12 +1669,10 @@ func (c *ctxt0) asmout(p *obj.Prog, o *Optab, out []uint32) { + + case 4: // add $scon,[r1],r2 + v := c.regoff(&p.From) +- + r := int(p.Reg) + if r == 0 { + r = int(p.To.Reg) + } +- + o1 = OP_12IRR(c.opirr(p.As), uint32(v), uint32(r), uint32(p.To.Reg)) + + case 5: // syscall +@@ -1738,6 +1782,36 @@ func (c *ctxt0) asmout(p *obj.Prog, o *Optab, out []uint32) { + c.ctxt.Diag("unexpected encoding\n%v", p) + } + ++ case 13: // add $si5,[r1],r2 ++ v := c.regoff(&p.From) ++ r := int(p.Reg) ++ if r == 0 { ++ r = int(p.To.Reg) ++ } ++ ++ switch o.flag { ++ case immFiledSi5: ++ c.checkimmFiled(p, v, 5, true) ++ o1 = OP_5IRR(c.opirr(p.As), uint32(v), uint32(r), uint32(p.To.Reg)) ++ default: ++ c.ctxt.Diag("Invalid immediate value type\n%v", p) ++ } ++ ++ case 14: // add $ui8,[r1],r2 ++ v := c.regoff(&p.From) ++ r := int(p.Reg) ++ if r == 0 { ++ r = int(p.To.Reg) ++ } ++ ++ switch o.flag { ++ case immFiledUi8: ++ c.checkimmFiled(p, v, 8, false) ++ o1 = OP_8IRR(c.opirr(p.As), uint32(v), uint32(r), uint32(p.To.Reg)) ++ default: ++ c.ctxt.Diag("Invalid immediate value type\n%v", p) ++ } ++ + case 15: // teq $c r,r + v := c.regoff(&p.From) + r := int(p.Reg) +@@ -1760,18 +1834,18 @@ func (c *ctxt0) asmout(p *obj.Prog, o *Optab, out []uint32) { + o2 = OP_15I(c.opi(ABREAK), uint32(v)) + + case 16: // sll $c,[r1],r2 +- v := c.regoff(&p.From) +- r := int(p.Reg) +- if r == 0 { +- r = int(p.To.Reg) 
+- } +- +- // instruction ending with V:6-digit immediate, others:5-digit immediate +- if v >= 32 && vshift(p.As) { +- o1 = OP_16IRR(c.opirr(p.As), uint32(v)&0x3f, uint32(r), uint32(p.To.Reg)) +- } else { +- o1 = OP_16IRR(c.opirr(p.As), uint32(v)&0x1f, uint32(r), uint32(p.To.Reg)) +- } ++ v := c.regoff(&p.From) ++ r := int(p.Reg) ++ if r == 0 { ++ r = int(p.To.Reg) ++ } ++ ++ // instruction ending with V:6-digit immediate, others:5-digit immediate ++ if v >= 32 && vshift(p.As) { ++ o1 = OP_16IRR(c.opirr(p.As), uint32(v)&0x3f, uint32(r), uint32(p.To.Reg)) ++ } else { ++ o1 = OP_16IRR(c.opirr(p.As), uint32(v)&0x1f, uint32(r), uint32(p.To.Reg)) ++ } + + case 17: // bstrpickw $msbw, r1, $lsbw, r2 + rd, rj := p.To.Reg, p.Reg +@@ -2348,6 +2422,21 @@ func (c *ctxt0) checkindex(p *obj.Prog, index uint32, mask uint32) { + } + } + ++// checkimmFiled checks whether the immediate value exceeds the valid encoding range ++func (c *ctxt0) checkimmFiled(p *obj.Prog, imm int32, bits uint8, isSigned bool) { ++ if isSigned { ++ bound := int32(1 << (bits - 1)) ++ if imm < -bound || imm > bound { ++ c.ctxt.Diag("signed immediate %v exceeds the %d-bit range: %v", imm, bits, p) ++ } ++ } else { ++ mask := uint32(0xffffffff) << bits ++ if uint32(imm) != (uint32(imm) & ^mask) { ++ c.ctxt.Diag("unsigned immediate %v exceeds the %d-bit range: %v", imm, bits, p) ++ } ++ } ++} ++ + func (c *ctxt0) vregoff(a *obj.Addr) int64 { + c.instoffset = 0 + c.aclass(a) +@@ -2588,6 +2677,30 @@ func (c *ctxt0) oprrr(a obj.As) uint32 { + return 0x0e003 << 15 // vseq.d + case AXVSEQV: + return 0x0e803 << 15 // xvseq.d ++ case AVANDV: ++ return 0x0E24C << 15 // vand.v ++ case AVORV: ++ return 0x0E24D << 15 // vor.v ++ case AVXORV: ++ return 0x0E24E << 15 // vxor.v ++ case AVNORV: ++ return 0x0E24F << 15 // vnor.v ++ case AVANDNV: ++ return 0x0E250 << 15 // vandn.v ++ case AVORNV: ++ return 0x0E251 << 15 // vorn.v ++ case AXVANDV: ++ return 0x0EA4C << 15 // xvand.v ++ case AXVORV: ++ return 0x0EA4D << 15 // 
xvor.v ++ case AXVXORV: ++ return 0x0EA4E << 15 // xvxor.v ++ case AXVNORV: ++ return 0x0EA4F << 15 // xvnor.v ++ case AXVANDNV: ++ return 0x0EA50 << 15 // xvandn.v ++ case AXVORNV: ++ return 0x0EA51 << 15 // xvorn.v + } + + if a < 0 { +@@ -2915,6 +3028,38 @@ func (c *ctxt0) opirr(a obj.As) uint32 { + return 0x021 << 24 + case ASCV: + return 0x023 << 24 ++ case AVANDB: ++ return 0x1CF4 << 18 // vandi.b ++ case AVORB: ++ return 0x1CF5 << 18 // vori.b ++ case AVXORB: ++ return 0x1CF6 << 18 // xori.b ++ case AVNORB: ++ return 0x1CF7 << 18 // xnori.b ++ case AXVANDB: ++ return 0x1DF4 << 18 // xvandi.b ++ case AXVORB: ++ return 0x1DF5 << 18 // xvori.b ++ case AXVXORB: ++ return 0x1DF6 << 18 // xvxori.b ++ case AXVNORB: ++ return 0x1DF7 << 18 // xvnor.b ++ case AVSEQB: ++ return 0x0E500 << 15 //vseqi.b ++ case AVSEQH: ++ return 0x0E501 << 15 // vseqi.h ++ case AVSEQW: ++ return 0x0E502 << 15 //vseqi.w ++ case AVSEQV: ++ return 0x0E503 << 15 //vseqi.d ++ case AXVSEQB: ++ return 0x0ED00 << 15 //xvseqi.b ++ case AXVSEQH: ++ return 0x0ED01 << 15 // xvseqi.h ++ case AXVSEQW: ++ return 0x0ED02 << 15 // xvseqi.w ++ case AXVSEQV: ++ return 0x0ED03 << 15 // xvseqi.d + } + + if a < 0 { +-- +2.38.1 + diff --git a/0027-cmd-compile-update-loong64-CALL-ops.patch b/0027-cmd-compile-update-loong64-CALL-ops.patch deleted file mode 100644 index 0d2326c33b70d1c0b934ee5560df55bdee527108..0000000000000000000000000000000000000000 --- a/0027-cmd-compile-update-loong64-CALL-ops.patch +++ /dev/null @@ -1,78 +0,0 @@ -From 8ae8fa8d7878f23e47e4e8082260892ee7b6e211 Mon Sep 17 00:00:00 2001 -From: Guoqi Chen -Date: Tue, 15 Aug 2023 20:23:46 +0800 -Subject: [PATCH 27/51] cmd/compile: update loong64 CALL* ops - -allow the loong64 CALL* ops to take variable number of args - -Update #40724 - -Co-authored-by: Xiaolin Zhao -Change-Id: I117c6b48e0fbbe3ed8fd4c133895178c2cf288b1 ---- - src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go | 8 ++++---- - src/cmd/compile/internal/ssa/opGen.go | 8 ++++---- - 2 files 
changed, 8 insertions(+), 8 deletions(-) - -diff --git a/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go b/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go -index 8e3f3ce720..9a83965493 100644 ---- a/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go -+++ b/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go -@@ -273,10 +273,10 @@ func init() { - {name: "MOVDF", argLength: 1, reg: fp11, asm: "MOVDF"}, // float64 -> float32 - - // function calls -- {name: "CALLstatic", argLength: 1, reg: regInfo{clobbers: callerSave}, aux: "CallOff", clobberFlags: true, call: true}, // call static function aux.(*obj.LSym). arg0=mem, auxint=argsize, returns mem -- {name: "CALLtail", argLength: 1, reg: regInfo{clobbers: callerSave}, aux: "CallOff", clobberFlags: true, call: true, tailCall: true}, // tail call static function aux.(*obj.LSym). arg0=mem, auxint=argsize, returns mem -- {name: "CALLclosure", argLength: 3, reg: regInfo{inputs: []regMask{gpsp, buildReg("R29"), 0}, clobbers: callerSave}, aux: "CallOff", clobberFlags: true, call: true}, // call function via closure. arg0=codeptr, arg1=closure, arg2=mem, auxint=argsize, returns mem -- {name: "CALLinter", argLength: 2, reg: regInfo{inputs: []regMask{gp}, clobbers: callerSave}, aux: "CallOff", clobberFlags: true, call: true}, // call fn by pointer. arg0=codeptr, arg1=mem, auxint=argsize, returns mem -+ {name: "CALLstatic", argLength: -1, reg: regInfo{clobbers: callerSave}, aux: "CallOff", clobberFlags: true, call: true}, // call static function aux.(*obj.LSym). last arg=mem, auxint=argsize, returns mem -+ {name: "CALLtail", argLength: -1, reg: regInfo{clobbers: callerSave}, aux: "CallOff", clobberFlags: true, call: true, tailCall: true}, // tail call static function aux.(*obj.LSym). last arg=mem, auxint=argsize, returns mem -+ {name: "CALLclosure", argLength: -1, reg: regInfo{inputs: []regMask{gpsp, buildReg("R29"), 0}, clobbers: callerSave}, aux: "CallOff", clobberFlags: true, call: true}, // call function via closure. 
arg0=codeptr, arg1=closure, last arg=mem, auxint=argsize, returns mem -+ {name: "CALLinter", argLength: -1, reg: regInfo{inputs: []regMask{gp}, clobbers: callerSave}, aux: "CallOff", clobberFlags: true, call: true}, // call fn by pointer. arg0=codeptr, last arg=mem, auxint=argsize, returns mem - - // duffzero - // arg0 = address of memory to zero -diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go -index 482046f016..290ad2682c 100644 ---- a/src/cmd/compile/internal/ssa/opGen.go -+++ b/src/cmd/compile/internal/ssa/opGen.go -@@ -24216,7 +24216,7 @@ var opcodeTable = [...]opInfo{ - { - name: "CALLstatic", - auxType: auxCallOff, -- argLen: 1, -+ argLen: -1, - clobberFlags: true, - call: true, - reg: regInfo{ -@@ -24226,7 +24226,7 @@ var opcodeTable = [...]opInfo{ - { - name: "CALLtail", - auxType: auxCallOff, -- argLen: 1, -+ argLen: -1, - clobberFlags: true, - call: true, - tailCall: true, -@@ -24237,7 +24237,7 @@ var opcodeTable = [...]opInfo{ - { - name: "CALLclosure", - auxType: auxCallOff, -- argLen: 3, -+ argLen: -1, - clobberFlags: true, - call: true, - reg: regInfo{ -@@ -24251,7 +24251,7 @@ var opcodeTable = [...]opInfo{ - { - name: "CALLinter", - auxType: auxCallOff, -- argLen: 2, -+ argLen: -1, - clobberFlags: true, - call: true, - reg: regInfo{ --- -2.38.1 - diff --git a/0027-cmd-internal-obj-loong64-add-V-XV-ADD-SUB-.-B-H-W-D-.patch b/0027-cmd-internal-obj-loong64-add-V-XV-ADD-SUB-.-B-H-W-D-.patch new file mode 100644 index 0000000000000000000000000000000000000000..2192272d7bb79100579ca52591c8b18158fec4a7 --- /dev/null +++ b/0027-cmd-internal-obj-loong64-add-V-XV-ADD-SUB-.-B-H-W-D-.patch @@ -0,0 +1,207 @@ +From f5bbb15710944ebcc7d2c808fe9087892a690bc4 Mon Sep 17 00:00:00 2001 +From: Xiaolin Zhao +Date: Wed, 11 Dec 2024 09:26:38 +0800 +Subject: [PATCH 27/44] cmd/internal/obj/loong64: add + {V,XV}{ADD/SUB}.{B,H,W,D,Q} instructions support + +Go asm syntax: + V{ADD/SUB}{B,H,W,V,Q} VK, VJ, VD + XV{ADD/SUB}{B,H,W,V,Q} XK, 
XJ, XD + +Equivalent platform assembler syntax: + v{add/sub}.{b,w,h,d,q} vd, vj, vk + xv{add/sub}.{b,w,h,d,q} xd, xj, xk + +Change-Id: Iadc28100c93d6d6c69e9641bfea78fa85d75bddf +--- + .../asm/internal/asm/testdata/loong64enc1.s | 22 +++++++ + src/cmd/internal/obj/loong64/a.out.go | 22 +++++++ + src/cmd/internal/obj/loong64/anames.go | 20 +++++++ + src/cmd/internal/obj/loong64/asm.go | 60 +++++++++++++++++++ + 4 files changed, 124 insertions(+) + +diff --git a/src/cmd/asm/internal/asm/testdata/loong64enc1.s b/src/cmd/asm/internal/asm/testdata/loong64enc1.s +index 2418412a3a..76faf2d3cb 100644 +--- a/src/cmd/asm/internal/asm/testdata/loong64enc1.s ++++ b/src/cmd/asm/internal/asm/testdata/loong64enc1.s +@@ -555,6 +555,28 @@ lable2: + XVXORB $127, X2, X3 // 43fcd977 + XVNORB $255, X2, X3 // 43fcdf77 + ++ // [X]VADD{B,H,W,V,Q}, [X]VSUB{B,H,W,V,Q} instructions ++ VADDB V1, V2, V3 // 43040a70 ++ VADDH V1, V2, V3 // 43840a70 ++ VADDW V1, V2, V3 // 43040b70 ++ VADDV V1, V2, V3 // 43840b70 ++ VADDQ V1, V2, V3 // 43042d71 ++ VSUBB V1, V2, V3 // 43040c70 ++ VSUBH V1, V2, V3 // 43840c70 ++ VSUBW V1, V2, V3 // 43040d70 ++ VSUBV V1, V2, V3 // 43840d70 ++ VSUBQ V1, V2, V3 // 43842d71 ++ XVADDB X3, X2, X1 // 410c0a74 ++ XVADDH X3, X2, X1 // 418c0a74 ++ XVADDW X3, X2, X1 // 410c0b74 ++ XVADDV X3, X2, X1 // 418c0b74 ++ XVADDQ X3, X2, X1 // 410c2d75 ++ XVSUBB X3, X2, X1 // 410c0c74 ++ XVSUBH X3, X2, X1 // 418c0c74 ++ XVSUBW X3, X2, X1 // 410c0d74 ++ XVSUBV X3, X2, X1 // 418c0d74 ++ XVSUBQ X3, X2, X1 // 418c2d75 ++ + // MOVV C_DCON12_0, r + MOVV $0x7a90000000000000, R4 // MOVV $8831558869273542656, R4 // 04a41e03 + MOVV $0xea90000000000000, R4 // MOVV $-1544734672188080128, R4 // 04a43a03 +diff --git a/src/cmd/internal/obj/loong64/a.out.go b/src/cmd/internal/obj/loong64/a.out.go +index bd3ce61826..3bef0da869 100644 +--- a/src/cmd/internal/obj/loong64/a.out.go ++++ b/src/cmd/internal/obj/loong64/a.out.go +@@ -725,6 +725,28 @@ const ( + AVMOVQ + AXVMOVQ + ++ // LSX and LASX arithmetic 
instructions ++ AVADDB ++ AVADDH ++ AVADDW ++ AVADDV ++ AVADDQ ++ AXVADDB ++ AXVADDH ++ AXVADDW ++ AXVADDV ++ AXVADDQ ++ AVSUBB ++ AVSUBH ++ AVSUBW ++ AVSUBV ++ AVSUBQ ++ AXVSUBB ++ AXVSUBH ++ AXVSUBW ++ AXVSUBV ++ AXVSUBQ ++ + // LSX and LASX Bit-manipulation Instructions + AVANDB + AVORB +diff --git a/src/cmd/internal/obj/loong64/anames.go b/src/cmd/internal/obj/loong64/anames.go +index 6c1537d123..194021219e 100644 +--- a/src/cmd/internal/obj/loong64/anames.go ++++ b/src/cmd/internal/obj/loong64/anames.go +@@ -257,6 +257,26 @@ var Anames = []string{ + "FTINTRNEVD", + "VMOVQ", + "XVMOVQ", ++ "VADDB", ++ "VADDH", ++ "VADDW", ++ "VADDV", ++ "VADDQ", ++ "XVADDB", ++ "XVADDH", ++ "XVADDW", ++ "XVADDV", ++ "XVADDQ", ++ "VSUBB", ++ "VSUBH", ++ "VSUBW", ++ "VSUBV", ++ "VSUBQ", ++ "XVSUBB", ++ "XVSUBH", ++ "XVSUBW", ++ "XVSUBV", ++ "XVSUBQ", + "VANDB", + "VORB", + "VXORB", +diff --git a/src/cmd/internal/obj/loong64/asm.go b/src/cmd/internal/obj/loong64/asm.go +index 7247193c95..7489b4dbf6 100644 +--- a/src/cmd/internal/obj/loong64/asm.go ++++ b/src/cmd/internal/obj/loong64/asm.go +@@ -1539,6 +1539,16 @@ func buildop(ctxt *obj.Link) { + opset(AVNORV, r0) + opset(AVANDNV, r0) + opset(AVORNV, r0) ++ opset(AVADDB, r0) ++ opset(AVADDH, r0) ++ opset(AVADDW, r0) ++ opset(AVADDV, r0) ++ opset(AVADDQ, r0) ++ opset(AVSUBB, r0) ++ opset(AVSUBH, r0) ++ opset(AVSUBW, r0) ++ opset(AVSUBV, r0) ++ opset(AVSUBQ, r0) + + case AXVANDV: + opset(AXVORV, r0) +@@ -1546,6 +1556,16 @@ func buildop(ctxt *obj.Link) { + opset(AXVNORV, r0) + opset(AXVANDNV, r0) + opset(AXVORNV, r0) ++ opset(AXVADDB, r0) ++ opset(AXVADDH, r0) ++ opset(AXVADDW, r0) ++ opset(AXVADDV, r0) ++ opset(AXVADDQ, r0) ++ opset(AXVSUBB, r0) ++ opset(AXVSUBH, r0) ++ opset(AXVSUBW, r0) ++ opset(AXVSUBV, r0) ++ opset(AXVSUBQ, r0) + + case AVPCNTB: + opset(AVPCNTH, r0) +@@ -2701,6 +2721,46 @@ func (c *ctxt0) oprrr(a obj.As) uint32 { + return 0x0EA50 << 15 // xvandn.v + case AXVORNV: + return 0x0EA51 << 15 // xvorn.v ++ case AVADDB: 
++ return 0xE014 << 15 // vadd.b ++ case AVADDH: ++ return 0xE015 << 15 // vadd.h ++ case AVADDW: ++ return 0xE016 << 15 // vadd.w ++ case AVADDV: ++ return 0xE017 << 15 // vadd.d ++ case AVADDQ: ++ return 0xE25A << 15 // vadd.q ++ case AVSUBB: ++ return 0xE018 << 15 // vsub.b ++ case AVSUBH: ++ return 0xE019 << 15 // vsub.h ++ case AVSUBW: ++ return 0xE01A << 15 // vsub.w ++ case AVSUBV: ++ return 0xE01B << 15 // vsub.d ++ case AVSUBQ: ++ return 0xE25B << 15 // vsub.q ++ case AXVADDB: ++ return 0xE814 << 15 // xvadd.b ++ case AXVADDH: ++ return 0xE815 << 15 // xvadd.h ++ case AXVADDW: ++ return 0xE816 << 15 // xvadd.w ++ case AXVADDV: ++ return 0xE817 << 15 // xvadd.d ++ case AXVADDQ: ++ return 0xEA5A << 15 // xvadd.q ++ case AXVSUBB: ++ return 0xE818 << 15 // xvsub.b ++ case AXVSUBH: ++ return 0xE819 << 15 // xvsub.h ++ case AXVSUBW: ++ return 0xE81A << 15 // xvsub.w ++ case AXVSUBV: ++ return 0xE81B << 15 // xvsub.d ++ case AXVSUBQ: ++ return 0xEA5B << 15 // xvsub.q + } + + if a < 0 { +-- +2.38.1 + diff --git a/0028-cmd-internal-obj-loong64-add-V-XV-ILV-L-H-.-B-H-W-D-.patch b/0028-cmd-internal-obj-loong64-add-V-XV-ILV-L-H-.-B-H-W-D-.patch new file mode 100644 index 0000000000000000000000000000000000000000..316746628a9aa2ec11178b65fa685d12a159372e --- /dev/null +++ b/0028-cmd-internal-obj-loong64-add-V-XV-ILV-L-H-.-B-H-W-D-.patch @@ -0,0 +1,181 @@ +From db7ccba69b0c246434a610f3be2ab31c8406b163 Mon Sep 17 00:00:00 2001 +From: Xiaolin Zhao +Date: Wed, 11 Dec 2024 10:24:13 +0800 +Subject: [PATCH 28/44] cmd/internal/obj/loong64: add {V,XV}ILV{L/H}.{B/H/W/D} + instructions support + +Go asm syntax: + VILV{L/H}{B/H/W/V} VK, VJ, VD + XVILV{L/H}{B/H/W/V} XK, XJ, XD +Equivalent platform assembler syntax: + vilv{l/h}.{b/h/w/d} vd, vj, vk + xvilv{l/h}.{b/h/w/d} xd, xj, xk + +Change-Id: If1f146fd5e049281494026bf4c24d302bcad1373 +--- + .../asm/internal/asm/testdata/loong64enc1.s | 18 +++++++ + src/cmd/internal/obj/loong64/a.out.go | 18 +++++++ + 
src/cmd/internal/obj/loong64/anames.go | 16 +++++++ + src/cmd/internal/obj/loong64/asm.go | 48 +++++++++++++++++++ + 4 files changed, 100 insertions(+) + +diff --git a/src/cmd/asm/internal/asm/testdata/loong64enc1.s b/src/cmd/asm/internal/asm/testdata/loong64enc1.s +index 76faf2d3cb..419f257c4a 100644 +--- a/src/cmd/asm/internal/asm/testdata/loong64enc1.s ++++ b/src/cmd/asm/internal/asm/testdata/loong64enc1.s +@@ -577,6 +577,24 @@ lable2: + XVSUBV X3, X2, X1 // 418c0d74 + XVSUBQ X3, X2, X1 // 418c2d75 + ++ // [X]VILV{L/H}{B,H,W,V} instructions ++ VILVLB V1, V2, V3 // 43041a71 ++ VILVLH V1, V2, V3 // 43841a71 ++ VILVLW V1, V2, V3 // 43041b71 ++ VILVLV V1, V2, V3 // 43841b71 ++ VILVHB V1, V2, V3 // 43041c71 ++ VILVHH V1, V2, V3 // 43841c71 ++ VILVHW V1, V2, V3 // 43041d71 ++ VILVHV V1, V2, V3 // 43841d71 ++ XVILVLB X3, X2, X1 // 410c1a75 ++ XVILVLH X3, X2, X1 // 418c1a75 ++ XVILVLW X3, X2, X1 // 410c1b75 ++ XVILVLV X3, X2, X1 // 418c1b75 ++ XVILVHB X3, X2, X1 // 410c1c75 ++ XVILVHH X3, X2, X1 // 418c1c75 ++ XVILVHW X3, X2, X1 // 410c1d75 ++ XVILVHV X3, X2, X1 // 418c1d75 ++ + // MOVV C_DCON12_0, r + MOVV $0x7a90000000000000, R4 // MOVV $8831558869273542656, R4 // 04a41e03 + MOVV $0xea90000000000000, R4 // MOVV $-1544734672188080128, R4 // 04a43a03 +diff --git a/src/cmd/internal/obj/loong64/a.out.go b/src/cmd/internal/obj/loong64/a.out.go +index 3bef0da869..c7f4769395 100644 +--- a/src/cmd/internal/obj/loong64/a.out.go ++++ b/src/cmd/internal/obj/loong64/a.out.go +@@ -788,6 +788,24 @@ const ( + AVSEQV + AXVSEQV + ++ // LSX and LASX move and shuffle instructions ++ AVILVLB ++ AVILVLH ++ AVILVLW ++ AVILVLV ++ AVILVHB ++ AVILVHH ++ AVILVHW ++ AVILVHV ++ AXVILVLB ++ AXVILVLH ++ AXVILVLW ++ AXVILVLV ++ AXVILVHB ++ AXVILVHH ++ AXVILVHW ++ AXVILVHV ++ + ALAST + + // aliases +diff --git a/src/cmd/internal/obj/loong64/anames.go b/src/cmd/internal/obj/loong64/anames.go +index 194021219e..485940e19c 100644 +--- a/src/cmd/internal/obj/loong64/anames.go ++++ 
b/src/cmd/internal/obj/loong64/anames.go +@@ -313,5 +313,21 @@ var Anames = []string{ + "XVSEQW", + "VSEQV", + "XVSEQV", ++ "VILVLB", ++ "VILVLH", ++ "VILVLW", ++ "VILVLV", ++ "VILVHB", ++ "VILVHH", ++ "VILVHW", ++ "VILVHV", ++ "XVILVLB", ++ "XVILVLH", ++ "XVILVLW", ++ "XVILVLV", ++ "XVILVHB", ++ "XVILVHH", ++ "XVILVHW", ++ "XVILVHV", + "LAST", + } +diff --git a/src/cmd/internal/obj/loong64/asm.go b/src/cmd/internal/obj/loong64/asm.go +index 7489b4dbf6..9ef414a132 100644 +--- a/src/cmd/internal/obj/loong64/asm.go ++++ b/src/cmd/internal/obj/loong64/asm.go +@@ -1549,6 +1549,14 @@ func buildop(ctxt *obj.Link) { + opset(AVSUBW, r0) + opset(AVSUBV, r0) + opset(AVSUBQ, r0) ++ opset(AVILVLB, r0) ++ opset(AVILVLH, r0) ++ opset(AVILVLW, r0) ++ opset(AVILVLV, r0) ++ opset(AVILVHB, r0) ++ opset(AVILVHH, r0) ++ opset(AVILVHW, r0) ++ opset(AVILVHV, r0) + + case AXVANDV: + opset(AXVORV, r0) +@@ -1566,6 +1574,14 @@ func buildop(ctxt *obj.Link) { + opset(AXVSUBW, r0) + opset(AXVSUBV, r0) + opset(AXVSUBQ, r0) ++ opset(AXVILVLB, r0) ++ opset(AXVILVLH, r0) ++ opset(AXVILVLW, r0) ++ opset(AXVILVLV, r0) ++ opset(AXVILVHB, r0) ++ opset(AXVILVHH, r0) ++ opset(AXVILVHW, r0) ++ opset(AXVILVHV, r0) + + case AVPCNTB: + opset(AVPCNTH, r0) +@@ -2761,6 +2777,38 @@ func (c *ctxt0) oprrr(a obj.As) uint32 { + return 0xE81B << 15 // xvsub.d + case AXVSUBQ: + return 0xEA5B << 15 // xvsub.q ++ case AVILVLB: ++ return 0xE234 << 15 // vilvl.b ++ case AVILVLH: ++ return 0xE235 << 15 // vilvl.h ++ case AVILVLW: ++ return 0xE236 << 15 // vilvl.w ++ case AVILVLV: ++ return 0xE237 << 15 // vilvl.d ++ case AVILVHB: ++ return 0xE238 << 15 // vilvh.b ++ case AVILVHH: ++ return 0xE239 << 15 // vilvh.h ++ case AVILVHW: ++ return 0xE23A << 15 // vilvh.w ++ case AVILVHV: ++ return 0xE23B << 15 // vilvh.d ++ case AXVILVLB: ++ return 0xEA34 << 15 // xvilvl.b ++ case AXVILVLH: ++ return 0xEA35 << 15 // xvilvl.h ++ case AXVILVLW: ++ return 0xEA36 << 15 // xvilvl.w ++ case AXVILVLV: ++ return 0xEA37 << 15 // xvilvl.d 
++ case AXVILVHB: ++ return 0xEA38 << 15 // xvilvh.b ++ case AXVILVHH: ++ return 0xEA39 << 15 // xvilvh.h ++ case AXVILVHW: ++ return 0xEA3A << 15 // xvilvh.w ++ case AXVILVHV: ++ return 0xEA3B << 15 // xvilvh.d + } + + if a < 0 { +-- +2.38.1 + diff --git a/0028-runtime-make-duff-device-as-ABIInternal-for-loong64.patch b/0028-runtime-make-duff-device-as-ABIInternal-for-loong64.patch deleted file mode 100644 index 1d719712a7d0fc0570ff1415c34c49eb1a7b1abb..0000000000000000000000000000000000000000 --- a/0028-runtime-make-duff-device-as-ABIInternal-for-loong64.patch +++ /dev/null @@ -1,61 +0,0 @@ -From 46396817e001306224c5b821c691f92a68e4a598 Mon Sep 17 00:00:00 2001 -From: Guoqi Chen -Date: Tue, 15 Aug 2023 20:54:16 +0800 -Subject: [PATCH 28/51] runtime: make duff device as ABIInternal for loong64 - -Update #40724 - -Co-authored-by: Xiaolin Zhao -Change-Id: Iaf7a7b7cb1897da859f59fafdedc4a249f867e98 ---- - src/runtime/duff_loong64.s | 4 ++-- - src/runtime/mkduff.go | 4 ++-- - 2 files changed, 4 insertions(+), 4 deletions(-) - -diff --git a/src/runtime/duff_loong64.s b/src/runtime/duff_loong64.s -index df8b653965..b05502d91d 100644 ---- a/src/runtime/duff_loong64.s -+++ b/src/runtime/duff_loong64.s -@@ -4,7 +4,7 @@ - - #include "textflag.h" - --TEXT runtime·duffzero(SB), NOSPLIT|NOFRAME, $0-0 -+TEXT runtime·duffzero(SB), NOSPLIT|NOFRAME, $0-0 - MOVV R0, (R20) - ADDV $8, R20 - MOVV R0, (R20) -@@ -263,7 +263,7 @@ TEXT runtime·duffzero(SB), NOSPLIT|NOFRAME, $0-0 - ADDV $8, R20 - RET - --TEXT runtime·duffcopy(SB), NOSPLIT|NOFRAME, $0-0 -+TEXT runtime·duffcopy(SB), NOSPLIT|NOFRAME, $0-0 - MOVV (R20), R30 - ADDV $8, R20 - MOVV R30, (R21) -diff --git a/src/runtime/mkduff.go b/src/runtime/mkduff.go -index 77674254d4..b7f07b5087 100644 ---- a/src/runtime/mkduff.go -+++ b/src/runtime/mkduff.go -@@ -181,7 +181,7 @@ func zeroLOONG64(w io.Writer) { - // R0: always zero - // R19 (aka REGRT1): ptr to memory to be zeroed - // On return, R19 points to the last zeroed dword. 
-- fmt.Fprintln(w, "TEXT runtime·duffzero(SB), NOSPLIT|NOFRAME, $0-0") -+ fmt.Fprintln(w, "TEXT runtime·duffzero(SB), NOSPLIT|NOFRAME, $0-0") - for i := 0; i < 128; i++ { - fmt.Fprintln(w, "\tMOVV\tR0, (R20)") - fmt.Fprintln(w, "\tADDV\t$8, R20") -@@ -190,7 +190,7 @@ func zeroLOONG64(w io.Writer) { - } - - func copyLOONG64(w io.Writer) { -- fmt.Fprintln(w, "TEXT runtime·duffcopy(SB), NOSPLIT|NOFRAME, $0-0") -+ fmt.Fprintln(w, "TEXT runtime·duffcopy(SB), NOSPLIT|NOFRAME, $0-0") - for i := 0; i < 128; i++ { - fmt.Fprintln(w, "\tMOVV\t(R20), R30") - fmt.Fprintln(w, "\tADDV\t$8, R20") --- -2.38.1 - diff --git a/0029-cmd-internal-obj-loong64-add-V-XV-SLL-SRL-SRA-ROTR-I.patch b/0029-cmd-internal-obj-loong64-add-V-XV-SLL-SRL-SRA-ROTR-I.patch new file mode 100644 index 0000000000000000000000000000000000000000..71ebec387f8fd8b8c24ca42ab3e6d936640ccb99 --- /dev/null +++ b/0029-cmd-internal-obj-loong64-add-V-XV-SLL-SRL-SRA-ROTR-I.patch @@ -0,0 +1,599 @@ +From d765027e47dec10f8869d04b0bf52661ac63f302 Mon Sep 17 00:00:00 2001 +From: Xiaolin Zhao +Date: Wed, 11 Dec 2024 14:19:04 +0800 +Subject: [PATCH 29/44] cmd/internal/obj/loong64: add + {V,XV}{SLL/SRL/SRA/ROTR}[I].{B/H/W/D} instructions support + +Go asm syntax: + V{SLL/SRL/SRA/ROTR}{B/H/W/V} $1, V2, V3 + XV{SLL/SRL/SRA/ROTR}{B/H/W/V} $1, X2, X3 + V{SLL/SRL/SRA/ROTR}{B/H/W/V} VK, VJ, VD + XV{SLL/SRL/SRA/ROTR}{B/H/W/V} XK, XJ, XD + +Equivalent platform assembler syntax: + v{sll/srl/sra/rotr}i.{b/h/w/d} v3, v2, $1 + xv{sll/srl/sra/rotr}i.{b/h/w/d} x3, x2, $1 + v{sll/srl/sra/rotr}.{b/h/w/d} vd, vj, vk + xv{sll/srl/sra/rotr}.{b/h/w/d} xd, xj, xk + +Change-Id: I8693e15f3778057e5a1e636d618c6f46acc5042b +--- + .../asm/internal/asm/testdata/loong64enc1.s | 130 +++++++++ + src/cmd/internal/obj/loong64/a.out.go | 33 +++ + src/cmd/internal/obj/loong64/anames.go | 32 ++ + src/cmd/internal/obj/loong64/asm.go | 274 +++++++++++++++++- + 4 files changed, 468 insertions(+), 1 deletion(-) + +diff --git 
a/src/cmd/asm/internal/asm/testdata/loong64enc1.s b/src/cmd/asm/internal/asm/testdata/loong64enc1.s +index 419f257c4a..79012784dc 100644 +--- a/src/cmd/asm/internal/asm/testdata/loong64enc1.s ++++ b/src/cmd/asm/internal/asm/testdata/loong64enc1.s +@@ -595,6 +595,136 @@ lable2: + XVILVHW X3, X2, X1 // 410c1d75 + XVILVHV X3, X2, X1 // 418c1d75 + ++ // [X]{VSLL/VSRL/VSRA/VROTR}{B,H,W,V} instructions ++ VSLLB V1, V2, V3 // 4304e870 ++ VSLLH V1, V2, V3 // 4384e870 ++ VSLLW V1, V2, V3 // 4304e970 ++ VSLLV V1, V2, V3 // 4384e970 ++ VSRLB V1, V2, V3 // 4304ea70 ++ VSRLH V1, V2, V3 // 4384ea70 ++ VSRLW V1, V2, V3 // 4304eb70 ++ VSRLV V1, V2, V3 // 4384eb70 ++ VSRAB V1, V2, V3 // 4304ec70 ++ VSRAH V1, V2, V3 // 4384ec70 ++ VSRAW V1, V2, V3 // 4304ed70 ++ VSRAV V1, V2, V3 // 4384ed70 ++ VROTRB V1, V2, V3 // 4304ee70 ++ VROTRH V1, V2, V3 // 4384ee70 ++ VROTRW V1, V2, V3 // 4304ef70 ++ VROTRV V1, V2, V3 // 4384ef70 ++ XVSLLB X3, X2, X1 // 410ce874 ++ XVSLLH X3, X2, X1 // 418ce874 ++ XVSLLW X3, X2, X1 // 410ce974 ++ XVSLLV X3, X2, X1 // 418ce974 ++ XVSRLB X3, X2, X1 // 410cea74 ++ XVSRLH X3, X2, X1 // 418cea74 ++ XVSRLW X3, X2, X1 // 410ceb74 ++ XVSRLV X3, X2, X1 // 418ceb74 ++ XVSRAB X3, X2, X1 // 410cec74 ++ XVSRAH X3, X2, X1 // 418cec74 ++ XVSRAW X3, X2, X1 // 410ced74 ++ XVSRAV X3, X2, X1 // 418ced74 ++ XVROTRB X3, X2, X1 // 410cee74 ++ XVROTRH X3, X2, X1 // 418cee74 ++ XVROTRW X3, X2, X1 // 410cef74 ++ XVROTRV X3, X2, X1 // 418cef74 ++ VSLLB $0, V1, V2 // 22202c73 ++ VSLLB $7, V1, V2 // 223c2c73 ++ VSLLB $5, V1 // 21342c73 ++ VSLLH $0, V1, V2 // 22402c73 ++ VSLLH $15, V1, V2 // 227c2c73 ++ VSLLH $10, V1 // 21682c73 ++ VSLLW $0, V1, V2 // 22802c73 ++ VSLLW $31, V1, V2 // 22fc2c73 ++ VSLLW $11, V1 // 21ac2c73 ++ VSLLV $0, V1, V2 // 22002d73 ++ VSLLV $63, V1, V2 // 22fc2d73 ++ VSLLV $30, V1 // 21782d73 ++ VSRLB $0, V1, V2 // 22203073 ++ VSRLB $7, V1, V2 // 223c3073 ++ VSRLB $4, V1 // 21303073 ++ VSRLH $0, V1, V2 // 22403073 ++ VSRLH $15, V1, V2 // 227c3073 ++ VSRLH $9, V1 // 
21643073 ++ VSRLW $0, V1, V2 // 22803073 ++ VSRLW $31, V1, V2 // 22fc3073 ++ VSRLW $16, V1 // 21c03073 ++ VSRLV $0, V1, V2 // 22003173 ++ VSRLV $63, V1, V2 // 22fc3173 ++ VSRLV $40, V1 // 21a03173 ++ VSRAB $0, V1, V2 // 22203473 ++ VSRAB $7, V1, V2 // 223c3473 ++ VSRAB $6, V1 // 21383473 ++ VSRAH $0, V1, V2 // 22403473 ++ VSRAH $15, V1, V2 // 227c3473 ++ VSRAH $8, V1 // 21603473 ++ VSRAW $0, V1, V2 // 22803473 ++ VSRAW $31, V1, V2 // 22fc3473 ++ VSRAW $12, V1 // 21b03473 ++ VSRAV $0, V1, V2 // 22003573 ++ VSRAV $63, V1, V2 // 22fc3573 ++ VSRAV $50, V1 // 21c83573 ++ VROTRB $0, V1, V2 // 2220a072 ++ VROTRB $7, V1, V2 // 223ca072 ++ VROTRB $3, V1 // 212ca072 ++ VROTRH $0, V1, V2 // 2240a072 ++ VROTRH $15, V1, V2 // 227ca072 ++ VROTRH $5, V1 // 2154a072 ++ VROTRW $0, V1, V2 // 2280a072 ++ VROTRW $31, V1, V2 // 22fca072 ++ VROTRW $18, V1 // 21c8a072 ++ VROTRV $0, V1, V2 // 2200a172 ++ VROTRV $63, V1, V2 // 22fca172 ++ VROTRV $52, V1 // 21d0a172 ++ XVSLLB $0, X2, X1 // 41202c77 ++ XVSLLB $7, X2, X1 // 413c2c77 ++ XVSLLB $4, X2 // 42302c77 ++ XVSLLH $0, X2, X1 // 41402c77 ++ XVSLLH $15, X2, X1 // 417c2c77 ++ XVSLLH $8, X2 // 42602c77 ++ XVSLLW $0, X2, X1 // 41802c77 ++ XVSLLW $31, X2, X1 // 41fc2c77 ++ XVSLLW $13, X2 // 42b42c77 ++ XVSLLV $0, X2, X1 // 41002d77 ++ XVSLLV $63, X2, X1 // 41fc2d77 ++ XVSLLV $36, X2 // 42902d77 ++ XVSRLB $0, X2, X1 // 41203077 ++ XVSRLB $7, X2, X1 // 413c3077 ++ XVSRLB $5, X2 // 42343077 ++ XVSRLH $0, X2, X1 // 41403077 ++ XVSRLH $15, X2, X1 // 417c3077 ++ XVSRLH $9, X2 // 42643077 ++ XVSRLW $0, X2, X1 // 41803077 ++ XVSRLW $31, X2, X1 // 41fc3077 ++ XVSRLW $14, X2 // 42b83077 ++ XVSRLV $0, X2, X1 // 41003177 ++ XVSRLV $63, X2, X1 // 41fc3177 ++ XVSRLV $45, X2 // 42b43177 ++ XVSRAB $0, X2, X1 // 41203477 ++ XVSRAB $7, X2, X1 // 413c3477 ++ XVSRAB $6, X2 // 42383477 ++ XVSRAH $0, X2, X1 // 41403477 ++ XVSRAH $15, X2, X1 // 417c3477 ++ XVSRAH $10, X2 // 42683477 ++ XVSRAW $0, X2, X1 // 41803477 ++ XVSRAW $31, X2, X1 // 41fc3477 ++ XVSRAW $16, 
X2 // 42c03477 ++ XVSRAV $0, X2, X1 // 41003577 ++ XVSRAV $63, X2, X1 // 41fc3577 ++ XVSRAV $48, X2 // 42c03577 ++ XVROTRB $0, X2, X1 // 4120a076 ++ XVROTRB $7, X2, X1 // 413ca076 ++ XVROTRB $3, X2 // 422ca076 ++ XVROTRH $0, X2, X1 // 4140a076 ++ XVROTRH $15, X2, X1 // 417ca076 ++ XVROTRH $13, X2 // 4274a076 ++ XVROTRW $0, X2, X1 // 4180a076 ++ XVROTRW $31, X2, X1 // 41fca076 ++ XVROTRW $24, X2 // 42e0a076 ++ XVROTRV $0, X2, X1 // 4100a176 ++ XVROTRV $63, X2, X1 // 41fca176 ++ XVROTRV $52, X2 // 42d0a176 ++ + // MOVV C_DCON12_0, r + MOVV $0x7a90000000000000, R4 // MOVV $8831558869273542656, R4 // 04a41e03 + MOVV $0xea90000000000000, R4 // MOVV $-1544734672188080128, R4 // 04a43a03 +diff --git a/src/cmd/internal/obj/loong64/a.out.go b/src/cmd/internal/obj/loong64/a.out.go +index c7f4769395..3257d376b4 100644 +--- a/src/cmd/internal/obj/loong64/a.out.go ++++ b/src/cmd/internal/obj/loong64/a.out.go +@@ -769,6 +769,39 @@ const ( + AXVANDNV + AXVORNV + ++ AVSLLB ++ AVSLLH ++ AVSLLW ++ AVSLLV ++ AVSRLB ++ AVSRLH ++ AVSRLW ++ AVSRLV ++ AVSRAB ++ AVSRAH ++ AVSRAW ++ AVSRAV ++ AVROTRB ++ AVROTRH ++ AVROTRW ++ AVROTRV ++ AXVSLLB ++ AXVSLLH ++ AXVSLLW ++ AXVSLLV ++ AXVSRLB ++ AXVSRLH ++ AXVSRLW ++ AXVSRLV ++ AXVSRAB ++ AXVSRAH ++ AXVSRAW ++ AXVSRAV ++ AXVROTRB ++ AXVROTRH ++ AXVROTRW ++ AXVROTRV ++ + AVPCNTB + AVPCNTH + AVPCNTW +diff --git a/src/cmd/internal/obj/loong64/anames.go b/src/cmd/internal/obj/loong64/anames.go +index 485940e19c..776e272a0b 100644 +--- a/src/cmd/internal/obj/loong64/anames.go ++++ b/src/cmd/internal/obj/loong64/anames.go +@@ -297,6 +297,38 @@ var Anames = []string{ + "XVNORV", + "XVANDNV", + "XVORNV", ++ "VSLLB", ++ "VSLLH", ++ "VSLLW", ++ "VSLLV", ++ "VSRLB", ++ "VSRLH", ++ "VSRLW", ++ "VSRLV", ++ "VSRAB", ++ "VSRAH", ++ "VSRAW", ++ "VSRAV", ++ "VROTRB", ++ "VROTRH", ++ "VROTRW", ++ "VROTRV", ++ "XVSLLB", ++ "XVSLLH", ++ "XVSLLW", ++ "XVSLLV", ++ "XVSRLB", ++ "XVSRLH", ++ "XVSRLW", ++ "XVSRLV", ++ "XVSRAB", ++ "XVSRAH", ++ "XVSRAW", ++ "XVSRAV", ++ 
"XVROTRB", ++ "XVROTRH", ++ "XVROTRW", ++ "XVROTRV", + "VPCNTB", + "VPCNTH", + "VPCNTW", +diff --git a/src/cmd/internal/obj/loong64/asm.go b/src/cmd/internal/obj/loong64/asm.go +index 9ef414a132..25a40d736e 100644 +--- a/src/cmd/internal/obj/loong64/asm.go ++++ b/src/cmd/internal/obj/loong64/asm.go +@@ -52,6 +52,10 @@ const ( + // Used to insert padding for under-aligned loops. + branchLoopHead + immFiledSi5 // The encoding of the immediate field in the instruction is 5-bits ++ immFiledUi3 // The encoding of the immediate field in the instruction is 3-bits ++ immFiledUi4 // The encoding of the immediate field in the instruction is 4-bits ++ immFiledUi5 // The encoding of the immediate field in the instruction is 5-bits ++ immFiledUi6 // The encoding of the immediate field in the instruction is 6-bits + immFiledUi8 // The encoding of the immediate field in the instruction is 8-bits + ) + +@@ -102,6 +106,34 @@ var optab = []Optab{ + {AVANDB, C_ADDCON, C_VREG, C_NONE, C_VREG, C_NONE, 14, 4, 0, immFiledUi8}, + {AXVANDB, C_ADDCON, C_XREG, C_NONE, C_XREG, C_NONE, 14, 4, 0, immFiledUi8}, + ++ {AVSLLB, C_VREG, C_VREG, C_NONE, C_VREG, C_NONE, 2, 4, 0, 0}, ++ {AXVSLLB, C_XREG, C_XREG, C_NONE, C_XREG, C_NONE, 2, 4, 0, 0}, ++ {AVSLLB, C_SCON, C_VREG, C_NONE, C_VREG, C_NONE, 29, 4, 0, immFiledUi3}, ++ {AXVSLLB, C_SCON, C_XREG, C_NONE, C_XREG, C_NONE, 29, 4, 0, immFiledUi3}, ++ {AVSLLB, C_SCON, C_NONE, C_NONE, C_VREG, C_NONE, 29, 4, 0, immFiledUi3}, ++ {AXVSLLB, C_SCON, C_NONE, C_NONE, C_XREG, C_NONE, 29, 4, 0, immFiledUi3}, ++ ++ {AVSLLH, C_VREG, C_VREG, C_NONE, C_VREG, C_NONE, 2, 4, 0, 0}, ++ {AXVSLLH, C_XREG, C_XREG, C_NONE, C_XREG, C_NONE, 2, 4, 0, 0}, ++ {AVSLLH, C_SCON, C_VREG, C_NONE, C_VREG, C_NONE, 31, 4, 0, immFiledUi4}, ++ {AXVSLLH, C_SCON, C_XREG, C_NONE, C_XREG, C_NONE, 31, 4, 0, immFiledUi4}, ++ {AVSLLH, C_SCON, C_NONE, C_NONE, C_VREG, C_NONE, 31, 4, 0, immFiledUi4}, ++ {AXVSLLH, C_SCON, C_NONE, C_NONE, C_XREG, C_NONE, 31, 4, 0, immFiledUi4}, ++ ++ {AVSLLW, C_VREG, 
C_VREG, C_NONE, C_VREG, C_NONE, 2, 4, 0, 0}, ++ {AXVSLLW, C_XREG, C_XREG, C_NONE, C_XREG, C_NONE, 2, 4, 0, 0}, ++ {AVSLLW, C_SCON, C_VREG, C_NONE, C_VREG, C_NONE, 32, 4, 0, immFiledUi5}, ++ {AXVSLLW, C_SCON, C_XREG, C_NONE, C_XREG, C_NONE, 32, 4, 0, immFiledUi5}, ++ {AVSLLW, C_SCON, C_NONE, C_NONE, C_VREG, C_NONE, 32, 4, 0, immFiledUi5}, ++ {AXVSLLW, C_SCON, C_NONE, C_NONE, C_XREG, C_NONE, 32, 4, 0, immFiledUi5}, ++ ++ {AVSLLV, C_VREG, C_VREG, C_NONE, C_VREG, C_NONE, 2, 4, 0, 0}, ++ {AXVSLLV, C_XREG, C_XREG, C_NONE, C_XREG, C_NONE, 2, 4, 0, 0}, ++ {AVSLLV, C_SCON, C_VREG, C_NONE, C_VREG, C_NONE, 33, 4, 0, immFiledUi6}, ++ {AXVSLLV, C_SCON, C_XREG, C_NONE, C_XREG, C_NONE, 33, 4, 0, immFiledUi6}, ++ {AVSLLV, C_SCON, C_NONE, C_NONE, C_VREG, C_NONE, 33, 4, 0, immFiledUi6}, ++ {AXVSLLV, C_SCON, C_NONE, C_NONE, C_XREG, C_NONE, 33, 4, 0, immFiledUi6}, ++ + {ACLOW, C_REG, C_NONE, C_NONE, C_REG, C_NONE, 9, 4, 0, 0}, + {AABSF, C_FREG, C_NONE, C_NONE, C_FREG, C_NONE, 9, 4, 0, 0}, + {AMOVVF, C_FREG, C_NONE, C_NONE, C_FREG, C_NONE, 9, 4, 0, 0}, +@@ -1521,7 +1553,7 @@ func buildop(ctxt *obj.Link) { + case AXVSEQB: + opset(AXVSEQH, r0) + opset(AXVSEQW, r0) +- opset(AXVSEQV, r0) ++ opset(AXVSEQV, r0) + + case AVANDB: + opset(AVORB, r0) +@@ -1583,6 +1615,46 @@ func buildop(ctxt *obj.Link) { + opset(AXVILVHW, r0) + opset(AXVILVHV, r0) + ++ case AVSLLB: ++ opset(AVSRLB, r0) ++ opset(AVSRAB, r0) ++ opset(AVROTRB, r0) ++ ++ case AXVSLLB: ++ opset(AXVSRLB, r0) ++ opset(AXVSRAB, r0) ++ opset(AXVROTRB, r0) ++ ++ case AVSLLH: ++ opset(AVSRLH, r0) ++ opset(AVSRAH, r0) ++ opset(AVROTRH, r0) ++ ++ case AXVSLLH: ++ opset(AXVSRLH, r0) ++ opset(AXVSRAH, r0) ++ opset(AXVROTRH, r0) ++ ++ case AVSLLW: ++ opset(AVSRLW, r0) ++ opset(AVSRAW, r0) ++ opset(AVROTRW, r0) ++ ++ case AXVSLLW: ++ opset(AXVSRLW, r0) ++ opset(AXVSRAW, r0) ++ opset(AXVROTRW, r0) ++ ++ case AVSLLV: ++ opset(AVSRLV, r0) ++ opset(AVSRAV, r0) ++ opset(AVROTRV, r0) ++ ++ case AXVSLLV: ++ opset(AXVSRLV, r0) ++ opset(AXVSRAV, r0) ++ 
opset(AXVROTRV, r0) ++ + case AVPCNTB: + opset(AVPCNTH, r0) + opset(AVPCNTW, r0) +@@ -1629,10 +1701,22 @@ func OP_8IRR(op uint32, i uint32, r2 uint32, r3 uint32) uint32 { + return op | (i&0xFF)<<10 | (r2&0x1F)<<5 | (r3&0x1F)<<0 + } + ++func OP_6IRR(op uint32, i uint32, r2 uint32, r3 uint32) uint32 { ++ return op | (i&0x3F)<<10 | (r2&0x1F)<<5 | (r3&0x1F)<<0 ++} ++ + func OP_5IRR(op uint32, i uint32, r2 uint32, r3 uint32) uint32 { + return op | (i&0x1F)<<10 | (r2&0x1F)<<5 | (r3&0x1F)<<0 + } + ++func OP_4IRR(op uint32, i uint32, r2 uint32, r3 uint32) uint32 { ++ return op | (i&0xF)<<10 | (r2&0x1F)<<5 | (r3&0x1F)<<0 ++} ++ ++func OP_3IRR(op uint32, i uint32, r2 uint32, r3 uint32) uint32 { ++ return op | (i&0x7)<<10 | (r2&0x1F)<<5 | (r3&0x1F)<<0 ++} ++ + func OP_IR(op uint32, i uint32, r2 uint32) uint32 { + return op | (i&0xFFFFF)<<5 | (r2&0x1F)<<0 // ui20, rd5 + } +@@ -1994,10 +2078,70 @@ func (c *ctxt0) asmout(p *obj.Prog, o *Optab, out []uint32) { + o1 = OP_12IRR(c.opirr(p.As), uint32(v), uint32(r), uint32(p.From.Reg)) + } + ++ case 29: // add $ui3,[r1],r2 ++ v := c.regoff(&p.From) ++ r := int(p.Reg) ++ if r == 0 { ++ r = int(p.To.Reg) ++ } ++ ++ switch o.flag { ++ case immFiledUi3: ++ c.checkimmFiled(p, v, 3, false) ++ o1 = OP_3IRR(c.opirr(p.As), uint32(v), uint32(r), uint32(p.To.Reg)) ++ default: ++ c.ctxt.Diag("Invalid immediate value type\n%v", p) ++ } ++ + case 30: // mov gr/fr/fcc/fcsr, fr/fcc/fcsr/gr + a := c.specialFpMovInst(p.As, oclass(&p.From), oclass(&p.To)) + o1 = OP_RR(a, uint32(p.From.Reg), uint32(p.To.Reg)) + ++ case 31: // add $ui4,[r1],r2 ++ v := c.regoff(&p.From) ++ r := int(p.Reg) ++ if r == 0 { ++ r = int(p.To.Reg) ++ } ++ ++ switch o.flag { ++ case immFiledUi4: ++ c.checkimmFiled(p, v, 4, false) ++ o1 = OP_4IRR(c.opirr(p.As), uint32(v), uint32(r), uint32(p.To.Reg)) ++ default: ++ c.ctxt.Diag("Invalid immediate value type\n%v", p) ++ } ++ ++ case 32: // add $ui5,[r1],r2 ++ v := c.regoff(&p.From) ++ r := int(p.Reg) ++ if r == 0 { ++ r = 
int(p.To.Reg) ++ } ++ ++ switch o.flag { ++ case immFiledUi5: ++ c.checkimmFiled(p, v, 5, false) ++ o1 = OP_5IRR(c.opirr(p.As), uint32(v), uint32(r), uint32(p.To.Reg)) ++ default: ++ c.ctxt.Diag("Invalid immediate value type\n%v", p) ++ } ++ ++ case 33: // add $ui6,[r1],r2 ++ v := c.regoff(&p.From) ++ r := int(p.Reg) ++ if r == 0 { ++ r = int(p.To.Reg) ++ } ++ ++ switch o.flag { ++ case immFiledUi6: ++ c.checkimmFiled(p, v, 6, false) ++ o1 = OP_6IRR(c.opirr(p.As), uint32(v), uint32(r), uint32(p.To.Reg)) ++ default: ++ c.ctxt.Diag("Invalid immediate value type\n%v", p) ++ } ++ + case 34: // mov $con,fr + v := c.regoff(&p.From) + a := AADDU +@@ -2809,6 +2953,70 @@ func (c *ctxt0) oprrr(a obj.As) uint32 { + return 0xEA3A << 15 // xvilvh.w + case AXVILVHV: + return 0xEA3B << 15 // xvilvh.d ++ case AVSLLB: ++ return 0xE1D0 << 15 // vsll.b ++ case AVSLLH: ++ return 0xE1D1 << 15 // vsll.h ++ case AVSLLW: ++ return 0xE1D2 << 15 // vsll.w ++ case AVSLLV: ++ return 0xE1D3 << 15 // vsll.d ++ case AVSRLB: ++ return 0xE1D4 << 15 // vsrl.b ++ case AVSRLH: ++ return 0xE1D5 << 15 // vsrl.h ++ case AVSRLW: ++ return 0xE1D6 << 15 // vsrl.w ++ case AVSRLV: ++ return 0xE1D7 << 15 // vsrl.d ++ case AVSRAB: ++ return 0xE1D8 << 15 // vsra.b ++ case AVSRAH: ++ return 0xE1D9 << 15 // vsra.h ++ case AVSRAW: ++ return 0xE1DA << 15 // vsra.w ++ case AVSRAV: ++ return 0xE1DB << 15 // vsra.d ++ case AVROTRB: ++ return 0xE1DC << 15 // vrotr.b ++ case AVROTRH: ++ return 0xE1DD << 15 // vrotr.h ++ case AVROTRW: ++ return 0xE1DE << 15 // vrotr.w ++ case AVROTRV: ++ return 0xE1DF << 15 // vrotr.d ++ case AXVSLLB: ++ return 0xE9D0 << 15 // xvsll.b ++ case AXVSLLH: ++ return 0xE9D1 << 15 // xvsll.h ++ case AXVSLLW: ++ return 0xE9D2 << 15 // xvsll.w ++ case AXVSLLV: ++ return 0xE9D3 << 15 // xvsll.d ++ case AXVSRLB: ++ return 0xE9D4 << 15 // xvsrl.b ++ case AXVSRLH: ++ return 0xE9D5 << 15 // xvsrl.h ++ case AXVSRLW: ++ return 0xE9D6 << 15 // xvsrl.w ++ case AXVSRLV: ++ return 0xE9D7 << 15 // xvsrl.d ++ 
case AXVSRAB: ++ return 0xE9D8 << 15 // xvsra.b ++ case AXVSRAH: ++ return 0xE9D9 << 15 // xvsra.h ++ case AXVSRAW: ++ return 0xE9DA << 15 // xvsra.w ++ case AXVSRAV: ++ return 0xE9DB << 15 // xvsra.d ++ case AXVROTRB: ++ return 0xE9DC << 15 // xvrotr.b ++ case AXVROTRH: ++ return 0xE9DD << 15 // xvrotr.h ++ case AXVROTRW: ++ return 0xE9DE << 15 // xvrotr.w ++ case AXVROTRV: ++ return 0xE9DF << 15 // xvrotr.d + } + + if a < 0 { +@@ -3168,6 +3376,70 @@ func (c *ctxt0) opirr(a obj.As) uint32 { + return 0x0ED02 << 15 // xvseqi.w + case AXVSEQV: + return 0x0ED03 << 15 // xvseqi.d ++ case AVROTRB: ++ return 0x1CA8<<18 | 0x1<<13 // vrotri.b ++ case AVROTRH: ++ return 0x1CA8<<18 | 0x1<<14 // vrotri.h ++ case AVROTRW: ++ return 0x1CA8<<18 | 0x1<<15 // vrotri.w ++ case AVROTRV: ++ return 0x1CA8<<18 | 0x1<<16 // vrotri.d ++ case AXVROTRB: ++ return 0x1DA8<<18 | 0x1<<13 // xvrotri.b ++ case AXVROTRH: ++ return 0x1DA8<<18 | 0x1<<14 // xvrotri.h ++ case AXVROTRW: ++ return 0x1DA8<<18 | 0x1<<15 // xvrotri.w ++ case AXVROTRV: ++ return 0x1DA8<<18 | 0x1<<16 // xvrotri.d ++ case AVSLLB: ++ return 0x1CCB<<18 | 0x1<<13 // vslli.b ++ case AVSLLH: ++ return 0x1CCB<<18 | 0x1<<14 // vslli.h ++ case AVSLLW: ++ return 0x1CCB<<18 | 0x1<<15 // vslli.w ++ case AVSLLV: ++ return 0x1CCB<<18 | 0x1<<16 // vslli.d ++ case AVSRLB: ++ return 0x1CCC<<18 | 0x1<<13 // vsrli.b ++ case AVSRLH: ++ return 0x1CCC<<18 | 0x1<<14 // vsrli.h ++ case AVSRLW: ++ return 0x1CCC<<18 | 0x1<<15 // vsrli.w ++ case AVSRLV: ++ return 0x1CCC<<18 | 0x1<<16 // vsrli.d ++ case AVSRAB: ++ return 0x1CCD<<18 | 0x1<<13 // vsrai.b ++ case AVSRAH: ++ return 0x1CCD<<18 | 0x1<<14 // vsrai.h ++ case AVSRAW: ++ return 0x1CCD<<18 | 0x1<<15 // vsrai.w ++ case AVSRAV: ++ return 0x1CCD<<18 | 0x1<<16 // vsrai.d ++ case AXVSLLB: ++ return 0x1DCB<<18 | 0x1<<13 // xvslli.b ++ case AXVSLLH: ++ return 0x1DCB<<18 | 0x1<<14 // xvslli.h ++ case AXVSLLW: ++ return 0x1DCB<<18 | 0x1<<15 // xvslli.w ++ case AXVSLLV: ++ return 0x1DCB<<18 | 0x1<<16 // 
xvslli.d ++ case AXVSRLB: ++ return 0x1DCC<<18 | 0x1<<13 // xvsrli.b ++ case AXVSRLH: ++ return 0x1DCC<<18 | 0x1<<14 // xvsrli.h ++ case AXVSRLW: ++ return 0x1DCC<<18 | 0x1<<15 // xvsrli.w ++ case AXVSRLV: ++ return 0x1DCC<<18 | 0x1<<16 // xvsrli.d ++ case AXVSRAB: ++ return 0x1DCD<<18 | 0x1<<13 // xvsrai.b ++ case AXVSRAH: ++ return 0x1DCD<<18 | 0x1<<14 // xvsrai.h ++ case AXVSRAW: ++ return 0x1DCD<<18 | 0x1<<15 // xvsrai.w ++ case AXVSRAV: ++ return 0x1DCD<<18 | 0x1<<16 // xvsrai.d + } + + if a < 0 { +-- +2.38.1 + diff --git a/0029-runtime-support-regABI-and-add-spill-functions-in-ru.patch b/0029-runtime-support-regABI-and-add-spill-functions-in-ru.patch deleted file mode 100644 index 9ed7653d7d1acbaa0c481888da1b2377b01e4711..0000000000000000000000000000000000000000 --- a/0029-runtime-support-regABI-and-add-spill-functions-in-ru.patch +++ /dev/null @@ -1,462 +0,0 @@ -From 895275eb4c8bb5d168c2d426b7ec04ca18abd743 Mon Sep 17 00:00:00 2001 -From: Guoqi Chen -Date: Tue, 15 Aug 2023 21:09:16 +0800 -Subject: [PATCH 29/51] runtime: support regABI and add spill functions in - runtime for loong64 - -Update #40724 - -Co-authored-by: Xiaolin Zhao -Change-Id: Ib5b0868664521035f33c1c488dccd24a5ace2186 ---- - src/runtime/asm_loong64.s | 295 ++++++++++++++++++++++++++++------- - src/runtime/stubs_loong64.go | 7 + - 2 files changed, 243 insertions(+), 59 deletions(-) - -diff --git a/src/runtime/asm_loong64.s b/src/runtime/asm_loong64.s -index 23cbd09947..0a970ef20c 100644 ---- a/src/runtime/asm_loong64.s -+++ b/src/runtime/asm_loong64.s -@@ -72,7 +72,7 @@ nocgo: - MOVV R0, 1(R0) - RET - --DATA runtime·mainPC+0(SB)/8,$runtime·main(SB) -+DATA runtime·mainPC+0(SB)/8,$runtime·main(SB) - GLOBL runtime·mainPC(SB),RODATA,$8 - - TEXT runtime·breakpoint(SB),NOSPLIT|NOFRAME,$0-0 -@@ -123,26 +123,31 @@ TEXT gogo<>(SB), NOSPLIT|NOFRAME, $0 - // Switch to m->g0's stack, call fn(g). - // Fn must never return. It should gogo(&g->sched) - // to keep running g. 
--TEXT runtime·mcall(SB), NOSPLIT|NOFRAME, $0-8 -+TEXT runtime·mcall(SB), NOSPLIT|NOFRAME, $0-8 -+#ifdef GOEXPERIMENT_regabiargs -+ MOVV R4, REGCTXT -+#else -+ MOVV fn+0(FP), REGCTXT -+#endif -+ - // Save caller state in g->sched - MOVV R3, (g_sched+gobuf_sp)(g) - MOVV R1, (g_sched+gobuf_pc)(g) - MOVV R0, (g_sched+gobuf_lr)(g) - - // Switch to m->g0 & its stack, call fn. -- MOVV g, R19 -- MOVV g_m(g), R4 -- MOVV m_g0(R4), g -+ MOVV g, R4 // arg = g -+ MOVV g_m(g), R20 -+ MOVV m_g0(R20), g - JAL runtime·save_g(SB) -- BNE g, R19, 2(PC) -+ BNE g, R4, 2(PC) - JMP runtime·badmcall(SB) -- MOVV fn+0(FP), REGCTXT // context -- MOVV 0(REGCTXT), R5 // code pointer -+ MOVV 0(REGCTXT), R20 // code pointer - MOVV (g_sched+gobuf_sp)(g), R3 // sp = m->g0->sched.sp - ADDV $-16, R3 -- MOVV R19, 8(R3) -+ MOVV R4, 8(R3) - MOVV R0, 0(R3) -- JAL (R5) -+ JAL (R20) - JMP runtime·badmcall2(SB) - - // systemstack_switch is a dummy routine that systemstack leaves at the bottom -@@ -272,7 +277,7 @@ TEXT runtime·morestack_noctxt(SB),NOSPLIT|NOFRAME,$0-0 - JMP runtime·morestack(SB) - - // reflectcall: call a function with the given argument list --// func call(argtype *_type, f *FuncVal, arg *byte, argsize, retoffset uint32). -+// func call(stackArgsType *_type, f *FuncVal, stackArgs *byte, stackArgsSize, stackRetOffset, frameSize uint32, regArgs *abi.RegArgs). - // we don't have variable-sized frames, so we use a small number - // of constant-sized-frame functions to encode a few bits of size in the pc. - // Caution: ugly multiline assembly macros in your future! -@@ -286,7 +291,7 @@ TEXT runtime·morestack_noctxt(SB),NOSPLIT|NOFRAME,$0-0 - // Note: can't just "BR NAME(SB)" - bad inlining results. 
- - TEXT ·reflectcall(SB), NOSPLIT|NOFRAME, $0-48 -- MOVWU stackArgsSize+24(FP), R19 -+ MOVWU frameSize+32(FP), R19 - DISPATCH(runtime·call32, 32) - DISPATCH(runtime·call64, 64) - DISPATCH(runtime·call128, 128) -@@ -317,7 +322,7 @@ TEXT ·reflectcall(SB), NOSPLIT|NOFRAME, $0-48 - JMP (R4) - - #define CALLFN(NAME,MAXSIZE) \ --TEXT NAME(SB), WRAPPER, $MAXSIZE-24; \ -+TEXT NAME(SB), WRAPPER, $MAXSIZE-48; \ - NO_LOCAL_POINTERS; \ - /* copy arguments to stack */ \ - MOVV arg+16(FP), R4; \ -@@ -331,12 +336,17 @@ TEXT NAME(SB), WRAPPER, $MAXSIZE-24; \ - MOVBU R6, (R12); \ - ADDV $1, R12; \ - JMP -5(PC); \ -+ /* set up argument registers */ \ -+ MOVV regArgs+40(FP), R25; \ -+ JAL ·unspillArgs(SB); \ - /* call function */ \ - MOVV f+8(FP), REGCTXT; \ -- MOVV (REGCTXT), R6; \ -+ MOVV (REGCTXT), R25; \ - PCDATA $PCDATA_StackMapIndex, $0; \ -- JAL (R6); \ -+ JAL (R25); \ - /* copy return values back */ \ -+ MOVV regArgs+40(FP), R25; \ -+ JAL ·spillArgs(SB); \ - MOVV argtype+0(FP), R7; \ - MOVV arg+16(FP), R4; \ - MOVWU n+24(FP), R5; \ -@@ -352,11 +362,13 @@ TEXT NAME(SB), WRAPPER, $MAXSIZE-24; \ - // separate function so it can allocate stack space for the arguments - // to reflectcallmove. It does not follow the Go ABI; it expects its - // arguments in registers. --TEXT callRet<>(SB), NOSPLIT, $32-0 -+TEXT callRet<>(SB), NOSPLIT, $40-0 -+ NO_LOCAL_POINTERS - MOVV R7, 8(R3) - MOVV R4, 16(R3) - MOVV R12, 24(R3) - MOVV R5, 32(R3) -+ MOVV R25, 40(R3) - JAL runtime·reflectcallmove(SB) - RET - -@@ -567,7 +579,7 @@ havem: - // If the m on entry wasn't nil, - // 1. the thread might be a Go thread, - // 2. or it wasn't the first call from a C thread on pthread platforms, -- // since then we skip dropm to reuse the m in the first call. -+ // since then we skip dropm to resue the m in the first call. 
- MOVV savedm-8(SP), R12 - BNE R12, droppedm - -@@ -604,14 +616,14 @@ TEXT runtime·abort(SB),NOSPLIT|NOFRAME,$0-0 - UNDEF - - // AES hashing not implemented for loong64 --TEXT runtime·memhash(SB),NOSPLIT|NOFRAME,$0-32 -- JMP runtime·memhashFallback(SB) --TEXT runtime·strhash(SB),NOSPLIT|NOFRAME,$0-24 -- JMP runtime·strhashFallback(SB) --TEXT runtime·memhash32(SB),NOSPLIT|NOFRAME,$0-24 -- JMP runtime·memhash32Fallback(SB) --TEXT runtime·memhash64(SB),NOSPLIT|NOFRAME,$0-24 -- JMP runtime·memhash64Fallback(SB) -+TEXT runtime·memhash(SB),NOSPLIT|NOFRAME,$0-32 -+ JMP runtime·memhashFallback(SB) -+TEXT runtime·strhash(SB),NOSPLIT|NOFRAME,$0-24 -+ JMP runtime·strhashFallback(SB) -+TEXT runtime·memhash32(SB),NOSPLIT|NOFRAME,$0-24 -+ JMP runtime·memhash32Fallback(SB) -+TEXT runtime·memhash64(SB),NOSPLIT|NOFRAME,$0-24 -+ JMP runtime·memhash64Fallback(SB) - - TEXT runtime·return0(SB), NOSPLIT, $0 - MOVW $0, R19 -@@ -658,6 +670,86 @@ TEXT ·checkASM(SB),NOSPLIT,$0-1 - MOVB R19, ret+0(FP) - RET - -+#ifdef GOEXPERIMENT_regabiargs -+// spillArgs stores return values from registers to a *internal/abi.RegArgs in R25. 
-+TEXT ·spillArgs(SB),NOSPLIT,$0-0 -+ MOVV R4, (0*8)(R25) -+ MOVV R5, (1*8)(R25) -+ MOVV R6, (2*8)(R25) -+ MOVV R7, (3*8)(R25) -+ MOVV R8, (4*8)(R25) -+ MOVV R9, (5*8)(R25) -+ MOVV R10, (6*8)(R25) -+ MOVV R11, (7*8)(R25) -+ MOVV R12, (8*8)(R25) -+ MOVV R13, (9*8)(R25) -+ MOVV R14, (10*8)(R25) -+ MOVV R15, (11*8)(R25) -+ MOVV R16, (12*8)(R25) -+ MOVV R17, (13*8)(R25) -+ MOVV R18, (14*8)(R25) -+ MOVV R19, (15*8)(R25) -+ MOVD F0, (16*8)(R25) -+ MOVD F1, (17*8)(R25) -+ MOVD F2, (18*8)(R25) -+ MOVD F3, (19*8)(R25) -+ MOVD F4, (20*8)(R25) -+ MOVD F5, (21*8)(R25) -+ MOVD F6, (22*8)(R25) -+ MOVD F7, (23*8)(R25) -+ MOVD F8, (24*8)(R25) -+ MOVD F9, (25*8)(R25) -+ MOVD F10, (26*8)(R25) -+ MOVD F11, (27*8)(R25) -+ MOVD F12, (28*8)(R25) -+ MOVD F13, (29*8)(R25) -+ MOVD F14, (30*8)(R25) -+ MOVD F15, (31*8)(R25) -+ RET -+ -+// unspillArgs loads args into registers from a *internal/abi.RegArgs in R25. -+TEXT ·unspillArgs(SB),NOSPLIT,$0-0 -+ MOVV (0*8)(R25), R4 -+ MOVV (1*8)(R25), R5 -+ MOVV (2*8)(R25), R6 -+ MOVV (3*8)(R25), R7 -+ MOVV (4*8)(R25), R8 -+ MOVV (5*8)(R25), R9 -+ MOVV (6*8)(R25), R10 -+ MOVV (7*8)(R25), R11 -+ MOVV (8*8)(R25), R12 -+ MOVV (9*8)(R25), R13 -+ MOVV (10*8)(R25), R14 -+ MOVV (11*8)(R25), R15 -+ MOVV (12*8)(R25), R16 -+ MOVV (13*8)(R25), R17 -+ MOVV (14*8)(R25), R18 -+ MOVV (15*8)(R25), R19 -+ MOVD (16*8)(R25), F0 -+ MOVD (17*8)(R25), F1 -+ MOVD (18*8)(R25), F2 -+ MOVD (19*8)(R25), F3 -+ MOVD (20*8)(R25), F4 -+ MOVD (21*8)(R25), F5 -+ MOVD (22*8)(R25), F6 -+ MOVD (23*8)(R25), F7 -+ MOVD (24*8)(R25), F8 -+ MOVD (25*8)(R25), F9 -+ MOVD (26*8)(R25), F10 -+ MOVD (27*8)(R25), F11 -+ MOVD (28*8)(R25), F12 -+ MOVD (29*8)(R25), F13 -+ MOVD (30*8)(R25), F14 -+ MOVD (31*8)(R25), F15 -+ RET -+#else -+TEXT ·spillArgs(SB),NOSPLIT,$0-0 -+ RET -+ -+TEXT ·unspillArgs(SB),NOSPLIT,$0-0 -+ RET -+#endif -+ - // gcWriteBarrier informs the GC about heap pointer writes. - // - // gcWriteBarrier does NOT follow the Go ABI. 
It accepts the -@@ -785,71 +877,156 @@ TEXT runtime·gcWriteBarrier8(SB),NOSPLIT,$0 - // in the caller's stack frame. These stubs write the args into that stack space and - // then tail call to the corresponding runtime handler. - // The tail call makes these stubs disappear in backtraces. --TEXT runtime·panicIndex(SB),NOSPLIT,$0-16 -+TEXT runtime·panicIndex(SB),NOSPLIT,$0-16 -+#ifdef GOEXPERIMENT_regabiargs -+ MOVV R20, R4 -+ MOVV R21, R5 -+#else - MOVV R20, x+0(FP) - MOVV R21, y+8(FP) -- JMP runtime·goPanicIndex(SB) --TEXT runtime·panicIndexU(SB),NOSPLIT,$0-16 -+#endif -+ JMP runtime·goPanicIndex(SB) -+TEXT runtime·panicIndexU(SB),NOSPLIT,$0-16 -+#ifdef GOEXPERIMENT_regabiargs -+ MOVV R20, R4 -+ MOVV R21, R5 -+#else - MOVV R20, x+0(FP) - MOVV R21, y+8(FP) -- JMP runtime·goPanicIndexU(SB) --TEXT runtime·panicSliceAlen(SB),NOSPLIT,$0-16 -+#endif -+ JMP runtime·goPanicIndexU(SB) -+TEXT runtime·panicSliceAlen(SB),NOSPLIT,$0-16 -+#ifdef GOEXPERIMENT_regabiargs -+ MOVV R21, R4 -+ MOVV R23, R5 -+#else - MOVV R21, x+0(FP) - MOVV R23, y+8(FP) -- JMP runtime·goPanicSliceAlen(SB) --TEXT runtime·panicSliceAlenU(SB),NOSPLIT,$0-16 -+#endif -+ JMP runtime·goPanicSliceAlen(SB) -+TEXT runtime·panicSliceAlenU(SB),NOSPLIT,$0-16 -+#ifdef GOEXPERIMENT_regabiargs -+ MOVV R21, R4 -+ MOVV R23, R5 -+#else - MOVV R21, x+0(FP) - MOVV R23, y+8(FP) -- JMP runtime·goPanicSliceAlenU(SB) --TEXT runtime·panicSliceAcap(SB),NOSPLIT,$0-16 -+#endif -+ JMP runtime·goPanicSliceAlenU(SB) -+TEXT runtime·panicSliceAcap(SB),NOSPLIT,$0-16 -+#ifdef GOEXPERIMENT_regabiargs -+ MOVV R21, R4 -+ MOVV R23, R5 -+#else - MOVV R21, x+0(FP) - MOVV R23, y+8(FP) -- JMP runtime·goPanicSliceAcap(SB) --TEXT runtime·panicSliceAcapU(SB),NOSPLIT,$0-16 -+#endif -+ JMP runtime·goPanicSliceAcap(SB) -+TEXT runtime·panicSliceAcapU(SB),NOSPLIT,$0-16 -+#ifdef GOEXPERIMENT_regabiargs -+ MOVV R21, R4 -+ MOVV R23, R5 -+#else - MOVV R21, x+0(FP) - MOVV R23, y+8(FP) -- JMP runtime·goPanicSliceAcapU(SB) --TEXT 
runtime·panicSliceB(SB),NOSPLIT,$0-16 -+#endif -+ JMP runtime·goPanicSliceAcapU(SB) -+TEXT runtime·panicSliceB(SB),NOSPLIT,$0-16 -+#ifdef GOEXPERIMENT_regabiargs -+ MOVV R20, R4 -+ MOVV R21, R5 -+#else - MOVV R20, x+0(FP) - MOVV R21, y+8(FP) -- JMP runtime·goPanicSliceB(SB) --TEXT runtime·panicSliceBU(SB),NOSPLIT,$0-16 -+#endif -+ JMP runtime·goPanicSliceB(SB) -+TEXT runtime·panicSliceBU(SB),NOSPLIT,$0-16 -+#ifdef GOEXPERIMENT_regabiargs -+ MOVV R20, R4 -+ MOVV R21, R5 -+#else - MOVV R20, x+0(FP) - MOVV R21, y+8(FP) -- JMP runtime·goPanicSliceBU(SB) --TEXT runtime·panicSlice3Alen(SB),NOSPLIT,$0-16 -+#endif -+ JMP runtime·goPanicSliceBU(SB) -+TEXT runtime·panicSlice3Alen(SB),NOSPLIT,$0-16 -+#ifdef GOEXPERIMENT_regabiargs -+ MOVV R23, R4 -+ MOVV R24, R5 -+#else - MOVV R23, x+0(FP) - MOVV R24, y+8(FP) -- JMP runtime·goPanicSlice3Alen(SB) --TEXT runtime·panicSlice3AlenU(SB),NOSPLIT,$0-16 -+#endif -+ JMP runtime·goPanicSlice3Alen(SB) -+TEXT runtime·panicSlice3AlenU(SB),NOSPLIT,$0-16 -+#ifdef GOEXPERIMENT_regabiargs -+ MOVV R23, R4 -+ MOVV R24, R5 -+#else - MOVV R23, x+0(FP) - MOVV R24, y+8(FP) -- JMP runtime·goPanicSlice3AlenU(SB) --TEXT runtime·panicSlice3Acap(SB),NOSPLIT,$0-16 -+#endif -+ JMP runtime·goPanicSlice3AlenU(SB) -+TEXT runtime·panicSlice3Acap(SB),NOSPLIT,$0-16 -+#ifdef GOEXPERIMENT_regabiargs -+ MOVV R23, R4 -+ MOVV R24, R5 -+#else - MOVV R23, x+0(FP) - MOVV R24, y+8(FP) -- JMP runtime·goPanicSlice3Acap(SB) --TEXT runtime·panicSlice3AcapU(SB),NOSPLIT,$0-16 -+#endif -+ JMP runtime·goPanicSlice3Acap(SB) -+TEXT runtime·panicSlice3AcapU(SB),NOSPLIT,$0-16 -+#ifdef GOEXPERIMENT_regabiargs -+ MOVV R23, R4 -+ MOVV R24, R5 -+#else - MOVV R23, x+0(FP) - MOVV R24, y+8(FP) -- JMP runtime·goPanicSlice3AcapU(SB) --TEXT runtime·panicSlice3B(SB),NOSPLIT,$0-16 -+#endif -+ JMP runtime·goPanicSlice3AcapU(SB) -+TEXT runtime·panicSlice3B(SB),NOSPLIT,$0-16 -+#ifdef GOEXPERIMENT_regabiargs -+ MOVV R21, R4 -+ MOVV R23, R5 -+#else - MOVV R21, x+0(FP) - MOVV R23, y+8(FP) -- JMP 
runtime·goPanicSlice3B(SB) --TEXT runtime·panicSlice3BU(SB),NOSPLIT,$0-16 -+#endif -+ JMP runtime·goPanicSlice3B(SB) -+TEXT runtime·panicSlice3BU(SB),NOSPLIT,$0-16 -+#ifdef GOEXPERIMENT_regabiargs -+ MOVV R21, R4 -+ MOVV R23, R5 -+#else - MOVV R21, x+0(FP) - MOVV R23, y+8(FP) -- JMP runtime·goPanicSlice3BU(SB) --TEXT runtime·panicSlice3C(SB),NOSPLIT,$0-16 -+#endif -+ JMP runtime·goPanicSlice3BU(SB) -+TEXT runtime·panicSlice3C(SB),NOSPLIT,$0-16 -+#ifdef GOEXPERIMENT_regabiargs -+ MOVV R20, R4 -+ MOVV R21, R5 -+#else - MOVV R20, x+0(FP) - MOVV R21, y+8(FP) -- JMP runtime·goPanicSlice3C(SB) --TEXT runtime·panicSlice3CU(SB),NOSPLIT,$0-16 -+#endif -+ JMP runtime·goPanicSlice3C(SB) -+TEXT runtime·panicSlice3CU(SB),NOSPLIT,$0-16 -+#ifdef GOEXPERIMENT_regabiargs -+ MOVV R20, R4 -+ MOVV R21, R5 -+#else - MOVV R20, x+0(FP) - MOVV R21, y+8(FP) -- JMP runtime·goPanicSlice3CU(SB) --TEXT runtime·panicSliceConvert(SB),NOSPLIT,$0-16 -+#endif -+ JMP runtime·goPanicSlice3CU(SB) -+TEXT runtime·panicSliceConvert(SB),NOSPLIT,$0-16 -+#ifdef GOEXPERIMENT_regabiargs -+ MOVV R23, R4 -+ MOVV R24, R5 -+#else - MOVV R23, x+0(FP) - MOVV R24, y+8(FP) -- JMP runtime·goPanicSliceConvert(SB) -+#endif -+ JMP runtime·goPanicSliceConvert(SB) -diff --git a/src/runtime/stubs_loong64.go b/src/runtime/stubs_loong64.go -index 556983cad1..4576089b0b 100644 ---- a/src/runtime/stubs_loong64.go -+++ b/src/runtime/stubs_loong64.go -@@ -10,6 +10,13 @@ package runtime - func load_g() - func save_g() - -+// Used by reflectcall and the reflect package. -+// -+// Spills/loads arguments in registers to/from an internal/abi.RegArgs -+// respectively. Does not follow the Go ABI. -+func spillArgs() -+func unspillArgs() -+ - // getfp returns the frame pointer register of its caller or 0 if not implemented. 
- // TODO: Make this a compiler intrinsic - func getfp() uintptr { return 0 } --- -2.38.1 - diff --git a/0030-cmd-internal-obj-loong64-add-V-XV-FSQRT-FRECIP-FRSQR.patch b/0030-cmd-internal-obj-loong64-add-V-XV-FSQRT-FRECIP-FRSQR.patch new file mode 100644 index 0000000000000000000000000000000000000000..ba201937d9220018abcfc3a9c8eb7ef92b211b0b --- /dev/null +++ b/0030-cmd-internal-obj-loong64-add-V-XV-FSQRT-FRECIP-FRSQR.patch @@ -0,0 +1,166 @@ +From 344852ff0ccb2b948dc77e0934f246cc5ddf9506 Mon Sep 17 00:00:00 2001 +From: Xiaolin Zhao +Date: Wed, 11 Dec 2024 16:49:08 +0800 +Subject: [PATCH 30/44] cmd/internal/obj/loong64: add + {V,XV}{FSQRT/FRECIP/FRSQRT}.{S/D} instructions support + +Go asm syntax: + V{FSQRT/FRECIP/FRSQRT}{F/D} VJ, VD + XV{FSQRT/FRECIP/FRSQRT}{F/D} XJ, XD + +Equivalent platform assembler syntax: + v{fsqrt/frecip/frsqrt}.{s/d} vd, vj + xv{fsqrt/frecip/frsqrt}.{s/d} xd, xj + +Change-Id: Ied0b959e703d2199939c9ac0608eb3408ea249fa +--- + .../asm/internal/asm/testdata/loong64enc1.s | 14 +++++++ + src/cmd/internal/obj/loong64/a.out.go | 14 +++++++ + src/cmd/internal/obj/loong64/anames.go | 12 ++++++ + src/cmd/internal/obj/loong64/asm.go | 38 ++++++++++++++++++- + 4 files changed, 77 insertions(+), 1 deletion(-) + +diff --git a/src/cmd/asm/internal/asm/testdata/loong64enc1.s b/src/cmd/asm/internal/asm/testdata/loong64enc1.s +index 79012784dc..e2e8a6de6c 100644 +--- a/src/cmd/asm/internal/asm/testdata/loong64enc1.s ++++ b/src/cmd/asm/internal/asm/testdata/loong64enc1.s +@@ -725,6 +725,20 @@ lable2: + XVROTRV $63, X2, X1 // 41fca176 + XVROTRV $52, X2 // 42d0a176 + ++ // [X]VF{SQRT/RECIP/RSQRT}{F/D} instructions ++ VFSQRTF V1, V2 // 22e49c72 ++ VFSQRTD V1, V2 // 22e89c72 ++ VFRECIPF V1, V2 // 22f49c72 ++ VFRECIPD V1, V2 // 22f89c72 ++ VFRSQRTF V1, V2 // 22049d72 ++ VFRSQRTD V1, V2 // 22089d72 ++ XVFSQRTF X2, X1 // 41e49c76 ++ XVFSQRTD X2, X1 // 41e89c76 ++ XVFRECIPF X2, X1 // 41f49c76 ++ XVFRECIPD X2, X1 // 41f89c76 ++ XVFRSQRTF X2, X1 // 41049d76 ++ XVFRSQRTD 
X2, X1 // 41089d76 ++ + // MOVV C_DCON12_0, r + MOVV $0x7a90000000000000, R4 // MOVV $8831558869273542656, R4 // 04a41e03 + MOVV $0xea90000000000000, R4 // MOVV $-1544734672188080128, R4 // 04a43a03 +diff --git a/src/cmd/internal/obj/loong64/a.out.go b/src/cmd/internal/obj/loong64/a.out.go +index 3257d376b4..bd2b1e8300 100644 +--- a/src/cmd/internal/obj/loong64/a.out.go ++++ b/src/cmd/internal/obj/loong64/a.out.go +@@ -811,6 +811,20 @@ const ( + AXVPCNTW + AXVPCNTV + ++ // LSX and LASX floating point instructions ++ AVFSQRTF ++ AVFSQRTD ++ AVFRECIPF ++ AVFRECIPD ++ AVFRSQRTF ++ AVFRSQRTD ++ AXVFSQRTF ++ AXVFSQRTD ++ AXVFRECIPF ++ AXVFRECIPD ++ AXVFRSQRTF ++ AXVFRSQRTD ++ + // LSX and LASX integer comparison instruction + AVSEQB + AXVSEQB +diff --git a/src/cmd/internal/obj/loong64/anames.go b/src/cmd/internal/obj/loong64/anames.go +index 776e272a0b..7dbe9b92e6 100644 +--- a/src/cmd/internal/obj/loong64/anames.go ++++ b/src/cmd/internal/obj/loong64/anames.go +@@ -337,6 +337,18 @@ var Anames = []string{ + "XVPCNTH", + "XVPCNTW", + "XVPCNTV", ++ "VFSQRTF", ++ "VFSQRTD", ++ "VFRECIPF", ++ "VFRECIPD", ++ "VFRSQRTF", ++ "VFRSQRTD", ++ "XVFSQRTF", ++ "XVFSQRTD", ++ "XVFRECIPF", ++ "XVFRECIPD", ++ "XVFRSQRTF", ++ "XVFRSQRTD", + "VSEQB", + "XVSEQB", + "VSEQH", +diff --git a/src/cmd/internal/obj/loong64/asm.go b/src/cmd/internal/obj/loong64/asm.go +index 25a40d736e..af38bef3aa 100644 +--- a/src/cmd/internal/obj/loong64/asm.go ++++ b/src/cmd/internal/obj/loong64/asm.go +@@ -1553,7 +1553,7 @@ func buildop(ctxt *obj.Link) { + case AXVSEQB: + opset(AXVSEQH, r0) + opset(AXVSEQW, r0) +- opset(AXVSEQV, r0) ++ opset(AXVSEQV, r0) + + case AVANDB: + opset(AVORB, r0) +@@ -1659,11 +1659,23 @@ func buildop(ctxt *obj.Link) { + opset(AVPCNTH, r0) + opset(AVPCNTW, r0) + opset(AVPCNTV, r0) ++ opset(AVFSQRTF, r0) ++ opset(AVFSQRTD, r0) ++ opset(AVFRECIPF, r0) ++ opset(AVFRECIPD, r0) ++ opset(AVFRSQRTF, r0) ++ opset(AVFRSQRTD, r0) + + case AXVPCNTB: + opset(AXVPCNTH, r0) + opset(AXVPCNTW, r0) + 
opset(AXVPCNTV, r0) ++ opset(AXVFSQRTF, r0) ++ opset(AXVFSQRTD, r0) ++ opset(AXVFRECIPF, r0) ++ opset(AXVFRECIPD, r0) ++ opset(AXVFRSQRTF, r0) ++ opset(AXVFRSQRTD, r0) + } + } + } +@@ -3193,6 +3205,30 @@ func (c *ctxt0) oprr(a obj.As) uint32 { + return 0x1da70a << 10 // xvpcnt.w + case AXVPCNTV: + return 0x1da70b << 10 // xvpcnt.v ++ case AVFSQRTF: ++ return 0x1ca739 << 10 // vfsqrt.s ++ case AVFSQRTD: ++ return 0x1ca73a << 10 // vfsqrt.d ++ case AVFRECIPF: ++ return 0x1ca73d << 10 // vfrecip.s ++ case AVFRECIPD: ++ return 0x1ca73e << 10 // vfrecip.d ++ case AVFRSQRTF: ++ return 0x1ca741 << 10 // vfrsqrt.s ++ case AVFRSQRTD: ++ return 0x1ca742 << 10 // vfrsqrt.d ++ case AXVFSQRTF: ++ return 0x1da739 << 10 // xvfsqrt.s ++ case AXVFSQRTD: ++ return 0x1da73a << 10 // xvfsqrt.d ++ case AXVFRECIPF: ++ return 0x1da73d << 10 // xvfrecip.s ++ case AXVFRECIPD: ++ return 0x1da73e << 10 // xvfrecip.d ++ case AXVFRSQRTF: ++ return 0x1da741 << 10 // xvfrsqrt.s ++ case AXVFRSQRTD: ++ return 0x1da742 << 10 // xvfrsqrt.d + } + + c.ctxt.Diag("bad rr opcode %v", a) +-- +2.38.1 + diff --git a/0030-reflect-runtime-add-reflect-support-for-regABI-on-lo.patch b/0030-reflect-runtime-add-reflect-support-for-regABI-on-lo.patch deleted file mode 100644 index 647e64151dd2d9522ae235d4628970dce59b714a..0000000000000000000000000000000000000000 --- a/0030-reflect-runtime-add-reflect-support-for-regABI-on-lo.patch +++ /dev/null @@ -1,133 +0,0 @@ -From bd62b7384c43a0c7ea49428092dc6ddc0e19096a Mon Sep 17 00:00:00 2001 -From: Guoqi Chen -Date: Wed, 16 Aug 2023 08:28:28 +0800 -Subject: [PATCH 30/51] reflect, runtime: add reflect support for regABI on - loong64 - -Update #40724 - -Co-authored-by: Xiaolin Zhao -Change-Id: I9fd6eb122db91c0ac89e9d919d5d56a106d79fe4 ---- - src/reflect/asm_loong64.s | 77 ++++++++++++++++++++++++++++++++------- - src/runtime/stkframe.go | 2 +- - 2 files changed, 64 insertions(+), 15 deletions(-) - -diff --git a/src/reflect/asm_loong64.s b/src/reflect/asm_loong64.s -index 
341a6d55c1..520f0afdd5 100644 ---- a/src/reflect/asm_loong64.s -+++ b/src/reflect/asm_loong64.s -@@ -7,34 +7,83 @@ - - #define REGCTXT R29 - -+// The frames of each of the two functions below contain two locals, at offsets -+// that are known to the runtime. -+// -+// The first local is a bool called retValid with a whole pointer-word reserved -+// for it on the stack. The purpose of this word is so that the runtime knows -+// whether the stack-allocated return space contains valid values for stack -+// scanning. -+// -+// The second local is an abi.RegArgs value whose offset is also known to the -+// runtime, so that a stack map for it can be constructed, since it contains -+// pointers visible to the GC. -+#define LOCAL_RETVALID 40 -+#define LOCAL_REGARGS 48 -+ -+// The frame size of the functions below is -+// 32 (args of callReflect) + 8 (bool + padding) + 392 (abi.RegArgs) = 432. -+ - // makeFuncStub is the code half of the function returned by MakeFunc. - // See the comment on the declaration of makeFuncStub in makefunc.go - // for more details. - // No arg size here, runtime pulls arg map out of the func value. 
--TEXT ·makeFuncStub(SB),(NOSPLIT|WRAPPER),$40 -+TEXT ·makeFuncStub(SB),(NOSPLIT|WRAPPER),$432 - NO_LOCAL_POINTERS -+ ADDV $LOCAL_REGARGS, R3, R25 // spillArgs using R25 -+ JAL runtime·spillArgs(SB) -+ MOVV REGCTXT, 32(R3) // save REGCTXT > args of moveMakeFuncArgPtrs < LOCAL_REGARGS -+ -+#ifdef GOEXPERIMENT_regabiargs -+ MOVV REGCTXT, R4 -+ MOVV R25, R5 -+#else - MOVV REGCTXT, 8(R3) -- MOVV $argframe+0(FP), R19 -- MOVV R19, 16(R3) -- MOVB R0, 40(R3) -- ADDV $40, R3, R19 -- MOVV R19, 24(R3) -- MOVV R0, 32(R3) -+ MOVV R25, 16(R3) -+#endif -+ JAL ·moveMakeFuncArgPtrs(SB) -+ MOVV 32(R3), REGCTXT // restore REGCTXT -+ -+ MOVV REGCTXT, 8(R3) -+ MOVV $argframe+0(FP), R20 -+ MOVV R20, 16(R3) -+ MOVV R0, LOCAL_RETVALID(R3) -+ ADDV $LOCAL_RETVALID, R3, R20 -+ MOVV R20, 24(R3) -+ ADDV $LOCAL_REGARGS, R3, R20 -+ MOVV R20, 32(R3) - JAL ·callReflect(SB) -+ ADDV $LOCAL_REGARGS, R3, R25 //unspillArgs using R25 -+ JAL runtime·unspillArgs(SB) - RET - - // methodValueCall is the code half of the function returned by makeMethodValue. - // See the comment on the declaration of methodValueCall in makefunc.go - // for more details. - // No arg size here; runtime pulls arg map out of the func value. 
--TEXT ·methodValueCall(SB),(NOSPLIT|WRAPPER),$40 -+TEXT ·methodValueCall(SB),(NOSPLIT|WRAPPER),$432 - NO_LOCAL_POINTERS -+ ADDV $LOCAL_REGARGS, R3, R25 // spillArgs using R25 -+ JAL runtime·spillArgs(SB) -+ MOVV REGCTXT, 32(R3) // save REGCTXT > args of moveMakeFuncArgPtrs < LOCAL_REGARGS -+#ifdef GOEXPERIMENT_regabiargs -+ MOVV REGCTXT, R4 -+ MOVV R25, R5 -+#else -+ MOVV REGCTXT, 8(R3) -+ MOVV R25, 16(R3) -+#endif -+ JAL ·moveMakeFuncArgPtrs(SB) -+ MOVV 32(R3), REGCTXT // restore REGCTXT - MOVV REGCTXT, 8(R3) -- MOVV $argframe+0(FP), R19 -- MOVV R19, 16(R3) -- MOVB R0, 40(R3) -- ADDV $40, R3, R19 -- MOVV R19, 24(R3) -- MOVV R0, 32(R3) -+ MOVV $argframe+0(FP), R20 -+ MOVV R20, 16(R3) -+ MOVB R0, LOCAL_RETVALID(R3) -+ ADDV $LOCAL_RETVALID, R3, R20 -+ MOVV R20, 24(R3) -+ ADDV $LOCAL_REGARGS, R3, R20 -+ MOVV R20, 32(R3) // frame size to 32+SP as callreflect args) - JAL ·callMethod(SB) -+ ADDV $LOCAL_REGARGS, R3, R25 // unspillArgs using R25 -+ JAL runtime·unspillArgs(SB) - RET -diff --git a/src/runtime/stkframe.go b/src/runtime/stkframe.go -index 5caacbacba..eca419c674 100644 ---- a/src/runtime/stkframe.go -+++ b/src/runtime/stkframe.go -@@ -234,7 +234,7 @@ func (frame *stkframe) getStackMap(cache *pcvalueCache, debug bool) (locals, arg - } - - // stack objects. -- if (GOARCH == "amd64" || GOARCH == "arm64" || GOARCH == "ppc64" || GOARCH == "ppc64le" || GOARCH == "riscv64") && -+ if (GOARCH == "amd64" || GOARCH == "arm64" || GOARCH == "loong64" || GOARCH == "ppc64" || GOARCH == "ppc64le" || GOARCH == "riscv64") && - unsafe.Sizeof(abi.RegArgs{}) > 0 && isReflect { - // For reflect.makeFuncStub and reflect.methodValueCall, - // we need to fake the stack object record. 
--- -2.38.1 - diff --git a/0031-cmd-internal-obj-loong64-add-V-XV-NEG-B-H-W-V-instru.patch b/0031-cmd-internal-obj-loong64-add-V-XV-NEG-B-H-W-V-instru.patch new file mode 100644 index 0000000000000000000000000000000000000000..40e749c6f39030fd62a0f682dc3cc8486c1577e6 --- /dev/null +++ b/0031-cmd-internal-obj-loong64-add-V-XV-NEG-B-H-W-V-instru.patch @@ -0,0 +1,135 @@ +From 6849aaa3deb1fec44bb7625a70ecc2a19f86a389 Mon Sep 17 00:00:00 2001 +From: Xiaolin Zhao +Date: Wed, 11 Dec 2024 17:19:04 +0800 +Subject: [PATCH 31/44] cmd/internal/obj/loong64: add {V,XV}NEG{B/H/W/V} + instructions support + +Go asm syntax: + VNEG{B/H/W/V} VJ, VD + XVNEG{B/H/W/V} XJ, XD + +Equivalent platform assembler syntax: + vneg.{b/h/w/d} vd, vj + xvneg.{b/h/w/d} xd, xj + +Change-Id: Ib2df46b5386149efb44fe12e2485c01826339a5d +--- + .../asm/internal/asm/testdata/loong64enc1.s | 10 ++++++++ + src/cmd/internal/obj/loong64/a.out.go | 10 ++++++++ + src/cmd/internal/obj/loong64/anames.go | 8 +++++++ + src/cmd/internal/obj/loong64/asm.go | 24 +++++++++++++++++++ + 4 files changed, 52 insertions(+) + +diff --git a/src/cmd/asm/internal/asm/testdata/loong64enc1.s b/src/cmd/asm/internal/asm/testdata/loong64enc1.s +index e2e8a6de6c..9deb3cbafd 100644 +--- a/src/cmd/asm/internal/asm/testdata/loong64enc1.s ++++ b/src/cmd/asm/internal/asm/testdata/loong64enc1.s +@@ -739,6 +739,16 @@ lable2: + XVFRSQRTF X2, X1 // 41049d76 + XVFRSQRTD X2, X1 // 41089d76 + ++ // [X]VNEG{B/H/W/V} instructions ++ VNEGB V1, V2 // 22309c72 ++ VNEGH V1, V2 // 22349c72 ++ VNEGW V1, V2 // 22389c72 ++ VNEGV V1, V2 // 223c9c72 ++ XVNEGB X2, X1 // 41309c76 ++ XVNEGH X2, X1 // 41349c76 ++ XVNEGW X2, X1 // 41389c76 ++ XVNEGV X2, X1 // 413c9c76 ++ + // MOVV C_DCON12_0, r + MOVV $0x7a90000000000000, R4 // MOVV $8831558869273542656, R4 // 04a41e03 + MOVV $0xea90000000000000, R4 // MOVV $-1544734672188080128, R4 // 04a43a03 +diff --git a/src/cmd/internal/obj/loong64/a.out.go b/src/cmd/internal/obj/loong64/a.out.go +index bd2b1e8300..486dc9fa89 
100644 +--- a/src/cmd/internal/obj/loong64/a.out.go ++++ b/src/cmd/internal/obj/loong64/a.out.go +@@ -747,6 +747,16 @@ const ( + AXVSUBV + AXVSUBQ + ++ // LSX and LASX integer neg instructions ++ AVNEGB ++ AVNEGH ++ AVNEGW ++ AVNEGV ++ AXVNEGB ++ AXVNEGH ++ AXVNEGW ++ AXVNEGV ++ + // LSX and LASX Bit-manipulation Instructions + AVANDB + AVORB +diff --git a/src/cmd/internal/obj/loong64/anames.go b/src/cmd/internal/obj/loong64/anames.go +index 7dbe9b92e6..d697b73e71 100644 +--- a/src/cmd/internal/obj/loong64/anames.go ++++ b/src/cmd/internal/obj/loong64/anames.go +@@ -277,6 +277,14 @@ var Anames = []string{ + "XVSUBW", + "XVSUBV", + "XVSUBQ", ++ "VNEGB", ++ "VNEGH", ++ "VNEGW", ++ "VNEGV", ++ "XVNEGB", ++ "XVNEGH", ++ "XVNEGW", ++ "XVNEGV", + "VANDB", + "VORB", + "VXORB", +diff --git a/src/cmd/internal/obj/loong64/asm.go b/src/cmd/internal/obj/loong64/asm.go +index af38bef3aa..e2c7afd82d 100644 +--- a/src/cmd/internal/obj/loong64/asm.go ++++ b/src/cmd/internal/obj/loong64/asm.go +@@ -1665,6 +1665,10 @@ func buildop(ctxt *obj.Link) { + opset(AVFRECIPD, r0) + opset(AVFRSQRTF, r0) + opset(AVFRSQRTD, r0) ++ opset(AVNEGB, r0) ++ opset(AVNEGH, r0) ++ opset(AVNEGW, r0) ++ opset(AVNEGV, r0) + + case AXVPCNTB: + opset(AXVPCNTH, r0) +@@ -1676,6 +1680,10 @@ func buildop(ctxt *obj.Link) { + opset(AXVFRECIPD, r0) + opset(AXVFRSQRTF, r0) + opset(AXVFRSQRTD, r0) ++ opset(AXVNEGB, r0) ++ opset(AXVNEGH, r0) ++ opset(AXVNEGW, r0) ++ opset(AXVNEGV, r0) + } + } + } +@@ -3229,6 +3237,22 @@ func (c *ctxt0) oprr(a obj.As) uint32 { + return 0x1da741 << 10 // xvfrsqrt.s + case AXVFRSQRTD: + return 0x1da742 << 10 // xvfrsqrt.d ++ case AVNEGB: ++ return 0x1ca70c << 10 // vneg.b ++ case AVNEGH: ++ return 0x1ca70d << 10 // vneg.h ++ case AVNEGW: ++ return 0x1ca70e << 10 // vneg.w ++ case AVNEGV: ++ return 0x1ca70f << 10 // vneg.d ++ case AXVNEGB: ++ return 0x1da70c << 10 // xvneg.b ++ case AXVNEGH: ++ return 0x1da70d << 10 // xvneg.h ++ case AXVNEGW: ++ return 0x1da70e << 10 // xvneg.w ++ case 
AXVNEGV: ++ return 0x1da70f << 10 // xvneg.d + } + + c.ctxt.Diag("bad rr opcode %v", a) +-- +2.38.1 + diff --git a/0031-internal-bytealg-add-regABI-support-in-bytealg-funct.patch b/0031-internal-bytealg-add-regABI-support-in-bytealg-funct.patch deleted file mode 100644 index df472c2ef7eb1ba83c7381891576469a20822f2e..0000000000000000000000000000000000000000 --- a/0031-internal-bytealg-add-regABI-support-in-bytealg-funct.patch +++ /dev/null @@ -1,307 +0,0 @@ -From e6ad49b3e094b709db872b1694f918bff73ea13e Mon Sep 17 00:00:00 2001 -From: Guoqi Chen -Date: Wed, 16 Aug 2023 08:55:13 +0800 -Subject: [PATCH 31/51] internal/bytealg: add regABI support in bytealg - functions on loong64 - -Update #40724 - -Co-authored-by: Xiaolin Zhao -Change-Id: Ie9d5c39e8a1e011ed90ad78bc5bfa98a9cff3a0d ---- - src/internal/bytealg/compare_loong64.s | 95 ++++++++++++++---------- - src/internal/bytealg/equal_loong64.s | 21 +++++- - src/internal/bytealg/indexbyte_loong64.s | 32 ++++++-- - 3 files changed, 101 insertions(+), 47 deletions(-) - -diff --git a/src/internal/bytealg/compare_loong64.s b/src/internal/bytealg/compare_loong64.s -index c89c5a9256..311449ab18 100644 ---- a/src/internal/bytealg/compare_loong64.s -+++ b/src/internal/bytealg/compare_loong64.s -@@ -5,83 +5,102 @@ - #include "go_asm.h" - #include "textflag.h" - --TEXT ·Compare(SB),NOSPLIT,$0-56 -- MOVV a_base+0(FP), R6 -- MOVV b_base+24(FP), R7 -- MOVV a_len+8(FP), R4 -- MOVV b_len+32(FP), R5 -+TEXT ·Compare(SB),NOSPLIT,$0-56 -+#ifndef GOEXPERIMENT_regabiargs -+ MOVV a_base+0(FP), R4 -+ MOVV a_len+8(FP), R5 -+ MOVV b_base+24(FP), R6 -+ MOVV b_len+32(FP), R7 - MOVV $ret+48(FP), R13 -+#else -+ // R4 = a_base -+ // R5 = a_len -+ // R6 = a_cap (unused) -+ // R7 = b_base (want in R6) -+ // R8 = b_len (want in R7) -+ // R9 = b_cap (unused) -+ MOVV R7, R6 -+ MOVV R8, R7 -+#endif - JMP cmpbody<>(SB) - --TEXT runtime·cmpstring(SB),NOSPLIT,$0-40 -- MOVV a_base+0(FP), R6 -- MOVV b_base+16(FP), R7 -- MOVV a_len+8(FP), R4 -- MOVV 
b_len+24(FP), R5 -+TEXT runtime·cmpstring(SB),NOSPLIT,$0-40 -+#ifndef GOEXPERIMENT_regabiargs -+ MOVV a_base+0(FP), R4 -+ MOVV b_base+16(FP), R6 -+ MOVV a_len+8(FP), R5 -+ MOVV b_len+24(FP), R7 - MOVV $ret+32(FP), R13 -+#endif -+ // R4 = a_base -+ // R5 = a_len -+ // R6 = b_base -+ // R7 = b_len - JMP cmpbody<>(SB) - - // On entry: --// R4 length of a --// R5 length of b --// R6 points to the start of a --// R7 points to the start of b -+// R5 length of a -+// R7 length of b -+// R4 points to the start of a -+// R6 points to the start of b - // R13 points to the return value (-1/0/1) - TEXT cmpbody<>(SB),NOSPLIT|NOFRAME,$0 -- BEQ R6, R7, samebytes // same start of a and b -+ BEQ R4, R6, samebytes // same start of a and b - -- SGTU R4, R5, R9 -+ SGTU R5, R7, R9 - BNE R0, R9, r2_lt_r1 -- MOVV R4, R14 -+ MOVV R5, R14 - JMP entry - r2_lt_r1: -- MOVV R5, R14 // R14 is min(R4, R5) -+ MOVV R7, R14 // R14 is min(R4, R5) - entry: -- ADDV R6, R14, R12 // R6 start of a, R14 end of a -- BEQ R6, R12, samebytes // length is 0 -+ ADDV R4, R14, R12 // R6 start of a, R14 end of a -+ BEQ R4, R12, samebytes // length is 0 - - SRLV $4, R14 // R14 is number of chunks - BEQ R0, R14, byte_loop - - // make sure both a and b are aligned. 
-- OR R6, R7, R15 -+ OR R4, R6, R15 - AND $7, R15 - BNE R0, R15, byte_loop - - PCALIGN $16 - chunk16_loop: - BEQ R0, R14, byte_loop -- MOVV (R6), R8 -- MOVV (R7), R9 -+ MOVV (R4), R8 -+ MOVV (R6), R9 - BNE R8, R9, byte_loop -- MOVV 8(R6), R16 -- MOVV 8(R7), R17 -+ MOVV 8(R4), R16 -+ MOVV 8(R6), R17 -+ ADDV $16, R4 - ADDV $16, R6 -- ADDV $16, R7 - SUBVU $1, R14 - BEQ R16, R17, chunk16_loop -+ SUBV $8, R4 - SUBV $8, R6 -- SUBV $8, R7 - - byte_loop: -- BEQ R6, R12, samebytes -- MOVBU (R6), R8 -+ BEQ R4, R12, samebytes -+ MOVBU (R4), R8 -+ ADDVU $1, R4 -+ MOVBU (R6), R9 - ADDVU $1, R6 -- MOVBU (R7), R9 -- ADDVU $1, R7 - BEQ R8, R9, byte_loop - - byte_cmp: -- SGTU R8, R9, R12 // R12 = 1 if (R8 > R9) -- BNE R0, R12, ret -- MOVV $-1, R12 -+ SGTU R8, R9, R4 // R12 = 1 if (R8 > R9) -+ BNE R0, R4, ret -+ MOVV $-1, R4 - JMP ret - - samebytes: -- SGTU R4, R5, R8 -- SGTU R5, R4, R9 -- SUBV R9, R8, R12 -+ SGTU R5, R7, R8 -+ SGTU R7, R5, R9 -+ SUBV R9, R8, R4 - - ret: -- MOVV R12, (R13) -+#ifndef GOEXPERIMENT_regabiargs -+ MOVV R4, (R13) -+#endif - RET -diff --git a/src/internal/bytealg/equal_loong64.s b/src/internal/bytealg/equal_loong64.s -index ba2a5578c3..a3ad5c1b35 100644 ---- a/src/internal/bytealg/equal_loong64.s -+++ b/src/internal/bytealg/equal_loong64.s -@@ -8,17 +8,21 @@ - #define REGCTXT R29 - - // memequal(a, b unsafe.Pointer, size uintptr) bool --TEXT runtime·memequal(SB),NOSPLIT|NOFRAME,$0-25 -+TEXT runtime·memequal(SB),NOSPLIT|NOFRAME,$0-25 -+#ifndef GOEXPERIMENT_regabiargs - MOVV a+0(FP), R4 - MOVV b+8(FP), R5 -- BEQ R4, R5, eq - MOVV size+16(FP), R6 -+#endif -+ BEQ R4, R5, eq - ADDV R4, R6, R7 - PCALIGN $16 - loop: - BNE R4, R7, test - MOVV $1, R4 -+#ifndef GOEXPERIMENT_regabiargs - MOVB R4, ret+24(FP) -+#endif - RET - test: - MOVBU (R4), R9 -@@ -27,17 +31,24 @@ test: - ADDV $1, R5 - BEQ R9, R10, loop - -+ MOVB R0, R4 -+#ifndef GOEXPERIMENT_regabiargs - MOVB R0, ret+24(FP) -+#endif - RET - eq: - MOVV $1, R4 -+#ifndef GOEXPERIMENT_regabiargs - MOVB R4, ret+24(FP) 
-+#endif - RET - - // memequal_varlen(a, b unsafe.Pointer) bool --TEXT runtime·memequal_varlen(SB),NOSPLIT,$40-17 -+TEXT runtime·memequal_varlen(SB),NOSPLIT,$40-17 -+#ifndef GOEXPERIMENT_regabiargs - MOVV a+0(FP), R4 - MOVV b+8(FP), R5 -+#endif - BEQ R4, R5, eq - MOVV 8(REGCTXT), R6 // compiler stores size at offset 8 in the closure - MOVV R4, 8(R3) -@@ -45,9 +56,13 @@ TEXT runtime·memequal_varlen(SB),NOSPLIT,$40-17 - MOVV R6, 24(R3) - JAL runtime·memequal(SB) - MOVBU 32(R3), R4 -+#ifndef GOEXPERIMENT_regabiargs - MOVB R4, ret+16(FP) -+#endif - RET - eq: - MOVV $1, R4 -+#ifndef GOEXPERIMENT_regabiargs - MOVB R4, ret+16(FP) -+#endif - RET -diff --git a/src/internal/bytealg/indexbyte_loong64.s b/src/internal/bytealg/indexbyte_loong64.s -index 604970549f..03e0660973 100644 ---- a/src/internal/bytealg/indexbyte_loong64.s -+++ b/src/internal/bytealg/indexbyte_loong64.s -@@ -5,11 +5,18 @@ - #include "go_asm.h" - #include "textflag.h" - --TEXT ·IndexByte(SB),NOSPLIT,$0-40 -+TEXT ·IndexByte(SB),NOSPLIT,$0-40 -+#ifndef GOEXPERIMENT_regabiargs - MOVV b_base+0(FP), R4 - MOVV b_len+8(FP), R5 -- MOVBU c+24(FP), R6 // byte to find -- MOVV R4, R7 // store base for later -+ MOVBU c+24(FP), R7 // byte to find -+#endif -+ // R4 = b_base -+ // R5 = b_len -+ // R6 = b_cap (unused) -+ // R7 = byte to find -+ AND $0xff, R7 -+ MOVV R4, R6 // store base for later - ADDV R4, R5 // end - ADDV $-1, R4 - -@@ -18,21 +25,30 @@ loop: - ADDV $1, R4 - BEQ R4, R5, notfound - MOVBU (R4), R8 -- BNE R6, R8, loop -+ BNE R7, R8, loop - -- SUBV R7, R4 // remove base -+ SUBV R6, R4 // remove base -+#ifndef GOEXPERIMENT_regabiargs - MOVV R4, ret+32(FP) -+#endif - RET - - notfound: - MOVV $-1, R4 -+#ifndef GOEXPERIMENT_regabiargs - MOVV R4, ret+32(FP) -+#endif - RET - --TEXT ·IndexByteString(SB),NOSPLIT,$0-32 -+TEXT ·IndexByteString(SB),NOSPLIT,$0-32 -+#ifndef GOEXPERIMENT_regabiargs - MOVV s_base+0(FP), R4 - MOVV s_len+8(FP), R5 - MOVBU c+16(FP), R6 // byte to find -+#endif -+ // R4 = s_base -+ // R5 = 
s_len -+ // R6 = byte to find - MOVV R4, R7 // store base for later - ADDV R4, R5 // end - ADDV $-1, R4 -@@ -45,10 +61,14 @@ loop: - BNE R6, R8, loop - - SUBV R7, R4 // remove base -+#ifndef GOEXPERIMENT_regabiargs - MOVV R4, ret+24(FP) -+#endif - RET - - notfound: - MOVV $-1, R4 -+#ifndef GOEXPERIMENT_regabiargs - MOVV R4, ret+24(FP) -+#endif - RET --- -2.38.1 - diff --git a/0032-cmd-internal-obj-loong64-add-V-XV-MUL-B-H-W-V-and-V-.patch b/0032-cmd-internal-obj-loong64-add-V-XV-MUL-B-H-W-V-and-V-.patch new file mode 100644 index 0000000000000000000000000000000000000000..67724ac532b420e34fd99dc4c3ea21173f284099 --- /dev/null +++ b/0032-cmd-internal-obj-loong64-add-V-XV-MUL-B-H-W-V-and-V-.patch @@ -0,0 +1,235 @@ +From 984f12cbb1763c855882b3c8e89727ad560b38c1 Mon Sep 17 00:00:00 2001 +From: Xiaolin Zhao +Date: Wed, 11 Dec 2024 17:46:09 +0800 +Subject: [PATCH 32/44] cmd/internal/obj/loong64: add {V,XV}MUL{B/H/W/V} and + {V,XV}MUH{B/H/W/V}[U] instructions support + +Go asm syntax: + VMUL{B/H/W/V} VK, VJ, VD + VMUH{B/H/W/V}[U] VK, VJ, VD + XVMUL{B/H/W/V} XK, XJ, XD + XVMUH{B/H/W/V}[U] XK, XJ, XD + +Equivalent platform assembler syntax: + vmul.{b/h/w/d} vd, vj, vk + vmuh.{b/h/w/d}[u] vd, vj, vk + xvmul.{b/h/w/d} xd, xj, xk + xvmuh.{b/h/w/d}[u] xd, xj, xk + +Change-Id: I8890f8a41100e4681a833c27067f0f76b593f731 +--- + .../asm/internal/asm/testdata/loong64enc1.s | 26 +++++++ + src/cmd/internal/obj/loong64/a.out.go | 26 +++++++ + src/cmd/internal/obj/loong64/anames.go | 24 +++++++ + src/cmd/internal/obj/loong64/asm.go | 72 +++++++++++++++++++ + 4 files changed, 148 insertions(+) + +diff --git a/src/cmd/asm/internal/asm/testdata/loong64enc1.s b/src/cmd/asm/internal/asm/testdata/loong64enc1.s +index 9deb3cbafd..c8b490234f 100644 +--- a/src/cmd/asm/internal/asm/testdata/loong64enc1.s ++++ b/src/cmd/asm/internal/asm/testdata/loong64enc1.s +@@ -749,6 +749,32 @@ lable2: + XVNEGW X2, X1 // 41389c76 + XVNEGV X2, X1 // 413c9c76 + ++ // [X]VMUL{B/H/W/V} and [X]VMUH{B/H/W/V}[U] 
instructions ++ VMULB V1, V2, V3 // 43048470 ++ VMULH V1, V2, V3 // 43848470 ++ VMULW V1, V2, V3 // 43048570 ++ VMULV V1, V2, V3 // 43848570 ++ VMUHB V1, V2, V3 // 43048670 ++ VMUHH V1, V2, V3 // 43848670 ++ VMUHW V1, V2, V3 // 43048770 ++ VMUHV V1, V2, V3 // 43848770 ++ VMUHBU V1, V2, V3 // 43048870 ++ VMUHHU V1, V2, V3 // 43848870 ++ VMUHWU V1, V2, V3 // 43048970 ++ VMUHVU V1, V2, V3 // 43848970 ++ XVMULB X3, X2, X1 // 410c8474 ++ XVMULH X3, X2, X1 // 418c8474 ++ XVMULW X3, X2, X1 // 410c8574 ++ XVMULV X3, X2, X1 // 418c8574 ++ XVMUHB X3, X2, X1 // 410c8674 ++ XVMUHH X3, X2, X1 // 418c8674 ++ XVMUHW X3, X2, X1 // 410c8774 ++ XVMUHV X3, X2, X1 // 418c8774 ++ XVMUHBU X3, X2, X1 // 410c8874 ++ XVMUHHU X3, X2, X1 // 418c8874 ++ XVMUHWU X3, X2, X1 // 410c8974 ++ XVMUHVU X3, X2, X1 // 418c8974 ++ + // MOVV C_DCON12_0, r + MOVV $0x7a90000000000000, R4 // MOVV $8831558869273542656, R4 // 04a41e03 + MOVV $0xea90000000000000, R4 // MOVV $-1544734672188080128, R4 // 04a43a03 +diff --git a/src/cmd/internal/obj/loong64/a.out.go b/src/cmd/internal/obj/loong64/a.out.go +index 486dc9fa89..95744e77a1 100644 +--- a/src/cmd/internal/obj/loong64/a.out.go ++++ b/src/cmd/internal/obj/loong64/a.out.go +@@ -757,6 +757,32 @@ const ( + AXVNEGW + AXVNEGV + ++ // LSX and LASX integer mul instructions ++ AVMULB ++ AVMULH ++ AVMULW ++ AVMULV ++ AVMUHB ++ AVMUHH ++ AVMUHW ++ AVMUHV ++ AVMUHBU ++ AVMUHHU ++ AVMUHWU ++ AVMUHVU ++ AXVMULB ++ AXVMULH ++ AXVMULW ++ AXVMULV ++ AXVMUHB ++ AXVMUHH ++ AXVMUHW ++ AXVMUHV ++ AXVMUHBU ++ AXVMUHHU ++ AXVMUHWU ++ AXVMUHVU ++ + // LSX and LASX Bit-manipulation Instructions + AVANDB + AVORB +diff --git a/src/cmd/internal/obj/loong64/anames.go b/src/cmd/internal/obj/loong64/anames.go +index d697b73e71..d0cd3a26fa 100644 +--- a/src/cmd/internal/obj/loong64/anames.go ++++ b/src/cmd/internal/obj/loong64/anames.go +@@ -285,6 +285,30 @@ var Anames = []string{ + "XVNEGH", + "XVNEGW", + "XVNEGV", ++ "VMULB", ++ "VMULH", ++ "VMULW", ++ "VMULV", ++ "VMUHB", ++ "VMUHH", 
++ "VMUHW", ++ "VMUHV", ++ "VMUHBU", ++ "VMUHHU", ++ "VMUHWU", ++ "VMUHVU", ++ "XVMULB", ++ "XVMULH", ++ "XVMULW", ++ "XVMULV", ++ "XVMUHB", ++ "XVMUHH", ++ "XVMUHW", ++ "XVMUHV", ++ "XVMUHBU", ++ "XVMUHHU", ++ "XVMUHWU", ++ "XVMUHVU", + "VANDB", + "VORB", + "VXORB", +diff --git a/src/cmd/internal/obj/loong64/asm.go b/src/cmd/internal/obj/loong64/asm.go +index e2c7afd82d..7fb99f66e6 100644 +--- a/src/cmd/internal/obj/loong64/asm.go ++++ b/src/cmd/internal/obj/loong64/asm.go +@@ -1589,6 +1589,18 @@ func buildop(ctxt *obj.Link) { + opset(AVILVHH, r0) + opset(AVILVHW, r0) + opset(AVILVHV, r0) ++ opset(AVMULB, r0) ++ opset(AVMULH, r0) ++ opset(AVMULW, r0) ++ opset(AVMULV, r0) ++ opset(AVMUHB, r0) ++ opset(AVMUHH, r0) ++ opset(AVMUHW, r0) ++ opset(AVMUHV, r0) ++ opset(AVMUHBU, r0) ++ opset(AVMUHHU, r0) ++ opset(AVMUHWU, r0) ++ opset(AVMUHVU, r0) + + case AXVANDV: + opset(AXVORV, r0) +@@ -1614,6 +1626,18 @@ func buildop(ctxt *obj.Link) { + opset(AXVILVHH, r0) + opset(AXVILVHW, r0) + opset(AXVILVHV, r0) ++ opset(AXVMULB, r0) ++ opset(AXVMULH, r0) ++ opset(AXVMULW, r0) ++ opset(AXVMULV, r0) ++ opset(AXVMUHB, r0) ++ opset(AXVMUHH, r0) ++ opset(AXVMUHW, r0) ++ opset(AXVMUHV, r0) ++ opset(AXVMUHBU, r0) ++ opset(AXVMUHHU, r0) ++ opset(AXVMUHWU, r0) ++ opset(AXVMUHVU, r0) + + case AVSLLB: + opset(AVSRLB, r0) +@@ -3037,6 +3061,54 @@ func (c *ctxt0) oprrr(a obj.As) uint32 { + return 0xE9DE << 15 // xvrotr.w + case AXVROTRV: + return 0xE9DF << 15 // xvrotr.d ++ case AVMULB: ++ return 0xe108 << 15 // vmul.b ++ case AVMULH: ++ return 0xe109 << 15 // vmul.h ++ case AVMULW: ++ return 0xe10a << 15 // vmul.w ++ case AVMULV: ++ return 0xe10b << 15 // vmul.d ++ case AVMUHB: ++ return 0xe10c << 15 // vmuh.b ++ case AVMUHH: ++ return 0xe10d << 15 // vmuh.h ++ case AVMUHW: ++ return 0xe10e << 15 // vmuh.w ++ case AVMUHV: ++ return 0xe10f << 15 // vmuh.d ++ case AVMUHBU: ++ return 0xe110 << 15 // vmuh.bu ++ case AVMUHHU: ++ return 0xe111 << 15 // vmuh.hu ++ case AVMUHWU: ++ return 0xe112 << 
15 // vmuh.wu ++ case AVMUHVU: ++ return 0xe113 << 15 // vmuh.du ++ case AXVMULB: ++ return 0xe908 << 15 // xvmul.b ++ case AXVMULH: ++ return 0xe909 << 15 // xvmul.h ++ case AXVMULW: ++ return 0xe90a << 15 // xvmul.w ++ case AXVMULV: ++ return 0xe90b << 15 // xvmul.d ++ case AXVMUHB: ++ return 0xe90c << 15 // xvmuh.b ++ case AXVMUHH: ++ return 0xe90d << 15 // xvmuh.h ++ case AXVMUHW: ++ return 0xe90e << 15 // xvmuh.w ++ case AXVMUHV: ++ return 0xe90f << 15 // xvmuh.d ++ case AXVMUHBU: ++ return 0xe910 << 15 // xvmuh.bu ++ case AXVMUHHU: ++ return 0xe911 << 15 // xvmuh.hu ++ case AXVMUHWU: ++ return 0xe912 << 15 // xvmuh.wu ++ case AXVMUHVU: ++ return 0xe913 << 15 // xvmuh.du + } + + if a < 0 { +-- +2.38.1 + diff --git a/0032-runtime-add-regABI-support-in-memclr-and-memmove-fun.patch b/0032-runtime-add-regABI-support-in-memclr-and-memmove-fun.patch deleted file mode 100644 index 85a30ee7e87fdf195bd009229d3c2e514ca7f269..0000000000000000000000000000000000000000 --- a/0032-runtime-add-regABI-support-in-memclr-and-memmove-fun.patch +++ /dev/null @@ -1,95 +0,0 @@ -From df4dd7ce9dd12599e48424014bb86fb319838d1b Mon Sep 17 00:00:00 2001 -From: Guoqi Chen -Date: Wed, 16 Aug 2023 09:05:30 +0800 -Subject: [PATCH 32/51] runtime: add regABI support in memclr and memmove - functions on loong64 - -Update #40724 - -Co-authored-by: Xiaolin Zhao -Change-Id: I2ff3421da41de4e1a88538e67c9baa26bcf6ffc0 ---- - src/runtime/memclr_loong64.s | 32 +++++++++++++++++--------------- - src/runtime/memmove_loong64.s | 4 +++- - 2 files changed, 20 insertions(+), 16 deletions(-) - -diff --git a/src/runtime/memclr_loong64.s b/src/runtime/memclr_loong64.s -index 7bb6f3dfc9..313e4d4f33 100644 ---- a/src/runtime/memclr_loong64.s -+++ b/src/runtime/memclr_loong64.s -@@ -6,37 +6,39 @@ - #include "textflag.h" - - // func memclrNoHeapPointers(ptr unsafe.Pointer, n uintptr) --TEXT runtime·memclrNoHeapPointers(SB),NOSPLIT,$0-16 -- MOVV ptr+0(FP), R6 -- MOVV n+8(FP), R7 -- ADDV R6, R7, R4 -+TEXT 
runtime·memclrNoHeapPointers(SB),NOSPLIT,$0-16 -+#ifndef GOEXPERIMENT_regabiargs -+ MOVV ptr+0(FP), R4 -+ MOVV n+8(FP), R5 -+#endif -+ ADDV R4, R5, R6 - - // if less than 8 bytes, do one byte at a time -- SGTU $8, R7, R8 -+ SGTU $8, R5, R8 - BNE R8, out - - // do one byte at a time until 8-aligned -- AND $7, R6, R8 -+ AND $7, R4, R8 - BEQ R8, words -- MOVB R0, (R6) -- ADDV $1, R6 -+ MOVB R0, (R4) -+ ADDV $1, R4 - JMP -4(PC) - - words: - // do 8 bytes at a time if there is room -- ADDV $-7, R4, R7 -+ ADDV $-7, R6, R5 - - PCALIGN $16 -- SGTU R7, R6, R8 -+ SGTU R5, R4, R8 - BEQ R8, out -- MOVV R0, (R6) -- ADDV $8, R6 -+ MOVV R0, (R4) -+ ADDV $8, R4 - JMP -4(PC) - - out: -- BEQ R6, R4, done -- MOVB R0, (R6) -- ADDV $1, R6 -+ BEQ R4, R6, done -+ MOVB R0, (R4) -+ ADDV $1, R4 - JMP -3(PC) - done: - RET -diff --git a/src/runtime/memmove_loong64.s b/src/runtime/memmove_loong64.s -index 0f139bcc13..5b7aeba698 100644 ---- a/src/runtime/memmove_loong64.s -+++ b/src/runtime/memmove_loong64.s -@@ -7,10 +7,12 @@ - // See memmove Go doc for important implementation constraints. 
- - // func memmove(to, from unsafe.Pointer, n uintptr) --TEXT runtime·memmove(SB), NOSPLIT|NOFRAME, $0-24 -+TEXT runtime·memmove(SB), NOSPLIT|NOFRAME, $0-24 -+#ifndef GOEXPERIMENT_regabiargs - MOVV to+0(FP), R4 - MOVV from+8(FP), R5 - MOVV n+16(FP), R6 -+#endif - BNE R6, check - RET - --- -2.38.1 - diff --git a/0033-cmd-internal-obj-loong64-add-V-XV-DIV-B-H-W-V-U-and-.patch b/0033-cmd-internal-obj-loong64-add-V-XV-DIV-B-H-W-V-U-and-.patch new file mode 100644 index 0000000000000000000000000000000000000000..967f56d10b28d942f5868f28bd56209e5a7fbeb9 --- /dev/null +++ b/0033-cmd-internal-obj-loong64-add-V-XV-DIV-B-H-W-V-U-and-.patch @@ -0,0 +1,283 @@ +From 116a2261b3a110e6ff4f9608f447e6f07156d55f Mon Sep 17 00:00:00 2001 +From: Xiaolin Zhao +Date: Wed, 11 Dec 2024 18:08:16 +0800 +Subject: [PATCH 33/44] cmd/internal/obj/loong64: add {V,XV}DIV{B/H/W/V}[U] and + {V,XV}MOD{B/H/W/V}[U] instructions support + +Go asm syntax: + VDIV{B/H/W/V}[U] VK, VJ, VD + XVDIV{B/H/W/V}[U] XK, XJ, XD + VMOD{B/H/W/V}[U] VK, VJ, VD + XVMOD{B/H/W/V}[U] XK, XJ, XD + +Equivalent platform assembler syntax: + vdiv.{b/h/w/d}[u] vd, vj, vk + xvdiv.{b/h/w/d}[u] xd, xj, xk + vmod.{b/h/w/d}[u] vd, vj, vk + xvmod.{b/h/w/d}[u] xd, xj, xk + +Change-Id: I27e9bc8999e6525a27f0bf12b21cc896c5a2a69c +--- + .../asm/internal/asm/testdata/loong64enc1.s | 34 +++++++ + src/cmd/internal/obj/loong64/a.out.go | 34 +++++++ + src/cmd/internal/obj/loong64/anames.go | 32 +++++++ + src/cmd/internal/obj/loong64/asm.go | 96 +++++++++++++++++++ + 4 files changed, 196 insertions(+) + +diff --git a/src/cmd/asm/internal/asm/testdata/loong64enc1.s b/src/cmd/asm/internal/asm/testdata/loong64enc1.s +index c8b490234f..bbac6036cf 100644 +--- a/src/cmd/asm/internal/asm/testdata/loong64enc1.s ++++ b/src/cmd/asm/internal/asm/testdata/loong64enc1.s +@@ -775,6 +775,40 @@ lable2: + XVMUHWU X3, X2, X1 // 410c8974 + XVMUHVU X3, X2, X1 // 418c8974 + ++ // [X]VDIV{B/H/W/V}[U] and [X]VMOD{B/H/W/V}[U] instructions ++ VDIVB V1, V2, V3 // 4304e070 
++ VDIVH V1, V2, V3 // 4384e070 ++ VDIVW V1, V2, V3 // 4304e170 ++ VDIVV V1, V2, V3 // 4384e170 ++ VDIVBU V1, V2, V3 // 4304e470 ++ VDIVHU V1, V2, V3 // 4384e470 ++ VDIVWU V1, V2, V3 // 4304e570 ++ VDIVVU V1, V2, V3 // 4384e570 ++ VMODB V1, V2, V3 // 4304e270 ++ VMODH V1, V2, V3 // 4384e270 ++ VMODW V1, V2, V3 // 4304e370 ++ VMODV V1, V2, V3 // 4384e370 ++ VMODBU V1, V2, V3 // 4304e670 ++ VMODHU V1, V2, V3 // 4384e670 ++ VMODWU V1, V2, V3 // 4304e770 ++ VMODVU V1, V2, V3 // 4384e770 ++ XVDIVB X3, X2, X1 // 410ce074 ++ XVDIVH X3, X2, X1 // 418ce074 ++ XVDIVW X3, X2, X1 // 410ce174 ++ XVDIVV X3, X2, X1 // 418ce174 ++ XVDIVBU X3, X2, X1 // 410ce474 ++ XVDIVHU X3, X2, X1 // 418ce474 ++ XVDIVWU X3, X2, X1 // 410ce574 ++ XVDIVVU X3, X2, X1 // 418ce574 ++ XVMODB X3, X2, X1 // 410ce274 ++ XVMODH X3, X2, X1 // 418ce274 ++ XVMODW X3, X2, X1 // 410ce374 ++ XVMODV X3, X2, X1 // 418ce374 ++ XVMODBU X3, X2, X1 // 410ce674 ++ XVMODHU X3, X2, X1 // 418ce674 ++ XVMODWU X3, X2, X1 // 410ce774 ++ XVMODVU X3, X2, X1 // 418ce774 ++ + // MOVV C_DCON12_0, r + MOVV $0x7a90000000000000, R4 // MOVV $8831558869273542656, R4 // 04a41e03 + MOVV $0xea90000000000000, R4 // MOVV $-1544734672188080128, R4 // 04a43a03 +diff --git a/src/cmd/internal/obj/loong64/a.out.go b/src/cmd/internal/obj/loong64/a.out.go +index 95744e77a1..9164e9d59f 100644 +--- a/src/cmd/internal/obj/loong64/a.out.go ++++ b/src/cmd/internal/obj/loong64/a.out.go +@@ -783,6 +783,40 @@ const ( + AXVMUHWU + AXVMUHVU + ++ // LSX and LASX integer div and mod instructions ++ AVDIVB ++ AVDIVH ++ AVDIVW ++ AVDIVV ++ AVDIVBU ++ AVDIVHU ++ AVDIVWU ++ AVDIVVU ++ AVMODB ++ AVMODH ++ AVMODW ++ AVMODV ++ AVMODBU ++ AVMODHU ++ AVMODWU ++ AVMODVU ++ AXVDIVB ++ AXVDIVH ++ AXVDIVW ++ AXVDIVV ++ AXVDIVBU ++ AXVDIVHU ++ AXVDIVWU ++ AXVDIVVU ++ AXVMODB ++ AXVMODH ++ AXVMODW ++ AXVMODV ++ AXVMODBU ++ AXVMODHU ++ AXVMODWU ++ AXVMODVU ++ + // LSX and LASX Bit-manipulation Instructions + AVANDB + AVORB +diff --git 
a/src/cmd/internal/obj/loong64/anames.go b/src/cmd/internal/obj/loong64/anames.go +index d0cd3a26fa..15a264c8e2 100644 +--- a/src/cmd/internal/obj/loong64/anames.go ++++ b/src/cmd/internal/obj/loong64/anames.go +@@ -309,6 +309,38 @@ var Anames = []string{ + "XVMUHHU", + "XVMUHWU", + "XVMUHVU", ++ "VDIVB", ++ "VDIVH", ++ "VDIVW", ++ "VDIVV", ++ "VDIVBU", ++ "VDIVHU", ++ "VDIVWU", ++ "VDIVVU", ++ "VMODB", ++ "VMODH", ++ "VMODW", ++ "VMODV", ++ "VMODBU", ++ "VMODHU", ++ "VMODWU", ++ "VMODVU", ++ "XVDIVB", ++ "XVDIVH", ++ "XVDIVW", ++ "XVDIVV", ++ "XVDIVBU", ++ "XVDIVHU", ++ "XVDIVWU", ++ "XVDIVVU", ++ "XVMODB", ++ "XVMODH", ++ "XVMODW", ++ "XVMODV", ++ "XVMODBU", ++ "XVMODHU", ++ "XVMODWU", ++ "XVMODVU", + "VANDB", + "VORB", + "VXORB", +diff --git a/src/cmd/internal/obj/loong64/asm.go b/src/cmd/internal/obj/loong64/asm.go +index 7fb99f66e6..7a14137374 100644 +--- a/src/cmd/internal/obj/loong64/asm.go ++++ b/src/cmd/internal/obj/loong64/asm.go +@@ -1601,6 +1601,22 @@ func buildop(ctxt *obj.Link) { + opset(AVMUHHU, r0) + opset(AVMUHWU, r0) + opset(AVMUHVU, r0) ++ opset(AVDIVB, r0) ++ opset(AVDIVH, r0) ++ opset(AVDIVW, r0) ++ opset(AVDIVV, r0) ++ opset(AVMODB, r0) ++ opset(AVMODH, r0) ++ opset(AVMODW, r0) ++ opset(AVMODV, r0) ++ opset(AVDIVBU, r0) ++ opset(AVDIVHU, r0) ++ opset(AVDIVWU, r0) ++ opset(AVDIVVU, r0) ++ opset(AVMODBU, r0) ++ opset(AVMODHU, r0) ++ opset(AVMODWU, r0) ++ opset(AVMODVU, r0) + + case AXVANDV: + opset(AXVORV, r0) +@@ -1638,6 +1654,22 @@ func buildop(ctxt *obj.Link) { + opset(AXVMUHHU, r0) + opset(AXVMUHWU, r0) + opset(AXVMUHVU, r0) ++ opset(AXVDIVB, r0) ++ opset(AXVDIVH, r0) ++ opset(AXVDIVW, r0) ++ opset(AXVDIVV, r0) ++ opset(AXVMODB, r0) ++ opset(AXVMODH, r0) ++ opset(AXVMODW, r0) ++ opset(AXVMODV, r0) ++ opset(AXVDIVBU, r0) ++ opset(AXVDIVHU, r0) ++ opset(AXVDIVWU, r0) ++ opset(AXVDIVVU, r0) ++ opset(AXVMODBU, r0) ++ opset(AXVMODHU, r0) ++ opset(AXVMODWU, r0) ++ opset(AXVMODVU, r0) + + case AVSLLB: + opset(AVSRLB, r0) +@@ -3109,6 +3141,70 @@ 
func (c *ctxt0) oprrr(a obj.As) uint32 { + return 0xe912 << 15 // xvmuh.wu + case AXVMUHVU: + return 0xe913 << 15 // xvmuh.du ++ case AVDIVB: ++ return 0xe1c0 << 15 // vdiv.b ++ case AVDIVH: ++ return 0xe1c1 << 15 // vdiv.h ++ case AVDIVW: ++ return 0xe1c2 << 15 // vdiv.w ++ case AVDIVV: ++ return 0xe1c3 << 15 // vdiv.d ++ case AVMODB: ++ return 0xe1c4 << 15 // vmod.b ++ case AVMODH: ++ return 0xe1c5 << 15 // vmod.h ++ case AVMODW: ++ return 0xe1c6 << 15 // vmod.w ++ case AVMODV: ++ return 0xe1c7 << 15 // vmod.d ++ case AVDIVBU: ++ return 0xe1c8 << 15 // vdiv.bu ++ case AVDIVHU: ++ return 0xe1c9 << 15 // vdiv.hu ++ case AVDIVWU: ++ return 0xe1ca << 15 // vdiv.wu ++ case AVDIVVU: ++ return 0xe1cb << 15 // vdiv.du ++ case AVMODBU: ++ return 0xe1cc << 15 // vmod.bu ++ case AVMODHU: ++ return 0xe1cd << 15 // vmod.hu ++ case AVMODWU: ++ return 0xe1ce << 15 // vmod.wu ++ case AVMODVU: ++ return 0xe1cf << 15 // vmod.du ++ case AXVDIVB: ++ return 0xe9c0 << 15 // xvdiv.b ++ case AXVDIVH: ++ return 0xe9c1 << 15 // xvdiv.h ++ case AXVDIVW: ++ return 0xe9c2 << 15 // xvdiv.w ++ case AXVDIVV: ++ return 0xe9c3 << 15 // xvdiv.d ++ case AXVMODB: ++ return 0xe9c4 << 15 // xvmod.b ++ case AXVMODH: ++ return 0xe9c5 << 15 // xvmod.h ++ case AXVMODW: ++ return 0xe9c6 << 15 // xvmod.w ++ case AXVMODV: ++ return 0xe9c7 << 15 // xvmod.d ++ case AXVDIVBU: ++ return 0xe9c8 << 15 // xvdiv.bu ++ case AXVDIVHU: ++ return 0xe9c9 << 15 // xvdiv.hu ++ case AXVDIVWU: ++ return 0xe9ca << 15 // xvdiv.wu ++ case AXVDIVVU: ++ return 0xe9cb << 15 // xvdiv.du ++ case AXVMODBU: ++ return 0xe9cc << 15 // xvmod.bu ++ case AXVMODHU: ++ return 0xe9cd << 15 // xvmod.hu ++ case AXVMODWU: ++ return 0xe9ce << 15 // xvmod.wu ++ case AXVMODVU: ++ return 0xe9cf << 15 // xvmod.du + } + + if a < 0 { +-- +2.38.1 + diff --git a/0033-cmd-internal-obj-set-morestack-arg-spilling-and-rega.patch b/0033-cmd-internal-obj-set-morestack-arg-spilling-and-rega.patch deleted file mode 100644 index 
a90a3525ae782bda8eee72158be129ccce172518..0000000000000000000000000000000000000000 --- a/0033-cmd-internal-obj-set-morestack-arg-spilling-and-rega.patch +++ /dev/null @@ -1,60 +0,0 @@ -From 42077138313613c83730ec5bbb8839f2ad334373 Mon Sep 17 00:00:00 2001 -From: Guoqi Chen -Date: Wed, 16 Aug 2023 09:16:21 +0800 -Subject: [PATCH 33/51] cmd/internal/obj: set morestack arg spilling and regabi - prologue on loong64 - -Update #40724 - -Co-authored-by: Xiaolin Zhao -Change-Id: I60bd80818d7f308b05a3f11d71a552ddf6fa5086 ---- - src/cmd/internal/obj/loong64/obj.go | 11 +++++++++++ - 1 file changed, 11 insertions(+) - -diff --git a/src/cmd/internal/obj/loong64/obj.go b/src/cmd/internal/obj/loong64/obj.go -index b0f5ac3087..ed5165418d 100644 ---- a/src/cmd/internal/obj/loong64/obj.go -+++ b/src/cmd/internal/obj/loong64/obj.go -@@ -626,6 +626,10 @@ func (c *ctxt0) stacksplit(p *obj.Prog, framesize int32) *obj.Prog { - - p = c.ctxt.StartUnsafePoint(p, c.newprog) - -+ // Spill Arguments. This has to happen before we open -+ // any more frame space. 
-+ p = c.cursym.Func().SpillRegisterArgs(p, c.newprog) -+ - // MOV REGLINK, -8/-16(SP) - p = obj.Appendp(p, c.newprog) - p.As = mov -@@ -690,6 +694,8 @@ func (c *ctxt0) stacksplit(p *obj.Prog, framesize int32) *obj.Prog { - p.To.Reg = REGSP - p.Spadj = int32(-frameSize) - -+ // Unspill arguments -+ p = c.cursym.Func().UnspillRegisterArgs(p, c.newprog) - p = c.ctxt.EndUnsafePoint(p, c.newprog, -1) - } - -@@ -801,6 +807,10 @@ func (c *ctxt0) stacksplit(p *obj.Prog, framesize int32) *obj.Prog { - - p = c.ctxt.EmitEntryStackMap(c.cursym, p, c.newprog) - -+ // Spill the register args that could be clobbered by the -+ // morestack code -+ p = c.cursym.Func().SpillRegisterArgs(p, c.newprog) -+ - // JAL runtime.morestack(SB) - p = obj.Appendp(p, c.newprog) - -@@ -815,6 +825,7 @@ func (c *ctxt0) stacksplit(p *obj.Prog, framesize int32) *obj.Prog { - } - p.Mark |= BRANCH - -+ p = c.cursym.Func().UnspillRegisterArgs(p, c.newprog) - p = c.ctxt.EndUnsafePoint(p, c.newprog, -1) - - // JMP start --- -2.38.1 - diff --git a/0034-cmd-compile-fix-If-lowering-on-loong64.patch b/0034-cmd-compile-fix-If-lowering-on-loong64.patch deleted file mode 100644 index 1b9007c1b1fa2d258f9399d9a6d7186ad7a03145..0000000000000000000000000000000000000000 --- a/0034-cmd-compile-fix-If-lowering-on-loong64.patch +++ /dev/null @@ -1,92 +0,0 @@ -From 16b02f64637d3196c8bdafd9c830e68e16193705 Mon Sep 17 00:00:00 2001 -From: Guoqi Chen -Date: Wed, 16 Aug 2023 10:22:13 +0800 -Subject: [PATCH 34/51] cmd/compile: fix If lowering on loong64 - -Update #40724 - -Co-authored-by: Xiaolin Zhao -Change-Id: Ib1f2130e382191a487325a064fec5c5c9f89016c ---- - .../compile/internal/ssa/_gen/LOONG64.rules | 3 ++- - .../compile/internal/ssa/rewriteLOONG64.go | 27 +++++++++++++++++-- - 2 files changed, 27 insertions(+), 3 deletions(-) - -diff --git a/src/cmd/compile/internal/ssa/_gen/LOONG64.rules b/src/cmd/compile/internal/ssa/_gen/LOONG64.rules -index 25caad4406..c02434c53e 100644 ---- 
a/src/cmd/compile/internal/ssa/_gen/LOONG64.rules -+++ b/src/cmd/compile/internal/ssa/_gen/LOONG64.rules -@@ -435,7 +435,7 @@ - (GetCallerSP ...) => (LoweredGetCallerSP ...) - (GetCallerPC ...) => (LoweredGetCallerPC ...) - --(If cond yes no) => (NE cond yes no) -+(If cond yes no) => (NE (MOVBUreg cond) yes no) - - // Write barrier. - (WB ...) => (LoweredWB ...) -@@ -469,6 +469,7 @@ - (EQ (SGTconst [0] x) yes no) => (GEZ x yes no) - (NE (SGT x (MOVVconst [0])) yes no) => (GTZ x yes no) - (EQ (SGT x (MOVVconst [0])) yes no) => (LEZ x yes no) -+(MOVBUreg x:((SGT|SGTU) _ _)) => x - - // fold offset into address - (ADDVconst [off1] (MOVVaddr [off2] {sym} ptr)) && is32Bit(off1+int64(off2)) => (MOVVaddr [int32(off1)+int32(off2)] {sym} ptr) -diff --git a/src/cmd/compile/internal/ssa/rewriteLOONG64.go b/src/cmd/compile/internal/ssa/rewriteLOONG64.go -index bb09bd2e58..a279831747 100644 ---- a/src/cmd/compile/internal/ssa/rewriteLOONG64.go -+++ b/src/cmd/compile/internal/ssa/rewriteLOONG64.go -@@ -1854,6 +1854,26 @@ func rewriteValueLOONG64_OpLOONG64MOVBUload(v *Value) bool { - } - func rewriteValueLOONG64_OpLOONG64MOVBUreg(v *Value) bool { - v_0 := v.Args[0] -+ // match: (MOVBUreg x:(SGT _ _)) -+ // result: x -+ for { -+ x := v_0 -+ if x.Op != OpLOONG64SGT { -+ break -+ } -+ v.copyOf(x) -+ return true -+ } -+ // match: (MOVBUreg x:(SGTU _ _)) -+ // result: x -+ for { -+ x := v_0 -+ if x.Op != OpLOONG64SGTU { -+ break -+ } -+ v.copyOf(x) -+ return true -+ } - // match: (MOVBUreg x:(MOVBUload _ _)) - // result: (MOVVreg x) - for { -@@ -7689,6 +7709,7 @@ func rewriteValueLOONG64_OpZero(v *Value) bool { - return false - } - func rewriteBlockLOONG64(b *Block) bool { -+ typ := &b.Func.Config.Types - switch b.Kind { - case BlockLOONG64EQ: - // match: (EQ (FPFlagTrue cmp) yes no) -@@ -7888,10 +7909,12 @@ func rewriteBlockLOONG64(b *Block) bool { - } - case BlockIf: - // match: (If cond yes no) -- // result: (NE cond yes no) -+ // result: (NE (MOVBUreg cond) yes no) - for { - cond 
:= b.Controls[0] -- b.resetWithControl(BlockLOONG64NE, cond) -+ v0 := b.NewValue0(cond.Pos, OpLOONG64MOVBUreg, typ.UInt64) -+ v0.AddArg(cond) -+ b.resetWithControl(BlockLOONG64NE, v0) - return true - } - case BlockLOONG64LEZ: --- -2.38.1 - diff --git a/0034-cmd-internal-obj-loong64-add-V-XV-BITCLR-BITSET-BITR.patch b/0034-cmd-internal-obj-loong64-add-V-XV-BITCLR-BITSET-BITR.patch new file mode 100644 index 0000000000000000000000000000000000000000..2a2d4b13d8efbdfe73ea616c881cc4dc7968ad49 --- /dev/null +++ b/0034-cmd-internal-obj-loong64-add-V-XV-BITCLR-BITSET-BITR.patch @@ -0,0 +1,341 @@ +From 054df785d79675c02f6bd2ad3ace9f1ce5874e84 Mon Sep 17 00:00:00 2001 +From: Xiaolin Zhao +Date: Thu, 12 Dec 2024 10:54:00 +0800 +Subject: [PATCH 34/44] cmd/internal/obj/loong64: add + {V,XV}{BITCLR/BITSET/BITREV}[I].{B/H/W/D} instructions support + +Go asm syntax: + V{BITCLR/BITSET/BITREV}{B/H/W/V} $1, V2, V3 + XV{BITCLR/BITSET/BITREV}{B/H/W/V} $1, X2, X3 + V{BITCLR/BITSET/BITREV}{B/H/W/V} VK, VJ, VD + XV{BITCLR/BITSET/BITREV}{B/H/W/V} XK, XJ, XD + +Equivalent platform assembler syntax: + v{bitclr/bitset/bitrev}i.{b/h/w/d} v3, v2, $1 + xv{bitclr/bitset/bitrev}i.{b/h/w/d} x3, x2, $1 + v{bitclr/bitset/bitrev}.{b/h/w/d} vd, vj, vk + xv{bitclr/bitset/bitrev}.{b/h/w/d} xd, xj, xk + +Change-Id: Id44e6cb7c22d650bb6b4d9f6faee5dcda4edb24e +--- + .../asm/internal/asm/testdata/loong64enc1.s | 50 ++++++++ + src/cmd/internal/obj/loong64/a.out.go | 25 ++++ + src/cmd/internal/obj/loong64/anames.go | 24 ++++ + src/cmd/internal/obj/loong64/asm.go | 120 ++++++++++++++++++ + 4 files changed, 219 insertions(+) + +diff --git a/src/cmd/asm/internal/asm/testdata/loong64enc1.s b/src/cmd/asm/internal/asm/testdata/loong64enc1.s +index bbac6036cf..19070c89ef 100644 +--- a/src/cmd/asm/internal/asm/testdata/loong64enc1.s ++++ b/src/cmd/asm/internal/asm/testdata/loong64enc1.s +@@ -809,6 +809,56 @@ lable2: + XVMODWU X3, X2, X1 // 410ce774 + XVMODVU X3, X2, X1 // 418ce774 + ++ // 
[X]{VBITCLR/VBITSET/VBITREV}{B,H,W,V} instructions ++ VBITCLRB V1, V2, V3 // 43040c71 ++ VBITCLRH V1, V2, V3 // 43840c71 ++ VBITCLRW V1, V2, V3 // 43040d71 ++ VBITCLRV V1, V2, V3 // 43840d71 ++ VBITSETB V1, V2, V3 // 43040e71 ++ VBITSETH V1, V2, V3 // 43840e71 ++ VBITSETW V1, V2, V3 // 43040f71 ++ VBITSETV V1, V2, V3 // 43840f71 ++ VBITREVB V1, V2, V3 // 43041071 ++ VBITREVH V1, V2, V3 // 43841071 ++ VBITREVW V1, V2, V3 // 43041171 ++ VBITREVV V1, V2, V3 // 43841171 ++ XVBITCLRB X3, X2, X1 // 410c0c75 ++ XVBITCLRH X3, X2, X1 // 418c0c75 ++ XVBITCLRW X3, X2, X1 // 410c0d75 ++ XVBITCLRV X3, X2, X1 // 418c0d75 ++ XVBITSETB X3, X2, X1 // 410c0e75 ++ XVBITSETH X3, X2, X1 // 418c0e75 ++ XVBITSETW X3, X2, X1 // 410c0f75 ++ XVBITSETV X3, X2, X1 // 418c0f75 ++ XVBITREVB X3, X2, X1 // 410c1075 ++ XVBITREVH X3, X2, X1 // 418c1075 ++ XVBITREVW X3, X2, X1 // 410c1175 ++ XVBITREVV X3, X2, X1 // 418c1175 ++ VBITCLRB $7, V2, V3 // 433c1073 ++ VBITCLRH $15, V2, V3 // 437c1073 ++ VBITCLRW $31, V2, V3 // 43fc1073 ++ VBITCLRV $63, V2, V3 // 43fc1173 ++ VBITSETB $7, V2, V3 // 433c1473 ++ VBITSETH $15, V2, V3 // 437c1473 ++ VBITSETW $31, V2, V3 // 43fc1473 ++ VBITSETV $63, V2, V3 // 43fc1573 ++ VBITREVB $7, V2, V3 // 433c1873 ++ VBITREVH $15, V2, V3 // 437c1873 ++ VBITREVW $31, V2, V3 // 43fc1873 ++ VBITREVV $63, V2, V3 // 43fc1973 ++ XVBITCLRB $7, X2, X1 // 413c1077 ++ XVBITCLRH $15, X2, X1 // 417c1077 ++ XVBITCLRW $31, X2, X1 // 41fc1077 ++ XVBITCLRV $63, X2, X1 // 41fc1177 ++ XVBITSETB $7, X2, X1 // 413c1477 ++ XVBITSETH $15, X2, X1 // 417c1477 ++ XVBITSETW $31, X2, X1 // 41fc1477 ++ XVBITSETV $63, X2, X1 // 41fc1577 ++ XVBITREVB $7, X2, X1 // 413c1877 ++ XVBITREVH $15, X2, X1 // 417c1877 ++ XVBITREVW $31, X2, X1 // 41fc1877 ++ XVBITREVV $63, X2, X1 // 41fc1977 ++ + // MOVV C_DCON12_0, r + MOVV $0x7a90000000000000, R4 // MOVV $8831558869273542656, R4 // 04a41e03 + MOVV $0xea90000000000000, R4 // MOVV $-1544734672188080128, R4 // 04a43a03 +diff --git 
a/src/cmd/internal/obj/loong64/a.out.go b/src/cmd/internal/obj/loong64/a.out.go +index 9164e9d59f..1fadbc648a 100644 +--- a/src/cmd/internal/obj/loong64/a.out.go ++++ b/src/cmd/internal/obj/loong64/a.out.go +@@ -881,6 +881,31 @@ const ( + AXVPCNTW + AXVPCNTV + ++ AVBITCLRB ++ AVBITCLRH ++ AVBITCLRW ++ AVBITCLRV ++ AVBITSETB ++ AVBITSETH ++ AVBITSETW ++ AVBITSETV ++ AVBITREVB ++ AVBITREVH ++ AVBITREVW ++ AVBITREVV ++ AXVBITCLRB ++ AXVBITCLRH ++ AXVBITCLRW ++ AXVBITCLRV ++ AXVBITSETB ++ AXVBITSETH ++ AXVBITSETW ++ AXVBITSETV ++ AXVBITREVB ++ AXVBITREVH ++ AXVBITREVW ++ AXVBITREVV ++ + // LSX and LASX floating point instructions + AVFSQRTF + AVFSQRTD +diff --git a/src/cmd/internal/obj/loong64/anames.go b/src/cmd/internal/obj/loong64/anames.go +index 15a264c8e2..aee0da0a6e 100644 +--- a/src/cmd/internal/obj/loong64/anames.go ++++ b/src/cmd/internal/obj/loong64/anames.go +@@ -401,6 +401,30 @@ var Anames = []string{ + "XVPCNTH", + "XVPCNTW", + "XVPCNTV", ++ "VBITCLRB", ++ "VBITCLRH", ++ "VBITCLRW", ++ "VBITCLRV", ++ "VBITSETB", ++ "VBITSETH", ++ "VBITSETW", ++ "VBITSETV", ++ "VBITREVB", ++ "VBITREVH", ++ "VBITREVW", ++ "VBITREVV", ++ "XVBITCLRB", ++ "XVBITCLRH", ++ "XVBITCLRW", ++ "XVBITCLRV", ++ "XVBITSETB", ++ "XVBITSETH", ++ "XVBITSETW", ++ "XVBITSETV", ++ "XVBITREVB", ++ "XVBITREVH", ++ "XVBITREVW", ++ "XVBITREVV", + "VFSQRTF", + "VFSQRTD", + "VFRECIPF", +diff --git a/src/cmd/internal/obj/loong64/asm.go b/src/cmd/internal/obj/loong64/asm.go +index 7a14137374..657d32ae81 100644 +--- a/src/cmd/internal/obj/loong64/asm.go ++++ b/src/cmd/internal/obj/loong64/asm.go +@@ -1675,41 +1675,65 @@ func buildop(ctxt *obj.Link) { + opset(AVSRLB, r0) + opset(AVSRAB, r0) + opset(AVROTRB, r0) ++ opset(AVBITCLRB, r0) ++ opset(AVBITSETB, r0) ++ opset(AVBITREVB, r0) + + case AXVSLLB: + opset(AXVSRLB, r0) + opset(AXVSRAB, r0) + opset(AXVROTRB, r0) ++ opset(AXVBITCLRB, r0) ++ opset(AXVBITSETB, r0) ++ opset(AXVBITREVB, r0) + + case AVSLLH: + opset(AVSRLH, r0) + opset(AVSRAH, r0) + 
opset(AVROTRH, r0) ++ opset(AVBITCLRH, r0) ++ opset(AVBITSETH, r0) ++ opset(AVBITREVH, r0) + + case AXVSLLH: + opset(AXVSRLH, r0) + opset(AXVSRAH, r0) + opset(AXVROTRH, r0) ++ opset(AXVBITCLRH, r0) ++ opset(AXVBITSETH, r0) ++ opset(AXVBITREVH, r0) + + case AVSLLW: + opset(AVSRLW, r0) + opset(AVSRAW, r0) + opset(AVROTRW, r0) ++ opset(AVBITCLRW, r0) ++ opset(AVBITSETW, r0) ++ opset(AVBITREVW, r0) + + case AXVSLLW: + opset(AXVSRLW, r0) + opset(AXVSRAW, r0) + opset(AXVROTRW, r0) ++ opset(AXVBITCLRW, r0) ++ opset(AXVBITSETW, r0) ++ opset(AXVBITREVW, r0) + + case AVSLLV: + opset(AVSRLV, r0) + opset(AVSRAV, r0) + opset(AVROTRV, r0) ++ opset(AVBITCLRV, r0) ++ opset(AVBITSETV, r0) ++ opset(AVBITREVV, r0) + + case AXVSLLV: + opset(AXVSRLV, r0) + opset(AXVSRAV, r0) + opset(AXVROTRV, r0) ++ opset(AXVBITCLRV, r0) ++ opset(AXVBITSETV, r0) ++ opset(AXVBITREVV, r0) + + case AVPCNTB: + opset(AVPCNTH, r0) +@@ -3205,6 +3229,54 @@ func (c *ctxt0) oprrr(a obj.As) uint32 { + return 0xe9ce << 15 // xvmod.wu + case AXVMODVU: + return 0xe9cf << 15 // xvmod.du ++ case AVBITCLRB: ++ return 0xe218 << 15 // vbitclr.b ++ case AVBITCLRH: ++ return 0xe219 << 15 // vbitclr.h ++ case AVBITCLRW: ++ return 0xe21a << 15 // vbitclr.w ++ case AVBITCLRV: ++ return 0xe21b << 15 // vbitclr.d ++ case AVBITSETB: ++ return 0xe21c << 15 // vbitset.b ++ case AVBITSETH: ++ return 0xe21d << 15 // vbitset.h ++ case AVBITSETW: ++ return 0xe21e << 15 // vbitset.w ++ case AVBITSETV: ++ return 0xe21f << 15 // vbitset.d ++ case AVBITREVB: ++ return 0xe220 << 15 // vbitrev.b ++ case AVBITREVH: ++ return 0xe221 << 15 // vbitrev.h ++ case AVBITREVW: ++ return 0xe222 << 15 // vbitrev.w ++ case AVBITREVV: ++ return 0xe223 << 15 // vbitrev.d ++ case AXVBITCLRB: ++ return 0xea18 << 15 // xvbitclr.b ++ case AXVBITCLRH: ++ return 0xea19 << 15 // xvbitclr.h ++ case AXVBITCLRW: ++ return 0xea1a << 15 // xvbitclr.w ++ case AXVBITCLRV: ++ return 0xea1b << 15 // xvbitclr.d ++ case AXVBITSETB: ++ return 0xea1c << 15 // xvbitset.b ++ 
case AXVBITSETH: ++ return 0xea1d << 15 // xvbitset.h ++ case AXVBITSETW: ++ return 0xea1e << 15 // xvbitset.w ++ case AXVBITSETV: ++ return 0xea1f << 15 // xvbitset.d ++ case AXVBITREVB: ++ return 0xea20 << 15 // xvbitrev.b ++ case AXVBITREVH: ++ return 0xea21 << 15 // xvbitrev.h ++ case AXVBITREVW: ++ return 0xea22 << 15 // xvbitrev.w ++ case AXVBITREVV: ++ return 0xea23 << 15 // xvbitrev.d + } + + if a < 0 { +@@ -3668,6 +3740,54 @@ func (c *ctxt0) opirr(a obj.As) uint32 { + return 0x1DCD<<18 | 0x1<<15 // xvsrai.w + case AXVSRAV: + return 0x1DCD<<18 | 0x1<<16 // xvsrai.d ++ case AVBITCLRB: ++ return 0x1CC4<<18 | 0x1<<13 // vbitclri.b ++ case AVBITCLRH: ++ return 0x1CC4<<18 | 0x1<<14 // vbitclri.h ++ case AVBITCLRW: ++ return 0x1CC4<<18 | 0x1<<15 // vbitclri.w ++ case AVBITCLRV: ++ return 0x1CC4<<18 | 0x1<<16 // vbitclri.d ++ case AVBITSETB: ++ return 0x1CC5<<18 | 0x1<<13 // vbitseti.b ++ case AVBITSETH: ++ return 0x1CC5<<18 | 0x1<<14 // vbitseti.h ++ case AVBITSETW: ++ return 0x1CC5<<18 | 0x1<<15 // vbitseti.w ++ case AVBITSETV: ++ return 0x1CC5<<18 | 0x1<<16 // vbitseti.d ++ case AVBITREVB: ++ return 0x1CC6<<18 | 0x1<<13 // vbitrevi.b ++ case AVBITREVH: ++ return 0x1CC6<<18 | 0x1<<14 // vbitrevi.h ++ case AVBITREVW: ++ return 0x1CC6<<18 | 0x1<<15 // vbitrevi.w ++ case AVBITREVV: ++ return 0x1CC6<<18 | 0x1<<16 // vbitrevi.d ++ case AXVBITCLRB: ++ return 0x1DC4<<18 | 0x1<<13 // xvbitclri.b ++ case AXVBITCLRH: ++ return 0x1DC4<<18 | 0x1<<14 // xvbitclri.h ++ case AXVBITCLRW: ++ return 0x1DC4<<18 | 0x1<<15 // xvbitclri.w ++ case AXVBITCLRV: ++ return 0x1DC4<<18 | 0x1<<16 // xvbitclri.d ++ case AXVBITSETB: ++ return 0x1DC5<<18 | 0x1<<13 // xvbitseti.b ++ case AXVBITSETH: ++ return 0x1DC5<<18 | 0x1<<14 // xvbitseti.h ++ case AXVBITSETW: ++ return 0x1DC5<<18 | 0x1<<15 // xvbitseti.w ++ case AXVBITSETV: ++ return 0x1DC5<<18 | 0x1<<16 // xvbitseti.d ++ case AXVBITREVB: ++ return 0x1DC6<<18 | 0x1<<13 // xvbitrevi.b ++ case AXVBITREVH: ++ return 0x1DC6<<18 | 0x1<<14 // 
xvbitrevi.h ++ case AXVBITREVW: ++ return 0x1DC6<<18 | 0x1<<15 // xvbitrevi.w ++ case AXVBITREVV: ++ return 0x1DC6<<18 | 0x1<<16 // xvbitrevi.d + } + + if a < 0 { +-- +2.38.1 + diff --git a/0035-crypto-chacha20-add-loong64-SIMD-implementation.patch b/0035-crypto-chacha20-add-loong64-SIMD-implementation.patch new file mode 100644 index 0000000000000000000000000000000000000000..2c8eb2b11612f20bef6bf8f08996bedaa193b3b8 --- /dev/null +++ b/0035-crypto-chacha20-add-loong64-SIMD-implementation.patch @@ -0,0 +1,490 @@ +From d6bdc012b1c105a007d0fb5d7d1642f1a5653b1d Mon Sep 17 00:00:00 2001 +From: Xiaolin Zhao +Date: Fri, 13 Dec 2024 17:09:31 +0800 +Subject: [PATCH 35/44] crypto/chacha20: add loong64 SIMD implementation +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +The performance of chacha20 has been greatly improved. + +goos: linux +goarch: loong64 +pkg: golang.org/x/crypto/chacha20 +cpu: Loongson-3A6000 @ 2500.00MHz + | bench.old | bench.new | + | sec/op | sec/op vs base | +ChaCha20/64 171.9n ± 0% 159.3n ± 0% -7.33% (p=0.000 n=20) +ChaCha20/256 592.2n ± 0% 142.8n ± 0% -75.89% (p=0.000 n=20) +ChaCha20/10x25 981.5n ± 0% 518.8n ± 0% -47.14% (p=0.000 n=20) +ChaCha20/4096 8.991µ ± 0% 1.732µ ± 0% -80.74% (p=0.000 n=20) +ChaCha20/100x40 10.651µ ± 0% 5.135µ ± 0% -51.79% (p=0.000 n=20) +ChaCha20/65536 143.43µ ± 0% 28.76µ ± 0% -79.95% (p=0.000 n=20) +ChaCha20/1000x65 146.17µ ± 0% 37.13µ ± 0% -74.60% (p=0.000 n=20) +geomean 5.721µ 1.962µ -65.70% + + | bench.old | bench.new | + | B/s | B/s vs base | +ChaCha20/64 355.1Mi ± 0% 383.1Mi ± 0% +7.89% (p=0.000 n=20) +ChaCha20/256 412.2Mi ± 0% 1710.2Mi ± 0% +314.86% (p=0.000 n=20) +ChaCha20/10x25 242.9Mi ± 0% 459.6Mi ± 0% +89.19% (p=0.000 n=20) +ChaCha20/4096 434.5Mi ± 0% 2255.8Mi ± 0% +419.22% (p=0.000 n=20) +ChaCha20/100x40 358.1Mi ± 0% 742.9Mi ± 0% +107.44% (p=0.000 n=20) +ChaCha20/65536 435.8Mi ± 0% 2173.2Mi ± 0% +398.72% (p=0.000 n=20) +ChaCha20/1000x65 424.1Mi ± 0% 1669.4Mi ± 0% 
+293.64% (p=0.000 n=20) +geomean 373.9Mi 1.065Gi +191.55% + +goos: linux +goarch: loong64 +pkg: golang.org/x/crypto/chacha20 +cpu: Loongson-3A5000 @ 2500.00MHz + | bench.old | bench.new | + | sec/op | sec/op vs base | +ChaCha20/64 234.5n ± 0% 295.8n ± 0% +26.14% (p=0.000 n=20) +ChaCha20/256 782.0n ± 0% 274.6n ± 0% -64.88% (p=0.000 n=20) +ChaCha20/10x25 1340.0n ± 0% 752.7n ± 0% -43.83% (p=0.000 n=20) +ChaCha20/4096 11.744µ ± 0% 3.455µ ± 0% -70.58% (p=0.000 n=20) +ChaCha20/100x40 14.151µ ± 0% 7.435µ ± 0% -47.46% (p=0.000 n=20) +ChaCha20/65536 188.05µ ± 0% 54.33µ ± 0% -71.11% (p=0.000 n=20) +ChaCha20/1000x65 191.44µ ± 0% 66.29µ ± 0% -65.37% (p=0.000 n=20) +geomean 7.604µ 3.436µ -54.81% + + | bench.old | bench.new | + | B/s | B/s vs base | +ChaCha20/64 260.3Mi ± 0% 206.3Mi ± 0% -20.73% (p=0.000 n=20) +ChaCha20/256 312.2Mi ± 0% 888.9Mi ± 0% +184.75% (p=0.000 n=20) +ChaCha20/10x25 177.9Mi ± 0% 316.8Mi ± 0% +78.08% (p=0.000 n=20) +ChaCha20/4096 332.6Mi ± 0% 1130.8Mi ± 0% +239.95% (p=0.000 n=20) +ChaCha20/100x40 269.6Mi ± 0% 513.1Mi ± 0% +90.34% (p=0.000 n=20) +ChaCha20/65536 332.4Mi ± 0% 1150.5Mi ± 0% +246.16% (p=0.000 n=20) +ChaCha20/1000x65 323.8Mi ± 0% 935.2Mi ± 0% +188.81% (p=0.000 n=20) +geomean 281.3Mi 622.6Mi +121.31% + +Change-Id: Iab4934d78b845e3b248bd5d0a9a62e4e9c516831 +--- + .../x/crypto/chacha20/chacha_loong64.go | 17 + + .../x/crypto/chacha20/chacha_loong64.s | 374 ++++++++++++++++++ + .../x/crypto/chacha20/chacha_noasm.go | 2 +- + 3 files changed, 392 insertions(+), 1 deletion(-) + create mode 100644 src/vendor/golang.org/x/crypto/chacha20/chacha_loong64.go + create mode 100644 src/vendor/golang.org/x/crypto/chacha20/chacha_loong64.s + +diff --git a/src/vendor/golang.org/x/crypto/chacha20/chacha_loong64.go b/src/vendor/golang.org/x/crypto/chacha20/chacha_loong64.go +new file mode 100644 +index 0000000000..d0f5d909fc +--- /dev/null ++++ b/src/vendor/golang.org/x/crypto/chacha20/chacha_loong64.go +@@ -0,0 +1,17 @@ ++// Copyright 2024 The Go Authors. 
All rights reserved. ++// Use of this source code is governed by a BSD-style ++// license that can be found in the LICENSE file. ++ ++//go:build gc && !purego ++ ++package chacha20 ++ ++const bufSize = 256 ++ ++//go:noescape ++func xorKeyStreamVX(dst, src []byte, key *[8]uint32, nonce *[3]uint32, counter *uint32) ++ ++func (c *Cipher) xorKeyStreamBlocks(dst, src []byte) { ++ // add cpu.Loong64.HasLSX check TODO ++ xorKeyStreamVX(dst, src, &c.key, &c.nonce, &c.counter) ++} +diff --git a/src/vendor/golang.org/x/crypto/chacha20/chacha_loong64.s b/src/vendor/golang.org/x/crypto/chacha20/chacha_loong64.s +new file mode 100644 +index 0000000000..883c8d992a +--- /dev/null ++++ b/src/vendor/golang.org/x/crypto/chacha20/chacha_loong64.s +@@ -0,0 +1,374 @@ ++// Copyright 2024 The Go Authors. All rights reserved. ++// Use of this source code is governed by a BSD-style ++// license that can be found in the LICENSE file. ++ ++// derived from chacha_arm64.s ++ ++//go:build gc && !purego ++ ++#include "textflag.h" ++ ++DATA ·constants+0x00(SB)/4, $0x61707865 ++DATA ·constants+0x04(SB)/4, $0x3320646e ++DATA ·constants+0x08(SB)/4, $0x79622d32 ++DATA ·constants+0x0c(SB)/4, $0x6b206574 ++GLOBL ·constants(SB), NOPTR|RODATA, $32 ++ ++DATA ·incRotMatrix+0x00(SB)/4, $0x00000000 ++DATA ·incRotMatrix+0x04(SB)/4, $0x00000001 ++DATA ·incRotMatrix+0x08(SB)/4, $0x00000002 ++DATA ·incRotMatrix+0x0c(SB)/4, $0x00000003 ++GLOBL ·incRotMatrix(SB), NOPTR|RODATA, $32 ++ ++#define NUM_ROUNDS 10 ++ ++// func xorKeyStreamVX(dst, src []byte, key *[8]uint32, nonce *[3]uint32, counter *uint32) ++TEXT ·xorKeyStreamVX(SB), NOSPLIT, $0 ++ MOVV dst+0(FP), R4 ++ MOVV src+24(FP), R5 ++ MOVV src_len+32(FP), R6 ++ MOVV key+48(FP), R7 ++ MOVV nonce+56(FP), R8 ++ MOVV counter+64(FP), R9 ++ ++ MOVV $·constants(SB), R10 ++ MOVV $·incRotMatrix(SB), R11 ++ ++ MOVW (R9), R12 ++ ++loop: ++ MOVV $NUM_ROUNDS, R15 ++ // load 4-32bit data from incRotMatrix added to counter ++ VMOVQ (R11), V30 ++ ++ // load constants ++ // 
VLDREPL.W $0, R10, V0 ++ WORD $0x30200140 ++ // VLDREPL.W $1, R10, V1 ++ WORD $0x30200541 ++ // VLDREPL.W $2, R10, V2 ++ WORD $0x30200942 ++ // VLDREPL.W $3, R10, V3 ++ WORD $0x30200d43 ++ ++ // load keys ++ // VLDREPL.W $0, R7, V4 ++ WORD $0x302000e4 ++ // VLDREPL.W $1, R7, V5 ++ WORD $0x302004e5 ++ // VLDREPL.W $2, R7, V6 ++ WORD $0x302008e6 ++ // VLDREPL.W $3, R7, V7 ++ WORD $0x30200ce7 ++ // VLDREPL.W $4, R7, V8 ++ WORD $0x302010e8 ++ // VLDREPL.W $5, R7, V9 ++ WORD $0x302014e9 ++ // VLDREPL.W $6, R7, V10 ++ WORD $0x302018ea ++ // VLDREPL.W $7, R7, V11 ++ WORD $0x30201ceb ++ ++ // load counter + nonce ++ // VLDREPL.W $0, R9, V12 ++ WORD $0x3020012c ++ ++ // VLDREPL.W $0, R8, V13 ++ WORD $0x3020010d ++ // VLDREPL.W $1, R8, V14 ++ WORD $0x3020050e ++ // VLDREPL.W $2, R8, V15 ++ WORD $0x3020090f ++ ++ // update counter ++ VADDW V30, V12, V12 ++ ++chacha: ++ // V0..V3 += V4..V7 ++ // V12..V15 <<<= ((V12..V15 XOR V0..V3), 16) ++ VADDW V0, V4, V0 ++ VADDW V1, V5, V1 ++ VADDW V2, V6, V2 ++ VADDW V3, V7, V3 ++ VXORV V12, V0, V12 ++ VXORV V13, V1, V13 ++ VXORV V14, V2, V14 ++ VXORV V15, V3, V15 ++ VROTRW $16, V12, V12 ++ VROTRW $16, V13, V13 ++ VROTRW $16, V14, V14 ++ VROTRW $16, V15, V15 ++ ++ // V8..V11 += V12..V15 ++ // V4..V7 <<<= ((V4..V7 XOR V8..V11), 12) ++ VADDW V8, V12, V8 ++ VADDW V9, V13, V9 ++ VADDW V10, V14, V10 ++ VADDW V11, V15, V11 ++ VXORV V4, V8, V4 ++ VXORV V5, V9, V5 ++ VXORV V6, V10, V6 ++ VXORV V7, V11, V7 ++ VROTRW $20, V4, V4 ++ VROTRW $20, V5, V5 ++ VROTRW $20, V6, V6 ++ VROTRW $20, V7, V7 ++ ++ // V0..V3 += V4..V7 ++ // V12..V15 <<<= ((V12..V15 XOR V0..V3), 8) ++ VADDW V0, V4, V0 ++ VADDW V1, V5, V1 ++ VADDW V2, V6, V2 ++ VADDW V3, V7, V3 ++ VXORV V12, V0, V12 ++ VXORV V13, V1, V13 ++ VXORV V14, V2, V14 ++ VXORV V15, V3, V15 ++ VROTRW $24, V12, V12 ++ VROTRW $24, V13, V13 ++ VROTRW $24, V14, V14 ++ VROTRW $24, V15, V15 ++ ++ // V8..V11 += V12..V15 ++ // V4..V7 <<<= ((V4..V7 XOR V8..V11), 7) ++ VADDW V12, V8, V8 ++ VADDW V13, V9, V9 ++ VADDW 
V14, V10, V10 ++ VADDW V15, V11, V11 ++ VXORV V4, V8, V4 ++ VXORV V5, V9, V5 ++ VXORV V6, V10, V6 ++ VXORV V7, V11, V7 ++ VROTRW $25, V4, V4 ++ VROTRW $25, V5, V5 ++ VROTRW $25, V6, V6 ++ VROTRW $25, V7, V7 ++ ++ // V0..V3 += V5..V7, V4 ++ // V15,V12-V14 <<<= ((V15,V12-V14 XOR V0..V3), 16) ++ VADDW V0, V5, V0 ++ VADDW V1, V6, V1 ++ VADDW V2, V7, V2 ++ VADDW V3, V4, V3 ++ VXORV V15, V0, V15 ++ VXORV V12, V1, V12 ++ VXORV V13, V2, V13 ++ VXORV V14, V3, V14 ++ VROTRW $16, V15, V15 ++ VROTRW $16, V12, V12 ++ VROTRW $16, V13, V13 ++ VROTRW $16, V14, V14 ++ ++ // V10,V11,V8,V9 += V15,V12,V13,V14 ++ // V5,V6,V7,V4 <<<= ((V5,V6,V7,V4 XOR V10,V11,V8,V9), 12) ++ VADDW V10, V15, V10 ++ VADDW V11, V12, V11 ++ VADDW V8, V13, V8 ++ VADDW V9, V14, V9 ++ VXORV V5, V10, V5 ++ VXORV V6, V11, V6 ++ VXORV V7, V8, V7 ++ VXORV V4, V9, V4 ++ VROTRW $20, V5, V5 ++ VROTRW $20, V6, V6 ++ VROTRW $20, V7, V7 ++ VROTRW $20, V4, V4 ++ ++ // V0..V3 += V5..V7, V4 ++ // V15,V12-V14 <<<= ((V15,V12-V14 XOR V0..V3), 8) ++ VADDW V5, V0, V0 ++ VADDW V6, V1, V1 ++ VADDW V7, V2, V2 ++ VADDW V4, V3, V3 ++ VXORV V15, V0, V15 ++ VXORV V12, V1, V12 ++ VXORV V13, V2, V13 ++ VXORV V14, V3, V14 ++ VROTRW $24, V15, V15 ++ VROTRW $24, V12, V12 ++ VROTRW $24, V13, V13 ++ VROTRW $24, V14, V14 ++ ++ // V10,V11,V8,V9 += V15,V12,V13,V14 ++ // V5,V6,V7,V4 <<<= ((V5,V6,V7,V4 XOR V10,V11,V8,V9), 7) ++ VADDW V15, V10, V10 ++ VADDW V12, V11, V11 ++ VADDW V13, V8, V8 ++ VADDW V14, V9, V9 ++ VXORV V5, V10, V5 ++ VXORV V6, V11, V6 ++ VXORV V7, V8, V7 ++ VXORV V4, V9, V4 ++ VROTRW $25, V5, V5 ++ VROTRW $25, V6, V6 ++ VROTRW $25, V7, V7 ++ VROTRW $25, V4, V4 ++ ++ SUBV $1, R15 ++ BNE R15, R0, chacha ++ ++ // load origin constants ++ // VLDREPL.W $0, R10, V16 ++ WORD $0x30200150 ++ // VLDREPL.W $1, R10, V17 ++ WORD $0x30200551 ++ // VLDREPL.W $2, R10, V18 ++ WORD $0x30200952 ++ // VLDREPL.W $3, R10, V19 ++ WORD $0x30200d53 ++ ++ // load origin keys ++ // VLDREPL.W $0, R7, V20 ++ WORD $0x302000f4 ++ // VLDREPL.W $1, R7, V21 ++ 
WORD $0x302004f5 ++ // VLDREPL.W $2, R7, V22 ++ WORD $0x302008f6 ++ // VLDREPL.W $3, R7, V23 ++ WORD $0x30200cf7 ++ // VLDREPL.W $4, R7, V24 ++ WORD $0x302010f8 ++ // VLDREPL.W $5, R7, V25 ++ WORD $0x302014f9 ++ // VLDREPL.W $6, R7, V26 ++ WORD $0x302018fa ++ // VLDREPL.W $7, R7, V27 ++ WORD $0x30201cfb ++ ++ // add back the initial state to generate the key stream ++ VADDW V30, V12, V12 // update counter in advance to prevent V30 from being overwritten ++ VADDW V16, V0, V0 ++ VADDW V17, V1, V1 ++ VADDW V18, V2, V2 ++ VADDW V19, V3, V3 ++ ++ // load origin counter + nonce ++ // VLDREPL.W $0, R9, V28 ++ WORD $0x3020013c ++ // VLDREPL.W $0, R8, V29 ++ WORD $0x3020011d ++ // VLDREPL.W $1, R8, V30 ++ WORD $0x3020051e ++ // VLDREPL.W $2, R8, V31 ++ WORD $0x3020091f ++ ++ VADDW V20, V4, V4 ++ VADDW V21, V5, V5 ++ VADDW V22, V6, V6 ++ VADDW V23, V7, V7 ++ VADDW V24, V8, V8 ++ VADDW V25, V9, V9 ++ VADDW V26, V10, V10 ++ VADDW V27, V11, V11 ++ VADDW V28, V12, V12 ++ VADDW V29, V13, V13 ++ VADDW V30, V14, V14 ++ VADDW V31, V15, V15 ++ ++ // shuffle ++ VILVLW V0, V1, V16 ++ VILVHW V0, V1, V17 ++ VILVLW V2, V3, V18 ++ VILVHW V2, V3, V19 ++ VILVLW V4, V5 ,V20 ++ VILVHW V4, V5, V21 ++ VILVLW V6, V7, V22 ++ VILVHW V6, V7, V23 ++ VILVLW V8, V9, V24 ++ VILVHW V8, V9, V25 ++ VILVLW V10, V11, V26 ++ VILVHW V10, V11, V27 ++ VILVLW V12, V13, V28 ++ VILVHW V12, V13, V29 ++ VILVLW V14, V15, V30 ++ VILVHW V14, V15, V31 ++ VILVLV V16, V18, V0 ++ VILVHV V16, V18, V4 ++ VILVLV V17, V19, V8 ++ VILVHV V17, V19, V12 ++ ++ // load src data from R5 ++ VMOVQ 0(R5), V16 ++ VMOVQ 16(R5), V17 ++ VMOVQ 32(R5), V18 ++ VMOVQ 48(R5), V19 ++ ++ VILVLV V20, V22, V1 ++ VILVHV V20, V22, V5 ++ VILVLV V21, V23, V9 ++ VILVHV V21, V23, V13 ++ ++ VMOVQ 64(R5), V20 ++ VMOVQ 80(R5), V21 ++ VMOVQ 96(R5), V22 ++ VMOVQ 112(R5), V23 ++ ++ VILVLV V24, V26, V2 ++ VILVHV V24, V26, V6 ++ VILVLV V25, V27, V10 ++ VILVHV V25, V27, V14 ++ ++ VMOVQ 128(R5), V24 ++ VMOVQ 144(R5), V25 ++ VMOVQ 160(R5), V26 ++ VMOVQ 176(R5), V27 
++ ++ VILVLV V28, V30, V3 ++ VILVHV V28, V30, V7 ++ VILVLV V29, V31, V11 ++ VILVHV V29, V31, V15 ++ ++ VMOVQ 192(R5), V28 ++ VMOVQ 208(R5), V29 ++ VMOVQ 224(R5), V30 ++ VMOVQ 240(R5), V31 ++ ++ VXORV V0, V16, V16 ++ VXORV V1, V17, V17 ++ VXORV V2, V18, V18 ++ VXORV V3, V19, V19 ++ ++ VMOVQ V16, 0(R4) ++ VMOVQ V17, 16(R4) ++ VMOVQ V18, 32(R4) ++ VMOVQ V19, 48(R4) ++ ++ VXORV V4, V20, V20 ++ VXORV V5, V21, V21 ++ VXORV V6, V22, V22 ++ VXORV V7, V23, V23 ++ ++ VMOVQ V20, 64(R4) ++ VMOVQ V21, 80(R4) ++ VMOVQ V22, 96(R4) ++ VMOVQ V23, 112(R4) ++ ++ VXORV V8, V24, V24 ++ VXORV V9, V25, V25 ++ VXORV V10, V26, V26 ++ VXORV V11, V27, V27 ++ ++ VMOVQ V24, 128(R4) ++ VMOVQ V25, 144(R4) ++ VMOVQ V26, 160(R4) ++ VMOVQ V27, 176(R4) ++ ++ VXORV V12, V28, V28 ++ VXORV V13, V29, V29 ++ VXORV V14, V30, V30 ++ VXORV V15, V31, V31 ++ ++ VMOVQ V28, 192(R4) ++ VMOVQ V29, 208(R4) ++ VMOVQ V30, 224(R4) ++ VMOVQ V31, 240(R4) ++ ++ ADD $4, R12, R12 ++ MOVW R12, (R9) // update counter ++ ++ ADDV $256, R4, R4 ++ ADDV $256, R5, R5 ++ SUBV $256, R6, R6 ++ BNE R6, R0, loop ++ ++ RET +diff --git a/src/vendor/golang.org/x/crypto/chacha20/chacha_noasm.go b/src/vendor/golang.org/x/crypto/chacha20/chacha_noasm.go +index c709b72847..3853cc0e0b 100644 +--- a/src/vendor/golang.org/x/crypto/chacha20/chacha_noasm.go ++++ b/src/vendor/golang.org/x/crypto/chacha20/chacha_noasm.go +@@ -2,7 +2,7 @@ + // Use of this source code is governed by a BSD-style + // license that can be found in the LICENSE file. 
+ +-//go:build (!arm64 && !s390x && !ppc64 && !ppc64le) || !gc || purego ++//go:build (!arm64 && !loong64 && !s390x && !ppc64 && !ppc64le) || !gc || purego + + package chacha20 + +-- +2.38.1 + diff --git a/0035-runtime-internal-syscall-use-ABIInternal-for-Syscall.patch b/0035-runtime-internal-syscall-use-ABIInternal-for-Syscall.patch deleted file mode 100644 index 70d343ba9dc670f705af7489da2ad5f770c6c580..0000000000000000000000000000000000000000 --- a/0035-runtime-internal-syscall-use-ABIInternal-for-Syscall.patch +++ /dev/null @@ -1,87 +0,0 @@ -From a2d7a462c2c5a642e062088b302ae80fce7f2c66 Mon Sep 17 00:00:00 2001 -From: Guoqi Chen -Date: Tue, 22 Aug 2023 19:50:03 +0800 -Subject: [PATCH 35/51] runtime/internal/syscall: use ABIInternal for Syscall6 - on loong64 - -Updates #40724 - -Co-authored-by: Xiaolin Zhao -Change-Id: I9ff50a2e5060f99826e2e8e1d99d86f9bca10e0c ---- - .../internal/syscall/asm_linux_loong64.s | 41 ++++++++++++++++++- - 1 file changed, 40 insertions(+), 1 deletion(-) - -diff --git a/src/runtime/internal/syscall/asm_linux_loong64.s b/src/runtime/internal/syscall/asm_linux_loong64.s -index d6a33f90a7..11c5bc2468 100644 ---- a/src/runtime/internal/syscall/asm_linux_loong64.s -+++ b/src/runtime/internal/syscall/asm_linux_loong64.s -@@ -5,7 +5,32 @@ - #include "textflag.h" - - // func Syscall6(num, a1, a2, a3, a4, a5, a6 uintptr) (r1, r2, errno uintptr) --TEXT ·Syscall6(SB),NOSPLIT,$0-80 -+// -+// We need to convert to the syscall ABI. 
-+// -+// arg | ABIInternal | Syscall -+// --------------------------- -+// num | R4 | R11 -+// a1 | R5 | R4 -+// a2 | R6 | R5 -+// a3 | R7 | R6 -+// a4 | R8 | R7 -+// a5 | R9 | R8 -+// a6 | R10 | R9 -+// -+// r1 | R4 | R4 -+// r2 | R5 | R5 -+// err | R6 | part of R4 -+TEXT ·Syscall6(SB),NOSPLIT,$0-80 -+#ifdef GOEXPERIMENT_regabiargs -+ MOVV R4, R11 // syscall entry -+ MOVV R5, R4 -+ MOVV R6, R5 -+ MOVV R7, R6 -+ MOVV R8, R7 -+ MOVV R9, R8 -+ MOVV R10, R9 -+#else - MOVV num+0(FP), R11 // syscall entry - MOVV a1+8(FP), R4 - MOVV a2+16(FP), R5 -@@ -13,7 +38,15 @@ TEXT ·Syscall6(SB),NOSPLIT,$0-80 - MOVV a4+32(FP), R7 - MOVV a5+40(FP), R8 - MOVV a6+48(FP), R9 -+#endif - SYSCALL -+#ifdef GOEXPERIMENT_regabiargs -+ MOVV R0, R5 // r2 is not used. Always set to 0. -+ MOVW $-4096, R12 -+ BGEU R12, R4, ok -+ SUBVU R4, R0, R6 // errno -+ MOVV $-1, R4 // r1 -+#else - MOVW $-4096, R12 - BGEU R12, R4, ok - MOVV $-1, R12 -@@ -21,9 +54,15 @@ TEXT ·Syscall6(SB),NOSPLIT,$0-80 - MOVV R0, r2+64(FP) - SUBVU R4, R0, R4 - MOVV R4, errno+72(FP) -+#endif - RET - ok: -+#ifdef GOEXPERIMENT_regabiargs -+ // r1 already in R4 -+ MOVV R0, R6 // errno -+#else - MOVV R4, r1+56(FP) - MOVV R0, r2+64(FP) // r2 is not used. Always set to 0. 
- MOVV R0, errno+72(FP) -+#endif - RET --- -2.38.1 - diff --git a/0036-cmd-compile-internal-buildcfg-enable-regABI-on-loong.patch b/0036-cmd-compile-internal-buildcfg-enable-regABI-on-loong.patch deleted file mode 100644 index dfb9a1cbb6a74b2221a0a56893e959b61fcad4ba..0000000000000000000000000000000000000000 --- a/0036-cmd-compile-internal-buildcfg-enable-regABI-on-loong.patch +++ /dev/null @@ -1,132 +0,0 @@ -From 973c47b829abcf41b6ad05167d3ef3ac8974e81b Mon Sep 17 00:00:00 2001 -From: Guoqi Chen -Date: Wed, 16 Aug 2023 10:39:38 +0800 -Subject: [PATCH 36/51] cmd/compile, internal/buildcfg: enable regABI on - loong64, and add loong64 in test func hasRegisterABI -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -goos: linux -goarch: loong64 -pkg: test/bench/go1 -cpu: Loongson-3A5000 @ 2500.00MHz - │ bench.old │ bench.new │ - │ sec/op │ sec/op vs base │ -Template 116.4m ± 1% 101.3m ± 0% -12.94% (p=0.000 n=20) -Gzip 417.2m ± 0% 419.4m ± 0% +0.53% (p=0.000 n=20) -Gunzip 87.41m ± 0% 84.61m ± 0% -3.20% (p=0.000 n=20) -FmtFprintfEmpty 97.87n ± 0% 81.05n ± 0% -17.19% (p=0.000 n=20) -FmtFprintfString 151.1n ± 0% 140.9n ± 0% -6.75% (p=0.000 n=20) -FmtFprintfInt 155.6n ± 0% 143.0n ± 0% -8.10% (p=0.000 n=20) -FmtFprintfIntInt 236.9n ± 0% 225.1n ± 0% -5.00% (p=0.000 n=20) -FmtFprintfPrefixedInt 316.8n ± 0% 331.9n ± 0% +4.77% (p=0.000 n=20) -FmtFprintfFloat 401.5n ± 0% 380.0n ± 0% -5.35% (p=0.000 n=20) -FmtManyArgs 925.3n ± 0% 910.1n ± 0% -1.64% (p=0.000 n=20) -BinaryTree17 14.04 ± 1% 12.84 ± 0% -8.52% (p=0.000 n=20) -RegexpMatchEasy0_32 133.1n ± 0% 121.3n ± 0% -8.87% (p=0.000 n=20) -RegexpMatchEasy0_1K 1.363µ ± 0% 1.337µ ± 0% -1.91% (p=0.000 n=20) -RegexpMatchEasy1_32 162.7n ± 0% 152.6n ± 0% -6.24% (p=0.000 n=20) -RegexpMatchEasy1_1K 1.505µ ± 0% 1.740µ ± 0% +15.61% (p=0.000 n=20) -RegexpMatchMedium_32 1.429µ ± 0% 1.299µ ± 0% -9.10% (p=0.000 n=20) -RegexpMatchMedium_1K 41.76µ ± 0% 38.16µ ± 0% -8.61% (p=0.000 n=20) -RegexpMatchHard_32 
2.094µ ± 0% 2.157µ ± 0% +3.01% (p=0.000 n=20) -RegexpMatchHard_1K 63.25µ ± 0% 64.72µ ± 0% +2.33% (p=0.000 n=20) -JSONEncode 18.00m ± 1% 17.46m ± 1% -3.05% (p=0.000 n=20) -JSONDecode 79.49m ± 0% 72.42m ± 0% -8.89% (p=0.000 n=20) -Revcomp 1.147 ± 0% 1.255 ± 0% +9.39% (p=0.000 n=20) -Fannkuch11 3.623 ± 0% 3.410 ± 0% -5.87% (p=0.000 n=20) -Fannkuch11 3.623 ± 0% 3.410 ± 0% -5.87% (p=0.000 n=20) -GobDecode 14.26m ± 0% 12.92m ± 0% -9.36% (p=0.000 n=20) -GobEncode 16.86m ± 1% 14.96m ± 0% -11.28% (p=0.000 n=20) -GoParse 8.721m ± 0% 8.125m ± 1% -6.84% (p=0.000 n=20) -Mandelbrot200 7.203m ± 0% 7.171m ± 0% -0.44% (p=0.000 n=20) -HTTPClientServer 83.96µ ± 0% 80.83µ ± 0% -3.72% (p=0.000 n=20) -TimeParse 415.3n ± 0% 389.1n ± 0% -6.31% (p=0.000 n=20) -TimeFormat 506.4n ± 0% 495.9n ± 0% -2.06% (p=0.000 n=20) -geomean 102.6µ 98.04µ -4.40% - - │ bench.old │ bench.new │ - │ B/s │ B/s vs base │ -Template 15.90Mi ± 1% 18.26Mi ± 0% +14.88% (p=0.000 n=20) -Gzip 44.36Mi ± 0% 44.12Mi ± 0% -0.53% (p=0.000 n=20) -Gunzip 211.7Mi ± 0% 218.7Mi ± 0% +3.31% (p=0.000 n=20) -RegexpMatchEasy0_32 229.3Mi ± 0% 251.6Mi ± 0% +9.72% (p=0.000 n=20) -RegexpMatchEasy0_1K 716.4Mi ± 0% 730.3Mi ± 0% +1.94% (p=0.000 n=20) -RegexpMatchEasy1_32 187.6Mi ± 0% 200.0Mi ± 0% +6.64% (p=0.000 n=20) -RegexpMatchEasy1_1K 649.1Mi ± 0% 561.3Mi ± 0% -13.52% (p=0.000 n=20) -RegexpMatchMedium_32 21.35Mi ± 0% 23.50Mi ± 0% +10.05% (p=0.000 n=20) -RegexpMatchMedium_1K 23.38Mi ± 0% 25.59Mi ± 0% +9.42% (p=0.000 n=20) -RegexpMatchHard_32 14.57Mi ± 0% 14.14Mi ± 0% -2.95% (p=0.000 n=20) -RegexpMatchHard_1K 15.44Mi ± 0% 15.09Mi ± 0% -2.29% (p=0.000 n=20) -JSONEncode 102.8Mi ± 1% 106.0Mi ± 1% +3.15% (p=0.000 n=20) -JSONDecode 23.28Mi ± 0% 25.55Mi ± 0% +9.75% (p=0.000 n=20) -Revcomp 211.3Mi ± 0% 193.1Mi ± 0% -8.58% (p=0.000 n=20) -GobDecode 51.34Mi ± 0% 56.64Mi ± 0% +10.33% (p=0.000 n=20) -GobEncode 43.42Mi ± 1% 48.93Mi ± 0% +12.71% (p=0.000 n=20) -GoParse 6.337Mi ± 0% 6.800Mi ± 1% +7.30% (p=0.000 n=20) -geomean 61.24Mi 63.63Mi +3.91% - 
-Update #40724 - -Co-authored-by: Xiaolin Zhao -Change-Id: Ica823b5fbe5b95705d07e9968cb9395fb51b97e4 ---- - src/cmd/compile/internal/ssa/config.go | 4 ++-- - src/cmd/compile/internal/ssa/debug_lines_test.go | 2 +- - src/internal/buildcfg/exp.go | 4 +++- - 3 files changed, 6 insertions(+), 4 deletions(-) - -diff --git a/src/cmd/compile/internal/ssa/config.go b/src/cmd/compile/internal/ssa/config.go -index 31a6ee1af8..2d90457379 100644 ---- a/src/cmd/compile/internal/ssa/config.go -+++ b/src/cmd/compile/internal/ssa/config.go -@@ -296,8 +296,8 @@ func NewConfig(arch string, types Types, ctxt *obj.Link, optimize, softfloat boo - c.registers = registersLOONG64[:] - c.gpRegMask = gpRegMaskLOONG64 - c.fpRegMask = fpRegMaskLOONG64 -- // c.intParamRegs = paramIntRegLOONG64 -- // c.floatParamRegs = paramFloatRegLOONG64 -+ c.intParamRegs = paramIntRegLOONG64 -+ c.floatParamRegs = paramFloatRegLOONG64 - c.FPReg = framepointerRegLOONG64 - c.LinkReg = linkRegLOONG64 - c.hasGReg = true -diff --git a/src/cmd/compile/internal/ssa/debug_lines_test.go b/src/cmd/compile/internal/ssa/debug_lines_test.go -index cf115107a1..af9e2a34cf 100644 ---- a/src/cmd/compile/internal/ssa/debug_lines_test.go -+++ b/src/cmd/compile/internal/ssa/debug_lines_test.go -@@ -44,7 +44,7 @@ func testGoArch() string { - - func hasRegisterABI() bool { - switch testGoArch() { -- case "amd64", "arm64", "ppc64", "ppc64le", "riscv": -+ case "amd64", "arm64", "loong64", "ppc64", "ppc64le", "riscv": - return true - } - return false -diff --git a/src/internal/buildcfg/exp.go b/src/internal/buildcfg/exp.go -index 513070c8af..0f29233fb3 100644 ---- a/src/internal/buildcfg/exp.go -+++ b/src/internal/buildcfg/exp.go -@@ -65,6 +65,8 @@ func ParseGOEXPERIMENT(goos, goarch, goexp string) (*ExperimentFlags, error) { - case "amd64", "arm64", "ppc64le", "ppc64", "riscv64": - regabiAlwaysOn = true - regabiSupported = true -+ case "loong64": -+ regabiSupported = true - } - - baseline := goexperiment.Flags{ -@@ -129,7 +131,7 @@ 
func ParseGOEXPERIMENT(goos, goarch, goexp string) (*ExperimentFlags, error) { - flags.RegabiWrappers = true - flags.RegabiArgs = true - } -- // regabi is only supported on amd64, arm64, riscv64, ppc64 and ppc64le. -+ // regabi is only supported on amd64, arm64, loong64, riscv64, ppc64 and ppc64le. - if !regabiSupported { - flags.RegabiWrappers = false - flags.RegabiArgs = false --- -2.38.1 - diff --git a/0036-internal-bytealg-optimize-Count-String-in-loong64.patch b/0036-internal-bytealg-optimize-Count-String-in-loong64.patch new file mode 100644 index 0000000000000000000000000000000000000000..3a513c22d1714506707ce1b4ebb7eeff0af434cd --- /dev/null +++ b/0036-internal-bytealg-optimize-Count-String-in-loong64.patch @@ -0,0 +1,268 @@ +From 1698704d825764d2cbdbbf2718c582cf45d66fb0 Mon Sep 17 00:00:00 2001 +From: Guoqi Chen +Date: Tue, 10 Dec 2024 21:06:28 +0800 +Subject: [PATCH 36/44] internal/bytealg: optimize Count{,String} in loong64 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Benchmark on Loongson 3A6000 and 3A5000: + +goos: linux +goarch: loong64 +pkg: bytes +cpu: Loongson-3A6000 @ 2500.00MHz + | bench.old | bench.new | + | sec/op | sec/op vs base | +CountSingle/10 12.81n ± 0% 10.74n ± 0% -16.16% (p=0.000 n=10) +CountSingle/32 33.135n ± 0% 8.007n ± 0% -75.84% (p=0.000 n=10) +CountSingle/4K 4057.0n ± 0% 207.5n ± 0% -94.89% (p=0.000 n=10) +CountSingle/4M 4161.7µ ± 0% 217.1µ ± 0% -94.78% (p=0.000 n=10) +CountSingle/64M 68.722m ± 0% 3.717m ± 11% -94.59% (p=0.000 n=10) +geomean 13.76µ 1.705µ -87.61% + + | bench.old | bench.new | + | B/s | B/s vs base | +CountSingle/10 744.4Mi ± 0% 887.8Mi ± 0% +19.26% (p=0.000 n=10) +CountSingle/32 921.0Mi ± 0% 3811.5Mi ± 0% +313.84% (p=0.000 n=10) +CountSingle/4K 962.7Mi ± 0% 18825.3Mi ± 0% +1855.40% (p=0.000 n=10) +CountSingle/4M 961.2Mi ± 0% 18425.4Mi ± 0% +1817.02% (p=0.000 n=10) +CountSingle/64M 931.3Mi ± 0% 17216.0Mi ± 10% +1748.62% (p=0.000 n=10) +geomean 900.1Mi 7.092Gi 
+706.88% + +goos: linux +goarch: loong64 +pkg: bytes +cpu: Loongson-3A5000-HV @ 2500.00MHz + | bench.old | bench.new | + | sec/op | sec/op vs base | +CountSingle/10 14.03n ± 1% 14.82n ± 0% +5.67% (p=0.000 n=10) +CountSingle/32 36.23n ± 0% 11.61n ± 0% -67.95% (p=0.000 n=10) +CountSingle/4K 4367.0n ± 0% 323.5n ± 0% -92.59% (p=0.000 n=10) +CountSingle/4M 4538.6µ ± 0% 381.2µ ± 0% -91.60% (p=0.000 n=10) +CountSingle/64M 76.575m ± 22% 7.971m ± 0% -89.59% (p=0.000 n=10) +geomean 15.05µ 2.790µ -81.46% + + | bench.old | bench.new | + | B/s | B/s vs base | +CountSingle/10 680.0Mi ± 1% 643.7Mi ± 0% -5.34% (p=0.000 n=10) +CountSingle/32 842.2Mi ± 0% 2628.4Mi ± 0% +212.07% (p=0.000 n=10) +CountSingle/4K 894.5Mi ± 0% 12075.4Mi ± 0% +1249.95% (p=0.000 n=10) +CountSingle/4M 881.3Mi ± 0% 10492.9Mi ± 0% +1090.57% (p=0.000 n=10) +CountSingle/64M 835.8Mi ± 18% 8028.7Mi ± 0% +860.61% (p=0.000 n=10) +geomean 822.9Mi 4.334Gi +439.27% + +Change-Id: I0a45139965b3e5eb09ab22be75145302f88a1915 +--- + src/internal/bytealg/bytealg.go | 3 + + src/internal/bytealg/count_loong64.s | 110 ++++++++++++++++++-------- + src/internal/cpu/cpu.go | 1 + + src/internal/cpu/cpu_loong64.go | 1 + + src/internal/cpu/cpu_loong64_hwcap.go | 2 + + 5 files changed, 85 insertions(+), 32 deletions(-) + +diff --git a/src/internal/bytealg/bytealg.go b/src/internal/bytealg/bytealg.go +index 6b79a2e1fa..a5f71ce342 100644 +--- a/src/internal/bytealg/bytealg.go ++++ b/src/internal/bytealg/bytealg.go +@@ -18,6 +18,9 @@ const ( + offsetS390xHasVX = unsafe.Offsetof(cpu.S390X.HasVX) + + offsetPPC64HasPOWER9 = unsafe.Offsetof(cpu.PPC64.IsPOWER9) ++ ++ offsetLOONG64HasLSX = unsafe.Offsetof(cpu.Loong64.HasLSX) ++ offsetLOONG64HasLASX = unsafe.Offsetof(cpu.Loong64.HasLASX) + ) + + // MaxLen is the maximum length of the string to be searched for (argument b) in Index. 
+diff --git a/src/internal/bytealg/count_loong64.s b/src/internal/bytealg/count_loong64.s +index db8ba2cb24..5c9dfeb0eb 100644 +--- a/src/internal/bytealg/count_loong64.s ++++ b/src/internal/bytealg/count_loong64.s +@@ -25,17 +25,81 @@ TEXT ·CountString(SB),NOSPLIT,$0-32 + // R5 = s_len + // R6 = byte to count + TEXT countbody<>(SB),NOSPLIT,$0 +- MOVV R0, R7 // count +- ADDV R4, R5 // end ++ MOVV R0, R7 // count ++ ++ // short path to handle 0-byte case ++ BEQ R5, done ++ ++ // jump directly to tail length < 4 ++ MOVV $4, R8 ++ BLT R5, R8, tail ++ ++ // jump directly to genericCountBody if length < 16 ++ MOVV $16, R8 ++ BLT R5, R8, genericCountBody ++ ++ // jump directly to lsxCountBody if length < 64 ++ MOVV $64, R8 ++ BLT R5, R8, lsxCountBody ++lasxCountBody: ++ MOVBU internal∕cpu·Loong64+const_offsetLOONG64HasLASX(SB), R8 ++ BEQ R8, lsxCountBody ++ MOVV $32, R9 ++ XVMOVQ R6, X0.B32 ++ ++ PCALIGN $16 ++lasxLoop: ++ XVMOVQ (R4), X1 ++ XVSEQB X0, X1, X2 ++ XVANDB $1, X2, X2 ++ XVPCNTV X2, X3 ++ XVMOVQ X3.V[0], R8 ++ ADDV R8, R7 ++ XVMOVQ X3.V[1], R8 ++ ADDV R8, R7 ++ XVMOVQ X3.V[2], R8 ++ ADDV R8, R7 ++ XVMOVQ X3.V[3], R8 ++ ADDV R8, R7 ++ ADDV $-32, R5 ++ ADDV $32, R4 ++ BGE R5, R9, lasxLoop ++ ++lsxCountBody: ++ MOVBU internal∕cpu·Loong64+const_offsetLOONG64HasLSX(SB), R8 ++ BEQ R8, genericCountBody ++ // jump directly to genericCountBody if length < 16 ++ MOVV $16, R9 ++ BLT R5, R9, genericCountBody ++ VMOVQ R6, V0.B16 ++ ++ PCALIGN $16 ++lsxLoop: ++ VMOVQ (R4), V1 ++ VSEQB V0, V1, V2 ++ VANDB $1, V2, V2 ++ VPCNTV V2, V3 ++ VMOVQ V3.V[0], R8 ++ ADDV R8, R7 ++ VMOVQ V3.V[1], R8 ++ ADDV R8, R7 ++ ADDV $-16, R5 ++ ADDV $16, R4 ++ BGE R5, R9, lsxLoop ++ ++ // Work with genericCountBody shorter than 16 bytes ++genericCountBody: ++ MOVV $4, R9 + MOVV $1, R17 + +-loop: +- ADDV $8, R4, R9 ++ PCALIGN $16 ++genericLoop: + BLT R5, R9, tail +- MOVV (R4), R8 ++ ADDV $-4, R5 ++ MOVWU (R4)(R5), R8 + + AND $0xff, R8, R10 +- WORD $0xcf210b // bstrpick.w r11, r8, 15, 8 ++ 
BSTRPICKW $15, R8, $8, R11 + XOR R6, R10, R10 + XOR R6, R11, R11 + MASKNEZ R10, R17, R12 +@@ -43,8 +107,8 @@ loop: + ADDV R7, R12, R7 + ADDV R7, R13, R7 + +- WORD $0xd7410a // bstrpick.w r10, r8, 23, 16 +- WORD $0xdf610b // bstrpick.w r11, r8, 31, 24 ++ BSTRPICKW $23, R8, $16, R10 ++ BSTRPICKW $31, R8, $24, R11 + XOR R6, R10, R10 + XOR R6, R11, R11 + MASKNEZ R10, R17, R12 +@@ -52,35 +116,17 @@ loop: + ADDV R7, R12, R7 + ADDV R7, R13, R7 + +- WORD $0xe7810a // bstrpick.w r10, r8, 39, 32 +- WORD $0xefa10b // bstrpick.w r11, r8, 47, 40 +- XOR R6, R10, R10 +- XOR R6, R11, R11 +- MASKNEZ R10, R17, R12 +- MASKNEZ R11, R17, R13 +- ADDV R7, R12, R7 +- ADDV R7, R13, R7 +- +- WORD $0xf7c10a // bstrpick.w r10, r8, 55, 48 +- WORD $0xffe10b // bstrpick.w r11, r8, 63, 56 +- XOR R6, R10, R10 +- XOR R6, R11, R11 +- MASKNEZ R10, R17, R12 +- MASKNEZ R11, R17, R13 +- ADDV R7, R12, R7 +- ADDV R7, R13, R7 +- +- MOVV R9, R4 +- JMP loop ++ JMP genericLoop + ++ // Work with tail shorter than 4 bytes ++ PCALIGN $16 + tail: +- BEQ R4, R5, done +- MOVBU (R4), R8 +- ADDV $1, R4 ++ BEQ R5, done ++ ADDV $-1, R5 ++ MOVBU (R4)(R5), R8 + BNE R6, R8, tail + ADDV $1, R7 + JMP tail +- + done: + MOVV R7, R4 + RET +diff --git a/src/internal/cpu/cpu.go b/src/internal/cpu/cpu.go +index cd3db10523..2443b31fc8 100644 +--- a/src/internal/cpu/cpu.go ++++ b/src/internal/cpu/cpu.go +@@ -83,6 +83,7 @@ var ARM64 struct { + var Loong64 struct { + _ CacheLinePad + HasLSX bool // support 128-bit vector extension ++ HasLASX bool // support 256-bit vector extension + HasCRC32 bool // support CRC instruction + HasLAMCAS bool // support AMCAS[_DB].{B/H/W/D} + HasLAM_BH bool // support AM{SWAP/ADD}[_DB].{B/H} instruction +diff --git a/src/internal/cpu/cpu_loong64.go b/src/internal/cpu/cpu_loong64.go +index 92583d0bca..9a58ea251c 100644 +--- a/src/internal/cpu/cpu_loong64.go ++++ b/src/internal/cpu/cpu_loong64.go +@@ -27,6 +27,7 @@ func get_cpucfg(reg uint32) uint32 + func doinit() { + options = []option{ + {Name: "lsx", 
Feature: &Loong64.HasLSX}, ++ {Name: "lasx", Feature: &Loong64.HasLASX}, + {Name: "crc32", Feature: &Loong64.HasCRC32}, + {Name: "lamcas", Feature: &Loong64.HasLAMCAS}, + {Name: "lam_bh", Feature: &Loong64.HasLAM_BH}, +diff --git a/src/internal/cpu/cpu_loong64_hwcap.go b/src/internal/cpu/cpu_loong64_hwcap.go +index 58397adae8..6c6b8a81f2 100644 +--- a/src/internal/cpu/cpu_loong64_hwcap.go ++++ b/src/internal/cpu/cpu_loong64_hwcap.go +@@ -13,12 +13,14 @@ var HWCap uint + // HWCAP bits. These are exposed by the Linux kernel. + const ( + hwcap_LOONGARCH_LSX = 1 << 4 ++ hwcap_LOONGARCH_LASX = 1 << 5 + ) + + func hwcapInit() { + // TODO: Features that require kernel support like LSX and LASX can + // be detected here once needed in std library or by the compiler. + Loong64.HasLSX = hwcIsSet(HWCap, hwcap_LOONGARCH_LSX) ++ Loong64.HasLASX = hwcIsSet(HWCap, hwcap_LOONGARCH_LASX) + } + + func hwcIsSet(hwc uint, val uint) bool { +-- +2.38.1 + diff --git a/0037-cmd-internal-obj-cmd-asm-reclassify-32-bit-immediate.patch b/0037-cmd-internal-obj-cmd-asm-reclassify-32-bit-immediate.patch new file mode 100644 index 0000000000000000000000000000000000000000..ccad194868f90528439f26426f941baa445ffaca --- /dev/null +++ b/0037-cmd-internal-obj-cmd-asm-reclassify-32-bit-immediate.patch @@ -0,0 +1,690 @@ +From a713105842cd7b88dbb573980731062c218a8310 Mon Sep 17 00:00:00 2001 +From: limeidan +Date: Mon, 16 Dec 2024 16:31:37 +0800 +Subject: [PATCH 37/44] cmd/internal/obj, cmd/asm: reclassify 32-bit immediate + value + +Change-Id: If9fd257ca0837a8c8597889c4f5ed3d4edc602c1 +--- + .../asm/internal/asm/testdata/loong64enc1.s | 4 +- + .../asm/internal/asm/testdata/loong64enc2.s | 2 +- + src/cmd/internal/obj/loong64/a.out.go | 31 +- + src/cmd/internal/obj/loong64/asm.go | 376 +++++++----------- + src/cmd/internal/obj/loong64/cnames.go | 25 +- + 5 files changed, 186 insertions(+), 252 deletions(-) + +diff --git a/src/cmd/asm/internal/asm/testdata/loong64enc1.s 
b/src/cmd/asm/internal/asm/testdata/loong64enc1.s +index 19070c89ef..b40d86e596 100644 +--- a/src/cmd/asm/internal/asm/testdata/loong64enc1.s ++++ b/src/cmd/asm/internal/asm/testdata/loong64enc1.s +@@ -107,8 +107,8 @@ lable2: + MOVV $4(R4), R5 // 8510c002 + MOVW $-1, R4 // 04fcff02 + MOVV $-1, R4 // 04fcff02 +- MOVW $1, R4 // 0404c002 +- MOVV $1, R4 // 0404c002 ++ MOVW $1, R4 // 04048003 ++ MOVV $1, R4 // 04048003 + ADD $-1, R4, R5 // 85fcbf02 + ADD $-1, R4 // 84fcbf02 + ADDV $-1, R4, R5 // 85fcff02 +diff --git a/src/cmd/asm/internal/asm/testdata/loong64enc2.s b/src/cmd/asm/internal/asm/testdata/loong64enc2.s +index ee3bad74b1..91aed4e2c7 100644 +--- a/src/cmd/asm/internal/asm/testdata/loong64enc2.s ++++ b/src/cmd/asm/internal/asm/testdata/loong64enc2.s +@@ -12,7 +12,7 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0 + AND $-1, R4, R5 // 1efcbf0285f81400 + AND $-1, R4 // 1efcbf0284f81400 + MOVW $-1, F4 // 1efcbf02c4a71401 +- MOVW $1, F4 // 1e048002c4a71401 ++ MOVW $1, F4 // 1e048003c4a71401 + TEQ $4, R4, R5 // 8508005c04002a00 + TEQ $4, R4 // 0408005c04002a00 + TNE $4, R4, R5 // 8508005804002a00 +diff --git a/src/cmd/internal/obj/loong64/a.out.go b/src/cmd/internal/obj/loong64/a.out.go +index 1fadbc648a..f2d4c41d68 100644 +--- a/src/cmd/internal/obj/loong64/a.out.go ++++ b/src/cmd/internal/obj/loong64/a.out.go +@@ -325,19 +325,26 @@ const ( + C_XREG + C_ARNG // Vn. + C_ELEM // Vn.[index] ++ + C_ZCON +- C_SCON // 12 bit signed +- C_UCON // 32 bit signed, low 12 bits 0 +- +- // When the immediate value is SCON, it can choose either the ADDCON implementation +- // or the ANDCON implementation, using ADD0CON/AND0CON to distinguish them, so that +- // the program can choose the implementation with fewer instructions. 
+- C_ADD0CON +- C_AND0CON +- +- C_ADDCON // -0x800 <= v < 0 +- C_ANDCON // 0 < v <= 0xFFF +- C_LCON // other 32 ++ C_U1CON // 1 bit unsigned constant ++ C_U2CON // 2 bit unsigned constant ++ C_U3CON // 3 bit unsigned constant ++ C_U4CON // 4 bit unsigned constant ++ C_U5CON // 5 bit unsigned constant ++ C_U6CON // 6 bit unsigned constant ++ C_U7CON // 7 bit unsigned constant ++ C_U8CON // 8 bit unsigned constant ++ C_S5CON // 5 bit signed constant ++ C_US12CON // same as C_S12CON, increase the priority of C_S12CON in special cases. ++ C_UU12CON // same as C_U12CON, increase the priority of C_U12CON in special cases. ++ C_S12CON // 12 bit signed constant, -0x800 < v <= 0x7ff ++ C_U12CON // 12 bit unsigned constant, 0 < v <= 0xfff ++ C_12CON // 12 bit signed constant, or 12 bit unsigned constant ++ C_U15CON // 15 bit unsigned constant ++ C_15CON20_0 // 15 bit unsigned constant, low 12 bits 0 ++ C_32CON20_0 // 32 bit signed, low 12 bits 0 ++ C_32CON // other 32 bit signed + + // 64 bit signed, lo32 bits 0, hi20 bits are not 0, hi12 bits can + // be obtained by sign extension of the hi20 bits. +diff --git a/src/cmd/internal/obj/loong64/asm.go b/src/cmd/internal/obj/loong64/asm.go +index 657d32ae81..2480cf9382 100644 +--- a/src/cmd/internal/obj/loong64/asm.go ++++ b/src/cmd/internal/obj/loong64/asm.go +@@ -51,12 +51,6 @@ const ( + // branchLoopHead marks loop entry. + // Used to insert padding for under-aligned loops. 
+ branchLoopHead +- immFiledSi5 // The encoding of the immediate field in the instruction is 5-bits +- immFiledUi3 // The encoding of the immediate field in the instruction is 3-bits +- immFiledUi4 // The encoding of the immediate field in the instruction is 4-bits +- immFiledUi5 // The encoding of the immediate field in the instruction is 5-bits +- immFiledUi6 // The encoding of the immediate field in the instruction is 6-bits +- immFiledUi8 // The encoding of the immediate field in the instruction is 8-bits + ) + + var optab = []Optab{ +@@ -94,45 +88,41 @@ var optab = []Optab{ + {ACMPEQF, C_FREG, C_FREG, C_NONE, C_FCCREG, C_NONE, 2, 4, 0, 0}, + {AVSEQB, C_VREG, C_VREG, C_NONE, C_VREG, C_NONE, 2, 4, 0, 0}, + {AXVSEQB, C_XREG, C_XREG, C_NONE, C_XREG, C_NONE, 2, 4, 0, 0}, +- {AVSEQB, C_SCON, C_VREG, C_NONE, C_VREG, C_NONE, 13, 4, 0, immFiledSi5}, +- {AXVSEQB, C_SCON, C_XREG, C_NONE, C_XREG, C_NONE, 13, 4, 0, immFiledSi5}, +- {AVSEQB, C_ADDCON, C_VREG, C_NONE, C_VREG, C_NONE, 13, 4, 0, immFiledSi5}, +- {AXVSEQB, C_ADDCON, C_XREG, C_NONE, C_XREG, C_NONE, 13, 4, 0, immFiledSi5}, ++ {AVSEQB, C_S5CON, C_VREG, C_NONE, C_VREG, C_NONE, 13, 4, 0, 0}, ++ {AXVSEQB, C_S5CON, C_XREG, C_NONE, C_XREG, C_NONE, 13, 4, 0, 0}, + + {AVANDV, C_VREG, C_VREG, C_NONE, C_VREG, C_NONE, 2, 4, 0, 0}, + {AXVANDV, C_XREG, C_XREG, C_NONE, C_XREG, C_NONE, 2, 4, 0, 0}, +- {AVANDB, C_SCON, C_VREG, C_NONE, C_VREG, C_NONE, 14, 4, 0, immFiledUi8}, +- {AXVANDB, C_SCON, C_XREG, C_NONE, C_XREG, C_NONE, 14, 4, 0, immFiledUi8}, +- {AVANDB, C_ADDCON, C_VREG, C_NONE, C_VREG, C_NONE, 14, 4, 0, immFiledUi8}, +- {AXVANDB, C_ADDCON, C_XREG, C_NONE, C_XREG, C_NONE, 14, 4, 0, immFiledUi8}, ++ {AVANDB, C_U8CON, C_VREG, C_NONE, C_VREG, C_NONE, 14, 4, 0, 0}, ++ {AXVANDB, C_U8CON, C_XREG, C_NONE, C_XREG, C_NONE, 14, 4, 0, 0}, + + {AVSLLB, C_VREG, C_VREG, C_NONE, C_VREG, C_NONE, 2, 4, 0, 0}, + {AXVSLLB, C_XREG, C_XREG, C_NONE, C_XREG, C_NONE, 2, 4, 0, 0}, +- {AVSLLB, C_SCON, C_VREG, C_NONE, C_VREG, C_NONE, 29, 4, 0, 
immFiledUi3}, +- {AXVSLLB, C_SCON, C_XREG, C_NONE, C_XREG, C_NONE, 29, 4, 0, immFiledUi3}, +- {AVSLLB, C_SCON, C_NONE, C_NONE, C_VREG, C_NONE, 29, 4, 0, immFiledUi3}, +- {AXVSLLB, C_SCON, C_NONE, C_NONE, C_XREG, C_NONE, 29, 4, 0, immFiledUi3}, ++ {AVSLLB, C_U3CON, C_VREG, C_NONE, C_VREG, C_NONE, 29, 4, 0, 0}, ++ {AXVSLLB, C_U3CON, C_XREG, C_NONE, C_XREG, C_NONE, 29, 4, 0, 0}, ++ {AVSLLB, C_U3CON, C_NONE, C_NONE, C_VREG, C_NONE, 29, 4, 0, 0}, ++ {AXVSLLB, C_U3CON, C_NONE, C_NONE, C_XREG, C_NONE, 29, 4, 0, 0}, + + {AVSLLH, C_VREG, C_VREG, C_NONE, C_VREG, C_NONE, 2, 4, 0, 0}, + {AXVSLLH, C_XREG, C_XREG, C_NONE, C_XREG, C_NONE, 2, 4, 0, 0}, +- {AVSLLH, C_SCON, C_VREG, C_NONE, C_VREG, C_NONE, 31, 4, 0, immFiledUi4}, +- {AXVSLLH, C_SCON, C_XREG, C_NONE, C_XREG, C_NONE, 31, 4, 0, immFiledUi4}, +- {AVSLLH, C_SCON, C_NONE, C_NONE, C_VREG, C_NONE, 31, 4, 0, immFiledUi4}, +- {AXVSLLH, C_SCON, C_NONE, C_NONE, C_XREG, C_NONE, 31, 4, 0, immFiledUi4}, ++ {AVSLLH, C_U4CON, C_VREG, C_NONE, C_VREG, C_NONE, 31, 4, 0, 0}, ++ {AXVSLLH, C_U4CON, C_XREG, C_NONE, C_XREG, C_NONE, 31, 4, 0, 0}, ++ {AVSLLH, C_U4CON, C_NONE, C_NONE, C_VREG, C_NONE, 31, 4, 0, 0}, ++ {AXVSLLH, C_U4CON, C_NONE, C_NONE, C_XREG, C_NONE, 31, 4, 0, 0}, + + {AVSLLW, C_VREG, C_VREG, C_NONE, C_VREG, C_NONE, 2, 4, 0, 0}, + {AXVSLLW, C_XREG, C_XREG, C_NONE, C_XREG, C_NONE, 2, 4, 0, 0}, +- {AVSLLW, C_SCON, C_VREG, C_NONE, C_VREG, C_NONE, 32, 4, 0, immFiledUi5}, +- {AXVSLLW, C_SCON, C_XREG, C_NONE, C_XREG, C_NONE, 32, 4, 0, immFiledUi5}, +- {AVSLLW, C_SCON, C_NONE, C_NONE, C_VREG, C_NONE, 32, 4, 0, immFiledUi5}, +- {AXVSLLW, C_SCON, C_NONE, C_NONE, C_XREG, C_NONE, 32, 4, 0, immFiledUi5}, ++ {AVSLLW, C_U5CON, C_VREG, C_NONE, C_VREG, C_NONE, 32, 4, 0, 0}, ++ {AXVSLLW, C_U5CON, C_XREG, C_NONE, C_XREG, C_NONE, 32, 4, 0, 0}, ++ {AVSLLW, C_U5CON, C_NONE, C_NONE, C_VREG, C_NONE, 32, 4, 0, 0}, ++ {AXVSLLW, C_U5CON, C_NONE, C_NONE, C_XREG, C_NONE, 32, 4, 0, 0}, + + {AVSLLV, C_VREG, C_VREG, C_NONE, C_VREG, C_NONE, 2, 4, 0, 0}, + 
{AXVSLLV, C_XREG, C_XREG, C_NONE, C_XREG, C_NONE, 2, 4, 0, 0}, +- {AVSLLV, C_SCON, C_VREG, C_NONE, C_VREG, C_NONE, 33, 4, 0, immFiledUi6}, +- {AXVSLLV, C_SCON, C_XREG, C_NONE, C_XREG, C_NONE, 33, 4, 0, immFiledUi6}, +- {AVSLLV, C_SCON, C_NONE, C_NONE, C_VREG, C_NONE, 33, 4, 0, immFiledUi6}, +- {AXVSLLV, C_SCON, C_NONE, C_NONE, C_XREG, C_NONE, 33, 4, 0, immFiledUi6}, ++ {AVSLLV, C_U6CON, C_VREG, C_NONE, C_VREG, C_NONE, 33, 4, 0, 0}, ++ {AXVSLLV, C_U6CON, C_XREG, C_NONE, C_XREG, C_NONE, 33, 4, 0, 0}, ++ {AVSLLV, C_U6CON, C_NONE, C_NONE, C_VREG, C_NONE, 33, 4, 0, 0}, ++ {AXVSLLV, C_U6CON, C_NONE, C_NONE, C_XREG, C_NONE, 33, 4, 0, 0}, + + {ACLOW, C_REG, C_NONE, C_NONE, C_REG, C_NONE, 9, 4, 0, 0}, + {AABSF, C_FREG, C_NONE, C_NONE, C_FREG, C_NONE, 9, 4, 0, 0}, +@@ -229,48 +219,46 @@ var optab = []Optab{ + + {AMOVW, C_LACON, C_NONE, C_NONE, C_REG, C_NONE, 26, 12, REGSP, 0}, + {AMOVV, C_LACON, C_NONE, C_NONE, C_REG, C_NONE, 26, 12, REGSP, 0}, +- {AMOVW, C_ADDCON, C_NONE, C_NONE, C_REG, C_NONE, 3, 4, REGZERO, 0}, +- {AMOVV, C_ADDCON, C_NONE, C_NONE, C_REG, C_NONE, 3, 4, REGZERO, 0}, +- {AMOVW, C_ANDCON, C_NONE, C_NONE, C_REG, C_NONE, 3, 4, REGZERO, 0}, +- {AMOVV, C_ANDCON, C_NONE, C_NONE, C_REG, C_NONE, 3, 4, REGZERO, 0}, +- +- {AMOVW, C_UCON, C_NONE, C_NONE, C_REG, C_NONE, 24, 4, 0, 0}, +- {AMOVV, C_UCON, C_NONE, C_NONE, C_REG, C_NONE, 24, 4, 0, 0}, +- {AMOVW, C_LCON, C_NONE, C_NONE, C_REG, C_NONE, 19, 8, 0, NOTUSETMP}, +- {AMOVV, C_LCON, C_NONE, C_NONE, C_REG, C_NONE, 19, 8, 0, NOTUSETMP}, ++ {AMOVW, C_12CON, C_NONE, C_NONE, C_REG, C_NONE, 3, 4, REGZERO, 0}, ++ {AMOVV, C_12CON, C_NONE, C_NONE, C_REG, C_NONE, 3, 4, REGZERO, 0}, ++ ++ {AMOVW, C_32CON20_0, C_NONE, C_NONE, C_REG, C_NONE, 24, 4, 0, 0}, ++ {AMOVV, C_32CON20_0, C_NONE, C_NONE, C_REG, C_NONE, 24, 4, 0, 0}, ++ {AMOVW, C_32CON, C_NONE, C_NONE, C_REG, C_NONE, 19, 8, 0, NOTUSETMP}, ++ {AMOVV, C_32CON, C_NONE, C_NONE, C_REG, C_NONE, 19, 8, 0, NOTUSETMP}, + {AMOVV, C_DCON12_0, C_NONE, C_NONE, C_REG, C_NONE, 67, 4, 0, 
NOTUSETMP}, + {AMOVV, C_DCON12_20S, C_NONE, C_NONE, C_REG, C_NONE, 68, 8, 0, NOTUSETMP}, + {AMOVV, C_DCON32_12S, C_NONE, C_NONE, C_REG, C_NONE, 69, 12, 0, NOTUSETMP}, + {AMOVV, C_DCON, C_NONE, C_NONE, C_REG, C_NONE, 59, 16, 0, NOTUSETMP}, + +- {AADD, C_ADD0CON, C_REG, C_NONE, C_REG, C_NONE, 4, 4, 0, 0}, +- {AADD, C_ADD0CON, C_NONE, C_NONE, C_REG, C_NONE, 4, 4, 0, 0}, +- {AADD, C_ANDCON, C_REG, C_NONE, C_REG, C_NONE, 10, 8, 0, 0}, +- {AADD, C_ANDCON, C_NONE, C_NONE, C_REG, C_NONE, 10, 8, 0, 0}, +- +- {AADDV, C_ADD0CON, C_REG, C_NONE, C_REG, C_NONE, 4, 4, 0, 0}, +- {AADDV, C_ADD0CON, C_NONE, C_NONE, C_REG, C_NONE, 4, 4, 0, 0}, +- {AADDV, C_ANDCON, C_REG, C_NONE, C_REG, C_NONE, 10, 8, 0, 0}, +- {AADDV, C_ANDCON, C_NONE, C_NONE, C_REG, C_NONE, 10, 8, 0, 0}, +- +- {AAND, C_AND0CON, C_REG, C_NONE, C_REG, C_NONE, 4, 4, 0, 0}, +- {AAND, C_AND0CON, C_NONE, C_NONE, C_REG, C_NONE, 4, 4, 0, 0}, +- {AAND, C_ADDCON, C_REG, C_NONE, C_REG, C_NONE, 10, 8, 0, 0}, +- {AAND, C_ADDCON, C_NONE, C_NONE, C_REG, C_NONE, 10, 8, 0, 0}, +- +- {AADD, C_UCON, C_REG, C_NONE, C_REG, C_NONE, 25, 8, 0, 0}, +- {AADD, C_UCON, C_NONE, C_NONE, C_REG, C_NONE, 25, 8, 0, 0}, +- {AADDV, C_UCON, C_REG, C_NONE, C_REG, C_NONE, 25, 8, 0, 0}, +- {AADDV, C_UCON, C_NONE, C_NONE, C_REG, C_NONE, 25, 8, 0, 0}, +- {AAND, C_UCON, C_REG, C_NONE, C_REG, C_NONE, 25, 8, 0, 0}, +- {AAND, C_UCON, C_NONE, C_NONE, C_REG, C_NONE, 25, 8, 0, 0}, +- +- {AADD, C_LCON, C_NONE, C_NONE, C_REG, C_NONE, 23, 12, 0, 0}, +- {AADDV, C_LCON, C_NONE, C_NONE, C_REG, C_NONE, 23, 12, 0, 0}, +- {AAND, C_LCON, C_NONE, C_NONE, C_REG, C_NONE, 23, 12, 0, 0}, +- {AADD, C_LCON, C_REG, C_NONE, C_REG, C_NONE, 23, 12, 0, 0}, +- {AADDV, C_LCON, C_REG, C_NONE, C_REG, C_NONE, 23, 12, 0, 0}, +- {AAND, C_LCON, C_REG, C_NONE, C_REG, C_NONE, 23, 12, 0, 0}, ++ {AADD, C_US12CON, C_REG, C_NONE, C_REG, C_NONE, 4, 4, 0, 0}, ++ {AADD, C_US12CON, C_NONE, C_NONE, C_REG, C_NONE, 4, 4, 0, 0}, ++ {AADD, C_U12CON, C_REG, C_NONE, C_REG, C_NONE, 10, 8, 0, 0}, ++ {AADD, 
C_U12CON, C_NONE, C_NONE, C_REG, C_NONE, 10, 8, 0, 0}, ++ ++ {AADDV, C_US12CON, C_REG, C_NONE, C_REG, C_NONE, 4, 4, 0, 0}, ++ {AADDV, C_US12CON, C_NONE, C_NONE, C_REG, C_NONE, 4, 4, 0, 0}, ++ {AADDV, C_U12CON, C_REG, C_NONE, C_REG, C_NONE, 10, 8, 0, 0}, ++ {AADDV, C_U12CON, C_NONE, C_NONE, C_REG, C_NONE, 10, 8, 0, 0}, ++ ++ {AAND, C_UU12CON, C_REG, C_NONE, C_REG, C_NONE, 4, 4, 0, 0}, ++ {AAND, C_UU12CON, C_NONE, C_NONE, C_REG, C_NONE, 4, 4, 0, 0}, ++ {AAND, C_S12CON, C_REG, C_NONE, C_REG, C_NONE, 10, 8, 0, 0}, ++ {AAND, C_S12CON, C_NONE, C_NONE, C_REG, C_NONE, 10, 8, 0, 0}, ++ ++ {AADD, C_32CON20_0, C_REG, C_NONE, C_REG, C_NONE, 25, 8, 0, 0}, ++ {AADD, C_32CON20_0, C_NONE, C_NONE, C_REG, C_NONE, 25, 8, 0, 0}, ++ {AADDV, C_32CON20_0, C_REG, C_NONE, C_REG, C_NONE, 25, 8, 0, 0}, ++ {AADDV, C_32CON20_0, C_NONE, C_NONE, C_REG, C_NONE, 25, 8, 0, 0}, ++ {AAND, C_32CON20_0, C_REG, C_NONE, C_REG, C_NONE, 25, 8, 0, 0}, ++ {AAND, C_32CON20_0, C_NONE, C_NONE, C_REG, C_NONE, 25, 8, 0, 0}, ++ ++ {AADD, C_32CON, C_NONE, C_NONE, C_REG, C_NONE, 23, 12, 0, 0}, ++ {AADDV, C_32CON, C_NONE, C_NONE, C_REG, C_NONE, 23, 12, 0, 0}, ++ {AAND, C_32CON, C_NONE, C_NONE, C_REG, C_NONE, 23, 12, 0, 0}, ++ {AADD, C_32CON, C_REG, C_NONE, C_REG, C_NONE, 23, 12, 0, 0}, ++ {AADDV, C_32CON, C_REG, C_NONE, C_REG, C_NONE, 23, 12, 0, 0}, ++ {AAND, C_32CON, C_REG, C_NONE, C_REG, C_NONE, 23, 12, 0, 0}, + + {AADDV, C_DCON, C_NONE, C_NONE, C_REG, C_NONE, 60, 20, 0, 0}, + {AADDV, C_DCON, C_REG, C_NONE, C_REG, C_NONE, 60, 20, 0, 0}, +@@ -289,18 +277,18 @@ var optab = []Optab{ + {AAND, C_DCON32_12S, C_NONE, C_NONE, C_REG, C_NONE, 72, 16, 0, 0}, + {AAND, C_DCON32_12S, C_REG, C_NONE, C_REG, C_NONE, 72, 16, 0, 0}, + +- {ASLL, C_SCON, C_REG, C_NONE, C_REG, C_NONE, 16, 4, 0, 0}, +- {ASLL, C_SCON, C_NONE, C_NONE, C_REG, C_NONE, 16, 4, 0, 0}, ++ {ASLL, C_U5CON, C_REG, C_NONE, C_REG, C_NONE, 16, 4, 0, 0}, ++ {ASLL, C_U5CON, C_NONE, C_NONE, C_REG, C_NONE, 16, 4, 0, 0}, + +- {ASLLV, C_SCON, C_REG, C_NONE, C_REG, C_NONE, 
16, 4, 0, 0}, +- {ASLLV, C_SCON, C_NONE, C_NONE, C_REG, C_NONE, 16, 4, 0, 0}, ++ {ASLLV, C_U6CON, C_REG, C_NONE, C_REG, C_NONE, 16, 4, 0, 0}, ++ {ASLLV, C_U6CON, C_NONE, C_NONE, C_REG, C_NONE, 16, 4, 0, 0}, + +- {ABSTRPICKW, C_SCON, C_REG, C_SCON, C_REG, C_NONE, 17, 4, 0, 0}, +- {ABSTRPICKW, C_SCON, C_REG, C_ZCON, C_REG, C_NONE, 17, 4, 0, 0}, ++ {ABSTRPICKW, C_U6CON, C_REG, C_U6CON, C_REG, C_NONE, 17, 4, 0, 0}, ++ {ABSTRPICKW, C_U6CON, C_REG, C_ZCON, C_REG, C_NONE, 17, 4, 0, 0}, + {ABSTRPICKW, C_ZCON, C_REG, C_ZCON, C_REG, C_NONE, 17, 4, 0, 0}, + + {ASYSCALL, C_NONE, C_NONE, C_NONE, C_NONE, C_NONE, 5, 4, 0, 0}, +- {ASYSCALL, C_ANDCON, C_NONE, C_NONE, C_NONE, C_NONE, 5, 4, 0, 0}, ++ {ASYSCALL, C_U15CON, C_NONE, C_NONE, C_NONE, C_NONE, 5, 4, 0, 0}, + + {ABEQ, C_REG, C_REG, C_NONE, C_BRAN, C_NONE, 6, 4, 0, 0}, + {ABEQ, C_REG, C_NONE, C_NONE, C_BRAN, C_NONE, 6, 4, 0, 0}, +@@ -348,8 +336,7 @@ var optab = []Optab{ + {AMOVV, C_FREG, C_NONE, C_NONE, C_FCCREG, C_NONE, 30, 4, 0, 0}, + {AMOVV, C_FCCREG, C_NONE, C_NONE, C_FREG, C_NONE, 30, 4, 0, 0}, + +- {AMOVW, C_ADDCON, C_NONE, C_NONE, C_FREG, C_NONE, 34, 8, 0, 0}, +- {AMOVW, C_ANDCON, C_NONE, C_NONE, C_FREG, C_NONE, 34, 8, 0, 0}, ++ {AMOVW, C_12CON, C_NONE, C_NONE, C_FREG, C_NONE, 34, 8, 0, 0}, + + {AMOVB, C_REG, C_NONE, C_NONE, C_TLS_IE, C_NONE, 56, 16, 0, 0}, + {AMOVW, C_REG, C_NONE, C_NONE, C_TLS_IE, C_NONE, 56, 16, 0, 0}, +@@ -363,13 +350,13 @@ var optab = []Optab{ + {AMOVBU, C_TLS_IE, C_NONE, C_NONE, C_REG, C_NONE, 57, 16, 0, 0}, + {AMOVWU, C_TLS_IE, C_NONE, C_NONE, C_REG, C_NONE, 57, 16, 0, 0}, + +- {AWORD, C_LCON, C_NONE, C_NONE, C_NONE, C_NONE, 38, 4, 0, 0}, ++ {AWORD, C_32CON, C_NONE, C_NONE, C_NONE, C_NONE, 38, 4, 0, 0}, + {AWORD, C_DCON, C_NONE, C_NONE, C_NONE, C_NONE, 61, 4, 0, 0}, + + {AMOVV, C_GOTADDR, C_NONE, C_NONE, C_REG, C_NONE, 65, 8, 0, 0}, + +- {ATEQ, C_SCON, C_REG, C_NONE, C_REG, C_NONE, 15, 8, 0, 0}, +- {ATEQ, C_SCON, C_NONE, C_NONE, C_REG, C_NONE, 15, 8, 0, 0}, ++ {ATEQ, C_US12CON, C_REG, C_NONE, 
C_REG, C_NONE, 15, 8, 0, 0}, ++ {ATEQ, C_US12CON, C_NONE, C_NONE, C_REG, C_NONE, 15, 8, 0, 0}, + + {ARDTIMELW, C_NONE, C_NONE, C_NONE, C_REG, C_REG, 62, 4, 0, 0}, + {AAMSWAPW, C_REG, C_NONE, C_NONE, C_ZOREG, C_REG, 66, 4, 0, 0}, +@@ -409,12 +396,12 @@ var optab = []Optab{ + + {AVMOVQ, C_ELEM, C_NONE, C_NONE, C_ARNG, C_NONE, 45, 4, 0, 0}, + +- {obj.APCALIGN, C_SCON, C_NONE, C_NONE, C_NONE, C_NONE, 0, 0, 0, 0}, +- {obj.APCDATA, C_LCON, C_NONE, C_NONE, C_LCON, C_NONE, 0, 0, 0, 0}, ++ {obj.APCALIGN, C_U12CON, C_NONE, C_NONE, C_NONE, C_NONE, 0, 0, 0, 0}, ++ {obj.APCDATA, C_32CON, C_NONE, C_NONE, C_32CON, C_NONE, 0, 0, 0, 0}, + {obj.APCDATA, C_DCON, C_NONE, C_NONE, C_DCON, C_NONE, 0, 0, 0, 0}, +- {obj.AFUNCDATA, C_SCON, C_NONE, C_NONE, C_ADDR, C_NONE, 0, 0, 0, 0}, ++ {obj.AFUNCDATA, C_U12CON, C_NONE, C_NONE, C_ADDR, C_NONE, 0, 0, 0, 0}, + {obj.ANOP, C_NONE, C_NONE, C_NONE, C_NONE, C_NONE, 0, 0, 0, 0}, +- {obj.ANOP, C_LCON, C_NONE, C_NONE, C_NONE, C_NONE, 0, 0, 0, 0}, // nop variants, see #40689 ++ {obj.ANOP, C_32CON, C_NONE, C_NONE, C_NONE, C_NONE, 0, 0, 0, 0}, // nop variants, see #40689 + {obj.ANOP, C_DCON, C_NONE, C_NONE, C_NONE, C_NONE, 0, 0, 0, 0}, // nop variants, see #40689 + {obj.ANOP, C_REG, C_NONE, C_NONE, C_NONE, C_NONE, 0, 0, 0, 0}, + {obj.ANOP, C_FREG, C_NONE, C_NONE, C_NONE, C_NONE, 0, 0, 0, 0}, +@@ -857,34 +844,35 @@ func (c *ctxt0) aclass(a *obj.Addr) int { + } + + if c.instoffset >= 0 { +- if c.instoffset == 0 { +- return C_ZCON +- } +- if c.instoffset <= 0x7ff { +- return C_SCON +- } +- if c.instoffset <= 0xfff { +- return C_ANDCON +- } +- if c.instoffset&0xfff == 0 && isuint32(uint64(c.instoffset)) { // && ((instoffset & (1<<31)) == 0) +- return C_UCON ++ sbits := bits.Len64(uint64(c.instoffset)) ++ switch { ++ case sbits <=8: ++ return C_ZCON + sbits ++ case sbits <= 12: ++ if c.instoffset <= 0x7ff { ++ return C_US12CON ++ } ++ return C_U12CON ++ case sbits <= 15: ++ if c.instoffset & 0xfff == 0 { ++ return C_15CON20_0 ++ } ++ return C_U15CON + } +- 
if isint32(c.instoffset) || isuint32(uint64(c.instoffset)) { +- return C_LCON ++ } else { ++ sbits := bits.Len64(uint64(^c.instoffset)) ++ switch { ++ case sbits < 5: ++ return C_S5CON ++ case sbits < 12: ++ return C_S12CON + } +- return C_LCON + } + +- if c.instoffset >= -0x800 { +- return C_ADDCON ++ if c.instoffset&0xfff == 0 { ++ return C_32CON20_0 + } +- if c.instoffset&0xfff == 0 && isint32(c.instoffset) { +- return C_UCON +- } +- if isint32(c.instoffset) { +- return C_LCON +- } +- return C_LCON ++ return C_32CON + + case obj.TYPE_BRANCH: + return C_BRAN +@@ -1130,10 +1118,11 @@ func (c *ctxt0) oplook(p *obj.Prog) *Optab { + + ops := oprange[p.As&obj.AMask] + c1 := &xcmp[a1] ++ c3 := &xcmp[a3] + c4 := &xcmp[a4] + for i := range ops { + op := &ops[i] +- if (int(op.reg) == a2) && int(op.from3) == a3 && c1[op.from1] && c4[op.to1] && (int(op.to2) == a5) { ++ if (int(op.reg) == a2) && c3[op.from3] && c1[op.from1] && c4[op.to1] && (int(op.to2) == a5) { + p.Optab = uint16(cap(optab) - cap(ops) + i + 1) + return op + } +@@ -1151,21 +1140,41 @@ func cmp(a int, b int) bool { + } + switch a { + case C_DCON: +- if b == C_LCON || b == C_DCON32_0 || +- b == C_DCON12_0 || b == C_DCON20S_0 || +- b == C_DCON12_20S || b == C_DCON12_12S || +- b == C_DCON20S_20 || b == C_DCON32_20 || +- b == C_DCON20S_12S || b == C_DCON32_12S || +- b == C_DCON12_32S || b == C_DCON20S_32 || +- b == C_DCON12_12U || b == C_DCON20S_12U || +- b == C_DCON32_12U { +- return true +- } +- fallthrough +- case C_LCON: +- if b == C_ZCON || b == C_SCON || b == C_UCON || b == C_ADDCON || b == C_ANDCON { +- return true +- } ++ return cmp(C_32CON, b) || cmp(C_DCON12_20S, b) || cmp(C_DCON32_12S, b) || b == C_DCON12_0 ++ case C_32CON: ++ return cmp(C_32CON20_0, b) || cmp(C_U15CON, b) || cmp(C_S12CON, b) ++ case C_32CON20_0: ++ return b == C_15CON20_0 || b == C_ZCON ++ case C_U15CON: ++ return cmp(C_U12CON, b) || b == C_15CON20_0 ++ case C_12CON: ++ return cmp(C_U12CON, b) || cmp(C_S12CON, b) ++ case C_UU12CON: ++ 
return cmp(C_U12CON, b) ++ case C_U12CON: ++ return cmp(C_U8CON, b) || b == C_US12CON ++ case C_U8CON: ++ return cmp(C_U7CON, b) ++ case C_U7CON: ++ return cmp(C_U6CON, b) ++ case C_U6CON: ++ return cmp(C_U5CON, b) ++ case C_U5CON: ++ return cmp(C_U4CON, b) ++ case C_U4CON: ++ return cmp(C_U3CON, b) ++ case C_U3CON: ++ return cmp(C_U2CON, b) ++ case C_U2CON: ++ return cmp(C_U1CON, b) ++ case C_U1CON: ++ return cmp(C_ZCON, b) ++ case C_US12CON: ++ return cmp(C_S12CON, b) ++ case C_S12CON: ++ return cmp(C_S5CON, b) || cmp(C_U8CON, b) || b == C_US12CON ++ case C_S5CON: ++ return cmp(C_ZCON, b) || cmp(C_U4CON, b) + + case C_DCON12_0: + +@@ -1183,62 +1192,20 @@ func cmp(a int, b int) bool { + return true + } + +- case C_ADD0CON: +- if b == C_ADDCON { +- return true +- } +- fallthrough +- +- case C_ADDCON: +- if b == C_ZCON || b == C_SCON { +- return true +- } +- +- case C_AND0CON: +- if b == C_ANDCON { +- return true +- } +- fallthrough +- +- case C_ANDCON: +- if b == C_ZCON || b == C_SCON { +- return true +- } +- +- case C_UCON: +- if b == C_ZCON { +- return true +- } +- +- case C_SCON: +- if b == C_ZCON { +- return true +- } +- + case C_LACON: +- if b == C_SACON { +- return true +- } ++ return b == C_SACON + + case C_LAUTO: +- if b == C_SAUTO { +- return true +- } ++ return b == C_SAUTO + + case C_REG: +- if b == C_ZCON { +- return true +- } ++ return b == C_ZCON + + case C_LOREG: +- if b == C_ZOREG || b == C_SOREG { +- return true +- } ++ return b == C_ZOREG || b == C_SOREG + + case C_SOREG: +- if b == C_ZOREG { +- return true +- } ++ return b == C_ZOREG + } + + return false +@@ -1881,7 +1848,7 @@ func (c *ctxt0) asmout(p *obj.Prog, o *Optab, out []uint32) { + r = int(o.param) + } + a := add +- if o.from1 == C_ANDCON { ++ if o.from1 == C_12CON && v > 0 { + a = AOR + } + +@@ -2008,15 +1975,9 @@ func (c *ctxt0) asmout(p *obj.Prog, o *Optab, out []uint32) { + if r == 0 { + r = int(p.To.Reg) + } +- +- switch o.flag { +- case immFiledSi5: +- c.checkimmFiled(p, v, 5, true) 
+- o1 = OP_5IRR(c.opirr(p.As), uint32(v), uint32(r), uint32(p.To.Reg)) +- default: +- c.ctxt.Diag("Invalid immediate value type\n%v", p) +- } +- ++ ++ o1 = OP_5IRR(c.opirr(p.As), uint32(v), uint32(r), uint32(p.To.Reg)) ++ + case 14: // add $ui8,[r1],r2 + v := c.regoff(&p.From) + r := int(p.Reg) +@@ -2024,13 +1985,7 @@ func (c *ctxt0) asmout(p *obj.Prog, o *Optab, out []uint32) { + r = int(p.To.Reg) + } + +- switch o.flag { +- case immFiledUi8: +- c.checkimmFiled(p, v, 8, false) +- o1 = OP_8IRR(c.opirr(p.As), uint32(v), uint32(r), uint32(p.To.Reg)) +- default: +- c.ctxt.Diag("Invalid immediate value type\n%v", p) +- } ++ o1 = OP_8IRR(c.opirr(p.As), uint32(v), uint32(r), uint32(p.To.Reg)) + + case 15: // teq $c r,r + v := c.regoff(&p.From) +@@ -2185,13 +2140,7 @@ func (c *ctxt0) asmout(p *obj.Prog, o *Optab, out []uint32) { + r = int(p.To.Reg) + } + +- switch o.flag { +- case immFiledUi3: +- c.checkimmFiled(p, v, 3, false) +- o1 = OP_3IRR(c.opirr(p.As), uint32(v), uint32(r), uint32(p.To.Reg)) +- default: +- c.ctxt.Diag("Invalid immediate value type\n%v", p) +- } ++ o1 = OP_3IRR(c.opirr(p.As), uint32(v), uint32(r), uint32(p.To.Reg)) + + case 30: // mov gr/fr/fcc/fcsr, fr/fcc/fcsr/gr + a := c.specialFpMovInst(p.As, oclass(&p.From), oclass(&p.To)) +@@ -2204,13 +2153,7 @@ func (c *ctxt0) asmout(p *obj.Prog, o *Optab, out []uint32) { + r = int(p.To.Reg) + } + +- switch o.flag { +- case immFiledUi4: +- c.checkimmFiled(p, v, 4, false) +- o1 = OP_4IRR(c.opirr(p.As), uint32(v), uint32(r), uint32(p.To.Reg)) +- default: +- c.ctxt.Diag("Invalid immediate value type\n%v", p) +- } ++ o1 = OP_4IRR(c.opirr(p.As), uint32(v), uint32(r), uint32(p.To.Reg)) + + case 32: // add $ui5,[r1],r2 + v := c.regoff(&p.From) +@@ -2219,13 +2162,7 @@ func (c *ctxt0) asmout(p *obj.Prog, o *Optab, out []uint32) { + r = int(p.To.Reg) + } + +- switch o.flag { +- case immFiledUi5: +- c.checkimmFiled(p, v, 5, false) +- o1 = OP_5IRR(c.opirr(p.As), uint32(v), uint32(r), uint32(p.To.Reg)) +- default: +- 
c.ctxt.Diag("Invalid immediate value type\n%v", p) +- } ++ o1 = OP_5IRR(c.opirr(p.As), uint32(v), uint32(r), uint32(p.To.Reg)) + + case 33: // add $ui6,[r1],r2 + v := c.regoff(&p.From) +@@ -2234,18 +2171,12 @@ func (c *ctxt0) asmout(p *obj.Prog, o *Optab, out []uint32) { + r = int(p.To.Reg) + } + +- switch o.flag { +- case immFiledUi6: +- c.checkimmFiled(p, v, 6, false) +- o1 = OP_6IRR(c.opirr(p.As), uint32(v), uint32(r), uint32(p.To.Reg)) +- default: +- c.ctxt.Diag("Invalid immediate value type\n%v", p) +- } ++ o1 = OP_6IRR(c.opirr(p.As), uint32(v), uint32(r), uint32(p.To.Reg)) + + case 34: // mov $con,fr + v := c.regoff(&p.From) + a := AADDU +- if o.from1 == C_ANDCON { ++ if v > 0 { + a = AOR + } + a2 := c.specialFpMovInst(p.As, C_REG, oclass(&p.To)) +@@ -2702,21 +2633,6 @@ func (c *ctxt0) checkindex(p *obj.Prog, index uint32, mask uint32) { + } + } + +-// checkimmFiled checks whether the immediate value exceeds the valid encoding range +-func (c *ctxt0) checkimmFiled(p *obj.Prog, imm int32, bits uint8, isSigned bool) { +- if isSigned { +- bound := int32(1 << (bits - 1)) +- if imm < -bound || imm > bound { +- c.ctxt.Diag("signed immediate %v exceeds the %d-bit range: %v", imm, bits, p) +- } +- } else { +- mask := uint32(0xffffffff) << bits +- if uint32(imm) != (uint32(imm) & ^mask) { +- c.ctxt.Diag("unsigned immediate %v exceeds the %d-bit range: %v", imm, bits, p) +- } +- } +-} +- + func (c *ctxt0) vregoff(a *obj.Addr) int64 { + c.instoffset = 0 + c.aclass(a) +diff --git a/src/cmd/internal/obj/loong64/cnames.go b/src/cmd/internal/obj/loong64/cnames.go +index a2f04a22ee..1d38f1ee36 100644 +--- a/src/cmd/internal/obj/loong64/cnames.go ++++ b/src/cmd/internal/obj/loong64/cnames.go +@@ -14,13 +14,24 @@ var cnames0 = []string{ + "ARNG", + "ELEM", + "ZCON", +- "SCON", +- "UCON", +- "ADD0CON", +- "AND0CON", +- "ADDCON", +- "ANDCON", +- "LCON", ++ "U1CON", ++ "U2CON", ++ "U3CON", ++ "U4CON", ++ "U5CON", ++ "U6CON", ++ "U7CON", ++ "U8CON", ++ "S5CON", ++ "US12CON", ++ 
"UU12CON", ++ "S12CON", ++ "U12CON", ++ "12CON", ++ "U15CON", ++ "15CON20_0", ++ "32CON20_0", ++ "32CON", + "DCON20S_0", + "DCON12_0", + "DCON32_0", +-- +2.38.1 + diff --git a/0037-internal-abi-internal-buildcfg-always-enable-registe.patch b/0037-internal-abi-internal-buildcfg-always-enable-registe.patch deleted file mode 100644 index cab468180028409043f4642d3be3ce271cf26787..0000000000000000000000000000000000000000 --- a/0037-internal-abi-internal-buildcfg-always-enable-registe.patch +++ /dev/null @@ -1,121 +0,0 @@ -From aa48248b897254b759d0587287b1e276da3ea549 Mon Sep 17 00:00:00 2001 -From: Guoqi Chen -Date: Wed, 30 Aug 2023 17:08:22 +0800 -Subject: [PATCH 37/51] internal/abi, internal/buildcfg: always enable register - ABI on loong64 -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -goos: linux -goarch: loong64 -pkg: test/bench/go1 -cpu: Loongson-3C5000 @ 2200.00MHz - │ old.bench │ new.bench │ - │ sec/op │ sec/op vs base │ -BinaryTree17 14.48 ± 1% 12.71 ± 1% -12.19% (p=0.000 n=20) -Fannkuch11 3.873 ± 0% 4.117 ± 0% +6.28% (p=0.000 n=20) -FmtFprintfEmpty 110.00n ± 0% 91.88n ± 0% -16.47% (p=0.000 n=20) -FmtFprintfString 178.4n ± 0% 153.8n ± 0% -13.79% (p=0.000 n=20) -FmtFprintfInt 179.2n ± 0% 163.2n ± 0% -8.93% (p=0.000 n=20) -FmtFprintfIntInt 272.9n ± 0% 258.1n ± 0% -5.42% (p=0.000 n=20) -FmtFprintfPrefixedInt 382.8n ± 0% 344.1n ± 0% -10.11% (p=0.000 n=20) -FmtFprintfFloat 453.5n ± 0% 426.8n ± 0% -5.89% (p=0.000 n=20) -FmtManyArgs 1.052µ ± 0% 1.024µ ± 0% -2.71% (p=0.000 n=20) -GobDecode 15.96m ± 1% 13.77m ± 0% -13.72% (p=0.000 n=20) -GobEncode 17.95m ± 3% 15.25m ± 3% -15.03% (p=0.000 n=20) -Gzip 478.5m ± 0% 476.6m ± 0% -0.40% (p=0.000 n=20) -Gunzip 106.55m ± 0% 90.42m ± 0% -15.14% (p=0.000 n=20) -HTTPClientServer 109.4µ ± 0% 105.3µ ± 1% -3.75% (p=0.000 n=20) -JSONEncode 21.12m ± 0% 19.41m ± 0% -8.10% (p=0.000 n=20) -JSONDecode 83.09m ± 0% 77.81m ± 0% -6.35% (p=0.000 n=20) -Mandelbrot200 8.180m ± 0% 8.149m ± 0% -0.37% 
(p=0.000 n=20) -GoParse 9.657m ± 0% 8.913m ± 0% -7.70% (p=0.000 n=20) -RegexpMatchEasy0_32 152.4n ± 0% 136.5n ± 0% -10.43% (p=0.000 n=20) -RegexpMatchEasy0_1K 1.748µ ± 0% 1.713µ ± 0% -2.00% (p=0.000 n=20) -RegexpMatchEasy1_32 201.0n ± 0% 184.4n ± 0% -8.26% (p=0.000 n=20) -RegexpMatchEasy1_1K 1.852µ ± 0% 1.806µ ± 0% -2.48% (p=0.000 n=20) -RegexpMatchMedium_32 1.577µ ± 0% 1.525µ ± 0% -3.30% (p=0.000 n=20) -RegexpMatchMedium_1K 46.34µ ± 0% 45.22µ ± 0% -2.41% (p=0.000 n=20) -RegexpMatchHard_32 2.364µ ± 0% 2.458µ ± 0% +3.98% (p=0.000 n=20) -RegexpMatchHard_1K 71.53µ ± 0% 73.89µ ± 0% +3.30% (p=0.000 n=20) -Revcomp 1.474 ± 0% 1.308 ± 0% -11.21% (p=0.000 n=20) -Template 135.0m ± 0% 121.9m ± 0% -9.72% (p=0.000 n=20) -TimeParse 470.9n ± 0% 441.8n ± 0% -6.18% (p=0.000 n=20) -TimeFormat 584.6n ± 0% 563.3n ± 0% -3.63% (p=0.000 n=20) -geomean 118.1µ 110.3µ -6.58% - - │ old.bench │ new.bench │ - │ B/s │ B/s vs base │ -GobDecode 45.86Mi ± 1% 53.15Mi ± 0% +15.90% (p=0.000 n=20) -GobEncode 40.79Mi ± 3% 48.00Mi ± 3% +17.69% (p=0.000 n=20) -Gzip 38.68Mi ± 0% 38.83Mi ± 0% +0.41% (p=0.000 n=20) -Gunzip 173.7Mi ± 0% 204.7Mi ± 0% +17.84% (p=0.000 n=20) -JSONEncode 87.62Mi ± 0% 95.34Mi ± 0% +8.81% (p=0.000 n=20) -JSONDecode 22.27Mi ± 0% 23.78Mi ± 0% +6.79% (p=0.000 n=20) -GoParse 5.717Mi ± 0% 6.199Mi ± 0% +8.42% (p=0.000 n=20) -RegexpMatchEasy0_32 200.3Mi ± 0% 223.6Mi ± 0% +11.68% (p=0.000 n=20) -RegexpMatchEasy0_1K 558.5Mi ± 0% 570.2Mi ± 0% +2.10% (p=0.000 n=20) -RegexpMatchEasy1_32 151.8Mi ± 0% 165.5Mi ± 0% +9.02% (p=0.000 n=20) -RegexpMatchEasy1_1K 527.2Mi ± 0% 540.8Mi ± 0% +2.57% (p=0.000 n=20) -RegexpMatchMedium_32 19.35Mi ± 0% 20.02Mi ± 0% +3.45% (p=0.000 n=20) -RegexpMatchMedium_1K 21.08Mi ± 0% 21.59Mi ± 0% +2.44% (p=0.000 n=20) -RegexpMatchHard_32 12.91Mi ± 0% 12.42Mi ± 0% -3.84% (p=0.000 n=20) -RegexpMatchHard_1K 13.66Mi ± 0% 13.22Mi ± 0% -3.21% (p=0.000 n=20) -Revcomp 164.5Mi ± 0% 185.3Mi ± 0% +12.62% (p=0.000 n=20) -Template 13.71Mi ± 0% 15.19Mi ± 0% +10.78% (p=0.000 n=20) 
-geomean 52.97Mi 56.71Mi +7.06% - -Change-Id: I31497848e1fea6beb289a8f4d9a36795ee03253f ---- - src/internal/abi/abi_generic.go | 2 +- - src/internal/abi/abi_loong64.go | 2 -- - src/internal/buildcfg/exp.go | 4 +--- - 3 files changed, 2 insertions(+), 6 deletions(-) - -diff --git a/src/internal/abi/abi_generic.go b/src/internal/abi/abi_generic.go -index 76ef2e2898..a08d3208d4 100644 ---- a/src/internal/abi/abi_generic.go -+++ b/src/internal/abi/abi_generic.go -@@ -2,7 +2,7 @@ - // Use of this source code is governed by a BSD-style - // license that can be found in the LICENSE file. - --//go:build !goexperiment.regabiargs && !amd64 && !arm64 && !ppc64 && !ppc64le && !riscv64 -+//go:build !goexperiment.regabiargs && !amd64 && !arm64 && !loong64 && !ppc64 && !ppc64le && !riscv64 - - package abi - -diff --git a/src/internal/abi/abi_loong64.go b/src/internal/abi/abi_loong64.go -index c2306ae8d8..10ad89815b 100644 ---- a/src/internal/abi/abi_loong64.go -+++ b/src/internal/abi/abi_loong64.go -@@ -2,8 +2,6 @@ - // Use of this source code is governed by a BSD-style - // license that can be found in the LICENSE file. - --//go:build goexperiment.regabiargs -- - package abi - - const ( -diff --git a/src/internal/buildcfg/exp.go b/src/internal/buildcfg/exp.go -index 0f29233fb3..7c7cefba7b 100644 ---- a/src/internal/buildcfg/exp.go -+++ b/src/internal/buildcfg/exp.go -@@ -62,11 +62,9 @@ func ParseGOEXPERIMENT(goos, goarch, goexp string) (*ExperimentFlags, error) { - // always on. 
- var regabiSupported, regabiAlwaysOn bool - switch goarch { -- case "amd64", "arm64", "ppc64le", "ppc64", "riscv64": -+ case "amd64", "arm64", "loong64", "ppc64le", "ppc64", "riscv64": - regabiAlwaysOn = true - regabiSupported = true -- case "loong64": -- regabiSupported = true - } - - baseline := goexperiment.Flags{ --- -2.38.1 - diff --git a/0038-all-delete-loong64-non-register-ABI-fallback-path.patch b/0038-all-delete-loong64-non-register-ABI-fallback-path.patch deleted file mode 100644 index 321602af25e6795477365c22b3ce37bdb0ee4046..0000000000000000000000000000000000000000 --- a/0038-all-delete-loong64-non-register-ABI-fallback-path.patch +++ /dev/null @@ -1,493 +0,0 @@ -From f923c49a1b0b731d5db787e951d05df1f6d9f848 Mon Sep 17 00:00:00 2001 -From: Guoqi Chen -Date: Wed, 30 Aug 2023 17:49:55 +0800 -Subject: [PATCH 38/51] all: delete loong64 non-register ABI fallback path - -Change-Id: I6f292cbdd184c584b8a8c8af005b446be3e03a67 ---- - src/internal/bytealg/compare_loong64.s | 18 ---- - src/internal/bytealg/equal_loong64.s | 24 ----- - src/internal/bytealg/indexbyte_loong64.s | 22 ----- - src/reflect/asm_loong64.s | 10 -- - src/runtime/asm_loong64.s | 98 ------------------- - .../internal/syscall/asm_linux_loong64.s | 26 ----- - src/runtime/memclr_loong64.s | 4 - - src/runtime/memmove_loong64.s | 5 - - 8 files changed, 207 deletions(-) - -diff --git a/src/internal/bytealg/compare_loong64.s b/src/internal/bytealg/compare_loong64.s -index 311449ab18..df72a1122b 100644 ---- a/src/internal/bytealg/compare_loong64.s -+++ b/src/internal/bytealg/compare_loong64.s -@@ -6,13 +6,6 @@ - #include "textflag.h" - - TEXT ·Compare(SB),NOSPLIT,$0-56 --#ifndef GOEXPERIMENT_regabiargs -- MOVV a_base+0(FP), R4 -- MOVV a_len+8(FP), R5 -- MOVV b_base+24(FP), R6 -- MOVV b_len+32(FP), R7 -- MOVV $ret+48(FP), R13 --#else - // R4 = a_base - // R5 = a_len - // R6 = a_cap (unused) -@@ -21,17 +14,9 @@ TEXT ·Compare(SB),NOSPLIT,$0-56 - // R9 = b_cap (unused) - MOVV R7, R6 - MOVV R8, R7 --#endif 
- JMP cmpbody<>(SB) - - TEXT runtime·cmpstring(SB),NOSPLIT,$0-40 --#ifndef GOEXPERIMENT_regabiargs -- MOVV a_base+0(FP), R4 -- MOVV b_base+16(FP), R6 -- MOVV a_len+8(FP), R5 -- MOVV b_len+24(FP), R7 -- MOVV $ret+32(FP), R13 --#endif - // R4 = a_base - // R5 = a_len - // R6 = b_base -@@ -100,7 +85,4 @@ samebytes: - SUBV R9, R8, R4 - - ret: --#ifndef GOEXPERIMENT_regabiargs -- MOVV R4, (R13) --#endif - RET -diff --git a/src/internal/bytealg/equal_loong64.s b/src/internal/bytealg/equal_loong64.s -index a3ad5c1b35..830b09bd2c 100644 ---- a/src/internal/bytealg/equal_loong64.s -+++ b/src/internal/bytealg/equal_loong64.s -@@ -9,20 +9,12 @@ - - // memequal(a, b unsafe.Pointer, size uintptr) bool - TEXT runtime·memequal(SB),NOSPLIT|NOFRAME,$0-25 --#ifndef GOEXPERIMENT_regabiargs -- MOVV a+0(FP), R4 -- MOVV b+8(FP), R5 -- MOVV size+16(FP), R6 --#endif - BEQ R4, R5, eq - ADDV R4, R6, R7 - PCALIGN $16 - loop: - BNE R4, R7, test - MOVV $1, R4 --#ifndef GOEXPERIMENT_regabiargs -- MOVB R4, ret+24(FP) --#endif - RET - test: - MOVBU (R4), R9 -@@ -32,23 +24,13 @@ test: - BEQ R9, R10, loop - - MOVB R0, R4 --#ifndef GOEXPERIMENT_regabiargs -- MOVB R0, ret+24(FP) --#endif - RET - eq: - MOVV $1, R4 --#ifndef GOEXPERIMENT_regabiargs -- MOVB R4, ret+24(FP) --#endif - RET - - // memequal_varlen(a, b unsafe.Pointer) bool - TEXT runtime·memequal_varlen(SB),NOSPLIT,$40-17 --#ifndef GOEXPERIMENT_regabiargs -- MOVV a+0(FP), R4 -- MOVV b+8(FP), R5 --#endif - BEQ R4, R5, eq - MOVV 8(REGCTXT), R6 // compiler stores size at offset 8 in the closure - MOVV R4, 8(R3) -@@ -56,13 +38,7 @@ TEXT runtime·memequal_varlen(SB),NOSPLIT,$40-17 - MOVV R6, 24(R3) - JAL runtime·memequal(SB) - MOVBU 32(R3), R4 --#ifndef GOEXPERIMENT_regabiargs -- MOVB R4, ret+16(FP) --#endif - RET - eq: - MOVV $1, R4 --#ifndef GOEXPERIMENT_regabiargs -- MOVB R4, ret+16(FP) --#endif - RET -diff --git a/src/internal/bytealg/indexbyte_loong64.s b/src/internal/bytealg/indexbyte_loong64.s -index 03e0660973..c9591b3cda 100644 ---- 
a/src/internal/bytealg/indexbyte_loong64.s -+++ b/src/internal/bytealg/indexbyte_loong64.s -@@ -6,11 +6,6 @@ - #include "textflag.h" - - TEXT ·IndexByte(SB),NOSPLIT,$0-40 --#ifndef GOEXPERIMENT_regabiargs -- MOVV b_base+0(FP), R4 -- MOVV b_len+8(FP), R5 -- MOVBU c+24(FP), R7 // byte to find --#endif - // R4 = b_base - // R5 = b_len - // R6 = b_cap (unused) -@@ -28,24 +23,13 @@ loop: - BNE R7, R8, loop - - SUBV R6, R4 // remove base --#ifndef GOEXPERIMENT_regabiargs -- MOVV R4, ret+32(FP) --#endif - RET - - notfound: - MOVV $-1, R4 --#ifndef GOEXPERIMENT_regabiargs -- MOVV R4, ret+32(FP) --#endif - RET - - TEXT ·IndexByteString(SB),NOSPLIT,$0-32 --#ifndef GOEXPERIMENT_regabiargs -- MOVV s_base+0(FP), R4 -- MOVV s_len+8(FP), R5 -- MOVBU c+16(FP), R6 // byte to find --#endif - // R4 = s_base - // R5 = s_len - // R6 = byte to find -@@ -61,14 +45,8 @@ loop: - BNE R6, R8, loop - - SUBV R7, R4 // remove base --#ifndef GOEXPERIMENT_regabiargs -- MOVV R4, ret+24(FP) --#endif - RET - - notfound: - MOVV $-1, R4 --#ifndef GOEXPERIMENT_regabiargs -- MOVV R4, ret+24(FP) --#endif - RET -diff --git a/src/reflect/asm_loong64.s b/src/reflect/asm_loong64.s -index 520f0afdd5..c0dc244497 100644 ---- a/src/reflect/asm_loong64.s -+++ b/src/reflect/asm_loong64.s -@@ -34,13 +34,8 @@ TEXT ·makeFuncStub(SB),(NOSPLIT|WRAPPER),$432 - JAL runtime·spillArgs(SB) - MOVV REGCTXT, 32(R3) // save REGCTXT > args of moveMakeFuncArgPtrs < LOCAL_REGARGS - --#ifdef GOEXPERIMENT_regabiargs - MOVV REGCTXT, R4 - MOVV R25, R5 --#else -- MOVV REGCTXT, 8(R3) -- MOVV R25, 16(R3) --#endif - JAL ·moveMakeFuncArgPtrs(SB) - MOVV 32(R3), REGCTXT // restore REGCTXT - -@@ -66,13 +61,8 @@ TEXT ·methodValueCall(SB),(NOSPLIT|WRAPPER),$432 - ADDV $LOCAL_REGARGS, R3, R25 // spillArgs using R25 - JAL runtime·spillArgs(SB) - MOVV REGCTXT, 32(R3) // save REGCTXT > args of moveMakeFuncArgPtrs < LOCAL_REGARGS --#ifdef GOEXPERIMENT_regabiargs - MOVV REGCTXT, R4 - MOVV R25, R5 --#else -- MOVV REGCTXT, 8(R3) -- MOVV R25, 16(R3) 
--#endif - JAL ·moveMakeFuncArgPtrs(SB) - MOVV 32(R3), REGCTXT // restore REGCTXT - MOVV REGCTXT, 8(R3) -diff --git a/src/runtime/asm_loong64.s b/src/runtime/asm_loong64.s -index 0a970ef20c..3c24e33cb3 100644 ---- a/src/runtime/asm_loong64.s -+++ b/src/runtime/asm_loong64.s -@@ -124,12 +124,7 @@ TEXT gogo<>(SB), NOSPLIT|NOFRAME, $0 - // Fn must never return. It should gogo(&g->sched) - // to keep running g. - TEXT runtime·mcall(SB), NOSPLIT|NOFRAME, $0-8 --#ifdef GOEXPERIMENT_regabiargs - MOVV R4, REGCTXT --#else -- MOVV fn+0(FP), REGCTXT --#endif -- - // Save caller state in g->sched - MOVV R3, (g_sched+gobuf_sp)(g) - MOVV R1, (g_sched+gobuf_pc)(g) -@@ -670,7 +665,6 @@ TEXT ·checkASM(SB),NOSPLIT,$0-1 - MOVB R19, ret+0(FP) - RET - --#ifdef GOEXPERIMENT_regabiargs - // spillArgs stores return values from registers to a *internal/abi.RegArgs in R25. - TEXT ·spillArgs(SB),NOSPLIT,$0-0 - MOVV R4, (0*8)(R25) -@@ -742,13 +736,6 @@ TEXT ·unspillArgs(SB),NOSPLIT,$0-0 - MOVD (30*8)(R25), F14 - MOVD (31*8)(R25), F15 - RET --#else --TEXT ·spillArgs(SB),NOSPLIT,$0-0 -- RET -- --TEXT ·unspillArgs(SB),NOSPLIT,$0-0 -- RET --#endif - - // gcWriteBarrier informs the GC about heap pointer writes. - // -@@ -878,155 +865,70 @@ TEXT runtime·gcWriteBarrier8(SB),NOSPLIT,$0 - // then tail call to the corresponding runtime handler. - // The tail call makes these stubs disappear in backtraces. 
- TEXT runtime·panicIndex(SB),NOSPLIT,$0-16 --#ifdef GOEXPERIMENT_regabiargs - MOVV R20, R4 - MOVV R21, R5 --#else -- MOVV R20, x+0(FP) -- MOVV R21, y+8(FP) --#endif - JMP runtime·goPanicIndex(SB) - TEXT runtime·panicIndexU(SB),NOSPLIT,$0-16 --#ifdef GOEXPERIMENT_regabiargs - MOVV R20, R4 - MOVV R21, R5 --#else -- MOVV R20, x+0(FP) -- MOVV R21, y+8(FP) --#endif - JMP runtime·goPanicIndexU(SB) - TEXT runtime·panicSliceAlen(SB),NOSPLIT,$0-16 --#ifdef GOEXPERIMENT_regabiargs - MOVV R21, R4 - MOVV R23, R5 --#else -- MOVV R21, x+0(FP) -- MOVV R23, y+8(FP) --#endif - JMP runtime·goPanicSliceAlen(SB) - TEXT runtime·panicSliceAlenU(SB),NOSPLIT,$0-16 --#ifdef GOEXPERIMENT_regabiargs - MOVV R21, R4 - MOVV R23, R5 --#else -- MOVV R21, x+0(FP) -- MOVV R23, y+8(FP) --#endif - JMP runtime·goPanicSliceAlenU(SB) - TEXT runtime·panicSliceAcap(SB),NOSPLIT,$0-16 --#ifdef GOEXPERIMENT_regabiargs - MOVV R21, R4 - MOVV R23, R5 --#else -- MOVV R21, x+0(FP) -- MOVV R23, y+8(FP) --#endif - JMP runtime·goPanicSliceAcap(SB) - TEXT runtime·panicSliceAcapU(SB),NOSPLIT,$0-16 --#ifdef GOEXPERIMENT_regabiargs - MOVV R21, R4 - MOVV R23, R5 --#else -- MOVV R21, x+0(FP) -- MOVV R23, y+8(FP) --#endif - JMP runtime·goPanicSliceAcapU(SB) - TEXT runtime·panicSliceB(SB),NOSPLIT,$0-16 --#ifdef GOEXPERIMENT_regabiargs - MOVV R20, R4 - MOVV R21, R5 --#else -- MOVV R20, x+0(FP) -- MOVV R21, y+8(FP) --#endif - JMP runtime·goPanicSliceB(SB) - TEXT runtime·panicSliceBU(SB),NOSPLIT,$0-16 --#ifdef GOEXPERIMENT_regabiargs - MOVV R20, R4 - MOVV R21, R5 --#else -- MOVV R20, x+0(FP) -- MOVV R21, y+8(FP) --#endif - JMP runtime·goPanicSliceBU(SB) - TEXT runtime·panicSlice3Alen(SB),NOSPLIT,$0-16 --#ifdef GOEXPERIMENT_regabiargs - MOVV R23, R4 - MOVV R24, R5 --#else -- MOVV R23, x+0(FP) -- MOVV R24, y+8(FP) --#endif - JMP runtime·goPanicSlice3Alen(SB) - TEXT runtime·panicSlice3AlenU(SB),NOSPLIT,$0-16 --#ifdef GOEXPERIMENT_regabiargs - MOVV R23, R4 - MOVV R24, R5 --#else -- MOVV R23, x+0(FP) -- MOVV R24, y+8(FP) --#endif 
- JMP runtime·goPanicSlice3AlenU(SB) - TEXT runtime·panicSlice3Acap(SB),NOSPLIT,$0-16 --#ifdef GOEXPERIMENT_regabiargs - MOVV R23, R4 - MOVV R24, R5 --#else -- MOVV R23, x+0(FP) -- MOVV R24, y+8(FP) --#endif - JMP runtime·goPanicSlice3Acap(SB) - TEXT runtime·panicSlice3AcapU(SB),NOSPLIT,$0-16 --#ifdef GOEXPERIMENT_regabiargs - MOVV R23, R4 - MOVV R24, R5 --#else -- MOVV R23, x+0(FP) -- MOVV R24, y+8(FP) --#endif - JMP runtime·goPanicSlice3AcapU(SB) - TEXT runtime·panicSlice3B(SB),NOSPLIT,$0-16 --#ifdef GOEXPERIMENT_regabiargs - MOVV R21, R4 - MOVV R23, R5 --#else -- MOVV R21, x+0(FP) -- MOVV R23, y+8(FP) --#endif - JMP runtime·goPanicSlice3B(SB) - TEXT runtime·panicSlice3BU(SB),NOSPLIT,$0-16 --#ifdef GOEXPERIMENT_regabiargs - MOVV R21, R4 - MOVV R23, R5 --#else -- MOVV R21, x+0(FP) -- MOVV R23, y+8(FP) --#endif - JMP runtime·goPanicSlice3BU(SB) - TEXT runtime·panicSlice3C(SB),NOSPLIT,$0-16 --#ifdef GOEXPERIMENT_regabiargs - MOVV R20, R4 - MOVV R21, R5 --#else -- MOVV R20, x+0(FP) -- MOVV R21, y+8(FP) --#endif - JMP runtime·goPanicSlice3C(SB) - TEXT runtime·panicSlice3CU(SB),NOSPLIT,$0-16 --#ifdef GOEXPERIMENT_regabiargs - MOVV R20, R4 - MOVV R21, R5 --#else -- MOVV R20, x+0(FP) -- MOVV R21, y+8(FP) --#endif - JMP runtime·goPanicSlice3CU(SB) - TEXT runtime·panicSliceConvert(SB),NOSPLIT,$0-16 --#ifdef GOEXPERIMENT_regabiargs - MOVV R23, R4 - MOVV R24, R5 --#else -- MOVV R23, x+0(FP) -- MOVV R24, y+8(FP) --#endif - JMP runtime·goPanicSliceConvert(SB) -diff --git a/src/runtime/internal/syscall/asm_linux_loong64.s b/src/runtime/internal/syscall/asm_linux_loong64.s -index 11c5bc2468..ff8ad75b05 100644 ---- a/src/runtime/internal/syscall/asm_linux_loong64.s -+++ b/src/runtime/internal/syscall/asm_linux_loong64.s -@@ -22,7 +22,6 @@ - // r2 | R5 | R5 - // err | R6 | part of R4 - TEXT ·Syscall6(SB),NOSPLIT,$0-80 --#ifdef GOEXPERIMENT_regabiargs - MOVV R4, R11 // syscall entry - MOVV R5, R4 - MOVV R6, R5 -@@ -30,39 +29,14 @@ TEXT ·Syscall6(SB),NOSPLIT,$0-80 - MOVV R8, R7 - 
MOVV R9, R8 - MOVV R10, R9 --#else -- MOVV num+0(FP), R11 // syscall entry -- MOVV a1+8(FP), R4 -- MOVV a2+16(FP), R5 -- MOVV a3+24(FP), R6 -- MOVV a4+32(FP), R7 -- MOVV a5+40(FP), R8 -- MOVV a6+48(FP), R9 --#endif - SYSCALL --#ifdef GOEXPERIMENT_regabiargs - MOVV R0, R5 // r2 is not used. Always set to 0. - MOVW $-4096, R12 - BGEU R12, R4, ok - SUBVU R4, R0, R6 // errno - MOVV $-1, R4 // r1 --#else -- MOVW $-4096, R12 -- BGEU R12, R4, ok -- MOVV $-1, R12 -- MOVV R12, r1+56(FP) -- MOVV R0, r2+64(FP) -- SUBVU R4, R0, R4 -- MOVV R4, errno+72(FP) --#endif - RET - ok: --#ifdef GOEXPERIMENT_regabiargs - // r1 already in R4 - MOVV R0, R6 // errno --#else -- MOVV R4, r1+56(FP) -- MOVV R0, r2+64(FP) // r2 is not used. Always set to 0. -- MOVV R0, errno+72(FP) --#endif - RET -diff --git a/src/runtime/memclr_loong64.s b/src/runtime/memclr_loong64.s -index 313e4d4f33..1d45e82d49 100644 ---- a/src/runtime/memclr_loong64.s -+++ b/src/runtime/memclr_loong64.s -@@ -7,10 +7,6 @@ - - // func memclrNoHeapPointers(ptr unsafe.Pointer, n uintptr) - TEXT runtime·memclrNoHeapPointers(SB),NOSPLIT,$0-16 --#ifndef GOEXPERIMENT_regabiargs -- MOVV ptr+0(FP), R4 -- MOVV n+8(FP), R5 --#endif - ADDV R4, R5, R6 - - // if less than 8 bytes, do one byte at a time -diff --git a/src/runtime/memmove_loong64.s b/src/runtime/memmove_loong64.s -index 5b7aeba698..a94cf999bc 100644 ---- a/src/runtime/memmove_loong64.s -+++ b/src/runtime/memmove_loong64.s -@@ -8,11 +8,6 @@ - - // func memmove(to, from unsafe.Pointer, n uintptr) - TEXT runtime·memmove(SB), NOSPLIT|NOFRAME, $0-24 --#ifndef GOEXPERIMENT_regabiargs -- MOVV to+0(FP), R4 -- MOVV from+8(FP), R5 -- MOVV n+16(FP), R6 --#endif - BNE R6, check - RET - --- -2.38.1 - diff --git a/0038-crypto-internal-poly1305-implement-function-update-i.patch b/0038-crypto-internal-poly1305-implement-function-update-i.patch new file mode 100644 index 0000000000000000000000000000000000000000..e18caf23e121c1bd51c48e07b0a441cfffe140d6 --- /dev/null +++ 
b/0038-crypto-internal-poly1305-implement-function-update-i.patch @@ -0,0 +1,298 @@ +From 9e01e315f3ea08fc01854bf8beb2cdeb9ff6dddc Mon Sep 17 00:00:00 2001 +From: Xiaolin Zhao +Date: Thu, 19 Dec 2024 15:38:48 +0800 +Subject: [PATCH 38/44] crypto/internal/poly1305: implement function update in + assembly on loong64 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +There is some improvement in performance on Loongson 3A5000 and 3A6000. + +goos: linux +goarch: loong64 +pkg: golang.org/x/crypto/internal/poly1305 +cpu: Loongson-3A5000 @ 2500.00MHz + | bench.old | bench.new | + | sec/op | sec/op vs base | +64 122.8n ± 0% 101.2n ± 0% -17.59% (p=0.000 n=10) +1K 1152.0n ± 0% 779.4n ± 0% -32.34% (p=0.000 n=10) +2M 2.356m ± 0% 1.556m ± 0% -33.94% (p=0.000 n=10) +64Unaligned 122.7n ± 0% 102.5n ± 0% -16.46% (p=0.000 n=10) +1KUnaligned 1152.0n ± 0% 802.4n ± 0% -30.35% (p=0.000 n=10) +2MUnaligned 2.336m ± 0% 1.582m ± 0% -32.26% (p=0.000 n=10) +Write64 77.92n ± 0% 57.45n ± 0% -26.27% (p=0.000 n=10) +Write1K 1106.0n ± 0% 736.2n ± 0% -33.44% (p=0.000 n=10) +Write2M 2.356m ± 0% 1.562m ± 0% -33.69% (p=0.000 n=10) +Write64Unaligned 77.87n ± 0% 59.71n ± 0% -23.33% (p=0.000 n=10) +Write1KUnaligned 1106.0n ± 0% 749.5n ± 0% -32.23% (p=0.000 n=10) +Write2MUnaligned 2.335m ± 0% 1.580m ± 0% -32.34% (p=0.000 n=10) +geomean 6.373µ 4.530µ -28.93% + + | bench.old | bench.new | + | B/s | B/s vs base | +64 497.1Mi ± 0% 603.3Mi ± 0% +21.37% (p=0.000 n=10) +1K 847.6Mi ± 0% 1252.9Mi ± 0% +47.82% (p=0.000 n=10) +2M 849.0Mi ± 0% 1285.3Mi ± 0% +51.39% (p=0.000 n=10) +64Unaligned 497.4Mi ± 0% 595.5Mi ± 0% +19.73% (p=0.000 n=10) +1KUnaligned 847.6Mi ± 0% 1217.1Mi ± 0% +43.59% (p=0.000 n=10) +2MUnaligned 856.3Mi ± 0% 1264.0Mi ± 0% +47.61% (p=0.000 n=10) +Write64 783.3Mi ± 0% 1062.4Mi ± 0% +35.64% (p=0.000 n=10) +Write1K 882.8Mi ± 0% 1326.5Mi ± 0% +50.25% (p=0.000 n=10) +Write2M 849.0Mi ± 0% 1280.3Mi ± 0% +50.80% (p=0.000 n=10) +Write64Unaligned 783.8Mi ± 0% 
1022.3Mi ± 0% +30.43% (p=0.000 n=10) +Write1KUnaligned 882.8Mi ± 0% 1303.0Mi ± 0% +47.59% (p=0.000 n=10) +Write2MUnaligned 856.5Mi ± 0% 1266.0Mi ± 0% +47.81% (p=0.000 n=10) +geomean 772.2Mi 1.061Gi +40.72% + +goos: linux +goarch: loong64 +pkg: golang.org/x/crypto/internal/poly1305 +cpu: Loongson-3A6000 @ 2500.00MHz + | bench.old | bench.new | + | sec/op | sec/op vs base | +64 92.06n ± 0% 77.56n ± 0% -15.75% (p=0.000 n=10) +1K 998.4n ± 0% 683.0n ± 0% -31.59% (p=0.000 n=10) +2M 1.978m ± 0% 1.323m ± 0% -33.11% (p=0.000 n=10) +64Unaligned 92.06n ± 0% 77.56n ± 0% -15.75% (p=0.000 n=10) +1KUnaligned 998.4n ± 0% 683.0n ± 0% -31.59% (p=0.000 n=10) +2MUnaligned 1.979m ± 0% 1.369m ± 0% -30.82% (p=0.000 n=10) +Write64 65.25n ± 0% 50.39n ± 0% -22.77% (p=0.000 n=10) +Write1K 970.7n ± 0% 656.8n ± 0% -32.34% (p=0.000 n=10) +Write2M 1.966m ± 0% 1.323m ± 0% -32.73% (p=0.000 n=10) +Write64Unaligned 65.24n ± 0% 50.37n ± 0% -22.79% (p=0.000 n=10) +Write1KUnaligned 970.8n ± 0% 656.8n ± 0% -32.34% (p=0.000 n=10) +Write2MUnaligned 1.966m ± 0% 1.368m ± 0% -30.42% (p=0.000 n=10) +geomean 5.319µ 3.834µ -27.93% + + | bench.old | bench.new | + | B/s | B/s vs base | +64 663.0Mi ± 0% 786.9Mi ± 0% +18.69% (p=0.000 n=10) +1K 978.1Mi ± 0% 1429.8Mi ± 0% +46.18% (p=0.000 n=10) +2M 1011.0Mi ± 0% 1511.4Mi ± 0% +49.50% (p=0.000 n=10) +64Unaligned 663.0Mi ± 0% 786.9Mi ± 0% +18.69% (p=0.000 n=10) +1KUnaligned 978.1Mi ± 0% 1429.8Mi ± 0% +46.18% (p=0.000 n=10) +2MUnaligned 1010.6Mi ± 0% 1460.9Mi ± 0% +44.56% (p=0.000 n=10) +Write64 935.4Mi ± 0% 1211.3Mi ± 0% +29.49% (p=0.000 n=10) +Write1K 1006.0Mi ± 0% 1486.9Mi ± 0% +47.81% (p=0.000 n=10) +Write2M 1017.3Mi ± 0% 1512.1Mi ± 0% +48.64% (p=0.000 n=10) +Write64Unaligned 935.5Mi ± 0% 1211.7Mi ± 0% +29.53% (p=0.000 n=10) +Write1KUnaligned 1005.9Mi ± 0% 1486.9Mi ± 0% +47.81% (p=0.000 n=10) +Write2MUnaligned 1017.1Mi ± 0% 1461.8Mi ± 0% +43.71% (p=0.000 n=10) +geomean 925.3Mi 1.254Gi +38.75% + +Change-Id: Iec990384a7be9a89a019c2b3b546d9fc59a2d58e +--- + 
.../x/crypto/internal/poly1305/mac_noasm.go | 2 +- + .../x/crypto/internal/poly1305/sum_loong64.go | 47 +++++++ + .../x/crypto/internal/poly1305/sum_loong64.s | 131 ++++++++++++++++++ + 3 files changed, 179 insertions(+), 1 deletion(-) + create mode 100644 src/vendor/golang.org/x/crypto/internal/poly1305/sum_loong64.go + create mode 100644 src/vendor/golang.org/x/crypto/internal/poly1305/sum_loong64.s + +diff --git a/src/vendor/golang.org/x/crypto/internal/poly1305/mac_noasm.go b/src/vendor/golang.org/x/crypto/internal/poly1305/mac_noasm.go +index bd896bdc76..8d99551fee 100644 +--- a/src/vendor/golang.org/x/crypto/internal/poly1305/mac_noasm.go ++++ b/src/vendor/golang.org/x/crypto/internal/poly1305/mac_noasm.go +@@ -2,7 +2,7 @@ + // Use of this source code is governed by a BSD-style + // license that can be found in the LICENSE file. + +-//go:build (!amd64 && !ppc64le && !ppc64 && !s390x) || !gc || purego ++//go:build (!amd64 && !loong64 && !ppc64le && !ppc64 && !s390x) || !gc || purego + + package poly1305 + +diff --git a/src/vendor/golang.org/x/crypto/internal/poly1305/sum_loong64.go b/src/vendor/golang.org/x/crypto/internal/poly1305/sum_loong64.go +new file mode 100644 +index 0000000000..d4dc8f91ec +--- /dev/null ++++ b/src/vendor/golang.org/x/crypto/internal/poly1305/sum_loong64.go +@@ -0,0 +1,47 @@ ++// Copyright 2024 The Go Authors. All rights reserved. ++// Use of this source code is governed by a BSD-style ++// license that can be found in the LICENSE file. ++ ++//go:build gc && !purego ++ ++package poly1305 ++ ++//go:noescape ++func update(state *macState, msg []byte) ++ ++// mac is a wrapper for macGeneric that redirects calls that would have gone to ++// updateGeneric to update. ++// ++// Its Write and Sum methods are otherwise identical to the macGeneric ones, but ++// using function pointers would carry a major performance cost. 
++type mac struct{ macGeneric } ++ ++func (h *mac) Write(p []byte) (int, error) { ++ nn := len(p) ++ if h.offset > 0 { ++ n := copy(h.buffer[h.offset:], p) ++ if h.offset+n < TagSize { ++ h.offset += n ++ return nn, nil ++ } ++ p = p[n:] ++ h.offset = 0 ++ update(&h.macState, h.buffer[:]) ++ } ++ if n := len(p) - (len(p) % TagSize); n > 0 { ++ update(&h.macState, p[:n]) ++ p = p[n:] ++ } ++ if len(p) > 0 { ++ h.offset += copy(h.buffer[h.offset:], p) ++ } ++ return nn, nil ++} ++ ++func (h *mac) Sum(out *[16]byte) { ++ state := h.macState ++ if h.offset > 0 { ++ update(&state, h.buffer[:h.offset]) ++ } ++ finalize(out, &state.h, &state.s) ++} +diff --git a/src/vendor/golang.org/x/crypto/internal/poly1305/sum_loong64.s b/src/vendor/golang.org/x/crypto/internal/poly1305/sum_loong64.s +new file mode 100644 +index 0000000000..baf0c95333 +--- /dev/null ++++ b/src/vendor/golang.org/x/crypto/internal/poly1305/sum_loong64.s +@@ -0,0 +1,131 @@ ++// Copyright 2024 The Go Authors. All rights reserved. ++// Use of this source code is governed by a BSD-style ++// license that can be found in the LICENSE file. 
++ ++//go:build gc && !purego ++ ++// func update(state *macState, msg []byte) ++TEXT ·update(SB), $0-32 ++ MOVV state+0(FP), R4 ++ MOVV msg_base+8(FP), R5 ++ MOVV msg_len+16(FP), R6 ++ MOVV $16, R7 ++ MOVV (R4), R8 // h0 ++ MOVV 8(R4), R9 // h1 ++ MOVV 16(R4), R10 // h2 ++ MOVV 24(R4), R11 // r0 ++ MOVV 32(R4), R12 // r1 ++ ++ BLT R6, R7, bytes_between_0_and_15 ++ ++loop: ++ MOVV (R5), R14 // msg[0:8] ++ MOVV 8(R5), R16 // msg[8:16] ++ ADDV R14, R8, R8 // h0 ++ ADDV R9, R16, R27 ++ SGTU R14, R8, R24 // h0.carry ++ SGTU R9, R27, R28 ++ ADDV R27, R24, R9 // h1 ++ SGTU R27, R9, R24 ++ OR R24, R28, R24 // h1.carry ++ ADDV $1, R24, R24 ++ ADDV R10, R24, R10 // h2 ++ ++ ADDV $16, R5, R5 // msg = msg[16:] ++ ++multiply: ++ MULV R8, R11, R13 // h0r0.lo ++ MULHVU R8, R11, R16 // h0r0.hi ++ MOVV R13, R14 ++ MOVV R16, R15 ++ MULV R9, R11, R13 // h1r0.lo ++ MULHVU R9, R11, R16 // h1r0.hi ++ ADDV R13, R15, R15 ++ SGTU R13, R15, R24 ++ ADDV R24, R16, R16 ++ MULV R10, R11, R25 ++ ADDV R16, R25, R25 ++ MULV R8, R12, R13 // h0r1.lo ++ MULHVU R8, R12, R16 // h0r1.hi ++ ADDV R13, R15, R15 ++ SGTU R13, R15, R24 ++ ADDV R24, R16, R16 ++ MOVV R16, R8 ++ MULV R10, R12, R26 // h2r1 ++ MULV R9, R12, R13 // h1r1.lo ++ MULHVU R9, R12, R16 // h1r1.hi ++ ADDV R13, R25, R25 ++ ADDV R16, R26, R27 ++ SGTU R13, R25, R24 ++ SGTU R16, R27, R28 ++ ADDV R27, R24, R26 ++ SGTU R27, R26, R24 ++ OR R24, R28, R24 ++ ADDV R8, R25, R25 ++ SGTU R8, R25, R24 ++ ADDV R24, R26, R26 ++ MOVV R14, R8 ++ MOVV R15, R9 ++ MOVV R25, R10 ++ MOVV R25, R14 ++ AND $3, R10, R10 ++ AND $-4, R14, R14 ++ ADDV R14, R8, R8 ++ ADDV R26, R9, R27 ++ SGTU R14, R8, R24 ++ SGTU R26, R27, R28 ++ ADDV R27, R24, R9 ++ SGTU R27, R9, R24 ++ OR R24, R28, R24 ++ ADDV R24, R10, R10 ++ SLLV $62, R26, R27 ++ SRLV $2, R25, R28 ++ SRLV $2, R26, R26 ++ OR R27, R28, R25 ++ ADDV R25, R8, R8 ++ ADDV R26, R9, R27 ++ SGTU R25, R8, R24 ++ SGTU R26, R27, R28 ++ ADDV R27, R24, R9 ++ SGTU R27, R9, R24 ++ OR R24, R28, R24 ++ ADDV R24, R10, R10 ++ ++ SUBV 
$16, R6, R6 ++ BGE R6, R7, loop ++ ++bytes_between_0_and_15: ++ BEQ R6, R0, done ++ MOVV $1, R14 ++ XOR R15, R15 ++ XOR R25, R25 ++ ADDV R6, R5, R5 ++ ++flush_buffer: ++ SRLV $56, R14, R24 ++ SLLV $8, R15, R28 ++ OR R24, R28, R15 ++ SLLV $8, R14, R14 ++ MOVBU -1(R5), R25 ++ XOR R25, R14, R14 ++ SUBV $1, R5, R5 ++ SUBV $1, R6, R6 ++ BNE R6, R0, flush_buffer ++ ++ ADDV R14, R8, R8 ++ SGTU R14, R8, R24 ++ ADDV R15, R9, R27 ++ SGTU R15, R27, R28 ++ ADDV R27, R24, R9 ++ SGTU R27, R9, R24 ++ OR R24, R28, R24 ++ ADDV R10, R24, R10 ++ ++ MOVV $16, R6 ++ JMP multiply ++ ++done: ++ MOVV R8, (R4) ++ MOVV R9, 8(R4) ++ MOVV R10, 16(R4) ++ RET +-- +2.38.1 + diff --git a/0039-cmd-internal-obj-loong64-using-LookupABI-to-find-duf.patch b/0039-cmd-internal-obj-loong64-using-LookupABI-to-find-duf.patch deleted file mode 100644 index b199d863440541130101f135e0dbd8b3bbe57dbe..0000000000000000000000000000000000000000 --- a/0039-cmd-internal-obj-loong64-using-LookupABI-to-find-duf.patch +++ /dev/null @@ -1,33 +0,0 @@ -From 9135ea504d0f3c80c78fc8a7d91d329e070b127f Mon Sep 17 00:00:00 2001 -From: Guoqi Chen -Date: Sun, 8 Oct 2023 08:36:19 +0800 -Subject: [PATCH 39/51] cmd/internal/obj/loong64: using LookupABI to find - duff{copy,zero} when rewriting GOT - -Because register-passing parameters have been enabled, using Lookup -to find duffcopy and duffzero fails and returns incorrect values. 
- -Change-Id: I67ddec30c4a61ec1a0c7f7c27df682c0b5c36068 ---- - src/cmd/internal/obj/loong64/obj.go | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/src/cmd/internal/obj/loong64/obj.go b/src/cmd/internal/obj/loong64/obj.go -index ed5165418d..f1850f1caa 100644 ---- a/src/cmd/internal/obj/loong64/obj.go -+++ b/src/cmd/internal/obj/loong64/obj.go -@@ -100,9 +100,9 @@ func rewriteToUseGot(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) { - if p.As == obj.ADUFFCOPY || p.As == obj.ADUFFZERO { - var sym *obj.LSym - if p.As == obj.ADUFFZERO { -- sym = ctxt.Lookup("runtime.duffzero") -+ sym = ctxt.LookupABI("runtime.duffzero", obj.ABIInternal) - } else { -- sym = ctxt.Lookup("runtime.duffcopy") -+ sym = ctxt.LookupABI("runtime.duffcopy", obj.ABIInternal) - } - offset := p.To.Offset - p.As = AMOVV --- -2.38.1 - diff --git a/0039-runtime-optimize-the-implementation-of-memclrNoHeapP.patch b/0039-runtime-optimize-the-implementation-of-memclrNoHeapP.patch new file mode 100644 index 0000000000000000000000000000000000000000..289a1f5ac28f28b391aab13473136c3cdf82e036 --- /dev/null +++ b/0039-runtime-optimize-the-implementation-of-memclrNoHeapP.patch @@ -0,0 +1,374 @@ +From 0e94e34886a3632315e444c5fd0ba448239c500e Mon Sep 17 00:00:00 2001 +From: chenguoqi +Date: Tue, 31 Dec 2024 18:31:50 +0800 +Subject: [PATCH 39/44] runtime: optimize the implementation of + memclrNoHeapPointers on loong64 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +goos: linux +goarch: loong64 +pkg: runtime +cpu: Loongson-3A6000 @ 2500.00MHz + │ bench.old │ bench.new │ + │ sec/op │ sec/op vs base │ +Memclr/5 2.456n ± 0% 3.202n ± 0% +30.37% (p=0.000 n=10) +Memclr/16 2.806n ± 0% 2.810n ± 1% +0.14% (p=0.002 n=10) +Memclr/64 5.053n ± 1% 5.045n ± 1% ~ (p=0.591 n=10) +Memclr/256 10.240n ± 0% 6.027n ± 0% -41.14% (p=0.000 n=10) +Memclr/4096 107.00n ± 0% 30.46n ± 0% -71.53% (p=0.000 n=10) +Memclr/65536 1676.0n ± 0% 431.3n ± 0% -74.26% (p=0.000 
n=10) +Memclr/1M 52.52µ ± 0% 32.81µ ± 0% -37.54% (p=0.000 n=10) +Memclr/4M 210.0µ ± 0% 131.3µ ± 0% -37.48% (p=0.000 n=10) +Memclr/8M 420.0µ ± 0% 262.8µ ± 1% -37.43% (p=0.000 n=10) +Memclr/16M 846.7µ ± 0% 528.8µ ± 0% -37.55% (p=0.000 n=10) +Memclr/64M 3.388m ± 0% 2.180m ± 1% -35.66% (p=0.000 n=10) +MemclrUnaligned/0_5 4.382n ± 0% 4.006n ± 0% -8.59% (p=0.000 n=10) +MemclrUnaligned/0_16 4.600n ± 0% 4.204n ± 0% -8.60% (p=0.000 n=10) +MemclrUnaligned/0_64 5.604n ± 0% 5.005n ± 0% -10.69% (p=0.000 n=10) +MemclrUnaligned/0_256 10.340n ± 0% 6.808n ± 0% -34.16% (p=0.000 n=10) +MemclrUnaligned/0_4096 107.10n ± 0% 33.81n ± 0% -68.43% (p=0.000 n=10) +MemclrUnaligned/0_65536 1701.0n ± 0% 441.6n ± 0% -74.04% (p=0.000 n=10) +MemclrUnaligned/1_5 4.386n ± 0% 4.004n ± 0% -8.71% (p=0.000 n=10) +MemclrUnaligned/1_16 4.597n ± 0% 4.203n ± 0% -8.56% (p=0.000 n=10) +MemclrUnaligned/1_64 7.204n ± 0% 7.106n ± 0% -1.36% (p=0.000 n=10) +MemclrUnaligned/1_256 12.580n ± 0% 9.796n ± 0% -22.13% (p=0.000 n=10) +MemclrUnaligned/1_4096 115.60n ± 0% 38.63n ± 0% -66.58% (p=0.000 n=10) +MemclrUnaligned/1_65536 1709.0n ± 0% 446.5n ± 0% -73.87% (p=0.000 n=10) +MemclrUnaligned/4_5 4.386n ± 0% 4.005n ± 0% -8.69% (p=0.000 n=10) +MemclrUnaligned/4_16 4.597n ± 0% 4.203n ± 0% -8.57% (p=0.000 n=10) +MemclrUnaligned/4_64 7.204n ± 0% 7.104n ± 0% -1.39% (p=0.000 n=10) +MemclrUnaligned/4_256 12.58n ± 0% 10.66n ± 0% -15.22% (p=0.000 n=10) +MemclrUnaligned/4_4096 114.30n ± 0% 39.99n ± 0% -65.01% (p=0.000 n=10) +MemclrUnaligned/4_65536 1709.0n ± 0% 449.8n ± 0% -73.68% (p=0.000 n=10) +MemclrUnaligned/7_5 4.381n ± 0% 4.002n ± 0% -8.64% (p=0.000 n=10) +MemclrUnaligned/7_16 4.597n ± 0% 4.202n ± 0% -8.59% (p=0.000 n=10) +MemclrUnaligned/7_64 7.204n ± 0% 7.104n ± 0% -1.39% (p=0.000 n=10) +MemclrUnaligned/7_256 12.58n ± 0% 10.60n ± 0% -15.74% (p=0.000 n=10) +MemclrUnaligned/7_4096 115.50n ± 0% 39.75n ± 0% -65.58% (p=0.000 n=10) +MemclrUnaligned/7_65536 1709.0n ± 0% 447.1n ± 0% -73.84% (p=0.000 n=10) +MemclrUnaligned/0_1M 
52.52µ ± 0% 32.80µ ± 0% -37.56% (p=0.000 n=10) +MemclrUnaligned/0_4M 210.0µ ± 0% 131.2µ ± 0% -37.53% (p=0.000 n=10) +MemclrUnaligned/0_8M 419.9µ ± 0% 262.5µ ± 0% -37.48% (p=0.000 n=10) +MemclrUnaligned/0_16M 845.0µ ± 0% 528.1µ ± 0% -37.51% (p=0.000 n=10) +MemclrUnaligned/0_64M 3.406m ± 0% 2.165m ± 1% -36.44% (p=0.000 n=10) +MemclrUnaligned/1_1M 52.53µ ± 0% 32.80µ ± 0% -37.55% (p=0.000 n=10) +MemclrUnaligned/1_4M 210.2µ ± 0% 131.3µ ± 0% -37.55% (p=0.000 n=10) +MemclrUnaligned/1_8M 419.9µ ± 0% 262.4µ ± 0% -37.50% (p=0.000 n=10) +MemclrUnaligned/1_16M 844.2µ ± 0% 528.0µ ± 0% -37.46% (p=0.000 n=10) +MemclrUnaligned/1_64M 3.369m ± 0% 2.161m ± 5% -35.84% (p=0.000 n=10) +MemclrUnaligned/4_1M 52.53µ ± 0% 32.80µ ± 0% -37.55% (p=0.000 n=10) +MemclrUnaligned/4_4M 210.2µ ± 0% 131.2µ ± 0% -37.59% (p=0.000 n=10) +MemclrUnaligned/4_8M 419.9µ ± 0% 262.4µ ± 0% -37.52% (p=0.000 n=10) +MemclrUnaligned/4_16M 844.5µ ± 0% 527.9µ ± 0% -37.49% (p=0.000 n=10) +MemclrUnaligned/4_64M 3.366m ± 0% 2.173m ± 0% -35.46% (p=0.000 n=10) +MemclrUnaligned/7_1M 52.52µ ± 0% 32.80µ ± 0% -37.55% (p=0.000 n=10) +MemclrUnaligned/7_4M 210.2µ ± 0% 131.5µ ± 0% -37.45% (p=0.000 n=10) +MemclrUnaligned/7_8M 419.9µ ± 0% 262.6µ ± 0% -37.47% (p=0.000 n=10) +MemclrUnaligned/7_16M 844.4µ ± 0% 529.0µ ± 0% -37.36% (p=0.000 n=10) +MemclrUnaligned/7_64M 3.372m ± 1% 2.201m ± 0% -34.72% (p=0.000 n=10) +MemclrRange/1K_2K 2703.0n ± 0% 948.1n ± 0% -64.93% (p=0.000 n=10) +MemclrRange/2K_8K 8.826µ ± 0% 2.458µ ± 0% -72.15% (p=0.000 n=10) +MemclrRange/4K_16K 8.325µ ± 0% 2.210µ ± 0% -73.45% (p=0.000 n=10) +MemclrRange/160K_228K 83.40µ ± 0% 31.27µ ± 0% -62.50% (p=0.000 n=10) +MemclrKnownSize1 0.4003n ± 0% 0.4002n ± 0% -0.02% (p=0.027 n=10) +MemclrKnownSize2 0.4003n ± 0% 0.4002n ± 0% -0.02% (p=0.000 n=10) +MemclrKnownSize4 0.4003n ± 0% 0.4002n ± 0% -0.02% (p=0.000 n=10) +MemclrKnownSize8 0.4003n ± 0% 0.4002n ± 0% -0.02% (p=0.000 n=10) +MemclrKnownSize16 0.4213n ± 1% 0.8007n ± 0% +90.03% (p=0.000 n=10) +MemclrKnownSize32 2.001n ± 0% 
1.602n ± 0% -19.94% (p=0.000 n=10) +MemclrKnownSize64 2.010n ± 0% 2.402n ± 0% +19.47% (p=0.000 n=10) +MemclrKnownSize112 3.202n ± 0% 2.803n ± 0% -12.46% (p=0.000 n=10) +MemclrKnownSize128 3.442n ± 0% 3.236n ± 0% -6.00% (p=0.000 n=10) +MemclrKnownSize192 5.204n ± 0% 5.205n ± 0% ~ (p=0.279 n=10) +MemclrKnownSize248 6.301n ± 0% 6.299n ± 0% -0.03% (p=0.000 n=10) +MemclrKnownSize256 6.707n ± 0% 6.704n ± 0% -0.04% (p=0.018 n=10) +MemclrKnownSize512 13.610n ± 0% 6.989n ± 0% -48.65% (p=0.000 n=10) +MemclrKnownSize1024 26.420n ± 0% 8.458n ± 0% -67.99% (p=0.000 n=10) +MemclrKnownSize4096 103.30n ± 0% 28.02n ± 0% -72.88% (p=0.000 n=10) +MemclrKnownSize512KiB 26.28µ ± 0% 16.41µ ± 0% -37.53% (p=0.000 n=10) +geomean 624.0n 397.1n -36.37% + +Change-Id: I702b9c1991cf13f9338c189c5ef59cb2c6f279de +--- + src/runtime/cpuflags.go | 3 +- + src/runtime/memclr_loong64.s | 214 ++++++++++++++++++++++++----------- + 2 files changed, 152 insertions(+), 65 deletions(-) + +diff --git a/src/runtime/cpuflags.go b/src/runtime/cpuflags.go +index e81e50f5df..06424642c7 100644 +--- a/src/runtime/cpuflags.go ++++ b/src/runtime/cpuflags.go +@@ -20,7 +20,8 @@ const ( + + offsetMIPS64XHasMSA = unsafe.Offsetof(cpu.MIPS64X.HasMSA) + +- offsetLOONG64HasLSX = unsafe.Offsetof(cpu.Loong64.HasLSX) ++ offsetLOONG64HasLSX = unsafe.Offsetof(cpu.Loong64.HasLSX) ++ offsetLOONG64HasLASX = unsafe.Offsetof(cpu.Loong64.HasLASX) + ) + + var ( +diff --git a/src/runtime/memclr_loong64.s b/src/runtime/memclr_loong64.s +index 346b210c8d..0d0d9f0cbb 100644 +--- a/src/runtime/memclr_loong64.s ++++ b/src/runtime/memclr_loong64.s +@@ -11,6 +11,7 @@ + // R5: n + // R6: ptrend + // R7: tmp ++// R8: tmp + + // Algorithm: + // +@@ -38,44 +39,129 @@ + + // func memclrNoHeapPointers(ptr unsafe.Pointer, n uintptr) + TEXT runtime·memclrNoHeapPointers(SB),NOSPLIT,$0-16 +- BEQ R5, clr_0 ++ // <=64 bytes, clear directly, not check aligned ++generic_small: + ADDV R4, R5, R6 ++ BEQ R4, R6, clr_0 ++ MOVV $2, R7 ++ BLT R5, R7, clr_1 ++ MOVV 
$3, R7 ++ BLT R5, R7, clr_2 ++ MOVV $4, R7 ++ BLT R5, R7, clr_3 ++ MOVV $5, R7 ++ BLT R5, R7, clr_4 ++ MOVV $8, R7 ++ BLT R5, R7, clr_5_7 ++ MOVV $9, R7 ++ BLT R5, R7, clr_8 ++ MOVV $17, R7 ++ BLT R5, R7, clr_9_16 ++ MOVV $33, R7 ++ BLT R5, R7, clr_17_32 ++ MOVV $65, R7 ++ BLT R5, R7, clr_33_64 + +-tail: +- // <=64 bytes, clear directly, not check aligned +- SGTU $2, R5, R7 +- BNE R7, clr_1 +- SGTU $3, R5, R7 +- BNE R7, clr_2 +- SGTU $4, R5, R7 +- BNE R7, clr_3 +- SGTU $5, R5, R7 +- BNE R7, clr_4 +- SGTU $8, R5, R7 +- BNE R7, clr_5through7 +- SGTU $9, R5, R7 +- BNE R7, clr_8 +- SGTU $17, R5, R7 +- BNE R7, clr_9through16 +- SGTU $33, R5, R7 +- BNE R7, clr_17through32 +- SGTU $65, R5, R7 +- BNE R7, clr_33through64 ++lasx_large: ++ MOVBU internal∕cpu·Loong64+const_offsetLOONG64HasLASX(SB), R7 ++ BEQ R7, lsx_large ++ ++ // X0 = 0 ++ XVMOVQ R0, X0.V4 ++ ++ // check 32-byte alignment ++ AND $31, R4, R7 ++ BEQ R7, lasx_large_aligned ++ XVMOVQ X0, (R4) ++ SUBV R7, R4 ++ ADDV R7, R5 ++ SUBV $32, R5 // newn = n - (32 - (ptr & 31)) ++ ADDV $32, R4 // newptr = ptr + (32 - (ptr & 31)) ++ ++lasx_large_aligned: ++ MOVV $256, R8 ++ BLT R5, R8, lasx_small ++lasx_large_body: ++ XVMOVQ X0, 0(R4) ++ XVMOVQ X0, 32(R4) ++ XVMOVQ X0, 64(R4) ++ XVMOVQ X0, 96(R4) ++ XVMOVQ X0, 128(R4) ++ XVMOVQ X0, 160(R4) ++ XVMOVQ X0, 192(R4) ++ XVMOVQ X0, 224(R4) ++ SUBV $256, R5 ++ ADDV $256, R4 ++ BGE R5, R8, lasx_large_body ++ ++lasx_small: ++ MOVV $32, R8 ++ BLT R5, R8, generic_small ++lasx_small_body: ++ XVMOVQ X0, (R4) ++ SUBV $32, R5 ++ ADDV $32, R4 ++ BGE R5, R8, lasx_small_body ++lasx_tail: ++ JMP generic_small ++ ++lsx_large: ++ MOVBU internal∕cpu·Loong64+const_offsetLOONG64HasLSX(SB), R7 ++ BEQ R7, generic_large ++ ++ // V0 = 0 ++ VMOVQ R0, V0.V2 + ++ // check 16-byte alignment ++ AND $15, R4, R7 ++ BEQ R7, lsx_large_aligned ++ VMOVQ V0, (R4) ++ SUBV R7, R4 ++ ADDV R7, R5 ++ SUBV $16, R5 // newn = n - (16 - (ptr & 15)) ++ ADDV $16, R4 // newptr = ptr + (16 - (ptr & 15)) ++ 
++lsx_large_aligned: ++ MOVV $128, R8 ++ BLT R5, R8, lsx_small ++lsx_large_body: ++ VMOVQ V0, 0(R4) ++ VMOVQ V0, 16(R4) ++ VMOVQ V0, 32(R4) ++ VMOVQ V0, 48(R4) ++ VMOVQ V0, 64(R4) ++ VMOVQ V0, 80(R4) ++ VMOVQ V0, 96(R4) ++ VMOVQ V0, 112(R4) ++ SUBV $128, R5 ++ ADDV $128, R4 ++ BGE R5, R8, lsx_large_body ++ ++lsx_small: ++ MOVV $16, R8 ++ BLT R5, R8, generic_small ++lsx_small_body: ++ VMOVQ V0, (R4) ++ SUBV $16, R5 ++ ADDV $16, R4 ++ BGE R5, R8, lsx_small_body ++lsx_tail: ++ JMP generic_small ++ ++generic_large: + // n > 64 bytes, check aligned + AND $7, R4, R7 +- BEQ R7, body +- +-head: ++ BEQ R7, generic_large_aligned + MOVV R0, (R4) +- SUBV R7, R4 + ADDV R7, R5 +- ADDV $8, R4 // newptr = ptr + (8 - (ptr & 7)) ++ SUBV R7, R4 + SUBV $8, R5 // newn = n - (8 - (ptr & 7)) +- SGTU $65, R5, R7 +- BNE R7, clr_33through64 ++ ADDV $8, R4 // newptr = ptr + (8 - (ptr & 7)) + +-body: ++generic_large_aligned: ++ MOVV $65, R7 ++ BLT R5, R7, generic_small ++generic_large_body: + MOVV R0, (R4) + MOVV R0, 8(R4) + MOVV R0, 16(R4) +@@ -84,52 +170,52 @@ body: + MOVV R0, 40(R4) + MOVV R0, 48(R4) + MOVV R0, 56(R4) +- ADDV $-64, R5 ++ SUBV $64, R5 + ADDV $64, R4 +- SGTU $65, R5, R7 +- BEQ R7, body +- BEQ R5, clr_0 +- JMP tail ++ BGE R5, R7, generic_large_body ++generic_tail: ++ JMP generic_small + +-clr_0: ++clr_33_64: ++ MOVV R0, (R4) ++ MOVV R0, 8(R4) ++ MOVV R0, 16(R4) ++ MOVV R0, 24(R4) ++ MOVV R0, -32(R6) ++ MOVV R0, -24(R6) ++ MOVV R0, -16(R6) ++ MOVV R0, -8(R6) + RET +-clr_1: +- MOVB R0, (R4) ++ ++clr_17_32: ++ MOVV R0, (R4) ++ MOVV R0, 8(R4) ++ MOVV R0, -16(R6) ++ MOVV R0, -8(R6) + RET +-clr_2: +- MOVH R0, (R4) ++clr_9_16: ++ MOVV R0, (R4) ++ MOVV R0, -8(R6) + RET +-clr_3: +- MOVH R0, (R4) +- MOVB R0, 2(R4) ++clr_8: ++ MOVV R0, (R4) + RET +-clr_4: ++clr_5_7: + MOVW R0, (R4) ++ MOVW R0, -4(R6) + RET +-clr_5through7: ++clr_4: + MOVW R0, (R4) +- MOVW R0, -4(R6) + RET +-clr_8: +- MOVV R0, (R4) ++clr_3: ++ MOVH R0, (R4) ++ MOVB R0, 2(R4) + RET +-clr_9through16: +- MOVV R0, (R4) +- 
MOVV R0, -8(R6) ++clr_2: ++ MOVH R0, (R4) + RET +-clr_17through32: +- MOVV R0, (R4) +- MOVV R0, 8(R4) +- MOVV R0, -16(R6) +- MOVV R0, -8(R6) ++clr_1: ++ MOVB R0, (R4) + RET +-clr_33through64: +- MOVV R0, (R4) +- MOVV R0, 8(R4) +- MOVV R0, 16(R4) +- MOVV R0, 24(R4) +- MOVV R0, -32(R6) +- MOVV R0, -24(R6) +- MOVV R0, -16(R6) +- MOVV R0, -8(R6) ++clr_0: + RET +-- +2.38.1 + diff --git a/0040-cmd-internal-cmd-link-unify-the-relocation-naming-st.patch b/0040-cmd-internal-cmd-link-unify-the-relocation-naming-st.patch deleted file mode 100644 index fe4beb365d2cd15178865295910f074a7dd789f0..0000000000000000000000000000000000000000 --- a/0040-cmd-internal-cmd-link-unify-the-relocation-naming-st.patch +++ /dev/null @@ -1,404 +0,0 @@ -From 2ce8a854ea6f8859f3a40bbf4b626b8d6fcb5974 Mon Sep 17 00:00:00 2001 -From: limeidan -Date: Wed, 11 Oct 2023 18:01:59 +0800 -Subject: [PATCH 40/51] cmd/internal, cmd/link: unify the relocation naming - style of loong64 - -Change-Id: Ica24a7c351b26a4375bbc52b719c69b78e89c5df ---- - src/cmd/internal/obj/loong64/asm.go | 30 +++++------ - src/cmd/internal/objabi/reloctype.go | 28 +++++------ - src/cmd/internal/objabi/reloctype_string.go | 18 +++---- - src/cmd/link/internal/loong64/asm.go | 56 ++++++++++----------- - 4 files changed, 65 insertions(+), 67 deletions(-) - -diff --git a/src/cmd/internal/obj/loong64/asm.go b/src/cmd/internal/obj/loong64/asm.go -index 74ee2b6cea..0034f7bdb9 100644 ---- a/src/cmd/internal/obj/loong64/asm.go -+++ b/src/cmd/internal/obj/loong64/asm.go -@@ -1677,7 +1677,7 @@ func (c *ctxt0) asmout(p *obj.Prog, o *Optab, out []uint32) { - rel.Siz = 4 - rel.Sym = p.To.Sym - rel.Add = p.To.Offset -- rel.Type = objabi.R_ADDRLOONG64U -+ rel.Type = objabi.R_LOONG64_ADDR_HI - - o2 = OP_12IRR(c.opirr(p.As), uint32(0), uint32(REGTMP), uint32(p.From.Reg)) - rel2 := obj.Addrel(c.cursym) -@@ -1685,7 +1685,7 @@ func (c *ctxt0) asmout(p *obj.Prog, o *Optab, out []uint32) { - rel2.Siz = 4 - rel2.Sym = p.To.Sym - rel2.Add = p.To.Offset -- 
rel2.Type = objabi.R_ADDRLOONG64 -+ rel2.Type = objabi.R_LOONG64_ADDR_LO - - case 51: // mov addr,r ==> pcalau12i + lw - o1 = OP_IR(c.opir(APCALAU12I), uint32(0), uint32(REGTMP)) -@@ -1694,14 +1694,14 @@ func (c *ctxt0) asmout(p *obj.Prog, o *Optab, out []uint32) { - rel.Siz = 4 - rel.Sym = p.From.Sym - rel.Add = p.From.Offset -- rel.Type = objabi.R_ADDRLOONG64U -+ rel.Type = objabi.R_LOONG64_ADDR_HI - o2 = OP_12IRR(c.opirr(-p.As), uint32(0), uint32(REGTMP), uint32(p.To.Reg)) - rel2 := obj.Addrel(c.cursym) - rel2.Off = int32(c.pc + 4) - rel2.Siz = 4 - rel2.Sym = p.From.Sym - rel2.Add = p.From.Offset -- rel2.Type = objabi.R_ADDRLOONG64 -+ rel2.Type = objabi.R_LOONG64_ADDR_LO - - case 52: // mov $lext, r - // NOTE: this case does not use REGTMP. If it ever does, -@@ -1712,14 +1712,14 @@ func (c *ctxt0) asmout(p *obj.Prog, o *Optab, out []uint32) { - rel.Siz = 4 - rel.Sym = p.From.Sym - rel.Add = p.From.Offset -- rel.Type = objabi.R_ADDRLOONG64U -+ rel.Type = objabi.R_LOONG64_ADDR_HI - o2 = OP_12IRR(c.opirr(add), uint32(0), uint32(p.To.Reg), uint32(p.To.Reg)) - rel2 := obj.Addrel(c.cursym) - rel2.Off = int32(c.pc + 4) - rel2.Siz = 4 - rel2.Sym = p.From.Sym - rel2.Add = p.From.Offset -- rel2.Type = objabi.R_ADDRLOONG64 -+ rel2.Type = objabi.R_LOONG64_ADDR_LO - - case 53: // mov r, tlsvar ==> lu12i.w + ori + add r2, regtmp + sw o(regtmp) - // NOTE: this case does not use REGTMP. 
If it ever does, -@@ -1730,14 +1730,14 @@ func (c *ctxt0) asmout(p *obj.Prog, o *Optab, out []uint32) { - rel.Siz = 4 - rel.Sym = p.To.Sym - rel.Add = p.To.Offset -- rel.Type = objabi.R_ADDRLOONG64TLSU -+ rel.Type = objabi.R_LOONG64_TLS_LE_HI - o2 = OP_12IRR(c.opirr(AOR), uint32(0), uint32(REGTMP), uint32(REGTMP)) - rel2 := obj.Addrel(c.cursym) - rel2.Off = int32(c.pc + 4) - rel2.Siz = 4 - rel2.Sym = p.To.Sym - rel2.Add = p.To.Offset -- rel2.Type = objabi.R_ADDRLOONG64TLS -+ rel2.Type = objabi.R_LOONG64_TLS_LE_LO - o3 = OP_RRR(c.oprrr(AADDV), uint32(REG_R2), uint32(REGTMP), uint32(REGTMP)) - o4 = OP_12IRR(c.opirr(p.As), uint32(0), uint32(REGTMP), uint32(p.From.Reg)) - -@@ -1750,14 +1750,14 @@ func (c *ctxt0) asmout(p *obj.Prog, o *Optab, out []uint32) { - rel.Siz = 4 - rel.Sym = p.From.Sym - rel.Add = p.From.Offset -- rel.Type = objabi.R_ADDRLOONG64TLSU -+ rel.Type = objabi.R_LOONG64_TLS_LE_HI - o2 = OP_12IRR(c.opirr(AOR), uint32(0), uint32(REGTMP), uint32(REGTMP)) - rel2 := obj.Addrel(c.cursym) - rel2.Off = int32(c.pc + 4) - rel2.Siz = 4 - rel2.Sym = p.From.Sym - rel2.Add = p.From.Offset -- rel2.Type = objabi.R_ADDRLOONG64TLS -+ rel2.Type = objabi.R_LOONG64_TLS_LE_LO - o3 = OP_RRR(c.oprrr(AADDV), uint32(REG_R2), uint32(REGTMP), uint32(REGTMP)) - o4 = OP_12IRR(c.opirr(-p.As), uint32(0), uint32(REGTMP), uint32(p.To.Reg)) - -@@ -1770,14 +1770,14 @@ func (c *ctxt0) asmout(p *obj.Prog, o *Optab, out []uint32) { - rel.Siz = 4 - rel.Sym = p.From.Sym - rel.Add = p.From.Offset -- rel.Type = objabi.R_ADDRLOONG64TLSU -+ rel.Type = objabi.R_LOONG64_TLS_LE_HI - o2 = OP_12IRR(c.opirr(AOR), uint32(0), uint32(REGTMP), uint32(REGTMP)) - rel2 := obj.Addrel(c.cursym) - rel2.Off = int32(c.pc + 4) - rel2.Siz = 4 - rel2.Sym = p.From.Sym - rel2.Add = p.From.Offset -- rel2.Type = objabi.R_ADDRLOONG64TLS -+ rel2.Type = objabi.R_LOONG64_TLS_LE_LO - o3 = OP_RRR(c.oprrr(AADDV), uint32(REG_R2), uint32(REGTMP), uint32(p.To.Reg)) - - case 56: // mov r, tlsvar IE model ==> (pcalau12i + 
ld.d)tlsvar@got + add.d + st.d -@@ -1787,7 +1787,7 @@ func (c *ctxt0) asmout(p *obj.Prog, o *Optab, out []uint32) { - rel.Siz = 4 - rel.Sym = p.To.Sym - rel.Add = 0x0 -- rel.Type = objabi.R_LOONG64_TLS_IE_PCREL_HI -+ rel.Type = objabi.R_LOONG64_TLS_IE_HI - o2 = OP_12IRR(c.opirr(-p.As), uint32(0), uint32(REGTMP), uint32(REGTMP)) - rel2 := obj.Addrel(c.cursym) - rel2.Off = int32(c.pc + 4) -@@ -1805,7 +1805,7 @@ func (c *ctxt0) asmout(p *obj.Prog, o *Optab, out []uint32) { - rel.Siz = 4 - rel.Sym = p.From.Sym - rel.Add = 0x0 -- rel.Type = objabi.R_LOONG64_TLS_IE_PCREL_HI -+ rel.Type = objabi.R_LOONG64_TLS_IE_HI - o2 = OP_12IRR(c.opirr(-p.As), uint32(0), uint32(REGTMP), uint32(REGTMP)) - rel2 := obj.Addrel(c.cursym) - rel2.Off = int32(c.pc + 4) -@@ -1858,7 +1858,7 @@ func (c *ctxt0) asmout(p *obj.Prog, o *Optab, out []uint32) { - rel.Off = int32(c.pc) - rel.Siz = 4 - rel.Sym = p.From.Sym -- rel.Type = objabi.R_LOONG64_GOTPCREL_HI -+ rel.Type = objabi.R_LOONG64_GOT_HI - rel.Add = 0x0 - o2 = OP_12IRR(c.opirr(-p.As), uint32(0), uint32(p.To.Reg), uint32(p.To.Reg)) - rel2 := obj.Addrel(c.cursym) -diff --git a/src/cmd/internal/objabi/reloctype.go b/src/cmd/internal/objabi/reloctype.go -index 241a79817c..b1934e424f 100644 ---- a/src/cmd/internal/objabi/reloctype.go -+++ b/src/cmd/internal/objabi/reloctype.go -@@ -291,34 +291,32 @@ const ( - - // Loong64. - -- // R_ADDRLOONG64 resolves to the low 12 bits of an external address, by encoding -- // it into the instruction. -- R_ADDRLOONG64 -- -- // R_ADDRLOONG64U resolves to the sign-adjusted "upper" 20 bits (bit 5-24) of an -+ // R_LOONG64_ADDR_HI resolves to the sign-adjusted "upper" 20 bits (bit 5-24) of an - // external address, by encoding it into the instruction. -- R_ADDRLOONG64U -+ // R_LOONG64_ADDR_LO resolves to the low 12 bits of an external address, by encoding -+ // it into the instruction. 
-+ R_LOONG64_ADDR_HI -+ R_LOONG64_ADDR_LO - -- // R_ADDRLOONG64TLS resolves to the low 12 bits of a TLS address (offset from -+ // R_LOONG64_TLS_LE_HI resolves to the high 20 bits of a TLS address (offset from - // thread pointer), by encoding it into the instruction. -- R_ADDRLOONG64TLS -- -- // R_ADDRLOONG64TLSU resolves to the high 20 bits of a TLS address (offset from -+ // R_LOONG64_TLS_LE_LO resolves to the low 12 bits of a TLS address (offset from - // thread pointer), by encoding it into the instruction. -- R_ADDRLOONG64TLSU -+ R_LOONG64_TLS_LE_HI -+ R_LOONG64_TLS_LE_LO - - // R_CALLLOONG64 resolves to non-PC-relative target address of a CALL (BL/JIRL) - // instruction, by encoding the address into the instruction. - R_CALLLOONG64 - -- // R_LOONG64_TLS_IE_PCREL_HI and R_LOONG64_TLS_IE_LO relocates a pcalau12i, ld.d -+ // R_LOONG64_TLS_IE_HI and R_LOONG64_TLS_IE_LO relocates a pcalau12i, ld.d - // pair to compute the address of the GOT slot of the tls symbol. -- R_LOONG64_TLS_IE_PCREL_HI -+ R_LOONG64_TLS_IE_HI - R_LOONG64_TLS_IE_LO - -- // R_LOONG64_GOTPCREL_HI and R_LOONG64_GOT_LO relocates an pcalau12i, ld.d pair to compute -+ // R_LOONG64_GOT_HI and R_LOONG64_GOT_LO relocates an pcalau12i, ld.d pair to compute - // the address of the GOT slot of the referenced symbol. -- R_LOONG64_GOTPCREL_HI -+ R_LOONG64_GOT_HI - R_LOONG64_GOT_LO - - // R_JMPLOONG64 resolves to non-PC-relative target address of a JMP instruction, -diff --git a/src/cmd/internal/objabi/reloctype_string.go b/src/cmd/internal/objabi/reloctype_string.go -index e0649a5b0a..3e61c77dc0 100644 ---- a/src/cmd/internal/objabi/reloctype_string.go -+++ b/src/cmd/internal/objabi/reloctype_string.go -@@ -1,4 +1,4 @@ --// Code generated by "stringer -type=RelocType"; DO NOT EDIT. -+// Code generated by "stringer -type=RelocType cmd/internal/objabi/reloctype.go"; DO NOT EDIT. 
- - package objabi - -@@ -74,14 +74,14 @@ func _() { - _ = x[R_RISCV_TLS_IE_ITYPE-64] - _ = x[R_RISCV_TLS_IE_STYPE-65] - _ = x[R_PCRELDBL-66] -- _ = x[R_ADDRLOONG64-67] -- _ = x[R_ADDRLOONG64U-68] -- _ = x[R_ADDRLOONG64TLS-69] -- _ = x[R_ADDRLOONG64TLSU-70] -+ _ = x[R_LOONG64_ADDR_HI-67] -+ _ = x[R_LOONG64_ADDR_LO-68] -+ _ = x[R_LOONG64_TLS_LE_HI-69] -+ _ = x[R_LOONG64_TLS_LE_LO-70] - _ = x[R_CALLLOONG64-71] -- _ = x[R_LOONG64_TLS_IE_PCREL_HI-72] -+ _ = x[R_LOONG64_TLS_IE_HI-72] - _ = x[R_LOONG64_TLS_IE_LO-73] -- _ = x[R_LOONG64_GOTPCREL_HI-74] -+ _ = x[R_LOONG64_GOT_HI-74] - _ = x[R_LOONG64_GOT_LO-75] - _ = x[R_JMPLOONG64-76] - _ = x[R_ADDRMIPSU-77] -@@ -93,9 +93,9 @@ func _() { - _ = x[R_INITORDER-83] - } - --const _RelocType_name = "R_ADDRR_ADDRPOWERR_ADDRARM64R_ADDRMIPSR_ADDROFFR_SIZER_CALLR_CALLARMR_CALLARM64R_CALLINDR_CALLPOWERR_CALLMIPSR_CONSTR_PCRELR_TLS_LER_TLS_IER_GOTOFFR_PLT0R_PLT1R_PLT2R_USEFIELDR_USETYPER_USEIFACER_USEIFACEMETHODR_USEGENERICIFACEMETHODR_METHODOFFR_KEEPR_POWER_TOCR_GOTPCRELR_JMPMIPSR_DWARFSECREFR_DWARFFILEREFR_ARM64_TLS_LER_ARM64_TLS_IER_ARM64_GOTPCRELR_ARM64_GOTR_ARM64_PCRELR_ARM64_PCREL_LDST8R_ARM64_PCREL_LDST16R_ARM64_PCREL_LDST32R_ARM64_PCREL_LDST64R_ARM64_LDST8R_ARM64_LDST16R_ARM64_LDST32R_ARM64_LDST64R_ARM64_LDST128R_POWER_TLS_LER_POWER_TLS_IER_POWER_TLSR_POWER_TLS_IE_PCREL34R_POWER_TLS_LE_TPREL34R_ADDRPOWER_DSR_ADDRPOWER_GOTR_ADDRPOWER_GOT_PCREL34R_ADDRPOWER_PCRELR_ADDRPOWER_TOCRELR_ADDRPOWER_TOCREL_DSR_ADDRPOWER_D34R_ADDRPOWER_PCREL34R_RISCV_CALLR_RISCV_CALL_TRAMPR_RISCV_PCREL_ITYPER_RISCV_PCREL_STYPER_RISCV_TLS_IE_ITYPER_RISCV_TLS_IE_STYPER_PCRELDBLR_ADDRLOONG64R_ADDRLOONG64UR_ADDRLOONG64TLSR_ADDRLOONG64TLSUR_CALLLOONG64R_LOONG64_TLS_IE_PCREL_HIR_LOONG64_TLS_IE_LOR_LOONG64_GOTPCREL_HIR_LOONG64_GOT_LOR_JMPLOONG64R_ADDRMIPSUR_ADDRMIPSTLSR_ADDRCUOFFR_WASMIMPORTR_XCOFFREFR_PEIMAGEOFFR_INITORDER" -+const _RelocType_name = 
"R_ADDRR_ADDRPOWERR_ADDRARM64R_ADDRMIPSR_ADDROFFR_SIZER_CALLR_CALLARMR_CALLARM64R_CALLINDR_CALLPOWERR_CALLMIPSR_CONSTR_PCRELR_TLS_LER_TLS_IER_GOTOFFR_PLT0R_PLT1R_PLT2R_USEFIELDR_USETYPER_USEIFACER_USEIFACEMETHODR_USEGENERICIFACEMETHODR_METHODOFFR_KEEPR_POWER_TOCR_GOTPCRELR_JMPMIPSR_DWARFSECREFR_DWARFFILEREFR_ARM64_TLS_LER_ARM64_TLS_IER_ARM64_GOTPCRELR_ARM64_GOTR_ARM64_PCRELR_ARM64_PCREL_LDST8R_ARM64_PCREL_LDST16R_ARM64_PCREL_LDST32R_ARM64_PCREL_LDST64R_ARM64_LDST8R_ARM64_LDST16R_ARM64_LDST32R_ARM64_LDST64R_ARM64_LDST128R_POWER_TLS_LER_POWER_TLS_IER_POWER_TLSR_POWER_TLS_IE_PCREL34R_POWER_TLS_LE_TPREL34R_ADDRPOWER_DSR_ADDRPOWER_GOTR_ADDRPOWER_GOT_PCREL34R_ADDRPOWER_PCRELR_ADDRPOWER_TOCRELR_ADDRPOWER_TOCREL_DSR_ADDRPOWER_D34R_ADDRPOWER_PCREL34R_RISCV_CALLR_RISCV_CALL_TRAMPR_RISCV_PCREL_ITYPER_RISCV_PCREL_STYPER_RISCV_TLS_IE_ITYPER_RISCV_TLS_IE_STYPER_PCRELDBLR_LOONG64_ADDR_HIR_LOONG64_ADDR_LOR_LOONG64_TLS_LE_HIR_LOONG64_TLS_LE_LOR_CALLLOONG64R_LOONG64_TLS_IE_HIR_LOONG64_TLS_IE_LOR_LOONG64_GOT_HIR_LOONG64_GOT_LOR_JMPLOONG64R_ADDRMIPSUR_ADDRMIPSTLSR_ADDRCUOFFR_WASMIMPORTR_XCOFFREFR_PEIMAGEOFFR_INITORDER" - --var _RelocType_index = [...]uint16{0, 6, 17, 28, 38, 47, 53, 59, 68, 79, 88, 99, 109, 116, 123, 131, 139, 147, 153, 159, 165, 175, 184, 194, 210, 233, 244, 250, 261, 271, 280, 293, 307, 321, 335, 351, 362, 375, 394, 414, 434, 454, 467, 481, 495, 509, 524, 538, 552, 563, 585, 607, 621, 636, 659, 676, 694, 715, 730, 749, 761, 779, 798, 817, 837, 857, 867, 880, 894, 910, 927, 940, 965, 984, 1005, 1021, 1033, 1044, 1057, 1068, 1080, 1090, 1102, 1113} -+var _RelocType_index = [...]uint16{0, 6, 17, 28, 38, 47, 53, 59, 68, 79, 88, 99, 109, 116, 123, 131, 139, 147, 153, 159, 165, 175, 184, 194, 210, 233, 244, 250, 261, 271, 280, 293, 307, 321, 335, 351, 362, 375, 394, 414, 434, 454, 467, 481, 495, 509, 524, 538, 552, 563, 585, 607, 621, 636, 659, 676, 694, 715, 730, 749, 761, 779, 798, 817, 837, 857, 867, 884, 901, 920, 939, 952, 971, 990, 1006, 1022, 1034, 1045, 1058, 
1069, 1081, 1091, 1103, 1114} - - func (i RelocType) String() string { - i -= 1 -diff --git a/src/cmd/link/internal/loong64/asm.go b/src/cmd/link/internal/loong64/asm.go -index d1296c3309..99b568cfbb 100644 ---- a/src/cmd/link/internal/loong64/asm.go -+++ b/src/cmd/link/internal/loong64/asm.go -@@ -34,7 +34,7 @@ func gentext(ctxt *ld.Link, ldr *loader.Loader) { - // 0: 1a000004 pcalau12i $a0, 0 - // 0: R_LARCH_PCALA_HI20 local.moduledata - o(0x1a000004) -- rel, _ := initfunc.AddRel(objabi.R_ADDRLOONG64U) -+ rel, _ := initfunc.AddRel(objabi.R_LOONG64_ADDR_HI) - rel.SetOff(0) - rel.SetSiz(4) - rel.SetSym(ctxt.Moduledata) -@@ -42,7 +42,7 @@ func gentext(ctxt *ld.Link, ldr *loader.Loader) { - // 4: 02c00084 addi.d $a0, $a0, 0 - // 4: R_LARCH_PCALA_LO12 local.moduledata - o(0x02c00084) -- rel2, _ := initfunc.AddRel(objabi.R_ADDRLOONG64) -+ rel2, _ := initfunc.AddRel(objabi.R_LOONG64_ADDR_LO) - rel2.SetOff(4) - rel2.SetSiz(4) - rel2.SetSym(ctxt.Moduledata) -@@ -84,12 +84,12 @@ func elfreloc1(ctxt *ld.Link, out *ld.OutBuf, ldr *loader.Loader, s loader.Sym, - default: - return false - } -- case objabi.R_ADDRLOONG64TLS: -+ case objabi.R_LOONG64_TLS_LE_LO: - out.Write64(uint64(sectoff)) - out.Write64(uint64(elf.R_LARCH_TLS_LE_LO12) | uint64(elfsym)<<32) - out.Write64(uint64(r.Xadd)) - -- case objabi.R_ADDRLOONG64TLSU: -+ case objabi.R_LOONG64_TLS_LE_HI: - out.Write64(uint64(sectoff)) - out.Write64(uint64(elf.R_LARCH_TLS_LE_HI20) | uint64(elfsym)<<32) - out.Write64(uint64(r.Xadd)) -@@ -99,7 +99,7 @@ func elfreloc1(ctxt *ld.Link, out *ld.OutBuf, ldr *loader.Loader, s loader.Sym, - out.Write64(uint64(elf.R_LARCH_B26) | uint64(elfsym)<<32) - out.Write64(uint64(r.Xadd)) - -- case objabi.R_LOONG64_TLS_IE_PCREL_HI: -+ case objabi.R_LOONG64_TLS_IE_HI: - out.Write64(uint64(sectoff)) - out.Write64(uint64(elf.R_LARCH_TLS_IE_PC_HI20) | uint64(elfsym)<<32) - out.Write64(uint64(0x0)) -@@ -109,17 +109,17 @@ func elfreloc1(ctxt *ld.Link, out *ld.OutBuf, ldr *loader.Loader, s loader.Sym, - 
out.Write64(uint64(elf.R_LARCH_TLS_IE_PC_LO12) | uint64(elfsym)<<32) - out.Write64(uint64(0x0)) - -- case objabi.R_ADDRLOONG64: -+ case objabi.R_LOONG64_ADDR_LO: - out.Write64(uint64(sectoff)) - out.Write64(uint64(elf.R_LARCH_PCALA_LO12) | uint64(elfsym)<<32) - out.Write64(uint64(r.Xadd)) - -- case objabi.R_ADDRLOONG64U: -+ case objabi.R_LOONG64_ADDR_HI: - out.Write64(uint64(sectoff)) - out.Write64(uint64(elf.R_LARCH_PCALA_HI20) | uint64(elfsym)<<32) - out.Write64(uint64(r.Xadd)) - -- case objabi.R_LOONG64_GOTPCREL_HI: -+ case objabi.R_LOONG64_GOT_HI: - out.Write64(uint64(sectoff)) - out.Write64(uint64(elf.R_LARCH_GOT_PC_HI20) | uint64(elfsym)<<32) - out.Write64(uint64(0x0)) -@@ -147,10 +147,10 @@ func archreloc(target *ld.Target, ldr *loader.Loader, syms *ld.ArchSyms, r loade - switch r.Type() { - default: - return val, 0, false -- case objabi.R_ADDRLOONG64, -- objabi.R_LOONG64_GOTPCREL_HI, -- objabi.R_LOONG64_GOT_LO, -- objabi.R_ADDRLOONG64U: -+ case objabi.R_LOONG64_ADDR_HI, -+ objabi.R_LOONG64_ADDR_LO, -+ objabi.R_LOONG64_GOT_HI, -+ objabi.R_LOONG64_GOT_LO: - // set up addend for eventual relocation via outer symbol. 
- rs, _ := ld.FoldSubSymbolOffset(ldr, rs) - rst := ldr.SymType(rs) -@@ -158,11 +158,11 @@ func archreloc(target *ld.Target, ldr *loader.Loader, syms *ld.ArchSyms, r loade - ldr.Errorf(s, "missing section for %s", ldr.SymName(rs)) - } - return val, 1, true -- case objabi.R_ADDRLOONG64TLS, -- objabi.R_ADDRLOONG64TLSU, -+ case objabi.R_LOONG64_TLS_LE_HI, -+ objabi.R_LOONG64_TLS_LE_LO, - objabi.R_CALLLOONG64, - objabi.R_JMPLOONG64, -- objabi.R_LOONG64_TLS_IE_PCREL_HI, -+ objabi.R_LOONG64_TLS_IE_HI, - objabi.R_LOONG64_TLS_IE_LO: - return val, 1, true - } -@@ -176,18 +176,18 @@ func archreloc(target *ld.Target, ldr *loader.Loader, syms *ld.ArchSyms, r loade - return r.Add(), noExtReloc, isOk - case objabi.R_GOTOFF: - return ldr.SymValue(r.Sym()) + r.Add() - ldr.SymValue(syms.GOT), noExtReloc, isOk -- case objabi.R_ADDRLOONG64, -- objabi.R_ADDRLOONG64U: -+ case objabi.R_LOONG64_ADDR_HI, -+ objabi.R_LOONG64_ADDR_LO: - pc := ldr.SymValue(s) + int64(r.Off()) - t := calculatePCAlignedReloc(r.Type(), ldr.SymAddr(rs)+r.Add(), pc) -- if r.Type() == objabi.R_ADDRLOONG64 { -+ if r.Type() == objabi.R_LOONG64_ADDR_LO { - return int64(val&0xffc003ff | (t << 10)), noExtReloc, isOk - } - return int64(val&0xfe00001f | (t << 5)), noExtReloc, isOk -- case objabi.R_ADDRLOONG64TLS, -- objabi.R_ADDRLOONG64TLSU: -+ case objabi.R_LOONG64_TLS_LE_HI, -+ objabi.R_LOONG64_TLS_LE_LO: - t := ldr.SymAddr(rs) + r.Add() -- if r.Type() == objabi.R_ADDRLOONG64TLS { -+ if r.Type() == objabi.R_LOONG64_TLS_LE_LO { - return int64(val&0xffc003ff | ((t & 0xfff) << 10)), noExtReloc, isOk - } - return int64(val&0xfe00001f | (((t) >> 12 << 5) & 0x1ffffe0)), noExtReloc, isOk -@@ -207,20 +207,20 @@ func archrelocvariant(*ld.Target, *loader.Loader, loader.Reloc, sym.RelocVariant - - func extreloc(target *ld.Target, ldr *loader.Loader, r loader.Reloc, s loader.Sym) (loader.ExtReloc, bool) { - switch r.Type() { -- case objabi.R_ADDRLOONG64, -- objabi.R_ADDRLOONG64U, -- objabi.R_LOONG64_GOTPCREL_HI, -+ case 
objabi.R_LOONG64_ADDR_HI, -+ objabi.R_LOONG64_ADDR_LO, -+ objabi.R_LOONG64_GOT_HI, - objabi.R_LOONG64_GOT_LO: - - return ld.ExtrelocViaOuterSym(ldr, r, s), true - -- case objabi.R_ADDRLOONG64TLS, -- objabi.R_ADDRLOONG64TLSU, -+ case objabi.R_LOONG64_TLS_LE_HI, -+ objabi.R_LOONG64_TLS_LE_LO, - objabi.R_CONST, - objabi.R_GOTOFF, - objabi.R_CALLLOONG64, - objabi.R_JMPLOONG64, -- objabi.R_LOONG64_TLS_IE_PCREL_HI, -+ objabi.R_LOONG64_TLS_IE_HI, - objabi.R_LOONG64_TLS_IE_LO: - return ld.ExtrelocSimple(ldr, r), true - } -@@ -229,7 +229,7 @@ func extreloc(target *ld.Target, ldr *loader.Loader, r loader.Reloc, s loader.Sy - - func isRequestingLowPageBits(t objabi.RelocType) bool { - switch t { -- case objabi.R_ADDRLOONG64: -+ case objabi.R_LOONG64_ADDR_LO: - return true - } - return false --- -2.38.1 - diff --git a/0040-runtime-race-add-the-implementation-of-atomic.-Or-An.patch b/0040-runtime-race-add-the-implementation-of-atomic.-Or-An.patch new file mode 100644 index 0000000000000000000000000000000000000000..39a261b130f8233e6a4ef7081b817fd0c5160f66 --- /dev/null +++ b/0040-runtime-race-add-the-implementation-of-atomic.-Or-An.patch @@ -0,0 +1,75 @@ +From 88b165cf7d4cb6a77f47d3c291d3ee7e1f13695e Mon Sep 17 00:00:00 2001 +From: Guoqi Chen +Date: Fri, 10 Jan 2025 10:31:47 +0800 +Subject: [PATCH 40/44] runtime/race: add the implementation of atomic.{Or,And} + on loong64 + +Change-Id: Ia4298a4d92fce210e3c743b2d5ce2b28b82d4971 +--- + src/runtime/race_loong64.s | 50 +++++++++++++++++++++++ + 2 files changed, 50 insertions(+) + +diff --git a/src/runtime/race_loong64.s b/src/runtime/race_loong64.s +index 04f264b21b..e6c11d44f7 100644 +--- a/src/runtime/race_loong64.s ++++ b/src/runtime/race_loong64.s +@@ -308,6 +308,56 @@ TEXT sync∕atomic·AddUintptr(SB), NOSPLIT, $0-24 + GO_ARGS + JMP sync∕atomic·AddInt64(SB) + ++// And ++TEXT sync∕atomic·AndInt32(SB), NOSPLIT, $0-20 ++ GO_ARGS ++ MOVV $__tsan_go_atomic32_fetch_and(SB), RCALL ++ JAL racecallatomic<>(SB) ++ RET ++ ++TEXT 
sync∕atomic·AndInt64(SB), NOSPLIT, $0-24 ++ GO_ARGS ++ MOVV $__tsan_go_atomic64_fetch_and(SB), RCALL ++ JAL racecallatomic<>(SB) ++ RET ++ ++TEXT sync∕atomic·AndUint32(SB), NOSPLIT, $0-20 ++ GO_ARGS ++ JMP sync∕atomic·AndInt32(SB) ++ ++TEXT sync∕atomic·AndUint64(SB), NOSPLIT, $0-24 ++ GO_ARGS ++ JMP sync∕atomic·AndInt64(SB) ++ ++TEXT sync∕atomic·AndUintptr(SB), NOSPLIT, $0-24 ++ GO_ARGS ++ JMP sync∕atomic·AndInt64(SB) ++ ++// Or ++TEXT sync∕atomic·OrInt32(SB), NOSPLIT, $0-20 ++ GO_ARGS ++ MOVV $__tsan_go_atomic32_fetch_or(SB), RCALL ++ JAL racecallatomic<>(SB) ++ RET ++ ++TEXT sync∕atomic·OrInt64(SB), NOSPLIT, $0-24 ++ GO_ARGS ++ MOVV $__tsan_go_atomic64_fetch_or(SB), RCALL ++ JAL racecallatomic<>(SB) ++ RET ++ ++TEXT sync∕atomic·OrUint32(SB), NOSPLIT, $0-20 ++ GO_ARGS ++ JMP sync∕atomic·OrInt32(SB) ++ ++TEXT sync∕atomic·OrUint64(SB), NOSPLIT, $0-24 ++ GO_ARGS ++ JMP sync∕atomic·OrInt64(SB) ++ ++TEXT sync∕atomic·OrUintptr(SB), NOSPLIT, $0-24 ++ GO_ARGS ++ JMP sync∕atomic·OrInt64(SB) ++ + // CompareAndSwap + TEXT sync∕atomic·CompareAndSwapInt32(SB), NOSPLIT, $0-17 + GO_ARGS +-- +2.38.1 + diff --git a/0041-cmd-internal-obj-loong64-add-F-MAXA-MINA-.-S-D-instr.patch b/0041-cmd-internal-obj-loong64-add-F-MAXA-MINA-.-S-D-instr.patch new file mode 100644 index 0000000000000000000000000000000000000000..847b4b1f3f22a9f0cb249f5e573e8f6c950ac84f --- /dev/null +++ b/0041-cmd-internal-obj-loong64-add-F-MAXA-MINA-.-S-D-instr.patch @@ -0,0 +1,107 @@ +From e652e32e37bfd898af333a32b73cfde6ab2116fa Mon Sep 17 00:00:00 2001 +From: Xiaolin Zhao +Date: Mon, 30 Dec 2024 10:08:58 +0800 +Subject: [PATCH 41/44] cmd/internal/obj/loong64: add F{MAXA/MINA}.{S/D} + instructions + +Go asm syntax: + F{MAXA/MINA}{F/D} FK, FJ, FD + +Equivalent platform assembler syntax: + f{maxa/mina}.{s/d} fd, fj, fk + +Ref: https://loongson.github.io/LoongArch-Documentation/LoongArch-Vol1-EN.html + +Change-Id: I6790657d2f36bdf5e6818b6c0aaa48117e782b8d +--- + src/cmd/asm/internal/asm/testdata/loong64enc1.s | 9 
+++++++++ + src/cmd/internal/obj/loong64/a.out.go | 6 ++++++ + src/cmd/internal/obj/loong64/anames.go | 4 ++++ + src/cmd/internal/obj/loong64/asm.go | 12 ++++++++++++ + 4 files changed, 31 insertions(+) + +diff --git a/src/cmd/asm/internal/asm/testdata/loong64enc1.s b/src/cmd/asm/internal/asm/testdata/loong64enc1.s +index b40d86e596..32d3b3f0a2 100644 +--- a/src/cmd/asm/internal/asm/testdata/loong64enc1.s ++++ b/src/cmd/asm/internal/asm/testdata/loong64enc1.s +@@ -346,6 +346,15 @@ lable2: + FTINTVF F0, F1 // 01241b01 + FTINTVD F0, F1 // 01281b01 + ++ FMAXAF F4, F5, F6 // a6900c01 ++ FMAXAF F4, F5 // a5900c01 ++ FMAXAD F4, F5, F6 // a6100d01 ++ FMAXAD F4, F5 // a5100d01 ++ FMINAF F4, F5, F6 // a6900e01 ++ FMINAF F4, F5 // a5900e01 ++ FMINAD F4, F5, F6 // a6100f01 ++ FMINAD F4, F5 // a5100f01 ++ + FTINTRMWF F0, F2 // 02041a01 + FTINTRMWD F0, F2 // 02081a01 + FTINTRMVF F0, F2 // 02241a01 +diff --git a/src/cmd/internal/obj/loong64/a.out.go b/src/cmd/internal/obj/loong64/a.out.go +index f2d4c41d68..857ea649e7 100644 +--- a/src/cmd/internal/obj/loong64/a.out.go ++++ b/src/cmd/internal/obj/loong64/a.out.go +@@ -688,6 +688,12 @@ const ( + AFMAXF + AFMAXD + ++ // 3.2.1.4 ++ AFMAXAF ++ AFMAXAD ++ AFMINAF ++ AFMINAD ++ + // 3.2.1.7 + AFCOPYSGF + AFCOPYSGD +diff --git a/src/cmd/internal/obj/loong64/anames.go b/src/cmd/internal/obj/loong64/anames.go +index aee0da0a6e..d2acdf7042 100644 +--- a/src/cmd/internal/obj/loong64/anames.go ++++ b/src/cmd/internal/obj/loong64/anames.go +@@ -223,6 +223,10 @@ var Anames = []string{ + "FMIND", + "FMAXF", + "FMAXD", ++ "FMAXAF", ++ "FMAXAD", ++ "FMINAF", ++ "FMINAD", + "FCOPYSGF", + "FCOPYSGD", + "FSCALEBF", +diff --git a/src/cmd/internal/obj/loong64/asm.go b/src/cmd/internal/obj/loong64/asm.go +index 2480cf9382..31f5376f8e 100644 +--- a/src/cmd/internal/obj/loong64/asm.go ++++ b/src/cmd/internal/obj/loong64/asm.go +@@ -1347,6 +1347,10 @@ func buildop(ctxt *obj.Link) { + opset(AFCOPYSGD, r0) + opset(AFSCALEBF, r0) + opset(AFSCALEBD, r0) ++ 
opset(AFMAXAF, r0) ++ opset(AFMAXAD, r0) ++ opset(AFMINAF, r0) ++ opset(AFMINAD, r0) + + case AFMADDF: + opset(AFMADDD, r0) +@@ -2811,6 +2815,14 @@ func (c *ctxt0) oprrr(a obj.As) uint32 { + return 0x211 << 15 // fmax.s + case AFMAXD: + return 0x212 << 15 // fmax.d ++ case AFMAXAF: ++ return 0x219 << 15 // fmaxa.s ++ case AFMAXAD: ++ return 0x21a << 15 // fmaxa.d ++ case AFMINAF: ++ return 0x21d << 15 // fmina.s ++ case AFMINAD: ++ return 0x21e << 15 // fmina.d + case AFSCALEBF: + return 0x221 << 15 // fscaleb.s + case AFSCALEBD: +-- +2.38.1 + diff --git a/0041-cmd-link-internal-loadelf-remove-useless-relocation-.patch b/0041-cmd-link-internal-loadelf-remove-useless-relocation-.patch deleted file mode 100644 index ff0c600382513bb104aef0a9820aad1dfd07faee..0000000000000000000000000000000000000000 --- a/0041-cmd-link-internal-loadelf-remove-useless-relocation-.patch +++ /dev/null @@ -1,31 +0,0 @@ -From 42188b511e4b16720defa3fe9178df3f6e130bfa Mon Sep 17 00:00:00 2001 -From: limeidan -Date: Tue, 24 Oct 2023 19:50:32 +0800 -Subject: [PATCH 41/51] cmd/link/internal/loadelf: remove useless relocation - size information of loong64 - -Change-Id: I86de74df2b6a205b454e3fee55607dcf28fb4a5f ---- - src/cmd/link/internal/loadelf/ldelf.go | 6 +----- - 1 file changed, 1 insertion(+), 5 deletions(-) - -diff --git a/src/cmd/link/internal/loadelf/ldelf.go b/src/cmd/link/internal/loadelf/ldelf.go -index 5ab7cf2204..1dc6a3f1f3 100644 ---- a/src/cmd/link/internal/loadelf/ldelf.go -+++ b/src/cmd/link/internal/loadelf/ldelf.go -@@ -1007,11 +1007,7 @@ func relSize(arch *sys.Arch, pn string, elftype uint32) (uint8, uint8, error) { - MIPS64 | uint32(elf.R_MIPS_GOT_DISP)<<16: - return 4, 4, nil - -- case LOONG64 | uint32(elf.R_LARCH_SOP_PUSH_PCREL)<<16, -- LOONG64 | uint32(elf.R_LARCH_SOP_PUSH_GPREL)<<16, -- LOONG64 | uint32(elf.R_LARCH_SOP_PUSH_ABSOLUTE)<<16, -- LOONG64 | uint32(elf.R_LARCH_MARK_LA)<<16, -- LOONG64 | uint32(elf.R_LARCH_SOP_POP_32_S_0_10_10_16_S2)<<16, -+ case LOONG64 | 
uint32(elf.R_LARCH_MARK_LA)<<16, - LOONG64 | uint32(elf.R_LARCH_MARK_PCREL)<<16, - LOONG64 | uint32(elf.R_LARCH_32_PCREL)<<16: - return 4, 4, nil --- -2.38.1 - diff --git a/0042-cmd-link-internal-loadelf-add-additional-relocations.patch b/0042-cmd-link-internal-loadelf-add-additional-relocations.patch deleted file mode 100644 index 6283e406b6e4fad5ec16d87d106001d9c90bcbc4..0000000000000000000000000000000000000000 --- a/0042-cmd-link-internal-loadelf-add-additional-relocations.patch +++ /dev/null @@ -1,39 +0,0 @@ -From 5abed3380e38a302aaf57a07ad6850630a28fbc8 Mon Sep 17 00:00:00 2001 -From: Guoqi Chen -Date: Wed, 25 Oct 2023 17:54:51 +0800 -Subject: [PATCH 42/51] cmd/link/internal/loadelf: add additional relocations - for loong64 - -The Linker Relaxation feature on Loong64 is already supported in binutils 2.41. -The intermediate code generated after enabling this feature introduces three -reloc types. - -elf.R_LARCH_B26 -elf.R_LARCH_ADD32 -elf.R_LARCH_SUB32 - -Fixes #63725 - -Signed-off-by: Guoqi Chen -Change-Id: I53a000d99049a14a7eb9497909c8d4d8b8913757 ---- - src/cmd/link/internal/loadelf/ldelf.go | 3 +++ - 1 file changed, 3 insertions(+) - -diff --git a/src/cmd/link/internal/loadelf/ldelf.go b/src/cmd/link/internal/loadelf/ldelf.go -index 1dc6a3f1f3..dcb34b871b 100644 ---- a/src/cmd/link/internal/loadelf/ldelf.go -+++ b/src/cmd/link/internal/loadelf/ldelf.go -@@ -1009,6 +1009,9 @@ func relSize(arch *sys.Arch, pn string, elftype uint32) (uint8, uint8, error) { - - case LOONG64 | uint32(elf.R_LARCH_MARK_LA)<<16, - LOONG64 | uint32(elf.R_LARCH_MARK_PCREL)<<16, -+ LOONG64 | uint32(elf.R_LARCH_ADD32)<<16, -+ LOONG64 | uint32(elf.R_LARCH_SUB32)<<16, -+ LOONG64 | uint32(elf.R_LARCH_B26)<<16, - LOONG64 | uint32(elf.R_LARCH_32_PCREL)<<16: - return 4, 4, nil - --- -2.38.1 - diff --git a/0042-math-implement-func-archExp-and-archExp2-in-assembly.patch b/0042-math-implement-func-archExp-and-archExp2-in-assembly.patch new file mode 100644 index 
0000000000000000000000000000000000000000..a9303c24b03f58fa52421ec53e6a08ff1d6c7e5f --- /dev/null +++ b/0042-math-implement-func-archExp-and-archExp2-in-assembly.patch @@ -0,0 +1,358 @@ +From f463c4a1db9ac0e4be9d67bc53f4ddb8515232d3 Mon Sep 17 00:00:00 2001 +From: Xiaolin Zhao +Date: Tue, 31 Dec 2024 21:02:47 +0800 +Subject: [PATCH 42/44] math: implement func archExp and archExp2 in assembly + on loong64 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +goos: linux +goarch: loong64 +pkg: math +cpu: Loongson-3A6000 @ 2500.00MHz + | bench.old | bench.new | + | sec/op | sec/op vs base | +Exp 26.30n ± 0% 12.93n ± 0% -50.85% (p=0.000 n=10) +ExpGo 26.86n ± 0% 26.92n ± 0% +0.22% (p=0.000 n=10) +Expm1 16.76n ± 0% 16.75n ± 0% ~ (p=0.060 n=10) +Exp2 23.05n ± 0% 12.12n ± 0% -47.42% (p=0.000 n=10) +Exp2Go 23.41n ± 0% 23.47n ± 0% +0.28% (p=0.000 n=10) +geomean 22.97n 17.54n -23.64% + +goos: linux +goarch: loong64 +pkg: math/cmplx +cpu: Loongson-3A6000 @ 2500.00MHz + | bench.old | bench.new | + | sec/op | sec/op vs base | +Exp 51.32n ± 0% 35.41n ± 0% -30.99% (p=0.000 n=10) + +goos: linux +goarch: loong64 +pkg: math +cpu: Loongson-3A5000 @ 2500.00MHz + | bench.old | bench.new | + | sec/op | sec/op vs base | +Exp 50.27n ± 0% 48.75n ± 1% -3.01% (p=0.000 n=10) +ExpGo 50.72n ± 0% 50.44n ± 0% -0.55% (p=0.000 n=10) +Expm1 28.40n ± 0% 28.32n ± 0% ~ (p=0.360 n=10) +Exp2 50.09n ± 0% 21.49n ± 1% -57.10% (p=0.000 n=10) +Exp2Go 50.05n ± 0% 49.69n ± 0% -0.72% (p=0.000 n=10) +geomean 44.85n 37.52n -16.35% + +goos: linux +goarch: loong64 +pkg: math/cmplx +cpu: Loongson-3A5000 @ 2500.00MHz + | bench.old | bench.new | + | sec/op | sec/op vs base | +Exp 88.56n ± 0% 67.29n ± 0% -24.03% (p=0.000 n=10) + +Change-Id: I89e456d26fc075d83335ee4a31227d2aface5714 +--- + src/math/exp2_asm.go | 2 +- + src/math/exp2_noasm.go | 2 +- + src/math/exp_asm.go | 2 +- + src/math/exp_loong64.s | 236 +++++++++++++++++++++++++++++++++++++++++ + src/math/exp_noasm.go | 2 +- + 
5 files changed, 240 insertions(+), 4 deletions(-) + create mode 100644 src/math/exp_loong64.s + +diff --git a/src/math/exp2_asm.go b/src/math/exp2_asm.go +index c26b2c3fab..1e78759374 100644 +--- a/src/math/exp2_asm.go ++++ b/src/math/exp2_asm.go +@@ -2,7 +2,7 @@ + // Use of this source code is governed by a BSD-style + // license that can be found in the LICENSE file. + +-//go:build arm64 ++//go:build arm64 || loong64 + + package math + +diff --git a/src/math/exp2_noasm.go b/src/math/exp2_noasm.go +index c2b409329f..847138b622 100644 +--- a/src/math/exp2_noasm.go ++++ b/src/math/exp2_noasm.go +@@ -2,7 +2,7 @@ + // Use of this source code is governed by a BSD-style + // license that can be found in the LICENSE file. + +-//go:build !arm64 ++//go:build !arm64 && !loong64 + + package math + +diff --git a/src/math/exp_asm.go b/src/math/exp_asm.go +index 424442845b..125529fca3 100644 +--- a/src/math/exp_asm.go ++++ b/src/math/exp_asm.go +@@ -2,7 +2,7 @@ + // Use of this source code is governed by a BSD-style + // license that can be found in the LICENSE file. + +-//go:build amd64 || arm64 || s390x ++//go:build amd64 || arm64 || loong64 || s390x + + package math + +diff --git a/src/math/exp_loong64.s b/src/math/exp_loong64.s +new file mode 100644 +index 0000000000..3d24214289 +--- /dev/null ++++ b/src/math/exp_loong64.s +@@ -0,0 +1,236 @@ ++// Copyright 2024 The Go Authors. All rights reserved. ++// Use of this source code is governed by a BSD-style ++// license that can be found in the LICENSE file. 
++ ++#include "textflag.h" ++ ++#define NearZero 0x3e30000000000000 // 2**-28 ++#define PosInf 0x7ff0000000000000 ++#define FracMask 0x000fffffffffffff ++#define C1 0x3cb0000000000000 // 2**-52 ++ ++DATA exprodata<>+0(SB)/8, $0.0 ++DATA exprodata<>+8(SB)/8, $0.5 ++DATA exprodata<>+16(SB)/8, $1.0 ++DATA exprodata<>+24(SB)/8, $2.0 ++DATA exprodata<>+32(SB)/8, $6.93147180369123816490e-01 // Ln2Hi ++DATA exprodata<>+40(SB)/8, $1.90821492927058770002e-10 // Ln2Lo ++DATA exprodata<>+48(SB)/8, $1.44269504088896338700e+00 // Log2e ++DATA exprodata<>+56(SB)/8, $7.09782712893383973096e+02 // Overflow ++DATA exprodata<>+64(SB)/8, $-7.45133219101941108420e+02 // Underflow ++DATA exprodata<>+72(SB)/8, $1.0239999999999999e+03 // Overflow2 ++DATA exprodata<>+80(SB)/8, $-1.0740e+03 // Underflow2 ++DATA exprodata<>+88(SB)/8, $3.7252902984619141e-09 // NearZero ++GLOBL exprodata<>+0(SB), NOPTR|RODATA, $96 ++ ++DATA expmultirodata<>+0(SB)/8, $1.66666666666666657415e-01 // P1 ++DATA expmultirodata<>+8(SB)/8, $-2.77777777770155933842e-03 // P2 ++DATA expmultirodata<>+16(SB)/8, $6.61375632143793436117e-05 // P3 ++DATA expmultirodata<>+24(SB)/8, $-1.65339022054652515390e-06 // P4 ++DATA expmultirodata<>+32(SB)/8, $4.13813679705723846039e-08 // P5 ++GLOBL expmultirodata<>+0(SB), NOPTR|RODATA, $40 ++ ++// Exp returns e**x, the base-e exponential of x. ++// This is an assembly implementation of the method used for function Exp in file exp.go. 
++// ++// func Exp(x float64) float64 ++TEXT ·archExp(SB),$0-16 ++ MOVD x+0(FP), F0 // F0 = x ++ ++ MOVV $exprodata<>+0(SB), R10 ++ MOVD 56(R10), F1 // Overflow ++ MOVD 64(R10), F2 // Underflow ++ MOVD 88(R10), F3 // NearZero ++ MOVD 16(R10), F17 // 1.0 ++ ++ CMPEQD F0, F0, FCC0 ++ BFPF isNaN // x = NaN, return NaN ++ ++ CMPGTD F0, F1, FCC0 ++ BFPT overflow // x > Overflow, return PosInf ++ ++ CMPGTD F2, F0, FCC0 ++ BFPT underflow // x < Underflow, return 0 ++ ++ ABSD F0, F5 ++ CMPGTD F3, F5, FCC0 ++ BFPT nearzero // fabs(x) < NearZero, return 1 + x ++ ++ // argument reduction, x = k*ln2 + r, |r| <= 0.5*ln2 ++ // computed as r = hi - lo for extra precision. ++ MOVD 0(R10), F5 // 0.0 ++ MOVD 8(R10), F3 // 0.5 ++ MOVD 48(R10), F2 // Log2e ++ CMPGTD F0, F5, FCC0 ++ BFPT add // x > 0 ++sub: ++ FMSUBD F3, F2, F0, F3 // Log2e*x - 0.5 ++ JMP 2(PC) ++add: ++ FMADDD F3, F2, F0, F3 // Log2e*x + 0.5 ++ ++ FTINTRZVD F3, F4 // float64 -> int64 ++ MOVV F4, R5 // R5 = int(k) ++ FFINTDV F4, F3 // int64 -> float64 ++ ++ MOVD 32(R10), F4 // F4 = Ln2Hi ++ MOVD 40(R10), F5 // F5 = Ln2Lo ++ FNMSUBD F0, F3, F4, F4 // F4 = hi = x - float64(int(k))*Ln2Hi ++ MULD F3, F5, F5 // F5 = lo = float64(int(k)) * Ln2Lo ++ SUBD F5, F4, F6 // F6 = r = hi - lo ++ MULD F6, F6, F7 // F7 = t = r * r ++ ++ // compute c ++ MOVV $expmultirodata<>+0(SB), R11 ++ MOVD 32(R11), F8 // F8 = P5 ++ MOVD 24(R11), F9 // F9 = P4 ++ FMADDD F9, F8, F7, F13 // P4+t*P5 ++ MOVD 16(R11), F10 // F10 = P3 ++ FMADDD F10, F13, F7, F13 // P3+t*(P4+t*P5) ++ MOVD 8(R11), F11 // F11 = P2 ++ FMADDD F11, F13, F7, F13 // P2+t*(P3+t*(P4+t*P5)) ++ MOVD 0(R11), F12 // F12 = P1 ++ FMADDD F12, F13, F7, F13 // P1+t*(P2+t*(P3+t*(P4+t*P5))) ++ FNMSUBD F6, F13, F7, F13 // F13 = c = r - t*(P1+t*(P2+t*(P3+t*(P4+t*P5)))) ++ ++ // compute y ++ MOVD 24(R10), F14 // F14 = 2.0 ++ SUBD F13, F14, F14 // F14 = 2 - c ++ MULD F6, F13, F15 // F15 = r*c ++ DIVD F14, F15, F15 // F15 = (r*c)/(2-c) ++ SUBD F15, F5, F15 // F15 = lo-(r*c)/(2-c) ++ SUBD F4, F15, 
F15 // F15 = (lo-(r*c)/(2-c))-hi ++ SUBD F15, F17, F16 // F16 = y = 1-((lo-(r*c)/(2-c))-hi) ++ ++ // inline Ldexp(y, k), benefit: ++ // 1, no parameter pass overhead. ++ // 2, skip unnecessary checks for Inf/NaN/Zero ++ MOVV F16, R4 ++ MOVV $FracMask, R9 ++ AND R9, R4, R6 // fraction ++ SRLV $52, R4, R7 // exponent ++ ADDV R5, R7 // R5 = int(k) ++ MOVV $1, R12 ++ BGE R7, R12, normal ++ ADDV $52, R7 // denormal ++ MOVV $C1, R8 ++ MOVV R8, F17 // m = 2**-52 ++normal: ++ SLLV $52, R7 ++ OR R7, R6, R4 ++ MOVV R4, F0 ++ MULD F17, F0 // return m * x ++ MOVD F0, ret+8(FP) ++ RET ++nearzero: ++ ADDD F17, F0, F0 ++isNaN: ++ MOVD F0, ret+8(FP) ++ RET ++underflow: ++ MOVV R0, ret+8(FP) ++ RET ++overflow: ++ MOVV $PosInf, R4 ++ MOVV R4, ret+8(FP) ++ RET ++ ++ ++// Exp2 returns 2**x, the base-2 exponential of x. ++// This is an assembly implementation of the method used for function Exp2 in file exp.go. ++// ++// func Exp2(x float64) float64 ++TEXT ·archExp2(SB),$0-16 ++ MOVD x+0(FP), F0 // F0 = x ++ ++ MOVV $exprodata<>+0(SB), R10 ++ MOVD 72(R10), F1 // Overflow2 ++ MOVD 80(R10), F2 // Underflow2 ++ MOVD 88(R10), F3 // NearZero ++ ++ CMPEQD F0, F0, FCC0 ++ BFPF isNaN // x = NaN, return NaN ++ ++ CMPGTD F0, F1, FCC0 ++ BFPT overflow // x > Overflow, return PosInf ++ ++ CMPGTD F2, F0, FCC0 ++ BFPT underflow // x < Underflow, return 0 ++ ++ // argument reduction; x = r*lg(e) + k with |r| <= ln(2)/2 ++ // computed as r = hi - lo for extra precision. 
++ MOVD 0(R10), F10 // 0.0 ++ MOVD 8(R10), F2 // 0.5 ++ CMPGTD F0, F10, FCC0 ++ BFPT add ++sub: ++ SUBD F2, F0, F3 // x - 0.5 ++ JMP 2(PC) ++add: ++ ADDD F2, F0, F3 // x + 0.5 ++ ++ FTINTRZVD F3, F4 // float64 -> int64 ++ MOVV F4, R5 // R5 = int(k) ++ FFINTDV F4, F3 // F3 = float64(int(k)) ++ ++ MOVD 32(R10), F4 // F4 = Ln2Hi ++ MOVD 40(R10), F5 // F5 = Ln2Lo ++ SUBD F3, F0, F3 // t = x - float64(int(k)) ++ MULD F3, F4 // F4 = hi = t * Ln2Hi ++ FNMSUBD F10, F3, F5, F5 // F5 = lo = -t * Ln2Lo ++ SUBD F5, F4, F6 // F6 = r = hi - lo ++ MULD F6, F6, F7 // F7 = t = r * r ++ ++ // compute c ++ MOVV $expmultirodata<>+0(SB), R11 ++ MOVD 32(R11), F8 // F8 = P5 ++ MOVD 24(R11), F9 // F9 = P4 ++ FMADDD F9, F8, F7, F13 // P4+t*P5 ++ MOVD 16(R11), F10 // F10 = P3 ++ FMADDD F10, F13, F7, F13 // P3+t*(P4+t*P5) ++ MOVD 8(R11), F11 // F11 = P2 ++ FMADDD F11, F13, F7, F13 // P2+t*(P3+t*(P4+t*P5)) ++ MOVD 0(R11), F12 // F12 = P1 ++ FMADDD F12, F13, F7, F13 // P1+t*(P2+t*(P3+t*(P4+t*P5))) ++ FNMSUBD F6, F13, F7, F13 // F13 = c = r - t*(P1+t*(P2+t*(P3+t*(P4+t*P5)))) ++ ++ // compute y ++ MOVD 24(R10), F14 // F14 = 2.0 ++ SUBD F13, F14, F14 // F14 = 2 - c ++ MULD F6, F13, F15 // F15 = r*c ++ DIVD F14, F15 // F15 = (r*c)/(2-c) ++ ++ MOVD 16(R10), F17 // 1.0 ++ SUBD F15, F5, F15 // lo-(r*c)/(2-c) ++ SUBD F4, F15, F15 // (lo-(r*c)/(2-c))-hi ++ SUBD F15, F17, F16 // F16 = y = 1-((lo-(r*c)/(2-c))-hi) ++ ++ // inline Ldexp(y, k), benefit: ++ // 1, no parameter pass overhead. 
++ // 2, skip unnecessary checks for Inf/NaN/Zero ++ MOVV F16, R4 ++ MOVV $FracMask, R9 ++ SRLV $52, R4, R7 // exponent ++ AND R9, R4, R6 // fraction ++ ADDV R5, R7 // R5 = int(k) ++ MOVV $1, R12 ++ BGE R7, R12, normal ++ ++ ADDV $52, R7 // denormal ++ MOVV $C1, R8 ++ MOVV R8, F17 // m = 2**-52 ++normal: ++ SLLV $52, R7 ++ OR R7, R6, R4 ++ MOVV R4, F0 ++ MULD F17, F0 // return m * x ++isNaN: ++ MOVD F0, ret+8(FP) ++ RET ++underflow: ++ MOVV R0, ret+8(FP) ++ RET ++overflow: ++ MOVV $PosInf, R4 ++ MOVV R4, ret+8(FP) ++ RET +diff --git a/src/math/exp_noasm.go b/src/math/exp_noasm.go +index bd3f02412a..bf5e84b736 100644 +--- a/src/math/exp_noasm.go ++++ b/src/math/exp_noasm.go +@@ -2,7 +2,7 @@ + // Use of this source code is governed by a BSD-style + // license that can be found in the LICENSE file. + +-//go:build !amd64 && !arm64 && !s390x ++//go:build !amd64 && !arm64 && !loong64 && !s390x + + package math + +-- +2.38.1 + diff --git a/0043-cmd-link-add-new-relocations-numbered-101-to-109-for.patch b/0043-cmd-link-add-new-relocations-numbered-101-to-109-for.patch deleted file mode 100644 index ef0bcc033e1e59b683df4a501dab32a12122f3c3..0000000000000000000000000000000000000000 --- a/0043-cmd-link-add-new-relocations-numbered-101-to-109-for.patch +++ /dev/null @@ -1,86 +0,0 @@ -From 913dd677b2f47cdd5ae2284f9c98e35b405ec80b Mon Sep 17 00:00:00 2001 -From: Guoqi Chen -Date: Thu, 26 Oct 2023 15:12:29 +0800 -Subject: [PATCH 43/51] cmd/link: add new relocations numbered 101 to 109 for - loong64 - -Signed-off-by: Guoqi Chen -Change-Id: Ieb845ce03a039f7ee9a39f243145193b5f9ab1dc ---- - src/cmd/link/internal/loadelf/ldelf.go | 15 ++++++++++++++- - src/debug/elf/elf.go | 18 ++++++++++++++++++ - 2 files changed, 32 insertions(+), 1 deletion(-) - -diff --git a/src/cmd/link/internal/loadelf/ldelf.go b/src/cmd/link/internal/loadelf/ldelf.go -index dcb34b871b..2a013dde0d 100644 ---- a/src/cmd/link/internal/loadelf/ldelf.go -+++ b/src/cmd/link/internal/loadelf/ldelf.go -@@ -1007,15 
+1007,28 @@ func relSize(arch *sys.Arch, pn string, elftype uint32) (uint8, uint8, error) { - MIPS64 | uint32(elf.R_MIPS_GOT_DISP)<<16: - return 4, 4, nil - -+ case LOONG64 | uint32(elf.R_LARCH_ADD8)<<16, -+ LOONG64 | uint32(elf.R_LARCH_SUB8)<<16: -+ return 1, 1, nil -+ -+ case LOONG64 | uint32(elf.R_LARCH_ADD16)<<16, -+ LOONG64 | uint32(elf.R_LARCH_SUB16)<<16: -+ return 2, 2, nil -+ - case LOONG64 | uint32(elf.R_LARCH_MARK_LA)<<16, - LOONG64 | uint32(elf.R_LARCH_MARK_PCREL)<<16, -+ LOONG64 | uint32(elf.R_LARCH_ADD24)<<16, - LOONG64 | uint32(elf.R_LARCH_ADD32)<<16, -+ LOONG64 | uint32(elf.R_LARCH_SUB24)<<16, - LOONG64 | uint32(elf.R_LARCH_SUB32)<<16, - LOONG64 | uint32(elf.R_LARCH_B26)<<16, - LOONG64 | uint32(elf.R_LARCH_32_PCREL)<<16: - return 4, 4, nil - -- case LOONG64 | uint32(elf.R_LARCH_64)<<16: -+ case LOONG64 | uint32(elf.R_LARCH_64)<<16, -+ LOONG64 | uint32(elf.R_LARCH_ADD64)<<16, -+ LOONG64 | uint32(elf.R_LARCH_SUB64)<<16, -+ LOONG64 | uint32(elf.R_LARCH_64_PCREL)<<16: - return 8, 8, nil - - case S390X | uint32(elf.R_390_8)<<16: -diff --git a/src/debug/elf/elf.go b/src/debug/elf/elf.go -index c982c684ba..63acddc166 100644 ---- a/src/debug/elf/elf.go -+++ b/src/debug/elf/elf.go -@@ -2365,6 +2365,15 @@ const ( - R_LARCH_TLS_GD_HI20 R_LARCH = 98 - R_LARCH_32_PCREL R_LARCH = 99 - R_LARCH_RELAX R_LARCH = 100 -+ R_LARCH_DELETE R_LARCH = 101 -+ R_LARCH_ALIGN R_LARCH = 102 -+ R_LARCH_PCREL20_S2 R_LARCH = 103 -+ R_LARCH_CFA R_LARCH = 104 -+ R_LARCH_ADD6 R_LARCH = 105 -+ R_LARCH_SUB6 R_LARCH = 106 -+ R_LARCH_ADD_ULEB128 R_LARCH = 107 -+ R_LARCH_SUB_ULEB128 R_LARCH = 108 -+ R_LARCH_64_PCREL R_LARCH = 109 - ) - - var rlarchStrings = []intName{ -@@ -2457,6 +2466,15 @@ var rlarchStrings = []intName{ - {98, "R_LARCH_TLS_GD_HI20"}, - {99, "R_LARCH_32_PCREL"}, - {100, "R_LARCH_RELAX"}, -+ {101, "R_LARCH_DELETE"}, -+ {102, "R_LARCH_ALIGN"}, -+ {103, "R_LARCH_PCREL20_S2"}, -+ {104, "R_LARCH_CFA"}, -+ {105, "R_LARCH_ADD6"}, -+ {106, "R_LARCH_SUB6"}, -+ {107, 
"R_LARCH_ADD_ULEB128"}, -+ {108, "R_LARCH_SUB_ULEB128"}, -+ {109, "R_LARCH_64_PCREL"}, - } - - func (i R_LARCH) String() string { return stringName(uint32(i), rlarchStrings, false) } --- -2.38.1 - diff --git a/0043-math-implement-func-archLog-in-assembly-on-loong64.patch b/0043-math-implement-func-archLog-in-assembly-on-loong64.patch new file mode 100644 index 0000000000000000000000000000000000000000..f01c831480e7521902cef28d0a507e0a1dfbf614 --- /dev/null +++ b/0043-math-implement-func-archLog-in-assembly-on-loong64.patch @@ -0,0 +1,217 @@ +From 066bd3bf1a03e21cc27b463164461a56ce107d59 Mon Sep 17 00:00:00 2001 +From: Xiaolin Zhao +Date: Mon, 6 Jan 2025 15:40:06 +0800 +Subject: [PATCH 43/44] math: implement func archLog in assembly on loong64 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +goos: linux +goarch: loong64 +pkg: math +cpu: Loongson-3A6000 @ 2500.00MHz + | bench.old | bench.new | + | sec/op | sec/op vs base | +Log 18.87n ± 0% 12.85n ± 0% -31.90% (p=0.000 n=10) +Logb 5.203n ± 0% 5.604n ± 0% +7.71% (p=0.000 n=10) +Log1p 16.78n ± 0% 16.78n ± 0% ~ (p=0.450 n=10) +Log10 20.47n ± 0% 13.59n ± 0% -33.61% (p=0.000 n=10) +Log2 6.804n ± 0% 8.805n ± 0% +29.40% (p=0.000 n=10) +geomean 11.81n 10.77n -8.82% + +goos: linux +goarch: loong64 +pkg: math +cpu: Loongson-3A5000 @ 2500.00MHz + | bench.old | bench.new | + | sec/op | sec/op vs base | +Log 28.28n ± 0% 24.95n ± 1% -11.78% (p=0.000 n=10) +Logb 7.609n ± 0% 7.207n ± 0% -5.29% (p=0.000 n=10) +Log1p 27.27n ± 0% 27.18n ± 1% ~ (p=0.078 n=10) +Log10 29.56n ± 0% 26.56n ± 0% -10.16% (p=0.000 n=10) +Log2 11.43n ± 0% 10.41n ± 0% -8.92% (p=0.000 n=10) +geomean 18.17n 16.83n -7.38% + +Change-Id: I42a17280874c28b31a3b5c75fc19ddac90c92f32 +--- + src/math/log_asm.go | 2 +- + src/math/log_loong64.s | 140 +++++++++++++++++++++++++++++++++++++++++ + src/math/log_stub.go | 2 +- + 3 files changed, 142 insertions(+), 2 deletions(-) + create mode 100644 src/math/log_loong64.s + +diff --git 
a/src/math/log_asm.go b/src/math/log_asm.go +index 848cce13b2..82372d1e64 100644 +--- a/src/math/log_asm.go ++++ b/src/math/log_asm.go +@@ -2,7 +2,7 @@ + // Use of this source code is governed by a BSD-style + // license that can be found in the LICENSE file. + +-//go:build amd64 || s390x ++//go:build amd64 || loong64 || s390x + + package math + +diff --git a/src/math/log_loong64.s b/src/math/log_loong64.s +new file mode 100644 +index 0000000000..534295cb53 +--- /dev/null ++++ b/src/math/log_loong64.s +@@ -0,0 +1,140 @@ ++// Copyright 2025 The Go Authors. All rights reserved. ++// Use of this source code is governed by a BSD-style ++// license that can be found in the LICENSE file. ++ ++#include "textflag.h" ++ ++DATA logrodata<>+0(SB)/8, $0.5 ++DATA logrodata<>+8(SB)/8, $1.0 ++DATA logrodata<>+16(SB)/8, $2.0 ++DATA logrodata<>+24(SB)/8, $7.07106781186547524401e-01 // sqrt(2)/2 ++DATA logrodata<>+32(SB)/8, $6.93147180369123816490e-01 // Ln2Hi ++DATA logrodata<>+40(SB)/8, $1.90821492927058770002e-10 // Ln2Lo ++DATA logrodata<>+48(SB)/8, $6.666666666666735130e-01 // L1 ++DATA logrodata<>+56(SB)/8, $3.999999999940941908e-01 // L2 ++DATA logrodata<>+64(SB)/8, $2.857142874366239149e-01 // L3 ++DATA logrodata<>+72(SB)/8, $2.222219843214978396e-01 // L4 ++DATA logrodata<>+80(SB)/8, $1.818357216161805012e-01 // L5 ++DATA logrodata<>+88(SB)/8, $1.531383769920937332e-01 // L6 ++DATA logrodata<>+96(SB)/8, $1.479819860511658591e-01 // L7 ++DATA logrodata<>+104(SB)/8, $2.2250738585072014e-308 // 2**-1022 ++GLOBL logrodata<>+0(SB), NOPTR|RODATA, $112 ++ ++#define NaN 0x7FF8000000000001 ++#define NegInf 0xFFF0000000000000 ++#define PosInf 0x7FF0000000000000 ++ ++// func Log(x float64) float64 ++TEXT ·archLog(SB),NOSPLIT,$0 ++ // test bits for special cases ++ MOVD x+0(FP), F0 ++ MOVV x+0(FP), R4 ++ MOVV $logrodata<>+0(SB), R10 ++ FCLASSD F0, F4 ++ MOVV F4, R5 ++ AND $67, R5, R6 // NaN or +Inf ++ AND $544, R5, R7 // +0 or -0 ++ AND $28, R5, R8 // <0 ++ BNE R6, R0, isInfOrNaN ++ 
BNE R7, R0, isZero ++ BNE R8, R0, isNegative ++ ++ // reduce ++ // f1, ki := Frexp(x) FIXME ++ MOVD 104(R10), F4 ++ ABSD F0, F1 ++ CMPGED F1, F4, FCC0 ++ BFPT direct_return ++ MOVV $0x10000000000000, R5 // 1 << 52 ++ MULV R4, R5, R4 // R4 = y ++ MOVV $-52, R15 // R15 = ki (exp) ++ JMP 2(PC) ++direct_return: ++ MOVV $0, R15 // R15 = ki (exp) F0 = y ++ ++ MOVV $0x000FFFFFFFFFFFFF, R5 ++ AND R4, R5, R7 // x &^= mask << shift ++ MOVV $0x3FE0000000000000, R6 // (-1 + bias) << shift ++ OR R6, R7 // x |= (-1 + bias) << shift ++ MOVV R7, F2 // F2 = f1 ++ SRLV $52, R4 // x >> shift ++ AND $0x7FF, R4 // (x>>shift)&mask ++ SUBV $0x3FE, R4 // int((x>>shift)&mask) - bias + 1 ++ ADDV R4, R15, R4 // R4 = exp ++ ++ // if f1 < math.Sqrt2/2 { k -= 1; f1 *= 2 } ++ MOVD 0(R10), F10 // 0.5 ++ MOVD 8(R10), F3 // 1.0 ++ MOVD 16(R10), F4 // 2.0 ++ MOVD 24(R10), F0 // sqrt(2)/2 ++ CMPGED F2, F0, FCC0 // if f1 >= Sqrt2/2 ++ BFPT next ++ MULD F4, F2, F2 // f1 *= 2 ++ SUBV $1, R4, R4 ++next: ++ MOVV R4, F1 // k-- ++ FFINTDV F1, F1 // F1 = k ++ // f := f1 - 1 ++ SUBD F3, F2, F2 ++ ++ // compute ++ MOVD 96(R10), F17 // L7 ++ MOVD 80(R10), F15 // L5 ++ MOVD 64(R10), F13 // L3 ++ MOVD 48(R10), F11 // L1 ++ ADDD F4, F2, F3 // 2 + f ++ DIVD F3, F2, F4 // s := f / (2 + f) ++ MULD F4, F4, F5 // s2 := s * s ++ MULD F5, F5, F6 // s4 := s2 * s2 ++ // t1 := s2 * (L1 + s4*(L3+s4*(L5+s4*L7))) ++ MULD F17, F6, F7 // s4*L7 ++ ADDD F15, F7 // L5+s4*L7 ++ MULD F6, F7 // s4*(L5+s4*L7) ++ ADDD F13, F7 // L3+s4*(L5+s4*L7) ++ MULD F6, F7 // s4*(L3+s4*(L5+s4*L7)) ++ ADDD F11, F7 // L1 + s4*(L3+s4*(L5+s4*L7)) ++ MULD F5, F7 // s2 * (L1 + s4*(L3+s4*(L5+s4*L7))) ++ ++ MOVD 88(R10), F16 // L6 ++ MOVD 72(R10), F14 // L4 ++ MOVD 56(R10), F12 // L2 ++ // t2 := s4 * (L2 + s4*(L4+s4*L6)) ++ MULD F6, F16, F8 // s4*L6 ++ ADDD F14, F8 // L4+s4*L6 ++ MULD F6, F8 // s4*(L4+s4*L6) ++ ADDD F12, F8 // L2 + s4*(L4+s4*L6) ++ MULD F6, F8 // s4 * (L2 + s4*(L4+s4*L6)) ++ ++ // R := t1 + t2 ++ ADDD F7, F8 ++ ++ // hfsq := 0.5 * f * f ++ 
MULD F2, F2, F12 // f * f ++ MULD F10, F12, F9 // 0.5 * f * f ++ ++ // return k*Ln2Hi - ((hfsq - (s*(hfsq+R) + k*Ln2Lo)) - f) ++ MOVD 40(R10), F19 // Ln2Lo ++ MOVD 32(R10), F18 // Ln2Hi ++ // f9=hfsq, f1=k, f4=s, f8=R, f2=f ++ ADDD F9, F8, F10 // F10 = hfsq+R ++ MULD F1, F19, F11 // F11 = k*Ln2Lo ++ MULD F10, F4, F12 // F12 = s*(hfsq+R) ++ MULD F1, F18, F15 // F15 = k*Ln2Hi ++ ADDD F12, F11, F13 // F13 = s*(hfsq+R) + k*Ln2Lo ++ SUBD F13, F9, F14 // F14 = hfsq - (s*(hfsq+R) + k*Ln2Lo) ++ SUBD F2, F14, F14 // F14 = (hfsq - (s*(hfsq+R) + k*Ln2Lo)) - f ++ SUBD F14, F15, F0 ++ MOVD F0, ret+8(FP) ++ RET ++ ++isInfOrNaN: ++ MOVV R4, ret+8(FP) // +Inf or NaN, return x ++ RET ++isNegative: ++ MOVV $NaN, R4 ++ MOVV R4, ret+8(FP) // return NaN ++ RET ++isZero: ++ MOVV $NegInf, R4 ++ MOVV R4, ret+8(FP) // return -Inf ++ RET +diff --git a/src/math/log_stub.go b/src/math/log_stub.go +index d35992bf37..1dd4058435 100644 +--- a/src/math/log_stub.go ++++ b/src/math/log_stub.go +@@ -2,7 +2,7 @@ + // Use of this source code is governed by a BSD-style + // license that can be found in the LICENSE file. 
+ +-//go:build !amd64 && !s390x ++//go:build !amd64 && !loong64 && !s390x + + package math + +-- +2.38.1 + diff --git a/0044-api-add-new-relocations-numbered-101-to-109-for-loon.patch b/0044-api-add-new-relocations-numbered-101-to-109-for-loon.patch deleted file mode 100644 index 230988c443f0efa0407c4a4fa21bcab3d466a3f2..0000000000000000000000000000000000000000 --- a/0044-api-add-new-relocations-numbered-101-to-109-for-loon.patch +++ /dev/null @@ -1,44 +0,0 @@ -From e352e1eb642a64b0656d25f7c279544702da6e22 Mon Sep 17 00:00:00 2001 -From: chenguoqi -Date: Fri, 27 Oct 2023 16:10:20 +0800 -Subject: [PATCH 44/51] api: add new relocations numbered 101 to 109 for - loong64 - -Signed-off-by: chenguoqi -Change-Id: If078f7865e8d5c5ae963e79cf7157eca7e7a0817 ---- - api/go1.20.txt | 18 ++++++++++++++++++ - 1 file changed, 18 insertions(+) - -diff --git a/api/go1.20.txt b/api/go1.20.txt -index 8deb435a68..6fe543d3e8 100644 ---- a/api/go1.20.txt -+++ b/api/go1.20.txt -@@ -118,6 +118,24 @@ pkg debug/elf, const R_LARCH_TLS_LE_HI20 = 83 #54222 - pkg debug/elf, const R_LARCH_TLS_LE_HI20 R_LARCH #54222 - pkg debug/elf, const R_LARCH_TLS_LE_LO12 = 84 #54222 - pkg debug/elf, const R_LARCH_TLS_LE_LO12 R_LARCH #54222 -+pkg debug/elf, const R_LARCH_DELETE = 101 #54222 -+pkg debug/elf, const R_LARCH_DELETE R_LARCH #54222 -+pkg debug/elf, const R_LARCH_ALIGN = 102 #54222 -+pkg debug/elf, const R_LARCH_ALIGN R_LARCH #54222 -+pkg debug/elf, const R_LARCH_PCREL20_S2 = 103 #54222 -+pkg debug/elf, const R_LARCH_PCREL20_S2 R_LARCH #54222 -+pkg debug/elf, const R_LARCH_CFA = 104 #54222 -+pkg debug/elf, const R_LARCH_CFA R_LARCH #54222 -+pkg debug/elf, const R_LARCH_ADD6 = 105 #54222 -+pkg debug/elf, const R_LARCH_ADD6 R_LARCH #54222 -+pkg debug/elf, const R_LARCH_SUB6 = 106 #54222 -+pkg debug/elf, const R_LARCH_SUB6 R_LARCH #54222 -+pkg debug/elf, const R_LARCH_ADD_ULEB128 = 107 #54222 -+pkg debug/elf, const R_LARCH_ADD_ULEB128 R_LARCH #54222 -+pkg debug/elf, const R_LARCH_SUB_ULEB128 = 108 #54222 
-+pkg debug/elf, const R_LARCH_SUB_ULEB128 R_LARCH #54222 -+pkg debug/elf, const R_LARCH_64_PCREL = 109 #54222 -+pkg debug/elf, const R_LARCH_64_PCREL R_LARCH #54222 - pkg debug/elf, const R_PPC64_ADDR16_HIGHER34 = 136 #54345 - pkg debug/elf, const R_PPC64_ADDR16_HIGHER34 R_PPC64 #54345 - pkg debug/elf, const R_PPC64_ADDR16_HIGHERA34 = 137 #54345 --- -2.38.1 - diff --git a/0044-cmd-go-internal-work-allow-a-bunch-of-loong64-specif.patch b/0044-cmd-go-internal-work-allow-a-bunch-of-loong64-specif.patch new file mode 100644 index 0000000000000000000000000000000000000000..40422df348c4672d8c6836dd29ad3fe19709142c --- /dev/null +++ b/0044-cmd-go-internal-work-allow-a-bunch-of-loong64-specif.patch @@ -0,0 +1,126 @@ +From fc3470aafbb3facc619e4813eaf0ea10d5c7eda9 Mon Sep 17 00:00:00 2001 +From: WANG Xuerui +Date: Sun, 9 Feb 2025 18:57:49 +0800 +Subject: [PATCH 44/44] cmd/go/internal/work: allow a bunch of loong64-specific + flags + +Recognize and allow all LoongArch-specific CFLAGS as standardized +in the LoongArch Toolchain Conventions v1.1, and implemented in current +versions of GCC and Clang, to enable advanced cgo use cases on loong64. +These flags are also allowed for linker invocations in case of possible +LTO. + +See: https://github.com/loongson/la-toolchain-conventions/blob/releases/v1.1/LoongArch-toolchain-conventions-EN.adoc#list + +While at it, also add support for -mtls-dialect as some C programs +may benefit performance-wise from the optional TLSDESC usage. This flag +is not specific to loong64 though; it is available for amd64, arm, +arm64, loong64, riscv64 and x86. + +Fixes #71597. 
+ +Change-Id: I35d2507edb71fa324ae429a3ae3c739644a9cac1 +--- + src/cmd/go/internal/work/security.go | 13 ++++++++-- + src/cmd/go/internal/work/security_test.go | 31 +++++++++++++++++++++++ + 2 files changed, 42 insertions(+), 2 deletions(-) + +diff --git a/src/cmd/go/internal/work/security.go b/src/cmd/go/internal/work/security.go +index 50bfd0ab70..c3d62ddc23 100644 +--- a/src/cmd/go/internal/work/security.go ++++ b/src/cmd/go/internal/work/security.go +@@ -96,17 +96,21 @@ var validCompilerFlags = []*lazyregexp.Regexp{ + re(`-g([^@\-].*)?`), + re(`-m32`), + re(`-m64`), +- re(`-m(abi|arch|cpu|fpu|tune)=([^@\-].*)`), ++ re(`-m(abi|arch|cpu|fpu|simd|tls-dialect|tune)=([^@\-].*)`), + re(`-m(no-)?v?aes`), + re(`-marm`), + re(`-m(no-)?avx[0-9a-z]*`), + re(`-mcmodel=[0-9a-z-]+`), + re(`-mfloat-abi=([^@\-].*)`), ++ re(`-m(soft|single|double)-float`), + re(`-mfpmath=[0-9a-z,+]*`), + re(`-m(no-)?avx[0-9a-z.]*`), + re(`-m(no-)?ms-bitfields`), + re(`-m(no-)?stack-(.+)`), + re(`-mmacosx-(.+)`), ++ re(`-m(no-)?relax`), ++ re(`-m(no-)?strict-align`), ++ re(`-m(no-)?(lsx|lasx|frecipe|div32|lam-bh|lamcas|ld-seq-sa)`), + re(`-mios-simulator-version-min=(.+)`), + re(`-miphoneos-version-min=(.+)`), + re(`-mlarge-data-threshold=[0-9]+`), +@@ -166,8 +170,13 @@ var validLinkerFlags = []*lazyregexp.Regexp{ + re(`-flat_namespace`), + re(`-g([^@\-].*)?`), + re(`-headerpad_max_install_names`), +- re(`-m(abi|arch|cpu|fpu|tune)=([^@\-].*)`), ++ re(`-m(abi|arch|cpu|fpu|simd|tls-dialect|tune)=([^@\-].*)`), ++ re(`-mcmodel=[0-9a-z-]+`), + re(`-mfloat-abi=([^@\-].*)`), ++ re(`-m(soft|single|double)-float`), ++ re(`-m(no-)?relax`), ++ re(`-m(no-)?strict-align`), ++ re(`-m(no-)?(lsx|lasx|frecipe|div32|lam-bh|lamcas|ld-seq-sa)`), + re(`-mmacosx-(.+)`), + re(`-mios-simulator-version-min=(.+)`), + re(`-miphoneos-version-min=(.+)`), +diff --git a/src/cmd/go/internal/work/security_test.go b/src/cmd/go/internal/work/security_test.go +index 35af621764..48f98100a5 100644 +--- 
a/src/cmd/go/internal/work/security_test.go ++++ b/src/cmd/go/internal/work/security_test.go +@@ -50,10 +50,35 @@ var goodCompilerFlags = [][]string{ + {"-ftls-model=local-dynamic"}, + {"-g"}, + {"-ggdb"}, ++ {"-mabi=lp64d"}, + {"-march=souza"}, + {"-mcmodel=medium"}, + {"-mcpu=123"}, + {"-mfpu=123"}, ++ {"-mtls-dialect=gnu"}, ++ {"-mtls-dialect=gnu2"}, ++ {"-mtls-dialect=trad"}, ++ {"-mtls-dialect=desc"}, ++ {"-mtls-dialect=xyz"}, ++ {"-msimd=lasx"}, ++ {"-msimd=xyz"}, ++ {"-mdouble-float"}, ++ {"-mrelax"}, ++ {"-mstrict-align"}, ++ {"-mlsx"}, ++ {"-mlasx"}, ++ {"-mfrecipe"}, ++ {"-mlam-bh"}, ++ {"-mlamcas"}, ++ {"-mld-seq-sa"}, ++ {"-mno-relax"}, ++ {"-mno-strict-align"}, ++ {"-mno-lsx"}, ++ {"-mno-lasx"}, ++ {"-mno-frecipe"}, ++ {"-mno-lam-bh"}, ++ {"-mno-lamcas"}, ++ {"-mno-ld-seq-sa"}, + {"-mlarge-data-threshold=16"}, + {"-mtune=happybirthday"}, + {"-mstack-overflow"}, +@@ -96,7 +121,13 @@ var badCompilerFlags = [][]string{ + {"-march=@dawn"}, + {"-march=-dawn"}, + {"-mcmodel=@model"}, ++ {"-mfpu=@0"}, ++ {"-mfpu=-0"}, + {"-mlarge-data-threshold=@12"}, ++ {"-mtls-dialect=@gnu"}, ++ {"-mtls-dialect=-gnu"}, ++ {"-msimd=@none"}, ++ {"-msimd=-none"}, + {"-std=@c99"}, + {"-std=-c99"}, + {"-x@c"}, +-- +2.38.1 + diff --git a/0046-cmd-internal-runtime-change-the-LR-parameter-registe.patch b/0046-cmd-internal-runtime-change-the-LR-parameter-registe.patch deleted file mode 100644 index c5090b6b4f27f9116ba1b3fa12458256cebf1e77..0000000000000000000000000000000000000000 --- a/0046-cmd-internal-runtime-change-the-LR-parameter-registe.patch +++ /dev/null @@ -1,64 +0,0 @@ -From 732fba1be44d6c2f5e1462ff785ffecb378f01ad Mon Sep 17 00:00:00 2001 -From: Guoqi Chen -Date: Fri, 17 Nov 2023 17:41:18 +0800 -Subject: [PATCH 46/51] cmd/internal,runtime: change the LR parameter register - convention when calling morestack to R31 - -Change-Id: I7e74ad8caf63f60a8caace3cb638bf88ea4630e9 ---- - src/cmd/internal/obj/loong64/obj.go | 4 ++-- - src/runtime/asm_loong64.s | 6 +++--- - 2 files 
changed, 5 insertions(+), 5 deletions(-) - -diff --git a/src/cmd/internal/obj/loong64/obj.go b/src/cmd/internal/obj/loong64/obj.go -index f1850f1caa..5033982cbc 100644 ---- a/src/cmd/internal/obj/loong64/obj.go -+++ b/src/cmd/internal/obj/loong64/obj.go -@@ -792,14 +792,14 @@ func (c *ctxt0) stacksplit(p *obj.Prog, framesize int32) *obj.Prog { - p.To.Type = obj.TYPE_BRANCH - p.Mark |= BRANCH - -- // MOV LINK, R30 -+ // MOV LINK, R31 - p = obj.Appendp(p, c.newprog) - - p.As = mov - p.From.Type = obj.TYPE_REG - p.From.Reg = REGLINK - p.To.Type = obj.TYPE_REG -- p.To.Reg = REG_R30 -+ p.To.Reg = REG_R31 - if q != nil { - q.To.SetTarget(p) - p.Mark |= LABEL -diff --git a/src/runtime/asm_loong64.s b/src/runtime/asm_loong64.s -index 3c24e33cb3..af41d57553 100644 ---- a/src/runtime/asm_loong64.s -+++ b/src/runtime/asm_loong64.s -@@ -214,7 +214,7 @@ noswitch: - - // Called during function prolog when more stack is needed. - // Caller has already loaded: --// loong64: R30: LR -+// loong64: R31: LR - // - // The traceback routines see morestack on a g0 as being - // the top of a stack (for example, morestack calling newstack -@@ -238,12 +238,12 @@ TEXT runtime·morestack(SB),NOSPLIT|NOFRAME,$0-0 - // Set g->sched to context in f. - MOVV R3, (g_sched+gobuf_sp)(g) - MOVV R1, (g_sched+gobuf_pc)(g) -- MOVV R30, (g_sched+gobuf_lr)(g) -+ MOVV R31, (g_sched+gobuf_lr)(g) - MOVV REGCTXT, (g_sched+gobuf_ctxt)(g) - - // Called from f. - // Set m->morebuf to f's caller. 
-- MOVV R30, (m_morebuf+gobuf_pc)(R7) // f's caller's PC -+ MOVV R31, (m_morebuf+gobuf_pc)(R7) // f's caller's PC - MOVV R3, (m_morebuf+gobuf_sp)(R7) // f's caller's SP - MOVV g, (m_morebuf+gobuf_g)(R7) - --- -2.38.1 - diff --git a/0048-runtime-Mark-race-functions-on-loong64-as-ABInternal.patch b/0048-runtime-Mark-race-functions-on-loong64-as-ABInternal.patch deleted file mode 100644 index d01d45501b62e6d52133596aa9428f649e6ecb51..0000000000000000000000000000000000000000 --- a/0048-runtime-Mark-race-functions-on-loong64-as-ABInternal.patch +++ /dev/null @@ -1,132 +0,0 @@ -From d2ea24ad8cd398ad1a4401b1996726d405cf96a0 Mon Sep 17 00:00:00 2001 -From: Guoqi Chen -Date: Thu, 2 Nov 2023 20:57:03 +0800 -Subject: [PATCH 48/51] runtime: Mark race functions on loong64 as ABInternal - -This adds ABIInternal to the race function declarations. - -Signed-off-by: Guoqi Chen -Change-Id: I123de85437344138c1942f5c5490d0bf7e27565d ---- - src/runtime/race_loong64.s | 43 +++++++++++++++++++++++++++++++------- - 1 file changed, 36 insertions(+), 7 deletions(-) - -diff --git a/src/runtime/race_loong64.s b/src/runtime/race_loong64.s -index 5e7bd6d716..0512efc045 100644 ---- a/src/runtime/race_loong64.s -+++ b/src/runtime/race_loong64.s -@@ -39,8 +39,12 @@ - // Called from instrumented code. - // Defined as ABIInternal so as to avoid introducing a wrapper, - // which would make caller's PC ineffective. --TEXT runtime·raceread(SB), NOSPLIT, $0-8 -+TEXT runtime·raceread(SB), NOSPLIT, $0-8 -+#ifdef GOEXPERIMENT_regabiargs -+ MOVV R4, RARG1 -+#else - MOVV addr+0(FP), RARG1 -+#endif - MOVV R1, RARG2 - // void __tsan_read(ThreadState *thr, void *addr, void *pc); - MOVV $__tsan_read(SB), RCALL -@@ -64,8 +68,12 @@ TEXT runtime·racereadpc(SB), NOSPLIT, $0-24 - // Called from instrumented code. - // Defined as ABIInternal so as to avoid introducing a wrapper, - // which would make caller's PC ineffective. 
--TEXT runtime·racewrite(SB), NOSPLIT, $0-8 -+TEXT runtime·racewrite(SB), NOSPLIT, $0-8 -+#ifdef GOEXPERIMENT_regabiargs -+ MOVV R4, RARG1 -+#else - MOVV addr+0(FP), RARG1 -+#endif - MOVV R1, RARG2 - // void __tsan_write(ThreadState *thr, void *addr, void *pc); - MOVV $__tsan_write(SB), RCALL -@@ -89,9 +97,14 @@ TEXT runtime·racewritepc(SB), NOSPLIT, $0-24 - // Called from instrumented code. - // Defined as ABIInternal so as to avoid introducing a wrapper, - // which would make caller's PC ineffective. --TEXT runtime·racereadrange(SB), NOSPLIT, $0-16 -+TEXT runtime·racereadrange(SB), NOSPLIT, $0-16 -+#ifdef GOEXPERIMENT_regabiargs -+ MOVV R5, RARG2 -+ MOVV R4, RARG1 -+#else - MOVV addr+0(FP), RARG1 - MOVV size+8(FP), RARG2 -+#endif - MOVV R1, RARG3 - // void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc); - MOVV $__tsan_read_range(SB), RCALL -@@ -116,9 +129,14 @@ TEXT runtime·racereadrangepc1(SB), NOSPLIT, $0-24 - // Called from instrumented code. - // Defined as ABIInternal so as to avoid introducing a wrapper, - // which would make caller's PC ineffective. --TEXT runtime·racewriterange(SB), NOSPLIT, $0-16 -+TEXT runtime·racewriterange(SB), NOSPLIT, $0-16 -+#ifdef GOEXPERIMENT_regabiargs -+ MOVV R5, RARG2 -+ MOVV R4, RARG1 -+#else - MOVV addr+0(FP), RARG1 - MOVV size+8(FP), RARG2 -+#endif - MOVV R1, RARG3 - // void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc); - MOVV $__tsan_write_range(SB), RCALL -@@ -167,8 +185,12 @@ ret: - - // func runtime·racefuncenter(pc uintptr) - // Called from instrumented code. --TEXT runtime·racefuncenter(SB), NOSPLIT, $0-8 -+TEXT runtime·racefuncenter(SB), NOSPLIT, $0-8 -+#ifdef GOEXPERIMENT_regabiargs -+ MOVV R4, RCALL -+#else - MOVV callpc+0(FP), RCALL -+#endif - JMP racefuncenter<>(SB) - - // Common code for racefuncenter -@@ -184,7 +206,7 @@ TEXT racefuncenter<>(SB), NOSPLIT, $0-0 - - // func runtime·racefuncexit() - // Called from instrumented code. 
--TEXT runtime·racefuncexit(SB), NOSPLIT, $0-0 -+TEXT runtime·racefuncexit(SB), NOSPLIT, $0-0 - load_g - MOVV g_racectx(g), RARG0 // race context - // void __tsan_func_exit(ThreadState *thr); -@@ -454,10 +476,13 @@ rest: - BEQ R14, g, noswitch // branch if already on g0 - MOVV R14, g - -+#ifdef GOEXPERIMENT_regabiargs -+ JAL runtime·racecallback(SB) -+#else - MOVV RARG0, 8(R3) // func arg - MOVV RARG1, 16(R3) // func arg - JAL runtime·racecallback(SB) -- -+#endif - // All registers are smashed after Go code, reload. - MOVV g_m(g), R15 - MOVV m_curg(R15), g // g = m->curg -@@ -471,9 +496,13 @@ ret: - - noswitch: - // already on g0 -+#ifdef GOEXPERIMENT_regabiargs -+ JAL runtime·racecallback(SB) -+#else - MOVV RARG0, 8(R3) // func arg - MOVV RARG1, 16(R3) // func arg - JAL runtime·racecallback(SB) -+#endif - JMP ret - - // tls_g, g value for each thread in TLS --- -2.38.1 - diff --git a/0050-cmd-dist-update-isUnsupportedVMASize-test-skip.patch b/0050-cmd-dist-update-isUnsupportedVMASize-test-skip.patch deleted file mode 100644 index b12578e3328cb4e26dc89c95ff7419b37c1d2c8e..0000000000000000000000000000000000000000 --- a/0050-cmd-dist-update-isUnsupportedVMASize-test-skip.patch +++ /dev/null @@ -1,35 +0,0 @@ -From 18b864d77a801bc1e3fdfac259c8e45253f9a321 Mon Sep 17 00:00:00 2001 -From: Guoqi Chen -Date: Mon, 11 Dec 2023 15:17:30 +0800 -Subject: [PATCH 50/51] cmd/dist: update isUnsupportedVMASize test skip - -Change-Id: Iba08f2c0bcc09fca34079e7f4c183d97b3f21ca3 ---- - src/cmd/dist/test.go | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/src/cmd/dist/test.go b/src/cmd/dist/test.go -index 8a1568c068..3822c5f949 100644 ---- a/src/cmd/dist/test.go -+++ b/src/cmd/dist/test.go -@@ -1526,7 +1526,7 @@ func (t *tester) makeGOROOTUnwritable() (undo func()) { - // internal/platform.RaceDetectorSupported, which can't be used here - // because cmd/dist can not import internal packages during bootstrap. 
- // The race detector only supports 48-bit VMA on arm64. But we don't have --// a good solution to check VMA size(See https://golang.org/issue/29948) -+// a good solution to check VMA size(See https://go.dev/issue/29948) - // raceDetectorSupported will always return true for arm64. But race - // detector tests may abort on non 48-bit VMA configuration, the tests - // will be marked as "skipped" in this case. -@@ -1638,7 +1638,7 @@ func buildModeSupported(compiler, buildmode, goos, goarch string) bool { - // arm64 machine configured with 39-bit VMA) - func isUnsupportedVMASize(w *work) bool { - unsupportedVMA := []byte("unsupported VMA range") -- return w.dt.name == "race" && bytes.Contains(w.out.Bytes(), unsupportedVMA) -+ return strings.Contains(w.dt.name, "race") && bytes.Contains(w.out.Bytes(), unsupportedVMA) - } - - // isEnvSet reports whether the environment variable evar is --- -2.38.1 - diff --git a/0051-runtime-race-update-race_linux_loong64.syso.patch b/0051-runtime-race-update-race_linux_loong64.syso.patch deleted file mode 100644 index 15b999d6be1193015556105c25991c4cc8e19fde..0000000000000000000000000000000000000000 --- a/0051-runtime-race-update-race_linux_loong64.syso.patch +++ /dev/null @@ -1,1946 +0,0 @@ -From 8dbb24c31e04c972924da2cdf1523f6675b15e92 Mon Sep 17 00:00:00 2001 -From: Guoqi Chen -Date: Tue, 12 Dec 2023 08:57:42 +0800 -Subject: [PATCH 51/51] runtime/race: update race_linux_loong64.syso - -Signed-off-by: Guoqi Chen -Change-Id: I74fcc442d307384adb16bf5c6698722390dc4d00 ---- - src/runtime/race/race_linux_loong64.syso | Bin 644528 -> 645456 bytes - 1 file changed, 0 insertions(+), 0 deletions(-) - -diff --git a/src/runtime/race/race_linux_loong64.syso b/src/runtime/race/race_linux_loong64.syso -index 677bff08c831db424ef2ccf73b85ef8c7e6192c3..6fdb3bad77751956e4c1ee6c0732ddcc3a7fc3dc 100644 -GIT binary patch -delta 165256 -zcmZ^s4_w`4^~b-L@#n7Lgozt>qFyE{Q<|{xZ;BIiIZ#xlm{g>oo1&sJrNp8xGfXm4 
-z*2o8y43v@*3zXWbsjN^}En~KHCMqQ5R5F&U=oi-SobUNOBl~mwKOGuK}*B>r_M=x -zCdL=q{@)WW^sRa7f)fiC`2Mr~&K0ltPI~IoC!KY!ukoq-H=kPKE8YIg_b&4dZ(s7* -zzhcJkeQHtvH+{bH?GJxH6jN`u-?I6Xn8SVBH~ipQU(!=Q{K0!!zM}09zVc{{bLUg% -z9C*p+D|%|lAJ6gmhM&6hwUtZT9w*T$gFO{i -z`hCZm!3|VJ){dWT@ZBBsRqi;q&UfkpU+0b+ZuI>wHRiEZJI-qLQNz#J@#0;+&;rwY -zkFU-Qea$yxF8Z2p(~gB-_Z>)&dHe8=w+4LYSNlfHX$xacUs&ucO& -zHHPq47YU9foG@a3wkYN`-^h;p6JtK`k$TVKd8d>S^UUIy?L;pLj7@L>G?ixHc)~5)Sc`Zry=HT8Q~fOsn{Uo6x~ca1n}ZFv-g4co -zwKuKHTD5A`%HZwSH`ZDG;>*j+S2|AP=Gz*ty5;8jJFbHNRpNgYyI%!&s`T>PHV1F3 -zz4`ja>%Xx1*5GY-+;sIV_4M?rTZ1>%-g@Kb8nS}5pRc{XzV_<+&B0r44whek^XHm^ -zwKs1H*4E!%d&h0T+cq}_8*lyG=HRur+!{RpbJx_}c75$CZq4SK+2^Vw5pSiMIUs9} -zv*Z1*Ecn=2reoKFBoo}f(EpQX7qq1C^NDS~DnzuF{4hZJ*CWSg90NUM{gONwzR -z>diLR+N3BoyZP0K6cy$WziN`A(4?o(tNl_`nvMJ_*KFmYT8cI^loEG_uS;I_NKs^x -zk0q1CQWTmJe$^?(m=qII9F`)}^s-5*6irg>lp@XemXekwMX?lpQVdA3*W9r*Zi6|n -zG%l5r-Oc9XZefDU;<9~ng6hr2Wu){-F(AcGDMqA7HIvKYHc;EB;L&i4X(jl+F(I!QT+0AxMQpB1={Hjih -z7AfZ3B2ao(c>maG-)S};Pdn7a*^%WGvz27(WVe`-qT5U|9FSs6ii1)lnvxSpOO=8q -zB)@7gTe$F>UM_~+A~01NxymB5kF6@CsF9*kiWVt4rI?i>+iW|L9J-{Kl43>*#~fm< -zMv8hVI;3#S#vo}KQe;a}EJe8#EmE{g(Je*46vI-`?B*y3q}Xq^q|+-JO)h4nm~#s= -zmQKUyH~UzbmV%Bb{A$4Dte_ozCl|bXabY@ExaNnbFuPaKo|DT(s!2bIOmn&5otg{N -zauS*DGF$oiAV1$P&nNhKl9^=FT(c!3?pU)pBkpQCn9}pBIEf4z`_9W{AO>TkoSz!L -zlSy7*C76!V?q+40kR;P=D?6ED4a%z;vzuX|ImAV&6n#=unDkS~vdL^cg|zi%h>K1s -zrliO)lMJ&=@~PCwdQ-v$O*e*PQaGmfRC=|^EdqOMkrj$~Y6G8pV$D9biZ#AWH=bK` -zn;d?1*ewEGo5EdfyBbU_TMbGvVz$w%K-zVYjeE>)HXe{-!Yu-QH%2z@FzG9KU%NT7 -zJZd(Q%m;riy4@mBaeHLr3^T-5HBxj*anQo#(@3i_C0sPSMWClOa-dAJg{%UNUyUqR -znO+XkEk(Q8$FDMt?{q41rPwP)vB_aLD@B&6rB{LGb_tukKuX)m&1M_R1NTL)b$rL} -z)8h(!bQt+}7R~%VQ}S_|2pwk2$H}SffyhpUrk7-%8C-OkeQZ3|9oaZ&e5*(hye -z1M_Ft$Af8(efsv0sWzQ%f+={gcS9o6I)$ -zq5kB8j)QDG_VdWbbnIa{4 -zy`UN5qR=gJVq-DYJjRt#c&C{p??88aWVih$IiG@0xkZkDVPvb+D7?#T$&cGmmh`d6 -zZdpm;JUv#1dH%W-AvRW+N4W?&BngclW0=6rALdIWYl1i -z*HY0g#h6*SHf}>s?un688jiy2O)m#YHNG>cD3v0`zN>>W{8dH 
-zz)wYvQy95|*eJZwBp1=u;w0Bld1hqaYEx1~H6)o&6~&$UF+1~;&NjCe#jT<LMM&5zZPl#-fGB%e{vzz3yUFU~; -zpFsC%_z>#cB)pUMHQuaSPkFe#<;^C29p$=LifOZvUYVA4G*#QpR(c*7yfWOe1QiXN -zA(n?dvW-jErW`kDlFuQltlID(^HwX*iMyS8s+X;HUK81h-i#KsYg?B(5{ia$C5CZAFtw -zK9|}WYkJS6_Y>>Rjk_R_@#V;-&1|z_`E(SyCbT2wytsJZ@~Rf`Z$}tgRaJ{wu|Dp5 -zbin@AdbO3fB9PJ=IfX>i%T^gu^huFq_AzXhg5E~*t45P^ -zKJ75R^XYk-spZ0NwsAp+U{>7cbO0t)=6oY^brn(g{wRFF9O7_o-;5kC@mrC3XB1AC -zj2q&v3easG9?v72?avW2xq;fVM~Yb~I!tmY&B}h$TT1OEgbei-F+(g}?$ZX_SZBoog -zF=w_C4D=01HdCJWy>udDx$Q?HXCPifw@S83C -zir%eIk<yX`W`7rU9bMWZRdWKq&8avgvfi*Voc|HK|3Q=fN$Sjl -z9y!J@M+Z8^vpjx!xVLQ+9eh7YH(9C6$tosgmHEF<(&X$hC09_Yoo7ULO3602UJ1d=++=QZj@5@Ikir|*~;=x$XSr%7K%)F -zFIk>}?0ytWCRaeF%N3H#=qlCS=-BISU6B3Y)Qj%+Q}85kdg*TC;6YRJX{s&LBOm`U -zNv6(Z7PUCB%i-M-MxTLkm -zf)!sBZkq;P<>4oR2Td)<=%*N_tt~EC=D#>Rz&S9?_VDw;cX|R}1Wq4wM>}=pghwtj+X$PQcKRfCPdn`|SHRwXS$O-d1h4Y& -z&EWJYIRy&zd*s`WR6j{_?E#Ox)kduhudNL{)x+-tFZA&H!4H^EeS_|5^?T$eAqVjp -zo2~4p;51XdK|5W8M}7fv>L;y(lrMs3diWnq@4YlH$07H4G#nDsJkyNY2ivR`Y@GP*+Gv> -zfvut{tY&AO4dvmV0Ix7Z6fV%@kuQUMz)Z4S{HgG$wYD~s+`;Yj$hSbQ@$k=sH+?!h -z+?T-V{UC)iJ3FYoUk95ty~G09)!{A=Ku$ILST6lcxcr!{$@sRBJm`^s44IB2EGKz_ -z{}OVKsb#svBkzMeX10->Dm-cy_>9MX4S0pe{!H*T4=(|ql{oW9#!?71 -z9*0Z7M@-3fYR-&D{w(C6*+O!1TTOV{H=0SpW^j9)zwCfVy92ICwc$Q3Hl&B&4PIz` -zos>zHNB$P%5tBpm@(z#uDCCJ+b9HCjs*=>J!)x6Mz0Sj*1y8yr-2Qo!^R2jL3x5ll -zP8^)nPLKC%wpNo))@5~`An!mf_Hg-P*A-5WhyM%qV`eLd^V<(=ky>1kpFHMx&h$O#na9)2nKgol3$+<&bTw$*?qdH8iMAGWq8!NeVHesX`@lECL&WyqQESy9Sn -zGf9cA{fa9OJEh^ObC1ifa8f-LZ38bf+o;*e#UA-P#@9uAz?g~i2dX^U6L6g{hsd>T -zr$_!dWdC(e*!BzXVh{fnc&6D%RfcLj@+*+rJp2vt84v$6cv77cwoQXicW2Ba{On>gL{%=l9CE+Od5}hX%;SBH*+LR>4~rf -zu1#he*#yRJ46h&rnLgWQxxyoV12TOG$2Al_6qn?C$kaYW^X3sq2R#*jAH1hNyrx0$ -zpoi};+bEprc!-j3@o3M%=YSa^o8*HY`S<1!;q*VkD*LAJ8YjW&)6;HhTaQQn8)W*5 -z9m$~?kNhEI=jQOJu`BpBy^HYhMc`E)z68A3!vo;`!cRL=OAx|{$Kh0$U*NPG-?zCr -zYmRuhPx2rSn;eoudpz=akjFjz0`Mje-w3|R!>honJ%{_N!0UyZ7rq^r9BTF$-2|g{ -z4{rqT_V6!)S9!I_*N*HB(jLtA6J+y1(x$;dP$>l7wh2(@1S8gzWqZj6TJ#kAG 
-zUgDB3JB~^3rBlGm#1cO1vU|)Wy<`>NOIA0!|<2@S`ppqKp5MM<~o}Z(LGf2&~H!Xs1ahn}yFprZ0nX -zm`^@JHUAc@#TV{FpF@#L;5Epz9{EpZh~;-&`8lU5+}W8ilPv!Wa*Id)uStHC`ZP9! -z`E%wzdJ%fg@ea%qSN^rf*Kq>ig&rm7^51%>zyn;W!Pb`8&i{ -zI2RwuBNor-S0Im5$IrQP#Yf~-W{BmhPG;}2brR~+rwrn%Rh#E0CElF -zKbRqseg86(Y_){0EqD~_a6Gnq=*La+kI3qLw+LJXt1(!mAl6N$gscM1kf$Kqc7Mxk -zVflxUr#y#)bC4SmC)aFc -z`N~yNcjM8*6Cr=q46*zp$nB7A@K?}oOF!1p-Il~%x;$d -z3AxZCpI{ELym7SzZ-Q)7yWOM@liUNj53(JCUz&|1m%RnqkDNM@`B7^mYB#v;-;=?6 -z!0Tauff?f3Y9J>aE8NcOJIy4^gOD2_+hpD_$v-CfLt822O$bgGJ}lpnD>=L!j-^(!S|*LxBK%cc@pk0c%jJ7CE!^B0ma~VfR8U1V80Lii13hO -zf3;W+hZo^ch!cyQ-v0qFKS2y_JC^24Le1cfXuw(E_NR@uTPndTgB*u0=i4Nnb7Pn= -z&+h~ucOB@LpRj)%JeV%P@|VFkfoH+~L+}o8cM@=`Pc6U*L7<*@w@bjY-OF#sv0eQ+ -z@H%kY6Azhf;@}4SWRlrQoF*0-C^Y -z6&`X%AlM4J;ZQ->9lY78Gz#7XKH{p5^FDYXT}trQErqjett7M`yal`%ypk>n+!3Ad -ziEjC8t_6od-7H-g|m@(x-)@S&XF6CH%GB=2k%6j -zB6#i+9&*MYBte*g!vuH=cm{d$lsOD;XYv)`nds>t>>mOz1-CQ#CGaMf(>_`ZhyOy@ -z35QMKC1*>;Ufv3xMrT~!Yy(E4@Yr<{Ckys&!}ylNDhS08Y{gfA_kp`pfb8!A9{>*$ -z=cVVhh{W)>3#^dg0&tJ3-X1B&7{kH##4F$< -z;8kSH+fgMn-u&k5n6eY-DR-9Fq!K)pE*5yB+2w9ujly@DL%Zm`$3G!g!OafViRVf+ -z)!;#dqu+QwO}lXm$;r+|PVoZawgJ1~)(p1>xV;5F4L$;X)Ok|P09}RfW_!92e5Z$B -zCp_egLa_OM6%Knm4&Mde@3H?K_yLdo2jElSNs^b7x?U1J>~fk$Hq#9d9J+MjE$BWv -z&KJOg;MV@T;P&_Iqp;rxJ_CDt>2Ci6&&8y5dpee+)6BkS=tB2W<9n8hdb5p+z-^z9 -zG}ptL4SD;PEBhWXIc)rM_f^hr7?ipuXDMYIl)rNv=I)r#bw~F;6YSy -z3wWQ0e+zun!*_vC>G*sAd<(*1k3&kCB$Pr|QM?7+N5@$Qo(pbURtsJXZYSH9!PkS^ -z3Gyg$_XJP_Arlq+3Jz`H?cnc&rxXg<1ithFNwf*v+Mf*`OBXY|S^Lf4b!%|~NEO}L -z4xt4O&EP)=p98nQHkt&ti!E60zfcnD#O$>EYzk?5eXNc?^A1aqj#~|3AI1l`w$DtlPu~;J5M85@|3T_kn -z1^5{Fptw2jfQMS>gu`2#6i(Vjl8~Ry5WEdY;amnj44xe?!Vkgi>JnA3e+j&Z<_vFk -zH~t5Bh49dkE=s*vDz5Q36oEH_CrUi$dhiyHeLHw3xSe#5g3p4>&-fNN&P&Gk9Gyb{ -z1wEUFinlr`oYYGsuP$)vICm=mp8~geZ3LeIw^QIIa0dZu5a(;cLyrB9s}1xMIPj43 -zX6MX1;2q!%B0GyJBvHBya5rne4m<o5#njzBolJ1?4z -z&r=8f6>^KGo+TS4uXb=-O*weC$9@ZVzlV2$4}17?!s*Q+1iKHt4Tl5Zb7YkN2Py>E6y6pda(K{qvlS;-NlMe+?;0cSn5pP -zY@8u*KX}kp>0$*u)8*#jJ#inaU39tBndOO*37+lY7l7x2&m#5>HV$<>Z#I|v&7nQ? 
-zmD3&@&LfZ5aNsH8?HmR#1-I*FE&iljOgEh@hYH)L*=Fchbcuc@$vmVxsf&5DS$r0J -z7r3p4zAwve_dy=>xcwM>KX{=;bzT#H8s_I6kL5cU(ViZlg_6Fwzzy71Hf -zCh&8Fhn%J>h3tiJ9UNxAE+7~DF7Rn2)DOM`+%9O6ogl(%;D@h~*L|>0`LrY?zy7C# -zT`GjLA&lQI09Opo_26~XLf%THaK37G|Asnn2=XB0LCE9a_RFdV=mf)C!e^v@`!&ua -z!A>T4>1PF0gKq}!10Mk2Mx5U0nJf0v=h9EYr~;jt38M+KbuZ-@q+mRl>#q`yJ=?hy -zywk(Cf=_`LBis(~j2Z!c@ZW=Hffs?tQ_fUx$f<*1=WI{}rye{D4rSn79{bOOcZ1tW -z@gVpZcsAlZ4NgCOp)E9j!TAA%O|CG1ZqK5Q;~v@zUWh;&z_YFvV2=zpfOlhdVep-Y -z!R-=92T;Fr07AoN2~;eFb2N1^S8TsnXm4E?gO6a>q#ky+uY=oVky?Gip8@Zwlh^i? -z^%imW(tZGf?Yg)csdxf>G)5dh3vQQEnilNb4c<{NpaS;KfV-c7lVpE7_7eE$7F=3) -z!GXU(%Pp>Mlt9H&IA?)(gAc?09&o!J)BZSdcoID0OY+)Ix_7{9zzglAee%^(@w_08 -zYr(tVVE2yezz>2qA<-@y=gR_SW2F2oc-2;UodNp~LlBy+fI|U)kCuDFE)dlQhX(M> -zy9C%bEsufQ6{DtL{|0!~-2&EwAGJy1CwD`b@riPd=^dlP!VRSJy`mb=>Fs{_wSP^_ -zyAf{`Jo6p_c8*Pe=Yn^^{%D%(?oI`cfpyjhKg}t|F@7f;J_CoWb^&%Iwwl^~bSis- -zF}qp5hGfnr)ktvE;e6d3B001dav$Vrhzf{aNlvhK{GF++w@b7WQ!bd!Arq^0iN-w06+M9 -zQFzMrZoMHV3l5oZI9mj#61);T1l|u`1^zQ|r%!;j|1b(KzCjvX>Du4u*mqb?D}){d -z>Of~b4W9iysh|}V9|j)=w++aonB1e|;G1AyZff__?Yk@8B6JI^iXRi%9+@5oZw9yC -zZ-Uz;#8P9#@AP`9t>y>vx(-=i1#Z_AO9lVF@Q^bxAP%-4UxtI1FSto6u&a-CA<#xavU4z2?z(Dk_goZ^eXuJVF7k-$K5Omb%Gbez5skb -zxSa@}0nhxg7`mU!QvFUR1iP-;9t8Rq_@*5KM!+w-MJmqNDIf{_Tj06i-3vr`7Thj{ -zRtG*u+&$eEJ}ZXP2~wWYfC^ra*Sp|wCHTazg$Jc@I>7B}X~XF1pMnqX6~nq%Dc=RB -z<J+3SqsZk2GEe-J~O -zS?)&Pd7^Nk%OQ6{?ty$4c<##r>M_M9!0k$HrLg}HJndBhiQu`np?Yu(i&H23G{-K} -zR*YsEIK+-i1WX|3PiEr*%J*Z95^pDDJ3$MQd$o_J0yY^iRG2ZSr -z>95gdn;C5v` -z+p+%xw=46FBGH^?so1W}XJ_U -z`RIKFx3~|7;6m63!N)!PEaL9DB;|Ni42R3%&;mXKz7@RT1hG#8?+0%OUl0Bk_!#&m -z@ML;pvqC-x;P%souDg%~Zh>7vu*DVWY#W8Y4sKT#+=)0#C@jaRPZuzNIOl?=t`Kg& -z7U0yAkna2iX7gXN`PN4q#N=70deWCg*f|%5%xC(rAaY9NezbXz$#7 -z3A}NwPN2l()MF|_x*PN%a9S8UguoQf0s2D9Z) -zbSi9yY?qiEj}`ex;LRH)T!mogb@095L4;d;w^TFzNwK%@#MgoctAyKuxL$b3X@`&z -zFEwn1gI&53`8eCb$F2%b=y_W~jc|Joo&~q7SN5R+C)1E~e~i_#{i69VDPC%}y+s}J -zAn7Zd`(RClgmvD6U;Whr>_O<sj_b<^&imBiC&q35?L$Fi@`ixrOfND@DXvt5#N8bOb1XZr__*pDudr|qWX&(x+5 
-zT>0T6vhDW^z9G5T^*d)5ipf{No4zFR$=%%^1CRZ(@F4gY@fA)lUeVbOX`Okm8~-%N -zuKH;&);=eK(~Lk3aQ&|F{e|A+Ev6~LePmYywFiu|!D~WN!7SXaBEG`;?veWKB`mFS -zNZ|q(ckpe)=_40rei+hs=2?E116uE~mqJJu-R(Lekw*f#tsgZw9v~uo&9O+&#g= -zj{|RPlQ@}(zY4q?+)aq;cOHh2_BC;shQrI?{(FR1g0Hw=Dy{}E1iuKp6Wq?$+rX#6 -zQ?TnlNZh^NwM)EKAkgb@uuHtQf(L1j<`&x}UJpoKP7V0LeFE&U3>GIu`W@x&cW4g(5wcwjwhvRh^Z{`jc}Vi|BivQs -zd%&~7`PQ;KqTm$P-5v$+=$1IC;6D?7n&TDrk+8zq0W-(hkK-Aw{QfTG`ve{7xSoE* -zv(0}E+^$S}5dF32A+b;Gk@{N@co}#XI2{Pw?R;>%b}hQYX%bHNiF+kbIV$)89GV^# -zZWDSH+^%NZgbEVqLX{g}SF^PpyD|#D5WJ<2<1d>3No1qxeUCcidFXce+ji758HLY* -zH-A_1t&qYw{@aq5m#+ig`#mwtMEuVP4>?|;2@Z$h&;y4a@Xp61knP!F@GkH^*uMr| -z)Gzks;0x$v#vRz^@+;;)7hMNo1`c*%#>c_u9+yDU9UJOy7Mpa(su;dX_NDaR6GrygbHp4 -zp98M}?*X?<9NPoe%f#Iyyj|ni9w83Gq4hnf*p6`;U1{=M*$fw>T`SpssGUY##fkQRAPuOKFir)y -zT|9XpM(U?Sl{oDBr#RROz5%`md;kgk9K7-0;r71^g4rfi0dALI-ielVf!j5f+b}8q2X5C`?uUKW_oUvCT|RjR!j%x(j+LfX -zO9P!=@TsN3Z3{-g?E=g8&L@8+=gt*;d}dF|3m+4X1(vD*(WM)#fSWO6sJH=q>Uc3s -zjg?Gyg4-pRbJ5jrgWDySGr*Tq(>bBaOodh9aIvuMY4;C4CYO2oMh+%CsljP~6r{4{6$YN=qS6wZBc -zunRQzBhYuuA_J`x;>5XRN2YfXG -zx1{E9w-Io=zH<$-cpu!Z?>y_5a3|7R5bplgHi@$KKJ^``APDX!W -zD0D0Ad%zF>QtU0?1#VY~w)|D_xiMwW$ESZnuqc8%25#4l -zw)|>vyKb~?*cXVq=Lakt?R24K--Lr*H+mHOC*VzSG8sm|{|Igul0LXd#-{v-(tu#9 -z*q39-Mv1R*p66HGe#beC$#vY1#GxJz^%(mKaJvL`CMv!Q+`-b*_C&UW_zGu_8=D{Y -zVxL<+$ow})_*}Bo?^Kh(v)8U+9YjLk0k>;d+e={nNS^Jxvc%B7>rNWt3SK-?K?W+m -zjyT)TeO&BQVbK+Wkg`gIb_g%HfyRymO2o4%*!RHxF!(X_7N0AyD_^&Op9G$Qe@tiJ -z{ar+yzl;KS9Ia1_GktfA!De|R|VEJ$m_X6=NkeK;WLxK1eh(m$IEGSs)hAEI} -z1$pLgDe<)-@n0(*YsF)&c&ru6wUW$Qsduf^yVk8KFWap*FWZfhm+iXeWxK)hvfU_o -z*{*wDwi_%j+l`W!?MBJV$vKvD%FC8$IT9@=&%AzY{BarLks}^C@DR(@V!1lUoVGOn -zINxe>&eHhu)#9*59M*`#8ga;#oN^`6Tyf9MHKR-8(_`}T%*0X}9})A!G*23nC#}en -zpm|b7p2W_R*fOAb`Qnf-QSv29zPRU0C*+H}3}IfrM3F(uD_DJ!+ZQsFc`|r;1>z{f -zoL7)*?q3$49g^6EQemM~Q7D#$l4zmSR47%fmG%}&zH22z8Nz%Fp$vAOjB=iga=wgY -zz6@`^3}}9KUL_}*pW|kcFGHCxLzutX%_3ihB7e1;O#W&&i~QAY_4#Yu>hssQIpwdB -zC~MZ3$5P|dLpg3^@>he3ql{s`j9R{oVZMxLKK<&FlgV}aB0tw{c7Cp8K|hCcHK{jO 
-zg5^oQd6I<;Z+@OQQ5Z0%?*AXub?~eqlj^+up)L -zlenDriae9PJU)0zfec@P3}%6hVu9?d1+sS*Wapbpm&ZSxoa;`CHL?p+kz;(v#itz2 -znXJikyPAqMCgZsH>yMW%N0;TvF(Xe77I|yU&g0_KLeh{NX^8B|`8hIDIhZKWq~l~K -z&(D$8$xfcXS{&rKl`lI)et`^TfsC+t%Px~I$Gd!*^VvN5GTZZIwin3pr$8ocflS&0 -zIqnqX@y?~^AMW_@ -z@%RE?;8d& -zN8Vt3g3ou6IeRF6gYOb^7Z;y2za5JIPiPa~hp_uQxDMigyP|OUHw=zjowhH+8w78h -zO()E!_nwMBAw1*;(!$7zksbC%;VHp+`=eF2Whh>)c(&p-3AnW65DjMEQ}N5nnxK~w -z`YA3TSndGl2I_qK!8Z{9nX;#Ur#Ku>3+P928%rm9+Bn>`D!tMbE_QjH;?0WRs5t#! -zT&gBCs^lA#JQsxzo-|*Lwx3b_T{T=88T0mfZ}=v9E`$K -zGUwyy_(9@s`_Cm^SI`;7p-*vbe?W0v!Ia|KK6d4igidj^L(;qumQb6i-9dA;9daqT -z-$Q#$zl3q7mD?uOfsLmfsmEznd_Zw+zf1AYD|;-Cv)bXKct{DeaNq;5=B1~jVpFm+ -ze%Ytk?M~%Z0h!%2AGU798`}QmTkcu;` -zxV9e`F2{{WlmlHrxf@rYc?~!Z!uOT^CU71E&Fhu@50rg_vj2fT{5PB0pHY9cK(~|A -zO@oV6uD-#7;@VB=wer3*+85}$G*>{e{9RO*@alFY~QNxGU-33xnP%L;iylj -zMog>Po>ZK!?%a*-f1-G)*-GI8<&e2;KUY(w&J3|!uVniTcMeC3)wA59Oak -zFgQ;d&HX0lY1%1La=F>^<<(!RaB1M&y_;3IV#V!^O7^$Q#jyP|%D!6J|5EWL;j&Xa -zrv$s`3kP~Z@$x*bMlK?Ls(78^dlhd~{5OjCEB>>D%Oog2jw|7}%3)4%T|s8P_;R8z -zD*H+^^bB=uhmwD<EDE!>DX6P#YrN#S6i9~qwr$I`5({O -z?SSICW3vjyg8SnQeU(D*@!gFD?XV{bPg^^0Z`4Ej4DAp6rDL= -zfj&^TofVn);1itaRV;T$}8| -ze^d2TmCW-Q#oeh2d(D%$r)k?jfBG^YMY!4Ui}>Ww0F1cM&WY!-t0bX&KUML|R1)Q% -zn77xwlUqldH$JbKM4ODtLM*TD_+4`p8+g*Qau9g6=)k4m?x{Uwb`4|GmL -z_u4qP_W&~i?mfUvt9qVQ`5spM{}lIM;I=R1UW)3=lSC2RhCi(wlEArQZC?oPy(}tH -z_WH7@Ka7W@m-%rJ!bgTqaXmjrR3M$uG&m>pp=!X4vOmJPHFK(h8PzMm)-$I;aebLKpt!alQ5^R1 -zZsbHW^a4$(qzb90lnVQ0VkY+gkno7;DU}6#@02Qz!mGi(acYEz+#7?sg8C>94N)p+ -zj$+>u#lAhv{zh)AY`mltZ{C)5hdGE1KlVhiuh}RzybtTc?k1D-E4te6fXoA~&$0A> -z7)drAzl!&Vcrcz*=h0#1Gp-I7_P>RCW7&Tb%KQywZ~rNR*FN!5GNg?;U02{0y~t9+ -zscLrG<%qnaS`K?}AJ&s)$y)Mwo`H4h(5*jMUed;p`6p{D%V6@ -z@^ena;Y}|JA#kg+ltZK9dY5bp4@6$F?J_05p*?97G7sJhs(pJ^xHD8WQ;M%vJgri~ -zay#-A&jjaoXr3)x4$fqO$leJ -z3T73zUoqh-V?Q}x@j7LnsCbFuDT;r=&KSN$Z~ryw271oBUcbbpPZkdCr4hl5Le;uqLH;_kSDA3l1N!+&Zu8(xf0E=z?G=lE;YkPL91A?K;&O2N6|zftxTruRi^ -zQ)LvlM#c4f=u=$x`<^I#T5&z!Vyh@0du6VLG!fk6o#wew_ -z)G)u{yyHH&-wpB 
-z1=tzIXH@a?{pQNw$1gS2zmHEmY6b#l^0N5`^}1U0<71&=Uxl#q!eN}BXl5ARDj`!zrFTbAX!P1etqh -zQq|THrJ4b7uI55NJzyJD)o6YYoLloJ6@OZ|dEo$^^=Dw@os);71a+>IS~=)jDyZM2 -zzsh?)WN)}MaPA77V|Em7SB~Vi9=W+f&nuO^<~732j#uN87uUgvJ9cV;dH+><`>n^T -z5jrRNp9FfqHlVnpI~d%X%rH1NN+&a_;#@EBvxv3W*^1M$-YXG_qL@-@mbYITL1qwI@dOs94=bSZZ7C!Y$_IaD4(y3D>$2tiS+^3I%guxTC(8BIdD$+plYCFhQ{f5lvKk;a!S#1s@X_#S+K@c -z-Ca{(3TG-_p~j{d+*{jvsjU(+_qMK$j($|zx=kvouB}e-OI2-MidXv0#jjHjmA_6s -zG+_3qY>*#=Y&Gb)Ec$zhkuLCd3V)WDu7QTLkU3F^f$ -z<<0v|p-(CuHzh1jK<4)UUDZ8fwvb#ltGu=BTrE+&-5Cqc;XY8|{AM55mI|4>Q_Df) -zo1`5tGc2lEH_1afNhi=*74{!0Y^0HIb8A56(&bQ+bz~-Q*UifXqp0 -zxyp^YxH>GVS@kC#<3G_DZ*tw~5VG-4ltYu`&>Uth`S4>4IOj0CV4k;14(*UR2Q7EH -zQ3G93qV}8Qx9Iw8KzV<`Ttvu}zeU|P2F{iK--7u{C(IVIDLbg#v^)cTE_vwvbv=K~ -zNE?Uyj|x|6_EES%1!Qi6maB|!icX2ukiAjs6}P|R;97Qp^V#NfKNHzT!1-*W`J9R~ -zmSC-E(f7^n^H00Gz7`5aaJjzKRRr#>paPtm|DkGrr8z|H3)Dd7=4-jmr2m-?vGuAp -z-KlovVQ%L4Roaazi|;3xd;d(+uKmw6s2wT|C)RD7`_OHMsGd-tN<+&7QR+{+RzmQw -z=>lz_@qxLIa>z5k%aaeJ>9r53{sS$H;(^&3;ze*}357Jofhg|BB$5lK% -zm!@yv+TOXy`Eja2o^#7|LS0+txlX9-wt24YiyG(o5&IA^yBxMR(;7H< -z6KYd|>Qq+`e15(HU2*2^^ZZD~gbInfck8`lRs{!dLe=2jgvy(w0^#Q2cWLf*ze{tk -z;tL}1+%hHpzv_z;&N~uG&5`+_;=188!cTM$%UYNVb1?V5 -z#|Omss3FB)k{Y-nA5%L~xfyzo8d3?F8=~cEaBoBGI^^8J`a7eWV9yZRG#%weO$q6~{bnB*2PuarZn>+Y*CyUEN)M@TAatHZ33 -z02TIgYL2463cJ3A0By|m{2ysyD%TWy)GQn|8{em#_q4dGdtBG@nAuA5;{A}3^E2;L -z&b!{HoayTJZX^e}?oty!&VqACCM)|nH)YepwALZqv_0Q>5PK64qFBdAodEA$(MpwB(0-GRnqqSV`MqS(x7S%lb -z0Us|upyP4&-Q++UhfPsoi_C7yrffZA4y)yIaBnBpsBm`82M)iJY;wHxgOcZ!FdQYciKeftY1rz=lmEeT;6r)&mjG9;0p9tKN7*Y -z0?jkPHxOU0dNbFQ{DYcb44Lac&rc87%FPy%L)DN=2_L7t>!P^TgL|`RQ}(y1X7(wr -zzrQdh{6zO_a9{A-tQ_Y69Jqon&|RB<(yS@^C(W7}2;5HnzGc?b{*z{n^EHW4>XBo? 
-zy|cy-&f(Hj!xPPJ3KvLHZu;Z?R97y`RPvE8*;1rz#kE_h;@X}*g#1_hva_oobMnWl -zWHu=`J$Uts-(a4m5Fs74P09Mc?Tq40e(osQg7-+teKey6&;G*@o{EcT-`Ic8GPnMZd_9To6wcD*n1Ubo?+L1ZXH-4< -zy=~IhB^*zVAFC5yiplvm^>rF#?l0|@4bJK5{>lYc{pH3lQgObdx@%Z*-Cd)?aQ8)p!>_;?$+a;FZ6Uw1HaIdXb{|+Xr_u2@}DI^`O*Hz7Y?KkO4%xq=EY{~|M4Ln -zGLMJ8S6>muZ4)?;$BAnE>dhpFYlO_>p}!*2q`2M>GQUBFv~jqg3YTR{{zKuiA$!B+ -zDz3xT-0Mbjds%-YqulgTI5YO2cz-;%G(Fb5@gKUh(kWKmH}4x3)A;7dH+v4gD`L&M -zIgf9B6yH`;%f21f_oP_!u-CUEif^ac&Av0%HzU^k*X!F8#W&WZe@HhQgB{W&Zt}_I -zvxH3bhji)THNxvW~rrz^P$+&gVr6xT!CYJC5r+S*mP -zEETTP7|2#9GCrao(vyFX6tnaE=^P<;xFpBT6ImBu0RB4@|(i)LY -z7*(?Fgb5W@&+~(d>rR-KE}v21beGRcZF5T2wWZ!KBjxS#pyIl=43q3jpdau<=JD6H -zWt$R`%W{>hYpYOEb=#^G*R@rfUaqZ1h09hwStqsCD_PgpuA=JNx)j&7jhdX81ez}~ -z339Z_q4^S%u=H~6I~K*)Z|3{4*e}odaXmQig{xKSRc1HU72pM1dE@TamnSXe5Xoh& -zkhz=JDDU>yNEF9nBywZT#jy$D$Nd`ka@Xlmr29SO8|a42 -zUH4;2%Y7U)y&QUsLWkzHVpki7kGEaH2P8l_6}nwn;GBYPR(2Ho_24`_xv`EpCoW;3 -zsfn9!^wr@8Kjxaovtw#r1CpYZSjnwXg1>`Remj_4UHdj`)P+Kr@VZc(qZhndBVX -zA#;xUggK(PextQl@we69F%ItS>j`jgU#E4uxnJV$>|sCr6v5^CEBob&>*-YiE)|$J -z5)$YGn}h`ZxU|-vz~8v=anWS9QIXTEqUh$ffb(Q5R9#8eKI9hYfXtn!$uJ5w^jy(6Cm?j8AR6=$28%^ku& -z`fj9KIp`U|s~hvi3F+S^?25w271uLp8XPld@gnM|#6{Fma}c~86>D-9(QwkY0H~-< -zdf3gvO+!zqpyE2c%qTn?oNGH<4NI=sP1OX7AoH+jnZAuka;Q{A4a?#?^VxkWL7wwt -zuHxTRqfixvw}EqZ>r{5_!p+@@bgb!u5$C9lddyH_!m`DEDmHzC=KVzK=;=h-O(x7% -z(tXn=`6wz5yVg0gVb+|pY*{^0awa7zlWenvyaPp$IR`DTH@ze;E`=!3d(yN?t6-E#K1UPrqdev1^ZjitXWbP_0&$>|;&xJ)b -z&m2uT3?xwwv8E)6Do-?%oJxw>Lazd;kIXkk=a6Q4**GXZS>luH`YbLC^D#e6vNaTo -zb-7qqN)1(E)>0ck(seP{P$NEd;?p2LjZu8s%(i5@r0jsqJ+NXvqBEo1j`7b6{zv6G -zcj)=5L+MaJ;Q}d;Ih>Z$O!^X%gOJZ9S$9#U*>ViY_TQv(@KO~#TY?v>;992NB~b9i -z6=A{6`%9=VrK& -zak<40D7PZT^~WO3;M@gesta0-FNJ!j9WwW?mg&7M$)Ro)Rrhc9cO?t&8LAZAdxjbV -z=dAUKVt*9-gThz4d#iSsj^f~aPZHW-l8;ST=HASqi~3c^CM;d9&+LgX<2?S8V6G(H -zR3A%2nqnr&kM^mpTx6Q$rF0O;Qr5cl?Y1(Ca-r%|y7fWcfgZ@*r&^}3dQsHHgJDt4 -z#Y-uN@}-o+PRU`9*+OP?JYwtG17X(ENPe7EyjRTw|6^jqEqp}r3~-(Y7pa^xjc*y{ -zoDG?C)-rt>Me^bz6_w62H!q`{>zC09y;_B>P+{xLHu4KKsIXdY3X3Nh^JACdkE-kk 
-z6i-lmr{ez?KOg6yaC2&ELUL#tM%*@iFVpE42kt8U0a7eDPj3A|ksq8Vx8_+YPM=?5 -z@?({7Sx7*?ud7ypH1AV^^gDzJ#otVruV7B`*Ax$aUu?Kx@5P@r@6Z({%<}>84TLtT -zQ5-Z|0yH^xK`!;kqo$YS&>j_4?aE)J{^JAUj$1C0i&f)Z}Il4MIfx`7Ew?hf@;rbQVpGys@a960l7&coe -zTwnw;ca@g+m?4(;s;IiF4wy-jL*vR@H@*Hxk{It3y3MVM>o)f(u9F`C=f2R{4GK56 -zpGXIcT`=-CfevaX(iv}$icM#{H%_GeZTv*q-+BfmNe=#L)iVRe7o?6JhRk8Lyc67e -z02)7_D7NF%@I{+1}Sohja-b6`VU6&nE~hi;!H-4l1`g5lN2jt -z|6v{Xi_RqWNy+d+D)c$h4_@x!h2R`V_hYf*`XsboxO~}JS5N^5&R6$%rRiNk`R-Nm -z2Bc5P&~b{NatpUdb^W`d@Tn+#MsXcKOLl5ga8g2YSvK}?9)qwP!MPLeQ^_@fbDPdo -zbC%}KNi-W;RX8oTo82sTsHl2xZZ(HU4t3yY$H{y~1@8mrWPY7M57>4ou9tub{zMGe -zUcYlMQhZ1?sz$in_1Cuy*6)x&+}diDP^lTppypIU=6tnWZ6--xTmzZYpdJ5&lWE6K -zJDGNT|IYca`s*zzrsQPW@zWr4SS<&^C7y)f$4nLO#ChywnO+LFI9s_5)A5JX80R!9 -ze=5e@I{i(^8sj^K(x`*XX=u3~+?z(t&*sBjsXB}9^-{P%17r@TWxDxG^5SO5=&Tn{ -zp)B{FLhohhE-&f64s(bLx~WS=s5{JB>gC7GpUZQOqQ6->sJK2Kj6~rByXNDxsu>r2 -z#^vX_JEyK-L>k9Di}o5@qDQzjsLJB3yZ$fQo0R#9mNU7SgsQl3eja@cjR -zwg1tF-D^~r`=6EcnO~)NBDiuPEG#FV?qK<*{fp?|q -z#QorW;MKhJ7X)bY@?BAQ<1bxvxhbtXJvizL9~q=Qifj9d=OWwhRb1PTgL8M)#x4?P -zew;Arr%`_$gwEYxtGa*2Y$Q2!ScTO`LHXx4AN`(c&GS-&cT!I)uG7o;vcdsn_=F7j8DJqFXJ) -z%2}^EKC1W)>Y%b$@vBu5S$m~1oV0#-Tnx@hY*vk2FWkIBNd!t^#66*n=wA$-K@*}< -z#iqmax-)1(6rDkb=W4TW6`ic=Of8x5r<_#eG=^DAR{Yom&Y4`NGHG^$1lm*%TJDe> -zI>VxxRoRq7RyO6(Ejjd=joFj~y{DiYLW5z}Du>_7bMB?PRj;-wev9hWPQ@Qnyj$_R -zRGico-S~3e)D_H%@R39H)bAuA?v(3QryMjTIkcx9hRg$_FJgC=J+WtpYIX*t)_lDw`kO4Rj= -z>;1nnjGGr%^YLdj4RxJc9nG^t6qY_L -zY&=TTjC~UQLMrsSaGBuc9$sbkQEj1W$lT(3)gg7@Jc-u(=>gkxlyE68iGjD8TyXC2 -zn^gQl;pVzLI?@!w$Xi*3*_KD|lSlIKKKW)9pel+_ovQE_WnXU&QLI1{WNxpPTTFUB -z?E|eUs-8D(;M}_(CP-8Hu^pV7t$B~xO5xV^soJ)vWcH|Pbnos5=iVBobxQJSf=%Zq -zBrYCT!fmnUgaQ)M3TU66RyEwJYM3!41#~P3{+_dxtE%;W^JVsHX1`4kxF-EG;d;|c -zer1i2xtp}yq~huR8BpQ$JLy5=E2LhZP;OsT-ABL2V>xR-=O{lT`J&?KA -zw7eHwj>sfpJE*u$ls@LBaG^OB?(?c!V*eoi-rc1boZI(Hwa=D=bNe)J0O!`+uHrWe -zH_OhXdn8RT@>bRyC1|S(+N6TEnJs5ho7$s<>rh;OS7^WD`pZ~TitDf5B)u$s!)>}l -zRi7f(UlS^h^aU2U=&^$g%^W+ 
-z`*FQ+c|)Pku+?zz4oHpJ$NAQ&dUQ9;Dt>Old^6|3x&99ocU~16@2SQQ&eeZW&Auez -z=DKyX7pK68JLh?IXh>CD?{;Y_XtN4Rzg%5MLzJQ1bhsjOh~;9)-W{miq@P3Q^NMru -zRoXAbnk&!oe3iCJh1MB0DL$yWr3KvEU9I5W?&<^Q?)tK{Og`}LH` -zn9sDueC$__b81?Sn37^X_Cw}2Yk9AVr>E+zTgu|kV@X@S6vh?FrwtIsBohu`8<-xRBc*5U`j|X8&{R; -z2{xg)9W7J_>U{6yESG*;G}h+LU0}lx?p~IJ?*XC>uGN-R*~-df4a^GzS4Wj|92|W+-f?_ -z5N$%*W(a~HW3vQlWJZ`f8kLfg+O#sH#7fB!yFv732vX9@kosmXmlzJH$QKKHrLMNQ>NS9P+= -zo6zrF^35;lcWnz2X*0VDrJxRQCO(32??yPe0P#E-!ZC!SdEA{4b9xe9nZ+Xb8&TF} -zaAvm(&g?e9c`d5(r%`eJM?Wt`fG%%{zCO?gIoQ|-CW_Z0}xY#nso+=6`N!<-{h{x2||0SNg|2!f`=rU^*uW8D-E+L){)En0oLEh*Nr1Th@vSr*a$JmrC -zW1f1nA{Li%FZg#*-hS|xVVaB?*W8YRj~mzAjHl(F^6~jf^j9?CcOyVOI45)yI4ATV -zINJ{!*M$BX{zq1{_VUu|($VOOQvaz7J4~!ZI9dR=V#p@lokw-YHnYwcn&;eUG1n}Z -zS)=V*&)`gZ8T{LFDfI=ii&eiscCj@xX!Zk0Lw>361+t56n_1Hskng@qzg@dn@{Abd -zrFk%3J~*pUXk5dcL^yeHzJx0(GqZ-HxxyW|O0Vd_D^*v6GmAQKQGMH0qPqVoQQcq$ -zO=A#gG`mw0XJ4zCHI2a>cc1jzrMd%t8LtbRs(TRbc7*G7hw4T30K(BcljDw5nd+J5?a=oaYGJ~ofLK^bu&=-p8c{8i(A^DE<3uCI6;Fs}Mz^Qr-;rMuUIu3ttHyPkUQEg#_F6-GX18u!k0)%u<+;1~8ofZ*AZ6RO -zmRhz^=#Ym|Un6D9{~sSo%Qh+p#%tuj$b^f{epu{GxYW2N{Cy-`hW1Rj+_)yp_Ep9; -zy-)b`{F~2ecSiCnN|{~rGrRmNk!W!HUo5k$3E`M_GdR<31=rd2Vx3(tmNIpiLGu_x -znY!Jb5@%npnKg|unO)Lv*X$aAU&b2(XE{a??o$Xi>JGj{R8Jrr%~S4}m}_R>m8xgK -znZ-P~sJ{CpqI&)%qI$^;s(Ku0th)Op&c1astLkz2(nDiR^%ndxUSd@`$V(>;RVR&W -zxX&P5o;%Sfste7m;b<;#r^H-S2Cr0I4$dqp!A14u*NW=mYejXf88nTF9Qngu_3qHM -zqPo${n#M$qyIK0}S`sbr%Xn?zRNZb|m&y;4nPBoO(Z+R2karu`^iJhO?fsIFZ`Qn2 -z$~0hh&Cl#nBl%{Hm}`d3ti@-UM!=c&7`T+_j+aWAhF>aWnlyvvF_{yEi2j&ycS)Rm -zb7s~wCglyI^xL&07U7rimcd!3RpT1&i#Z|On!Dy@qI%QJ8jj{|cS6iHIlu5d;;NJ2 -z%pxCLRNwb9QN8{$QC(~XO=Aja$TxSIM0JIkHH|6xa(`1ybv67lUL819*CX7Q5Kev} -zNWv90BOJ}G?zEU|+ToRB-2u)ly1+$s&2^%>`Z`hFYX((4jWh<_vFk+jkeOBWw47E; -zzg=TJ3crju4o=mR2=`Tln{xL{xT0Bvqgft(`*Kme2(MJV1kNm0z(w_KFBjGQFBjG8 -zW>D2LNMp;Lk~sSkzZ5PnRXro8)zWX5>O8Z@kBtT3R9$Oa!`+2&b?(sLiRu)>(JYUq -z5p%i`Ua7hVoT~eb$KUz&qdl`5Fs|DX-}nuA?yhK-Q;NK1Dci8MR2{R>AqTH!DcgjZ -zwQRHUB$DRX{yA;-c-dx+$4@Ed(4Gm;8;_q-EE(7IzUk9DU;e2dm)tM;6|I?F^D}#7 
-z-f#!5m)W&t=J-ys4bD2}{3_~g2RnD$^*Xz*monv>LGzeHnTp*hiL~x8$@-3nKc~EP42juYg*uyv#S-HS+s-8n!4u(QN4VF -zsO~bu5s5R8G~~I15N)-CffnsyEH7X)MTFGU>N#tP^Wu -zkXL+3B*CdV-?)Z5jd1d1Qwdj8V&+uLEXS~8vXfN6D^*v5Q+2g*U6_2cQH%D>uFkkF -zOtx?E+?GsIq->4WQgtjshkQ^diSBDNb1E{seXop_t<&uBvUP(qv0mdE?)wNQ -zzwPo$Dchi#Q!%rAKqKawQFvw9#=u#&32>P#4@&lK`<0@4+6b}f-Q_+`9$aH?)ZxSt}N{NluoqPi8~ -zXl{20#az<~uN>vi+C(+Vxm+Eo& -zWxPpns-8x;GYBVNzk0Q(o<}&E7u^Xl*DS*;Rj+_Ei#2djec!7^_4=!G^J+HDpsH7q -zM&e!p@>03~1D*t5m9y&C#L7@$_V_|C24@*cjcfd0AWoUP^EFb2N`#}i+MO12O&z?l -z3@LDyp#faVP}3%5sBV+NZ#ILbv4%9{M&LFX{0=i~HP+;;TKer8{BHPVyk2ms?nk&^ -zBbhBOJ}6?!aqB^*Fpz^#nMxm;x8ox4l+W_rF$D&zeD1uOp2GcS_>yTQak% -zUYGM8>9XIm9^BeZ9qqzJNP;&TfLc8$A+9$zb;m` -zCbP$vNDDX1%BHW_ef0L+QK{%S_Hghr8Y``m3 -zZ-O(6ZE#Wj*_%Z5_)VfZxh{I-rK-1(Mu9smarPCPSygZ4xP7mWsV*~ne5@kh9M)hUFdS#CBKb4?SxQgt&pRkwnR>IWrzxBYri-C+h*y^S=w-GMiV>RvOe -z>TS6c{f3z80r+LSA#kc5LAVDHZq%KUa77acN3%REMa(rb@JiLQ;LKtkTvT7(A*$;; -zMD>yxRCQvn{^4J%?s$i&UN^Id7+&%awPbO(=1nM%#B`I%k*C69l0$HZJyiEu1a6*$wb1(!12eO$^ke_YB` -zZwAdH2W669T9Y{YTFk6zJi46xGFMP}O-zqs*O@ -zIQuHhtg7?&y1S*{F4fiW%XoF*R9%m77a^Sd`tO@Xbu+@z-0H3ob4@$Ea;!VRnMD`4 -zsQ%iUMfLQXMRl(kRCPYmkcT@-oP9%PR@M1?-R1ufQ#}g5j5iKW)sqOfAK~P?IR7B3 -zXAzF(d3UFnYZl>^s+Yi-#R|Bn-uI89I`1Dv^|~2Ubpg`YatHrWR40BXTwbcWV6S_< -z^xHMod1j9v>I%TAy4bjedj!JCZMqV!s2t&FmLHfAb4@k8QgtmjRj0s3^$lI3y17eK -zH=04yC`1}9?nIZUZZos0F5K(BCjEA)?u1{)>jtOlUW6+`xIVZ4Euwl5;b2F@%dz(w_E-y*8V-y*7~&7i7_k%rs^BysjFm|0a9?{)j$8dJRtzl^sEPSxuO -zcNxNMxMOb>)!PV1vpnHM%r$u%l7_tEyIDRsvnT`?)elPcZu_mGy3`D+x&&#+O+arG -z)m3Iz)g^n~4R4F7u7h93s|Tm*MuafQ#y@ -zyG3<Uo5t -zdC?sdbImfmQuPWrvseQc)pxvIR1d#hFKn1WRhJ=+#Qg%~rE(t+o&+!3>mC$GyUI{t -z_W1Tz49+r?8rS$wK%6pn&CODVN`#}i+MN({O&z?l3@LDyp#faVaNo^RhV`3e@SDw` -zX_O-kx#uS>gWq9htw#A?cXc|Zx*L8OuNR!E`w{NR2shyFlyF7E2uJg%J1yp#ad@Tb -z32+X<#yT*DIei?5aoT@hw?jXW# -zx%(wtQO@rrj=VG+&GJC=6Qa7n%<=855S&?*fQ#zePKfIM6Qa7@463>kX;isW5@%nn -znKg~dz3x8gw@Yo- -zWp|gDYgXZvs@K4o#Rj;jeoC*XF7Fl9+h$PJ)ks602G%R8^Zy`RUaGo!ue(k9?HcQ1 
-zv&VO{QgEs+H?HBXM7RofM#2?UBOJ~0OpUwyk>IQJCZUPt8H@{0%cfU(ix0*rI -zs6`qb?oNrbuglD;x^}O-NBZqj-3z~r*AGtBg9vvB;fCD7cZ=##grj-f9TRiSB)n4f -z6gab(0Tzr@+MVrErcCvUjl6T2?44!?}I2~O492=_dM -zOKeI2d1)HY2hTIE@vXhTE>Yl4y+`U$Z1(v6Rsznv%D|-#m-k5>iu;}tEgTxv#M^AcjD4-m+DRUWxQ>0s?Pae -zaU?GdcOAkd-TvD|b%B{R9L>e}{fY{5DZtZ3a!F8EK^4 -zX^FG1!OWUQv%C|3e@t~V{4!oEI90bJ-1P|8;f}puRCgmB&AslVm}~msm18{s&Mbz& -zMfHP{z1x1js2(+gs%}9V6Yju(sGc&js&0{Y;sY_&v+&D!^Wapyh;YXcZpodJa7C*K -zNAtQnBj%b-c%|wsaAuMCKS^6&IyXf1)h9)D{Yg=sX9iW>iZlw{@spyu#LTL?Ro;n9 -zzg;uD9DW(E5}c~55$;BWt95%mAgb#Tj^;*pP|P*W@JiJ!;LM^8TvXrj0Z~2t0a4v) -z236gLGU~XP|8ZrMK -zZD!$AYBsJ3y#ayS5P-Y` -zex40K1%LAX^fNU+&!X^8>RDd;;Ta8G)W5!yoq(S=!p{`^2=}#>zKIz)VqI3j`3T~51b7nyY=Cz_ -z{T};Y{(Lz5soT4^uBHy-@zvC6JgQt8nNFKQ6M8E$oiVNnkno>=&E^Gw9ciezWLDK4(0h@DezOZXN#L?*03V6B};Nhh<0KLL9TJ -z9JzzS3)Q?iQaQ%sBb79+`Z?lx;Pg}Ac{;j6#}38TAs)EIcsy_^0^E!OmVq;HCH&vK -zH?30rs~QG&s5KrBT!#Q@1Wtj|PXqk#wAc7eFt9_j@p#}i=l*9-Wqj@!&AsF%j!!Q-vi^O>Wu3I;$yW^#&ulD -zn~bY~zP*L=r@dOupbGlrqVp%^Jc18$>_W#|prjj|&ynndKeitLXZu0p*ungEI}gL@ -z+)TRijYM#skgR~G&_3^>VW`(Iq=m+{g8bcu;)mMdPD@BbI~<({M_u3*2s{b?O|;+4 -z!mBQ{L{&dKbY%>}?}DQd@FybhIQS8?p9Fsr+D{qJ2;Mo|mE`%1G6snsy#|fzm{IXi -zmiBoUg(2W!bHex$@$=p=x6+A87~U5Q@?&D&QEGRG#2h%=&l|_&iUcgce?R;$MqKo3 -zMs&1-0PL^^&i3oZ -z%&cQev-}>9crDt@;#HnOt|@FsPSQE~p)9XKj687uW<QG5ZXJ$@?KbQM)5cTE-DRJX -z>BGzZi)Pj-`v~8kkqb^DhYsZp30Q0Ow*aHm=Ipz7(A8%ZwjZ{oKjQ5rDf) -z1vnj5ApqOgg0p>{aTTm_ca~2I0k}ujgVRw10`4 -z*Wvn&>!c%JHLjCx0F!RRxDE|@&ZRoUqF*y&;DX5DIafzH_`R4!-QYh1?+2&`Nk^GcwBio+A~nSajnQnRHVtcR)oC8b9coTa`!&= -z|73DxUj1;yw;}^>&llt@VBXAHNd6?(I{4qAFeODn-*ws$xF=7_?(_kq-HUF_ -zZp?Umd!IC}<>L0f2tQ2-zjStczdBaX;RbYAHLm%75Q(lC*ZPvL!_S{k-YxhUgrDsq -zTpgn!`AA>DxUzENIuJ~>5_}&7RD+*JLa7W_{ojM3-VE`;jhPP8{)cGa1bz_$w}F2b -z?Ym-pXLss1Lp<;RI#j~Zpz-(-YY2XL#2N#~TzO?;1p!`w0ITQjkk~}~uc7_6ah)r- -zV~i8-%)jeJgGYrW!2@6toF|Lr;G9Yo#^bsxjcfg=yTS9zqjx1ALS9YQAtfC?rCSS_8M224(|Hkj(h;^htPfy?T!1nFk}V|OyLu7^iOa!4M*g2@WV}h4V;@y -zd$D`*mvb+X-&i{|C2twJiv4(LdHxyUdfag_r+dw;S@P&UW?akjVRV~-JMw9?=T 
-z7mogv8=>frI`ExSZLu^Uz(-KPM))Uhx(p|HoWRWpz=hR<0CyljEBMtoA?!e4w(rVt -zU2*KtZHD;N=rexS>5g&r^Cb7eugHjUUgllyM@TFFQRG`-TnkQKXk5dy9w~4x=tgim -zx9}s{3!d&Is$TsjCwBIxy -zFZh=6xPbB!^RJ`-Q}>-|83~?$c4zEjroeG<7HYzHJkC7Y)6c@$?VldiWDOk{VEyb4 -zQTuIo%~vJ&!~w5IX9zzB$uX`ogqMqx;9N0z#&rgcUEc?2s}Z58QwNcS3O^IQ=xk|4w_2-wFde -zv>A^F9tYnwHL~zk<2p3Y#t^Itf3j9f4IhJob?cx*K)&fteqE-_7UJ-*wv9L+hr7h1 -z!{)^JN#h!Z?eoFezQA*TsfmFLt%I)Nhhl9P8&`e$hQ%(0B}$F!nq&KN -zs7^jx{uX4`RsbG6vxn!ciwU+YcJoQjcP?54mgpL(U9G&8!ut -zS)Okt=7SUP%9%U~ub+U|DerX_UTL29UKg@>U53|xf!7u9bsb)5-i*AurrF%Qi?5{oW8=LE_XIF|IPlAX6T^_brh*VP=&{ -zvy8Qv4^G1?WzN9sr{HzgdtHE6n&qPt^}3wJ>ngl{8eZ4D*9~~3c`Ndo-p=AR=kb1G -zXa)WiUX$QFE6z8rC1LwQaJDb@{Olt_t98(QirYdrI6oVh1m{d$1m{nWZh}`QRi3@J -zVc=Y-FAGbAr=`u{&vGA`lle{afSEPII0`snTnj}$3wL~funhkBWF}d8{3N&?kopA}Xc?SCIrF4Y(F@Ri$#}-UufI60EcYFB>_3CC -z?=+r@@jm045N|AMcyb8LPh{7@@yTmCu?YjuM)IB#9HB#^%DB!6Ts2En8&_rIJ@CUt -zI2LiyuL^dR&N^r%K8s4s8`nyZFCxIdpw!FYDeyJ$YTf{!_tc=6nHCyX#k>cm*tjYt -zuRwdwtxB}d@M!#J&S4OriKdE>5KkbRjb|p2w$<0d!O)&#-}p4owW6OxMVp?s$Im}C -z{0i@;nyrJDnjKof*}l!V4keFXBjB7v%f^o+E|%Z@{Uv_Y(4H&;7~PWth^d -z2*8uAHRD>-FCx(m<61QGE%@QZ_{7u0%>B2$7h0O~SJIvSZz)I~?Ej89CC1}Rr_{K{ -zXZtF&=S9&vaGo4CdY+E1pG+Ze69SMo!x2ZQ4V=4r2RKhAM~rJ>zqGd^6lB~Cs_aWh -zXdaI4&UO11Wm77|kgsIvmv}~4Eq7xYml=;A|H_TWkAIcMHQ_J&y!>uiWd=?7%kp(? 
-zO^lVPHMpefuE0gflrc9a(|c^G~=(Jk{!l1;jhTE1il|j -zxC;)MShsOaY}dJvaZQYT41V5^k(vg7v@eL?-WL&oryffPK)wP;rd(2xcLc^&_%wuX -z7+2xso6o?N77pDO0{q2Y_5=BW?gJRX+NvS#5vHk2$JS!A33E@d_CR$=Vo@lA@c%rq& -zWOG2PdHTbJj|~}Hj9xty#zC3Z-C#6<&}S>pZ;2;qvy#V_9`&0lZ3nq -z?cb1TEPvN}{`BZl1jVJ`h#l&UYqg)~uD?fS5Y3G+Q)Y7(cP(g7-Cf|+J!u@e{j#4z -zhjNsE+IYMmGicAiD_PpFp*?TGEv~@_JWod8(weYAu@Y0pRl$E?T}^|}g3lRO0c^jJ -z;i{h<7R{g|#48eO;4i=oY=2Hjl-KLlz@GwPTgFwuHziR!aLI34z`%r(wLX9<&<>eb -zJ~-PK7>^4mF&?Yjd5KbY=1lITFRg$-R;mh|ajLV#X+)fF$*J#|+)Ez$E#%#be&3QO -z$%_42o&pcR&aYbyfpf#3LcIS(oN0HB$SRsMvzCwM1$RQs=|yn8;-sWM~OpW%i?DndB!yW+ZTYdeW7tbJ$7cF+}8jD -z-}DWGe-{&f0sLv0;LBNf&fy?n=MEB$pEQFi*fGez^NeezytO1BoD-)20sbE)r_r0kGlOm%o>_1XPr@Del^pML -z+$sGl-(91B6}tO>DVN@g>vk%#Ytt<;uEM?pVWp9iqH;5}lqOJ4a! -zEZ@B6huMEQ5-tY60=yjj0C=@=T`i09uu_S06Tg6wGJ7hr2c7lcY~O%5ywKSO{wAnaX=NOVFODe5J;9B3XNXe62NuX8Z$WoHDKhLf(J?yu#fK -zzK9A=!#@Xb=7>9QT`on0bvr*UXrcPGVsa07062DNEiz5WPZ<%v?#EuE_-Bg!+bsu(8UxQ6*L!W6&{d7Xpze0SDhTqhiPlX0!kPcjv%mH%lrgAT!tLH^xhJU)R68$v>R-A8^e -zA4bn%LmhIb@0X_w4_hnsI`)jAAAw1Hr9XgWohrOoJfh%YHCBf0};HaW8aIUBxaPEwo -z#-$SbWG-jI%5B|l&mVOUyVm5eR6HjQh2 -zd2+pFTLmmqUxOo# -z@dh{pbY34S%HzxexIYtkkw}qO^9@1qFHq7J@Ll)z8rMp&eTO^#fGny`GiyP3E<8bw -zxjJQBH;1FxWoC@)0FftN;nUUjJdosgF7Li#hI;Fu2|W%0y0h?pa29+JoCO~Ozl868 -zhRqO9XdWGyP^u-296xVp%EH^hcOFQ1ov7@AfeQMJ$4lLBJYMQ4`1w)p?Vn9v@>m`p -zR?Mtx@t2r5tGDk@URtznR-GoaZr*+EMvxXCXi3S&&P7zZRaH~Nlzo2 -z8MA8|8KfrLpV2$0Q7&L)l7r#v7F?kdpg}7{j1xZl)n!4zzfUM3om?9SzqL&Xu!-m3^Wf#UWDQ_`gV23K>@dJK?aa{>Ki*7WoE8z+5Be_!S?OYAuUp;U)2Hpq$H{ktr -z2R>#zey|%ydpoa)P6QKE2yiJJ&7R#M%5)9wUz^ONpGa(?J$2{3ItFq3D6MyfkpkDc$%pCAtms5>v7`86}XZu3W-RCZnAJd?mChMRR>*-1P -z!(QFsyqTgGoJ+6IxbCKW!{2XQH*@2Di7mhpKWkhDXMn9Ny!>@x{xH#M@T;+6x?Xo- -zDD{SW;l*-z*}#hFML({XA>%qi_oMx=aUBWr(JX!@jH6J#Ad_Z5rNpJ&(E&#kxN2Mz -z`aKd_H?9ehZ@>?QRkY*qQie*^w};Ad)ii;>5bgWGUj#k|&f96GGFh)m;+$I0?^_O7vX3)~9A@gc5u8ZVfb2C&f@f+8%yatM! 
-z(Vl@@jO!4i{UZ~D=)h7BxdZZ6`RIt5H7{PKoJKe9;%neMd~LWB;_jteaL3z15^o4~ -zxz^4BRBXO+m7k9~78}>Ju07A){IJ;JyB_@>hJH=p4Jca|IOjt*;&51c(B5t=@X|Bc6&YE#FNP{BW7O*UHxtrU3+0&`7vy1O1O`_dfD -z{BW-Bc*vKR@am20mavKN4aPM*c_aMrgMcQ_{q<+QCu~Ir25vX5fq83Ihj9%|-U&Yp -z+y(xZ+?{nPk7R~{_lnJce;iXR@y1Xgj%YbJH`7Y+ouiJ%?}UMl2Ebp8IXrAUK4nIX -z$EQr;o6enRweU#4&PfJnv<_P8KVpnqjBBaMTM=N_6a%OK!p@z9(g{9lvC0gpjMr7G -zjjJ;9M&p|3|02;QRTka`&LPY13VwKYSPxFYdFi)= -z4*bM0-*{Yc3EJ~f1r6Ze&lP$0>Hz1%D|?JbzW~gPM$v)$+LUoE_5Y#N)8Ov}pE0gO -z!1ilV`9 -z0H>U7gkk%fxB2WeJliKd_uCug -zFVJ1)xqoy5g;ZDv9a46v0%!Ybl`2d+baZA_pPIESSX0ocA7e5buG&=xbq -z15bg!3^Tg$=CC8$gMXqKRBT)olb0A*|4r~yA^Z{AQ^)d?-lzU=qSEcZT&|yYpx?{U -zug`eATYuE8Xf{i?b$3X;ZkSoK^iNZYVkWBWEuwl=o=nt6hH7{6isYsHYVi*8`U~7W -zmnSbd?~4hy?~3FlZu#!4;-_hS -z#`Oe)e89M7{a2K05dO)BJog7Wp8O442dx+P=@A4-;JZGf;CsNwjq5nE{UrFF^Kqb? -zGDCcz=D}Zu`sTmGS5DjWDS9Q~uSWY?<65DecgkAEIx}ddI|ljpARIBk5FC9>hAi_M -zb*B#`FMaG7x-tF?_=dab(K7gLNN)}1*Me_?zcV)zOBVY(gZygnYVZdz9u45;|MJLN -zml?D!=b=NlyGwE`>NT@ghi3W0nMxRdTb?gXg1-)Ori|;n=B*ji#&tl*=g^*?h|GKL -zZ`q)sxYyT31CW;(*8t?L#&sI8?(N|FQQz)ee)vS^cZC7rJMu;2THj{e#=7JVl}deA -z&8+pMdEFfsb9w__S>J+phiEsUz9r!NXsjQcG6%sqjg~yO6Jh9IMF%=s2dAUF_k={R -zgr7R_8t?}2*It1Ap$P^C=mMvs6>uiH22MZueIe1Eg{kpNVBnj^QseQdTy8u*m8;O6 -zGq4)%$$N!I`gLl)8Dl(e9hC26D*s*qXQJE2l# -zt;G3I+-h7aDm<*eHaOz%=5&Em!4NnvKrex}BhhW}AWY(y^ao|ZAV2O3PQ}&WRNM;A -z)zJmc_qo%`3#1G>mQ)~DFrKJaFsz?ekZ)jQUl((F#mrhrnpY8yGoC%_qC(H(q}$X_zE%1_|1w(5?F`PjOdRX)ud@JjjR -z?+^0%BP&(LH4Lv4Rl7q^mMrSbtYK)bcPGl_O-lp3GFmgd@{`&Y@3jqHY3_);7IkLv -zDqqo;pmH0NrnaZYozeg6^O^K#=`(;#_&$5kXEKaTXdaEcrpK~)EgT5dd^HwX3Y?2| -z#CZG&F={-1gc$eyMbTT(Tx=f`)+oA%F$@pI^e!{50AE#9ezLTg<{&E=TsLFVJlc+Wx-z4gc>)N^idqll) -zT|(py!rk`MlF3~^6xeC4wf;P8_kh0^Wf%p&349!!Ti-Oewf8%~91J`4ktCMES)O&{ -z@$zgKkC$i5xR&RkC{N<{Qa@l?9~yFQ4+B}3bg!?Jy^?3u1!h-=ypB<5T>X((p#3SF -zR9B%rd986BiBBcvkId~6b!JfId;^m*uFA>nYFOYo3wZ@3yMsayBmr -z&Z5`5FdG()>pahgm?dytN?kRs>ezlA+}isIv0;X|>`icf3cYPy6WWi2690Veswa(W -zLTsP^&sdNghXU&mPpA+99)`fh;Pg|9z-(Wh;i{h7HvHskR`yTETmLQ@|O>jjCX6SFY98y!}_-vhp7Jid8v8;@_^ 
-zso@ZqewrdK`c(l99o9ih_i&7HC-@(6_~|jOm1FxpaJKI^9#xJ%!7>WNEl|9Wg)bV{ -zN*v9VKkT()Tq{9d{*h3L$6Nbo{HyK=hN@)7jr^nv_%pyeJ{sCH(Oz)+Sp}z`t&i@x -zB#Ne`z7TI(w%z_}*(H-BK8{wF=6rBgw!nCNP8J%EPnlxl8lJmnspn^3t1q(-nh-nm -z8`r?^$H>irS7C&oTtE^*-#t -z-Qd4{NWy)zHhDo&uR&d_Y~1hes!d+%st(D?4^OcM5p)E~C%~D^0yxev6WicyUosYk -z)VQBZ%`kinfm^|OraA%sakQTW=VC~F$_Lh}mCApaSNW%&>J3^U8k((xmY;7J<-Uu< -za%wvWe~->MlfQ@EAu*@N%&cB%F8*}T&w5pXQ%)WDTOhL^oHExu_a6ZAjJ)_?&mFiK -z{3-nb;hFV7^G|P`2i#ffO#cTC=C?F?jGdOQoVnE**AXBeg`chq3d6{4!|?0#-968fD;Iy4 -z8|-bh!gZWXAO%9KH5t@tQLc>dvKE2F^={_2AqR -z`aSnEjx%c*9T;F8oJ(NaxDM=@^W+bE<(&%UA}=(qa$F3^l_%USqZ7M}C@Fdp<@2EOZ9*KoEWszFN@-kyb*d?5tp -z1+aQ>9vlY0@YK+Oh7ojNVJF@FQnaIUX4YxXo8dOljlwsU -z#^a~r<5_t5m(ML={41VoJsxpBP6y-}4fV1mPMKW`@(3KH`#o>N*L?>~hy`vjt~xJ)j#h9!N~y!R>Sp^c@L%yh>25Q` -zE73d?65`CMzU$mP=Nsqd{l-mdz10_R2nm` -zD}$dXj(dLgM~$=AK`U68si6FO!MIkAe9^d$C_hwRf*(F_Yz2PK@DXlnaKwi`Z5od& -zOU#B!d=|&M9OLo!Rp0dOcX2%ftNd1Q#KlmTg*Rs5tyy^ae`fJ7JURo6uflrkphHlE -zA!q>SDr`2cL&Ns1;B4P!T;is8?ML0guxnfBHLeLg5()Jo0C_+Ba83+>`wtEM8E|40 -z1`feI_~$Ujg>$~7@%_Hc_^vUQq`CsSe1IBg8c#bj$P612c6fiyS -z{a>e!xZ(AF#~b22d3WFpkZ)Xv=1~}$0(b0M*?mgPtW$&LGIvtUuCFn6!K5lnm}=wk -z!Zu`S-vj=vTnTQkA#gio^EZI6nL%|FL&tigBfVv2)j@OOyS^;SImL5~YZzdiYOUw~i(U*|XB{+O25vAOPqYb~?VF9q6K#Y4%XmjdI}CKxg#eeKfIZ-B-)lS` -zcyb|3w4I5m%e3mWH^kRmwek4W==+{;ALrXy_~L(uz?|Vzi+d8!llakf=TqDSzJKlx -zoyK)&E=MK0-2RuzlIS(Fju6fL?vR+%1Mte3vkcDk*0b>HAB607>L?*6jB9qs?7Uh2 -znV9t4{qSXS{ylAtG)Fd?b$gnU@@RMXzS3POD9w~=e|YZDGvI7ruoT*Vo&`1@AMs-2 -z7+JrMx5CklNmb=vZQx%(fUztA#?KBAIV$~;PZWV8Ui;(pxeaSS&cc6kZeI0M&vjA! 
-z8xmbIu7h?3rqptVyDwdrytru1{#S>M%{JVb>twfWdUHU66h=h=O$8WTm`dzIXK%_oUwt5?>ddvK{==3 -z061sV0ywAgl5rJq00LHwM^7E3VGWM>lQxAv4@1KMJ>Xo{jsFwclNYZBu70BLOiFTz -zN*dggn`3Ku0%rPa`$ZTbPO)}yR=mr2d`k9!TYEn>dd(0Yp&@V{p2rfYN&x7rVyALa@j9KWm%y)a3;E79E(9BB@!E%_Nt#962I~R;vG`PRq -zxE`DWnvAP3wr>H~_P+kosMQSdA?Pw5PiPbEzld#NZY@-33cTyrfqyCRbYc^RF9$k4^WcN<)ASqlBl=Y_4Ly;;^RJ@A5I6%T)7r=RdSTi0!RIVG34_U!|A@H-ze>DDOF!00lI&f}~DdX_~ZD@aEQp#$t4&yp` -zm+=+d&inSbkGCWrQPg7%bhffVpF7cw9$s7XLkeiPG}>8Y@2L4bi^#J_Me~!e_ua(zC|nx>wQu8{F4Ax5J2Xq!SMW -z&XF$6!c*Y$80i-9og_rD_evgu7&CcgMdyXv?=@=omV=>Tnxp3 -z4IMZ%-QcXy7Wk{&t!=SW`m)4%8YK#IH{w?skL#;49@p0lKX1a!=mfXI`t52I23|lJ -z%MxH7?WubioQK-pJwX^RicaoH2LrFREug~?D{RifrymIx%>-B47ya{bK -zCj@v1&Qj|42L2#U>8FhA*%F^bIqmlRy^L{xSm6i_ZUAcoY%#B!TDvH -zKKSQj8TvhU?(gMV(U3LLU~DAMl-C?b;hQIHbGbn>mveqHXrkl-;~Iv%`GVkwrOSV4 -z#Ql}i$D%j|-Vl9j^L;o2EitYE*uD&$?aPfHmi81@0soKVmD5TX=%^L}9*0Cz;A~%S -zJRW!$d}mwGhnN*#=t~rzMJ4XecDW$An#H{4&7*mtH@{sx=1yzxaWiY#Xr6R?ZW8m9 -znd8Y$yMtm*&!9I)HM!3R)oxtU&EQjPp -z0RJ>L&&0!nV)FbfyhM0(e9!>wP=yYxL|qo%kcD?<;R}JM6TMK7U*HvJNy|{u0&rHS -z#JHA}?aRQ~zT9}IsC#2a@?lT=9&%_#8{V`&4gU08cXx-}J;*~>@gqX@^Ds@C!JnU$ -zMD5iI?hwE7lHi96uNBK7^GO;;e&?Y72v6I_d!D -zJ=wik__T4Y5Whz`V?4UXOv7B34)d7~F-OUwkmwJfYz&-pV-Z~UKwp1xns{Vz#LY1e -zoO5LwoRzD4RPe*V9pLbvP7K4qlFop0%r?Q{NDiGLFcX~xXQH*2X?xLsSpH(B?UBKA -z1{eZoLbKp(zW~mJN-qzImZvdGTg`ywp#zg>Gag?Zn{d>Du`j$L1o#nn8Te<6N8?`w -z!;_IvKREY;5%5I>7&RVGbQ$gM&1@1PZ1sxsUJ@zPA^0d9^^^n!JmOa!2z=+XN%}5< -zzbff!{y{c2zM;>1wC|^hk#`u^ikCrnC)$$_8P|L-K>G>!vG)*uVNId~-=|I?fOQa; -zA_VREezfH=K>*ut8rOvQ_G{}g>2r4|EA?DE{1_D<24`gp9~;{58VTcCxud8=t?_!* -zPs1oWu%u&|4$_`{-nb@u0RpVT4=b^T_F8FQf9brA0PK+WxS))s?lm5t#D$L!?OEz# -z{{#i?;M`xj!TFYa3jDPBiN=2s20B^={~-czfODhT0_TZ+ -z;z^+rJTpr^={!IG4$B1O#@6=a(1DIR!0D(9oPG+Q8vJms)K+A;#OLHHJeV=;=831B -zo3DX0K+fNU_T;5;?)A&Hi?~55qOaC0d@Ku}$-Kse5BHa-MhY -z4%^xI^UvLW_DJCLzY^n_jXbd#GwAfvS6=Gej>@v|2JmB8BOT}F=|ra);zO{Rg{O}C -zgyQYHjUSez4jt`7mIxYT%DahY)nj>f;<3~|K`#^Z_`jmH%izAz-jNmqS!;1v+o 
-zd-ZuYNYVU`y=%Q%hxiz`z9=~2QF$7i$29q|fL^)uyv2L(k~xRz9tx$P&qgiRDuseKo2+rkF*8jLZb4oMk@!Bh_yF;tDKUOnT5%s4F&0I -z*T7u1i)hbPRsLQtAfC{C#HIe~h#S>{b%<{;<^A4~w&x9EmEiPKWjyYu&bTI2#`gdz -zbfDrE1fb$pw5Q@xwC6rD3H~OVE7AI!GDEyX(+F@BlV}-^xW3D84He>kFb2-{+qdFk -zDrbAa`-3CS_MVf0^C;MBTm`Taea7RHcWlRx=;!R6+c6{(JPIy@^C;N-0iRGjK#%cw -zMf=b`s%TVy$vXMLpp26!-*|iy^%#!_=!G9{;R9Lxq=(^%lWqhZ$fptD0?dg`wC501 -z4timlDE(Aq4*mygKEK`38Hf#nB%vrhy&Pm$f -zyd!PTNm?}&_SDRkf-J_yb|d>EXoV8nQQ6^t5>S8i5#q+bq=l0wm7oJB;|e%ub@NAp -z0Ja|k=j6@*sBfP>EWtQ?H_f2@cNl`iNC@zI@aj7Q=j5#i=j3e!m&qH}Uy~W)v$xrJ -zd@^>!5hriq$9zKZNm^`N{T$udxE?Pz^qWBybC%AqgZt@+w#^WP5i}swuwVwzHarHN4xaw!7!x@9{%n6PBGcj)bi2R#x117+EHa=}U{3H_O49?Y7_sO7)i>+kLb1k)vvHVM}#~6=QE}iH$g9hfL?+2Gvwe!mR -zR0wc%XVS~F+l|L3Z!tI%YK^$)*TAqiGFn9sr!p9zWXe8ed}i^1J@ZgsT*d3 -zBbK@uoTZ)z=cFFHYsb&-PYX7JVb@VH?;ATEc3na$24|@!!FNh6BI~~y{P;@S_&1m# -zUg}2U@ltof5lg)c&QdpjD+FexThlPG)QjLO^$Ivk-SD3w083py7dR(%r}5}ynWY;* -z2hQpdaJH}db_l>yFZ@^FoYafPtMb~8saxhl0G4_MoTXj^50#eqP2UNQ -zSn5`AmUo38UVPL7Jei9t9)QRQ5S-E0xmbx4q?d2zRU|^}6e-;9;)Gfy2rEWDI -zFLf{cu+r-h7yXB0rEXsdfm!NKaF%)%oTc6X=cH~q6a4SxPwG_vJQ!H&UT~ByF#^s~ -zPk^(1>VJa&Or@joU;0Hbu++=O&?66gOc^(H#7)IGln -z0a)raa8_;uoI_BzmhmI{Iop%J&KP#@2ZdR9H8_W$r&1EE_%DHTj@5&+a^v8v+!Qz~m%nZPRXnJTnB*_?KP~0&oZh!C9dRa1Oy5IESF>p}{|g -zAoWmtuzh?8w$On?(3KY)@s}5tE(-kBsA$ftOv)ha3-1%Z69PdgY#!f+l;Gxw(syf -zolpZibXteF{4NBjK;Uk0`sp#Q{@K0{{NnT7eP2jk@>rI60RC9cp{QPu9fA2E(ijE5 -z*u8L4Do^tSqq?`NnL($_tfDvEXT*Hd)8KrT&cWy5mF*Y7S(7D%Vfb}->I=zBYc|lW -z5>JudM7Nrxq-d`#@W14`JHD8_q=-MDlJkf#GyleY=Zhkm*%X*rN9b?d17c3oTw><< -zL?{Jkv~on_!mI#)IaE~}*YMWfANuQHV270PctZ6Ez~2mR0H?4f_-Ff;N2GndG$VFs -z%@Vi`oR#Q6VE(FdCpdqhx5s$&j0tw=%NQgfcIY=AufzZX@JEaX!6|GQ{@a3L9(V$ryVYzKzI1M$PUKwThd=|+QND5A_4%X61>meuiE$lDwl4!``|^m}sTY@K -zwRO;hZp3kX@X~W9I+KMjX5nk(doFNyOvz2WR9;;e0_8kZ<@i^vaXrU-03QX{8P{_> -z@_OT1!Ak65&BnD-#$PS}l<)nTL6v_i8KLNpHgG=IELjv1ok06B;~MyH5qR9)KP5ZT -zl$kX?%`@)6mt;qpg;$QtJox!2OVcAm7Pp`*y^ji^OEV%|3auU97BQ>K%b -zT=WchnTovZn}L^S$RmWLnd|;?GVkF0uKp6kUJJ%Gt19?g^jR&#AI+;#R@bg&@wy4l 
-z6ZUOHdnTeKF7q|hEb`oMr==b==b2e+ljkn`YVy)uClAHweFH|b#2uHWMdj#CbEVra -z=3`Z6j_+=D;MC9n&NfmcSWzC6rbY8yAM+W5PZ6K>DfbTyk3BV1n0i6DuJ@@Z~*`d`sXjR#v&3L?c -zbKo4!Q6SHLmf=>*42T$&7dT)l={by>4HXin35a -ze!aiaxK@z75$&IZ08MC5K44sR@4=yUxGWtMe;{)Z;QC-R|7k=TZd?oNQpA9N$@;)!9-9XyBtRc25}M==?zjjN-hx$f$3%R9))Ojh!D%IqqHycvNm -zLc(qE$Arhxo_S3e*QxPEAJ%M=My*6sK=kt-K?VW@jFRw~Xf91C -z*P5)^Y2u=VHrr*`XcMI^bYn}_TsFbf!ew?0iXZe2nxUwmSOZIUVax6kMFqt%!KkE- -zny~B&OZW&x2%0LPdq3yAvph3<)&05db#;E^KIgul_j#Z9c|Xp3&YUD#C{-2c5w}?J -zdc-YJe52~Ga>K`VaD(Bc!NdBm6*sB^`quq`;iUz)sJYi}cdj$r@&^<}ES2F&Z=J>`=|mbfOVxOORC -z@1gc9USG#}XGV7qbQ@XHCQl3Myo6`!p5#wb2bTsxl)N_f)3L^vLWRBAwiBEsAyUg&h?kbeMe)FZ$@OCw-`UYihzW#J3 -zT%!`I)Iiy%c>Oo`+7*9B_qgK6sfH!bz1px&wbL3uH^C94WvN~)f0s*8sw!A&3W~00 -z_F}_J&)=#7SYmkTdCiv_K6ZtuRQ};)>IN3&qXJdH2sXdc@X|#)Rcfp8Nhfb&SL3Je -z@*a2Zd!B@)XH_EWR6Tk?G#FmmQKH(hR#mU<8*Trin~W^YEE$P~?=x+`Q`NN?U82lZ -z`QD=V>B@fVAE<6`R6TTNKKqrQ&mBL`Uobyl1nDnsN;XSi(^N(6s^Dvi?=*buirAy< -zYg9vyE52O$=~Mhjr+b_cepU(7m7@X0k5c_PX!zKGE-U-V%1_+9t0VkD#gCY0?j{E{PMfZT>bp`FJ0>7Fwoyx!BABa1m9O(+W4KH1j -zqB7BAcTeRS6h9cL)JgN9eB`j6b -zV~gQqTd)<6{e~CE5xH0WeO(pv7+!i#kL?7*OV4ROOZiDt?U-wLslHV8T&}}A|L~?v -z35!et={a4XMDg0b)bO!AS8jNzSnp6Om7hIvbEulnO65pzUFsCS9EFpu2{`*(PlYQ3 -zW!=09)9)-DasC6A{mRXB)W^!X=I&TRor)bI&rn^vI7;rN?79t?x;=ATe}Nev5Ef&p -z!0lKka0m7Xd=n=eNcoUwOHeGZ6g2|h!Zv|#qf4M1@gEWX5_tmOL6rj=1^x;L1(xBG -zKmap(Y*^~RItTVRaKeF~Y|Pz;}`IF`*L04%9fX&4Dfl;*Z-Vc>-0aa$uvt-8e`{uT%p&&g~4ed_oNpPq2IM -z!Hg3$z6VPkSm(eV2TlmAK+1m-zK3E5Y8=?+K$ipYCvB5Ff$yV=kobL7b2VJ7ExpzR -zuz&&lK*_a^+@R#|DY*g9ob*gj->*h$meUgZ&#TWu?@7-D_bh&4dY0dWHl6h(Agzx- -zT2J%G&?QfT>t+=8v3~vSaGmiWQ=tH>MAnZSwi;PhnOO)?Mvu5F8RU=lK2I9XN<9O8 -zo=i8tsy*7znkV&>$v@@EM8O#zt>BTX5bIBQZgns8VEH9as_P5%%W35(ynLP|R*GG@ -z_^p$&7FOe#e)4_fUH8KGDLb?hi8OWph_7G$ -zmK7wNquUbJLZM7-3xuc@;>V~bK|Uk}R*%FF2U(y+^ls|zA`i};^4x?Bwv$#lXIWa} -zo?QFGpN -zD;>0*r{ijrvGbk04W0KCe2HWCEgnoB;Co?rD$nu{^yDLba6fho(B=IIv02icUuyjm -z$NDF>wW|n)EaaMn`inNKlFZ1=;;QzHM7ZU&$Aev;ct)h_|H2;WS?Quv=wP9A{laUj 
-zgvO7^sTh{9IBPeb;kdIWSN;Xg9eZA}kLlqc$rTw;q(PtD=0OU$l2&ce}) -z!~zf2aJiH7KW6=_KLS)AafJVIBL6>}$m#k!i54?8?S~DhkGCec@|=aM#j-Mgp1>_c -z5qe1{>Iq$fsZ)60kT%jvNY~H*&C_W6g0TjLBdrPP52$6Lz}(TwGO-4$V&xJe%Yt5r -zkjVO3bE}c%YSe%ZksFlreX4X3hAGQRewdd7_mZ#9+7gxg4J!GZOcq1t6>QC?yfmy# -z;uXJEwudcp0amcw-68bFHIlB-$3%M|I({=cs;a#ggO*Dc#> -zHZM(D_A)PsQ7s#aYFS}|-Lj<#cFRIMi)xv%wRiA$p+k)CLYa>+3d1~`sNe1FQ3J^} -z60;Mn)F{2r=rT;HFQ*>$O8}9V!epHCKRqY1mItvd(V9c(B19Q)H(z7OsQnpyqiOXs -zlnE4~el&|#VB=`(KAt5e$$(eSEc6!jP83phuN{faeY^&*!zyzs`)r=dzBH=dQg!b> -zfDT%7+j=62^}5iX&dHmdY^CwoH<3yq0?AC0esB4JN>bcNJd(^L=@+3pRg$Q`Og&xe -zkt92lxj&f|55qTxP1H~2i%gV~o!_HO%O)lvx@Vk~ -zkf`63&%&L;=aY~6r3>?v#@^ksd&XJzZQqp~OK$tx#>M2Z-1LSl1H_kS9ln!E5tQ?uC=87Tm!z#1^iY+l^E+qqFwpo6<=h1hd0U1I{1-L$D%`IJvg$AEL}7f!?AL%ktNVV -zGJl2K3D&AwQZL1%^7A=dg=KX$9oD$q+gs>ctgy69!} -zH8DNjttz9wka|?C=C38#OHmb8T|@W!`=_mpQ9iQvbLuTYNL -zw$8oa&Gr_>{V>;7F3qldHVj4S%@}~ -z^?ot3A(YY6fPRtnelfE0Na-T@xL1(<;yLNBCVT5Jv01J4`dLK6C@l9{sqQ|@-W#~5 -zj)L46Pt^aILf`V90cWXd`F$u==AK-Uy|gHo2k -z6za#Cjb`9D#`UpsOO(7LR*o|%a7y=PShuF@x2Vi-580#c3lvVZ?u!aEa($HEeHFl -z7l?(EnYS;=y=-aO6X_e&3Z1Do9viSqWPSS-ncWimV+dukzQ@oZP>5j~%Uytdr7u9v -z3_JC5GsFi=Z8PXe_jR)xMycqh%y8mo+GXlz($nMIx}{pFXr0O3-s3np)2`&wOnVz$ -zcrD%O4b@_`9;afJ$htOt>39O6Yw7C=bO?qwAZHcO47egepRm>qRQ)0_>o1RM4wUznEbrvcAS- -zc~}QOQq?IWUQhWBHBXP2sGLX&Mb?3iPp}KyrI|3w%>TOU?fegm@z2$$s5Zuu10ZKs -zj9h1AS@VGaWu~=emhG$GfvRk~-*#kE`vM2EZLKgTW@-$Lz8aJl9XT;EuTaSiU|8gU -zYVlSzq(&o$GbR1}D954v95k{tq8S0o?kn*)oWe=cZ11h)^opEvNftF=VdqKSql`hn -zoSx;DV-5#o-5hzO%^&V_BXMOenS{Ay%E|B!HfJ6uX!bl3jbd_vUqH;J%VO-9LmzAT -zWrCcRIj0Xfrzo9I`@T6=rhC5!KU}~Z8M{|(JJikRt>##-Q-7-%4{`Rpzs)aP=3BE} -zKfv%@I$vqwHU{XrWC!8?7O!YLbvIgeo$e28YjHhU==&`@c=wA=DfCFQ@)@}MBU7-4 -zb9n*%wZS)^EA=Ha-Xxci=j+lTpE*r*&t7%Y*0@wnx~K&9q*Nz-C&nLx|w!=x`kUg3>GS{esed1O!eYBrt$pff4X+A5GX~6fR<;a9nVr;0^hUbmm)wwkMM`cyT23h@tmaN -zISEsadgQ24j+Si|Qy$4TOV828cj4Pgs6s&CZiEQ#brCn)XxIebHcIOe5ZHte0WHtd -zkLP*HeegX`{xkvtXAmM_(hDNLK$&am3t}%sqY%7rCE*&2UV#VT+fH~N0fBCW2=1;( 
-z9$%DJY-h<59=U#lz>74>LP&spA#ee{UlQKt-)SOv5h7q2bAf?hS{beejCskr(R*x% -zGbk73;EtE98{BdE2)<-3bT7(B*r2zS=SBJO3zkq9ETt~EI3L~}#3j@P%c%=4qb|6T -z`c7-1w>IBdv*!N|?%2tCT3`VTUWZ?B -z-|GkpR-ZvwaPI+l|B*QUApC;O{Rj#U9Yk2LULkJ2EP#0`E1n;kj%@^Sp -zJVaga*hPc|J#WFgmzet&{DN772nz0f3t_?I)c={-I0(OBWf(!hzCnZqSH2DJzYvo@ -zgI{oEJA#7wze8BCx&z*SC9ds&U$Ei}1O*EYBP^J48Qy(z{to5w7ydR9&29@13sLkQ!X(`LN5K0svFLsH1^Y%KC|Gg?VZq}^;C+RdY{4(M -zy&FNn{a%CxSAPKSeqy&5e!|w!@N$|cwZ2c5|!L%s|3O0O>u;8%_c;6(Je*wSXPzHj6nU@h3 -z%$f@CA!5%6_yxVjLS3$CF44zZKEU?X+GxLkw|%_yzY;7p%SkVZme6-xr>`;LvRd3U*Q#EO`{(BOKWG -zJvstE-S@Q=R2<j*KvE7b)&m{ -K4T4?P%>M$&%h}2R - -delta 165379 -zcmZ^s4P2dN`NyBb4scXdnCLi)3X^R(QDNh4iW3wS6crR5DJmE$<|-K;|LM$Q;$xmJPJS-R -z_suO)Ctl>sd-{?S^A`C&-E!B`*L^FVzV@Uu&+#=p{kM~&WVo&xyW|eJMLWNby}1h}z#` -z&OX`qK~!(cmixYSg0Ii~GR?Q-5a(#;x6WWx^v;F5RXe`B()Z8A!$!UvwTWcMaVj6k -z@EyP7hiCh`{k}qTVX^OAU*(Q_i+xLczRn$wpXd9gFDk9wJhFRH&=g9Lkcl@;8w_=fRV8?5> -z_+CkhI`Z@#Pi*p0PnaEN+~o@`G70zi>P+gleKY3gcl+vheB~bBXlm4@AMUvHC%*Hm -zedFeVL!wSOB*i%WSL35DWfw~qM}3xxyB2HJJi9n5 -zkLU-BqpqgnlA}b7CDTL4%^gR%;qOhzb%J@5DsvZn{@6d>w&VTZ`ae3>G`{0ce&8K{qnVuW -zCqL2tj(^#jbJH9r%DK>~50Wiy4L&Ka120IH{Vwp0;G;W!7aP6eFyD~bw>WwUjgDKS -zo7AIduU?8#Q$c&dspG_jeU0NcQ5QPdriCjPNHJ=*@m`6U;G)Sbg3~8jBPp$Exk*T* -zTBA}Vnen~@^hY~#IlDTbvOlVV1SG?TD|suWAn -zD8+UulFSCy(xoVnqFahyDTd77mPB9R8#Y-<(MwEb5@j;#zNZvwlSrvIyGWt*+@e>C -z?NaQNBGFU?sY~$5Fga -z+3S^JQi_17NG3Pb5H5zK7?Wa7iYl{>wK^#pq-d6+TZ%y`#-*U~VS{+H^>`{Wr6}Oy -zbYHdXZIUA1WSu}&8l`YdE$`Jyu~CXyw+I%Wxv)d*1aCLH$R;>;R(Lty>|>Y7QjAN{ -zX(~=6*ek`b6q8Z}%r@2%rJ#AodmGIJ7k-nFLPft@1jkFlhs-xcT&qlqYAG6|*eFH2 -z6#J#fFuqhW=#XMuifJk4q^LHvT%}%$HYw($NHM!u%aEc#iV`U{N6RCXAVNtk6XFucMH?a73jRd -z$T7Y&vYBF(#_zAABXTpNrZhTKirx2gaO`6=nntyT`Tgm18n0MJD1RBDOf#`8`e<|e -zvgqrn1L*yA9Dg|tdixbJ0{wgqrfq&sY+$*tN-&*>-Ob8{W-G}w&3?X|eD%p*waHpR -zFx%8}Q7lEb6lG@f3aZ&?_OVuIQq!qumttIs6jQ-4U5YxlFwNPXGyj*D#GTdz3qSX}fUYgm!MWz%(QqVby -z;eIL7%^undHnoY{%tcb#PHr;3Q%UZGBFSNO`wW&_J54=!4mY}0QvdsuFKRIKex -zvht|$Wl{<$CX0)7Daxhjkz&rm&0M9+xCNbtc#lplT#QOlU@Ed`ugRoR5gdC$Le?X1 
-z8$djpoUe-d6EHWMrlJt=YkcWKu7kV#lY8IG8fS4E#5 -zEPE=vlS#9IYtadWi*_kSq)0P+2nIWU7T&bR_*Rn*^(PmZZV?>*Mfl2e7NT0glo!Iw -zV`eiunUjLLnky&m3169RxK>9$5$yg$csb2%%XZu17IdzqyQ66zBHA|UDo!f@bYj3PUk>w!;}?jJaS=-R6jny+#x*Q5RAxE6*fQq-9T&!7$)l0EuB#hPOh&ZMGG -ziZLmAO_4yipqb5kWo82vq0Y4MX>MO0o+qV;=N%FF@TuYJ?>x=rD-U#}?ST4K;SKzz -zwt&*6dmAY>b5UvBg8G#9vc_`7G}|*dEVr?2D7w97CmmAIT<1Nyc{_`WO;Sv8kz`s3 -z1$)j6pIN@yc2@KXGjmpS3f;#htR?Giw}{$%mN{o_^tHjgXNR|>vk2FwYaJIuW)Bx7 -z#&-mY=`S)3Wee+D9&a|2JGy4OMYGvO -zdr{lhnO~h9eOj>i+J#0%2vTBF3n|4Ck8FL?4J65Qa^xa7ShLW{e60ryqi^LITNJ%y -zRoQ35*BYw}&xda?R~1D+?wd44WPagbefVnqH;3nKw}j^%>&^A&M4!JTeZxYXyK<59 -zAU~fu+Uz|ix+9qQ<%KnDv*Jw4xzvT7-!R`hH~L!NfV)edCKHOIF9@TPn}FBT`sz%NH^Pfuh%WIy6+4hZ6E^A -zHWT8@E#gek1>}pKvv3hG8@Ql5PA>YnNHSab{frcJzsY;eTqI@MhWm?FZ?x-f%Nx{?MopcR^=T)ZYVzRDti(v9A;cK;-4P0wZ3Ob?E -zURKFp!&lxJfsdIz?1V1=mr&6z#f%iKCW|3m+_|7<7+iEpQEfK!-hdSJ;X$f!QJVdJ -z_#8TC!tb7{Bv6qx=EKbK93gd!7xK@>iZwlnC9`1MQ6i@xww+jUW)I65i3_c*oNRm* -zbaT>lOn7;F1YVyMzJ6~o0zWQ1Z#v%P##BTnl(wgaue3WYJfB(-o(If6YEy9M>hN;0 -zNxh7k9CvwmxjzE$y&`=5v8%%K%xlB*hU;B!c3c*nuqyqA@RdgD!}I -z=_kYUCbN&sg8t{i%dIB$3ZCP?4==Zy3X(&KuZNdA-ww}{?K#?Wq;>y2eEqb2;d%eZ -zE_-O~vw0nL=0=Yk6_E1@UEEo2IAx)=Jz->+ee38UCv|xh-F)?)YCcy*^O3%W -zq1|KCPY-X@d%Ah2if(gvW`)-aOu}bq@?@JLDxxM=@3`tS(Kq_&jJWG6N~Hy*oPI%-$WHVy5WbnzNzydaTX| -zpYrgF!Ra3I+mw6yB@2UH4VgagkQ}P_$Tvf7_3#GpZV$f|JpIyzVQvSX@zlQ)ykGdz -z16FMi$~}&@n5=uEm!y6Va;Hb$3Qiv{s9G?-{D4FE&6AMnqbbP=RUY{{vx~5qxhFbM -z+U?O^gz1onzY0F?;ctM`1OGOjX%!1I`731l27u*4kNhFzDzla3P=iNabUb@o=L~rG -zq2T=XOgH8vCp3BFQnP`u -z8Tn3hAYsU(tutFmOT88zWnht!ZArFFGNZB0JXB*@sH>MRYq2WVuKF6mpHpBDrY5BOiVO -zJ6h+Adw3#vVBLX0?wjMx26AJ%+oJ=)8jp4|Z0HKdb$Z+B#4gIyrycQ|MS2z -zP3mT9O^Qdp9CEX%V7c=%3$1G*_nQ`yOJ_Xt7a=EIwb1OV;N@2@L65u# -za)a@0p?k!BkNjur+{51kPdBAo=u4NHYZeCmFXUFUnY;!^J@O&$FJ9I;^i4O*@z*Z2 -zUIMwn>?1kU>yej1PV(>!@MaHR4LWK;40 -zofZD(vL_tR71)^pZ&(=F`4~RvV+{Ey?fvXRJvxQ=*E#X_+f&GK;OQQI6nKrPFqCn# -zM^1%IPb5h$?e)l~Lmu()TyWyOT3L@UbxxIs(@#<;n>F8r-0hLS4c_G8o53C9 -zdyujl^~jGvrZ1{luDQ`!u>AybiK!*IwAmy70&=g1{|Y?*rUl-d-+^bEUF0t0c;we1 
-zPkG$^3B1Zv|1aQ!9zG4;?BV|c?-p*>eUC2Bvz|)6REM^8PQbK$k9s7fo_FWBBOzCr -zZ7kP&Cx6h{M7xS=*6XLz=k|^3&CeR`~vXwo9DCQcbAydhiHOb -z0hxaBLc774&n>L=S;+L!hvf#3ya6&jiR>bI&?ARzY?H-uz$1SL@}Q|D`Q$>+k6|8y -z+~DDlnhjKa%?}~ZdgOla!1{$v*#S;pTyiqy9{D$r=?6#Or@1@mk^caB#>3wPkN^C_ -zXcOS{9VodArhDXnLT)nKNDkF_0({hLc$k{g=#g_E*O;v&hrA!-&V^j>xgcB!-sIsG;H@5B1>Wi5*MXM{ -zH+hdlCxp^Gl|GM3Gv7VXTK7#Oc+~?7`PaddIu`Ofz(=+$OUI4Wg&Djj7K9;KNdbmh+g7+b>|cCe2?N!WAKvsl!ArUIWwMf>ta`?&$nM#Xcq)e^6(V!Ob=fHUg+WM -zb&>PFI5&TNZqboD{EtUZ$JCxi28+Y|Da3`~-am!734GLjBtU;UfqcJ7e5rE@_8iBL -zcm1Z~C)E4nu3UbQyx6p`eC{%~o`1+=tsBiYmLGNH%7d)`XeL;WTF%z<4G3Soe1X@CSe=Ju7NxW*>=QjrikV5LZ0@>KQS9f -z_U)G9Ewhy?`%jTjiAP8b+sqWRhvgEIm#P!YC^Dh1%Acacx*yi|TgdEa(Wk-fx0yXK -z%gtbS^G{O7V@0@$xZ7ZS!8ne=d&I3j|J1^Nh}0jI`YWCO4N)!`g^+ZrI4VYSQl!3f -zuGvNT1~-zN>ih}NIdI9^V)n887_4VtjVzoOP3qGmk3pV;+z3BOrh?^jPLtS;M+%RF -z{5jLY@)pQ#kgfCQ%r=(aCV8n-1DpBNiO)>1eER9)JT6{p*fGDzBy6Wa{k#t)!8zR;?I8J@Y>7R0jBx67ce~RRksNvka^n)gmAHR+ -z2Ye_=xIM5B%@KDq;Q1mutHINQ0t&&e0Ute9Ks@~4B|PNVzxykL!Ba5E##P78^FM=^ -zoFEl#JN^US1m1`MC+13`_KyQ?x8#GDr?4NoA8!?Z&ZP^4d441KsB1ufI*I!CfhVU5 -zu>4u@8t@F%p8#(IcP9b2`mj97bQS{jyt}Oi&v0*?9mjU{XTa;gZBK+u>d$FJ?}FR` -z+2+*`J__EBFmHm_yEkHvW3Tp66ps({P2hv5p8;N+CZGxYO5q`ACj=Xy83tu^Q^K2_ -zO163g3w6izqz47eS#A>;c6 -zjp#ok$V<+YFxhmu{dGmbsSDZ#tTBGhjHZ-<`()bAD^a)u$qL)Z(0G4Mq2 -z09o>snE|&m`AqOM^mGd9hro-$?M(g|c%#ed7%hasI}o%sZD&YNvO -zZv;LCo{su%!}yuO;0D=vCCU`fvI|Zoz&EUP@Da3et5PS&SA0>Pj_%L01d9zb& -ziuif%tqlYl;P|s8p;7Q8v2!j2pYb@N-$6b{Q|S)KvydBL_AGckhQN;8SOoq*@GjJk -zgP*iQNz2PG6n?GK>oKT@ftTL}z8&>#yMF<`6THfiZ6EP?{(bdPp{OFs!CNICJdth< -zc%!-GZl8<5yTHp)-;T+f;FTV}=o~lhVy6m13Ib%CeZQo>uOmIdx!5UOB-}QsTkM=B -zm^Hxc1@I~GLGXRxy>z$1o9+ALbEVzeJ^Ui!A!iVRZOG?fu-jvBH~5IB{?p)lJ@sD) -z9|w<@yqr1k8JCCVGd-nPGM$4#3Jk6VPo`TM-fYY62Dg70zFTDH8SrV;r(Jh@7d#Ww -z*zIY4f5dqbI3I!^2D#t`9$s#0chT)J{kUR~irZxGF5_ePh`X2d1DF=;SmgHC;AP+` -z2=$I@7My~-9&$3|#bz@(4+hVdL|Z)xUGB=En<2MBwjK2y@J?_$5kJIs*Iz2Aja!z+VNAr`spq>ZEY~ -z1KtR3>!)5Qi8^$5!<(&N3SPH{@0w||mz-N6Y=pr^S9P36z-PhBd?Ne?+^%Y1`80St 
-zW~b!|7fC{PnS*S^ck&>_(?yau+oe_D$>6(1c0%Ce-~-@KgLj@Sz|Nu9z~MHJn(+-%oq{A3vQQjs6zdpf#=iQ;LRS!Z-AEx4;{FXnuAd7F-W;Y5^4aC6MxRd -z;2S;l8^GJa?WFq__C++vdknn@gT3H$2pn~p1RkN0<;^Bk3O){QJL(?rX%BxHeAeYhItRWp -zdY=T`L4Jff4h;@(ekq*eFP9wr;FI9R;8Wl>w=aUvg7>4o0q-pr5CDHsc*yC8U;}>$ -zgYDpURxPiTghs*bwV(#vq0Z#Z`nez651!(xbh84U=JJ!~4gLus-Qy_E)c&5HQ=fK) -z#OGPT+pHAMCE)4Qk-XXXUo*SNtacOR43FGn-GL{HwKED{4Bmh%$p^&wj=E`?oE~gH -zCYqKPDZ3M{l#g_7da(bs29qTKqc6 -zA`U!FWQQKT4%3}p1LRD|)@z?BB003Uil6J=2ph*4!Mm>!=fk=lX3gL~bnDyGpySxX -zX9D%d-jIrus2}wi@sn|g^l20LNy0-;d%79-{R8=E(t>FRg&rG0|IcD;9Lw|M=j*7L<;9~X7fwbf%il1gWL~! -z5WJe=@is~q72f^3iLllnNyog2W%!3*K;A@GzM0RiwG;OXE6;C~k$a_S)1 -zIqRp!xU&m90|uvpcX;Yw1>Om6C&lgH!{C|lvlV>K<)QhT%-12*)Jn~I81OG#IU&1V -zP(Ju6;OW;3uxEx#z&kO!F!;_L;CAt#Q8uIa&Q1vHZxBa?QaJwt?*q5buhXdOIia06 -zG^r=u?Tg@c!J$n!RXqycg%;V%)CkQ4*l2MgrSKilzlSw}7{; -z7f_D+kAmA(i|n6={S16?18%1~VXz-URiij6l)^cYhJ;((3BDcmzW{DmI@%j81`mLz -zd_(r_qe~g_KYH2*UfCkto`@$x -z5SpxjQ$ZR{9&WK+dZ`rwD#6q46ktE7+yidcVzTGhUx8PCOX?TH&pz;kP6#tTQBE=& -zUZvB*C3SLntMOdkzDAt$YQ0;kcflY3W|n!{w*}Zawi`SXyc6~R1wI6hfpwP9Tqi#( -zodTTW2Viiv2u^yN06P*ln5;k0rED|FOC9TF{;BPArk3S@xUwB3=fKy(OU!1LFQ6ld -zvrjbQ95I|Pnq4G^`XP5io<>e@gSVkov*7-l#GPGjsvkTPJo|eBGQcaqYrwNH7LCGJ -zIvo#5P5B5yUrWHiu25w=<2R;dFE!^C$jRSF9Qd1a<&cjKYi_MwzzRpX!#`DJ?p>Pd -zM)1)`#a#)~ejGgIF#!SaS0eCP;UOm-2I(+Jy;%a6gI9uo7Q6?%3jAU4IZyqG2t4g` -z@ZZA@ZgHG4II4%x941RnmW_?eT!DFh$EMBj+|H-XEtULnV^jy6M>d|EtI!_jl# -zh1&(#dHpx=cJM;fKkf^X&WJtGy}FJ>ve(*(h;7B&P&Z-LkB5HJXy_eBYi -zG9Vxx{5J4R@UBH7JO*wT7^?>#C+=Qqv!9oW)3H(>+JFEr$o@_koC7}gd*LZkIE~vF-v{p>mWuU$DPIPsRmzsTWIYUy-XMWDy($%x!7nntH|S>uUnO~|bBAd3+KhKw -zVP=;#v-cREf~WmaD$)#cH~KjgxeKlNl0<8V+zt6=@XXf)tj84J4Q>}iD?$AU@T4~c -z#DOP$S^TGgV_2Lr;VT`x8d@Qm`E?jLW8wi5$oZ{FeUtKiAM$p{c7i51N}Tw&#jFzf -zmV(#)S-5Tft>7cz)=w{Z#<l&DBob^SNLn!H{oUe -z$uIq#z+2Ruhg}(Mp-L&J_$%Uk-m*))x$85N7WY+lhcQw0&m+$D?b2^a@UsCtFe|_w -z=Z^^wId(C)YNR~{1G^a9EQaOOTP1*93~mwut^yzRNk!X~P2hHQIGfOqz-4tfI`VN? 
-zIB!GfTqKUl5#X362|NXE=Sd-WdbHFJAkijpyP{k^_>aNsV+F*s|1$bF2B8UpU9V=* -zZ4%fnI_DZt{Wair(Ya<~yxn5d2UVaLAV}j%x -zhjv^F-ic-Bs>HqXCE+2*E8I`QQs*vIqXLdzf+M?rojsC2AfD>HbD(|=-W~FFNz_~a -zH1PbR*fEW$b#xtsW>;{WMjVpe;C4wo+p+J0`)Sbt-gYC=qc=)myGEX!8D-!}Nm9{0 -zQBr)T9zu3d1bb$E0NgI4x5*9TI4^>yVf{RN?)nROK2E{;sPCuG7JRTZc=(CL_TLAdX+O4=laT7Xhidb`vbpUb1F7;D_!t}w -z!*U$Fcc}#SgD-AIVDL=v%sL2T5E>v{6@hO8FFQ#B*pF@f;C3y;WK7;k;_^T^`lADH -zeTSHdaXR)-XpyY!LWgzY)43SjE_6uq%H6&JZWlVt$4PiAxLxSb&X;PX2o?jsK?a^Lm>w~wVek*v!SzMncHr+Y6+tU!b -zUBPj(!Cwd83vNd#_D(+B??J8kPR}LIiQ1(hGf?9ia0l1Ue&%)b?{*0N5O5-Oo&X;z -zl9n|`iKjQfONxcZOW_ZXp#<#8pw9N$T7djTfpNh#80_kXBT)4 -zcnaKo0N#A1)Xzrv>*O+Ch)j# -zh<`G7w|l@l!BfDu6JP3V#U5Stkk$$Jy8c%>12C{RZJ!ariN9THHo)|5vxo96{Q$CE -z;?tfmQfcaN-|dIlEX>X&zSQ}_f%x`Tw$o%y(PxoW?fmF)`oIlZ02S%S^DMti@>1u= -z2dwSYtK4j6`PVd6mO4)zkVkOTe9r75In?o8$*HMDnlg!si?#@_zC(B%_%Yz!;57B! -zZB+z*33&Q9#ZNQpe?@r6>4h*Wy7M>;y!>?=;7+M%4-P+_*SS;e3a6H5f!h^M?RBRP -zJo_&8L#K0l+PTL%a)o(503P?Pg)Mjwybat2_!KC1Oa-x~wZck8r=MD({ -z5T;@9EO^)5;;0fl<^gdu1D+3_1)lzGsb2-Y4!jvW5r_Wm#NGSejy5qUhruow*d?(nU3UE4`C0wMi=aa4l9v)~Qj -zxbD&8<<0+~IsD5`$;&SH+Jh;c@u2X^hs0d~?#>0T2G0av58eh&Zr$x$;AvgrCkgyv -z;VT`l@C*q{ogJv=ID2tEqgB(VDc{X>rsICJOU>HmKM!t~nw>&_{S(|SHQNlw{{_$J -zmb~ah;BG4)68}5F(H%~;aQg8mgfaxU7Y25DTAR>w;C6Z1CItA0t>3e-V-KerSZ-LH -zho26f=<>tn|8TO(Z1_LwkRL(c1Uq}e{!Iiv4j%sliC->-RKuVZ2Hh~&1fKqcII=z437!GogZj^dkARnf{}nvBm;J1p|E<$fy2^16ZgK^B -zaD#tz+Sx@UOsOaw>(uSQQ)gP#MQ@qgmK0KCEa`9B}GfBtb{7X-T` -za$S^I>;|_>BHOY51Grrh*?zI@P*=I9(0@tbdU4}qgZF^`H -zZdYG!#iV!_+^)WC&kXU8i~o>aSa}AH&Vi78v^2F^@^bD3w<|5%7W9DIm6q)j&_Qs! -z(sCjE{2w@0TBiO-m!9&3WYmkLmPcSv367ySqE;{TP{TVZWO-K!Gg=q06OZ+FtBSc_rlRP%`VEf^m&q(I?qco^jd{? 
-zz8^_mtypI{8T=^XOPyT@>hF({x0jmK5BY8cW<4IW9&o#Ca}Bb172Gb{JR1;qi|8W= -z_i|FJG-GG1@GNkb|L3Z82l8x=@AK#g8IJ$?*vaoLcTsp -z$S(C<2%ZTpOFh&4Y(s~XL1_A!1hBjw+%EpS3H3LDxBgn{Tiy+BSA(|vIq>!oRiDpK -zZ$hx^LtBH7!R`9cmM8v1T4vXWw)|9ZyFRq#=Y!kzp>4yiCGK7yut2oaiI#m826lbu -z-QeE?kBgScFbMujaJyji)L}9PH0xjuz~lO$k0 -z#y1i7!3y6Vdq} -zjSr!I_G#j1Kk9!BZdbT&29KqW{(M;WV&!Z5=`V{o|K2u~^4?``RW!i47zTDJZ0qPV -z;03EB@GJt{3!c4NICX%#b%9TUXM&Fs=fEXW-8}j4=tF{sKP|9IpktHCHhca}4>7K@ -za&)Qd#mX%^eE*3)Iysc%oK|*jc22Go*;1^QBF`;yGo{E9m@j*)WN(e^i9xQI=fX~W -z=1PQI*oi@&_{>|q(zVYMZ+Q|TPu%2*VV=0j%QMd(6H^3YebKa-=b$f3VXatqg4PNwTDCkqDoaN|17$r5i_ -z;wDSnWQm(BvCo2ip83;~m}5`K68lwRu}UmfiN&fMb7WFX88W3md*?J(i+#34%9b>; -z#XMWgv&B3o%X}v(Ce=KX6mw)q6k6VaedM5mWF$kKlOs*YkuW)umkf1Iu7t@I^IUNz -zgPSA6nIprQlPl@viZdC%oZP&V+_*BxIWo98dE!O}HzzO0+!&0>M56f;iT)mu^QE;3 -z+(Pormkecab7XLH)`)?Oe~yfQ4th=oK39f1SB5$_(`{g`jB#$J>pFK;*7Y1ISB4{3 -zh9g&oBXd_U5j3MotrP2%Gl@TOTsb*a^*P7&6gIg5#MXF%-C@;$C>{f7jtYvt{m8TGQN44 -ztIXQun1{3Fm|iVM`08x8(&`+y7gx)ir#+r#RB%KpvP}H(F*k=~OmZY-j)csS^Gps7 -zdkL8m`TscAH$_XM@P7t|SH;aWFEV*(d -z<;ua5D~FElZ&Z{%kz*xS4w76sNb)4JJellyGTHMa&%DgM9knOKBsZ8(?~OUZJaBK! -z&E^~T#sqg9e_zb=CmimNlS|DxRQ!9#|NSH;&lgOlkgG3TR%SnuQ_2py}v5|W5PoYJJQ0=6T=$}Md0x%^Ysr?-Ik(wmEswS -zSI3%%pNTn?{uj<&&%`V#ZG==p9)FIz4tndffqPT!0l$FiJge$&r)7js4mOOI_^pI>nn5ze({P#kHS7;nKA?DuY>Npn2a(^MN&=R{T9RcCy%>G(hv_ -zw0S>&jlo?sqn-tVRN~jbGI@Uv6?zV2N8sIx>qPq%|AK8W#~)FG&eXqrKCtH1it7ON -z5qO8<>ug|hy`T8e9D-ZcpalAlTL*Y*tYGorDKHs<$EVNN*Y1)P*ZH<9uIqOTKX_{F -zjbJdYxDMc)96q5q#dUqV=FSCV_U%RH+MmZ98frz|5|=qnuefxkl#d$KcMU7&V>T(? 
-ztGKQ|p!kHsO!d3L~+@rWr3H2+k>yIk_ -zu&Pftls9^7RsHp -zRC2e;V!21j&ntPmsbzUU$%9HBGMiZ*R`M<-kC|O0myRp>S4y@^%J76aPxZ$P>hpxr -z+;38!k6Ch7d^R^*ei-{}tE8eL1F2(I~G3@Tw -zikIXNppE%6iq|PVBwVh_4N7=H8T2UrGsQ<0|DEEqiX(uVNLsF2xC#AU)i2M*T>)qM -zq7vGa!7GaQMc~5`_zbvrT=#>Qar^}Xn>U~7A5`GD2s}9gx9hub!(LN<_Nw~a(`Y~a -zq9}hpF#j)icFR^=56$ifJZa6mpE2c!m(Ami6HCa>u8xsjmz%qO6|*=rf;wJXUZ9OP=I^N7>ejmWpd6YNw_hS~&0fXz -zXbcON`yVZgMlkT7Js)_xN+?b7Hx$oSTz6uX;(v)fOq}v-wb}Y>IxEye=M(To)oHs6 -zxsr6qKUAE`qIqur&WXuf1RlpdO4|kWPha}ug`2$HF$tkwRN_KA4V=rSl7t@VM8z*v -z$(5WpUtjZfZXIo2{{zBhR4SCC(Fg`5rSp#VsrpqB`1%OEP4SQQsC1f&-_WRZLFY7d -zuZ@Cx&oX1+-mAxyit`JV?~LLfDek|}ZJ*qs>idv@2yPGlTp7fHb6{OR8=Mo>_dofn -zzUDm(c*xDded&X6(9kKa=jTr4NGCJ}&I$cnHGmg~;*A?{fb-6*3UH$`@UsWnIH5l& -z9ss|9xIU2LOx6oDqDc|VlEJyqmxmN^?}(-=KYHW}6xZjA8sRdXC8z}hxcr~G2BEuTeV1M;3SOp_P<8q0oPM39re9atRMof0{8l<79MhsAswJT -zg2DO-0h%Jz-x#5O>w@~Xa9d?-i(tTOR`KR-L05$O)fG~ok7<1xt}&@E(no+c$UNZs -znoIvFI?16v$UGjuQJ2fuIe`^t`EaT;fvkoXh#Dk>m|x-5O&<07gTfXe?7`^PEm2jRh-ilPpYJf -zv~h1{DV_$-?a(|!_+odsPFDtPg7#dVweBk&<`4n4HUT=GiHF=pc{F>%3B2poGzO@eW=^%WY6 -zN#$>iia!nRJ%VObeSNK)Rs4L_C3E23Q*zuow{J||NK8U$qU^YTgYkQnV+uHr%h@Wq -zVsLKki>iK^**p@n#I%pZ1VUaLUOkLA=6dYA71tfTI|83lTu(0hLoxPGeE#+zsfxoM -zJR373@DXrs+^}lgUU2V-jDqt3&)26~V-UPuKMl^AZcszz{|o`znA=}-@qVExdX>KW -zDmDpJ1j{1SDwkT7Zmq1U2({|Kxp6P6#xXx7yHAtsREZ9Dx91ZhG0Kk~BYJ<4-3t++#S%2mW^<5vAa$?0Yv -z$%kh`X6rwgPdM26Kg7g^3c|xUJNCvT1iK;_ -z)5Fuf^wh5(GUxtB1v0O0SuVsr${wKhjxhzv8+FXWg+m -zd=4^a@tXMwn~kxV|7rrXxjn103mD&PbaG3C?Cs#R2s}FiFHl@JszLDu9WVUEgASzu -z7;y7P7MX`%qqFqzYcX+$4=Uje^AaU*=9mwIa|1`rC(JWmC%#wlH_WN86R##-Is@)) -zaa=Wb)4|7fqT;$`*=8I04CX`T@NcR9Ei@A>7b|&8$z>*Cl;m>dTz5~EDI$4owX%L& -zS=U4`YXV2--9Ad4*EdSDpjiojR*u@t9*T2#yNWYn-X>(?-k`&8FhZPR#dV+TRa`e` -zE&`8tKO!A`C?pAAdC=_0hXFTmT(zUX?0SQe9(;qo6Y&nLU1ZUFZ&m_d-g~nKS}}%> -zj7G>juoEiOMsRPKUT_X`L4aPc^{FtLPl9t(-ckOigqt02#w3{jH^Ve&CW6N~iO(Xgoz=^dDHiP4hbNc1)bfpcmgpvyFgnlc}I0 -z*rv?%t?7Vk=G$pfx$>~s>~(FHk43Pt|4}+;K1FxnZ_~MhLk8n&Bxmk|Db)pursz*} 
-zj!K5iP0~4}DPE?IhXQcKx&2QRC;3klryPPe4xI}r&Ji`r->j#JbOvyF>vQ^7?6t@S}U6xNb2+PpT!;GDzzi_E>_l*7((?iL7~!}}_SD%W4I -zI)cA;lQ2P1J0yorQ^b4Sl0#1fn_iW}%p!C9gw0_<5$FiBDAzmVtP>uJ3VDjFq+rU*70!8wOn8r?~oLn{Q%VRjL{ -zV54U@-13eH{(6k>FC_OW>#r&6A(O@OFl3JUA0>~`k`^?&lge7l)8GenWSu0(?*2=o -z`xhSFzi>xF;70sc#i=y=C{Cyfve#d|;`OoOn_maO`8uMX8|?(=i=yVU%FnR!C#vCfv~~qu!0J^TeR{TQ -z6LSN9pb~CS@ilKX8^~R-O{L-Z$rc;^=kg?nx>Yh-?v0Q|ocyB3?S&eZML9TUr|(HB -z!M*n+4Z=@!FYnr*8wT8M^f7kr6m{GBDeAU92;6Nx)sXG9Y6Z1(0J3+!%+RV8bg1p8 -zr7x)Pn|)NQjk`&Dfqg|OUy0yeC*}20pOfZ2Q{GG;f1agT-9tHPiG4(l)DOWaMK;H9f -z^DXk8>tCn%#`WQOt8jS+sSWbKFmIp(Y*$>@-yMNZg7f5B7Arpabqbs(*Rt5qys+_$ -z@?L~_KHebjxn(+`jt%o%C)Dxfd9Lf{H_r0|^*O#>HrtzNH4MB7ZBmZvR9E+Ybv}R& -zoObIxKMloeTwVmPYaiipIVrWV31GCNKkVMz9DhAIftqPs>Ez1IaCgr -zo1^6_aBp+$qUPM$`WK=#sL!3Md7}z{m`_zth -z$lio@n$0BB-EJ@d!34w^Iz-xKEDooBzE}=emr4QVy9AIEN!tw)v*w -zpWK&_y*U(`7Lw^vYbqCIW*ZlkuFaKI5p4W8mf2UcI^YYN3DCw|9~L{CnM%&|O~DB> -zlylh(<-EI@u{=!E-xLg+4KtMUNVBWD{`9q$p3Ra>J(abOib=DF_SR0LGPgfYwSOjp -zll|b_kqLft#6C)@U>_yrv=E_<9VIA7^y_a57m9=I?Z|A!^&GDRKj;t#_a4fP%FpKm -zZYJ(a{vB>1-$-s!y}t3AQpwxvbKu-S-MP*tsn7g(>Ld{`6(3MP#zW@jYnhkdYLRxaBuxg;l}KbNubZasUK386w=}WRFs$s+FM?xJnBQG9GqKv -z44p3aQ%f^Gq?Xn|;FcbvT3YY=3$BmgFWt0IxM0S&IDI*y6P0zo*+%l(Ldfh~%O&96 -z{;O8*lKkfV4=Ky!k0{Fq<;X50!Eu{R(ML2M%@O>KD6YRU+zZa*^pyZRl5G^6`(5)n -z<>&QSTbs&O_q}=kqnPL`^mmVb5f1usL;#!voTnV6fL}oTSk;fTF8;@S(uU0ab#}mw -z>AsYhT9QLmkV^<2r>yHDnAL-Ov)H8Se^F)Et+@UX!m#j_ZZp3cn9pl33^;)1Qzmtm -znllZVTdP0K>^Bu8m(D>h@yO2I3=Tf?_`%s-vTA<7Y$JE3e-_`a=`Z#ZU9B`txzs(AA{QeeS{BySXDnA^?Bel&rp7DRrNa*Zw%Zb!T7aH34VIkck|1+;X|oJ) -z&Q5nrCb+k|c+pJW*w1aMyZRN^!!#&d#!vtLds-Rj?((;}{@nn2;v|7zM6yma8Qhy_ -zn)37Yz?tHdU+w=M!;bWLWGF|P7nlumG#-VJc|7!E`mzXSHQ;>co~XvJ-s~ZFM>Ig@ -zemsMoP0z&~vrNzXwC|8W8=I#n^K_H-2_5;FpTq=WV7vAck8Sn6uBqFF`qvjFW;5BA -zmfa^ZwNx5rP -zX4+kw;@Vxi*~acVlsmhy2{)=!nQ3=jifea{N%)k`@5$|KEO*Y!RCe^M2a@UO&K@q} -z%?2)#?)SwmS)ROEOt@S1eM>7ik8nEOp?%7CXrIy@K_>(rpL7+Yo81L_BDl-EKa13Y -zEfS1vPgb@i_t%nIcK^J(QmK{qZ;oA3TBX!el-daHofjJw*Im_o|2~SHi 
-zYD_uR!6p^g!KU0nnO5HPpzL>J%_>#LN_;?u(mN>0itAV@#UhF -zX?MelYj?b6EN|Sk`bTYr;C#TYQvNH2n;rhxgkZHY*7rjj%>?CG+6g;uPKVACJ)1m|HVh+Sgh1F;A0JWdG29&@1{mrNLQ -zPwBr_tXBLk)tI`6=7Z(<&F$pR^aWz$f=v*(*K*WEX*PQ(iBKzKj-#)sI~9LV&HEw6 -z-&K4R+}oLB;NH$m>T>gyCsYgmpi~6+oS}bYU!u6~{W5TGfNHbrkXZhfw&GAK)|-92 -z*JxU3Z+VmQr5n8woJTC5z8gA}Icln(+==7`T35@u1)yCA8dGGW!^Zjo(lbn>py51h`>h` -z*HdQ-ToOIZjpe@VH~ZqKzh(jYm+T9jM8P>Y80<>k-tQ-70`ibkyu2**9i3a4}_kM^Z84+AQB6!6v<1 -z8plZ$(O1hyQfI9{k~<3m=TM}wEi{|SZLk=!cd#1FE{@u0YPo1O`?zRzZ9;7kY<4QH -zJ8~=npNqiLw>s{|IsHZr)JEa*cPi_ZFle^LQ+m5iQ34giW`g(jnhmtKd{jBpojeB4 -z9aii&CnQjZl_gMzO+(-gD^?x0-}RR@8^Pb4@^_vxb4=D^$|hj;aBlIYmiB^)kGc(X -z4`iKGlG#eNOw!`mI8#6`zH~9m6tnCIX8EoeJ@t1(m55Eb*i=Tasgh8Eqim=;FAU+Ty-WE3+d5^Md~|dC$FlzTf=vDC+HL3LT7xz>dyW-Iin$ -z5-CnHY9hIhdyNZ*p;*|K!C5bjpfr?Y2;?Q47P@G_y@~7j_O##XD3&#m+wXfay -zj;5m1waMy^U^A$Ayvl9Z6tS;Sx43dr)zU3U?2(@07T>R|^A*?M#x#L*&zAbl%STg- -z=Z>bHZH2%+Tk035{7RpakD;?b-~+gL^Mx!{D5+zLtzcs6Q!OE^gXj -zDuTfrI5$vV|DDGrQEs2+esE4m-~9#@U+@?2ai(YqjZ3`Q!$q>$z(tzzCDGX&#aG1LNVlfuoCV`CFSQ>er()K6IFl(GJ* -z%jp#_d=TibwEWvlrd&}$I-3Ttdd`k -zosX0JLwV2f^<&wN2)q~k0;*p@cMHeSR9Jr;O@#pnC9WV&`E}52ruawfR{rQ`&kvK` -zh58sChVDH{Oe#CwtT}Mb<8u0fmGj6*raa<)B(XV<%hjYxG!@A-sgfXbd-aq|Q(XVC -zU$)}9-^vx&^>-_-`*Tvbd`Qy|aQr`(d^sWQC|_~yXrtn~eyb@uo{p(D$lNdbX9S&Q -z1IcT0=U{)6i#?DYK2;RsVE8`|ASoo?q(}4=UaP -z&hGRBIlA3Qp<}OGnf+g^IUyzXm}B*~TYbuW9o_nnZZ@V+r|g8low81K%5Jlj{04`V -zU)?Et%^s3Nqsm%0vi?bKi(F-N>zWnUt?O1?r`-$At=8G~2{)If(x<8Qsnm`E)bzH4 -z&P}P*j@>GNZU_AlJIh`DZo2Mg{i{?j^qPGnm-a(u=UUzl?mg2CDz2L|X)2adv?=9I -z_nKo`NTx4yicX@U$=x&kOJf73>+n%~VKyiJYDO~7hsd%N=G6>#7lK%U@G@8sq%3p8bfEE3xO!D3ve;@)M -zkHDuD*Z$Mxcs47R(PP^T9Nav*dTx}1a}xStRUzk*aDDx0Jzw#ksJ^Zi -zexiGSsvi>+?hr@Z&T4vMyPVqDx18Ep4uLbRR+(0rJ-A>2&R*>u;n0Kpx -zHxqBNNG?r+%+9r(4DR)srrez*Z-Cau$XVa;_s?({z)ErONQDO1XyoB_C1z}aRWXIY?}Yq>Cj*J9=Fvvg`Yjk2shjj}9Pjy|jUt;)2VMrl+>@JD}I$a0-> -ztJ7#O6C{V4lyfa_jNr9ZaXq-55%?Im_b3||E`QFg53^b2=$U{F9KX_XX{S@Z$uA1x -zA*)lJkqypcb+$4qQGBc7or>#6gWcdfN%ew&J;F_1COsPISH^n1^g+dMRHu|7#jjOK 
-zqz_ADIBETfwg8-yxIs0tP`G(Ala7?(Oqv_TsL9=-YnGXPnKbz2Du6zuD@|$^ouRAD -zE~@3LGZnl?UvJXh@`eaDjo_TrP4v}L7N37PhfOMnn^d-Kl0$n0e;p=a6-A|w8k|G7 -zDdIi)WI=nOz6dsrFHvyXxWm4wI(U=fx2O(oSNt)>I~8wHeiC1H-OHs@2iPydL6_9= -zS6l=46VlD}Hyx{~Qwmm7r_4a$fw@_A3N7}sng(VLviH>Dn9VHvN9Nt?hZO;{i{!QO -zklDGG6T!V+laxF9i;4enmKoWUWx8?{ud>WE71?y3mmR@hq2l@^P#%HPsfgUsJWJ(b -zy_w*m(T%aZN!jQrx)GeG+b#56Q;yBK4FWg*7L{{{>o3?D!5@8pm_t!%74cju`ph2Q -zqea7MZ}~t3o1H3$8XDbP9^G8(w_%lozNgvi`YRoc;4fv54C_Ty=*8sHz{@%e)q3j_3mZ7PDh_*bQZH%ultclGC$|7_uAT^{|CssNR|!OBca9!<$| -z$eiOBl#j{?W_92k^NaN5RvvZjbRKnWqjK~`%Ly`+f=khaPLvl1n!-_eZtLO -z&!B1Ak4oHOda;R}CgDu#u-%Zk30fWk=Nxs1O)9Q)q%XF~U1(Oh`>N^@=a1xvHt(nw -zfOFHH54cs_mlAMpn&#`lxiz;c{|&-TY(Z?o0|l`MFPYR7!C|v<*rXh8GPMQtNxX)9 -zxxZsC445wz#2#~n{x#VMY`IbT$5!Kt>tEQ!z2>&fy*A#aV#S+%6f2kp+1p3?CiN^D -zsRGD6QhKU3D>FSuyHvEVsc7_zWO5hmQ?j0%{bn1o}=-vOB*;z%~Qa6VD!MJ -zfqUm}COEf7*Dnz+9}@KhtAPOr*1S0aZw2=r96hSOK9LQAa|8C#iEJ&O$kx&cX#@gi -zx^EG^U>h~Qv#F!Tls|o-_(!FlH{aw4JUap}0QYuap>X-Ups$@(Fz}89{dAA>ty6Jy -z`}ZrpHr6(hmQMla`2SXX4%~Yk@xLLq9RBNS+QkVs>k4BNOnD&=t6v4owS}JF+a)Ov -z8ZrCL?AuxijtO-~qkv^hwxobM*uIDR7<|nisrDfHsb;{}+@J#r65J -zQt^!e=Q7urUz(JlpXfJ(djofXb2sZ>d32hVbE$F7=g$B7^WJkkv$0p%wy2Jwzr-Nh -z;7*mwZZ#nXO+qmb17scsy_8VkEpCUbuB^K#4&2*rl`p}kM7?_RYdv{f9-kH -zZF(rip?e!LV>Xj{XutBVM`TvH)kor-;`&HT8k4lR;b*ENF$J8b<_oGr(uB)*@4rz( -zIt;j{^#8h&0nY8vj(m9>^@z_~fqD!V$xbu-s1 -zu3OUu&aJsih3^(_PQQSjmy}%)8+UjQYH~a7G8bP!hr>qVp?>9AXELJrf&VIyY)2K> -z8ICKiGn`ReKW-|2o1D?#{71w_9g-gCys{mCn;32qMCe(k&h8%NKUuihQ5u_2nubc=J}n0KUgk=``B>4n3gsr@LTXbLWNuTN -zidn6~=sS`c#q}}YtoZL#kGCp*xtcjW!sUO1IZp|_FyMSO_m2~xjobZu6`*9Csc=2V -zE5}{z*4J~qUGXzxYorptc8<$??!Z@6fG%(jtQ~cObE5xG*ZIIzUX}U(j-p%@N2W-j -zP?^G{7L6$?DpH&Eb|Yh2+GrzV`mx3C -zQZYqEg@zj?6&ki!*kb*j=bi`7n=^Co>m`Hl=Q+=x&pF@o{oe1r+)Mva<2s9XX1QCh -zl55t%t7P+@G=oZgC#0TsN3W92d)CaFr!nciE8{L;LkqC8jwQFZR@UyanKf-6qRZF# -zi8-}lW=&3W;voU@@LB!u`ZEihRpfxPiag_*{vM>y_uPH2RyMcQT9KjHJhc*fmbyK! 
-z78%NsKx8=Y)sYNUu(N;~cTk2G)S6lA=tmv$rf4yznvk647DVOF+6qn?+QC^xr*Tbx -zFVc5;?#{njWT?DaWau?dm4Tl9?!eU|!ypof47Xk#$uI&t3m9`pWq853nYE4q)FGcx -zD(2J-lG8kgsFYzIoH8tevx;Ton*L)*zv8*O?HZAx_nPdSOV-U(WuWIK_{R|;>wm;i -z9(I)6e@&!IF6^vG-gRClx)hpO3mZh(5_g%HQ)On>YG|%NRO(U*PFYMbA4d8K&)r$C5gAHfBQi{zr^-OjS$FMgM22}J5E*WK -zjob`k0ZV4rJ$)`IKkTuLaV%`bxbErnUw02lx`GWeYu&@BTR!>vwIW;2FMN0{mgYR; -z`-M}sd~jA>WL&F0hN_F<&#Fs2_n%710p&J8>;0^}W#YAZ`{cDEYqc4)($AuiTDPlS -zWUWUcU0wCj>S}_Wg|xT>GQ6M_X-5#f!`&n1R2P!d+=Hl;sTZ6w^@FpDLF3x>KO_B+ -z=kBcQbah=PGK`w1%0SO?ckOi|!z2>u>bfqHVFq>JBxCK4Zu?o)rq5bVtOTnl`g$rFj-nsm~lZ^;rOC6-&mo-7jZ_^vj;R=ea=Z?wnf_yV;9TTXd*zLMrWGFSWCa1X^ -zQ7Jx?W@$yk2BzFi)+7o=xtk6uP?w31rjw?)8xj?Xa_e -zPPg}sB14y%wT?;D(d(`ibE+T7X&yvW$}j{@8AiZa#h7tT|8=Av_uPH&jUvP98%2gG -z^Hdq=DWB@wC^F0;fyi)PV?~l}9hBh(D`wU@rclSaJ1pkZCX&;f^(*n0M|`GD -zf>VZEa8^-iT+<&#`YO-e`8S9Rl{d(yP;H(n!%_G08)Q>x6`ra?WKKvYI4kKkuGW_74{mBpMCaw?*4d#yv#UYPTIya+8A0)aV -z8EVZ_E1_q-JJ2LDG@99V;9Hv_8Cqax0rD5&GQ6PO%vVVCS=1qaA};1skC`<&&3%YU -z8T!E~!yq`T7&d-|#QzS`k9h8GJ0LRj9uOJE%~LC(r<^elhz!$aHW}_e5XmqHI}2EF -zd)_26ESmWWi9UxqK{r -zHe_a#?XKo%^^C&K0_4N)Wq84anYE4u)G_V$+$=K8nz<%2&m$^V&jL7QSORAiE5sUk` -zrS5>3Q{`sXpgcre~ZYl`4*9($vm|ZdbYT|ZxtEZ -zkU-8X7r!--kzoM|M27dZL^3SH&H`55F&Um(GqcvQj5;=v^%vk- -zzY#}yXf^cDF&=Lgx!}|#-*f*5Nvx~L253RQlza4V(|h!96CKLTpqYP(iYnaEw}}o_ -zNF+LZSH|tQM_&s&^T|7z|5}zq1JbS_y1cbn%&Ar+r@0+bxg0vc*>)E=tLQPVUH=;C -zdp&pG`)gSatA8yr449|NK+hq!=j|fH2omV^*4rZ)#$jgxlkT7lFPJj3*0G8@X5C>i -zr{cJ^nBRH#THm>O(L;4oa-KTF6*(Prh+1kxhE1{=+oS_uDyBi5)^*nk@w0inrX94o% -zVz-J6gJ#w`)=|fZJ1FMV7?RUGfvA*W5}YzjgR_cRW!wa%jC9FKOjt$h2>yC;!m2YNEPIHm*cnd8C -zrwpawtfJhwrhfwID?E3XwuualZ6ZUpd1@u}l(%HJi465fATr$57Rk^AI}2!W56SR? 
-zRx@iIo2aA1?KvnibRjv-J&4NH&t&|{t|13mlP;dYT>00~5f``RNJhGAy`@;1eHiVS0B);f}?L*Aw+=F~Kj(>#l) -zlwl5>GAw|ziY4Qk{xqat_S}8(og%~RJ4J>y^Hdq=x#1p@LU$(~6)q1u0sr}(kqkMo -zvw%Ez?ca(F`DWHSa!^OHJ0#{*shKr7&E<$n87jajLlroys4=eT&qVrK&)r}CR%9rA -zm&njyo>~b#o7`pZ5*b>MKvu)m?}}t-hn)pch_`?46Pj^!<2cd4D_6F_jHI1b4VaEe5xaoVG(u~u7R!5g`T@lzguLOe7DF@ -zVxB4kJ>?68q|n_JNFXvi`tC@EYS>vot-I{+M20#uYaRKhqtP7{bE+B1X>LVSu7);n -z%FqGMD!Pno`U0fy_T2sM??i^2_lOLA=BYB!bHMF;kH|2D1R}#F?}=m>g`EY+7cnEP=C%72}%zT%=$1++Er!GBkFI3>)UDGSE{# -zkET;(NUjN&hi#E}bw)De!Oj8--9s|GpvcTxM-l3fH*39DWGFYYCa1X)Q7JQLn2cvlGEIds9X*m;B31KoK^G~*RC%@`d-gn>X0ml!9ya$fO)D6^c-?WrO@3Y -zNFXwN_fRCmIP5GyzVQA1BEyuKwT?2>A#ctSb7~&RXyFCsf<7~A9Tli!&^;#R)G(6MJc_84VGNuyOn|eBDdU>{B}hN*xx4Ll -zk)ii?kzvj}RR(%4xFffV3`BE#+v -zMl#gH&I065!ew|tlbN-SD%8>H?h$jU9m#3#L{zSZE^x}w1I{Y?jBEPKkiOq@cUF(c -zP}(Ch44J3OK+h3(ZI8$>h6EzRjXjYJld!XZX?H}17tEMh>!?N@^X?%rrxuZ%=4C{s -z3@hN2VGW#BY#7({mm~eA=kBHtWuJ1vD`7~ki>EwP4Gze4hd(50nL@*?hrM~%&g_qAgg@5xtLScNKSJtqEd}IaH`P&&MKOWYx*mZzS(p4#XDqC%-$i3 -zqRl*226}e5$E48RT}U8{;?H+PGW5dE0_4L#J}fc}m|5$nMIFQLkeE}WNKW%OqEdzl -zaLOlGPRkU(U(x;J|LV;y!DuxVTi -zxCR9z9v2`F<#pilr5%zkm1kzHhUP-!@zz-cPF+gCSyq{GO@A%YmwWE6xl?p$y;F3l -zGEc39o;B{CJ4Kf|BoJLbb!W6Z8ewMv&F(Q7UeID@t)m`ww7b1`i42`cPIEV+a(VQC -zQ-(fpRxw~)(_e@5gPyxj-z73k-X$`On5W7>Px&icDRlP)5{L|s-WAC(4Lb{vPt(6! -zWSBFv*3p1E7TrNHro_I<)P`{fb?0OyWia{GUW7$47uj1 -zGSD;M?dua6ip*?#<|TcR45hHMfO2<_3@@lKv)0jwI;!1KF{f&goaTB&r3?+=l%WZn -zRkRq_^fw@VtLN_0dqjrDdqjo~^Hdq=+2s!1BQo?LfvkqR?ulgRhn)osx`$+V!H}7? -zjwaMG>h^p@WEe+snkNyJGE9L}h8b{HF=t%UA3*wf&)wZ05gA55A~GzQr^-Oj6?aSu -z-MxkcBEy3piDcM>odsmMeIFGWl7E!2^3Xb(QAeITAm&t|nKe1h#m3_;vILwml!3E~ -z3gepoW~8t5-2MEcBE#lKMTQ#l)Jo`C=l1rC3=K#iGF;pr$q)O+w~99WY&hqI;Ss&*NAa_jgB8dGHSdw;yF+FIND4r -zn#l!k!%Lg_o~P2+h#XL41GG&JC^oKz^3yF!k>D02u0R~}O2ny3rhL`? 
-zYda<3?snOy$?7~vOZvxX2A1?Z!0@ -zc`xGpC@DsJ%!D|p#5)maHW+s74D(1p|3z@_BumD%0UkJ6fAJNB1IOk+X@*$3I^!zc -z-=ffZlc4Ua3mrCqPmlP+iGOmf=g~ZjyH8FXO@i=ik;&dQRgYooUL_=eS -zK&{}M%Pu5%HxhJ%b832xYXjn+N_Ve6`eERJ0psyp4k7{9#1J^+j39pS&y_zWgrQA7 -zsrZxfov2(i6A1Ws=x`F8iDnRw{&V2;pEr&MUzAp50SVrN#EakzwCwhNQg-y!40+a$ -z&y!BGX%$UTa+IW`KbvF<2@WB_3=)vfBhL26M_ndWESVveblG^^S@}s` -zYA>m~e<=IxTc}_y>{PH7oC>xZkMr~xj}??Z8XS_9(}!@>s~?}6rUGB^lPkS^N*FN5h#Er%^F?kE({0#B?z;|2=jKRRf%ixO$ -zRFf47eE_`IxEA_86xs)#+Tyx$2hE@zaKJG5_5kr2F^M)Y@e24|n6jKxfPQ>GUbt4YvtJfVE -zk@*_HFwWNq`2S$OCJ>GvojYk<+a#ZX{|B(_WS!iYxgO*KG`sXcF>JVlc$Bm--Hr5#x>CG@XrP3$AuOe*ZAB1Z;<~f -zhJgc0jK^DC6Zm(sC4fCzz`vdC?)qoBbgqVd2=-+(GHP5E73{*3rJ -z;H92>d23)eio`t`_-qEAb4Ez~b;KzLe>MuLIwKVZyaEGiFyKNAs5KsUJf4A<b`&w4_hFKWc**y4< -zpu{RT{nw0Zp^qZ*x^XR(y!@&51V@j;RSc*SgFdd1|L(xHP-na*;*G{N4qp^#0skH1 -zcNv#LU$x_PmTog>rMv+?Xr3{&90!XnCb*M2$soR=4!v$GKin|_pl{}{ts -z$lXN}B1Q86%zR=sWIS$v3jSy(v1D9jxf8Oi8dq72`#ZGj2=u=)AzP24eBYl2;!~4i -zciF#5?M-G@Ir+tEUEr@r`{Ur(fv+RYU8py=z^B)G$@4w;ry)qN$@9lsXmPa1L$}b3CQOV=y7Zz3*g(A#@3vyuKfL!D83Mn*x -z+{KV_J$`vH)P^`*zeDG~EG*zppo0+`pevVr6oIPTMfb}p=3%@7Gp{<L!q0IpkHb4vFfJx)=5i^AZPn;}+ -z>@f|_ICF^q#L3%dZ22HN7&u_jc$|0{2|kIASHKx(4e_`AHU9<-9I$CTPF%J3#4D;f -z1Me`dQ}Z%RL8tJGR4p}p3LSLW0G$Hz9(U}U+2agr{x6^_L={zu7@Tp+!GD(>;?#g!e1CX$!jOZ`yTR8{ -z&?xxbn5YHtY{W?xhsNk%QjF)+Ls-$J?w)VSRbYjU(-lN>l{+ftebordvV@C+krlqzX|gI -z_#T8SdUoiF3##0>&hRjrs&EJXQ<|zavo=MuEC4a@t3y;aRgb8jMbri#wHZ-qmKRVo -zYFmb=9fvOVCkO@|AEJ{UO3OW?fHTmxr8xh0`c`WGA5#WJ!jmE^BBgD#U` -zkiQc{AYSvgf!~K?wG*5tpe1k?w5(kF*Av7av9GS$06jsFuOZM=ad@m7*Ekp8aN0l| -z@|@=cg;FoXfP5HkM`wk`wIBwnf`2pot26kI9?$*!H$FF1G=e~F;FPZZuY&(6@NYF9 -z7dm5H=l-9!=UzUqtkj-;)I^5D(vW~}TUiF@%duS-1%JNm+5^sukD)ED{d2&q8FVgr -z%WTfLc1E6eaY)Rgpb~tCa>iAOe-S0T{_;OfW>6*6u=Qv`pwB?+Ht=fjPH-LtL*OVh -zl^8XHcFcFgj2YLC$*1AZ#W(|h^7`j_h2k}_nfe8ISuVLrH{xVuwoFBtbD{LdGJ_W$C( -zuC0wi{$ew0o8+Y!#y3t?fv;zWJgwk-A!fz6R`&&;*B?x)X3)fDNJm>oAf6FR%0nMN -zM&dH#n&3V(R&HG5lUJ78!qs)c1ocS3O-QbEzn^{1buEbZZLHIF4C5Zy2Yw&w9RmM9 
-z_elwzx*z5lGiy6vL_4#_wH@*~grn|D8T?n^|A5bXy@Zs%PJ)4fk}nLp+kmux4mfYU -zlo{6s#@r1*kp9NdMky~FpZ(qc{vdl#idSi! -z=A<(oLgJgi^3A%HD|eTmGR4nU*);Kn?^$u0{xd?d@@x21{ORI -zliY4x=Yw(j;mE^FO$|LXFmAk?M}(0adudb3{aXGn!2Pa4;~jt%61v(Q}QTIl83`SQ~q -zd1lZ`FVA-8{W$yVSMeEm9Ri*QMOqM$mnW@=_e8dPP~xT7Xs6k=(Fru#ZCo2A?=c=X -z+6&G``@zrk_4?D&JPc=IDXxL@O;w4CaBPt0X5iJ}eCepJ!mhEkj__fV*k_Z -zvkPe7g#B^%>YvIRB>&*vEB~ajFl{9-3DZNltBvbI=PS@P##IdRHsd-w|Bl&dH?Ff| -z{3-HlBGF-n^n!FR|5;SmAk3_5+<08qgz>npdE;?i3&v5GU*C&nh!2#Kmj!fZZSirjM2j8I(5>F!WBoa@a;_h0^KKo5PE|w9G -z4Q?PFU;E9omn^jr@*?9Z;7Yc;`ho1TuVj4PAl*QD6VU)ASxX#P{NZ$*71zvXPM;iJMg42JYM -zTy^AqqgC)$tcMNoiX_hMiOR6Bc;Q+EegRHNRqn{oMZg*}>-^p(|7|@QFzneF)|kP+ -z3H~>u-j)pht)5?>enQQ9yAkMDxX(3gTxaykleWr~n{Y@#KI{9NM>w{>h+*t|3I6PR -zIfMTS{Mq-K=lc_}u}(YO|D1JYGBgVVpoxE>ii -z!aBiUiItYP)Gi%#0Ppj4UK*5rE;{Qqt{ogjp?%;iXwbMeK>y(_uKjbsh#BGz#*u(Z -zOn@`a^i~3iPybnP_4oRV)0`RN#4AYfO?145K;#>U!;{D+I4&ojpD3;hQuEKcO2K)S -zsC5T_Ay+JQ?w)1&r_mkO|7&(f<^K+}U<~KK)p-0YqRqHo8}J&t!?;fKG@9;lmr1sL -zy-33z`!MW3FswgaYr&A2burL9k}f_qil{7p0h~|q$}S7S{WlhMGx)_=yKNcxfam_r -zW_;_&pbgM^$%hb#Zz&lD#}p?snG;DHJZ-AS3N&9ed?KUFMn9xobz31UiJ!~Ut9b(S}b=*#kHW)%-R*r -z)$TDh*O)mz_G;bU75P)zI*jH#)MJ?ZDebKf%Uda_d%xMWs2LPB;ENhYSei%EMeQ3y -zbV@yrsQ-zm6F%w`qS8F$qt0fCI)|wLg{bpB>LQ}jEZ>G90`6PM5Ooz%{~J-)eAEp@ -zr8(g)`?X9h^UZ>GzPd*LeJFsm~pjVX1yIYcU))jBKAyiqRgE -z;3tDmgL4y~F&;lDTl3ssBeR{-%fno7%dO48yTHeAp!R_u0UrW?I^Xy@2E#MZ*%bJ> -z;B&@xmggY&JUHK6v}nAB{wm873>>g*TsI5e(cA>zhizZJf$fp(b6%JAN-v=1IqH9v -z-{cia;~H)Rf|P^*0lWfyNIq%%5xIsuhCZv2hr9;kc+#i^-?4dtp8XaOz)Z -zJl4O+c&vXJxOMBF5LCdx@@tIi`u+h9pjzX)vdHU<>%4H=UI*t(-7Qz06AqwxBxrSq -ze=i46$5q~}d1&r($Hcs^+svAdXY3y1TJ{eSwbw`OM^u^z(@|4H8KMp&>W>h01mVa> -z5r<-|g5QKSlvSH<)*qHMm-*nd9YB0!$diAqmtNNb-wsn~T-O45xp5V{-0yL= -z*O)=)Z~=>?68txq+gjr~sr0W0r+=gIc$Q0ElM0LK{n--D9@Vc2ycNpUf%9;wdTsFk -zE#gdr^Dxb;H~$hz$kp6qhIlpiy3027qTI~dI9Kx^IRAosXe*ESb2l7Dnp~{F5pXW( -zanJpW(i|{p1GFFxm@*zKJ_}Ak7LCW-%93%vt!&?_l2;GRphEowOJ>!$7DT>gTnpmP -z+jm`A633@#DveULOkLgO&ZNVfTAV8doLg -zpLOq%qhVk@$h|2P 
-zN?rs`y-UEUcPaQ&_>`v14B7z;YQg{}UIgbW4XYV=qA4`6{i%eyrt@H62ZhFCsf&!q -zQr9BR520KgIL{aD!qdl;uB89LO6sry%Kwb(?JncGlIY(9PXAuxI+a{oeIb4-@e3sG -z4~89y2a$mO!{AIjVmwYfiulXvyroi_JjM~o8$6#tpl`}oVx5#c`km76(kx5BtCAYSQI8rKCx -zUX3_>BVny^t!@I<)qC!r4$#nmK)lLs1m`i?2~HXN!71Y)_~(;&i{-c(w9;Rl;&z{s -zJll0Lz%HwwNVOKrmV6%zW_SP7HHbwZ4LL8cIhYibsObgB282!* -z_${e0b$lw{{+7T`heD&^e0%E*_-XK80B8LCw_1Gd|3(a`*)j;{=GF`T75Gnsv!Dgz -zIt9PM6fA=4`!v1&;< -z5Kjw@;Cy3l8xrutE!*9(Gv&IX6KQDfcDr_qxd&0X7xcP)VwTU+O$X^m5PqEGU^>XY -zp$tKWjce~bIgI$IV~9%gL^|q~lNq9Jf^(_VyzRs@Q17liOIm4gN42A7cS!%Q)jcNv -zx33Lj*nX#Rt(>ZLrGpf7n_1_M=3XDQFGJLRM5T=K9vv-h$Q{ssM$$o2qZxv%fyvHZOwgNwPl2=0Iq+AVwtaxg-R8IZ -z&U7+X!y%N&Gp@5ho^M0c=c;x;JZv3 -z!Fi?94E|dz%|-Atz?Z<2;ET7|=}Bk&sGofKO>QII;{Qj5B40MHGyb`3_4JQb<2q5V -zM#pRLXX16^wI$Jjl3T;UMX5{OfxP57yUWe2b=`5Yd+@2rvoGKt-HG8mTXnmmGCb9b -z;e4k^zj2k}I=cjrUlYR!mjm6#5QqNL@MroZ&)=GUZFx5Wtztkebj!Id$iM}WYh3&1 -zyyU@uB->s5G`SMxf~kU?d;pvU&l!&oym{mCfwyX01^k^q;QY#7GlLdv2Kh~%)Eket -zj$~VCoDy%aVq6#9W0>OgG#C99icNO7zO87h0Pk*#&~yg -z&bT&qVM^v;>#<@6t@QV(bk(>PO1@!S*95=RY`Fczh0dOUr;oo)7%1uRyFwsd+AV>z -zF<04I>qU?5p1c^?5=%=T>rP2UGwm525s(; -zm{aX$j>C2#>^jQs24{JFNJIYtaQY8=?q7~$KSN0HI1&$oGtd|k(0>A){*%Ui!+x=d -zH1c;lNbmZ(-!8(C_#ktV9#^e57 -z1kU}tAN)08yG%?N*9G)P^gZdhzwO0Frfq;0!~wJ5^q(`nqj4F#YFrcZ2v`G8o$_|E -z+G8CFIA9Z;i4&dS*ya+e2Isch?T(zM&zQ}ub@2^2eeNMKrv}Wd?bAGnsLVEmaC{Ox -z49+;Ch)@4<&;4y?4w$e3vA&Z?z$c?q;EXe4TpOeR9Qc3oqgm(85GP(hf(@*wMR3m3 -z3KGzN4Sd^QkJxoH#EEO)8z%gG-Kx4+WWAbstsYje?;g^eVLIZuy=hFX8 -z%peeX*84(1uS1|h_gInK2q<#b?v(}Ckio0V9TIb@+ss-eKL=^nxE7;4nW&PV@^=c6 -z{}!J|nm3@Bd|a_Iev#+yoM$HYxQ)HZoYWJT%pQ#T6L_z2t(N}%;Pf9b9?#_%_;tQo -z|MxHp?ue+eZwc|({W8X}%PhC+S)z4bm!AtsgBDYV>sbVw7vsMHCR#abWT-P;UXsI)<6GGkyf4-2?p5Z1pIB#cO0CrpcVdoXoqbUgIA!<7SHW+8HRQYV4zNL1{wr^3F6Fy -zzZ854{AS*>U4elKk{=A6F;F}BjzYm1XBd3@IMkE>I1GGhIbl3r%2USUr92CNuE06? 
-zljrx~+ujahj+?}w4_y=6mCEn!;4HMycziDDH?D>9Try<*3ca`T7iu_4dbe) -z@bGkX6M^`JJ2@W?ow0*baNgf+1#dy2ec+)n$v=KVgM6?8I6IyLXU7}hJUViELj!y^ -zT&FxwWY8&K2Xdd|BH6fGY@8~{XEm~~i#gS9W)+g=4y5C=wh3^SowzfUM_vOS^hzX# -zJole+(*%L%F@QYxt`jGy24{j6ckp6qztzmzegdbp4tH40sV*~X`!shWD%+m|=l8PA -zA`R~*&4E+i1<(DHRt{LQ0r7k+8;_6JH6-8*vg_bXyovZb-W!$Wjy_NHO5Po0X5%^T -zF)_<~rtZ;y^4#7s`KK^FW?xZ;F~!DJdp=_?Ne3w?Gqbizb45C8sxm{=QSdim;Vgpl -z2ruvR9mS`W3ghu2u)9SP{a2~PjSN5T|n-fgkvi`HHs2Sn2C)qir_ -z5&b96T`T{j@}oggbNbN+&W*bdoM((dG&cJ@mOHB>U|T+SEXa^2DGIq}?=`ONpNjVTJ@;oV8U}1Y-2Wgrx5N=7;Ju7di!MG~HCozl0Rf!9--J>r`p1tqAI5cwZ_4B2QWJ8f$ -z@ZY<0Uz|MW7MhF9to`tfsKv&$JnmmL;5;SNfqx9kWzg-Bl&K+@Q>c2_xK^D+)g$1i -zflnBZ<(~v+oGH)!dSIX#8=(F1YH%?F-vnm^`Tr0O6COUb;P2wI@8%r_$!?EDaNZ=# -z8wd$_B+VOF@%(for!|_!71#Zac%g_Q*q;K7zPd)F&-B*>K=NDY{g?4@=UlrqOY5H -zsoei(xakbxX299sJa{7}VhMS9V|p3fboFQ0H8W`A9Iye-iP$tA_nq_ca7Y|LLA9R< -zoP{=g0#||W$AESW_yBmX@pupJGw%0b>B1h18RD#NaqXW8vIc#nx@6A6l1YL;j*~^6 -zaaE4~h2ZorGQOpp*Ix!#z;HV{ZqC44jH?otXUh+Jv>R6?$fpqJS;0S*nEhm!oATsV -z7`g5CslZrwqE_G$Tpy;sN?Ip55B4AWfXc2&z6x#|_kq8I7b -zJP6UNGo)$;fB00lyIPJHmNE`I`5O4AkY@v&7rxaaq18{rzX9Bq|JCVP>^FmIP@E?I -z$CPnxo_q~~_|#9{N^!YpR`Sn2zlM7z!j-v0Vop_>S)kN=rd?6In -zewteC(FX(9SnYjb!0WTOyyX=L@GDMpkJiYc{|4Bn&8~&<-rx*4?+wlw*OKVJ08al! 
-z<9ck_8uDB93KDSbt%5VqIug)-6P*5uF9t!lc(cCfp8u-k*>V%}*p=yI$7&HLR18)+Xx9*m}dB7Kqg`&7@$~^b$j0w6hfD63OxK7(cC&>?c3>nwyAsZ}I9m;0bdE^__x-pD> -z_ki>Mh?X5KEwo`IFPBVtaKnR&YyGtYb_MCDt6Y9<2b6=)qeZ(X#5)4$Vm -zf6Q{gunmZ>Ku3(nSD;m2^##Rz%D8TwY@q6E-e2|DbLtuL(;o5x3)hHVwPsgA_TXG2 -zr|mk?tHI3Jk<;U+jmKq_PoB8E+^?UQ&t~B5_ow}jf3~{p4e5J(6nNdZ_W2a_u>sCc -zNy_?$FG%~Qe-8M=d9NCC`KmlW{;!rBY=CNXM# -z@3Lw~oCo+0*-ixFM?Uo!k2~v!|G(hOG?2l6cG@ekBaU7=&HP7LZ9H5SGVqlQd?N#& -z`c{Vc!qb;-DjgFq*#Mn_r(p_~!Fimn8P}%yHUie}vgJJN$ -zPCTIazk@=2rL7U14YY!@fx1~AC;bjMZi)Hd2?@vt!I@y#xK7PEn3@rA&i%M?oigz^ -z{pEiqVc>u%3d_sT8pZDF+@r$yh5_^<_^FpuFxOSY6j=R9wK(BG_ -znEw6Bbxo<^;gj9{ua{?Xd}1?U1GRG&Fb&Qvc;0wi*aA2UTQ-iu{H5>g#Jf4b8 -zBq)F)33s?b&OkZe^8#xgn)BQ-F{kp)97nC03-iL2IBPsU_U4So6JPj!A1CI0o*#et -z((;3lm@BJxKJYJaGXST5@`v7E=i}*+B?nyQ{l@E)rx)ZKQ;q0bzSxfzo -zgUs|F0l$y(d!9}4JnS1|1L^tiF@p{`vbAl=?~N-#iE;3pUkCnW -z@E-7g1MdZY8TkCK@gWjB68t74Aa4TiL!i|RJny$5&R5}Iy3M73Cg|Tb$d^fDK=o<} -zME{Wtyzr3|`!|E9PY_$jU#A)3Q(2dB&Y8?iVg!L+ghD65`C{+_ICsn|7c(T-#DD`y -zP3a$5kNVDZ^)BGqd(u7hW;v7PnOWB=&4q4HbMl-^ip(4@%~JAgOC|qQ8TYcdyWT8k -zEWYB~jd6EFhJJ9iyAIC3%Io-Dn6Dk(t_5y;grBi47`RCcflr~JIdB#^C& -zZ>-rFOBH0tg})D#vV#F|>mV&}-v0)FHc*^_H*Rt5pB*%rA?~2rc-%qvR-mYXA#gUZ -z2(Edzj+n%f8REpt#^c0U>!D*R*9v|srmXn!;D3MM^0Wj7Ryqn!sZ0J40#WLU47>q+ -zIwVf?r@6cMt;thUoPh!JkA)aC9t$yv_!Oe}kD<=nkarQ>gz%G)wGjqTh-PpKu>?*b -z)-v$iCqn#hp`f-W>>!9c?!*A@e!a%kAMYed^ch!$$Opl>-Ar%AFOdP`*cE3o3`n}i -zTI3m6?w>*P|%F=9UV)|S>w`hYR8`@tRc{CSS-oJ -zNm@_3AWB^U&bup>#$!dB;O~{NExQc{o_zCng~YsdS_978Y|Y^PR05p-^C=j3UvANO -zEZvgvxU+_=Q0QN9A-)9uC-B-+y#F4(SmMV~*15}Wk!>O8RBu;v5ys^juZeh`@%_Si -z_dFk*e-%@R`2388BG28AZ;_ivrRJo`=p-L3f2-73f!MqU+L#@*;PM?d9=AMVT+>jB -zwPc7x?S@ZFbN{JWdm+wCfX@`Jbl9XGBC=syWg|0&}tCL5bZ{AZrJeYGL42Eo8U -z^GNVa6uJmb|0Uyb;vGge+5C@cl@5&aD?NMo5XYE-is0y6RsMfg7 -z)3(1Z*LoN@puzZl@!^8+2LC5)r~MiDa0WguJbk!m0tTAJ04lMNfiGv^$)}!J(dNKY -zi7s?7YzCFI7?O^FQ=xI=Dk=RZ!RbF`JSbW!BjvFS!*@~88u-npx%a(8p3XiWYoIqT -zbjUN?7YF;v~%9CfEdrB5o)Q -ziCO48I15dj8~ho+G|i>|{n9-uN;G5`&hfN5`h|^wGi3O6xf|H`rWdz6{GXyGkrX@lb0IbFZ>kvR~wI~ 
-zt_J>mUR#U!c1OY=Sgjbq=dbI>50Db%3viwl8t3!WUgPmhOv3+1P`s=-6#{Kvu1bQ@ -z;=p%Ih4EO~dE;K$bdBjB6&Lt8>Q6;0jn``bG&GrEzi>+3fj}l*IzcD=DRtLY0`a2% -z5d2kXufI6uKigLtOE+p<3!>Cx8T^;vPpRuK^zk(glujk)V4&0+;6KKkWtD_L3*foN -ztNea&UYQMob7R{q4UI8Q)q~=!m1jlVi#^e2E -z+PDgm1;rOK_~*VfG{F6(=%qNR(NK>8_hJn+7>^5Tg?|&SUwXi~AN6g;(f)bT$$Ocv -zG)~Z$fhQ|Z?B8Tu72<~17Wr=-REhFSf<)YSD#5w$RA=C0;5>SYFFj$QsYIn2v{Fh} -zbK(Js0r>OWG6}wjDO<|GH_}}CkEJfF3QEi)P!0IjIYDYp)EbZDbikjdiZ1X^@T69` -zKP2Xs-SdjTxdWF~2TuP1aQgRM9{jg>dj9)fdAvb8Yq%nCCTP#Vdou8$41DqmoYa_L -z83S&>@tsu@3L;M$kEfs~17F$z*A#p(8TO`!g$>ABm%MHCF60& -z%f`1mmOT1c@!a3QWyhnnC+>Le)q$6CYOW4^$Kh%`R$>GG&8I!fSEE1ct_gv7R_tL*fUpuXchTz`1h(oc^QW+t1_lINF4P -zZ&RzjE+jq|E2r#rfsdo0{@0(FPZ-w~z}Gsagr|>VoqHNK-w*;Zab;uRg(#@$hQR4R -zbyMJcTibHrsU2@$TM32)-#3?R3W1n-@Ic^9Jox6o>A%<I%`;9O4Jdg1}g2Lq>nRY%}VF!-Lpe}Tp(jHmX?-2V!O`OYw4 -zdzFgMg7NrLchPvfU2P%`@As6wH^i5t#+}uhd|HZ9b-yn-u#>i~z*$(k@wl)K<8fgF -zh+~EMMKA%wze15&aJ~Z9e8?9RCn$e^D3tp`m2oZfavVL??@xt5G)!d}Fl}5D(|->B -zyx}wtfATfs=}+o81r?QiAQW@~c)xM&fPn_!Pd;XRzxXr$IQ%uf9fa~dJ{Z6|OPj{8 -zkU-DIWl5sj7pnY1@T76w*yx`N4*ygl&kR~A&vwP+P@*gY?*ivZZ4;a)ubkVp0qI{8 -z(~y@o9Dk8tZ(KVrLB|c?=q%A}T$P}IEBJ*e1Zp!wti%WsJO}=x;7mM$1oWQ*-||n- -z|Fjw6#LM8^C#pZF(s|{$Pqg<0{&gJ2-QawIy|)J!^W0eaY(TuR^c#D9R~WC=`M(^Cs|EvjIMr?s*m?Lh8&~Ccn6Vpc3myz>YWI&yFj4L!sPPYCQKBp|@eN)Y^b} -z&gzWE9kd|<5929t?gy23hJtpy@30RBuJ)0;!T_%Jyn6!Y;>|Z6FWy4qI~K3beX3kou?PNKj5Ei@x8rXX1DJR@LxL6fGeJqeS3*1HYHtVU -zVq8g?VaKA7!DaV`KwQ( -z?vanl?Iy0tqJQ*h;-yhxT=n3}sDwXPMkoBaRC~Z}sXj$QCK7#SNZ0FL{c)*_D`X62 -z);n%Iu6GuBSnnMCS#QH|kk?kMUxD*5@L*pBXQ5@E4S{&|Qf2%K>6I7#)!&k`DJfN~CKL}ET&Y6ItqVd0-bp**z4Kj-;X+CL?lFvD?6d?W -zUYWzjw^p*(Uy2xmfdy?K!R;EMb#6x8OoCkZ?=TGP_`A1C(DT0eG%Xqx#3XSg< -ze=fQr__Kkg47@+hCBFvZi=Jah@Kbc0_(JG_`|T(=7wJj{zCoT!N_KlB?+XbSs2rS& -ztMH4#pNp&dNZ?!(HO6&KY$=qU|5`J|i>?)cxaj7=+n`XxmqKEmBAdXuR_4EiXP~^B -zxP$?``&jp{ApxhLbUbk0#cT(sqJ7(3`lqBr+lHNYB`Urg5>V>7uLMr1=Z#Zpo#h4N -z+e)YP7sCbuQR=3NkbqJzgH!5Na7x|u)ewhDr&?j4)QjMhdg9+hAWEJ1THutr5S&Vv 
-zdVc)h159H8rEa`GB%sty#$&0Qjc-e>`pcsmaqi~Fe6GSksawAh5>x7Sa7w)lPN~Y`Za7x|sozNH; -zb@g`x7p1rMzeJ)J21+*qPN^rr>0kG~kbqLJ91A=sE%{f?5KFygJeE55`ymje?f|FM -z8{kwr@dJsY{qK*YZc~GCO5F)gsaL@%^~`*T!$qC@!@zf3|Cho*sVl)L^*lJGZe0io -zg4Bsbn{g=Z=fB+yvDEztM5#ByDRt*hLSjnY4bDZq{*zP~K&cZy4V+T9fm7<*p9OzP -z-49OX#-?sern -z9@0?ovOfe)p(-{!k9Ddv9_!R-e1Fi%&vh#d6lxlrLe)GG3ZhQKe+ryBje%3Ar9a_Z -zN1>|zF9hORtp(@O8wRIxGn*j}mCH@|Z}dsO+kzeEnIV=m-*_x(#a5sosr)8?1P@N- -z8h2S@@loYonf8_+y)aOsesD@U0^W9-yYGwgHhgy8a#Bb{rMkhX_#`+L&pA27;qFri -zZt?wEnuURq&z~9sQSk-ivEqxyW5qWShX;F4c56sL$qSQ#bFtQfQ}KFmD&7JvieG#r -zQoQ7}kcWy_gH!QNa4J3vPQ@3&cPQR_dPq#gb8-Ub_cpYi8TgxUP9Fg0dsjzx2Y-HB -z%F6C5yg_em^Ly-8ZGhgXCa*jz1Y$vb?%0=PD;&)AW*zn!f85G%iNvt+{qj5Ood9RO -zQyJpSz@OhXICcspAMLy{wbjk2C8_f=c>nZ@wS{w@V^4DF|G|im+yY9 -zgMkC;jjIy8)!z)x1_q65K~!`E@wd4izM~leO`m9h_|L+hN-Tl%YdTkrYeD>i+BMu^iLX(70NNL@k=oUxq;uhBXJ%Qa6lnA6Bike6BmaBJEox2 -zT{|voz8vO@P*(*w(^MK)J?LKzPX8L?aapyVpPyda{C1Rj8=yj6e5$*DJb88j12!Si -z#qJU5;R2dlGvw=XkNump+l|rX_+;uH40~x(Qrn{!oF7uu2hQpTjBEAJgPen&pPK#v -zpflW=QcD5tV>V7@J;S|R%{;y)n9kjzo$&acM%L@m)f}SIe*v8Si%7%tONejZKKzQb -zK+7<27FLmfUst>ie#WWp;;+c$GSQ~lb+xRz8@`e}=jr@9<>b@C{`TZ4?k*X=!(M21 -zRZ;Bsi+#sO;1^^3Zp=;zINK;gJo;CF)4$R<7Q4SDsfK~uQ9}mant=~w;KRms+3{PC -zN1m3VV(B?c{}{IcsuTxIg7a&Vr;wNvHV1wHr}C0>eBSi+EjzB?GDu?bmJGZz1Mkbg -z=fD%E`+JG$j_1vwmw8WQr&0RHf^ogfBVRJEl9pkutQlA3$Ty7Z+3NveTnycIqG4HE3bF4rOqP_r8YkkyuM5VdWM{UXwwHZ;%5w*ofZ9`O= -zJJL}v>C6zd2b`DkeaQC0Q{9cCoa_C1GN)hwd0vPzhSGUnH;i~Ze8&*)M$G0o_)Xwb -zNONPBJMVtkU}>MtkY@p${VaoXRjwP?RrVsZvjJ{--BZ7j+;b+Ka@@63@(s8(-$>@X -zoXysr8R>_x#Y)i!o1m# -z7xgO -zQ!qRQ9Zwk7N~zGaaZP*)63-ad#N@Mx&&2b_QyPdjKNb*(d=Y_oQ)CI8XX|C-8h-;H -za=ZebIwgJ177ux>n?XC^fK6~25FXD7Yek+oJosO-n -z9lj;`P2REM`E%13Gc*)lcM$llc#zTF4byg -zRZXiHlYjNmZd|L_50yK>8}P)b3vuY*15W>5&;4sweEK_p1l*B^5Qu!(xa#r$+dKcL -zD(gItbFTsSL9UX5fq_zL(iIgI1zS?R)O9G#p3b7%AnGb3i( -z*qc2^jAo?hCT`8>j&0l)ON*HW(?&PkptX{uD;WMDv7oc>&+~j)Klkow_s{8^UH;(p -zd_V8+&(HVg^FtY_mcld29!1Yl=&pdA*(bEo`c?o43hh?TRl` 
-z0{@%t9q)2;Jg0kJ>7$41&y;(Wy;VIHX)t#2&lnYdpRtR7wEc{VZ&n(ocg`9?hT>nS -z3UwR13|(#SQ3dEFYd{t7wCb=y6<^z+MPvC0Tejc71GJ5_@5Dx+U2`(|aYF?R8gKBete`D=R;)0_Whx(i>?u}uoqy@9C!7{Y0pC&utX2hR`&yNeE?}LpOIO^eJW+1!;t6fv -zJ)DoUU%Rf(2vVUXszQ5}{hU*0#^4(pV;4{8_y?3-$3JB3G6ZD1bo|f25mkUbMzpJp -zw7o}Vr2qfcL1ov=e*Em~W97!{%Q(J!+T&djtz33eMb!Ib#+>UJ+@oeewy}%HbOm#9 -z;xrE?H7ePksbss2J#tyCGxo@_+obYIyk7sPBgQVp7pM+vaqP}B%e$3u(j<@`(+N71 -zUB^FT?880AWcE*wu}i`FATyxyIT$t5#1>>6EffTl)6$;{XXyA -z^p{lTPlDSYOH4#CQqDH_$Wqlb^oo3o>e*#sa#{!XzQ4rW4$n;2Utx`)5W56LI4W3* -ze!;gfrPIa=!7^+U+=GLHd(kEM4w60~mSeGC1vU!)8uboN3BHTy{~(GnSFjRm9PDy% -z)Iq<4DQD~`D;#WdaL_@QgQO1`<$G8x_#12#tVF$oQx2j(vW;^ctZ}f*!BGeO4yJr; -z$6VoHn_v|VI-xEHNoQ#+!D7KmY!nou-U*#@5Zz@P=L+t_8V9=^9Cgs|V9F|KBNYA6z)9lNyf@<&q&upSVNiE<>07; -ze!*%?`IK0L6%MvJIOw3uK~l($vRLr9*yy01@P1zvSuRCRN@-GqEW-?Vnb%WMd7gjl -zWU2?pKJ$#@Td76{iRx=a4muf1(Jz%NOj;Sgx$yQ-){htV8d;WxsmP=3emDjnT;SB*TdM>nFKgRcfCUPZM8OJi~0Wda8KulVkp}pyP;%Ww-JS2jj$Y3lXl8&}v@u@F -zBdQCQ8>h%}_6UY3ht+a5o}-tMD>*NaT6RmDN>L(JX{Etcfs4|P8pa4qxgCe;VYEm} -zX@s}Nlq9|Lzqh5rZh7LnPrn0JfHJdheSF=4}_I`7B2BdH|)64qD`0RxQ+a1te0K82(_0yOT3G?*$)4t<3si~JeL{Q -z?u7Kqo&}6pa+y}?bPPx*+5W0=Bq{X2XYO9)!Rs79DJ3fRVpEK43_nEC70FX-RX}e` -zcS-_pSOT07w-jGtGaf}Ad&8BFPNs8BMcP#xUK&%@s%~Fp3XyJKYo|%kf6`sgq%M?6 -zYW>CRK9gFyatvxI%OqVWlk{P<^>eyBj3EJM&F8FuAB#V?r_x5jeAIvLS>n0_z0}-S -zWAVWiCUolhwUfP48q$TJO#NImN@xsvB{UVC5_%YEU)Ti~5i*I7^)OdI$_DI`YX$Ss -zHei>W_vN3uWP5Bqh%#2hyGeCSE@sJ4Y539;KVJ99@H`clPqVPFpN$)Yc*ULT!30^b -z26d>FpiU^&^yV)8N8c -zNjYpSEO7o*t$k>ogGQ0{lXUZ3*M1%K7&=9MT+QbN=4MY8+Q;CHi;xS9EYo^E@$(Xtxm`JKx1oFZO&o+qz~)+7{LRx%p6 -zzq@msBURW@ZP281j%u}%t_KIbmh2is41Gfik&Oq1mc=83oK)6LBI=sGBjR)ZQG$n3BOl9mSBww)4Po> -zW03k+)WbfPUTYrU%OJcT({E%co3Z#tYbLRg2#efcBFm6`0zrxO1bPMYkv4`&S7GiL -z>rsXp8A`cd-LvZxUOW)5#^@AXv|2YoAk%Y$CaC?-$y+~>%(q# -z=4y2!n|uwyL^e`CUcSrJQ~dKKdPV+Hy?fTpcmruktfOA4maEjP!_uUPI<7XlbS3r8 -z)WhmnuDrw^knD92=#}sx3ll6Re@OL^KG>axcO09@e@5V-Y#f`SU(uKkkL{K3$EX!q -zf5mQIzS}RhevGzA**vwdmsNJwILp4d+B1uNcNHZ!MR>K?^o=YC*HBH_{gW7cHHEQ% 
-zf}q4!A4^AVGO=a$TtjbUW?R)j*ow4dX10}I!OET68z^EhWt&=L52)B9Q5h)@k1FYr -zk!Yms)lO=~Q*1f9ldX8K_D%;5i#<7o!(7{k=c?HdQdpUvs><|*Ri=zGnr`CM$(_xq -zGZM8l$l3Sx+u4>Xv`KQEd$O6lQrRjDMaq-aLcpW@c&7fDDm+t7{gEh%lyg<)BT+4~ -z_C&r){c{9Go~AM{RH=WC-bi`)rJ>`Ow3{QAz~RrpWY%#BSbB5B61YL-PJJ`=u*z*Q -zx@6yjMrI$ja=onT9*<7Svd~<(kzS66H57M@wW(fUH3 -z7V*?Ls>L?*88G>y80_ILmZD!NoHhENlR5UG#r;#RxLmMZ*=S=iZoWBOFALaX7-G=% -zIBKU5k6}8u`LHwOax>{AtNADqSsxOIPk}P5Do`!5J|woP)cFWfmP6tW?rvNmFliq+`4mDyAy -zX;Q>VW_UU}hs=^mEF~8kC)sD0UWUR>Z&hZPIFZvPN65RwQu5F-V82y(gEJ#i7usA -zBQJvmRijKWAGM4n4=L;&KNoEow%_Frh!1v3Zf6O)uWQsW8iQ)etVjLrcA7Mw?1JNE -z7nEOxKHgdM$W^(|n#_`R`Rp@xC!>YkH!M@j^cZ-jMHH#GlqZoljkSIfWdc^7k?F{T -z655F_BCNyeOwQ8or!YN}v7SN+Ar4;8M>i=k>J?P789UPZF$iWd>v=dOiRK}CdPLUw -zs?xbAnjTT<4W?JjKoeO%xXbXc4E{(_U1$_}shX@OO|6`obc(Diniy{7 -zGQ;kKT82{oUX6)z6Ip!lG}7dNx58SW?f*f`s|6)n=d -zR@T6gzinYXW!JN4oX2{OkHh|XtY^nOtHHGkY0Sp;3~F=OnNUNnR7&BGba@ThD7*gQ -zcU?IaR=c`Ux!YPKeIW>8NN^r`wbmk3?6xw|P-`WkNv^u(YEZ5+_V8*!Ew9$fRfSwN -z@X9qF!97fWGlmE>?U5MI(w>gWXRSr<8L?PcZ%xCe&st+$b1+03_x!v%Vje05cOodb -z8$*IhKuGbL!6;m}~NYI76Uo&x! -zN!;*j#)v|YHdiKw1dEWjm*5jF!M6|;EWwbV0D1LHz0`7wZm4JKWeC!Siv27#;dz!? 
-zQ7@HvUVI|K243-HX7G8YUxgt8O=4gB0%Hu?F?hunmBAOJ1usy_XrSRiS(3Pvr4)@CysxL_oOrO@xF4pThS$vg!!@!uBHw2>Z?> -zBpf&j-yt&UE%=3-E+9Zg=fA~f<3zhP_L*0$4EJ4c!S^cV<_qu(x4wmd(DNBW!XwoG -znOyc6{KC|?5fBbg7Z$$_-)m%kAN;}^>cS&$BP6WqgRhA!Xn|k2p#=e9cOOE+D=qK^ -zMZXBYaPapC2+J=cWQYF%-|Gz5^usTVYDGYp*ou&_`6_(>LbkTTFWfSKfE{i_NZ9ZN -ze9fXCgP$C6e9(iJnyox{>pKXEVc;tdrX8l?D!1o5({x1B& -z4Sz&HSRRd#F!w$9-Xzmw;TPV}j({*X79pYMG<-+M#MAH#ODzP13;rD;;g&e~j*_e2 -zhhJDX5&>aV2SUQ^PWav;o8#dZmVShQucT^D2nmxXz}H45jD%l!hV2&?Oh8C@lI=T2 -z)=h%nW+DQ@s3e4h0~zqWBjKs=3)80}AncotknpZ)@Es@5%z$6GY#IW>ff)!1Yp9t@0)JVaf1d?rG|cIxkvQCaW{2dE3PvJes`XTtYKvXr_oHxmKjPU^y9>LcTqe -z!nLyy5=Kpj?-W@)8-8K>bOeM|vk?*&Q2#gbICWt;bz$-xgoIT^@V!T7&4FLoUX1`b -zB6i0O_%Jvk>6vy;bI%U=+9@u}hM!Doh<2W+)?Kpq`>4wV-wn50)2ssX4}f04#da&l -MedsaxPFuJAC&$n7YXATM - --- -2.38.1 - diff --git a/anolis.go b/anolis.go index 81b28ba3b69ea8e55fd400a0bcf2e7e5fe19f78b..6fca3e7a72d217153d1d17acef0f52f9217271e0 100644 --- a/anolis.go +++ b/anolis.go @@ -1,5 +1,9 @@ +//go:build rpm_crashtraceback // +build rpm_crashtraceback - + +// Copyright 2023 OpenAnolis Community. All rights reserved. +// Use of this source code is governed by the MIT license. 
+ package runtime func init() { diff --git a/download b/download new file mode 100644 index 0000000000000000000000000000000000000000..1a715eec89a8914ac9dba455428750abefd79925 --- /dev/null +++ b/download @@ -0,0 +1 @@ +7d78eecc98f887d1ea9cb5ca30387cea go1.24.0.src.tar.gz diff --git a/go1.21.11.src.tar.gz b/go1.21.11.src.tar.gz deleted file mode 100644 index 913a4e6dcc242d5a92e77d6073edd791c9e74ad6..0000000000000000000000000000000000000000 Binary files a/go1.21.11.src.tar.gz and /dev/null differ diff --git a/golang.spec b/golang.spec index 0bafb90c301f5a68e2600519cef681b280733221..5d06ed0bacb2424a80336ba147a1cf0baf584815 100644 --- a/golang.spec +++ b/golang.spec @@ -1,4 +1,4 @@ -%define anolis_release 1 +%define anolis_release 4 # Disable debuginfo packages %global debug_package %{nil} @@ -42,7 +42,11 @@ %endif # Build golang shared objects for stdlib +%ifarch loongarch64 riscv64 +%bcond_with shared +%else %bcond_without shared +%endif # Pre build std lib with -race enabled # Disabled due to 1.20 new cache usage, see 1.20 upstream release notes @@ -58,10 +62,13 @@ %ifarch loongarch64 %global gohostarch loong64 %endif +%ifarch riscv64 +%global gohostarch riscv64 +%endif # Comment out go_patch as needed -%global go_api 1.21 -%global go_patch 11 +%global go_api 1.24 +%global go_patch 0 Name: golang Version: %{go_api}%{?go_patch:.%{go_patch}} @@ -73,6 +80,51 @@ Source0: https://go.dev/dl/go%{go_api}%{?go_patch:.%{go_patch}}.src.tar.g # make possible to override default traceback level at build time by setting build tag rpm_crashtraceback Source1: anolis.go +Patch1: 0001-cmd-link-internal-add-support-for-internal-linking-o.patch +Patch2: 0002-cmd-dist-internal-platform-enable-internal-linking-f.patch +Patch3: 0003-cmd-runtime-enable-race-detector-on-loong64.patch +Patch4: 0004-runtime-delete-on-register-ABI-fallback-path-for-rac.patch +Patch5: 0005-cmd-internal-obj-loong64-remove-unused-register-alia.patch +Patch6: 
0006-internal-bytealg-optimize-IndexByte-and-IndexByteStr.patch +Patch7: 0007-internal-bytealg-optimize-memequal-and-memequal_varl.patch +Patch8: 0008-internal-bytealg-optimize-Index-and-IndexString-func.patch +Patch9: 0009-internal-bytealg-optimize-Count-and-CountString-func.patch +Patch10: 0010-internal-bytealg-adjust-the-format-of-assembly-files.patch +Patch11: 0011-cmd-internal-obj-loong64-optimize-immediate-loading.patch +Patch12: 0012-math-big-optimize-addVV-function-for-loong64.patch +Patch13: 0013-math-big-optimize-addVW-function-for-loong64.patch +Patch14: 0014-math-big-optimize-subVV-function-for-loong64.patch +Patch15: 0015-math-big-optimize-subVW-function-for-loong64.patch +Patch16: 0016-math-big-optimize-shlVU-function-for-loong64.patch +Patch17: 0017-math-big-optimize-shrVU-function-for-loong64.patch +Patch18: 0018-math-big-optimize-mulAddVWW-function-for-loong64.patch +Patch19: 0019-math-big-optimize-addMulVVW-function-for-loong64.patch +Patch20: 0020-cmd-compile-fold-constant-shift-with-extension-on-lo.patch +Patch21: 0021-test-codegen-fix-the-matching-instructions-inside-pl.patch +Patch22: 0022-cmd-compile-optimize-shifts-of-int32-and-uint32-on-l.patch +Patch23: 0023-cmd-compile-simplify-bounded-shift-on-loong64.patch +Patch24: 0024-runtime-use-ABIInternal-on-syscall-and-other-sys.stu.patch +Patch25: 0025-runtime-use-correct-memory-barrier-in-exitThread-fun.patch +Patch26: 0026-cmd-internal-obj-loong64-add-V-XV-SEQI-V-XV-.-AND-OR.patch +Patch27: 0027-cmd-internal-obj-loong64-add-V-XV-ADD-SUB-.-B-H-W-D-.patch +Patch28: 0028-cmd-internal-obj-loong64-add-V-XV-ILV-L-H-.-B-H-W-D-.patch +Patch29: 0029-cmd-internal-obj-loong64-add-V-XV-SLL-SRL-SRA-ROTR-I.patch +Patch30: 0030-cmd-internal-obj-loong64-add-V-XV-FSQRT-FRECIP-FRSQR.patch +Patch31: 0031-cmd-internal-obj-loong64-add-V-XV-NEG-B-H-W-V-instru.patch +Patch32: 0032-cmd-internal-obj-loong64-add-V-XV-MUL-B-H-W-V-and-V-.patch +Patch33: 0033-cmd-internal-obj-loong64-add-V-XV-DIV-B-H-W-V-U-and-.patch 
+Patch34: 0034-cmd-internal-obj-loong64-add-V-XV-BITCLR-BITSET-BITR.patch +Patch35: 0035-crypto-chacha20-add-loong64-SIMD-implementation.patch +Patch36: 0036-internal-bytealg-optimize-Count-String-in-loong64.patch +Patch37: 0037-cmd-internal-obj-cmd-asm-reclassify-32-bit-immediate.patch +Patch38: 0038-crypto-internal-poly1305-implement-function-update-i.patch +Patch39: 0039-runtime-optimize-the-implementation-of-memclrNoHeapP.patch +Patch40: 0040-runtime-race-add-the-implementation-of-atomic.-Or-An.patch +Patch41: 0041-cmd-internal-obj-loong64-add-F-MAXA-MINA-.-S-D-instr.patch +Patch42: 0042-math-implement-func-archExp-and-archExp2-in-assembly.patch +Patch43: 0043-math-implement-func-archLog-in-assembly-on-loong64.patch +Patch44: 0044-cmd-go-internal-work-allow-a-bunch-of-loong64-specif.patch + # The compiler is written in Go. Needs go(1.4+) compiler for build. %if %{with bootstrap} BuildRequires: gcc-go >= 5 @@ -105,62 +157,6 @@ Provides: bundled(golang(golang.org/x/tools)) = 0.3.1.0.20230118190848.070db2996 Requires: %{name}-bin = %{version}-%{release} Requires: %{name}-src = %{version}-%{release} -Patch3: 0001-cmd-go-use-aliyun-proxy-and-local-sumdb.patch -Patch4: 0004-cmd-link-use-gold-on-ARM-ARM64-only-if-gold-is-avail.patch - - -Patch1001: 0001-cmd-dist-cmd-link-internal-runtime-add-buildmode-plu.patch -Patch1002: 0002-runtime-cmd-go-enable-memory-sanitizer-on-linux-loon.patch -Patch1003: 0003-runtime-cmd-go-enable-address-sanitizer-on-linux-loo.patch -Patch1004: 0004-internal-sysinfo-print-cpu-type-from-cpuinfo-when-in.patch -Patch1005: 0005-cmd-cmd-vendor-pick-up-updates-for-golang.org-x-arch.patch -Patch1006: 0006-cmd-internal-objfile-add-loong64-disassembler-suppor.patch -Patch1007: 0007-runtime-remove-the-meaningless-offset-of-8-for-duffz.patch -Patch1008: 0008-cmd-compiler-remove-the-meaningless-offset-of-8-for-.patch -Patch1009: 0009-cmd-internal-obj-loong64-add-atomic-memory-access-in.patch -Patch1010: 
0010-cmd-compiler-runtime-internal-atomic-optimize-xchg-a.patch -Patch1011: 0011-cmd-compiler-runtime-internal-atomic-optimize-xadd-a.patch -Patch1012: 0012-cmd-compiler-runtime-internal-atomic-optimize-And-32.patch -Patch1013: 0013-cmd-compiler-runtime-internal-atomic-Implementing-xc.patch -Patch1014: 0014-cmd-compiler-runtime-internal-atomic-Implementing-xa.patch -Patch1015: 0015-cmd-compiler-runtime-internal-atomic-Implementing-An.patch -Patch1016: 0016-cmd-internal-obj-loong64-remove-the-invalid-plan9-fo.patch -Patch1017: 0017-cmd-internal-obj-loong64-correct-the-instruction-for.patch -Patch1018: 0018-cmd-internal-obj-loong64-recheck-jump-offset-boundar.patch -Patch1019: 0019-cmd-link-internal-loong64-correct-the-glibc-dynamic-.patch -Patch1020: 0020-cmd-link-internal-loadelf-correct-the-relocation-siz.patch -Patch1021: 0021-cmd-compile-cmd-internal-runtime-change-the-register.patch -Patch1022: 0022-cmd-compile-add-ABI-register-definations-for-loong64.patch -Patch1023: 0023-cmd-compile-cmd-internal-runtime-change-registers-on.patch -Patch1024: 0024-internal-abi-define-loong64-regABI-constants.patch -Patch1025: 0025-cmd-compile-internal-add-register-info-for-loong64-r.patch -Patch1026: 0026-cmd-compile-internal-add-spill-support-for-loong64-r.patch -Patch1027: 0027-cmd-compile-update-loong64-CALL-ops.patch -Patch1028: 0028-runtime-make-duff-device-as-ABIInternal-for-loong64.patch -Patch1029: 0029-runtime-support-regABI-and-add-spill-functions-in-ru.patch -Patch1030: 0030-reflect-runtime-add-reflect-support-for-regABI-on-lo.patch -Patch1031: 0031-internal-bytealg-add-regABI-support-in-bytealg-funct.patch -Patch1032: 0032-runtime-add-regABI-support-in-memclr-and-memmove-fun.patch -Patch1033: 0033-cmd-internal-obj-set-morestack-arg-spilling-and-rega.patch -Patch1034: 0034-cmd-compile-fix-If-lowering-on-loong64.patch -Patch1035: 0035-runtime-internal-syscall-use-ABIInternal-for-Syscall.patch -Patch1036: 0036-cmd-compile-internal-buildcfg-enable-regABI-on-loong.patch 
-Patch1037: 0037-internal-abi-internal-buildcfg-always-enable-registe.patch -Patch1038: 0038-all-delete-loong64-non-register-ABI-fallback-path.patch -Patch1039: 0039-cmd-internal-obj-loong64-using-LookupABI-to-find-duf.patch -Patch1040: 0040-cmd-internal-cmd-link-unify-the-relocation-naming-st.patch -Patch1041: 0041-cmd-link-internal-loadelf-remove-useless-relocation-.patch -Patch1042: 0042-cmd-link-internal-loadelf-add-additional-relocations.patch -Patch1043: 0043-cmd-link-add-new-relocations-numbered-101-to-109-for.patch -Patch1044: 0044-api-add-new-relocations-numbered-101-to-109-for-loon.patch -Patch1045: 0045-cmd-internal-obj-loong64-remove-unused-register-alia.patch -Patch1046: 0046-cmd-internal-runtime-change-the-LR-parameter-registe.patch -Patch1047: 0047-cmd-runtime-enable-race-detector-on-loong64.patch -Patch1048: 0048-runtime-Mark-race-functions-on-loong64-as-ABInternal.patch -Patch1049: 0049-runtime-delete-on-register-ABI-fallback-path-for-rac.patch -Patch1050: 0050-cmd-dist-update-isUnsupportedVMASize-test-skip.patch -#Patch1051: 0051-runtime-race-update-race_linux_loong64.syso.patch - Source100: golang-gdbinit ## git binary diffs are not supported. 
(Patch1051) @@ -515,6 +511,8 @@ export CFLAGS="$RPM_OPT_FLAGS" export LDFLAGS="$RPM_LD_FLAGS" %if !%{with external_linker} export GO_LDFLAGS="-linkmode internal" +## %else +## export GO_LDFLAGS="-extldflags '$RPM_LD_FLAGS'" %endif %if !%{with cgo} || !%{with external_linker} export CGO_ENABLED=0 @@ -596,10 +594,29 @@ fi %files docs -f go-docs.list %changelog +* Tue Jun 3 2025 zhoujiajia111 - 1.24.0-4 +- Provide tar package using download file + +* Fri Apr 25 2025 Shangtong Guo - 1.24.0-3 +- add support for riscv64 build + +* Mon Feb 24 2025 limeidan - 1.24.0-2 +- add internal linker support on loong64 +- optimize the internal/bytealg package on loong64 +- optimize the math/big package on loong64 +- add new instructions support on loong64 +- optimize memory operation function of runtime on loong64 + +* Tue Feb 18 2025 gaochang - 1.24.0-1 +- update to 1.24.0 + +* Tue Feb 18 2025 gaochang - 1.22.12-1 +- update to 1.22.12 + * Wed Jul 10 2024 yangxinyu - 1.21.11-1 - update to 1.21.11 fix cve-2024-24789 -* Thu Mon 13 2024 chenguoqi - 1.21.10-2 +* Thu Jun 13 2024 chenguoqi - 1.21.10-2 - add buildmode={plugin,shared} support on linux/loong64 - asan and msan support on linux/loong64 - loong64 disassembler support diff --git a/race_linux_loong64.syso b/race_linux_loong64.syso index 6fdb3bad77751956e4c1ee6c0732ddcc3a7fc3dc..0d2b4946fbf31abc042ea4ee852785cb13cce5a6 100644 Binary files a/race_linux_loong64.syso and b/race_linux_loong64.syso differ