diff --git a/2001-cpu-internal-provide-runtime-detection-of-RISC-V-ext.patch b/2001-cpu-internal-provide-runtime-detection-of-RISC-V-ext.patch new file mode 100644 index 0000000000000000000000000000000000000000..4912af4bacff7098ee5e9d6811fcc04f99d0032f --- /dev/null +++ b/2001-cpu-internal-provide-runtime-detection-of-RISC-V-ext.patch @@ -0,0 +1,240 @@ +From 833fbe8b2b9c9f3e9af481879dce51bd88ad613d Mon Sep 17 00:00:00 2001 +From: Mark Ryan +Date: Fri, 25 Aug 2023 11:22:02 +0200 +Subject: [PATCH 01/38] cpu/internal: provide runtime detection of RISC-V + extensions on Linux + +Add a RISCV64 variable to cpu/internal that indicates both the presence +of RISC-V extensions and performance information about the underlying +RISC-V cores. The variable is only populated with non false values on +Linux. The detection code relies on the riscv_hwprobe syscall +introduced in Linux 6.4. The patch can detect RVV 1.0 and whether +the CPU supports fast misaligned accesses. It can only detect RVV 1.0 +on a 6.5 kernel or later (without backports). + +Updates #61416 + +Change-Id: I2d8289345c885b699afff441d417cae38f6bdc54 +Reviewed-on: https://go-review.googlesource.com/c/go/+/522995 +Reviewed-by: Joel Sing +Reviewed-by: Meng Zhuo +LUCI-TryBot-Result: Go LUCI +Reviewed-by: Michael Knyszek +Reviewed-by: David Chase +--- + src/internal/cpu/cpu.go | 12 ++++ + src/internal/cpu/cpu_riscv64.go | 11 ++++ + src/internal/cpu/cpu_riscv64_linux.go | 91 +++++++++++++++++++++++++++ + src/internal/cpu/cpu_riscv64_other.go | 11 ++++ + src/runtime/os_linux_riscv64.go | 30 +++++++++ + 5 files changed, 155 insertions(+) + create mode 100644 src/internal/cpu/cpu_riscv64_linux.go + create mode 100644 src/internal/cpu/cpu_riscv64_other.go + +diff --git a/src/internal/cpu/cpu.go b/src/internal/cpu/cpu.go +index cd3db10523..81b8f7022e 100644 +--- a/src/internal/cpu/cpu.go ++++ b/src/internal/cpu/cpu.go +@@ -136,6 +136,17 @@ var S390X struct { + _ CacheLinePad + } + ++// RISCV64 contains the supported CPU features and performance characteristics for riscv64 ++// platforms. The booleans in RISCV64, with the exception of HasFastMisaligned, indicate ++// the presence of RISC-V extensions. ++// The struct is padded to avoid false sharing. ++var RISCV64 struct { ++ _ CacheLinePad ++ HasFastMisaligned bool // Fast misaligned accesses ++ HasV bool // Vector extension compatible with RVV 1.0 ++ _ CacheLinePad ++} ++ + // CPU feature variables are accessed by assembly code in various packages. + //go:linkname X86 + //go:linkname ARM +@@ -144,6 +155,7 @@ var S390X struct { + //go:linkname MIPS64X + //go:linkname PPC64 + //go:linkname S390X ++//go:linkname RISCV64 + + // Initialize examines the processor and sets the relevant variables above. + // This is called by the runtime package early in program initialization, +diff --git a/src/internal/cpu/cpu_riscv64.go b/src/internal/cpu/cpu_riscv64.go +index 2173fe8886..e6e532c7e7 100644 +--- a/src/internal/cpu/cpu_riscv64.go ++++ b/src/internal/cpu/cpu_riscv64.go +@@ -6,5 +6,16 @@ package cpu + + const CacheLinePadSize = 64 + ++// RISC-V doesn't have a 'cpuid' equivalent. On Linux we rely on the riscv_hwprobe syscall. 
++ + func doinit() { ++ options = []option{ ++ {Name: "fastmisaligned", Feature: &RISCV64.HasFastMisaligned}, ++ {Name: "v", Feature: &RISCV64.HasV}, ++ } ++ osInit() ++} ++ ++func isSet(hwc uint, value uint) bool { ++ return hwc&value != 0 + } +diff --git a/src/internal/cpu/cpu_riscv64_linux.go b/src/internal/cpu/cpu_riscv64_linux.go +new file mode 100644 +index 0000000000..a076d3e33c +--- /dev/null ++++ b/src/internal/cpu/cpu_riscv64_linux.go +@@ -0,0 +1,91 @@ ++// Copyright 2024 The Go Authors. All rights reserved. ++// Use of this source code is governed by a BSD-style ++// license that can be found in the LICENSE file. ++ ++//go:build riscv64 && linux ++ ++package cpu ++ ++import _ "unsafe" ++ ++// RISC-V extension discovery code for Linux. ++// ++// A note on detection of the Vector extension using HWCAP. ++// ++// Support for the Vector extension version 1.0 was added to the Linux kernel in release 6.5. ++// Support for the riscv_hwprobe syscall was added in 6.4. It follows that if the riscv_hwprobe ++// syscall is not available then neither is the Vector extension (which needs kernel support). ++// The riscv_hwprobe syscall should then be all we need to detect the Vector extension. ++// However, some RISC-V board manufacturers ship boards with an older kernel on top of which ++// they have back-ported various versions of the Vector extension patches but not the riscv_hwprobe ++// patches. These kernels advertise support for the Vector extension using HWCAP. Falling ++// back to HWCAP to detect the Vector extension, if riscv_hwprobe is not available, or simply not ++// bothering with riscv_hwprobe at all and just using HWCAP may then seem like an attractive option. ++// ++// Unfortunately, simply checking the 'V' bit in AT_HWCAP will not work as this bit is used by ++// RISC-V board and cloud instance providers to mean different things. The Lichee Pi 4A board ++// and the Scaleway RV1 cloud instances use the 'V' bit to advertise their support for the unratified ++// 0.7.1 version of the Vector Specification. The Banana Pi BPI-F3 and the CanMV-K230 board use ++// it to advertise support for 1.0 of the Vector extension. Versions 0.7.1 and 1.0 of the Vector ++// extension are binary incompatible. HWCAP can then not be used in isolation to populate the ++// HasV field as this field indicates that the underlying CPU is compatible with RVV 1.0. ++// Go will only support the ratified versions >= 1.0 and so any vector code it might generate ++// would crash on a Scaleway RV1 instance or a Lichee Pi 4a, if allowed to run. ++// ++// There is a way at runtime to distinguish between versions 0.7.1 and 1.0 of the Vector ++// specification by issuing a RVV 1.0 vsetvli instruction and checking the vill bit of the vtype ++// register. This check would allow us to safely detect version 1.0 of the Vector extension ++// with HWCAP, if riscv_hwprobe were not available. However, the check cannot ++// be added until the assembler supports the Vector instructions. ++// ++// Note the riscv_hwprobe syscall does not suffer from these ambiguities by design as all of the ++// extensions it advertises support for are explicitly versioned. It's also worth noting that ++// the riscv_hwprobe syscall is the only way to detect multi-letter RISC-V extensions, e.g., Zvbb. ++// These cannot be detected using HWCAP and so riscv_hwprobe must be used to detect the majority ++// of RISC-V extensions. ++// ++// Please see https://docs.kernel.org/arch/riscv/hwprobe.html for more information. 
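As an illustrative aside (not part of the patch), the probe performed by this file inside the runtime can also be reproduced from ordinary user code by issuing the raw riscv_hwprobe syscall directly. The hwprobe key constants, the key/value pair layout and the syscall number 258 in the sketch below mirror the values used elsewhere in this patch series; the program itself, including its type and constant names, is only a hedged sketch for linux/riscv64.

package main

import (
	"fmt"
	"syscall"
	"unsafe"
)

// hwprobePair mirrors struct riscv_hwprobe: a key selected by the caller and
// a value filled in by the kernel (the kernel sets key to -1 if it does not
// recognise the key).
type hwprobePair struct {
	key   int64
	value uint64
}

const (
	hwprobeKeyIMAExt0     = 0x4
	hwprobeIMAV           = 0x4
	hwprobeKeyCPUPerf0    = 0x5
	hwprobeMisalignedFast = 0x3
	hwprobeMisalignedMask = 0x7
	sysRISCVHWProbe       = 258
)

func main() {
	pairs := []hwprobePair{{key: hwprobeKeyIMAExt0}, {key: hwprobeKeyCPUPerf0}}
	// A cpusetsize of 0 and a nil cpu set ask only about capabilities that
	// are present on every core.
	_, _, errno := syscall.Syscall6(sysRISCVHWProbe,
		uintptr(unsafe.Pointer(&pairs[0])), uintptr(len(pairs)), 0, 0, 0, 0)
	if errno != 0 {
		fmt.Println("riscv_hwprobe unavailable (kernel older than 6.4?):", errno)
		return
	}
	hasV := pairs[0].key != -1 && pairs[0].value&hwprobeIMAV != 0
	fastMisaligned := pairs[1].key != -1 &&
		pairs[1].value&hwprobeMisalignedMask == hwprobeMisalignedFast
	fmt.Println("RVV 1.0:", hasV, "fast misaligned accesses:", fastMisaligned)
}
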
++ ++const ( ++ // Copied from golang.org/x/sys/unix/ztypes_linux_riscv64.go. ++ riscv_HWPROBE_KEY_IMA_EXT_0 = 0x4 ++ riscv_HWPROBE_IMA_V = 0x4 ++ riscv_HWPROBE_KEY_CPUPERF_0 = 0x5 ++ riscv_HWPROBE_MISALIGNED_FAST = 0x3 ++ riscv_HWPROBE_MISALIGNED_MASK = 0x7 ++) ++ ++// riscvHWProbePairs is copied from golang.org/x/sys/unix/ztypes_linux_riscv64.go. ++type riscvHWProbePairs struct { ++ key int64 ++ value uint64 ++} ++ ++//go:linkname riscvHWProbe ++func riscvHWProbe(pairs []riscvHWProbePairs, flags uint) bool ++ ++func osInit() { ++ // A slice of key/value pair structures is passed to the RISCVHWProbe syscall. The key ++ // field should be initialised with one of the key constants defined above, e.g., ++ // RISCV_HWPROBE_KEY_IMA_EXT_0. The syscall will set the value field to the appropriate value. ++ // If the kernel does not recognise a key it will set the key field to -1 and the value field to 0. ++ ++ pairs := []riscvHWProbePairs{ ++ {riscv_HWPROBE_KEY_IMA_EXT_0, 0}, ++ {riscv_HWPROBE_KEY_CPUPERF_0, 0}, ++ } ++ ++ // This call only indicates that extensions are supported if they are implemented on all cores. ++ if !riscvHWProbe(pairs, 0) { ++ return ++ } ++ ++ if pairs[0].key != -1 { ++ v := uint(pairs[0].value) ++ RISCV64.HasV = isSet(v, riscv_HWPROBE_IMA_V) ++ } ++ if pairs[1].key != -1 { ++ v := pairs[1].value & riscv_HWPROBE_MISALIGNED_MASK ++ RISCV64.HasFastMisaligned = v == riscv_HWPROBE_MISALIGNED_FAST ++ } ++} +diff --git a/src/internal/cpu/cpu_riscv64_other.go b/src/internal/cpu/cpu_riscv64_other.go +new file mode 100644 +index 0000000000..1307d822b3 +--- /dev/null ++++ b/src/internal/cpu/cpu_riscv64_other.go +@@ -0,0 +1,11 @@ ++// Copyright 2024 The Go Authors. All rights reserved. ++// Use of this source code is governed by a BSD-style ++// license that can be found in the LICENSE file. ++ ++//go:build riscv64 && !linux ++ ++package cpu ++ ++func osInit() { ++ // Other operating systems do not support the riscv_hwprobe syscall. ++} +diff --git a/src/runtime/os_linux_riscv64.go b/src/runtime/os_linux_riscv64.go +index 9be88a5ad2..c4a4d4e50d 100644 +--- a/src/runtime/os_linux_riscv64.go ++++ b/src/runtime/os_linux_riscv64.go +@@ -4,4 +4,34 @@ + + package runtime + ++import ( ++ "internal/runtime/syscall" ++ "unsafe" ++) ++ + func osArchInit() {} ++ ++type riscvHWProbePairs = struct { ++ key int64 ++ value uint64 ++} ++ ++// TODO: Consider whether to use the VDSO entry for riscv_hwprobe. ++// There is a VDSO entry for riscv_hwprobe that should allow us to avoid the syscall ++// entirely as it can handle the case where the caller only requests extensions that are ++// supported on all cores, which is what we're doing here. However, as we're only calling ++// this syscall once, it may not be worth the added effort to implement the VDSO call. ++ ++//go:linkname internal_cpu_riscvHWProbe internal/cpu.riscvHWProbe ++func internal_cpu_riscvHWProbe(pairs []riscvHWProbePairs, flags uint) bool { ++ // sys_RISCV_HWPROBE is copied from golang.org/x/sys/unix/zsysnum_linux_riscv64.go. ++ const sys_RISCV_HWPROBE uintptr = 258 ++ ++ if len(pairs) == 0 { ++ return false ++ } ++ // Passing in a cpuCount of 0 and a cpu of nil ensures that only extensions supported by all the ++ // cores are returned, which is the behaviour we want in internal/cpu. 
++ _, _, e1 := syscall.Syscall6(sys_RISCV_HWPROBE, uintptr(unsafe.Pointer(&pairs[0])), uintptr(len(pairs)), uintptr(0), uintptr(unsafe.Pointer(nil)), uintptr(flags), 0) ++ return e1 == 0 ++} +-- +2.50.1 + diff --git a/2002-cmd-go-add-rva23u64-as-a-valid-value-for-GORISCV64.patch b/2002-cmd-go-add-rva23u64-as-a-valid-value-for-GORISCV64.patch new file mode 100644 index 0000000000000000000000000000000000000000..c149608f73d47c5604b23864dea7e9ef4df1a608 --- /dev/null +++ b/2002-cmd-go-add-rva23u64-as-a-valid-value-for-GORISCV64.patch @@ -0,0 +1,188 @@ +From 09f2ce246410cdbcb95b9cce742bbb237cbc5e98 Mon Sep 17 00:00:00 2001 +From: Mark Ryan +Date: Mon, 2 Dec 2024 15:47:25 +0100 +Subject: [PATCH 02/38] cmd/go: add rva23u64 as a valid value for GORISCV64 + +The RVA23 profile was ratified on the 21st of October 2024. + +https://riscv.org/announcements/2024/10/risc-v-announces-ratification-of-the-rva23-profile-standard/ + +Now that it's ratified we can add rva23u64 as a valid value for the +GORISCV64 environment variable. This will allow the compiler and +assembler to generate instructions made mandatory by the new profile +without a runtime check. Examples of such instructions include those +introduced by the Vector and Zicond extensions. + +Setting GORISCV64=rva23u64 defines the riscv64.rva20u64, +riscv64.rva22u64 and riscv64.rva23u64 build tags, sets the internal +variable buildcfg.GORISCV64 to 23 and defines the macros +GORISCV64_rva23u64, hasV, hasZba, hasZbb, hasZbs, hasZfa, and +hasZicond for use in assembly language code. + +Updates #61476 + +Change-Id: I7641c23084fa52891c9a18df58f4013cb6597d88 +Reviewed-on: https://go-review.googlesource.com/c/go/+/633417 +Reviewed-by: Carlos Amedee +Reviewed-by: Jorropo +Reviewed-by: Joel Sing +Reviewed-by: Dmitri Shuralyov +LUCI-TryBot-Result: Go LUCI +Reviewed-by: Meng Zhuo +--- + src/cmd/go/alldocs.go | 7 ++++--- + src/cmd/go/internal/help/helpdoc.go | 7 ++++--- + src/cmd/go/testdata/script/tooltags.txt | 7 ++++++- + src/cmd/internal/testdir/testdir_test.go | 2 +- + src/internal/buildcfg/cfg.go | 7 ++++++- + src/internal/buildcfg/cfg_test.go | 4 ++++ + src/runtime/asm_riscv64.h | 9 +++++++++ + 7 files changed, 34 insertions(+), 9 deletions(-) + +diff --git a/src/cmd/go/alldocs.go b/src/cmd/go/alldocs.go +index 2220863b8e..008d97b2e0 100644 +--- a/src/cmd/go/alldocs.go ++++ b/src/cmd/go/alldocs.go +@@ -2100,8 +2100,8 @@ + // (or ppc64le.power8, ppc64le.power9, and ppc64le.power10) + // feature build tags. + // - For GOARCH=riscv64, +-// GORISCV64=rva20u64 and rva22u64 correspond to the riscv64.rva20u64 +-// and riscv64.rva22u64 build tags. ++// GORISCV64=rva20u64, rva22u64 and rva23u64 correspond to the riscv64.rva20u64, ++// riscv64.rva22u64 and riscv64.rva23u64 build tags. + // - For GOARCH=wasm, GOWASM=satconv and signext + // correspond to the wasm.satconv and wasm.signext feature build tags. + // +@@ -2473,8 +2473,9 @@ + // Valid values are power8 (default), power9, power10. + // GORISCV64 + // For GOARCH=riscv64, the RISC-V user-mode application profile for which +-// to compile. Valid values are rva20u64 (default), rva22u64. ++// to compile. Valid values are rva20u64 (default), rva22u64, rva23u64. + // See https://github.com/riscv/riscv-profiles/blob/main/src/profiles.adoc ++// and https://github.com/riscv/riscv-profiles/blob/main/src/rva23-profile.adoc + // GOWASM + // For GOARCH=wasm, comma-separated list of experimental WebAssembly features to use. + // Valid values are satconv, signext. 
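To make the documentation change above concrete, here is a hedged sketch (not part of the patch; the file, package and constant names are invented for illustration) of how the new riscv64.rva23u64 tool tag can be consumed from ordinary Go source via //go:build constraints. Building with GOARCH=riscv64 GORISCV64=rva23u64 selects the first file; building for rva20u64 or rva22u64 selects the fallback.

// rva23.go: compiled only when the riscv64.rva23u64 tool tag is set.
//go:build riscv64.rva23u64

package vecsum

// assumeRVV10 reports whether RVV 1.0 may be assumed without a runtime check.
const assumeRVV10 = true

// norva23.go: fallback for the older rva20u64 and rva22u64 profiles.
//go:build !riscv64.rva23u64

package vecsum

const assumeRVV10 = false
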
+diff --git a/src/cmd/go/internal/help/helpdoc.go b/src/cmd/go/internal/help/helpdoc.go +index ccc04c25d2..ddde3fdeb5 100644 +--- a/src/cmd/go/internal/help/helpdoc.go ++++ b/src/cmd/go/internal/help/helpdoc.go +@@ -646,8 +646,9 @@ Architecture-specific environment variables: + Valid values are power8 (default), power9, power10. + GORISCV64 + For GOARCH=riscv64, the RISC-V user-mode application profile for which +- to compile. Valid values are rva20u64 (default), rva22u64. ++ to compile. Valid values are rva20u64 (default), rva22u64, rva23u64. + See https://github.com/riscv/riscv-profiles/blob/main/src/profiles.adoc ++ and https://github.com/riscv/riscv-profiles/blob/main/src/rva23-profile.adoc + GOWASM + For GOARCH=wasm, comma-separated list of experimental WebAssembly features to use. + Valid values are satconv, signext. +@@ -952,8 +953,8 @@ The defined architecture feature build tags are: + (or ppc64le.power8, ppc64le.power9, and ppc64le.power10) + feature build tags. + - For GOARCH=riscv64, +- GORISCV64=rva20u64 and rva22u64 correspond to the riscv64.rva20u64 +- and riscv64.rva22u64 build tags. ++ GORISCV64=rva20u64, rva22u64 and rva23u64 correspond to the riscv64.rva20u64, ++ riscv64.rva22u64 and riscv64.rva23u64 build tags. + - For GOARCH=wasm, GOWASM=satconv and signext + correspond to the wasm.satconv and wasm.signext feature build tags. + +diff --git a/src/cmd/go/testdata/script/tooltags.txt b/src/cmd/go/testdata/script/tooltags.txt +index 1f6f54563c..a69b7a5c37 100644 +--- a/src/cmd/go/testdata/script/tooltags.txt ++++ b/src/cmd/go/testdata/script/tooltags.txt +@@ -50,10 +50,15 @@ env GORISCV64=rva22u64 + go list -f '{{context.ToolTags}}' + stdout 'riscv64.rva20u64 riscv64.rva22u64' + ++env GOARCH=riscv64 ++env GORISCV64=rva23u64 ++go list -f '{{context.ToolTags}}' ++stdout 'riscv64.rva20u64 riscv64.rva22u64 riscv64.rva23u64' ++ + env GOARCH=riscv64 + env GORISCV64=rva22 + ! 
go list -f '{{context.ToolTags}}' +-stderr 'go: invalid GORISCV64: must be rva20u64, rva22u64' ++stderr 'go: invalid GORISCV64: must be rva20u64, rva22u64, rva23u64' + + env GOARCH=riscv64 + env GORISCV64= +diff --git a/src/cmd/internal/testdir/testdir_test.go b/src/cmd/internal/testdir/testdir_test.go +index 7469a6491a..29bd1f7cf8 100644 +--- a/src/cmd/internal/testdir/testdir_test.go ++++ b/src/cmd/internal/testdir/testdir_test.go +@@ -1489,7 +1489,7 @@ var ( + "ppc64x": {}, // A pseudo-arch representing both ppc64 and ppc64le + "s390x": {}, + "wasm": {}, +- "riscv64": {"GORISCV64", "rva20u64", "rva22u64"}, ++ "riscv64": {"GORISCV64", "rva20u64", "rva22u64", "rva23u64"}, + } + ) + +diff --git a/src/internal/buildcfg/cfg.go b/src/internal/buildcfg/cfg.go +index fca09bf8d3..5ae4c0c7ad 100644 +--- a/src/internal/buildcfg/cfg.go ++++ b/src/internal/buildcfg/cfg.go +@@ -307,8 +307,10 @@ func goriscv64() int { + return 20 + case "rva22u64": + return 22 ++ case "rva23u64": ++ return 23 + } +- Error = fmt.Errorf("invalid GORISCV64: must be rva20u64, rva22u64") ++ Error = fmt.Errorf("invalid GORISCV64: must be rva20u64, rva22u64, rva23u64") + v := DefaultGORISCV64[len("rva"):] + i := strings.IndexFunc(v, func(r rune) bool { + return r < '0' || r > '9' +@@ -441,6 +443,9 @@ func gogoarchTags() []string { + if GORISCV64 >= 22 { + list = append(list, GOARCH+"."+"rva22u64") + } ++ if GORISCV64 >= 23 { ++ list = append(list, GOARCH+"."+"rva23u64") ++ } + return list + case "wasm": + var list []string +diff --git a/src/internal/buildcfg/cfg_test.go b/src/internal/buildcfg/cfg_test.go +index 757270b778..2bbd478280 100644 +--- a/src/internal/buildcfg/cfg_test.go ++++ b/src/internal/buildcfg/cfg_test.go +@@ -32,6 +32,10 @@ func TestConfigFlags(t *testing.T) { + if goriscv64() != 22 { + t.Errorf("Wrong parsing of RISCV64=rva22u64") + } ++ os.Setenv("GORISCV64", "rva23u64") ++ if goriscv64() != 23 { ++ t.Errorf("Wrong parsing of RISCV64=rva23u64") ++ } + Error = nil + os.Setenv("GORISCV64", "rva22") + if _ = goriscv64(); Error == nil { +diff --git a/src/runtime/asm_riscv64.h b/src/runtime/asm_riscv64.h +index d4deb093a6..2414b9f067 100644 +--- a/src/runtime/asm_riscv64.h ++++ b/src/runtime/asm_riscv64.h +@@ -10,3 +10,12 @@ + #define hasZbb + #define hasZbs + #endif ++ ++#ifdef GORISCV64_rva23u64 ++#define hasV ++#define hasZba ++#define hasZbb ++#define hasZbs ++#define hasZfa ++#define hasZicond ++#endif +-- +2.50.1 + diff --git a/2003-cmd-internal-obj-riscv-update-references-to-RISC-V-s.patch b/2003-cmd-internal-obj-riscv-update-references-to-RISC-V-s.patch new file mode 100644 index 0000000000000000000000000000000000000000..99cb25297629c9456b8805d870dd053ee3d9034e --- /dev/null +++ b/2003-cmd-internal-obj-riscv-update-references-to-RISC-V-s.patch @@ -0,0 +1,671 @@ +From 4128921041b88fdc8f8e62a6cd1d563ef17fa2eb Mon Sep 17 00:00:00 2001 +From: Joel Sing +Date: Sun, 24 Nov 2024 14:38:33 +1100 +Subject: [PATCH 03/38] cmd/internal/obj/riscv: update references to RISC-V + specification + +Update references to version 20240411 of the RISC-V specifications. +Reorder and regroup instructions to maintain ordering. Also be +consistent with formatting. + +The instruction encodings table was seemingly missed in CL 616115. 
+ +Change-Id: I47b7c8538383ff3b0503ba59db570c3d4f0d5653 +Reviewed-on: https://go-review.googlesource.com/c/go/+/631935 +Reviewed-by: Cherry Mui +LUCI-TryBot-Result: Go LUCI +Reviewed-by: Ian Lance Taylor +Reviewed-by: Meng Zhuo +Reviewed-by: Pengcheng Wang +--- + src/cmd/asm/internal/asm/testdata/riscv64.s | 4 + + src/cmd/internal/obj/riscv/cpu.go | 118 ++++++++++---------- + src/cmd/internal/obj/riscv/obj.go | 54 ++++----- + 3 files changed, 94 insertions(+), 82 deletions(-) + +diff --git a/src/cmd/asm/internal/asm/testdata/riscv64.s b/src/cmd/asm/internal/asm/testdata/riscv64.s +index 37c0c1d858..9ab4e066be 100644 +--- a/src/cmd/asm/internal/asm/testdata/riscv64.s ++++ b/src/cmd/asm/internal/asm/testdata/riscv64.s +@@ -363,6 +363,10 @@ start: + SLLIUW $63, X17, X18 // 1b99f80b + SLLIUW $1, X18, X19 // 9b191908 + ++ // ++ // "B" Extension for Bit Manipulation, Version 1.0.0 ++ // ++ + // 28.4.2: Basic Bit Manipulation (Zbb) + ANDN X19, X20, X21 // b37a3a41 or 93caf9ffb37a5a01 + ANDN X19, X20 // 337a3a41 or 93cff9ff337afa01 +diff --git a/src/cmd/internal/obj/riscv/cpu.go b/src/cmd/internal/obj/riscv/cpu.go +index 154217589b..2b75ed38a6 100644 +--- a/src/cmd/internal/obj/riscv/cpu.go ++++ b/src/cmd/internal/obj/riscv/cpu.go +@@ -576,6 +576,10 @@ const ( + // 22.5 Quad-Precision Floating-Point Classify Instruction + AFCLASSQ + ++ // ++ // "B" Extension for Bit Manipulation, Version 1.0.0 ++ // ++ + // 28.4.1: Address Generation Instructions (Zba) + AADDUW + ASH1ADD +@@ -625,15 +629,15 @@ const ( + ABSETI + + // +- // RISC-V Vector ISA-extension (1.0) (Unprivileged 20240411) ++ // "V" Standard Extension for Vector Operations, Version 1.0 + // + +- // 31.6. Configuration-Setting Instructions ++ // 31.6: Configuration-Setting Instructions + AVSETVLI + AVSETIVLI + AVSETVL + +- // 31.7.4. Vector Unit-Stride Instructions ++ // 31.7.4: Vector Unit-Stride Instructions + AVLE8V + AVLE16V + AVLE32V +@@ -645,7 +649,7 @@ const ( + AVLMV + AVSMV + +- // 31.7.5. Vector Strided Instructions ++ // 31.7.5: Vector Strided Instructions + AVLSE8V + AVLSE16V + AVLSE32V +@@ -655,7 +659,7 @@ const ( + AVSSE32V + AVSSE64V + +- // 31.7.6. Vector Indexed Instructions ++ // 31.7.6: Vector Indexed Instructions + AVLUXEI8V + AVLUXEI16V + AVLUXEI32V +@@ -673,13 +677,13 @@ const ( + AVSOXEI32V + AVSOXEI64V + +- // 31.7.7. Unit-stride Fault-Only-First Loads ++ // 31.7.7: Unit-stride Fault-Only-First Loads + AVLE8FFV + AVLE16FFV + AVLE32FFV + AVLE64FFV + +- // 31.7.9. Vector Load/Store Whole Register Instructions ++ // 31.7.9: Vector Load/Store Whole Register Instructions + AVL1RE8V + AVL1RE16V + AVL1RE32V +@@ -701,7 +705,7 @@ const ( + AVS4RV + AVS8RV + +- // 31.11.1. Vector Single-Width Integer Add and Subtract ++ // 31.11.1: Vector Single-Width Integer Add and Subtract + AVADDVV + AVADDVX + AVADDVI +@@ -710,7 +714,7 @@ const ( + AVRSUBVX + AVRSUBVI + +- // 31.11.2. Vector Widening Integer Add/Subtract ++ // 31.11.2: Vector Widening Integer Add/Subtract + AVWADDUVV + AVWADDUVX + AVWSUBUVV +@@ -728,7 +732,7 @@ const ( + AVWSUBWV + AVWSUBWX + +- // 31.11.3. Vector Integer Extension ++ // 31.11.3: Vector Integer Extension + AVZEXTVF2 + AVSEXTVF2 + AVZEXTVF4 +@@ -736,7 +740,7 @@ const ( + AVZEXTVF8 + AVSEXTVF8 + +- // 31.11.4. Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions ++ // 31.11.4: Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions + AVADCVVM + AVADCVXM + AVADCVIM +@@ -753,7 +757,7 @@ const ( + AVMSBCVV + AVMSBCVX + +- // 31.11.5. 
Vector Bitwise Logical Instructions ++ // 31.11.5: Vector Bitwise Logical Instructions + AVANDVV + AVANDVX + AVANDVI +@@ -764,7 +768,7 @@ const ( + AVXORVX + AVXORVI + +- // 31.11.6. Vector Single-Width Shift Instructions ++ // 31.11.6: Vector Single-Width Shift Instructions + AVSLLVV + AVSLLVX + AVSLLVI +@@ -775,7 +779,7 @@ const ( + AVSRAVX + AVSRAVI + +- // 31.11.7. Vector Narrowing Integer Right Shift Instructions ++ // 31.11.7: Vector Narrowing Integer Right Shift Instructions + AVNSRLWV + AVNSRLWX + AVNSRLWI +@@ -783,7 +787,7 @@ const ( + AVNSRAWX + AVNSRAWI + +- // 31.11.8. Vector Integer Compare Instructions ++ // 31.11.8: Vector Integer Compare Instructions + AVMSEQVV + AVMSEQVX + AVMSEQVI +@@ -805,7 +809,7 @@ const ( + AVMSGTVX + AVMSGTVI + +- // 31.11.9. Vector Integer Min/Max Instructions ++ // 31.11.9: Vector Integer Min/Max Instructions + AVMINUVV + AVMINUVX + AVMINVV +@@ -815,7 +819,7 @@ const ( + AVMAXVV + AVMAXVX + +- // 31.11.10. Vector Single-Width Integer Multiply Instructions ++ // 31.11.10: Vector Single-Width Integer Multiply Instructions + AVMULVV + AVMULVX + AVMULHVV +@@ -825,7 +829,7 @@ const ( + AVMULHSUVV + AVMULHSUVX + +- // 31.11.11. Vector Integer Divide Instructions ++ // 31.11.11: Vector Integer Divide Instructions + AVDIVUVV + AVDIVUVX + AVDIVVV +@@ -835,7 +839,7 @@ const ( + AVREMVV + AVREMVX + +- // 31.11.12. Vector Widening Integer Multiply Instructions ++ // 31.11.12: Vector Widening Integer Multiply Instructions + AVWMULVV + AVWMULVX + AVWMULUVV +@@ -843,7 +847,7 @@ const ( + AVWMULSUVV + AVWMULSUVX + +- // 31.11.13. Vector Single-Width Integer Multiply-Add Instructions ++ // 31.11.13: Vector Single-Width Integer Multiply-Add Instructions + AVMACCVV + AVMACCVX + AVNMSACVV +@@ -853,7 +857,7 @@ const ( + AVNMSUBVV + AVNMSUBVX + +- // 31.11.14. Vector Widening Integer Multiply-Add Instructions ++ // 31.11.14: Vector Widening Integer Multiply-Add Instructions + AVWMACCUVV + AVWMACCUVX + AVWMACCVV +@@ -862,17 +866,17 @@ const ( + AVWMACCSUVX + AVWMACCUSVX + +- // 31.11.15. Vector Integer Merge Instructions ++ // 31.11.15: Vector Integer Merge Instructions + AVMERGEVVM + AVMERGEVXM + AVMERGEVIM + +- // 31.11.16. Vector Integer Move Instructions ++ // 31.11.16: Vector Integer Move Instructions + AVMVVV + AVMVVX + AVMVVI + +- // 31.12.1. Vector Single-Width Saturating Add and Subtract ++ // 31.12.1: Vector Single-Width Saturating Add and Subtract + AVSADDUVV + AVSADDUVX + AVSADDUVI +@@ -884,7 +888,7 @@ const ( + AVSSUBVV + AVSSUBVX + +- // 31.12.2. Vector Single-Width Averaging Add and Subtract ++ // 31.12.2: Vector Single-Width Averaging Add and Subtract + AVAADDUVV + AVAADDUVX + AVAADDVV +@@ -894,11 +898,11 @@ const ( + AVASUBVV + AVASUBVX + +- // 31.12.3. Vector Single-Width Fractional Multiply with Rounding and Saturation ++ // 31.12.3: Vector Single-Width Fractional Multiply with Rounding and Saturation + AVSMULVV + AVSMULVX + +- // 31.12.4. Vector Single-Width Scaling Shift Instructions ++ // 31.12.4: Vector Single-Width Scaling Shift Instructions + AVSSRLVV + AVSSRLVX + AVSSRLVI +@@ -906,7 +910,7 @@ const ( + AVSSRAVX + AVSSRAVI + +- // 31.12.5. Vector Narrowing Fixed-Point Clip Instructions ++ // 31.12.5: Vector Narrowing Fixed-Point Clip Instructions + AVNCLIPUWV + AVNCLIPUWX + AVNCLIPUWI +@@ -914,14 +918,14 @@ const ( + AVNCLIPWX + AVNCLIPWI + +- // 31.13.2. 
Vector Single-Width Floating-Point Add/Subtract Instructions ++ // 31.13.2: Vector Single-Width Floating-Point Add/Subtract Instructions + AVFADDVV + AVFADDVF + AVFSUBVV + AVFSUBVF + AVFRSUBVF + +- // 31.13.3. Vector Widening Floating-Point Add/Subtract Instructions ++ // 31.13.3: Vector Widening Floating-Point Add/Subtract Instructions + AVFWADDVV + AVFWADDVF + AVFWSUBVV +@@ -931,18 +935,18 @@ const ( + AVFWSUBWV + AVFWSUBWF + +- // 31.13.4. Vector Single-Width Floating-Point Multiply/Divide Instructions ++ // 31.13.4: Vector Single-Width Floating-Point Multiply/Divide Instructions + AVFMULVV + AVFMULVF + AVFDIVVV + AVFDIVVF + AVFRDIVVF + +- // 31.13.5. Vector Widening Floating-Point Multiply ++ // 31.13.5: Vector Widening Floating-Point Multiply + AVFWMULVV + AVFWMULVF + +- // 31.13.6. Vector Single-Width Floating-Point Fused Multiply-Add Instructions ++ // 31.13.6: Vector Single-Width Floating-Point Fused Multiply-Add Instructions + AVFMACCVV + AVFMACCVF + AVFNMACCVV +@@ -960,7 +964,7 @@ const ( + AVFNMSUBVV + AVFNMSUBVF + +- // 31.13.7. Vector Widening Floating-Point Fused Multiply-Add Instructions ++ // 31.13.7: Vector Widening Floating-Point Fused Multiply-Add Instructions + AVFWMACCVV + AVFWMACCVF + AVFWNMACCVV +@@ -970,22 +974,22 @@ const ( + AVFWNMSACVV + AVFWNMSACVF + +- // 31.13.8. Vector Floating-Point Square-Root Instruction ++ // 31.13.8: Vector Floating-Point Square-Root Instruction + AVFSQRTV + +- // 31.13.9. Vector Floating-Point Reciprocal Square-Root Estimate Instruction ++ // 31.13.9: Vector Floating-Point Reciprocal Square-Root Estimate Instruction + AVFRSQRT7V + +- // 31.13.10. Vector Floating-Point Reciprocal Estimate Instruction ++ // 31.13.10: Vector Floating-Point Reciprocal Estimate Instruction + AVFREC7V + +- // 31.13.11. Vector Floating-Point MIN/MAX Instructions ++ // 31.13.11: Vector Floating-Point MIN/MAX Instructions + AVFMINVV + AVFMINVF + AVFMAXVV + AVFMAXVF + +- // 31.13.12. Vector Floating-Point Sign-Injection Instructions ++ // 31.13.12: Vector Floating-Point Sign-Injection Instructions + AVFSGNJVV + AVFSGNJVF + AVFSGNJNVV +@@ -993,7 +997,7 @@ const ( + AVFSGNJXVV + AVFSGNJXVF + +- // 31.13.13. Vector Floating-Point Compare Instructions ++ // 31.13.13: Vector Floating-Point Compare Instructions + AVMFEQVV + AVMFEQVF + AVMFNEVV +@@ -1005,16 +1009,16 @@ const ( + AVMFGTVF + AVMFGEVF + +- // 31.13.14. Vector Floating-Point Classify Instruction ++ // 31.13.14: Vector Floating-Point Classify Instruction + AVFCLASSV + +- // 31.13.15. Vector Floating-Point Merge Instruction ++ // 31.13.15: Vector Floating-Point Merge Instruction + AVFMERGEVFM + +- // 31.13.16. Vector Floating-Point Move Instruction ++ // 31.13.16: Vector Floating-Point Move Instruction + AVFMVVF + +- // 31.13.17. Single-Width Floating-Point/Integer Type-Convert Instructions ++ // 31.13.17: Single-Width Floating-Point/Integer Type-Convert Instructions + AVFCVTXUFV + AVFCVTXFV + AVFCVTRTZXUFV +@@ -1022,7 +1026,7 @@ const ( + AVFCVTFXUV + AVFCVTFXV + +- // 31.13.18. Widening Floating-Point/Integer Type-Convert Instructions ++ // 31.13.18: Widening Floating-Point/Integer Type-Convert Instructions + AVFWCVTXUFV + AVFWCVTXFV + AVFWCVTRTZXUFV +@@ -1031,7 +1035,7 @@ const ( + AVFWCVTFXV + AVFWCVTFFV + +- // 31.13.19. Narrowing Floating-Point/Integer Type-Convert Instructions ++ // 31.13.19: Narrowing Floating-Point/Integer Type-Convert Instructions + AVFNCVTXUFW + AVFNCVTXFW + AVFNCVTRTZXUFW +@@ -1041,7 +1045,7 @@ const ( + AVFNCVTFFW + AVFNCVTRODFFW + +- // 31.14.1. 
Vector Single-Width Integer Reduction Instructions ++ // 31.14.1: Vector Single-Width Integer Reduction Instructions + AVREDSUMVS + AVREDMAXUVS + AVREDMAXVS +@@ -1051,21 +1055,21 @@ const ( + AVREDORVS + AVREDXORVS + +- // 31.14.2. Vector Widening Integer Reduction Instructions ++ // 31.14.2: Vector Widening Integer Reduction Instructions + AVWREDSUMUVS + AVWREDSUMVS + +- // 31.14.3. Vector Single-Width Floating-Point Reduction Instructions ++ // 31.14.3: Vector Single-Width Floating-Point Reduction Instructions + AVFREDOSUMVS + AVFREDUSUMVS + AVFREDMAXVS + AVFREDMINVS + +- // 31.14.4. Vector Widening Floating-Point Reduction Instructions ++ // 31.14.4: Vector Widening Floating-Point Reduction Instructions + AVFWREDOSUMVS + AVFWREDUSUMVS + +- // 31.15. Vector Mask Instructions ++ // 31.15: Vector Mask Instructions + AVMANDMM + AVMNANDMM + AVMANDNMM +@@ -1082,15 +1086,15 @@ const ( + AVIOTAM + AVIDV + +- // 31.16.1. Integer Scalar Move Instructions ++ // 31.16.1: Integer Scalar Move Instructions + AVMVXS + AVMVSX + +- // 31.16.2. Floating-Point Scalar Move Instructions ++ // 31.16.2: Floating-Point Scalar Move Instructions + AVFMVFS + AVFMVSF + +- // 31.16.3. Vector Slide Instructions ++ // 31.16.3: Vector Slide Instructions + AVSLIDEUPVX + AVSLIDEUPVI + AVSLIDEDOWNVX +@@ -1100,16 +1104,16 @@ const ( + AVSLIDE1DOWNVX + AVFSLIDE1DOWNVF + +- // 31.16.4. Vector Register Gather Instructions ++ // 31.16.4: Vector Register Gather Instructions + AVRGATHERVV + AVRGATHEREI16VV + AVRGATHERVX + AVRGATHERVI + +- // 31.16.5. Vector Compress Instruction ++ // 31.16.5: Vector Compress Instruction + AVCOMPRESSVM + +- // 31.16.6. Whole Vector Register Move ++ // 31.16.6: Whole Vector Register Move + AVMV1RV + AVMV2RV + AVMV4RV +diff --git a/src/cmd/internal/obj/riscv/obj.go b/src/cmd/internal/obj/riscv/obj.go +index 1757e0b106..17fbac4ad9 100644 +--- a/src/cmd/internal/obj/riscv/obj.go ++++ b/src/cmd/internal/obj/riscv/obj.go +@@ -1665,7 +1665,9 @@ type instructionData struct { + // their encoding type. Entries are masked with obj.AMask to keep + // indices small. 
+ var instructions = [ALAST & obj.AMask]instructionData{ ++ // + // Unprivileged ISA ++ // + + // 2.4: Integer Computational Instructions + AADDI & obj.AMask: {enc: iIIEncoding, ternary: true}, +@@ -1714,7 +1716,7 @@ var instructions = [ALAST & obj.AMask]instructionData{ + // 2.7: Memory Ordering + AFENCE & obj.AMask: {enc: iIIEncoding}, + +- // 5.2: Integer Computational Instructions (RV64I) ++ // 4.2: Integer Computational Instructions (RV64I) + AADDIW & obj.AMask: {enc: iIIEncoding, ternary: true}, + ASLLIW & obj.AMask: {enc: iIIEncoding, ternary: true}, + ASRLIW & obj.AMask: {enc: iIIEncoding, ternary: true}, +@@ -1725,14 +1727,14 @@ var instructions = [ALAST & obj.AMask]instructionData{ + ASUBW & obj.AMask: {enc: rIIIEncoding, ternary: true}, + ASRAW & obj.AMask: {enc: rIIIEncoding, immForm: ASRAIW, ternary: true}, + +- // 5.3: Load and Store Instructions (RV64I) ++ // 4.3: Load and Store Instructions (RV64I) + ALD & obj.AMask: {enc: iIIEncoding}, + ASD & obj.AMask: {enc: sIEncoding}, + + // 7.1: CSR Instructions + ACSRRS & obj.AMask: {enc: iIIEncoding}, + +- // 7.1: Multiplication Operations ++ // 13.1: Multiplication Operations + AMUL & obj.AMask: {enc: rIIIEncoding, ternary: true}, + AMULH & obj.AMask: {enc: rIIIEncoding, ternary: true}, + AMULHU & obj.AMask: {enc: rIIIEncoding, ternary: true}, +@@ -1747,13 +1749,13 @@ var instructions = [ALAST & obj.AMask]instructionData{ + AREMW & obj.AMask: {enc: rIIIEncoding, ternary: true}, + AREMUW & obj.AMask: {enc: rIIIEncoding, ternary: true}, + +- // 8.2: Load-Reserved/Store-Conditional ++ // 14.2: Load-Reserved/Store-Conditional Instructions (Zalrsc) + ALRW & obj.AMask: {enc: rIIIEncoding}, + ALRD & obj.AMask: {enc: rIIIEncoding}, + ASCW & obj.AMask: {enc: rIIIEncoding}, + ASCD & obj.AMask: {enc: rIIIEncoding}, + +- // 8.3: Atomic Memory Operations ++ // 14.4: Atomic Memory Operations (Zaamo) + AAMOSWAPW & obj.AMask: {enc: rIIIEncoding}, + AAMOSWAPD & obj.AMask: {enc: rIIIEncoding}, + AAMOADDW & obj.AMask: {enc: rIIIEncoding}, +@@ -1773,11 +1775,11 @@ var instructions = [ALAST & obj.AMask]instructionData{ + AAMOMINUW & obj.AMask: {enc: rIIIEncoding}, + AAMOMINUD & obj.AMask: {enc: rIIIEncoding}, + +- // 11.5: Single-Precision Load and Store Instructions ++ // 20.5: Single-Precision Load and Store Instructions + AFLW & obj.AMask: {enc: iFEncoding}, + AFSW & obj.AMask: {enc: sFEncoding}, + +- // 11.6: Single-Precision Floating-Point Computational Instructions ++ // 20.6: Single-Precision Floating-Point Computational Instructions + AFADDS & obj.AMask: {enc: rFFFEncoding}, + AFSUBS & obj.AMask: {enc: rFFFEncoding}, + AFMULS & obj.AMask: {enc: rFFFEncoding}, +@@ -1790,7 +1792,7 @@ var instructions = [ALAST & obj.AMask]instructionData{ + AFNMSUBS & obj.AMask: {enc: rFFFFEncoding}, + AFNMADDS & obj.AMask: {enc: rFFFFEncoding}, + +- // 11.7: Single-Precision Floating-Point Conversion and Move Instructions ++ // 20.7: Single-Precision Floating-Point Conversion and Move Instructions + AFCVTWS & obj.AMask: {enc: rFIEncoding}, + AFCVTLS & obj.AMask: {enc: rFIEncoding}, + AFCVTSW & obj.AMask: {enc: rIFEncoding}, +@@ -1805,19 +1807,19 @@ var instructions = [ALAST & obj.AMask]instructionData{ + AFMVXW & obj.AMask: {enc: rFIEncoding}, + AFMVWX & obj.AMask: {enc: rIFEncoding}, + +- // 11.8: Single-Precision Floating-Point Compare Instructions ++ // 20.8: Single-Precision Floating-Point Compare Instructions + AFEQS & obj.AMask: {enc: rFFIEncoding}, + AFLTS & obj.AMask: {enc: rFFIEncoding}, + AFLES & obj.AMask: {enc: rFFIEncoding}, + +- // 11.9: 
Single-Precision Floating-Point Classify Instruction ++ // 20.9: Single-Precision Floating-Point Classify Instruction + AFCLASSS & obj.AMask: {enc: rFIEncoding}, + + // 12.3: Double-Precision Load and Store Instructions + AFLD & obj.AMask: {enc: iFEncoding}, + AFSD & obj.AMask: {enc: sFEncoding}, + +- // 12.4: Double-Precision Floating-Point Computational Instructions ++ // 21.4: Double-Precision Floating-Point Computational Instructions + AFADDD & obj.AMask: {enc: rFFFEncoding}, + AFSUBD & obj.AMask: {enc: rFFFEncoding}, + AFMULD & obj.AMask: {enc: rFFFEncoding}, +@@ -1830,7 +1832,7 @@ var instructions = [ALAST & obj.AMask]instructionData{ + AFNMSUBD & obj.AMask: {enc: rFFFFEncoding}, + AFNMADDD & obj.AMask: {enc: rFFFFEncoding}, + +- // 12.5: Double-Precision Floating-Point Conversion and Move Instructions ++ // 21.5: Double-Precision Floating-Point Conversion and Move Instructions + AFCVTWD & obj.AMask: {enc: rFIEncoding}, + AFCVTLD & obj.AMask: {enc: rFIEncoding}, + AFCVTDW & obj.AMask: {enc: rIFEncoding}, +@@ -1847,25 +1849,19 @@ var instructions = [ALAST & obj.AMask]instructionData{ + AFMVXD & obj.AMask: {enc: rFIEncoding}, + AFMVDX & obj.AMask: {enc: rIFEncoding}, + +- // 12.6: Double-Precision Floating-Point Compare Instructions ++ // 21.6: Double-Precision Floating-Point Compare Instructions + AFEQD & obj.AMask: {enc: rFFIEncoding}, + AFLTD & obj.AMask: {enc: rFFIEncoding}, + AFLED & obj.AMask: {enc: rFFIEncoding}, + +- // 12.7: Double-Precision Floating-Point Classify Instruction ++ // 21.7: Double-Precision Floating-Point Classify Instruction + AFCLASSD & obj.AMask: {enc: rFIEncoding}, + +- // Privileged ISA +- +- // 3.2.1: Environment Call and Breakpoint +- AECALL & obj.AMask: {enc: iIIEncoding}, +- AEBREAK & obj.AMask: {enc: iIIEncoding}, +- + // +- // RISC-V Bit-Manipulation ISA-extensions (1.0) ++ // "B" Extension for Bit Manipulation, Version 1.0.0 + // + +- // 1.1: Address Generation Instructions (Zba) ++ // 28.4.1: Address Generation Instructions (Zba) + AADDUW & obj.AMask: {enc: rIIIEncoding, ternary: true}, + ASH1ADD & obj.AMask: {enc: rIIIEncoding, ternary: true}, + ASH1ADDUW & obj.AMask: {enc: rIIIEncoding, ternary: true}, +@@ -1875,7 +1871,7 @@ var instructions = [ALAST & obj.AMask]instructionData{ + ASH3ADDUW & obj.AMask: {enc: rIIIEncoding, ternary: true}, + ASLLIUW & obj.AMask: {enc: iIIEncoding, ternary: true}, + +- // 1.2: Basic Bit Manipulation (Zbb) ++ // 28.4.2: Basic Bit Manipulation (Zbb) + AANDN & obj.AMask: {enc: rIIIEncoding, ternary: true}, + ACLZ & obj.AMask: {enc: rIIEncoding}, + ACLZW & obj.AMask: {enc: rIIEncoding}, +@@ -1893,7 +1889,7 @@ var instructions = [ALAST & obj.AMask]instructionData{ + AXNOR & obj.AMask: {enc: rIIIEncoding, ternary: true}, + AZEXTH & obj.AMask: {enc: rIIEncoding}, + +- // 1.3: Bitwise Rotation (Zbb) ++ // 28.4.3: Bitwise Rotation (Zbb) + AROL & obj.AMask: {enc: rIIIEncoding, ternary: true}, + AROLW & obj.AMask: {enc: rIIIEncoding, ternary: true}, + AROR & obj.AMask: {enc: rIIIEncoding, immForm: ARORI, ternary: true}, +@@ -1903,7 +1899,7 @@ var instructions = [ALAST & obj.AMask]instructionData{ + AORCB & obj.AMask: {enc: iIIEncoding}, + AREV8 & obj.AMask: {enc: iIIEncoding}, + +- // 1.5: Single-bit Instructions (Zbs) ++ // 28.4.4: Single-bit Instructions (Zbs) + ABCLR & obj.AMask: {enc: rIIIEncoding, immForm: ABCLRI, ternary: true}, + ABCLRI & obj.AMask: {enc: iIIEncoding, ternary: true}, + ABEXT & obj.AMask: {enc: rIIIEncoding, immForm: ABEXTI, ternary: true}, +@@ -1913,6 +1909,14 @@ var instructions = [ALAST & 
obj.AMask]instructionData{ + ABSET & obj.AMask: {enc: rIIIEncoding, immForm: ABSETI, ternary: true}, + ABSETI & obj.AMask: {enc: iIIEncoding, ternary: true}, + ++ // ++ // Privileged ISA ++ // ++ ++ // 3.3.1: Environment Call and Breakpoint ++ AECALL & obj.AMask: {enc: iIIEncoding}, ++ AEBREAK & obj.AMask: {enc: iIIEncoding}, ++ + // Escape hatch + AWORD & obj.AMask: {enc: rawEncoding}, + +-- +2.50.1 + diff --git a/2004-cmd-asm-cmd-internal-obj-riscv-implement-vector-conf.patch b/2004-cmd-asm-cmd-internal-obj-riscv-implement-vector-conf.patch new file mode 100644 index 0000000000000000000000000000000000000000..c69a89428f7a16ba8ffb95b5c4bc06de8527cb99 --- /dev/null +++ b/2004-cmd-asm-cmd-internal-obj-riscv-implement-vector-conf.patch @@ -0,0 +1,618 @@ +From 32c3aa1ef9561069b7dcbf4b2a2bc9e4d57d0736 Mon Sep 17 00:00:00 2001 +From: Joel Sing +Date: Sun, 24 Nov 2024 12:39:20 +1100 +Subject: [PATCH 04/38] cmd/asm,cmd/internal/obj/riscv: implement vector + configuration setting instructions + +Implement vector configuration setting instructions (VSETVLI, +VSETIVLI, VSETL). These allow the vector length (vl) and vector +type (vtype) CSRs to be configured via a single instruction. +Unfortunately each instruction has its own dedicated encoding. + +In the case of VSETVLI/VSETIVLI, the vector type is specified via +a series of special operands, which specify the selected element +width (E8, E16, E32, E64), the vector register group multiplier +(M1, M2, M4, M8, MF2, MF4, MF8), the vector tail policy (TU, TA) +and vector mask policy (MU, MA). Note that the order of these +special operands matches non-Go assemblers. + +Partially based on work by Pengcheng Wang . + +Cq-Include-Trybots: luci.golang.try:gotip-linux-riscv64 +Change-Id: I431f59c1e048a3e84754f0643a963da473a741fe +Reviewed-on: https://go-review.googlesource.com/c/go/+/631936 +Reviewed-by: Mark Ryan +Reviewed-by: Meng Zhuo +Reviewed-by: Cherry Mui +LUCI-TryBot-Result: Go LUCI +Reviewed-by: Dmitri Shuralyov +--- + src/cmd/asm/internal/arch/arm64.go | 6 +- + src/cmd/asm/internal/arch/riscv64.go | 35 +++- + src/cmd/asm/internal/asm/asm.go | 21 +++ + src/cmd/asm/internal/asm/parse.go | 16 +- + src/cmd/asm/internal/asm/testdata/riscv64.s | 24 +++ + .../asm/internal/asm/testdata/riscv64error.s | 4 + + src/cmd/internal/obj/arm64/a.out.go | 4 +- + src/cmd/internal/obj/link.go | 3 +- + src/cmd/internal/obj/riscv/cpu.go | 71 +++++++++ + src/cmd/internal/obj/riscv/list.go | 9 ++ + src/cmd/internal/obj/riscv/obj.go | 149 ++++++++++++++++-- + src/cmd/internal/obj/util.go | 7 + + 12 files changed, 325 insertions(+), 24 deletions(-) + +diff --git a/src/cmd/asm/internal/arch/arm64.go b/src/cmd/asm/internal/arch/arm64.go +index e63601de64..87ccb8c040 100644 +--- a/src/cmd/asm/internal/arch/arm64.go ++++ b/src/cmd/asm/internal/arch/arm64.go +@@ -59,10 +59,10 @@ func jumpArm64(word string) bool { + + var arm64SpecialOperand map[string]arm64.SpecialOperand + +-// GetARM64SpecialOperand returns the internal representation of a special operand. +-func GetARM64SpecialOperand(name string) arm64.SpecialOperand { ++// ARM64SpecialOperand returns the internal representation of a special operand. ++func ARM64SpecialOperand(name string) arm64.SpecialOperand { + if arm64SpecialOperand == nil { +- // Generate the mapping automatically when the first time the function is called. ++ // Generate mapping when function is first called. 
+ arm64SpecialOperand = map[string]arm64.SpecialOperand{} + for opd := arm64.SPOP_BEGIN; opd < arm64.SPOP_END; opd++ { + arm64SpecialOperand[opd.String()] = opd +diff --git a/src/cmd/asm/internal/arch/riscv64.go b/src/cmd/asm/internal/arch/riscv64.go +index 27a66c5e63..69e060a865 100644 +--- a/src/cmd/asm/internal/arch/riscv64.go ++++ b/src/cmd/asm/internal/arch/riscv64.go +@@ -13,9 +13,8 @@ import ( + "cmd/internal/obj/riscv" + ) + +-// IsRISCV64AMO reports whether the op (as defined by a riscv.A* +-// constant) is one of the AMO instructions that requires special +-// handling. ++// IsRISCV64AMO reports whether op is an AMO instruction that requires ++// special handling. + func IsRISCV64AMO(op obj.As) bool { + switch op { + case riscv.ASCW, riscv.ASCD, riscv.AAMOSWAPW, riscv.AAMOSWAPD, riscv.AAMOADDW, riscv.AAMOADDD, +@@ -26,3 +25,33 @@ func IsRISCV64AMO(op obj.As) bool { + } + return false + } ++ ++// IsRISCV64VTypeI reports whether op is a vtype immediate instruction that ++// requires special handling. ++func IsRISCV64VTypeI(op obj.As) bool { ++ return op == riscv.AVSETVLI || op == riscv.AVSETIVLI ++} ++ ++var riscv64SpecialOperand map[string]riscv.SpecialOperand ++ ++// RISCV64SpecialOperand returns the internal representation of a special operand. ++func RISCV64SpecialOperand(name string) riscv.SpecialOperand { ++ if riscv64SpecialOperand == nil { ++ // Generate mapping when function is first called. ++ riscv64SpecialOperand = map[string]riscv.SpecialOperand{} ++ for opd := riscv.SPOP_BEGIN; opd < riscv.SPOP_END; opd++ { ++ riscv64SpecialOperand[opd.String()] = opd ++ } ++ } ++ if opd, ok := riscv64SpecialOperand[name]; ok { ++ return opd ++ } ++ return riscv.SPOP_END ++} ++ ++// RISCV64ValidateVectorType reports whether the given configuration is a ++// valid vector type. ++func RISCV64ValidateVectorType(vsew, vlmul, vtail, vmask int64) error { ++ _, err := riscv.EncodeVectorType(vsew, vlmul, vtail, vmask) ++ return err ++} +diff --git a/src/cmd/asm/internal/asm/asm.go b/src/cmd/asm/internal/asm/asm.go +index 9fc7fa5598..a1f6a73d70 100644 +--- a/src/cmd/asm/internal/asm/asm.go ++++ b/src/cmd/asm/internal/asm/asm.go +@@ -915,6 +915,19 @@ func (p *Parser) asmInstruction(op obj.As, cond string, a []obj.Addr) { + prog.To = a[5] + break + } ++ if p.arch.Family == sys.RISCV64 && arch.IsRISCV64VTypeI(op) { ++ prog.From = a[0] ++ vsew := p.getSpecial(prog, op, &a[1]) ++ vlmul := p.getSpecial(prog, op, &a[2]) ++ vtail := p.getSpecial(prog, op, &a[3]) ++ vmask := p.getSpecial(prog, op, &a[4]) ++ if err := arch.RISCV64ValidateVectorType(vsew, vlmul, vtail, vmask); err != nil { ++ p.errorf("invalid vtype: %v", err) ++ } ++ prog.AddRestSourceArgs([]obj.Addr{a[1], a[2], a[3], a[4]}) ++ prog.To = a[5] ++ break ++ } + fallthrough + default: + p.errorf("can't handle %s instruction with %d operands", op, len(a)) +@@ -965,3 +978,11 @@ func (p *Parser) getRegister(prog *obj.Prog, op obj.As, addr *obj.Addr) int16 { + } + return addr.Reg + } ++ ++// getSpecial checks that addr represents a special operand and returns its value. 
++func (p *Parser) getSpecial(prog *obj.Prog, op obj.As, addr *obj.Addr) int64 { ++ if addr.Type != obj.TYPE_SPECIAL || addr.Name != 0 || addr.Reg != 0 || addr.Index != 0 { ++ p.errorf("%s: expected special operand; found %s", op, obj.Dconv(prog, addr)) ++ } ++ return addr.Offset ++} +diff --git a/src/cmd/asm/internal/asm/parse.go b/src/cmd/asm/internal/asm/parse.go +index 638f4e2fc4..8f8f6dcc34 100644 +--- a/src/cmd/asm/internal/asm/parse.go ++++ b/src/cmd/asm/internal/asm/parse.go +@@ -21,6 +21,7 @@ import ( + "cmd/asm/internal/lex" + "cmd/internal/obj" + "cmd/internal/obj/arm64" ++ "cmd/internal/obj/riscv" + "cmd/internal/obj/x86" + "cmd/internal/objabi" + "cmd/internal/src" +@@ -398,16 +399,21 @@ func (p *Parser) operand(a *obj.Addr) { + tok := p.next() + name := tok.String() + if tok.ScanToken == scanner.Ident && !p.atStartOfRegister(name) { ++ // See if this is an architecture specific special operand. + switch p.arch.Family { + case sys.ARM64: +- // arm64 special operands. +- if opd := arch.GetARM64SpecialOperand(name); opd != arm64.SPOP_END { ++ if opd := arch.ARM64SpecialOperand(name); opd != arm64.SPOP_END { + a.Type = obj.TYPE_SPECIAL + a.Offset = int64(opd) +- break + } +- fallthrough +- default: ++ case sys.RISCV64: ++ if opd := arch.RISCV64SpecialOperand(name); opd != riscv.SPOP_END { ++ a.Type = obj.TYPE_SPECIAL ++ a.Offset = int64(opd) ++ } ++ } ++ ++ if a.Type != obj.TYPE_SPECIAL { + // We have a symbol. Parse $sym±offset(symkind) + p.symbolReference(a, p.qualifySymbol(name), prefix) + } +diff --git a/src/cmd/asm/internal/asm/testdata/riscv64.s b/src/cmd/asm/internal/asm/testdata/riscv64.s +index 9ab4e066be..cbe99ba348 100644 +--- a/src/cmd/asm/internal/asm/testdata/riscv64.s ++++ b/src/cmd/asm/internal/asm/testdata/riscv64.s +@@ -424,6 +424,30 @@ start: + BSET $63, X9 // 9394f42b + BSETI $1, X10, X11 // 93151528 + ++ // ++ // "V" Standard Extension for Vector Operations, Version 1.0 ++ // ++ ++ // 31.6: Configuration Setting Instructions ++ VSETVLI X10, E8, M1, TU, MU, X12 // 57760500 ++ VSETVLI X10, E16, M1, TU, MU, X12 // 57768500 ++ VSETVLI X10, E32, M1, TU, MU, X12 // 57760501 ++ VSETVLI X10, E64, M1, TU, MU, X12 // 57768501 ++ VSETVLI X10, E32, M1, TU, MA, X12 // 57760509 ++ VSETVLI X10, E32, M1, TA, MA, X12 // 5776050d ++ VSETVLI X10, E32, M2, TA, MA, X12 // 5776150d ++ VSETVLI X10, E32, M4, TA, MA, X12 // 5776250d ++ VSETVLI X10, E32, M8, TA, MA, X12 // 5776350d ++ VSETVLI X10, E32, MF2, TA, MA, X12 // 5776550d ++ VSETVLI X10, E32, MF4, TA, MA, X12 // 5776650d ++ VSETVLI X10, E32, MF8, TA, MA, X12 // 5776750d ++ VSETVLI X10, E32, M1, TA, MA, X12 // 5776050d ++ VSETVLI $15, E32, M1, TA, MA, X12 // 57f607cd ++ VSETIVLI $0, E32, M1, TA, MA, X12 // 577600cd ++ VSETIVLI $15, E32, M1, TA, MA, X12 // 57f607cd ++ VSETIVLI $31, E32, M1, TA, MA, X12 // 57f60fcd ++ VSETVL X10, X11, X12 // 57f6a580 ++ + // + // Privileged ISA + // +diff --git a/src/cmd/asm/internal/asm/testdata/riscv64error.s b/src/cmd/asm/internal/asm/testdata/riscv64error.s +index 0b0184aaa7..a90f22af9f 100644 +--- a/src/cmd/asm/internal/asm/testdata/riscv64error.s ++++ b/src/cmd/asm/internal/asm/testdata/riscv64error.s +@@ -46,4 +46,8 @@ TEXT errors(SB),$0 + SRLI $1, X5, F1 // ERROR "expected integer register in rd position but got non-integer register F1" + SRLI $1, F1, X5 // ERROR "expected integer register in rs1 position but got non-integer register F1" + FNES F1, (X5) // ERROR "needs an integer register output" ++ VSETVLI $32, E16, M1, TU, MU, X12 // ERROR "must be in range [0, 31] (5 bits)" ++ VSETVLI 
$-1, E32, M2, TA, MA, X12 // ERROR "must be in range [0, 31] (5 bits)" ++ VSETIVLI X10, E32, M2, TA, MA, X12 // ERROR "expected immediate value" ++ VSETVL X10, X11 // ERROR "expected integer register in rs1 position" + RET +diff --git a/src/cmd/internal/obj/arm64/a.out.go b/src/cmd/internal/obj/arm64/a.out.go +index ad00e4842c..de04a24280 100644 +--- a/src/cmd/internal/obj/arm64/a.out.go ++++ b/src/cmd/internal/obj/arm64/a.out.go +@@ -1055,8 +1055,8 @@ type SpecialOperand int + + const ( + // PRFM +- SPOP_PLDL1KEEP SpecialOperand = iota // must be the first one +- SPOP_BEGIN SpecialOperand = iota - 1 // set as the lower bound ++ SPOP_PLDL1KEEP SpecialOperand = obj.SpecialOperandARM64Base + iota // must be the first one ++ SPOP_BEGIN SpecialOperand = obj.SpecialOperandARM64Base + iota - 1 // set as the lower bound + SPOP_PLDL1STRM + SPOP_PLDL2KEEP + SPOP_PLDL2STRM +diff --git a/src/cmd/internal/obj/link.go b/src/cmd/internal/obj/link.go +index 1b2d344eaf..8ad9257c66 100644 +--- a/src/cmd/internal/obj/link.go ++++ b/src/cmd/internal/obj/link.go +@@ -98,7 +98,8 @@ import ( + // val = string + // + // +-// Special symbolic constants for ARM64, such as conditional flags, tlbi_op and so on. ++// Special symbolic constants for ARM64 (such as conditional flags, tlbi_op and so on) ++// and RISCV64 (such as names for vector configuration instruction arguments). + // Encoding: + // type = TYPE_SPECIAL + // offset = The constant value corresponding to this symbol +diff --git a/src/cmd/internal/obj/riscv/cpu.go b/src/cmd/internal/obj/riscv/cpu.go +index 2b75ed38a6..143164ac41 100644 +--- a/src/cmd/internal/obj/riscv/cpu.go ++++ b/src/cmd/internal/obj/riscv/cpu.go +@@ -1227,6 +1227,77 @@ const ( + RM_RMM // Round to Nearest, ties to Max Magnitude + ) + ++type SpecialOperand int ++ ++const ( ++ SPOP_BEGIN SpecialOperand = obj.SpecialOperandRISCVBase ++ ++ // Vector mask policy. ++ SPOP_MA SpecialOperand = obj.SpecialOperandRISCVBase + iota - 1 ++ SPOP_MU ++ ++ // Vector tail policy. ++ SPOP_TA ++ SPOP_TU ++ ++ // Vector register group multiplier (VLMUL). ++ SPOP_M1 ++ SPOP_M2 ++ SPOP_M4 ++ SPOP_M8 ++ SPOP_MF2 ++ SPOP_MF4 ++ SPOP_MF8 ++ ++ // Vector selected element width (VSEW). ++ SPOP_E8 ++ SPOP_E16 ++ SPOP_E32 ++ SPOP_E64 ++ ++ SPOP_END ++) ++ ++var specialOperands = map[SpecialOperand]struct { ++ encoding uint32 ++ name string ++}{ ++ SPOP_MA: {encoding: 1, name: "MA"}, ++ SPOP_MU: {encoding: 0, name: "MU"}, ++ ++ SPOP_TA: {encoding: 1, name: "TA"}, ++ SPOP_TU: {encoding: 0, name: "TU"}, ++ ++ SPOP_M1: {encoding: 0, name: "M1"}, ++ SPOP_M2: {encoding: 1, name: "M2"}, ++ SPOP_M4: {encoding: 2, name: "M4"}, ++ SPOP_M8: {encoding: 3, name: "M8"}, ++ SPOP_MF2: {encoding: 5, name: "MF2"}, ++ SPOP_MF4: {encoding: 6, name: "MF4"}, ++ SPOP_MF8: {encoding: 7, name: "MF8"}, ++ ++ SPOP_E8: {encoding: 0, name: "E8"}, ++ SPOP_E16: {encoding: 1, name: "E16"}, ++ SPOP_E32: {encoding: 2, name: "E32"}, ++ SPOP_E64: {encoding: 3, name: "E64"}, ++} ++ ++func (so SpecialOperand) encode() uint32 { ++ op, ok := specialOperands[so] ++ if ok { ++ return op.encoding ++ } ++ return 0 ++} ++ ++func (so SpecialOperand) String() string { ++ op, ok := specialOperands[so] ++ if ok { ++ return op.name ++ } ++ return "" ++} ++ + // All unary instructions which write to their arguments (as opposed to reading + // from them) go here. The assembly parser uses this information to populate + // its AST in a semantically reasonable way. 
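Tying the pieces of this patch together with a worked example (purely illustrative, not part of the patch): the special-operand encodings registered above combine into the 11-bit vtype immediate exactly as EncodeVectorType does later in obj.go, namely vmask<<7 | vtail<<6 | vsew<<3 | vlmul. The sketch below recomputes that immediate for E32, M1, TA, MA and yields 0xd0, which is the vtype value carried in the riscv64.s test encoding 5776050d for VSETVLI X10, E32, M1, TA, MA, X12.

package main

import "fmt"

// Recompute the vtype immediate for "E32, M1, TA, MA" using the encodings
// from the specialOperands table: E32=2, M1=0, TA=1, MA=1.
func main() {
	const (
		vsewE32 = 2 // selected element width
		vlmulM1 = 0 // register group multiplier
		vtailTA = 1 // tail agnostic
		vmaskMA = 1 // mask agnostic
	)
	vtype := vmaskMA<<7 | vtailTA<<6 | vsewE32<<3 | vlmulM1
	fmt.Printf("vtype = %#x\n", vtype) // prints vtype = 0xd0
}
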
+diff --git a/src/cmd/internal/obj/riscv/list.go b/src/cmd/internal/obj/riscv/list.go +index c5b7e80719..8eb97a476d 100644 +--- a/src/cmd/internal/obj/riscv/list.go ++++ b/src/cmd/internal/obj/riscv/list.go +@@ -14,6 +14,7 @@ func init() { + obj.RegisterRegister(obj.RBaseRISCV, REG_END, RegName) + obj.RegisterOpcode(obj.ABaseRISCV, Anames) + obj.RegisterOpSuffix("riscv64", opSuffixString) ++ obj.RegisterSpecialOperands(int64(SPOP_BEGIN), int64(SPOP_END), specialOperandConv) + } + + func RegName(r int) string { +@@ -49,3 +50,11 @@ func opSuffixString(s uint8) string { + } + return fmt.Sprintf(".%s", ss) + } ++ ++func specialOperandConv(a int64) string { ++ spc := SpecialOperand(a) ++ if spc >= SPOP_BEGIN && spc < SPOP_END { ++ return spc.String() ++ } ++ return "SPC_??" ++} +diff --git a/src/cmd/internal/obj/riscv/obj.go b/src/cmd/internal/obj/riscv/obj.go +index 17fbac4ad9..ffe2e48e08 100644 +--- a/src/cmd/internal/obj/riscv/obj.go ++++ b/src/cmd/internal/obj/riscv/obj.go +@@ -1163,27 +1163,35 @@ func immEven(x int64) error { + return nil + } + +-// immIFits checks whether the immediate value x fits in nbits bits +-// as a signed integer. If it does not, an error is returned. +-func immIFits(x int64, nbits uint) error { +- nbits-- +- min := int64(-1) << nbits +- max := int64(1)< max { + if nbits <= 16 { +- return fmt.Errorf("signed immediate %d must be in range [%d, %d] (%d bits)", x, min, max, nbits) ++ return fmt.Errorf("%s immediate %d must be in range [%d, %d] (%d bits)", label, x, min, max, nbits) + } +- return fmt.Errorf("signed immediate %#x must be in range [%#x, %#x] (%d bits)", x, min, max, nbits) ++ return fmt.Errorf("%s immediate %#x must be in range [%#x, %#x] (%d bits)", label, x, min, max, nbits) + } + return nil + } + ++// immIFits checks whether the immediate value x fits in nbits bits ++// as a signed integer. If it does not, an error is returned. ++func immIFits(x int64, nbits uint) error { ++ return immFits(x, nbits, true) ++} ++ + // immI extracts the signed integer of the specified size from an immediate. + func immI(as obj.As, imm int64, nbits uint) uint32 { + if err := immIFits(imm, nbits); err != nil { + panic(fmt.Sprintf("%v: %v", as, err)) + } +- return uint32(imm) ++ return uint32(imm) & ((1 << nbits) - 1) + } + + func wantImmI(ctxt *obj.Link, ins *instruction, imm int64, nbits uint) { +@@ -1192,6 +1200,26 @@ func wantImmI(ctxt *obj.Link, ins *instruction, imm int64, nbits uint) { + } + } + ++// immUFits checks whether the immediate value x fits in nbits bits ++// as an unsigned integer. If it does not, an error is returned. ++func immUFits(x int64, nbits uint) error { ++ return immFits(x, nbits, false) ++} ++ ++// immU extracts the unsigned integer of the specified size from an immediate. 
++func immU(as obj.As, imm int64, nbits uint) uint32 { ++ if err := immUFits(imm, nbits); err != nil { ++ panic(fmt.Sprintf("%v: %v", as, err)) ++ } ++ return uint32(imm) & ((1 << nbits) - 1) ++} ++ ++func wantImmU(ctxt *obj.Link, ins *instruction, imm int64, nbits uint) { ++ if err := immUFits(imm, nbits); err != nil { ++ ctxt.Diag("%v: %v", ins, err) ++ } ++} ++ + func wantReg(ctxt *obj.Link, ins *instruction, pos string, descr string, r, min, max uint32) { + if r < min || r > max { + var suffix string +@@ -1348,6 +1376,29 @@ func validateJ(ctxt *obj.Link, ins *instruction) { + wantNoneReg(ctxt, ins, "rs3", ins.rs3) + } + ++func validateVsetvli(ctxt *obj.Link, ins *instruction) { ++ wantImmU(ctxt, ins, ins.imm, 11) ++ wantIntReg(ctxt, ins, "rd", ins.rd) ++ wantIntReg(ctxt, ins, "rs1", ins.rs1) ++ wantNoneReg(ctxt, ins, "rs2", ins.rs2) ++ wantNoneReg(ctxt, ins, "rs3", ins.rs3) ++} ++ ++func validateVsetivli(ctxt *obj.Link, ins *instruction) { ++ wantImmU(ctxt, ins, ins.imm, 10) ++ wantIntReg(ctxt, ins, "rd", ins.rd) ++ wantImmU(ctxt, ins, int64(ins.rs1), 5) ++ wantNoneReg(ctxt, ins, "rs2", ins.rs2) ++ wantNoneReg(ctxt, ins, "rs3", ins.rs3) ++} ++ ++func validateVsetvl(ctxt *obj.Link, ins *instruction) { ++ wantIntReg(ctxt, ins, "rd", ins.rd) ++ wantIntReg(ctxt, ins, "rs1", ins.rs1) ++ wantIntReg(ctxt, ins, "rs2", ins.rs2) ++ wantNoneReg(ctxt, ins, "rs3", ins.rs3) ++} ++ + func validateRaw(ctxt *obj.Link, ins *instruction) { + // Treat the raw value specially as a 32-bit unsigned integer. + // Nobody wants to enter negative machine code. +@@ -1536,6 +1587,29 @@ func encodeCJImmediate(imm uint32) uint32 { + return bits << 2 + } + ++func encodeVset(as obj.As, rs1, rs2, rd uint32) uint32 { ++ enc := encode(as) ++ if enc == nil { ++ panic("encodeVset: could not encode instruction") ++ } ++ return enc.funct7<<25 | rs2<<20 | rs1<<15 | enc.funct3<<12 | rd<<7 | enc.opcode ++} ++ ++func encodeVsetvli(ins *instruction) uint32 { ++ vtype := immU(ins.as, ins.imm, 11) ++ return encodeVset(ins.as, regI(ins.rs1), vtype, regI(ins.rd)) ++} ++ ++func encodeVsetivli(ins *instruction) uint32 { ++ vtype := immU(ins.as, ins.imm, 10) ++ avl := immU(ins.as, int64(ins.rs1), 5) ++ return encodeVset(ins.as, avl, vtype, regI(ins.rd)) ++} ++ ++func encodeVsetvl(ins *instruction) uint32 { ++ return encodeVset(ins.as, regI(ins.rs1), regI(ins.rs2), regI(ins.rd)) ++} ++ + func encodeRawIns(ins *instruction) uint32 { + // Treat the raw value specially as a 32-bit unsigned integer. + // Nobody wants to enter negative machine code. 
+@@ -1606,6 +1680,27 @@ func EncodeUImmediate(imm int64) (int64, error) { + return imm << 12, nil + } + ++func EncodeVectorType(vsew, vlmul, vtail, vmask int64) (int64, error) { ++ vsewSO := SpecialOperand(vsew) ++ if vsewSO < SPOP_E8 || vsewSO > SPOP_E64 { ++ return -1, fmt.Errorf("invalid vector selected element width %q", vsewSO) ++ } ++ vlmulSO := SpecialOperand(vlmul) ++ if vlmulSO < SPOP_M1 || vlmulSO > SPOP_MF8 { ++ return -1, fmt.Errorf("invalid vector register group multiplier %q", vlmulSO) ++ } ++ vtailSO := SpecialOperand(vtail) ++ if vtailSO != SPOP_TA && vtailSO != SPOP_TU { ++ return -1, fmt.Errorf("invalid vector tail policy %q", vtailSO) ++ } ++ vmaskSO := SpecialOperand(vmask) ++ if vmaskSO != SPOP_MA && vmaskSO != SPOP_MU { ++ return -1, fmt.Errorf("invalid vector mask policy %q", vmaskSO) ++ } ++ vtype := vmaskSO.encode()<<7 | vtailSO.encode()<<6 | vsewSO.encode()<<3 | vlmulSO.encode() ++ return int64(vtype), nil ++} ++ + type encoding struct { + encode func(*instruction) uint32 // encode returns the machine code for an instruction + validate func(*obj.Link, *instruction) // validate validates an instruction +@@ -1643,6 +1738,11 @@ var ( + uEncoding = encoding{encode: encodeU, validate: validateU, length: 4} + jEncoding = encoding{encode: encodeJ, validate: validateJ, length: 4} + ++ // Encodings for vector configuration setting instruction. ++ vsetvliEncoding = encoding{encode: encodeVsetvli, validate: validateVsetvli, length: 4} ++ vsetivliEncoding = encoding{encode: encodeVsetivli, validate: validateVsetivli, length: 4} ++ vsetvlEncoding = encoding{encode: encodeVsetvl, validate: validateVsetvl, length: 4} ++ + // rawEncoding encodes a raw instruction byte sequence. + rawEncoding = encoding{encode: encodeRawIns, validate: validateRaw, length: 4} + +@@ -1909,6 +2009,15 @@ var instructions = [ALAST & obj.AMask]instructionData{ + ABSET & obj.AMask: {enc: rIIIEncoding, immForm: ABSETI, ternary: true}, + ABSETI & obj.AMask: {enc: iIIEncoding, ternary: true}, + ++ // ++ // "V" Standard Extension for Vector Operations, Version 1.0 ++ // ++ ++ // 31.6. 
Vector Configuration-Setting Instructions ++ AVSETVLI & obj.AMask: {enc: vsetvliEncoding, immForm: AVSETIVLI}, ++ AVSETIVLI & obj.AMask: {enc: vsetivliEncoding}, ++ AVSETVL & obj.AMask: {enc: vsetvlEncoding}, ++ + // + // Privileged ISA + // +@@ -2466,7 +2575,12 @@ func instructionsForProg(p *obj.Prog) []*instruction { + ins := instructionForProg(p) + inss := []*instruction{ins} + +- if len(p.RestArgs) > 1 { ++ if ins.as == AVSETVLI || ins.as == AVSETIVLI { ++ if len(p.RestArgs) != 4 { ++ p.Ctxt.Diag("incorrect number of arguments for instruction") ++ return nil ++ } ++ } else if len(p.RestArgs) > 1 { + p.Ctxt.Diag("too many source registers") + return nil + } +@@ -2704,6 +2818,21 @@ func instructionsForProg(p *obj.Prog) []*instruction { + // XNOR -> (NOT (XOR x y)) + ins.as = AXOR + inss = append(inss, &instruction{as: AXORI, rs1: ins.rd, rs2: obj.REG_NONE, rd: ins.rd, imm: -1}) ++ ++ case AVSETVLI, AVSETIVLI: ++ ins.rs1, ins.rs2 = ins.rs2, obj.REG_NONE ++ vtype, err := EncodeVectorType(p.RestArgs[0].Offset, p.RestArgs[1].Offset, p.RestArgs[2].Offset, p.RestArgs[3].Offset) ++ if err != nil { ++ p.Ctxt.Diag("%v: %v", p, err) ++ } ++ ins.imm = int64(vtype) ++ if ins.as == AVSETIVLI { ++ if p.From.Type != obj.TYPE_CONST { ++ p.Ctxt.Diag("%v: expected immediate value", p) ++ } ++ ins.rs1 = uint32(p.From.Offset) ++ } ++ + } + + for _, ins := range inss { +diff --git a/src/cmd/internal/obj/util.go b/src/cmd/internal/obj/util.go +index 26de22122a..7d87bff949 100644 +--- a/src/cmd/internal/obj/util.go ++++ b/src/cmd/internal/obj/util.go +@@ -591,6 +591,13 @@ type spcSet struct { + + var spcSpace []spcSet + ++// Each architecture is allotted a distinct subspace: [Lo, Hi) for declaring its ++// arch-specific special operands. ++const ( ++ SpecialOperandARM64Base = 0 << 16 ++ SpecialOperandRISCVBase = 1 << 16 ++) ++ + // RegisterSpecialOperands binds a pretty-printer (SPCconv) for special + // operand numbers to a given special operand number range. Lo is inclusive, + // hi is exclusive (valid special operands are lo through hi-1). +-- +2.50.1 + diff --git a/2005-cmd-internal-obj-riscv-support-MOVD-with-floating-po.patch b/2005-cmd-internal-obj-riscv-support-MOVD-with-floating-po.patch new file mode 100644 index 0000000000000000000000000000000000000000..2236c6d3c52c4c62185d08bb57bc434cb6cfcecb --- /dev/null +++ b/2005-cmd-internal-obj-riscv-support-MOVD-with-floating-po.patch @@ -0,0 +1,83 @@ +From cb9721c809d18d5d54f513a884f27e9f16057335 Mon Sep 17 00:00:00 2001 +From: Joel Sing +Date: Tue, 2 Jul 2024 00:31:53 +1000 +Subject: [PATCH 05/38] cmd/internal/obj/riscv: support MOVD with floating + point constants + +Currently, we only support loading of values from memory (or other +registers). Add floating point constant support to MOVD. This is +implemented by storing the floating point constant to a symbol, +which is then loaded into the floating point register. 
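A sketch of the expansion this enables, based on the test data added by this
patch (the exact offset is resolved at relocation time, and X31 is the
assembler temporary, used here because the destination is a floating point
register and cannot hold the address):

	MOVD	$(709.78271289338397), F3

is assembled as a load from a generated float64 symbol, roughly

	AUIPC	$0, X31		// address of the constant's symbol
	MOVD	(X31), F3	// FLD from that symbol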
+ +Change-Id: I6db242d27f606f0d5d084a3ab93538698d3a4f8c +Reviewed-on: https://go-review.googlesource.com/c/go/+/631876 +Reviewed-by: Meng Zhuo +Reviewed-by: Mark Ryan +Reviewed-by: Dmitri Shuralyov +Reviewed-by: Cherry Mui +LUCI-TryBot-Result: Go LUCI +--- + src/cmd/asm/internal/asm/testdata/riscv64.s | 3 +++ + src/cmd/internal/obj/riscv/obj.go | 22 ++++++++++++++++++--- + 2 files changed, 22 insertions(+), 3 deletions(-) + +diff --git a/src/cmd/asm/internal/asm/testdata/riscv64.s b/src/cmd/asm/internal/asm/testdata/riscv64.s +index cbe99ba348..fc44f561f2 100644 +--- a/src/cmd/asm/internal/asm/testdata/riscv64.s ++++ b/src/cmd/asm/internal/asm/testdata/riscv64.s +@@ -510,6 +510,9 @@ start: + MOVD F0, 4(X5) // 27b20200 + MOVD F0, F1 // d3000022 + ++ // Convert to load of symbol (AUIPC + FLD) ++ MOVD $(709.78271289338397), F3 // 970f000087b10f00 ++ + // TLS load with local-exec (LUI + ADDIW + ADD of TP + load) + MOV tls(SB), X5 // b70f00009b8f0f00b38f4f0083b20f00 + MOVB tls(SB), X5 // b70f00009b8f0f00b38f4f0083820f00 +diff --git a/src/cmd/internal/obj/riscv/obj.go b/src/cmd/internal/obj/riscv/obj.go +index ffe2e48e08..c41d99c0c7 100644 +--- a/src/cmd/internal/obj/riscv/obj.go ++++ b/src/cmd/internal/obj/riscv/obj.go +@@ -147,6 +147,15 @@ func progedit(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) { + p.From.Name = obj.NAME_EXTERN + p.From.Offset = 0 + } ++ ++ case AMOVD: ++ if p.From.Type == obj.TYPE_FCONST && p.From.Name == obj.NAME_NONE && p.From.Reg == obj.REG_NONE { ++ f64 := p.From.Val.(float64) ++ p.From.Type = obj.TYPE_MEM ++ p.From.Sym = ctxt.Float64Sym(f64) ++ p.From.Name = obj.NAME_EXTERN ++ p.From.Offset = 0 ++ } + } + + if ctxt.Flag_dynlink { +@@ -2443,12 +2452,19 @@ func instructionsForMOV(p *obj.Prog) []*instruction { + } + + // Note that the values for $off_hi and $off_lo are currently +- // zero and will be assigned during relocation. ++ // zero and will be assigned during relocation. If the destination ++ // is an integer register then we can use the same register for the ++ // address computation, otherwise we need to use the temporary register. + // + // AUIPC $off_hi, Rd + // L $off_lo, Rd, Rd +- insAUIPC := &instruction{as: AAUIPC, rd: ins.rd} +- ins.as, ins.rs1, ins.rs2, ins.imm = movToLoad(p.As), ins.rd, obj.REG_NONE, 0 ++ // ++ addrReg := ins.rd ++ if addrReg < REG_X0 || addrReg > REG_X31 { ++ addrReg = REG_TMP ++ } ++ insAUIPC := &instruction{as: AAUIPC, rd: addrReg} ++ ins.as, ins.rs1, ins.rs2, ins.imm = movToLoad(p.As), addrReg, obj.REG_NONE, 0 + inss = []*instruction{insAUIPC, ins} + + default: +-- +2.50.1 + diff --git a/2006-internal-bytealg-clean-up-and-simplify-the-riscv64-e.patch b/2006-internal-bytealg-clean-up-and-simplify-the-riscv64-e.patch new file mode 100644 index 0000000000000000000000000000000000000000..cb913a68214ab793482ae39cda2e8fe2c5d76794 --- /dev/null +++ b/2006-internal-bytealg-clean-up-and-simplify-the-riscv64-e.patch @@ -0,0 +1,160 @@ +From 07fcbfd050efcddb03f07818a24feca0e68c8e6b Mon Sep 17 00:00:00 2001 +From: Joel Sing +Date: Fri, 7 Feb 2025 21:12:05 +1100 +Subject: [PATCH 06/38] internal/bytealg: clean up and simplify the riscv64 + equal implementation + +Now that riscv64 is only regabi, remove the entrypoint separation and +have runtime.memequal_varlen call runtime.memequal. Add a zero byte +length check and replace the equal and not equal exit paths with a +single exit path that conditions on length reaching zero. 
+ +Cq-Include-Trybots: luci.golang.try:gotip-linux-riscv64 +Change-Id: Ida4e54378daa7fd423f759753eba04ce513a27cb +Reviewed-on: https://go-review.googlesource.com/c/go/+/648855 +Reviewed-by: Dmitri Shuralyov +Reviewed-by: Meng Zhuo +LUCI-TryBot-Result: Go LUCI +Reviewed-by: Michael Knyszek +Reviewed-by: Cherry Mui +--- + src/internal/bytealg/equal_riscv64.s | 62 +++++++++++++--------------- + 1 file changed, 29 insertions(+), 33 deletions(-) + +diff --git a/src/internal/bytealg/equal_riscv64.s b/src/internal/bytealg/equal_riscv64.s +index 7f470ce0a0..87b2d79302 100644 +--- a/src/internal/bytealg/equal_riscv64.s ++++ b/src/internal/bytealg/equal_riscv64.s +@@ -7,25 +7,23 @@ + + #define CTXT S10 + +-// func memequal(a, b unsafe.Pointer, size uintptr) bool +-TEXT runtime·memequal(SB),NOSPLIT|NOFRAME,$0-25 +- // X10 = a_base +- // X11 = b_base +- // X12 = size +- JMP memequal<>(SB) +- + // func memequal_varlen(a, b unsafe.Pointer) bool + TEXT runtime·memequal_varlen(SB),NOSPLIT|NOFRAME,$0-17 ++ // X10 = a_base ++ // X11 = b_base + MOV 8(CTXT), X12 // compiler stores size at offset 8 in the closure ++ JMP runtime·memequal(SB) ++ ++// func memequal(a, b unsafe.Pointer, size uintptr) bool ++TEXT runtime·memequal(SB),NOSPLIT|NOFRAME,$0-25 + // X10 = a_base + // X11 = b_base +- JMP memequal<>(SB) ++ // X12 = size ++ BNE X10, X11, length_check ++ MOV $0, X12 + +-// On entry X10 and X11 contain pointers, X12 contains length. +-// For non-regabi X13 contains address for return value. +-// For regabi return value in X10. +-TEXT memequal<>(SB),NOSPLIT|NOFRAME,$0 +- BEQ X10, X11, eq ++length_check: ++ BEQZ X12, done + + MOV $32, X23 + BLT X12, X23, loop4_check +@@ -44,7 +42,7 @@ align: + SUB $1, X9 + MOVBU 0(X10), X19 + MOVBU 0(X11), X20 +- BNE X19, X20, not_eq ++ BNE X19, X20, done + ADD $1, X10 + ADD $1, X11 + BNEZ X9, align +@@ -57,19 +55,19 @@ loop32: + MOV 0(X11), X20 + MOV 8(X10), X21 + MOV 8(X11), X22 +- BNE X19, X20, not_eq +- BNE X21, X22, not_eq ++ BNE X19, X20, done ++ BNE X21, X22, done + MOV 16(X10), X14 + MOV 16(X11), X15 + MOV 24(X10), X16 + MOV 24(X11), X17 +- BNE X14, X15, not_eq +- BNE X16, X17, not_eq ++ BNE X14, X15, done ++ BNE X16, X17, done + ADD $32, X10 + ADD $32, X11 + SUB $32, X12 + BGE X12, X9, loop32 +- BEQZ X12, eq ++ BEQZ X12, done + + loop16_check: + MOV $16, X23 +@@ -79,13 +77,13 @@ loop16: + MOV 0(X11), X20 + MOV 8(X10), X21 + MOV 8(X11), X22 +- BNE X19, X20, not_eq +- BNE X21, X22, not_eq ++ BNE X19, X20, done ++ BNE X21, X22, done + ADD $16, X10 + ADD $16, X11 + SUB $16, X12 + BGE X12, X23, loop16 +- BEQZ X12, eq ++ BEQZ X12, done + + loop4_check: + MOV $4, X23 +@@ -95,32 +93,30 @@ loop4: + MOVBU 0(X11), X20 + MOVBU 1(X10), X21 + MOVBU 1(X11), X22 +- BNE X19, X20, not_eq +- BNE X21, X22, not_eq ++ BNE X19, X20, done ++ BNE X21, X22, done + MOVBU 2(X10), X14 + MOVBU 2(X11), X15 + MOVBU 3(X10), X16 + MOVBU 3(X11), X17 +- BNE X14, X15, not_eq +- BNE X16, X17, not_eq ++ BNE X14, X15, done ++ BNE X16, X17, done + ADD $4, X10 + ADD $4, X11 + SUB $4, X12 + BGE X12, X23, loop4 + + loop1: +- BEQZ X12, eq ++ BEQZ X12, done + MOVBU 0(X10), X19 + MOVBU 0(X11), X20 +- BNE X19, X20, not_eq ++ BNE X19, X20, done + ADD $1, X10 + ADD $1, X11 + SUB $1, X12 + JMP loop1 + +-not_eq: +- MOVB ZERO, X10 +- RET +-eq: +- MOV $1, X10 ++done: ++ // If X12 is zero then memory is equivalent. 
++ SEQZ X12, X10 + RET +-- +2.50.1 + diff --git a/2007-cmd-internal-obj-riscv-implement-vector-load-store-i.patch b/2007-cmd-internal-obj-riscv-implement-vector-load-store-i.patch new file mode 100644 index 0000000000000000000000000000000000000000..b4c16228410667d7a624a5f41667cfb80464b37e --- /dev/null +++ b/2007-cmd-internal-obj-riscv-implement-vector-load-store-i.patch @@ -0,0 +1,539 @@ +From cdb071dc48906c58fa2107d49619805f7e97cc98 Mon Sep 17 00:00:00 2001 +From: Joel Sing +Date: Tue, 18 Jun 2024 03:00:47 +1000 +Subject: [PATCH 07/38] cmd/internal/obj/riscv: implement vector load/store + instructions + +Implement vector unit stride, vector strided, vector indexed and +vector whole register load and store instructions. + +The vector unit stride instructions take an optional vector mask +register, which if specified must be register V0. If only two +operands are given, the instruction is encoded as unmasked. + +The vector strided and vector indexed instructions also take an +optional vector mask register, which if specified must be register +V0. If only three operands are given, the instruction is encoded as +unmasked. + +Cq-Include-Trybots: luci.golang.try:gotip-linux-riscv64 +Change-Id: I35e43bb8f1cf6ae8826fbeec384b95ac945da50f +Reviewed-on: https://go-review.googlesource.com/c/go/+/631937 +LUCI-TryBot-Result: Go LUCI +Reviewed-by: Mark Ryan +Reviewed-by: Michael Knyszek +Reviewed-by: Meng Zhuo +Reviewed-by: Dmitri Shuralyov +Reviewed-by: Pengcheng Wang +--- + src/cmd/asm/internal/asm/testdata/riscv64.s | 98 ++++++++ + .../asm/internal/asm/testdata/riscv64error.s | 39 +++ + src/cmd/internal/obj/riscv/anames.go | 4 + + src/cmd/internal/obj/riscv/cpu.go | 4 + + src/cmd/internal/obj/riscv/obj.go | 226 ++++++++++++++++-- + 5 files changed, 356 insertions(+), 15 deletions(-) + +diff --git a/src/cmd/asm/internal/asm/testdata/riscv64.s b/src/cmd/asm/internal/asm/testdata/riscv64.s +index fc44f561f2..5aa2563b6f 100644 +--- a/src/cmd/asm/internal/asm/testdata/riscv64.s ++++ b/src/cmd/asm/internal/asm/testdata/riscv64.s +@@ -448,6 +448,104 @@ start: + VSETIVLI $31, E32, M1, TA, MA, X12 // 57f60fcd + VSETVL X10, X11, X12 // 57f6a580 + ++ // 31.7.4: Vector Unit-Stride Instructions ++ VLE8V (X10), V3 // 87010502 ++ VLE8V (X10), V0, V3 // 87010500 ++ VLE16V (X10), V3 // 87510502 ++ VLE16V (X10), V0, V3 // 87510500 ++ VLE32V (X10), V3 // 87610502 ++ VLE32V (X10), V0, V3 // 87610500 ++ VLE64V (X10), V3 // 87710502 ++ VLE64V (X10), V0, V3 // 87710500 ++ VSE8V V3, (X10) // a7010502 ++ VSE8V V3, V0, (X10) // a7010500 ++ VSE16V V3, (X10) // a7510502 ++ VSE16V V3, V0, (X10) // a7510500 ++ VSE32V V3, (X10) // a7610502 ++ VSE32V V3, V0, (X10) // a7610500 ++ VSE64V V3, (X10) // a7710502 ++ VSE64V V3, V0, (X10) // a7710500 ++ VLMV (X10), V3 // 8701b502 ++ VSMV V3, (X10) // a701b502 ++ ++ // 31.7.5: Vector Strided Instructions ++ VLSE8V (X10), X11, V3 // 8701b50a ++ VLSE8V (X10), X11, V0, V3 // 8701b508 ++ VLSE16V (X10), X11, V3 // 8751b50a ++ VLSE16V (X10), X11, V0, V3 // 8751b508 ++ VLSE32V (X10), X11, V3 // 8761b50a ++ VLSE32V (X10), X11, V0, V3 // 8761b508 ++ VLSE64V (X10), X11, V3 // 8771b50a ++ VLSE64V (X10), X11, V0, V3 // 8771b508 ++ VSSE8V V3, X11, (X10) // a701b50a ++ VSSE8V V3, X11, V0, (X10) // a701b508 ++ VSSE16V V3, X11, (X10) // a751b50a ++ VSSE16V V3, X11, V0, (X10) // a751b508 ++ VSSE32V V3, X11, (X10) // a761b50a ++ VSSE32V V3, X11, V0, (X10) // a761b508 ++ VSSE64V V3, X11, (X10) // a771b50a ++ VSSE64V V3, X11, V0, (X10) // a771b508 ++ ++ // 31.7.6: Vector Indexed Instructions ++ VLUXEI8V 
(X10), V2, V3 // 87012506 ++ VLUXEI8V (X10), V2, V0, V3 // 87012504 ++ VLUXEI16V (X10), V2, V3 // 87512506 ++ VLUXEI16V (X10), V2, V0, V3 // 87512504 ++ VLUXEI32V (X10), V2, V3 // 87612506 ++ VLUXEI32V (X10), V2, V0, V3 // 87612504 ++ VLUXEI64V (X10), V2, V3 // 87712506 ++ VLUXEI64V (X10), V2, V0, V3 // 87712504 ++ VLOXEI8V (X10), V2, V3 // 8701250e ++ VLOXEI8V (X10), V2, V0, V3 // 8701250c ++ VLOXEI16V (X10), V2, V3 // 8751250e ++ VLOXEI16V (X10), V2, V0, V3 // 8751250c ++ VLOXEI32V (X10), V2, V3 // 8761250e ++ VLOXEI32V (X10), V2, V0, V3 // 8761250c ++ VLOXEI64V (X10), V2, V3 // 8771250e ++ VLOXEI64V (X10), V2, V0, V3 // 8771250c ++ VSUXEI8V V3, V2, (X10) // a7012506 ++ VSUXEI8V V3, V2, V0, (X10) // a7012504 ++ VSUXEI16V V3, V2, (X10) // a7512506 ++ VSUXEI16V V3, V2, V0, (X10) // a7512504 ++ VSUXEI32V V3, V2, (X10) // a7612506 ++ VSUXEI32V V3, V2, V0, (X10) // a7612504 ++ VSUXEI64V V3, V2, (X10) // a7712506 ++ VSUXEI64V V3, V2, V0, (X10) // a7712504 ++ VSOXEI8V V3, V2, (X10) // a701250e ++ VSOXEI8V V3, V2, V0, (X10) // a701250c ++ VSOXEI16V V3, V2, (X10) // a751250e ++ VSOXEI16V V3, V2, V0, (X10) // a751250c ++ VSOXEI32V V3, V2, (X10) // a761250e ++ VSOXEI32V V3, V2, V0, (X10) // a761250c ++ VSOXEI64V V3, V2, (X10) // a771250e ++ VSOXEI64V V3, V2, V0, (X10) // a771250c ++ ++ // 31.7.9: Vector Load/Store Whole Register Instructions ++ VL1RV (X10), V3 // 87018502 ++ VL1RE8V (X10), V3 // 87018502 ++ VL1RE16V (X10), V3 // 87518502 ++ VL1RE32V (X10), V3 // 87618502 ++ VL1RE64V (X10), V3 // 87718502 ++ VL2RV (X10), V2 // 07018522 ++ VL2RE8V (X10), V2 // 07018522 ++ VL2RE16V (X10), V2 // 07518522 ++ VL2RE32V (X10), V2 // 07618522 ++ VL2RE64V (X10), V2 // 07718522 ++ VL4RV (X10), V4 // 07028562 ++ VL4RE8V (X10), V4 // 07028562 ++ VL4RE16V (X10), V4 // 07528562 ++ VL4RE32V (X10), V4 // 07628562 ++ VL4RE64V (X10), V4 // 07728562 ++ VL8RV (X10), V8 // 070485e2 ++ VL8RE8V (X10), V8 // 070485e2 ++ VL8RE16V (X10), V8 // 075485e2 ++ VL8RE32V (X10), V8 // 076485e2 ++ VL8RE64V (X10), V8 // 077485e2 ++ VS1RV V3, (X11) // a7818502 ++ VS2RV V2, (X11) // 27818522 ++ VS4RV V4, (X11) // 27828562 ++ VS8RV V8, (X11) // 278485e2 ++ + // + // Privileged ISA + // +diff --git a/src/cmd/asm/internal/asm/testdata/riscv64error.s b/src/cmd/asm/internal/asm/testdata/riscv64error.s +index a90f22af9f..82a2348894 100644 +--- a/src/cmd/asm/internal/asm/testdata/riscv64error.s ++++ b/src/cmd/asm/internal/asm/testdata/riscv64error.s +@@ -50,4 +50,43 @@ TEXT errors(SB),$0 + VSETVLI $-1, E32, M2, TA, MA, X12 // ERROR "must be in range [0, 31] (5 bits)" + VSETIVLI X10, E32, M2, TA, MA, X12 // ERROR "expected immediate value" + VSETVL X10, X11 // ERROR "expected integer register in rs1 position" ++ VLE8V (X10), X10 // ERROR "expected vector register in rd position" ++ VLE8V (V1), V3 // ERROR "expected integer register in rs1 position" ++ VLE8V (X10), V1, V3 // ERROR "invalid vector mask register" ++ VSE8V X10, (X10) // ERROR "expected vector register in rs1 position" ++ VSE8V V3, (V1) // ERROR "expected integer register in rd position" ++ VSE8V V3, V1, (X10) // ERROR "invalid vector mask register" ++ VLSE8V (X10), V3 // ERROR "expected integer register in rs2 position" ++ VLSE8V (X10), X10, X11 // ERROR "expected vector register in rd position" ++ VLSE8V (V1), X10, V3 // ERROR "expected integer register in rs1 position" ++ VLSE8V (X10), V1, V0, V3 // ERROR "expected integer register in rs2 position" ++ VLSE8V (X10), X10, V1, V3 // ERROR "invalid vector mask register" ++ VSSE8V V3, (X10) // ERROR "expected integer register in rs2 
position" ++ VSSE8V X10, X11, (X10) // ERROR "expected vector register in rd position" ++ VSSE8V V3, X11, (V1) // ERROR "expected integer register in rs1 position" ++ VSSE8V V3, V1, V0, (X10) // ERROR "expected integer register in rs2 position" ++ VSSE8V V3, X11, V1, (X10) // ERROR "invalid vector mask register" ++ VLUXEI8V (X10), V2, X11 // ERROR "expected vector register in rd position" ++ VLUXEI8V (X10), V2, X11 // ERROR "expected vector register in rd position" ++ VLUXEI8V (V1), V2, V3 // ERROR "expected integer register in rs1 position" ++ VLUXEI8V (X10), X11, V0, V3 // ERROR "expected vector register in rs2 position" ++ VLUXEI8V (X10), V2, V1, V3 // ERROR "invalid vector mask register" ++ VSUXEI8V X10, V2, (X10) // ERROR "expected vector register in rd position" ++ VSUXEI8V V3, V2, (V1) // ERROR "expected integer register in rs1 position" ++ VSUXEI8V V3, X11, V0, (X10) // ERROR "expected vector register in rs2 position" ++ VSUXEI8V V3, V2, V1, (X10) // ERROR "invalid vector mask register" ++ VLOXEI8V (X10), V2, X11 // ERROR "expected vector register in rd position" ++ VLOXEI8V (V1), V2, V3 // ERROR "expected integer register in rs1 position" ++ VLOXEI8V (X10), X11, V0, V3 // ERROR "expected vector register in rs2 position" ++ VLOXEI8V (X10), V2, V1, V3 // ERROR "invalid vector mask register" ++ VSOXEI8V X10, V2, (X10) // ERROR "expected vector register in rd position" ++ VSOXEI8V V3, V2, (V1) // ERROR "expected integer register in rs1 position" ++ VSOXEI8V V3, X11, V0, (X10) // ERROR "expected vector register in rs2 position" ++ VSOXEI8V V3, V2, V1, (X10) // ERROR "invalid vector mask register" ++ VL1RV (X10), V0, V3 // ERROR "too many operands for instruction" ++ VL1RV (X10), X10 // ERROR "expected vector register in rd position" ++ VL1RV (V1), V3 // ERROR "expected integer register in rs1 position" ++ VS1RV V3, V0, (X11) // ERROR "too many operands for instruction" ++ VS1RV X11, (X11) // ERROR "expected vector register in rs1 position" ++ VS1RV V3, (V1) // ERROR "expected integer register in rd position" + RET +diff --git a/src/cmd/internal/obj/riscv/anames.go b/src/cmd/internal/obj/riscv/anames.go +index c49569c943..6df5f0a173 100644 +--- a/src/cmd/internal/obj/riscv/anames.go ++++ b/src/cmd/internal/obj/riscv/anames.go +@@ -650,5 +650,9 @@ var Anames = []string{ + "RDTIME", + "SEQZ", + "SNEZ", ++ "VL1RV", ++ "VL2RV", ++ "VL4RV", ++ "VL8RV", + "LAST", + } +diff --git a/src/cmd/internal/obj/riscv/cpu.go b/src/cmd/internal/obj/riscv/cpu.go +index 143164ac41..0ecf6dbae2 100644 +--- a/src/cmd/internal/obj/riscv/cpu.go ++++ b/src/cmd/internal/obj/riscv/cpu.go +@@ -1178,6 +1178,10 @@ const ( + ARDTIME + ASEQZ + ASNEZ ++ AVL1RV ++ AVL2RV ++ AVL4RV ++ AVL8RV + + // End marker + ALAST +diff --git a/src/cmd/internal/obj/riscv/obj.go b/src/cmd/internal/obj/riscv/obj.go +index c41d99c0c7..16bd1310c1 100644 +--- a/src/cmd/internal/obj/riscv/obj.go ++++ b/src/cmd/internal/obj/riscv/obj.go +@@ -1339,6 +1339,27 @@ func validateIF(ctxt *obj.Link, ins *instruction) { + wantNoneReg(ctxt, ins, "rs3", ins.rs3) + } + ++func validateIV(ctxt *obj.Link, ins *instruction) { ++ wantVectorReg(ctxt, ins, "rd", ins.rd) ++ wantIntReg(ctxt, ins, "rs1", ins.rs1) ++ wantNoneReg(ctxt, ins, "rs2", ins.rs2) ++ wantNoneReg(ctxt, ins, "rs3", ins.rs3) ++} ++ ++func validateIIIV(ctxt *obj.Link, ins *instruction) { ++ wantVectorReg(ctxt, ins, "rd", ins.rd) ++ wantIntReg(ctxt, ins, "rs1", ins.rs1) ++ wantIntReg(ctxt, ins, "rs2", ins.rs2) ++ wantNoneReg(ctxt, ins, "rs3", ins.rs3) ++} ++ ++func validateIVIV(ctxt *obj.Link, 
ins *instruction) { ++ wantVectorReg(ctxt, ins, "rd", ins.rd) ++ wantIntReg(ctxt, ins, "rs1", ins.rs1) ++ wantVectorReg(ctxt, ins, "rs2", ins.rs2) ++ wantNoneReg(ctxt, ins, "rs3", ins.rs3) ++} ++ + func validateSI(ctxt *obj.Link, ins *instruction) { + wantImmI(ctxt, ins, ins.imm, 12) + wantIntReg(ctxt, ins, "rd", ins.rd) +@@ -1355,6 +1376,27 @@ func validateSF(ctxt *obj.Link, ins *instruction) { + wantNoneReg(ctxt, ins, "rs3", ins.rs3) + } + ++func validateSV(ctxt *obj.Link, ins *instruction) { ++ wantIntReg(ctxt, ins, "rd", ins.rd) ++ wantVectorReg(ctxt, ins, "rs1", ins.rs1) ++ wantNoneReg(ctxt, ins, "rs2", ins.rs2) ++ wantNoneReg(ctxt, ins, "rs3", ins.rs3) ++} ++ ++func validateSVII(ctxt *obj.Link, ins *instruction) { ++ wantVectorReg(ctxt, ins, "rd", ins.rd) ++ wantIntReg(ctxt, ins, "rs1", ins.rs1) ++ wantIntReg(ctxt, ins, "rs2", ins.rs2) ++ wantNoneReg(ctxt, ins, "rs3", ins.rs3) ++} ++ ++func validateSVIV(ctxt *obj.Link, ins *instruction) { ++ wantVectorReg(ctxt, ins, "rd", ins.rd) ++ wantIntReg(ctxt, ins, "rs1", ins.rs1) ++ wantVectorReg(ctxt, ins, "rs2", ins.rs2) ++ wantNoneReg(ctxt, ins, "rs3", ins.rs3) ++} ++ + func validateB(ctxt *obj.Link, ins *instruction) { + // Offsets are multiples of two, so accept 13 bit immediates for the + // 12 bit slot. We implicitly drop the least significant bit in encodeB. +@@ -1431,7 +1473,10 @@ func encodeR(as obj.As, rs1, rs2, rd, funct3, funct7 uint32) uint32 { + if enc.rs2 != 0 && rs2 != 0 { + panic("encodeR: instruction uses rs2, but rs2 was nonzero") + } +- return funct7<<25 | enc.funct7<<25 | enc.rs2<<20 | rs2<<20 | rs1<<15 | enc.funct3<<12 | funct3<<12 | rd<<7 | enc.opcode ++ funct3 |= enc.funct3 ++ funct7 |= enc.funct7 ++ rs2 |= enc.rs2 ++ return funct7<<25 | rs2<<20 | rs1<<15 | funct3<<12 | rd<<7 | enc.opcode + } + + // encodeR4 encodes an R4-type RISC-V instruction. +@@ -1483,38 +1528,67 @@ func encodeRFF(ins *instruction) uint32 { + } + + // encodeI encodes an I-type RISC-V instruction. +-func encodeI(as obj.As, rs1, rd, imm uint32) uint32 { ++func encodeI(as obj.As, rs1, rd, imm, funct7 uint32) uint32 { + enc := encode(as) + if enc == nil { + panic("encodeI: could not encode instruction") + } + imm |= uint32(enc.csr) +- return imm<<20 | rs1<<15 | enc.funct3<<12 | rd<<7 | enc.opcode ++ return funct7<<25 | imm<<20 | rs1<<15 | enc.funct3<<12 | rd<<7 | enc.opcode + } + + func encodeIII(ins *instruction) uint32 { +- return encodeI(ins.as, regI(ins.rs1), regI(ins.rd), uint32(ins.imm)) ++ return encodeI(ins.as, regI(ins.rs1), regI(ins.rd), uint32(ins.imm), 0) + } + + func encodeIF(ins *instruction) uint32 { +- return encodeI(ins.as, regI(ins.rs1), regF(ins.rd), uint32(ins.imm)) ++ return encodeI(ins.as, regI(ins.rs1), regF(ins.rd), uint32(ins.imm), 0) ++} ++ ++func encodeIV(ins *instruction) uint32 { ++ return encodeI(ins.as, regI(ins.rs1), regV(ins.rd), uint32(ins.imm), ins.funct7) ++} ++ ++func encodeIIIV(ins *instruction) uint32 { ++ return encodeI(ins.as, regI(ins.rs1), regV(ins.rd), regI(ins.rs2), ins.funct7) ++} ++ ++func encodeIVIV(ins *instruction) uint32 { ++ return encodeI(ins.as, regI(ins.rs1), regV(ins.rd), regV(ins.rs2), ins.funct7) + } + + // encodeS encodes an S-type RISC-V instruction. 
+-func encodeS(as obj.As, rs1, rs2, imm uint32) uint32 { ++func encodeS(as obj.As, rs1, rs2, imm, funct7 uint32) uint32 { + enc := encode(as) + if enc == nil { + panic("encodeS: could not encode instruction") + } +- return (imm>>5)<<25 | rs2<<20 | rs1<<15 | enc.funct3<<12 | (imm&0x1f)<<7 | enc.opcode ++ if enc.rs2 != 0 && rs2 != 0 { ++ panic("encodeS: instruction uses rs2, but rs2 was nonzero") ++ } ++ rs2 |= enc.rs2 ++ imm |= uint32(enc.csr) &^ 0x1f ++ return funct7<<25 | (imm>>5)<<25 | rs2<<20 | rs1<<15 | enc.funct3<<12 | (imm&0x1f)<<7 | enc.opcode + } + + func encodeSI(ins *instruction) uint32 { +- return encodeS(ins.as, regI(ins.rd), regI(ins.rs1), uint32(ins.imm)) ++ return encodeS(ins.as, regI(ins.rd), regI(ins.rs1), uint32(ins.imm), 0) + } + + func encodeSF(ins *instruction) uint32 { +- return encodeS(ins.as, regI(ins.rd), regF(ins.rs1), uint32(ins.imm)) ++ return encodeS(ins.as, regI(ins.rd), regF(ins.rs1), uint32(ins.imm), 0) ++} ++ ++func encodeSV(ins *instruction) uint32 { ++ return encodeS(ins.as, regI(ins.rd), 0, regV(ins.rs1), ins.funct7) ++} ++ ++func encodeSVII(ins *instruction) uint32 { ++ return encodeS(ins.as, regI(ins.rs1), regI(ins.rs2), regV(ins.rd), ins.funct7) ++} ++ ++func encodeSVIV(ins *instruction) uint32 { ++ return encodeS(ins.as, regI(ins.rs1), regV(ins.rs2), regV(ins.rd), ins.funct7) + } + + // encodeBImmediate encodes an immediate for a B-type RISC-V instruction. +@@ -1721,7 +1795,7 @@ var ( + // + // 1. the instruction encoding (R/I/S/B/U/J), in lowercase + // 2. zero or more register operand identifiers (I = integer +- // register, F = float register), in uppercase ++ // register, F = float register, V = vector register), in uppercase + // 3. the word "Encoding" + // + // For example, rIIIEncoding indicates an R-type instruction with two +@@ -1737,11 +1811,17 @@ var ( + rIFEncoding = encoding{encode: encodeRIF, validate: validateRIF, length: 4} + rFFEncoding = encoding{encode: encodeRFF, validate: validateRFF, length: 4} + +- iIIEncoding = encoding{encode: encodeIII, validate: validateIII, length: 4} +- iFEncoding = encoding{encode: encodeIF, validate: validateIF, length: 4} ++ iIIEncoding = encoding{encode: encodeIII, validate: validateIII, length: 4} ++ iFEncoding = encoding{encode: encodeIF, validate: validateIF, length: 4} ++ iVEncoding = encoding{encode: encodeIV, validate: validateIV, length: 4} ++ iIIVEncoding = encoding{encode: encodeIIIV, validate: validateIIIV, length: 4} ++ iVIVEncoding = encoding{encode: encodeIVIV, validate: validateIVIV, length: 4} + +- sIEncoding = encoding{encode: encodeSI, validate: validateSI, length: 4} +- sFEncoding = encoding{encode: encodeSF, validate: validateSF, length: 4} ++ sIEncoding = encoding{encode: encodeSI, validate: validateSI, length: 4} ++ sFEncoding = encoding{encode: encodeSF, validate: validateSF, length: 4} ++ sVEncoding = encoding{encode: encodeSV, validate: validateSV, length: 4} ++ sVIIEncoding = encoding{encode: encodeSVII, validate: validateSVII, length: 4} ++ sVIVEncoding = encoding{encode: encodeSVIV, validate: validateSVIV, length: 4} + + bEncoding = encoding{encode: encodeB, validate: validateB, length: 4} + uEncoding = encoding{encode: encodeU, validate: validateU, length: 4} +@@ -2022,11 +2102,73 @@ var instructions = [ALAST & obj.AMask]instructionData{ + // "V" Standard Extension for Vector Operations, Version 1.0 + // + +- // 31.6. 
Vector Configuration-Setting Instructions ++ // 31.6: Vector Configuration-Setting Instructions + AVSETVLI & obj.AMask: {enc: vsetvliEncoding, immForm: AVSETIVLI}, + AVSETIVLI & obj.AMask: {enc: vsetivliEncoding}, + AVSETVL & obj.AMask: {enc: vsetvlEncoding}, + ++ // 31.7.4: Vector Unit-Stride Instructions ++ AVLE8V & obj.AMask: {enc: iVEncoding}, ++ AVLE16V & obj.AMask: {enc: iVEncoding}, ++ AVLE32V & obj.AMask: {enc: iVEncoding}, ++ AVLE64V & obj.AMask: {enc: iVEncoding}, ++ AVSE8V & obj.AMask: {enc: sVEncoding}, ++ AVSE16V & obj.AMask: {enc: sVEncoding}, ++ AVSE32V & obj.AMask: {enc: sVEncoding}, ++ AVSE64V & obj.AMask: {enc: sVEncoding}, ++ AVLMV & obj.AMask: {enc: iVEncoding}, ++ AVSMV & obj.AMask: {enc: sVEncoding}, ++ ++ // 31.7.5: Vector Strided Instructions ++ AVLSE8V & obj.AMask: {enc: iIIVEncoding}, ++ AVLSE16V & obj.AMask: {enc: iIIVEncoding}, ++ AVLSE32V & obj.AMask: {enc: iIIVEncoding}, ++ AVLSE64V & obj.AMask: {enc: iIIVEncoding}, ++ AVSSE8V & obj.AMask: {enc: sVIIEncoding}, ++ AVSSE16V & obj.AMask: {enc: sVIIEncoding}, ++ AVSSE32V & obj.AMask: {enc: sVIIEncoding}, ++ AVSSE64V & obj.AMask: {enc: sVIIEncoding}, ++ ++ // 31.7.6: Vector Indexed Instructions ++ AVLUXEI8V & obj.AMask: {enc: iVIVEncoding}, ++ AVLUXEI16V & obj.AMask: {enc: iVIVEncoding}, ++ AVLUXEI32V & obj.AMask: {enc: iVIVEncoding}, ++ AVLUXEI64V & obj.AMask: {enc: iVIVEncoding}, ++ AVLOXEI8V & obj.AMask: {enc: iVIVEncoding}, ++ AVLOXEI16V & obj.AMask: {enc: iVIVEncoding}, ++ AVLOXEI32V & obj.AMask: {enc: iVIVEncoding}, ++ AVLOXEI64V & obj.AMask: {enc: iVIVEncoding}, ++ AVSUXEI8V & obj.AMask: {enc: sVIVEncoding}, ++ AVSUXEI16V & obj.AMask: {enc: sVIVEncoding}, ++ AVSUXEI32V & obj.AMask: {enc: sVIVEncoding}, ++ AVSUXEI64V & obj.AMask: {enc: sVIVEncoding}, ++ AVSOXEI8V & obj.AMask: {enc: sVIVEncoding}, ++ AVSOXEI16V & obj.AMask: {enc: sVIVEncoding}, ++ AVSOXEI32V & obj.AMask: {enc: sVIVEncoding}, ++ AVSOXEI64V & obj.AMask: {enc: sVIVEncoding}, ++ ++ // 31.7.9. 
Vector Load/Store Whole Register Instructions ++ AVL1RE8V & obj.AMask: {enc: iVEncoding}, ++ AVL1RE16V & obj.AMask: {enc: iVEncoding}, ++ AVL1RE32V & obj.AMask: {enc: iVEncoding}, ++ AVL1RE64V & obj.AMask: {enc: iVEncoding}, ++ AVL2RE8V & obj.AMask: {enc: iVEncoding}, ++ AVL2RE16V & obj.AMask: {enc: iVEncoding}, ++ AVL2RE32V & obj.AMask: {enc: iVEncoding}, ++ AVL2RE64V & obj.AMask: {enc: iVEncoding}, ++ AVL4RE8V & obj.AMask: {enc: iVEncoding}, ++ AVL4RE16V & obj.AMask: {enc: iVEncoding}, ++ AVL4RE32V & obj.AMask: {enc: iVEncoding}, ++ AVL4RE64V & obj.AMask: {enc: iVEncoding}, ++ AVL8RE8V & obj.AMask: {enc: iVEncoding}, ++ AVL8RE16V & obj.AMask: {enc: iVEncoding}, ++ AVL8RE32V & obj.AMask: {enc: iVEncoding}, ++ AVL8RE64V & obj.AMask: {enc: iVEncoding}, ++ AVS1RV & obj.AMask: {enc: sVEncoding}, ++ AVS2RV & obj.AMask: {enc: sVEncoding}, ++ AVS4RV & obj.AMask: {enc: sVEncoding}, ++ AVS8RV & obj.AMask: {enc: sVEncoding}, ++ + // + // Privileged ISA + // +@@ -2849,6 +2991,60 @@ func instructionsForProg(p *obj.Prog) []*instruction { + ins.rs1 = uint32(p.From.Offset) + } + ++ case AVLE8V, AVLE16V, AVLE32V, AVLE64V, AVSE8V, AVSE16V, AVSE32V, AVSE64V, AVLMV, AVSMV: ++ // Set mask bit ++ switch { ++ case ins.rs1 == obj.REG_NONE: ++ ins.funct7 |= 1 // unmasked ++ case ins.rs1 != REG_V0: ++ p.Ctxt.Diag("%v: invalid vector mask register", p) ++ } ++ ins.rd, ins.rs1, ins.rs2 = uint32(p.To.Reg), uint32(p.From.Reg), obj.REG_NONE ++ ++ case AVLSE8V, AVLSE16V, AVLSE32V, AVLSE64V, ++ AVLUXEI8V, AVLUXEI16V, AVLUXEI32V, AVLUXEI64V, AVLOXEI8V, AVLOXEI16V, AVLOXEI32V, AVLOXEI64V: ++ // Set mask bit ++ switch { ++ case ins.rs3 == obj.REG_NONE: ++ ins.funct7 |= 1 // unmasked ++ case ins.rs3 != REG_V0: ++ p.Ctxt.Diag("%v: invalid vector mask register", p) ++ } ++ ins.rs1, ins.rs2, ins.rs3 = ins.rs2, ins.rs1, obj.REG_NONE ++ ++ case AVSSE8V, AVSSE16V, AVSSE32V, AVSSE64V, ++ AVSUXEI8V, AVSUXEI16V, AVSUXEI32V, AVSUXEI64V, AVSOXEI8V, AVSOXEI16V, AVSOXEI32V, AVSOXEI64V: ++ // Set mask bit ++ switch { ++ case ins.rs3 == obj.REG_NONE: ++ ins.funct7 |= 1 // unmasked ++ case ins.rs3 != REG_V0: ++ p.Ctxt.Diag("%v: invalid vector mask register", p) ++ } ++ ins.rd, ins.rs1, ins.rs2, ins.rs3 = ins.rs2, ins.rd, ins.rs1, obj.REG_NONE ++ ++ case AVL1RV, AVL1RE8V, AVL1RE16V, AVL1RE32V, AVL1RE64V, AVL2RV, AVL2RE8V, AVL2RE16V, AVL2RE32V, AVL2RE64V, ++ AVL4RV, AVL4RE8V, AVL4RE16V, AVL4RE32V, AVL4RE64V, AVL8RV, AVL8RE8V, AVL8RE16V, AVL8RE32V, AVL8RE64V: ++ switch ins.as { ++ case AVL1RV: ++ ins.as = AVL1RE8V ++ case AVL2RV: ++ ins.as = AVL2RE8V ++ case AVL4RV: ++ ins.as = AVL4RE8V ++ case AVL8RV: ++ ins.as = AVL8RE8V ++ } ++ if ins.rs1 != obj.REG_NONE { ++ p.Ctxt.Diag("%v: too many operands for instruction", p) ++ } ++ ins.rd, ins.rs1, ins.rs2 = uint32(p.To.Reg), uint32(p.From.Reg), obj.REG_NONE ++ ++ case AVS1RV, AVS2RV, AVS4RV, AVS8RV: ++ if ins.rs1 != obj.REG_NONE { ++ p.Ctxt.Diag("%v: too many operands for instruction", p) ++ } ++ ins.rd, ins.rs1, ins.rs2 = uint32(p.To.Reg), uint32(p.From.Reg), obj.REG_NONE + } + + for _, ins := range inss { +-- +2.50.1 + diff --git a/2008-cmd-internal-obj-riscv-add-riscv64-CSR-map.patch b/2008-cmd-internal-obj-riscv-add-riscv64-CSR-map.patch new file mode 100644 index 0000000000000000000000000000000000000000..c809d096a7bcbc91711d67cdc211670e2b65751e --- /dev/null +++ b/2008-cmd-internal-obj-riscv-add-riscv64-CSR-map.patch @@ -0,0 +1,363 @@ +From ac2d6889584d4328b0142850081dce2fab95ff9b Mon Sep 17 00:00:00 2001 +From: Mark Ryan +Date: Tue, 19 Nov 2024 16:06:03 +0100 +Subject: [PATCH 08/38] 
cmd/internal/obj/riscv: add riscv64 CSR map + +The map is automatically generated by running the latest version of +parse.py from github.com/riscv/riscv-opcodes. + +Change-Id: I05e00ab27ec583750752c25e1835c2578b339fbf +Reviewed-on: https://go-review.googlesource.com/c/go/+/630518 +Reviewed-by: Dmitri Shuralyov +Reviewed-by: Pengcheng Wang +Reviewed-by: Meng Zhuo +LUCI-TryBot-Result: Go LUCI +Reviewed-by: Joel Sing +Reviewed-by: Michael Pratt +--- + src/cmd/internal/obj/riscv/inst.go | 332 +++++++++++++++++++++++++++++ + 1 file changed, 332 insertions(+) + +diff --git a/src/cmd/internal/obj/riscv/inst.go b/src/cmd/internal/obj/riscv/inst.go +index 2d9132e532..5ee5bda361 100644 +--- a/src/cmd/internal/obj/riscv/inst.go ++++ b/src/cmd/internal/obj/riscv/inst.go +@@ -1229,3 +1229,335 @@ func encode(a obj.As) *inst { + } + return nil + } ++ ++var csrs = map[uint16]string{ ++ 0x1: "FFLAGS", ++ 0x2: "FRM", ++ 0x3: "FCSR", ++ 0x7: "UTVT", ++ 0x8: "VSTART", ++ 0x9: "VXSAT", ++ 0xa: "VXRM", ++ 0xf: "VCSR", ++ 0x11: "SSP", ++ 0x15: "SEED", ++ 0x17: "JVT", ++ 0x45: "UNXTI", ++ 0x46: "UINTSTATUS", ++ 0x48: "USCRATCHCSW", ++ 0x49: "USCRATCHCSWL", ++ 0x100: "SSTATUS", ++ 0x102: "SEDELEG", ++ 0x103: "SIDELEG", ++ 0x104: "SIE", ++ 0x105: "STVEC", ++ 0x106: "SCOUNTEREN", ++ 0x107: "STVT", ++ 0x10a: "SENVCFG", ++ 0x10c: "SSTATEEN0", ++ 0x10d: "SSTATEEN1", ++ 0x10e: "SSTATEEN2", ++ 0x10f: "SSTATEEN3", ++ 0x120: "SCOUNTINHIBIT", ++ 0x140: "SSCRATCH", ++ 0x141: "SEPC", ++ 0x142: "SCAUSE", ++ 0x143: "STVAL", ++ 0x144: "SIP", ++ 0x145: "SNXTI", ++ 0x146: "SINTSTATUS", ++ 0x148: "SSCRATCHCSW", ++ 0x149: "SSCRATCHCSWL", ++ 0x14d: "STIMECMP", ++ 0x14e: "SCTRCTL", ++ 0x14f: "SCTRSTATUS", ++ 0x150: "SISELECT", ++ 0x151: "SIREG", ++ 0x152: "SIREG2", ++ 0x153: "SIREG3", ++ 0x155: "SIREG4", ++ 0x156: "SIREG5", ++ 0x157: "SIREG6", ++ 0x15c: "STOPEI", ++ 0x15f: "SCTRDEPTH", ++ 0x180: "SATP", ++ 0x181: "SRMCFG", ++ 0x200: "VSSTATUS", ++ 0x204: "VSIE", ++ 0x205: "VSTVEC", ++ 0x240: "VSSCRATCH", ++ 0x241: "VSEPC", ++ 0x242: "VSCAUSE", ++ 0x243: "VSTVAL", ++ 0x244: "VSIP", ++ 0x24d: "VSTIMECMP", ++ 0x24e: "VSCTRCTL", ++ 0x250: "VSISELECT", ++ 0x251: "VSIREG", ++ 0x252: "VSIREG2", ++ 0x253: "VSIREG3", ++ 0x255: "VSIREG4", ++ 0x256: "VSIREG5", ++ 0x257: "VSIREG6", ++ 0x25c: "VSTOPEI", ++ 0x280: "VSATP", ++ 0x300: "MSTATUS", ++ 0x301: "MISA", ++ 0x302: "MEDELEG", ++ 0x303: "MIDELEG", ++ 0x304: "MIE", ++ 0x305: "MTVEC", ++ 0x306: "MCOUNTEREN", ++ 0x307: "MTVT", ++ 0x308: "MVIEN", ++ 0x309: "MVIP", ++ 0x30a: "MENVCFG", ++ 0x30c: "MSTATEEN0", ++ 0x30d: "MSTATEEN1", ++ 0x30e: "MSTATEEN2", ++ 0x30f: "MSTATEEN3", ++ 0x320: "MCOUNTINHIBIT", ++ 0x321: "MCYCLECFG", ++ 0x322: "MINSTRETCFG", ++ 0x323: "MHPMEVENT3", ++ 0x324: "MHPMEVENT4", ++ 0x325: "MHPMEVENT5", ++ 0x326: "MHPMEVENT6", ++ 0x327: "MHPMEVENT7", ++ 0x328: "MHPMEVENT8", ++ 0x329: "MHPMEVENT9", ++ 0x32a: "MHPMEVENT10", ++ 0x32b: "MHPMEVENT11", ++ 0x32c: "MHPMEVENT12", ++ 0x32d: "MHPMEVENT13", ++ 0x32e: "MHPMEVENT14", ++ 0x32f: "MHPMEVENT15", ++ 0x330: "MHPMEVENT16", ++ 0x331: "MHPMEVENT17", ++ 0x332: "MHPMEVENT18", ++ 0x333: "MHPMEVENT19", ++ 0x334: "MHPMEVENT20", ++ 0x335: "MHPMEVENT21", ++ 0x336: "MHPMEVENT22", ++ 0x337: "MHPMEVENT23", ++ 0x338: "MHPMEVENT24", ++ 0x339: "MHPMEVENT25", ++ 0x33a: "MHPMEVENT26", ++ 0x33b: "MHPMEVENT27", ++ 0x33c: "MHPMEVENT28", ++ 0x33d: "MHPMEVENT29", ++ 0x33e: "MHPMEVENT30", ++ 0x33f: "MHPMEVENT31", ++ 0x340: "MSCRATCH", ++ 0x341: "MEPC", ++ 0x342: "MCAUSE", ++ 0x343: "MTVAL", ++ 0x344: "MIP", ++ 0x345: "MNXTI", ++ 0x346: "MINTSTATUS", ++ 
0x348: "MSCRATCHCSW", ++ 0x349: "MSCRATCHCSWL", ++ 0x34a: "MTINST", ++ 0x34b: "MTVAL2", ++ 0x34e: "MCTRCTL", ++ 0x350: "MISELECT", ++ 0x351: "MIREG", ++ 0x352: "MIREG2", ++ 0x353: "MIREG3", ++ 0x355: "MIREG4", ++ 0x356: "MIREG5", ++ 0x357: "MIREG6", ++ 0x35c: "MTOPEI", ++ 0x3a0: "PMPCFG0", ++ 0x3a1: "PMPCFG1", ++ 0x3a2: "PMPCFG2", ++ 0x3a3: "PMPCFG3", ++ 0x3a4: "PMPCFG4", ++ 0x3a5: "PMPCFG5", ++ 0x3a6: "PMPCFG6", ++ 0x3a7: "PMPCFG7", ++ 0x3a8: "PMPCFG8", ++ 0x3a9: "PMPCFG9", ++ 0x3aa: "PMPCFG10", ++ 0x3ab: "PMPCFG11", ++ 0x3ac: "PMPCFG12", ++ 0x3ad: "PMPCFG13", ++ 0x3ae: "PMPCFG14", ++ 0x3af: "PMPCFG15", ++ 0x3b0: "PMPADDR0", ++ 0x3b1: "PMPADDR1", ++ 0x3b2: "PMPADDR2", ++ 0x3b3: "PMPADDR3", ++ 0x3b4: "PMPADDR4", ++ 0x3b5: "PMPADDR5", ++ 0x3b6: "PMPADDR6", ++ 0x3b7: "PMPADDR7", ++ 0x3b8: "PMPADDR8", ++ 0x3b9: "PMPADDR9", ++ 0x3ba: "PMPADDR10", ++ 0x3bb: "PMPADDR11", ++ 0x3bc: "PMPADDR12", ++ 0x3bd: "PMPADDR13", ++ 0x3be: "PMPADDR14", ++ 0x3bf: "PMPADDR15", ++ 0x3c0: "PMPADDR16", ++ 0x3c1: "PMPADDR17", ++ 0x3c2: "PMPADDR18", ++ 0x3c3: "PMPADDR19", ++ 0x3c4: "PMPADDR20", ++ 0x3c5: "PMPADDR21", ++ 0x3c6: "PMPADDR22", ++ 0x3c7: "PMPADDR23", ++ 0x3c8: "PMPADDR24", ++ 0x3c9: "PMPADDR25", ++ 0x3ca: "PMPADDR26", ++ 0x3cb: "PMPADDR27", ++ 0x3cc: "PMPADDR28", ++ 0x3cd: "PMPADDR29", ++ 0x3ce: "PMPADDR30", ++ 0x3cf: "PMPADDR31", ++ 0x3d0: "PMPADDR32", ++ 0x3d1: "PMPADDR33", ++ 0x3d2: "PMPADDR34", ++ 0x3d3: "PMPADDR35", ++ 0x3d4: "PMPADDR36", ++ 0x3d5: "PMPADDR37", ++ 0x3d6: "PMPADDR38", ++ 0x3d7: "PMPADDR39", ++ 0x3d8: "PMPADDR40", ++ 0x3d9: "PMPADDR41", ++ 0x3da: "PMPADDR42", ++ 0x3db: "PMPADDR43", ++ 0x3dc: "PMPADDR44", ++ 0x3dd: "PMPADDR45", ++ 0x3de: "PMPADDR46", ++ 0x3df: "PMPADDR47", ++ 0x3e0: "PMPADDR48", ++ 0x3e1: "PMPADDR49", ++ 0x3e2: "PMPADDR50", ++ 0x3e3: "PMPADDR51", ++ 0x3e4: "PMPADDR52", ++ 0x3e5: "PMPADDR53", ++ 0x3e6: "PMPADDR54", ++ 0x3e7: "PMPADDR55", ++ 0x3e8: "PMPADDR56", ++ 0x3e9: "PMPADDR57", ++ 0x3ea: "PMPADDR58", ++ 0x3eb: "PMPADDR59", ++ 0x3ec: "PMPADDR60", ++ 0x3ed: "PMPADDR61", ++ 0x3ee: "PMPADDR62", ++ 0x3ef: "PMPADDR63", ++ 0x5a8: "SCONTEXT", ++ 0x600: "HSTATUS", ++ 0x602: "HEDELEG", ++ 0x603: "HIDELEG", ++ 0x604: "HIE", ++ 0x605: "HTIMEDELTA", ++ 0x606: "HCOUNTEREN", ++ 0x607: "HGEIE", ++ 0x608: "HVIEN", ++ 0x609: "HVICTL", ++ 0x60a: "HENVCFG", ++ 0x60c: "HSTATEEN0", ++ 0x60d: "HSTATEEN1", ++ 0x60e: "HSTATEEN2", ++ 0x60f: "HSTATEEN3", ++ 0x643: "HTVAL", ++ 0x644: "HIP", ++ 0x645: "HVIP", ++ 0x646: "HVIPRIO1", ++ 0x647: "HVIPRIO2", ++ 0x64a: "HTINST", ++ 0x680: "HGATP", ++ 0x6a8: "HCONTEXT", ++ 0x747: "MSECCFG", ++ 0x7a0: "TSELECT", ++ 0x7a1: "TDATA1", ++ 0x7a2: "TDATA2", ++ 0x7a3: "TDATA3", ++ 0x7a4: "TINFO", ++ 0x7a5: "TCONTROL", ++ 0x7a8: "MCONTEXT", ++ 0x7aa: "MSCONTEXT", ++ 0x7b0: "DCSR", ++ 0x7b1: "DPC", ++ 0x7b2: "DSCRATCH0", ++ 0x7b3: "DSCRATCH1", ++ 0xb00: "MCYCLE", ++ 0xb02: "MINSTRET", ++ 0xb03: "MHPMCOUNTER3", ++ 0xb04: "MHPMCOUNTER4", ++ 0xb05: "MHPMCOUNTER5", ++ 0xb06: "MHPMCOUNTER6", ++ 0xb07: "MHPMCOUNTER7", ++ 0xb08: "MHPMCOUNTER8", ++ 0xb09: "MHPMCOUNTER9", ++ 0xb0a: "MHPMCOUNTER10", ++ 0xb0b: "MHPMCOUNTER11", ++ 0xb0c: "MHPMCOUNTER12", ++ 0xb0d: "MHPMCOUNTER13", ++ 0xb0e: "MHPMCOUNTER14", ++ 0xb0f: "MHPMCOUNTER15", ++ 0xb10: "MHPMCOUNTER16", ++ 0xb11: "MHPMCOUNTER17", ++ 0xb12: "MHPMCOUNTER18", ++ 0xb13: "MHPMCOUNTER19", ++ 0xb14: "MHPMCOUNTER20", ++ 0xb15: "MHPMCOUNTER21", ++ 0xb16: "MHPMCOUNTER22", ++ 0xb17: "MHPMCOUNTER23", ++ 0xb18: "MHPMCOUNTER24", ++ 0xb19: "MHPMCOUNTER25", ++ 0xb1a: "MHPMCOUNTER26", ++ 0xb1b: "MHPMCOUNTER27", ++ 0xb1c: 
"MHPMCOUNTER28", ++ 0xb1d: "MHPMCOUNTER29", ++ 0xb1e: "MHPMCOUNTER30", ++ 0xb1f: "MHPMCOUNTER31", ++ 0xc00: "CYCLE", ++ 0xc01: "TIME", ++ 0xc02: "INSTRET", ++ 0xc03: "HPMCOUNTER3", ++ 0xc04: "HPMCOUNTER4", ++ 0xc05: "HPMCOUNTER5", ++ 0xc06: "HPMCOUNTER6", ++ 0xc07: "HPMCOUNTER7", ++ 0xc08: "HPMCOUNTER8", ++ 0xc09: "HPMCOUNTER9", ++ 0xc0a: "HPMCOUNTER10", ++ 0xc0b: "HPMCOUNTER11", ++ 0xc0c: "HPMCOUNTER12", ++ 0xc0d: "HPMCOUNTER13", ++ 0xc0e: "HPMCOUNTER14", ++ 0xc0f: "HPMCOUNTER15", ++ 0xc10: "HPMCOUNTER16", ++ 0xc11: "HPMCOUNTER17", ++ 0xc12: "HPMCOUNTER18", ++ 0xc13: "HPMCOUNTER19", ++ 0xc14: "HPMCOUNTER20", ++ 0xc15: "HPMCOUNTER21", ++ 0xc16: "HPMCOUNTER22", ++ 0xc17: "HPMCOUNTER23", ++ 0xc18: "HPMCOUNTER24", ++ 0xc19: "HPMCOUNTER25", ++ 0xc1a: "HPMCOUNTER26", ++ 0xc1b: "HPMCOUNTER27", ++ 0xc1c: "HPMCOUNTER28", ++ 0xc1d: "HPMCOUNTER29", ++ 0xc1e: "HPMCOUNTER30", ++ 0xc1f: "HPMCOUNTER31", ++ 0xc20: "VL", ++ 0xc21: "VTYPE", ++ 0xc22: "VLENB", ++ 0xda0: "SCOUNTOVF", ++ 0xdb0: "STOPI", ++ 0xe12: "HGEIP", ++ 0xeb0: "VSTOPI", ++ 0xf11: "MVENDORID", ++ 0xf12: "MARCHID", ++ 0xf13: "MIMPID", ++ 0xf14: "MHARTID", ++ 0xf15: "MCONFIGPTR", ++ 0xfb0: "MTOPI", ++} +-- +2.50.1 + diff --git a/2009-test-codegen-add-riscv64-rva23u64-specifiers-to-exis.patch b/2009-test-codegen-add-riscv64-rva23u64-specifiers-to-exis.patch new file mode 100644 index 0000000000000000000000000000000000000000..115191fc206df5f8e63fd05746ce784904be6f61 --- /dev/null +++ b/2009-test-codegen-add-riscv64-rva23u64-specifiers-to-exis.patch @@ -0,0 +1,84 @@ +From 2df9023c7dfcd288a64e369ef393570997a84622 Mon Sep 17 00:00:00 2001 +From: Joel Sing +Date: Sun, 23 Feb 2025 22:31:35 +1100 +Subject: [PATCH 09/38] test/codegen: add riscv64/rva23u64 specifiers to + existing tests + +Tests that exist for riscv64/rva22u64 should also be applied to +riscv64/rva23u64. 
+ +Change-Id: Ia529fdf0ac55b8bcb3dcd24fa80efef2351f3842 +Reviewed-on: https://go-review.googlesource.com/c/go/+/652315 +Reviewed-by: Cherry Mui +LUCI-TryBot-Result: Go LUCI +Reviewed-by: Mark Ryan +Reviewed-by: Meng Zhuo +Reviewed-by: David Chase +--- + test/codegen/arithmetic.go | 8 ++++---- + test/codegen/shift.go | 6 +++--- + 2 files changed, 7 insertions(+), 7 deletions(-) + +diff --git a/test/codegen/arithmetic.go b/test/codegen/arithmetic.go +index 4b47f6c13d..9b74d034c7 100644 +--- a/test/codegen/arithmetic.go ++++ b/test/codegen/arithmetic.go +@@ -638,7 +638,7 @@ func Int64Min(a, b int64) int64 { + // amd64: "CMPQ","CMOVQLT" + // arm64: "CMP","CSEL" + // riscv64/rva20u64:"BLT\t" +- // riscv64/rva22u64:"MIN\t" ++ // riscv64/rva22u64,riscv64/rva23u64:"MIN\t" + return min(a, b) + } + +@@ -646,7 +646,7 @@ func Int64Max(a, b int64) int64 { + // amd64: "CMPQ","CMOVQGT" + // arm64: "CMP","CSEL" + // riscv64/rva20u64:"BLT\t" +- // riscv64/rva22u64:"MAX\t" ++ // riscv64/rva22u64,riscv64/rva23u64:"MAX\t" + return max(a, b) + } + +@@ -654,7 +654,7 @@ func Uint64Min(a, b uint64) uint64 { + // amd64: "CMPQ","CMOVQCS" + // arm64: "CMP","CSEL" + // riscv64/rva20u64:"BLTU" +- // riscv64/rva22u64:"MINU" ++ // riscv64/rva22u64,riscv64/rva23u64:"MINU" + return min(a, b) + } + +@@ -662,6 +662,6 @@ func Uint64Max(a, b uint64) uint64 { + // amd64: "CMPQ","CMOVQHI" + // arm64: "CMP","CSEL" + // riscv64/rva20u64:"BLTU" +- // riscv64/rva22u64:"MAXU" ++ // riscv64/rva22u64,riscv64/rva23u64:"MAXU" + return max(a, b) + } +diff --git a/test/codegen/shift.go b/test/codegen/shift.go +index 7c29b69181..2f7d7d43f0 100644 +--- a/test/codegen/shift.go ++++ b/test/codegen/shift.go +@@ -531,13 +531,13 @@ func checkShiftToMask(u []uint64, s []int64) { + + func checkLeftShiftWithAddition(a int64, b int64) int64 { + // riscv64/rva20u64: "SLLI","ADD" +- // riscv64/rva22u64: "SH1ADD" ++ // riscv64/rva22u64,riscv64/rva23u64: "SH1ADD" + a = a + b<<1 + // riscv64/rva20u64: "SLLI","ADD" +- // riscv64/rva22u64: "SH2ADD" ++ // riscv64/rva22u64,riscv64/rva23u64: "SH2ADD" + a = a + b<<2 + // riscv64/rva20u64: "SLLI","ADD" +- // riscv64/rva22u64: "SH3ADD" ++ // riscv64/rva22u64,riscv64/rva23u64: "SH3ADD" + a = a + b<<3 + return a + } +-- +2.50.1 + diff --git a/2010-test-codegen-add-riscv64-codegen-for-arithmetic-test.patch b/2010-test-codegen-add-riscv64-codegen-for-arithmetic-test.patch new file mode 100644 index 0000000000000000000000000000000000000000..3b171fcce94c06eff5c5feb9a8e07a99547b243e --- /dev/null +++ b/2010-test-codegen-add-riscv64-codegen-for-arithmetic-test.patch @@ -0,0 +1,101 @@ +From 701dea763e872464263b5d92fd3474997825ee93 Mon Sep 17 00:00:00 2001 +From: Joel Sing +Date: Mon, 24 Feb 2025 23:08:46 +1100 +Subject: [PATCH 10/38] test/codegen: add riscv64 codegen for arithmetic tests + +Codify the current riscv64 code generation for various subtract from +constant and addition/subtraction tests. 
+ +Change-Id: I54ad923280a0578a338bc4431fa5bdc0644c4729 +Reviewed-on: https://go-review.googlesource.com/c/go/+/652316 +Reviewed-by: Meng Zhuo +Reviewed-by: Cherry Mui +LUCI-TryBot-Result: Go LUCI +Reviewed-by: Mark Ryan +Reviewed-by: David Chase +--- + test/codegen/arithmetic.go | 10 ++++++++++ + 1 file changed, 10 insertions(+) + +diff --git a/test/codegen/arithmetic.go b/test/codegen/arithmetic.go +index 9b74d034c7..db84fdf1b7 100644 +--- a/test/codegen/arithmetic.go ++++ b/test/codegen/arithmetic.go +@@ -85,36 +85,42 @@ func SubMem(arr []int, b, c, d int) int { + + func SubFromConst(a int) int { + // ppc64x: `SUBC\tR[0-9]+,\s[$]40,\sR` ++ // riscv64: "ADDI\t\\$-40","NEG" + b := 40 - a + return b + } + + func SubFromConstNeg(a int) int { + // ppc64x: `ADD\t[$]40,\sR[0-9]+,\sR` ++ // riscv64: "NEG","ADDI\t\\$-40","NEG" + c := 40 - (-a) + return c + } + + func SubSubFromConst(a int) int { + // ppc64x: `ADD\t[$]20,\sR[0-9]+,\sR` ++ // riscv64: "ADDI\t\\$20",-"NEG" + c := 40 - (20 - a) + return c + } + + func AddSubFromConst(a int) int { + // ppc64x: `SUBC\tR[0-9]+,\s[$]60,\sR` ++ // riscv64: "ADDI\t\\$-60","NEG" + c := 40 + (20 - a) + return c + } + + func NegSubFromConst(a int) int { + // ppc64x: `ADD\t[$]-20,\sR[0-9]+,\sR` ++ // riscv64: "ADDI\t\\$-20" + c := -(20 - a) + return c + } + + func NegAddFromConstNeg(a int) int { + // ppc64x: `SUBC\tR[0-9]+,\s[$]40,\sR` ++ // riscv64: "ADDI\t\\$-40","NEG" + c := -(-40 + a) + return c + } +@@ -122,6 +128,7 @@ func NegAddFromConstNeg(a int) int { + func SubSubNegSimplify(a, b int) int { + // amd64:"NEGQ" + // ppc64x:"NEG" ++ // riscv64:"NEG",-"SUB" + r := (a - b) - a + return r + } +@@ -129,6 +136,7 @@ func SubSubNegSimplify(a, b int) int { + func SubAddSimplify(a, b int) int { + // amd64:-"SUBQ",-"ADDQ" + // ppc64x:-"SUB",-"ADD" ++ // riscv64:-"SUB",-"ADD" + r := a + (b - a) + return r + } +@@ -152,6 +160,7 @@ func SubAddSimplify2(a, b, c int) (int, int, int, int, int, int) { + func SubAddNegSimplify(a, b int) int { + // amd64:"NEGQ",-"ADDQ",-"SUBQ" + // ppc64x:"NEG",-"ADD",-"SUB" ++ // riscv64:"NEG",-"ADD",-"SUB" + r := a - (b + a) + return r + } +@@ -159,6 +168,7 @@ func SubAddNegSimplify(a, b int) int { + func AddAddSubSimplify(a, b, c int) int { + // amd64:-"SUBQ" + // ppc64x:-"SUB" ++ // riscv64:"ADD","ADD",-"SUB" + r := a + (b + (c - a)) + return r + } +-- +2.50.1 + diff --git a/2011-cmd-compile-internal-ssa-remove-double-negation-with.patch b/2011-cmd-compile-internal-ssa-remove-double-negation-with.patch new file mode 100644 index 0000000000000000000000000000000000000000..af4d9f58d90c45174b4a04c90bcdf0d982ed9ca3 --- /dev/null +++ b/2011-cmd-compile-internal-ssa-remove-double-negation-with.patch @@ -0,0 +1,97 @@ +From 8d480bdad8f87c68ba94f5be0117a14d0910abf3 Mon Sep 17 00:00:00 2001 +From: Joel Sing +Date: Mon, 24 Feb 2025 00:37:45 +1100 +Subject: [PATCH 11/38] cmd/compile/internal/ssa: remove double negation with + addition on riscv64 + +On riscv64, subtraction from a constant is typically implemented as an +ADDI with the negative constant, followed by a negation. However this can +lead to multiple NEG/ADDI/NEG sequences that can be optimised out. 
+ +For example, runtime.(*_panic).nextDefer currently contains: + + lbu t0, 0(t0) + addi t0, t0, -8 + neg t0, t0 + addi t0, t0, -7 + neg t0, t0 + +Which is now optimised to: + + lbu t0, 0(t0) + addi t0, t0, -1 + +Change-Id: Idf5815e6db2e3705cc4a4811ca9130a064ae3d80 +Reviewed-on: https://go-review.googlesource.com/c/go/+/652318 +Reviewed-by: Cherry Mui +Reviewed-by: Meng Zhuo +LUCI-TryBot-Result: Go LUCI +Reviewed-by: Mark Ryan +Reviewed-by: David Chase +--- + .../compile/internal/ssa/_gen/RISCV64.rules | 1 + + .../compile/internal/ssa/rewriteRISCV64.go | 22 +++++++++++++++++++ + test/codegen/arithmetic.go | 2 +- + 3 files changed, 24 insertions(+), 1 deletion(-) + +diff --git a/src/cmd/compile/internal/ssa/_gen/RISCV64.rules b/src/cmd/compile/internal/ssa/_gen/RISCV64.rules +index 9ae9604381..b38c9bd6c8 100644 +--- a/src/cmd/compile/internal/ssa/_gen/RISCV64.rules ++++ b/src/cmd/compile/internal/ssa/_gen/RISCV64.rules +@@ -768,6 +768,7 @@ + + // Double negation. + (NEG (NEG x)) => x ++(NEG s:(ADDI [val] (NEG x))) && s.Uses == 1 && is32Bit(-val) => (ADDI [-val] x) + + // Addition of zero or two constants. + (ADDI [0] x) => x +diff --git a/src/cmd/compile/internal/ssa/rewriteRISCV64.go b/src/cmd/compile/internal/ssa/rewriteRISCV64.go +index aa44ab311e..e16caa2cdc 100644 +--- a/src/cmd/compile/internal/ssa/rewriteRISCV64.go ++++ b/src/cmd/compile/internal/ssa/rewriteRISCV64.go +@@ -6075,6 +6075,28 @@ func rewriteValueRISCV64_OpRISCV64NEG(v *Value) bool { + v.copyOf(x) + return true + } ++ // match: (NEG s:(ADDI [val] (NEG x))) ++ // cond: s.Uses == 1 && is32Bit(-val) ++ // result: (ADDI [-val] x) ++ for { ++ s := v_0 ++ if s.Op != OpRISCV64ADDI { ++ break ++ } ++ val := auxIntToInt64(s.AuxInt) ++ s_0 := s.Args[0] ++ if s_0.Op != OpRISCV64NEG { ++ break ++ } ++ x := s_0.Args[0] ++ if !(s.Uses == 1 && is32Bit(-val)) { ++ break ++ } ++ v.reset(OpRISCV64ADDI) ++ v.AuxInt = int64ToAuxInt(-val) ++ v.AddArg(x) ++ return true ++ } + // match: (NEG (MOVDconst [x])) + // result: (MOVDconst [-x]) + for { +diff --git a/test/codegen/arithmetic.go b/test/codegen/arithmetic.go +index db84fdf1b7..a7cad45936 100644 +--- a/test/codegen/arithmetic.go ++++ b/test/codegen/arithmetic.go +@@ -92,7 +92,7 @@ func SubFromConst(a int) int { + + func SubFromConstNeg(a int) int { + // ppc64x: `ADD\t[$]40,\sR[0-9]+,\sR` +- // riscv64: "NEG","ADDI\t\\$-40","NEG" ++ // riscv64: "ADDI\t\\$40",-"NEG" + c := 40 - (-a) + return c + } +-- +2.50.1 + diff --git a/2012-cmd-internal-obj-riscv-prevent-duplicate-error-repor.patch b/2012-cmd-internal-obj-riscv-prevent-duplicate-error-repor.patch new file mode 100644 index 0000000000000000000000000000000000000000..71cdd92ef72d28679dec9d275791cffef6103a68 --- /dev/null +++ b/2012-cmd-internal-obj-riscv-prevent-duplicate-error-repor.patch @@ -0,0 +1,188 @@ +From 6f9505d81702138a686e5774ed5ece8cde98ec1c Mon Sep 17 00:00:00 2001 +From: Mark Ryan +Date: Tue, 10 Dec 2024 17:02:26 +0100 +Subject: [PATCH 12/38] cmd/internal/obj/riscv: prevent duplicate error reports + +The riscv64 Go assembler can output certain errors, ones produced by +instructionsForProg, multiple times. These errors are guaranteed to +be output at least twice and can appear three or more times if a +rescan is needed to recompute branch addresses. 
For example, the +syntactically incorrect instruction + +MOV (X10), $1 + +will generate at least two identical errors + +asm: 86076 (asm.s:21524) MOV (X10), $1: unsupported MOV +asm: 86076 (asm.s:21524) MOV (X10), $1: unsupported MOV +asm: assembly failed + +In addition to confusing the user, these duplicate errors make it +difficult to write negative tests for certain types of instructions, +e.g., branches, whose duplicate errors are not always identical, +and so not ignored by endtoend_test.go. + +We fix the issue by returning from preprocess if any errors have been +generated by the time we reach the end of the rescan loop. One +implication of this change is that validation errors will no longer +be reported if an error is generated earlier in the preprocess stage. +Negative test cases for validation errors are therefore moved to +their own file as the existing riscv64error.s file contains errors +generated by instructionsForProg that will now suppress the +validation errors. + +Change-Id: Iffacdbefce28f44970dd5dda44990b822b8a23d4 +Reviewed-on: https://go-review.googlesource.com/c/go/+/637315 +LUCI-TryBot-Result: Go LUCI +Reviewed-by: Joel Sing +Reviewed-by: David Chase +Reviewed-by: Cherry Mui +--- + src/cmd/asm/internal/asm/endtoend_test.go | 4 ++ + .../asm/internal/asm/testdata/riscv64error.s | 34 -------------- + .../internal/asm/testdata/riscv64validation.s | 46 +++++++++++++++++++ + src/cmd/internal/obj/riscv/obj.go | 5 ++ + 4 files changed, 55 insertions(+), 34 deletions(-) + create mode 100644 src/cmd/asm/internal/asm/testdata/riscv64validation.s + +diff --git a/src/cmd/asm/internal/asm/endtoend_test.go b/src/cmd/asm/internal/asm/endtoend_test.go +index 6e1aa1cd95..ed6248a135 100644 +--- a/src/cmd/asm/internal/asm/endtoend_test.go ++++ b/src/cmd/asm/internal/asm/endtoend_test.go +@@ -487,6 +487,10 @@ func TestRISCVErrors(t *testing.T) { + testErrors(t, "riscv64", "riscv64error") + } + ++func TestRISCVValidation(t *testing.T) { ++ testErrors(t, "riscv64", "riscv64validation") ++} ++ + func TestS390XEndToEnd(t *testing.T) { + testEndToEnd(t, "s390x", "s390x") + } +diff --git a/src/cmd/asm/internal/asm/testdata/riscv64error.s b/src/cmd/asm/internal/asm/testdata/riscv64error.s +index 82a2348894..e8855f6cd5 100644 +--- a/src/cmd/asm/internal/asm/testdata/riscv64error.s ++++ b/src/cmd/asm/internal/asm/testdata/riscv64error.s +@@ -43,50 +43,16 @@ TEXT errors(SB),$0 + SRLIW $-1, X5, X6 // ERROR "immediate out of range 0 to 31" + SRAIW $-1, X5, X6 // ERROR "immediate out of range 0 to 31" + SD X5, 4294967296(X6) // ERROR "constant 4294967296 too large" +- SRLI $1, X5, F1 // ERROR "expected integer register in rd position but got non-integer register F1" +- SRLI $1, F1, X5 // ERROR "expected integer register in rs1 position but got non-integer register F1" + FNES F1, (X5) // ERROR "needs an integer register output" +- VSETVLI $32, E16, M1, TU, MU, X12 // ERROR "must be in range [0, 31] (5 bits)" +- VSETVLI $-1, E32, M2, TA, MA, X12 // ERROR "must be in range [0, 31] (5 bits)" + VSETIVLI X10, E32, M2, TA, MA, X12 // ERROR "expected immediate value" +- VSETVL X10, X11 // ERROR "expected integer register in rs1 position" +- VLE8V (X10), X10 // ERROR "expected vector register in rd position" +- VLE8V (V1), V3 // ERROR "expected integer register in rs1 position" + VLE8V (X10), V1, V3 // ERROR "invalid vector mask register" +- VSE8V X10, (X10) // ERROR "expected vector register in rs1 position" +- VSE8V V3, (V1) // ERROR "expected integer register in rd position" + VSE8V V3, V1, (X10) // ERROR 
"invalid vector mask register" +- VLSE8V (X10), V3 // ERROR "expected integer register in rs2 position" +- VLSE8V (X10), X10, X11 // ERROR "expected vector register in rd position" +- VLSE8V (V1), X10, V3 // ERROR "expected integer register in rs1 position" +- VLSE8V (X10), V1, V0, V3 // ERROR "expected integer register in rs2 position" + VLSE8V (X10), X10, V1, V3 // ERROR "invalid vector mask register" +- VSSE8V V3, (X10) // ERROR "expected integer register in rs2 position" +- VSSE8V X10, X11, (X10) // ERROR "expected vector register in rd position" +- VSSE8V V3, X11, (V1) // ERROR "expected integer register in rs1 position" +- VSSE8V V3, V1, V0, (X10) // ERROR "expected integer register in rs2 position" + VSSE8V V3, X11, V1, (X10) // ERROR "invalid vector mask register" +- VLUXEI8V (X10), V2, X11 // ERROR "expected vector register in rd position" +- VLUXEI8V (X10), V2, X11 // ERROR "expected vector register in rd position" +- VLUXEI8V (V1), V2, V3 // ERROR "expected integer register in rs1 position" +- VLUXEI8V (X10), X11, V0, V3 // ERROR "expected vector register in rs2 position" + VLUXEI8V (X10), V2, V1, V3 // ERROR "invalid vector mask register" +- VSUXEI8V X10, V2, (X10) // ERROR "expected vector register in rd position" +- VSUXEI8V V3, V2, (V1) // ERROR "expected integer register in rs1 position" +- VSUXEI8V V3, X11, V0, (X10) // ERROR "expected vector register in rs2 position" + VSUXEI8V V3, V2, V1, (X10) // ERROR "invalid vector mask register" +- VLOXEI8V (X10), V2, X11 // ERROR "expected vector register in rd position" +- VLOXEI8V (V1), V2, V3 // ERROR "expected integer register in rs1 position" +- VLOXEI8V (X10), X11, V0, V3 // ERROR "expected vector register in rs2 position" + VLOXEI8V (X10), V2, V1, V3 // ERROR "invalid vector mask register" +- VSOXEI8V X10, V2, (X10) // ERROR "expected vector register in rd position" +- VSOXEI8V V3, V2, (V1) // ERROR "expected integer register in rs1 position" +- VSOXEI8V V3, X11, V0, (X10) // ERROR "expected vector register in rs2 position" + VSOXEI8V V3, V2, V1, (X10) // ERROR "invalid vector mask register" + VL1RV (X10), V0, V3 // ERROR "too many operands for instruction" +- VL1RV (X10), X10 // ERROR "expected vector register in rd position" +- VL1RV (V1), V3 // ERROR "expected integer register in rs1 position" + VS1RV V3, V0, (X11) // ERROR "too many operands for instruction" +- VS1RV X11, (X11) // ERROR "expected vector register in rs1 position" +- VS1RV V3, (V1) // ERROR "expected integer register in rd position" + RET +diff --git a/src/cmd/asm/internal/asm/testdata/riscv64validation.s b/src/cmd/asm/internal/asm/testdata/riscv64validation.s +new file mode 100644 +index 0000000000..773f275dd3 +--- /dev/null ++++ b/src/cmd/asm/internal/asm/testdata/riscv64validation.s +@@ -0,0 +1,46 @@ ++// Copyright 2024 The Go Authors. All rights reserved. ++// Use of this source code is governed by a BSD-style ++// license that can be found in the LICENSE file. ++ ++// This file is for validation errors only, i.e., errors reported by the validate function. ++// Negative test cases for errors generated earlier in the assembler's preprocess stage ++// should be added to riscv64error.s. If they are added to this file, they will prevent ++// the validate function from being run and TestRISCVValidation will report missing ++// errors. 
++ ++TEXT validation(SB),$0 ++ SRLI $1, X5, F1 // ERROR "expected integer register in rd position but got non-integer register F1" ++ SRLI $1, F1, X5 // ERROR "expected integer register in rs1 position but got non-integer register F1" ++ VSETVLI $32, E16, M1, TU, MU, X12 // ERROR "must be in range [0, 31] (5 bits)" ++ VSETVLI $-1, E32, M2, TA, MA, X12 // ERROR "must be in range [0, 31] (5 bits)" ++ VSETVL X10, X11 // ERROR "expected integer register in rs1 position" ++ VLE8V (X10), X10 // ERROR "expected vector register in rd position" ++ VLE8V (V1), V3 // ERROR "expected integer register in rs1 position" ++ VSE8V X10, (X10) // ERROR "expected vector register in rs1 position" ++ VSE8V V3, (V1) // ERROR "expected integer register in rd position" ++ VLSE8V (X10), V3 // ERROR "expected integer register in rs2 position" ++ VLSE8V (X10), X10, X11 // ERROR "expected vector register in rd position" ++ VLSE8V (V1), X10, V3 // ERROR "expected integer register in rs1 position" ++ VLSE8V (X10), V1, V0, V3 // ERROR "expected integer register in rs2 position" ++ VSSE8V V3, (X10) // ERROR "expected integer register in rs2 position" ++ VSSE8V X10, X11, (X10) // ERROR "expected vector register in rd position" ++ VSSE8V V3, X11, (V1) // ERROR "expected integer register in rs1 position" ++ VSSE8V V3, V1, V0, (X10) // ERROR "expected integer register in rs2 position" ++ VLUXEI8V (X10), V2, X11 // ERROR "expected vector register in rd position" ++ VLUXEI8V (X10), V2, X11 // ERROR "expected vector register in rd position" ++ VLUXEI8V (V1), V2, V3 // ERROR "expected integer register in rs1 position" ++ VLUXEI8V (X10), X11, V0, V3 // ERROR "expected vector register in rs2 position" ++ VSUXEI8V X10, V2, (X10) // ERROR "expected vector register in rd position" ++ VSUXEI8V V3, V2, (V1) // ERROR "expected integer register in rs1 position" ++ VSUXEI8V V3, X11, V0, (X10) // ERROR "expected vector register in rs2 position" ++ VLOXEI8V (X10), V2, X11 // ERROR "expected vector register in rd position" ++ VLOXEI8V (V1), V2, V3 // ERROR "expected integer register in rs1 position" ++ VLOXEI8V (X10), X11, V0, V3 // ERROR "expected vector register in rs2 position" ++ VSOXEI8V X10, V2, (X10) // ERROR "expected vector register in rd position" ++ VSOXEI8V V3, V2, (V1) // ERROR "expected integer register in rs1 position" ++ VSOXEI8V V3, X11, V0, (X10) // ERROR "expected vector register in rs2 position" ++ VL1RV (X10), X10 // ERROR "expected vector register in rd position" ++ VL1RV (V1), V3 // ERROR "expected integer register in rs1 position" ++ VS1RV X11, (X11) // ERROR "expected vector register in rs1 position" ++ VS1RV V3, (V1) // ERROR "expected integer register in rd position" ++ RET +diff --git a/src/cmd/internal/obj/riscv/obj.go b/src/cmd/internal/obj/riscv/obj.go +index 16bd1310c1..de9851519a 100644 +--- a/src/cmd/internal/obj/riscv/obj.go ++++ b/src/cmd/internal/obj/riscv/obj.go +@@ -833,6 +833,11 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { + } + } + ++ // Return if errors have been detected up to this point. Continuing ++ // may lead to duplicate errors being output. 
++ if ctxt.Errors > 0 { ++ return ++ } + if !rescan { + break + } +-- +2.50.1 + diff --git a/2013-cmd-internal-obj-riscv-add-support-for-vector-intege.patch b/2013-cmd-internal-obj-riscv-add-support-for-vector-intege.patch new file mode 100644 index 0000000000000000000000000000000000000000..57d7f37e9822a7a834f9af1e3b0654817939be5c --- /dev/null +++ b/2013-cmd-internal-obj-riscv-add-support-for-vector-intege.patch @@ -0,0 +1,1327 @@ +From 2913d4996a59c391548d7ecc5292d5d28f54f2fa Mon Sep 17 00:00:00 2001 +From: Joel Sing +Date: Tue, 18 Jun 2024 23:36:43 +1000 +Subject: [PATCH 13/38] cmd/internal/obj/riscv: add support for vector integer + arithmetic instructions + +Add support for vector integer arithmetic instructions to the RISC-V +assembler. This includes vector addition, subtraction, integer +extension, add-with-carry, subtract-with-borrow, bitwise logical +operations, comparison, min/max, integer division and multiplication +instructions. + +Change-Id: I8c191ef8e31291e13743732903e4f12356133a46 +Reviewed-on: https://go-review.googlesource.com/c/go/+/646775 +Reviewed-by: Dmitri Shuralyov +Reviewed-by: Cherry Mui +Reviewed-by: Meng Zhuo +LUCI-TryBot-Result: Go LUCI +Reviewed-by: Mark Ryan +--- + src/cmd/asm/internal/asm/testdata/riscv64.s | 317 ++++++++++++++ + .../asm/internal/asm/testdata/riscv64error.s | 165 ++++++++ + .../internal/asm/testdata/riscv64validation.s | 225 +++++++++- + src/cmd/internal/obj/riscv/anames.go | 13 + + src/cmd/internal/obj/riscv/cpu.go | 13 + + src/cmd/internal/obj/riscv/obj.go | 393 +++++++++++++++++- + 6 files changed, 1101 insertions(+), 25 deletions(-) + +diff --git a/src/cmd/asm/internal/asm/testdata/riscv64.s b/src/cmd/asm/internal/asm/testdata/riscv64.s +index 5aa2563b6f..91c1c1e5af 100644 +--- a/src/cmd/asm/internal/asm/testdata/riscv64.s ++++ b/src/cmd/asm/internal/asm/testdata/riscv64.s +@@ -546,6 +546,323 @@ start: + VS4RV V4, (X11) // 27828562 + VS8RV V8, (X11) // 278485e2 + ++ // 31.11.1: Vector Single-Width Integer Add and Subtract ++ VADDVV V1, V2, V3 // d7812002 ++ VADDVV V1, V2, V0, V3 // d7812000 ++ VADDVX X10, V2, V3 // d7412502 ++ VADDVX X10, V2, V0, V3 // d7412500 ++ VADDVI $15, V2, V3 // d7b12702 ++ VADDVI $15, V2, V0, V3 // d7b12700 ++ VADDVI $-16, V2, V3 // d7312802 ++ VADDVI $-16, V2, V0, V3 // d7312800 ++ VSUBVV V1, V2, V3 // d781200a ++ VSUBVV V1, V2, V0, V3 // d7812008 ++ VSUBVX X10, V2, V3 // d741250a ++ VSUBVX X10, V2, V0, V3 // d7412508 ++ VRSUBVX X10, V2, V3 // d741250e ++ VRSUBVX X10, V2, V0, V3 // d741250c ++ VRSUBVI $15, V2, V0, V3 // d7b1270c ++ VRSUBVI $-16, V2, V0, V3 // d731280c ++ VNEGV V2, V3 // d741200e ++ VNEGV V2, V0, V3 // d741200c ++ ++ // 31.11.2: Vector Widening Integer Add/Subtract ++ VWADDUVV V1, V2, V3 // d7a120c2 ++ VWADDUVV V1, V2, V0, V3 // d7a120c0 ++ VWADDUVX X10, V2, V3 // d76125c2 ++ VWADDUVX X10, V2, V0, V3 // d76125c0 ++ VWSUBUVV V1, V2, V3 // d7a120ca ++ VWSUBUVV V1, V2, V0, V3 // d7a120c8 ++ VWSUBUVX X10, V2, V3 // d76125ca ++ VWSUBUVX X10, V2, V0, V3 // d76125c8 ++ VWADDVV V1, V2, V3 // d7a120c6 ++ VWADDVV V1, V2, V0, V3 // d7a120c4 ++ VWADDVX X10, V2, V3 // d76125c6 ++ VWADDVX X10, V2, V0, V3 // d76125c4 ++ VWSUBVV V1, V2, V3 // d7a120ce ++ VWSUBVV V1, V2, V0, V3 // d7a120cc ++ VWSUBVX X10, V2, V3 // d76125ce ++ VWSUBVX X10, V2, V0, V3 // d76125cc ++ VWADDUWV V1, V2, V3 // d7a120d2 ++ VWADDUWV V1, V2, V0, V3 // d7a120d0 ++ VWADDUWX X10, V2, V3 // d76125d2 ++ VWADDUWX X10, V2, V0, V3 // d76125d0 ++ VWSUBUWV V1, V2, V3 // d7a120da ++ VWSUBUWV V1, V2, V0, V3 // d7a120d8 ++ VWSUBUWX X10, V2, V3 // 
d76125da ++ VWSUBUWX X10, V2, V0, V3 // d76125d8 ++ VWADDWV V1, V2, V3 // d7a120d6 ++ VWADDWV V1, V2, V0, V3 // d7a120d4 ++ VWADDWX X10, V2, V3 // d76125d6 ++ VWADDWX X10, V2, V0, V3 // d76125d4 ++ VWSUBWV V1, V2, V3 // d7a120de ++ VWSUBWV V1, V2, V0, V3 // d7a120dc ++ VWSUBWX X10, V2, V3 // d76125de ++ VWSUBWX X10, V2, V0, V3 // d76125dc ++ VWCVTXXV V2, V3 // d76120c6 ++ VWCVTXXV V2, V0, V3 // d76120c4 ++ VWCVTUXXV V2, V3 // d76120c2 ++ VWCVTUXXV V2, V0, V3 // d76120c0 ++ ++ // 31.11.3: Vector Integer Extension ++ VZEXTVF2 V2, V3 // d721234a ++ VZEXTVF2 V2, V0, V3 // d7212348 ++ VSEXTVF2 V2, V3 // d7a1234a ++ VSEXTVF2 V2, V0, V3 // d7a12348 ++ VZEXTVF4 V2, V3 // d721224a ++ VZEXTVF4 V2, V0, V3 // d7212248 ++ VSEXTVF4 V2, V3 // d7a1224a ++ VSEXTVF4 V2, V0, V3 // d7a12248 ++ VZEXTVF8 V2, V3 // d721214a ++ VZEXTVF8 V2, V0, V3 // d7212148 ++ VSEXTVF8 V2, V3 // d7a1214a ++ VSEXTVF8 V2, V0, V3 // d7a12148 ++ ++ // 31.11.4: Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions ++ VADCVVM V1, V2, V0, V3 // d7812040 ++ VADCVXM X11, V2, V0, V3 // d7c12540 ++ VADCVIM $15, V2, V0, V3 // d7b12740 ++ VMADCVVM V1, V2, V0, V3 // d7812044 ++ VMADCVXM X11, V2, V0, V3 // d7c12544 ++ VMADCVIM $15, V2, V0, V3 // d7b12744 ++ VMADCVV V1, V2, V3 // d7812046 ++ VMADCVX X11, V2, V3 // d7c12546 ++ VMADCVI $15, V2, V3 // d7b12746 ++ VSBCVVM V1, V2, V0, V3 // d7812048 ++ VSBCVXM X11, V2, V0, V3 // d7c12548 ++ VMSBCVVM V1, V2, V0, V3 // d781204c ++ VMSBCVXM X11, V2, V0, V3 // d7c1254c ++ VMSBCVV V1, V2, V3 // d781204e ++ VMSBCVX X11, V2, V3 // d7c1254e ++ ++ // 31.11.5: Vector Bitwise Logical Instructions ++ VANDVV V1, V2, V3 // d7812026 ++ VANDVV V1, V2, V0, V3 // d7812024 ++ VANDVX X11, V2, V3 // d7c12526 ++ VANDVX X11, V2, V0, V3 // d7c12524 ++ VANDVI $15, V2, V3 // d7b12726 ++ VANDVI $15, V2, V0, V3 // d7b12724 ++ VORVV V1, V2, V3 // d781202a ++ VORVV V1, V2, V0, V3 // d7812028 ++ VORVX X11, V2, V3 // d7c1252a ++ VORVX X11, V2, V0, V3 // d7c12528 ++ VORVI $15, V2, V3 // d7b1272a ++ VORVI $15, V2, V0, V3 // d7b12728 ++ VXORVV V1, V2, V3 // d781202e ++ VXORVV V1, V2, V0, V3 // d781202c ++ VXORVX X11, V2, V3 // d7c1252e ++ VXORVX X11, V2, V0, V3 // d7c1252c ++ VXORVI $15, V2, V3 // d7b1272e ++ VXORVI $15, V2, V0, V3 // d7b1272c ++ VNOTV V2, V3 // d7b12f2e ++ VNOTV V2, V0, V3 // d7b12f2c ++ ++ // 31.11.6: Vector Single-Width Shift Instructions ++ VSLLVV V1, V2, V3 // d7812096 ++ VSLLVV V1, V2, V0, V3 // d7812094 ++ VSLLVX X11, V2, V3 // d7c12596 ++ VSLLVX X11, V2, V0, V3 // d7c12594 ++ VSLLVI $15, V2, V3 // d7b12796 ++ VSLLVI $15, V2, V0, V3 // d7b12794 ++ VSRLVV V1, V2, V3 // d78120a2 ++ VSRLVV V1, V2, V0, V3 // d78120a0 ++ VSRLVX X11, V2, V3 // d7c125a2 ++ VSRLVX X11, V2, V0, V3 // d7c125a0 ++ VSRLVI $15, V2, V3 // d7b127a2 ++ VSRLVI $15, V2, V0, V3 // d7b127a0 ++ VSRAVV V1, V2, V3 // d78120a6 ++ VSRAVV V1, V2, V0, V3 // d78120a4 ++ VSRAVX X11, V2, V3 // d7c125a6 ++ VSRAVX X11, V2, V0, V3 // d7c125a4 ++ VSRAVI $15, V2, V3 // d7b127a6 ++ VSRAVI $15, V2, V0, V3 // d7b127a4 ++ ++ // 31.11.7: Vector Narrowing Integer Right Shift Instructions ++ VNSRLWV V1, V2, V3 // d78120b2 ++ VNSRLWV V1, V2, V0, V3 // d78120b0 ++ VNSRLWX X10, V2, V3 // d74125b2 ++ VNSRLWX X10, V2, V0, V3 // d74125b0 ++ VNSRLWI $31, V2, V3 // d7b12fb2 ++ VNSRLWI $31, V2, V0, V3 // d7b12fb0 ++ VNSRAWV V1, V2, V3 // d78120b6 ++ VNSRAWV V1, V2, V0, V3 // d78120b4 ++ VNSRAWX X10, V2, V3 // d74125b6 ++ VNSRAWX X10, V2, V0, V3 // d74125b4 ++ VNSRAWI $31, V2, V3 // d7b12fb6 ++ VNSRAWI $31, V2, V0, V3 // d7b12fb4 ++ VNCVTXXW V2, V3 // d74120b2 ++ 
VNCVTXXW V2, V0, V3 // d74120b0 ++ ++ // 31.11.8: Vector Integer Compare Instructions ++ VMSEQVV V1, V2, V3 // d7812062 ++ VMSEQVV V1, V2, V0, V3 // d7812060 ++ VMSEQVX X10, V2, V3 // d7412562 ++ VMSEQVX X10, V2, V0, V3 // d7412560 ++ VMSEQVI $15, V2, V3 // d7b12762 ++ VMSEQVI $15, V2, V0, V3 // d7b12760 ++ VMSNEVV V1, V2, V3 // d7812066 ++ VMSNEVV V1, V2, V0, V3 // d7812064 ++ VMSNEVX X10, V2, V3 // d7412566 ++ VMSNEVX X10, V2, V0, V3 // d7412564 ++ VMSNEVI $15, V2, V3 // d7b12766 ++ VMSNEVI $15, V2, V0, V3 // d7b12764 ++ VMSLTUVV V1, V2, V3 // d781206a ++ VMSLTUVV V1, V2, V0, V3 // d7812068 ++ VMSLTUVX X10, V2, V3 // d741256a ++ VMSLTUVX X10, V2, V0, V3 // d7412568 ++ VMSLTVV V1, V2, V3 // d781206e ++ VMSLTVV V1, V2, V0, V3 // d781206c ++ VMSLTVX X10, V2, V3 // d741256e ++ VMSLTVX X10, V2, V0, V3 // d741256c ++ VMSLEUVV V1, V2, V3 // d7812072 ++ VMSLEUVV V1, V2, V0, V3 // d7812070 ++ VMSLEUVX X10, V2, V3 // d7412572 ++ VMSLEUVX X10, V2, V0, V3 // d7412570 ++ VMSLEUVI $15, V2, V3 // d7b12772 ++ VMSLEUVI $15, V2, V0, V3 // d7b12770 ++ VMSLEVV V1, V2, V3 // d7812076 ++ VMSLEVV V1, V2, V0, V3 // d7812074 ++ VMSLEVX X10, V2, V3 // d7412576 ++ VMSLEVX X10, V2, V0, V3 // d7412574 ++ VMSLEVI $15, V2, V3 // d7b12776 ++ VMSLEVI $15, V2, V0, V3 // d7b12774 ++ VMSGTUVX X10, V2, V3 // d741257a ++ VMSGTUVX X10, V2, V0, V3 // d7412578 ++ VMSGTUVI $15, V2, V3 // d7b1277a ++ VMSGTUVI $15, V2, V0, V3 // d7b12778 ++ VMSGTVX X10, V2, V3 // d741257e ++ VMSGTVX X10, V2, V0, V3 // d741257c ++ VMSGTVI $15, V2, V3 // d7b1277e ++ VMSGTVI $15, V2, V0, V3 // d7b1277c ++ VMSGTVV V1, V2, V3 // d701116e ++ VMSGTVV V1, V2, V0, V3 // d701116c ++ VMSGTUVV V1, V2, V3 // d701116a ++ VMSGTUVV V1, V2, V0, V3 // d7011168 ++ VMSGEVV V1, V2, V3 // d7011176 ++ VMSGEVV V1, V2, V0, V3 // d7011174 ++ VMSGEUVV V1, V2, V3 // d7011172 ++ VMSGEUVV V1, V2, V0, V3 // d7011170 ++ VMSLTVI $15, V2, V3 // d7312776 ++ VMSLTVI $15, V2, V0, V3 // d7312774 ++ VMSLTUVI $15, V2, V3 // d7312772 ++ VMSLTUVI $15, V2, V0, V3 // d7312770 ++ VMSGEVI $15, V2, V3 // d731277e ++ VMSGEVI $15, V2, V0, V3 // d731277c ++ VMSGEUVI $15, V2, V3 // d731277a ++ VMSGEUVI $15, V2, V0, V3 // d7312778 ++ ++ // 31.11.9: Vector Integer Min/Max Instructions ++ VMINUVV V1, V2, V3 // d7812012 ++ VMINUVV V1, V2, V0, V3 // d7812010 ++ VMINUVX X10, V2, V3 // d7412512 ++ VMINUVX X10, V2, V0, V3 // d7412510 ++ VMINVV V1, V2, V3 // d7812016 ++ VMINVV V1, V2, V0, V3 // d7812014 ++ VMINVX X10, V2, V3 // d7412516 ++ VMINVX X10, V2, V0, V3 // d7412514 ++ VMAXUVV V1, V2, V3 // d781201a ++ VMAXUVV V1, V2, V0, V3 // d7812018 ++ VMAXUVX X10, V2, V3 // d741251a ++ VMAXUVX X10, V2, V0, V3 // d7412518 ++ VMAXVV V1, V2, V3 // d781201e ++ VMAXVV V1, V2, V0, V3 // d781201c ++ VMAXVX X10, V2, V3 // d741251e ++ VMAXVX X10, V2, V0, V3 // d741251c ++ ++ // 31.11.10: Vector Single-Width Integer Multiply Instructions ++ VMULVV V1, V2, V3 // d7a12096 ++ VMULVV V1, V2, V0, V3 // d7a12094 ++ VMULVX X10, V2, V3 // d7612596 ++ VMULVX X10, V2, V0, V3 // d7612594 ++ VMULHVV V1, V2, V3 // d7a1209e ++ VMULHVV V1, V2, V0, V3 // d7a1209c ++ VMULHVX X10, V2, V3 // d761259e ++ VMULHVX X10, V2, V0, V3 // d761259c ++ VMULHUVV V1, V2, V3 // d7a12092 ++ VMULHUVV V1, V2, V0, V3 // d7a12090 ++ VMULHUVX X10, V2, V3 // d7612592 ++ VMULHUVX X10, V2, V0, V3 // d7612590 ++ VMULHSUVV V1, V2, V3 // d7a1209a ++ VMULHSUVV V1, V2, V0, V3 // d7a12098 ++ VMULHSUVX X10, V2, V3 // d761259a ++ VMULHSUVX X10, V2, V0, V3 // d7612598 ++ ++ // 31.11.11: Vector Integer Divide Instructions ++ VDIVUVV V1, V2, V3 // d7a12082 ++ VDIVUVV V1, 
V2, V0, V3 // d7a12080 ++ VDIVUVX X10, V2, V3 // d7612582 ++ VDIVUVX X10, V2, V0, V3 // d7612580 ++ VDIVVV V1, V2, V3 // d7a12086 ++ VDIVVV V1, V2, V0, V3 // d7a12084 ++ VDIVVX X10, V2, V3 // d7612586 ++ VDIVVX X10, V2, V0, V3 // d7612584 ++ VREMUVV V1, V2, V3 // d7a1208a ++ VREMUVV V1, V2, V0, V3 // d7a12088 ++ VREMUVX X10, V2, V3 // d761258a ++ VREMUVX X10, V2, V0, V3 // d7612588 ++ VREMVV V1, V2, V3 // d7a1208e ++ VREMVV V1, V2, V0, V3 // d7a1208c ++ VREMVX X10, V2, V3 // d761258e ++ VREMVX X10, V2, V0, V3 // d761258c ++ ++ // 31.11.12: Vector Widening Integer Multiply Instructions ++ VWMULVV V1, V2, V3 // d7a120ee ++ VWMULVV V1, V2, V0, V3 // d7a120ec ++ VWMULVX X10, V2, V3 // d76125ee ++ VWMULVX X10, V2, V0, V3 // d76125ec ++ VWMULUVV V1, V2, V3 // d7a120e2 ++ VWMULUVV V1, V2, V0, V3 // d7a120e0 ++ VWMULUVX X10, V2, V3 // d76125e2 ++ VWMULUVX X10, V2, V0, V3 // d76125e0 ++ VWMULSUVV V1, V2, V3 // d7a120ea ++ VWMULSUVV V1, V2, V0, V3 // d7a120e8 ++ VWMULSUVX X10, V2, V3 // d76125ea ++ VWMULSUVX X10, V2, V0, V3 // d76125e8 ++ ++ // 31.11.13: Vector Single-Width Integer Multiply-Add Instructions ++ VMACCVV V1, V2, V3 // d7a120b6 ++ VMACCVV V1, V2, V0, V3 // d7a120b4 ++ VMACCVX X10, V2, V3 // d76125b6 ++ VMACCVX X10, V2, V0, V3 // d76125b4 ++ VNMSACVV V1, V2, V3 // d7a120be ++ VNMSACVV V1, V2, V0, V3 // d7a120bc ++ VNMSACVX X10, V2, V3 // d76125be ++ VNMSACVX X10, V2, V0, V3 // d76125bc ++ VMADDVV V1, V2, V3 // d7a120a6 ++ VMADDVV V1, V2, V0, V3 // d7a120a4 ++ VMADDVX X10, V2, V3 // d76125a6 ++ VMADDVX X10, V2, V0, V3 // d76125a4 ++ VNMSUBVV V1, V2, V3 // d7a120ae ++ VNMSUBVV V1, V2, V0, V3 // d7a120ac ++ VNMSUBVX X10, V2, V3 // d76125ae ++ VNMSUBVX X10, V2, V0, V3 // d76125ac ++ ++ // 31.11.14: Vector Widening Integer Multiply-Add Instructions ++ VWMACCUVV V1, V2, V3 // d7a120f2 ++ VWMACCUVV V1, V2, V0, V3 // d7a120f0 ++ VWMACCUVX X10, V2, V3 // d76125f2 ++ VWMACCUVX X10, V2, V0, V3 // d76125f0 ++ VWMACCVV V1, V2, V3 // d7a120f6 ++ VWMACCVV V1, V2, V0, V3 // d7a120f4 ++ VWMACCVX X10, V2, V3 // d76125f6 ++ VWMACCVX X10, V2, V0, V3 // d76125f4 ++ VWMACCSUVV V1, V2, V3 // d7a120fe ++ VWMACCSUVV V1, V2, V0, V3 // d7a120fc ++ VWMACCSUVX X10, V2, V3 // d76125fe ++ VWMACCSUVX X10, V2, V0, V3 // d76125fc ++ VWMACCUSVX X10, V2, V3 // d76125fa ++ VWMACCUSVX X10, V2, V0, V3 // d76125f8 ++ ++ // 31.11.15: Vector Integer Merge Instructions ++ VMERGEVVM V1, V2, V0, V3 // d781205c ++ VMERGEVXM X10, V2, V0, V3 // d741255c ++ VMERGEVIM $15, V2, V0, V3 // d7b1275c ++ ++ // 31.11.16: Vector Integer Move Instructions ++ VMVVV V2, V3 // d701015e ++ VMVVX X10, V3 // d741055e ++ VMVVI $15, V3 // d7b1075e ++ + // + // Privileged ISA + // +diff --git a/src/cmd/asm/internal/asm/testdata/riscv64error.s b/src/cmd/asm/internal/asm/testdata/riscv64error.s +index e8855f6cd5..4f882aa56d 100644 +--- a/src/cmd/asm/internal/asm/testdata/riscv64error.s ++++ b/src/cmd/asm/internal/asm/testdata/riscv64error.s +@@ -44,6 +44,10 @@ TEXT errors(SB),$0 + SRAIW $-1, X5, X6 // ERROR "immediate out of range 0 to 31" + SD X5, 4294967296(X6) // ERROR "constant 4294967296 too large" + FNES F1, (X5) // ERROR "needs an integer register output" ++ ++ // ++ // "V" Standard Extension for Vector Operations, Version 1.0 ++ // + VSETIVLI X10, E32, M2, TA, MA, X12 // ERROR "expected immediate value" + VLE8V (X10), V1, V3 // ERROR "invalid vector mask register" + VSE8V V3, V1, (X10) // ERROR "invalid vector mask register" +@@ -55,4 +59,165 @@ TEXT errors(SB),$0 + VSOXEI8V V3, V2, V1, (X10) // ERROR "invalid vector mask register" + VL1RV (X10), 
V0, V3 // ERROR "too many operands for instruction" + VS1RV V3, V0, (X11) // ERROR "too many operands for instruction" ++ VADDVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VADDVX X10, V2, V1, V3 // ERROR "invalid vector mask register" ++ VADDVI $15, V4, V1, V2 // ERROR "invalid vector mask register" ++ VSUBVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VSUBVX X10, V2, V1, V3 // ERROR "invalid vector mask register" ++ VRSUBVX X10, V2, V1, V3 // ERROR "invalid vector mask register" ++ VRSUBVI $15, V4, V1, V2 // ERROR "invalid vector mask register" ++ VNEGV V2, V3, V4 // ERROR "invalid vector mask register" ++ VWADDUVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VWADDUVX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VWSUBUVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VWSUBUVX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VWADDVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VWADDVX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VWSUBVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VWSUBVX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VWADDUWV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VWADDUWX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VWSUBUWV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VWSUBUWX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VWADDWV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VWADDWX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VWSUBWV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VWSUBWX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VWCVTXXV V2, V1, V3 // ERROR "invalid vector mask register" ++ VWCVTUXXV V2, V1, V3 // ERROR "invalid vector mask register" ++ VZEXTVF2 V2, V3, V4 // ERROR "invalid vector mask register" ++ VSEXTVF2 V2, V3, V4 // ERROR "invalid vector mask register" ++ VZEXTVF4 V2, V3, V4 // ERROR "invalid vector mask register" ++ VSEXTVF4 V2, V3, V4 // ERROR "invalid vector mask register" ++ VZEXTVF8 V2, V3, V4 // ERROR "invalid vector mask register" ++ VSEXTVF8 V2, V3, V4 // ERROR "invalid vector mask register" ++ VADCVVM V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VADCVVM V1, V2, V3 // ERROR "invalid vector mask register" ++ VADCVXM X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VADCVXM X10, V2, V3 // ERROR "invalid vector mask register" ++ VADCVIM $15, V2, V1, V3 // ERROR "invalid vector mask register" ++ VADCVIM $15, V2, V3 // ERROR "invalid vector mask register" ++ VMADCVVM V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMADCVVM V1, V2, V3 // ERROR "invalid vector mask register" ++ VMADCVXM X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMADCVXM X10, V2, V3 // ERROR "invalid vector mask register" ++ VMADCVIM $15, V2, V1, V3 // ERROR "invalid vector mask register" ++ VMADCVIM $15, V2, V3 // ERROR "invalid vector mask register" ++ VSBCVVM V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VSBCVVM V1, V2, V3 // ERROR "invalid vector mask register" ++ VSBCVXM X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VSBCVXM X10, V2, V3 // ERROR "invalid vector mask register" ++ VMSBCVVM V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMSBCVVM V1, V2, V3 // ERROR "invalid vector mask register" ++ VMSBCVXM X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMSBCVXM X10, V2, V3 // ERROR "invalid vector mask register" ++ VANDVV V1, V2, V4, V3 // ERROR 
"invalid vector mask register" ++ VANDVX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VANDVI $15, V2, V4, V3 // ERROR "invalid vector mask register" ++ VORVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VORVX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VORVI $15, V2, V4, V3 // ERROR "invalid vector mask register" ++ VXORVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VXORVX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VXORVI $15, V2, V4, V3 // ERROR "invalid vector mask register" ++ VNOTV V1, V2, V3 // ERROR "invalid vector mask register" ++ VSLLVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VSLLVX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VSLLVI $15, V2, V4, V3 // ERROR "invalid vector mask register" ++ VSRLVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VSRLVX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VSRLVI $15, V2, V4, V3 // ERROR "invalid vector mask register" ++ VSRAVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VSRAVX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VSRAVI $15, V2, V4, V3 // ERROR "invalid vector mask register" ++ VNSRLWV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VNSRLWX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VNSRLWI $31, V2, V4, V3 // ERROR "invalid vector mask register" ++ VNSRAWV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VNSRAWX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VNSRAWI $31, V2, V4, V3 // ERROR "invalid vector mask register" ++ VNCVTXXW V2, V4, V3 // ERROR "invalid vector mask register" ++ VMSEQVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMSEQVX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMSEQVI $15, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMSNEVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMSNEVX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMSNEVI $15, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMSLTUVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMSLTUVX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMSLTVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMSLTVX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMSLEUVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMSLEUVX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMSLEUVI $15, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMSLEVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMSLEVX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMSLEVI $15, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMSGTUVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMSGTUVX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMSGTUVI $15, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMSGTVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMSGTVX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMSGTVI $15, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMSGEVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMSGEUVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMSLTVI $15, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMSLTUVI $15, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMSGEVI $15, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMSGEUVI $15, V2, V4, V3 // ERROR "invalid vector mask register" ++ 
VMINUVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMINUVX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMINVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMINVX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMAXUVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMAXUVX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMAXVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMAXVX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMULVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMULVX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMULHVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMULHVX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMULHUVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMULHUVX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMULHSUVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMULHSUVX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VDIVUVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VDIVUVX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VDIVVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VDIVVX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VREMUVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VREMUVX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VREMVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VREMVX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VWMULVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VWMULVX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VWMULUVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VWMULUVX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VWMULSUVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VWMULSUVX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMACCVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMACCVX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VNMSACVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VNMSACVX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMADDVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMADDVX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VNMSUBVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VNMSUBVX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VWMACCUVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VWMACCUVX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VWMACCVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VWMACCVX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VWMACCSUVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VWMACCSUVX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VWMACCUSVX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMERGEVVM V1, V2, V3 // ERROR "invalid vector mask register" ++ VMERGEVVM V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMERGEVXM X10, V2, V3 // ERROR "invalid vector mask register" ++ VMERGEVXM X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMERGEVIM $15, V2, V3 // ERROR "invalid vector mask register" ++ VMERGEVIM $15, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMVVV V1, V2, V3 // ERROR "too many operands for instruction" ++ VMVVX X10, V2, V3 // ERROR "too many operands for instruction" ++ VMVVI 
$15, V2, V3 // ERROR "too many operands for instruction" ++ + RET +diff --git a/src/cmd/asm/internal/asm/testdata/riscv64validation.s b/src/cmd/asm/internal/asm/testdata/riscv64validation.s +index 773f275dd3..602cab2c2e 100644 +--- a/src/cmd/asm/internal/asm/testdata/riscv64validation.s ++++ b/src/cmd/asm/internal/asm/testdata/riscv64validation.s +@@ -11,36 +11,231 @@ + TEXT validation(SB),$0 + SRLI $1, X5, F1 // ERROR "expected integer register in rd position but got non-integer register F1" + SRLI $1, F1, X5 // ERROR "expected integer register in rs1 position but got non-integer register F1" ++ ++ // ++ // "V" Standard Extension for Vector Operations, Version 1.0 ++ // + VSETVLI $32, E16, M1, TU, MU, X12 // ERROR "must be in range [0, 31] (5 bits)" + VSETVLI $-1, E32, M2, TA, MA, X12 // ERROR "must be in range [0, 31] (5 bits)" + VSETVL X10, X11 // ERROR "expected integer register in rs1 position" +- VLE8V (X10), X10 // ERROR "expected vector register in rd position" ++ VLE8V (X10), X10 // ERROR "expected vector register in vd position" + VLE8V (V1), V3 // ERROR "expected integer register in rs1 position" +- VSE8V X10, (X10) // ERROR "expected vector register in rs1 position" ++ VSE8V X10, (X10) // ERROR "expected vector register in vs1 position" + VSE8V V3, (V1) // ERROR "expected integer register in rd position" + VLSE8V (X10), V3 // ERROR "expected integer register in rs2 position" +- VLSE8V (X10), X10, X11 // ERROR "expected vector register in rd position" ++ VLSE8V (X10), X10, X11 // ERROR "expected vector register in vd position" + VLSE8V (V1), X10, V3 // ERROR "expected integer register in rs1 position" + VLSE8V (X10), V1, V0, V3 // ERROR "expected integer register in rs2 position" + VSSE8V V3, (X10) // ERROR "expected integer register in rs2 position" +- VSSE8V X10, X11, (X10) // ERROR "expected vector register in rd position" ++ VSSE8V X10, X11, (X10) // ERROR "expected vector register in vd position" + VSSE8V V3, X11, (V1) // ERROR "expected integer register in rs1 position" + VSSE8V V3, V1, V0, (X10) // ERROR "expected integer register in rs2 position" +- VLUXEI8V (X10), V2, X11 // ERROR "expected vector register in rd position" +- VLUXEI8V (X10), V2, X11 // ERROR "expected vector register in rd position" ++ VLUXEI8V (X10), V2, X11 // ERROR "expected vector register in vd position" ++ VLUXEI8V (X10), V2, X11 // ERROR "expected vector register in vd position" + VLUXEI8V (V1), V2, V3 // ERROR "expected integer register in rs1 position" +- VLUXEI8V (X10), X11, V0, V3 // ERROR "expected vector register in rs2 position" +- VSUXEI8V X10, V2, (X10) // ERROR "expected vector register in rd position" ++ VLUXEI8V (X10), X11, V0, V3 // ERROR "expected vector register in vs2 position" ++ VSUXEI8V X10, V2, (X10) // ERROR "expected vector register in vd position" + VSUXEI8V V3, V2, (V1) // ERROR "expected integer register in rs1 position" +- VSUXEI8V V3, X11, V0, (X10) // ERROR "expected vector register in rs2 position" +- VLOXEI8V (X10), V2, X11 // ERROR "expected vector register in rd position" ++ VSUXEI8V V3, X11, V0, (X10) // ERROR "expected vector register in vs2 position" ++ VLOXEI8V (X10), V2, X11 // ERROR "expected vector register in vd position" + VLOXEI8V (V1), V2, V3 // ERROR "expected integer register in rs1 position" +- VLOXEI8V (X10), X11, V0, V3 // ERROR "expected vector register in rs2 position" +- VSOXEI8V X10, V2, (X10) // ERROR "expected vector register in rd position" ++ VLOXEI8V (X10), X11, V0, V3 // ERROR "expected vector register in vs2 position" ++ VSOXEI8V X10, V2, 
(X10) // ERROR "expected vector register in vd position" + VSOXEI8V V3, V2, (V1) // ERROR "expected integer register in rs1 position" +- VSOXEI8V V3, X11, V0, (X10) // ERROR "expected vector register in rs2 position" +- VL1RV (X10), X10 // ERROR "expected vector register in rd position" ++ VSOXEI8V V3, X11, V0, (X10) // ERROR "expected vector register in vs2 position" ++ VL1RV (X10), X10 // ERROR "expected vector register in vd position" + VL1RV (V1), V3 // ERROR "expected integer register in rs1 position" +- VS1RV X11, (X11) // ERROR "expected vector register in rs1 position" ++ VS1RV X11, (X11) // ERROR "expected vector register in vs1 position" + VS1RV V3, (V1) // ERROR "expected integer register in rd position" ++ VADDVV V1, X10, V3 // ERROR "expected vector register in vs2 position" ++ VADDVX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VADDVI $16, V4, V2 // ERROR "signed immediate 16 must be in range [-16, 15] (5 bits)" ++ VADDVI $-17, V4, V2 // ERROR "signed immediate -17 must be in range [-16, 15] (5 bits)" ++ VSUBVV V1, X10, V3 // ERROR "expected vector register in vs2 position" ++ VSUBVX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VRSUBVX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VRSUBVI $16, V4, V2 // ERROR "signed immediate 16 must be in range [-16, 15] (5 bits)" ++ VRSUBVI $-17, V4, V2 // ERROR "signed immediate -17 must be in range [-16, 15] (5 bits)" ++ VNEGV X10, V3 // ERROR "expected vector register in vs2 position" ++ VNEGV V2 // ERROR "expected vector register in vd position" ++ VWADDUVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VWADDUVX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VWSUBUVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VWSUBUVX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VWADDVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VWADDVX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VWSUBVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VWSUBVX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VWADDUWV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VWADDUWX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VWSUBUWV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VWSUBUWX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VWADDWV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VWADDWX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VWSUBWV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VWSUBWX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VWCVTXXV X10, V3 // ERROR "expected vector register in vs2 position" ++ VWCVTUXXV X10, V3 // ERROR "expected vector register in vs2 position" ++ VZEXTVF2 V2, V0, V3, V4 // ERROR "expected no register in rs3" ++ VZEXTVF2 X10, V3 // ERROR "expected vector register in vs2 position" ++ VSEXTVF2 V2, V0, V3, V4 // ERROR "expected no register in rs3" ++ VSEXTVF2 X10, V3 // ERROR "expected vector register in vs2 position" ++ VZEXTVF4 V2, V0, V3, V4 // ERROR "expected no register in rs3" ++ VZEXTVF4 X10, V3 // ERROR "expected vector register in vs2 position" ++ VSEXTVF4 V2, V0, V3, V4 // ERROR "expected no register in rs3" ++ VSEXTVF4 X10, V3 // ERROR "expected vector register in vs2 position" ++ VZEXTVF8 V2, V0, V3, V4 // ERROR "expected no register 
in rs3" ++ VZEXTVF8 X10, V3 // ERROR "expected vector register in vs2 position" ++ VSEXTVF8 V2, V0, V3, V4 // ERROR "expected no register in rs3" ++ VSEXTVF8 X10, V3 // ERROR "expected vector register in vs2 position" ++ VADCVVM X10, V2, V0, V3 // ERROR "expected vector register in vs1 position" ++ VADCVXM V1, V2, V0, V3 // ERROR "expected integer register in rs1 position" ++ VADCVIM $16, V2, V0, V3 // ERROR "signed immediate 16 must be in range [-16, 15]" ++ VADCVIM $-17, V2, V0, V3 // ERROR "signed immediate -17 must be in range [-16, 15]" ++ VMADCVVM X10, V2, V0, V3 // ERROR "expected vector register in vs1 position" ++ VMADCVXM V1, V2, V0, V3 // ERROR "expected integer register in rs1 position" ++ VMADCVIM $16, V2, V0, V3 // ERROR "signed immediate 16 must be in range [-16, 15]" ++ VMADCVIM $-17, V2, V0, V3 // ERROR "signed immediate -17 must be in range [-16, 15]" ++ VMADCVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VMADCVV V1, V2, V0, V3 // ERROR "expected no register in rs3" ++ VMADCVX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VMADCVX X10, V2, V0, V3 // ERROR "expected no register in rs3" ++ VMADCVI $16, V2, V3 // ERROR "signed immediate 16 must be in range [-16, 15]" ++ VMADCVI $-17, V2, V3 // ERROR "signed immediate -17 must be in range [-16, 15]" ++ VMADCVI $15, V2, V0, V3 // ERROR "expected no register in rs3" ++ VSBCVVM X10, V2, V0, V3 // ERROR "expected vector register in vs1 position" ++ VSBCVXM V1, V2, V0, V3 // ERROR "expected integer register in rs1 position" ++ VMSBCVVM X10, V2, V0, V3 // ERROR "expected vector register in vs1 position" ++ VMSBCVXM V1, V2, V0, V3 // ERROR "expected integer register in rs1 position" ++ VMSBCVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VMSBCVV V1, V2, V0, V3 // ERROR "expected no register in rs3" ++ VMSBCVX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VMSBCVX X10, V2, V0, V3 // ERROR "expected no register in rs3" ++ VANDVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VANDVX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VANDVI $16, V2, V3 // ERROR "signed immediate 16 must be in range [-16, 15]" ++ VANDVI $-17, V2, V3 // ERROR "signed immediate -17 must be in range [-16, 15]" ++ VORVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VORVX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VORVI $16, V2, V3 // ERROR "signed immediate 16 must be in range [-16, 15]" ++ VORVI $-17, V2, V3 // ERROR "signed immediate -17 must be in range [-16, 15]" ++ VXORVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VXORVX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VXORVI $16, V2, V3 // ERROR "signed immediate 16 must be in range [-16, 15]" ++ VXORVI $-17, V2, V3 // ERROR "signed immediate -17 must be in range [-16, 15]" ++ VNOTV V3 // ERROR "expected vector register in vd position" ++ VNOTV X10, V3 // ERROR "expected vector register in vs2 position" ++ VSLLVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VSLLVX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VSLLVI $32, V2, V3 // ERROR "unsigned immediate 32 must be in range [0, 31]" ++ VSLLVI $-1, V2, V3 // ERROR "unsigned immediate -1 must be in range [0, 31]" ++ VSRLVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VSRLVX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VSRLVI $32, V2, V3 // ERROR "unsigned 
immediate 32 must be in range [0, 31]" ++ VSRLVI $-1, V2, V3 // ERROR "unsigned immediate -1 must be in range [0, 31]" ++ VSRAVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VSRAVX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VSRAVI $32, V2, V3 // ERROR "unsigned immediate 32 must be in range [0, 31]" ++ VSRAVI $-1, V2, V3 // ERROR "unsigned immediate -1 must be in range [0, 31]" ++ VNSRLWV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VNSRLWX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VNSRLWI $32, V2, V3 // ERROR "unsigned immediate 32 must be in range [0, 31]" ++ VNSRLWI $-1, V2, V3 // ERROR "unsigned immediate -1 must be in range [0, 31]" ++ VNSRAWV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VNSRAWX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VNSRAWI $32, V2, V3 // ERROR "unsigned immediate 32 must be in range [0, 31]" ++ VNSRAWI $-1, V2, V3 // ERROR "unsigned immediate -1 must be in range [0, 31]" ++ VNCVTXXW X10, V3 // ERROR "expected vector register in vs2 position" ++ VMSEQVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VMSEQVX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VMSEQVI $16, V2, V3 // ERROR "signed immediate 16 must be in range [-16, 15]" ++ VMSEQVI $-17, V2, V3 // ERROR "signed immediate -17 must be in range [-16, 15]" ++ VMSNEVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VMSNEVX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VMSNEVI $16, V2, V3 // ERROR "signed immediate 16 must be in range [-16, 15]" ++ VMSNEVI $-17, V2, V3 // ERROR "signed immediate -17 must be in range [-16, 15]" ++ VMSLTUVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VMSLTUVX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VMSLTVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VMSLTVX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VMSLEUVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VMSLEUVX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VMSLEUVI $16, V2, V3 // ERROR "signed immediate 16 must be in range [-16, 15]" ++ VMSLEUVI $-17, V2, V3 // ERROR "signed immediate -17 must be in range [-16, 15]" ++ VMSLEVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VMSLEVX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VMSLEVI $16, V2, V3 // ERROR "signed immediate 16 must be in range [-16, 15]" ++ VMSLEVI $-17, V2, V3 // ERROR "signed immediate -17 must be in range [-16, 15]" ++ VMSGTUVV X10, V2, V3 // ERROR "expected vector register in vs2 position" ++ VMSGTUVX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VMSGTUVI $16, V2, V3 // ERROR "signed immediate 16 must be in range [-16, 15]" ++ VMSGTUVI $-17, V2, V3 // ERROR "signed immediate -17 must be in range [-16, 15]" ++ VMSGTVV X10, V2, V3 // ERROR "expected vector register in vs2 position" ++ VMSGTVX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VMSGTVI $16, V2, V3 // ERROR "signed immediate 16 must be in range [-16, 15]" ++ VMSGTVI $-17, V2, V3 // ERROR "signed immediate -17 must be in range [-16, 15]" ++ VMSGEVV X10, V2, V3 // ERROR "expected vector register in vs2 position" ++ VMSGEUVV X10, V2, V3 // ERROR "expected vector register in vs2 position" ++ VMSLTVI $17, V2, V3 // ERROR "signed immediate 16 must be in range [-16, 15]" ++ 
VMSLTVI $-16, V2, V3 // ERROR "signed immediate -17 must be in range [-16, 15]" ++ VMSLTUVI $17, V2, V3 // ERROR "signed immediate 16 must be in range [-16, 15]" ++ VMSLTUVI $-16, V2, V3 // ERROR "signed immediate -17 must be in range [-16, 15]" ++ VMSGEVI $17, V2, V3 // ERROR "signed immediate 16 must be in range [-16, 15]" ++ VMSGEVI $-16, V2, V3 // ERROR "signed immediate -17 must be in range [-16, 15]" ++ VMSGEUVI $17, V2, V3 // ERROR "signed immediate 16 must be in range [-16, 15]" ++ VMSGEUVI $-16, V2, V3 // ERROR "signed immediate -17 must be in range [-16, 15]" ++ VMINUVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VMINUVX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VMINVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VMINVX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VMAXUVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VMAXUVX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VMAXVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VMAXVX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VMULVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VMULVX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VMULHVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VMULHVX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VMULHUVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VMULHUVX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VMULHSUVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VMULHSUVX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VDIVUVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VDIVUVX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VDIVVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VDIVVX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VREMUVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VREMUVX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VREMVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VREMVX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VWMULVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VWMULVX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VWMULUVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VWMULUVX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VWMULSUVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VWMULSUVX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VMACCVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VMACCVX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VNMSACVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VNMSACVX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VMADDVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VMADDVX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VNMSUBVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VNMSUBVX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VWMACCUVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VWMACCUVX V1, V2, V3 // ERROR "expected 
integer register in rs1 position" ++ VWMACCVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VWMACCVX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VWMACCSUVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VWMACCSUVX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VWMACCUSVX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VMERGEVVM X10, V2, V0, V3 // ERROR "expected vector register in vs1 position" ++ VMERGEVXM V1, V2, V0, V3 // ERROR "expected integer register in rs1 position" ++ VMERGEVIM $16, V2, V0, V3 // ERROR "signed immediate 16 must be in range [-16, 15]" ++ VMERGEVIM $-17, V2, V0, V3 // ERROR "signed immediate -17 must be in range [-16, 15]" ++ VMVVV X10, V3 // ERROR "expected vector register in vs1 position" ++ VMVVX V1, V2 // ERROR "expected integer register in rs1 position" ++ VMVVI $16, V2 // ERROR "signed immediate 16 must be in range [-16, 15]" ++ VMVVI $-17, V2 // ERROR "signed immediate -17 must be in range [-16, 15]" ++ + RET +diff --git a/src/cmd/internal/obj/riscv/anames.go b/src/cmd/internal/obj/riscv/anames.go +index 6df5f0a173..a65dfceea9 100644 +--- a/src/cmd/internal/obj/riscv/anames.go ++++ b/src/cmd/internal/obj/riscv/anames.go +@@ -654,5 +654,18 @@ var Anames = []string{ + "VL2RV", + "VL4RV", + "VL8RV", ++ "VMSGEUVI", ++ "VMSGEUVV", ++ "VMSGEVI", ++ "VMSGEVV", ++ "VMSGTUVV", ++ "VMSGTVV", ++ "VMSLTUVI", ++ "VMSLTVI", ++ "VNCVTXXW", ++ "VNEGV", ++ "VNOTV", ++ "VWCVTUXXV", ++ "VWCVTXXV", + "LAST", + } +diff --git a/src/cmd/internal/obj/riscv/cpu.go b/src/cmd/internal/obj/riscv/cpu.go +index 0ecf6dbae2..fa4c2cf372 100644 +--- a/src/cmd/internal/obj/riscv/cpu.go ++++ b/src/cmd/internal/obj/riscv/cpu.go +@@ -1182,6 +1182,19 @@ const ( + AVL2RV + AVL4RV + AVL8RV ++ AVMSGEUVI ++ AVMSGEUVV ++ AVMSGEVI ++ AVMSGEVV ++ AVMSGTUVV ++ AVMSGTVV ++ AVMSLTUVI ++ AVMSLTVI ++ AVNCVTXXW ++ AVNEGV ++ AVNOTV ++ AVWCVTUXXV ++ AVWCVTXXV + + // End marker + ALAST +diff --git a/src/cmd/internal/obj/riscv/obj.go b/src/cmd/internal/obj/riscv/obj.go +index de9851519a..2f45b406ea 100644 +--- a/src/cmd/internal/obj/riscv/obj.go ++++ b/src/cmd/internal/obj/riscv/obj.go +@@ -1328,6 +1328,43 @@ func validateRFF(ctxt *obj.Link, ins *instruction) { + wantNoneReg(ctxt, ins, "rs3", ins.rs3) + } + ++func validateRVIV(ctxt *obj.Link, ins *instruction) { ++ wantVectorReg(ctxt, ins, "vd", ins.rd) ++ wantIntReg(ctxt, ins, "rs1", ins.rs1) ++ wantVectorReg(ctxt, ins, "vs2", ins.rs2) ++ wantNoneReg(ctxt, ins, "rs3", ins.rs3) ++} ++ ++func validateRVV(ctxt *obj.Link, ins *instruction) { ++ wantVectorReg(ctxt, ins, "vd", ins.rd) ++ wantNoneReg(ctxt, ins, "rs1", ins.rs1) ++ wantVectorReg(ctxt, ins, "vs2", ins.rs2) ++ wantNoneReg(ctxt, ins, "rs3", ins.rs3) ++} ++ ++func validateRVVi(ctxt *obj.Link, ins *instruction) { ++ wantImmI(ctxt, ins, ins.imm, 5) ++ wantVectorReg(ctxt, ins, "vd", ins.rd) ++ wantNoneReg(ctxt, ins, "rs1", ins.rs1) ++ wantVectorReg(ctxt, ins, "vs2", ins.rs2) ++ wantNoneReg(ctxt, ins, "rs3", ins.rs3) ++} ++ ++func validateRVVu(ctxt *obj.Link, ins *instruction) { ++ wantImmU(ctxt, ins, ins.imm, 5) ++ wantVectorReg(ctxt, ins, "vd", ins.rd) ++ wantNoneReg(ctxt, ins, "rs1", ins.rs1) ++ wantVectorReg(ctxt, ins, "vs2", ins.rs2) ++ wantNoneReg(ctxt, ins, "rs3", ins.rs3) ++} ++ ++func validateRVVV(ctxt *obj.Link, ins *instruction) { ++ wantVectorReg(ctxt, ins, "vd", ins.rd) ++ wantVectorReg(ctxt, ins, "vs1", ins.rs1) ++ wantVectorReg(ctxt, ins, "vs2", ins.rs2) ++ wantNoneReg(ctxt, ins, "rs3", ins.rs3) 
++} ++ + func validateIII(ctxt *obj.Link, ins *instruction) { + wantImmI(ctxt, ins, ins.imm, 12) + wantIntReg(ctxt, ins, "rd", ins.rd) +@@ -1345,23 +1382,23 @@ func validateIF(ctxt *obj.Link, ins *instruction) { + } + + func validateIV(ctxt *obj.Link, ins *instruction) { +- wantVectorReg(ctxt, ins, "rd", ins.rd) ++ wantVectorReg(ctxt, ins, "vd", ins.rd) + wantIntReg(ctxt, ins, "rs1", ins.rs1) + wantNoneReg(ctxt, ins, "rs2", ins.rs2) + wantNoneReg(ctxt, ins, "rs3", ins.rs3) + } + + func validateIIIV(ctxt *obj.Link, ins *instruction) { +- wantVectorReg(ctxt, ins, "rd", ins.rd) ++ wantVectorReg(ctxt, ins, "vd", ins.rd) + wantIntReg(ctxt, ins, "rs1", ins.rs1) + wantIntReg(ctxt, ins, "rs2", ins.rs2) + wantNoneReg(ctxt, ins, "rs3", ins.rs3) + } + + func validateIVIV(ctxt *obj.Link, ins *instruction) { +- wantVectorReg(ctxt, ins, "rd", ins.rd) ++ wantVectorReg(ctxt, ins, "vd", ins.rd) + wantIntReg(ctxt, ins, "rs1", ins.rs1) +- wantVectorReg(ctxt, ins, "rs2", ins.rs2) ++ wantVectorReg(ctxt, ins, "vs2", ins.rs2) + wantNoneReg(ctxt, ins, "rs3", ins.rs3) + } + +@@ -1383,22 +1420,22 @@ func validateSF(ctxt *obj.Link, ins *instruction) { + + func validateSV(ctxt *obj.Link, ins *instruction) { + wantIntReg(ctxt, ins, "rd", ins.rd) +- wantVectorReg(ctxt, ins, "rs1", ins.rs1) ++ wantVectorReg(ctxt, ins, "vs1", ins.rs1) + wantNoneReg(ctxt, ins, "rs2", ins.rs2) + wantNoneReg(ctxt, ins, "rs3", ins.rs3) + } + + func validateSVII(ctxt *obj.Link, ins *instruction) { +- wantVectorReg(ctxt, ins, "rd", ins.rd) ++ wantVectorReg(ctxt, ins, "vd", ins.rd) + wantIntReg(ctxt, ins, "rs1", ins.rs1) + wantIntReg(ctxt, ins, "rs2", ins.rs2) + wantNoneReg(ctxt, ins, "rs3", ins.rs3) + } + + func validateSVIV(ctxt *obj.Link, ins *instruction) { +- wantVectorReg(ctxt, ins, "rd", ins.rd) ++ wantVectorReg(ctxt, ins, "vd", ins.rd) + wantIntReg(ctxt, ins, "rs1", ins.rs1) +- wantVectorReg(ctxt, ins, "rs2", ins.rs2) ++ wantVectorReg(ctxt, ins, "vs2", ins.rs2) + wantNoneReg(ctxt, ins, "rs3", ins.rs3) + } + +@@ -1475,11 +1512,15 @@ func encodeR(as obj.As, rs1, rs2, rd, funct3, funct7 uint32) uint32 { + if enc == nil { + panic("encodeR: could not encode instruction") + } ++ if enc.rs1 != 0 && rs1 != 0 { ++ panic("encodeR: instruction uses rs1, but rs1 is nonzero") ++ } + if enc.rs2 != 0 && rs2 != 0 { +- panic("encodeR: instruction uses rs2, but rs2 was nonzero") ++ panic("encodeR: instruction uses rs2, but rs2 is nonzero") + } + funct3 |= enc.funct3 + funct7 |= enc.funct7 ++ rs1 |= enc.rs1 + rs2 |= enc.rs2 + return funct7<<25 | rs2<<20 | rs1<<15 | funct3<<12 | rd<<7 | enc.opcode + } +@@ -1532,6 +1573,26 @@ func encodeRFF(ins *instruction) uint32 { + return encodeR(ins.as, regF(ins.rs2), 0, regF(ins.rd), ins.funct3, ins.funct7) + } + ++func encodeRVV(ins *instruction) uint32 { ++ return encodeR(ins.as, 0, regV(ins.rs2), regV(ins.rd), ins.funct3, ins.funct7) ++} ++ ++func encodeRVVi(ins *instruction) uint32 { ++ return encodeR(ins.as, immI(ins.as, ins.imm, 5), regV(ins.rs2), regV(ins.rd), ins.funct3, ins.funct7) ++} ++ ++func encodeRVVu(ins *instruction) uint32 { ++ return encodeR(ins.as, immU(ins.as, ins.imm, 5), regV(ins.rs2), regV(ins.rd), ins.funct3, ins.funct7) ++} ++ ++func encodeRVIV(ins *instruction) uint32 { ++ return encodeR(ins.as, regI(ins.rs1), regV(ins.rs2), regV(ins.rd), ins.funct3, ins.funct7) ++} ++ ++func encodeRVVV(ins *instruction) uint32 { ++ return encodeR(ins.as, regV(ins.rs1), regV(ins.rs2), regV(ins.rd), ins.funct3, ins.funct7) ++} ++ + // encodeI encodes an I-type RISC-V instruction. 
+ func encodeI(as obj.As, rs1, rd, imm, funct7 uint32) uint32 { + enc := encode(as) +@@ -1815,6 +1876,11 @@ var ( + rFIEncoding = encoding{encode: encodeRFI, validate: validateRFI, length: 4} + rIFEncoding = encoding{encode: encodeRIF, validate: validateRIF, length: 4} + rFFEncoding = encoding{encode: encodeRFF, validate: validateRFF, length: 4} ++ rVVEncoding = encoding{encode: encodeRVV, validate: validateRVV, length: 4} ++ rVViEncoding = encoding{encode: encodeRVVi, validate: validateRVVi, length: 4} ++ rVVuEncoding = encoding{encode: encodeRVVu, validate: validateRVVu, length: 4} ++ rVIVEncoding = encoding{encode: encodeRVIV, validate: validateRVIV, length: 4} ++ rVVVEncoding = encoding{encode: encodeRVVV, validate: validateRVVV, length: 4} + + iIIEncoding = encoding{encode: encodeIII, validate: validateIII, length: 4} + iFEncoding = encoding{encode: encodeIF, validate: validateIF, length: 4} +@@ -2152,7 +2218,7 @@ var instructions = [ALAST & obj.AMask]instructionData{ + AVSOXEI32V & obj.AMask: {enc: sVIVEncoding}, + AVSOXEI64V & obj.AMask: {enc: sVIVEncoding}, + +- // 31.7.9. Vector Load/Store Whole Register Instructions ++ // 31.7.9: Vector Load/Store Whole Register Instructions + AVL1RE8V & obj.AMask: {enc: iVEncoding}, + AVL1RE16V & obj.AMask: {enc: iVEncoding}, + AVL1RE32V & obj.AMask: {enc: iVEncoding}, +@@ -2174,6 +2240,177 @@ var instructions = [ALAST & obj.AMask]instructionData{ + AVS4RV & obj.AMask: {enc: sVEncoding}, + AVS8RV & obj.AMask: {enc: sVEncoding}, + ++ // 31.11.1: Vector Single-Width Integer Add and Subtract ++ AVADDVV & obj.AMask: {enc: rVVVEncoding}, ++ AVADDVX & obj.AMask: {enc: rVIVEncoding}, ++ AVADDVI & obj.AMask: {enc: rVViEncoding}, ++ AVSUBVV & obj.AMask: {enc: rVVVEncoding}, ++ AVSUBVX & obj.AMask: {enc: rVIVEncoding}, ++ AVRSUBVX & obj.AMask: {enc: rVIVEncoding}, ++ AVRSUBVI & obj.AMask: {enc: rVViEncoding}, ++ ++ // 31.11.2: Vector Widening Integer Add/Subtract ++ AVWADDUVV & obj.AMask: {enc: rVVVEncoding}, ++ AVWADDUVX & obj.AMask: {enc: rVIVEncoding}, ++ AVWSUBUVV & obj.AMask: {enc: rVVVEncoding}, ++ AVWSUBUVX & obj.AMask: {enc: rVIVEncoding}, ++ AVWADDVV & obj.AMask: {enc: rVVVEncoding}, ++ AVWADDVX & obj.AMask: {enc: rVIVEncoding}, ++ AVWSUBVV & obj.AMask: {enc: rVVVEncoding}, ++ AVWSUBVX & obj.AMask: {enc: rVIVEncoding}, ++ AVWADDUWV & obj.AMask: {enc: rVVVEncoding}, ++ AVWADDUWX & obj.AMask: {enc: rVIVEncoding}, ++ AVWSUBUWV & obj.AMask: {enc: rVVVEncoding}, ++ AVWSUBUWX & obj.AMask: {enc: rVIVEncoding}, ++ AVWADDWV & obj.AMask: {enc: rVVVEncoding}, ++ AVWADDWX & obj.AMask: {enc: rVIVEncoding}, ++ AVWSUBWV & obj.AMask: {enc: rVVVEncoding}, ++ AVWSUBWX & obj.AMask: {enc: rVIVEncoding}, ++ ++ // 31.11.3: Vector Integer Extension ++ AVZEXTVF2 & obj.AMask: {enc: rVVEncoding}, ++ AVSEXTVF2 & obj.AMask: {enc: rVVEncoding}, ++ AVZEXTVF4 & obj.AMask: {enc: rVVEncoding}, ++ AVSEXTVF4 & obj.AMask: {enc: rVVEncoding}, ++ AVZEXTVF8 & obj.AMask: {enc: rVVEncoding}, ++ AVSEXTVF8 & obj.AMask: {enc: rVVEncoding}, ++ ++ // 31.11.4: Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions ++ AVADCVVM & obj.AMask: {enc: rVVVEncoding}, ++ AVADCVXM & obj.AMask: {enc: rVIVEncoding}, ++ AVADCVIM & obj.AMask: {enc: rVViEncoding}, ++ AVMADCVVM & obj.AMask: {enc: rVVVEncoding}, ++ AVMADCVXM & obj.AMask: {enc: rVIVEncoding}, ++ AVMADCVIM & obj.AMask: {enc: rVViEncoding}, ++ AVMADCVV & obj.AMask: {enc: rVVVEncoding}, ++ AVMADCVX & obj.AMask: {enc: rVIVEncoding}, ++ AVMADCVI & obj.AMask: {enc: rVViEncoding}, ++ AVSBCVVM & obj.AMask: {enc: rVVVEncoding}, ++ AVSBCVXM & 
obj.AMask: {enc: rVIVEncoding}, ++ AVMSBCVVM & obj.AMask: {enc: rVVVEncoding}, ++ AVMSBCVXM & obj.AMask: {enc: rVIVEncoding}, ++ AVMSBCVV & obj.AMask: {enc: rVVVEncoding}, ++ AVMSBCVX & obj.AMask: {enc: rVIVEncoding}, ++ ++ // 31.11.5: Vector Bitwise Logical Instructions ++ AVANDVV & obj.AMask: {enc: rVVVEncoding}, ++ AVANDVX & obj.AMask: {enc: rVIVEncoding}, ++ AVANDVI & obj.AMask: {enc: rVViEncoding}, ++ AVORVV & obj.AMask: {enc: rVVVEncoding}, ++ AVORVX & obj.AMask: {enc: rVIVEncoding}, ++ AVORVI & obj.AMask: {enc: rVViEncoding}, ++ AVXORVV & obj.AMask: {enc: rVVVEncoding}, ++ AVXORVX & obj.AMask: {enc: rVIVEncoding}, ++ AVXORVI & obj.AMask: {enc: rVViEncoding}, ++ ++ // 31.11.6: Vector Single-Width Shift Instructions ++ AVSLLVV & obj.AMask: {enc: rVVVEncoding}, ++ AVSLLVX & obj.AMask: {enc: rVIVEncoding}, ++ AVSLLVI & obj.AMask: {enc: rVVuEncoding}, ++ AVSRLVV & obj.AMask: {enc: rVVVEncoding}, ++ AVSRLVX & obj.AMask: {enc: rVIVEncoding}, ++ AVSRLVI & obj.AMask: {enc: rVVuEncoding}, ++ AVSRAVV & obj.AMask: {enc: rVVVEncoding}, ++ AVSRAVX & obj.AMask: {enc: rVIVEncoding}, ++ AVSRAVI & obj.AMask: {enc: rVVuEncoding}, ++ ++ // 31.11.7: Vector Narrowing Integer Right Shift Instructions ++ AVNSRLWV & obj.AMask: {enc: rVVVEncoding}, ++ AVNSRLWX & obj.AMask: {enc: rVIVEncoding}, ++ AVNSRLWI & obj.AMask: {enc: rVVuEncoding}, ++ AVNSRAWV & obj.AMask: {enc: rVVVEncoding}, ++ AVNSRAWX & obj.AMask: {enc: rVIVEncoding}, ++ AVNSRAWI & obj.AMask: {enc: rVVuEncoding}, ++ ++ // 31.11.8: Vector Integer Compare Instructions ++ AVMSEQVV & obj.AMask: {enc: rVVVEncoding}, ++ AVMSEQVX & obj.AMask: {enc: rVIVEncoding}, ++ AVMSEQVI & obj.AMask: {enc: rVViEncoding}, ++ AVMSNEVV & obj.AMask: {enc: rVVVEncoding}, ++ AVMSNEVX & obj.AMask: {enc: rVIVEncoding}, ++ AVMSNEVI & obj.AMask: {enc: rVViEncoding}, ++ AVMSLTUVV & obj.AMask: {enc: rVVVEncoding}, ++ AVMSLTUVX & obj.AMask: {enc: rVIVEncoding}, ++ AVMSLTVV & obj.AMask: {enc: rVVVEncoding}, ++ AVMSLTVX & obj.AMask: {enc: rVIVEncoding}, ++ AVMSLEUVV & obj.AMask: {enc: rVVVEncoding}, ++ AVMSLEUVX & obj.AMask: {enc: rVIVEncoding}, ++ AVMSLEUVI & obj.AMask: {enc: rVViEncoding}, ++ AVMSLEVV & obj.AMask: {enc: rVVVEncoding}, ++ AVMSLEVX & obj.AMask: {enc: rVIVEncoding}, ++ AVMSLEVI & obj.AMask: {enc: rVViEncoding}, ++ AVMSGTUVX & obj.AMask: {enc: rVIVEncoding}, ++ AVMSGTUVI & obj.AMask: {enc: rVViEncoding}, ++ AVMSGTVX & obj.AMask: {enc: rVIVEncoding}, ++ AVMSGTVI & obj.AMask: {enc: rVViEncoding}, ++ ++ // 31.11.9: Vector Integer Min/Max Instructions ++ AVMINUVV & obj.AMask: {enc: rVVVEncoding}, ++ AVMINUVX & obj.AMask: {enc: rVIVEncoding}, ++ AVMINVV & obj.AMask: {enc: rVVVEncoding}, ++ AVMINVX & obj.AMask: {enc: rVIVEncoding}, ++ AVMAXUVV & obj.AMask: {enc: rVVVEncoding}, ++ AVMAXUVX & obj.AMask: {enc: rVIVEncoding}, ++ AVMAXVV & obj.AMask: {enc: rVVVEncoding}, ++ AVMAXVX & obj.AMask: {enc: rVIVEncoding}, ++ ++ // 31.11.10: Vector Single-Width Integer Multiply Instructions ++ AVMULVV & obj.AMask: {enc: rVVVEncoding}, ++ AVMULVX & obj.AMask: {enc: rVIVEncoding}, ++ AVMULHVV & obj.AMask: {enc: rVVVEncoding}, ++ AVMULHVX & obj.AMask: {enc: rVIVEncoding}, ++ AVMULHUVV & obj.AMask: {enc: rVVVEncoding}, ++ AVMULHUVX & obj.AMask: {enc: rVIVEncoding}, ++ AVMULHSUVV & obj.AMask: {enc: rVVVEncoding}, ++ AVMULHSUVX & obj.AMask: {enc: rVIVEncoding}, ++ ++ // 31.11.11: Vector Integer Divide Instructions ++ AVDIVUVV & obj.AMask: {enc: rVVVEncoding}, ++ AVDIVUVX & obj.AMask: {enc: rVIVEncoding}, ++ AVDIVVV & obj.AMask: {enc: rVVVEncoding}, ++ AVDIVVX & obj.AMask: {enc: 
rVIVEncoding}, ++ AVREMUVV & obj.AMask: {enc: rVVVEncoding}, ++ AVREMUVX & obj.AMask: {enc: rVIVEncoding}, ++ AVREMVV & obj.AMask: {enc: rVVVEncoding}, ++ AVREMVX & obj.AMask: {enc: rVIVEncoding}, ++ ++ // 31.11.12: Vector Widening Integer Multiply Instructions ++ AVWMULVV & obj.AMask: {enc: rVVVEncoding}, ++ AVWMULVX & obj.AMask: {enc: rVIVEncoding}, ++ AVWMULUVV & obj.AMask: {enc: rVVVEncoding}, ++ AVWMULUVX & obj.AMask: {enc: rVIVEncoding}, ++ AVWMULSUVV & obj.AMask: {enc: rVVVEncoding}, ++ AVWMULSUVX & obj.AMask: {enc: rVIVEncoding}, ++ ++ // 31.11.13: Vector Single-Width Integer Multiply-Add Instructions ++ AVMACCVV & obj.AMask: {enc: rVVVEncoding}, ++ AVMACCVX & obj.AMask: {enc: rVIVEncoding}, ++ AVNMSACVV & obj.AMask: {enc: rVVVEncoding}, ++ AVNMSACVX & obj.AMask: {enc: rVIVEncoding}, ++ AVMADDVV & obj.AMask: {enc: rVVVEncoding}, ++ AVMADDVX & obj.AMask: {enc: rVIVEncoding}, ++ AVNMSUBVV & obj.AMask: {enc: rVVVEncoding}, ++ AVNMSUBVX & obj.AMask: {enc: rVIVEncoding}, ++ ++ // 31.11.14: Vector Widening Integer Multiply-Add Instructions ++ AVWMACCUVV & obj.AMask: {enc: rVVVEncoding}, ++ AVWMACCUVX & obj.AMask: {enc: rVIVEncoding}, ++ AVWMACCVV & obj.AMask: {enc: rVVVEncoding}, ++ AVWMACCVX & obj.AMask: {enc: rVIVEncoding}, ++ AVWMACCSUVV & obj.AMask: {enc: rVVVEncoding}, ++ AVWMACCSUVX & obj.AMask: {enc: rVIVEncoding}, ++ AVWMACCUSVX & obj.AMask: {enc: rVIVEncoding}, ++ ++ // 31.11.15: Vector Integer Merge Instructions ++ AVMERGEVVM & obj.AMask: {enc: rVVVEncoding}, ++ AVMERGEVXM & obj.AMask: {enc: rVIVEncoding}, ++ AVMERGEVIM & obj.AMask: {enc: rVViEncoding}, ++ ++ // 31.11.16: Vector Integer Move Instructions ++ AVMVVV & obj.AMask: {enc: rVVVEncoding}, ++ AVMVVX & obj.AMask: {enc: rVIVEncoding}, ++ AVMVVI & obj.AMask: {enc: rVViEncoding}, ++ + // + // Privileged ISA + // +@@ -3050,6 +3287,142 @@ func instructionsForProg(p *obj.Prog) []*instruction { + p.Ctxt.Diag("%v: too many operands for instruction", p) + } + ins.rd, ins.rs1, ins.rs2 = uint32(p.To.Reg), uint32(p.From.Reg), obj.REG_NONE ++ ++ case AVADDVV, AVADDVX, AVSUBVV, AVSUBVX, AVRSUBVX, AVWADDUVV, AVWADDUVX, AVWSUBUVV, AVWSUBUVX, ++ AVWADDVV, AVWADDVX, AVWSUBVV, AVWSUBVX, AVWADDUWV, AVWADDUWX, AVWSUBUWV, AVWSUBUWX, ++ AVWADDWV, AVWADDWX, AVWSUBWV, AVWSUBWX, AVANDVV, AVANDVX, AVORVV, AVORVX, AVXORVV, AVXORVX, ++ AVSLLVV, AVSLLVX, AVSRLVV, AVSRLVX, AVSRAVV, AVSRAVX, ++ AVMSEQVV, AVMSEQVX, AVMSNEVV, AVMSNEVX, AVMSLTUVV, AVMSLTUVX, AVMSLTVV, AVMSLTVX, ++ AVMSLEUVV, AVMSLEUVX, AVMSLEVV, AVMSLEVX, AVMSGTUVX, AVMSGTVX, ++ AVMINUVV, AVMINUVX, AVMINVV, AVMINVX, AVMAXUVV, AVMAXUVX, AVMAXVV, AVMAXVX, ++ AVMULVV, AVMULVX, AVMULHVV, AVMULHVX, AVMULHUVV, AVMULHUVX, AVMULHSUVV, AVMULHSUVX, ++ AVDIVUVV, AVDIVUVX, AVDIVVV, AVDIVVX, AVREMUVV, AVREMUVX, AVREMVV, AVREMVX, ++ AVWMULVV, AVWMULVX, AVWMULUVV, AVWMULUVX, AVWMULSUVV, AVWMULSUVX, ++ AVNSRLWV, AVNSRLWX, AVNSRAWV, AVNSRAWX, ++ AVMACCVV, AVMACCVX, AVNMSACVV, AVNMSACVX, AVMADDVV, AVMADDVX, AVNMSUBVV, AVNMSUBVX, ++ AVWMACCUVV, AVWMACCUVX, AVWMACCVV, AVWMACCVX, AVWMACCSUVV, AVWMACCSUVX, AVWMACCUSVX: ++ // Set mask bit ++ switch { ++ case ins.rs3 == obj.REG_NONE: ++ ins.funct7 |= 1 // unmasked ++ case ins.rs3 != REG_V0: ++ p.Ctxt.Diag("%v: invalid vector mask register", p) ++ } ++ ins.rd, ins.rs1, ins.rs2, ins.rs3 = uint32(p.To.Reg), uint32(p.From.Reg), uint32(p.Reg), obj.REG_NONE ++ ++ case AVADDVI, AVRSUBVI, AVANDVI, AVORVI, AVXORVI, AVMSEQVI, AVMSNEVI, AVMSLEUVI, AVMSLEVI, AVMSGTUVI, AVMSGTVI, ++ AVSLLVI, AVSRLVI, AVSRAVI, AVNSRLWI, AVNSRAWI: ++ // Set mask bit ++ switch { ++ case ins.rs3 
== obj.REG_NONE: ++ ins.funct7 |= 1 // unmasked ++ case ins.rs3 != REG_V0: ++ p.Ctxt.Diag("%v: invalid vector mask register", p) ++ } ++ ins.rd, ins.rs1, ins.rs2, ins.rs3 = uint32(p.To.Reg), obj.REG_NONE, uint32(p.Reg), obj.REG_NONE ++ ++ case AVZEXTVF2, AVSEXTVF2, AVZEXTVF4, AVSEXTVF4, AVZEXTVF8, AVSEXTVF8: ++ // Set mask bit ++ switch { ++ case ins.rs1 == obj.REG_NONE: ++ ins.funct7 |= 1 // unmasked ++ case ins.rs1 != REG_V0: ++ p.Ctxt.Diag("%v: invalid vector mask register", p) ++ } ++ ins.rs1 = obj.REG_NONE ++ ++ case AVMVVV, AVMVVX: ++ if ins.rs1 != obj.REG_NONE { ++ p.Ctxt.Diag("%v: too many operands for instruction", p) ++ } ++ ins.rd, ins.rs1, ins.rs2 = uint32(p.To.Reg), uint32(p.From.Reg), REG_V0 ++ ++ case AVMVVI: ++ if ins.rs1 != obj.REG_NONE { ++ p.Ctxt.Diag("%v: too many operands for instruction", p) ++ } ++ ins.rd, ins.rs1, ins.rs2 = uint32(p.To.Reg), obj.REG_NONE, REG_V0 ++ ++ case AVADCVVM, AVADCVXM, AVMADCVVM, AVMADCVXM, AVSBCVVM, AVSBCVXM, AVMSBCVVM, AVMSBCVXM, AVADCVIM, AVMADCVIM, ++ AVMERGEVVM, AVMERGEVXM, AVMERGEVIM: ++ if ins.rs3 != REG_V0 { ++ p.Ctxt.Diag("%v: invalid vector mask register", p) ++ } ++ ins.rd, ins.rs1, ins.rs2, ins.rs3 = uint32(p.To.Reg), uint32(p.From.Reg), uint32(p.Reg), obj.REG_NONE ++ ++ case AVMADCVV, AVMADCVX, AVMSBCVV, AVMSBCVX, AVMADCVI: ++ ins.rd, ins.rs1, ins.rs2 = uint32(p.To.Reg), uint32(p.From.Reg), uint32(p.Reg) ++ ++ case AVNEGV, AVWCVTXXV, AVWCVTUXXV, AVNCVTXXW: ++ // Set mask bit ++ switch { ++ case ins.rs1 == obj.REG_NONE: ++ ins.funct7 |= 1 // unmasked ++ case ins.rs1 != REG_V0: ++ p.Ctxt.Diag("%v: invalid vector mask register", p) ++ } ++ switch ins.as { ++ case AVNEGV: ++ ins.as = AVRSUBVX ++ case AVWCVTXXV: ++ ins.as = AVWADDVX ++ case AVWCVTUXXV: ++ ins.as = AVWADDUVX ++ case AVNCVTXXW: ++ ins.as = AVNSRLWX ++ } ++ ins.rd, ins.rs1, ins.rs2 = uint32(p.To.Reg), REG_X0, uint32(p.From.Reg) ++ ++ case AVNOTV: ++ // Set mask bit ++ switch { ++ case ins.rs1 == obj.REG_NONE: ++ ins.funct7 |= 1 // unmasked ++ case ins.rs1 != REG_V0: ++ p.Ctxt.Diag("%v: invalid vector mask register", p) ++ } ++ ins.as = AVXORVI ++ ins.rd, ins.rs1, ins.rs2, ins.imm = uint32(p.To.Reg), obj.REG_NONE, uint32(p.From.Reg), -1 ++ ++ case AVMSGTVV, AVMSGTUVV, AVMSGEVV, AVMSGEUVV: ++ // Set mask bit ++ switch { ++ case ins.rs3 == obj.REG_NONE: ++ ins.funct7 |= 1 // unmasked ++ case ins.rs3 != REG_V0: ++ p.Ctxt.Diag("%v: invalid vector mask register", p) ++ } ++ switch ins.as { ++ case AVMSGTVV: ++ ins.as = AVMSLTVV ++ case AVMSGTUVV: ++ ins.as = AVMSLTUVV ++ case AVMSGEVV: ++ ins.as = AVMSLEVV ++ case AVMSGEUVV: ++ ins.as = AVMSLEUVV ++ } ++ ins.rd, ins.rs1, ins.rs2, ins.rs3 = uint32(p.To.Reg), uint32(p.Reg), uint32(p.From.Reg), obj.REG_NONE ++ ++ case AVMSLTVI, AVMSLTUVI, AVMSGEVI, AVMSGEUVI: ++ // Set mask bit ++ switch { ++ case ins.rs3 == obj.REG_NONE: ++ ins.funct7 |= 1 // unmasked ++ case ins.rs3 != REG_V0: ++ p.Ctxt.Diag("%v: invalid vector mask register", p) ++ } ++ switch ins.as { ++ case AVMSLTVI: ++ ins.as = AVMSLEVI ++ case AVMSLTUVI: ++ ins.as = AVMSLEUVI ++ case AVMSGEVI: ++ ins.as = AVMSGTVI ++ case AVMSGEUVI: ++ ins.as = AVMSGTUVI ++ } ++ ins.rd, ins.rs1, ins.rs2, ins.rs3, ins.imm = uint32(p.To.Reg), obj.REG_NONE, uint32(p.Reg), obj.REG_NONE, ins.imm-1 + } + + for _, ins := range inss { +-- +2.50.1 + diff --git a/2014-cmd-internal-obj-riscv-add-support-for-vector-fixed-.patch b/2014-cmd-internal-obj-riscv-add-support-for-vector-fixed-.patch new file mode 100644 index 0000000000000000000000000000000000000000..132124326a09d221d96eda52dd7cfd9247b4d9f4 
--- /dev/null +++ b/2014-cmd-internal-obj-riscv-add-support-for-vector-fixed-.patch @@ -0,0 +1,266 @@ +From ffdae97645a5f7770fe1667e56665a7ceb681d87 Mon Sep 17 00:00:00 2001 +From: Joel Sing +Date: Mon, 6 Jan 2025 00:30:30 +1100 +Subject: [PATCH 14/38] cmd/internal/obj/riscv: add support for vector + fixed-point arithmetic instructions + +Add support for vector fixed-point arithmetic instructions to the +RISC-V assembler. This includes single width saturating addition +and subtraction, averaging addition and subtraction and scaling +shift instructions. + +Change-Id: I9aa27e9565ad016ba5bb2b479e1ba70db24e4ff5 +Reviewed-on: https://go-review.googlesource.com/c/go/+/646776 +Reviewed-by: Mark Ryan +Reviewed-by: Carlos Amedee +Reviewed-by: Dmitri Shuralyov +LUCI-TryBot-Result: Go LUCI +--- + src/cmd/asm/internal/asm/testdata/riscv64.s | 74 +++++++++++++++++++ + .../asm/internal/asm/testdata/riscv64error.s | 32 ++++++++ + .../internal/asm/testdata/riscv64validation.s | 32 ++++++++ + src/cmd/internal/obj/riscv/obj.go | 51 ++++++++++++- + 4 files changed, 186 insertions(+), 3 deletions(-) + +diff --git a/src/cmd/asm/internal/asm/testdata/riscv64.s b/src/cmd/asm/internal/asm/testdata/riscv64.s +index 91c1c1e5af..4468639769 100644 +--- a/src/cmd/asm/internal/asm/testdata/riscv64.s ++++ b/src/cmd/asm/internal/asm/testdata/riscv64.s +@@ -863,6 +863,80 @@ start: + VMVVX X10, V3 // d741055e + VMVVI $15, V3 // d7b1075e + ++ // 31.12.1: Vector Single-Width Saturating Add and Subtract ++ VSADDUVV V1, V2, V3 // d7812082 ++ VSADDUVV V1, V2, V0, V3 // d7812080 ++ VSADDUVX X10, V2, V3 // d7412582 ++ VSADDUVX X10, V2, V0, V3 // d7412580 ++ VSADDUVI $15, V2, V3 // d7b12782 ++ VSADDUVI $15, V2, V0, V3 // d7b12780 ++ VSADDVV V1, V2, V3 // d7812086 ++ VSADDVV V1, V2, V0, V3 // d7812084 ++ VSADDVX X10, V2, V3 // d7412586 ++ VSADDVX X10, V2, V0, V3 // d7412584 ++ VSADDVI $15, V2, V3 // d7b12786 ++ VSADDVI $15, V2, V0, V3 // d7b12784 ++ VSSUBUVV V1, V2, V3 // d781208a ++ VSSUBUVV V1, V2, V0, V3 // d7812088 ++ VSSUBUVX X10, V2, V3 // d741258a ++ VSSUBUVX X10, V2, V0, V3 // d7412588 ++ VSSUBVV V1, V2, V3 // d781208e ++ VSSUBVV V1, V2, V0, V3 // d781208c ++ VSSUBVX X10, V2, V3 // d741258e ++ VSSUBVX X10, V2, V0, V3 // d741258c ++ ++ // 31.12.2: Vector Single-Width Averaging Add and Subtract ++ VAADDUVV V1, V2, V3 // d7a12022 ++ VAADDUVV V1, V2, V0, V3 // d7a12020 ++ VAADDUVX X10, V2, V3 // d7612522 ++ VAADDUVX X10, V2, V0, V3 // d7612520 ++ VAADDVV V1, V2, V3 // d7a12026 ++ VAADDVV V1, V2, V0, V3 // d7a12024 ++ VAADDVX X10, V2, V3 // d7612526 ++ VAADDVX X10, V2, V0, V3 // d7612524 ++ VASUBUVV V1, V2, V3 // d7a1202a ++ VASUBUVV V1, V2, V0, V3 // d7a12028 ++ VASUBUVX X10, V2, V3 // d761252a ++ VASUBUVX X10, V2, V0, V3 // d7612528 ++ VASUBVV V1, V2, V3 // d7a1202e ++ VASUBVV V1, V2, V0, V3 // d7a1202c ++ VASUBVX X10, V2, V3 // d761252e ++ VASUBVX X10, V2, V0, V3 // d761252c ++ ++ // 31.12.3: Vector Single-Width Fractional Multiply with Rounding and Saturation ++ VSMULVV V1, V2, V3 // d781209e ++ VSMULVV V1, V2, V0, V3 // d781209c ++ VSMULVX X10, V2, V3 // d741259e ++ VSMULVX X10, V2, V0, V3 // d741259c ++ ++ // 31.12.4: Vector Single-Width Scaling Shift Instructions ++ VSSRLVV V1, V2, V3 // d78120aa ++ VSSRLVV V1, V2, V0, V3 // d78120a8 ++ VSSRLVX X10, V2, V3 // d74125aa ++ VSSRLVX X10, V2, V0, V3 // d74125a8 ++ VSSRLVI $15, V2, V3 // d7b127aa ++ VSSRLVI $15, V2, V0, V3 // d7b127a8 ++ VSSRAVV V1, V2, V3 // d78120ae ++ VSSRAVV V1, V2, V0, V3 // d78120ac ++ VSSRAVX X10, V2, V3 // d74125ae ++ VSSRAVX X10, V2, V0, V3 // d74125ac 
++ VSSRAVI $16, V2, V3 // d73128ae ++ VSSRAVI $16, V2, V0, V3 // d73128ac ++ ++ // 31.12.5: Vector Narrowing Fixed-Point Clip Instructions ++ VNCLIPUWV V1, V2, V3 // d78120ba ++ VNCLIPUWV V1, V2, V0, V3 // d78120b8 ++ VNCLIPUWX X10, V2, V3 // d74125ba ++ VNCLIPUWX X10, V2, V0, V3 // d74125b8 ++ VNCLIPUWI $16, V2, V3 // d73128ba ++ VNCLIPUWI $16, V2, V0, V3 // d73128b8 ++ VNCLIPWV V1, V2, V3 // d78120be ++ VNCLIPWV V1, V2, V0, V3 // d78120bc ++ VNCLIPWX X10, V2, V3 // d74125be ++ VNCLIPWX X10, V2, V0, V3 // d74125bc ++ VNCLIPWI $16, V2, V3 // d73128be ++ VNCLIPWI $16, V2, V0, V3 // d73128bc ++ + // + // Privileged ISA + // +diff --git a/src/cmd/asm/internal/asm/testdata/riscv64error.s b/src/cmd/asm/internal/asm/testdata/riscv64error.s +index 4f882aa56d..3efd5b1ea1 100644 +--- a/src/cmd/asm/internal/asm/testdata/riscv64error.s ++++ b/src/cmd/asm/internal/asm/testdata/riscv64error.s +@@ -219,5 +219,37 @@ TEXT errors(SB),$0 + VMVVV V1, V2, V3 // ERROR "too many operands for instruction" + VMVVX X10, V2, V3 // ERROR "too many operands for instruction" + VMVVI $15, V2, V3 // ERROR "too many operands for instruction" ++ VSADDUVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VSADDUVX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VSADDUVI $15, V2, V4, V3 // ERROR "invalid vector mask register" ++ VSADDVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VSADDVX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VSADDVI $15, V2, V4, V3 // ERROR "invalid vector mask register" ++ VSSUBUVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VSSUBUVX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VSSUBVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VSSUBVX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VAADDUVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VAADDUVX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VAADDVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VAADDVX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VASUBUVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VASUBUVX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VASUBVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VASUBVX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VSMULVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VSMULVX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VSSRLVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VSSRLVX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VSSRLVI $15, V2, V4, V3 // ERROR "invalid vector mask register" ++ VSSRAVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VSSRAVX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VSSRAVI $15, V2, V4, V3 // ERROR "invalid vector mask register" ++ VNCLIPUWV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VNCLIPUWX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VNCLIPUWI $16, V2, V4, V3 // ERROR "invalid vector mask register" ++ VNCLIPWV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VNCLIPWX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VNCLIPWI $16, V2, V4, V3 // ERROR "invalid vector mask register" + + RET +diff --git a/src/cmd/asm/internal/asm/testdata/riscv64validation.s b/src/cmd/asm/internal/asm/testdata/riscv64validation.s +index 602cab2c2e..c6f71e64fb 100644 +--- a/src/cmd/asm/internal/asm/testdata/riscv64validation.s ++++ 
b/src/cmd/asm/internal/asm/testdata/riscv64validation.s +@@ -237,5 +237,37 @@ TEXT validation(SB),$0 + VMVVX V1, V2 // ERROR "expected integer register in rs1 position" + VMVVI $16, V2 // ERROR "signed immediate 16 must be in range [-16, 15]" + VMVVI $-17, V2 // ERROR "signed immediate -17 must be in range [-16, 15]" ++ VSADDUVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VSADDUVX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VSADDUVI $16, V2, V3 // ERROR "signed immediate 16 must be in range [-16, 15]" ++ VSADDUVI $-17, V2, V3 // ERROR "signed immediate -17 must be in range [-16, 15]" ++ VSSUBUVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VSSUBUVX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VAADDUVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VAADDUVX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VAADDVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VAADDVX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VASUBUVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VASUBUVX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VASUBVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VASUBVX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VSMULVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VSMULVX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VSSRLVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VSSRLVX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VSSRLVI $32, V2, V3 // ERROR "signed immediate 32 must be in range [0, 31]" ++ VSSRLVI $-1, V2, V3 // ERROR "signed immediate -1 must be in range [0, 31]" ++ VSSRAVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VSSRAVX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VSSRAVI $32, V2, V3 // ERROR "signed immediate 32 must be in range [0, 31]" ++ VSSRAVI $-1, V2, V3 // ERROR "signed immediate -1 must be in range [0, 31]" ++ VNCLIPUWV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VNCLIPUWX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VNCLIPUWI $32, V2, V3 // ERROR "signed immediate 32 must be in range [0, 31]" ++ VNCLIPUWI $-1, V2, V3 // ERROR "signed immediate -1 must be in range [0, 31]" ++ VNCLIPWV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VNCLIPWX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VNCLIPWI $32, V2, V3 // ERROR "signed immediate 32 must be in range [0, 31]" ++ VNCLIPWI $-1, V2, V3 // ERROR "signed immediate -1 must be in range [0, 31]" + + RET +diff --git a/src/cmd/internal/obj/riscv/obj.go b/src/cmd/internal/obj/riscv/obj.go +index 2f45b406ea..a285ce4614 100644 +--- a/src/cmd/internal/obj/riscv/obj.go ++++ b/src/cmd/internal/obj/riscv/obj.go +@@ -2411,6 +2411,48 @@ var instructions = [ALAST & obj.AMask]instructionData{ + AVMVVX & obj.AMask: {enc: rVIVEncoding}, + AVMVVI & obj.AMask: {enc: rVViEncoding}, + ++ // 31.12.1: Vector Single-Width Saturating Add and Subtract ++ AVSADDUVV & obj.AMask: {enc: rVVVEncoding}, ++ AVSADDUVX & obj.AMask: {enc: rVIVEncoding}, ++ AVSADDUVI & obj.AMask: {enc: rVViEncoding}, ++ AVSADDVV & obj.AMask: {enc: rVVVEncoding}, ++ AVSADDVX & obj.AMask: {enc: rVIVEncoding}, ++ AVSADDVI & obj.AMask: {enc: rVViEncoding}, ++ AVSSUBUVV & obj.AMask: {enc: rVVVEncoding}, 
++ AVSSUBUVX & obj.AMask: {enc: rVIVEncoding}, ++ AVSSUBVV & obj.AMask: {enc: rVVVEncoding}, ++ AVSSUBVX & obj.AMask: {enc: rVIVEncoding}, ++ ++ // 31.12.2: Vector Single-Width Averaging Add and Subtract ++ AVAADDUVV & obj.AMask: {enc: rVVVEncoding}, ++ AVAADDUVX & obj.AMask: {enc: rVIVEncoding}, ++ AVAADDVV & obj.AMask: {enc: rVVVEncoding}, ++ AVAADDVX & obj.AMask: {enc: rVIVEncoding}, ++ AVASUBUVV & obj.AMask: {enc: rVVVEncoding}, ++ AVASUBUVX & obj.AMask: {enc: rVIVEncoding}, ++ AVASUBVV & obj.AMask: {enc: rVVVEncoding}, ++ AVASUBVX & obj.AMask: {enc: rVIVEncoding}, ++ ++ // 31.12.3: Vector Single-Width Fractional Multiply with Rounding and Saturation ++ AVSMULVV & obj.AMask: {enc: rVVVEncoding}, ++ AVSMULVX & obj.AMask: {enc: rVIVEncoding}, ++ ++ // 31.12.4: Vector Single-Width Scaling Shift Instructions ++ AVSSRLVV & obj.AMask: {enc: rVVVEncoding}, ++ AVSSRLVX & obj.AMask: {enc: rVIVEncoding}, ++ AVSSRLVI & obj.AMask: {enc: rVVuEncoding}, ++ AVSSRAVV & obj.AMask: {enc: rVVVEncoding}, ++ AVSSRAVX & obj.AMask: {enc: rVIVEncoding}, ++ AVSSRAVI & obj.AMask: {enc: rVVuEncoding}, ++ ++ // 31.12.5: Vector Narrowing Fixed-Point Clip Instructions ++ AVNCLIPUWV & obj.AMask: {enc: rVVVEncoding}, ++ AVNCLIPUWX & obj.AMask: {enc: rVIVEncoding}, ++ AVNCLIPUWI & obj.AMask: {enc: rVVuEncoding}, ++ AVNCLIPWV & obj.AMask: {enc: rVVVEncoding}, ++ AVNCLIPWX & obj.AMask: {enc: rVIVEncoding}, ++ AVNCLIPWI & obj.AMask: {enc: rVVuEncoding}, ++ + // + // Privileged ISA + // +@@ -3297,10 +3339,13 @@ func instructionsForProg(p *obj.Prog) []*instruction { + AVMINUVV, AVMINUVX, AVMINVV, AVMINVX, AVMAXUVV, AVMAXUVX, AVMAXVV, AVMAXVX, + AVMULVV, AVMULVX, AVMULHVV, AVMULHVX, AVMULHUVV, AVMULHUVX, AVMULHSUVV, AVMULHSUVX, + AVDIVUVV, AVDIVUVX, AVDIVVV, AVDIVVX, AVREMUVV, AVREMUVX, AVREMVV, AVREMVX, +- AVWMULVV, AVWMULVX, AVWMULUVV, AVWMULUVX, AVWMULSUVV, AVWMULSUVX, +- AVNSRLWV, AVNSRLWX, AVNSRAWV, AVNSRAWX, ++ AVWMULVV, AVWMULVX, AVWMULUVV, AVWMULUVX, AVWMULSUVV, AVWMULSUVX, AVNSRLWV, AVNSRLWX, AVNSRAWV, AVNSRAWX, + AVMACCVV, AVMACCVX, AVNMSACVV, AVNMSACVX, AVMADDVV, AVMADDVX, AVNMSUBVV, AVNMSUBVX, +- AVWMACCUVV, AVWMACCUVX, AVWMACCVV, AVWMACCVX, AVWMACCSUVV, AVWMACCSUVX, AVWMACCUSVX: ++ AVWMACCUVV, AVWMACCUVX, AVWMACCVV, AVWMACCVX, AVWMACCSUVV, AVWMACCSUVX, AVWMACCUSVX, ++ AVSADDUVV, AVSADDUVX, AVSADDUVI, AVSADDVV, AVSADDVX, AVSADDVI, AVSSUBUVV, AVSSUBUVX, AVSSUBVV, AVSSUBVX, ++ AVAADDUVV, AVAADDUVX, AVAADDVV, AVAADDVX, AVASUBUVV, AVASUBUVX, AVASUBVV, AVASUBVX, ++ AVSMULVV, AVSMULVX, AVSSRLVV, AVSSRLVX, AVSSRLVI, AVSSRAVV, AVSSRAVX, AVSSRAVI, ++ AVNCLIPUWV, AVNCLIPUWX, AVNCLIPUWI, AVNCLIPWV, AVNCLIPWX, AVNCLIPWI: + // Set mask bit + switch { + case ins.rs3 == obj.REG_NONE: +-- +2.50.1 + diff --git a/2015-crypto-sha512-remove-unnecessary-move-op-replace-wit.patch b/2015-crypto-sha512-remove-unnecessary-move-op-replace-wit.patch new file mode 100644 index 0000000000000000000000000000000000000000..1925283083a11eb40940d47bc620d5a5a41ff954 --- /dev/null +++ b/2015-crypto-sha512-remove-unnecessary-move-op-replace-wit.patch @@ -0,0 +1,66 @@ +From df974c54e789de924a915a515f9726e6839b86ea Mon Sep 17 00:00:00 2001 +From: Julian Zhu +Date: Tue, 15 Apr 2025 21:17:20 +0800 +Subject: [PATCH 15/38] crypto/sha512: remove unnecessary move op, replace with + direct add +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +goos: linux +goarch: riscv64 +pkg: crypto/sha512 + │ o │ n │ + │ sec/op │ sec/op vs base │ +Hash8Bytes/New-4 3.499µ ± 0% 3.444µ ± 0% -1.56% (p=0.000 n=10) 
+Hash8Bytes/Sum384-4 4.012µ ± 0% 3.957µ ± 0% -1.37% (p=0.000 n=10) +Hash8Bytes/Sum512-4 4.218µ ± 0% 4.162µ ± 0% -1.32% (p=0.000 n=10) +Hash1K/New-4 17.07µ ± 0% 16.57µ ± 0% -2.97% (p=0.000 n=10) +Hash1K/Sum384-4 17.59µ ± 0% 17.11µ ± 0% -2.76% (p=0.000 n=10) +Hash1K/Sum512-4 17.78µ ± 0% 17.30µ ± 0% -2.72% (p=0.000 n=10) +Hash8K/New-4 112.2µ ± 0% 108.7µ ± 0% -3.08% (p=0.000 n=10) +Hash8K/Sum384-4 112.7µ ± 0% 109.2µ ± 0% -3.09% (p=0.000 n=10) +Hash8K/Sum512-4 112.9µ ± 0% 109.4µ ± 0% -3.07% (p=0.000 n=10) +geomean 19.72µ 19.24µ -2.44% + + │ o │ n │ + │ B/s │ B/s vs base │ +Hash8Bytes/New-4 2.184Mi ± 0% 2.213Mi ± 0% +1.31% (p=0.000 n=10) +Hash8Bytes/Sum384-4 1.898Mi ± 1% 1.926Mi ± 0% +1.51% (p=0.000 n=10) +Hash8Bytes/Sum512-4 1.812Mi ± 1% 1.831Mi ± 0% +1.05% (p=0.000 n=10) +Hash1K/New-4 57.20Mi ± 0% 58.95Mi ± 0% +3.06% (p=0.000 n=10) +Hash1K/Sum384-4 55.51Mi ± 0% 57.09Mi ± 0% +2.84% (p=0.000 n=10) +Hash1K/Sum512-4 54.91Mi ± 0% 56.44Mi ± 0% +2.79% (p=0.000 n=10) +Hash8K/New-4 69.63Mi ± 0% 71.84Mi ± 0% +3.17% (p=0.000 n=10) +Hash8K/Sum384-4 69.30Mi ± 0% 71.52Mi ± 0% +3.20% (p=0.000 n=10) +Hash8K/Sum512-4 69.19Mi ± 0% 71.39Mi ± 0% +3.18% (p=0.000 n=10) +geomean 19.65Mi 20.13Mi +2.45% + +Change-Id: Ib68b934276ec08246d4ae60ef9870c233f0eac69 +Reviewed-on: https://go-review.googlesource.com/c/go/+/665595 +LUCI-TryBot-Result: Go LUCI +Reviewed-by: David Chase +Reviewed-by: Joel Sing +Reviewed-by: Roland Shoemaker +--- + src/crypto/internal/fips140/sha512/sha512block_riscv64.s | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +diff --git a/src/crypto/internal/fips140/sha512/sha512block_riscv64.s b/src/crypto/internal/fips140/sha512/sha512block_riscv64.s +index 2b156271e6..f25ed62237 100644 +--- a/src/crypto/internal/fips140/sha512/sha512block_riscv64.s ++++ b/src/crypto/internal/fips140/sha512/sha512block_riscv64.s +@@ -138,9 +138,8 @@ + #define SHA512ROUND(index, a, b, c, d, e, f, g, h) \ + SHA512T1(index, e, f, g, h); \ + SHA512T2(a, b, c); \ +- MOV X6, h; \ + ADD X5, d; \ +- ADD X5, h ++ ADD X6, X5, h + + #define SHA512ROUND0(index, a, b, c, d, e, f, g, h) \ + MSGSCHEDULE0(index); \ +-- +2.50.1 + diff --git a/2016-crypto-sha256-improve-performance-of-riscv64-assembl.patch b/2016-crypto-sha256-improve-performance-of-riscv64-assembl.patch new file mode 100644 index 0000000000000000000000000000000000000000..b1bf3df782b10737d787871ad4ad54c3c57bfb04 --- /dev/null +++ b/2016-crypto-sha256-improve-performance-of-riscv64-assembl.patch @@ -0,0 +1,119 @@ +From e3b02d86387ac374bea22c5686fb6c38c0e9f8bd Mon Sep 17 00:00:00 2001 +From: Julian Zhu +Date: Tue, 31 Dec 2024 13:35:56 +0800 +Subject: [PATCH 16/38] crypto/sha256: improve performance of riscv64 assembly +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Simplified the implementation of Ch and Maj by reducing instructions, based on CL 605495 which made the same change for SHA-512. 
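For reference, the instruction reduction described above rests on two standard boolean identities, which the updated assembly also quotes in its comments: Ch(x, y, z) = (x AND y) XOR (NOT x AND z) = ((y XOR z) AND x) XOR z, and Maj(x, y, z) = (x AND y) XOR (x AND z) XOR (y AND z) = ((y XOR z) AND x) XOR (y AND z). The short Go sketch below is not part of the upstream patch; it is only a minimal standalone check that the reduced forms agree with the FIPS 180-4 definitions, with function names chosen for illustration.

package main

import (
	"fmt"
	"math/rand"
)

// chRef and majRef follow the FIPS 180-4 definitions used by the old assembly.
func chRef(x, y, z uint32) uint32  { return (x & y) ^ (^x & z) }
func majRef(x, y, z uint32) uint32 { return (x & y) ^ (x & z) ^ (y & z) }

// chOpt and majOpt use the reduced forms emitted by the new assembly.
func chOpt(x, y, z uint32) uint32  { return ((y ^ z) & x) ^ z }
func majOpt(x, y, z uint32) uint32 { return ((y ^ z) & x) ^ (y & z) }

func main() {
	// Spot-check the identities on random 32-bit inputs.
	for i := 0; i < 1_000_000; i++ {
		x, y, z := rand.Uint32(), rand.Uint32(), rand.Uint32()
		if chRef(x, y, z) != chOpt(x, y, z) || majRef(x, y, z) != majOpt(x, y, z) {
			fmt.Println("identity mismatch:", x, y, z)
			return
		}
	}
	fmt.Println("Ch and Maj identities hold for all sampled inputs")
}

Roughly speaking, the reduced Ch needs one fewer bitwise operation (no NOT) and the reduced Maj one fewer AND, which is where the per-round instruction savings in the diff below come from.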
+ +goos: linux +goarch: riscv64 +pkg: crypto/sha256 +cpu: Spacemit(R) X60 + │ oldsha256 │ newsha256 │ + │ sec/op │ sec/op vs base │ +Hash8Bytes/New-8 2.303µ ± 0% 2.098µ ± 0% -8.90% (p=0.000 n=10) +Hash8Bytes/Sum224-8 2.535µ ± 0% 2.329µ ± 0% -8.13% (p=0.000 n=10) +Hash8Bytes/Sum256-8 2.558µ ± 0% 2.352µ ± 0% -8.04% (p=0.000 n=10) +Hash1K/New-8 28.67µ ± 0% 25.21µ ± 0% -12.06% (p=0.000 n=10) +Hash1K/Sum224-8 28.89µ ± 0% 25.43µ ± 0% -11.99% (p=0.000 n=10) +Hash1K/Sum256-8 28.91µ ± 0% 25.43µ ± 0% -12.04% (p=0.000 n=10) +Hash8K/New-8 218.0µ ± 1% 192.7µ ± 2% -11.58% (p=0.000 n=10) +Hash8K/Sum224-8 218.0µ ± 1% 193.6µ ± 1% -11.20% (p=0.000 n=10) +Hash8K/Sum256-8 219.1µ ± 1% 193.4µ ± 1% -11.74% (p=0.000 n=10) +geomean 24.93µ 22.28µ -10.65% + + │ oldsha256 │ newsha256 │ + │ B/s │ B/s vs base │ +Hash8Bytes/New-8 3.309Mi ± 0% 3.633Mi ± 0% +9.80% (p=0.000 n=10) +Hash8Bytes/Sum224-8 3.009Mi ± 0% 3.271Mi ± 0% +8.72% (p=0.000 n=10) +Hash8Bytes/Sum256-8 2.985Mi ± 0% 3.242Mi ± 0% +8.63% (p=0.000 n=10) +Hash1K/New-8 34.06Mi ± 0% 38.73Mi ± 0% +13.72% (p=0.000 n=10) +Hash1K/Sum224-8 33.80Mi ± 0% 38.40Mi ± 0% +13.63% (p=0.000 n=10) +Hash1K/Sum256-8 33.78Mi ± 0% 38.40Mi ± 0% +13.69% (p=0.000 n=10) +Hash8K/New-8 35.84Mi ± 1% 40.54Mi ± 2% +13.10% (p=0.000 n=10) +Hash8K/Sum224-8 35.83Mi ± 1% 40.35Mi ± 1% +12.61% (p=0.000 n=10) +Hash8K/Sum256-8 35.66Mi ± 1% 40.40Mi ± 1% +13.29% (p=0.000 n=10) +geomean 15.54Mi 17.39Mi +11.89% + +Change-Id: I9aa692fcfd70634dc6c308db9b5d06bd82ac2302 +Reviewed-on: https://go-review.googlesource.com/c/go/+/639495 +LUCI-TryBot-Result: Go LUCI +Reviewed-by: Joel Sing +Reviewed-by: Junyang Shao +Reviewed-by: David Chase +Reviewed-by: Meng Zhuo +--- + .../fips140/sha256/sha256block_riscv64.s | 27 +++++++++---------- + 1 file changed, 13 insertions(+), 14 deletions(-) + +diff --git a/src/crypto/internal/fips140/sha256/sha256block_riscv64.s b/src/crypto/internal/fips140/sha256/sha256block_riscv64.s +index 847b9699a6..567d44781c 100644 +--- a/src/crypto/internal/fips140/sha256/sha256block_riscv64.s ++++ b/src/crypto/internal/fips140/sha256/sha256block_riscv64.s +@@ -88,47 +88,46 @@ + // T1 = h + BIGSIGMA1(e) + Ch(e, f, g) + Kt + Wt + // BIGSIGMA1(x) = ROTR(6,x) XOR ROTR(11,x) XOR ROTR(25,x) + // Ch(x, y, z) = (x AND y) XOR (NOT x AND z) ++// = ((y XOR z) AND x) XOR z + #define SHA256T1(index, e, f, g, h) \ + MOVWU (index*4)(X18), X8; \ + ADD X5, h; \ + RORW $6, e, X6; \ + ADD X8, h; \ + RORW $11, e, X7; \ +- XOR X7, X6; \ + RORW $25, e, X8; \ ++ XOR X7, X6; \ ++ XOR f, g, X5; \ + XOR X8, X6; \ ++ AND e, X5; \ + ADD X6, h; \ +- AND e, f, X5; \ +- NOT e, X7; \ +- AND g, X7; \ +- XOR X7, X5; \ ++ XOR g, X5; \ + ADD h, X5 + + // Calculate T2 in X6. + // T2 = BIGSIGMA0(a) + Maj(a, b, c) + // BIGSIGMA0(x) = ROTR(2,x) XOR ROTR(13,x) XOR ROTR(22,x) + // Maj(x, y, z) = (x AND y) XOR (x AND z) XOR (y AND z) ++// = ((y XOR z) AND x) XOR (y AND z) + #define SHA256T2(a, b, c) \ + RORW $2, a, X6; \ + RORW $13, a, X7; \ +- XOR X7, X6; \ + RORW $22, a, X8; \ ++ XOR X7, X6; \ ++ XOR b, c, X9; \ ++ AND b, c, X7; \ ++ AND a, X9; \ + XOR X8, X6; \ +- AND a, b, X7; \ +- AND a, c, X8; \ +- XOR X8, X7; \ +- AND b, c, X9; \ +- XOR X9, X7; \ +- ADD X7, X6 ++ XOR X7, X9; \ ++ ADD X9, X6 + + // Calculate T1 and T2, then e = d + T1 and a = T1 + T2. + // The values for e and a are stored in d and h, ready for rotation. 
+ #define SHA256ROUND(index, a, b, c, d, e, f, g, h) \ + SHA256T1(index, e, f, g, h); \ + SHA256T2(a, b, c); \ +- MOV X6, h; \ + ADD X5, d; \ +- ADD X5, h ++ ADD X6, X5, h + + #define SHA256ROUND0(index, a, b, c, d, e, f, g, h) \ + MSGSCHEDULE0(index); \ +-- +2.50.1 + diff --git a/2017-cmd-internal-obj-riscv-add-support-for-vector-floati.patch b/2017-cmd-internal-obj-riscv-add-support-for-vector-floati.patch new file mode 100644 index 0000000000000000000000000000000000000000..7da24f4e7839ffe8b0223da8bdbe43e01c92a057 --- /dev/null +++ b/2017-cmd-internal-obj-riscv-add-support-for-vector-floati.patch @@ -0,0 +1,1735 @@ +From 2435111d664e0fe17bfaa98c8bc1cbe11b6b733e Mon Sep 17 00:00:00 2001 +From: Joel Sing +Date: Wed, 12 Feb 2025 23:40:28 +1100 +Subject: [PATCH 17/38] cmd/internal/obj/riscv: add support for vector + floating-point instructions + +Add support for vector floating-point instructions to the RISC-V +assembler. This includes single-width and widening addition and +subtraction, multiplication and division, fused multiply-addition, +comparison, min/max, sign-injection, classification and type +conversion instructions. + +Change-Id: I8bceb1c5d7eead0561ba5407ace00805a6144f51 +Reviewed-on: https://go-review.googlesource.com/c/go/+/646777 +Reviewed-by: Carlos Amedee +Reviewed-by: Junyang Shao +LUCI-TryBot-Result: Go LUCI +Reviewed-by: Meng Zhuo +Reviewed-by: Mark Ryan +--- + src/cmd/asm/internal/asm/testdata/riscv64.s | 224 +++++++ + .../asm/internal/asm/testdata/riscv64error.s | 500 +++++++++------ + .../internal/asm/testdata/riscv64validation.s | 603 ++++++++++-------- + src/cmd/internal/obj/riscv/anames.go | 4 + + src/cmd/internal/obj/riscv/cpu.go | 4 + + src/cmd/internal/obj/riscv/obj.go | 213 ++++++- + 6 files changed, 1076 insertions(+), 472 deletions(-) + +diff --git a/src/cmd/asm/internal/asm/testdata/riscv64.s b/src/cmd/asm/internal/asm/testdata/riscv64.s +index 4468639769..d17576655b 100644 +--- a/src/cmd/asm/internal/asm/testdata/riscv64.s ++++ b/src/cmd/asm/internal/asm/testdata/riscv64.s +@@ -937,6 +937,230 @@ start: + VNCLIPWI $16, V2, V3 // d73128be + VNCLIPWI $16, V2, V0, V3 // d73128bc + ++ // 31.13.2: Vector Single-Width Floating-Point Add/Subtract Instructions ++ VFADDVV V1, V2, V3 // d7912002 ++ VFADDVV V1, V2, V0, V3 // d7912000 ++ VFADDVF F10, V2, V3 // d7512502 ++ VFADDVF F10, V2, V0, V3 // d7512500 ++ VFSUBVV V1, V2, V3 // d791200a ++ VFSUBVV V1, V2, V0, V3 // d7912008 ++ VFSUBVF F10, V2, V3 // d751250a ++ VFSUBVF F10, V2, V0, V3 // d7512508 ++ VFRSUBVF F10, V2, V3 // d751259e ++ VFRSUBVF F10, V2, V0, V3 // d751259c ++ ++ // 31.13.3: Vector Widening Floating-Point Add/Subtract Instructions ++ VFWADDVV V1, V2, V3 // d79120c2 ++ VFWADDVV V1, V2, V0, V3 // d79120c0 ++ VFWADDVF F10, V2, V3 // d75125c2 ++ VFWADDVF F10, V2, V0, V3 // d75125c0 ++ VFWSUBVV V1, V2, V3 // d79120ca ++ VFWSUBVV V1, V2, V0, V3 // d79120c8 ++ VFWSUBVF F10, V2, V3 // d75125ca ++ VFWSUBVF F10, V2, V0, V3 // d75125c8 ++ VFWADDWV V1, V2, V3 // d79120d2 ++ VFWADDWV V1, V2, V0, V3 // d79120d0 ++ VFWADDWF F10, V2, V3 // d75125d2 ++ VFWADDWF F10, V2, V0, V3 // d75125d0 ++ VFWSUBWV V1, V2, V3 // d79120da ++ VFWSUBWV V1, V2, V0, V3 // d79120d8 ++ VFWSUBWF F10, V2, V3 // d75125da ++ VFWSUBWF F10, V2, V0, V3 // d75125d8 ++ ++ // 31.13.4: Vector Single-Width Floating-Point Multiply/Divide Instructions ++ VFMULVV V1, V2, V3 // d7912092 ++ VFMULVV V1, V2, V0, V3 // d7912090 ++ VFMULVF F10, V2, V3 // d7512592 ++ VFMULVF F10, V2, V0, V3 // d7512590 ++ VFDIVVV V1, V2, V3 // d7912082 ++ VFDIVVV V1, V2, V0, V3 // 
d7912080 ++ VFDIVVF F10, V2, V3 // d7512582 ++ VFDIVVF F10, V2, V0, V3 // d7512580 ++ VFRDIVVF F10, V2, V3 // d7512586 ++ VFRDIVVF F10, V2, V0, V3 // d7512584 ++ ++ // 31.13.5: Vector Widening Floating-Point Multiply ++ VFWMULVV V1, V2, V3 // d79120e2 ++ VFWMULVV V1, V2, V0, V3 // d79120e0 ++ VFWMULVF F10, V2, V3 // d75125e2 ++ VFWMULVF F10, V2, V0, V3 // d75125e0 ++ ++ // 31.13.6: Vector Single-Width Floating-Point Fused Multiply-Add Instructions ++ VFMACCVV V2, V1, V3 // d79120b2 ++ VFMACCVV V2, V1, V0, V3 // d79120b0 ++ VFMACCVF V2, F10, V3 // d75125b2 ++ VFMACCVF V2, F10, V0, V3 // d75125b0 ++ VFNMACCVV V2, V1, V3 // d79120b6 ++ VFNMACCVV V2, V1, V0, V3 // d79120b4 ++ VFNMACCVF V2, F10, V3 // d75125b6 ++ VFNMACCVF V2, F10, V0, V3 // d75125b4 ++ VFMSACVV V2, V1, V3 // d79120ba ++ VFMSACVV V2, V1, V0, V3 // d79120b8 ++ VFMSACVF V2, F10, V3 // d75125ba ++ VFMSACVF V2, F10, V0, V3 // d75125b8 ++ VFNMSACVV V2, V1, V3 // d79120be ++ VFNMSACVV V2, V1, V0, V3 // d79120bc ++ VFNMSACVF V2, F10, V3 // d75125be ++ VFNMSACVF V2, F10, V0, V3 // d75125bc ++ VFMADDVV V2, V1, V3 // d79120a2 ++ VFMADDVV V2, V1, V0, V3 // d79120a0 ++ VFMADDVF V2, F10, V3 // d75125a2 ++ VFMADDVF V2, F10, V0, V3 // d75125a0 ++ VFNMADDVV V2, V1, V3 // d79120a6 ++ VFNMADDVV V2, V1, V0, V3 // d79120a4 ++ VFNMADDVF V2, F10, V3 // d75125a6 ++ VFNMADDVF V2, F10, V0, V3 // d75125a4 ++ VFMSUBVV V2, V1, V3 // d79120aa ++ VFMSUBVV V2, V1, V0, V3 // d79120a8 ++ VFMSUBVF V2, F10, V3 // d75125aa ++ VFMSUBVF V2, F10, V0, V3 // d75125a8 ++ VFNMSUBVV V2, V1, V3 // d79120ae ++ VFNMSUBVV V2, V1, V0, V3 // d79120ac ++ VFNMSUBVF V2, F10, V3 // d75125ae ++ VFNMSUBVF V2, F10, V0, V3 // d75125ac ++ ++ // 31.13.7: Vector Widening Floating-Point Fused Multiply-Add Instructions ++ VFWMACCVV V2, V1, V3 // d79120f2 ++ VFWMACCVV V2, V1, V0, V3 // d79120f0 ++ VFWMACCVF V2, F10, V3 // d75125f2 ++ VFWMACCVF V2, F10, V0, V3 // d75125f0 ++ VFWNMACCVV V2, V1, V3 // d79120f6 ++ VFWNMACCVV V2, V1, V0, V3 // d79120f4 ++ VFWNMACCVF V2, F10, V3 // d75125f6 ++ VFWNMACCVF V2, F10, V0, V3 // d75125f4 ++ VFWMSACVV V2, V1, V3 // d79120fa ++ VFWMSACVV V2, V1, V0, V3 // d79120f8 ++ VFWMSACVF V2, F10, V3 // d75125fa ++ VFWMSACVF V2, F10, V0, V3 // d75125f8 ++ VFWNMSACVV V2, V1, V3 // d79120fe ++ VFWNMSACVV V2, V1, V0, V3 // d79120fc ++ VFWNMSACVF V2, F10, V3 // d75125fe ++ VFWNMSACVF V2, F10, V0, V3 // d75125fc ++ ++ // 31.13.8: Vector Floating-Point Square-Root Instruction ++ VFSQRTV V2, V3 // d711204e ++ VFSQRTV V2, V0, V3 // d711204c ++ ++ // 31.13.9: Vector Floating-Point Reciprocal Square-Root Estimate Instruction ++ VFRSQRT7V V2, V3 // d711224e ++ VFRSQRT7V V2, V0, V3 // d711224c ++ ++ // 31.13.10: Vector Floating-Point Reciprocal Estimate Instruction ++ VFREC7V V2, V3 // d791224e ++ VFREC7V V2, V0, V3 // d791224c ++ ++ // 31.13.11: Vector Floating-Point MIN/MAX Instructions ++ VFMINVV V1, V2, V3 // d7912012 ++ VFMINVV V1, V2, V0, V3 // d7912010 ++ VFMINVF F10, V2, V3 // d7512512 ++ VFMINVF F10, V2, V0, V3 // d7512510 ++ VFMAXVV V1, V2, V3 // d791201a ++ VFMAXVV V1, V2, V0, V3 // d7912018 ++ VFMAXVF F10, V2, V3 // d751251a ++ VFMAXVF F10, V2, V0, V3 // d7512518 ++ ++ // 31.13.12: Vector Floating-Point Sign-Injection Instructions ++ VFSGNJVV V1, V2, V3 // d7912022 ++ VFSGNJVV V1, V2, V0, V3 // d7912020 ++ VFSGNJVF F10, V2, V3 // d7512522 ++ VFSGNJVF F10, V2, V0, V3 // d7512520 ++ VFSGNJNVV V1, V2, V3 // d7912026 ++ VFSGNJNVV V1, V2, V0, V3 // d7912024 ++ VFSGNJNVF F10, V2, V3 // d7512526 ++ VFSGNJNVF F10, V2, V0, V3 // d7512524 ++ VFSGNJXVV V1, V2, V3 // d791202a 
++ VFSGNJXVV V1, V2, V0, V3 // d7912028 ++ VFSGNJXVF F10, V2, V3 // d751252a ++ VFSGNJXVF F10, V2, V0, V3 // d7512528 ++ VFNEGV V2, V3 // d7112126 ++ VFNEGV V2, V0, V3 // d7112124 ++ VFABSV V2, V3 // d711212a ++ VFABSV V2, V0, V3 // d7112128 ++ ++ // 31.13.13: Vector Floating-Point Compare Instructions ++ VMFEQVV V1, V2, V3 // d7912062 ++ VMFEQVV V1, V2, V0, V3 // d7912060 ++ VMFEQVF F10, V2, V3 // d7512562 ++ VMFEQVF F10, V2, V0, V3 // d7512560 ++ VMFNEVV V1, V2, V3 // d7912072 ++ VMFNEVV V1, V2, V0, V3 // d7912070 ++ VMFNEVF F10, V2, V3 // d7512572 ++ VMFNEVF F10, V2, V0, V3 // d7512570 ++ VMFLTVV V1, V2, V3 // d791206e ++ VMFLTVV V1, V2, V0, V3 // d791206c ++ VMFLTVF F10, V2, V3 // d751256e ++ VMFLTVF F10, V2, V0, V3 // d751256c ++ VMFLEVV V1, V2, V3 // d7912066 ++ VMFLEVV V1, V2, V0, V3 // d7912064 ++ VMFLEVF F10, V2, V3 // d7512566 ++ VMFLEVF F10, V2, V0, V3 // d7512564 ++ VMFGTVF F10, V2, V3 // d7512576 ++ VMFGTVF F10, V2, V0, V3 // d7512574 ++ VMFGEVF F10, V2, V3 // d751257e ++ VMFGEVF F10, V2, V0, V3 // d751257c ++ VMFGTVV V1, V2, V3 // d711116e ++ VMFGTVV V1, V2, V0, V3 // d711116c ++ VMFGEVV V1, V2, V3 // d7111166 ++ VMFGEVV V1, V2, V0, V3 // d7111164 ++ ++ // 31.13.14: Vector Floating-Point Classify Instruction ++ VFCLASSV V2, V3 // d711284e ++ VFCLASSV V2, V0, V3 // d711284c ++ ++ // 31.13.15: Vector Floating-Point Merge Instruction ++ VFMERGEVFM F10, V2, V0, V3 // d751255c ++ ++ // 31.13.16: Vector Floating-Point Move Instruction ++ VFMVVF F10, V3 // d751055e ++ ++ // 31.13.17: Single-Width Floating-Point/Integer Type-Convert Instructions ++ VFCVTXUFV V2, V3 // d711204a ++ VFCVTXUFV V2, V0, V3 // d7112048 ++ VFCVTXFV V2, V3 // d791204a ++ VFCVTXFV V2, V0, V3 // d7912048 ++ VFCVTRTZXUFV V2, V3 // d711234a ++ VFCVTRTZXUFV V2, V0, V3 // d7112348 ++ VFCVTRTZXFV V2, V3 // d791234a ++ VFCVTRTZXFV V2, V0, V3 // d7912348 ++ VFCVTFXUV V2, V3 // d711214a ++ VFCVTFXUV V2, V0, V3 // d7112148 ++ VFCVTFXV V2, V3 // d791214a ++ VFCVTFXV V2, V0, V3 // d7912148 ++ ++ // 31.13.18: Widening Floating-Point/Integer Type-Convert Instructions ++ VFWCVTXUFV V2, V3 // d711244a ++ VFWCVTXUFV V2, V0, V3 // d7112448 ++ VFWCVTXFV V2, V3 // d791244a ++ VFWCVTXFV V2, V0, V3 // d7912448 ++ VFWCVTRTZXUFV V2, V3 // d711274a ++ VFWCVTRTZXUFV V2, V0, V3 // d7112748 ++ VFWCVTRTZXFV V2, V3 // d791274a ++ VFWCVTRTZXFV V2, V0, V3 // d7912748 ++ VFWCVTFXUV V2, V3 // d711254a ++ VFWCVTFXUV V2, V0, V3 // d7112548 ++ VFWCVTFXV V2, V3 // d791254a ++ VFWCVTFXV V2, V0, V3 // d7912548 ++ VFWCVTFFV V2, V3 // d711264a ++ VFWCVTFFV V2, V0, V3 // d7112648 ++ ++ // 31.13.19: Narrowing Floating-Point/Integer Type-Convert Instructions ++ VFNCVTXUFW V2, V3 // d711284a ++ VFNCVTXUFW V2, V0, V3 // d7112848 ++ VFNCVTXFW V2, V3 // d791284a ++ VFNCVTXFW V2, V0, V3 // d7912848 ++ VFNCVTRTZXUFW V2, V3 // d7112b4a ++ VFNCVTRTZXUFW V2, V0, V3 // d7112b48 ++ VFNCVTRTZXFW V2, V3 // d7912b4a ++ VFNCVTRTZXFW V2, V0, V3 // d7912b48 ++ VFNCVTFXUW V2, V3 // d711294a ++ VFNCVTFXUW V2, V0, V3 // d7112948 ++ VFNCVTFXW V2, V3 // d791294a ++ VFNCVTFXW V2, V0, V3 // d7912948 ++ VFNCVTFFW V2, V3 // d7112a4a ++ VFNCVTFFW V2, V0, V3 // d7112a48 ++ VFNCVTRODFFW V2, V3 // d7912a4a ++ VFNCVTRODFFW V2, V0, V3 // d7912a48 ++ + // + // Privileged ISA + // +diff --git a/src/cmd/asm/internal/asm/testdata/riscv64error.s b/src/cmd/asm/internal/asm/testdata/riscv64error.s +index 3efd5b1ea1..c96eaa2383 100644 +--- a/src/cmd/asm/internal/asm/testdata/riscv64error.s ++++ b/src/cmd/asm/internal/asm/testdata/riscv64error.s +@@ -48,208 +48,302 @@ TEXT errors(SB),$0 + // + 
// "V" Standard Extension for Vector Operations, Version 1.0 + // +- VSETIVLI X10, E32, M2, TA, MA, X12 // ERROR "expected immediate value" +- VLE8V (X10), V1, V3 // ERROR "invalid vector mask register" +- VSE8V V3, V1, (X10) // ERROR "invalid vector mask register" +- VLSE8V (X10), X10, V1, V3 // ERROR "invalid vector mask register" +- VSSE8V V3, X11, V1, (X10) // ERROR "invalid vector mask register" +- VLUXEI8V (X10), V2, V1, V3 // ERROR "invalid vector mask register" +- VSUXEI8V V3, V2, V1, (X10) // ERROR "invalid vector mask register" +- VLOXEI8V (X10), V2, V1, V3 // ERROR "invalid vector mask register" +- VSOXEI8V V3, V2, V1, (X10) // ERROR "invalid vector mask register" +- VL1RV (X10), V0, V3 // ERROR "too many operands for instruction" +- VS1RV V3, V0, (X11) // ERROR "too many operands for instruction" +- VADDVV V1, V2, V4, V3 // ERROR "invalid vector mask register" +- VADDVX X10, V2, V1, V3 // ERROR "invalid vector mask register" +- VADDVI $15, V4, V1, V2 // ERROR "invalid vector mask register" +- VSUBVV V1, V2, V4, V3 // ERROR "invalid vector mask register" +- VSUBVX X10, V2, V1, V3 // ERROR "invalid vector mask register" +- VRSUBVX X10, V2, V1, V3 // ERROR "invalid vector mask register" +- VRSUBVI $15, V4, V1, V2 // ERROR "invalid vector mask register" +- VNEGV V2, V3, V4 // ERROR "invalid vector mask register" +- VWADDUVV V1, V2, V4, V3 // ERROR "invalid vector mask register" +- VWADDUVX X10, V2, V4, V3 // ERROR "invalid vector mask register" +- VWSUBUVV V1, V2, V4, V3 // ERROR "invalid vector mask register" +- VWSUBUVX X10, V2, V4, V3 // ERROR "invalid vector mask register" +- VWADDVV V1, V2, V4, V3 // ERROR "invalid vector mask register" +- VWADDVX X10, V2, V4, V3 // ERROR "invalid vector mask register" +- VWSUBVV V1, V2, V4, V3 // ERROR "invalid vector mask register" +- VWSUBVX X10, V2, V4, V3 // ERROR "invalid vector mask register" +- VWADDUWV V1, V2, V4, V3 // ERROR "invalid vector mask register" +- VWADDUWX X10, V2, V4, V3 // ERROR "invalid vector mask register" +- VWSUBUWV V1, V2, V4, V3 // ERROR "invalid vector mask register" +- VWSUBUWX X10, V2, V4, V3 // ERROR "invalid vector mask register" +- VWADDWV V1, V2, V4, V3 // ERROR "invalid vector mask register" +- VWADDWX X10, V2, V4, V3 // ERROR "invalid vector mask register" +- VWSUBWV V1, V2, V4, V3 // ERROR "invalid vector mask register" +- VWSUBWX X10, V2, V4, V3 // ERROR "invalid vector mask register" +- VWCVTXXV V2, V1, V3 // ERROR "invalid vector mask register" +- VWCVTUXXV V2, V1, V3 // ERROR "invalid vector mask register" +- VZEXTVF2 V2, V3, V4 // ERROR "invalid vector mask register" +- VSEXTVF2 V2, V3, V4 // ERROR "invalid vector mask register" +- VZEXTVF4 V2, V3, V4 // ERROR "invalid vector mask register" +- VSEXTVF4 V2, V3, V4 // ERROR "invalid vector mask register" +- VZEXTVF8 V2, V3, V4 // ERROR "invalid vector mask register" +- VSEXTVF8 V2, V3, V4 // ERROR "invalid vector mask register" +- VADCVVM V1, V2, V4, V3 // ERROR "invalid vector mask register" +- VADCVVM V1, V2, V3 // ERROR "invalid vector mask register" +- VADCVXM X10, V2, V4, V3 // ERROR "invalid vector mask register" +- VADCVXM X10, V2, V3 // ERROR "invalid vector mask register" +- VADCVIM $15, V2, V1, V3 // ERROR "invalid vector mask register" +- VADCVIM $15, V2, V3 // ERROR "invalid vector mask register" +- VMADCVVM V1, V2, V4, V3 // ERROR "invalid vector mask register" +- VMADCVVM V1, V2, V3 // ERROR "invalid vector mask register" +- VMADCVXM X10, V2, V4, V3 // ERROR "invalid vector mask register" +- VMADCVXM X10, V2, V3 // ERROR "invalid vector 
mask register" +- VMADCVIM $15, V2, V1, V3 // ERROR "invalid vector mask register" +- VMADCVIM $15, V2, V3 // ERROR "invalid vector mask register" +- VSBCVVM V1, V2, V4, V3 // ERROR "invalid vector mask register" +- VSBCVVM V1, V2, V3 // ERROR "invalid vector mask register" +- VSBCVXM X10, V2, V4, V3 // ERROR "invalid vector mask register" +- VSBCVXM X10, V2, V3 // ERROR "invalid vector mask register" +- VMSBCVVM V1, V2, V4, V3 // ERROR "invalid vector mask register" +- VMSBCVVM V1, V2, V3 // ERROR "invalid vector mask register" +- VMSBCVXM X10, V2, V4, V3 // ERROR "invalid vector mask register" +- VMSBCVXM X10, V2, V3 // ERROR "invalid vector mask register" +- VANDVV V1, V2, V4, V3 // ERROR "invalid vector mask register" +- VANDVX X10, V2, V4, V3 // ERROR "invalid vector mask register" +- VANDVI $15, V2, V4, V3 // ERROR "invalid vector mask register" +- VORVV V1, V2, V4, V3 // ERROR "invalid vector mask register" +- VORVX X10, V2, V4, V3 // ERROR "invalid vector mask register" +- VORVI $15, V2, V4, V3 // ERROR "invalid vector mask register" +- VXORVV V1, V2, V4, V3 // ERROR "invalid vector mask register" +- VXORVX X10, V2, V4, V3 // ERROR "invalid vector mask register" +- VXORVI $15, V2, V4, V3 // ERROR "invalid vector mask register" +- VNOTV V1, V2, V3 // ERROR "invalid vector mask register" +- VSLLVV V1, V2, V4, V3 // ERROR "invalid vector mask register" +- VSLLVX X10, V2, V4, V3 // ERROR "invalid vector mask register" +- VSLLVI $15, V2, V4, V3 // ERROR "invalid vector mask register" +- VSRLVV V1, V2, V4, V3 // ERROR "invalid vector mask register" +- VSRLVX X10, V2, V4, V3 // ERROR "invalid vector mask register" +- VSRLVI $15, V2, V4, V3 // ERROR "invalid vector mask register" +- VSRAVV V1, V2, V4, V3 // ERROR "invalid vector mask register" +- VSRAVX X10, V2, V4, V3 // ERROR "invalid vector mask register" +- VSRAVI $15, V2, V4, V3 // ERROR "invalid vector mask register" +- VNSRLWV V1, V2, V4, V3 // ERROR "invalid vector mask register" +- VNSRLWX X10, V2, V4, V3 // ERROR "invalid vector mask register" +- VNSRLWI $31, V2, V4, V3 // ERROR "invalid vector mask register" +- VNSRAWV V1, V2, V4, V3 // ERROR "invalid vector mask register" +- VNSRAWX X10, V2, V4, V3 // ERROR "invalid vector mask register" +- VNSRAWI $31, V2, V4, V3 // ERROR "invalid vector mask register" +- VNCVTXXW V2, V4, V3 // ERROR "invalid vector mask register" +- VMSEQVV V1, V2, V4, V3 // ERROR "invalid vector mask register" +- VMSEQVX X10, V2, V4, V3 // ERROR "invalid vector mask register" +- VMSEQVI $15, V2, V4, V3 // ERROR "invalid vector mask register" +- VMSNEVV V1, V2, V4, V3 // ERROR "invalid vector mask register" +- VMSNEVX X10, V2, V4, V3 // ERROR "invalid vector mask register" +- VMSNEVI $15, V2, V4, V3 // ERROR "invalid vector mask register" +- VMSLTUVV V1, V2, V4, V3 // ERROR "invalid vector mask register" +- VMSLTUVX X10, V2, V4, V3 // ERROR "invalid vector mask register" +- VMSLTVV V1, V2, V4, V3 // ERROR "invalid vector mask register" +- VMSLTVX X10, V2, V4, V3 // ERROR "invalid vector mask register" +- VMSLEUVV V1, V2, V4, V3 // ERROR "invalid vector mask register" +- VMSLEUVX X10, V2, V4, V3 // ERROR "invalid vector mask register" +- VMSLEUVI $15, V2, V4, V3 // ERROR "invalid vector mask register" +- VMSLEVV V1, V2, V4, V3 // ERROR "invalid vector mask register" +- VMSLEVX X10, V2, V4, V3 // ERROR "invalid vector mask register" +- VMSLEVI $15, V2, V4, V3 // ERROR "invalid vector mask register" +- VMSGTUVV V1, V2, V4, V3 // ERROR "invalid vector mask register" +- VMSGTUVX X10, V2, V4, V3 // ERROR "invalid 
vector mask register" +- VMSGTUVI $15, V2, V4, V3 // ERROR "invalid vector mask register" +- VMSGTVV V1, V2, V4, V3 // ERROR "invalid vector mask register" +- VMSGTVX X10, V2, V4, V3 // ERROR "invalid vector mask register" +- VMSGTVI $15, V2, V4, V3 // ERROR "invalid vector mask register" +- VMSGEVV V1, V2, V4, V3 // ERROR "invalid vector mask register" +- VMSGEUVV V1, V2, V4, V3 // ERROR "invalid vector mask register" +- VMSLTVI $15, V2, V4, V3 // ERROR "invalid vector mask register" +- VMSLTUVI $15, V2, V4, V3 // ERROR "invalid vector mask register" +- VMSGEVI $15, V2, V4, V3 // ERROR "invalid vector mask register" +- VMSGEUVI $15, V2, V4, V3 // ERROR "invalid vector mask register" +- VMINUVV V1, V2, V4, V3 // ERROR "invalid vector mask register" +- VMINUVX X10, V2, V4, V3 // ERROR "invalid vector mask register" +- VMINVV V1, V2, V4, V3 // ERROR "invalid vector mask register" +- VMINVX X10, V2, V4, V3 // ERROR "invalid vector mask register" +- VMAXUVV V1, V2, V4, V3 // ERROR "invalid vector mask register" +- VMAXUVX X10, V2, V4, V3 // ERROR "invalid vector mask register" +- VMAXVV V1, V2, V4, V3 // ERROR "invalid vector mask register" +- VMAXVX X10, V2, V4, V3 // ERROR "invalid vector mask register" +- VMULVV V1, V2, V4, V3 // ERROR "invalid vector mask register" +- VMULVX X10, V2, V4, V3 // ERROR "invalid vector mask register" +- VMULHVV V1, V2, V4, V3 // ERROR "invalid vector mask register" +- VMULHVX X10, V2, V4, V3 // ERROR "invalid vector mask register" +- VMULHUVV V1, V2, V4, V3 // ERROR "invalid vector mask register" +- VMULHUVX X10, V2, V4, V3 // ERROR "invalid vector mask register" +- VMULHSUVV V1, V2, V4, V3 // ERROR "invalid vector mask register" +- VMULHSUVX X10, V2, V4, V3 // ERROR "invalid vector mask register" +- VDIVUVV V1, V2, V4, V3 // ERROR "invalid vector mask register" +- VDIVUVX X10, V2, V4, V3 // ERROR "invalid vector mask register" +- VDIVVV V1, V2, V4, V3 // ERROR "invalid vector mask register" +- VDIVVX X10, V2, V4, V3 // ERROR "invalid vector mask register" +- VREMUVV V1, V2, V4, V3 // ERROR "invalid vector mask register" +- VREMUVX X10, V2, V4, V3 // ERROR "invalid vector mask register" +- VREMVV V1, V2, V4, V3 // ERROR "invalid vector mask register" +- VREMVX X10, V2, V4, V3 // ERROR "invalid vector mask register" +- VWMULVV V1, V2, V4, V3 // ERROR "invalid vector mask register" +- VWMULVX X10, V2, V4, V3 // ERROR "invalid vector mask register" +- VWMULUVV V1, V2, V4, V3 // ERROR "invalid vector mask register" +- VWMULUVX X10, V2, V4, V3 // ERROR "invalid vector mask register" +- VWMULSUVV V1, V2, V4, V3 // ERROR "invalid vector mask register" +- VWMULSUVX X10, V2, V4, V3 // ERROR "invalid vector mask register" +- VMACCVV V1, V2, V4, V3 // ERROR "invalid vector mask register" +- VMACCVX X10, V2, V4, V3 // ERROR "invalid vector mask register" +- VNMSACVV V1, V2, V4, V3 // ERROR "invalid vector mask register" +- VNMSACVX X10, V2, V4, V3 // ERROR "invalid vector mask register" +- VMADDVV V1, V2, V4, V3 // ERROR "invalid vector mask register" +- VMADDVX X10, V2, V4, V3 // ERROR "invalid vector mask register" +- VNMSUBVV V1, V2, V4, V3 // ERROR "invalid vector mask register" +- VNMSUBVX X10, V2, V4, V3 // ERROR "invalid vector mask register" +- VWMACCUVV V1, V2, V4, V3 // ERROR "invalid vector mask register" +- VWMACCUVX X10, V2, V4, V3 // ERROR "invalid vector mask register" +- VWMACCVV V1, V2, V4, V3 // ERROR "invalid vector mask register" +- VWMACCVX X10, V2, V4, V3 // ERROR "invalid vector mask register" +- VWMACCSUVV V1, V2, V4, V3 // ERROR "invalid vector 
mask register" +- VWMACCSUVX X10, V2, V4, V3 // ERROR "invalid vector mask register" +- VWMACCUSVX X10, V2, V4, V3 // ERROR "invalid vector mask register" +- VMERGEVVM V1, V2, V3 // ERROR "invalid vector mask register" +- VMERGEVVM V1, V2, V4, V3 // ERROR "invalid vector mask register" +- VMERGEVXM X10, V2, V3 // ERROR "invalid vector mask register" +- VMERGEVXM X10, V2, V4, V3 // ERROR "invalid vector mask register" +- VMERGEVIM $15, V2, V3 // ERROR "invalid vector mask register" +- VMERGEVIM $15, V2, V4, V3 // ERROR "invalid vector mask register" +- VMVVV V1, V2, V3 // ERROR "too many operands for instruction" +- VMVVX X10, V2, V3 // ERROR "too many operands for instruction" +- VMVVI $15, V2, V3 // ERROR "too many operands for instruction" +- VSADDUVV V1, V2, V4, V3 // ERROR "invalid vector mask register" +- VSADDUVX X10, V2, V4, V3 // ERROR "invalid vector mask register" +- VSADDUVI $15, V2, V4, V3 // ERROR "invalid vector mask register" +- VSADDVV V1, V2, V4, V3 // ERROR "invalid vector mask register" +- VSADDVX X10, V2, V4, V3 // ERROR "invalid vector mask register" +- VSADDVI $15, V2, V4, V3 // ERROR "invalid vector mask register" +- VSSUBUVV V1, V2, V4, V3 // ERROR "invalid vector mask register" +- VSSUBUVX X10, V2, V4, V3 // ERROR "invalid vector mask register" +- VSSUBVV V1, V2, V4, V3 // ERROR "invalid vector mask register" +- VSSUBVX X10, V2, V4, V3 // ERROR "invalid vector mask register" +- VAADDUVV V1, V2, V4, V3 // ERROR "invalid vector mask register" +- VAADDUVX X10, V2, V4, V3 // ERROR "invalid vector mask register" +- VAADDVV V1, V2, V4, V3 // ERROR "invalid vector mask register" +- VAADDVX X10, V2, V4, V3 // ERROR "invalid vector mask register" +- VASUBUVV V1, V2, V4, V3 // ERROR "invalid vector mask register" +- VASUBUVX X10, V2, V4, V3 // ERROR "invalid vector mask register" +- VASUBVV V1, V2, V4, V3 // ERROR "invalid vector mask register" +- VASUBVX X10, V2, V4, V3 // ERROR "invalid vector mask register" +- VSMULVV V1, V2, V4, V3 // ERROR "invalid vector mask register" +- VSMULVX X10, V2, V4, V3 // ERROR "invalid vector mask register" +- VSSRLVV V1, V2, V4, V3 // ERROR "invalid vector mask register" +- VSSRLVX X10, V2, V4, V3 // ERROR "invalid vector mask register" +- VSSRLVI $15, V2, V4, V3 // ERROR "invalid vector mask register" +- VSSRAVV V1, V2, V4, V3 // ERROR "invalid vector mask register" +- VSSRAVX X10, V2, V4, V3 // ERROR "invalid vector mask register" +- VSSRAVI $15, V2, V4, V3 // ERROR "invalid vector mask register" +- VNCLIPUWV V1, V2, V4, V3 // ERROR "invalid vector mask register" +- VNCLIPUWX X10, V2, V4, V3 // ERROR "invalid vector mask register" +- VNCLIPUWI $16, V2, V4, V3 // ERROR "invalid vector mask register" +- VNCLIPWV V1, V2, V4, V3 // ERROR "invalid vector mask register" +- VNCLIPWX X10, V2, V4, V3 // ERROR "invalid vector mask register" +- VNCLIPWI $16, V2, V4, V3 // ERROR "invalid vector mask register" ++ VSETIVLI X10, E32, M2, TA, MA, X12 // ERROR "expected immediate value" ++ VLE8V (X10), V1, V3 // ERROR "invalid vector mask register" ++ VSE8V V3, V1, (X10) // ERROR "invalid vector mask register" ++ VLSE8V (X10), X10, V1, V3 // ERROR "invalid vector mask register" ++ VSSE8V V3, X11, V1, (X10) // ERROR "invalid vector mask register" ++ VLUXEI8V (X10), V2, V1, V3 // ERROR "invalid vector mask register" ++ VSUXEI8V V3, V2, V1, (X10) // ERROR "invalid vector mask register" ++ VLOXEI8V (X10), V2, V1, V3 // ERROR "invalid vector mask register" ++ VSOXEI8V V3, V2, V1, (X10) // ERROR "invalid vector mask register" ++ VL1RV (X10), V0, V3 // ERROR 
"too many operands for instruction" ++ VS1RV V3, V0, (X11) // ERROR "too many operands for instruction" ++ VADDVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VADDVX X10, V2, V1, V3 // ERROR "invalid vector mask register" ++ VADDVI $15, V4, V1, V2 // ERROR "invalid vector mask register" ++ VSUBVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VSUBVX X10, V2, V1, V3 // ERROR "invalid vector mask register" ++ VRSUBVX X10, V2, V1, V3 // ERROR "invalid vector mask register" ++ VRSUBVI $15, V4, V1, V2 // ERROR "invalid vector mask register" ++ VNEGV V2, V3, V4 // ERROR "invalid vector mask register" ++ VWADDUVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VWADDUVX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VWSUBUVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VWSUBUVX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VWADDVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VWADDVX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VWSUBVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VWSUBVX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VWADDUWV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VWADDUWX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VWSUBUWV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VWSUBUWX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VWADDWV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VWADDWX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VWSUBWV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VWSUBWX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VWCVTXXV V2, V1, V3 // ERROR "invalid vector mask register" ++ VWCVTUXXV V2, V1, V3 // ERROR "invalid vector mask register" ++ VZEXTVF2 V2, V3, V4 // ERROR "invalid vector mask register" ++ VSEXTVF2 V2, V3, V4 // ERROR "invalid vector mask register" ++ VZEXTVF4 V2, V3, V4 // ERROR "invalid vector mask register" ++ VSEXTVF4 V2, V3, V4 // ERROR "invalid vector mask register" ++ VZEXTVF8 V2, V3, V4 // ERROR "invalid vector mask register" ++ VSEXTVF8 V2, V3, V4 // ERROR "invalid vector mask register" ++ VADCVVM V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VADCVVM V1, V2, V3 // ERROR "invalid vector mask register" ++ VADCVXM X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VADCVXM X10, V2, V3 // ERROR "invalid vector mask register" ++ VADCVIM $15, V2, V1, V3 // ERROR "invalid vector mask register" ++ VADCVIM $15, V2, V3 // ERROR "invalid vector mask register" ++ VMADCVVM V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMADCVVM V1, V2, V3 // ERROR "invalid vector mask register" ++ VMADCVXM X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMADCVXM X10, V2, V3 // ERROR "invalid vector mask register" ++ VMADCVIM $15, V2, V1, V3 // ERROR "invalid vector mask register" ++ VMADCVIM $15, V2, V3 // ERROR "invalid vector mask register" ++ VSBCVVM V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VSBCVVM V1, V2, V3 // ERROR "invalid vector mask register" ++ VSBCVXM X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VSBCVXM X10, V2, V3 // ERROR "invalid vector mask register" ++ VMSBCVVM V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMSBCVVM V1, V2, V3 // ERROR "invalid vector mask register" ++ VMSBCVXM X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMSBCVXM X10, V2, V3 // ERROR "invalid vector mask register" ++ VANDVV V1, V2, V4, V3 // ERROR "invalid vector 
mask register" ++ VANDVX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VANDVI $15, V2, V4, V3 // ERROR "invalid vector mask register" ++ VORVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VORVX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VORVI $15, V2, V4, V3 // ERROR "invalid vector mask register" ++ VXORVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VXORVX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VXORVI $15, V2, V4, V3 // ERROR "invalid vector mask register" ++ VNOTV V1, V2, V3 // ERROR "invalid vector mask register" ++ VSLLVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VSLLVX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VSLLVI $15, V2, V4, V3 // ERROR "invalid vector mask register" ++ VSRLVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VSRLVX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VSRLVI $15, V2, V4, V3 // ERROR "invalid vector mask register" ++ VSRAVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VSRAVX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VSRAVI $15, V2, V4, V3 // ERROR "invalid vector mask register" ++ VNSRLWV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VNSRLWX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VNSRLWI $31, V2, V4, V3 // ERROR "invalid vector mask register" ++ VNSRAWV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VNSRAWX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VNSRAWI $31, V2, V4, V3 // ERROR "invalid vector mask register" ++ VNCVTXXW V2, V4, V3 // ERROR "invalid vector mask register" ++ VMSEQVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMSEQVX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMSEQVI $15, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMSNEVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMSNEVX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMSNEVI $15, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMSLTUVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMSLTUVX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMSLTVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMSLTVX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMSLEUVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMSLEUVX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMSLEUVI $15, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMSLEVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMSLEVX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMSLEVI $15, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMSGTUVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMSGTUVX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMSGTUVI $15, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMSGTVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMSGTVX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMSGTVI $15, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMSGEVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMSGEUVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMSLTVI $15, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMSLTUVI $15, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMSGEVI $15, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMSGEUVI $15, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMINUVV V1, V2, V4, V3 
// ERROR "invalid vector mask register" ++ VMINUVX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMINVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMINVX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMAXUVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMAXUVX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMAXVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMAXVX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMULVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMULVX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMULHVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMULHVX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMULHUVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMULHUVX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMULHSUVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMULHSUVX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VDIVUVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VDIVUVX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VDIVVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VDIVVX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VREMUVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VREMUVX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VREMVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VREMVX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VWMULVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VWMULVX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VWMULUVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VWMULUVX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VWMULSUVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VWMULSUVX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMACCVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMACCVX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VNMSACVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VNMSACVX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMADDVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMADDVX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VNMSUBVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VNMSUBVX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VWMACCUVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VWMACCUVX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VWMACCVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VWMACCVX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VWMACCSUVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VWMACCSUVX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VWMACCUSVX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMERGEVVM V1, V2, V3 // ERROR "invalid vector mask register" ++ VMERGEVVM V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMERGEVXM X10, V2, V3 // ERROR "invalid vector mask register" ++ VMERGEVXM X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMERGEVIM $15, V2, V3 // ERROR "invalid vector mask register" ++ VMERGEVIM $15, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMVVV V1, V2, V3 // ERROR "too many operands for instruction" ++ VMVVX X10, V2, V3 // ERROR "too many operands for instruction" ++ VMVVI $15, V2, V3 // ERROR "too 
many operands for instruction" ++ VSADDUVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VSADDUVX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VSADDUVI $15, V2, V4, V3 // ERROR "invalid vector mask register" ++ VSADDVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VSADDVX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VSADDVI $15, V2, V4, V3 // ERROR "invalid vector mask register" ++ VSSUBUVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VSSUBUVX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VSSUBVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VSSUBVX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VAADDUVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VAADDUVX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VAADDVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VAADDVX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VASUBUVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VASUBUVX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VASUBVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VASUBVX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VSMULVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VSMULVX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VSSRLVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VSSRLVX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VSSRLVI $15, V2, V4, V3 // ERROR "invalid vector mask register" ++ VSSRAVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VSSRAVX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VSSRAVI $15, V2, V4, V3 // ERROR "invalid vector mask register" ++ VNCLIPUWV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VNCLIPUWX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VNCLIPUWI $16, V2, V4, V3 // ERROR "invalid vector mask register" ++ VNCLIPWV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VNCLIPWX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VNCLIPWI $16, V2, V4, V3 // ERROR "invalid vector mask register" ++ VFADDVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VFADDVF F10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VFSUBVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VFSUBVF F10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VFRSUBVF F10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VFWADDVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VFWADDVF F10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VFWSUBVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VFWSUBVF F10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VFWADDWV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VFWADDWF F10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VFWSUBWV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VFWSUBWF F10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VFMULVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VFMULVF F10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VFDIVVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VFDIVVF F10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VFRDIVVF F10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VFWMULVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VFWMULVF F10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VFMACCVV V2, V1, V4, V3 // 
ERROR "invalid vector mask register" ++ VFMACCVF V2, F10, V4, V3 // ERROR "invalid vector mask register" ++ VFNMACCVV V2, V1, V4, V3 // ERROR "invalid vector mask register" ++ VFNMACCVF V2, F10, V4, V3 // ERROR "invalid vector mask register" ++ VFMSACVV V2, V1, V4, V3 // ERROR "invalid vector mask register" ++ VFMSACVF V2, F10, V4, V3 // ERROR "invalid vector mask register" ++ VFNMSACVV V2, V1, V4, V3 // ERROR "invalid vector mask register" ++ VFNMSACVF V2, F10, V4, V3 // ERROR "invalid vector mask register" ++ VFMADDVV V2, V1, V4, V3 // ERROR "invalid vector mask register" ++ VFMADDVF V2, F10, V4, V3 // ERROR "invalid vector mask register" ++ VFNMADDVV V2, V1, V4, V3 // ERROR "invalid vector mask register" ++ VFNMADDVF V2, F10, V4, V3 // ERROR "invalid vector mask register" ++ VFMSUBVV V2, V1, V4, V3 // ERROR "invalid vector mask register" ++ VFMSUBVF V2, F10, V4, V3 // ERROR "invalid vector mask register" ++ VFNMSUBVV V2, V1, V4, V3 // ERROR "invalid vector mask register" ++ VFNMSUBVF V2, F10, V4, V3 // ERROR "invalid vector mask register" ++ VFWMACCVV V2, V1, V4, V3 // ERROR "invalid vector mask register" ++ VFWMACCVF V2, F10, V4, V3 // ERROR "invalid vector mask register" ++ VFWNMACCVV V2, V1, V4, V3 // ERROR "invalid vector mask register" ++ VFWNMACCVF V2, F10, V4, V3 // ERROR "invalid vector mask register" ++ VFWMSACVV V2, V1, V4, V3 // ERROR "invalid vector mask register" ++ VFWMSACVF V2, F10, V4, V3 // ERROR "invalid vector mask register" ++ VFWNMSACVV V2, V1, V4, V3 // ERROR "invalid vector mask register" ++ VFWNMSACVF V2, F10, V4, V3 // ERROR "invalid vector mask register" ++ VFSQRTV V2, V4, V3 // ERROR "invalid vector mask register" ++ VFRSQRT7V V2, V4, V3 // ERROR "invalid vector mask register" ++ VFREC7V V2, V4, V3 // ERROR "invalid vector mask register" ++ VFMINVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VFMINVF F10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VFMAXVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VFMAXVF F10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VFSGNJVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VFSGNJVF F10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VFSGNJNVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VFSGNJNVF F10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VFSGNJXVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VFSGNJXVF F10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VFNEGV V2, V4, V3 // ERROR "invalid vector mask register" ++ VFABSV V2, V4, V3 // ERROR "invalid vector mask register" ++ VMFEQVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMFEQVF F10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMFNEVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMFNEVF F10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMFLTVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMFLTVF F10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMFLEVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMFLEVF F10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMFGTVF F10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMFGEVF F10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMFGTVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VMFGEVV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VFMERGEVFM X10, V2, V3 // ERROR "invalid vector mask register" ++ VFMERGEVFM F10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VFCVTXUFV V2, V4, V3 
// ERROR "invalid vector mask register" ++ VFCVTXFV V2, V4, V3 // ERROR "invalid vector mask register" ++ VFCVTRTZXUFV V2, V4, V3 // ERROR "invalid vector mask register" ++ VFCVTRTZXFV V2, V4, V3 // ERROR "invalid vector mask register" ++ VFCVTFXUV V2, V4, V3 // ERROR "invalid vector mask register" ++ VFCVTFXV V2, V4, V3 // ERROR "invalid vector mask register" ++ VFWCVTXUFV V2, V4, V3 // ERROR "invalid vector mask register" ++ VFWCVTXFV V2, V4, V3 // ERROR "invalid vector mask register" ++ VFWCVTRTZXUFV V2, V4, V3 // ERROR "invalid vector mask register" ++ VFWCVTRTZXFV V2, V4, V3 // ERROR "invalid vector mask register" ++ VFWCVTFXUV V2, V4, V3 // ERROR "invalid vector mask register" ++ VFWCVTFXV V2, V4, V3 // ERROR "invalid vector mask register" ++ VFWCVTFFV V2, V4, V3 // ERROR "invalid vector mask register" ++ VFNCVTXUFW V2, V4, V3 // ERROR "invalid vector mask register" ++ VFNCVTXFW V2, V4, V3 // ERROR "invalid vector mask register" ++ VFNCVTRTZXUFW V2, V4, V3 // ERROR "invalid vector mask register" ++ VFNCVTRTZXFW V2, V4, V3 // ERROR "invalid vector mask register" ++ VFNCVTFXUW V2, V4, V3 // ERROR "invalid vector mask register" ++ VFNCVTFXW V2, V4, V3 // ERROR "invalid vector mask register" ++ VFNCVTFFW V2, V4, V3 // ERROR "invalid vector mask register" ++ VFNCVTRODFFW V2, V4, V3 // ERROR "invalid vector mask register" + + RET +diff --git a/src/cmd/asm/internal/asm/testdata/riscv64validation.s b/src/cmd/asm/internal/asm/testdata/riscv64validation.s +index c6f71e64fb..2c509a1e91 100644 +--- a/src/cmd/asm/internal/asm/testdata/riscv64validation.s ++++ b/src/cmd/asm/internal/asm/testdata/riscv64validation.s +@@ -15,259 +15,354 @@ TEXT validation(SB),$0 + // + // "V" Standard Extension for Vector Operations, Version 1.0 + // +- VSETVLI $32, E16, M1, TU, MU, X12 // ERROR "must be in range [0, 31] (5 bits)" +- VSETVLI $-1, E32, M2, TA, MA, X12 // ERROR "must be in range [0, 31] (5 bits)" +- VSETVL X10, X11 // ERROR "expected integer register in rs1 position" +- VLE8V (X10), X10 // ERROR "expected vector register in vd position" +- VLE8V (V1), V3 // ERROR "expected integer register in rs1 position" +- VSE8V X10, (X10) // ERROR "expected vector register in vs1 position" +- VSE8V V3, (V1) // ERROR "expected integer register in rd position" +- VLSE8V (X10), V3 // ERROR "expected integer register in rs2 position" +- VLSE8V (X10), X10, X11 // ERROR "expected vector register in vd position" +- VLSE8V (V1), X10, V3 // ERROR "expected integer register in rs1 position" +- VLSE8V (X10), V1, V0, V3 // ERROR "expected integer register in rs2 position" +- VSSE8V V3, (X10) // ERROR "expected integer register in rs2 position" +- VSSE8V X10, X11, (X10) // ERROR "expected vector register in vd position" +- VSSE8V V3, X11, (V1) // ERROR "expected integer register in rs1 position" +- VSSE8V V3, V1, V0, (X10) // ERROR "expected integer register in rs2 position" +- VLUXEI8V (X10), V2, X11 // ERROR "expected vector register in vd position" +- VLUXEI8V (X10), V2, X11 // ERROR "expected vector register in vd position" +- VLUXEI8V (V1), V2, V3 // ERROR "expected integer register in rs1 position" +- VLUXEI8V (X10), X11, V0, V3 // ERROR "expected vector register in vs2 position" +- VSUXEI8V X10, V2, (X10) // ERROR "expected vector register in vd position" +- VSUXEI8V V3, V2, (V1) // ERROR "expected integer register in rs1 position" +- VSUXEI8V V3, X11, V0, (X10) // ERROR "expected vector register in vs2 position" +- VLOXEI8V (X10), V2, X11 // ERROR "expected vector register in vd position" +- VLOXEI8V (V1), V2, V3 // 
ERROR "expected integer register in rs1 position" +- VLOXEI8V (X10), X11, V0, V3 // ERROR "expected vector register in vs2 position" +- VSOXEI8V X10, V2, (X10) // ERROR "expected vector register in vd position" +- VSOXEI8V V3, V2, (V1) // ERROR "expected integer register in rs1 position" +- VSOXEI8V V3, X11, V0, (X10) // ERROR "expected vector register in vs2 position" +- VL1RV (X10), X10 // ERROR "expected vector register in vd position" +- VL1RV (V1), V3 // ERROR "expected integer register in rs1 position" +- VS1RV X11, (X11) // ERROR "expected vector register in vs1 position" +- VS1RV V3, (V1) // ERROR "expected integer register in rd position" +- VADDVV V1, X10, V3 // ERROR "expected vector register in vs2 position" +- VADDVX V1, V2, V3 // ERROR "expected integer register in rs1 position" +- VADDVI $16, V4, V2 // ERROR "signed immediate 16 must be in range [-16, 15] (5 bits)" +- VADDVI $-17, V4, V2 // ERROR "signed immediate -17 must be in range [-16, 15] (5 bits)" +- VSUBVV V1, X10, V3 // ERROR "expected vector register in vs2 position" +- VSUBVX V1, V2, V3 // ERROR "expected integer register in rs1 position" +- VRSUBVX V1, V2, V3 // ERROR "expected integer register in rs1 position" +- VRSUBVI $16, V4, V2 // ERROR "signed immediate 16 must be in range [-16, 15] (5 bits)" +- VRSUBVI $-17, V4, V2 // ERROR "signed immediate -17 must be in range [-16, 15] (5 bits)" +- VNEGV X10, V3 // ERROR "expected vector register in vs2 position" +- VNEGV V2 // ERROR "expected vector register in vd position" +- VWADDUVV X10, V2, V3 // ERROR "expected vector register in vs1 position" +- VWADDUVX V1, V2, V3 // ERROR "expected integer register in rs1 position" +- VWSUBUVV X10, V2, V3 // ERROR "expected vector register in vs1 position" +- VWSUBUVX V1, V2, V3 // ERROR "expected integer register in rs1 position" +- VWADDVV X10, V2, V3 // ERROR "expected vector register in vs1 position" +- VWADDVX V1, V2, V3 // ERROR "expected integer register in rs1 position" +- VWSUBVV X10, V2, V3 // ERROR "expected vector register in vs1 position" +- VWSUBVX V1, V2, V3 // ERROR "expected integer register in rs1 position" +- VWADDUWV X10, V2, V3 // ERROR "expected vector register in vs1 position" +- VWADDUWX V1, V2, V3 // ERROR "expected integer register in rs1 position" +- VWSUBUWV X10, V2, V3 // ERROR "expected vector register in vs1 position" +- VWSUBUWX V1, V2, V3 // ERROR "expected integer register in rs1 position" +- VWADDWV X10, V2, V3 // ERROR "expected vector register in vs1 position" +- VWADDWX V1, V2, V3 // ERROR "expected integer register in rs1 position" +- VWSUBWV X10, V2, V3 // ERROR "expected vector register in vs1 position" +- VWSUBWX V1, V2, V3 // ERROR "expected integer register in rs1 position" +- VWCVTXXV X10, V3 // ERROR "expected vector register in vs2 position" +- VWCVTUXXV X10, V3 // ERROR "expected vector register in vs2 position" +- VZEXTVF2 V2, V0, V3, V4 // ERROR "expected no register in rs3" +- VZEXTVF2 X10, V3 // ERROR "expected vector register in vs2 position" +- VSEXTVF2 V2, V0, V3, V4 // ERROR "expected no register in rs3" +- VSEXTVF2 X10, V3 // ERROR "expected vector register in vs2 position" +- VZEXTVF4 V2, V0, V3, V4 // ERROR "expected no register in rs3" +- VZEXTVF4 X10, V3 // ERROR "expected vector register in vs2 position" +- VSEXTVF4 V2, V0, V3, V4 // ERROR "expected no register in rs3" +- VSEXTVF4 X10, V3 // ERROR "expected vector register in vs2 position" +- VZEXTVF8 V2, V0, V3, V4 // ERROR "expected no register in rs3" +- VZEXTVF8 X10, V3 // ERROR "expected vector register in vs2 
position" +- VSEXTVF8 V2, V0, V3, V4 // ERROR "expected no register in rs3" +- VSEXTVF8 X10, V3 // ERROR "expected vector register in vs2 position" +- VADCVVM X10, V2, V0, V3 // ERROR "expected vector register in vs1 position" +- VADCVXM V1, V2, V0, V3 // ERROR "expected integer register in rs1 position" +- VADCVIM $16, V2, V0, V3 // ERROR "signed immediate 16 must be in range [-16, 15]" +- VADCVIM $-17, V2, V0, V3 // ERROR "signed immediate -17 must be in range [-16, 15]" +- VMADCVVM X10, V2, V0, V3 // ERROR "expected vector register in vs1 position" +- VMADCVXM V1, V2, V0, V3 // ERROR "expected integer register in rs1 position" +- VMADCVIM $16, V2, V0, V3 // ERROR "signed immediate 16 must be in range [-16, 15]" +- VMADCVIM $-17, V2, V0, V3 // ERROR "signed immediate -17 must be in range [-16, 15]" +- VMADCVV X10, V2, V3 // ERROR "expected vector register in vs1 position" +- VMADCVV V1, V2, V0, V3 // ERROR "expected no register in rs3" +- VMADCVX V1, V2, V3 // ERROR "expected integer register in rs1 position" +- VMADCVX X10, V2, V0, V3 // ERROR "expected no register in rs3" +- VMADCVI $16, V2, V3 // ERROR "signed immediate 16 must be in range [-16, 15]" +- VMADCVI $-17, V2, V3 // ERROR "signed immediate -17 must be in range [-16, 15]" +- VMADCVI $15, V2, V0, V3 // ERROR "expected no register in rs3" +- VSBCVVM X10, V2, V0, V3 // ERROR "expected vector register in vs1 position" +- VSBCVXM V1, V2, V0, V3 // ERROR "expected integer register in rs1 position" +- VMSBCVVM X10, V2, V0, V3 // ERROR "expected vector register in vs1 position" +- VMSBCVXM V1, V2, V0, V3 // ERROR "expected integer register in rs1 position" +- VMSBCVV X10, V2, V3 // ERROR "expected vector register in vs1 position" +- VMSBCVV V1, V2, V0, V3 // ERROR "expected no register in rs3" +- VMSBCVX V1, V2, V3 // ERROR "expected integer register in rs1 position" +- VMSBCVX X10, V2, V0, V3 // ERROR "expected no register in rs3" +- VANDVV X10, V2, V3 // ERROR "expected vector register in vs1 position" +- VANDVX V1, V2, V3 // ERROR "expected integer register in rs1 position" +- VANDVI $16, V2, V3 // ERROR "signed immediate 16 must be in range [-16, 15]" +- VANDVI $-17, V2, V3 // ERROR "signed immediate -17 must be in range [-16, 15]" +- VORVV X10, V2, V3 // ERROR "expected vector register in vs1 position" +- VORVX V1, V2, V3 // ERROR "expected integer register in rs1 position" +- VORVI $16, V2, V3 // ERROR "signed immediate 16 must be in range [-16, 15]" +- VORVI $-17, V2, V3 // ERROR "signed immediate -17 must be in range [-16, 15]" +- VXORVV X10, V2, V3 // ERROR "expected vector register in vs1 position" +- VXORVX V1, V2, V3 // ERROR "expected integer register in rs1 position" +- VXORVI $16, V2, V3 // ERROR "signed immediate 16 must be in range [-16, 15]" +- VXORVI $-17, V2, V3 // ERROR "signed immediate -17 must be in range [-16, 15]" +- VNOTV V3 // ERROR "expected vector register in vd position" +- VNOTV X10, V3 // ERROR "expected vector register in vs2 position" +- VSLLVV X10, V2, V3 // ERROR "expected vector register in vs1 position" +- VSLLVX V1, V2, V3 // ERROR "expected integer register in rs1 position" +- VSLLVI $32, V2, V3 // ERROR "unsigned immediate 32 must be in range [0, 31]" +- VSLLVI $-1, V2, V3 // ERROR "unsigned immediate -1 must be in range [0, 31]" +- VSRLVV X10, V2, V3 // ERROR "expected vector register in vs1 position" +- VSRLVX V1, V2, V3 // ERROR "expected integer register in rs1 position" +- VSRLVI $32, V2, V3 // ERROR "unsigned immediate 32 must be in range [0, 31]" +- VSRLVI $-1, V2, V3 // ERROR 
"unsigned immediate -1 must be in range [0, 31]" +- VSRAVV X10, V2, V3 // ERROR "expected vector register in vs1 position" +- VSRAVX V1, V2, V3 // ERROR "expected integer register in rs1 position" +- VSRAVI $32, V2, V3 // ERROR "unsigned immediate 32 must be in range [0, 31]" +- VSRAVI $-1, V2, V3 // ERROR "unsigned immediate -1 must be in range [0, 31]" +- VNSRLWV X10, V2, V3 // ERROR "expected vector register in vs1 position" +- VNSRLWX V1, V2, V3 // ERROR "expected integer register in rs1 position" +- VNSRLWI $32, V2, V3 // ERROR "unsigned immediate 32 must be in range [0, 31]" +- VNSRLWI $-1, V2, V3 // ERROR "unsigned immediate -1 must be in range [0, 31]" +- VNSRAWV X10, V2, V3 // ERROR "expected vector register in vs1 position" +- VNSRAWX V1, V2, V3 // ERROR "expected integer register in rs1 position" +- VNSRAWI $32, V2, V3 // ERROR "unsigned immediate 32 must be in range [0, 31]" +- VNSRAWI $-1, V2, V3 // ERROR "unsigned immediate -1 must be in range [0, 31]" +- VNCVTXXW X10, V3 // ERROR "expected vector register in vs2 position" +- VMSEQVV X10, V2, V3 // ERROR "expected vector register in vs1 position" +- VMSEQVX V1, V2, V3 // ERROR "expected integer register in rs1 position" +- VMSEQVI $16, V2, V3 // ERROR "signed immediate 16 must be in range [-16, 15]" +- VMSEQVI $-17, V2, V3 // ERROR "signed immediate -17 must be in range [-16, 15]" +- VMSNEVV X10, V2, V3 // ERROR "expected vector register in vs1 position" +- VMSNEVX V1, V2, V3 // ERROR "expected integer register in rs1 position" +- VMSNEVI $16, V2, V3 // ERROR "signed immediate 16 must be in range [-16, 15]" +- VMSNEVI $-17, V2, V3 // ERROR "signed immediate -17 must be in range [-16, 15]" +- VMSLTUVV X10, V2, V3 // ERROR "expected vector register in vs1 position" +- VMSLTUVX V1, V2, V3 // ERROR "expected integer register in rs1 position" +- VMSLTVV X10, V2, V3 // ERROR "expected vector register in vs1 position" +- VMSLTVX V1, V2, V3 // ERROR "expected integer register in rs1 position" +- VMSLEUVV X10, V2, V3 // ERROR "expected vector register in vs1 position" +- VMSLEUVX V1, V2, V3 // ERROR "expected integer register in rs1 position" +- VMSLEUVI $16, V2, V3 // ERROR "signed immediate 16 must be in range [-16, 15]" +- VMSLEUVI $-17, V2, V3 // ERROR "signed immediate -17 must be in range [-16, 15]" +- VMSLEVV X10, V2, V3 // ERROR "expected vector register in vs1 position" +- VMSLEVX V1, V2, V3 // ERROR "expected integer register in rs1 position" +- VMSLEVI $16, V2, V3 // ERROR "signed immediate 16 must be in range [-16, 15]" +- VMSLEVI $-17, V2, V3 // ERROR "signed immediate -17 must be in range [-16, 15]" +- VMSGTUVV X10, V2, V3 // ERROR "expected vector register in vs2 position" +- VMSGTUVX V1, V2, V3 // ERROR "expected integer register in rs1 position" +- VMSGTUVI $16, V2, V3 // ERROR "signed immediate 16 must be in range [-16, 15]" +- VMSGTUVI $-17, V2, V3 // ERROR "signed immediate -17 must be in range [-16, 15]" +- VMSGTVV X10, V2, V3 // ERROR "expected vector register in vs2 position" +- VMSGTVX V1, V2, V3 // ERROR "expected integer register in rs1 position" +- VMSGTVI $16, V2, V3 // ERROR "signed immediate 16 must be in range [-16, 15]" +- VMSGTVI $-17, V2, V3 // ERROR "signed immediate -17 must be in range [-16, 15]" +- VMSGEVV X10, V2, V3 // ERROR "expected vector register in vs2 position" +- VMSGEUVV X10, V2, V3 // ERROR "expected vector register in vs2 position" +- VMSLTVI $17, V2, V3 // ERROR "signed immediate 16 must be in range [-16, 15]" +- VMSLTVI $-16, V2, V3 // ERROR "signed immediate -17 must be in range 
[-16, 15]" +- VMSLTUVI $17, V2, V3 // ERROR "signed immediate 16 must be in range [-16, 15]" +- VMSLTUVI $-16, V2, V3 // ERROR "signed immediate -17 must be in range [-16, 15]" +- VMSGEVI $17, V2, V3 // ERROR "signed immediate 16 must be in range [-16, 15]" +- VMSGEVI $-16, V2, V3 // ERROR "signed immediate -17 must be in range [-16, 15]" +- VMSGEUVI $17, V2, V3 // ERROR "signed immediate 16 must be in range [-16, 15]" +- VMSGEUVI $-16, V2, V3 // ERROR "signed immediate -17 must be in range [-16, 15]" +- VMINUVV X10, V2, V3 // ERROR "expected vector register in vs1 position" +- VMINUVX V1, V2, V3 // ERROR "expected integer register in rs1 position" +- VMINVV X10, V2, V3 // ERROR "expected vector register in vs1 position" +- VMINVX V1, V2, V3 // ERROR "expected integer register in rs1 position" +- VMAXUVV X10, V2, V3 // ERROR "expected vector register in vs1 position" +- VMAXUVX V1, V2, V3 // ERROR "expected integer register in rs1 position" +- VMAXVV X10, V2, V3 // ERROR "expected vector register in vs1 position" +- VMAXVX V1, V2, V3 // ERROR "expected integer register in rs1 position" +- VMULVV X10, V2, V3 // ERROR "expected vector register in vs1 position" +- VMULVX V1, V2, V3 // ERROR "expected integer register in rs1 position" +- VMULHVV X10, V2, V3 // ERROR "expected vector register in vs1 position" +- VMULHVX V1, V2, V3 // ERROR "expected integer register in rs1 position" +- VMULHUVV X10, V2, V3 // ERROR "expected vector register in vs1 position" +- VMULHUVX V1, V2, V3 // ERROR "expected integer register in rs1 position" +- VMULHSUVV X10, V2, V3 // ERROR "expected vector register in vs1 position" +- VMULHSUVX V1, V2, V3 // ERROR "expected integer register in rs1 position" +- VDIVUVV X10, V2, V3 // ERROR "expected vector register in vs1 position" +- VDIVUVX V1, V2, V3 // ERROR "expected integer register in rs1 position" +- VDIVVV X10, V2, V3 // ERROR "expected vector register in vs1 position" +- VDIVVX V1, V2, V3 // ERROR "expected integer register in rs1 position" +- VREMUVV X10, V2, V3 // ERROR "expected vector register in vs1 position" +- VREMUVX V1, V2, V3 // ERROR "expected integer register in rs1 position" +- VREMVV X10, V2, V3 // ERROR "expected vector register in vs1 position" +- VREMVX V1, V2, V3 // ERROR "expected integer register in rs1 position" +- VWMULVV X10, V2, V3 // ERROR "expected vector register in vs1 position" +- VWMULVX V1, V2, V3 // ERROR "expected integer register in rs1 position" +- VWMULUVV X10, V2, V3 // ERROR "expected vector register in vs1 position" +- VWMULUVX V1, V2, V3 // ERROR "expected integer register in rs1 position" +- VWMULSUVV X10, V2, V3 // ERROR "expected vector register in vs1 position" +- VWMULSUVX V1, V2, V3 // ERROR "expected integer register in rs1 position" +- VMACCVV X10, V2, V3 // ERROR "expected vector register in vs1 position" +- VMACCVX V1, V2, V3 // ERROR "expected integer register in rs1 position" +- VNMSACVV X10, V2, V3 // ERROR "expected vector register in vs1 position" +- VNMSACVX V1, V2, V3 // ERROR "expected integer register in rs1 position" +- VMADDVV X10, V2, V3 // ERROR "expected vector register in vs1 position" +- VMADDVX V1, V2, V3 // ERROR "expected integer register in rs1 position" +- VNMSUBVV X10, V2, V3 // ERROR "expected vector register in vs1 position" +- VNMSUBVX V1, V2, V3 // ERROR "expected integer register in rs1 position" +- VWMACCUVV X10, V2, V3 // ERROR "expected vector register in vs1 position" +- VWMACCUVX V1, V2, V3 // ERROR "expected integer register in rs1 position" +- VWMACCVV X10, V2, V3 // ERROR 
"expected vector register in vs1 position" +- VWMACCVX V1, V2, V3 // ERROR "expected integer register in rs1 position" +- VWMACCSUVV X10, V2, V3 // ERROR "expected vector register in vs1 position" +- VWMACCSUVX V1, V2, V3 // ERROR "expected integer register in rs1 position" +- VWMACCUSVX V1, V2, V3 // ERROR "expected integer register in rs1 position" +- VMERGEVVM X10, V2, V0, V3 // ERROR "expected vector register in vs1 position" +- VMERGEVXM V1, V2, V0, V3 // ERROR "expected integer register in rs1 position" +- VMERGEVIM $16, V2, V0, V3 // ERROR "signed immediate 16 must be in range [-16, 15]" +- VMERGEVIM $-17, V2, V0, V3 // ERROR "signed immediate -17 must be in range [-16, 15]" +- VMVVV X10, V3 // ERROR "expected vector register in vs1 position" +- VMVVX V1, V2 // ERROR "expected integer register in rs1 position" +- VMVVI $16, V2 // ERROR "signed immediate 16 must be in range [-16, 15]" +- VMVVI $-17, V2 // ERROR "signed immediate -17 must be in range [-16, 15]" +- VSADDUVV X10, V2, V3 // ERROR "expected vector register in vs1 position" +- VSADDUVX V1, V2, V3 // ERROR "expected integer register in rs1 position" +- VSADDUVI $16, V2, V3 // ERROR "signed immediate 16 must be in range [-16, 15]" +- VSADDUVI $-17, V2, V3 // ERROR "signed immediate -17 must be in range [-16, 15]" +- VSSUBUVV X10, V2, V3 // ERROR "expected vector register in vs1 position" +- VSSUBUVX V1, V2, V3 // ERROR "expected integer register in rs1 position" +- VAADDUVV X10, V2, V3 // ERROR "expected vector register in vs1 position" +- VAADDUVX V1, V2, V3 // ERROR "expected integer register in rs1 position" +- VAADDVV X10, V2, V3 // ERROR "expected vector register in vs1 position" +- VAADDVX V1, V2, V3 // ERROR "expected integer register in rs1 position" +- VASUBUVV X10, V2, V3 // ERROR "expected vector register in vs1 position" +- VASUBUVX V1, V2, V3 // ERROR "expected integer register in rs1 position" +- VASUBVV X10, V2, V3 // ERROR "expected vector register in vs1 position" +- VASUBVX V1, V2, V3 // ERROR "expected integer register in rs1 position" +- VSMULVV X10, V2, V3 // ERROR "expected vector register in vs1 position" +- VSMULVX V1, V2, V3 // ERROR "expected integer register in rs1 position" +- VSSRLVV X10, V2, V3 // ERROR "expected vector register in vs1 position" +- VSSRLVX V1, V2, V3 // ERROR "expected integer register in rs1 position" +- VSSRLVI $32, V2, V3 // ERROR "signed immediate 32 must be in range [0, 31]" +- VSSRLVI $-1, V2, V3 // ERROR "signed immediate -1 must be in range [0, 31]" +- VSSRAVV X10, V2, V3 // ERROR "expected vector register in vs1 position" +- VSSRAVX V1, V2, V3 // ERROR "expected integer register in rs1 position" +- VSSRAVI $32, V2, V3 // ERROR "signed immediate 32 must be in range [0, 31]" +- VSSRAVI $-1, V2, V3 // ERROR "signed immediate -1 must be in range [0, 31]" +- VNCLIPUWV X10, V2, V3 // ERROR "expected vector register in vs1 position" +- VNCLIPUWX V1, V2, V3 // ERROR "expected integer register in rs1 position" +- VNCLIPUWI $32, V2, V3 // ERROR "signed immediate 32 must be in range [0, 31]" +- VNCLIPUWI $-1, V2, V3 // ERROR "signed immediate -1 must be in range [0, 31]" +- VNCLIPWV X10, V2, V3 // ERROR "expected vector register in vs1 position" +- VNCLIPWX V1, V2, V3 // ERROR "expected integer register in rs1 position" +- VNCLIPWI $32, V2, V3 // ERROR "signed immediate 32 must be in range [0, 31]" +- VNCLIPWI $-1, V2, V3 // ERROR "signed immediate -1 must be in range [0, 31]" ++ VSETVLI $32, E16, M1, TU, MU, X12 // ERROR "must be in range [0, 31] (5 bits)" ++ VSETVLI $-1, E32, 
M2, TA, MA, X12 // ERROR "must be in range [0, 31] (5 bits)" ++ VSETVL X10, X11 // ERROR "expected integer register in rs1 position" ++ VLE8V (X10), X10 // ERROR "expected vector register in vd position" ++ VLE8V (V1), V3 // ERROR "expected integer register in rs1 position" ++ VSE8V X10, (X10) // ERROR "expected vector register in vs1 position" ++ VSE8V V3, (V1) // ERROR "expected integer register in rd position" ++ VLSE8V (X10), V3 // ERROR "expected integer register in rs2 position" ++ VLSE8V (X10), X10, X11 // ERROR "expected vector register in vd position" ++ VLSE8V (V1), X10, V3 // ERROR "expected integer register in rs1 position" ++ VLSE8V (X10), V1, V0, V3 // ERROR "expected integer register in rs2 position" ++ VSSE8V V3, (X10) // ERROR "expected integer register in rs2 position" ++ VSSE8V X10, X11, (X10) // ERROR "expected vector register in vd position" ++ VSSE8V V3, X11, (V1) // ERROR "expected integer register in rs1 position" ++ VSSE8V V3, V1, V0, (X10) // ERROR "expected integer register in rs2 position" ++ VLUXEI8V (X10), V2, X11 // ERROR "expected vector register in vd position" ++ VLUXEI8V (X10), V2, X11 // ERROR "expected vector register in vd position" ++ VLUXEI8V (V1), V2, V3 // ERROR "expected integer register in rs1 position" ++ VLUXEI8V (X10), X11, V0, V3 // ERROR "expected vector register in vs2 position" ++ VSUXEI8V X10, V2, (X10) // ERROR "expected vector register in vd position" ++ VSUXEI8V V3, V2, (V1) // ERROR "expected integer register in rs1 position" ++ VSUXEI8V V3, X11, V0, (X10) // ERROR "expected vector register in vs2 position" ++ VLOXEI8V (X10), V2, X11 // ERROR "expected vector register in vd position" ++ VLOXEI8V (V1), V2, V3 // ERROR "expected integer register in rs1 position" ++ VLOXEI8V (X10), X11, V0, V3 // ERROR "expected vector register in vs2 position" ++ VSOXEI8V X10, V2, (X10) // ERROR "expected vector register in vd position" ++ VSOXEI8V V3, V2, (V1) // ERROR "expected integer register in rs1 position" ++ VSOXEI8V V3, X11, V0, (X10) // ERROR "expected vector register in vs2 position" ++ VL1RV (X10), X10 // ERROR "expected vector register in vd position" ++ VL1RV (V1), V3 // ERROR "expected integer register in rs1 position" ++ VS1RV X11, (X11) // ERROR "expected vector register in vs1 position" ++ VS1RV V3, (V1) // ERROR "expected integer register in rd position" ++ VADDVV V1, X10, V3 // ERROR "expected vector register in vs2 position" ++ VADDVX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VADDVI $16, V4, V2 // ERROR "signed immediate 16 must be in range [-16, 15] (5 bits)" ++ VADDVI $-17, V4, V2 // ERROR "signed immediate -17 must be in range [-16, 15] (5 bits)" ++ VSUBVV V1, X10, V3 // ERROR "expected vector register in vs2 position" ++ VSUBVX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VRSUBVX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VRSUBVI $16, V4, V2 // ERROR "signed immediate 16 must be in range [-16, 15] (5 bits)" ++ VRSUBVI $-17, V4, V2 // ERROR "signed immediate -17 must be in range [-16, 15] (5 bits)" ++ VNEGV X10, V3 // ERROR "expected vector register in vs2 position" ++ VNEGV V2 // ERROR "expected vector register in vd position" ++ VWADDUVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VWADDUVX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VWSUBUVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VWSUBUVX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VWADDVV X10, V2, V3 // ERROR 
"expected vector register in vs1 position" ++ VWADDVX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VWSUBVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VWSUBVX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VWADDUWV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VWADDUWX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VWSUBUWV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VWSUBUWX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VWADDWV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VWADDWX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VWSUBWV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VWSUBWX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VWCVTXXV X10, V3 // ERROR "expected vector register in vs2 position" ++ VWCVTUXXV X10, V3 // ERROR "expected vector register in vs2 position" ++ VZEXTVF2 V2, V0, V3, V4 // ERROR "expected no register in rs3" ++ VZEXTVF2 X10, V3 // ERROR "expected vector register in vs2 position" ++ VSEXTVF2 V2, V0, V3, V4 // ERROR "expected no register in rs3" ++ VSEXTVF2 X10, V3 // ERROR "expected vector register in vs2 position" ++ VZEXTVF4 V2, V0, V3, V4 // ERROR "expected no register in rs3" ++ VZEXTVF4 X10, V3 // ERROR "expected vector register in vs2 position" ++ VSEXTVF4 V2, V0, V3, V4 // ERROR "expected no register in rs3" ++ VSEXTVF4 X10, V3 // ERROR "expected vector register in vs2 position" ++ VZEXTVF8 V2, V0, V3, V4 // ERROR "expected no register in rs3" ++ VZEXTVF8 X10, V3 // ERROR "expected vector register in vs2 position" ++ VSEXTVF8 V2, V0, V3, V4 // ERROR "expected no register in rs3" ++ VSEXTVF8 X10, V3 // ERROR "expected vector register in vs2 position" ++ VADCVVM X10, V2, V0, V3 // ERROR "expected vector register in vs1 position" ++ VADCVXM V1, V2, V0, V3 // ERROR "expected integer register in rs1 position" ++ VADCVIM $16, V2, V0, V3 // ERROR "signed immediate 16 must be in range [-16, 15]" ++ VADCVIM $-17, V2, V0, V3 // ERROR "signed immediate -17 must be in range [-16, 15]" ++ VMADCVVM X10, V2, V0, V3 // ERROR "expected vector register in vs1 position" ++ VMADCVXM V1, V2, V0, V3 // ERROR "expected integer register in rs1 position" ++ VMADCVIM $16, V2, V0, V3 // ERROR "signed immediate 16 must be in range [-16, 15]" ++ VMADCVIM $-17, V2, V0, V3 // ERROR "signed immediate -17 must be in range [-16, 15]" ++ VMADCVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VMADCVV V1, V2, V0, V3 // ERROR "expected no register in rs3" ++ VMADCVX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VMADCVX X10, V2, V0, V3 // ERROR "expected no register in rs3" ++ VMADCVI $16, V2, V3 // ERROR "signed immediate 16 must be in range [-16, 15]" ++ VMADCVI $-17, V2, V3 // ERROR "signed immediate -17 must be in range [-16, 15]" ++ VMADCVI $15, V2, V0, V3 // ERROR "expected no register in rs3" ++ VSBCVVM X10, V2, V0, V3 // ERROR "expected vector register in vs1 position" ++ VSBCVXM V1, V2, V0, V3 // ERROR "expected integer register in rs1 position" ++ VMSBCVVM X10, V2, V0, V3 // ERROR "expected vector register in vs1 position" ++ VMSBCVXM V1, V2, V0, V3 // ERROR "expected integer register in rs1 position" ++ VMSBCVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VMSBCVV V1, V2, V0, V3 // ERROR "expected no register in rs3" ++ VMSBCVX V1, V2, V3 // ERROR "expected integer register in rs1 position" 
++ VMSBCVX X10, V2, V0, V3 // ERROR "expected no register in rs3" ++ VANDVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VANDVX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VANDVI $16, V2, V3 // ERROR "signed immediate 16 must be in range [-16, 15]" ++ VANDVI $-17, V2, V3 // ERROR "signed immediate -17 must be in range [-16, 15]" ++ VORVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VORVX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VORVI $16, V2, V3 // ERROR "signed immediate 16 must be in range [-16, 15]" ++ VORVI $-17, V2, V3 // ERROR "signed immediate -17 must be in range [-16, 15]" ++ VXORVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VXORVX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VXORVI $16, V2, V3 // ERROR "signed immediate 16 must be in range [-16, 15]" ++ VXORVI $-17, V2, V3 // ERROR "signed immediate -17 must be in range [-16, 15]" ++ VNOTV V3 // ERROR "expected vector register in vd position" ++ VNOTV X10, V3 // ERROR "expected vector register in vs2 position" ++ VSLLVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VSLLVX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VSLLVI $32, V2, V3 // ERROR "unsigned immediate 32 must be in range [0, 31]" ++ VSLLVI $-1, V2, V3 // ERROR "unsigned immediate -1 must be in range [0, 31]" ++ VSRLVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VSRLVX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VSRLVI $32, V2, V3 // ERROR "unsigned immediate 32 must be in range [0, 31]" ++ VSRLVI $-1, V2, V3 // ERROR "unsigned immediate -1 must be in range [0, 31]" ++ VSRAVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VSRAVX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VSRAVI $32, V2, V3 // ERROR "unsigned immediate 32 must be in range [0, 31]" ++ VSRAVI $-1, V2, V3 // ERROR "unsigned immediate -1 must be in range [0, 31]" ++ VNSRLWV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VNSRLWX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VNSRLWI $32, V2, V3 // ERROR "unsigned immediate 32 must be in range [0, 31]" ++ VNSRLWI $-1, V2, V3 // ERROR "unsigned immediate -1 must be in range [0, 31]" ++ VNSRAWV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VNSRAWX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VNSRAWI $32, V2, V3 // ERROR "unsigned immediate 32 must be in range [0, 31]" ++ VNSRAWI $-1, V2, V3 // ERROR "unsigned immediate -1 must be in range [0, 31]" ++ VNCVTXXW X10, V3 // ERROR "expected vector register in vs2 position" ++ VMSEQVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VMSEQVX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VMSEQVI $16, V2, V3 // ERROR "signed immediate 16 must be in range [-16, 15]" ++ VMSEQVI $-17, V2, V3 // ERROR "signed immediate -17 must be in range [-16, 15]" ++ VMSNEVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VMSNEVX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VMSNEVI $16, V2, V3 // ERROR "signed immediate 16 must be in range [-16, 15]" ++ VMSNEVI $-17, V2, V3 // ERROR "signed immediate -17 must be in range [-16, 15]" ++ VMSLTUVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VMSLTUVX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VMSLTVV X10, V2, V3 // 
ERROR "expected vector register in vs1 position" ++ VMSLTVX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VMSLEUVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VMSLEUVX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VMSLEUVI $16, V2, V3 // ERROR "signed immediate 16 must be in range [-16, 15]" ++ VMSLEUVI $-17, V2, V3 // ERROR "signed immediate -17 must be in range [-16, 15]" ++ VMSLEVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VMSLEVX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VMSLEVI $16, V2, V3 // ERROR "signed immediate 16 must be in range [-16, 15]" ++ VMSLEVI $-17, V2, V3 // ERROR "signed immediate -17 must be in range [-16, 15]" ++ VMSGTUVV X10, V2, V3 // ERROR "expected vector register in vs2 position" ++ VMSGTUVX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VMSGTUVI $16, V2, V3 // ERROR "signed immediate 16 must be in range [-16, 15]" ++ VMSGTUVI $-17, V2, V3 // ERROR "signed immediate -17 must be in range [-16, 15]" ++ VMSGTVV X10, V2, V3 // ERROR "expected vector register in vs2 position" ++ VMSGTVX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VMSGTVI $16, V2, V3 // ERROR "signed immediate 16 must be in range [-16, 15]" ++ VMSGTVI $-17, V2, V3 // ERROR "signed immediate -17 must be in range [-16, 15]" ++ VMSGEVV X10, V2, V3 // ERROR "expected vector register in vs2 position" ++ VMSGEUVV X10, V2, V3 // ERROR "expected vector register in vs2 position" ++ VMSLTVI $17, V2, V3 // ERROR "signed immediate 16 must be in range [-16, 15]" ++ VMSLTVI $-16, V2, V3 // ERROR "signed immediate -17 must be in range [-16, 15]" ++ VMSLTUVI $17, V2, V3 // ERROR "signed immediate 16 must be in range [-16, 15]" ++ VMSLTUVI $-16, V2, V3 // ERROR "signed immediate -17 must be in range [-16, 15]" ++ VMSGEVI $17, V2, V3 // ERROR "signed immediate 16 must be in range [-16, 15]" ++ VMSGEVI $-16, V2, V3 // ERROR "signed immediate -17 must be in range [-16, 15]" ++ VMSGEUVI $17, V2, V3 // ERROR "signed immediate 16 must be in range [-16, 15]" ++ VMSGEUVI $-16, V2, V3 // ERROR "signed immediate -17 must be in range [-16, 15]" ++ VMINUVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VMINUVX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VMINVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VMINVX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VMAXUVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VMAXUVX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VMAXVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VMAXVX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VMULVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VMULVX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VMULHVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VMULHVX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VMULHUVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VMULHUVX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VMULHSUVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VMULHSUVX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VDIVUVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VDIVUVX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ 
VDIVVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VDIVVX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VREMUVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VREMUVX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VREMVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VREMVX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VWMULVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VWMULVX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VWMULUVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VWMULUVX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VWMULSUVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VWMULSUVX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VMACCVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VMACCVX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VNMSACVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VNMSACVX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VMADDVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VMADDVX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VNMSUBVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VNMSUBVX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VWMACCUVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VWMACCUVX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VWMACCVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VWMACCVX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VWMACCSUVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VWMACCSUVX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VWMACCUSVX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VMERGEVVM X10, V2, V0, V3 // ERROR "expected vector register in vs1 position" ++ VMERGEVXM V1, V2, V0, V3 // ERROR "expected integer register in rs1 position" ++ VMERGEVIM $16, V2, V0, V3 // ERROR "signed immediate 16 must be in range [-16, 15]" ++ VMERGEVIM $-17, V2, V0, V3 // ERROR "signed immediate -17 must be in range [-16, 15]" ++ VMVVV X10, V3 // ERROR "expected vector register in vs1 position" ++ VMVVX V1, V2 // ERROR "expected integer register in rs1 position" ++ VMVVI $16, V2 // ERROR "signed immediate 16 must be in range [-16, 15]" ++ VMVVI $-17, V2 // ERROR "signed immediate -17 must be in range [-16, 15]" ++ VSADDUVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VSADDUVX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VSADDUVI $16, V2, V3 // ERROR "signed immediate 16 must be in range [-16, 15]" ++ VSADDUVI $-17, V2, V3 // ERROR "signed immediate -17 must be in range [-16, 15]" ++ VSSUBUVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VSSUBUVX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VAADDUVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VAADDUVX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VAADDVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VAADDVX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VASUBUVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VASUBUVX V1, V2, V3 // ERROR 
"expected integer register in rs1 position" ++ VASUBVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VASUBVX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VSMULVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VSMULVX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VSSRLVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VSSRLVX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VSSRLVI $32, V2, V3 // ERROR "signed immediate 32 must be in range [0, 31]" ++ VSSRLVI $-1, V2, V3 // ERROR "signed immediate -1 must be in range [0, 31]" ++ VSSRAVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VSSRAVX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VSSRAVI $32, V2, V3 // ERROR "signed immediate 32 must be in range [0, 31]" ++ VSSRAVI $-1, V2, V3 // ERROR "signed immediate -1 must be in range [0, 31]" ++ VNCLIPUWV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VNCLIPUWX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VNCLIPUWI $32, V2, V3 // ERROR "signed immediate 32 must be in range [0, 31]" ++ VNCLIPUWI $-1, V2, V3 // ERROR "signed immediate -1 must be in range [0, 31]" ++ VNCLIPWV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VNCLIPWX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VNCLIPWI $32, V2, V3 // ERROR "signed immediate 32 must be in range [0, 31]" ++ VNCLIPWI $-1, V2, V3 // ERROR "signed immediate -1 must be in range [0, 31]" ++ VFADDVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VFADDVF X10, V2, V3 // ERROR "expected float register in rs1 position" ++ VFSUBVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VFSUBVF X10, V2, V3 // ERROR "expected float register in rs1 position" ++ VFRSUBVF X10, V2, V3 // ERROR "expected float register in rs1 position" ++ VFWADDVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VFWADDVF X10, V2, V3 // ERROR "expected float register in rs1 position" ++ VFWSUBVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VFWSUBVF X10, V2, V3 // ERROR "expected float register in rs1 position" ++ VFWADDWV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VFWADDWF X10, V2, V3 // ERROR "expected float register in rs1 position" ++ VFWSUBWV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VFWSUBWF X10, V2, V3 // ERROR "expected float register in rs1 position" ++ VFMULVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VFMULVF X10, V2, V3 // ERROR "expected float register in rs1 position" ++ VFDIVVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VFDIVVF X10, V2, V3 // ERROR "expected float register in rs1 position" ++ VFRDIVVF X10, V2, V3 // ERROR "expected float register in rs1 position" ++ VFWMULVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VFWMULVF X10, V2, V3 // ERROR "expected float register in rs1 position" ++ VFMACCVV V2, X10, V3 // ERROR "expected vector register in vs1 position" ++ VFMACCVF V2, X10, V3 // ERROR "expected float register in rs1 position" ++ VFNMACCVV V2, X10, V3 // ERROR "expected vector register in vs1 position" ++ VFNMACCVF V2, X10, V3 // ERROR "expected float register in rs1 position" ++ VFMSACVV V2, X10, V3 // ERROR "expected vector register in vs1 position" ++ VFMSACVF V2, X10, V3 // ERROR "expected float register in rs1 position" ++ 
VFNMSACVV V2, X10, V3 // ERROR "expected vector register in vs1 position" ++ VFNMSACVF V2, X10, V3 // ERROR "expected float register in rs1 position" ++ VFMADDVV V2, X10, V3 // ERROR "expected vector register in vs1 position" ++ VFMADDVF V2, X10, V3 // ERROR "expected float register in rs1 position" ++ VFNMADDVV V2, X10, V3 // ERROR "expected vector register in vs1 position" ++ VFNMADDVF V2, X10, V3 // ERROR "expected float register in rs1 position" ++ VFMSUBVV V2, X10, V3 // ERROR "expected vector register in vs1 position" ++ VFMSUBVF V2, X10, V3 // ERROR "expected float register in rs1 position" ++ VFNMSUBVV V2, X10, V3 // ERROR "expected vector register in vs1 position" ++ VFNMSUBVF V2, X10, V3 // ERROR "expected float register in rs1 position" ++ VFWMACCVV V2, X10, V3 // ERROR "expected vector register in vs1 position" ++ VFWMACCVF V2, X10, V3 // ERROR "expected float register in rs1 position" ++ VFWNMACCVV V2, X10, V3 // ERROR "expected vector register in vs1 position" ++ VFWNMACCVF V2, X10, V3 // ERROR "expected float register in rs1 position" ++ VFWMSACVV V2, X10, V3 // ERROR "expected vector register in vs1 position" ++ VFWMSACVF V2, X10, V3 // ERROR "expected float register in rs1 position" ++ VFWNMSACVV V2, X10, V3 // ERROR "expected vector register in vs1 position" ++ VFWNMSACVF V2, X10, V3 // ERROR "expected float register in rs1 position" ++ VFSQRTV X10, V3 // ERROR "expected vector register in vs2 position" ++ VFRSQRT7V X10, V3 // ERROR "expected vector register in vs2 position" ++ VFREC7V X10, V3 // ERROR "expected vector register in vs2 position" ++ VFMINVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VFMINVF X10, V2, V3 // ERROR "expected float register in rs1 position" ++ VFMAXVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VFMAXVF X10, V2, V3 // ERROR "expected float register in rs1 position" ++ VFSGNJVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VFSGNJVF X10, V2, V3 // ERROR "expected float register in rs1 position" ++ VFSGNJNVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VFSGNJNVF X10, V2, V3 // ERROR "expected float register in rs1 position" ++ VFSGNJXVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VFSGNJXVF X10, V2, V3 // ERROR "expected float register in rs1 position" ++ VFNEGV V2, X10 // ERROR "expected vector register in vd position" ++ VFABSV V2, X10 // ERROR "expected vector register in vd position" ++ VMFEQVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VMFEQVF X10, V2, V3 // ERROR "expected float register in rs1 position" ++ VMFNEVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VMFNEVF X10, V2, V3 // ERROR "expected float register in rs1 position" ++ VMFLTVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VMFLTVF X10, V2, V3 // ERROR "expected float register in rs1 position" ++ VMFLEVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VMFLEVF X10, V2, V3 // ERROR "expected float register in rs1 position" ++ VMFGTVF X10, V2, V3 // ERROR "expected float register in rs1 position" ++ VMFGEVF X10, V2, V3 // ERROR "expected float register in rs1 position" ++ VMFGTVV X10, V2, V3 // ERROR "expected vector register in vs2 position" ++ VMFGEVV X10, V2, V3 // ERROR "expected vector register in vs2 position" ++ VFCLASSV X10, V3 // ERROR "expected vector register in vs2 position" ++ VFMERGEVFM X10, V2, V0, V3 // ERROR "expected float register in rs1 position" ++ VFMVVF X10, V3 // 
ERROR "expected float register in rs1 position" ++ VFCVTXUFV X10, V3 // ERROR "expected vector register in vs2 position" ++ VFCVTXFV X10, V3 // ERROR "expected vector register in vs2 position" ++ VFCVTRTZXUFV X10, V3 // ERROR "expected vector register in vs2 position" ++ VFCVTRTZXFV X10, V3 // ERROR "expected vector register in vs2 position" ++ VFCVTFXUV X10, V3 // ERROR "expected vector register in vs2 position" ++ VFCVTFXV X10, V3 // ERROR "expected vector register in vs2 position" ++ VFWCVTXUFV X10, V3 // ERROR "expected vector register in vs2 position" ++ VFWCVTXFV X10, V3 // ERROR "expected vector register in vs2 position" ++ VFWCVTRTZXUFV X10, V3 // ERROR "expected vector register in vs2 position" ++ VFWCVTRTZXFV X10, V3 // ERROR "expected vector register in vs2 position" ++ VFWCVTFXUV X10, V3 // ERROR "expected vector register in vs2 position" ++ VFWCVTFXV X10, V3 // ERROR "expected vector register in vs2 position" ++ VFWCVTFFV X10, V3 // ERROR "expected vector register in vs2 position" ++ VFNCVTXUFW X10, V3 // ERROR "expected vector register in vs2 position" ++ VFNCVTXFW X10, V3 // ERROR "expected vector register in vs2 position" ++ VFNCVTRTZXUFW X10, V3 // ERROR "expected vector register in vs2 position" ++ VFNCVTRTZXFW X10, V3 // ERROR "expected vector register in vs2 position" ++ VFNCVTFXUW X10, V3 // ERROR "expected vector register in vs2 position" ++ VFNCVTFXW X10, V3 // ERROR "expected vector register in vs2 position" ++ VFNCVTFFW X10, V3 // ERROR "expected vector register in vs2 position" ++ VFNCVTRODFFW X10, V3 // ERROR "expected vector register in vs2 position" + + RET +diff --git a/src/cmd/internal/obj/riscv/anames.go b/src/cmd/internal/obj/riscv/anames.go +index a65dfceea9..bf1fdb8b88 100644 +--- a/src/cmd/internal/obj/riscv/anames.go ++++ b/src/cmd/internal/obj/riscv/anames.go +@@ -650,6 +650,10 @@ var Anames = []string{ + "RDTIME", + "SEQZ", + "SNEZ", ++ "VFABSV", ++ "VFNEGV", ++ "VMFGEVV", ++ "VMFGTVV", + "VL1RV", + "VL2RV", + "VL4RV", +diff --git a/src/cmd/internal/obj/riscv/cpu.go b/src/cmd/internal/obj/riscv/cpu.go +index fa4c2cf372..3cad4f9d94 100644 +--- a/src/cmd/internal/obj/riscv/cpu.go ++++ b/src/cmd/internal/obj/riscv/cpu.go +@@ -1178,6 +1178,10 @@ const ( + ARDTIME + ASEQZ + ASNEZ ++ AVFABSV ++ AVFNEGV ++ AVMFGEVV ++ AVMFGTVV + AVL1RV + AVL2RV + AVL4RV +diff --git a/src/cmd/internal/obj/riscv/obj.go b/src/cmd/internal/obj/riscv/obj.go +index a285ce4614..aa8a3d769e 100644 +--- a/src/cmd/internal/obj/riscv/obj.go ++++ b/src/cmd/internal/obj/riscv/obj.go +@@ -1314,17 +1314,24 @@ func validateRFI(ctxt *obj.Link, ins *instruction) { + wantNoneReg(ctxt, ins, "rs3", ins.rs3) + } + +-func validateRIF(ctxt *obj.Link, ins *instruction) { ++func validateRFF(ctxt *obj.Link, ins *instruction) { + wantFloatReg(ctxt, ins, "rd", ins.rd) + wantNoneReg(ctxt, ins, "rs1", ins.rs1) +- wantIntReg(ctxt, ins, "rs2", ins.rs2) ++ wantFloatReg(ctxt, ins, "rs2", ins.rs2) + wantNoneReg(ctxt, ins, "rs3", ins.rs3) + } + +-func validateRFF(ctxt *obj.Link, ins *instruction) { ++func validateRIF(ctxt *obj.Link, ins *instruction) { + wantFloatReg(ctxt, ins, "rd", ins.rd) + wantNoneReg(ctxt, ins, "rs1", ins.rs1) +- wantFloatReg(ctxt, ins, "rs2", ins.rs2) ++ wantIntReg(ctxt, ins, "rs2", ins.rs2) ++ wantNoneReg(ctxt, ins, "rs3", ins.rs3) ++} ++ ++func validateRVFV(ctxt *obj.Link, ins *instruction) { ++ wantVectorReg(ctxt, ins, "vd", ins.rd) ++ wantFloatReg(ctxt, ins, "rs1", ins.rs1) ++ wantVectorReg(ctxt, ins, "vs2", ins.rs2) + wantNoneReg(ctxt, ins, "rs3", ins.rs3) + } + +@@ -1565,12 +1572,20 
@@ func encodeRFI(ins *instruction) uint32 { + return encodeR(ins.as, regF(ins.rs2), 0, regI(ins.rd), ins.funct3, ins.funct7) + } + ++func encodeRFF(ins *instruction) uint32 { ++ return encodeR(ins.as, regF(ins.rs2), 0, regF(ins.rd), ins.funct3, ins.funct7) ++} ++ + func encodeRIF(ins *instruction) uint32 { + return encodeR(ins.as, regI(ins.rs2), 0, regF(ins.rd), ins.funct3, ins.funct7) + } + +-func encodeRFF(ins *instruction) uint32 { +- return encodeR(ins.as, regF(ins.rs2), 0, regF(ins.rd), ins.funct3, ins.funct7) ++func encodeRVFV(ins *instruction) uint32 { ++ return encodeR(ins.as, regF(ins.rs1), regV(ins.rs2), regV(ins.rd), ins.funct3, ins.funct7) ++} ++ ++func encodeRVIV(ins *instruction) uint32 { ++ return encodeR(ins.as, regI(ins.rs1), regV(ins.rs2), regV(ins.rd), ins.funct3, ins.funct7) + } + + func encodeRVV(ins *instruction) uint32 { +@@ -1585,10 +1600,6 @@ func encodeRVVu(ins *instruction) uint32 { + return encodeR(ins.as, immU(ins.as, ins.imm, 5), regV(ins.rs2), regV(ins.rd), ins.funct3, ins.funct7) + } + +-func encodeRVIV(ins *instruction) uint32 { +- return encodeR(ins.as, regI(ins.rs1), regV(ins.rs2), regV(ins.rd), ins.funct3, ins.funct7) +-} +- + func encodeRVVV(ins *instruction) uint32 { + return encodeR(ins.as, regV(ins.rs1), regV(ins.rs2), regV(ins.rd), ins.funct3, ins.funct7) + } +@@ -1876,10 +1887,11 @@ var ( + rFIEncoding = encoding{encode: encodeRFI, validate: validateRFI, length: 4} + rIFEncoding = encoding{encode: encodeRIF, validate: validateRIF, length: 4} + rFFEncoding = encoding{encode: encodeRFF, validate: validateRFF, length: 4} ++ rVFVEncoding = encoding{encode: encodeRVFV, validate: validateRVFV, length: 4} ++ rVIVEncoding = encoding{encode: encodeRVIV, validate: validateRVIV, length: 4} + rVVEncoding = encoding{encode: encodeRVV, validate: validateRVV, length: 4} + rVViEncoding = encoding{encode: encodeRVVi, validate: validateRVVi, length: 4} + rVVuEncoding = encoding{encode: encodeRVVu, validate: validateRVVu, length: 4} +- rVIVEncoding = encoding{encode: encodeRVIV, validate: validateRVIV, length: 4} + rVVVEncoding = encoding{encode: encodeRVVV, validate: validateRVVV, length: 4} + + iIIEncoding = encoding{encode: encodeIII, validate: validateIII, length: 4} +@@ -2453,6 +2465,133 @@ var instructions = [ALAST & obj.AMask]instructionData{ + AVNCLIPWX & obj.AMask: {enc: rVIVEncoding}, + AVNCLIPWI & obj.AMask: {enc: rVVuEncoding}, + ++ // 31.13.2: Vector Single-Width Floating-Point Add/Subtract Instructions ++ AVFADDVV & obj.AMask: {enc: rVVVEncoding}, ++ AVFADDVF & obj.AMask: {enc: rVFVEncoding}, ++ AVFSUBVV & obj.AMask: {enc: rVVVEncoding}, ++ AVFSUBVF & obj.AMask: {enc: rVFVEncoding}, ++ AVFRSUBVF & obj.AMask: {enc: rVFVEncoding}, ++ ++ // 31.13.3: Vector Widening Floating-Point Add/Subtract Instructions ++ AVFWADDVV & obj.AMask: {enc: rVVVEncoding}, ++ AVFWADDVF & obj.AMask: {enc: rVFVEncoding}, ++ AVFWSUBVV & obj.AMask: {enc: rVVVEncoding}, ++ AVFWSUBVF & obj.AMask: {enc: rVFVEncoding}, ++ AVFWADDWV & obj.AMask: {enc: rVVVEncoding}, ++ AVFWADDWF & obj.AMask: {enc: rVFVEncoding}, ++ AVFWSUBWV & obj.AMask: {enc: rVVVEncoding}, ++ AVFWSUBWF & obj.AMask: {enc: rVFVEncoding}, ++ ++ // 31.13.4: Vector Single-Width Floating-Point Multiply/Divide Instructions ++ AVFMULVV & obj.AMask: {enc: rVVVEncoding}, ++ AVFMULVF & obj.AMask: {enc: rVFVEncoding}, ++ AVFDIVVV & obj.AMask: {enc: rVVVEncoding}, ++ AVFDIVVF & obj.AMask: {enc: rVFVEncoding}, ++ AVFRDIVVF & obj.AMask: {enc: rVFVEncoding}, ++ ++ // 31.13.5: Vector Widening Floating-Point Multiply ++ AVFWMULVV & 
obj.AMask: {enc: rVVVEncoding}, ++ AVFWMULVF & obj.AMask: {enc: rVFVEncoding}, ++ ++ // 31.13.6: Vector Single-Width Floating-Point Fused Multiply-Add Instructions ++ AVFMACCVV & obj.AMask: {enc: rVVVEncoding}, ++ AVFMACCVF & obj.AMask: {enc: rVFVEncoding}, ++ AVFNMACCVV & obj.AMask: {enc: rVVVEncoding}, ++ AVFNMACCVF & obj.AMask: {enc: rVFVEncoding}, ++ AVFMSACVV & obj.AMask: {enc: rVVVEncoding}, ++ AVFMSACVF & obj.AMask: {enc: rVFVEncoding}, ++ AVFNMSACVV & obj.AMask: {enc: rVVVEncoding}, ++ AVFNMSACVF & obj.AMask: {enc: rVFVEncoding}, ++ AVFMADDVV & obj.AMask: {enc: rVVVEncoding}, ++ AVFMADDVF & obj.AMask: {enc: rVFVEncoding}, ++ AVFNMADDVV & obj.AMask: {enc: rVVVEncoding}, ++ AVFNMADDVF & obj.AMask: {enc: rVFVEncoding}, ++ AVFMSUBVV & obj.AMask: {enc: rVVVEncoding}, ++ AVFMSUBVF & obj.AMask: {enc: rVFVEncoding}, ++ AVFNMSUBVV & obj.AMask: {enc: rVVVEncoding}, ++ AVFNMSUBVF & obj.AMask: {enc: rVFVEncoding}, ++ ++ // 31.13.7: Vector Widening Floating-Point Fused Multiply-Add Instructions ++ AVFWMACCVV & obj.AMask: {enc: rVVVEncoding}, ++ AVFWMACCVF & obj.AMask: {enc: rVFVEncoding}, ++ AVFWNMACCVV & obj.AMask: {enc: rVVVEncoding}, ++ AVFWNMACCVF & obj.AMask: {enc: rVFVEncoding}, ++ AVFWMSACVV & obj.AMask: {enc: rVVVEncoding}, ++ AVFWMSACVF & obj.AMask: {enc: rVFVEncoding}, ++ AVFWNMSACVV & obj.AMask: {enc: rVVVEncoding}, ++ AVFWNMSACVF & obj.AMask: {enc: rVFVEncoding}, ++ ++ // 31.13.8: Vector Floating-Point Square-Root Instruction ++ AVFSQRTV & obj.AMask: {enc: rVVEncoding}, ++ ++ // 31.13.9: Vector Floating-Point Reciprocal Square-Root Estimate Instruction ++ AVFRSQRT7V & obj.AMask: {enc: rVVEncoding}, ++ ++ // 31.13.10: Vector Floating-Point Reciprocal Estimate Instruction ++ AVFREC7V & obj.AMask: {enc: rVVEncoding}, ++ ++ // 31.13.11: Vector Floating-Point MIN/MAX Instructions ++ AVFMINVV & obj.AMask: {enc: rVVVEncoding}, ++ AVFMINVF & obj.AMask: {enc: rVFVEncoding}, ++ AVFMAXVV & obj.AMask: {enc: rVVVEncoding}, ++ AVFMAXVF & obj.AMask: {enc: rVFVEncoding}, ++ ++ // 31.13.12: Vector Floating-Point Sign-Injection Instructions ++ AVFSGNJVV & obj.AMask: {enc: rVVVEncoding}, ++ AVFSGNJVF & obj.AMask: {enc: rVFVEncoding}, ++ AVFSGNJNVV & obj.AMask: {enc: rVVVEncoding}, ++ AVFSGNJNVF & obj.AMask: {enc: rVFVEncoding}, ++ AVFSGNJXVV & obj.AMask: {enc: rVVVEncoding}, ++ AVFSGNJXVF & obj.AMask: {enc: rVFVEncoding}, ++ ++ // 31.13.13: Vector Floating-Point Compare Instructions ++ AVMFEQVV & obj.AMask: {enc: rVVVEncoding}, ++ AVMFEQVF & obj.AMask: {enc: rVFVEncoding}, ++ AVMFNEVV & obj.AMask: {enc: rVVVEncoding}, ++ AVMFNEVF & obj.AMask: {enc: rVFVEncoding}, ++ AVMFLTVV & obj.AMask: {enc: rVVVEncoding}, ++ AVMFLTVF & obj.AMask: {enc: rVFVEncoding}, ++ AVMFLEVV & obj.AMask: {enc: rVVVEncoding}, ++ AVMFLEVF & obj.AMask: {enc: rVFVEncoding}, ++ AVMFGTVF & obj.AMask: {enc: rVFVEncoding}, ++ AVMFGEVF & obj.AMask: {enc: rVFVEncoding}, ++ ++ // 31.13.14: Vector Floating-Point Classify Instruction ++ AVFCLASSV & obj.AMask: {enc: rVVEncoding}, ++ ++ // 31.13.15: Vector Floating-Point Merge Instruction ++ AVFMERGEVFM & obj.AMask: {enc: rVFVEncoding}, ++ ++ // 31.13.16: Vector Floating-Point Move Instruction ++ AVFMVVF & obj.AMask: {enc: rVFVEncoding}, ++ ++ // 31.13.17: Single-Width Floating-Point/Integer Type-Convert Instructions ++ AVFCVTXUFV & obj.AMask: {enc: rVVEncoding}, ++ AVFCVTXFV & obj.AMask: {enc: rVVEncoding}, ++ AVFCVTRTZXUFV & obj.AMask: {enc: rVVEncoding}, ++ AVFCVTRTZXFV & obj.AMask: {enc: rVVEncoding}, ++ AVFCVTFXUV & obj.AMask: {enc: rVVEncoding}, ++ AVFCVTFXV & obj.AMask: {enc: 
rVVEncoding}, ++ ++ // 31.13.18: Widening Floating-Point/Integer Type-Convert Instructions ++ AVFWCVTXUFV & obj.AMask: {enc: rVVEncoding}, ++ AVFWCVTXFV & obj.AMask: {enc: rVVEncoding}, ++ AVFWCVTRTZXUFV & obj.AMask: {enc: rVVEncoding}, ++ AVFWCVTRTZXFV & obj.AMask: {enc: rVVEncoding}, ++ AVFWCVTFXUV & obj.AMask: {enc: rVVEncoding}, ++ AVFWCVTFXV & obj.AMask: {enc: rVVEncoding}, ++ AVFWCVTFFV & obj.AMask: {enc: rVVEncoding}, ++ ++ // 31.13.19: Narrowing Floating-Point/Integer Type-Convert Instructions ++ AVFNCVTXUFW & obj.AMask: {enc: rVVEncoding}, ++ AVFNCVTXFW & obj.AMask: {enc: rVVEncoding}, ++ AVFNCVTRTZXUFW & obj.AMask: {enc: rVVEncoding}, ++ AVFNCVTRTZXFW & obj.AMask: {enc: rVVEncoding}, ++ AVFNCVTFXUW & obj.AMask: {enc: rVVEncoding}, ++ AVFNCVTFXW & obj.AMask: {enc: rVVEncoding}, ++ AVFNCVTFFW & obj.AMask: {enc: rVVEncoding}, ++ AVFNCVTRODFFW & obj.AMask: {enc: rVVEncoding}, ++ + // + // Privileged ISA + // +@@ -3345,7 +3484,13 @@ func instructionsForProg(p *obj.Prog) []*instruction { + AVSADDUVV, AVSADDUVX, AVSADDUVI, AVSADDVV, AVSADDVX, AVSADDVI, AVSSUBUVV, AVSSUBUVX, AVSSUBVV, AVSSUBVX, + AVAADDUVV, AVAADDUVX, AVAADDVV, AVAADDVX, AVASUBUVV, AVASUBUVX, AVASUBVV, AVASUBVX, + AVSMULVV, AVSMULVX, AVSSRLVV, AVSSRLVX, AVSSRLVI, AVSSRAVV, AVSSRAVX, AVSSRAVI, +- AVNCLIPUWV, AVNCLIPUWX, AVNCLIPUWI, AVNCLIPWV, AVNCLIPWX, AVNCLIPWI: ++ AVNCLIPUWV, AVNCLIPUWX, AVNCLIPUWI, AVNCLIPWV, AVNCLIPWX, AVNCLIPWI, ++ AVFADDVV, AVFADDVF, AVFSUBVV, AVFSUBVF, AVFRSUBVF, ++ AVFWADDVV, AVFWADDVF, AVFWSUBVV, AVFWSUBVF, AVFWADDWV, AVFWADDWF, AVFWSUBWV, AVFWSUBWF, ++ AVFMULVV, AVFMULVF, AVFDIVVV, AVFDIVVF, AVFRDIVVF, AVFWMULVV, AVFWMULVF, ++ AVFMINVV, AVFMINVF, AVFMAXVV, AVFMAXVF, ++ AVFSGNJVV, AVFSGNJVF, AVFSGNJNVV, AVFSGNJNVF, AVFSGNJXVV, AVFSGNJXVF, ++ AVMFEQVV, AVMFEQVF, AVMFNEVV, AVMFNEVF, AVMFLTVV, AVMFLTVF, AVMFLEVV, AVMFLEVF, AVMFGTVF, AVMFGEVF: + // Set mask bit + switch { + case ins.rs3 == obj.REG_NONE: +@@ -3355,6 +3500,17 @@ func instructionsForProg(p *obj.Prog) []*instruction { + } + ins.rd, ins.rs1, ins.rs2, ins.rs3 = uint32(p.To.Reg), uint32(p.From.Reg), uint32(p.Reg), obj.REG_NONE + ++ case AVFMACCVV, AVFMACCVF, AVFNMACCVV, AVFNMACCVF, AVFMSACVV, AVFMSACVF, AVFNMSACVV, AVFNMSACVF, ++ AVFMADDVV, AVFMADDVF, AVFNMADDVV, AVFNMADDVF, AVFMSUBVV, AVFMSUBVF, AVFNMSUBVV, AVFNMSUBVF, ++ AVFWMACCVV, AVFWMACCVF, AVFWNMACCVV, AVFWNMACCVF, AVFWMSACVV, AVFWMSACVF, AVFWNMSACVV, AVFWNMSACVF: ++ switch { ++ case ins.rs3 == obj.REG_NONE: ++ ins.funct7 |= 1 // unmasked ++ case ins.rs3 != REG_V0: ++ p.Ctxt.Diag("%v: invalid vector mask register", p) ++ } ++ ins.rd, ins.rs1, ins.rs2, ins.rs3 = uint32(p.To.Reg), uint32(p.Reg), uint32(p.From.Reg), obj.REG_NONE ++ + case AVADDVI, AVRSUBVI, AVANDVI, AVORVI, AVXORVI, AVMSEQVI, AVMSNEVI, AVMSLEUVI, AVMSLEVI, AVMSGTUVI, AVMSGTVI, + AVSLLVI, AVSRLVI, AVSRAVI, AVNSRLWI, AVNSRAWI: + // Set mask bit +@@ -3366,7 +3522,10 @@ func instructionsForProg(p *obj.Prog) []*instruction { + } + ins.rd, ins.rs1, ins.rs2, ins.rs3 = uint32(p.To.Reg), obj.REG_NONE, uint32(p.Reg), obj.REG_NONE + +- case AVZEXTVF2, AVSEXTVF2, AVZEXTVF4, AVSEXTVF4, AVZEXTVF8, AVSEXTVF8: ++ case AVZEXTVF2, AVSEXTVF2, AVZEXTVF4, AVSEXTVF4, AVZEXTVF8, AVSEXTVF8, AVFSQRTV, AVFRSQRT7V, AVFREC7V, AVFCLASSV, ++ AVFCVTXUFV, AVFCVTXFV, AVFCVTRTZXUFV, AVFCVTRTZXFV, AVFCVTFXUV, AVFCVTFXV, ++ AVFWCVTXUFV, AVFWCVTXFV, AVFWCVTRTZXUFV, AVFWCVTRTZXFV, AVFWCVTFXUV, AVFWCVTFXV, AVFWCVTFFV, ++ AVFNCVTXUFW, AVFNCVTXFW, AVFNCVTRTZXUFW, AVFNCVTRTZXFW, AVFNCVTFXUW, AVFNCVTFXW, AVFNCVTFFW, AVFNCVTRODFFW: + // Set mask bit + switch { + 
case ins.rs1 == obj.REG_NONE: +@@ -3388,8 +3547,12 @@ func instructionsForProg(p *obj.Prog) []*instruction { + } + ins.rd, ins.rs1, ins.rs2 = uint32(p.To.Reg), obj.REG_NONE, REG_V0 + ++ case AVFMVVF: ++ ins.funct7 |= 1 // unmasked ++ ins.rd, ins.rs1, ins.rs2 = uint32(p.To.Reg), uint32(p.From.Reg), REG_V0 ++ + case AVADCVVM, AVADCVXM, AVMADCVVM, AVMADCVXM, AVSBCVVM, AVSBCVXM, AVMSBCVVM, AVMSBCVXM, AVADCVIM, AVMADCVIM, +- AVMERGEVVM, AVMERGEVXM, AVMERGEVIM: ++ AVMERGEVVM, AVMERGEVXM, AVMERGEVIM, AVFMERGEVFM: + if ins.rs3 != REG_V0 { + p.Ctxt.Diag("%v: invalid vector mask register", p) + } +@@ -3429,7 +3592,7 @@ func instructionsForProg(p *obj.Prog) []*instruction { + ins.as = AVXORVI + ins.rd, ins.rs1, ins.rs2, ins.imm = uint32(p.To.Reg), obj.REG_NONE, uint32(p.From.Reg), -1 + +- case AVMSGTVV, AVMSGTUVV, AVMSGEVV, AVMSGEUVV: ++ case AVMSGTVV, AVMSGTUVV, AVMSGEVV, AVMSGEUVV, AVMFGTVV, AVMFGEVV: + // Set mask bit + switch { + case ins.rs3 == obj.REG_NONE: +@@ -3446,6 +3609,10 @@ func instructionsForProg(p *obj.Prog) []*instruction { + ins.as = AVMSLEVV + case AVMSGEUVV: + ins.as = AVMSLEUVV ++ case AVMFGTVV: ++ ins.as = AVMFLTVV ++ case AVMFGEVV: ++ ins.as = AVMFLEVV + } + ins.rd, ins.rs1, ins.rs2, ins.rs3 = uint32(p.To.Reg), uint32(p.Reg), uint32(p.From.Reg), obj.REG_NONE + +@@ -3468,6 +3635,22 @@ func instructionsForProg(p *obj.Prog) []*instruction { + ins.as = AVMSGTUVI + } + ins.rd, ins.rs1, ins.rs2, ins.rs3, ins.imm = uint32(p.To.Reg), obj.REG_NONE, uint32(p.Reg), obj.REG_NONE, ins.imm-1 ++ ++ case AVFABSV, AVFNEGV: ++ // Set mask bit ++ switch { ++ case ins.rs1 == obj.REG_NONE: ++ ins.funct7 |= 1 // unmasked ++ case ins.rs1 != REG_V0: ++ p.Ctxt.Diag("%v: invalid vector mask register", p) ++ } ++ switch ins.as { ++ case AVFABSV: ++ ins.as = AVFSGNJXVV ++ case AVFNEGV: ++ ins.as = AVFSGNJNVV ++ } ++ ins.rd, ins.rs1, ins.rs2 = uint32(p.To.Reg), uint32(p.From.Reg), uint32(p.From.Reg) + } + + for _, ins := range inss { +-- +2.50.1 + diff --git a/2018-internal-bytealg-deduplicate-code-between-Count-Coun.patch b/2018-internal-bytealg-deduplicate-code-between-Count-Coun.patch new file mode 100644 index 0000000000000000000000000000000000000000..b31cf12f795f2c0b668ba6cb09ac3619308eaea7 --- /dev/null +++ b/2018-internal-bytealg-deduplicate-code-between-Count-Coun.patch @@ -0,0 +1,63 @@ +From ce80e89f57a8d3544b6f08cb0a34f2f528a9921f Mon Sep 17 00:00:00 2001 +From: Joel Sing +Date: Thu, 27 Feb 2025 23:04:15 +1100 +Subject: [PATCH 18/38] internal/bytealg: deduplicate code between + Count/CountString for riscv64 + +Change-Id: I22eb4e7444e5fe5f6767cc960895f3c6e2fa13cc +Reviewed-on: https://go-review.googlesource.com/c/go/+/661615 +Reviewed-by: Keith Randall +LUCI-TryBot-Result: Go LUCI +Reviewed-by: Keith Randall +Auto-Submit: Carlos Amedee +Reviewed-by: Carlos Amedee +--- + src/internal/bytealg/count_riscv64.s | 28 +++++++--------------------- + 1 file changed, 7 insertions(+), 21 deletions(-) + +diff --git a/src/internal/bytealg/count_riscv64.s b/src/internal/bytealg/count_riscv64.s +index 3f255cd263..6cc49d1388 100644 +--- a/src/internal/bytealg/count_riscv64.s ++++ b/src/internal/bytealg/count_riscv64.s +@@ -5,6 +5,13 @@ + #include "go_asm.h" + #include "textflag.h" + ++TEXT ·CountString(SB),NOSPLIT,$0-32 ++ // X10 = s_base ++ // X11 = s_len ++ // X12 = byte to count ++ MOV X12, X13 ++ JMP ·Count(SB) ++ + TEXT ·Count(SB),NOSPLIT,$0-40 + // X10 = b_base + // X11 = b_len +@@ -26,24 +33,3 @@ loop: + done: + MOV X14, X10 + RET +- +-TEXT ·CountString(SB),NOSPLIT,$0-32 +- // X10 = s_base +- // X11 = 
s_len +- // X12 = byte to count +- AND $0xff, X12 +- MOV ZERO, X14 // count +- ADD X10, X11 // end +- +- PCALIGN $16 +-loop: +- BEQ X10, X11, done +- MOVBU (X10), X15 +- ADD $1, X10 +- BNE X12, X15, loop +- ADD $1, X14 +- JMP loop +- +-done: +- MOV X14, X10 +- RET +-- +2.50.1 + diff --git a/2019-cmd-internal-obj-riscv-add-support-for-vector-reduct.patch b/2019-cmd-internal-obj-riscv-add-support-for-vector-reduct.patch new file mode 100644 index 0000000000000000000000000000000000000000..b0586ce03b6d50195f3feaa9a7e45ca0ba086bf0 --- /dev/null +++ b/2019-cmd-internal-obj-riscv-add-support-for-vector-reduct.patch @@ -0,0 +1,176 @@ +From af770c0d1986b2fe6c439331cf76de55ec0c7ea0 Mon Sep 17 00:00:00 2001 +From: Joel Sing +Date: Sun, 2 Feb 2025 22:26:07 +1100 +Subject: [PATCH 19/38] cmd/internal/obj/riscv: add support for vector + reduction instructions + +Add support for vector reduction instructions to the RISC-V assembler, +including single-width integer reduction, widening integer reduction, +single-width floating-point reduction and widening floating-point +reduction. + +Change-Id: I8f17bef11389f3a017e0430275023fc5d75936e3 +Reviewed-on: https://go-review.googlesource.com/c/go/+/646778 +Reviewed-by: Meng Zhuo +Reviewed-by: Mark Ryan +LUCI-TryBot-Result: Go LUCI +Reviewed-by: Carlos Amedee +Reviewed-by: Dmitri Shuralyov +--- + src/cmd/asm/internal/asm/testdata/riscv64.s | 40 +++++++++++++++++++ + .../asm/internal/asm/testdata/riscv64error.s | 15 +++++++ + .../internal/asm/testdata/riscv64validation.s | 16 ++++++++ + src/cmd/internal/obj/riscv/obj.go | 28 ++++++++++++- + 4 files changed, 98 insertions(+), 1 deletion(-) + +diff --git a/src/cmd/asm/internal/asm/testdata/riscv64.s b/src/cmd/asm/internal/asm/testdata/riscv64.s +index d17576655b..682dc6f75b 100644 +--- a/src/cmd/asm/internal/asm/testdata/riscv64.s ++++ b/src/cmd/asm/internal/asm/testdata/riscv64.s +@@ -1161,6 +1161,46 @@ start: + VFNCVTRODFFW V2, V3 // d7912a4a + VFNCVTRODFFW V2, V0, V3 // d7912a48 + ++ // 31.14.1: Vector Single-Width Integer Reduction Instructions ++ VREDSUMVS V1, V2, V3 // d7a12002 ++ VREDSUMVS V1, V2, V0, V3 // d7a12000 ++ VREDMAXUVS V1, V2, V3 // d7a1201a ++ VREDMAXUVS V1, V2, V0, V3 // d7a12018 ++ VREDMAXVS V1, V2, V3 // d7a1201e ++ VREDMAXVS V1, V2, V0, V3 // d7a1201c ++ VREDMINUVS V1, V2, V3 // d7a12012 ++ VREDMINUVS V1, V2, V0, V3 // d7a12010 ++ VREDMINVS V1, V2, V3 // d7a12016 ++ VREDMINVS V1, V2, V0, V3 // d7a12014 ++ VREDANDVS V1, V2, V3 // d7a12006 ++ VREDANDVS V1, V2, V0, V3 // d7a12004 ++ VREDORVS V1, V2, V3 // d7a1200a ++ VREDORVS V1, V2, V0, V3 // d7a12008 ++ VREDXORVS V1, V2, V3 // d7a1200e ++ VREDXORVS V1, V2, V0, V3 // d7a1200c ++ ++ // 31.14.2: Vector Widening Integer Reduction Instructions ++ VWREDSUMUVS V1, V2, V3 // d78120c2 ++ VWREDSUMUVS V1, V2, V0, V3 // d78120c0 ++ VWREDSUMVS V1, V2, V3 // d78120c6 ++ VWREDSUMVS V1, V2, V0, V3 // d78120c4 ++ ++ // 31.14.3: Vector Single-Width Floating-Point Reduction Instructions ++ VFREDOSUMVS V1, V2, V3 // d791200e ++ VFREDOSUMVS V1, V2, V0, V3 // d791200c ++ VFREDUSUMVS V1, V2, V3 // d7912006 ++ VFREDUSUMVS V1, V2, V0, V3 // d7912004 ++ VFREDMAXVS V1, V2, V3 // d791201e ++ VFREDMAXVS V1, V2, V0, V3 // d791201c ++ VFREDMINVS V1, V2, V3 // d7912016 ++ VFREDMINVS V1, V2, V0, V3 // d7912014 ++ ++ // 31.14.4: Vector Widening Floating-Point Reduction Instructions ++ VFWREDOSUMVS V1, V2, V3 // d79120ce ++ VFWREDOSUMVS V1, V2, V0, V3 // d79120cc ++ VFWREDUSUMVS V1, V2, V3 // d79120c6 ++ VFWREDUSUMVS V1, V2, V0, V3 // d79120c4 ++ + // + // Privileged ISA + // 
+diff --git a/src/cmd/asm/internal/asm/testdata/riscv64error.s b/src/cmd/asm/internal/asm/testdata/riscv64error.s +index c96eaa2383..cd4c96eb6d 100644 +--- a/src/cmd/asm/internal/asm/testdata/riscv64error.s ++++ b/src/cmd/asm/internal/asm/testdata/riscv64error.s +@@ -345,5 +345,20 @@ TEXT errors(SB),$0 + VFNCVTFXW V2, V4, V3 // ERROR "invalid vector mask register" + VFNCVTFFW V2, V4, V3 // ERROR "invalid vector mask register" + VFNCVTRODFFW V2, V4, V3 // ERROR "invalid vector mask register" ++ VREDSUMVS V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VREDMAXUVS V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VREDMAXVS V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VREDMINUVS V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VREDMINVS V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VREDANDVS V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VREDORVS V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VREDXORVS V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VWREDSUMUVS V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VWREDSUMVS V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VFREDOSUMVS V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VFREDUSUMVS V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VFREDMAXVS V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VFREDMINVS V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VFWREDOSUMVS V1, V2, V4, V3 // ERROR "invalid vector mask register" + + RET +diff --git a/src/cmd/asm/internal/asm/testdata/riscv64validation.s b/src/cmd/asm/internal/asm/testdata/riscv64validation.s +index 2c509a1e91..adb10823d7 100644 +--- a/src/cmd/asm/internal/asm/testdata/riscv64validation.s ++++ b/src/cmd/asm/internal/asm/testdata/riscv64validation.s +@@ -364,5 +364,21 @@ TEXT validation(SB),$0 + VFNCVTFXW X10, V3 // ERROR "expected vector register in vs2 position" + VFNCVTFFW X10, V3 // ERROR "expected vector register in vs2 position" + VFNCVTRODFFW X10, V3 // ERROR "expected vector register in vs2 position" ++ VREDSUMVS X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VREDMAXUVS X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VREDMAXVS X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VREDMINUVS X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VREDMINVS X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VREDANDVS X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VREDORVS X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VREDXORVS X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VWREDSUMUVS X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VWREDSUMVS X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VFREDOSUMVS X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VFREDUSUMVS X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VFREDMAXVS X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VFREDMINVS X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VFWREDOSUMVS X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VFWREDUSUMVS X10, V2, V3 // ERROR "expected vector register in vs1 position" + + RET +diff --git a/src/cmd/internal/obj/riscv/obj.go b/src/cmd/internal/obj/riscv/obj.go +index aa8a3d769e..9d60352ad6 100644 +--- a/src/cmd/internal/obj/riscv/obj.go ++++ b/src/cmd/internal/obj/riscv/obj.go +@@ -2592,6 
+2592,30 @@ var instructions = [ALAST & obj.AMask]instructionData{ + AVFNCVTFFW & obj.AMask: {enc: rVVEncoding}, + AVFNCVTRODFFW & obj.AMask: {enc: rVVEncoding}, + ++ // 31.14.1: Vector Single-Width Integer Reduction Instructions ++ AVREDSUMVS & obj.AMask: {enc: rVVVEncoding}, ++ AVREDMAXUVS & obj.AMask: {enc: rVVVEncoding}, ++ AVREDMAXVS & obj.AMask: {enc: rVVVEncoding}, ++ AVREDMINUVS & obj.AMask: {enc: rVVVEncoding}, ++ AVREDMINVS & obj.AMask: {enc: rVVVEncoding}, ++ AVREDANDVS & obj.AMask: {enc: rVVVEncoding}, ++ AVREDORVS & obj.AMask: {enc: rVVVEncoding}, ++ AVREDXORVS & obj.AMask: {enc: rVVVEncoding}, ++ ++ // 31.14.2: Vector Widening Integer Reduction Instructions ++ AVWREDSUMUVS & obj.AMask: {enc: rVVVEncoding}, ++ AVWREDSUMVS & obj.AMask: {enc: rVVVEncoding}, ++ ++ // 31.14.3: Vector Single-Width Floating-Point Reduction Instructions ++ AVFREDOSUMVS & obj.AMask: {enc: rVVVEncoding}, ++ AVFREDUSUMVS & obj.AMask: {enc: rVVVEncoding}, ++ AVFREDMAXVS & obj.AMask: {enc: rVVVEncoding}, ++ AVFREDMINVS & obj.AMask: {enc: rVVVEncoding}, ++ ++ // 31.14.4: Vector Widening Floating-Point Reduction Instructions ++ AVFWREDOSUMVS & obj.AMask: {enc: rVVVEncoding}, ++ AVFWREDUSUMVS & obj.AMask: {enc: rVVVEncoding}, ++ + // + // Privileged ISA + // +@@ -3490,7 +3514,9 @@ func instructionsForProg(p *obj.Prog) []*instruction { + AVFMULVV, AVFMULVF, AVFDIVVV, AVFDIVVF, AVFRDIVVF, AVFWMULVV, AVFWMULVF, + AVFMINVV, AVFMINVF, AVFMAXVV, AVFMAXVF, + AVFSGNJVV, AVFSGNJVF, AVFSGNJNVV, AVFSGNJNVF, AVFSGNJXVV, AVFSGNJXVF, +- AVMFEQVV, AVMFEQVF, AVMFNEVV, AVMFNEVF, AVMFLTVV, AVMFLTVF, AVMFLEVV, AVMFLEVF, AVMFGTVF, AVMFGEVF: ++ AVMFEQVV, AVMFEQVF, AVMFNEVV, AVMFNEVF, AVMFLTVV, AVMFLTVF, AVMFLEVV, AVMFLEVF, AVMFGTVF, AVMFGEVF, ++ AVREDSUMVS, AVREDMAXUVS, AVREDMAXVS, AVREDMINUVS, AVREDMINVS, AVREDANDVS, AVREDORVS, AVREDXORVS, ++ AVWREDSUMUVS, AVWREDSUMVS, AVFREDOSUMVS, AVFREDUSUMVS, AVFREDMAXVS, AVFREDMINVS, AVFWREDOSUMVS, AVFWREDUSUMVS: + // Set mask bit + switch { + case ins.rs3 == obj.REG_NONE: +-- +2.50.1 + diff --git a/2020-cmd-internal-obj-riscv-add-support-for-vector-mask-i.patch b/2020-cmd-internal-obj-riscv-add-support-for-vector-mask-i.patch new file mode 100644 index 0000000000000000000000000000000000000000..35569a19897fe8a84e09128cca70e9639a14edb1 --- /dev/null +++ b/2020-cmd-internal-obj-riscv-add-support-for-vector-mask-i.patch @@ -0,0 +1,269 @@ +From 1079a3d4de795137caa4563e42f82b1efcbfd750 Mon Sep 17 00:00:00 2001 +From: Joel Sing +Date: Sun, 2 Feb 2025 23:09:12 +1100 +Subject: [PATCH 20/38] cmd/internal/obj/riscv: add support for vector mask + instructions + +Add support for vector mask instructions to the RISC-V assembler. +These allow manipulation of vector masks and include mask register +logical instructions, population count and find-first bit set +instructions. 
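+
+For example, given a comparison result used as a mask in V2, typical
+uses of these instructions look like the following (the register
+choices here are arbitrary, illustrative examples):
+
+VCPOPM	V2, X10		// X10 = number of set bits in the V2 mask
+VFIRSTM	V2, X11		// X11 = index of the first set bit, or -1 if none
+VMNOTM	V2, V3		// V3 = complement of the V2 mask (assembles as VMNANDMM)
+VMANDMM	V1, V2, V3	// V3 = V1 AND V2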
+ +Change-Id: I3ab3aa0f918338aee9b37ac5a2b2fdc407875072 +Reviewed-on: https://go-review.googlesource.com/c/go/+/646779 +Reviewed-by: Carlos Amedee +Reviewed-by: Meng Zhuo +LUCI-TryBot-Result: Go LUCI +Reviewed-by: Mark Ryan +Reviewed-by: Junyang Shao +--- + src/cmd/asm/internal/asm/testdata/riscv64.s | 28 ++++++++ + .../asm/internal/asm/testdata/riscv64error.s | 6 ++ + .../internal/asm/testdata/riscv64validation.s | 19 +++++ + src/cmd/internal/obj/riscv/anames.go | 8 ++- + src/cmd/internal/obj/riscv/cpu.go | 8 ++- + src/cmd/internal/obj/riscv/obj.go | 70 +++++++++++++++++++ + 6 files changed, 135 insertions(+), 4 deletions(-) + +diff --git a/src/cmd/asm/internal/asm/testdata/riscv64.s b/src/cmd/asm/internal/asm/testdata/riscv64.s +index 682dc6f75b..0230d44f99 100644 +--- a/src/cmd/asm/internal/asm/testdata/riscv64.s ++++ b/src/cmd/asm/internal/asm/testdata/riscv64.s +@@ -1201,6 +1201,34 @@ start: + VFWREDUSUMVS V1, V2, V3 // d79120c6 + VFWREDUSUMVS V1, V2, V0, V3 // d79120c4 + ++ // 31.15: Vector Mask Instructions ++ VMANDMM V1, V2, V3 // d7a12066 ++ VMNANDMM V1, V2, V3 // d7a12076 ++ VMANDNMM V1, V2, V3 // d7a12062 ++ VMXORMM V1, V2, V3 // d7a1206e ++ VMORMM V1, V2, V3 // d7a1206a ++ VMNORMM V1, V2, V3 // d7a1207a ++ VMORNMM V1, V2, V3 // d7a12072 ++ VMXNORMM V1, V2, V3 // d7a1207e ++ VMMVM V2, V3 // d7212166 ++ VMCLRM V3 // d7a1316e ++ VMSETM V3 // d7a1317e ++ VMNOTM V2, V3 // d7212176 ++ VCPOPM V2, X10 // 57252842 ++ VCPOPM V2, V0, X10 // 57252840 ++ VFIRSTM V2, X10 // 57a52842 ++ VFIRSTM V2, V0, X10 // 57a52840 ++ VMSBFM V2, V3 // d7a12052 ++ VMSBFM V2, V0, V3 // d7a12050 ++ VMSIFM V2, V3 // d7a12152 ++ VMSIFM V2, V0, V3 // d7a12150 ++ VMSOFM V2, V3 // d7212152 ++ VMSOFM V2, V0, V3 // d7212150 ++ VIOTAM V2, V3 // d7212852 ++ VIOTAM V2, V0, V3 // d7212850 ++ VIDV V3 // d7a10852 ++ VIDV V0, V3 // d7a10850 ++ + // + // Privileged ISA + // +diff --git a/src/cmd/asm/internal/asm/testdata/riscv64error.s b/src/cmd/asm/internal/asm/testdata/riscv64error.s +index cd4c96eb6d..ad0b3cfd7d 100644 +--- a/src/cmd/asm/internal/asm/testdata/riscv64error.s ++++ b/src/cmd/asm/internal/asm/testdata/riscv64error.s +@@ -360,5 +360,11 @@ TEXT errors(SB),$0 + VFREDMAXVS V1, V2, V4, V3 // ERROR "invalid vector mask register" + VFREDMINVS V1, V2, V4, V3 // ERROR "invalid vector mask register" + VFWREDOSUMVS V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VCPOPM V2, V4, X10 // ERROR "invalid vector mask register" ++ VFIRSTM V2, V4, X10 // ERROR "invalid vector mask register" ++ VMSBFM V2, V4, V3 // ERROR "invalid vector mask register" ++ VMSIFM V2, V4, V3 // ERROR "invalid vector mask register" ++ VMSOFM V2, V4, V3 // ERROR "invalid vector mask register" ++ VIOTAM V2, V4, V3 // ERROR "invalid vector mask register" + + RET +diff --git a/src/cmd/asm/internal/asm/testdata/riscv64validation.s b/src/cmd/asm/internal/asm/testdata/riscv64validation.s +index adb10823d7..8b0349584f 100644 +--- a/src/cmd/asm/internal/asm/testdata/riscv64validation.s ++++ b/src/cmd/asm/internal/asm/testdata/riscv64validation.s +@@ -380,5 +380,24 @@ TEXT validation(SB),$0 + VFREDMINVS X10, V2, V3 // ERROR "expected vector register in vs1 position" + VFWREDOSUMVS X10, V2, V3 // ERROR "expected vector register in vs1 position" + VFWREDUSUMVS X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VMANDMM X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VMNANDMM X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VMANDNMM X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ 
VMXORMM X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VMORMM X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VMNORMM X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VMORNMM X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VMXNORMM X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VMMVM V3, X10 // ERROR "expected vector register in vd position" ++ VMNOTM V3, X10 // ERROR "expected vector register in vd position" ++ VCPOPM V2, V1 // ERROR "expected integer register in rd position" ++ VCPOPM X11, X10 // ERROR "expected vector register in vs2 position" ++ VFIRSTM V2, V1 // ERROR "expected integer register in rd position" ++ VFIRSTM X11, X10 // ERROR "expected vector register in vs2 position" ++ VMSBFM X10, V3 // ERROR "expected vector register in vs2 position" ++ VMSIFM X10, V3 // ERROR "expected vector register in vs2 position" ++ VMSOFM X10, V3 // ERROR "expected vector register in vs2 position" ++ VIOTAM X10, V3 // ERROR "expected vector register in vs2 position" ++ VIDV X10 // ERROR "expected vector register in vd position" + + RET +diff --git a/src/cmd/internal/obj/riscv/anames.go b/src/cmd/internal/obj/riscv/anames.go +index bf1fdb8b88..a689f2de27 100644 +--- a/src/cmd/internal/obj/riscv/anames.go ++++ b/src/cmd/internal/obj/riscv/anames.go +@@ -652,12 +652,16 @@ var Anames = []string{ + "SNEZ", + "VFABSV", + "VFNEGV", +- "VMFGEVV", +- "VMFGTVV", + "VL1RV", + "VL2RV", + "VL4RV", + "VL8RV", ++ "VMCLRM", ++ "VMFGEVV", ++ "VMFGTVV", ++ "VMMVM", ++ "VMNOTM", ++ "VMSETM", + "VMSGEUVI", + "VMSGEUVV", + "VMSGEVI", +diff --git a/src/cmd/internal/obj/riscv/cpu.go b/src/cmd/internal/obj/riscv/cpu.go +index 3cad4f9d94..d87b6b1efb 100644 +--- a/src/cmd/internal/obj/riscv/cpu.go ++++ b/src/cmd/internal/obj/riscv/cpu.go +@@ -1180,12 +1180,16 @@ const ( + ASNEZ + AVFABSV + AVFNEGV +- AVMFGEVV +- AVMFGTVV + AVL1RV + AVL2RV + AVL4RV + AVL8RV ++ AVMCLRM ++ AVMFGEVV ++ AVMFGTVV ++ AVMMVM ++ AVMNOTM ++ AVMSETM + AVMSGEUVI + AVMSGEUVV + AVMSGEVI +diff --git a/src/cmd/internal/obj/riscv/obj.go b/src/cmd/internal/obj/riscv/obj.go +index 9d60352ad6..ff248115c5 100644 +--- a/src/cmd/internal/obj/riscv/obj.go ++++ b/src/cmd/internal/obj/riscv/obj.go +@@ -1335,6 +1335,13 @@ func validateRVFV(ctxt *obj.Link, ins *instruction) { + wantNoneReg(ctxt, ins, "rs3", ins.rs3) + } + ++func validateRVI(ctxt *obj.Link, ins *instruction) { ++ wantIntReg(ctxt, ins, "rd", ins.rd) ++ wantNoneReg(ctxt, ins, "rs1", ins.rs1) ++ wantVectorReg(ctxt, ins, "vs2", ins.rs2) ++ wantNoneReg(ctxt, ins, "rs3", ins.rs3) ++} ++ + func validateRVIV(ctxt *obj.Link, ins *instruction) { + wantVectorReg(ctxt, ins, "vd", ins.rd) + wantIntReg(ctxt, ins, "rs1", ins.rs1) +@@ -1584,6 +1591,10 @@ func encodeRVFV(ins *instruction) uint32 { + return encodeR(ins.as, regF(ins.rs1), regV(ins.rs2), regV(ins.rd), ins.funct3, ins.funct7) + } + ++func encodeRVI(ins *instruction) uint32 { ++ return encodeR(ins.as, 0, regV(ins.rs2), regI(ins.rd), ins.funct3, ins.funct7) ++} ++ + func encodeRVIV(ins *instruction) uint32 { + return encodeR(ins.as, regI(ins.rs1), regV(ins.rs2), regV(ins.rd), ins.funct3, ins.funct7) + } +@@ -1888,6 +1899,7 @@ var ( + rIFEncoding = encoding{encode: encodeRIF, validate: validateRIF, length: 4} + rFFEncoding = encoding{encode: encodeRFF, validate: validateRFF, length: 4} + rVFVEncoding = encoding{encode: encodeRVFV, validate: validateRVFV, length: 4} ++ rVIEncoding = encoding{encode: encodeRVI, validate: validateRVI, length: 4} + rVIVEncoding = 
encoding{encode: encodeRVIV, validate: validateRVIV, length: 4} + rVVEncoding = encoding{encode: encodeRVV, validate: validateRVV, length: 4} + rVViEncoding = encoding{encode: encodeRVVi, validate: validateRVVi, length: 4} +@@ -2616,6 +2628,23 @@ var instructions = [ALAST & obj.AMask]instructionData{ + AVFWREDOSUMVS & obj.AMask: {enc: rVVVEncoding}, + AVFWREDUSUMVS & obj.AMask: {enc: rVVVEncoding}, + ++ // 31.15: Vector Mask Instructions ++ AVMANDMM & obj.AMask: {enc: rVVVEncoding}, ++ AVMNANDMM & obj.AMask: {enc: rVVVEncoding}, ++ AVMANDNMM & obj.AMask: {enc: rVVVEncoding}, ++ AVMXORMM & obj.AMask: {enc: rVVVEncoding}, ++ AVMORMM & obj.AMask: {enc: rVVVEncoding}, ++ AVMNORMM & obj.AMask: {enc: rVVVEncoding}, ++ AVMORNMM & obj.AMask: {enc: rVVVEncoding}, ++ AVMXNORMM & obj.AMask: {enc: rVVVEncoding}, ++ AVCPOPM & obj.AMask: {enc: rVIEncoding}, ++ AVFIRSTM & obj.AMask: {enc: rVIEncoding}, ++ AVMSBFM & obj.AMask: {enc: rVVEncoding}, ++ AVMSIFM & obj.AMask: {enc: rVVEncoding}, ++ AVMSOFM & obj.AMask: {enc: rVVEncoding}, ++ AVIOTAM & obj.AMask: {enc: rVVEncoding}, ++ AVIDV & obj.AMask: {enc: rVVEncoding}, ++ + // + // Privileged ISA + // +@@ -3677,6 +3706,47 @@ func instructionsForProg(p *obj.Prog) []*instruction { + ins.as = AVFSGNJNVV + } + ins.rd, ins.rs1, ins.rs2 = uint32(p.To.Reg), uint32(p.From.Reg), uint32(p.From.Reg) ++ ++ case AVMANDMM, AVMNANDMM, AVMANDNMM, AVMXORMM, AVMORMM, AVMNORMM, AVMORNMM, AVMXNORMM, AVMMVM, AVMNOTM: ++ ins.rd, ins.rs1, ins.rs2 = uint32(p.To.Reg), uint32(p.From.Reg), uint32(p.Reg) ++ switch ins.as { ++ case AVMMVM: ++ ins.as, ins.rs2 = AVMANDMM, ins.rs1 ++ case AVMNOTM: ++ ins.as, ins.rs2 = AVMNANDMM, ins.rs1 ++ } ++ ++ case AVMCLRM, AVMSETM: ++ ins.rd, ins.rs1, ins.rs2 = uint32(p.From.Reg), uint32(p.From.Reg), uint32(p.From.Reg) ++ switch ins.as { ++ case AVMCLRM: ++ ins.as = AVMXORMM ++ case AVMSETM: ++ ins.as = AVMXNORMM ++ } ++ ++ case AVCPOPM, AVFIRSTM, AVMSBFM, AVMSIFM, AVMSOFM, AVIOTAM: ++ // Set mask bit ++ switch { ++ case ins.rs1 == obj.REG_NONE: ++ ins.funct7 |= 1 // unmasked ++ case ins.rs1 != REG_V0: ++ p.Ctxt.Diag("%v: invalid vector mask register", p) ++ } ++ ins.rs1 = obj.REG_NONE ++ ++ case AVIDV: ++ // Set mask bit ++ switch { ++ case ins.rd == obj.REG_NONE: ++ ins.funct7 |= 1 // unmasked ++ case ins.rd != obj.REG_NONE && ins.rs2 != REG_V0: ++ p.Ctxt.Diag("%v: invalid vector mask register", p) ++ } ++ if ins.rd == obj.REG_NONE { ++ ins.rd = uint32(p.From.Reg) ++ } ++ ins.rs1, ins.rs2 = obj.REG_NONE, REG_V0 + } + + for _, ins := range inss { +-- +2.50.1 + diff --git a/2021-cmd-internal-obj-riscv-add-support-for-vector-permut.patch b/2021-cmd-internal-obj-riscv-add-support-for-vector-permut.patch new file mode 100644 index 0000000000000000000000000000000000000000..b8ab8992c8c82f8c7857db114f48b6251a1d8575 --- /dev/null +++ b/2021-cmd-internal-obj-riscv-add-support-for-vector-permut.patch @@ -0,0 +1,287 @@ +From 23764d73856451f5e9c949ceb8fda42c93f16796 Mon Sep 17 00:00:00 2001 +From: Joel Sing +Date: Wed, 12 Feb 2025 23:40:58 +1100 +Subject: [PATCH 21/38] cmd/internal/obj/riscv: add support for vector + permutation instructions + +Add support for vector permutation instructions to the RISC-V assembler. +This includes integer scalar move, floating point scalar move, slide up +and slide down, register gather, compression and whole vector register +move instructions. 
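+
+For example, the scalar move, slide and whole register move forms
+added here can be used as follows (register choices are arbitrary,
+illustrative examples):
+
+VMVSX	X10, V2			// V2[0] = X10
+VMVXS	V2, X10			// X10 = V2[0]
+VSLIDE1DOWNVX	X11, V2, V3	// V3[i] = V2[i+1], with X11 inserted as the last element
+VMV1RV	V2, V1			// copy the whole of V2 into V1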
+ +Change-Id: I1da9f393091504fd81714006355725b8b9ecadea +Reviewed-on: https://go-review.googlesource.com/c/go/+/646780 +LUCI-TryBot-Result: Go LUCI +Reviewed-by: Carlos Amedee +Reviewed-by: Mark Ryan +Reviewed-by: Junyang Shao +Reviewed-by: Meng Zhuo +--- + src/cmd/asm/internal/asm/testdata/riscv64.s | 45 +++++++++++ + .../asm/internal/asm/testdata/riscv64error.s | 12 +++ + .../internal/asm/testdata/riscv64validation.s | 28 +++++++ + src/cmd/internal/obj/riscv/obj.go | 77 ++++++++++++++++++- + 4 files changed, 159 insertions(+), 3 deletions(-) + +diff --git a/src/cmd/asm/internal/asm/testdata/riscv64.s b/src/cmd/asm/internal/asm/testdata/riscv64.s +index 0230d44f99..432078e29d 100644 +--- a/src/cmd/asm/internal/asm/testdata/riscv64.s ++++ b/src/cmd/asm/internal/asm/testdata/riscv64.s +@@ -1229,6 +1229,51 @@ start: + VIDV V3 // d7a10852 + VIDV V0, V3 // d7a10850 + ++ // 31.16.1: Integer Scalar Move Instructions ++ VMVXS V2, X10 // 57252042 ++ VMVSX X10, V2 // 57610542 ++ ++ // 31.16.2: Floating-Point Scalar Move Instructions ++ VFMVFS V2, F10 // 57152042 ++ VFMVSF F10, V2 // 57510542 ++ ++ // 31.16.3: Vector Slide Instructions ++ VSLIDEUPVX X10, V2, V3 // d741253a ++ VSLIDEUPVX X10, V2, V0, V3 // d7412538 ++ VSLIDEUPVI $16, V2, V3 // d731283a ++ VSLIDEUPVI $16, V2, V0, V3 // d7312838 ++ VSLIDEDOWNVX X10, V2, V3 // d741253e ++ VSLIDEDOWNVX X10, V2, V0, V3 // d741253c ++ VSLIDEDOWNVI $16, V2, V3 // d731283e ++ VSLIDEDOWNVI $16, V2, V0, V3 // d731283c ++ VSLIDE1UPVX X10, V2, V3 // d761253a ++ VSLIDE1UPVX X10, V2, V0, V3 // d7612538 ++ VFSLIDE1UPVF F10, V2, V3 // d751253a ++ VFSLIDE1UPVF F10, V2, V0, V3 // d7512538 ++ VSLIDE1DOWNVX X10, V2, V3 // d761253e ++ VSLIDE1DOWNVX X10, V2, V0, V3 // d761253c ++ VFSLIDE1DOWNVF F10, V2, V3 // d751253e ++ VFSLIDE1DOWNVF F10, V2, V0, V3 // d751253c ++ ++ // 31.16.4: Vector Register Gather Instructions ++ VRGATHERVV V1, V2, V3 // d7812032 ++ VRGATHERVV V1, V2, V0, V3 // d7812030 ++ VRGATHEREI16VV V1, V2, V3 // d781203a ++ VRGATHEREI16VV V1, V2, V0, V3 // d7812038 ++ VRGATHERVX X10, V2, V3 // d7412532 ++ VRGATHERVX X10, V2, V0, V3 // d7412530 ++ VRGATHERVI $16, V2, V3 // d7312832 ++ VRGATHERVI $16, V2, V0, V3 // d7312830 ++ ++ // 31.16.5: Vector Compress Instruction ++ VCOMPRESSVM V1, V2, V3 // d7a1205e ++ ++ // 31.16.6: Whole Vector Register Move ++ VMV1RV V2, V1 // d730209e ++ VMV2RV V12, V10 // 57b5c09e ++ VMV4RV V8, V4 // 57b2819e ++ VMV8RV V8, V0 // 57b0839e ++ + // + // Privileged ISA + // +diff --git a/src/cmd/asm/internal/asm/testdata/riscv64error.s b/src/cmd/asm/internal/asm/testdata/riscv64error.s +index ad0b3cfd7d..d7a6e6ca85 100644 +--- a/src/cmd/asm/internal/asm/testdata/riscv64error.s ++++ b/src/cmd/asm/internal/asm/testdata/riscv64error.s +@@ -366,5 +366,17 @@ TEXT errors(SB),$0 + VMSIFM V2, V4, V3 // ERROR "invalid vector mask register" + VMSOFM V2, V4, V3 // ERROR "invalid vector mask register" + VIOTAM V2, V4, V3 // ERROR "invalid vector mask register" ++ VSLIDEUPVX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VSLIDEUPVI $16, V2, V4, V3 // ERROR "invalid vector mask register" ++ VSLIDEDOWNVX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VSLIDEDOWNVI $16, V2, V4, V3 // ERROR "invalid vector mask register" ++ VSLIDE1UPVX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VFSLIDE1UPVF F10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VSLIDE1DOWNVX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VFSLIDE1DOWNVF F10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VRGATHERVV V1, V2, V4, 
V3 // ERROR "invalid vector mask register" ++ VRGATHEREI16VV V1, V2, V4, V3 // ERROR "invalid vector mask register" ++ VRGATHERVX X10, V2, V4, V3 // ERROR "invalid vector mask register" ++ VRGATHERVI $16, V2, V4, V3 // ERROR "invalid vector mask register" + + RET +diff --git a/src/cmd/asm/internal/asm/testdata/riscv64validation.s b/src/cmd/asm/internal/asm/testdata/riscv64validation.s +index 8b0349584f..374a97dcfe 100644 +--- a/src/cmd/asm/internal/asm/testdata/riscv64validation.s ++++ b/src/cmd/asm/internal/asm/testdata/riscv64validation.s +@@ -399,5 +399,33 @@ TEXT validation(SB),$0 + VMSOFM X10, V3 // ERROR "expected vector register in vs2 position" + VIOTAM X10, V3 // ERROR "expected vector register in vs2 position" + VIDV X10 // ERROR "expected vector register in vd position" ++ VMVXS X11, X10 // ERROR "expected vector register in vs2 position" ++ VMVXS V2, V1 // ERROR "expected integer register in rd position" ++ VMVSX X11, X10 // ERROR "expected vector register in vd position" ++ VMVSX V2, V1 // ERROR "expected integer register in rs2 position" ++ VFMVFS X10, F10 // ERROR "expected vector register in vs2 position" ++ VFMVFS V2, V1 // ERROR "expected float register in rd position" ++ VFMVSF X10, V2 // ERROR "expected float register in rs2 position" ++ VFMVSF V2, V1 // ERROR "expected float register in rs2 position" ++ VSLIDEUPVX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VSLIDEUPVI $-1, V2, V3 // ERROR "unsigned immediate -1 must be in range [0, 31]" ++ VSLIDEUPVI $32, V2, V3 // ERROR "unsigned immediate 32 must be in range [0, 31]" ++ VSLIDEDOWNVX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VSLIDEDOWNVI $-1, V2, V3 // ERROR "unsigned immediate -1 must be in range [0, 31]" ++ VSLIDEDOWNVI $32, V2, V3 // ERROR "unsigned immediate 32 must be in range [0, 31]" ++ VSLIDE1UPVX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VFSLIDE1UPVF V1, V2, V3 // ERROR "expected float register in rs1 position" ++ VSLIDE1DOWNVX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VFSLIDE1DOWNVF V1, V2, V3 // ERROR "expected float register in rs1 position" ++ VRGATHERVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VRGATHEREI16VV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VRGATHERVX V1, V2, V3 // ERROR "expected integer register in rs1 position" ++ VRGATHERVI $-1, V2, V3 // ERROR "unsigned immediate -1 must be in range [0, 31]" ++ VRGATHERVI $32, V2, V3 // ERROR "unsigned immediate 32 must be in range [0, 31]" ++ VCOMPRESSVM X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VMV1RV X10, V1 // ERROR "expected vector register in vs2 position" ++ VMV2RV X10, V10 // ERROR "expected vector register in vs2 position" ++ VMV4RV X10, V4 // ERROR "expected vector register in vs2 position" ++ VMV8RV X10, V0 // ERROR "expected vector register in vs2 position" + + RET +diff --git a/src/cmd/internal/obj/riscv/obj.go b/src/cmd/internal/obj/riscv/obj.go +index ff248115c5..6dc621ebcd 100644 +--- a/src/cmd/internal/obj/riscv/obj.go ++++ b/src/cmd/internal/obj/riscv/obj.go +@@ -1314,6 +1314,13 @@ func validateRFI(ctxt *obj.Link, ins *instruction) { + wantNoneReg(ctxt, ins, "rs3", ins.rs3) + } + ++func validateRFV(ctxt *obj.Link, ins *instruction) { ++ wantVectorReg(ctxt, ins, "vd", ins.rd) ++ wantNoneReg(ctxt, ins, "rs1", ins.rs1) ++ wantFloatReg(ctxt, ins, "rs2", ins.rs2) ++ wantNoneReg(ctxt, ins, "rs3", ins.rs3) ++} ++ + func validateRFF(ctxt *obj.Link, ins *instruction) { + 
wantFloatReg(ctxt, ins, "rd", ins.rd) + wantNoneReg(ctxt, ins, "rs1", ins.rs1) +@@ -1328,6 +1335,20 @@ func validateRIF(ctxt *obj.Link, ins *instruction) { + wantNoneReg(ctxt, ins, "rs3", ins.rs3) + } + ++func validateRIV(ctxt *obj.Link, ins *instruction) { ++ wantVectorReg(ctxt, ins, "vd", ins.rd) ++ wantNoneReg(ctxt, ins, "rs1", ins.rs1) ++ wantIntReg(ctxt, ins, "rs2", ins.rs2) ++ wantNoneReg(ctxt, ins, "rs3", ins.rs3) ++} ++ ++func validateRVF(ctxt *obj.Link, ins *instruction) { ++ wantFloatReg(ctxt, ins, "rd", ins.rd) ++ wantNoneReg(ctxt, ins, "rs1", ins.rs1) ++ wantVectorReg(ctxt, ins, "vs2", ins.rs2) ++ wantNoneReg(ctxt, ins, "rs3", ins.rs3) ++} ++ + func validateRVFV(ctxt *obj.Link, ins *instruction) { + wantVectorReg(ctxt, ins, "vd", ins.rd) + wantFloatReg(ctxt, ins, "rs1", ins.rs1) +@@ -1583,10 +1604,22 @@ func encodeRFF(ins *instruction) uint32 { + return encodeR(ins.as, regF(ins.rs2), 0, regF(ins.rd), ins.funct3, ins.funct7) + } + ++func encodeRFV(ins *instruction) uint32 { ++ return encodeR(ins.as, regF(ins.rs2), 0, regV(ins.rd), ins.funct3, ins.funct7) ++} ++ + func encodeRIF(ins *instruction) uint32 { + return encodeR(ins.as, regI(ins.rs2), 0, regF(ins.rd), ins.funct3, ins.funct7) + } + ++func encodeRIV(ins *instruction) uint32 { ++ return encodeR(ins.as, regI(ins.rs2), 0, regV(ins.rd), ins.funct3, ins.funct7) ++} ++ ++func encodeRVF(ins *instruction) uint32 { ++ return encodeR(ins.as, 0, regV(ins.rs2), regF(ins.rd), ins.funct3, ins.funct7) ++} ++ + func encodeRVFV(ins *instruction) uint32 { + return encodeR(ins.as, regF(ins.rs1), regV(ins.rs2), regV(ins.rd), ins.funct3, ins.funct7) + } +@@ -1896,8 +1929,11 @@ var ( + rFFFFEncoding = encoding{encode: encodeRFFFF, validate: validateRFFFF, length: 4} + rFFIEncoding = encoding{encode: encodeRFFI, validate: validateRFFI, length: 4} + rFIEncoding = encoding{encode: encodeRFI, validate: validateRFI, length: 4} ++ rFVEncoding = encoding{encode: encodeRFV, validate: validateRFV, length: 4} + rIFEncoding = encoding{encode: encodeRIF, validate: validateRIF, length: 4} ++ rIVEncoding = encoding{encode: encodeRIV, validate: validateRIV, length: 4} + rFFEncoding = encoding{encode: encodeRFF, validate: validateRFF, length: 4} ++ rVFEncoding = encoding{encode: encodeRVF, validate: validateRVF, length: 4} + rVFVEncoding = encoding{encode: encodeRVFV, validate: validateRVFV, length: 4} + rVIEncoding = encoding{encode: encodeRVI, validate: validateRVI, length: 4} + rVIVEncoding = encoding{encode: encodeRVIV, validate: validateRVIV, length: 4} +@@ -2645,6 +2681,39 @@ var instructions = [ALAST & obj.AMask]instructionData{ + AVIOTAM & obj.AMask: {enc: rVVEncoding}, + AVIDV & obj.AMask: {enc: rVVEncoding}, + ++ // 31.16.1: Integer Scalar Move Instructions ++ AVMVXS & obj.AMask: {enc: rVIEncoding}, ++ AVMVSX & obj.AMask: {enc: rIVEncoding}, ++ ++ // 31.16.2: Floating-Point Scalar Move Instructions ++ AVFMVFS & obj.AMask: {enc: rVFEncoding}, ++ AVFMVSF & obj.AMask: {enc: rFVEncoding}, ++ ++ // 31.16.3: Vector Slide Instructions ++ AVSLIDEUPVX & obj.AMask: {enc: rVIVEncoding}, ++ AVSLIDEUPVI & obj.AMask: {enc: rVVuEncoding}, ++ AVSLIDEDOWNVX & obj.AMask: {enc: rVIVEncoding}, ++ AVSLIDEDOWNVI & obj.AMask: {enc: rVVuEncoding}, ++ AVSLIDE1UPVX & obj.AMask: {enc: rVIVEncoding}, ++ AVFSLIDE1UPVF & obj.AMask: {enc: rVFVEncoding}, ++ AVSLIDE1DOWNVX & obj.AMask: {enc: rVIVEncoding}, ++ AVFSLIDE1DOWNVF & obj.AMask: {enc: rVFVEncoding}, ++ ++ // 31.16.4: Vector Register Gather Instructions ++ AVRGATHERVV & obj.AMask: {enc: rVVVEncoding}, ++ AVRGATHEREI16VV & 
obj.AMask: {enc: rVVVEncoding}, ++ AVRGATHERVX & obj.AMask: {enc: rVIVEncoding}, ++ AVRGATHERVI & obj.AMask: {enc: rVVuEncoding}, ++ ++ // 31.16.5: Vector Compress Instruction ++ AVCOMPRESSVM & obj.AMask: {enc: rVVVEncoding}, ++ ++ // 31.16.6: Whole Vector Register Move ++ AVMV1RV & obj.AMask: {enc: rVVEncoding}, ++ AVMV2RV & obj.AMask: {enc: rVVEncoding}, ++ AVMV4RV & obj.AMask: {enc: rVVEncoding}, ++ AVMV8RV & obj.AMask: {enc: rVVEncoding}, ++ + // + // Privileged ISA + // +@@ -3545,7 +3614,9 @@ func instructionsForProg(p *obj.Prog) []*instruction { + AVFSGNJVV, AVFSGNJVF, AVFSGNJNVV, AVFSGNJNVF, AVFSGNJXVV, AVFSGNJXVF, + AVMFEQVV, AVMFEQVF, AVMFNEVV, AVMFNEVF, AVMFLTVV, AVMFLTVF, AVMFLEVV, AVMFLEVF, AVMFGTVF, AVMFGEVF, + AVREDSUMVS, AVREDMAXUVS, AVREDMAXVS, AVREDMINUVS, AVREDMINVS, AVREDANDVS, AVREDORVS, AVREDXORVS, +- AVWREDSUMUVS, AVWREDSUMVS, AVFREDOSUMVS, AVFREDUSUMVS, AVFREDMAXVS, AVFREDMINVS, AVFWREDOSUMVS, AVFWREDUSUMVS: ++ AVWREDSUMUVS, AVWREDSUMVS, AVFREDOSUMVS, AVFREDUSUMVS, AVFREDMAXVS, AVFREDMINVS, AVFWREDOSUMVS, AVFWREDUSUMVS, ++ AVSLIDEUPVX, AVSLIDEDOWNVX, AVSLIDE1UPVX, AVFSLIDE1UPVF, AVSLIDE1DOWNVX, AVFSLIDE1DOWNVF, ++ AVRGATHERVV, AVRGATHEREI16VV, AVRGATHERVX: + // Set mask bit + switch { + case ins.rs3 == obj.REG_NONE: +@@ -3567,7 +3638,7 @@ func instructionsForProg(p *obj.Prog) []*instruction { + ins.rd, ins.rs1, ins.rs2, ins.rs3 = uint32(p.To.Reg), uint32(p.Reg), uint32(p.From.Reg), obj.REG_NONE + + case AVADDVI, AVRSUBVI, AVANDVI, AVORVI, AVXORVI, AVMSEQVI, AVMSNEVI, AVMSLEUVI, AVMSLEVI, AVMSGTUVI, AVMSGTVI, +- AVSLLVI, AVSRLVI, AVSRAVI, AVNSRLWI, AVNSRAWI: ++ AVSLLVI, AVSRLVI, AVSRAVI, AVNSRLWI, AVNSRAWI, AVRGATHERVI, AVSLIDEUPVI, AVSLIDEDOWNVI: + // Set mask bit + switch { + case ins.rs3 == obj.REG_NONE: +@@ -3707,7 +3778,7 @@ func instructionsForProg(p *obj.Prog) []*instruction { + } + ins.rd, ins.rs1, ins.rs2 = uint32(p.To.Reg), uint32(p.From.Reg), uint32(p.From.Reg) + +- case AVMANDMM, AVMNANDMM, AVMANDNMM, AVMXORMM, AVMORMM, AVMNORMM, AVMORNMM, AVMXNORMM, AVMMVM, AVMNOTM: ++ case AVMANDMM, AVMNANDMM, AVMANDNMM, AVMXORMM, AVMORMM, AVMNORMM, AVMORNMM, AVMXNORMM, AVMMVM, AVMNOTM, AVCOMPRESSVM: + ins.rd, ins.rs1, ins.rs2 = uint32(p.To.Reg), uint32(p.From.Reg), uint32(p.Reg) + switch ins.as { + case AVMMVM: +-- +2.50.1 + diff --git a/2022-cmd-internal-obj-riscv-fix-LMUL-encoding-for-MF2-and.patch b/2022-cmd-internal-obj-riscv-fix-LMUL-encoding-for-MF2-and.patch new file mode 100644 index 0000000000000000000000000000000000000000..1b98f572340b6a0cef805f09af5061be64c22b3f --- /dev/null +++ b/2022-cmd-internal-obj-riscv-fix-LMUL-encoding-for-MF2-and.patch @@ -0,0 +1,68 @@ +From 3fceb2e468fc9624f9c8ce3cf66f8e04b054cd59 Mon Sep 17 00:00:00 2001 +From: Mark Ryan +Date: Tue, 6 May 2025 13:02:34 +0200 +Subject: [PATCH 22/38] cmd/internal/obj/riscv: fix LMUL encoding for MF2 and + MF8 + +The encodings for the riscv64 special operands SPOP_MF2 and SPOP_MF8 +are incorrect, i.e., their values are swapped. This leads to +incorrect encodings for the VSETVLI and VSETIVLI instructions. The +assembler currently encodes + +VSETVLI X10, E32, MF8, TA, MA, X12 + +as + +VSETVLI X10, E32, MF2, TA, MA, X12 + +We update the encodings for SPOP_MF2 and SPOP_MF8 so that they match +the LMUL table in section "31.3.4. Vector type register, vtype" of +the "RISC-V Instruction Set Manual Volume 1". 
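+
+For reference, the LMUL values in that table encode as follows (vlmul
+encoding 4 is reserved):
+
+M1=0, M2=1, M4=2, M8=3, MF8=5, MF4=6, MF2=7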
+ +Change-Id: Ic73355533d7c2a901ee060b35c2f7af6d58453e4 +Reviewed-on: https://go-review.googlesource.com/c/go/+/670016 +LUCI-TryBot-Result: Go LUCI +Reviewed-by: Cherry Mui +Reviewed-by: Carlos Amedee +Reviewed-by: Meng Zhuo +Reviewed-by: Joel Sing +--- + src/cmd/asm/internal/asm/testdata/riscv64.s | 4 ++-- + src/cmd/internal/obj/riscv/cpu.go | 4 ++-- + 2 files changed, 4 insertions(+), 4 deletions(-) + +diff --git a/src/cmd/asm/internal/asm/testdata/riscv64.s b/src/cmd/asm/internal/asm/testdata/riscv64.s +index 432078e29d..84122ead07 100644 +--- a/src/cmd/asm/internal/asm/testdata/riscv64.s ++++ b/src/cmd/asm/internal/asm/testdata/riscv64.s +@@ -438,9 +438,9 @@ start: + VSETVLI X10, E32, M2, TA, MA, X12 // 5776150d + VSETVLI X10, E32, M4, TA, MA, X12 // 5776250d + VSETVLI X10, E32, M8, TA, MA, X12 // 5776350d +- VSETVLI X10, E32, MF2, TA, MA, X12 // 5776550d ++ VSETVLI X10, E32, MF8, TA, MA, X12 // 5776550d + VSETVLI X10, E32, MF4, TA, MA, X12 // 5776650d +- VSETVLI X10, E32, MF8, TA, MA, X12 // 5776750d ++ VSETVLI X10, E32, MF2, TA, MA, X12 // 5776750d + VSETVLI X10, E32, M1, TA, MA, X12 // 5776050d + VSETVLI $15, E32, M1, TA, MA, X12 // 57f607cd + VSETIVLI $0, E32, M1, TA, MA, X12 // 577600cd +diff --git a/src/cmd/internal/obj/riscv/cpu.go b/src/cmd/internal/obj/riscv/cpu.go +index d87b6b1efb..382c08a9d9 100644 +--- a/src/cmd/internal/obj/riscv/cpu.go ++++ b/src/cmd/internal/obj/riscv/cpu.go +@@ -1297,9 +1297,9 @@ var specialOperands = map[SpecialOperand]struct { + SPOP_M2: {encoding: 1, name: "M2"}, + SPOP_M4: {encoding: 2, name: "M4"}, + SPOP_M8: {encoding: 3, name: "M8"}, +- SPOP_MF2: {encoding: 5, name: "MF2"}, ++ SPOP_MF8: {encoding: 5, name: "MF8"}, + SPOP_MF4: {encoding: 6, name: "MF4"}, +- SPOP_MF8: {encoding: 7, name: "MF8"}, ++ SPOP_MF2: {encoding: 7, name: "MF2"}, + + SPOP_E8: {encoding: 0, name: "E8"}, + SPOP_E16: {encoding: 1, name: "E16"}, +-- +2.50.1 + diff --git a/2023-cmd-internal-obj-riscv-reject-invalid-vadc-vsbc-enco.patch b/2023-cmd-internal-obj-riscv-reject-invalid-vadc-vsbc-enco.patch new file mode 100644 index 0000000000000000000000000000000000000000..db0e220dff4169549275087c1e98f53d45e9781b --- /dev/null +++ b/2023-cmd-internal-obj-riscv-reject-invalid-vadc-vsbc-enco.patch @@ -0,0 +1,123 @@ +From 5685b8abd3c4fcb3f8c9684707fbf65c4144c089 Mon Sep 17 00:00:00 2001 +From: Mark Ryan +Date: Thu, 1 May 2025 10:43:32 +0200 +Subject: [PATCH 23/38] cmd/internal/obj/riscv: reject invalid vadc/vsbc + encodings + +The RISC-V Instruction Set Manual Volume states that "for vadc and +vsbc, the instruction encoding is reserved if the destination vector +register is v0". The assembler currently allows instructions like + +VADCVVM V1, V2, V0, V0 + +to be assembled. It's not clear what the behaviour of such +instructions will be on target hardware so it's best to disallow +them. + +For reference, binutils (2.44-3.fc42) allows the instruction + +vadc.vvm v0, v4, v8, v0 + +to be assembled and the instruction actually executes on a Banana PI +F3 without crashing. However, clang (20.1.2) refuses to assemble the +instruction, producing the following error. 
+ +error: the destination vector register group cannot be V0 + vadc.vvm v0, v4, v8, v0 + ^ +Change-Id: Ia913cbd864ae8dbcf9227f69b963c93a99481cff +Reviewed-on: https://go-review.googlesource.com/c/go/+/669315 +Reviewed-by: Carlos Amedee +LUCI-TryBot-Result: Go LUCI +Reviewed-by: Cherry Mui +Reviewed-by: Joel Sing +--- + src/cmd/asm/internal/asm/testdata/riscv64.s | 10 ++++++++++ + src/cmd/asm/internal/asm/testdata/riscv64error.s | 5 +++++ + src/cmd/internal/obj/riscv/obj.go | 9 +++++++-- + 3 files changed, 22 insertions(+), 2 deletions(-) + +diff --git a/src/cmd/asm/internal/asm/testdata/riscv64.s b/src/cmd/asm/internal/asm/testdata/riscv64.s +index 84122ead07..f9e09435ae 100644 +--- a/src/cmd/asm/internal/asm/testdata/riscv64.s ++++ b/src/cmd/asm/internal/asm/testdata/riscv64.s +@@ -623,17 +623,27 @@ start: + VADCVXM X11, V2, V0, V3 // d7c12540 + VADCVIM $15, V2, V0, V3 // d7b12740 + VMADCVVM V1, V2, V0, V3 // d7812044 ++ VMADCVVM V1, V2, V0, V0 // 57802044 + VMADCVXM X11, V2, V0, V3 // d7c12544 ++ VMADCVXM X11, V2, V0, V0 // 57c02544 + VMADCVIM $15, V2, V0, V3 // d7b12744 ++ VMADCVIM $15, V2, V0, V0 // 57b02744 + VMADCVV V1, V2, V3 // d7812046 ++ VMADCVV V1, V2, V0 // 57802046 + VMADCVX X11, V2, V3 // d7c12546 ++ VMADCVX X11, V2, V0 // 57c02546 + VMADCVI $15, V2, V3 // d7b12746 ++ VMADCVI $15, V2, V0 // 57b02746 + VSBCVVM V1, V2, V0, V3 // d7812048 + VSBCVXM X11, V2, V0, V3 // d7c12548 + VMSBCVVM V1, V2, V0, V3 // d781204c ++ VMSBCVVM V1, V2, V0, V0 // 5780204c + VMSBCVXM X11, V2, V0, V3 // d7c1254c ++ VMSBCVXM X11, V2, V0, V0 // 57c0254c + VMSBCVV V1, V2, V3 // d781204e ++ VMSBCVV V1, V2, V0 // 5780204e + VMSBCVX X11, V2, V3 // d7c1254e ++ VMSBCVX X11, V2, V0 // 57c0254e + + // 31.11.5: Vector Bitwise Logical Instructions + VANDVV V1, V2, V3 // d7812026 +diff --git a/src/cmd/asm/internal/asm/testdata/riscv64error.s b/src/cmd/asm/internal/asm/testdata/riscv64error.s +index d7a6e6ca85..b5a8d06867 100644 +--- a/src/cmd/asm/internal/asm/testdata/riscv64error.s ++++ b/src/cmd/asm/internal/asm/testdata/riscv64error.s +@@ -93,10 +93,13 @@ TEXT errors(SB),$0 + VSEXTVF8 V2, V3, V4 // ERROR "invalid vector mask register" + VADCVVM V1, V2, V4, V3 // ERROR "invalid vector mask register" + VADCVVM V1, V2, V3 // ERROR "invalid vector mask register" ++ VADCVVM V1, V2, V0, V0 // ERROR "invalid destination register V0" + VADCVXM X10, V2, V4, V3 // ERROR "invalid vector mask register" + VADCVXM X10, V2, V3 // ERROR "invalid vector mask register" ++ VADCVXM X10, V2, V0, V0 // ERROR "invalid destination register V0" + VADCVIM $15, V2, V1, V3 // ERROR "invalid vector mask register" + VADCVIM $15, V2, V3 // ERROR "invalid vector mask register" ++ VADCVIM $15, V2, V0, V0 // ERROR "invalid destination register V0" + VMADCVVM V1, V2, V4, V3 // ERROR "invalid vector mask register" + VMADCVVM V1, V2, V3 // ERROR "invalid vector mask register" + VMADCVXM X10, V2, V4, V3 // ERROR "invalid vector mask register" +@@ -105,8 +108,10 @@ TEXT errors(SB),$0 + VMADCVIM $15, V2, V3 // ERROR "invalid vector mask register" + VSBCVVM V1, V2, V4, V3 // ERROR "invalid vector mask register" + VSBCVVM V1, V2, V3 // ERROR "invalid vector mask register" ++ VSBCVVM V1, V2, V0, V0 // ERROR "invalid destination register V0" + VSBCVXM X10, V2, V4, V3 // ERROR "invalid vector mask register" + VSBCVXM X10, V2, V3 // ERROR "invalid vector mask register" ++ VSBCVXM X10, V2, V0, V0 // ERROR "invalid destination register V0" + VMSBCVVM V1, V2, V4, V3 // ERROR "invalid vector mask register" + VMSBCVVM V1, V2, V3 // ERROR "invalid vector mask 
register" + VMSBCVXM X10, V2, V4, V3 // ERROR "invalid vector mask register" +diff --git a/src/cmd/internal/obj/riscv/obj.go b/src/cmd/internal/obj/riscv/obj.go +index 6dc621ebcd..fc54bce2d7 100644 +--- a/src/cmd/internal/obj/riscv/obj.go ++++ b/src/cmd/internal/obj/riscv/obj.go +@@ -3677,8 +3677,13 @@ func instructionsForProg(p *obj.Prog) []*instruction { + ins.funct7 |= 1 // unmasked + ins.rd, ins.rs1, ins.rs2 = uint32(p.To.Reg), uint32(p.From.Reg), REG_V0 + +- case AVADCVVM, AVADCVXM, AVMADCVVM, AVMADCVXM, AVSBCVVM, AVSBCVXM, AVMSBCVVM, AVMSBCVXM, AVADCVIM, AVMADCVIM, +- AVMERGEVVM, AVMERGEVXM, AVMERGEVIM, AVFMERGEVFM: ++ case AVADCVIM, AVADCVVM, AVADCVXM, AVSBCVVM, AVSBCVXM: ++ if ins.rd == REG_V0 { ++ p.Ctxt.Diag("%v: invalid destination register V0", p) ++ } ++ fallthrough ++ ++ case AVMADCVVM, AVMADCVXM, AVMSBCVVM, AVMSBCVXM, AVMADCVIM, AVMERGEVVM, AVMERGEVXM, AVMERGEVIM, AVFMERGEVFM: + if ins.rs3 != REG_V0 { + p.Ctxt.Diag("%v: invalid vector mask register", p) + } +-- +2.50.1 + diff --git a/2024-cmd-compile-add-generic-simplifications-on-riscv64.patch b/2024-cmd-compile-add-generic-simplifications-on-riscv64.patch new file mode 100644 index 0000000000000000000000000000000000000000..28033a5128b075c30d493298833cd5c7d27df0fe --- /dev/null +++ b/2024-cmd-compile-add-generic-simplifications-on-riscv64.patch @@ -0,0 +1,203 @@ +From 7d37a1702bdf19eb1b5e798dc0d26f8b04b5e024 Mon Sep 17 00:00:00 2001 +From: Julian Zhu +Date: Sat, 17 May 2025 21:45:50 +0800 +Subject: [PATCH 24/38] cmd/compile: add generic simplifications on riscv64 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +file before after Δ % +addr2line 3636263 3636215 -48 -0.001% +asm 6318110 6317966 -144 -0.002% +buildid 3463352 3463224 -128 -0.004% +cgo 5672502 5672214 -288 -0.005% +compile 26904997 26905719 +722 +0.003% +cover 6405603 6405467 -136 -0.002% +dist 4092630 4092494 -136 -0.003% +doc 9728281 9723977 -4304 -0.044% +fix 4014891 4014835 -56 -0.001% +link 8327674 8327426 -248 -0.003% +nm 3628718 3628494 -224 -0.006% +objdump 5951778 5951626 -152 -0.003% +pack 2896080 2896040 -40 -0.001% +pprof 17596796 17591908 -4888 -0.028% +test2json 3346622 3346566 -56 -0.002% +trace 16179738 16175706 -4032 -0.025% +vet 9603472 9603264 -208 -0.002% +total 156070021 156055655 -14366 -0.009% + +Change-Id: Ie4a79a3c410eb79155ce2418ae64fa670d1ccd53 +Reviewed-on: https://go-review.googlesource.com/c/go/+/673477 +Reviewed-by: Keith Randall +Reviewed-by: Keith Randall +LUCI-TryBot-Result: Go LUCI +Auto-Submit: Keith Randall +Reviewed-by: David Chase +--- + .../compile/internal/ssa/_gen/RISCV64.rules | 9 ++ + .../compile/internal/ssa/rewriteRISCV64.go | 87 +++++++++++++++++++ + 2 files changed, 96 insertions(+) + +diff --git a/src/cmd/compile/internal/ssa/_gen/RISCV64.rules b/src/cmd/compile/internal/ssa/_gen/RISCV64.rules +index b38c9bd6c8..9b9f82d4a6 100644 +--- a/src/cmd/compile/internal/ssa/_gen/RISCV64.rules ++++ b/src/cmd/compile/internal/ssa/_gen/RISCV64.rules +@@ -750,6 +750,15 @@ + (ROL x (NEG y)) => (ROR x y) + (ROLW x (NEG y)) => (RORW x y) + ++// generic simplifications ++(ADD x (NEG y)) => (SUB x y) ++(SUB x (NEG y)) => (ADD x y) ++(SUB x x) => (MOVDconst [0]) ++(AND x x) => x ++(OR x x) => x ++(ORN x x) => (MOVDconst [-1]) ++(XOR x x) => (MOVDconst [0]) ++ + // Convert const subtraction into ADDI with negative immediate, where possible. 
+ (SUB x (MOVDconst [val])) && is32Bit(-val) => (ADDI [-val] x) + (SUB (MOVDconst [val]) y) && is32Bit(-val) => (NEG (ADDI [-val] y)) +diff --git a/src/cmd/compile/internal/ssa/rewriteRISCV64.go b/src/cmd/compile/internal/ssa/rewriteRISCV64.go +index e16caa2cdc..13f75794ac 100644 +--- a/src/cmd/compile/internal/ssa/rewriteRISCV64.go ++++ b/src/cmd/compile/internal/ssa/rewriteRISCV64.go +@@ -539,6 +539,8 @@ func rewriteValueRISCV64(v *Value) bool { + return rewriteValueRISCV64_OpRISCV64OR(v) + case OpRISCV64ORI: + return rewriteValueRISCV64_OpRISCV64ORI(v) ++ case OpRISCV64ORN: ++ return rewriteValueRISCV64_OpRISCV64ORN(v) + case OpRISCV64ROL: + return rewriteValueRISCV64_OpRISCV64ROL(v) + case OpRISCV64ROLW: +@@ -3315,6 +3317,21 @@ func rewriteValueRISCV64_OpRISCV64ADD(v *Value) bool { + } + break + } ++ // match: (ADD x (NEG y)) ++ // result: (SUB x y) ++ for { ++ for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { ++ x := v_0 ++ if v_1.Op != OpRISCV64NEG { ++ continue ++ } ++ y := v_1.Args[0] ++ v.reset(OpRISCV64SUB) ++ v.AddArg2(x, y) ++ return true ++ } ++ break ++ } + // match: (ADD (SLLI [1] x) y) + // cond: buildcfg.GORISCV64 >= 22 + // result: (SH1ADD x y) +@@ -3465,6 +3482,16 @@ func rewriteValueRISCV64_OpRISCV64AND(v *Value) bool { + } + break + } ++ // match: (AND x x) ++ // result: x ++ for { ++ x := v_0 ++ if x != v_1 { ++ break ++ } ++ v.copyOf(x) ++ return true ++ } + return false + } + func rewriteValueRISCV64_OpRISCV64ANDI(v *Value) bool { +@@ -6148,6 +6175,16 @@ func rewriteValueRISCV64_OpRISCV64OR(v *Value) bool { + } + break + } ++ // match: (OR x x) ++ // result: x ++ for { ++ x := v_0 ++ if x != v_1 { ++ break ++ } ++ v.copyOf(x) ++ return true ++ } + return false + } + func rewriteValueRISCV64_OpRISCV64ORI(v *Value) bool { +@@ -6200,6 +6237,22 @@ func rewriteValueRISCV64_OpRISCV64ORI(v *Value) bool { + } + return false + } ++func rewriteValueRISCV64_OpRISCV64ORN(v *Value) bool { ++ v_1 := v.Args[1] ++ v_0 := v.Args[0] ++ // match: (ORN x x) ++ // result: (MOVDconst [-1]) ++ for { ++ x := v_0 ++ if x != v_1 { ++ break ++ } ++ v.reset(OpRISCV64MOVDconst) ++ v.AuxInt = int64ToAuxInt(-1) ++ return true ++ } ++ return false ++} + func rewriteValueRISCV64_OpRISCV64ROL(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] +@@ -6845,6 +6898,29 @@ func rewriteValueRISCV64_OpRISCV64SUB(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block ++ // match: (SUB x (NEG y)) ++ // result: (ADD x y) ++ for { ++ x := v_0 ++ if v_1.Op != OpRISCV64NEG { ++ break ++ } ++ y := v_1.Args[0] ++ v.reset(OpRISCV64ADD) ++ v.AddArg2(x, y) ++ return true ++ } ++ // match: (SUB x x) ++ // result: (MOVDconst [0]) ++ for { ++ x := v_0 ++ if x != v_1 { ++ break ++ } ++ v.reset(OpRISCV64MOVDconst) ++ v.AuxInt = int64ToAuxInt(0) ++ return true ++ } + // match: (SUB x (MOVDconst [val])) + // cond: is32Bit(-val) + // result: (ADDI [-val] x) +@@ -6956,6 +7032,17 @@ func rewriteValueRISCV64_OpRISCV64XOR(v *Value) bool { + } + break + } ++ // match: (XOR x x) ++ // result: (MOVDconst [0]) ++ for { ++ x := v_0 ++ if x != v_1 { ++ break ++ } ++ v.reset(OpRISCV64MOVDconst) ++ v.AuxInt = int64ToAuxInt(0) ++ return true ++ } + return false + } + func rewriteValueRISCV64_OpRotateLeft16(v *Value) bool { +-- +2.50.1 + diff --git a/2025-cmd-internal-obj-riscv-fix-vector-integer-multiply-a.patch b/2025-cmd-internal-obj-riscv-fix-vector-integer-multiply-a.patch new file mode 100644 index 0000000000000000000000000000000000000000..77e376506fbb69410f02bcc1aaaaec0361095062 --- /dev/null +++ 
b/2025-cmd-internal-obj-riscv-fix-vector-integer-multiply-a.patch @@ -0,0 +1,186 @@ +From 105469aa478f5dee5a627d2ec9816df821e5a0e2 Mon Sep 17 00:00:00 2001 +From: Mark Ryan +Date: Tue, 6 May 2025 14:07:09 +0200 +Subject: [PATCH 25/38] cmd/internal/obj/riscv: fix vector integer multiply add + +The RISC-V integer vector multiply add instructions are not encoded +correctly; the first and second arguments are swapped. For example, +the instruction + +VMACCVV V1, V2, V3 + +encodes to + +b620a1d7 or vmacc.vv v3,v1,v2 + +and not + +b61121d7 or vmacc.vv v3,v2,v1 + +as expected. + +This is inconsistent with the argument ordering we use for 3 +argument vector instructions, in which the argument order, as given +in the RISC-V specifications, is reversed, and also with the vector +FMA instructions which have the same argument ordering as the vector +integer multiply add instructions in the "The RISC-V Instruction Set +Manual Volume I". For example, in the ISA manual we have the +following instruction definitions + +; Integer multiply-add, overwrite addend +vmacc.vv vd, vs1, vs2, vm # vd[i] = +(vs1[i] * vs2[i]) + vd[i] + +; FP multiply-accumulate, overwrites addend +vfmacc.vv vd, vs1, vs2, vm # vd[i] = +(vs1[i] * vs2[i]) + vd[i] + +It's reasonable to expect that the Go assembler would use the same +argument ordering for both of these instructions. It currently does +not. + +We fix the issue by switching the argument ordering for the vector +integer multiply add instructions to match those of the vector FMA +instructions. + +Change-Id: Ib98e9999617f991969e5c831734b3bb3324439f6 +Reviewed-on: https://go-review.googlesource.com/c/go/+/670335 +Reviewed-by: Carlos Amedee +LUCI-TryBot-Result: Go LUCI +Reviewed-by: Meng Zhuo +Reviewed-by: Cherry Mui +--- + src/cmd/asm/internal/asm/testdata/riscv64.s | 60 +++++++++---------- + .../internal/asm/testdata/riscv64validation.s | 14 ++--- + src/cmd/internal/obj/riscv/obj.go | 6 +- + 3 files changed, 40 insertions(+), 40 deletions(-) + +diff --git a/src/cmd/asm/internal/asm/testdata/riscv64.s b/src/cmd/asm/internal/asm/testdata/riscv64.s +index f9e09435ae..583516efcb 100644 +--- a/src/cmd/asm/internal/asm/testdata/riscv64.s ++++ b/src/cmd/asm/internal/asm/testdata/riscv64.s +@@ -830,38 +830,38 @@ start: + VWMULSUVX X10, V2, V0, V3 // d76125e8 + + // 31.11.13: Vector Single-Width Integer Multiply-Add Instructions +- VMACCVV V1, V2, V3 // d7a120b6 +- VMACCVV V1, V2, V0, V3 // d7a120b4 +- VMACCVX X10, V2, V3 // d76125b6 +- VMACCVX X10, V2, V0, V3 // d76125b4 +- VNMSACVV V1, V2, V3 // d7a120be +- VNMSACVV V1, V2, V0, V3 // d7a120bc +- VNMSACVX X10, V2, V3 // d76125be +- VNMSACVX X10, V2, V0, V3 // d76125bc +- VMADDVV V1, V2, V3 // d7a120a6 +- VMADDVV V1, V2, V0, V3 // d7a120a4 +- VMADDVX X10, V2, V3 // d76125a6 +- VMADDVX X10, V2, V0, V3 // d76125a4 +- VNMSUBVV V1, V2, V3 // d7a120ae +- VNMSUBVV V1, V2, V0, V3 // d7a120ac +- VNMSUBVX X10, V2, V3 // d76125ae +- VNMSUBVX X10, V2, V0, V3 // d76125ac ++ VMACCVV V2, V1, V3 // d7a120b6 ++ VMACCVV V2, V1, V0, V3 // d7a120b4 ++ VMACCVX V2, X10, V3 // d76125b6 ++ VMACCVX V2, X10, V0, V3 // d76125b4 ++ VNMSACVV V2, V1, V3 // d7a120be ++ VNMSACVV V2, V1, V0, V3 // d7a120bc ++ VNMSACVX V2, X10, V3 // d76125be ++ VNMSACVX V2, X10, V0, V3 // d76125bc ++ VMADDVV V2, V1, V3 // d7a120a6 ++ VMADDVV V2, V1, V0, V3 // d7a120a4 ++ VMADDVX V2, X10, V3 // d76125a6 ++ VMADDVX V2, X10, V0, V3 // d76125a4 ++ VNMSUBVV V2, V1, V3 // d7a120ae ++ VNMSUBVV V2, V1, V0, V3 // d7a120ac ++ VNMSUBVX V2, X10, V3 // d76125ae ++ VNMSUBVX V2, X10, V0, V3 // 
d76125ac + + // 31.11.14: Vector Widening Integer Multiply-Add Instructions +- VWMACCUVV V1, V2, V3 // d7a120f2 +- VWMACCUVV V1, V2, V0, V3 // d7a120f0 +- VWMACCUVX X10, V2, V3 // d76125f2 +- VWMACCUVX X10, V2, V0, V3 // d76125f0 +- VWMACCVV V1, V2, V3 // d7a120f6 +- VWMACCVV V1, V2, V0, V3 // d7a120f4 +- VWMACCVX X10, V2, V3 // d76125f6 +- VWMACCVX X10, V2, V0, V3 // d76125f4 +- VWMACCSUVV V1, V2, V3 // d7a120fe +- VWMACCSUVV V1, V2, V0, V3 // d7a120fc +- VWMACCSUVX X10, V2, V3 // d76125fe +- VWMACCSUVX X10, V2, V0, V3 // d76125fc +- VWMACCUSVX X10, V2, V3 // d76125fa +- VWMACCUSVX X10, V2, V0, V3 // d76125f8 ++ VWMACCUVV V2, V1, V3 // d7a120f2 ++ VWMACCUVV V2, V1, V0, V3 // d7a120f0 ++ VWMACCUVX V2, X10, V3 // d76125f2 ++ VWMACCUVX V2, X10, V0, V3 // d76125f0 ++ VWMACCVV V2, V1, V3 // d7a120f6 ++ VWMACCVV V2, V1, V0, V3 // d7a120f4 ++ VWMACCVX V2, X10, V3 // d76125f6 ++ VWMACCVX V2, X10, V0, V3 // d76125f4 ++ VWMACCSUVV V2, V1, V3 // d7a120fe ++ VWMACCSUVV V2, V1, V0, V3 // d7a120fc ++ VWMACCSUVX V2, X10, V3 // d76125fe ++ VWMACCSUVX V2, X10, V0, V3 // d76125fc ++ VWMACCUSVX V2, X10, V3 // d76125fa ++ VWMACCUSVX V2, X10, V0, V3 // d76125f8 + + // 31.11.15: Vector Integer Merge Instructions + VMERGEVVM V1, V2, V0, V3 // d781205c +diff --git a/src/cmd/asm/internal/asm/testdata/riscv64validation.s b/src/cmd/asm/internal/asm/testdata/riscv64validation.s +index 374a97dcfe..55bf518e68 100644 +--- a/src/cmd/asm/internal/asm/testdata/riscv64validation.s ++++ b/src/cmd/asm/internal/asm/testdata/riscv64validation.s +@@ -214,19 +214,19 @@ TEXT validation(SB),$0 + VWMULUVX V1, V2, V3 // ERROR "expected integer register in rs1 position" + VWMULSUVV X10, V2, V3 // ERROR "expected vector register in vs1 position" + VWMULSUVX V1, V2, V3 // ERROR "expected integer register in rs1 position" +- VMACCVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VMACCVV V2, X10, V3 // ERROR "expected vector register in vs1 position" + VMACCVX V1, V2, V3 // ERROR "expected integer register in rs1 position" +- VNMSACVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VNMSACVV V2, X10, V3 // ERROR "expected vector register in vs1 position" + VNMSACVX V1, V2, V3 // ERROR "expected integer register in rs1 position" +- VMADDVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VMADDVV V2, X10, V3 // ERROR "expected vector register in vs1 position" + VMADDVX V1, V2, V3 // ERROR "expected integer register in rs1 position" +- VNMSUBVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VNMSUBVV V2, X10, V3 // ERROR "expected vector register in vs1 position" + VNMSUBVX V1, V2, V3 // ERROR "expected integer register in rs1 position" +- VWMACCUVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VWMACCUVV V2, X10, V3 // ERROR "expected vector register in vs1 position" + VWMACCUVX V1, V2, V3 // ERROR "expected integer register in rs1 position" +- VWMACCVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VWMACCVV V2, X10, V3 // ERROR "expected vector register in vs1 position" + VWMACCVX V1, V2, V3 // ERROR "expected integer register in rs1 position" +- VWMACCSUVV X10, V2, V3 // ERROR "expected vector register in vs1 position" ++ VWMACCSUVV V2, X10, V3 // ERROR "expected vector register in vs1 position" + VWMACCSUVX V1, V2, V3 // ERROR "expected integer register in rs1 position" + VWMACCUSVX V1, V2, V3 // ERROR "expected integer register in rs1 position" + VMERGEVVM X10, V2, V0, V3 // ERROR "expected vector register in vs1 position" 
+diff --git a/src/cmd/internal/obj/riscv/obj.go b/src/cmd/internal/obj/riscv/obj.go +index fc54bce2d7..cb4612ca91 100644 +--- a/src/cmd/internal/obj/riscv/obj.go ++++ b/src/cmd/internal/obj/riscv/obj.go +@@ -3601,8 +3601,6 @@ func instructionsForProg(p *obj.Prog) []*instruction { + AVMULVV, AVMULVX, AVMULHVV, AVMULHVX, AVMULHUVV, AVMULHUVX, AVMULHSUVV, AVMULHSUVX, + AVDIVUVV, AVDIVUVX, AVDIVVV, AVDIVVX, AVREMUVV, AVREMUVX, AVREMVV, AVREMVX, + AVWMULVV, AVWMULVX, AVWMULUVV, AVWMULUVX, AVWMULSUVV, AVWMULSUVX, AVNSRLWV, AVNSRLWX, AVNSRAWV, AVNSRAWX, +- AVMACCVV, AVMACCVX, AVNMSACVV, AVNMSACVX, AVMADDVV, AVMADDVX, AVNMSUBVV, AVNMSUBVX, +- AVWMACCUVV, AVWMACCUVX, AVWMACCVV, AVWMACCVX, AVWMACCSUVV, AVWMACCSUVX, AVWMACCUSVX, + AVSADDUVV, AVSADDUVX, AVSADDUVI, AVSADDVV, AVSADDVX, AVSADDVI, AVSSUBUVV, AVSSUBUVX, AVSSUBVV, AVSSUBVX, + AVAADDUVV, AVAADDUVX, AVAADDVV, AVAADDVX, AVASUBUVV, AVASUBUVX, AVASUBVV, AVASUBVX, + AVSMULVV, AVSMULVX, AVSSRLVV, AVSSRLVX, AVSSRLVI, AVSSRAVV, AVSSRAVX, AVSSRAVI, +@@ -3628,7 +3626,9 @@ func instructionsForProg(p *obj.Prog) []*instruction { + + case AVFMACCVV, AVFMACCVF, AVFNMACCVV, AVFNMACCVF, AVFMSACVV, AVFMSACVF, AVFNMSACVV, AVFNMSACVF, + AVFMADDVV, AVFMADDVF, AVFNMADDVV, AVFNMADDVF, AVFMSUBVV, AVFMSUBVF, AVFNMSUBVV, AVFNMSUBVF, +- AVFWMACCVV, AVFWMACCVF, AVFWNMACCVV, AVFWNMACCVF, AVFWMSACVV, AVFWMSACVF, AVFWNMSACVV, AVFWNMSACVF: ++ AVFWMACCVV, AVFWMACCVF, AVFWNMACCVV, AVFWNMACCVF, AVFWMSACVV, AVFWMSACVF, AVFWNMSACVV, AVFWNMSACVF, ++ AVMACCVV, AVMACCVX, AVNMSACVV, AVNMSACVX, AVMADDVV, AVMADDVX, AVNMSUBVV, AVNMSUBVX, ++ AVWMACCUVV, AVWMACCUVX, AVWMACCVV, AVWMACCVX, AVWMACCSUVV, AVWMACCSUVX, AVWMACCUSVX: + switch { + case ins.rs3 == obj.REG_NONE: + ins.funct7 |= 1 // unmasked +-- +2.50.1 + diff --git a/2026-internal-bytealg-vector-implementation-of-equal-for-.patch b/2026-internal-bytealg-vector-implementation-of-equal-for-.patch new file mode 100644 index 0000000000000000000000000000000000000000..08c40a228c346a48e357c20c40a9592de000cddd --- /dev/null +++ b/2026-internal-bytealg-vector-implementation-of-equal-for-.patch @@ -0,0 +1,186 @@ +From 2a9f9a4414f266e6b80585e7227dd6032251659a Mon Sep 17 00:00:00 2001 +From: Joel Sing +Date: Wed, 12 Feb 2025 23:41:22 +1100 +Subject: [PATCH 26/38] internal/bytealg: vector implementation of equal for + riscv64 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Provide a vector implementation of equal for riscv64, which is used +when compiled with the rva23u64 profile, or when vector is detected +to be available. Inputs that are 8 byte aligned will still be handled +via a the non-vector code if the length is less than or equal to 64 +bytes. 
+ +On a Banana Pi F3, with GORISCV64=rva23u64: + + │ equal.1 │ equal.2 │ + │ sec/op │ sec/op vs base │ +Equal/0-8 1.254n ± 0% 1.254n ± 0% ~ (p=1.000 n=10) +Equal/same/1-8 21.32n ± 0% 21.32n ± 0% ~ (p=0.466 n=10) +Equal/same/6-8 21.32n ± 0% 21.32n ± 0% ~ (p=0.689 n=10) +Equal/same/9-8 21.32n ± 0% 21.32n ± 0% ~ (p=0.861 n=10) +Equal/same/15-8 21.32n ± 0% 21.32n ± 0% ~ (p=0.657 n=10) +Equal/same/16-8 21.32n ± 0% 21.33n ± 0% ~ (p=0.075 n=10) +Equal/same/20-8 21.32n ± 0% 21.32n ± 0% ~ (p=0.249 n=10) +Equal/same/32-8 21.32n ± 0% 21.32n ± 0% ~ (p=0.303 n=10) +Equal/same/4K-8 21.32n ± 0% 21.32n ± 0% ~ (p=1.000 n=10) +Equal/same/4M-8 21.32n ± 0% 21.32n ± 0% ~ (p=0.582 n=10) +Equal/same/64M-8 21.32n ± 0% 21.32n ± 0% ~ (p=0.930 n=10) +Equal/1-8 39.16n ± 1% 38.71n ± 0% -1.15% (p=0.000 n=10) +Equal/6-8 51.49n ± 1% 50.40n ± 1% -2.12% (p=0.000 n=10) +Equal/9-8 54.46n ± 1% 53.89n ± 0% -1.04% (p=0.000 n=10) +Equal/15-8 71.81n ± 1% 70.59n ± 0% -1.71% (p=0.000 n=10) +Equal/16-8 69.14n ± 0% 68.21n ± 0% -1.34% (p=0.000 n=10) +Equal/20-8 78.59n ± 0% 77.59n ± 0% -1.26% (p=0.000 n=10) +Equal/32-8 41.55n ± 0% 41.16n ± 0% -0.96% (p=0.000 n=10) +Equal/4K-8 925.5n ± 0% 561.4n ± 1% -39.34% (p=0.000 n=10) +Equal/4M-8 3.110m ± 32% 2.463m ± 16% -20.80% (p=0.000 n=10) +Equal/64M-8 47.34m ± 30% 39.89m ± 16% -15.75% (p=0.004 n=10) +EqualBothUnaligned/64_0-8 32.17n ± 1% 32.11n ± 1% ~ (p=0.184 n=10) +EqualBothUnaligned/64_1-8 79.48n ± 0% 48.24n ± 1% -39.31% (p=0.000 n=10) +EqualBothUnaligned/64_4-8 72.71n ± 0% 48.37n ± 1% -33.48% (p=0.000 n=10) +EqualBothUnaligned/64_7-8 77.12n ± 0% 48.16n ± 1% -37.56% (p=0.000 n=10) +EqualBothUnaligned/4096_0-8 908.4n ± 0% 562.4n ± 2% -38.09% (p=0.000 n=10) +EqualBothUnaligned/4096_1-8 956.6n ± 0% 571.4n ± 3% -40.26% (p=0.000 n=10) +EqualBothUnaligned/4096_4-8 949.6n ± 0% 571.6n ± 3% -39.81% (p=0.000 n=10) +EqualBothUnaligned/4096_7-8 954.2n ± 0% 571.7n ± 3% -40.09% (p=0.000 n=10) +EqualBothUnaligned/4194304_0-8 2.935m ± 29% 2.664m ± 19% ~ (p=0.089 n=10) +EqualBothUnaligned/4194304_1-8 3.341m ± 13% 2.896m ± 34% ~ (p=0.075 n=10) +EqualBothUnaligned/4194304_4-8 3.204m ± 39% 3.352m ± 33% ~ (p=0.796 n=10) +EqualBothUnaligned/4194304_7-8 3.226m ± 30% 2.737m ± 34% -15.16% (p=0.043 n=10) +EqualBothUnaligned/67108864_0-8 49.04m ± 17% 39.94m ± 12% -18.57% (p=0.005 n=10) +EqualBothUnaligned/67108864_1-8 51.96m ± 15% 42.48m ± 15% -18.23% (p=0.015 n=10) +EqualBothUnaligned/67108864_4-8 47.67m ± 17% 37.85m ± 41% -20.61% (p=0.035 n=10) +EqualBothUnaligned/67108864_7-8 53.00m ± 22% 38.76m ± 21% -26.87% (p=0.000 n=10) +CompareBytesEqual-8 51.71n ± 1% 52.00n ± 0% +0.57% (p=0.002 n=10) +geomean 1.469µ 1.265µ -13.93% + + │ equal.1 │ equal.2 │ + │ B/s │ B/s vs base │ +Equal/same/1-8 44.73Mi ± 0% 44.72Mi ± 0% ~ (p=0.426 n=10) +Equal/same/6-8 268.3Mi ± 0% 268.4Mi ± 0% ~ (p=0.753 n=10) +Equal/same/9-8 402.6Mi ± 0% 402.5Mi ± 0% ~ (p=0.209 n=10) +Equal/same/15-8 670.9Mi ± 0% 670.9Mi ± 0% ~ (p=0.724 n=10) +Equal/same/16-8 715.6Mi ± 0% 715.4Mi ± 0% -0.04% (p=0.022 n=10) +Equal/same/20-8 894.6Mi ± 0% 894.5Mi ± 0% ~ (p=0.060 n=10) +Equal/same/32-8 1.398Gi ± 0% 1.398Gi ± 0% ~ (p=0.986 n=10) +Equal/same/4K-8 178.9Gi ± 0% 178.9Gi ± 0% ~ (p=0.853 n=10) +Equal/same/4M-8 178.9Ti ± 0% 178.9Ti ± 0% ~ (p=0.971 n=10) +Equal/same/64M-8 2862.8Ti ± 0% 2862.6Ti ± 0% ~ (p=0.971 n=10) +Equal/1-8 24.35Mi ± 1% 24.63Mi ± 0% +1.16% (p=0.000 n=10) +Equal/6-8 111.1Mi ± 1% 113.5Mi ± 1% +2.17% (p=0.000 n=10) +Equal/9-8 157.6Mi ± 1% 159.3Mi ± 0% +1.05% (p=0.000 n=10) +Equal/15-8 199.2Mi ± 1% 202.7Mi ± 0% +1.74% (p=0.000 n=10) +Equal/16-8 
220.7Mi ± 0% 223.7Mi ± 0% +1.36% (p=0.000 n=10) +Equal/20-8 242.7Mi ± 0% 245.8Mi ± 0% +1.27% (p=0.000 n=10) +Equal/32-8 734.3Mi ± 0% 741.6Mi ± 0% +0.98% (p=0.000 n=10) +Equal/4K-8 4.122Gi ± 0% 6.795Gi ± 1% +64.84% (p=0.000 n=10) +Equal/4M-8 1.258Gi ± 24% 1.586Gi ± 14% +26.12% (p=0.000 n=10) +Equal/64M-8 1.320Gi ± 23% 1.567Gi ± 14% +18.69% (p=0.004 n=10) +EqualBothUnaligned/64_0-8 1.853Gi ± 1% 1.856Gi ± 1% ~ (p=0.190 n=10) +EqualBothUnaligned/64_1-8 767.9Mi ± 0% 1265.2Mi ± 1% +64.76% (p=0.000 n=10) +EqualBothUnaligned/64_4-8 839.4Mi ± 0% 1261.9Mi ± 1% +50.33% (p=0.000 n=10) +EqualBothUnaligned/64_7-8 791.4Mi ± 0% 1267.5Mi ± 1% +60.16% (p=0.000 n=10) +EqualBothUnaligned/4096_0-8 4.199Gi ± 0% 6.784Gi ± 2% +61.54% (p=0.000 n=10) +EqualBothUnaligned/4096_1-8 3.988Gi ± 0% 6.676Gi ± 3% +67.40% (p=0.000 n=10) +EqualBothUnaligned/4096_4-8 4.017Gi ± 0% 6.674Gi ± 3% +66.14% (p=0.000 n=10) +EqualBothUnaligned/4096_7-8 3.998Gi ± 0% 6.673Gi ± 3% +66.92% (p=0.000 n=10) +EqualBothUnaligned/4194304_0-8 1.332Gi ± 22% 1.468Gi ± 16% ~ (p=0.089 n=10) +EqualBothUnaligned/4194304_1-8 1.169Gi ± 12% 1.350Gi ± 25% ~ (p=0.075 n=10) +EqualBothUnaligned/4194304_4-8 1.222Gi ± 28% 1.165Gi ± 48% ~ (p=0.796 n=10) +EqualBothUnaligned/4194304_7-8 1.211Gi ± 23% 1.427Gi ± 26% +17.88% (p=0.043 n=10) +EqualBothUnaligned/67108864_0-8 1.274Gi ± 14% 1.567Gi ± 14% +22.97% (p=0.005 n=10) +EqualBothUnaligned/67108864_1-8 1.204Gi ± 14% 1.471Gi ± 13% +22.18% (p=0.015 n=10) +EqualBothUnaligned/67108864_4-8 1.311Gi ± 14% 1.651Gi ± 29% +25.92% (p=0.035 n=10) +EqualBothUnaligned/67108864_7-8 1.179Gi ± 18% 1.612Gi ± 17% +36.73% (p=0.000 n=10) +geomean 1.870Gi 2.190Gi +17.16% + +Change-Id: I9c5270bcc6997d020a96d1e97c7e7cfc7ca7fd34 +Reviewed-on: https://go-review.googlesource.com/c/go/+/646736 +Reviewed-by: Mark Ryan +Reviewed-by: Meng Zhuo +LUCI-TryBot-Result: Go LUCI +Reviewed-by: Dmitri Shuralyov +Reviewed-by: Mark Freeman +--- + src/internal/bytealg/bytealg.go | 10 ++++++---- + src/internal/bytealg/equal_riscv64.s | 30 ++++++++++++++++++++++++++++ + 2 files changed, 36 insertions(+), 4 deletions(-) + +diff --git a/src/internal/bytealg/bytealg.go b/src/internal/bytealg/bytealg.go +index 6b79a2e1fa..cd96223fb7 100644 +--- a/src/internal/bytealg/bytealg.go ++++ b/src/internal/bytealg/bytealg.go +@@ -11,13 +11,15 @@ import ( + + // Offsets into internal/cpu records for use in assembly. + const ( +- offsetX86HasSSE42 = unsafe.Offsetof(cpu.X86.HasSSE42) +- offsetX86HasAVX2 = unsafe.Offsetof(cpu.X86.HasAVX2) +- offsetX86HasPOPCNT = unsafe.Offsetof(cpu.X86.HasPOPCNT) ++ offsetPPC64HasPOWER9 = unsafe.Offsetof(cpu.PPC64.IsPOWER9) ++ ++ offsetRISCV64HasV = unsafe.Offsetof(cpu.RISCV64.HasV) + + offsetS390xHasVX = unsafe.Offsetof(cpu.S390X.HasVX) + +- offsetPPC64HasPOWER9 = unsafe.Offsetof(cpu.PPC64.IsPOWER9) ++ offsetX86HasSSE42 = unsafe.Offsetof(cpu.X86.HasSSE42) ++ offsetX86HasAVX2 = unsafe.Offsetof(cpu.X86.HasAVX2) ++ offsetX86HasPOPCNT = unsafe.Offsetof(cpu.X86.HasPOPCNT) + ) + + // MaxLen is the maximum length of the string to be searched for (argument b) in Index. +diff --git a/src/internal/bytealg/equal_riscv64.s b/src/internal/bytealg/equal_riscv64.s +index 87b2d79302..58e033f847 100644 +--- a/src/internal/bytealg/equal_riscv64.s ++++ b/src/internal/bytealg/equal_riscv64.s +@@ -2,6 +2,7 @@ + // Use of this source code is governed by a BSD-style + // license that can be found in the LICENSE file. 
+ ++#include "asm_riscv64.h" + #include "go_asm.h" + #include "textflag.h" + +@@ -28,6 +29,35 @@ length_check: + MOV $32, X23 + BLT X12, X23, loop4_check + ++#ifndef hasV ++ MOVB internal∕cpu·RISCV64+const_offsetRISCV64HasV(SB), X5 ++ BEQZ X5, equal_scalar ++#endif ++ ++ // Use vector if not 8 byte aligned. ++ OR X10, X11, X5 ++ AND $7, X5 ++ BNEZ X5, vector_loop ++ ++ // Use scalar if 8 byte aligned and <= 64 bytes. ++ SUB $64, X12, X6 ++ BLEZ X6, loop32_check ++ ++ PCALIGN $16 ++vector_loop: ++ VSETVLI X12, E8, M8, TA, MA, X5 ++ VLE8V (X10), V8 ++ VLE8V (X11), V16 ++ VMSNEVV V8, V16, V0 ++ VFIRSTM V0, X6 ++ BGEZ X6, done ++ ADD X5, X10 ++ ADD X5, X11 ++ SUB X5, X12 ++ BNEZ X12, vector_loop ++ JMP done ++ ++equal_scalar: + // Check alignment - if alignment differs we have to do one byte at a time. + AND $7, X10, X9 + AND $7, X11, X19 +-- +2.50.1 + diff --git a/2027-internal-bytealg-vector-implementation-of-indexbyte-.patch b/2027-internal-bytealg-vector-implementation-of-indexbyte-.patch new file mode 100644 index 0000000000000000000000000000000000000000..906311e3271da92d176fed9a61dc86636d8e3d7b --- /dev/null +++ b/2027-internal-bytealg-vector-implementation-of-indexbyte-.patch @@ -0,0 +1,156 @@ +From 15bbea13fe1e70701b4c6e1d19a0654e2ba116d7 Mon Sep 17 00:00:00 2001 +From: Joel Sing +Date: Sat, 8 Feb 2025 01:03:23 +1100 +Subject: [PATCH 27/38] internal/bytealg: vector implementation of indexbyte + for riscv64 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Provide a vector implementation of indexbyte for riscv64, which is used +when compiled with the rva23u64 profile, or when vector is detected +to be available. Inputs that are smaller than 24 bytes will continue +to use the non-vector path. + +On a Banana Pi F3, with GORISCV64=rva23u64: + + │ indexbyte.1 │ indexbyte.2 │ + │ sec/op │ sec/op vs base │ +IndexByte/10-8 52.68n ± 0% 47.26n ± 0% -10.30% (p=0.000 n=10) +IndexByte/32-8 68.62n ± 0% 47.02n ± 0% -31.49% (p=0.000 n=10) +IndexByte/4K-8 2217.0n ± 0% 420.4n ± 0% -81.04% (p=0.000 n=10) +IndexByte/4M-8 2624.4µ ± 0% 767.5µ ± 0% -70.75% (p=0.000 n=10) +IndexByte/64M-8 68.08m ± 10% 47.84m ± 45% -29.73% (p=0.004 n=10) +geomean 17.03µ 8.073µ -52.59% + + │ indexbyte.1 │ indexbyte.2 │ + │ B/s │ B/s vs base │ +IndexByte/10-8 181.0Mi ± 0% 201.8Mi ± 0% +11.48% (p=0.000 n=10) +IndexByte/32-8 444.7Mi ± 0% 649.1Mi ± 0% +45.97% (p=0.000 n=10) +IndexByte/4K-8 1.721Gi ± 0% 9.076Gi ± 0% +427.51% (p=0.000 n=10) +IndexByte/4M-8 1.488Gi ± 0% 5.089Gi ± 0% +241.93% (p=0.000 n=10) +IndexByte/64M-8 940.3Mi ± 9% 1337.8Mi ± 31% +42.27% (p=0.004 n=10) +geomean 727.1Mi 1.498Gi +110.94% + +Change-Id: If7b0dbef38d76fa7a2021e4ecaed668a1d4b9783 +Reviewed-on: https://go-review.googlesource.com/c/go/+/648856 +LUCI-TryBot-Result: Go LUCI +Reviewed-by: Meng Zhuo +Reviewed-by: Mark Freeman +Reviewed-by: Mark Ryan +Reviewed-by: Dmitri Shuralyov +--- + src/internal/bytealg/indexbyte_riscv64.s | 60 ++++++++++++++++-------- + 1 file changed, 41 insertions(+), 19 deletions(-) + +diff --git a/src/internal/bytealg/indexbyte_riscv64.s b/src/internal/bytealg/indexbyte_riscv64.s +index fde00da0ea..527ae6d35e 100644 +--- a/src/internal/bytealg/indexbyte_riscv64.s ++++ b/src/internal/bytealg/indexbyte_riscv64.s +@@ -2,6 +2,7 @@ + // Use of this source code is governed by a BSD-style + // license that can be found in the LICENSE file. 
+ ++#include "asm_riscv64.h" + #include "go_asm.h" + #include "textflag.h" + +@@ -11,12 +12,14 @@ TEXT ·IndexByte(SB),NOSPLIT,$0-40 + // X12 = b_cap (unused) + // X13 = byte to find + AND $0xff, X13, X12 // x12 byte to look for +- MOV X10, X13 // store base for later + + SLTI $24, X11, X14 +- ADD X10, X11 // end +- BEQZ X14, bigBody ++ BNEZ X14, small ++ JMP indexByteBig<>(SB) + ++small: ++ MOV X10, X13 // store base for later ++ ADD X10, X11 // end + SUB $1, X10 + loop: + ADD $1, X10 +@@ -31,21 +34,19 @@ notfound: + MOV $-1, X10 + RET + +-bigBody: +- JMP indexByteBig<>(SB) +- + TEXT ·IndexByteString(SB),NOSPLIT,$0-32 + // X10 = b_base + // X11 = b_len + // X12 = byte to find +- + AND $0xff, X12 // x12 byte to look for +- MOV X10, X13 // store base for later + + SLTI $24, X11, X14 +- ADD X10, X11 // end +- BEQZ X14, bigBody ++ BNEZ X14, small ++ JMP indexByteBig<>(SB) + ++small: ++ MOV X10, X13 // store base for later ++ ADD X10, X11 // end + SUB $1, X10 + loop: + ADD $1, X10 +@@ -60,20 +61,41 @@ notfound: + MOV $-1, X10 + RET + +-bigBody: +- JMP indexByteBig<>(SB) +- + TEXT indexByteBig<>(SB),NOSPLIT|NOFRAME,$0 +- // On entry ++ // On entry: + // X10 = b_base +- // X11 = end ++ // X11 = b_len (at least 16 bytes) + // X12 = byte to find +- // X13 = b_base +- // X11 is at least 16 bytes > X10 +- +- // On exit ++ // On exit: + // X10 = index of first instance of sought byte, if found, or -1 otherwise + ++ MOV X10, X13 // store base for later ++ ++#ifndef hasV ++ MOVB internal∕cpu·RISCV64+const_offsetRISCV64HasV(SB), X5 ++ BEQZ X5, indexbyte_scalar ++#endif ++ ++ PCALIGN $16 ++vector_loop: ++ VSETVLI X11, E8, M8, TA, MA, X5 ++ VLE8V (X10), V8 ++ VMSEQVX X12, V8, V0 ++ VFIRSTM V0, X6 ++ BGEZ X6, vector_found ++ ADD X5, X10 ++ SUB X5, X11 ++ BNEZ X11, vector_loop ++ JMP notfound ++ ++vector_found: ++ SUB X13, X10 ++ ADD X6, X10 ++ RET ++ ++indexbyte_scalar: ++ ADD X10, X11 // end ++ + // Process the first few bytes until we get to an 8 byte boundary + // No need to check for end here as we have at least 16 bytes in + // the buffer. +-- +2.50.1 + diff --git a/2028-internal-bytealg-vector-implementation-of-compare-fo.patch b/2028-internal-bytealg-vector-implementation-of-compare-fo.patch new file mode 100644 index 0000000000000000000000000000000000000000..83c2c174b72fe4be6eaef730339cd6ec5a47a634 --- /dev/null +++ b/2028-internal-bytealg-vector-implementation-of-compare-fo.patch @@ -0,0 +1,163 @@ +From 96df969b410b4fd381e709ec94c6a0ebeff5018d Mon Sep 17 00:00:00 2001 +From: Joel Sing +Date: Wed, 12 Feb 2025 23:41:35 +1100 +Subject: [PATCH 28/38] internal/bytealg: vector implementation of compare for + riscv64 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Provide a vector implementation of compare for riscv64, which is used +when compiled with the rva23u64 profile, or when vector is detected +to be available. Inputs that are 8 byte aligned will still be handled +via a the non-vector code if the length is less than or equal to 128 +bytes. 
+ +On a Banana Pi F3, with GORISCV64=rva23u64: + + │ compare.1 │ compare.2 │ + │ sec/op │ sec/op vs base │ +BytesCompare/1-8 24.36n ± 0% 24.15n ± 0% -0.84% (p=0.007 n=10) +BytesCompare/2-8 26.75n ± 0% 26.97n ± 0% +0.82% (p=0.000 n=10) +BytesCompare/4-8 27.63n ± 0% 27.80n ± 0% +0.60% (p=0.001 n=10) +BytesCompare/8-8 35.91n ± 0% 35.19n ± 0% -2.01% (p=0.000 n=10) +BytesCompare/16-8 53.22n ± 0% 24.04n ± 1% -54.82% (p=0.000 n=10) +BytesCompare/32-8 25.12n ± 0% 26.09n ± 1% +3.86% (p=0.000 n=10) +BytesCompare/64-8 32.52n ± 0% 33.43n ± 1% +2.78% (p=0.000 n=10) +BytesCompare/128-8 46.59n ± 0% 48.22n ± 1% +3.50% (p=0.000 n=10) +BytesCompare/256-8 74.25n ± 0% 50.18n ± 0% -32.42% (p=0.000 n=10) +BytesCompare/512-8 129.85n ± 0% 83.12n ± 0% -35.98% (p=0.000 n=10) +BytesCompare/1024-8 244.6n ± 0% 148.0n ± 1% -39.49% (p=0.000 n=10) +BytesCompare/2048-8 465.9n ± 0% 282.8n ± 2% -39.30% (p=0.000 n=10) +CompareBytesEqual-8 51.96n ± 0% 52.90n ± 1% +1.80% (p=0.000 n=10) +CompareBytesToNil-8 15.77n ± 1% 15.68n ± 0% -0.57% (p=0.000 n=10) +CompareBytesEmpty-8 14.21n ± 1% 14.20n ± 1% ~ (p=1.000 n=10) +CompareBytesIdentical-8 14.20n ± 1% 15.07n ± 1% +6.20% (p=0.000 n=10) +CompareBytesSameLength-8 31.38n ± 0% 30.52n ± 0% -2.74% (p=0.000 n=10) +CompareBytesDifferentLength-8 31.38n ± 0% 30.53n ± 0% -2.71% (p=0.000 n=10) +CompareBytesBigUnaligned/offset=1-8 2401.0µ ± 0% 437.6µ ± 0% -81.77% (p=0.000 n=10) +CompareBytesBigUnaligned/offset=2-8 2376.8µ ± 0% 437.4µ ± 0% -81.60% (p=0.000 n=10) +CompareBytesBigUnaligned/offset=3-8 2384.1µ ± 0% 437.5µ ± 0% -81.65% (p=0.000 n=10) +CompareBytesBigUnaligned/offset=4-8 2377.7µ ± 0% 437.4µ ± 0% -81.60% (p=0.000 n=10) +CompareBytesBigUnaligned/offset=5-8 2366.3µ ± 0% 437.5µ ± 0% -81.51% (p=0.000 n=10) +CompareBytesBigUnaligned/offset=6-8 2357.3µ ± 0% 437.3µ ± 0% -81.45% (p=0.000 n=10) +CompareBytesBigUnaligned/offset=7-8 2385.3µ ± 0% 437.6µ ± 0% -81.65% (p=0.000 n=10) +CompareBytesBigBothUnaligned/offset=0-8 447.2µ ± 0% 464.8µ ± 0% +3.94% (p=0.000 n=10) +CompareBytesBigBothUnaligned/offset=1-8 447.7µ ± 0% 453.1µ ± 0% +1.20% (p=0.000 n=10) +CompareBytesBigBothUnaligned/offset=2-8 447.9µ ± 0% 453.0µ ± 0% +1.15% (p=0.000 n=10) +CompareBytesBigBothUnaligned/offset=3-8 448.0µ ± 0% 452.5µ ± 0% +1.02% (p=0.000 n=10) +CompareBytesBigBothUnaligned/offset=4-8 448.0µ ± 0% 452.1µ ± 0% +0.92% (p=0.000 n=10) +CompareBytesBigBothUnaligned/offset=5-8 447.8µ ± 0% 452.8µ ± 0% +1.12% (p=0.000 n=10) +CompareBytesBigBothUnaligned/offset=6-8 447.9µ ± 0% 452.4µ ± 0% +1.01% (p=0.000 n=10) +CompareBytesBigBothUnaligned/offset=7-8 447.9µ ± 0% 452.8µ ± 0% +1.09% (p=0.000 n=10) +CompareBytesBig-8 441.2µ ± 0% 461.8µ ± 0% +4.66% (p=0.000 n=10) +CompareBytesBigIdentical-8 13.81n ± 0% 13.80n ± 0% ~ (p=0.519 n=10) +geomean 3.980µ 2.651µ -33.40% + + │ compare.1 │ compare.2 │ + │ B/s │ B/s vs base │ +CompareBytesBigUnaligned/offset=1-8 416.5Mi ± 0% 2285.1Mi ± 0% +448.64% (p=0.000 n=10) +CompareBytesBigUnaligned/offset=2-8 420.7Mi ± 0% 2286.4Mi ± 0% +443.43% (p=0.000 n=10) +CompareBytesBigUnaligned/offset=3-8 419.5Mi ± 0% 2285.9Mi ± 0% +444.97% (p=0.000 n=10) +CompareBytesBigUnaligned/offset=4-8 420.6Mi ± 0% 2286.1Mi ± 0% +443.57% (p=0.000 n=10) +CompareBytesBigUnaligned/offset=5-8 422.6Mi ± 0% 2285.7Mi ± 0% +440.86% (p=0.000 n=10) +CompareBytesBigUnaligned/offset=6-8 424.2Mi ± 0% 2286.8Mi ± 0% +439.07% (p=0.000 n=10) +CompareBytesBigUnaligned/offset=7-8 419.2Mi ± 0% 2285.2Mi ± 0% +445.07% (p=0.000 n=10) +CompareBytesBigBothUnaligned/offset=0-8 2.184Gi ± 0% 2.101Gi ± 0% -3.79% (p=0.000 n=10) 
+CompareBytesBigBothUnaligned/offset=1-8 2.181Gi ± 0% 2.155Gi ± 0% -1.18% (p=0.000 n=10) +CompareBytesBigBothUnaligned/offset=2-8 2.180Gi ± 0% 2.156Gi ± 0% -1.13% (p=0.000 n=10) +CompareBytesBigBothUnaligned/offset=3-8 2.180Gi ± 0% 2.158Gi ± 0% -1.01% (p=0.000 n=10) +CompareBytesBigBothUnaligned/offset=4-8 2.180Gi ± 0% 2.160Gi ± 0% -0.91% (p=0.000 n=10) +CompareBytesBigBothUnaligned/offset=5-8 2.181Gi ± 0% 2.157Gi ± 0% -1.11% (p=0.000 n=10) +CompareBytesBigBothUnaligned/offset=6-8 2.181Gi ± 0% 2.159Gi ± 0% -1.00% (p=0.000 n=10) +CompareBytesBigBothUnaligned/offset=7-8 2.180Gi ± 0% 2.157Gi ± 0% -1.08% (p=0.000 n=10) +CompareBytesBig-8 2.213Gi ± 0% 2.115Gi ± 0% -4.45% (p=0.000 n=10) +CompareBytesBigIdentical-8 69.06Ti ± 0% 69.09Ti ± 0% ~ (p=0.315 n=10) +geomean 2.022Gi 4.022Gi +98.95% + +Change-Id: Id3012faf8d353eb1be0e1fb01b78ac43fa4c7e8b +Reviewed-on: https://go-review.googlesource.com/c/go/+/646737 +Reviewed-by: Mark Ryan +Reviewed-by: Mark Freeman +LUCI-TryBot-Result: Go LUCI +Reviewed-by: Dmitri Shuralyov +Reviewed-by: Meng Zhuo +--- + src/internal/bytealg/compare_riscv64.s | 47 ++++++++++++++++++++++++-- + 1 file changed, 44 insertions(+), 3 deletions(-) + +diff --git a/src/internal/bytealg/compare_riscv64.s b/src/internal/bytealg/compare_riscv64.s +index b1e1f7bcc7..b6166a8286 100644 +--- a/src/internal/bytealg/compare_riscv64.s ++++ b/src/internal/bytealg/compare_riscv64.s +@@ -2,6 +2,7 @@ + // Use of this source code is governed by a BSD-style + // license that can be found in the LICENSE file. + ++#include "asm_riscv64.h" + #include "go_asm.h" + #include "textflag.h" + +@@ -39,6 +40,46 @@ TEXT compare<>(SB),NOSPLIT|NOFRAME,$0 + use_a_len: + BEQZ X5, cmp_len + ++ MOV $16, X6 ++ BLT X5, X6, check8_unaligned ++ ++#ifndef hasV ++ MOVB internal∕cpu·RISCV64+const_offsetRISCV64HasV(SB), X6 ++ BEQZ X6, compare_scalar ++#endif ++ ++ // Use vector if not 8 byte aligned. ++ OR X10, X12, X6 ++ AND $7, X6 ++ BNEZ X6, vector_loop ++ ++ // Use scalar if 8 byte aligned and <= 128 bytes. ++ SUB $128, X5, X6 ++ BLEZ X6, compare_scalar_aligned ++ ++ PCALIGN $16 ++vector_loop: ++ VSETVLI X5, E8, M8, TA, MA, X6 ++ VLE8V (X10), V8 ++ VLE8V (X12), V16 ++ VMSNEVV V8, V16, V0 ++ VFIRSTM V0, X7 ++ BGEZ X7, vector_not_eq ++ ADD X6, X10 ++ ADD X6, X12 ++ SUB X6, X5 ++ BNEZ X5, vector_loop ++ JMP cmp_len ++ ++vector_not_eq: ++ // Load first differing bytes in X8/X9. ++ ADD X7, X10 ++ ADD X7, X12 ++ MOVBU (X10), X8 ++ MOVBU (X12), X9 ++ JMP cmp ++ ++compare_scalar: + MOV $32, X6 + BLT X5, X6, check8_unaligned + +@@ -61,9 +102,9 @@ align: + ADD $1, X12 + BNEZ X7, align + +-check32: +- // X6 contains $32 +- BLT X5, X6, compare16 ++compare_scalar_aligned: ++ MOV $32, X6 ++ BLT X5, X6, check16 + compare32: + MOV 0(X10), X15 + MOV 0(X12), X16 +-- +2.50.1 + diff --git a/2029-cmd-compile-simplify-intrinsification-of-BitLen16-an.patch b/2029-cmd-compile-simplify-intrinsification-of-BitLen16-an.patch new file mode 100644 index 0000000000000000000000000000000000000000..93657be38ed45827a9acfc325dbdd88ef30af230 --- /dev/null +++ b/2029-cmd-compile-simplify-intrinsification-of-BitLen16-an.patch @@ -0,0 +1,659 @@ +From f771ff2c11d62f75116c0c03b5257a4804f47f8c Mon Sep 17 00:00:00 2001 +From: Joel Sing +Date: Sun, 23 Feb 2025 00:02:27 +1100 +Subject: [PATCH 29/38] cmd/compile: simplify intrinsification of BitLen16 and + BitLen8 + +Decompose BitLen16 and BitLen8 within the SSA rules for architectures that +support BitLen32 or BitLen64, rather than having a custom intrinsic. 
+ +Change-Id: Ie4188ce69d1021e63cec27a8e7418efb0714812b +Reviewed-on: https://go-review.googlesource.com/c/go/+/651817 +Reviewed-by: Keith Randall +Reviewed-by: Michael Pratt +LUCI-TryBot-Result: Go LUCI +Reviewed-by: Junyang Shao +TryBot-Result: Gopher Robot +Run-TryBot: Joel Sing +Reviewed-by: Michael Knyszek +--- + src/cmd/compile/internal/ssa/_gen/ARM.rules | 1 + + src/cmd/compile/internal/ssa/_gen/ARM64.rules | 1 + + .../compile/internal/ssa/_gen/LOONG64.rules | 1 + + src/cmd/compile/internal/ssa/_gen/MIPS.rules | 1 + + src/cmd/compile/internal/ssa/_gen/PPC64.rules | 1 + + src/cmd/compile/internal/ssa/_gen/S390X.rules | 1 + + src/cmd/compile/internal/ssa/_gen/Wasm.rules | 1 + + src/cmd/compile/internal/ssa/rewriteARM.go | 34 +++++++++++++ + src/cmd/compile/internal/ssa/rewriteARM64.go | 34 +++++++++++++ + .../compile/internal/ssa/rewriteLOONG64.go | 34 +++++++++++++ + src/cmd/compile/internal/ssa/rewriteMIPS.go | 34 +++++++++++++ + src/cmd/compile/internal/ssa/rewritePPC64.go | 34 +++++++++++++ + src/cmd/compile/internal/ssa/rewriteS390X.go | 51 +++++++++++++++++++ + src/cmd/compile/internal/ssa/rewriteWasm.go | 51 +++++++++++++++++++ + src/cmd/compile/internal/ssagen/intrinsics.go | 39 ++------------ + 15 files changed, 284 insertions(+), 34 deletions(-) + +diff --git a/src/cmd/compile/internal/ssa/_gen/ARM.rules b/src/cmd/compile/internal/ssa/_gen/ARM.rules +index 9cdb5d8ad5..6581793fa3 100644 +--- a/src/cmd/compile/internal/ssa/_gen/ARM.rules ++++ b/src/cmd/compile/internal/ssa/_gen/ARM.rules +@@ -80,6 +80,7 @@ + + // bit length + (BitLen32 x) => (RSBconst [32] (CLZ x)) ++(BitLen(16|8) x) => (BitLen32 (ZeroExt(16|8)to32 x)) + + // byte swap for ARMv5 + // let (a, b, c, d) be the bytes of x from high to low +diff --git a/src/cmd/compile/internal/ssa/_gen/ARM64.rules b/src/cmd/compile/internal/ssa/_gen/ARM64.rules +index d14bb6a91d..6e326ca901 100644 +--- a/src/cmd/compile/internal/ssa/_gen/ARM64.rules ++++ b/src/cmd/compile/internal/ssa/_gen/ARM64.rules +@@ -103,6 +103,7 @@ + + (BitLen64 x) => (SUB (MOVDconst [64]) (CLZ x)) + (BitLen32 x) => (SUB (MOVDconst [32]) (CLZW x)) ++(BitLen(16|8) x) => (BitLen64 (ZeroExt(16|8)to64 x)) + + (Bswap64 ...) => (REV ...) + (Bswap32 ...) => (REVW ...) +diff --git a/src/cmd/compile/internal/ssa/_gen/LOONG64.rules b/src/cmd/compile/internal/ssa/_gen/LOONG64.rules +index 00a0a84f33..e285f9fe27 100644 +--- a/src/cmd/compile/internal/ssa/_gen/LOONG64.rules ++++ b/src/cmd/compile/internal/ssa/_gen/LOONG64.rules +@@ -147,6 +147,7 @@ + + (BitLen64 x) => (NEGV (SUBVconst [64] (CLZV x))) + (BitLen32 x) => (NEGV (SUBVconst [32] (CLZW x))) ++(BitLen(16|8) x) => (BitLen64 (ZeroExt(16|8)to64 x)) + (Bswap(16|32|64) ...) => (REVB(2H|2W|V) ...) + (BitRev8 ...) => (BITREV4B ...) + (BitRev16 x) => (REVB2H (BITREV4B x)) +diff --git a/src/cmd/compile/internal/ssa/_gen/MIPS.rules b/src/cmd/compile/internal/ssa/_gen/MIPS.rules +index f4e0467ea0..9a48164f55 100644 +--- a/src/cmd/compile/internal/ssa/_gen/MIPS.rules ++++ b/src/cmd/compile/internal/ssa/_gen/MIPS.rules +@@ -135,6 +135,7 @@ + + // bit length + (BitLen32 x) => (SUB (MOVWconst [32]) (CLZ x)) ++(BitLen(16|8) x) => (BitLen32 (ZeroExt(16|8)to32 x)) + + // boolean ops -- booleans are represented with 0=false, 1=true + (AndB ...) => (AND ...) 
+diff --git a/src/cmd/compile/internal/ssa/_gen/PPC64.rules b/src/cmd/compile/internal/ssa/_gen/PPC64.rules +index feef6ee52a..768e37406f 100644 +--- a/src/cmd/compile/internal/ssa/_gen/PPC64.rules ++++ b/src/cmd/compile/internal/ssa/_gen/PPC64.rules +@@ -267,6 +267,7 @@ + + (BitLen64 x) => (SUBFCconst [64] (CNTLZD x)) + (BitLen32 x) => (SUBFCconst [32] (CNTLZW x)) ++(BitLen(16|8) x) => (BitLen64 (ZeroExt(16|8)to64 x)) + + (PopCount64 ...) => (POPCNTD ...) + (PopCount(32|16|8) x) => (POPCNT(W|W|B) (MOV(W|H|B)Zreg x)) +diff --git a/src/cmd/compile/internal/ssa/_gen/S390X.rules b/src/cmd/compile/internal/ssa/_gen/S390X.rules +index 462cf8f701..3a903af5d0 100644 +--- a/src/cmd/compile/internal/ssa/_gen/S390X.rules ++++ b/src/cmd/compile/internal/ssa/_gen/S390X.rules +@@ -89,6 +89,7 @@ + (Ctz32 x) => (SUB (MOVDconst [64]) (FLOGR (MOVWZreg (ANDW (SUBWconst [1] x) (NOTW x))))) + + (BitLen64 x) => (SUB (MOVDconst [64]) (FLOGR x)) ++(BitLen(32|16|8) x) => (BitLen64 (ZeroExt(32|16|8)to64 x)) + + // POPCNT treats the input register as a vector of 8 bytes, producing + // a population count for each individual byte. For inputs larger than +diff --git a/src/cmd/compile/internal/ssa/_gen/Wasm.rules b/src/cmd/compile/internal/ssa/_gen/Wasm.rules +index 91a9fc5e4a..03c681f440 100644 +--- a/src/cmd/compile/internal/ssa/_gen/Wasm.rules ++++ b/src/cmd/compile/internal/ssa/_gen/Wasm.rules +@@ -329,6 +329,7 @@ + (Ctz(64|32|16|8)NonZero ...) => (I64Ctz ...) + + (BitLen64 x) => (I64Sub (I64Const [64]) (I64Clz x)) ++(BitLen(32|16|8) x) => (BitLen64 (ZeroExt(32|16|8)to64 x)) + + (PopCount64 ...) => (I64Popcnt ...) + (PopCount32 x) => (I64Popcnt (ZeroExt32to64 x)) +diff --git a/src/cmd/compile/internal/ssa/rewriteARM.go b/src/cmd/compile/internal/ssa/rewriteARM.go +index 09be5ccf68..7053f22181 100644 +--- a/src/cmd/compile/internal/ssa/rewriteARM.go ++++ b/src/cmd/compile/internal/ssa/rewriteARM.go +@@ -466,8 +466,12 @@ func rewriteValueARM(v *Value) bool { + return true + case OpAvg32u: + return rewriteValueARM_OpAvg32u(v) ++ case OpBitLen16: ++ return rewriteValueARM_OpBitLen16(v) + case OpBitLen32: + return rewriteValueARM_OpBitLen32(v) ++ case OpBitLen8: ++ return rewriteValueARM_OpBitLen8(v) + case OpBswap32: + return rewriteValueARM_OpBswap32(v) + case OpClosureCall: +@@ -13042,6 +13046,21 @@ func rewriteValueARM_OpAvg32u(v *Value) bool { + return true + } + } ++func rewriteValueARM_OpBitLen16(v *Value) bool { ++ v_0 := v.Args[0] ++ b := v.Block ++ typ := &b.Func.Config.Types ++ // match: (BitLen16 x) ++ // result: (BitLen32 (ZeroExt16to32 x)) ++ for { ++ x := v_0 ++ v.reset(OpBitLen32) ++ v0 := b.NewValue0(v.Pos, OpZeroExt16to32, typ.UInt32) ++ v0.AddArg(x) ++ v.AddArg(v0) ++ return true ++ } ++} + func rewriteValueARM_OpBitLen32(v *Value) bool { + v_0 := v.Args[0] + b := v.Block +@@ -13058,6 +13077,21 @@ func rewriteValueARM_OpBitLen32(v *Value) bool { + return true + } + } ++func rewriteValueARM_OpBitLen8(v *Value) bool { ++ v_0 := v.Args[0] ++ b := v.Block ++ typ := &b.Func.Config.Types ++ // match: (BitLen8 x) ++ // result: (BitLen32 (ZeroExt8to32 x)) ++ for { ++ x := v_0 ++ v.reset(OpBitLen32) ++ v0 := b.NewValue0(v.Pos, OpZeroExt8to32, typ.UInt32) ++ v0.AddArg(x) ++ v.AddArg(v0) ++ return true ++ } ++} + func rewriteValueARM_OpBswap32(v *Value) bool { + v_0 := v.Args[0] + b := v.Block +diff --git a/src/cmd/compile/internal/ssa/rewriteARM64.go b/src/cmd/compile/internal/ssa/rewriteARM64.go +index ed6974c244..4f5f55bb08 100644 +--- a/src/cmd/compile/internal/ssa/rewriteARM64.go ++++ 
b/src/cmd/compile/internal/ssa/rewriteARM64.go +@@ -563,10 +563,14 @@ func rewriteValueARM64(v *Value) bool { + return true + case OpAvg64u: + return rewriteValueARM64_OpAvg64u(v) ++ case OpBitLen16: ++ return rewriteValueARM64_OpBitLen16(v) + case OpBitLen32: + return rewriteValueARM64_OpBitLen32(v) + case OpBitLen64: + return rewriteValueARM64_OpBitLen64(v) ++ case OpBitLen8: ++ return rewriteValueARM64_OpBitLen8(v) + case OpBitRev16: + return rewriteValueARM64_OpBitRev16(v) + case OpBitRev32: +@@ -18327,6 +18331,21 @@ func rewriteValueARM64_OpAvg64u(v *Value) bool { + return true + } + } ++func rewriteValueARM64_OpBitLen16(v *Value) bool { ++ v_0 := v.Args[0] ++ b := v.Block ++ typ := &b.Func.Config.Types ++ // match: (BitLen16 x) ++ // result: (BitLen64 (ZeroExt16to64 x)) ++ for { ++ x := v_0 ++ v.reset(OpBitLen64) ++ v0 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64) ++ v0.AddArg(x) ++ v.AddArg(v0) ++ return true ++ } ++} + func rewriteValueARM64_OpBitLen32(v *Value) bool { + v_0 := v.Args[0] + b := v.Block +@@ -18361,6 +18380,21 @@ func rewriteValueARM64_OpBitLen64(v *Value) bool { + return true + } + } ++func rewriteValueARM64_OpBitLen8(v *Value) bool { ++ v_0 := v.Args[0] ++ b := v.Block ++ typ := &b.Func.Config.Types ++ // match: (BitLen8 x) ++ // result: (BitLen64 (ZeroExt8to64 x)) ++ for { ++ x := v_0 ++ v.reset(OpBitLen64) ++ v0 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64) ++ v0.AddArg(x) ++ v.AddArg(v0) ++ return true ++ } ++} + func rewriteValueARM64_OpBitRev16(v *Value) bool { + v_0 := v.Args[0] + b := v.Block +diff --git a/src/cmd/compile/internal/ssa/rewriteLOONG64.go b/src/cmd/compile/internal/ssa/rewriteLOONG64.go +index ab39040de1..ba0ea088d4 100644 +--- a/src/cmd/compile/internal/ssa/rewriteLOONG64.go ++++ b/src/cmd/compile/internal/ssa/rewriteLOONG64.go +@@ -129,10 +129,14 @@ func rewriteValueLOONG64(v *Value) bool { + return true + case OpAvg64u: + return rewriteValueLOONG64_OpAvg64u(v) ++ case OpBitLen16: ++ return rewriteValueLOONG64_OpBitLen16(v) + case OpBitLen32: + return rewriteValueLOONG64_OpBitLen32(v) + case OpBitLen64: + return rewriteValueLOONG64_OpBitLen64(v) ++ case OpBitLen8: ++ return rewriteValueLOONG64_OpBitLen8(v) + case OpBitRev16: + return rewriteValueLOONG64_OpBitRev16(v) + case OpBitRev32: +@@ -995,6 +999,21 @@ func rewriteValueLOONG64_OpAvg64u(v *Value) bool { + return true + } + } ++func rewriteValueLOONG64_OpBitLen16(v *Value) bool { ++ v_0 := v.Args[0] ++ b := v.Block ++ typ := &b.Func.Config.Types ++ // match: (BitLen16 x) ++ // result: (BitLen64 (ZeroExt16to64 x)) ++ for { ++ x := v_0 ++ v.reset(OpBitLen64) ++ v0 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64) ++ v0.AddArg(x) ++ v.AddArg(v0) ++ return true ++ } ++} + func rewriteValueLOONG64_OpBitLen32(v *Value) bool { + v_0 := v.Args[0] + b := v.Block +@@ -1033,6 +1052,21 @@ func rewriteValueLOONG64_OpBitLen64(v *Value) bool { + return true + } + } ++func rewriteValueLOONG64_OpBitLen8(v *Value) bool { ++ v_0 := v.Args[0] ++ b := v.Block ++ typ := &b.Func.Config.Types ++ // match: (BitLen8 x) ++ // result: (BitLen64 (ZeroExt8to64 x)) ++ for { ++ x := v_0 ++ v.reset(OpBitLen64) ++ v0 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64) ++ v0.AddArg(x) ++ v.AddArg(v0) ++ return true ++ } ++} + func rewriteValueLOONG64_OpBitRev16(v *Value) bool { + v_0 := v.Args[0] + b := v.Block +diff --git a/src/cmd/compile/internal/ssa/rewriteMIPS.go b/src/cmd/compile/internal/ssa/rewriteMIPS.go +index 045ebb955a..5b95549486 100644 +--- a/src/cmd/compile/internal/ssa/rewriteMIPS.go ++++ 
b/src/cmd/compile/internal/ssa/rewriteMIPS.go +@@ -82,8 +82,12 @@ func rewriteValueMIPS(v *Value) bool { + return true + case OpAvg32u: + return rewriteValueMIPS_OpAvg32u(v) ++ case OpBitLen16: ++ return rewriteValueMIPS_OpBitLen16(v) + case OpBitLen32: + return rewriteValueMIPS_OpBitLen32(v) ++ case OpBitLen8: ++ return rewriteValueMIPS_OpBitLen8(v) + case OpClosureCall: + v.Op = OpMIPSCALLclosure + return true +@@ -792,6 +796,21 @@ func rewriteValueMIPS_OpAvg32u(v *Value) bool { + return true + } + } ++func rewriteValueMIPS_OpBitLen16(v *Value) bool { ++ v_0 := v.Args[0] ++ b := v.Block ++ typ := &b.Func.Config.Types ++ // match: (BitLen16 x) ++ // result: (BitLen32 (ZeroExt16to32 x)) ++ for { ++ x := v_0 ++ v.reset(OpBitLen32) ++ v0 := b.NewValue0(v.Pos, OpZeroExt16to32, typ.UInt32) ++ v0.AddArg(x) ++ v.AddArg(v0) ++ return true ++ } ++} + func rewriteValueMIPS_OpBitLen32(v *Value) bool { + v_0 := v.Args[0] + b := v.Block +@@ -810,6 +829,21 @@ func rewriteValueMIPS_OpBitLen32(v *Value) bool { + return true + } + } ++func rewriteValueMIPS_OpBitLen8(v *Value) bool { ++ v_0 := v.Args[0] ++ b := v.Block ++ typ := &b.Func.Config.Types ++ // match: (BitLen8 x) ++ // result: (BitLen32 (ZeroExt8to32 x)) ++ for { ++ x := v_0 ++ v.reset(OpBitLen32) ++ v0 := b.NewValue0(v.Pos, OpZeroExt8to32, typ.UInt32) ++ v0.AddArg(x) ++ v.AddArg(v0) ++ return true ++ } ++} + func rewriteValueMIPS_OpCom16(v *Value) bool { + v_0 := v.Args[0] + // match: (Com16 x) +diff --git a/src/cmd/compile/internal/ssa/rewritePPC64.go b/src/cmd/compile/internal/ssa/rewritePPC64.go +index 4e3b8a5cc6..e900ebe0be 100644 +--- a/src/cmd/compile/internal/ssa/rewritePPC64.go ++++ b/src/cmd/compile/internal/ssa/rewritePPC64.go +@@ -106,10 +106,14 @@ func rewriteValuePPC64(v *Value) bool { + return rewriteValuePPC64_OpAtomicStoreRel64(v) + case OpAvg64u: + return rewriteValuePPC64_OpAvg64u(v) ++ case OpBitLen16: ++ return rewriteValuePPC64_OpBitLen16(v) + case OpBitLen32: + return rewriteValuePPC64_OpBitLen32(v) + case OpBitLen64: + return rewriteValuePPC64_OpBitLen64(v) ++ case OpBitLen8: ++ return rewriteValuePPC64_OpBitLen8(v) + case OpBswap16: + return rewriteValuePPC64_OpBswap16(v) + case OpBswap32: +@@ -1123,6 +1127,21 @@ func rewriteValuePPC64_OpAvg64u(v *Value) bool { + return true + } + } ++func rewriteValuePPC64_OpBitLen16(v *Value) bool { ++ v_0 := v.Args[0] ++ b := v.Block ++ typ := &b.Func.Config.Types ++ // match: (BitLen16 x) ++ // result: (BitLen64 (ZeroExt16to64 x)) ++ for { ++ x := v_0 ++ v.reset(OpBitLen64) ++ v0 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64) ++ v0.AddArg(x) ++ v.AddArg(v0) ++ return true ++ } ++} + func rewriteValuePPC64_OpBitLen32(v *Value) bool { + v_0 := v.Args[0] + b := v.Block +@@ -1155,6 +1174,21 @@ func rewriteValuePPC64_OpBitLen64(v *Value) bool { + return true + } + } ++func rewriteValuePPC64_OpBitLen8(v *Value) bool { ++ v_0 := v.Args[0] ++ b := v.Block ++ typ := &b.Func.Config.Types ++ // match: (BitLen8 x) ++ // result: (BitLen64 (ZeroExt8to64 x)) ++ for { ++ x := v_0 ++ v.reset(OpBitLen64) ++ v0 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64) ++ v0.AddArg(x) ++ v.AddArg(v0) ++ return true ++ } ++} + func rewriteValuePPC64_OpBswap16(v *Value) bool { + v_0 := v.Args[0] + b := v.Block +diff --git a/src/cmd/compile/internal/ssa/rewriteS390X.go b/src/cmd/compile/internal/ssa/rewriteS390X.go +index 1816d2e27e..357b17f8fd 100644 +--- a/src/cmd/compile/internal/ssa/rewriteS390X.go ++++ b/src/cmd/compile/internal/ssa/rewriteS390X.go +@@ -88,8 +88,14 @@ func rewriteValueS390X(v *Value) bool 
{ + return rewriteValueS390X_OpAtomicStoreRel32(v) + case OpAvg64u: + return rewriteValueS390X_OpAvg64u(v) ++ case OpBitLen16: ++ return rewriteValueS390X_OpBitLen16(v) ++ case OpBitLen32: ++ return rewriteValueS390X_OpBitLen32(v) + case OpBitLen64: + return rewriteValueS390X_OpBitLen64(v) ++ case OpBitLen8: ++ return rewriteValueS390X_OpBitLen8(v) + case OpBswap16: + return rewriteValueS390X_OpBswap16(v) + case OpBswap32: +@@ -1261,6 +1267,36 @@ func rewriteValueS390X_OpAvg64u(v *Value) bool { + return true + } + } ++func rewriteValueS390X_OpBitLen16(v *Value) bool { ++ v_0 := v.Args[0] ++ b := v.Block ++ typ := &b.Func.Config.Types ++ // match: (BitLen16 x) ++ // result: (BitLen64 (ZeroExt16to64 x)) ++ for { ++ x := v_0 ++ v.reset(OpBitLen64) ++ v0 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64) ++ v0.AddArg(x) ++ v.AddArg(v0) ++ return true ++ } ++} ++func rewriteValueS390X_OpBitLen32(v *Value) bool { ++ v_0 := v.Args[0] ++ b := v.Block ++ typ := &b.Func.Config.Types ++ // match: (BitLen32 x) ++ // result: (BitLen64 (ZeroExt32to64 x)) ++ for { ++ x := v_0 ++ v.reset(OpBitLen64) ++ v0 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64) ++ v0.AddArg(x) ++ v.AddArg(v0) ++ return true ++ } ++} + func rewriteValueS390X_OpBitLen64(v *Value) bool { + v_0 := v.Args[0] + b := v.Block +@@ -1278,6 +1314,21 @@ func rewriteValueS390X_OpBitLen64(v *Value) bool { + return true + } + } ++func rewriteValueS390X_OpBitLen8(v *Value) bool { ++ v_0 := v.Args[0] ++ b := v.Block ++ typ := &b.Func.Config.Types ++ // match: (BitLen8 x) ++ // result: (BitLen64 (ZeroExt8to64 x)) ++ for { ++ x := v_0 ++ v.reset(OpBitLen64) ++ v0 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64) ++ v0.AddArg(x) ++ v.AddArg(v0) ++ return true ++ } ++} + func rewriteValueS390X_OpBswap16(v *Value) bool { + v_0 := v.Args[0] + b := v.Block +diff --git a/src/cmd/compile/internal/ssa/rewriteWasm.go b/src/cmd/compile/internal/ssa/rewriteWasm.go +index 6f83aea13a..f3b8205b24 100644 +--- a/src/cmd/compile/internal/ssa/rewriteWasm.go ++++ b/src/cmd/compile/internal/ssa/rewriteWasm.go +@@ -49,8 +49,14 @@ func rewriteValueWasm(v *Value) bool { + case OpAndB: + v.Op = OpWasmI64And + return true ++ case OpBitLen16: ++ return rewriteValueWasm_OpBitLen16(v) ++ case OpBitLen32: ++ return rewriteValueWasm_OpBitLen32(v) + case OpBitLen64: + return rewriteValueWasm_OpBitLen64(v) ++ case OpBitLen8: ++ return rewriteValueWasm_OpBitLen8(v) + case OpCeil: + v.Op = OpWasmF64Ceil + return true +@@ -679,6 +685,36 @@ func rewriteValueWasm_OpAddr(v *Value) bool { + return true + } + } ++func rewriteValueWasm_OpBitLen16(v *Value) bool { ++ v_0 := v.Args[0] ++ b := v.Block ++ typ := &b.Func.Config.Types ++ // match: (BitLen16 x) ++ // result: (BitLen64 (ZeroExt16to64 x)) ++ for { ++ x := v_0 ++ v.reset(OpBitLen64) ++ v0 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64) ++ v0.AddArg(x) ++ v.AddArg(v0) ++ return true ++ } ++} ++func rewriteValueWasm_OpBitLen32(v *Value) bool { ++ v_0 := v.Args[0] ++ b := v.Block ++ typ := &b.Func.Config.Types ++ // match: (BitLen32 x) ++ // result: (BitLen64 (ZeroExt32to64 x)) ++ for { ++ x := v_0 ++ v.reset(OpBitLen64) ++ v0 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64) ++ v0.AddArg(x) ++ v.AddArg(v0) ++ return true ++ } ++} + func rewriteValueWasm_OpBitLen64(v *Value) bool { + v_0 := v.Args[0] + b := v.Block +@@ -696,6 +732,21 @@ func rewriteValueWasm_OpBitLen64(v *Value) bool { + return true + } + } ++func rewriteValueWasm_OpBitLen8(v *Value) bool { ++ v_0 := v.Args[0] ++ b := v.Block ++ typ := &b.Func.Config.Types ++ // 
match: (BitLen8 x) ++ // result: (BitLen64 (ZeroExt8to64 x)) ++ for { ++ x := v_0 ++ v.reset(OpBitLen64) ++ v0 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64) ++ v0.AddArg(x) ++ v.AddArg(v0) ++ return true ++ } ++} + func rewriteValueWasm_OpCom16(v *Value) bool { + v_0 := v.Args[0] + b := v.Block +diff --git a/src/cmd/compile/internal/ssagen/intrinsics.go b/src/cmd/compile/internal/ssagen/intrinsics.go +index e4da86db51..39d070a090 100644 +--- a/src/cmd/compile/internal/ssagen/intrinsics.go ++++ b/src/cmd/compile/internal/ssagen/intrinsics.go +@@ -963,51 +963,22 @@ func initIntrinsics(cfg *intrinsicBuildConfig) { + func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + return s.newValue1(ssa.OpBitLen64, types.Types[types.TINT], args[0]) + }, +- sys.AMD64, sys.ARM64, sys.ARM, sys.Loong64, sys.S390X, sys.MIPS, sys.PPC64, sys.Wasm) ++ sys.AMD64, sys.ARM, sys.ARM64, sys.Loong64, sys.MIPS, sys.PPC64, sys.S390X, sys.Wasm) + addF("math/bits", "Len32", + func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + return s.newValue1(ssa.OpBitLen32, types.Types[types.TINT], args[0]) + }, +- sys.AMD64, sys.ARM64, sys.Loong64, sys.PPC64) +- addF("math/bits", "Len32", +- func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { +- if s.config.PtrSize == 4 { +- return s.newValue1(ssa.OpBitLen32, types.Types[types.TINT], args[0]) +- } +- x := s.newValue1(ssa.OpZeroExt32to64, types.Types[types.TUINT64], args[0]) +- return s.newValue1(ssa.OpBitLen64, types.Types[types.TINT], x) +- }, +- sys.ARM, sys.S390X, sys.MIPS, sys.Wasm) +- addF("math/bits", "Len16", +- func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { +- if s.config.PtrSize == 4 { +- x := s.newValue1(ssa.OpZeroExt16to32, types.Types[types.TUINT32], args[0]) +- return s.newValue1(ssa.OpBitLen32, types.Types[types.TINT], x) +- } +- x := s.newValue1(ssa.OpZeroExt16to64, types.Types[types.TUINT64], args[0]) +- return s.newValue1(ssa.OpBitLen64, types.Types[types.TINT], x) +- }, +- sys.ARM64, sys.ARM, sys.Loong64, sys.S390X, sys.MIPS, sys.PPC64, sys.Wasm) ++ sys.AMD64, sys.ARM, sys.ARM64, sys.Loong64, sys.MIPS, sys.PPC64, sys.S390X, sys.Wasm) + addF("math/bits", "Len16", + func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + return s.newValue1(ssa.OpBitLen16, types.Types[types.TINT], args[0]) + }, +- sys.AMD64) +- addF("math/bits", "Len8", +- func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { +- if s.config.PtrSize == 4 { +- x := s.newValue1(ssa.OpZeroExt8to32, types.Types[types.TUINT32], args[0]) +- return s.newValue1(ssa.OpBitLen32, types.Types[types.TINT], x) +- } +- x := s.newValue1(ssa.OpZeroExt8to64, types.Types[types.TUINT64], args[0]) +- return s.newValue1(ssa.OpBitLen64, types.Types[types.TINT], x) +- }, +- sys.ARM64, sys.ARM, sys.Loong64, sys.S390X, sys.MIPS, sys.PPC64, sys.Wasm) ++ sys.AMD64, sys.ARM, sys.ARM64, sys.Loong64, sys.MIPS, sys.PPC64, sys.S390X, sys.Wasm) + addF("math/bits", "Len8", + func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + return s.newValue1(ssa.OpBitLen8, types.Types[types.TINT], args[0]) + }, +- sys.AMD64) ++ sys.AMD64, sys.ARM, sys.ARM64, sys.Loong64, sys.MIPS, sys.PPC64, sys.S390X, sys.Wasm) + addF("math/bits", "Len", + func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + if s.config.PtrSize == 4 { +@@ -1015,7 +986,7 @@ func initIntrinsics(cfg *intrinsicBuildConfig) { + } + return s.newValue1(ssa.OpBitLen64, types.Types[types.TINT], args[0]) + }, +- sys.AMD64, sys.ARM64, sys.ARM, sys.Loong64, sys.S390X, sys.MIPS, sys.PPC64, sys.Wasm) 
++ sys.AMD64, sys.ARM, sys.ARM64, sys.Loong64, sys.MIPS, sys.PPC64, sys.S390X, sys.Wasm) + // LeadingZeros is handled because it trivially calls Len. + addF("math/bits", "Reverse64", + func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { +-- +2.50.1 + diff --git a/2030-cmd-compile-simplify-intrinsification-of-TrailingZer.patch b/2030-cmd-compile-simplify-intrinsification-of-TrailingZer.patch new file mode 100644 index 0000000000000000000000000000000000000000..5e591d72cfb0635a70af168459f8f38d24ee39ca --- /dev/null +++ b/2030-cmd-compile-simplify-intrinsification-of-TrailingZer.patch @@ -0,0 +1,636 @@ +From f1f0d4924d62288d8198b4d379a8cba63dda5196 Mon Sep 17 00:00:00 2001 +From: Joel Sing +Date: Sat, 22 Feb 2025 23:26:21 +1100 +Subject: [PATCH 30/38] cmd/compile: simplify intrinsification of + TrailingZeros16 and TrailingZeros8 + +Decompose Ctz16 and Ctz8 within the SSA rules for LOONG64, MIPS, PPC64 +and S390X, rather than having a custom intrinsic. Note that for PPC64 this +actually allows the existing Ctz16 and Ctz8 rules to be used. + +Change-Id: I27a5e978f852b9d75396d2a80f5d7dfcb5ef7dd4 +Reviewed-on: https://go-review.googlesource.com/c/go/+/651816 +Reviewed-by: Paul Murphy +TryBot-Result: Gopher Robot +LUCI-TryBot-Result: Go LUCI +Reviewed-by: Michael Pratt +Run-TryBot: Joel Sing +Reviewed-by: Keith Randall +Reviewed-by: Keith Randall +--- + .../compile/internal/ssa/_gen/LOONG64.rules | 4 +- + src/cmd/compile/internal/ssa/_gen/MIPS.rules | 5 +- + src/cmd/compile/internal/ssa/_gen/PPC64.rules | 21 +++--- + src/cmd/compile/internal/ssa/_gen/S390X.rules | 6 +- + .../compile/internal/ssa/rewriteLOONG64.go | 46 ++++++++++++- + src/cmd/compile/internal/ssa/rewriteMIPS.go | 44 +++++++++++++ + src/cmd/compile/internal/ssa/rewritePPC64.go | 64 ++++++++++++++++++- + src/cmd/compile/internal/ssa/rewriteS390X.go | 46 ++++++++++++- + src/cmd/compile/internal/ssagen/intrinsics.go | 36 +---------- + .../internal/ssagen/intrinsics_test.go | 6 +- + test/codegen/mathbits.go | 4 +- + 11 files changed, 225 insertions(+), 57 deletions(-) + +diff --git a/src/cmd/compile/internal/ssa/_gen/LOONG64.rules b/src/cmd/compile/internal/ssa/_gen/LOONG64.rules +index e285f9fe27..7ffd579dc7 100644 +--- a/src/cmd/compile/internal/ssa/_gen/LOONG64.rules ++++ b/src/cmd/compile/internal/ssa/_gen/LOONG64.rules +@@ -153,8 +153,10 @@ + (BitRev16 x) => (REVB2H (BITREV4B x)) + (BitRev32 ...) => (BITREVW ...) + (BitRev64 ...) => (BITREVV ...) +-(Ctz(32|64)NonZero ...) => (Ctz(32|64) ...) ++(Ctz(64|32|16|8)NonZero ...) => (Ctz64 ...) + (Ctz(32|64) ...) => (CTZ(W|V) ...) ++(Ctz16 x) => (CTZV (OR x (MOVVconst [1<<16]))) ++(Ctz8 x) => (CTZV (OR x (MOVVconst [1<<8]))) + + (PopCount64 x) => (MOVVfpgp (VPCNT64 (MOVVgpfp x))) + (PopCount32 x) => (MOVWfpgp (VPCNT32 (MOVWgpfp x))) +diff --git a/src/cmd/compile/internal/ssa/_gen/MIPS.rules b/src/cmd/compile/internal/ssa/_gen/MIPS.rules +index 9a48164f55..4471763462 100644 +--- a/src/cmd/compile/internal/ssa/_gen/MIPS.rules ++++ b/src/cmd/compile/internal/ssa/_gen/MIPS.rules +@@ -126,12 +126,13 @@ + (Sqrt ...) => (SQRTD ...) + (Sqrt32 ...) => (SQRTF ...) + +-// TODO: optimize this case? +-(Ctz32NonZero ...) => (Ctz32 ...) ++(Ctz(32|16|8)NonZero ...) => (Ctz32 ...) 
+ + // count trailing zero + // 32 - CLZ(x&-x - 1) + (Ctz32 x) => (SUB (MOVWconst [32]) (CLZ (SUBconst [1] (AND x (NEG x))))) ++(Ctz16 x) => (Ctz32 (Or32 x (MOVWconst [1<<16]))) ++(Ctz8 x) => (Ctz32 (Or32 x (MOVWconst [1<<8]))) + + // bit length + (BitLen32 x) => (SUB (MOVWconst [32]) (CLZ x)) +diff --git a/src/cmd/compile/internal/ssa/_gen/PPC64.rules b/src/cmd/compile/internal/ssa/_gen/PPC64.rules +index 768e37406f..1749811b84 100644 +--- a/src/cmd/compile/internal/ssa/_gen/PPC64.rules ++++ b/src/cmd/compile/internal/ssa/_gen/PPC64.rules +@@ -254,16 +254,17 @@ + (MOVDaddr {sym} [n] p:(ADD x y)) && sym == nil && n == 0 => p + (MOVDaddr {sym} [n] ptr) && sym == nil && n == 0 && (ptr.Op == OpArgIntReg || ptr.Op == OpPhi) => ptr + +-// TODO: optimize these cases? +-(Ctz32NonZero ...) => (Ctz32 ...) +-(Ctz64NonZero ...) => (Ctz64 ...) +- +-(Ctz64 x) && buildcfg.GOPPC64<=8 => (POPCNTD (ANDN (ADDconst [-1] x) x)) +-(Ctz64 x) => (CNTTZD x) +-(Ctz32 x) && buildcfg.GOPPC64<=8 => (POPCNTW (MOVWZreg (ANDN (ADDconst [-1] x) x))) +-(Ctz32 x) => (CNTTZW (MOVWZreg x)) +-(Ctz16 x) => (POPCNTW (MOVHZreg (ANDN (ADDconst [-1] x) x))) +-(Ctz8 x) => (POPCNTB (MOVBZreg (ANDN (ADDconst [-1] x) x))) ++(Ctz(64|32|16|8)NonZero ...) => (Ctz64 ...) ++ ++(Ctz64 x) && buildcfg.GOPPC64 <= 8 => (POPCNTD (ANDN (ADDconst [-1] x) x)) ++(Ctz32 x) && buildcfg.GOPPC64 <= 8 => (POPCNTW (MOVWZreg (ANDN (ADDconst [-1] x) x))) ++(Ctz16 x) && buildcfg.GOPPC64 <= 8 => (POPCNTW (MOVHZreg (ANDN (ADDconst [-1] x) x))) ++(Ctz8 x) && buildcfg.GOPPC64 <= 8 => (POPCNTB (MOVBZreg (ANDN (ADDconst [-1] x) x))) ++ ++(Ctz64 x) && buildcfg.GOPPC64 >= 9 => (CNTTZD x) ++(Ctz32 x) && buildcfg.GOPPC64 >= 9 => (CNTTZW (MOVWZreg x)) ++(Ctz16 x) && buildcfg.GOPPC64 >= 9 => (CNTTZD (OR x (MOVDconst [1<<16]))) ++(Ctz8 x) && buildcfg.GOPPC64 >= 9 => (CNTTZD (OR x (MOVDconst [1<<8]))) + + (BitLen64 x) => (SUBFCconst [64] (CNTLZD x)) + (BitLen32 x) => (SUBFCconst [32] (CNTLZW x)) +diff --git a/src/cmd/compile/internal/ssa/_gen/S390X.rules b/src/cmd/compile/internal/ssa/_gen/S390X.rules +index 3a903af5d0..78de5bb5a2 100644 +--- a/src/cmd/compile/internal/ssa/_gen/S390X.rules ++++ b/src/cmd/compile/internal/ssa/_gen/S390X.rules +@@ -80,13 +80,13 @@ + (OffPtr [off] ptr) && is32Bit(off) => (ADDconst [int32(off)] ptr) + (OffPtr [off] ptr) => (ADD (MOVDconst [off]) ptr) + +-// TODO: optimize these cases? +-(Ctz64NonZero ...) => (Ctz64 ...) +-(Ctz32NonZero ...) => (Ctz32 ...) ++(Ctz(64|32|16|8)NonZero ...) => (Ctz64 ...) 
+ + // Ctz(x) = 64 - findLeftmostOne((x-1)&^x) + (Ctz64 x) => (SUB (MOVDconst [64]) (FLOGR (AND (SUBconst [1] x) (NOT x)))) + (Ctz32 x) => (SUB (MOVDconst [64]) (FLOGR (MOVWZreg (ANDW (SUBWconst [1] x) (NOTW x))))) ++(Ctz16 x) => (Ctz64 (Or64 x (MOVDconst [1<<16]))) ++(Ctz8 x) => (Ctz64 (Or64 x (MOVDconst [1<<8]))) + + (BitLen64 x) => (SUB (MOVDconst [64]) (FLOGR x)) + (BitLen(32|16|8) x) => (BitLen64 (ZeroExt(32|16|8)to64 x)) +diff --git a/src/cmd/compile/internal/ssa/rewriteLOONG64.go b/src/cmd/compile/internal/ssa/rewriteLOONG64.go +index ba0ea088d4..4499efa01d 100644 +--- a/src/cmd/compile/internal/ssa/rewriteLOONG64.go ++++ b/src/cmd/compile/internal/ssa/rewriteLOONG64.go +@@ -189,11 +189,16 @@ func rewriteValueLOONG64(v *Value) bool { + case OpCopysign: + v.Op = OpLOONG64FCOPYSGD + return true ++ case OpCtz16: ++ return rewriteValueLOONG64_OpCtz16(v) ++ case OpCtz16NonZero: ++ v.Op = OpCtz64 ++ return true + case OpCtz32: + v.Op = OpLOONG64CTZW + return true + case OpCtz32NonZero: +- v.Op = OpCtz32 ++ v.Op = OpCtz64 + return true + case OpCtz64: + v.Op = OpLOONG64CTZV +@@ -201,6 +206,11 @@ func rewriteValueLOONG64(v *Value) bool { + case OpCtz64NonZero: + v.Op = OpCtz64 + return true ++ case OpCtz8: ++ return rewriteValueLOONG64_OpCtz8(v) ++ case OpCtz8NonZero: ++ v.Op = OpCtz64 ++ return true + case OpCvt32Fto32: + v.Op = OpLOONG64TRUNCFW + return true +@@ -1242,6 +1252,40 @@ func rewriteValueLOONG64_OpConstNil(v *Value) bool { + return true + } + } ++func rewriteValueLOONG64_OpCtz16(v *Value) bool { ++ v_0 := v.Args[0] ++ b := v.Block ++ typ := &b.Func.Config.Types ++ // match: (Ctz16 x) ++ // result: (CTZV (OR x (MOVVconst [1<<16]))) ++ for { ++ x := v_0 ++ v.reset(OpLOONG64CTZV) ++ v0 := b.NewValue0(v.Pos, OpLOONG64OR, typ.UInt64) ++ v1 := b.NewValue0(v.Pos, OpLOONG64MOVVconst, typ.UInt64) ++ v1.AuxInt = int64ToAuxInt(1 << 16) ++ v0.AddArg2(x, v1) ++ v.AddArg(v0) ++ return true ++ } ++} ++func rewriteValueLOONG64_OpCtz8(v *Value) bool { ++ v_0 := v.Args[0] ++ b := v.Block ++ typ := &b.Func.Config.Types ++ // match: (Ctz8 x) ++ // result: (CTZV (OR x (MOVVconst [1<<8]))) ++ for { ++ x := v_0 ++ v.reset(OpLOONG64CTZV) ++ v0 := b.NewValue0(v.Pos, OpLOONG64OR, typ.UInt64) ++ v1 := b.NewValue0(v.Pos, OpLOONG64MOVVconst, typ.UInt64) ++ v1.AuxInt = int64ToAuxInt(1 << 8) ++ v0.AddArg2(x, v1) ++ v.AddArg(v0) ++ return true ++ } ++} + func rewriteValueLOONG64_OpDiv16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] +diff --git a/src/cmd/compile/internal/ssa/rewriteMIPS.go b/src/cmd/compile/internal/ssa/rewriteMIPS.go +index 5b95549486..1bc2cb6e6d 100644 +--- a/src/cmd/compile/internal/ssa/rewriteMIPS.go ++++ b/src/cmd/compile/internal/ssa/rewriteMIPS.go +@@ -113,11 +113,21 @@ func rewriteValueMIPS(v *Value) bool { + return rewriteValueMIPS_OpConstBool(v) + case OpConstNil: + return rewriteValueMIPS_OpConstNil(v) ++ case OpCtz16: ++ return rewriteValueMIPS_OpCtz16(v) ++ case OpCtz16NonZero: ++ v.Op = OpCtz32 ++ return true + case OpCtz32: + return rewriteValueMIPS_OpCtz32(v) + case OpCtz32NonZero: + v.Op = OpCtz32 + return true ++ case OpCtz8: ++ return rewriteValueMIPS_OpCtz8(v) ++ case OpCtz8NonZero: ++ v.Op = OpCtz32 ++ return true + case OpCvt32Fto32: + v.Op = OpMIPSTRUNCFW + return true +@@ -929,6 +939,23 @@ func rewriteValueMIPS_OpConstNil(v *Value) bool { + return true + } + } ++func rewriteValueMIPS_OpCtz16(v *Value) bool { ++ v_0 := v.Args[0] ++ b := v.Block ++ typ := &b.Func.Config.Types ++ // match: (Ctz16 x) ++ // result: (Ctz32 (Or32 x (MOVWconst [1<<16]))) ++ for { ++ x 
:= v_0 ++ v.reset(OpCtz32) ++ v0 := b.NewValue0(v.Pos, OpOr32, typ.UInt32) ++ v1 := b.NewValue0(v.Pos, OpMIPSMOVWconst, typ.UInt32) ++ v1.AuxInt = int32ToAuxInt(1 << 16) ++ v0.AddArg2(x, v1) ++ v.AddArg(v0) ++ return true ++ } ++} + func rewriteValueMIPS_OpCtz32(v *Value) bool { + v_0 := v.Args[0] + b := v.Block +@@ -954,6 +981,23 @@ func rewriteValueMIPS_OpCtz32(v *Value) bool { + return true + } + } ++func rewriteValueMIPS_OpCtz8(v *Value) bool { ++ v_0 := v.Args[0] ++ b := v.Block ++ typ := &b.Func.Config.Types ++ // match: (Ctz8 x) ++ // result: (Ctz32 (Or32 x (MOVWconst [1<<8]))) ++ for { ++ x := v_0 ++ v.reset(OpCtz32) ++ v0 := b.NewValue0(v.Pos, OpOr32, typ.UInt32) ++ v1 := b.NewValue0(v.Pos, OpMIPSMOVWconst, typ.UInt32) ++ v1.AuxInt = int32ToAuxInt(1 << 8) ++ v0.AddArg2(x, v1) ++ v.AddArg(v0) ++ return true ++ } ++} + func rewriteValueMIPS_OpDiv16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] +diff --git a/src/cmd/compile/internal/ssa/rewritePPC64.go b/src/cmd/compile/internal/ssa/rewritePPC64.go +index e900ebe0be..e987ae9662 100644 +--- a/src/cmd/compile/internal/ssa/rewritePPC64.go ++++ b/src/cmd/compile/internal/ssa/rewritePPC64.go +@@ -158,10 +158,13 @@ func rewriteValuePPC64(v *Value) bool { + return rewriteValuePPC64_OpCopysign(v) + case OpCtz16: + return rewriteValuePPC64_OpCtz16(v) ++ case OpCtz16NonZero: ++ v.Op = OpCtz64 ++ return true + case OpCtz32: + return rewriteValuePPC64_OpCtz32(v) + case OpCtz32NonZero: +- v.Op = OpCtz32 ++ v.Op = OpCtz64 + return true + case OpCtz64: + return rewriteValuePPC64_OpCtz64(v) +@@ -170,6 +173,9 @@ func rewriteValuePPC64(v *Value) bool { + return true + case OpCtz8: + return rewriteValuePPC64_OpCtz8(v) ++ case OpCtz8NonZero: ++ v.Op = OpCtz64 ++ return true + case OpCvt32Fto32: + return rewriteValuePPC64_OpCvt32Fto32(v) + case OpCvt32Fto64: +@@ -1534,9 +1540,13 @@ func rewriteValuePPC64_OpCtz16(v *Value) bool { + b := v.Block + typ := &b.Func.Config.Types + // match: (Ctz16 x) ++ // cond: buildcfg.GOPPC64 <= 8 + // result: (POPCNTW (MOVHZreg (ANDN (ADDconst [-1] x) x))) + for { + x := v_0 ++ if !(buildcfg.GOPPC64 <= 8) { ++ break ++ } + v.reset(OpPPC64POPCNTW) + v0 := b.NewValue0(v.Pos, OpPPC64MOVHZreg, typ.Int64) + v1 := b.NewValue0(v.Pos, OpPPC64ANDN, typ.Int16) +@@ -1548,13 +1558,30 @@ func rewriteValuePPC64_OpCtz16(v *Value) bool { + v.AddArg(v0) + return true + } ++ // match: (Ctz16 x) ++ // cond: buildcfg.GOPPC64 >= 9 ++ // result: (CNTTZD (OR x (MOVDconst [1<<16]))) ++ for { ++ x := v_0 ++ if !(buildcfg.GOPPC64 >= 9) { ++ break ++ } ++ v.reset(OpPPC64CNTTZD) ++ v0 := b.NewValue0(v.Pos, OpPPC64OR, typ.UInt64) ++ v1 := b.NewValue0(v.Pos, OpPPC64MOVDconst, typ.Int64) ++ v1.AuxInt = int64ToAuxInt(1 << 16) ++ v0.AddArg2(x, v1) ++ v.AddArg(v0) ++ return true ++ } ++ return false + } + func rewriteValuePPC64_OpCtz32(v *Value) bool { + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (Ctz32 x) +- // cond: buildcfg.GOPPC64<=8 ++ // cond: buildcfg.GOPPC64 <= 8 + // result: (POPCNTW (MOVWZreg (ANDN (ADDconst [-1] x) x))) + for { + x := v_0 +@@ -1573,22 +1600,27 @@ func rewriteValuePPC64_OpCtz32(v *Value) bool { + return true + } + // match: (Ctz32 x) ++ // cond: buildcfg.GOPPC64 >= 9 + // result: (CNTTZW (MOVWZreg x)) + for { + x := v_0 ++ if !(buildcfg.GOPPC64 >= 9) { ++ break ++ } + v.reset(OpPPC64CNTTZW) + v0 := b.NewValue0(v.Pos, OpPPC64MOVWZreg, typ.Int64) + v0.AddArg(x) + v.AddArg(v0) + return true + } ++ return false + } + func rewriteValuePPC64_OpCtz64(v *Value) bool { + v_0 := v.Args[0] + b := 
v.Block + typ := &b.Func.Config.Types + // match: (Ctz64 x) +- // cond: buildcfg.GOPPC64<=8 ++ // cond: buildcfg.GOPPC64 <= 8 + // result: (POPCNTD (ANDN (ADDconst [-1] x) x)) + for { + x := v_0 +@@ -1605,22 +1637,31 @@ func rewriteValuePPC64_OpCtz64(v *Value) bool { + return true + } + // match: (Ctz64 x) ++ // cond: buildcfg.GOPPC64 >= 9 + // result: (CNTTZD x) + for { + x := v_0 ++ if !(buildcfg.GOPPC64 >= 9) { ++ break ++ } + v.reset(OpPPC64CNTTZD) + v.AddArg(x) + return true + } ++ return false + } + func rewriteValuePPC64_OpCtz8(v *Value) bool { + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (Ctz8 x) ++ // cond: buildcfg.GOPPC64 <= 8 + // result: (POPCNTB (MOVBZreg (ANDN (ADDconst [-1] x) x))) + for { + x := v_0 ++ if !(buildcfg.GOPPC64 <= 8) { ++ break ++ } + v.reset(OpPPC64POPCNTB) + v0 := b.NewValue0(v.Pos, OpPPC64MOVBZreg, typ.Int64) + v1 := b.NewValue0(v.Pos, OpPPC64ANDN, typ.UInt8) +@@ -1632,6 +1673,23 @@ func rewriteValuePPC64_OpCtz8(v *Value) bool { + v.AddArg(v0) + return true + } ++ // match: (Ctz8 x) ++ // cond: buildcfg.GOPPC64 >= 9 ++ // result: (CNTTZD (OR x (MOVDconst [1<<8]))) ++ for { ++ x := v_0 ++ if !(buildcfg.GOPPC64 >= 9) { ++ break ++ } ++ v.reset(OpPPC64CNTTZD) ++ v0 := b.NewValue0(v.Pos, OpPPC64OR, typ.UInt64) ++ v1 := b.NewValue0(v.Pos, OpPPC64MOVDconst, typ.Int64) ++ v1.AuxInt = int64ToAuxInt(1 << 8) ++ v0.AddArg2(x, v1) ++ v.AddArg(v0) ++ return true ++ } ++ return false + } + func rewriteValuePPC64_OpCvt32Fto32(v *Value) bool { + v_0 := v.Args[0] +diff --git a/src/cmd/compile/internal/ssa/rewriteS390X.go b/src/cmd/compile/internal/ssa/rewriteS390X.go +index 357b17f8fd..7e652a19bc 100644 +--- a/src/cmd/compile/internal/ssa/rewriteS390X.go ++++ b/src/cmd/compile/internal/ssa/rewriteS390X.go +@@ -139,16 +139,26 @@ func rewriteValueS390X(v *Value) bool { + return rewriteValueS390X_OpConstBool(v) + case OpConstNil: + return rewriteValueS390X_OpConstNil(v) ++ case OpCtz16: ++ return rewriteValueS390X_OpCtz16(v) ++ case OpCtz16NonZero: ++ v.Op = OpCtz64 ++ return true + case OpCtz32: + return rewriteValueS390X_OpCtz32(v) + case OpCtz32NonZero: +- v.Op = OpCtz32 ++ v.Op = OpCtz64 + return true + case OpCtz64: + return rewriteValueS390X_OpCtz64(v) + case OpCtz64NonZero: + v.Op = OpCtz64 + return true ++ case OpCtz8: ++ return rewriteValueS390X_OpCtz8(v) ++ case OpCtz8NonZero: ++ v.Op = OpCtz64 ++ return true + case OpCvt32Fto32: + v.Op = OpS390XCFEBRA + return true +@@ -1449,6 +1459,23 @@ func rewriteValueS390X_OpConstNil(v *Value) bool { + return true + } + } ++func rewriteValueS390X_OpCtz16(v *Value) bool { ++ v_0 := v.Args[0] ++ b := v.Block ++ typ := &b.Func.Config.Types ++ // match: (Ctz16 x) ++ // result: (Ctz64 (Or64 x (MOVDconst [1<<16]))) ++ for { ++ x := v_0 ++ v.reset(OpCtz64) ++ v0 := b.NewValue0(v.Pos, OpOr64, typ.UInt64) ++ v1 := b.NewValue0(v.Pos, OpS390XMOVDconst, typ.UInt64) ++ v1.AuxInt = int64ToAuxInt(1 << 16) ++ v0.AddArg2(x, v1) ++ v.AddArg(v0) ++ return true ++ } ++} + func rewriteValueS390X_OpCtz32(v *Value) bool { + v_0 := v.Args[0] + b := v.Block +@@ -1501,6 +1528,23 @@ func rewriteValueS390X_OpCtz64(v *Value) bool { + return true + } + } ++func rewriteValueS390X_OpCtz8(v *Value) bool { ++ v_0 := v.Args[0] ++ b := v.Block ++ typ := &b.Func.Config.Types ++ // match: (Ctz8 x) ++ // result: (Ctz64 (Or64 x (MOVDconst [1<<8]))) ++ for { ++ x := v_0 ++ v.reset(OpCtz64) ++ v0 := b.NewValue0(v.Pos, OpOr64, typ.UInt64) ++ v1 := b.NewValue0(v.Pos, OpS390XMOVDconst, typ.UInt64) ++ v1.AuxInt = int64ToAuxInt(1 << 8) ++ 
v0.AddArg2(x, v1) ++ v.AddArg(v0) ++ return true ++ } ++} + func rewriteValueS390X_OpDiv16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] +diff --git a/src/cmd/compile/internal/ssagen/intrinsics.go b/src/cmd/compile/internal/ssagen/intrinsics.go +index 39d070a090..4f84b6b924 100644 +--- a/src/cmd/compile/internal/ssagen/intrinsics.go ++++ b/src/cmd/compile/internal/ssagen/intrinsics.go +@@ -899,48 +899,16 @@ func initIntrinsics(cfg *intrinsicBuildConfig) { + return s.newValue1(ssa.OpCtz32, types.Types[types.TINT], args[0]) + }, + sys.AMD64, sys.I386, sys.ARM64, sys.ARM, sys.Loong64, sys.S390X, sys.MIPS, sys.PPC64, sys.Wasm) +- addF("math/bits", "TrailingZeros16", +- func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { +- x := s.newValue1(ssa.OpZeroExt16to32, types.Types[types.TUINT32], args[0]) +- c := s.constInt32(types.Types[types.TUINT32], 1<<16) +- y := s.newValue2(ssa.OpOr32, types.Types[types.TUINT32], x, c) +- return s.newValue1(ssa.OpCtz32, types.Types[types.TINT], y) +- }, +- sys.MIPS) + addF("math/bits", "TrailingZeros16", + func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + return s.newValue1(ssa.OpCtz16, types.Types[types.TINT], args[0]) + }, +- sys.AMD64, sys.I386, sys.ARM, sys.ARM64, sys.Wasm) +- addF("math/bits", "TrailingZeros16", +- func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { +- x := s.newValue1(ssa.OpZeroExt16to64, types.Types[types.TUINT64], args[0]) +- c := s.constInt64(types.Types[types.TUINT64], 1<<16) +- y := s.newValue2(ssa.OpOr64, types.Types[types.TUINT64], x, c) +- return s.newValue1(ssa.OpCtz64, types.Types[types.TINT], y) +- }, +- sys.Loong64, sys.S390X, sys.PPC64) +- addF("math/bits", "TrailingZeros8", +- func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { +- x := s.newValue1(ssa.OpZeroExt8to32, types.Types[types.TUINT32], args[0]) +- c := s.constInt32(types.Types[types.TUINT32], 1<<8) +- y := s.newValue2(ssa.OpOr32, types.Types[types.TUINT32], x, c) +- return s.newValue1(ssa.OpCtz32, types.Types[types.TINT], y) +- }, +- sys.MIPS) ++ sys.AMD64, sys.ARM, sys.ARM64, sys.I386, sys.MIPS, sys.Loong64, sys.PPC64, sys.S390X, sys.Wasm) + addF("math/bits", "TrailingZeros8", + func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + return s.newValue1(ssa.OpCtz8, types.Types[types.TINT], args[0]) + }, +- sys.AMD64, sys.I386, sys.ARM, sys.ARM64, sys.Wasm) +- addF("math/bits", "TrailingZeros8", +- func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { +- x := s.newValue1(ssa.OpZeroExt8to64, types.Types[types.TUINT64], args[0]) +- c := s.constInt64(types.Types[types.TUINT64], 1<<8) +- y := s.newValue2(ssa.OpOr64, types.Types[types.TUINT64], x, c) +- return s.newValue1(ssa.OpCtz64, types.Types[types.TINT], y) +- }, +- sys.Loong64, sys.S390X) ++ sys.AMD64, sys.ARM, sys.ARM64, sys.I386, sys.MIPS, sys.Loong64, sys.PPC64, sys.S390X, sys.Wasm) + alias("math/bits", "ReverseBytes64", "internal/runtime/sys", "Bswap64", all...) + alias("math/bits", "ReverseBytes32", "internal/runtime/sys", "Bswap32", all...) 
+ addF("math/bits", "ReverseBytes16", +diff --git a/src/cmd/compile/internal/ssagen/intrinsics_test.go b/src/cmd/compile/internal/ssagen/intrinsics_test.go +index 2e29a45c0b..0f8a8a83b4 100644 +--- a/src/cmd/compile/internal/ssagen/intrinsics_test.go ++++ b/src/cmd/compile/internal/ssagen/intrinsics_test.go +@@ -867,6 +867,7 @@ var wantIntrinsics = map[testIntrinsicKey]struct{}{ + {"ppc64", "internal/runtime/sys", "OnesCount64"}: struct{}{}, + {"ppc64", "internal/runtime/sys", "Prefetch"}: struct{}{}, + {"ppc64", "internal/runtime/sys", "PrefetchStreamed"}: struct{}{}, ++ {"ppc64", "internal/runtime/sys", "TrailingZeros8"}: struct{}{}, + {"ppc64", "internal/runtime/sys", "TrailingZeros32"}: struct{}{}, + {"ppc64", "internal/runtime/sys", "TrailingZeros64"}: struct{}{}, + {"ppc64", "math", "Abs"}: struct{}{}, +@@ -899,6 +900,7 @@ var wantIntrinsics = map[testIntrinsicKey]struct{}{ + {"ppc64", "math/bits", "RotateLeft64"}: struct{}{}, + {"ppc64", "math/bits", "Sub"}: struct{}{}, + {"ppc64", "math/bits", "Sub64"}: struct{}{}, ++ {"ppc64", "math/bits", "TrailingZeros8"}: struct{}{}, + {"ppc64", "math/bits", "TrailingZeros16"}: struct{}{}, + {"ppc64", "math/bits", "TrailingZeros32"}: struct{}{}, + {"ppc64", "math/bits", "TrailingZeros64"}: struct{}{}, +@@ -988,6 +990,7 @@ var wantIntrinsics = map[testIntrinsicKey]struct{}{ + {"ppc64le", "internal/runtime/sys", "OnesCount64"}: struct{}{}, + {"ppc64le", "internal/runtime/sys", "Prefetch"}: struct{}{}, + {"ppc64le", "internal/runtime/sys", "PrefetchStreamed"}: struct{}{}, ++ {"ppc64le", "internal/runtime/sys", "TrailingZeros8"}: struct{}{}, + {"ppc64le", "internal/runtime/sys", "TrailingZeros32"}: struct{}{}, + {"ppc64le", "internal/runtime/sys", "TrailingZeros64"}: struct{}{}, + {"ppc64le", "math", "Abs"}: struct{}{}, +@@ -1020,6 +1023,7 @@ var wantIntrinsics = map[testIntrinsicKey]struct{}{ + {"ppc64le", "math/bits", "RotateLeft64"}: struct{}{}, + {"ppc64le", "math/bits", "Sub"}: struct{}{}, + {"ppc64le", "math/bits", "Sub64"}: struct{}{}, ++ {"ppc64le", "math/bits", "TrailingZeros8"}: struct{}{}, + {"ppc64le", "math/bits", "TrailingZeros16"}: struct{}{}, + {"ppc64le", "math/bits", "TrailingZeros32"}: struct{}{}, + {"ppc64le", "math/bits", "TrailingZeros64"}: struct{}{}, +@@ -1340,7 +1344,7 @@ func TestIntrinsics(t *testing.T) { + + for ik, _ := range wantIntrinsics { + if _, found := gotIntrinsics[ik]; !found { +- t.Errorf("Want intrinsic %v %v.%v", ik.archName, ik.pkg, ik.fn) ++ t.Errorf("Want missing intrinsic %v %v.%v", ik.archName, ik.pkg, ik.fn) + } + } + } +diff --git a/test/codegen/mathbits.go b/test/codegen/mathbits.go +index baed4f7c67..78556c636f 100644 +--- a/test/codegen/mathbits.go ++++ b/test/codegen/mathbits.go +@@ -409,7 +409,7 @@ func TrailingZeros16(n uint16) int { + // arm64:"ORR\t\\$65536","RBITW","CLZW",-"MOVHU\tR",-"RBIT\t",-"CLZ\t" + // loong64:"CTZV" + // s390x:"FLOGR","OR\t\\$65536" +- // ppc64x/power8:"POPCNTD","ORIS\\t\\$1" ++ // ppc64x/power8:"POPCNTW","ADD\t\\$-1" + // ppc64x/power9:"CNTTZD","ORIS\\t\\$1" + // wasm:"I64Ctz" + return bits.TrailingZeros16(n) +@@ -421,6 +421,8 @@ func TrailingZeros8(n uint8) int { + // arm:"ORR\t\\$256","CLZ",-"MOVBU\tR" + // arm64:"ORR\t\\$256","RBITW","CLZW",-"MOVBU\tR",-"RBIT\t",-"CLZ\t" + // loong64:"CTZV" ++ // ppc64x/power8:"POPCNTB","ADD\t\\$-1" ++ // ppc64x/power9:"CNTTZD","OR\t\\$256" + // s390x:"FLOGR","OR\t\\$256" + // wasm:"I64Ctz" + return bits.TrailingZeros8(n) +-- +2.50.1 + diff --git a/2031-cmd-compile-internal-ssagen-use-an-alias-for-math-bi.patch 
b/2031-cmd-compile-internal-ssagen-use-an-alias-for-math-bi.patch new file mode 100644 index 0000000000000000000000000000000000000000..2574fa3a88a258fa9b48bc4ec80060ddf3f8c4f2 --- /dev/null +++ b/2031-cmd-compile-internal-ssagen-use-an-alias-for-math-bi.patch @@ -0,0 +1,46 @@ +From 3b8fdd2925459b1f8c037c283e4b5ffd92b5b5fe Mon Sep 17 00:00:00 2001 +From: Joel Sing +Date: Wed, 12 Mar 2025 22:55:49 +1100 +Subject: [PATCH 31/38] cmd/compile/internal/ssagen: use an alias for + math/bits.Len + +Rather than using a specific intrinsic for math/bits.Len, use a pair of +aliases instead. This requires less code and automatically adapts when +platforms have a math/bits.Len32 or math/bits.Len64 intrinsic. + +Change-Id: I28b300172daaee26ef82a7530d9e96123663f541 +Reviewed-on: https://go-review.googlesource.com/c/go/+/656995 +LUCI-TryBot-Result: Go LUCI +Reviewed-by: David Chase +Reviewed-by: Junyang Shao +Auto-Submit: Jorropo +Reviewed-by: Jorropo +--- + src/cmd/compile/internal/ssagen/intrinsics.go | 11 +++-------- + 1 file changed, 3 insertions(+), 8 deletions(-) + +diff --git a/src/cmd/compile/internal/ssagen/intrinsics.go b/src/cmd/compile/internal/ssagen/intrinsics.go +index 4f84b6b924..c2b35f41c8 100644 +--- a/src/cmd/compile/internal/ssagen/intrinsics.go ++++ b/src/cmd/compile/internal/ssagen/intrinsics.go +@@ -947,14 +947,9 @@ func initIntrinsics(cfg *intrinsicBuildConfig) { + return s.newValue1(ssa.OpBitLen8, types.Types[types.TINT], args[0]) + }, + sys.AMD64, sys.ARM, sys.ARM64, sys.Loong64, sys.MIPS, sys.PPC64, sys.S390X, sys.Wasm) +- addF("math/bits", "Len", +- func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { +- if s.config.PtrSize == 4 { +- return s.newValue1(ssa.OpBitLen32, types.Types[types.TINT], args[0]) +- } +- return s.newValue1(ssa.OpBitLen64, types.Types[types.TINT], args[0]) +- }, +- sys.AMD64, sys.ARM, sys.ARM64, sys.Loong64, sys.MIPS, sys.PPC64, sys.S390X, sys.Wasm) ++ alias("math/bits", "Len", "math/bits", "Len64", p8...) ++ alias("math/bits", "Len", "math/bits", "Len32", p4...) ++ + // LeadingZeros is handled because it trivially calls Len. + addF("math/bits", "Reverse64", + func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { +-- +2.50.1 + diff --git a/2032-test-codegen-tighten-the-TrailingZeros64-test-on-386.patch b/2032-test-codegen-tighten-the-TrailingZeros64-test-on-386.patch new file mode 100644 index 0000000000000000000000000000000000000000..d11e1fca06e9d3a941a2f20e9de5d82fdec20514 --- /dev/null +++ b/2032-test-codegen-tighten-the-TrailingZeros64-test-on-386.patch @@ -0,0 +1,36 @@ +From 257d48b5b905ee6759970bf6eac9990136a72d63 Mon Sep 17 00:00:00 2001 +From: Joel Sing +Date: Wed, 26 Feb 2025 21:03:15 +1100 +Subject: [PATCH 32/38] test/codegen: tighten the TrailingZeros64 test on 386 + +Make the TrailingZeros64 code generation check more specific for 386. +Just checking for BSFL will match both the generic 64 bit decomposition +and the custom 386 lowering. 
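For reference, a 64-bit trailing-zeros count built from 32-bit halves has roughly the following shape (an illustrative Go sketch only, not the compiler's actual SSA lowering; the helper name is made up and it assumes import "math/bits"):

func trailingZeros64From32BitHalves(lo, hi uint32) int {
	if lo != 0 {
		return bits.TrailingZeros32(lo)
	}
	return 32 + bits.TrailingZeros32(hi)
}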
+ +Change-Id: I62076f1889af0ef1f29704cba01ab419cae0c6e3 +Reviewed-on: https://go-review.googlesource.com/c/go/+/656996 +LUCI-TryBot-Result: Go LUCI +Reviewed-by: David Chase +Reviewed-by: Keith Randall +Auto-Submit: Keith Randall +Reviewed-by: Keith Randall +--- + test/codegen/mathbits.go | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/test/codegen/mathbits.go b/test/codegen/mathbits.go +index 78556c636f..1cee39283d 100644 +--- a/test/codegen/mathbits.go ++++ b/test/codegen/mathbits.go +@@ -372,7 +372,7 @@ func TrailingZeros(n uint) int { + func TrailingZeros64(n uint64) int { + // amd64/v1,amd64/v2:"BSFQ","MOVL\t\\$64","CMOVQEQ" + // amd64/v3:"TZCNTQ" +- // 386:"BSFL" ++ // 386:"BSFL","JNE" + // arm64:"RBIT","CLZ" + // loong64:"CTZV" + // s390x:"FLOGR" +-- +2.50.1 + diff --git a/2033-cmd-compile-intrinsify-math-bits.TrailingZeros-on-ri.patch b/2033-cmd-compile-intrinsify-math-bits.TrailingZeros-on-ri.patch new file mode 100644 index 0000000000000000000000000000000000000000..6e8fdf362d3d31c9d456d0abfc7edc531dbf131e --- /dev/null +++ b/2033-cmd-compile-intrinsify-math-bits.TrailingZeros-on-ri.patch @@ -0,0 +1,380 @@ +From fd4df2f41008723ade6727f1b0f0f7b14dd8c18f Mon Sep 17 00:00:00 2001 +From: Joel Sing +Date: Sun, 23 Feb 2025 22:17:53 +1100 +Subject: [PATCH 33/38] cmd/compile: intrinsify math/bits.TrailingZeros on + riscv64 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +For riscv64/rva22u64 and above, we can intrinsify math/bits.TrailingZeros +using the CTZ/CTZW machine instructions. + +On a StarFive VisionFive 2 with GORISCV64=rva22u64: + + │ ctz.b.1 │ ctz.b.2 │ + │ sec/op │ sec/op vs base │ +TrailingZeros-4 25.500n ± 0% 8.052n ± 0% -68.42% (p=0.000 n=10) +TrailingZeros8-4 14.76n ± 0% 10.74n ± 0% -27.24% (p=0.000 n=10) +TrailingZeros16-4 26.84n ± 0% 10.74n ± 0% -59.99% (p=0.000 n=10) +TrailingZeros32-4 25.500n ± 0% 8.052n ± 0% -68.42% (p=0.000 n=10) +TrailingZeros64-4 25.500n ± 0% 8.052n ± 0% -68.42% (p=0.000 n=10) +geomean 23.09n 9.035n -60.88% + +Change-Id: I71edf2b988acb7a68e797afda4ee66d7a57d587e +Reviewed-on: https://go-review.googlesource.com/c/go/+/652320 +Reviewed-by: Cherry Mui +Reviewed-by: Mark Ryan +Reviewed-by: David Chase +LUCI-TryBot-Result: Go LUCI +Reviewed-by: Meng Zhuo +--- + src/cmd/compile/internal/riscv64/ssa.go | 2 +- + .../compile/internal/ssa/_gen/RISCV64.rules | 7 +++ + .../compile/internal/ssa/_gen/RISCV64Ops.go | 2 + + src/cmd/compile/internal/ssa/opGen.go | 28 ++++++++++ + .../compile/internal/ssa/rewriteRISCV64.go | 54 +++++++++++++++++++ + src/cmd/compile/internal/ssagen/intrinsics.go | 24 +++++++++ + .../internal/ssagen/intrinsics_test.go | 10 +++- + test/codegen/mathbits.go | 25 +++++---- + 8 files changed, 141 insertions(+), 11 deletions(-) + +diff --git a/src/cmd/compile/internal/riscv64/ssa.go b/src/cmd/compile/internal/riscv64/ssa.go +index 759d8d7cf4..4acef5be43 100644 +--- a/src/cmd/compile/internal/riscv64/ssa.go ++++ b/src/cmd/compile/internal/riscv64/ssa.go +@@ -419,7 +419,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) { + ssa.OpRISCV64FMVSX, ssa.OpRISCV64FMVDX, + ssa.OpRISCV64FCVTSW, ssa.OpRISCV64FCVTSL, ssa.OpRISCV64FCVTWS, ssa.OpRISCV64FCVTLS, + ssa.OpRISCV64FCVTDW, ssa.OpRISCV64FCVTDL, ssa.OpRISCV64FCVTWD, ssa.OpRISCV64FCVTLD, ssa.OpRISCV64FCVTDS, ssa.OpRISCV64FCVTSD, +- ssa.OpRISCV64NOT, ssa.OpRISCV64NEG, ssa.OpRISCV64NEGW: ++ ssa.OpRISCV64NOT, ssa.OpRISCV64NEG, ssa.OpRISCV64NEGW, ssa.OpRISCV64CTZ, ssa.OpRISCV64CTZW: + p := s.Prog(v.Op.Asm()) + p.From.Type = 
obj.TYPE_REG + p.From.Reg = v.Args[0].Reg() +diff --git a/src/cmd/compile/internal/ssa/_gen/RISCV64.rules b/src/cmd/compile/internal/ssa/_gen/RISCV64.rules +index 9b9f82d4a6..2ed946b3b0 100644 +--- a/src/cmd/compile/internal/ssa/_gen/RISCV64.rules ++++ b/src/cmd/compile/internal/ssa/_gen/RISCV64.rules +@@ -218,6 +218,13 @@ + (RotateLeft32 ...) => (ROLW ...) + (RotateLeft64 ...) => (ROL ...) + ++// Count trailing zeros (note that these will only be emitted for rva22u64 and above). ++(Ctz(64|32|16|8)NonZero ...) => (Ctz64 ...) ++(Ctz64 ...) => (CTZ ...) ++(Ctz32 ...) => (CTZW ...) ++(Ctz16 x) => (CTZW (ORI [1<<16] x)) ++(Ctz8 x) => (CTZW (ORI [1<<8] x)) ++ + (Less64 ...) => (SLT ...) + (Less32 x y) => (SLT (SignExt32to64 x) (SignExt32to64 y)) + (Less16 x y) => (SLT (SignExt16to64 x) (SignExt16to64 y)) +diff --git a/src/cmd/compile/internal/ssa/_gen/RISCV64Ops.go b/src/cmd/compile/internal/ssa/_gen/RISCV64Ops.go +index 85c74b4676..85e9e47e82 100644 +--- a/src/cmd/compile/internal/ssa/_gen/RISCV64Ops.go ++++ b/src/cmd/compile/internal/ssa/_gen/RISCV64Ops.go +@@ -229,6 +229,8 @@ func init() { + {name: "AND", argLength: 2, reg: gp21, asm: "AND", commutative: true}, // arg0 & arg1 + {name: "ANDN", argLength: 2, reg: gp21, asm: "ANDN"}, // ^arg0 & arg1 + {name: "ANDI", argLength: 1, reg: gp11, asm: "ANDI", aux: "Int64"}, // arg0 & auxint ++ {name: "CTZ", argLength: 1, reg: gp11, asm: "CTZ"}, // count trailing zeros ++ {name: "CTZW", argLength: 1, reg: gp11, asm: "CTZW"}, // count trailing zeros of least significant word + {name: "NOT", argLength: 1, reg: gp11, asm: "NOT"}, // ^arg0 + {name: "OR", argLength: 2, reg: gp21, asm: "OR", commutative: true}, // arg0 | arg1 + {name: "ORN", argLength: 2, reg: gp21, asm: "ORN"}, // ^arg0 | arg1 +diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go +index 347155de2e..3d441bcb95 100644 +--- a/src/cmd/compile/internal/ssa/opGen.go ++++ b/src/cmd/compile/internal/ssa/opGen.go +@@ -2507,6 +2507,8 @@ const ( + OpRISCV64AND + OpRISCV64ANDN + OpRISCV64ANDI ++ OpRISCV64CTZ ++ OpRISCV64CTZW + OpRISCV64NOT + OpRISCV64OR + OpRISCV64ORN +@@ -33737,6 +33739,32 @@ var opcodeTable = [...]opInfo{ + }, + }, + }, ++ { ++ name: "CTZ", ++ argLen: 1, ++ asm: riscv.ACTZ, ++ reg: regInfo{ ++ inputs: []inputInfo{ ++ {0, 1006632944}, // X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 ++ }, ++ outputs: []outputInfo{ ++ {0, 1006632944}, // X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 ++ }, ++ }, ++ }, ++ { ++ name: "CTZW", ++ argLen: 1, ++ asm: riscv.ACTZW, ++ reg: regInfo{ ++ inputs: []inputInfo{ ++ {0, 1006632944}, // X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 ++ }, ++ outputs: []outputInfo{ ++ {0, 1006632944}, // X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 ++ }, ++ }, ++ }, + { + name: "NOT", + argLen: 1, +diff --git a/src/cmd/compile/internal/ssa/rewriteRISCV64.go b/src/cmd/compile/internal/ssa/rewriteRISCV64.go +index 13f75794ac..1bc6ff51bf 100644 +--- a/src/cmd/compile/internal/ssa/rewriteRISCV64.go ++++ b/src/cmd/compile/internal/ssa/rewriteRISCV64.go +@@ -136,6 +136,28 @@ func rewriteValueRISCV64(v *Value) bool { + case OpCopysign: + v.Op = OpRISCV64FSGNJD + return true ++ case OpCtz16: ++ return rewriteValueRISCV64_OpCtz16(v) ++ case OpCtz16NonZero: ++ v.Op = OpCtz64 ++ return true ++ case OpCtz32: ++ v.Op = OpRISCV64CTZW ++ return true ++ 
case OpCtz32NonZero: ++ v.Op = OpCtz64 ++ return true ++ case OpCtz64: ++ v.Op = OpRISCV64CTZ ++ return true ++ case OpCtz64NonZero: ++ v.Op = OpCtz64 ++ return true ++ case OpCtz8: ++ return rewriteValueRISCV64_OpCtz8(v) ++ case OpCtz8NonZero: ++ v.Op = OpCtz64 ++ return true + case OpCvt32Fto32: + v.Op = OpRISCV64FCVTWS + return true +@@ -995,6 +1017,38 @@ func rewriteValueRISCV64_OpConstNil(v *Value) bool { + return true + } + } ++func rewriteValueRISCV64_OpCtz16(v *Value) bool { ++ v_0 := v.Args[0] ++ b := v.Block ++ typ := &b.Func.Config.Types ++ // match: (Ctz16 x) ++ // result: (CTZW (ORI [1<<16] x)) ++ for { ++ x := v_0 ++ v.reset(OpRISCV64CTZW) ++ v0 := b.NewValue0(v.Pos, OpRISCV64ORI, typ.UInt32) ++ v0.AuxInt = int64ToAuxInt(1 << 16) ++ v0.AddArg(x) ++ v.AddArg(v0) ++ return true ++ } ++} ++func rewriteValueRISCV64_OpCtz8(v *Value) bool { ++ v_0 := v.Args[0] ++ b := v.Block ++ typ := &b.Func.Config.Types ++ // match: (Ctz8 x) ++ // result: (CTZW (ORI [1<<8] x)) ++ for { ++ x := v_0 ++ v.reset(OpRISCV64CTZW) ++ v0 := b.NewValue0(v.Pos, OpRISCV64ORI, typ.UInt32) ++ v0.AuxInt = int64ToAuxInt(1 << 8) ++ v0.AddArg(x) ++ v.AddArg(v0) ++ return true ++ } ++} + func rewriteValueRISCV64_OpDiv16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] +diff --git a/src/cmd/compile/internal/ssagen/intrinsics.go b/src/cmd/compile/internal/ssagen/intrinsics.go +index c2b35f41c8..ed6683f102 100644 +--- a/src/cmd/compile/internal/ssagen/intrinsics.go ++++ b/src/cmd/compile/internal/ssagen/intrinsics.go +@@ -909,6 +909,30 @@ func initIntrinsics(cfg *intrinsicBuildConfig) { + return s.newValue1(ssa.OpCtz8, types.Types[types.TINT], args[0]) + }, + sys.AMD64, sys.ARM, sys.ARM64, sys.I386, sys.MIPS, sys.Loong64, sys.PPC64, sys.S390X, sys.Wasm) ++ ++ if cfg.goriscv64 >= 22 { ++ addF("math/bits", "TrailingZeros64", ++ func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { ++ return s.newValue1(ssa.OpCtz64, types.Types[types.TINT], args[0]) ++ }, ++ sys.RISCV64) ++ addF("math/bits", "TrailingZeros32", ++ func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { ++ return s.newValue1(ssa.OpCtz32, types.Types[types.TINT], args[0]) ++ }, ++ sys.RISCV64) ++ addF("math/bits", "TrailingZeros16", ++ func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { ++ return s.newValue1(ssa.OpCtz16, types.Types[types.TINT], args[0]) ++ }, ++ sys.RISCV64) ++ addF("math/bits", "TrailingZeros8", ++ func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { ++ return s.newValue1(ssa.OpCtz8, types.Types[types.TINT], args[0]) ++ }, ++ sys.RISCV64) ++ } ++ + alias("math/bits", "ReverseBytes64", "internal/runtime/sys", "Bswap64", all...) + alias("math/bits", "ReverseBytes32", "internal/runtime/sys", "Bswap32", all...) 
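// Illustrative sketch, not part of the patch: why the Ctz16/Ctz8 lowerings OR a
// planted bit (1<<16 or 1<<8) into the value before doing a wider count. A
// non-zero input keeps its trailing-zero run unchanged, and an all-zero input
// counts up to the planted bit, giving exactly 16 (or 8) as math/bits requires.
// Helper name is made up; assumes import "math/bits".
func trailingZeros16ViaPlantedBit(x uint16) int {
	return bits.TrailingZeros32(uint32(x) | 1<<16)
}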
+ addF("math/bits", "ReverseBytes16", +diff --git a/src/cmd/compile/internal/ssagen/intrinsics_test.go b/src/cmd/compile/internal/ssagen/intrinsics_test.go +index 0f8a8a83b4..192f91c183 100644 +--- a/src/cmd/compile/internal/ssagen/intrinsics_test.go ++++ b/src/cmd/compile/internal/ssagen/intrinsics_test.go +@@ -1106,6 +1106,9 @@ var wantIntrinsics = map[testIntrinsicKey]struct{}{ + {"riscv64", "internal/runtime/sys", "GetCallerPC"}: struct{}{}, + {"riscv64", "internal/runtime/sys", "GetCallerSP"}: struct{}{}, + {"riscv64", "internal/runtime/sys", "GetClosurePtr"}: struct{}{}, ++ {"riscv64", "internal/runtime/sys", "TrailingZeros32"}: struct{}{}, ++ {"riscv64", "internal/runtime/sys", "TrailingZeros64"}: struct{}{}, ++ {"riscv64", "internal/runtime/sys", "TrailingZeros8"}: struct{}{}, + {"riscv64", "math", "Abs"}: struct{}{}, + {"riscv64", "math", "Copysign"}: struct{}{}, + {"riscv64", "math", "FMA"}: struct{}{}, +@@ -1122,6 +1125,10 @@ var wantIntrinsics = map[testIntrinsicKey]struct{}{ + {"riscv64", "math/bits", "RotateLeft8"}: struct{}{}, + {"riscv64", "math/bits", "Sub"}: struct{}{}, + {"riscv64", "math/bits", "Sub64"}: struct{}{}, ++ {"riscv64", "math/bits", "TrailingZeros16"}: struct{}{}, ++ {"riscv64", "math/bits", "TrailingZeros32"}: struct{}{}, ++ {"riscv64", "math/bits", "TrailingZeros64"}: struct{}{}, ++ {"riscv64", "math/bits", "TrailingZeros8"}: struct{}{}, + {"riscv64", "runtime", "KeepAlive"}: struct{}{}, + {"riscv64", "runtime", "publicationBarrier"}: struct{}{}, + {"riscv64", "runtime", "slicebytetostringtmp"}: struct{}{}, +@@ -1308,7 +1315,8 @@ var wantIntrinsics = map[testIntrinsicKey]struct{}{ + + func TestIntrinsics(t *testing.T) { + cfg := &intrinsicBuildConfig{ +- goppc64: 10, ++ goppc64: 10, ++ goriscv64: 23, + } + initIntrinsics(cfg) + +diff --git a/test/codegen/mathbits.go b/test/codegen/mathbits.go +index 1cee39283d..786fad3bd9 100644 +--- a/test/codegen/mathbits.go ++++ b/test/codegen/mathbits.go +@@ -356,28 +356,30 @@ func RotateLeftVariable32(n uint32, m int) uint32 { + // ------------------------ // + + func TrailingZeros(n uint) int { ++ // 386:"BSFL" + // amd64/v1,amd64/v2:"BSFQ","MOVL\t\\$64","CMOVQEQ" + // amd64/v3:"TZCNTQ" +- // 386:"BSFL" + // arm:"CLZ" + // arm64:"RBIT","CLZ" + // loong64:"CTZV" +- // s390x:"FLOGR" + // ppc64x/power8:"ANDN","POPCNTD" + // ppc64x/power9: "CNTTZD" ++ // riscv64/rva22u64,riscv64/rva23u64: "CTZ\t" ++ // s390x:"FLOGR" + // wasm:"I64Ctz" + return bits.TrailingZeros(n) + } + + func TrailingZeros64(n uint64) int { ++ // 386:"BSFL","JNE" + // amd64/v1,amd64/v2:"BSFQ","MOVL\t\\$64","CMOVQEQ" + // amd64/v3:"TZCNTQ" +- // 386:"BSFL","JNE" + // arm64:"RBIT","CLZ" + // loong64:"CTZV" +- // s390x:"FLOGR" + // ppc64x/power8:"ANDN","POPCNTD" + // ppc64x/power9: "CNTTZD" ++ // riscv64/rva22u64,riscv64/rva23u64: "CTZ\t" ++ // s390x:"FLOGR" + // wasm:"I64Ctz" + return bits.TrailingZeros64(n) + } +@@ -389,40 +391,43 @@ func TrailingZeros64Subtract(n uint64) int { + } + + func TrailingZeros32(n uint32) int { ++ // 386:"BSFL" + // amd64/v1,amd64/v2:"BTSQ\\t\\$32","BSFQ" + // amd64/v3:"TZCNTL" +- // 386:"BSFL" + // arm:"CLZ" + // arm64:"RBITW","CLZW" + // loong64:"CTZW" +- // s390x:"FLOGR","MOVWZ" + // ppc64x/power8:"ANDN","POPCNTW" + // ppc64x/power9: "CNTTZW" ++ // riscv64/rva22u64,riscv64/rva23u64: "CTZW" ++ // s390x:"FLOGR","MOVWZ" + // wasm:"I64Ctz" + return bits.TrailingZeros32(n) + } + + func TrailingZeros16(n uint16) int { +- // amd64:"BSFL","ORL\\t\\$65536" + // 386:"BSFL\t" ++ // amd64:"BSFL","ORL\\t\\$65536" + // 
arm:"ORR\t\\$65536","CLZ",-"MOVHU\tR" + // arm64:"ORR\t\\$65536","RBITW","CLZW",-"MOVHU\tR",-"RBIT\t",-"CLZ\t" + // loong64:"CTZV" +- // s390x:"FLOGR","OR\t\\$65536" + // ppc64x/power8:"POPCNTW","ADD\t\\$-1" + // ppc64x/power9:"CNTTZD","ORIS\\t\\$1" ++ // riscv64/rva22u64,riscv64/rva23u64: "ORI\t\\$65536","CTZW" ++ // s390x:"FLOGR","OR\t\\$65536" + // wasm:"I64Ctz" + return bits.TrailingZeros16(n) + } + + func TrailingZeros8(n uint8) int { +- // amd64:"BSFL","ORL\\t\\$256" + // 386:"BSFL" ++ // amd64:"BSFL","ORL\\t\\$256" + // arm:"ORR\t\\$256","CLZ",-"MOVBU\tR" + // arm64:"ORR\t\\$256","RBITW","CLZW",-"MOVBU\tR",-"RBIT\t",-"CLZ\t" + // loong64:"CTZV" + // ppc64x/power8:"POPCNTB","ADD\t\\$-1" + // ppc64x/power9:"CNTTZD","OR\t\\$256" ++ // riscv64/rva22u64,riscv64/rva23u64: "ORI\t\\$256","CTZW" + // s390x:"FLOGR","OR\t\\$256" + // wasm:"I64Ctz" + return bits.TrailingZeros8(n) +@@ -469,6 +474,7 @@ func IterateBits16(n uint16) int { + // amd64/v1,amd64/v2:"BSFL",-"BTSL" + // amd64/v3:"TZCNTL" + // arm64:"RBITW","CLZW",-"ORR" ++ // riscv64/rva22u64,riscv64/rva23u64: "CTZ\t",-"ORR" + i += bits.TrailingZeros16(n) + n &= n - 1 + } +@@ -481,6 +487,7 @@ func IterateBits8(n uint8) int { + // amd64/v1,amd64/v2:"BSFL",-"BTSL" + // amd64/v3:"TZCNTL" + // arm64:"RBITW","CLZW",-"ORR" ++ // riscv64/rva22u64,riscv64/rva23u64: "CTZ\t",-"ORR" + i += bits.TrailingZeros8(n) + n &= n - 1 + } +-- +2.50.1 + diff --git a/2034-cmd-compile-internal-ssagen-use-an-alias-for-math-bi.patch b/2034-cmd-compile-internal-ssagen-use-an-alias-for-math-bi.patch new file mode 100644 index 0000000000000000000000000000000000000000..cd2606075dcf38bcab2b1ab0806b8f0bb10ecdee --- /dev/null +++ b/2034-cmd-compile-internal-ssagen-use-an-alias-for-math-bi.patch @@ -0,0 +1,94 @@ +From a64098dcd7fdde1408cc425c6dd38b3d79f14292 Mon Sep 17 00:00:00 2001 +From: Joel Sing +Date: Thu, 20 Mar 2025 00:09:52 +1100 +Subject: [PATCH 34/38] cmd/compile/internal/ssagen: use an alias for + math/bits.OnesCount + +Currently, only amd64 has an intrinsic for math/bits.OnesCount, which +generates the same code as math/bits.OnesCount64. Replace this with +an alias that maps math/bits.OnesCount to math/bits.OnesCount64 on +64 bit platforms. + +Change-Id: Ifa12a2173a201aacd52c3c22b9a948be6e314405 +Reviewed-on: https://go-review.googlesource.com/c/go/+/659215 +Reviewed-by: Keith Randall +Reviewed-by: Cherry Mui +Reviewed-by: Keith Randall +Auto-Submit: Keith Randall +LUCI-TryBot-Result: Go LUCI +--- + src/cmd/compile/internal/ssagen/intrinsics.go | 5 ++--- + src/cmd/compile/internal/ssagen/intrinsics_test.go | 6 ++++++ + 2 files changed, 8 insertions(+), 3 deletions(-) + +diff --git a/src/cmd/compile/internal/ssagen/intrinsics.go b/src/cmd/compile/internal/ssagen/intrinsics.go +index ed6683f102..2b9315f14b 100644 +--- a/src/cmd/compile/internal/ssagen/intrinsics.go ++++ b/src/cmd/compile/internal/ssagen/intrinsics.go +@@ -1123,9 +1123,8 @@ func initIntrinsics(cfg *intrinsicBuildConfig) { + return s.newValue1(ssa.OpPopCount8, types.Types[types.TINT], args[0]) + }, + sys.S390X, sys.PPC64, sys.Wasm) +- addF("math/bits", "OnesCount", +- makeOnesCountAMD64(ssa.OpPopCount64), +- sys.AMD64) ++ alias("math/bits", "OnesCount", "math/bits", "OnesCount64", p8...) 
++ + addF("math/bits", "Mul64", + func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + return s.newValue2(ssa.OpMul64uhilo, types.NewTuple(types.Types[types.TUINT64], types.Types[types.TUINT64]), args[0], args[1]) +diff --git a/src/cmd/compile/internal/ssagen/intrinsics_test.go b/src/cmd/compile/internal/ssagen/intrinsics_test.go +index 192f91c183..a06fdeedb2 100644 +--- a/src/cmd/compile/internal/ssagen/intrinsics_test.go ++++ b/src/cmd/compile/internal/ssagen/intrinsics_test.go +@@ -297,6 +297,7 @@ var wantIntrinsics = map[testIntrinsicKey]struct{}{ + {"arm64", "math/bits", "Len8"}: struct{}{}, + {"arm64", "math/bits", "Mul"}: struct{}{}, + {"arm64", "math/bits", "Mul64"}: struct{}{}, ++ {"arm64", "math/bits", "OnesCount"}: struct{}{}, + {"arm64", "math/bits", "OnesCount16"}: struct{}{}, + {"arm64", "math/bits", "OnesCount32"}: struct{}{}, + {"arm64", "math/bits", "OnesCount64"}: struct{}{}, +@@ -434,6 +435,7 @@ var wantIntrinsics = map[testIntrinsicKey]struct{}{ + {"loong64", "math/bits", "Len16"}: struct{}{}, + {"loong64", "math/bits", "Len32"}: struct{}{}, + {"loong64", "math/bits", "Len64"}: struct{}{}, ++ {"loong64", "math/bits", "OnesCount"}: struct{}{}, + {"loong64", "math/bits", "OnesCount16"}: struct{}{}, + {"loong64", "math/bits", "OnesCount32"}: struct{}{}, + {"loong64", "math/bits", "OnesCount64"}: struct{}{}, +@@ -888,6 +890,7 @@ var wantIntrinsics = map[testIntrinsicKey]struct{}{ + {"ppc64", "math/bits", "Len8"}: struct{}{}, + {"ppc64", "math/bits", "Mul"}: struct{}{}, + {"ppc64", "math/bits", "Mul64"}: struct{}{}, ++ {"ppc64", "math/bits", "OnesCount"}: struct{}{}, + {"ppc64", "math/bits", "OnesCount16"}: struct{}{}, + {"ppc64", "math/bits", "OnesCount32"}: struct{}{}, + {"ppc64", "math/bits", "OnesCount64"}: struct{}{}, +@@ -1011,6 +1014,7 @@ var wantIntrinsics = map[testIntrinsicKey]struct{}{ + {"ppc64le", "math/bits", "Len8"}: struct{}{}, + {"ppc64le", "math/bits", "Mul"}: struct{}{}, + {"ppc64le", "math/bits", "Mul64"}: struct{}{}, ++ {"ppc64le", "math/bits", "OnesCount"}: struct{}{}, + {"ppc64le", "math/bits", "OnesCount16"}: struct{}{}, + {"ppc64le", "math/bits", "OnesCount32"}: struct{}{}, + {"ppc64le", "math/bits", "OnesCount64"}: struct{}{}, +@@ -1232,6 +1236,7 @@ var wantIntrinsics = map[testIntrinsicKey]struct{}{ + {"s390x", "math/bits", "Len8"}: struct{}{}, + {"s390x", "math/bits", "Mul"}: struct{}{}, + {"s390x", "math/bits", "Mul64"}: struct{}{}, ++ {"s390x", "math/bits", "OnesCount"}: struct{}{}, + {"s390x", "math/bits", "OnesCount16"}: struct{}{}, + {"s390x", "math/bits", "OnesCount32"}: struct{}{}, + {"s390x", "math/bits", "OnesCount64"}: struct{}{}, +@@ -1298,6 +1303,7 @@ var wantIntrinsics = map[testIntrinsicKey]struct{}{ + {"wasm", "math/bits", "Len32"}: struct{}{}, + {"wasm", "math/bits", "Len64"}: struct{}{}, + {"wasm", "math/bits", "Len8"}: struct{}{}, ++ {"wasm", "math/bits", "OnesCount"}: struct{}{}, + {"wasm", "math/bits", "OnesCount16"}: struct{}{}, + {"wasm", "math/bits", "OnesCount32"}: struct{}{}, + {"wasm", "math/bits", "OnesCount64"}: struct{}{}, +-- +2.50.1 + diff --git a/2035-cmd-compile-intrinsify-math-bits.Len-on-riscv64.patch b/2035-cmd-compile-intrinsify-math-bits.Len-on-riscv64.patch new file mode 100644 index 0000000000000000000000000000000000000000..1d7099faafbb6908d56d59a4bb9f12cc00ba6291 --- /dev/null +++ b/2035-cmd-compile-intrinsify-math-bits.Len-on-riscv64.patch @@ -0,0 +1,484 @@ +From ff7b5ecf8d8945f0854593a1f78bf7e702bfbe96 Mon Sep 17 00:00:00 2001 +From: Joel Sing +Date: Mon, 24 Feb 2025 00:27:34 +1100 +Subject: 
[PATCH 35/38] cmd/compile: intrinsify math/bits.Len on riscv64 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +For riscv64/rva22u64 and above, we can intrinsify math/bits.Len using the +CLZ/CLZW machine instructions. + +On a StarFive VisionFive 2 with GORISCV64=rva22u64: + + │ clz.b.1 │ clz.b.2 │ + │ sec/op │ sec/op vs base │ +LeadingZeros-4 28.89n ± 0% 12.08n ± 0% -58.19% (p=0.000 n=10) +LeadingZeros8-4 18.79n ± 0% 14.76n ± 0% -21.45% (p=0.000 n=10) +LeadingZeros16-4 25.27n ± 0% 14.76n ± 0% -41.59% (p=0.000 n=10) +LeadingZeros32-4 25.12n ± 0% 12.08n ± 0% -51.92% (p=0.000 n=10) +LeadingZeros64-4 25.89n ± 0% 12.08n ± 0% -53.35% (p=0.000 n=10) +geomean 24.55n 13.09n -46.70% + +Change-Id: I0dda684713dbdf5336af393f5ccbdae861c4f694 +Reviewed-on: https://go-review.googlesource.com/c/go/+/652321 +Reviewed-by: David Chase +Reviewed-by: Meng Zhuo +LUCI-TryBot-Result: Go LUCI +Reviewed-by: Mark Ryan +Reviewed-by: Cherry Mui +--- + src/cmd/compile/internal/riscv64/ssa.go | 2 +- + .../compile/internal/ssa/_gen/RISCV64.rules | 6 ++ + .../compile/internal/ssa/_gen/RISCV64Ops.go | 2 + + src/cmd/compile/internal/ssa/opGen.go | 28 +++++++ + .../compile/internal/ssa/rewriteRISCV64.go | 74 +++++++++++++++++ + src/cmd/compile/internal/ssagen/intrinsics.go | 24 ++++++ + .../internal/ssagen/intrinsics_test.go | 7 ++ + test/codegen/mathbits.go | 83 ++++++++++++------- + 8 files changed, 195 insertions(+), 31 deletions(-) + +diff --git a/src/cmd/compile/internal/riscv64/ssa.go b/src/cmd/compile/internal/riscv64/ssa.go +index 4acef5be43..d47dc47f75 100644 +--- a/src/cmd/compile/internal/riscv64/ssa.go ++++ b/src/cmd/compile/internal/riscv64/ssa.go +@@ -419,7 +419,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) { + ssa.OpRISCV64FMVSX, ssa.OpRISCV64FMVDX, + ssa.OpRISCV64FCVTSW, ssa.OpRISCV64FCVTSL, ssa.OpRISCV64FCVTWS, ssa.OpRISCV64FCVTLS, + ssa.OpRISCV64FCVTDW, ssa.OpRISCV64FCVTDL, ssa.OpRISCV64FCVTWD, ssa.OpRISCV64FCVTLD, ssa.OpRISCV64FCVTDS, ssa.OpRISCV64FCVTSD, +- ssa.OpRISCV64NOT, ssa.OpRISCV64NEG, ssa.OpRISCV64NEGW, ssa.OpRISCV64CTZ, ssa.OpRISCV64CTZW: ++ ssa.OpRISCV64NOT, ssa.OpRISCV64NEG, ssa.OpRISCV64NEGW, ssa.OpRISCV64CLZ, ssa.OpRISCV64CLZW, ssa.OpRISCV64CTZ, ssa.OpRISCV64CTZW: + p := s.Prog(v.Op.Asm()) + p.From.Type = obj.TYPE_REG + p.From.Reg = v.Args[0].Reg() +diff --git a/src/cmd/compile/internal/ssa/_gen/RISCV64.rules b/src/cmd/compile/internal/ssa/_gen/RISCV64.rules +index 2ed946b3b0..f7c0cd713f 100644 +--- a/src/cmd/compile/internal/ssa/_gen/RISCV64.rules ++++ b/src/cmd/compile/internal/ssa/_gen/RISCV64.rules +@@ -225,6 +225,12 @@ + (Ctz16 x) => (CTZW (ORI [1<<16] x)) + (Ctz8 x) => (CTZW (ORI [1<<8] x)) + ++// Bit length (note that these will only be emitted for rva22u64 and above). ++(BitLen64 x) => (SUB (MOVDconst [64]) (CLZ x)) ++(BitLen32 x) => (SUB (MOVDconst [32]) (CLZW x)) ++(BitLen16 x) => (BitLen64 (ZeroExt16to64 x)) ++(BitLen8 x) => (BitLen64 (ZeroExt8to64 x)) ++ + (Less64 ...) => (SLT ...) 
+ (Less32 x y) => (SLT (SignExt32to64 x) (SignExt32to64 y)) + (Less16 x y) => (SLT (SignExt16to64 x) (SignExt16to64 y)) +diff --git a/src/cmd/compile/internal/ssa/_gen/RISCV64Ops.go b/src/cmd/compile/internal/ssa/_gen/RISCV64Ops.go +index 85e9e47e82..cc2302ff37 100644 +--- a/src/cmd/compile/internal/ssa/_gen/RISCV64Ops.go ++++ b/src/cmd/compile/internal/ssa/_gen/RISCV64Ops.go +@@ -229,6 +229,8 @@ func init() { + {name: "AND", argLength: 2, reg: gp21, asm: "AND", commutative: true}, // arg0 & arg1 + {name: "ANDN", argLength: 2, reg: gp21, asm: "ANDN"}, // ^arg0 & arg1 + {name: "ANDI", argLength: 1, reg: gp11, asm: "ANDI", aux: "Int64"}, // arg0 & auxint ++ {name: "CLZ", argLength: 1, reg: gp11, asm: "CLZ"}, // count leading zeros ++ {name: "CLZW", argLength: 1, reg: gp11, asm: "CLZW"}, // count leading zeros of least significant word + {name: "CTZ", argLength: 1, reg: gp11, asm: "CTZ"}, // count trailing zeros + {name: "CTZW", argLength: 1, reg: gp11, asm: "CTZW"}, // count trailing zeros of least significant word + {name: "NOT", argLength: 1, reg: gp11, asm: "NOT"}, // ^arg0 +diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go +index 3d441bcb95..82a66fefe4 100644 +--- a/src/cmd/compile/internal/ssa/opGen.go ++++ b/src/cmd/compile/internal/ssa/opGen.go +@@ -2507,6 +2507,8 @@ const ( + OpRISCV64AND + OpRISCV64ANDN + OpRISCV64ANDI ++ OpRISCV64CLZ ++ OpRISCV64CLZW + OpRISCV64CTZ + OpRISCV64CTZW + OpRISCV64NOT +@@ -33739,6 +33741,32 @@ var opcodeTable = [...]opInfo{ + }, + }, + }, ++ { ++ name: "CLZ", ++ argLen: 1, ++ asm: riscv.ACLZ, ++ reg: regInfo{ ++ inputs: []inputInfo{ ++ {0, 1006632944}, // X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 ++ }, ++ outputs: []outputInfo{ ++ {0, 1006632944}, // X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 ++ }, ++ }, ++ }, ++ { ++ name: "CLZW", ++ argLen: 1, ++ asm: riscv.ACLZW, ++ reg: regInfo{ ++ inputs: []inputInfo{ ++ {0, 1006632944}, // X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 ++ }, ++ outputs: []outputInfo{ ++ {0, 1006632944}, // X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 ++ }, ++ }, ++ }, + { + name: "CTZ", + argLen: 1, +diff --git a/src/cmd/compile/internal/ssa/rewriteRISCV64.go b/src/cmd/compile/internal/ssa/rewriteRISCV64.go +index 1bc6ff51bf..d6ab0451e0 100644 +--- a/src/cmd/compile/internal/ssa/rewriteRISCV64.go ++++ b/src/cmd/compile/internal/ssa/rewriteRISCV64.go +@@ -102,6 +102,14 @@ func rewriteValueRISCV64(v *Value) bool { + return true + case OpAvg64u: + return rewriteValueRISCV64_OpAvg64u(v) ++ case OpBitLen16: ++ return rewriteValueRISCV64_OpBitLen16(v) ++ case OpBitLen32: ++ return rewriteValueRISCV64_OpBitLen32(v) ++ case OpBitLen64: ++ return rewriteValueRISCV64_OpBitLen64(v) ++ case OpBitLen8: ++ return rewriteValueRISCV64_OpBitLen8(v) + case OpClosureCall: + v.Op = OpRISCV64CALLclosure + return true +@@ -930,6 +938,72 @@ func rewriteValueRISCV64_OpAvg64u(v *Value) bool { + return true + } + } ++func rewriteValueRISCV64_OpBitLen16(v *Value) bool { ++ v_0 := v.Args[0] ++ b := v.Block ++ typ := &b.Func.Config.Types ++ // match: (BitLen16 x) ++ // result: (BitLen64 (ZeroExt16to64 x)) ++ for { ++ x := v_0 ++ v.reset(OpBitLen64) ++ v0 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64) ++ v0.AddArg(x) ++ v.AddArg(v0) ++ return true ++ } ++} ++func rewriteValueRISCV64_OpBitLen32(v *Value) bool { ++ v_0 
:= v.Args[0] ++ b := v.Block ++ typ := &b.Func.Config.Types ++ // match: (BitLen32 x) ++ // result: (SUB (MOVDconst [32]) (CLZW x)) ++ for { ++ t := v.Type ++ x := v_0 ++ v.reset(OpRISCV64SUB) ++ v0 := b.NewValue0(v.Pos, OpRISCV64MOVDconst, typ.UInt64) ++ v0.AuxInt = int64ToAuxInt(32) ++ v1 := b.NewValue0(v.Pos, OpRISCV64CLZW, t) ++ v1.AddArg(x) ++ v.AddArg2(v0, v1) ++ return true ++ } ++} ++func rewriteValueRISCV64_OpBitLen64(v *Value) bool { ++ v_0 := v.Args[0] ++ b := v.Block ++ typ := &b.Func.Config.Types ++ // match: (BitLen64 x) ++ // result: (SUB (MOVDconst [64]) (CLZ x)) ++ for { ++ t := v.Type ++ x := v_0 ++ v.reset(OpRISCV64SUB) ++ v0 := b.NewValue0(v.Pos, OpRISCV64MOVDconst, typ.UInt64) ++ v0.AuxInt = int64ToAuxInt(64) ++ v1 := b.NewValue0(v.Pos, OpRISCV64CLZ, t) ++ v1.AddArg(x) ++ v.AddArg2(v0, v1) ++ return true ++ } ++} ++func rewriteValueRISCV64_OpBitLen8(v *Value) bool { ++ v_0 := v.Args[0] ++ b := v.Block ++ typ := &b.Func.Config.Types ++ // match: (BitLen8 x) ++ // result: (BitLen64 (ZeroExt8to64 x)) ++ for { ++ x := v_0 ++ v.reset(OpBitLen64) ++ v0 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64) ++ v0.AddArg(x) ++ v.AddArg(v0) ++ return true ++ } ++} + func rewriteValueRISCV64_OpConst16(v *Value) bool { + // match: (Const16 [val]) + // result: (MOVDconst [int64(val)]) +diff --git a/src/cmd/compile/internal/ssagen/intrinsics.go b/src/cmd/compile/internal/ssagen/intrinsics.go +index 2b9315f14b..11533e42d3 100644 +--- a/src/cmd/compile/internal/ssagen/intrinsics.go ++++ b/src/cmd/compile/internal/ssagen/intrinsics.go +@@ -971,6 +971,30 @@ func initIntrinsics(cfg *intrinsicBuildConfig) { + return s.newValue1(ssa.OpBitLen8, types.Types[types.TINT], args[0]) + }, + sys.AMD64, sys.ARM, sys.ARM64, sys.Loong64, sys.MIPS, sys.PPC64, sys.S390X, sys.Wasm) ++ ++ if cfg.goriscv64 >= 22 { ++ addF("math/bits", "Len64", ++ func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { ++ return s.newValue1(ssa.OpBitLen64, types.Types[types.TINT], args[0]) ++ }, ++ sys.RISCV64) ++ addF("math/bits", "Len32", ++ func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { ++ return s.newValue1(ssa.OpBitLen32, types.Types[types.TINT], args[0]) ++ }, ++ sys.RISCV64) ++ addF("math/bits", "Len16", ++ func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { ++ return s.newValue1(ssa.OpBitLen16, types.Types[types.TINT], args[0]) ++ }, ++ sys.RISCV64) ++ addF("math/bits", "Len8", ++ func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { ++ return s.newValue1(ssa.OpBitLen8, types.Types[types.TINT], args[0]) ++ }, ++ sys.RISCV64) ++ } ++ + alias("math/bits", "Len", "math/bits", "Len64", p8...) + alias("math/bits", "Len", "math/bits", "Len32", p4...) 
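// Illustrative sketch, not part of the patch: the BitLen64/BitLen32 rules
// compute the bit length as the word width minus the leading-zero count, which
// matches the math/bits definition (Len64(0) == 0 because LeadingZeros64(0) == 64).
// Helper name is made up; assumes import "math/bits".
func bitLen64ViaCLZ(x uint64) int {
	return 64 - bits.LeadingZeros64(x)
}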
+ +diff --git a/src/cmd/compile/internal/ssagen/intrinsics_test.go b/src/cmd/compile/internal/ssagen/intrinsics_test.go +index a06fdeedb2..230a7bdf67 100644 +--- a/src/cmd/compile/internal/ssagen/intrinsics_test.go ++++ b/src/cmd/compile/internal/ssagen/intrinsics_test.go +@@ -1110,6 +1110,8 @@ var wantIntrinsics = map[testIntrinsicKey]struct{}{ + {"riscv64", "internal/runtime/sys", "GetCallerPC"}: struct{}{}, + {"riscv64", "internal/runtime/sys", "GetCallerSP"}: struct{}{}, + {"riscv64", "internal/runtime/sys", "GetClosurePtr"}: struct{}{}, ++ {"riscv64", "internal/runtime/sys", "Len64"}: struct{}{}, ++ {"riscv64", "internal/runtime/sys", "Len8"}: struct{}{}, + {"riscv64", "internal/runtime/sys", "TrailingZeros32"}: struct{}{}, + {"riscv64", "internal/runtime/sys", "TrailingZeros64"}: struct{}{}, + {"riscv64", "internal/runtime/sys", "TrailingZeros8"}: struct{}{}, +@@ -1120,6 +1122,11 @@ var wantIntrinsics = map[testIntrinsicKey]struct{}{ + {"riscv64", "math/big", "mulWW"}: struct{}{}, + {"riscv64", "math/bits", "Add"}: struct{}{}, + {"riscv64", "math/bits", "Add64"}: struct{}{}, ++ {"riscv64", "math/bits", "Len"}: struct{}{}, ++ {"riscv64", "math/bits", "Len16"}: struct{}{}, ++ {"riscv64", "math/bits", "Len32"}: struct{}{}, ++ {"riscv64", "math/bits", "Len64"}: struct{}{}, ++ {"riscv64", "math/bits", "Len8"}: struct{}{}, + {"riscv64", "math/bits", "Mul"}: struct{}{}, + {"riscv64", "math/bits", "Mul64"}: struct{}{}, + {"riscv64", "math/bits", "RotateLeft"}: struct{}{}, +diff --git a/test/codegen/mathbits.go b/test/codegen/mathbits.go +index 786fad3bd9..a9cf466780 100644 +--- a/test/codegen/mathbits.go ++++ b/test/codegen/mathbits.go +@@ -15,60 +15,70 @@ import "math/bits" + func LeadingZeros(n uint) int { + // amd64/v1,amd64/v2:"BSRQ" + // amd64/v3:"LZCNTQ", -"BSRQ" +- // s390x:"FLOGR" +- // arm:"CLZ" arm64:"CLZ" ++ // arm64:"CLZ" ++ // arm:"CLZ" + // loong64:"CLZV",-"SUB" + // mips:"CLZ" +- // wasm:"I64Clz" + // ppc64x:"CNTLZD" ++ // riscv64/rva22u64,riscv64/rva23u64:"CLZ\t",-"SUB" ++ // s390x:"FLOGR" ++ // wasm:"I64Clz" + return bits.LeadingZeros(n) + } + + func LeadingZeros64(n uint64) int { + // amd64/v1,amd64/v2:"BSRQ" + // amd64/v3:"LZCNTQ", -"BSRQ" +- // s390x:"FLOGR" +- // arm:"CLZ" arm64:"CLZ" ++ // arm:"CLZ" ++ // arm64:"CLZ" + // loong64:"CLZV",-"SUB" + // mips:"CLZ" +- // wasm:"I64Clz" + // ppc64x:"CNTLZD" ++ // riscv64/rva22u64,riscv64/rva23u64:"CLZ\t",-"ADDI" ++ // s390x:"FLOGR" ++ // wasm:"I64Clz" + return bits.LeadingZeros64(n) + } + + func LeadingZeros32(n uint32) int { + // amd64/v1,amd64/v2:"BSRQ","LEAQ",-"CMOVQEQ" + // amd64/v3: "LZCNTL",- "BSRL" +- // s390x:"FLOGR" +- // arm:"CLZ" arm64:"CLZW" ++ // arm:"CLZ" ++ // arm64:"CLZW" + // loong64:"CLZW",-"SUB" + // mips:"CLZ" +- // wasm:"I64Clz" + // ppc64x:"CNTLZW" ++ // riscv64/rva22u64,riscv64/rva23u64:"CLZW",-"ADDI" ++ // s390x:"FLOGR" ++ // wasm:"I64Clz" + return bits.LeadingZeros32(n) + } + + func LeadingZeros16(n uint16) int { + // amd64/v1,amd64/v2:"BSRL","LEAL",-"CMOVQEQ" + // amd64/v3: "LZCNTL",- "BSRL" +- // s390x:"FLOGR" +- // arm:"CLZ" arm64:"CLZ" ++ // arm64:"CLZ" ++ // arm:"CLZ" + // loong64:"CLZV" + // mips:"CLZ" +- // wasm:"I64Clz" + // ppc64x:"CNTLZD" ++ // riscv64/rva22u64,riscv64/rva23u64:"CLZ\t","ADDI\t\\$-48",-"NEG" ++ // s390x:"FLOGR" ++ // wasm:"I64Clz" + return bits.LeadingZeros16(n) + } + + func LeadingZeros8(n uint8) int { + // amd64/v1,amd64/v2:"BSRL","LEAL",-"CMOVQEQ" + // amd64/v3: "LZCNTL",- "BSRL" +- // s390x:"FLOGR" +- // arm:"CLZ" arm64:"CLZ" ++ // arm64:"CLZ" ++ // arm:"CLZ" + // 
loong64:"CLZV" + // mips:"CLZ" +- // wasm:"I64Clz" + // ppc64x:"CNTLZD" ++ // riscv64/rva22u64,riscv64/rva23u64:"CLZ\t","ADDI\t\\$-56",-"NEG" ++ // s390x:"FLOGR" ++ // wasm:"I64Clz" + return bits.LeadingZeros8(n) + } + +@@ -79,30 +89,35 @@ func LeadingZeros8(n uint8) int { + func Len(n uint) int { + // amd64/v1,amd64/v2:"BSRQ" + // amd64/v3: "LZCNTQ" +- // s390x:"FLOGR" +- // arm:"CLZ" arm64:"CLZ" ++ // arm64:"CLZ" ++ // arm:"CLZ" + // loong64:"CLZV" + // mips:"CLZ" +- // wasm:"I64Clz" + // ppc64x:"SUBC","CNTLZD" ++ // riscv64/rva22u64,riscv64/rva23u64:"CLZ\t","ADDI\t\\$-64" ++ // s390x:"FLOGR" ++ // wasm:"I64Clz" + return bits.Len(n) + } + + func Len64(n uint64) int { + // amd64/v1,amd64/v2:"BSRQ" + // amd64/v3: "LZCNTQ" +- // s390x:"FLOGR" +- // arm:"CLZ" arm64:"CLZ" ++ // arm64:"CLZ" ++ // arm:"CLZ" + // loong64:"CLZV" + // mips:"CLZ" +- // wasm:"I64Clz" + // ppc64x:"SUBC","CNTLZD" ++ // riscv64/rva22u64,riscv64/rva23u64:"CLZ\t","ADDI\t\\$-64" ++ // s390x:"FLOGR" ++ // wasm:"I64Clz" + return bits.Len64(n) + } + + func SubFromLen64(n uint64) int { + // loong64:"CLZV",-"ADD" + // ppc64x:"CNTLZD",-"SUBC" ++ // riscv64/rva22u64,riscv64/rva23u64:"CLZ\t",-"ADDI",-"NEG" + return 64 - bits.Len64(n) + } + +@@ -114,36 +129,42 @@ func CompareWithLen64(n uint64) bool { + func Len32(n uint32) int { + // amd64/v1,amd64/v2:"BSRQ","LEAQ",-"CMOVQEQ" + // amd64/v3: "LZCNTL" +- // s390x:"FLOGR" +- // arm:"CLZ" arm64:"CLZ" ++ // arm64:"CLZ" ++ // arm:"CLZ" + // loong64:"CLZW" + // mips:"CLZ" +- // wasm:"I64Clz" + // ppc64x: "CNTLZW" ++ // riscv64/rva22u64,riscv64/rva23u64:"CLZW","ADDI\t\\$-32" ++ // s390x:"FLOGR" ++ // wasm:"I64Clz" + return bits.Len32(n) + } + + func Len16(n uint16) int { + // amd64/v1,amd64/v2:"BSRL","LEAL",-"CMOVQEQ" + // amd64/v3: "LZCNTL" +- // s390x:"FLOGR" +- // arm:"CLZ" arm64:"CLZ" ++ // arm64:"CLZ" ++ // arm:"CLZ" + // loong64:"CLZV" + // mips:"CLZ" +- // wasm:"I64Clz" + // ppc64x:"SUBC","CNTLZD" ++ // riscv64/rva22u64,riscv64/rva23u64:"CLZ\t","ADDI\t\\$-64" ++ // s390x:"FLOGR" ++ // wasm:"I64Clz" + return bits.Len16(n) + } + + func Len8(n uint8) int { + // amd64/v1,amd64/v2:"BSRL","LEAL",-"CMOVQEQ" + // amd64/v3: "LZCNTL" +- // s390x:"FLOGR" +- // arm:"CLZ" arm64:"CLZ" ++ // arm64:"CLZ" ++ // arm:"CLZ" + // loong64:"CLZV" + // mips:"CLZ" +- // wasm:"I64Clz" + // ppc64x:"SUBC","CNTLZD" ++ // riscv64/rva22u64,riscv64/rva23u64:"CLZ\t","ADDI\t\\$-64" ++ // s390x:"FLOGR" ++ // wasm:"I64Clz" + return bits.Len8(n) + } + +@@ -451,6 +472,7 @@ func IterateBits64(n uint64) int { + for n != 0 { + // amd64/v1,amd64/v2:"BSFQ",-"CMOVEQ" + // amd64/v3:"TZCNTQ" ++ // riscv64/rva22u64,riscv64/rva23u64: "CTZ\t" + i += bits.TrailingZeros64(n) + n &= n - 1 + } +@@ -462,6 +484,7 @@ func IterateBits32(n uint32) int { + for n != 0 { + // amd64/v1,amd64/v2:"BSFL",-"BTSQ" + // amd64/v3:"TZCNTL" ++ // riscv64/rva22u64,riscv64/rva23u64: "CTZ\t" + i += bits.TrailingZeros32(n) + n &= n - 1 + } +-- +2.50.1 + diff --git a/2036-cmd-internal-obj-riscv-internal-bytealg-synthesize-M.patch b/2036-cmd-internal-obj-riscv-internal-bytealg-synthesize-M.patch new file mode 100644 index 0000000000000000000000000000000000000000..2b4e58a2ed35b3ae06732da94f580d415b252531 --- /dev/null +++ b/2036-cmd-internal-obj-riscv-internal-bytealg-synthesize-M.patch @@ -0,0 +1,453 @@ +From 2cc0798d8406a176bf39d8ec7e258e9f187f656b Mon Sep 17 00:00:00 2001 +From: Joel Sing +Date: Thu, 6 Feb 2025 23:29:57 +1100 +Subject: [PATCH 36/38] cmd/internal/obj/riscv,internal/bytealg: synthesize + MIN/MAX/MINU/MAXU instructions + +Provide a 
synthesized version of the MIN/MAX/MINU/MAXU instructions +if they're not natively available. This allows these instructions to +be used in assembly unconditionally. + +Use MIN in internal/bytealg.compare. + +Cq-Include-Trybots: luci.golang.try:gotip-linux-riscv64 +Change-Id: I8a5a3a59f0a9205e136fc3d673b23eaf3ca469f8 +Reviewed-on: https://go-review.googlesource.com/c/go/+/653295 +Reviewed-by: Mark Ryan +Reviewed-by: Cherry Mui +Reviewed-by: Dmitri Shuralyov +Reviewed-by: Meng Zhuo +LUCI-TryBot-Result: Go LUCI +--- + src/cmd/asm/internal/asm/testdata/riscv64.s | 16 +- + src/cmd/internal/obj/riscv/asm_test.go | 14 ++ + src/cmd/internal/obj/riscv/obj.go | 44 ++++++ + .../riscv/testdata/testminmax/minmax_test.go | 140 ++++++++++++++++++ + .../riscv/testdata/testminmax/minmax_test.s | 131 ++++++++++++++++ + src/internal/bytealg/compare_riscv64.s | 8 +- + 6 files changed, 339 insertions(+), 14 deletions(-) + create mode 100644 src/cmd/internal/obj/riscv/testdata/testminmax/minmax_test.go + create mode 100644 src/cmd/internal/obj/riscv/testdata/testminmax/minmax_test.s + +diff --git a/src/cmd/asm/internal/asm/testdata/riscv64.s b/src/cmd/asm/internal/asm/testdata/riscv64.s +index 583516efcb..12c1cafaa0 100644 +--- a/src/cmd/asm/internal/asm/testdata/riscv64.s ++++ b/src/cmd/asm/internal/asm/testdata/riscv64.s +@@ -376,14 +376,14 @@ start: + CPOPW X23, X24 // 1b9c2b60 + CTZ X24, X25 // 931c1c60 + CTZW X25, X26 // 1b9d1c60 +- MAX X26, X28, X29 // b36eae0b +- MAX X26, X28 // 336eae0b +- MAXU X28, X29, X30 // 33ffce0b +- MAXU X28, X29 // b3fece0b +- MIN X29, X30, X5 // b342df0b +- MIN X29, X30 // 334fdf0b +- MINU X30, X5, X6 // 33d3e20b +- MINU X30, X5 // b3d2e20b ++ MAX X26, X28, X29 // b36eae0b or b32fae01b30ff041b34eae01b3fedf01b34ede01 ++ MAX X26, X28 // 336eae0b or b32fcd01b30ff041334ecd0133fecf01334ecd01 ++ MAXU X28, X29, X30 // 33ffce0b or b3bfce01b30ff04133cfce0133ffef0133cfee01 ++ MAXU X28, X29 // b3fece0b or b33fde01b30ff041b34ede01b3fedf01b34ede01 ++ MIN X29, X30, X5 // b342df0b or b3afee01b30ff041b342df01b3f25f00b3425f00 ++ MIN X29, X30 // 334fdf0b or b32fdf01b30ff04133cfee0133ffef0133cfee01 ++ MINU X30, X5, X6 // 33d3e20b or b33f5f00b30ff04133c3e20133f36f0033c36200 ++ MINU X30, X5 // b3d2e20b or b3bfe201b30ff041b3425f00b3f25f00b3425f00 + ORN X6, X7, X8 // 33e46340 or 1344f3ff33e48300 + ORN X6, X7 // b3e36340 or 934ff3ffb3e3f301 + SEXTB X16, X17 // 93184860 +diff --git a/src/cmd/internal/obj/riscv/asm_test.go b/src/cmd/internal/obj/riscv/asm_test.go +index c2e1e12acc..f40e57fa64 100644 +--- a/src/cmd/internal/obj/riscv/asm_test.go ++++ b/src/cmd/internal/obj/riscv/asm_test.go +@@ -264,6 +264,20 @@ func TestBranch(t *testing.T) { + } + } + ++func TestMinMax(t *testing.T) { ++ if runtime.GOARCH != "riscv64" { ++ t.Skip("Requires riscv64 to run") ++ } ++ ++ testenv.MustHaveGoBuild(t) ++ ++ cmd := testenv.Command(t, testenv.GoToolPath(t), "test") ++ cmd.Dir = "testdata/testminmax" ++ if out, err := testenv.CleanCmdEnv(cmd).CombinedOutput(); err != nil { ++ t.Errorf("Min max test failed: %v\n%s", err, out) ++ } ++} ++ + func TestPCAlign(t *testing.T) { + dir := t.TempDir() + tmpfile := filepath.Join(dir, "x.s") +diff --git a/src/cmd/internal/obj/riscv/obj.go b/src/cmd/internal/obj/riscv/obj.go +index cb4612ca91..732b76cc75 100644 +--- a/src/cmd/internal/obj/riscv/obj.go ++++ b/src/cmd/internal/obj/riscv/obj.go +@@ -3273,6 +3273,47 @@ func instructionsForRotate(p *obj.Prog, ins *instruction) []*instruction { + } + } + ++// instructionsForMinMax returns the machine instructions for an integer 
minimum or maximum. ++func instructionsForMinMax(p *obj.Prog, ins *instruction) []*instruction { ++ if buildcfg.GORISCV64 >= 22 { ++ // Minimum and maximum instructions are supported natively. ++ return []*instruction{ins} ++ } ++ ++ // Generate a move for identical inputs. ++ if ins.rs1 == ins.rs2 { ++ ins.as, ins.rs2, ins.imm = AADDI, obj.REG_NONE, 0 ++ return []*instruction{ins} ++ } ++ ++ // Ensure that if one of the source registers is the same as the destination, ++ // it is processed first. ++ if ins.rs1 == ins.rd { ++ ins.rs1, ins.rs2 = ins.rs2, ins.rs1 ++ } ++ sltReg1, sltReg2 := ins.rs2, ins.rs1 ++ ++ // MIN -> SLT/SUB/XOR/AND/XOR ++ // MAX -> SLT/SUB/XOR/AND/XOR with swapped inputs to SLT ++ switch ins.as { ++ case AMIN: ++ ins.as = ASLT ++ case AMAX: ++ ins.as, sltReg1, sltReg2 = ASLT, sltReg2, sltReg1 ++ case AMINU: ++ ins.as = ASLTU ++ case AMAXU: ++ ins.as, sltReg1, sltReg2 = ASLTU, sltReg2, sltReg1 ++ } ++ return []*instruction{ ++ &instruction{as: ins.as, rs1: sltReg1, rs2: sltReg2, rd: REG_TMP}, ++ &instruction{as: ASUB, rs1: REG_ZERO, rs2: REG_TMP, rd: REG_TMP}, ++ &instruction{as: AXOR, rs1: ins.rs1, rs2: ins.rs2, rd: ins.rd}, ++ &instruction{as: AAND, rs1: REG_TMP, rs2: ins.rd, rd: ins.rd}, ++ &instruction{as: AXOR, rs1: ins.rs1, rs2: ins.rd, rd: ins.rd}, ++ } ++} ++ + // instructionsForProg returns the machine instructions for an *obj.Prog. + func instructionsForProg(p *obj.Prog) []*instruction { + ins := instructionForProg(p) +@@ -3522,6 +3563,9 @@ func instructionsForProg(p *obj.Prog) []*instruction { + ins.as = AXOR + inss = append(inss, &instruction{as: AXORI, rs1: ins.rd, rs2: obj.REG_NONE, rd: ins.rd, imm: -1}) + ++ case AMIN, AMAX, AMINU, AMAXU: ++ inss = instructionsForMinMax(p, ins) ++ + case AVSETVLI, AVSETIVLI: + ins.rs1, ins.rs2 = ins.rs2, obj.REG_NONE + vtype, err := EncodeVectorType(p.RestArgs[0].Offset, p.RestArgs[1].Offset, p.RestArgs[2].Offset, p.RestArgs[3].Offset) +diff --git a/src/cmd/internal/obj/riscv/testdata/testminmax/minmax_test.go b/src/cmd/internal/obj/riscv/testdata/testminmax/minmax_test.go +new file mode 100644 +index 0000000000..46d321147b +--- /dev/null ++++ b/src/cmd/internal/obj/riscv/testdata/testminmax/minmax_test.go +@@ -0,0 +1,140 @@ ++// Copyright 2025 The Go Authors. All rights reserved. ++// Use of this source code is governed by a BSD-style ++// license that can be found in the LICENSE file. 
++ ++//go:build riscv64 ++ ++package testminmax ++ ++import ( ++ "testing" ++) ++ ++func testMIN1(a int64) (r int64) ++func testMIN2(a, b int64) (r int64) ++func testMIN3(a, b int64) (r int64) ++func testMIN4(a, b int64) (r int64) ++func testMAX1(a int64) (r int64) ++func testMAX2(a, b int64) (r int64) ++func testMAX3(a, b int64) (r int64) ++func testMAX4(a, b int64) (r int64) ++func testMINU1(a int64) (r int64) ++func testMINU2(a, b int64) (r int64) ++func testMINU3(a, b int64) (r int64) ++func testMINU4(a, b int64) (r int64) ++func testMAXU1(a int64) (r int64) ++func testMAXU2(a, b int64) (r int64) ++func testMAXU3(a, b int64) (r int64) ++func testMAXU4(a, b int64) (r int64) ++ ++func TestMin(t *testing.T) { ++ tests := []struct { ++ a int64 ++ b int64 ++ want int64 ++ }{ ++ {1, 2, 1}, ++ {2, 1, 1}, ++ {2, 2, 2}, ++ {1, -1, -1}, ++ {-1, 1, -1}, ++ } ++ for _, test := range tests { ++ if got := testMIN1(test.a); got != test.a { ++ t.Errorf("Assembly testMIN1 %v = %v, want %v", test.a, got, test.a) ++ } ++ if got := testMIN2(test.a, test.b); got != test.want { ++ t.Errorf("Assembly testMIN2 %v, %v = %v, want %v", test.a, test.b, got, test.want) ++ } ++ if got := testMIN3(test.a, test.b); got != test.want { ++ t.Errorf("Assembly testMIN3 %v, %v = %v, want %v", test.a, test.b, got, test.want) ++ } ++ if got := testMIN4(test.a, test.b); got != test.want { ++ t.Errorf("Assembly testMIN4 %v, %v = %v, want %v", test.a, test.b, got, test.want) ++ } ++ } ++} ++ ++func TestMax(t *testing.T) { ++ tests := []struct { ++ a int64 ++ b int64 ++ want int64 ++ }{ ++ {1, 2, 2}, ++ {2, 1, 2}, ++ {2, 2, 2}, ++ {1, -1, 1}, ++ {-1, 1, 1}, ++ } ++ for _, test := range tests { ++ if got := testMAX1(test.a); got != test.a { ++ t.Errorf("Assembly testMAX1 %v = %v, want %v", test.a, got, test.a) ++ } ++ if got := testMAX2(test.a, test.b); got != test.want { ++ t.Errorf("Assembly testMAX2 %v, %v = %v, want %v", test.a, test.b, got, test.want) ++ } ++ if got := testMAX3(test.a, test.b); got != test.want { ++ t.Errorf("Assembly testMAX3 %v, %v = %v, want %v", test.a, test.b, got, test.want) ++ } ++ if got := testMAX4(test.a, test.b); got != test.want { ++ t.Errorf("Assembly testMAX4 %v, %v = %v, want %v", test.a, test.b, got, test.want) ++ } ++ } ++} ++ ++func TestMinU(t *testing.T) { ++ tests := []struct { ++ a int64 ++ b int64 ++ want int64 ++ }{ ++ {1, 2, 1}, ++ {2, 1, 1}, ++ {2, 2, 2}, ++ {1, -1, 1}, ++ {-1, 1, 1}, ++ } ++ for _, test := range tests { ++ if got := testMINU1(test.a); got != test.a { ++ t.Errorf("Assembly testMINU1 %v = %v, want %v", test.a, got, test.a) ++ } ++ if got := testMINU2(test.a, test.b); got != test.want { ++ t.Errorf("Assembly testMINU2 %v, %v = %v, want %v", test.a, test.b, got, test.want) ++ } ++ if got := testMINU3(test.a, test.b); got != test.want { ++ t.Errorf("Assembly testMINU3 %v, %v = %v, want %v", test.a, test.b, got, test.want) ++ } ++ if got := testMINU4(test.a, test.b); got != test.want { ++ t.Errorf("Assembly testMINU4 %v, %v = %v, want %v", test.a, test.b, got, test.want) ++ } ++ } ++} ++ ++func TestMaxU(t *testing.T) { ++ tests := []struct { ++ a int64 ++ b int64 ++ want int64 ++ }{ ++ {1, 2, 2}, ++ {2, 1, 2}, ++ {2, 2, 2}, ++ {1, -1, -1}, ++ {-1, 1, -1}, ++ } ++ for _, test := range tests { ++ if got := testMAXU1(test.a); got != test.a { ++ t.Errorf("Assembly testMAXU1 %v = %v, want %v", test.a, got, test.a) ++ } ++ if got := testMAXU2(test.a, test.b); got != test.want { ++ t.Errorf("Assembly testMAXU2 %v, %v = %v, want %v", test.a, test.b, got, test.want) ++ } ++ if got 
:= testMAXU3(test.a, test.b); got != test.want { ++ t.Errorf("Assembly testMAXU3 %v, %v = %v, want %v", test.a, test.b, got, test.want) ++ } ++ if got := testMAXU4(test.a, test.b); got != test.want { ++ t.Errorf("Assembly testMAXU4 %v, %v = %v, want %v", test.a, test.b, got, test.want) ++ } ++ } ++} +diff --git a/src/cmd/internal/obj/riscv/testdata/testminmax/minmax_test.s b/src/cmd/internal/obj/riscv/testdata/testminmax/minmax_test.s +new file mode 100644 +index 0000000000..9d295791a5 +--- /dev/null ++++ b/src/cmd/internal/obj/riscv/testdata/testminmax/minmax_test.s +@@ -0,0 +1,131 @@ ++// Copyright 2025 The Go Authors. All rights reserved. ++// Use of this source code is governed by a BSD-style ++// license that can be found in the LICENSE file. ++ ++//go:build riscv64 ++ ++#include "textflag.h" ++ ++// func testMIN1(a int64) (r int64) ++TEXT ·testMIN1(SB),NOSPLIT,$0-16 ++ MOV a+0(FP), X5 ++ MIN X5, X5, X6 ++ MOV X6, r+8(FP) ++ RET ++ ++// func testMIN2(a, b int64) (r int64) ++TEXT ·testMIN2(SB),NOSPLIT,$0-24 ++ MOV a+0(FP), X5 ++ MOV b+8(FP), X6 ++ MIN X5, X6, X6 ++ MOV X6, r+16(FP) ++ RET ++ ++// func testMIN3(a, b int64) (r int64) ++TEXT ·testMIN3(SB),NOSPLIT,$0-24 ++ MOV a+0(FP), X5 ++ MOV b+8(FP), X6 ++ MIN X6, X5, X5 ++ MOV X5, r+16(FP) ++ RET ++ ++// func testMIN4(a, b int64) (r int64) ++TEXT ·testMIN4(SB),NOSPLIT,$0-24 ++ MOV a+0(FP), X5 ++ MOV b+8(FP), X6 ++ MIN X5, X6, X7 ++ MOV X7, r+16(FP) ++ RET ++ ++// func testMAX1(a int64) (r int64) ++TEXT ·testMAX1(SB),NOSPLIT,$0-16 ++ MOV a+0(FP), X5 ++ MAX X5, X5, X6 ++ MOV X6, r+8(FP) ++ RET ++ ++// func testMAX2(a, b int64) (r int64) ++TEXT ·testMAX2(SB),NOSPLIT,$0-24 ++ MOV a+0(FP), X5 ++ MOV b+8(FP), X6 ++ MAX X5, X6, X6 ++ MOV X6, r+16(FP) ++ RET ++ ++// func testMAX3(a, b int64) (r int64) ++TEXT ·testMAX3(SB),NOSPLIT,$0-24 ++ MOV a+0(FP), X5 ++ MOV b+8(FP), X6 ++ MAX X6, X5, X5 ++ MOV X5, r+16(FP) ++ RET ++ ++// func testMAX4(a, b int64) (r int64) ++TEXT ·testMAX4(SB),NOSPLIT,$0-24 ++ MOV a+0(FP), X5 ++ MOV b+8(FP), X6 ++ MAX X5, X6, X7 ++ MOV X7, r+16(FP) ++ RET ++ ++// func testMINU1(a int64) (r int64) ++TEXT ·testMINU1(SB),NOSPLIT,$0-16 ++ MOV a+0(FP), X5 ++ MINU X5, X5, X6 ++ MOV X6, r+8(FP) ++ RET ++ ++// func testMINU2(a, b int64) (r int64) ++TEXT ·testMINU2(SB),NOSPLIT,$0-24 ++ MOV a+0(FP), X5 ++ MOV b+8(FP), X6 ++ MINU X5, X6, X6 ++ MOV X6, r+16(FP) ++ RET ++ ++// func testMINU3(a, b int64) (r int64) ++TEXT ·testMINU3(SB),NOSPLIT,$0-24 ++ MOV a+0(FP), X5 ++ MOV b+8(FP), X6 ++ MINU X6, X5, X5 ++ MOV X5, r+16(FP) ++ RET ++ ++// func testMINU4(a, b int64) (r int64) ++TEXT ·testMINU4(SB),NOSPLIT,$0-24 ++ MOV a+0(FP), X5 ++ MOV b+8(FP), X6 ++ MINU X5, X6, X7 ++ MOV X7, r+16(FP) ++ RET ++ ++// func testMAXU1(a int64) (r int64) ++TEXT ·testMAXU1(SB),NOSPLIT,$0-16 ++ MOV a+0(FP), X5 ++ MAXU X5, X5, X6 ++ MOV X6, r+8(FP) ++ RET ++ ++// func testMAXU2(a, b int64) (r int64) ++TEXT ·testMAXU2(SB),NOSPLIT,$0-24 ++ MOV a+0(FP), X5 ++ MOV b+8(FP), X6 ++ MAXU X5, X6, X6 ++ MOV X6, r+16(FP) ++ RET ++ ++// func testMAXU3(a, b int64) (r int64) ++TEXT ·testMAXU3(SB),NOSPLIT,$0-24 ++ MOV a+0(FP), X5 ++ MOV b+8(FP), X6 ++ MAXU X6, X5, X5 ++ MOV X5, r+16(FP) ++ RET ++ ++// func testMAXU4(a, b int64) (r int64) ++TEXT ·testMAXU4(SB),NOSPLIT,$0-24 ++ MOV a+0(FP), X5 ++ MOV b+8(FP), X6 ++ MAXU X5, X6, X7 ++ MOV X7, r+16(FP) ++ RET +diff --git a/src/internal/bytealg/compare_riscv64.s b/src/internal/bytealg/compare_riscv64.s +index b6166a8286..3b1523dfbf 100644 +--- a/src/internal/bytealg/compare_riscv64.s ++++ 
b/src/internal/bytealg/compare_riscv64.s +@@ -29,15 +29,11 @@ TEXT runtime·cmpstring(SB),NOSPLIT|NOFRAME,$0-40 + // X11 length of a + // X12 points to start of b + // X13 length of b +-// for non-regabi X14 points to the address to store the return value (-1/0/1) +-// for regabi the return value in X10 ++// return value in X10 (-1/0/1) + TEXT compare<>(SB),NOSPLIT|NOFRAME,$0 + BEQ X10, X12, cmp_len + +- MOV X11, X5 +- BGE X13, X5, use_a_len // X5 = min(len(a), len(b)) +- MOV X13, X5 +-use_a_len: ++ MIN X11, X13, X5 + BEQZ X5, cmp_len + + MOV $16, X6 +-- +2.50.1 + diff --git a/2037-cmd-compile-internal-ssa-optimise-more-branches-with.patch b/2037-cmd-compile-internal-ssa-optimise-more-branches-with.patch new file mode 100644 index 0000000000000000000000000000000000000000..a9f0a293998f5be824602cc75a281c19bb37e341 --- /dev/null +++ b/2037-cmd-compile-internal-ssa-optimise-more-branches-with.patch @@ -0,0 +1,125 @@ +From c43fb5b91f46060cacf6de97b651f000e352118a Mon Sep 17 00:00:00 2001 +From: Joel Sing +Date: Mon, 19 Aug 2024 23:57:56 +1000 +Subject: [PATCH 37/38] cmd/compile/internal/ssa: optimise more branches with + zero on riscv64 + +Optimise more branches with zero on riscv64. In particular, BLTU with +zero occurs with IsInBounds checks for index zero. This currently results +in two instructions and requires an additional register: + + li t2, 0 + bltu t2, t1, 0x174b4 + +This is equivalent to checking if the bounds is not equal to zero. With +this change: + + bnez t1, 0x174c0 + +This removes more than 500 instructions from the Go binary on riscv64. + +Change-Id: I6cd861d853e3ef270bd46dacecdfaa205b1c4644 +Reviewed-on: https://go-review.googlesource.com/c/go/+/606715 +LUCI-TryBot-Result: Go LUCI +Reviewed-by: Meng Zhuo +Reviewed-by: Cherry Mui +Reviewed-by: Dmitri Shuralyov +--- + .../compile/internal/ssa/_gen/RISCV64.rules | 18 +++++++------- + .../compile/internal/ssa/rewriteRISCV64.go | 24 +++++++++++++++++++ + test/codegen/compare_and_branch.go | 10 ++++++++ + 3 files changed, 44 insertions(+), 8 deletions(-) + +diff --git a/src/cmd/compile/internal/ssa/_gen/RISCV64.rules b/src/cmd/compile/internal/ssa/_gen/RISCV64.rules +index f7c0cd713f..86023001aa 100644 +--- a/src/cmd/compile/internal/ssa/_gen/RISCV64.rules ++++ b/src/cmd/compile/internal/ssa/_gen/RISCV64.rules +@@ -594,14 +594,16 @@ + (BNEZ (SLTIU [x] y) yes no) => (BLTU y (MOVDconst [x]) yes no) + + // Convert branch with zero to more optimal branch zero. +-(BEQ (MOVDconst [0]) cond yes no) => (BEQZ cond yes no) +-(BEQ cond (MOVDconst [0]) yes no) => (BEQZ cond yes no) +-(BNE (MOVDconst [0]) cond yes no) => (BNEZ cond yes no) +-(BNE cond (MOVDconst [0]) yes no) => (BNEZ cond yes no) +-(BLT (MOVDconst [0]) cond yes no) => (BGTZ cond yes no) +-(BLT cond (MOVDconst [0]) yes no) => (BLTZ cond yes no) +-(BGE (MOVDconst [0]) cond yes no) => (BLEZ cond yes no) +-(BGE cond (MOVDconst [0]) yes no) => (BGEZ cond yes no) ++(BEQ (MOVDconst [0]) cond yes no) => (BEQZ cond yes no) ++(BEQ cond (MOVDconst [0]) yes no) => (BEQZ cond yes no) ++(BNE (MOVDconst [0]) cond yes no) => (BNEZ cond yes no) ++(BNE cond (MOVDconst [0]) yes no) => (BNEZ cond yes no) ++(BLT (MOVDconst [0]) cond yes no) => (BGTZ cond yes no) ++(BLT cond (MOVDconst [0]) yes no) => (BLTZ cond yes no) ++(BLTU (MOVDconst [0]) cond yes no) => (BNEZ cond yes no) ++(BGE (MOVDconst [0]) cond yes no) => (BLEZ cond yes no) ++(BGE cond (MOVDconst [0]) yes no) => (BGEZ cond yes no) ++(BGEU (MOVDconst [0]) cond yes no) => (BEQZ cond yes no) + + // Remove redundant NEG from SEQZ/SNEZ. 
+ (SEQZ (NEG x)) => (SEQZ x) +diff --git a/src/cmd/compile/internal/ssa/rewriteRISCV64.go b/src/cmd/compile/internal/ssa/rewriteRISCV64.go +index d6ab0451e0..0dfa93cbe1 100644 +--- a/src/cmd/compile/internal/ssa/rewriteRISCV64.go ++++ b/src/cmd/compile/internal/ssa/rewriteRISCV64.go +@@ -9449,6 +9449,18 @@ func rewriteBlockRISCV64(b *Block) bool { + b.resetWithControl(BlockRISCV64BGEZ, cond) + return true + } ++ case BlockRISCV64BGEU: ++ // match: (BGEU (MOVDconst [0]) cond yes no) ++ // result: (BEQZ cond yes no) ++ for b.Controls[0].Op == OpRISCV64MOVDconst { ++ v_0 := b.Controls[0] ++ if auxIntToInt64(v_0.AuxInt) != 0 { ++ break ++ } ++ cond := b.Controls[1] ++ b.resetWithControl(BlockRISCV64BEQZ, cond) ++ return true ++ } + case BlockRISCV64BLT: + // match: (BLT (MOVDconst [0]) cond yes no) + // result: (BGTZ cond yes no) +@@ -9472,6 +9484,18 @@ func rewriteBlockRISCV64(b *Block) bool { + b.resetWithControl(BlockRISCV64BLTZ, cond) + return true + } ++ case BlockRISCV64BLTU: ++ // match: (BLTU (MOVDconst [0]) cond yes no) ++ // result: (BNEZ cond yes no) ++ for b.Controls[0].Op == OpRISCV64MOVDconst { ++ v_0 := b.Controls[0] ++ if auxIntToInt64(v_0.AuxInt) != 0 { ++ break ++ } ++ cond := b.Controls[1] ++ b.resetWithControl(BlockRISCV64BNEZ, cond) ++ return true ++ } + case BlockRISCV64BNE: + // match: (BNE (MOVDconst [0]) cond yes no) + // result: (BNEZ cond yes no) +diff --git a/test/codegen/compare_and_branch.go b/test/codegen/compare_and_branch.go +index c121f1d2cc..759dd26358 100644 +--- a/test/codegen/compare_and_branch.go ++++ b/test/codegen/compare_and_branch.go +@@ -241,4 +241,14 @@ func ui64x0(x chan uint64) { + for <-x < 1 { + dummy() + } ++ ++ // riscv64:"BNEZ" ++ for 0 < <-x { ++ dummy() ++ } ++ ++ // riscv64:"BEQZ" ++ for 0 >= <-x { ++ dummy() ++ } + } +-- +2.50.1 + diff --git a/2038-cmd-compile-intrinsify-math-bits.Bswap-on-riscv64.patch b/2038-cmd-compile-intrinsify-math-bits.Bswap-on-riscv64.patch new file mode 100644 index 0000000000000000000000000000000000000000..e2db77663b98d4fae9a7f05d818f7623040e648e --- /dev/null +++ b/2038-cmd-compile-intrinsify-math-bits.Bswap-on-riscv64.patch @@ -0,0 +1,346 @@ +From 9cfbbf81898dde57986917a4c3492ef275c1911c Mon Sep 17 00:00:00 2001 +From: Joel Sing +Date: Thu, 20 Mar 2025 01:09:23 +1100 +Subject: [PATCH 38/38] cmd/compile: intrinsify math/bits.Bswap on riscv64 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +For riscv64/rva22u64 and above, we can intrinsify math/bits.Bswap +using the REV8 machine instruction. 
+ +On a StarFive VisionFive 2 with GORISCV64=rva22u64: + + │ rb.1 │ rb.2 │ + │ sec/op │ sec/op vs base │ +ReverseBytes-4 18.790n ± 0% 4.026n ± 0% -78.57% (p=0.000 n=10) +ReverseBytes16-4 6.710n ± 0% 5.368n ± 0% -20.00% (p=0.000 n=10) +ReverseBytes32-4 13.420n ± 0% 5.368n ± 0% -60.00% (p=0.000 n=10) +ReverseBytes64-4 17.450n ± 0% 4.026n ± 0% -76.93% (p=0.000 n=10) +geomean 13.11n 4.649n -64.54% + +Change-Id: I26eee34270b1721f7304bb1cddb0fda129b20ece +Reviewed-on: https://go-review.googlesource.com/c/go/+/660855 +Reviewed-by: Mark Ryan +LUCI-TryBot-Result: Go LUCI +Reviewed-by: Meng Zhuo +Reviewed-by: Carlos Amedee +Reviewed-by: Junyang Shao +--- + src/cmd/compile/internal/riscv64/ssa.go | 3 +- + .../compile/internal/ssa/_gen/RISCV64.rules | 5 ++ + .../compile/internal/ssa/_gen/RISCV64Ops.go | 1 + + src/cmd/compile/internal/ssa/opGen.go | 14 +++++ + .../compile/internal/ssa/rewriteRISCV64.go | 39 ++++++++++++++ + src/cmd/compile/internal/ssagen/intrinsics.go | 51 +++++++++++++++---- + .../internal/ssagen/intrinsics_test.go | 5 ++ + test/codegen/mathbits.go | 20 +++++--- + 8 files changed, 118 insertions(+), 20 deletions(-) + +diff --git a/src/cmd/compile/internal/riscv64/ssa.go b/src/cmd/compile/internal/riscv64/ssa.go +index d47dc47f75..97c56a3646 100644 +--- a/src/cmd/compile/internal/riscv64/ssa.go ++++ b/src/cmd/compile/internal/riscv64/ssa.go +@@ -419,7 +419,8 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) { + ssa.OpRISCV64FMVSX, ssa.OpRISCV64FMVDX, + ssa.OpRISCV64FCVTSW, ssa.OpRISCV64FCVTSL, ssa.OpRISCV64FCVTWS, ssa.OpRISCV64FCVTLS, + ssa.OpRISCV64FCVTDW, ssa.OpRISCV64FCVTDL, ssa.OpRISCV64FCVTWD, ssa.OpRISCV64FCVTLD, ssa.OpRISCV64FCVTDS, ssa.OpRISCV64FCVTSD, +- ssa.OpRISCV64NOT, ssa.OpRISCV64NEG, ssa.OpRISCV64NEGW, ssa.OpRISCV64CLZ, ssa.OpRISCV64CLZW, ssa.OpRISCV64CTZ, ssa.OpRISCV64CTZW: ++ ssa.OpRISCV64NOT, ssa.OpRISCV64NEG, ssa.OpRISCV64NEGW, ssa.OpRISCV64CLZ, ssa.OpRISCV64CLZW, ssa.OpRISCV64CTZ, ssa.OpRISCV64CTZW, ++ ssa.OpRISCV64REV8: + p := s.Prog(v.Op.Asm()) + p.From.Type = obj.TYPE_REG + p.From.Reg = v.Args[0].Reg() +diff --git a/src/cmd/compile/internal/ssa/_gen/RISCV64.rules b/src/cmd/compile/internal/ssa/_gen/RISCV64.rules +index 86023001aa..44496049f2 100644 +--- a/src/cmd/compile/internal/ssa/_gen/RISCV64.rules ++++ b/src/cmd/compile/internal/ssa/_gen/RISCV64.rules +@@ -231,6 +231,11 @@ + (BitLen16 x) => (BitLen64 (ZeroExt16to64 x)) + (BitLen8 x) => (BitLen64 (ZeroExt8to64 x)) + ++// Byte swap (note that these will only be emitted for rva22u64 and above). ++(Bswap64 ...) => (REV8 ...) ++(Bswap32 x) => (SRLI [32] (REV8 x)) ++(Bswap16 x) => (SRLI [48] (REV8 x)) ++ + (Less64 ...) => (SLT ...) 
+ (Less32 x y) => (SLT (SignExt32to64 x) (SignExt32to64 y)) + (Less16 x y) => (SLT (SignExt16to64 x) (SignExt16to64 y)) +diff --git a/src/cmd/compile/internal/ssa/_gen/RISCV64Ops.go b/src/cmd/compile/internal/ssa/_gen/RISCV64Ops.go +index cc2302ff37..86412ce8a6 100644 +--- a/src/cmd/compile/internal/ssa/_gen/RISCV64Ops.go ++++ b/src/cmd/compile/internal/ssa/_gen/RISCV64Ops.go +@@ -237,6 +237,7 @@ func init() { + {name: "OR", argLength: 2, reg: gp21, asm: "OR", commutative: true}, // arg0 | arg1 + {name: "ORN", argLength: 2, reg: gp21, asm: "ORN"}, // ^arg0 | arg1 + {name: "ORI", argLength: 1, reg: gp11, asm: "ORI", aux: "Int64"}, // arg0 | auxint ++ {name: "REV8", argLength: 1, reg: gp11, asm: "REV8"}, // reverse bytes + {name: "ROL", argLength: 2, reg: gp21, asm: "ROL"}, // rotate left arg0 by (arg1 & 63) + {name: "ROLW", argLength: 2, reg: gp21, asm: "ROLW"}, // rotate left least significant word of arg0 by (arg1 & 31), sign extended + {name: "ROR", argLength: 2, reg: gp21, asm: "ROR"}, // rotate right arg0 by (arg1 & 63) +diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go +index 82a66fefe4..688f11368b 100644 +--- a/src/cmd/compile/internal/ssa/opGen.go ++++ b/src/cmd/compile/internal/ssa/opGen.go +@@ -2515,6 +2515,7 @@ const ( + OpRISCV64OR + OpRISCV64ORN + OpRISCV64ORI ++ OpRISCV64REV8 + OpRISCV64ROL + OpRISCV64ROLW + OpRISCV64ROR +@@ -33849,6 +33850,19 @@ var opcodeTable = [...]opInfo{ + }, + }, + }, ++ { ++ name: "REV8", ++ argLen: 1, ++ asm: riscv.AREV8, ++ reg: regInfo{ ++ inputs: []inputInfo{ ++ {0, 1006632944}, // X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 ++ }, ++ outputs: []outputInfo{ ++ {0, 1006632944}, // X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 ++ }, ++ }, ++ }, + { + name: "ROL", + argLen: 2, +diff --git a/src/cmd/compile/internal/ssa/rewriteRISCV64.go b/src/cmd/compile/internal/ssa/rewriteRISCV64.go +index 0dfa93cbe1..847039f1ef 100644 +--- a/src/cmd/compile/internal/ssa/rewriteRISCV64.go ++++ b/src/cmd/compile/internal/ssa/rewriteRISCV64.go +@@ -110,6 +110,13 @@ func rewriteValueRISCV64(v *Value) bool { + return rewriteValueRISCV64_OpBitLen64(v) + case OpBitLen8: + return rewriteValueRISCV64_OpBitLen8(v) ++ case OpBswap16: ++ return rewriteValueRISCV64_OpBswap16(v) ++ case OpBswap32: ++ return rewriteValueRISCV64_OpBswap32(v) ++ case OpBswap64: ++ v.Op = OpRISCV64REV8 ++ return true + case OpClosureCall: + v.Op = OpRISCV64CALLclosure + return true +@@ -1004,6 +1011,38 @@ func rewriteValueRISCV64_OpBitLen8(v *Value) bool { + return true + } + } ++func rewriteValueRISCV64_OpBswap16(v *Value) bool { ++ v_0 := v.Args[0] ++ b := v.Block ++ // match: (Bswap16 x) ++ // result: (SRLI [48] (REV8 x)) ++ for { ++ t := v.Type ++ x := v_0 ++ v.reset(OpRISCV64SRLI) ++ v.AuxInt = int64ToAuxInt(48) ++ v0 := b.NewValue0(v.Pos, OpRISCV64REV8, t) ++ v0.AddArg(x) ++ v.AddArg(v0) ++ return true ++ } ++} ++func rewriteValueRISCV64_OpBswap32(v *Value) bool { ++ v_0 := v.Args[0] ++ b := v.Block ++ // match: (Bswap32 x) ++ // result: (SRLI [32] (REV8 x)) ++ for { ++ t := v.Type ++ x := v_0 ++ v.reset(OpRISCV64SRLI) ++ v.AuxInt = int64ToAuxInt(32) ++ v0 := b.NewValue0(v.Pos, OpRISCV64REV8, t) ++ v0.AddArg(x) ++ v.AddArg(v0) ++ return true ++ } ++} + func rewriteValueRISCV64_OpConst16(v *Value) bool { + // match: (Const16 [val]) + // result: (MOVDconst [int64(val)]) +diff --git a/src/cmd/compile/internal/ssagen/intrinsics.go 
b/src/cmd/compile/internal/ssagen/intrinsics.go +index 11533e42d3..0900b704ba 100644 +--- a/src/cmd/compile/internal/ssagen/intrinsics.go ++++ b/src/cmd/compile/internal/ssagen/intrinsics.go +@@ -184,22 +184,44 @@ func initIntrinsics(cfg *intrinsicBuildConfig) { + }, + all...) + +- brev_arch := []sys.ArchFamily{sys.AMD64, sys.I386, sys.ARM64, sys.ARM, sys.Loong64, sys.S390X} +- if cfg.goppc64 >= 10 { +- // Use only on Power10 as the new byte reverse instructions that Power10 provide +- // make it worthwhile as an intrinsic +- brev_arch = append(brev_arch, sys.PPC64) +- } + addF("internal/runtime/sys", "Bswap32", + func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + return s.newValue1(ssa.OpBswap32, types.Types[types.TUINT32], args[0]) + }, +- brev_arch...) ++ sys.AMD64, sys.I386, sys.ARM64, sys.ARM, sys.Loong64, sys.S390X) + addF("internal/runtime/sys", "Bswap64", + func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + return s.newValue1(ssa.OpBswap64, types.Types[types.TUINT64], args[0]) + }, +- brev_arch...) ++ sys.AMD64, sys.I386, sys.ARM64, sys.ARM, sys.Loong64, sys.S390X) ++ ++ if cfg.goppc64 >= 10 { ++ // Use only on Power10 as the new byte reverse instructions that Power10 provide ++ // make it worthwhile as an intrinsic ++ addF("internal/runtime/sys", "Bswap32", ++ func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { ++ return s.newValue1(ssa.OpBswap32, types.Types[types.TUINT32], args[0]) ++ }, ++ sys.PPC64) ++ addF("internal/runtime/sys", "Bswap64", ++ func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { ++ return s.newValue1(ssa.OpBswap64, types.Types[types.TUINT64], args[0]) ++ }, ++ sys.PPC64) ++ } ++ ++ if cfg.goriscv64 >= 22 { ++ addF("internal/runtime/sys", "Bswap32", ++ func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { ++ return s.newValue1(ssa.OpBswap32, types.Types[types.TUINT32], args[0]) ++ }, ++ sys.RISCV64) ++ addF("internal/runtime/sys", "Bswap64", ++ func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { ++ return s.newValue1(ssa.OpBswap64, types.Types[types.TUINT64], args[0]) ++ }, ++ sys.RISCV64) ++ } + + /****** Prefetch ******/ + makePrefetchFunc := func(op ssa.Op) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { +@@ -933,23 +955,30 @@ func initIntrinsics(cfg *intrinsicBuildConfig) { + sys.RISCV64) + } + ++ // ReverseBytes inlines correctly, no need to intrinsify it. + alias("math/bits", "ReverseBytes64", "internal/runtime/sys", "Bswap64", all...) + alias("math/bits", "ReverseBytes32", "internal/runtime/sys", "Bswap32", all...) ++ // Nothing special is needed for targets where ReverseBytes16 lowers to a rotate + addF("math/bits", "ReverseBytes16", + func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + return s.newValue1(ssa.OpBswap16, types.Types[types.TUINT16], args[0]) + }, + sys.Loong64) +- // ReverseBytes inlines correctly, no need to intrinsify it. 
+- // Nothing special is needed for targets where ReverseBytes16 lowers to a rotate +- // On Power10, 16-bit rotate is not available so use BRH instruction + if cfg.goppc64 >= 10 { ++ // On Power10, 16-bit rotate is not available so use BRH instruction + addF("math/bits", "ReverseBytes16", + func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { + return s.newValue1(ssa.OpBswap16, types.Types[types.TUINT], args[0]) + }, + sys.PPC64) + } ++ if cfg.goriscv64 >= 22 { ++ addF("math/bits", "ReverseBytes16", ++ func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { ++ return s.newValue1(ssa.OpBswap16, types.Types[types.TUINT16], args[0]) ++ }, ++ sys.RISCV64) ++ } + + addF("math/bits", "Len64", + func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { +diff --git a/src/cmd/compile/internal/ssagen/intrinsics_test.go b/src/cmd/compile/internal/ssagen/intrinsics_test.go +index 230a7bdf67..e6275734f2 100644 +--- a/src/cmd/compile/internal/ssagen/intrinsics_test.go ++++ b/src/cmd/compile/internal/ssagen/intrinsics_test.go +@@ -1107,6 +1107,8 @@ var wantIntrinsics = map[testIntrinsicKey]struct{}{ + {"riscv64", "internal/runtime/math", "Add64"}: struct{}{}, + {"riscv64", "internal/runtime/math", "Mul64"}: struct{}{}, + {"riscv64", "internal/runtime/math", "MulUintptr"}: struct{}{}, ++ {"riscv64", "internal/runtime/sys", "Bswap32"}: struct{}{}, ++ {"riscv64", "internal/runtime/sys", "Bswap64"}: struct{}{}, + {"riscv64", "internal/runtime/sys", "GetCallerPC"}: struct{}{}, + {"riscv64", "internal/runtime/sys", "GetCallerSP"}: struct{}{}, + {"riscv64", "internal/runtime/sys", "GetClosurePtr"}: struct{}{}, +@@ -1129,6 +1131,9 @@ var wantIntrinsics = map[testIntrinsicKey]struct{}{ + {"riscv64", "math/bits", "Len8"}: struct{}{}, + {"riscv64", "math/bits", "Mul"}: struct{}{}, + {"riscv64", "math/bits", "Mul64"}: struct{}{}, ++ {"riscv64", "math/bits", "ReverseBytes16"}: struct{}{}, ++ {"riscv64", "math/bits", "ReverseBytes32"}: struct{}{}, ++ {"riscv64", "math/bits", "ReverseBytes64"}: struct{}{}, + {"riscv64", "math/bits", "RotateLeft"}: struct{}{}, + {"riscv64", "math/bits", "RotateLeft16"}: struct{}{}, + {"riscv64", "math/bits", "RotateLeft32"}: struct{}{}, +diff --git a/test/codegen/mathbits.go b/test/codegen/mathbits.go +index a9cf466780..08fcc8b410 100644 +--- a/test/codegen/mathbits.go ++++ b/test/codegen/mathbits.go +@@ -258,42 +258,46 @@ func Reverse8(n uint8) uint8 { + // ----------------------- // + + func ReverseBytes(n uint) uint { +- // amd64:"BSWAPQ" + // 386:"BSWAPL" +- // s390x:"MOVDBR" ++ // amd64:"BSWAPQ" + // arm64:"REV" + // loong64:"REVBV" ++ // riscv64/rva22u64,riscv64/rva23u64:"REV8" ++ // s390x:"MOVDBR" + return bits.ReverseBytes(n) + } + + func ReverseBytes64(n uint64) uint64 { +- // amd64:"BSWAPQ" + // 386:"BSWAPL" +- // s390x:"MOVDBR" ++ // amd64:"BSWAPQ" + // arm64:"REV" +- // ppc64x/power10: "BRD" + // loong64:"REVBV" ++ // ppc64x/power10: "BRD" ++ // riscv64/rva22u64,riscv64/rva23u64:"REV8" ++ // s390x:"MOVDBR" + return bits.ReverseBytes64(n) + } + + func ReverseBytes32(n uint32) uint32 { +- // amd64:"BSWAPL" + // 386:"BSWAPL" +- // s390x:"MOVWBR" ++ // amd64:"BSWAPL" + // arm64:"REVW" + // loong64:"REVB2W" + // ppc64x/power10: "BRW" ++ // riscv64/rva22u64,riscv64/rva23u64:"REV8","SRLI\t\\$32" ++ // s390x:"MOVWBR" + return bits.ReverseBytes32(n) + } + + func ReverseBytes16(n uint16) uint16 { + // amd64:"ROLW" +- // arm64:"REV16W",-"UBFX",-"ORR" + // arm/5:"SLL","SRL","ORR" + // arm/6:"REV16" + // arm/7:"REV16" ++ // arm64:"REV16W",-"UBFX",-"ORR" + // 
loong64:"REVB2H" + // ppc64x/power10: "BRH" ++ // riscv64/rva22u64,riscv64/rva23u64:"REV8","SRLI\t\\$48" + return bits.ReverseBytes16(n) + } + +-- +2.50.1 + diff --git a/golang.spec b/golang.spec index 529141f5745345a096a6db7e17c19701ce3ef39d..2d9ce395476995f74061bc90b9bea13246040f52 100644 --- a/golang.spec +++ b/golang.spec @@ -67,8 +67,8 @@ %endif Name: golang -Version: 1.24.2 -Release: 34 +Version: 1.24.6 +Release: 35 Summary: The Go Programming Language License: BSD and Public Domain URL: https://golang.org/ @@ -128,8 +128,45 @@ Requires: %{vendor}-rpm-config Patch1000: 1000-all-implement-plugin-build-mode-for-riscv64.patch Patch1001: 1001-cmd-link-cmd-internal-add-R_GOT_PCREL_ITYPE_RELOC-fo.patch Patch1002: 1002-cmd-compile-don-t-merge-symbols-on-riscv64-when-dyna.patch -Patch1003: 1003-CVE-2025-22874-crypto-x509-decouple-key-usage-and-po.patch -Patch1004: 1004-CVE-2025-4673-net-http-strip-sensitive-proxy-headers.patch + +Patch2001: 2001-cpu-internal-provide-runtime-detection-of-RISC-V-ext.patch +Patch2002: 2002-cmd-go-add-rva23u64-as-a-valid-value-for-GORISCV64.patch +Patch2003: 2003-cmd-internal-obj-riscv-update-references-to-RISC-V-s.patch +Patch2004: 2004-cmd-asm-cmd-internal-obj-riscv-implement-vector-conf.patch +Patch2005: 2005-cmd-internal-obj-riscv-support-MOVD-with-floating-po.patch +Patch2006: 2006-internal-bytealg-clean-up-and-simplify-the-riscv64-e.patch +Patch2007: 2007-cmd-internal-obj-riscv-implement-vector-load-store-i.patch +Patch2008: 2008-cmd-internal-obj-riscv-add-riscv64-CSR-map.patch +Patch2009: 2009-test-codegen-add-riscv64-rva23u64-specifiers-to-exis.patch +Patch2010: 2010-test-codegen-add-riscv64-codegen-for-arithmetic-test.patch +Patch2011: 2011-cmd-compile-internal-ssa-remove-double-negation-with.patch +Patch2012: 2012-cmd-internal-obj-riscv-prevent-duplicate-error-repor.patch +Patch2013: 2013-cmd-internal-obj-riscv-add-support-for-vector-intege.patch +Patch2014: 2014-cmd-internal-obj-riscv-add-support-for-vector-fixed-.patch +Patch2015: 2015-crypto-sha512-remove-unnecessary-move-op-replace-wit.patch +Patch2016: 2016-crypto-sha256-improve-performance-of-riscv64-assembl.patch +Patch2017: 2017-cmd-internal-obj-riscv-add-support-for-vector-floati.patch +Patch2018: 2018-internal-bytealg-deduplicate-code-between-Count-Coun.patch +Patch2019: 2019-cmd-internal-obj-riscv-add-support-for-vector-reduct.patch +Patch2020: 2020-cmd-internal-obj-riscv-add-support-for-vector-mask-i.patch +Patch2021: 2021-cmd-internal-obj-riscv-add-support-for-vector-permut.patch +Patch2022: 2022-cmd-internal-obj-riscv-fix-LMUL-encoding-for-MF2-and.patch +Patch2023: 2023-cmd-internal-obj-riscv-reject-invalid-vadc-vsbc-enco.patch +Patch2024: 2024-cmd-compile-add-generic-simplifications-on-riscv64.patch +Patch2025: 2025-cmd-internal-obj-riscv-fix-vector-integer-multiply-a.patch +Patch2026: 2026-internal-bytealg-vector-implementation-of-equal-for-.patch +Patch2027: 2027-internal-bytealg-vector-implementation-of-indexbyte-.patch +Patch2028: 2028-internal-bytealg-vector-implementation-of-compare-fo.patch +Patch2029: 2029-cmd-compile-simplify-intrinsification-of-BitLen16-an.patch +Patch2030: 2030-cmd-compile-simplify-intrinsification-of-TrailingZer.patch +Patch2031: 2031-cmd-compile-internal-ssagen-use-an-alias-for-math-bi.patch +Patch2032: 2032-test-codegen-tighten-the-TrailingZeros64-test-on-386.patch +Patch2033: 2033-cmd-compile-intrinsify-math-bits.TrailingZeros-on-ri.patch +Patch2034: 2034-cmd-compile-internal-ssagen-use-an-alias-for-math-bi.patch +Patch2035: 
2035-cmd-compile-intrinsify-math-bits.Len-on-riscv64.patch
+Patch2036: 2036-cmd-internal-obj-riscv-internal-bytealg-synthesize-M.patch
+Patch2037: 2037-cmd-compile-internal-ssa-optimise-more-branches-with.patch
+Patch2038: 2038-cmd-compile-intrinsify-math-bits.Bswap-on-riscv64.patch
 
 Patch9001: 0001-fix-asan_test-test-case-failure.patch
 
@@ -370,6 +407,9 @@ fi
 %files devel -f go-tests.list -f go-misc.list -f go-src.list
 
 %changelog
+* Tue Aug 19 2025 Julian Zhu - 1.24.6-35
+- Backport RISC-V RVA23 support
+
 * Fri Jun 20 2025 wujichao - 1.24.2-34
 - Type:CVE
 - CVE:CVE-2025-22874,CVE-2025-4673