diff --git a/8207160-ClassReader-adjustMethodParams-can-potentially-return-null-if-the-args-list-is-empty.patch b/8207160-ClassReader-adjustMethodParams-can-potentially-return-null-if-the-args-list-is-empty.patch new file mode 100755 index 0000000000000000000000000000000000000000..f95d845b3d32c5d6530c1dac53d937134042e8c9 --- /dev/null +++ b/8207160-ClassReader-adjustMethodParams-can-potentially-return-null-if-the-args-list-is-empty.patch @@ -0,0 +1,168 @@ +diff --git a/src/jdk.compiler/share/classes/com/sun/tools/javac/jvm/ClassReader.java b/src/jdk.compiler/share/classes/com/sun/tools/javac/jvm/ClassReader.java +index 5a1d3b900..ab327bf9a 100644 +--- a/src/jdk.compiler/share/classes/com/sun/tools/javac/jvm/ClassReader.java ++++ b/src/jdk.compiler/share/classes/com/sun/tools/javac/jvm/ClassReader.java +@@ -2471,6 +2471,9 @@ public class ClassReader { + } + + private List adjustMethodParams(long flags, List args) { ++ if (args.isEmpty()) { ++ return args; ++ } + boolean isVarargs = (flags & VARARGS) != 0; + if (isVarargs) { + Type varargsElem = args.last(); +diff --git a/test/langtools/tools/javac/AvoidNPEAtClassReader/AvoidNPEAtClassReaderTest.java b/test/langtools/tools/javac/AvoidNPEAtClassReader/AvoidNPEAtClassReaderTest.java +new file mode 100644 +index 000000000..3b47d6944 +--- /dev/null ++++ b/test/langtools/tools/javac/AvoidNPEAtClassReader/AvoidNPEAtClassReaderTest.java +@@ -0,0 +1,43 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++ ++/** ++ * @test ++ * @bug 8207160 ++ * @summary ClassReader::adjustMethodParams can potentially return null if the args list is empty ++ * @compile pkg/Outer.jasm pkg/Outer$Inner.jasm AvoidNPEAtClassReaderTest.java ++ */ ++ ++ ++/** this test is checking that javac doesn't fail with NPE when reading inner classes with constructors ++ * that doesn't have as a parameter a reference to the outer class. Such constructors were generated by ++ * versions of javac previous to JDK7. ++ */ ++ ++import pkg.*; ++ ++public class AvoidNPEAtClassReaderTest { ++ public void bar(Outer outer) { ++ Object stuff = outer.foo(); ++ } ++} +diff --git a/test/langtools/tools/javac/AvoidNPEAtClassReader/pkg/Outer$Inner.jasm b/test/langtools/tools/javac/AvoidNPEAtClassReader/pkg/Outer$Inner.jasm +new file mode 100644 +index 000000000..23fe2eb4b +--- /dev/null ++++ b/test/langtools/tools/javac/AvoidNPEAtClassReader/pkg/Outer$Inner.jasm +@@ -0,0 +1,42 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. 
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++ ++package pkg; ++ ++super public final class Outer$Inner ++ version 55:0 ++{ ++ ++final synthetic Field this$0:"Lpkg/Outer;"; ++ ++public Method "":"()V" ++ stack 1 locals 1 ++{ ++ aload_0; ++ invokespecial Method java/lang/Object."":"()V"; ++ return; ++} ++ ++public final InnerClass Inner=class Outer$Inner of class Outer; ++ ++} // end Class Outer$Inner +diff --git a/test/langtools/tools/javac/AvoidNPEAtClassReader/pkg/Outer.jasm b/test/langtools/tools/javac/AvoidNPEAtClassReader/pkg/Outer.jasm +new file mode 100644 +index 000000000..13baaf761 +--- /dev/null ++++ b/test/langtools/tools/javac/AvoidNPEAtClassReader/pkg/Outer.jasm +@@ -0,0 +1,48 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ */ ++ ++package pkg; ++ ++super public class Outer ++ version 55:0 ++{ ++ ++ ++public Method "":"()V" ++ stack 1 locals 1 ++{ ++ aload_0; ++ invokespecial Method java/lang/Object."":"()V"; ++ return; ++} ++ ++public Method foo:"()Lpkg/Outer$Inner;" ++ stack 1 locals 1 ++{ ++ aconst_null; ++ areturn; ++} ++ ++public final InnerClass Inner=class Outer$Inner of class Outer; ++ ++} // end Class Outer +-- +2.19.1 + diff --git a/8215047-Task-terminators-do-not-complete-termination-in-consistent-state.patch b/8215047-Task-terminators-do-not-complete-termination-in-consistent-state.patch new file mode 100644 index 0000000000000000000000000000000000000000..b2b9e5b875d2dd4e8565bfece89c1ca861b3ecfe --- /dev/null +++ b/8215047-Task-terminators-do-not-complete-termination-in-consistent-state.patch @@ -0,0 +1,399 @@ +diff --git a/src/hotspot/share/gc/g1/g1ConcurrentMark.cpp b/src/hotspot/share/gc/g1/g1ConcurrentMark.cpp +index 3749a99bb..ef8fb4ac0 100644 +--- a/src/hotspot/share/gc/g1/g1ConcurrentMark.cpp ++++ b/src/hotspot/share/gc/g1/g1ConcurrentMark.cpp +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2001, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2001, 2019, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -2202,7 +2202,10 @@ void G1CMTask::reset(G1CMBitMap* next_mark_bitmap) { + } + + bool G1CMTask::should_exit_termination() { +- regular_clock_call(); ++ if (!regular_clock_call()) { ++ return true; ++ } ++ + // This is called when we are in the termination protocol. We should + // quit if, for some reason, this task wants to abort or the global + // stack is not empty (this means that we can get work from it). +@@ -2213,12 +2216,12 @@ void G1CMTask::reached_limit() { + assert(_words_scanned >= _words_scanned_limit || + _refs_reached >= _refs_reached_limit , + "shouldn't have been called otherwise"); +- regular_clock_call(); ++ abort_marking_if_regular_check_fail(); + } + +-void G1CMTask::regular_clock_call() { ++bool G1CMTask::regular_clock_call() { + if (has_aborted()) { +- return; ++ return false; + } + + // First, we need to recalculate the words scanned and refs reached +@@ -2229,21 +2232,19 @@ void G1CMTask::regular_clock_call() { + + // (1) If an overflow has been flagged, then we abort. + if (_cm->has_overflown()) { +- set_has_aborted(); +- return; ++ return false; + } + + // If we are not concurrent (i.e. we're doing remark) we don't need + // to check anything else. The other steps are only needed during + // the concurrent marking phase. + if (!_cm->concurrent()) { +- return; ++ return true; + } + + // (2) If marking has been aborted for Full GC, then we also abort. + if (_cm->has_aborted()) { +- set_has_aborted(); +- return; ++ return false; + } + + double curr_time_ms = os::elapsedVTime() * 1000.0; +@@ -2252,17 +2253,15 @@ void G1CMTask::regular_clock_call() { + if (SuspendibleThreadSet::should_yield()) { + // We should yield. To do this we abort the task. The caller is + // responsible for yielding. +- set_has_aborted(); +- return; ++ return false; + } + + // (5) We check whether we've reached our time quota. If we have, + // then we abort. 
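+ // Illustrative numbers (not from the patch): if do_marking_step() was
+ // entered with time_target_ms = 10.0 and ~12ms of vtime have already
+ // elapsed, the test below now returns false instead of setting the abort
+ // flag itself; the caller reacts via abort_marking_if_regular_check_fail()
+ // or should_exit_termination().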
+ double elapsed_time_ms = curr_time_ms - _start_time_ms; + if (elapsed_time_ms > _time_target_ms) { +- set_has_aborted(); + _has_timed_out = true; +- return; ++ return false; + } + + // (6) Finally, we check whether there are enough completed STAB +@@ -2271,9 +2270,9 @@ void G1CMTask::regular_clock_call() { + if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) { + // we do need to process SATB buffers, we'll abort and restart + // the marking task to do so +- set_has_aborted(); +- return; ++ return false; + } ++ return true; + } + + void G1CMTask::recalculate_limits() { +@@ -2428,7 +2427,7 @@ void G1CMTask::drain_satb_buffers() { + // until we run out of buffers or we need to abort. + while (!has_aborted() && + satb_mq_set.apply_closure_to_completed_buffer(&satb_cl)) { +- regular_clock_call(); ++ abort_marking_if_regular_check_fail(); + } + + _draining_satb_buffers = false; +@@ -2671,7 +2670,7 @@ void G1CMTask::do_marking_step(double time_target_ms, + // If the iteration is successful, give up the region. + if (mr.is_empty()) { + giveup_current_region(); +- regular_clock_call(); ++ abort_marking_if_regular_check_fail(); + } else if (_curr_region->is_humongous() && mr.start() == _curr_region->bottom()) { + if (_next_mark_bitmap->is_marked(mr.start())) { + // The object is marked - apply the closure +@@ -2680,10 +2679,10 @@ void G1CMTask::do_marking_step(double time_target_ms, + // Even if this task aborted while scanning the humongous object + // we can (and should) give up the current region. + giveup_current_region(); +- regular_clock_call(); ++ abort_marking_if_regular_check_fail(); + } else if (_next_mark_bitmap->iterate(&bitmap_closure, mr)) { + giveup_current_region(); +- regular_clock_call(); ++ abort_marking_if_regular_check_fail(); + } else { + assert(has_aborted(), "currently the only way to do so"); + // The only way to abort the bitmap iteration is to return +@@ -2738,7 +2737,7 @@ void G1CMTask::do_marking_step(double time_target_ms, + // block of empty regions. So we need to call the regular clock + // method once round the loop to make sure it's called + // frequently enough. +- regular_clock_call(); ++ abort_marking_if_regular_check_fail(); + } + + if (!has_aborted() && _curr_region == NULL) { +@@ -2816,6 +2815,7 @@ void G1CMTask::do_marking_step(double time_target_ms, + guarantee(_cm->mark_stack_empty(), "only way to reach here"); + guarantee(_task_queue->size() == 0, "only way to reach here"); + guarantee(!_cm->has_overflown(), "only way to reach here"); ++ guarantee(!has_aborted(), "should never happen if termination has completed"); + } else { + // Apparently there's more work to do. Let's abort this task. It + // will restart it and we can hopefully find more things to do. +diff --git a/src/hotspot/share/gc/g1/g1ConcurrentMark.hpp b/src/hotspot/share/gc/g1/g1ConcurrentMark.hpp +index b5eb26197..b760fe977 100644 +--- a/src/hotspot/share/gc/g1/g1ConcurrentMark.hpp ++++ b/src/hotspot/share/gc/g1/g1ConcurrentMark.hpp +@@ -730,7 +730,11 @@ private: + // Supposed to be called regularly during a marking step as + // it checks a bunch of conditions that might cause the marking step + // to abort +- void regular_clock_call(); ++ // Return true if the marking step should continue. 
Otherwise, return false to abort ++ bool regular_clock_call(); ++ ++ // Set abort flag if regular_clock_call() check fails ++ inline void abort_marking_if_regular_check_fail(); + + // Test whether obj might have already been passed over by the + // mark bitmap scan, and so needs to be pushed onto the mark stack. +diff --git a/src/hotspot/share/gc/g1/g1ConcurrentMark.inline.hpp b/src/hotspot/share/gc/g1/g1ConcurrentMark.inline.hpp +index 4a969c511..383cdc563 100644 +--- a/src/hotspot/share/gc/g1/g1ConcurrentMark.inline.hpp ++++ b/src/hotspot/share/gc/g1/g1ConcurrentMark.inline.hpp +@@ -210,6 +210,12 @@ inline void G1ConcurrentMark::add_to_liveness(uint worker_id, oop const obj, siz + task(worker_id)->update_liveness(obj, size); + } + ++inline void G1CMTask::abort_marking_if_regular_check_fail() { ++ if (!regular_clock_call()) { ++ set_has_aborted(); ++ } ++} ++ + inline bool G1CMTask::make_reference_grey(oop obj) { + if (!_cm->mark_in_next_bitmap(_worker_id, obj)) { + return false; +diff --git a/src/hotspot/share/gc/shared/owstTaskTerminator.cpp b/src/hotspot/share/gc/shared/owstTaskTerminator.cpp +index 3c32ab627..2856a9981 100644 +--- a/src/hotspot/share/gc/shared/owstTaskTerminator.cpp ++++ b/src/hotspot/share/gc/shared/owstTaskTerminator.cpp +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2018, Red Hat, Inc. All rights reserved. ++ * Copyright (c) 2018, 2019, Red Hat, Inc. All rights reserved. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as +@@ -38,15 +38,17 @@ bool OWSTTaskTerminator::offer_termination(TerminatorTerminator* terminator) { + // Single worker, done + if (_n_threads == 1) { + _offered_termination = 1; ++ assert(!peek_in_queue_set(), "Precondition"); + return true; + } + + _blocker->lock_without_safepoint_check(); +- // All arrived, done + _offered_termination++; ++ // All arrived, done + if (_offered_termination == _n_threads) { + _blocker->notify_all(); + _blocker->unlock(); ++ assert(!peek_in_queue_set(), "Precondition"); + return true; + } + +@@ -59,21 +61,31 @@ bool OWSTTaskTerminator::offer_termination(TerminatorTerminator* terminator) { + + if (do_spin_master_work(terminator)) { + assert(_offered_termination == _n_threads, "termination condition"); ++ assert(!peek_in_queue_set(), "Precondition"); + return true; + } else { + _blocker->lock_without_safepoint_check(); ++ // There is possibility that termination is reached between dropping the lock ++ // before returning from do_spin_master_work() and acquiring lock above. 
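++      // Sketch of the window being closed (thread ids illustrative): T1
++      // returns false from do_spin_master_work() and has dropped _blocker;
++      // before T1 reacquires it here, T2 arrives and makes
++      // _offered_termination == _n_threads. T1 must therefore re-test the
++      // predicate under the lock and treat termination as complete, rather
++      // than carrying on as if it had exited termination.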
++ if (_offered_termination == _n_threads) { ++ _blocker->unlock(); ++ assert(!peek_in_queue_set(), "Precondition"); ++ return true; ++ } + } + } else { + _blocker->wait(true, WorkStealingSleepMillis); + + if (_offered_termination == _n_threads) { + _blocker->unlock(); ++ assert(!peek_in_queue_set(), "Precondition"); + return true; + } + } + + size_t tasks = tasks_in_queue_set(); + if (exit_termination(tasks, terminator)) { ++ assert_lock_strong(_blocker); + _offered_termination--; + _blocker->unlock(); + return false; +@@ -153,19 +165,24 @@ bool OWSTTaskTerminator::do_spin_master_work(TerminatorTerminator* terminator) { + _total_peeks++; + #endif + size_t tasks = tasks_in_queue_set(); +- if (exit_termination(tasks, terminator)) { ++ bool exit = exit_termination(tasks, terminator); ++ { + MonitorLockerEx locker(_blocker, Mutex::_no_safepoint_check_flag); +- if (tasks >= _offered_termination - 1) { +- locker.notify_all(); +- } else { +- for (; tasks > 1; tasks--) { +- locker.notify(); ++ // Termination condition reached ++ if (_offered_termination == _n_threads) { ++ _spin_master = NULL; ++ return true; ++ } else if (exit) { ++ if (tasks >= _offered_termination - 1) { ++ locker.notify_all(); ++ } else { ++ for (; tasks > 1; tasks--) { ++ locker.notify(); ++ } + } ++ _spin_master = NULL; ++ return false; + } +- _spin_master = NULL; +- return false; +- } else if (_offered_termination == _n_threads) { +- return true; + } + } + } +diff --git a/src/hotspot/share/gc/shared/owstTaskTerminator.hpp b/src/hotspot/share/gc/shared/owstTaskTerminator.hpp +index 9e6fe135a..190033eb7 100644 +--- a/src/hotspot/share/gc/shared/owstTaskTerminator.hpp ++++ b/src/hotspot/share/gc/shared/owstTaskTerminator.hpp +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2018, Red Hat, Inc. All rights reserved. ++ * Copyright (c) 2018, 2019, Red Hat, Inc. All rights reserved. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as +@@ -55,6 +55,7 @@ public: + } + + virtual ~OWSTTaskTerminator() { ++ assert(_spin_master == NULL, "Should have been reset"); + assert(_blocker != NULL, "Can not be NULL"); + delete _blocker; + } +diff --git a/src/hotspot/share/gc/shared/taskqueue.cpp b/src/hotspot/share/gc/shared/taskqueue.cpp +index 47639bdf9..697c13645 100644 +--- a/src/hotspot/share/gc/shared/taskqueue.cpp ++++ b/src/hotspot/share/gc/shared/taskqueue.cpp +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2001, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2001, 2019, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it +@@ -25,6 +25,9 @@ + #include "precompiled.hpp" + #include "gc/shared/taskqueue.hpp" + #include "gc/shared/owstTaskTerminator.hpp" ++#if INCLUDE_SHENANDOAHGC ++#include "gc/shenandoah/shenandoahHeap.hpp" ++#endif + #include "oops/oop.inline.hpp" + #include "logging/log.hpp" + #include "runtime/atomic.hpp" +@@ -118,6 +121,14 @@ ParallelTaskTerminator(uint n_threads, TaskQueueSetSuper* queue_set) : + _queue_set(queue_set), + _offered_termination(0) {} + ++ParallelTaskTerminator::~ParallelTaskTerminator() { ++ assert(_offered_termination == 0 || !peek_in_queue_set(), "Precondition"); ++#if INCLUDE_SHENANDOAHGC ++ if (UseShenandoahGC && ShenandoahHeap::heap()->cancelled_gc()) return; ++#endif ++ assert(_offered_termination == 0 || _offered_termination == _n_threads, "Terminated or aborted" ); ++} ++ + bool ParallelTaskTerminator::peek_in_queue_set() { + return _queue_set->peek(); + } +@@ -162,6 +173,7 @@ ParallelTaskTerminator::offer_termination(TerminatorTerminator* terminator) { + assert(_offered_termination <= _n_threads, "Invariant"); + // Are all threads offering termination? + if (_offered_termination == _n_threads) { ++ assert(!peek_in_queue_set(), "Precondition"); + return true; + } else { + // Look for more work. +@@ -211,9 +223,7 @@ ParallelTaskTerminator::offer_termination(TerminatorTerminator* terminator) { + #endif + if (peek_in_queue_set() || + (terminator != NULL && terminator->should_exit_termination())) { +- Atomic::dec(&_offered_termination); +- assert(_offered_termination < _n_threads, "Invariant"); +- return false; ++ return complete_or_exit_termination(); + } + } + } +@@ -229,6 +239,23 @@ void ParallelTaskTerminator::print_termination_counts() { + } + #endif + ++bool ParallelTaskTerminator::complete_or_exit_termination() { ++ // If termination is ever reached, terminator should stay in such state, ++ // so that all threads see the same state ++ uint current_offered = _offered_termination; ++ uint expected_value; ++ do { ++ if (current_offered == _n_threads) { ++ assert(!peek_in_queue_set(), "Precondition"); ++ return true; ++ } ++ expected_value = current_offered; ++ } while ((current_offered = Atomic::cmpxchg(current_offered - 1, &_offered_termination, current_offered)) != expected_value); ++ ++ assert(_offered_termination < _n_threads, "Invariant"); ++ return false; ++} ++ + void ParallelTaskTerminator::reset_for_reuse() { + if (_offered_termination != 0) { + assert(_offered_termination == _n_threads, +diff --git a/src/hotspot/share/gc/shared/taskqueue.hpp b/src/hotspot/share/gc/shared/taskqueue.hpp +index 1b60a62c2..110757684 100644 +--- a/src/hotspot/share/gc/shared/taskqueue.hpp ++++ b/src/hotspot/share/gc/shared/taskqueue.hpp +@@ -491,11 +491,18 @@ protected: + virtual void yield(); + void sleep(uint millis); + ++ // Called when exiting termination is requested. ++ // When the request is made, terminator may have already terminated ++ // (e.g. all threads are arrived and offered termination). In this case, ++ // it should ignore the request and complete the termination. ++ // Return true if termination is completed. Otherwise, return false. ++ bool complete_or_exit_termination(); + public: + + // "n_threads" is the number of threads to be terminated. "queue_set" is a + // queue sets of work queues of other threads. 
+ ParallelTaskTerminator(uint n_threads, TaskQueueSetSuper* queue_set); ++ virtual ~ParallelTaskTerminator(); + + // The current thread has no work, and is ready to terminate if everyone + // else is. If returns "true", all threads are terminated. If returns +-- +2.19.1 + diff --git a/8236512-PKCS11-Connection-closed-after-Cipher-doFinal-and-NoPadding.patch b/8236512-PKCS11-Connection-closed-after-Cipher-doFinal-and-NoPadding.patch new file mode 100755 index 0000000000000000000000000000000000000000..58489bc2664f52942bd93940adc4e9478302ee36 --- /dev/null +++ b/8236512-PKCS11-Connection-closed-after-Cipher-doFinal-and-NoPadding.patch @@ -0,0 +1,479 @@ +diff --git a/src/jdk.crypto.cryptoki/share/classes/sun/security/pkcs11/P11AEADCipher.java b/src/jdk.crypto.cryptoki/share/classes/sun/security/pkcs11/P11AEADCipher.java +index d1b9d06d8..82d0dc164 100644 +--- a/src/jdk.crypto.cryptoki/share/classes/sun/security/pkcs11/P11AEADCipher.java ++++ b/src/jdk.crypto.cryptoki/share/classes/sun/security/pkcs11/P11AEADCipher.java +@@ -1,4 +1,5 @@ +-/* Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved. ++/* ++ * Copyright (c) 2019, 2020, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -334,25 +335,25 @@ final class P11AEADCipher extends CipherSpi { + } + + private void cancelOperation() { ++ // cancel operation by finishing it; avoid killSession as some ++ // hardware vendors may require re-login ++ int bufLen = doFinalLength(0); ++ byte[] buffer = new byte[bufLen]; ++ byte[] in = dataBuffer.toByteArray(); ++ int inLen = in.length; + try { +- if (session.hasObjects() == false) { +- session = token.killSession(session); +- return; ++ if (encrypt) { ++ token.p11.C_Encrypt(session.id(), 0, in, 0, inLen, ++ 0, buffer, 0, bufLen); + } else { +- // cancel operation by finishing it +- int bufLen = doFinalLength(0); +- byte[] buffer = new byte[bufLen]; +- +- if (encrypt) { +- token.p11.C_Encrypt(session.id(), 0, buffer, 0, bufLen, +- 0, buffer, 0, bufLen); +- } else { +- token.p11.C_Decrypt(session.id(), 0, buffer, 0, bufLen, +- 0, buffer, 0, bufLen); +- } ++ token.p11.C_Decrypt(session.id(), 0, in, 0, inLen, ++ 0, buffer, 0, bufLen); + } + } catch (PKCS11Exception e) { +- throw new ProviderException("Cancel failed", e); ++ if (encrypt) { ++ throw new ProviderException("Cancel failed", e); ++ } ++ // ignore failure for decryption + } + } + +@@ -434,18 +435,21 @@ final class P11AEADCipher extends CipherSpi { + if (!initialized) { + return; + } ++ initialized = false; ++ + try { + if (session == null) { + return; + } ++ + if (doCancel && token.explicitCancel) { + cancelOperation(); + } + } finally { + p11Key.releaseKeyID(); + session = token.releaseSession(session); ++ dataBuffer.reset(); + } +- initialized = false; + } + + // see JCE spec +diff --git a/src/jdk.crypto.cryptoki/share/classes/sun/security/pkcs11/P11Cipher.java b/src/jdk.crypto.cryptoki/share/classes/sun/security/pkcs11/P11Cipher.java +index cc4535e7b..470a888cd 100644 +--- a/src/jdk.crypto.cryptoki/share/classes/sun/security/pkcs11/P11Cipher.java ++++ b/src/jdk.crypto.cryptoki/share/classes/sun/security/pkcs11/P11Cipher.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it +@@ -409,10 +409,12 @@ final class P11Cipher extends CipherSpi { + return; + } + initialized = false; ++ + try { + if (session == null) { + return; + } ++ + if (doCancel && token.explicitCancel) { + cancelOperation(); + } +@@ -426,22 +428,21 @@ final class P11Cipher extends CipherSpi { + + private void cancelOperation() { + token.ensureValid(); +- if (session.hasObjects() == false) { +- session = token.killSession(session); +- return; +- } else { +- try { +- // cancel operation by finishing it +- int bufLen = doFinalLength(0); +- byte[] buffer = new byte[bufLen]; +- if (encrypt) { +- token.p11.C_EncryptFinal(session.id(), 0, buffer, 0, bufLen); +- } else { +- token.p11.C_DecryptFinal(session.id(), 0, buffer, 0, bufLen); +- } +- } catch (PKCS11Exception e) { ++ // cancel operation by finishing it; avoid killSession as some ++ // hardware vendors may require re-login ++ try { ++ int bufLen = doFinalLength(0); ++ byte[] buffer = new byte[bufLen]; ++ if (encrypt) { ++ token.p11.C_EncryptFinal(session.id(), 0, buffer, 0, bufLen); ++ } else { ++ token.p11.C_DecryptFinal(session.id(), 0, buffer, 0, bufLen); ++ } ++ } catch (PKCS11Exception e) { ++ if (encrypt) { + throw new ProviderException("Cancel failed", e); + } ++ // ignore failure for decryption + } + } + +diff --git a/src/jdk.crypto.cryptoki/share/classes/sun/security/pkcs11/P11Mac.java b/src/jdk.crypto.cryptoki/share/classes/sun/security/pkcs11/P11Mac.java +index 338cb215d..634e0855f 100644 +--- a/src/jdk.crypto.cryptoki/share/classes/sun/security/pkcs11/P11Mac.java ++++ b/src/jdk.crypto.cryptoki/share/classes/sun/security/pkcs11/P11Mac.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it +@@ -124,10 +124,12 @@ final class P11Mac extends MacSpi { + return; + } + initialized = false; ++ + try { + if (session == null) { + return; + } ++ + if (doCancel && token.explicitCancel) { + cancelOperation(); + } +@@ -139,15 +141,12 @@ final class P11Mac extends MacSpi { + + private void cancelOperation() { + token.ensureValid(); +- if (session.hasObjects() == false) { +- session = token.killSession(session); +- return; +- } else { +- try { +- token.p11.C_SignFinal(session.id(), 0); +- } catch (PKCS11Exception e) { +- throw new ProviderException("Cancel failed", e); +- } ++ // cancel operation by finishing it; avoid killSession as some ++ // hardware vendors may require re-login ++ try { ++ token.p11.C_SignFinal(session.id(), 0); ++ } catch (PKCS11Exception e) { ++ throw new ProviderException("Cancel failed", e); + } + } + +@@ -209,7 +208,6 @@ final class P11Mac extends MacSpi { + ensureInitialized(); + return token.p11.C_SignFinal(session.id(), 0); + } catch (PKCS11Exception e) { +- reset(true); + throw new ProviderException("doFinal() failed", e); + } finally { + reset(false); +diff --git a/src/jdk.crypto.cryptoki/share/classes/sun/security/pkcs11/P11PSSSignature.java b/src/jdk.crypto.cryptoki/share/classes/sun/security/pkcs11/P11PSSSignature.java +index 763fb98a8..0a470b932 100644 +--- a/src/jdk.crypto.cryptoki/share/classes/sun/security/pkcs11/P11PSSSignature.java ++++ b/src/jdk.crypto.cryptoki/share/classes/sun/security/pkcs11/P11PSSSignature.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2019, 2020, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it +@@ -223,10 +223,12 @@ final class P11PSSSignature extends SignatureSpi { + return; + } + initialized = false; ++ + try { + if (session == null) { + return; + } ++ + if (doCancel && token.explicitCancel) { + cancelOperation(); + } +@@ -242,14 +244,10 @@ final class P11PSSSignature extends SignatureSpi { + token.ensureValid(); + if (DEBUG) System.out.print("Cancelling operation"); + +- if (session.hasObjects() == false) { +- if (DEBUG) System.out.println(" by killing session"); +- session = token.killSession(session); +- return; +- } +- // "cancel" operation by finishing it +- if (mode == M_SIGN) { +- try { ++ // cancel operation by finishing it; avoid killSession as some ++ // hardware vendors may require re-login ++ try { ++ if (mode == M_SIGN) { + if (type == T_UPDATE) { + if (DEBUG) System.out.println(" by C_SignFinal"); + token.p11.C_SignFinal(session.id(), 0); +@@ -259,11 +257,7 @@ final class P11PSSSignature extends SignatureSpi { + if (DEBUG) System.out.println(" by C_Sign"); + token.p11.C_Sign(session.id(), digest); + } +- } catch (PKCS11Exception e) { +- throw new ProviderException("cancel failed", e); +- } +- } else { // M_VERIFY +- try { ++ } else { // M_VERIFY + byte[] signature = + new byte[(p11Key.length() + 7) >> 3]; + if (type == T_UPDATE) { +@@ -275,10 +269,12 @@ final class P11PSSSignature extends SignatureSpi { + if (DEBUG) System.out.println(" by C_Verify"); + token.p11.C_Verify(session.id(), digest, signature); + } +- } catch (PKCS11Exception e) { +- // will fail since the signature is incorrect +- // XXX check error code + } ++ } catch (PKCS11Exception e) { ++ if (mode == M_SIGN) { ++ throw new ProviderException("cancel failed", e); ++ } ++ // ignore failure for verification + } + } + +diff --git a/src/jdk.crypto.cryptoki/share/classes/sun/security/pkcs11/P11RSACipher.java b/src/jdk.crypto.cryptoki/share/classes/sun/security/pkcs11/P11RSACipher.java +index 3f32501e0..06d65e893 100644 +--- a/src/jdk.crypto.cryptoki/share/classes/sun/security/pkcs11/P11RSACipher.java ++++ b/src/jdk.crypto.cryptoki/share/classes/sun/security/pkcs11/P11RSACipher.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it +@@ -247,10 +247,12 @@ final class P11RSACipher extends CipherSpi { + return; + } + initialized = false; ++ + try { + if (session == null) { + return; + } ++ + if (doCancel && token.explicitCancel) { + cancelOperation(); + } +@@ -264,36 +266,33 @@ final class P11RSACipher extends CipherSpi { + // state variables such as "initialized" + private void cancelOperation() { + token.ensureValid(); +- if (session.hasObjects() == false) { +- session = token.killSession(session); +- return; +- } else { +- try { +- PKCS11 p11 = token.p11; +- int inLen = maxInputSize; +- int outLen = buffer.length; +- long sessId = session.id(); +- switch (mode) { +- case MODE_ENCRYPT: +- p11.C_Encrypt(sessId, 0, buffer, 0, inLen, 0, buffer, 0, outLen); +- break; +- case MODE_DECRYPT: +- p11.C_Decrypt(sessId, 0, buffer, 0, inLen, 0, buffer, 0, outLen); +- break; +- case MODE_SIGN: +- byte[] tmpBuffer = new byte[maxInputSize]; +- p11.C_Sign(sessId, tmpBuffer); +- break; +- case MODE_VERIFY: +- p11.C_VerifyRecover(sessId, buffer, 0, inLen, buffer, +- 0, outLen); +- break; +- default: +- throw new ProviderException("internal error"); +- } +- } catch (PKCS11Exception e) { +- // XXX ensure this always works, ignore error ++ // cancel operation by finishing it; avoid killSession as some ++ // hardware vendors may require re-login ++ try { ++ PKCS11 p11 = token.p11; ++ int inLen = maxInputSize; ++ int outLen = buffer.length; ++ long sessId = session.id(); ++ switch (mode) { ++ case MODE_ENCRYPT: ++ p11.C_Encrypt(sessId, 0, buffer, 0, inLen, 0, buffer, 0, outLen); ++ break; ++ case MODE_DECRYPT: ++ p11.C_Decrypt(sessId, 0, buffer, 0, inLen, 0, buffer, 0, outLen); ++ break; ++ case MODE_SIGN: ++ byte[] tmpBuffer = new byte[maxInputSize]; ++ p11.C_Sign(sessId, tmpBuffer); ++ break; ++ case MODE_VERIFY: ++ p11.C_VerifyRecover(sessId, buffer, 0, inLen, buffer, ++ 0, outLen); ++ break; ++ default: ++ throw new ProviderException("internal error"); + } ++ } catch (PKCS11Exception e) { ++ // XXX ensure this always works, ignore error + } + } + +@@ -362,6 +361,7 @@ final class P11RSACipher extends CipherSpi { + private int implDoFinal(byte[] out, int outOfs, int outLen) + throws BadPaddingException, IllegalBlockSizeException { + if (bufOfs > maxInputSize) { ++ reset(true); + throw new IllegalBlockSizeException("Data must not be longer " + + "than " + maxInputSize + " bytes"); + } +diff --git a/src/jdk.crypto.cryptoki/share/classes/sun/security/pkcs11/P11Signature.java b/src/jdk.crypto.cryptoki/share/classes/sun/security/pkcs11/P11Signature.java +index 159c65f59..f41538cda 100644 +--- a/src/jdk.crypto.cryptoki/share/classes/sun/security/pkcs11/P11Signature.java ++++ b/src/jdk.crypto.cryptoki/share/classes/sun/security/pkcs11/P11Signature.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it +@@ -270,10 +270,12 @@ final class P11Signature extends SignatureSpi { + return; + } + initialized = false; ++ + try { + if (session == null) { + return; + } ++ + if (doCancel && token.explicitCancel) { + cancelOperation(); + } +@@ -284,59 +286,51 @@ final class P11Signature extends SignatureSpi { + } + + private void cancelOperation() { +- + token.ensureValid(); +- if (session.hasObjects() == false) { +- session = token.killSession(session); +- return; +- } else { +- // "cancel" operation by finishing it +- // XXX make sure all this always works correctly ++ // cancel operation by finishing it; avoid killSession as some ++ // hardware vendors may require re-login ++ try { + if (mode == M_SIGN) { +- try { +- if (type == T_UPDATE) { +- token.p11.C_SignFinal(session.id(), 0); +- } else { +- byte[] digest; +- if (type == T_DIGEST) { +- digest = md.digest(); +- } else { // T_RAW +- digest = buffer; +- } +- token.p11.C_Sign(session.id(), digest); ++ if (type == T_UPDATE) { ++ token.p11.C_SignFinal(session.id(), 0); ++ } else { ++ byte[] digest; ++ if (type == T_DIGEST) { ++ digest = md.digest(); ++ } else { // T_RAW ++ digest = buffer; + } +- } catch (PKCS11Exception e) { +- throw new ProviderException("cancel failed", e); ++ token.p11.C_Sign(session.id(), digest); + } + } else { // M_VERIFY + byte[] signature; +- try { +- if (keyAlgorithm.equals("DSA")) { +- signature = new byte[40]; +- } else { +- signature = new byte[(p11Key.length() + 7) >> 3]; +- } +- if (type == T_UPDATE) { +- token.p11.C_VerifyFinal(session.id(), signature); +- } else { +- byte[] digest; +- if (type == T_DIGEST) { +- digest = md.digest(); +- } else { // T_RAW +- digest = buffer; +- } +- token.p11.C_Verify(session.id(), digest, signature); +- } +- } catch (PKCS11Exception e) { +- long errorCode = e.getErrorCode(); +- if ((errorCode == CKR_SIGNATURE_INVALID) || +- (errorCode == CKR_SIGNATURE_LEN_RANGE)) { +- // expected since signature is incorrect +- return; ++ if (keyAlgorithm.equals("DSA")) { ++ signature = new byte[40]; ++ } else { ++ signature = new byte[(p11Key.length() + 7) >> 3]; ++ } ++ if (type == T_UPDATE) { ++ token.p11.C_VerifyFinal(session.id(), signature); ++ } else { ++ byte[] digest; ++ if (type == T_DIGEST) { ++ digest = md.digest(); ++ } else { // T_RAW ++ digest = buffer; + } +- throw new ProviderException("cancel failed", e); ++ token.p11.C_Verify(session.id(), digest, signature); ++ } ++ } ++ } catch (PKCS11Exception e) { ++ if (mode == M_VERIFY) { ++ long errorCode = e.getErrorCode(); ++ if ((errorCode == CKR_SIGNATURE_INVALID) || ++ (errorCode == CKR_SIGNATURE_LEN_RANGE)) { ++ // expected since signature is incorrect ++ return; + } + } ++ throw new ProviderException("cancel failed", e); + } + } + +-- +2.19.1 + diff --git a/8247766-aarch64-guarantee-val-1U--nbits-failed-Field-too-big-for-insn.patch b/8247766-aarch64-guarantee-val-1U--nbits-failed-Field-too-big-for-insn.patch new file mode 100644 index 0000000000000000000000000000000000000000..57d4598ceaff88605ac493a29696a648059bbdd0 --- /dev/null +++ b/8247766-aarch64-guarantee-val-1U--nbits-failed-Field-too-big-for-insn.patch @@ -0,0 +1,257 @@ +diff --git a/src/hotspot/cpu/aarch64/assembler_aarch64.hpp b/src/hotspot/cpu/aarch64/assembler_aarch64.hpp +index db582f25f..80ddb9b31 100644 +--- a/src/hotspot/cpu/aarch64/assembler_aarch64.hpp ++++ b/src/hotspot/cpu/aarch64/assembler_aarch64.hpp +@@ -553,14 +553,7 @@ class Address { + + void lea(MacroAssembler *, Register) const; + 
+- static bool offset_ok_for_immed(long offset, int shift = 0) { +- unsigned mask = (1 << shift) - 1; +- if (offset < 0 || offset & mask) { +- return (uabs(offset) < (1 << (20 - 12))); // Unscaled offset +- } else { +- return ((offset >> shift) < (1 << (21 - 10 + 1))); // Scaled, unsigned offset +- } +- } ++ static bool offset_ok_for_immed(long offset, uint shift); + }; + + // Convience classes +diff --git a/src/hotspot/cpu/aarch64/assembler_aarch64.inline.hpp b/src/hotspot/cpu/aarch64/assembler_aarch64.inline.hpp +index 86eb8c2f8..a475575bf 100644 +--- a/src/hotspot/cpu/aarch64/assembler_aarch64.inline.hpp ++++ b/src/hotspot/cpu/aarch64/assembler_aarch64.inline.hpp +@@ -30,4 +30,15 @@ + #include "asm/codeBuffer.hpp" + #include "code/codeCache.hpp" + ++inline bool Address::offset_ok_for_immed(long offset, uint shift = 0) { ++ uint mask = (1 << shift) - 1; ++ if (offset < 0 || (offset & mask) != 0) { ++ // Unscaled signed offset, encoded in a signed imm9 field. ++ return Assembler::is_simm9(offset); ++ } else { ++ // Scaled unsigned offset, encoded in an unsigned imm12:_ field. ++ return Assembler::is_uimm12(offset >> shift); ++ } ++} ++ + #endif // CPU_AARCH64_VM_ASSEMBLER_AARCH64_INLINE_HPP +diff --git a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp +index f6a77dc78..7798aa509 100644 +--- a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp ++++ b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp +@@ -226,6 +226,19 @@ Address LIR_Assembler::as_Address_lo(LIR_Address* addr) { + // FIXME: This needs to be much more clever. See x86. + } + ++// Ensure a valid Address (base + offset) to a stack-slot. If stack access is ++// not encodable as a base + (immediate) offset, generate an explicit address ++// calculation to hold the address in a temporary register. 
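++// Worked example (offset value illustrative): a 64-bit slot at sp + 40000 is
++// 8-byte aligned, but 40000 > 4095 * 8 = 32760, the largest scaled unsigned
++// imm12 offset, and far outside the signed imm9 range of an unscaled access,
++// so legitimize_address() materializes the address into tmp with lea first.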
++Address LIR_Assembler::stack_slot_address(int index, uint size, Register tmp, int adjust) { ++ precond(size == 4 || size == 8); ++ Address addr = frame_map()->address_for_slot(index, adjust); ++ precond(addr.getMode() == Address::base_plus_offset); ++ precond(addr.base() == sp); ++ precond(addr.offset() > 0); ++ uint mask = size - 1; ++ assert((addr.offset() & mask) == 0, "scaled offsets only"); ++ return __ legitimize_address(addr, size, tmp); ++} + + void LIR_Assembler::osr_entry() { + offsets()->set_value(CodeOffsets::OSR_Entry, code_offset()); +@@ -745,32 +758,38 @@ void LIR_Assembler::reg2reg(LIR_Opr src, LIR_Opr dest) { + } + + void LIR_Assembler::reg2stack(LIR_Opr src, LIR_Opr dest, BasicType type, bool pop_fpu_stack) { ++ precond(src->is_register() && dest->is_stack()); ++ ++ uint const c_sz32 = sizeof(uint32_t); ++ uint const c_sz64 = sizeof(uint64_t); ++ + if (src->is_single_cpu()) { ++ int index = dest->single_stack_ix(); + if (type == T_ARRAY || type == T_OBJECT) { +- __ str(src->as_register(), frame_map()->address_for_slot(dest->single_stack_ix())); ++ __ str(src->as_register(), stack_slot_address(index, c_sz64, rscratch1)); + __ verify_oop(src->as_register()); + } else if (type == T_METADATA || type == T_DOUBLE || type == T_ADDRESS) { +- __ str(src->as_register(), frame_map()->address_for_slot(dest->single_stack_ix())); ++ __ str(src->as_register(), stack_slot_address(index, c_sz64, rscratch1)); + } else { +- __ strw(src->as_register(), frame_map()->address_for_slot(dest->single_stack_ix())); ++ __ strw(src->as_register(), stack_slot_address(index, c_sz32, rscratch1)); + } + + } else if (src->is_double_cpu()) { +- Address dest_addr_LO = frame_map()->address_for_slot(dest->double_stack_ix(), lo_word_offset_in_bytes); ++ int index = dest->double_stack_ix(); ++ Address dest_addr_LO = stack_slot_address(index, c_sz64, rscratch1, lo_word_offset_in_bytes); + __ str(src->as_register_lo(), dest_addr_LO); + + } else if (src->is_single_fpu()) { +- Address dest_addr = frame_map()->address_for_slot(dest->single_stack_ix()); +- __ strs(src->as_float_reg(), dest_addr); ++ int index = dest->single_stack_ix(); ++ __ strs(src->as_float_reg(), stack_slot_address(index, c_sz32, rscratch1)); + + } else if (src->is_double_fpu()) { +- Address dest_addr = frame_map()->address_for_slot(dest->double_stack_ix()); +- __ strd(src->as_double_reg(), dest_addr); ++ int index = dest->double_stack_ix(); ++ __ strd(src->as_double_reg(), stack_slot_address(index, c_sz64, rscratch1)); + + } else { + ShouldNotReachHere(); + } +- + } + + +@@ -855,32 +874,34 @@ void LIR_Assembler::reg2mem(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_Patch + + + void LIR_Assembler::stack2reg(LIR_Opr src, LIR_Opr dest, BasicType type) { +- assert(src->is_stack(), "should not call otherwise"); +- assert(dest->is_register(), "should not call otherwise"); ++ precond(src->is_stack() && dest->is_register()); ++ ++ uint const c_sz32 = sizeof(uint32_t); ++ uint const c_sz64 = sizeof(uint64_t); + + if (dest->is_single_cpu()) { ++ int index = src->single_stack_ix(); + if (type == T_ARRAY || type == T_OBJECT) { +- __ ldr(dest->as_register(), frame_map()->address_for_slot(src->single_stack_ix())); ++ __ ldr(dest->as_register(), stack_slot_address(index, c_sz64, rscratch1)); + __ verify_oop(dest->as_register()); + } else if (type == T_METADATA || type == T_ADDRESS) { +- __ ldr(dest->as_register(), frame_map()->address_for_slot(src->single_stack_ix())); ++ __ ldr(dest->as_register(), stack_slot_address(index, c_sz64, rscratch1)); + } else { 
+- Address src_addr = frame_map()->address_for_slot(src->single_stack_ix()); +- Address data_addr = __ form_address(rscratch1, sp, src_addr.offset(), 2); +- __ ldrw(dest->as_register(), data_addr); ++ __ ldrw(dest->as_register(), stack_slot_address(index, c_sz32, rscratch1)); + } + + } else if (dest->is_double_cpu()) { +- Address src_addr_LO = frame_map()->address_for_slot(src->double_stack_ix(), lo_word_offset_in_bytes); ++ int index = src->double_stack_ix(); ++ Address src_addr_LO = stack_slot_address(index, c_sz64, rscratch1, lo_word_offset_in_bytes); + __ ldr(dest->as_register_lo(), src_addr_LO); + + } else if (dest->is_single_fpu()) { +- Address src_addr = frame_map()->address_for_slot(src->single_stack_ix()); +- __ ldrs(dest->as_float_reg(), src_addr); ++ int index = src->single_stack_ix(); ++ __ ldrs(dest->as_float_reg(), stack_slot_address(index, c_sz32, rscratch1)); + + } else if (dest->is_double_fpu()) { +- Address src_addr = frame_map()->address_for_slot(src->double_stack_ix()); +- __ ldrd(dest->as_double_reg(), src_addr); ++ int index = src->double_stack_ix(); ++ __ ldrd(dest->as_double_reg(), stack_slot_address(index, c_sz64, rscratch1)); + + } else { + ShouldNotReachHere(); +diff --git a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.hpp b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.hpp +index 6374a33e6..9db81fed9 100644 +--- a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.hpp ++++ b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.hpp +@@ -45,10 +45,12 @@ friend class ArrayCopyStub; + + bool is_literal_address(LIR_Address* addr); + +- // When we need to use something other than rscratch1 use this +- // method. ++ // When we need to use something other than rscratch1 use this method. + Address as_Address(LIR_Address* addr, Register tmp); + ++ // Ensure we have a valid Address (base+offset) to a stack-slot. ++ Address stack_slot_address(int index, uint shift, Register tmp, int adjust = 0); ++ + // Record the type of the receiver in ReceiverTypeData + void type_profile_helper(Register mdo, + ciMethodData *md, ciProfileData *data, +diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp +index 44497ea7c..014a4d3c6 100644 +--- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp ++++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp +@@ -26,7 +26,7 @@ + #ifndef CPU_AARCH64_VM_MACROASSEMBLER_AARCH64_HPP + #define CPU_AARCH64_VM_MACROASSEMBLER_AARCH64_HPP + +-#include "asm/assembler.hpp" ++#include "asm/assembler.inline.hpp" + + // MacroAssembler extends Assembler by frequently used macros. + // +@@ -132,6 +132,20 @@ class MacroAssembler: public Assembler { + a.lea(this, r); + } + ++ /* Sometimes we get misaligned loads and stores, usually from Unsafe ++ accesses, and these can exceed the offset range. */ ++ Address legitimize_address(const Address &a, int size, Register scratch) { ++ if (a.getMode() == Address::base_plus_offset) { ++ if (! 
Address::offset_ok_for_immed(a.offset(), exact_log2(size))) { ++ block_comment("legitimize_address {"); ++ lea(scratch, a); ++ block_comment("} legitimize_address"); ++ return Address(scratch); ++ } ++ } ++ return a; ++ } ++ + void addmw(Address a, Register incr, Register scratch) { + ldrw(scratch, a); + addw(scratch, scratch, incr); +diff --git a/src/hotspot/share/asm/assembler.hpp b/src/hotspot/share/asm/assembler.hpp +index da181b90b..56c3068e4 100644 +--- a/src/hotspot/share/asm/assembler.hpp ++++ b/src/hotspot/share/asm/assembler.hpp +@@ -302,6 +302,7 @@ class AbstractAssembler : public ResourceObj { + // Define some: + static bool is_simm5( intptr_t x) { return is_simm(x, 5 ); } + static bool is_simm8( intptr_t x) { return is_simm(x, 8 ); } ++ static bool is_simm9( intptr_t x) { return is_simm(x, 9 ); } + static bool is_simm10(intptr_t x) { return is_simm(x, 10); } + static bool is_simm11(intptr_t x) { return is_simm(x, 11); } + static bool is_simm12(intptr_t x) { return is_simm(x, 12); } +@@ -310,6 +311,15 @@ class AbstractAssembler : public ResourceObj { + static bool is_simm26(intptr_t x) { return is_simm(x, 26); } + static bool is_simm32(intptr_t x) { return is_simm(x, 32); } + ++ // Test if x is within unsigned immediate range for width. ++ static bool is_uimm(intptr_t x, uint w) { ++ precond(0 < w && w < 64); ++ intptr_t limes = intptr_t(1) << w; ++ return x < limes; ++ } ++ ++ static bool is_uimm12(intptr_t x) { return is_uimm(x, 12); } ++ + // Accessors + CodeSection* code_section() const { return _code_section; } + CodeBuffer* code() const { return code_section()->outer(); } +diff --git a/src/hotspot/share/utilities/debug.hpp b/src/hotspot/share/utilities/debug.hpp +index aa594754a..c66c710f2 100644 +--- a/src/hotspot/share/utilities/debug.hpp ++++ b/src/hotspot/share/utilities/debug.hpp +@@ -66,6 +66,9 @@ do { \ + // For backward compatibility. + #define assert(p, ...) 
vmassert(p, __VA_ARGS__) + ++#define precond(p) assert(p, "precond") ++#define postcond(p) assert(p, "postcond") ++ + #ifndef ASSERT + #define vmassert_status(p, status, msg) + #else +-- +2.19.1 + diff --git a/add-SVE-backend-feature.patch b/add-SVE-backend-feature.patch new file mode 100755 index 0000000000000000000000000000000000000000..6ef41c32bbcc20adc2c70dde430fe9432fd4e25b --- /dev/null +++ b/add-SVE-backend-feature.patch @@ -0,0 +1,8263 @@ +diff --git a/make/hotspot/gensrc/GensrcAdlc.gmk b/make/hotspot/gensrc/GensrcAdlc.gmk +index bb9721c8e..3774dd730 100644 +--- a/make/hotspot/gensrc/GensrcAdlc.gmk ++++ b/make/hotspot/gensrc/GensrcAdlc.gmk +@@ -140,6 +140,12 @@ ifeq ($(call check-jvm-feature, compiler2), true) + $d/os_cpu/$(HOTSPOT_TARGET_OS)_$(HOTSPOT_TARGET_CPU_ARCH)/$(HOTSPOT_TARGET_OS)_$(HOTSPOT_TARGET_CPU_ARCH).ad \ + ))) + ++ ifeq ($(HOTSPOT_TARGET_CPU_ARCH), aarch64) ++ AD_SRC_FILES += $(call uniq, $(wildcard $(foreach d, $(AD_SRC_ROOTS), \ ++ $d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/$(HOTSPOT_TARGET_CPU_ARCH)_sve.ad \ ++ ))) ++ endif ++ + ifeq ($(call check-jvm-feature, shenandoahgc), true) + AD_SRC_FILES += $(call uniq, $(wildcard $(foreach d, $(AD_SRC_ROOTS), \ + $d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/gc/shenandoah/shenandoah_$(HOTSPOT_TARGET_CPU).ad \ +diff --git a/src/hotspot/cpu/aarch64/aarch64.ad b/src/hotspot/cpu/aarch64/aarch64.ad +index 617b2b8fb..eab0101b0 100644 +--- a/src/hotspot/cpu/aarch64/aarch64.ad ++++ b/src/hotspot/cpu/aarch64/aarch64.ad +@@ -69,7 +69,7 @@ register %{ + // + // r0-r7,r10-r26 volatile (caller save) + // r27-r32 system (no save, no allocate) +-// r8-r9 invisible to the allocator (so we can use them as scratch regs) ++// r8-r9 non-allocatable (so we can use them as scratch regs) + // + // as regards Java usage. we don't use any callee save registers + // because this makes it difficult to de-optimise a frame (see comment +@@ -94,6 +94,10 @@ reg_def R6 ( SOC, SOC, Op_RegI, 6, r6->as_VMReg() ); + reg_def R6_H ( SOC, SOC, Op_RegI, 6, r6->as_VMReg()->next() ); + reg_def R7 ( SOC, SOC, Op_RegI, 7, r7->as_VMReg() ); + reg_def R7_H ( SOC, SOC, Op_RegI, 7, r7->as_VMReg()->next() ); ++reg_def R8 ( NS, SOC, Op_RegI, 8, r8->as_VMReg() ); // rscratch1, non-allocatable ++reg_def R8_H ( NS, SOC, Op_RegI, 8, r8->as_VMReg()->next() ); ++reg_def R9 ( NS, SOC, Op_RegI, 9, r9->as_VMReg() ); // rscratch2, non-allocatable ++reg_def R9_H ( NS, SOC, Op_RegI, 9, r9->as_VMReg()->next() ); + reg_def R10 ( SOC, SOC, Op_RegI, 10, r10->as_VMReg() ); + reg_def R10_H ( SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next()); + reg_def R11 ( SOC, SOC, Op_RegI, 11, r11->as_VMReg() ); +@@ -140,7 +144,7 @@ reg_def R31 ( NS, NS, Op_RegI, 31, r31_sp->as_VMReg() ); // sp + reg_def R31_H ( NS, NS, Op_RegI, 31, r31_sp->as_VMReg()->next()); + + // ---------------------------- +-// Float/Double Registers ++// Float/Double/Vector Registers + // ---------------------------- + + // Double Registers +@@ -161,165 +165,316 @@ reg_def R31_H ( NS, NS, Op_RegI, 31, r31_sp->as_VMReg()->next()); + // the platform ABI treats v8-v15 as callee save). 
float registers + // v16-v31 are SOC as per the platform spec + +- reg_def V0 ( SOC, SOC, Op_RegF, 0, v0->as_VMReg() ); +- reg_def V0_H ( SOC, SOC, Op_RegF, 0, v0->as_VMReg()->next() ); +- reg_def V0_J ( SOC, SOC, Op_RegF, 0, v0->as_VMReg()->next(2) ); +- reg_def V0_K ( SOC, SOC, Op_RegF, 0, v0->as_VMReg()->next(3) ); +- +- reg_def V1 ( SOC, SOC, Op_RegF, 1, v1->as_VMReg() ); +- reg_def V1_H ( SOC, SOC, Op_RegF, 1, v1->as_VMReg()->next() ); +- reg_def V1_J ( SOC, SOC, Op_RegF, 1, v1->as_VMReg()->next(2) ); +- reg_def V1_K ( SOC, SOC, Op_RegF, 1, v1->as_VMReg()->next(3) ); +- +- reg_def V2 ( SOC, SOC, Op_RegF, 2, v2->as_VMReg() ); +- reg_def V2_H ( SOC, SOC, Op_RegF, 2, v2->as_VMReg()->next() ); +- reg_def V2_J ( SOC, SOC, Op_RegF, 2, v2->as_VMReg()->next(2) ); +- reg_def V2_K ( SOC, SOC, Op_RegF, 2, v2->as_VMReg()->next(3) ); +- +- reg_def V3 ( SOC, SOC, Op_RegF, 3, v3->as_VMReg() ); +- reg_def V3_H ( SOC, SOC, Op_RegF, 3, v3->as_VMReg()->next() ); +- reg_def V3_J ( SOC, SOC, Op_RegF, 3, v3->as_VMReg()->next(2) ); +- reg_def V3_K ( SOC, SOC, Op_RegF, 3, v3->as_VMReg()->next(3) ); +- +- reg_def V4 ( SOC, SOC, Op_RegF, 4, v4->as_VMReg() ); +- reg_def V4_H ( SOC, SOC, Op_RegF, 4, v4->as_VMReg()->next() ); +- reg_def V4_J ( SOC, SOC, Op_RegF, 4, v4->as_VMReg()->next(2) ); +- reg_def V4_K ( SOC, SOC, Op_RegF, 4, v4->as_VMReg()->next(3) ); +- +- reg_def V5 ( SOC, SOC, Op_RegF, 5, v5->as_VMReg() ); +- reg_def V5_H ( SOC, SOC, Op_RegF, 5, v5->as_VMReg()->next() ); +- reg_def V5_J ( SOC, SOC, Op_RegF, 5, v5->as_VMReg()->next(2) ); +- reg_def V5_K ( SOC, SOC, Op_RegF, 5, v5->as_VMReg()->next(3) ); +- +- reg_def V6 ( SOC, SOC, Op_RegF, 6, v6->as_VMReg() ); +- reg_def V6_H ( SOC, SOC, Op_RegF, 6, v6->as_VMReg()->next() ); +- reg_def V6_J ( SOC, SOC, Op_RegF, 6, v6->as_VMReg()->next(2) ); +- reg_def V6_K ( SOC, SOC, Op_RegF, 6, v6->as_VMReg()->next(3) ); +- +- reg_def V7 ( SOC, SOC, Op_RegF, 7, v7->as_VMReg() ); +- reg_def V7_H ( SOC, SOC, Op_RegF, 7, v7->as_VMReg()->next() ); +- reg_def V7_J ( SOC, SOC, Op_RegF, 7, v7->as_VMReg()->next(2) ); +- reg_def V7_K ( SOC, SOC, Op_RegF, 7, v7->as_VMReg()->next(3) ); +- +- reg_def V8 ( SOC, SOC, Op_RegF, 8, v8->as_VMReg() ); +- reg_def V8_H ( SOC, SOC, Op_RegF, 8, v8->as_VMReg()->next() ); +- reg_def V8_J ( SOC, SOC, Op_RegF, 8, v8->as_VMReg()->next(2) ); +- reg_def V8_K ( SOC, SOC, Op_RegF, 8, v8->as_VMReg()->next(3) ); +- +- reg_def V9 ( SOC, SOC, Op_RegF, 9, v9->as_VMReg() ); +- reg_def V9_H ( SOC, SOC, Op_RegF, 9, v9->as_VMReg()->next() ); +- reg_def V9_J ( SOC, SOC, Op_RegF, 9, v9->as_VMReg()->next(2) ); +- reg_def V9_K ( SOC, SOC, Op_RegF, 9, v9->as_VMReg()->next(3) ); +- +- reg_def V10 ( SOC, SOC, Op_RegF, 10, v10->as_VMReg() ); +- reg_def V10_H( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next() ); +- reg_def V10_J( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(2)); +- reg_def V10_K( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(3)); +- +- reg_def V11 ( SOC, SOC, Op_RegF, 11, v11->as_VMReg() ); +- reg_def V11_H( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next() ); +- reg_def V11_J( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(2)); +- reg_def V11_K( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(3)); +- +- reg_def V12 ( SOC, SOC, Op_RegF, 12, v12->as_VMReg() ); +- reg_def V12_H( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next() ); +- reg_def V12_J( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(2)); +- reg_def V12_K( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(3)); +- +- reg_def V13 ( SOC, SOC, Op_RegF, 13, v13->as_VMReg() ); +- reg_def V13_H( SOC, SOC, Op_RegF, 13, 
v13->as_VMReg()->next() ); +- reg_def V13_J( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(2)); +- reg_def V13_K( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(3)); +- +- reg_def V14 ( SOC, SOC, Op_RegF, 14, v14->as_VMReg() ); +- reg_def V14_H( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next() ); +- reg_def V14_J( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(2)); +- reg_def V14_K( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(3)); +- +- reg_def V15 ( SOC, SOC, Op_RegF, 15, v15->as_VMReg() ); +- reg_def V15_H( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next() ); +- reg_def V15_J( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(2)); +- reg_def V15_K( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(3)); +- +- reg_def V16 ( SOC, SOC, Op_RegF, 16, v16->as_VMReg() ); +- reg_def V16_H( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next() ); +- reg_def V16_J( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(2)); +- reg_def V16_K( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(3)); +- +- reg_def V17 ( SOC, SOC, Op_RegF, 17, v17->as_VMReg() ); +- reg_def V17_H( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next() ); +- reg_def V17_J( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(2)); +- reg_def V17_K( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(3)); +- +- reg_def V18 ( SOC, SOC, Op_RegF, 18, v18->as_VMReg() ); +- reg_def V18_H( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next() ); +- reg_def V18_J( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(2)); +- reg_def V18_K( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(3)); +- +- reg_def V19 ( SOC, SOC, Op_RegF, 19, v19->as_VMReg() ); +- reg_def V19_H( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next() ); +- reg_def V19_J( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(2)); +- reg_def V19_K( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(3)); +- +- reg_def V20 ( SOC, SOC, Op_RegF, 20, v20->as_VMReg() ); +- reg_def V20_H( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next() ); +- reg_def V20_J( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(2)); +- reg_def V20_K( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(3)); +- +- reg_def V21 ( SOC, SOC, Op_RegF, 21, v21->as_VMReg() ); +- reg_def V21_H( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next() ); +- reg_def V21_J( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(2)); +- reg_def V21_K( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(3)); +- +- reg_def V22 ( SOC, SOC, Op_RegF, 22, v22->as_VMReg() ); +- reg_def V22_H( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next() ); +- reg_def V22_J( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(2)); +- reg_def V22_K( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(3)); +- +- reg_def V23 ( SOC, SOC, Op_RegF, 23, v23->as_VMReg() ); +- reg_def V23_H( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next() ); +- reg_def V23_J( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(2)); +- reg_def V23_K( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(3)); +- +- reg_def V24 ( SOC, SOC, Op_RegF, 24, v24->as_VMReg() ); +- reg_def V24_H( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next() ); +- reg_def V24_J( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(2)); +- reg_def V24_K( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(3)); +- +- reg_def V25 ( SOC, SOC, Op_RegF, 25, v25->as_VMReg() ); +- reg_def V25_H( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next() ); +- reg_def V25_J( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(2)); +- reg_def V25_K( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(3)); +- +- reg_def V26 ( SOC, SOC, Op_RegF, 26, v26->as_VMReg() ); +- reg_def V26_H( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next() ); +- reg_def V26_J( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(2)); 
+- reg_def V26_K( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(3));
+-
+- reg_def V27 ( SOC, SOC, Op_RegF, 27, v27->as_VMReg() );
+- reg_def V27_H( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next() );
+- reg_def V27_J( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(2));
+- reg_def V27_K( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(3));
+-
+- reg_def V28 ( SOC, SOC, Op_RegF, 28, v28->as_VMReg() );
+- reg_def V28_H( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next() );
+- reg_def V28_J( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(2));
+- reg_def V28_K( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(3));
+-
+- reg_def V29 ( SOC, SOC, Op_RegF, 29, v29->as_VMReg() );
+- reg_def V29_H( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next() );
+- reg_def V29_J( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(2));
+- reg_def V29_K( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(3));
+-
+- reg_def V30 ( SOC, SOC, Op_RegF, 30, v30->as_VMReg() );
+- reg_def V30_H( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next() );
+- reg_def V30_J( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(2));
+- reg_def V30_K( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(3));
+-
+- reg_def V31 ( SOC, SOC, Op_RegF, 31, v31->as_VMReg() );
+- reg_def V31_H( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next() );
+- reg_def V31_J( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(2));
+- reg_def V31_K( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(3));
++// For SVE vector registers, we simply extend vector register size to 8
++// 'logical' slots. This is nominally 256 bits but it actually covers
++// all possible 'physical' SVE vector register lengths from 128 ~ 2048
++// bits. The 'physical' SVE vector register length is detected during
++// startup, so the register allocator is able to identify the correct
++// number of bytes needed for an SVE spill/unspill.
++// Note that a vector register with 4 slots denotes a 128-bit NEON
++// register, allowing it to be distinguished from the corresponding SVE
++// vector register when the SVE vector length is 128 bits.
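++//
++// Illustrative example: on a 512-bit SVE implementation each V register
++// below still occupies exactly 8 logical slots, while the actual spill
++// size (64 bytes in that case) is obtained at runtime via
++// Matcher::scalable_vector_reg_size(T_BYTE) rather than from the slot count.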
++ ++ reg_def V0 ( SOC, SOC, Op_RegF, 0, v0->as_VMReg() ); ++ reg_def V0_H ( SOC, SOC, Op_RegF, 0, v0->as_VMReg()->next() ); ++ reg_def V0_J ( SOC, SOC, Op_RegF, 0, v0->as_VMReg()->next(2) ); ++ reg_def V0_K ( SOC, SOC, Op_RegF, 0, v0->as_VMReg()->next(3) ); ++ reg_def V0_L ( SOC, SOC, Op_RegF, 0, v0->as_VMReg()->next(4) ); ++ reg_def V0_M ( SOC, SOC, Op_RegF, 0, v0->as_VMReg()->next(5) ); ++ reg_def V0_N ( SOC, SOC, Op_RegF, 0, v0->as_VMReg()->next(6) ); ++ reg_def V0_O ( SOC, SOC, Op_RegF, 0, v0->as_VMReg()->next(7) ); ++ ++ reg_def V1 ( SOC, SOC, Op_RegF, 1, v1->as_VMReg() ); ++ reg_def V1_H ( SOC, SOC, Op_RegF, 1, v1->as_VMReg()->next() ); ++ reg_def V1_J ( SOC, SOC, Op_RegF, 1, v1->as_VMReg()->next(2) ); ++ reg_def V1_K ( SOC, SOC, Op_RegF, 1, v1->as_VMReg()->next(3) ); ++ reg_def V1_L ( SOC, SOC, Op_RegF, 1, v1->as_VMReg()->next(4) ); ++ reg_def V1_M ( SOC, SOC, Op_RegF, 1, v1->as_VMReg()->next(5) ); ++ reg_def V1_N ( SOC, SOC, Op_RegF, 1, v1->as_VMReg()->next(6) ); ++ reg_def V1_O ( SOC, SOC, Op_RegF, 1, v1->as_VMReg()->next(7) ); ++ ++ reg_def V2 ( SOC, SOC, Op_RegF, 2, v2->as_VMReg() ); ++ reg_def V2_H ( SOC, SOC, Op_RegF, 2, v2->as_VMReg()->next() ); ++ reg_def V2_J ( SOC, SOC, Op_RegF, 2, v2->as_VMReg()->next(2) ); ++ reg_def V2_K ( SOC, SOC, Op_RegF, 2, v2->as_VMReg()->next(3) ); ++ reg_def V2_L ( SOC, SOC, Op_RegF, 2, v2->as_VMReg()->next(4) ); ++ reg_def V2_M ( SOC, SOC, Op_RegF, 2, v2->as_VMReg()->next(5) ); ++ reg_def V2_N ( SOC, SOC, Op_RegF, 2, v2->as_VMReg()->next(6) ); ++ reg_def V2_O ( SOC, SOC, Op_RegF, 2, v2->as_VMReg()->next(7) ); ++ ++ reg_def V3 ( SOC, SOC, Op_RegF, 3, v3->as_VMReg() ); ++ reg_def V3_H ( SOC, SOC, Op_RegF, 3, v3->as_VMReg()->next() ); ++ reg_def V3_J ( SOC, SOC, Op_RegF, 3, v3->as_VMReg()->next(2) ); ++ reg_def V3_K ( SOC, SOC, Op_RegF, 3, v3->as_VMReg()->next(3) ); ++ reg_def V3_L ( SOC, SOC, Op_RegF, 3, v3->as_VMReg()->next(4) ); ++ reg_def V3_M ( SOC, SOC, Op_RegF, 3, v3->as_VMReg()->next(5) ); ++ reg_def V3_N ( SOC, SOC, Op_RegF, 3, v3->as_VMReg()->next(6) ); ++ reg_def V3_O ( SOC, SOC, Op_RegF, 3, v3->as_VMReg()->next(7) ); ++ ++ reg_def V4 ( SOC, SOC, Op_RegF, 4, v4->as_VMReg() ); ++ reg_def V4_H ( SOC, SOC, Op_RegF, 4, v4->as_VMReg()->next() ); ++ reg_def V4_J ( SOC, SOC, Op_RegF, 4, v4->as_VMReg()->next(2) ); ++ reg_def V4_K ( SOC, SOC, Op_RegF, 4, v4->as_VMReg()->next(3) ); ++ reg_def V4_L ( SOC, SOC, Op_RegF, 4, v4->as_VMReg()->next(4) ); ++ reg_def V4_M ( SOC, SOC, Op_RegF, 4, v4->as_VMReg()->next(5) ); ++ reg_def V4_N ( SOC, SOC, Op_RegF, 4, v4->as_VMReg()->next(6) ); ++ reg_def V4_O ( SOC, SOC, Op_RegF, 4, v4->as_VMReg()->next(7) ); ++ ++ reg_def V5 ( SOC, SOC, Op_RegF, 5, v5->as_VMReg() ); ++ reg_def V5_H ( SOC, SOC, Op_RegF, 5, v5->as_VMReg()->next() ); ++ reg_def V5_J ( SOC, SOC, Op_RegF, 5, v5->as_VMReg()->next(2) ); ++ reg_def V5_K ( SOC, SOC, Op_RegF, 5, v5->as_VMReg()->next(3) ); ++ reg_def V5_L ( SOC, SOC, Op_RegF, 5, v5->as_VMReg()->next(4) ); ++ reg_def V5_M ( SOC, SOC, Op_RegF, 5, v5->as_VMReg()->next(5) ); ++ reg_def V5_N ( SOC, SOC, Op_RegF, 5, v5->as_VMReg()->next(6) ); ++ reg_def V5_O ( SOC, SOC, Op_RegF, 5, v5->as_VMReg()->next(7) ); ++ ++ reg_def V6 ( SOC, SOC, Op_RegF, 6, v6->as_VMReg() ); ++ reg_def V6_H ( SOC, SOC, Op_RegF, 6, v6->as_VMReg()->next() ); ++ reg_def V6_J ( SOC, SOC, Op_RegF, 6, v6->as_VMReg()->next(2) ); ++ reg_def V6_K ( SOC, SOC, Op_RegF, 6, v6->as_VMReg()->next(3) ); ++ reg_def V6_L ( SOC, SOC, Op_RegF, 6, v6->as_VMReg()->next(4) ); ++ reg_def V6_M ( SOC, SOC, Op_RegF, 6, v6->as_VMReg()->next(5) ); 
++ reg_def V6_N ( SOC, SOC, Op_RegF, 6, v6->as_VMReg()->next(6) ); ++ reg_def V6_O ( SOC, SOC, Op_RegF, 6, v6->as_VMReg()->next(7) ); ++ ++ reg_def V7 ( SOC, SOC, Op_RegF, 7, v7->as_VMReg() ); ++ reg_def V7_H ( SOC, SOC, Op_RegF, 7, v7->as_VMReg()->next() ); ++ reg_def V7_J ( SOC, SOC, Op_RegF, 7, v7->as_VMReg()->next(2) ); ++ reg_def V7_K ( SOC, SOC, Op_RegF, 7, v7->as_VMReg()->next(3) ); ++ reg_def V7_L ( SOC, SOC, Op_RegF, 7, v7->as_VMReg()->next(4) ); ++ reg_def V7_M ( SOC, SOC, Op_RegF, 7, v7->as_VMReg()->next(5) ); ++ reg_def V7_N ( SOC, SOC, Op_RegF, 7, v7->as_VMReg()->next(6) ); ++ reg_def V7_O ( SOC, SOC, Op_RegF, 7, v7->as_VMReg()->next(7) ); ++ ++ reg_def V8 ( SOC, SOC, Op_RegF, 8, v8->as_VMReg() ); ++ reg_def V8_H ( SOC, SOC, Op_RegF, 8, v8->as_VMReg()->next() ); ++ reg_def V8_J ( SOC, SOC, Op_RegF, 8, v8->as_VMReg()->next(2) ); ++ reg_def V8_K ( SOC, SOC, Op_RegF, 8, v8->as_VMReg()->next(3) ); ++ reg_def V8_L ( SOC, SOC, Op_RegF, 8, v8->as_VMReg()->next(4) ); ++ reg_def V8_M ( SOC, SOC, Op_RegF, 8, v8->as_VMReg()->next(5) ); ++ reg_def V8_N ( SOC, SOC, Op_RegF, 8, v8->as_VMReg()->next(6) ); ++ reg_def V8_O ( SOC, SOC, Op_RegF, 8, v8->as_VMReg()->next(7) ); ++ ++ reg_def V9 ( SOC, SOC, Op_RegF, 9, v9->as_VMReg() ); ++ reg_def V9_H ( SOC, SOC, Op_RegF, 9, v9->as_VMReg()->next() ); ++ reg_def V9_J ( SOC, SOC, Op_RegF, 9, v9->as_VMReg()->next(2) ); ++ reg_def V9_K ( SOC, SOC, Op_RegF, 9, v9->as_VMReg()->next(3) ); ++ reg_def V9_L ( SOC, SOC, Op_RegF, 9, v9->as_VMReg()->next(4) ); ++ reg_def V9_M ( SOC, SOC, Op_RegF, 9, v9->as_VMReg()->next(5) ); ++ reg_def V9_N ( SOC, SOC, Op_RegF, 9, v9->as_VMReg()->next(6) ); ++ reg_def V9_O ( SOC, SOC, Op_RegF, 9, v9->as_VMReg()->next(7) ); ++ ++ reg_def V10 ( SOC, SOC, Op_RegF, 10, v10->as_VMReg() ); ++ reg_def V10_H ( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next() ); ++ reg_def V10_J ( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(2) ); ++ reg_def V10_K ( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(3) ); ++ reg_def V10_L ( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(4) ); ++ reg_def V10_M ( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(5) ); ++ reg_def V10_N ( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(6) ); ++ reg_def V10_O ( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(7) ); ++ ++ reg_def V11 ( SOC, SOC, Op_RegF, 11, v11->as_VMReg() ); ++ reg_def V11_H ( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next() ); ++ reg_def V11_J ( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(2) ); ++ reg_def V11_K ( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(3) ); ++ reg_def V11_L ( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(4) ); ++ reg_def V11_M ( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(5) ); ++ reg_def V11_N ( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(6) ); ++ reg_def V11_O ( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(7) ); ++ ++ reg_def V12 ( SOC, SOC, Op_RegF, 12, v12->as_VMReg() ); ++ reg_def V12_H ( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next() ); ++ reg_def V12_J ( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(2) ); ++ reg_def V12_K ( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(3) ); ++ reg_def V12_L ( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(4) ); ++ reg_def V12_M ( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(5) ); ++ reg_def V12_N ( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(6) ); ++ reg_def V12_O ( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(7) ); ++ ++ reg_def V13 ( SOC, SOC, Op_RegF, 13, v13->as_VMReg() ); ++ reg_def V13_H ( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next() ); ++ reg_def V13_J ( SOC, SOC, Op_RegF, 13, 
v13->as_VMReg()->next(2) ); ++ reg_def V13_K ( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(3) ); ++ reg_def V13_L ( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(4) ); ++ reg_def V13_M ( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(5) ); ++ reg_def V13_N ( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(6) ); ++ reg_def V13_O ( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(7) ); ++ ++ reg_def V14 ( SOC, SOC, Op_RegF, 14, v14->as_VMReg() ); ++ reg_def V14_H ( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next() ); ++ reg_def V14_J ( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(2) ); ++ reg_def V14_K ( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(3) ); ++ reg_def V14_L ( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(4) ); ++ reg_def V14_M ( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(5) ); ++ reg_def V14_N ( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(6) ); ++ reg_def V14_O ( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(7) ); ++ ++ reg_def V15 ( SOC, SOC, Op_RegF, 15, v15->as_VMReg() ); ++ reg_def V15_H ( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next() ); ++ reg_def V15_J ( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(2) ); ++ reg_def V15_K ( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(3) ); ++ reg_def V15_L ( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(4) ); ++ reg_def V15_M ( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(5) ); ++ reg_def V15_N ( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(6) ); ++ reg_def V15_O ( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(7) ); ++ ++ reg_def V16 ( SOC, SOC, Op_RegF, 16, v16->as_VMReg() ); ++ reg_def V16_H ( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next() ); ++ reg_def V16_J ( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(2) ); ++ reg_def V16_K ( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(3) ); ++ reg_def V16_L ( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(4) ); ++ reg_def V16_M ( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(5) ); ++ reg_def V16_N ( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(6) ); ++ reg_def V16_O ( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(7) ); ++ ++ reg_def V17 ( SOC, SOC, Op_RegF, 17, v17->as_VMReg() ); ++ reg_def V17_H ( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next() ); ++ reg_def V17_J ( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(2) ); ++ reg_def V17_K ( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(3) ); ++ reg_def V17_L ( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(4) ); ++ reg_def V17_M ( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(5) ); ++ reg_def V17_N ( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(6) ); ++ reg_def V17_O ( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(7) ); ++ ++ reg_def V18 ( SOC, SOC, Op_RegF, 18, v18->as_VMReg() ); ++ reg_def V18_H ( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next() ); ++ reg_def V18_J ( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(2) ); ++ reg_def V18_K ( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(3) ); ++ reg_def V18_L ( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(4) ); ++ reg_def V18_M ( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(5) ); ++ reg_def V18_N ( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(6) ); ++ reg_def V18_O ( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(7) ); ++ ++ reg_def V19 ( SOC, SOC, Op_RegF, 19, v19->as_VMReg() ); ++ reg_def V19_H ( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next() ); ++ reg_def V19_J ( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(2) ); ++ reg_def V19_K ( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(3) ); ++ reg_def V19_L ( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(4) ); ++ reg_def V19_M ( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(5) ); ++ reg_def 
V19_N ( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(6) ); ++ reg_def V19_O ( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(7) ); ++ ++ reg_def V20 ( SOC, SOC, Op_RegF, 20, v20->as_VMReg() ); ++ reg_def V20_H ( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next() ); ++ reg_def V20_J ( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(2) ); ++ reg_def V20_K ( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(3) ); ++ reg_def V20_L ( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(4) ); ++ reg_def V20_M ( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(5) ); ++ reg_def V20_N ( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(6) ); ++ reg_def V20_O ( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(7) ); ++ ++ reg_def V21 ( SOC, SOC, Op_RegF, 21, v21->as_VMReg() ); ++ reg_def V21_H ( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next() ); ++ reg_def V21_J ( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(2) ); ++ reg_def V21_K ( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(3) ); ++ reg_def V21_L ( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(4) ); ++ reg_def V21_M ( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(5) ); ++ reg_def V21_N ( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(6) ); ++ reg_def V21_O ( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(7) ); ++ ++ reg_def V22 ( SOC, SOC, Op_RegF, 22, v22->as_VMReg() ); ++ reg_def V22_H ( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next() ); ++ reg_def V22_J ( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(2) ); ++ reg_def V22_K ( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(3) ); ++ reg_def V22_L ( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(4) ); ++ reg_def V22_M ( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(5) ); ++ reg_def V22_N ( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(6) ); ++ reg_def V22_O ( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(7) ); ++ ++ reg_def V23 ( SOC, SOC, Op_RegF, 23, v23->as_VMReg() ); ++ reg_def V23_H ( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next() ); ++ reg_def V23_J ( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(2) ); ++ reg_def V23_K ( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(3) ); ++ reg_def V23_L ( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(4) ); ++ reg_def V23_M ( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(5) ); ++ reg_def V23_N ( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(6) ); ++ reg_def V23_O ( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(7) ); ++ ++ reg_def V24 ( SOC, SOC, Op_RegF, 24, v24->as_VMReg() ); ++ reg_def V24_H ( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next() ); ++ reg_def V24_J ( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(2) ); ++ reg_def V24_K ( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(3) ); ++ reg_def V24_L ( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(4) ); ++ reg_def V24_M ( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(5) ); ++ reg_def V24_N ( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(6) ); ++ reg_def V24_O ( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(7) ); ++ ++ reg_def V25 ( SOC, SOC, Op_RegF, 25, v25->as_VMReg() ); ++ reg_def V25_H ( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next() ); ++ reg_def V25_J ( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(2) ); ++ reg_def V25_K ( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(3) ); ++ reg_def V25_L ( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(4) ); ++ reg_def V25_M ( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(5) ); ++ reg_def V25_N ( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(6) ); ++ reg_def V25_O ( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(7) ); ++ ++ reg_def V26 ( SOC, SOC, Op_RegF, 26, v26->as_VMReg() ); ++ reg_def V26_H ( SOC, SOC, Op_RegF, 26, 
v26->as_VMReg()->next() ); ++ reg_def V26_J ( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(2) ); ++ reg_def V26_K ( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(3) ); ++ reg_def V26_L ( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(4) ); ++ reg_def V26_M ( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(5) ); ++ reg_def V26_N ( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(6) ); ++ reg_def V26_O ( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(7) ); ++ ++ reg_def V27 ( SOC, SOC, Op_RegF, 27, v27->as_VMReg() ); ++ reg_def V27_H ( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next() ); ++ reg_def V27_J ( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(2) ); ++ reg_def V27_K ( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(3) ); ++ reg_def V27_L ( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(4) ); ++ reg_def V27_M ( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(5) ); ++ reg_def V27_N ( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(6) ); ++ reg_def V27_O ( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(7) ); ++ ++ reg_def V28 ( SOC, SOC, Op_RegF, 28, v28->as_VMReg() ); ++ reg_def V28_H ( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next() ); ++ reg_def V28_J ( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(2) ); ++ reg_def V28_K ( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(3) ); ++ reg_def V28_L ( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(4) ); ++ reg_def V28_M ( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(5) ); ++ reg_def V28_N ( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(6) ); ++ reg_def V28_O ( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(7) ); ++ ++ reg_def V29 ( SOC, SOC, Op_RegF, 29, v29->as_VMReg() ); ++ reg_def V29_H ( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next() ); ++ reg_def V29_J ( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(2) ); ++ reg_def V29_K ( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(3) ); ++ reg_def V29_L ( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(4) ); ++ reg_def V29_M ( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(5) ); ++ reg_def V29_N ( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(6) ); ++ reg_def V29_O ( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(7) ); ++ ++ reg_def V30 ( SOC, SOC, Op_RegF, 30, v30->as_VMReg() ); ++ reg_def V30_H ( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next() ); ++ reg_def V30_J ( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(2) ); ++ reg_def V30_K ( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(3) ); ++ reg_def V30_L ( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(4) ); ++ reg_def V30_M ( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(5) ); ++ reg_def V30_N ( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(6) ); ++ reg_def V30_O ( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(7) ); ++ ++ reg_def V31 ( SOC, SOC, Op_RegF, 31, v31->as_VMReg() ); ++ reg_def V31_H ( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next() ); ++ reg_def V31_J ( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(2) ); ++ reg_def V31_K ( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(3) ); ++ reg_def V31_L ( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(4) ); ++ reg_def V31_M ( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(5) ); ++ reg_def V31_N ( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(6) ); ++ reg_def V31_O ( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(7) ); ++ ++ ++// ---------------------------- ++// SVE Predicate Registers ++// ---------------------------- ++ reg_def P0 (SOC, SOC, Op_RegVMask, 0, p0->as_VMReg()); ++ reg_def P1 (SOC, SOC, Op_RegVMask, 1, p1->as_VMReg()); ++ reg_def P2 (SOC, SOC, Op_RegVMask, 2, p2->as_VMReg()); ++ reg_def P3 (SOC, SOC, Op_RegVMask, 3, p3->as_VMReg()); ++ reg_def P4 
(SOC, SOC, Op_RegVMask, 4, p4->as_VMReg()); ++ reg_def P5 (SOC, SOC, Op_RegVMask, 5, p5->as_VMReg()); ++ reg_def P6 (SOC, SOC, Op_RegVMask, 6, p6->as_VMReg()); ++ reg_def P7 (SOC, SOC, Op_RegVMask, 7, p7->as_VMReg()); + + // ---------------------------- + // Special Registers +@@ -333,7 +488,6 @@ reg_def R31_H ( NS, NS, Op_RegI, 31, r31_sp->as_VMReg()->next()); + + reg_def RFLAGS(SOC, SOC, 0, 32, VMRegImpl::Bad()); + +- + // Specify priority of register selection within phases of register + // allocation. Highest priority is first. A useful heuristic is to + // give registers a low priority when they are required by machine +@@ -381,50 +535,64 @@ alloc_class chunk0( + R29, R29_H, // fp + R30, R30_H, // lr + R31, R31_H, // sp ++ R8, R8_H, // rscratch1 ++ R9, R9_H, // rscratch2 + ); + + alloc_class chunk1( + + // no save +- V16, V16_H, V16_J, V16_K, +- V17, V17_H, V17_J, V17_K, +- V18, V18_H, V18_J, V18_K, +- V19, V19_H, V19_J, V19_K, +- V20, V20_H, V20_J, V20_K, +- V21, V21_H, V21_J, V21_K, +- V22, V22_H, V22_J, V22_K, +- V23, V23_H, V23_J, V23_K, +- V24, V24_H, V24_J, V24_K, +- V25, V25_H, V25_J, V25_K, +- V26, V26_H, V26_J, V26_K, +- V27, V27_H, V27_J, V27_K, +- V28, V28_H, V28_J, V28_K, +- V29, V29_H, V29_J, V29_K, +- V30, V30_H, V30_J, V30_K, +- V31, V31_H, V31_J, V31_K, ++ V16, V16_H, V16_J, V16_K, V16_L, V16_M, V16_N, V16_O, ++ V17, V17_H, V17_J, V17_K, V17_L, V17_M, V17_N, V17_O, ++ V18, V18_H, V18_J, V18_K, V18_L, V18_M, V18_N, V18_O, ++ V19, V19_H, V19_J, V19_K, V19_L, V19_M, V19_N, V19_O, ++ V20, V20_H, V20_J, V20_K, V20_L, V20_M, V20_N, V20_O, ++ V21, V21_H, V21_J, V21_K, V21_L, V21_M, V21_N, V21_O, ++ V22, V22_H, V22_J, V22_K, V22_L, V22_M, V22_N, V22_O, ++ V23, V23_H, V23_J, V23_K, V23_L, V23_M, V23_N, V23_O, ++ V24, V24_H, V24_J, V24_K, V24_L, V24_M, V24_N, V24_O, ++ V25, V25_H, V25_J, V25_K, V25_L, V25_M, V25_N, V25_O, ++ V26, V26_H, V26_J, V26_K, V26_L, V26_M, V26_N, V26_O, ++ V27, V27_H, V27_J, V27_K, V27_L, V27_M, V27_N, V27_O, ++ V28, V28_H, V28_J, V28_K, V28_L, V28_M, V28_N, V28_O, ++ V29, V29_H, V29_J, V29_K, V29_L, V29_M, V29_N, V29_O, ++ V30, V30_H, V30_J, V30_K, V30_L, V30_M, V30_N, V30_O, ++ V31, V31_H, V31_J, V31_K, V31_L, V31_M, V31_N, V31_O, + + // arg registers +- V0, V0_H, V0_J, V0_K, +- V1, V1_H, V1_J, V1_K, +- V2, V2_H, V2_J, V2_K, +- V3, V3_H, V3_J, V3_K, +- V4, V4_H, V4_J, V4_K, +- V5, V5_H, V5_J, V5_K, +- V6, V6_H, V6_J, V6_K, +- V7, V7_H, V7_J, V7_K, ++ V0, V0_H, V0_J, V0_K, V0_L, V0_M, V0_N, V0_O, ++ V1, V1_H, V1_J, V1_K, V1_L, V1_M, V1_N, V1_O, ++ V2, V2_H, V2_J, V2_K, V2_L, V2_M, V2_N, V2_O, ++ V3, V3_H, V3_J, V3_K, V3_L, V3_M, V3_N, V3_O, ++ V4, V4_H, V4_J, V4_K, V4_L, V4_M, V4_N, V4_O, ++ V5, V5_H, V5_J, V5_K, V5_L, V5_M, V5_N, V5_O, ++ V6, V6_H, V6_J, V6_K, V6_L, V6_M, V6_N, V6_O, ++ V7, V7_H, V7_J, V7_K, V7_L, V7_M, V7_N, V7_O, + + // non-volatiles +- V8, V8_H, V8_J, V8_K, +- V9, V9_H, V9_J, V9_K, +- V10, V10_H, V10_J, V10_K, +- V11, V11_H, V11_J, V11_K, +- V12, V12_H, V12_J, V12_K, +- V13, V13_H, V13_J, V13_K, +- V14, V14_H, V14_J, V14_K, +- V15, V15_H, V15_J, V15_K, ++ V8, V8_H, V8_J, V8_K, V8_L, V8_M, V8_N, V8_O, ++ V9, V9_H, V9_J, V9_K, V9_L, V9_M, V9_N, V9_O, ++ V10, V10_H, V10_J, V10_K, V10_L, V10_M, V10_N, V10_O, ++ V11, V11_H, V11_J, V11_K, V11_L, V11_M, V11_N, V11_O, ++ V12, V12_H, V12_J, V12_K, V12_L, V12_M, V12_N, V12_O, ++ V13, V13_H, V13_J, V13_K, V13_L, V13_M, V13_N, V13_O, ++ V14, V14_H, V14_J, V14_K, V14_L, V14_M, V14_N, V14_O, ++ V15, V15_H, V15_J, V15_K, V15_L, V15_M, V15_N, V15_O, ++); ++ ++alloc_class chunk2 ( ++ P0, ++ P1, ++ 
P2, ++ P3, ++ P4, ++ P5, ++ P6, ++ P7, ++ // Only use P0~P7 here for performance + ); + +-alloc_class chunk2(RFLAGS); ++alloc_class chunk3(RFLAGS); + + //----------Architecture Description Register Classes-------------------------- + // Several register classes are automatically defined based upon information in +@@ -865,6 +1033,42 @@ reg_class double_reg( + V31, V31_H + ); + ++// Class for all SVE vector registers. ++reg_class vectora_reg ( ++ V0, V0_H, V0_J, V0_K, V0_L, V0_M, V0_N, V0_O, ++ V1, V1_H, V1_J, V1_K, V1_L, V1_M, V1_N, V1_O, ++ V2, V2_H, V2_J, V2_K, V2_L, V2_M, V2_N, V2_O, ++ V3, V3_H, V3_J, V3_K, V3_L, V3_M, V3_N, V3_O, ++ V4, V4_H, V4_J, V4_K, V4_L, V4_M, V4_N, V4_O, ++ V5, V5_H, V5_J, V5_K, V5_L, V5_M, V5_N, V5_O, ++ V6, V6_H, V6_J, V6_K, V6_L, V6_M, V6_N, V6_O, ++ V7, V7_H, V7_J, V7_K, V7_L, V7_M, V7_N, V7_O, ++ V8, V8_H, V8_J, V8_K, V8_L, V8_M, V8_N, V8_O, ++ V9, V9_H, V9_J, V9_K, V9_L, V9_M, V9_N, V9_O, ++ V10, V10_H, V10_J, V10_K, V10_L, V10_M, V10_N, V10_O, ++ V11, V11_H, V11_J, V11_K, V11_L, V11_M, V11_N, V11_O, ++ V12, V12_H, V12_J, V12_K, V12_L, V12_M, V12_N, V12_O, ++ V13, V13_H, V13_J, V13_K, V13_L, V13_M, V13_N, V13_O, ++ V14, V14_H, V14_J, V14_K, V14_L, V14_M, V14_N, V14_O, ++ V15, V15_H, V15_J, V15_K, V15_L, V15_M, V15_N, V15_O, ++ V16, V16_H, V16_J, V16_K, V16_L, V16_M, V16_N, V16_O, ++ V17, V17_H, V17_J, V17_K, V17_L, V17_M, V17_N, V17_O, ++ V18, V18_H, V18_J, V18_K, V18_L, V18_M, V18_N, V18_O, ++ V19, V19_H, V19_J, V19_K, V19_L, V19_M, V19_N, V19_O, ++ V20, V20_H, V20_J, V20_K, V20_L, V20_M, V20_N, V20_O, ++ V21, V21_H, V21_J, V21_K, V21_L, V21_M, V21_N, V21_O, ++ V22, V22_H, V22_J, V22_K, V22_L, V22_M, V22_N, V22_O, ++ V23, V23_H, V23_J, V23_K, V23_L, V23_M, V23_N, V23_O, ++ V24, V24_H, V24_J, V24_K, V24_L, V24_M, V24_N, V24_O, ++ V25, V25_H, V25_J, V25_K, V25_L, V25_M, V25_N, V25_O, ++ V26, V26_H, V26_J, V26_K, V26_L, V26_M, V26_N, V26_O, ++ V27, V27_H, V27_J, V27_K, V27_L, V27_M, V27_N, V27_O, ++ V28, V28_H, V28_J, V28_K, V28_L, V28_M, V28_N, V28_O, ++ V29, V29_H, V29_J, V29_K, V29_L, V29_M, V29_N, V29_O, ++ V30, V30_H, V30_J, V30_K, V30_L, V30_M, V30_N, V30_O, ++ V31, V31_H, V31_J, V31_K, V31_L, V31_M, V31_N, V31_O, ++); ++ + // Class for all 64bit vector registers + reg_class vectord_reg( + V0, V0_H, +@@ -1097,6 +1301,31 @@ reg_class v31_reg( + V31, V31_H + ); + ++// Class for all SVE predicate registers. ++reg_class pr_reg ( ++ P0, ++ P1, ++ P2, ++ P3, ++ P4, ++ P5, ++ P6, ++ // P7, non-allocatable, preserved with all elements preset to TRUE. ++); ++ ++// Class for SVE governing predicate registers, which are used ++// to determine the active elements of a predicated instruction. ++reg_class gov_pr ( ++ P0, ++ P1, ++ P2, ++ P3, ++ P4, ++ P5, ++ P6, ++ // P7, non-allocatable, preserved with all elements preset to TRUE. ++); ++ + // Singleton class for condition codes + reg_class int_flags(RFLAGS); + +@@ -1758,6 +1987,10 @@ void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { + // branch if we need to invalidate the method later + __ nop(); + ++ if (UseSVE > 0 && C->max_vector_size() >= 16) { ++ __ reinitialize_ptrue(); ++ } ++ + int bangsize = C->bang_size_in_bytes(); + if (C->need_stack_bang(bangsize) && UseStackBanging) + __ generate_stack_overflow_check(bangsize); +@@ -1859,7 +2092,7 @@ int MachEpilogNode::safepoint_offset() const { + + // Figure out which register class each belongs in: rc_int, rc_float or + // rc_stack. 
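++// Note (derived from the register definitions above): the OptoReg slot
++// space is laid out as [int | float | predicate | flags | stack]. With
++// 32 integer registers * 2 slots and 32 vector registers * 8 slots,
++// rc_class() below computes the class boundaries from the *Impl
++// register counts instead of the old hard-coded 60 and 60 + 128.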
+-enum RC { rc_bad, rc_int, rc_float, rc_stack };
++enum RC { rc_bad, rc_int, rc_float, rc_predicate, rc_stack };
+
+ static enum RC rc_class(OptoReg::Name reg) {
+
+@@ -1867,19 +2100,25 @@ static enum RC rc_class(OptoReg::Name reg) {
+     return rc_bad;
+   }
+
+-  // we have 30 int registers * 2 halves
+-  // (rscratch1 and rscratch2 are omitted)
++  // we have 32 int registers * 2 halves
++  int slots_of_int_registers = RegisterImpl::max_slots_per_register * RegisterImpl::number_of_registers;
+
+-  if (reg < 60) {
++  if (reg < slots_of_int_registers) {
+     return rc_int;
+   }
+
+-  // we have 32 float register * 2 halves
+-  if (reg < 60 + 128) {
++  // we have 32 float registers * 8 slots
++  int slots_of_float_registers = FloatRegisterImpl::max_slots_per_register * FloatRegisterImpl::number_of_registers;
++  if (reg < slots_of_int_registers + slots_of_float_registers) {
+     return rc_float;
+   }
+
+-  // Between float regs & stack is the flags regs.
++  int slots_of_predicate_registers = PRegisterImpl::max_slots_per_register * PRegisterImpl::number_of_registers;
++  if (reg < slots_of_int_registers + slots_of_float_registers + slots_of_predicate_registers) {
++    return rc_predicate;
++  }
++
++  // Between predicate regs & stack is the flags.
+   assert(OptoReg::is_stack(reg), "blow up if spilling flags");
+
+   return rc_stack;
+@@ -1918,8 +2157,28 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo
+
+   if (bottom_type()->isa_vect() != NULL) {
+     uint ireg = ideal_reg();
+-    assert(ireg == Op_VecD || ireg == Op_VecX, "must be 64 bit or 128 bit vector");
+-    if (cbuf) {
++    if (ireg == Op_VecA && cbuf) {
++      MacroAssembler _masm(cbuf);
++      int sve_vector_reg_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE);
++      if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) {
++        // stack->stack
++        __ spill_copy_sve_vector_stack_to_stack(src_offset, dst_offset,
++                                                sve_vector_reg_size_in_bytes);
++      } else if (src_lo_rc == rc_float && dst_lo_rc == rc_stack) {
++        __ spill_sve_vector(as_FloatRegister(Matcher::_regEncode[src_lo]), ra_->reg2offset(dst_lo),
++                            sve_vector_reg_size_in_bytes);
++      } else if (src_lo_rc == rc_stack && dst_lo_rc == rc_float) {
++        __ unspill_sve_vector(as_FloatRegister(Matcher::_regEncode[dst_lo]), ra_->reg2offset(src_lo),
++                              sve_vector_reg_size_in_bytes);
++      } else if (src_lo_rc == rc_float && dst_lo_rc == rc_float) {
++        __ sve_orr(as_FloatRegister(Matcher::_regEncode[dst_lo]),
++                   as_FloatRegister(Matcher::_regEncode[src_lo]),
++                   as_FloatRegister(Matcher::_regEncode[src_lo]));
++      } else {
++        ShouldNotReachHere();
++      }
++    } else if (cbuf) {
++      assert(ireg == Op_VecD || ireg == Op_VecX, "must be 64 bit or 128 bit vector");
+       MacroAssembler _masm(cbuf);
+       assert((src_lo_rc != rc_int && dst_lo_rc != rc_int), "sanity");
+       if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) {
+@@ -1937,12 +2196,12 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo
+                as_FloatRegister(Matcher::_regEncode[src_lo]));
+       } else if (src_lo_rc == rc_float && dst_lo_rc == rc_stack) {
+         __ spill(as_FloatRegister(Matcher::_regEncode[src_lo]),
+-                  ireg == Op_VecD ? __ D : __ Q,
+-                  ra_->reg2offset(dst_lo));
++                 ireg == Op_VecD ? __ D : __ Q,
++                 ra_->reg2offset(dst_lo));
+       } else if (src_lo_rc == rc_stack && dst_lo_rc == rc_float) {
+         __ unspill(as_FloatRegister(Matcher::_regEncode[dst_lo]),
+-                    ireg == Op_VecD ? __ D : __ Q,
+-                    ra_->reg2offset(src_lo));
++                   ireg == Op_VecD ? __ D : __ Q,
++                   ra_->reg2offset(src_lo));
+       } else {
+         ShouldNotReachHere();
+       }
+@@ -2027,9 +2286,24 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo
+     st->print("%s", Matcher::regName[dst_lo]);
+   }
+   if (bottom_type()->isa_vect() != NULL) {
+-    st->print("\t# vector spill size = %d", ideal_reg()==Op_VecD ? 64:128);
++    int vsize = 0;
++    switch (ideal_reg()) {
++    case Op_VecD:
++      vsize = 64;
++      break;
++    case Op_VecX:
++      vsize = 128;
++      break;
++    case Op_VecA:
++      vsize = Matcher::scalable_vector_reg_size(T_BYTE) * 8;
++      break;
++    default:
++      assert(false, "bad register type for spill");
++      ShouldNotReachHere();
++    }
++    st->print("\t# vector spill size = %d", vsize);
+   } else {
+-    st->print("\t# spill size = %d", is64 ? 64:32);
++    st->print("\t# spill size = %d", is64 ? 64 : 32);
+   }
+ }
+
+@@ -2188,19 +2462,32 @@ const bool Matcher::match_rule_supported(int opcode) {
+   return true; // Per default match rules are supported.
+ }
+
+-const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
+-
+-  // TODO
+-  // identify extra cases that we might want to provide match rules for
+-  // e.g. Op_ vector nodes and other intrinsics while guarding with vlen
+-  bool ret_value = match_rule_supported(opcode);
+-  // Add rules here.
+-
+-  return ret_value; // Per default match rules are supported.
++// Identify extra cases that we might want to provide match rules for vector nodes and
++// other intrinsics guarded with vector length (vlen) and element type (bt).
++const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
++  if (!match_rule_supported(opcode) || !vector_size_supported(bt, vlen)) {
++    return false;
++  }
++  int bit_size = vlen * type2aelembytes(bt) * 8;
++  if (UseSVE == 0 && bit_size > 128) {
++    return false;
++  }
++  if (UseSVE > 0) {
++    return op_sve_supported(opcode);
++  } else { // NEON
++    // Special cases
++    switch (opcode) {
++    case Op_MulVL:
++      return false;
++    default:
++      break;
++    }
++  }
++  return true; // Per default match rules are supported.
+ }
+
+ const bool Matcher::has_predicated_vectors(void) {
+-  return false;
++  return UseSVE > 0;
+ }
+
+ const int Matcher::float_pressure(int default_pressure_threshold) {
+@@ -2236,7 +2523,8 @@ const bool Matcher::convL2FSupported(void) {
+
+ // Vector width in bytes.
+ const int Matcher::vector_width_in_bytes(BasicType bt) {
+-  int size = MIN2(16,(int)MaxVectorSize);
++  // MaxVectorSize should have been set by detecting the SVE max vector register size.
++  int size = MIN2((UseSVE > 0) ? 256 : 16, (int)MaxVectorSize);
+   // Minimum 2 values in vector
+   if (size < 2*type2aelembytes(bt)) size = 0;
+   // But never < 4
+@@ -2249,14 +2537,32 @@ const int Matcher::max_vector_size(const BasicType bt) {
+   return vector_width_in_bytes(bt)/type2aelembytes(bt);
+ }
+ const int Matcher::min_vector_size(const BasicType bt) {
+-// For the moment limit the vector size to 8 bytes
++  int max_size = max_vector_size(bt);
++  if ((UseSVE > 0) && (MaxVectorSize >= 16)) {
++    // Currently, vector lengths smaller than the SVE vector register size are not supported.
++    return max_size;
++  } else {
++    // For the moment limit the vector size to 8 bytes with NEON.
+   int size = 8 / type2aelembytes(bt);
+   if (size < 2) size = 2;
+   return size;
++  }
++}
++
++const bool Matcher::supports_scalable_vector() {
++  return UseSVE > 0;
++}
++
++// Actual max scalable vector register length.
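++// For example, on a 512-bit SVE implementation (MaxVectorSize == 64) this
++// returns 64 for T_BYTE and 16 for T_INT, i.e. the element count of the
++// widest supported vector for the given element type.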
++const int Matcher::scalable_vector_reg_size(const BasicType bt) { ++ return Matcher::max_vector_size(bt); + } + + // Vector ideal reg. + const uint Matcher::vector_ideal_reg(int len) { ++ if (UseSVE > 0 && 16 <= len && len <= 256) { ++ return Op_VecA; ++ } + switch(len) { + case 8: return Op_VecD; + case 16: return Op_VecX; +@@ -2266,6 +2572,9 @@ const uint Matcher::vector_ideal_reg(int len) { + } + + const uint Matcher::vector_shift_count_ideal_reg(int size) { ++ if (UseSVE > 0 && 16 <= size && size <= 256) { ++ return Op_VecA; ++ } + switch(size) { + case 8: return Op_VecD; + case 16: return Op_VecX; +@@ -3419,6 +3728,11 @@ encode %{ + if (call == NULL) { + ciEnv::current()->record_failure("CodeCache is full"); + return; ++ } else if (UseSVE > 0 && Compile::current()->max_vector_size() >= 16) { ++ // Only non uncommon_trap calls need to reinitialize ptrue. ++ if (uncommon_trap_request() == 0) { ++ __ reinitialize_ptrue(); ++ } + } + %} + +@@ -3429,6 +3743,8 @@ encode %{ + if (call == NULL) { + ciEnv::current()->record_failure("CodeCache is full"); + return; ++ } else if (UseSVE > 0 && Compile::current()->max_vector_size() >= 16) { ++ __ reinitialize_ptrue(); + } + %} + +@@ -3465,6 +3781,9 @@ encode %{ + __ bind(retaddr); + __ add(sp, sp, 2 * wordSize); + } ++ if (UseSVE > 0 && Compile::current()->max_vector_size() >= 16) { ++ __ reinitialize_ptrue(); ++ } + %} + + enc_class aarch64_enc_rethrow() %{ +@@ -3474,6 +3793,11 @@ encode %{ + + enc_class aarch64_enc_ret() %{ + MacroAssembler _masm(&cbuf); ++#ifdef ASSERT ++ if (UseSVE > 0 && Compile::current()->max_vector_size() >= 16) { ++ __ verify_ptrue(); ++ } ++#endif + __ ret(lr); + %} + +@@ -4203,6 +4527,41 @@ operand immLoffset16() + interface(CONST_INTER); + %} + ++// 8 bit signed value. ++operand immI8() ++%{ ++ predicate(n->get_int() <= 127 && n->get_int() >= -128); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// 8 bit signed value (simm8), or #simm8 LSL 8. ++operand immI8_shift8() ++%{ ++ predicate((n->get_int() <= 127 && n->get_int() >= -128) || ++ (n->get_int() <= 32512 && n->get_int() >= -32768 && (n->get_int() & 0xff) == 0)); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// 8 bit signed value (simm8), or #simm8 LSL 8. ++operand immL8_shift8() ++%{ ++ predicate((n->get_long() <= 127 && n->get_long() >= -128) || ++ (n->get_long() <= 32512 && n->get_long() >= -32768 && (n->get_long() & 0xff) == 0)); ++ match(ConL); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ + // 32 bit integer valid for add sub immediate + operand immIAddSub() + %{ +@@ -4832,6 +5191,18 @@ operand vRegD() + interface(REG_INTER); + %} + ++// Generic vector class. This will be used for ++// all vector operands, including NEON and SVE, ++// but currently only used for SVE VecA. 
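++// A value matched into vReg is assigned the VecA ideal register, whose
++// spill size is only known at runtime (see MachSpillCopyNode::implementation
++// above); NEON-only vectors keep using the fixed-size vecD/vecX operands
++// defined below.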
++operand vReg() ++%{ ++ constraint(ALLOC_IN_RC(vectora_reg)); ++ match(VecA); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} ++ + operand vecD() + %{ + constraint(ALLOC_IN_RC(vectord_reg)); +@@ -5140,6 +5511,15 @@ operand vRegD_V31() + interface(REG_INTER); + %} + ++operand pRegGov() ++%{ ++ constraint(ALLOC_IN_RC(gov_pr)); ++ match(RegVMask); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} ++ + // Flags register, used as output of signed compare instructions + + // note that on AArch64 we also use this register as the output for +@@ -15477,7 +15857,7 @@ instruct loadV8(vecD dst, vmem8 mem) + // Load Vector (128 bits) + instruct loadV16(vecX dst, vmem16 mem) + %{ +- predicate(n->as_LoadVector()->memory_size() == 16); ++ predicate(UseSVE == 0 && n->as_LoadVector()->memory_size() == 16); + match(Set dst (LoadVector mem)); + ins_cost(4 * INSN_COST); + format %{ "ldrq $dst,$mem\t# vector (128 bits)" %} +@@ -15533,7 +15913,7 @@ instruct replicate8B(vecD dst, iRegIorL2I src) + + instruct replicate16B(vecX dst, iRegIorL2I src) + %{ +- predicate(n->as_Vector()->length() == 16); ++ predicate(UseSVE == 0 && n->as_Vector()->length() == 16); + match(Set dst (ReplicateB src)); + ins_cost(INSN_COST); + format %{ "dup $dst, $src\t# vector (16B)" %} +@@ -15558,7 +15938,7 @@ instruct replicate8B_imm(vecD dst, immI con) + + instruct replicate16B_imm(vecX dst, immI con) + %{ +- predicate(n->as_Vector()->length() == 16); ++ predicate(UseSVE == 0 && n->as_Vector()->length() == 16); + match(Set dst (ReplicateB con)); + ins_cost(INSN_COST); + format %{ "movi $dst, $con\t# vector(16B)" %} +@@ -15583,7 +15963,7 @@ instruct replicate4S(vecD dst, iRegIorL2I src) + + instruct replicate8S(vecX dst, iRegIorL2I src) + %{ +- predicate(n->as_Vector()->length() == 8); ++ predicate(UseSVE == 0 && n->as_Vector()->length() == 8); + match(Set dst (ReplicateS src)); + ins_cost(INSN_COST); + format %{ "dup $dst, $src\t# vector (8S)" %} +@@ -15608,7 +15988,7 @@ instruct replicate4S_imm(vecD dst, immI con) + + instruct replicate8S_imm(vecX dst, immI con) + %{ +- predicate(n->as_Vector()->length() == 8); ++ predicate(UseSVE == 0 && n->as_Vector()->length() == 8); + match(Set dst (ReplicateS con)); + ins_cost(INSN_COST); + format %{ "movi $dst, $con\t# vector(8H)" %} +@@ -15632,7 +16012,7 @@ instruct replicate2I(vecD dst, iRegIorL2I src) + + instruct replicate4I(vecX dst, iRegIorL2I src) + %{ +- predicate(n->as_Vector()->length() == 4); ++ predicate(UseSVE == 0 && n->as_Vector()->length() == 4); + match(Set dst (ReplicateI src)); + ins_cost(INSN_COST); + format %{ "dup $dst, $src\t# vector (4I)" %} +@@ -15656,7 +16036,7 @@ instruct replicate2I_imm(vecD dst, immI con) + + instruct replicate4I_imm(vecX dst, immI con) + %{ +- predicate(n->as_Vector()->length() == 4); ++ predicate(UseSVE == 0 && n->as_Vector()->length() == 4); + match(Set dst (ReplicateI con)); + ins_cost(INSN_COST); + format %{ "movi $dst, $con\t# vector(4I)" %} +@@ -15668,7 +16048,7 @@ instruct replicate4I_imm(vecX dst, immI con) + + instruct replicate2L(vecX dst, iRegL src) + %{ +- predicate(n->as_Vector()->length() == 2); ++ predicate(UseSVE == 0 && n->as_Vector()->length() == 2); + match(Set dst (ReplicateL src)); + ins_cost(INSN_COST); + format %{ "dup $dst, $src\t# vector (2L)" %} +@@ -15680,7 +16060,7 @@ instruct replicate2L(vecX dst, iRegL src) + + instruct replicate2L_zero(vecX dst, immI0 zero) + %{ +- predicate(n->as_Vector()->length() == 2); ++ predicate(UseSVE == 0 && n->as_Vector()->length() == 2); + match(Set dst (ReplicateI 
zero)); + ins_cost(INSN_COST); + format %{ "movi $dst, $zero\t# vector(4I)" %} +@@ -15707,7 +16087,7 @@ instruct replicate2F(vecD dst, vRegF src) + + instruct replicate4F(vecX dst, vRegF src) + %{ +- predicate(n->as_Vector()->length() == 4); ++ predicate(UseSVE == 0 && n->as_Vector()->length() == 4); + match(Set dst (ReplicateF src)); + ins_cost(INSN_COST); + format %{ "dup $dst, $src\t# vector (4F)" %} +@@ -15720,7 +16100,7 @@ instruct replicate4F(vecX dst, vRegF src) + + instruct replicate2D(vecX dst, vRegD src) + %{ +- predicate(n->as_Vector()->length() == 2); ++ predicate(UseSVE == 0 && n->as_Vector()->length() == 2); + match(Set dst (ReplicateD src)); + ins_cost(INSN_COST); + format %{ "dup $dst, $src\t# vector (2D)" %} +@@ -17496,6 +17876,43 @@ instruct vsrl2L_imm(vecX dst, vecX src, immI shift) %{ + ins_pipe(vshift128_imm); + %} + ++instruct vpopcount4I(vecX dst, vecX src) %{ ++ predicate(UsePopCountInstruction && n->as_Vector()->length() == 4); ++ match(Set dst (PopCountVI src)); ++ format %{ ++ "cnt $dst, $src\t# vector (16B)\n\t" ++ "uaddlp $dst, $dst\t# vector (16B)\n\t" ++ "uaddlp $dst, $dst\t# vector (8H)" ++ %} ++ ins_encode %{ ++ __ cnt(as_FloatRegister($dst$$reg), __ T16B, ++ as_FloatRegister($src$$reg)); ++ __ uaddlp(as_FloatRegister($dst$$reg), __ T16B, ++ as_FloatRegister($dst$$reg)); ++ __ uaddlp(as_FloatRegister($dst$$reg), __ T8H, ++ as_FloatRegister($dst$$reg)); ++ %} ++ ins_pipe(pipe_class_default); ++%} ++ ++instruct vpopcount2I(vecD dst, vecD src) %{ ++ predicate(UsePopCountInstruction && n->as_Vector()->length() == 2); ++ match(Set dst (PopCountVI src)); ++ format %{ ++ "cnt $dst, $src\t# vector (8B)\n\t" ++ "uaddlp $dst, $dst\t# vector (8B)\n\t" ++ "uaddlp $dst, $dst\t# vector (4H)" ++ %} ++ ins_encode %{ ++ __ cnt(as_FloatRegister($dst$$reg), __ T8B, ++ as_FloatRegister($src$$reg)); ++ __ uaddlp(as_FloatRegister($dst$$reg), __ T8B, ++ as_FloatRegister($dst$$reg)); ++ __ uaddlp(as_FloatRegister($dst$$reg), __ T4H, ++ as_FloatRegister($dst$$reg)); ++ %} ++ ins_pipe(pipe_class_default); ++%} + + //----------PEEPHOLE RULES----------------------------------------------------- + // These must follow all instruction definitions as they use the names +diff --git a/src/hotspot/cpu/aarch64/aarch64_sve.ad b/src/hotspot/cpu/aarch64/aarch64_sve.ad +new file mode 100644 +index 000000000..8d80cb37a +--- /dev/null ++++ b/src/hotspot/cpu/aarch64/aarch64_sve.ad +@@ -0,0 +1,1366 @@ ++// ++// Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. ++// Copyright (c) 2020, Arm Limited. All rights reserved. ++// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++// ++// This code is free software; you can redistribute it and/or modify it ++// under the terms of the GNU General Public License version 2 only, as ++// published by the Free Software Foundation. ++// ++// This code is distributed in the hope that it will be useful, but WITHOUT ++// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++// version 2 for more details (a copy is included in the LICENSE file that ++// accompanied this code). ++// ++// You should have received a copy of the GNU General Public License version ++// 2 along with this work; if not, write to the Free Software Foundation, ++// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++// ++// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++// or visit www.oracle.com if you need additional information or have any ++// questions. ++// ++// ++ ++// This file is automatically generated by running "m4 aarch64_sve_ad.m4". Do not edit ---- ++ ++// AArch64 SVE Architecture Description File ++ ++ ++// 4 bit signed offset -- for predicated load/store ++ ++operand vmemA_immIOffset4() ++%{ ++ predicate(Address::offset_ok_for_sve_immed(n->get_int(), 4, ++ Matcher::scalable_vector_reg_size(T_BYTE))); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand vmemA_immLOffset4() ++%{ ++ predicate(Address::offset_ok_for_sve_immed(n->get_long(), 4, ++ Matcher::scalable_vector_reg_size(T_BYTE))); ++ match(ConL); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++ ++operand vmemA_indOffI4(iRegP reg, vmemA_immIOffset4 off) ++%{ ++ constraint(ALLOC_IN_RC(ptr_reg)); ++ match(AddP reg off); ++ op_cost(0); ++ format %{ "[$reg, $off, MUL VL]" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0xffffffff); ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++operand vmemA_indOffL4(iRegP reg, vmemA_immLOffset4 off) ++%{ ++ constraint(ALLOC_IN_RC(ptr_reg)); ++ match(AddP reg off); ++ op_cost(0); ++ format %{ "[$reg, $off, MUL VL]" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0xffffffff); ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++opclass vmemA(indirect, vmemA_indOffI4, vmemA_indOffL4); ++ ++source_hpp %{ ++ bool op_sve_supported(int opcode); ++%} ++ ++source %{ ++ ++ static inline BasicType vector_element_basic_type(const MachNode* n) { ++ const TypeVect* vt = n->bottom_type()->is_vect(); ++ return vt->element_basic_type(); ++ } ++ ++ static inline BasicType vector_element_basic_type(const MachNode* use, const MachOper* opnd) { ++ int def_idx = use->operand_index(opnd); ++ Node* def = use->in(def_idx); ++ const TypeVect* vt = def->bottom_type()->is_vect(); ++ return vt->element_basic_type(); ++ } ++ ++ typedef void (MacroAssembler::* sve_mem_insn_predicate)(FloatRegister Rt, Assembler::SIMD_RegVariant T, ++ PRegister Pg, const Address &adr); ++ ++ // Predicated load/store, with optional ptrue to all elements of given predicate register. ++ static void loadStoreA_predicate(MacroAssembler masm, bool is_store, ++ FloatRegister reg, PRegister pg, BasicType bt, ++ int opcode, Register base, int index, int size, int disp) { ++ sve_mem_insn_predicate insn = NULL; ++ Assembler::SIMD_RegVariant type = Assembler::B; ++ int esize = type2aelembytes(bt); ++ if (index == -1) { ++ assert(size == 0, "unsupported address mode: scale size = %d", size); ++ switch(esize) { ++ case 1: ++ insn = is_store ? &MacroAssembler::sve_st1b : &MacroAssembler::sve_ld1b; ++ type = Assembler::B; ++ break; ++ case 2: ++ insn = is_store ? &MacroAssembler::sve_st1h : &MacroAssembler::sve_ld1h; ++ type = Assembler::H; ++ break; ++ case 4: ++ insn = is_store ? &MacroAssembler::sve_st1w : &MacroAssembler::sve_ld1w; ++ type = Assembler::S; ++ break; ++ case 8: ++ insn = is_store ? 
&MacroAssembler::sve_st1d : &MacroAssembler::sve_ld1d; ++ type = Assembler::D; ++ break; ++ default: ++ assert(false, "unsupported"); ++ ShouldNotReachHere(); ++ } ++ (masm.*insn)(reg, type, pg, Address(base, disp / Matcher::scalable_vector_reg_size(T_BYTE))); ++ } else { ++ assert(false, "unimplemented"); ++ ShouldNotReachHere(); ++ } ++ } ++ ++ bool op_sve_supported(int opcode) { ++ switch (opcode) { ++ // No multiply reduction instructions ++ case Op_MulReductionVD: ++ case Op_MulReductionVF: ++ case Op_MulReductionVI: ++ case Op_MulReductionVL: ++ // Others ++ case Op_Extract: ++ case Op_ExtractB: ++ case Op_ExtractC: ++ case Op_ExtractD: ++ case Op_ExtractF: ++ case Op_ExtractI: ++ case Op_ExtractL: ++ case Op_ExtractS: ++ case Op_ExtractUB: ++ return false; ++ default: ++ return true; ++ } ++ } ++ ++%} ++ ++definitions %{ ++ int_def SVE_COST (200, 200); ++%} ++ ++ ++ ++ ++// All SVE instructions ++ ++// vector load/store ++ ++// Use predicated vector load/store ++instruct loadV(vReg dst, vmemA mem) %{ ++ predicate(UseSVE > 0 && n->as_LoadVector()->memory_size() >= 16); ++ match(Set dst (LoadVector mem)); ++ ins_cost(SVE_COST); ++ format %{ "sve_ldr $dst, $mem\t # vector (sve)" %} ++ ins_encode %{ ++ FloatRegister dst_reg = as_FloatRegister($dst$$reg); ++ loadStoreA_predicate(MacroAssembler(&cbuf), false, dst_reg, ptrue, ++ vector_element_basic_type(this), $mem->opcode(), ++ as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct storeV(vReg src, vmemA mem) %{ ++ predicate(UseSVE > 0 && n->as_StoreVector()->memory_size() >= 16); ++ match(Set mem (StoreVector mem src)); ++ ins_cost(SVE_COST); ++ format %{ "sve_str $mem, $src\t # vector (sve)" %} ++ ins_encode %{ ++ FloatRegister src_reg = as_FloatRegister($src$$reg); ++ loadStoreA_predicate(MacroAssembler(&cbuf), true, src_reg, ptrue, ++ vector_element_basic_type(this, $src), $mem->opcode(), ++ as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++// vector add ++ ++instruct vaddB(vReg dst, vReg src1, vReg src2) %{ ++ predicate(UseSVE > 0 && n->as_Vector()->length() >= 16); ++ match(Set dst (AddVB src1 src2)); ++ ins_cost(SVE_COST); ++ format %{ "sve_add $dst, $src1, $src2\t # vector (sve) (B)" %} ++ ins_encode %{ ++ __ sve_add(as_FloatRegister($dst$$reg), __ B, ++ as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct vaddS(vReg dst, vReg src1, vReg src2) %{ ++ predicate(UseSVE > 0 && n->as_Vector()->length() >= 8); ++ match(Set dst (AddVS src1 src2)); ++ ins_cost(SVE_COST); ++ format %{ "sve_add $dst, $src1, $src2\t # vector (sve) (H)" %} ++ ins_encode %{ ++ __ sve_add(as_FloatRegister($dst$$reg), __ H, ++ as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct vaddI(vReg dst, vReg src1, vReg src2) %{ ++ predicate(UseSVE > 0 && n->as_Vector()->length() >= 4); ++ match(Set dst (AddVI src1 src2)); ++ ins_cost(SVE_COST); ++ format %{ "sve_add $dst, $src1, $src2\t # vector (sve) (S)" %} ++ ins_encode %{ ++ __ sve_add(as_FloatRegister($dst$$reg), __ S, ++ as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct vaddL(vReg dst, vReg src1, vReg src2) %{ ++ predicate(UseSVE > 0 && n->as_Vector()->length() >= 2); ++ match(Set dst (AddVL src1 src2)); ++ ins_cost(SVE_COST); ++ format %{ "sve_add $dst, $src1, $src2\t # vector (sve) (D)" %} ++ ins_encode %{ ++ __ 
sve_add(as_FloatRegister($dst$$reg), __ D, ++ as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct vaddF(vReg dst, vReg src1, vReg src2) %{ ++ predicate(UseSVE > 0 && n->as_Vector()->length() >= 4); ++ match(Set dst (AddVF src1 src2)); ++ ins_cost(SVE_COST); ++ format %{ "sve_fadd $dst, $src1, $src2\t # vector (sve) (S)" %} ++ ins_encode %{ ++ __ sve_fadd(as_FloatRegister($dst$$reg), __ S, ++ as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct vaddD(vReg dst, vReg src1, vReg src2) %{ ++ predicate(UseSVE > 0 && n->as_Vector()->length() >= 2); ++ match(Set dst (AddVD src1 src2)); ++ ins_cost(SVE_COST); ++ format %{ "sve_fadd $dst, $src1, $src2\t # vector (sve) (D)" %} ++ ins_encode %{ ++ __ sve_fadd(as_FloatRegister($dst$$reg), __ D, ++ as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++// vector and ++ ++instruct vand(vReg dst, vReg src1, vReg src2) %{ ++ predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() >= 16); ++ match(Set dst (AndV src1 src2)); ++ ins_cost(SVE_COST); ++ format %{ "sve_and $dst, $src1, $src2\t# vector (sve)" %} ++ ins_encode %{ ++ __ sve_and(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++// vector or ++ ++instruct vor(vReg dst, vReg src1, vReg src2) %{ ++ predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() >= 16); ++ match(Set dst (OrV src1 src2)); ++ ins_cost(SVE_COST); ++ format %{ "sve_orr $dst, $src1, $src2\t# vector (sve)" %} ++ ins_encode %{ ++ __ sve_orr(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++// vector xor ++ ++instruct vxor(vReg dst, vReg src1, vReg src2) %{ ++ predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() >= 16); ++ match(Set dst (XorV src1 src2)); ++ ins_cost(SVE_COST); ++ format %{ "sve_eor $dst, $src1, $src2\t# vector (sve)" %} ++ ins_encode %{ ++ __ sve_eor(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++// vector float div ++ ++instruct vdivF(vReg dst_src1, vReg src2) %{ ++ predicate(UseSVE > 0 && n->as_Vector()->length() >= 4); ++ match(Set dst_src1 (DivVF dst_src1 src2)); ++ ins_cost(SVE_COST); ++ format %{ "sve_fdiv $dst_src1, $dst_src1, $src2\t# vector (sve) (S)" %} ++ ins_encode %{ ++ __ sve_fdiv(as_FloatRegister($dst_src1$$reg), __ S, ++ ptrue, as_FloatRegister($src2$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct vdivD(vReg dst_src1, vReg src2) %{ ++ predicate(UseSVE > 0 && n->as_Vector()->length() >= 2); ++ match(Set dst_src1 (DivVD dst_src1 src2)); ++ ins_cost(SVE_COST); ++ format %{ "sve_fdiv $dst_src1, $dst_src1, $src2\t# vector (sve) (D)" %} ++ ins_encode %{ ++ __ sve_fdiv(as_FloatRegister($dst_src1$$reg), __ D, ++ ptrue, as_FloatRegister($src2$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++// vector fmla ++ ++// dst_src1 = dst_src1 + src2 * src3 ++instruct vfmlaF(vReg dst_src1, vReg src2, vReg src3) %{ ++ predicate(UseFMA && UseSVE > 0 && n->as_Vector()->length() >= 4); ++ match(Set dst_src1 (FmaVF dst_src1 (Binary src2 src3))); ++ ins_cost(SVE_COST); ++ format %{ "sve_fmla $dst_src1, $src2, $src3\t # vector (sve) (S)" %} ++ ins_encode %{ ++ __ sve_fmla(as_FloatRegister($dst_src1$$reg), __ S, ++ ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); ++ %} ++ 
ins_pipe(pipe_slow); ++%} ++ ++// dst_src1 = dst_src1 + src2 * src3 ++instruct vfmlaD(vReg dst_src1, vReg src2, vReg src3) %{ ++ predicate(UseFMA && UseSVE > 0 && n->as_Vector()->length() >= 2); ++ match(Set dst_src1 (FmaVD dst_src1 (Binary src2 src3))); ++ ins_cost(SVE_COST); ++ format %{ "sve_fmla $dst_src1, $src2, $src3\t # vector (sve) (D)" %} ++ ins_encode %{ ++ __ sve_fmla(as_FloatRegister($dst_src1$$reg), __ D, ++ ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++// vector fmls ++ ++// dst_src1 = dst_src1 + -src2 * src3 ++// dst_src1 = dst_src1 + src2 * -src3 ++instruct vfmlsF(vReg dst_src1, vReg src2, vReg src3) %{ ++ predicate(UseFMA && UseSVE > 0 && n->as_Vector()->length() >= 4); ++ match(Set dst_src1 (FmaVF dst_src1 (Binary (NegVF src2) src3))); ++ match(Set dst_src1 (FmaVF dst_src1 (Binary src2 (NegVF src3)))); ++ ins_cost(SVE_COST); ++ format %{ "sve_fmls $dst_src1, $src2, $src3\t # vector (sve) (S)" %} ++ ins_encode %{ ++ __ sve_fmls(as_FloatRegister($dst_src1$$reg), __ S, ++ ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++// dst_src1 = dst_src1 + -src2 * src3 ++// dst_src1 = dst_src1 + src2 * -src3 ++instruct vfmlsD(vReg dst_src1, vReg src2, vReg src3) %{ ++ predicate(UseFMA && UseSVE > 0 && n->as_Vector()->length() >= 2); ++ match(Set dst_src1 (FmaVD dst_src1 (Binary (NegVD src2) src3))); ++ match(Set dst_src1 (FmaVD dst_src1 (Binary src2 (NegVD src3)))); ++ ins_cost(SVE_COST); ++ format %{ "sve_fmls $dst_src1, $src2, $src3\t # vector (sve) (D)" %} ++ ins_encode %{ ++ __ sve_fmls(as_FloatRegister($dst_src1$$reg), __ D, ++ ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++// vector fnmla ++ ++// dst_src1 = -dst_src1 + -src2 * src3 ++// dst_src1 = -dst_src1 + src2 * -src3 ++instruct vfnmlaF(vReg dst_src1, vReg src2, vReg src3) %{ ++ predicate(UseFMA && UseSVE > 0 && n->as_Vector()->length() >= 4); ++ match(Set dst_src1 (FmaVF (NegVF dst_src1) (Binary (NegVF src2) src3))); ++ match(Set dst_src1 (FmaVF (NegVF dst_src1) (Binary src2 (NegVF src3)))); ++ ins_cost(SVE_COST); ++ format %{ "sve_fnmla $dst_src1, $src2, $src3\t # vector (sve) (S)" %} ++ ins_encode %{ ++ __ sve_fnmla(as_FloatRegister($dst_src1$$reg), __ S, ++ ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++// dst_src1 = -dst_src1 + -src2 * src3 ++// dst_src1 = -dst_src1 + src2 * -src3 ++instruct vfnmlaD(vReg dst_src1, vReg src2, vReg src3) %{ ++ predicate(UseFMA && UseSVE > 0 && n->as_Vector()->length() >= 2); ++ match(Set dst_src1 (FmaVD (NegVD dst_src1) (Binary (NegVD src2) src3))); ++ match(Set dst_src1 (FmaVD (NegVD dst_src1) (Binary src2 (NegVD src3)))); ++ ins_cost(SVE_COST); ++ format %{ "sve_fnmla $dst_src1, $src2, $src3\t # vector (sve) (D)" %} ++ ins_encode %{ ++ __ sve_fnmla(as_FloatRegister($dst_src1$$reg), __ D, ++ ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++// vector fnmls ++ ++// dst_src1 = -dst_src1 + src2 * src3 ++instruct vfnmlsF(vReg dst_src1, vReg src2, vReg src3) %{ ++ predicate(UseFMA && UseSVE > 0 && n->as_Vector()->length() >= 4); ++ match(Set dst_src1 (FmaVF (NegVF dst_src1) (Binary src2 src3))); ++ ins_cost(SVE_COST); ++ format %{ "sve_fnmls $dst_src1, $src2, $src3\t # vector (sve) (S)" %} ++ ins_encode %{ ++ __ sve_fnmls(as_FloatRegister($dst_src1$$reg), __ S, ++ ptrue, 
as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++// dst_src1 = -dst_src1 + src2 * src3 ++instruct vfnmlsD(vReg dst_src1, vReg src2, vReg src3) %{ ++ predicate(UseFMA && UseSVE > 0 && n->as_Vector()->length() >= 2); ++ match(Set dst_src1 (FmaVD (NegVD dst_src1) (Binary src2 src3))); ++ ins_cost(SVE_COST); ++ format %{ "sve_fnmls $dst_src1, $src2, $src3\t # vector (sve) (D)" %} ++ ins_encode %{ ++ __ sve_fnmls(as_FloatRegister($dst_src1$$reg), __ D, ++ ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++// vector mla ++ ++// dst_src1 = dst_src1 + src2 * src3 ++instruct vmlaS(vReg dst_src1, vReg src2, vReg src3) ++%{ ++ predicate(UseSVE > 0 && n->as_Vector()->length() >= 8); ++ match(Set dst_src1 (AddVS dst_src1 (MulVS src2 src3))); ++ ins_cost(SVE_COST); ++ format %{ "sve_mla $dst_src1, src2, src3\t # vector (sve) (H)" %} ++ ins_encode %{ ++ __ sve_mla(as_FloatRegister($dst_src1$$reg), __ H, ++ ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++// dst_src1 = dst_src1 + src2 * src3 ++instruct vmlaI(vReg dst_src1, vReg src2, vReg src3) ++%{ ++ predicate(UseSVE > 0 && n->as_Vector()->length() >= 4); ++ match(Set dst_src1 (AddVI dst_src1 (MulVI src2 src3))); ++ ins_cost(SVE_COST); ++ format %{ "sve_mla $dst_src1, src2, src3\t # vector (sve) (S)" %} ++ ins_encode %{ ++ __ sve_mla(as_FloatRegister($dst_src1$$reg), __ S, ++ ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++// dst_src1 = dst_src1 + src2 * src3 ++instruct vmlaL(vReg dst_src1, vReg src2, vReg src3) ++%{ ++ predicate(UseSVE > 0 && n->as_Vector()->length() >= 2); ++ match(Set dst_src1 (AddVL dst_src1 (MulVL src2 src3))); ++ ins_cost(SVE_COST); ++ format %{ "sve_mla $dst_src1, src2, src3\t # vector (sve) (D)" %} ++ ins_encode %{ ++ __ sve_mla(as_FloatRegister($dst_src1$$reg), __ D, ++ ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++// vector mls ++ ++// dst_src1 = dst_src1 - src2 * src3 ++instruct vmlsS(vReg dst_src1, vReg src2, vReg src3) ++%{ ++ predicate(UseSVE > 0 && n->as_Vector()->length() >= 8); ++ match(Set dst_src1 (SubVS dst_src1 (MulVS src2 src3))); ++ ins_cost(SVE_COST); ++ format %{ "sve_mls $dst_src1, src2, src3\t # vector (sve) (H)" %} ++ ins_encode %{ ++ __ sve_mls(as_FloatRegister($dst_src1$$reg), __ H, ++ ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++// dst_src1 = dst_src1 - src2 * src3 ++instruct vmlsI(vReg dst_src1, vReg src2, vReg src3) ++%{ ++ predicate(UseSVE > 0 && n->as_Vector()->length() >= 4); ++ match(Set dst_src1 (SubVI dst_src1 (MulVI src2 src3))); ++ ins_cost(SVE_COST); ++ format %{ "sve_mls $dst_src1, src2, src3\t # vector (sve) (S)" %} ++ ins_encode %{ ++ __ sve_mls(as_FloatRegister($dst_src1$$reg), __ S, ++ ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++// dst_src1 = dst_src1 - src2 * src3 ++instruct vmlsL(vReg dst_src1, vReg src2, vReg src3) ++%{ ++ predicate(UseSVE > 0 && n->as_Vector()->length() >= 2); ++ match(Set dst_src1 (SubVL dst_src1 (MulVL src2 src3))); ++ ins_cost(SVE_COST); ++ format %{ "sve_mls $dst_src1, src2, src3\t # vector (sve) (D)" %} ++ ins_encode %{ ++ __ sve_mls(as_FloatRegister($dst_src1$$reg), __ D, ++ ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); ++ %} ++ 
ins_pipe(pipe_slow); ++%} ++ ++ ++// vector mul ++ ++instruct vmulS(vReg dst_src1, vReg src2) %{ ++ predicate(UseSVE > 0 && n->as_Vector()->length() >= 8); ++ match(Set dst_src1 (MulVS dst_src1 src2)); ++ ins_cost(SVE_COST); ++ format %{ "sve_mul $dst_src1, $dst_src1, $src2\t # vector (sve) (H)" %} ++ ins_encode %{ ++ __ sve_mul(as_FloatRegister($dst_src1$$reg), __ H, ++ ptrue, as_FloatRegister($src2$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct vmulI(vReg dst_src1, vReg src2) %{ ++ predicate(UseSVE > 0 && n->as_Vector()->length() >= 4); ++ match(Set dst_src1 (MulVI dst_src1 src2)); ++ ins_cost(SVE_COST); ++ format %{ "sve_mul $dst_src1, $dst_src1, $src2\t # vector (sve) (S)" %} ++ ins_encode %{ ++ __ sve_mul(as_FloatRegister($dst_src1$$reg), __ S, ++ ptrue, as_FloatRegister($src2$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct vmulL(vReg dst_src1, vReg src2) %{ ++ predicate(UseSVE > 0 && n->as_Vector()->length() >= 2); ++ match(Set dst_src1 (MulVL dst_src1 src2)); ++ ins_cost(SVE_COST); ++ format %{ "sve_mul $dst_src1, $dst_src1, $src2\t # vector (sve) (D)" %} ++ ins_encode %{ ++ __ sve_mul(as_FloatRegister($dst_src1$$reg), __ D, ++ ptrue, as_FloatRegister($src2$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct vmulF(vReg dst, vReg src1, vReg src2) %{ ++ predicate(UseSVE > 0 && n->as_Vector()->length() >= 4); ++ match(Set dst (MulVF src1 src2)); ++ ins_cost(SVE_COST); ++ format %{ "sve_fmul $dst, $src1, $src2\t # vector (sve) (S)" %} ++ ins_encode %{ ++ __ sve_fmul(as_FloatRegister($dst$$reg), __ S, ++ as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct vmulD(vReg dst, vReg src1, vReg src2) %{ ++ predicate(UseSVE > 0 && n->as_Vector()->length() >= 2); ++ match(Set dst (MulVD src1 src2)); ++ ins_cost(SVE_COST); ++ format %{ "sve_fmul $dst, $src1, $src2\t # vector (sve) (D)" %} ++ ins_encode %{ ++ __ sve_fmul(as_FloatRegister($dst$$reg), __ D, ++ as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++// vector fneg ++ ++instruct vnegF(vReg dst, vReg src) %{ ++ predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() >= 16); ++ match(Set dst (NegVF src)); ++ ins_cost(SVE_COST); ++ format %{ "sve_fneg $dst, $src\t# vector (sve) (S)" %} ++ ins_encode %{ ++ __ sve_fneg(as_FloatRegister($dst$$reg), __ S, ++ ptrue, as_FloatRegister($src$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct vnegD(vReg dst, vReg src) %{ ++ predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() >= 16); ++ match(Set dst (NegVD src)); ++ ins_cost(SVE_COST); ++ format %{ "sve_fneg $dst, $src\t# vector (sve) (D)" %} ++ ins_encode %{ ++ __ sve_fneg(as_FloatRegister($dst$$reg), __ D, ++ ptrue, as_FloatRegister($src$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++// popcount vector ++ ++instruct vpopcountI(vReg dst, vReg src) %{ ++ predicate(UseSVE > 0 && n->as_Vector()->length() >= 4); ++ match(Set dst (PopCountVI src)); ++ format %{ "sve_cnt $dst, $src\t# vector (sve) (S)\n\t" %} ++ ins_encode %{ ++ __ sve_cnt(as_FloatRegister($dst$$reg), __ S, ptrue, as_FloatRegister($src$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++// vector add reduction ++ ++instruct reduce_addI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD tmp) %{ ++ predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() >= 16 && ++ (n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT)); ++ match(Set dst (AddReductionVI src1 src2)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ 
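++  // Reduction idiom: sve_uaddv sums all lanes under ptrue into element 0
++  // of $tmp, umov extracts that element into a general register, and the
++  // final scalar addw/add folds in the incoming accumulator $src1. The
++  // float/double variants below instead use sve_fadda, a strictly-ordered
++  // accumulating add, which preserves the left-to-right evaluation order
++  // expected of a Java floating-point add reduction.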
ins_cost(SVE_COST); ++ format %{ "sve_uaddv $tmp, $src2\t# vector (sve) (S)\n\t" ++ "umov $dst, $tmp, S, 0\n\t" ++ "addw $dst, $dst, $src1\t # add reduction S" %} ++ ins_encode %{ ++ __ sve_uaddv(as_FloatRegister($tmp$$reg), __ S, ++ ptrue, as_FloatRegister($src2$$reg)); ++ __ umov($dst$$Register, as_FloatRegister($tmp$$reg), __ S, 0); ++ __ addw($dst$$Register, $dst$$Register, $src1$$Register); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_addL(iRegLNoSp dst, iRegL src1, vReg src2, vRegD tmp) %{ ++ predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() >= 16 && ++ (n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG)); ++ match(Set dst (AddReductionVL src1 src2)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ ins_cost(SVE_COST); ++ format %{ "sve_uaddv $tmp, $src2\t# vector (sve) (D)\n\t" ++ "umov $dst, $tmp, D, 0\n\t" ++ "add $dst, $dst, $src1\t # add reduction D" %} ++ ins_encode %{ ++ __ sve_uaddv(as_FloatRegister($tmp$$reg), __ D, ++ ptrue, as_FloatRegister($src2$$reg)); ++ __ umov($dst$$Register, as_FloatRegister($tmp$$reg), __ D, 0); ++ __ add($dst$$Register, $dst$$Register, $src1$$Register); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_addF(vRegF src1_dst, vReg src2) %{ ++ predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() >= 16); ++ match(Set src1_dst (AddReductionVF src1_dst src2)); ++ ins_cost(SVE_COST); ++ format %{ "sve_fadda $src1_dst, $src1_dst, $src2\t# vector (sve) (S)" %} ++ ins_encode %{ ++ __ sve_fadda(as_FloatRegister($src1_dst$$reg), __ S, ++ ptrue, as_FloatRegister($src2$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct reduce_addD(vRegD src1_dst, vReg src2) %{ ++ predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() >= 16); ++ match(Set src1_dst (AddReductionVD src1_dst src2)); ++ ins_cost(SVE_COST); ++ format %{ "sve_fadda $src1_dst, $src1_dst, $src2\t# vector (sve) (D)" %} ++ ins_encode %{ ++ __ sve_fadda(as_FloatRegister($src1_dst$$reg), __ D, ++ ptrue, as_FloatRegister($src2$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++// vector replicate ++ ++instruct replicateB(vReg dst, iRegIorL2I src) %{ ++ predicate(UseSVE > 0 && n->as_Vector()->length() >= 16); ++ match(Set dst (ReplicateB src)); ++ ins_cost(SVE_COST); ++ format %{ "sve_dup $dst, $src\t# vector (sve) (B)" %} ++ ins_encode %{ ++ __ sve_dup(as_FloatRegister($dst$$reg), __ B, as_Register($src$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct replicateS(vReg dst, iRegIorL2I src) %{ ++ predicate(UseSVE > 0 && n->as_Vector()->length() >= 8); ++ match(Set dst (ReplicateS src)); ++ ins_cost(SVE_COST); ++ format %{ "sve_dup $dst, $src\t# vector (sve) (H)" %} ++ ins_encode %{ ++ __ sve_dup(as_FloatRegister($dst$$reg), __ H, as_Register($src$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct replicateI(vReg dst, iRegIorL2I src) %{ ++ predicate(UseSVE > 0 && n->as_Vector()->length() >= 4); ++ match(Set dst (ReplicateI src)); ++ ins_cost(SVE_COST); ++ format %{ "sve_dup $dst, $src\t# vector (sve) (S)" %} ++ ins_encode %{ ++ __ sve_dup(as_FloatRegister($dst$$reg), __ S, as_Register($src$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct replicateL(vReg dst, iRegL src) %{ ++ predicate(UseSVE > 0 && n->as_Vector()->length() >= 2); ++ match(Set dst (ReplicateL src)); ++ ins_cost(SVE_COST); ++ format %{ "sve_dup $dst, $src\t# vector (sve) (D)" %} ++ ins_encode %{ ++ __ sve_dup(as_FloatRegister($dst$$reg), __ D, as_Register($src$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++ ++instruct 
replicateB_imm8(vReg dst, immI8 con) %{ ++ predicate(UseSVE > 0 && n->as_Vector()->length() >= 16); ++ match(Set dst (ReplicateB con)); ++ ins_cost(SVE_COST); ++ format %{ "sve_dup $dst, $con\t# vector (sve) (B)" %} ++ ins_encode %{ ++ __ sve_dup(as_FloatRegister($dst$$reg), __ B, $con$$constant); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct replicateS_imm8(vReg dst, immI8_shift8 con) %{ ++ predicate(UseSVE > 0 && n->as_Vector()->length() >= 8); ++ match(Set dst (ReplicateS con)); ++ ins_cost(SVE_COST); ++ format %{ "sve_dup $dst, $con\t# vector (sve) (H)" %} ++ ins_encode %{ ++ __ sve_dup(as_FloatRegister($dst$$reg), __ H, $con$$constant); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct replicateI_imm8(vReg dst, immI8_shift8 con) %{ ++ predicate(UseSVE > 0 && n->as_Vector()->length() >= 4); ++ match(Set dst (ReplicateI con)); ++ ins_cost(SVE_COST); ++ format %{ "sve_dup $dst, $con\t# vector (sve) (S)" %} ++ ins_encode %{ ++ __ sve_dup(as_FloatRegister($dst$$reg), __ S, $con$$constant); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct replicateL_imm8(vReg dst, immL8_shift8 con) %{ ++ predicate(UseSVE > 0 && n->as_Vector()->length() >= 2); ++ match(Set dst (ReplicateL con)); ++ ins_cost(SVE_COST); ++ format %{ "sve_dup $dst, $con\t# vector (sve) (D)" %} ++ ins_encode %{ ++ __ sve_dup(as_FloatRegister($dst$$reg), __ D, $con$$constant); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++ ++instruct replicateF(vReg dst, vRegF src) %{ ++ predicate(UseSVE > 0 && n->as_Vector()->length() >= 4); ++ match(Set dst (ReplicateF src)); ++ ins_cost(SVE_COST); ++ format %{ "sve_cpy $dst, $src\t# vector (sve) (S)" %} ++ ins_encode %{ ++ __ sve_cpy(as_FloatRegister($dst$$reg), __ S, ++ ptrue, as_FloatRegister($src$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct replicateD(vReg dst, vRegD src) %{ ++ predicate(UseSVE > 0 && n->as_Vector()->length() >= 2); ++ match(Set dst (ReplicateD src)); ++ ins_cost(SVE_COST); ++ format %{ "sve_cpy $dst, $src\t# vector (sve) (D)" %} ++ ins_encode %{ ++ __ sve_cpy(as_FloatRegister($dst$$reg), __ D, ++ ptrue, as_FloatRegister($src$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++// vector shift ++ ++instruct vasrB(vReg dst, vReg shift) %{ ++ predicate(UseSVE > 0 && n->as_Vector()->length() >= 16); ++ match(Set dst (RShiftVB dst shift)); ++ ins_cost(SVE_COST); ++ format %{ "sve_asr $dst, $dst, $shift\t# vector (sve) (B)" %} ++ ins_encode %{ ++ __ sve_asr(as_FloatRegister($dst$$reg), __ B, ++ ptrue, as_FloatRegister($shift$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct vasrS(vReg dst, vReg shift) %{ ++ predicate(UseSVE > 0 && n->as_Vector()->length() >= 8); ++ match(Set dst (RShiftVS dst shift)); ++ ins_cost(SVE_COST); ++ format %{ "sve_asr $dst, $dst, $shift\t# vector (sve) (H)" %} ++ ins_encode %{ ++ __ sve_asr(as_FloatRegister($dst$$reg), __ H, ++ ptrue, as_FloatRegister($shift$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct vasrI(vReg dst, vReg shift) %{ ++ predicate(UseSVE > 0 && n->as_Vector()->length() >= 4); ++ match(Set dst (RShiftVI dst shift)); ++ ins_cost(SVE_COST); ++ format %{ "sve_asr $dst, $dst, $shift\t# vector (sve) (S)" %} ++ ins_encode %{ ++ __ sve_asr(as_FloatRegister($dst$$reg), __ S, ++ ptrue, as_FloatRegister($shift$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct vasrL(vReg dst, vReg shift) %{ ++ predicate(UseSVE > 0 && n->as_Vector()->length() >= 2); ++ match(Set dst (RShiftVL dst shift)); ++ ins_cost(SVE_COST); ++ format %{ "sve_asr $dst, $dst, $shift\t# vector (sve) (D)" %} ++ ins_encode %{ ++ __ 
sve_asr(as_FloatRegister($dst$$reg), __ D, ++ ptrue, as_FloatRegister($shift$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct vlslB(vReg dst, vReg shift) %{ ++ predicate(UseSVE > 0 && n->as_Vector()->length() >= 16); ++ match(Set dst (LShiftVB dst shift)); ++ ins_cost(SVE_COST); ++ format %{ "sve_lsl $dst, $dst, $shift\t# vector (sve) (B)" %} ++ ins_encode %{ ++ __ sve_lsl(as_FloatRegister($dst$$reg), __ B, ++ ptrue, as_FloatRegister($shift$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct vlslS(vReg dst, vReg shift) %{ ++ predicate(UseSVE > 0 && n->as_Vector()->length() >= 8); ++ match(Set dst (LShiftVS dst shift)); ++ ins_cost(SVE_COST); ++ format %{ "sve_lsl $dst, $dst, $shift\t# vector (sve) (H)" %} ++ ins_encode %{ ++ __ sve_lsl(as_FloatRegister($dst$$reg), __ H, ++ ptrue, as_FloatRegister($shift$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct vlslI(vReg dst, vReg shift) %{ ++ predicate(UseSVE > 0 && n->as_Vector()->length() >= 4); ++ match(Set dst (LShiftVI dst shift)); ++ ins_cost(SVE_COST); ++ format %{ "sve_lsl $dst, $dst, $shift\t# vector (sve) (S)" %} ++ ins_encode %{ ++ __ sve_lsl(as_FloatRegister($dst$$reg), __ S, ++ ptrue, as_FloatRegister($shift$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct vlslL(vReg dst, vReg shift) %{ ++ predicate(UseSVE > 0 && n->as_Vector()->length() >= 2); ++ match(Set dst (LShiftVL dst shift)); ++ ins_cost(SVE_COST); ++ format %{ "sve_lsl $dst, $dst, $shift\t# vector (sve) (D)" %} ++ ins_encode %{ ++ __ sve_lsl(as_FloatRegister($dst$$reg), __ D, ++ ptrue, as_FloatRegister($shift$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct vlsrB(vReg dst, vReg shift) %{ ++ predicate(UseSVE > 0 && n->as_Vector()->length() >= 16); ++ match(Set dst (URShiftVB dst shift)); ++ ins_cost(SVE_COST); ++ format %{ "sve_lsr $dst, $dst, $shift\t# vector (sve) (B)" %} ++ ins_encode %{ ++ __ sve_lsr(as_FloatRegister($dst$$reg), __ B, ++ ptrue, as_FloatRegister($shift$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct vlsrS(vReg dst, vReg shift) %{ ++ predicate(UseSVE > 0 && n->as_Vector()->length() >= 8); ++ match(Set dst (URShiftVS dst shift)); ++ ins_cost(SVE_COST); ++ format %{ "sve_lsr $dst, $dst, $shift\t# vector (sve) (H)" %} ++ ins_encode %{ ++ __ sve_lsr(as_FloatRegister($dst$$reg), __ H, ++ ptrue, as_FloatRegister($shift$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct vlsrI(vReg dst, vReg shift) %{ ++ predicate(UseSVE > 0 && n->as_Vector()->length() >= 4); ++ match(Set dst (URShiftVI dst shift)); ++ ins_cost(SVE_COST); ++ format %{ "sve_lsr $dst, $dst, $shift\t# vector (sve) (S)" %} ++ ins_encode %{ ++ __ sve_lsr(as_FloatRegister($dst$$reg), __ S, ++ ptrue, as_FloatRegister($shift$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct vlsrL(vReg dst, vReg shift) %{ ++ predicate(UseSVE > 0 && n->as_Vector()->length() >= 2); ++ match(Set dst (URShiftVL dst shift)); ++ ins_cost(SVE_COST); ++ format %{ "sve_lsr $dst, $dst, $shift\t# vector (sve) (D)" %} ++ ins_encode %{ ++ __ sve_lsr(as_FloatRegister($dst$$reg), __ D, ++ ptrue, as_FloatRegister($shift$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct vasrB_imm(vReg dst, vReg src, immI shift) %{ ++ predicate(UseSVE > 0 && n->as_Vector()->length() >= 16); ++ match(Set dst (RShiftVB src shift)); ++ ins_cost(SVE_COST); ++ format %{ "sve_asr $dst, $src, $shift\t# vector (sve) (B)" %} ++ ins_encode %{ ++ int con = (int)$shift$$constant; ++ if (con == 0) { ++ __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), ++ as_FloatRegister($src$$reg)); ++ 
as_FloatRegister($src$$reg));
return; ++ } ++ if (con >= 8) con = 7; ++ __ sve_asr(as_FloatRegister($dst$$reg), __ B, ++ as_FloatRegister($src$$reg), con); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct vasrS_imm(vReg dst, vReg src, immI shift) %{ ++ predicate(UseSVE > 0 && n->as_Vector()->length() >= 8); ++ match(Set dst (RShiftVS src shift)); ++ ins_cost(SVE_COST); ++ format %{ "sve_asr $dst, $src, $shift\t# vector (sve) (H)" %} ++ ins_encode %{ ++ int con = (int)$shift$$constant; ++ if (con == 0) { ++ __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), ++ as_FloatRegister($src$$reg)); ++ return; ++ } ++ if (con >= 16) con = 15; ++ __ sve_asr(as_FloatRegister($dst$$reg), __ H, ++ as_FloatRegister($src$$reg), con); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct vasrI_imm(vReg dst, vReg src, immI shift) %{ ++ predicate(UseSVE > 0 && n->as_Vector()->length() >= 4); ++ match(Set dst (RShiftVI src shift)); ++ ins_cost(SVE_COST); ++ format %{ "sve_asr $dst, $src, $shift\t# vector (sve) (S)" %} ++ ins_encode %{ ++ int con = (int)$shift$$constant; ++ if (con == 0) { ++ __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), ++ as_FloatRegister($src$$reg)); ++ return; ++ } ++ __ sve_asr(as_FloatRegister($dst$$reg), __ S, ++ as_FloatRegister($src$$reg), con); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct vasrL_imm(vReg dst, vReg src, immI shift) %{ ++ predicate(UseSVE > 0 && n->as_Vector()->length() >= 2); ++ match(Set dst (RShiftVL src shift)); ++ ins_cost(SVE_COST); ++ format %{ "sve_asr $dst, $src, $shift\t# vector (sve) (D)" %} ++ ins_encode %{ ++ int con = (int)$shift$$constant; ++ if (con == 0) { ++ __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), ++ as_FloatRegister($src$$reg)); ++ return; ++ } ++ __ sve_asr(as_FloatRegister($dst$$reg), __ D, ++ as_FloatRegister($src$$reg), con); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct vlsrB_imm(vReg dst, vReg src, immI shift) %{ ++ predicate(UseSVE > 0 && n->as_Vector()->length() >= 16); ++ match(Set dst (URShiftVB src shift)); ++ ins_cost(SVE_COST); ++ format %{ "sve_lsr $dst, $src, $shift\t# vector (sve) (B)" %} ++ ins_encode %{ ++ int con = (int)$shift$$constant; ++ if (con == 0) { ++ __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), ++ as_FloatRegister($src$$reg)); ++ return; ++ } ++ if (con >= 8) { ++ __ sve_eor(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), ++ as_FloatRegister($src$$reg)); ++ return; ++ } ++ __ sve_lsr(as_FloatRegister($dst$$reg), __ B, ++ as_FloatRegister($src$$reg), con); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct vlsrS_imm(vReg dst, vReg src, immI shift) %{ ++ predicate(UseSVE > 0 && n->as_Vector()->length() >= 8); ++ match(Set dst (URShiftVS src shift)); ++ ins_cost(SVE_COST); ++ format %{ "sve_lsr $dst, $src, $shift\t# vector (sve) (H)" %} ++ ins_encode %{ ++ int con = (int)$shift$$constant; ++ if (con == 0) { ++ __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), ++ as_FloatRegister($src$$reg)); ++ return; ++ } ++ if (con >= 8) { ++ __ sve_eor(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), ++ as_FloatRegister($src$$reg)); ++ return; ++ } ++ __ sve_lsr(as_FloatRegister($dst$$reg), __ H, ++ as_FloatRegister($src$$reg), con); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct vlsrI_imm(vReg dst, vReg src, immI shift) %{ ++ predicate(UseSVE > 0 && n->as_Vector()->length() >= 4); ++ match(Set dst (URShiftVI src shift)); ++ ins_cost(SVE_COST); ++ format %{ "sve_lsr $dst, $src, $shift\t# vector (sve) (S)" %} ++ ins_encode %{ ++ int con = 
(int)$shift$$constant; ++ if (con == 0) { ++ __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), ++ as_FloatRegister($src$$reg)); ++ return; ++ } ++ __ sve_lsr(as_FloatRegister($dst$$reg), __ S, ++ as_FloatRegister($src$$reg), con); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct vlsrL_imm(vReg dst, vReg src, immI shift) %{ ++ predicate(UseSVE > 0 && n->as_Vector()->length() >= 2); ++ match(Set dst (URShiftVL src shift)); ++ ins_cost(SVE_COST); ++ format %{ "sve_lsr $dst, $src, $shift\t# vector (sve) (D)" %} ++ ins_encode %{ ++ int con = (int)$shift$$constant; ++ if (con == 0) { ++ __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), ++ as_FloatRegister($src$$reg)); ++ return; ++ } ++ __ sve_lsr(as_FloatRegister($dst$$reg), __ D, ++ as_FloatRegister($src$$reg), con); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct vlslB_imm(vReg dst, vReg src, immI shift) %{ ++ predicate(UseSVE > 0 && n->as_Vector()->length() >= 16); ++ match(Set dst (LShiftVB src shift)); ++ ins_cost(SVE_COST); ++ format %{ "sve_lsl $dst, $src, $shift\t# vector (sve) (B)" %} ++ ins_encode %{ ++ int con = (int)$shift$$constant; ++ if (con >= 8) { ++ __ sve_eor(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), ++ as_FloatRegister($src$$reg)); ++ return; ++ } ++ __ sve_lsl(as_FloatRegister($dst$$reg), __ B, ++ as_FloatRegister($src$$reg), con); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct vlslS_imm(vReg dst, vReg src, immI shift) %{ ++ predicate(UseSVE > 0 && n->as_Vector()->length() >= 8); ++ match(Set dst (LShiftVS src shift)); ++ ins_cost(SVE_COST); ++ format %{ "sve_lsl $dst, $src, $shift\t# vector (sve) (H)" %} ++ ins_encode %{ ++ int con = (int)$shift$$constant; ++ if (con >= 8) { ++ __ sve_eor(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), ++ as_FloatRegister($src$$reg)); ++ return; ++ } ++ __ sve_lsl(as_FloatRegister($dst$$reg), __ H, ++ as_FloatRegister($src$$reg), con); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct vlslI_imm(vReg dst, vReg src, immI shift) %{ ++ predicate(UseSVE > 0 && n->as_Vector()->length() >= 4); ++ match(Set dst (LShiftVI src shift)); ++ ins_cost(SVE_COST); ++ format %{ "sve_lsl $dst, $src, $shift\t# vector (sve) (S)" %} ++ ins_encode %{ ++ int con = (int)$shift$$constant; ++ __ sve_lsl(as_FloatRegister($dst$$reg), __ S, ++ as_FloatRegister($src$$reg), con); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct vlslL_imm(vReg dst, vReg src, immI shift) %{ ++ predicate(UseSVE > 0 && n->as_Vector()->length() >= 2); ++ match(Set dst (LShiftVL src shift)); ++ ins_cost(SVE_COST); ++ format %{ "sve_lsl $dst, $src, $shift\t# vector (sve) (D)" %} ++ ins_encode %{ ++ int con = (int)$shift$$constant; ++ __ sve_lsl(as_FloatRegister($dst$$reg), __ D, ++ as_FloatRegister($src$$reg), con); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct vshiftcntB(vReg dst, iRegIorL2I cnt) %{ ++ predicate(UseSVE > 0 && n->as_Vector()->length() >= 16 && ++ (n->bottom_type()->is_vect()->element_basic_type() == T_BYTE)); ++ match(Set dst (LShiftCntV cnt)); ++ match(Set dst (RShiftCntV cnt)); ++ format %{ "sve_dup $dst, $cnt\t# vector shift count (sve) (B)" %} ++ ins_encode %{ ++ __ sve_dup(as_FloatRegister($dst$$reg), __ B, as_Register($cnt$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct vshiftcntS(vReg dst, iRegIorL2I cnt) %{ ++ predicate(UseSVE > 0 && n->as_Vector()->length() >= 8 && ++ (n->bottom_type()->is_vect()->element_basic_type() == T_SHORT || ++ (n->bottom_type()->is_vect()->element_basic_type() == T_CHAR))); ++ match(Set dst (LShiftCntV cnt)); ++ 
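++// Note on the preceding *_imm rules: a constant shift of 0 degenerates to
++// a register move (sve_orr of $src with itself); arithmetic right shifts
++// clamp the count to lane_bits - 1 (7 for B, 15 for H), which still
++// replicates the sign bit; and logical shifts by at least the lane width
++// zero the destination (sve_eor of $src with itself). One inconsistency
++// worth noting: vlsrS_imm/vlslS_imm test `con >= 8` on 16-bit H lanes,
++// while vasrS_imm saturates at 16, so H-lane logical shifts by 8..15 look
++// as if they are zeroed one lane-width too early.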
match(Set dst (RShiftCntV cnt)); ++ format %{ "sve_dup $dst, $cnt\t# vector shift count (sve) (H)" %} ++ ins_encode %{ ++ __ sve_dup(as_FloatRegister($dst$$reg), __ H, as_Register($cnt$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct vshiftcntI(vReg dst, iRegIorL2I cnt) %{ ++ predicate(UseSVE > 0 && n->as_Vector()->length() >= 4 && ++ (n->bottom_type()->is_vect()->element_basic_type() == T_INT)); ++ match(Set dst (LShiftCntV cnt)); ++ match(Set dst (RShiftCntV cnt)); ++ format %{ "sve_dup $dst, $cnt\t# vector shift count (sve) (S)" %} ++ ins_encode %{ ++ __ sve_dup(as_FloatRegister($dst$$reg), __ S, as_Register($cnt$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct vshiftcntL(vReg dst, iRegIorL2I cnt) %{ ++ predicate(UseSVE > 0 && n->as_Vector()->length() >= 2 && ++ (n->bottom_type()->is_vect()->element_basic_type() == T_LONG)); ++ match(Set dst (LShiftCntV cnt)); ++ match(Set dst (RShiftCntV cnt)); ++ format %{ "sve_dup $dst, $cnt\t# vector shift count (sve) (D)" %} ++ ins_encode %{ ++ __ sve_dup(as_FloatRegister($dst$$reg), __ D, as_Register($cnt$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++// vector sqrt ++ ++instruct vsqrtF(vReg dst, vReg src) %{ ++ predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() >= 16); ++ match(Set dst (SqrtVF src)); ++ ins_cost(SVE_COST); ++ format %{ "sve_fsqrt $dst, $src\t# vector (sve) (S)" %} ++ ins_encode %{ ++ __ sve_fsqrt(as_FloatRegister($dst$$reg), __ S, ++ ptrue, as_FloatRegister($src$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct vsqrtD(vReg dst, vReg src) %{ ++ predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() >= 16); ++ match(Set dst (SqrtVD src)); ++ ins_cost(SVE_COST); ++ format %{ "sve_fsqrt $dst, $src\t# vector (sve) (D)" %} ++ ins_encode %{ ++ __ sve_fsqrt(as_FloatRegister($dst$$reg), __ D, ++ ptrue, as_FloatRegister($src$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++// vector sub ++ ++instruct vsubB(vReg dst, vReg src1, vReg src2) %{ ++ predicate(UseSVE > 0 && n->as_Vector()->length() >= 16); ++ match(Set dst (SubVB src1 src2)); ++ ins_cost(SVE_COST); ++ format %{ "sve_sub $dst, $src1, $src2\t # vector (sve) (B)" %} ++ ins_encode %{ ++ __ sve_sub(as_FloatRegister($dst$$reg), __ B, ++ as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct vsubS(vReg dst, vReg src1, vReg src2) %{ ++ predicate(UseSVE > 0 && n->as_Vector()->length() >= 8); ++ match(Set dst (SubVS src1 src2)); ++ ins_cost(SVE_COST); ++ format %{ "sve_sub $dst, $src1, $src2\t # vector (sve) (H)" %} ++ ins_encode %{ ++ __ sve_sub(as_FloatRegister($dst$$reg), __ H, ++ as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct vsubI(vReg dst, vReg src1, vReg src2) %{ ++ predicate(UseSVE > 0 && n->as_Vector()->length() >= 4); ++ match(Set dst (SubVI src1 src2)); ++ ins_cost(SVE_COST); ++ format %{ "sve_sub $dst, $src1, $src2\t # vector (sve) (S)" %} ++ ins_encode %{ ++ __ sve_sub(as_FloatRegister($dst$$reg), __ S, ++ as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct vsubL(vReg dst, vReg src1, vReg src2) %{ ++ predicate(UseSVE > 0 && n->as_Vector()->length() >= 2); ++ match(Set dst (SubVL src1 src2)); ++ ins_cost(SVE_COST); ++ format %{ "sve_sub $dst, $src1, $src2\t # vector (sve) (D)" %} ++ ins_encode %{ ++ __ sve_sub(as_FloatRegister($dst$$reg), __ D, ++ as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct 
vsubF(vReg dst, vReg src1, vReg src2) %{ ++ predicate(UseSVE > 0 && n->as_Vector()->length() >= 4); ++ match(Set dst (SubVF src1 src2)); ++ ins_cost(SVE_COST); ++ format %{ "sve_fsub $dst, $src1, $src2\t # vector (sve) (S)" %} ++ ins_encode %{ ++ __ sve_fsub(as_FloatRegister($dst$$reg), __ S, ++ as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct vsubD(vReg dst, vReg src1, vReg src2) %{ ++ predicate(UseSVE > 0 && n->as_Vector()->length() >= 2); ++ match(Set dst (SubVD src1 src2)); ++ ins_cost(SVE_COST); ++ format %{ "sve_fsub $dst, $src1, $src2\t # vector (sve) (D)" %} ++ ins_encode %{ ++ __ sve_fsub(as_FloatRegister($dst$$reg), __ D, ++ as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} +diff --git a/src/hotspot/cpu/aarch64/aarch64_sve_ad.m4 b/src/hotspot/cpu/aarch64/aarch64_sve_ad.m4 +new file mode 100644 +index 000000000..0323f2f8c +--- /dev/null ++++ b/src/hotspot/cpu/aarch64/aarch64_sve_ad.m4 +@@ -0,0 +1,727 @@ ++// ++// Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. ++// Copyright (c) 2020, Arm Limited. All rights reserved. ++// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++// ++// This code is free software; you can redistribute it and/or modify it ++// under the terms of the GNU General Public License version 2 only, as ++// published by the Free Software Foundation. ++// ++// This code is distributed in the hope that it will be useful, but WITHOUT ++// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++// version 2 for more details (a copy is included in the LICENSE file that ++// accompanied this code). ++// ++// You should have received a copy of the GNU General Public License version ++// 2 along with this work; if not, write to the Free Software Foundation, ++// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++// ++// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++// or visit www.oracle.com if you need additional information or have any ++// questions. ++// ++// ++ ++dnl Generate the warning ++// This file is automatically generated by running "m4 aarch64_sve_ad.m4". 
Do not edit ---- ++dnl ++ ++// AArch64 SVE Architecture Description File ++ ++dnl ++dnl OPERAND_VMEMORYA_IMMEDIATE_OFFSET($1, $2, $3 ) ++dnl OPERAND_VMEMORYA_IMMEDIATE_OFFSET(imm_type_abbr, imm_type, imm_len) ++define(`OPERAND_VMEMORYA_IMMEDIATE_OFFSET', ` ++operand vmemA_imm$1Offset$3() ++%{ ++ predicate(Address::offset_ok_for_sve_immed(n->get_$2(), $3, ++ Matcher::scalable_vector_reg_size(T_BYTE))); ++ match(Con$1); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%}') ++dnl ++// 4 bit signed offset -- for predicated load/store ++OPERAND_VMEMORYA_IMMEDIATE_OFFSET(I, int, 4) ++OPERAND_VMEMORYA_IMMEDIATE_OFFSET(L, long, 4) ++dnl ++dnl OPERAND_VMEMORYA_INDIRECT_OFFSET($1, $2 ) ++dnl OPERAND_VMEMORYA_INDIRECT_OFFSET(imm_type_abbr, imm_len) ++define(`OPERAND_VMEMORYA_INDIRECT_OFFSET', ` ++operand vmemA_indOff$1$2(iRegP reg, vmemA_imm$1Offset$2 off) ++%{ ++ constraint(ALLOC_IN_RC(ptr_reg)); ++ match(AddP reg off); ++ op_cost(0); ++ format %{ "[$reg, $off, MUL VL]" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ `index'(0xffffffff); ++ scale(0x0); ++ disp($off); ++ %} ++%}') ++dnl ++OPERAND_VMEMORYA_INDIRECT_OFFSET(I, 4) ++OPERAND_VMEMORYA_INDIRECT_OFFSET(L, 4) ++ ++opclass vmemA(indirect, vmemA_indOffI4, vmemA_indOffL4); ++ ++source_hpp %{ ++ bool op_sve_supported(int opcode); ++%} ++ ++source %{ ++ ++ static inline BasicType vector_element_basic_type(const MachNode* n) { ++ const TypeVect* vt = n->bottom_type()->is_vect(); ++ return vt->element_basic_type(); ++ } ++ ++ static inline BasicType vector_element_basic_type(const MachNode* use, const MachOper* opnd) { ++ int def_idx = use->operand_index(opnd); ++ Node* def = use->in(def_idx); ++ const TypeVect* vt = def->bottom_type()->is_vect(); ++ return vt->element_basic_type(); ++ } ++ ++ typedef void (MacroAssembler::* sve_mem_insn_predicate)(FloatRegister Rt, Assembler::SIMD_RegVariant T, ++ PRegister Pg, const Address &adr); ++ ++ // Predicated load/store, with optional ptrue to all elements of given predicate register. ++ static void loadStoreA_predicate(MacroAssembler masm, bool is_store, ++ FloatRegister reg, PRegister pg, BasicType bt, ++ int opcode, Register base, int index, int size, int disp) { ++ sve_mem_insn_predicate insn; ++ Assembler::SIMD_RegVariant type; ++ int esize = type2aelembytes(bt); ++ if (index == -1) { ++ assert(size == 0, "unsupported address mode: scale size = %d", size); ++ switch(esize) { ++ case 1: ++ insn = is_store ? &MacroAssembler::sve_st1b : &MacroAssembler::sve_ld1b; ++ type = Assembler::B; ++ break; ++ case 2: ++ insn = is_store ? &MacroAssembler::sve_st1h : &MacroAssembler::sve_ld1h; ++ type = Assembler::H; ++ break; ++ case 4: ++ insn = is_store ? &MacroAssembler::sve_st1w : &MacroAssembler::sve_ld1w; ++ type = Assembler::S; ++ break; ++ case 8: ++ insn = is_store ? 
&MacroAssembler::sve_st1d : &MacroAssembler::sve_ld1d; ++ type = Assembler::D; ++ break; ++ default: ++ assert(false, "unsupported"); ++ ShouldNotReachHere(); ++ } ++ (masm.*insn)(reg, type, pg, Address(base, disp / Matcher::scalable_vector_reg_size(T_BYTE))); ++ } else { ++ assert(false, "unimplemented"); ++ ShouldNotReachHere(); ++ } ++ } ++ ++ bool op_sve_supported(int opcode) { ++ switch (opcode) { ++ // No multiply reduction instructions ++ case Op_MulReductionVD: ++ case Op_MulReductionVF: ++ case Op_MulReductionVI: ++ case Op_MulReductionVL: ++ // Others ++ case Op_Extract: ++ case Op_ExtractB: ++ case Op_ExtractC: ++ case Op_ExtractD: ++ case Op_ExtractF: ++ case Op_ExtractI: ++ case Op_ExtractL: ++ case Op_ExtractS: ++ case Op_ExtractUB: ++ return false; ++ default: ++ return true; ++ } ++ } ++ ++%} ++ ++definitions %{ ++ int_def SVE_COST (200, 200); ++%} ++ ++ ++dnl ++dnl ELEMENT_SHORT_CHART($1, $2) ++dnl ELEMENT_SHORT_CHART(etype, node) ++define(`ELEMENT_SHORT_CHAR',`ifelse(`$1', `T_SHORT', ++ `($2->bottom_type()->is_vect()->element_basic_type() == T_SHORT || ++ ($2->bottom_type()->is_vect()->element_basic_type() == T_CHAR))', ++ `($2->bottom_type()->is_vect()->element_basic_type() == $1)')') ++dnl ++ ++// All SVE instructions ++ ++// vector load/store ++ ++// Use predicated vector load/store ++instruct loadV(vReg dst, vmemA mem) %{ ++ predicate(UseSVE > 0 && n->as_LoadVector()->memory_size() >= 16); ++ match(Set dst (LoadVector mem)); ++ ins_cost(SVE_COST); ++ format %{ "sve_ldr $dst, $mem\t # vector (sve)" %} ++ ins_encode %{ ++ FloatRegister dst_reg = as_FloatRegister($dst$$reg); ++ loadStoreA_predicate(MacroAssembler(&cbuf), false, dst_reg, ptrue, ++ vector_element_basic_type(this), $mem->opcode(), ++ as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct storeV(vReg src, vmemA mem) %{ ++ predicate(UseSVE > 0 && n->as_StoreVector()->memory_size() >= 16); ++ match(Set mem (StoreVector mem src)); ++ ins_cost(SVE_COST); ++ format %{ "sve_str $mem, $src\t # vector (sve)" %} ++ ins_encode %{ ++ FloatRegister src_reg = as_FloatRegister($src$$reg); ++ loadStoreA_predicate(MacroAssembler(&cbuf), true, src_reg, ptrue, ++ vector_element_basic_type(this, $src), $mem->opcode(), ++ as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++dnl ++dnl UNARY_OP_TRUE_PREDICATE_ETYPE($1, $2, $3, $4, $5, %6 ) ++dnl UNARY_OP_TRUE_PREDICATE_ETYPE(insn_name, op_name, element_type, size, min_vec_len, insn) ++define(`UNARY_OP_TRUE_PREDICATE_ETYPE', ` ++instruct $1(vReg dst, vReg src) %{ ++ predicate(UseSVE > 0 && n->as_Vector()->length() >= $5 && ++ n->bottom_type()->is_vect()->element_basic_type() == $3); ++ match(Set dst ($2 src)); ++ ins_cost(SVE_COST); ++ format %{ "$6 $dst, $src\t# vector (sve) ($4)" %} ++ ins_encode %{ ++ __ $6(as_FloatRegister($dst$$reg), __ $4, ++ ptrue, as_FloatRegister($src$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%}')dnl ++ ++dnl ++dnl BINARY_OP_UNPREDICATED($1, $2 $3, $4 $5 ) ++dnl BINARY_OP_UNPREDICATED(insn_name, op_name, size, min_vec_len, insn) ++define(`BINARY_OP_UNPREDICATED', ` ++instruct $1(vReg dst, vReg src1, vReg src2) %{ ++ predicate(UseSVE > 0 && n->as_Vector()->length() >= $4); ++ match(Set dst ($2 src1 src2)); ++ ins_cost(SVE_COST); ++ format %{ "$5 $dst, $src1, $src2\t # vector (sve) ($3)" %} ++ ins_encode %{ ++ __ $5(as_FloatRegister($dst$$reg), __ $3, ++ as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg)); ++ %} ++ ins_pipe(pipe_slow); 
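++dnl Note on loadStoreA_predicate above: SVE contiguous ld1/st1 take a signed
++dnl 4-bit "MUL VL" immediate (a multiple of the vector length, -8 to 7), so
++dnl the byte displacement is divided by scalable_vector_reg_size(T_BYTE); the
++dnl vmemA_imm{I,L}Offset4 operands only accept offsets that pass
++dnl offset_ok_for_sve_immed(off, 4, VL) for exactly this reason.
++dnl
++dnl As a worked example of the template layer, the invocation
++dnl   BINARY_OP_UNPREDICATED(vaddF, AddVF, S, 4, sve_fadd)
++dnl further down substitutes $1..$5 above and regenerates exactly the vaddF
++dnl rule that appears in aarch64_sve.ad earlier in this patch:
++dnl   instruct vaddF(vReg dst, vReg src1, vReg src2) %{
++dnl     predicate(UseSVE > 0 && n->as_Vector()->length() >= 4);
++dnl     match(Set dst (AddVF src1 src2));
++dnl     ins_cost(SVE_COST);
++dnl     format %{ "sve_fadd $dst, $src1, $src2\t # vector (sve) (S)" %}
++dnl     ins_encode %{
++dnl       __ sve_fadd(as_FloatRegister($dst$$reg), __ S,
++dnl                  as_FloatRegister($src1$$reg),
++dnl                  as_FloatRegister($src2$$reg));
++dnl     %}
++dnl     ins_pipe(pipe_slow);
++dnl   %}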
++%}')dnl ++ ++// vector add ++BINARY_OP_UNPREDICATED(vaddB, AddVB, B, 16, sve_add) ++BINARY_OP_UNPREDICATED(vaddS, AddVS, H, 8, sve_add) ++BINARY_OP_UNPREDICATED(vaddI, AddVI, S, 4, sve_add) ++BINARY_OP_UNPREDICATED(vaddL, AddVL, D, 2, sve_add) ++BINARY_OP_UNPREDICATED(vaddF, AddVF, S, 4, sve_fadd) ++BINARY_OP_UNPREDICATED(vaddD, AddVD, D, 2, sve_fadd) ++dnl ++dnl BINARY_OP_UNSIZED($1, $2, $3, $4 ) ++dnl BINARY_OP_UNSIZED(insn_name, op_name, min_vec_len, insn) ++define(`BINARY_OP_UNSIZED', ` ++instruct $1(vReg dst, vReg src1, vReg src2) %{ ++ predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() >= $3); ++ match(Set dst ($2 src1 src2)); ++ ins_cost(SVE_COST); ++ format %{ "$4 $dst, $src1, $src2\t# vector (sve)" %} ++ ins_encode %{ ++ __ $4(as_FloatRegister($dst$$reg), ++ as_FloatRegister($src1$$reg), ++ as_FloatRegister($src2$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%}')dnl ++ ++// vector and ++BINARY_OP_UNSIZED(vand, AndV, 16, sve_and) ++ ++// vector or ++BINARY_OP_UNSIZED(vor, OrV, 16, sve_orr) ++ ++// vector xor ++BINARY_OP_UNSIZED(vxor, XorV, 16, sve_eor) ++dnl ++dnl VDIVF($1, $2 , $3 ) ++dnl VDIVF(name_suffix, size, min_vec_len) ++define(`VDIVF', ` ++instruct vdiv$1(vReg dst_src1, vReg src2) %{ ++ predicate(UseSVE > 0 && n->as_Vector()->length() >= $3); ++ match(Set dst_src1 (DivV$1 dst_src1 src2)); ++ ins_cost(SVE_COST); ++ format %{ "sve_fdiv $dst_src1, $dst_src1, $src2\t# vector (sve) ($2)" %} ++ ins_encode %{ ++ __ sve_fdiv(as_FloatRegister($dst_src1$$reg), __ $2, ++ ptrue, as_FloatRegister($src2$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%}')dnl ++ ++// vector float div ++VDIVF(F, S, 4) ++VDIVF(D, D, 2) ++ ++dnl ++dnl BINARY_OP_TRUE_PREDICATE_ETYPE($1, $2, $3, $4, $5, $6 ) ++dnl BINARY_OP_TRUE_PREDICATE_ETYPE(insn_name, op_name, element_type, size, min_vec_len, insn) ++define(`BINARY_OP_TRUE_PREDICATE_ETYPE', ` ++instruct $1(vReg dst_src1, vReg src2) %{ ++ predicate(UseSVE > 0 && n->as_Vector()->length() >= $5 && ++ n->bottom_type()->is_vect()->element_basic_type() == $3); ++ match(Set dst_src1 ($2 dst_src1 src2)); ++ ins_cost(SVE_COST); ++ format %{ "$6 $dst_src1, $dst_src1, $src2\t # vector (sve) ($4)" %} ++ ins_encode %{ ++ __ $6(as_FloatRegister($dst_src1$$reg), __ $4, ++ ptrue, as_FloatRegister($src2$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%}')dnl ++dnl ++ ++dnl ++dnl VFMLA($1 $2 $3 ) ++dnl VFMLA(name_suffix, size, min_vec_len) ++define(`VFMLA', ` ++// dst_src1 = dst_src1 + src2 * src3 ++instruct vfmla$1(vReg dst_src1, vReg src2, vReg src3) %{ ++ predicate(UseFMA && UseSVE > 0 && n->as_Vector()->length() >= $3); ++ match(Set dst_src1 (FmaV$1 dst_src1 (Binary src2 src3))); ++ ins_cost(SVE_COST); ++ format %{ "sve_fmla $dst_src1, $src2, $src3\t # vector (sve) ($2)" %} ++ ins_encode %{ ++ __ sve_fmla(as_FloatRegister($dst_src1$$reg), __ $2, ++ ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%}')dnl ++dnl ++// vector fmla ++VFMLA(F, S, 4) ++VFMLA(D, D, 2) ++ ++dnl ++dnl VFMLS($1 $2 $3 ) ++dnl VFMLS(name_suffix, size, min_vec_len) ++define(`VFMLS', ` ++// dst_src1 = dst_src1 + -src2 * src3 ++// dst_src1 = dst_src1 + src2 * -src3 ++instruct vfmls$1(vReg dst_src1, vReg src2, vReg src3) %{ ++ predicate(UseFMA && UseSVE > 0 && n->as_Vector()->length() >= $3); ++ match(Set dst_src1 (FmaV$1 dst_src1 (Binary (NegV$1 src2) src3))); ++ match(Set dst_src1 (FmaV$1 dst_src1 (Binary src2 (NegV$1 src3)))); ++ ins_cost(SVE_COST); ++ format %{ "sve_fmls $dst_src1, $src2, $src3\t # vector (sve) ($2)" %} ++ ins_encode %{ ++ __ 
sve_fmls(as_FloatRegister($dst_src1$$reg), __ $2, ++ ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%}')dnl ++dnl ++// vector fmls ++VFMLS(F, S, 4) ++VFMLS(D, D, 2) ++ ++dnl ++dnl VFNMLA($1 $2 $3 ) ++dnl VFNMLA(name_suffix, size, min_vec_len) ++define(`VFNMLA', ` ++// dst_src1 = -dst_src1 + -src2 * src3 ++// dst_src1 = -dst_src1 + src2 * -src3 ++instruct vfnmla$1(vReg dst_src1, vReg src2, vReg src3) %{ ++ predicate(UseFMA && UseSVE > 0 && n->as_Vector()->length() >= $3); ++ match(Set dst_src1 (FmaV$1 (NegV$1 dst_src1) (Binary (NegV$1 src2) src3))); ++ match(Set dst_src1 (FmaV$1 (NegV$1 dst_src1) (Binary src2 (NegV$1 src3)))); ++ ins_cost(SVE_COST); ++ format %{ "sve_fnmla $dst_src1, $src2, $src3\t # vector (sve) ($2)" %} ++ ins_encode %{ ++ __ sve_fnmla(as_FloatRegister($dst_src1$$reg), __ $2, ++ ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%}')dnl ++dnl ++// vector fnmla ++VFNMLA(F, S, 4) ++VFNMLA(D, D, 2) ++ ++dnl ++dnl VFNMLS($1 $2 $3 ) ++dnl VFNMLS(name_suffix, size, min_vec_len) ++define(`VFNMLS', ` ++// dst_src1 = -dst_src1 + src2 * src3 ++instruct vfnmls$1(vReg dst_src1, vReg src2, vReg src3) %{ ++ predicate(UseFMA && UseSVE > 0 && n->as_Vector()->length() >= $3); ++ match(Set dst_src1 (FmaV$1 (NegV$1 dst_src1) (Binary src2 src3))); ++ ins_cost(SVE_COST); ++ format %{ "sve_fnmls $dst_src1, $src2, $src3\t # vector (sve) ($2)" %} ++ ins_encode %{ ++ __ sve_fnmls(as_FloatRegister($dst_src1$$reg), __ $2, ++ ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%}')dnl ++dnl ++// vector fnmls ++VFNMLS(F, S, 4) ++VFNMLS(D, D, 2) ++ ++dnl ++dnl VMLA($1 $2 $3 ) ++dnl VMLA(name_suffix, size, min_vec_len) ++define(`VMLA', ` ++// dst_src1 = dst_src1 + src2 * src3 ++instruct vmla$1(vReg dst_src1, vReg src2, vReg src3) ++%{ ++ predicate(UseSVE > 0 && n->as_Vector()->length() >= $3); ++ match(Set dst_src1 (AddV$1 dst_src1 (MulV$1 src2 src3))); ++ ins_cost(SVE_COST); ++ format %{ "sve_mla $dst_src1, src2, src3\t # vector (sve) ($2)" %} ++ ins_encode %{ ++ __ sve_mla(as_FloatRegister($dst_src1$$reg), __ $2, ++ ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%}')dnl ++dnl ++// vector mla ++VMLA(B, B, 16) ++VMLA(S, H, 8) ++VMLA(I, S, 4) ++VMLA(L, D, 2) ++ ++dnl ++dnl VMLS($1 $2 $3 ) ++dnl VMLS(name_suffix, size, min_vec_len) ++define(`VMLS', ` ++// dst_src1 = dst_src1 - src2 * src3 ++instruct vmls$1(vReg dst_src1, vReg src2, vReg src3) ++%{ ++ predicate(UseSVE > 0 && n->as_Vector()->length() >= $3); ++ match(Set dst_src1 (SubV$1 dst_src1 (MulV$1 src2 src3))); ++ ins_cost(SVE_COST); ++ format %{ "sve_mls $dst_src1, src2, src3\t # vector (sve) ($2)" %} ++ ins_encode %{ ++ __ sve_mls(as_FloatRegister($dst_src1$$reg), __ $2, ++ ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%}')dnl ++dnl ++// vector mls ++VMLS(B, B, 16) ++VMLS(S, H, 8) ++VMLS(I, S, 4) ++VMLS(L, D, 2) ++ ++dnl ++dnl BINARY_OP_TRUE_PREDICATE($1, $2, $3, $4, $5 ) ++dnl BINARY_OP_TRUE_PREDICATE(insn_name, op_name, size, min_vec_len, insn) ++define(`BINARY_OP_TRUE_PREDICATE', ` ++instruct $1(vReg dst_src1, vReg src2) %{ ++ predicate(UseSVE > 0 && n->as_Vector()->length() >= $4); ++ match(Set dst_src1 ($2 dst_src1 src2)); ++ ins_cost(SVE_COST); ++ format %{ "$5 $dst_src1, $dst_src1, $src2\t # vector (sve) ($3)" %} ++ ins_encode %{ ++ __ $5(as_FloatRegister($dst_src1$$reg), __ $3, ++ ptrue, 
as_FloatRegister($src2$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%}')dnl ++ ++// vector mul ++BINARY_OP_TRUE_PREDICATE(vmulS, MulVS, H, 8, sve_mul) ++BINARY_OP_TRUE_PREDICATE(vmulI, MulVI, S, 4, sve_mul) ++BINARY_OP_TRUE_PREDICATE(vmulL, MulVL, D, 2, sve_mul) ++BINARY_OP_UNPREDICATED(vmulF, MulVF, S, 4, sve_fmul) ++BINARY_OP_UNPREDICATED(vmulD, MulVD, D, 2, sve_fmul) ++ ++dnl ++dnl UNARY_OP_TRUE_PREDICATE($1, $2, $3, $4, $5 ) ++dnl UNARY_OP_TRUE_PREDICATE(insn_name, op_name, size, min_vec_bytes, insn) ++define(`UNARY_OP_TRUE_PREDICATE', ` ++instruct $1(vReg dst, vReg src) %{ ++ predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() >= $4); ++ match(Set dst ($2 src)); ++ ins_cost(SVE_COST); ++ format %{ "$5 $dst, $src\t# vector (sve) ($3)" %} ++ ins_encode %{ ++ __ $5(as_FloatRegister($dst$$reg), __ $3, ++ ptrue, as_FloatRegister($src$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%}')dnl ++dnl ++// vector fneg ++UNARY_OP_TRUE_PREDICATE(vnegF, NegVF, S, 16, sve_fneg) ++UNARY_OP_TRUE_PREDICATE(vnegD, NegVD, D, 16, sve_fneg) ++ ++// popcount vector ++ ++instruct vpopcountI(vReg dst, vReg src) %{ ++ predicate(UseSVE > 0 && n->as_Vector()->length() >= 4); ++ match(Set dst (PopCountVI src)); ++ format %{ "sve_cnt $dst, $src\t# vector (sve) (S)\n\t" %} ++ ins_encode %{ ++ __ sve_cnt(as_FloatRegister($dst$$reg), __ S, ptrue, as_FloatRegister($src$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++dnl ++dnl REDUCE_ADD($1, $2, $3, $4, $5, $6, $7 ) ++dnl REDUCE_ADD(insn_name, op_name, reg_dst, reg_src, size, elem_type, insn1) ++define(`REDUCE_ADD', ` ++instruct $1($3 dst, $4 src1, vReg src2, vRegD tmp) %{ ++ predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() >= 16 && ++ ELEMENT_SHORT_CHAR($6, n->in(2))); ++ match(Set dst ($2 src1 src2)); ++ effect(TEMP_DEF dst, TEMP tmp); ++ ins_cost(SVE_COST); ++ format %{ "sve_uaddv $tmp, $src2\t# vector (sve) ($5)\n\t" ++ "umov $dst, $tmp, $5, 0\n\t" ++ "$7 $dst, $dst, $src1\t # add reduction $5" %} ++ ins_encode %{ ++ __ sve_uaddv(as_FloatRegister($tmp$$reg), __ $5, ++ ptrue, as_FloatRegister($src2$$reg)); ++ __ umov($dst$$Register, as_FloatRegister($tmp$$reg), __ $5, 0); ++ __ $7($dst$$Register, $dst$$Register, $src1$$Register); ++ %} ++ ins_pipe(pipe_slow); ++%}')dnl ++dnl ++dnl REDUCE_ADDF($1, $2, $3, $4 ) ++dnl REDUCE_ADDF(insn_name, op_name, reg_dst, size) ++define(`REDUCE_ADDF', ` ++instruct $1($3 src1_dst, vReg src2) %{ ++ predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() >= 16); ++ match(Set src1_dst ($2 src1_dst src2)); ++ ins_cost(SVE_COST); ++ format %{ "sve_fadda $src1_dst, $src1_dst, $src2\t# vector (sve) ($4)" %} ++ ins_encode %{ ++ __ sve_fadda(as_FloatRegister($src1_dst$$reg), __ $4, ++ ptrue, as_FloatRegister($src2$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%}')dnl ++dnl ++// vector add reduction ++REDUCE_ADD(reduce_addI, AddReductionVI, iRegINoSp, iRegIorL2I, S, T_INT, addw) ++REDUCE_ADD(reduce_addL, AddReductionVL, iRegLNoSp, iRegL, D, T_LONG, add) ++REDUCE_ADDF(reduce_addF, AddReductionVF, vRegF, S) ++REDUCE_ADDF(reduce_addD, AddReductionVD, vRegD, D) ++ ++dnl ++dnl REDUCE_FMINMAX($1, $2, $3, $4, $5 ) ++dnl REDUCE_FMINMAX(min_max, name_suffix, element_type, size, reg_src_dst) ++define(`REDUCE_FMINMAX', ` ++instruct reduce_$1$2($5 dst, $5 src1, vReg src2) %{ ++ predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == $3 && ++ n->in(2)->bottom_type()->is_vect()->length_in_bytes() >= 16); ++ match(Set dst (translit($1, `m', `M')ReductionV src1 src2)); ++ ins_cost(INSN_COST); ++ 
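++dnl sve_fmaxv/sve_fminv reduce the whole vector to one scalar, and the
++dnl scalar fmax/fmin then folds in $src1. AArch64 fmax/fmin follow IEEE
++dnl semantics (a NaN operand produces NaN, and -0.0 compares below +0.0),
++dnl matching what Java's Math.max/Math.min require, so no fix-up is needed.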
effect(TEMP_DEF dst); ++ format %{ "sve_f$1v $dst, $src2 # vector (sve) (S)\n\t" ++ "f$1s $dst, $dst, $src1\t # $1 reduction $2" %} ++ ins_encode %{ ++ __ sve_f$1v(as_FloatRegister($dst$$reg), __ $4, ++ ptrue, as_FloatRegister($src2$$reg)); ++ __ f`$1'translit($4, `SD', `sd')(as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%}')dnl ++// vector max reduction ++REDUCE_FMINMAX(max, F, T_FLOAT, S, vRegF) ++REDUCE_FMINMAX(max, D, T_DOUBLE, D, vRegD) ++ ++// vector min reduction ++REDUCE_FMINMAX(min, F, T_FLOAT, S, vRegF) ++REDUCE_FMINMAX(min, D, T_DOUBLE, D, vRegD) ++ ++dnl ++dnl REPLICATE($1, $2, $3, $4, $5 ) ++dnl REPLICATE(insn_name, op_name, reg_src, size, min_vec_len) ++define(`REPLICATE', ` ++instruct $1(vReg dst, $3 src) %{ ++ predicate(UseSVE > 0 && n->as_Vector()->length() >= $5); ++ match(Set dst ($2 src)); ++ ins_cost(SVE_COST); ++ format %{ "sve_dup $dst, $src\t# vector (sve) ($4)" %} ++ ins_encode %{ ++ __ sve_dup(as_FloatRegister($dst$$reg), __ $4, as_Register($src$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%}')dnl ++dnl ++dnl REPLICATE_IMM8($1, $2, $3, $4, $5 ) ++dnl REPLICATE_IMM8(insn_name, op_name, imm_type, size, min_vec_len) ++define(`REPLICATE_IMM8', ` ++instruct $1(vReg dst, $3 con) %{ ++ predicate(UseSVE > 0 && n->as_Vector()->length() >= $5); ++ match(Set dst ($2 con)); ++ ins_cost(SVE_COST); ++ format %{ "sve_dup $dst, $con\t# vector (sve) ($4)" %} ++ ins_encode %{ ++ __ sve_dup(as_FloatRegister($dst$$reg), __ $4, $con$$constant); ++ %} ++ ins_pipe(pipe_slow); ++%}')dnl ++dnl ++dnl FREPLICATE($1, $2, $3, $4, $5 ) ++dnl FREPLICATE(insn_name, op_name, reg_src, size, min_vec_len) ++define(`FREPLICATE', ` ++instruct $1(vReg dst, $3 src) %{ ++ predicate(UseSVE > 0 && n->as_Vector()->length() >= $5); ++ match(Set dst ($2 src)); ++ ins_cost(SVE_COST); ++ format %{ "sve_cpy $dst, $src\t# vector (sve) ($4)" %} ++ ins_encode %{ ++ __ sve_cpy(as_FloatRegister($dst$$reg), __ $4, ++ ptrue, as_FloatRegister($src$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%}')dnl ++ ++// vector replicate ++REPLICATE(replicateB, ReplicateB, iRegIorL2I, B, 16) ++REPLICATE(replicateS, ReplicateS, iRegIorL2I, H, 8) ++REPLICATE(replicateI, ReplicateI, iRegIorL2I, S, 4) ++REPLICATE(replicateL, ReplicateL, iRegL, D, 2) ++ ++REPLICATE_IMM8(replicateB_imm8, ReplicateB, immI8, B, 16) ++REPLICATE_IMM8(replicateS_imm8, ReplicateS, immI8_shift8, H, 8) ++REPLICATE_IMM8(replicateI_imm8, ReplicateI, immI8_shift8, S, 4) ++REPLICATE_IMM8(replicateL_imm8, ReplicateL, immL8_shift8, D, 2) ++ ++FREPLICATE(replicateF, ReplicateF, vRegF, S, 4) ++FREPLICATE(replicateD, ReplicateD, vRegD, D, 2) ++dnl ++dnl VSHIFT_TRUE_PREDICATE($1, $2, $3, $4, $5 ) ++dnl VSHIFT_TRUE_PREDICATE(insn_name, op_name, size, min_vec_len, insn) ++define(`VSHIFT_TRUE_PREDICATE', ` ++instruct $1(vReg dst, vReg shift) %{ ++ predicate(UseSVE > 0 && n->as_Vector()->length() >= $4); ++ match(Set dst ($2 dst shift)); ++ ins_cost(SVE_COST); ++ format %{ "$5 $dst, $dst, $shift\t# vector (sve) ($3)" %} ++ ins_encode %{ ++ __ $5(as_FloatRegister($dst$$reg), __ $3, ++ ptrue, as_FloatRegister($shift$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%}')dnl ++dnl ++dnl VSHIFT_IMM_UNPREDICATE($1, $2, $3, $4, $5 ) ++dnl VSHIFT_IMM_UNPREDICATE(insn_name, op_name, size, min_vec_len, insn) ++define(`VSHIFT_IMM_UNPREDICATE', ` ++instruct $1(vReg dst, vReg src, immI shift) %{ ++ predicate(UseSVE > 0 && n->as_Vector()->length() >= $4); ++ match(Set dst ($2 src shift)); ++ ins_cost(SVE_COST); ++ format %{ "$5 $dst, 
$src, $shift\t# vector (sve) ($3)" %} ++ ins_encode %{ ++ int con = (int)$shift$$constant;dnl ++ifelse(eval(index(`$1', `vasr') == 0 || index(`$1', `vlsr') == 0), 1, ` ++ if (con == 0) { ++ __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), ++ as_FloatRegister($src$$reg)); ++ return; ++ }')dnl ++ifelse(eval(index(`$1', `vasr') == 0), 1, `ifelse(eval(index(`$3', `B') == 0), 1, ` ++ if (con >= 8) con = 7;')ifelse(eval(index(`$3', `H') == 0), 1, ` ++ if (con >= 16) con = 15;')')dnl ++ifelse(eval((index(`$1', `vlsl') == 0 || index(`$1', `vlsr') == 0) && (index(`$3', `B') == 0 || index(`$3', `H') == 0)), 1, ` ++ if (con >= 8) { ++ __ sve_eor(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), ++ as_FloatRegister($src$$reg)); ++ return; ++ }') ++ __ $5(as_FloatRegister($dst$$reg), __ $3, ++ as_FloatRegister($src$$reg), con); ++ %} ++ ins_pipe(pipe_slow); ++%}')dnl ++dnl ++dnl VSHIFT_COUNT($1, $2, $3, $4 ) ++dnl VSHIFT_COUNT(insn_name, size, min_vec_len, type) ++define(`VSHIFT_COUNT', ` ++instruct $1(vReg dst, iRegIorL2I cnt) %{ ++ predicate(UseSVE > 0 && n->as_Vector()->length() >= $3 && ++ ELEMENT_SHORT_CHAR($4, n)); ++ match(Set dst (LShiftCntV cnt)); ++ match(Set dst (RShiftCntV cnt)); ++ format %{ "sve_dup $dst, $cnt\t# vector shift count (sve) ($2)" %} ++ ins_encode %{ ++ __ sve_dup(as_FloatRegister($dst$$reg), __ $2, as_Register($cnt$$reg)); ++ %} ++ ins_pipe(pipe_slow); ++%}')dnl ++ ++// vector shift ++VSHIFT_TRUE_PREDICATE(vasrB, RShiftVB, B, 16, sve_asr) ++VSHIFT_TRUE_PREDICATE(vasrS, RShiftVS, H, 8, sve_asr) ++VSHIFT_TRUE_PREDICATE(vasrI, RShiftVI, S, 4, sve_asr) ++VSHIFT_TRUE_PREDICATE(vasrL, RShiftVL, D, 2, sve_asr) ++VSHIFT_TRUE_PREDICATE(vlslB, LShiftVB, B, 16, sve_lsl) ++VSHIFT_TRUE_PREDICATE(vlslS, LShiftVS, H, 8, sve_lsl) ++VSHIFT_TRUE_PREDICATE(vlslI, LShiftVI, S, 4, sve_lsl) ++VSHIFT_TRUE_PREDICATE(vlslL, LShiftVL, D, 2, sve_lsl) ++VSHIFT_TRUE_PREDICATE(vlsrB, URShiftVB, B, 16, sve_lsr) ++VSHIFT_TRUE_PREDICATE(vlsrS, URShiftVS, H, 8, sve_lsr) ++VSHIFT_TRUE_PREDICATE(vlsrI, URShiftVI, S, 4, sve_lsr) ++VSHIFT_TRUE_PREDICATE(vlsrL, URShiftVL, D, 2, sve_lsr) ++VSHIFT_IMM_UNPREDICATE(vasrB_imm, RShiftVB, B, 16, sve_asr) ++VSHIFT_IMM_UNPREDICATE(vasrS_imm, RShiftVS, H, 8, sve_asr) ++VSHIFT_IMM_UNPREDICATE(vasrI_imm, RShiftVI, S, 4, sve_asr) ++VSHIFT_IMM_UNPREDICATE(vasrL_imm, RShiftVL, D, 2, sve_asr) ++VSHIFT_IMM_UNPREDICATE(vlsrB_imm, URShiftVB, B, 16, sve_lsr) ++VSHIFT_IMM_UNPREDICATE(vlsrS_imm, URShiftVS, H, 8, sve_lsr) ++VSHIFT_IMM_UNPREDICATE(vlsrI_imm, URShiftVI, S, 4, sve_lsr) ++VSHIFT_IMM_UNPREDICATE(vlsrL_imm, URShiftVL, D, 2, sve_lsr) ++VSHIFT_IMM_UNPREDICATE(vlslB_imm, LShiftVB, B, 16, sve_lsl) ++VSHIFT_IMM_UNPREDICATE(vlslS_imm, LShiftVS, H, 8, sve_lsl) ++VSHIFT_IMM_UNPREDICATE(vlslI_imm, LShiftVI, S, 4, sve_lsl) ++VSHIFT_IMM_UNPREDICATE(vlslL_imm, LShiftVL, D, 2, sve_lsl) ++VSHIFT_COUNT(vshiftcntB, B, 16, T_BYTE) ++VSHIFT_COUNT(vshiftcntS, H, 8, T_SHORT) ++VSHIFT_COUNT(vshiftcntI, S, 4, T_INT) ++VSHIFT_COUNT(vshiftcntL, D, 2, T_LONG) ++ ++// vector sqrt ++UNARY_OP_TRUE_PREDICATE(vsqrtF, SqrtVF, S, 16, sve_fsqrt) ++UNARY_OP_TRUE_PREDICATE(vsqrtD, SqrtVD, D, 16, sve_fsqrt) ++ ++// vector sub ++BINARY_OP_UNPREDICATED(vsubB, SubVB, B, 16, sve_sub) ++BINARY_OP_UNPREDICATED(vsubS, SubVS, H, 8, sve_sub) ++BINARY_OP_UNPREDICATED(vsubI, SubVI, S, 4, sve_sub) ++BINARY_OP_UNPREDICATED(vsubL, SubVL, D, 2, sve_sub) ++BINARY_OP_UNPREDICATED(vsubF, SubVF, S, 4, sve_fsub) ++BINARY_OP_UNPREDICATED(vsubD, SubVD, D, 2, sve_fsub) +diff --git 
a/src/hotspot/cpu/aarch64/assembler_aarch64.cpp b/src/hotspot/cpu/aarch64/assembler_aarch64.cpp +index 586743eb9..441ea4066 100644 +--- a/src/hotspot/cpu/aarch64/assembler_aarch64.cpp ++++ b/src/hotspot/cpu/aarch64/assembler_aarch64.cpp +@@ -98,531 +98,617 @@ void entry(CodeBuffer *cb) { + __ bind(back); + + // ArithOp +- __ add(r19, r22, r7, Assembler::LSL, 28); // add x19, x22, x7, LSL #28 +- __ sub(r16, r11, r10, Assembler::LSR, 13); // sub x16, x11, x10, LSR #13 +- __ adds(r27, r13, r28, Assembler::ASR, 2); // adds x27, x13, x28, ASR #2 +- __ subs(r20, r28, r26, Assembler::ASR, 41); // subs x20, x28, x26, ASR #41 +- __ addw(r8, r19, r19, Assembler::ASR, 19); // add w8, w19, w19, ASR #19 +- __ subw(r4, r9, r10, Assembler::LSL, 14); // sub w4, w9, w10, LSL #14 +- __ addsw(r8, r11, r30, Assembler::LSL, 13); // adds w8, w11, w30, LSL #13 +- __ subsw(r0, r25, r19, Assembler::LSL, 9); // subs w0, w25, w19, LSL #9 +- __ andr(r20, r0, r21, Assembler::LSL, 19); // and x20, x0, x21, LSL #19 +- __ orr(r21, r14, r20, Assembler::LSL, 17); // orr x21, x14, x20, LSL #17 +- __ eor(r25, r28, r1, Assembler::LSL, 51); // eor x25, x28, x1, LSL #51 +- __ ands(r10, r27, r11, Assembler::ASR, 15); // ands x10, x27, x11, ASR #15 +- __ andw(r25, r5, r12, Assembler::ASR, 23); // and w25, w5, w12, ASR #23 +- __ orrw(r18, r14, r10, Assembler::LSR, 4); // orr w18, w14, w10, LSR #4 +- __ eorw(r4, r21, r5, Assembler::ASR, 22); // eor w4, w21, w5, ASR #22 +- __ andsw(r21, r0, r5, Assembler::ASR, 29); // ands w21, w0, w5, ASR #29 +- __ bic(r26, r30, r6, Assembler::ASR, 37); // bic x26, x30, x6, ASR #37 +- __ orn(r3, r1, r13, Assembler::LSR, 29); // orn x3, x1, x13, LSR #29 +- __ eon(r0, r28, r9, Assembler::LSL, 47); // eon x0, x28, x9, LSL #47 +- __ bics(r29, r5, r28, Assembler::LSL, 46); // bics x29, x5, x28, LSL #46 +- __ bicw(r9, r18, r7, Assembler::LSR, 20); // bic w9, w18, w7, LSR #20 +- __ ornw(r26, r13, r25, Assembler::ASR, 24); // orn w26, w13, w25, ASR #24 +- __ eonw(r25, r4, r19, Assembler::LSL, 6); // eon w25, w4, w19, LSL #6 +- __ bicsw(r5, r26, r4, Assembler::LSR, 24); // bics w5, w26, w4, LSR #24 ++ __ add(r26, r23, r13, Assembler::LSL, 32); // add x26, x23, x13, LSL #32 ++ __ sub(r12, r24, r9, Assembler::LSR, 37); // sub x12, x24, x9, LSR #37 ++ __ adds(r28, r15, r8, Assembler::ASR, 39); // adds x28, x15, x8, ASR #39 ++ __ subs(r7, r28, r30, Assembler::ASR, 57); // subs x7, x28, x30, ASR #57 ++ __ addw(r9, r22, r27, Assembler::ASR, 15); // add w9, w22, w27, ASR #15 ++ __ subw(r3, r13, r18, Assembler::ASR, 30); // sub w3, w13, w18, ASR #30 ++ __ addsw(r14, r26, r8, Assembler::ASR, 17); // adds w14, w26, w8, ASR #17 ++ __ subsw(r0, r22, r12, Assembler::ASR, 21); // subs w0, w22, w12, ASR #21 ++ __ andr(r0, r15, r26, Assembler::LSL, 20); // and x0, x15, x26, LSL #20 ++ __ orr(r26, r5, r17, Assembler::LSL, 61); // orr x26, x5, x17, LSL #61 ++ __ eor(r24, r13, r2, Assembler::LSL, 32); // eor x24, x13, x2, LSL #32 ++ __ ands(r28, r3, r17, Assembler::ASR, 35); // ands x28, x3, x17, ASR #35 ++ __ andw(r25, r16, r29, Assembler::LSR, 18); // and w25, w16, w29, LSR #18 ++ __ orrw(r13, r18, r11, Assembler::LSR, 9); // orr w13, w18, w11, LSR #9 ++ __ eorw(r5, r5, r18, Assembler::LSR, 15); // eor w5, w5, w18, LSR #15 ++ __ andsw(r2, r23, r27, Assembler::ASR, 26); // ands w2, w23, w27, ASR #26 ++ __ bic(r27, r28, r16, Assembler::LSR, 45); // bic x27, x28, x16, LSR #45 ++ __ orn(r8, r25, r26, Assembler::ASR, 37); // orn x8, x25, x26, ASR #37 ++ __ eon(r29, r17, r13, Assembler::LSR, 63); // eon x29, x17, x13, LSR #63 ++ 
__ bics(r28, r24, r2, Assembler::LSR, 31); // bics x28, x24, x2, LSR #31 ++ __ bicw(r19, r26, r7, Assembler::ASR, 3); // bic w19, w26, w7, ASR #3 ++ __ ornw(r6, r24, r10, Assembler::ASR, 3); // orn w6, w24, w10, ASR #3 ++ __ eonw(r4, r21, r1, Assembler::LSR, 29); // eon w4, w21, w1, LSR #29 ++ __ bicsw(r16, r21, r0, Assembler::LSR, 19); // bics w16, w21, w0, LSR #19 + + // AddSubImmOp +- __ addw(r7, r19, 340u); // add w7, w19, #340 +- __ addsw(r8, r0, 401u); // adds w8, w0, #401 +- __ subw(r29, r20, 163u); // sub w29, w20, #163 +- __ subsw(r8, r23, 759u); // subs w8, w23, #759 +- __ add(r1, r12, 523u); // add x1, x12, #523 +- __ adds(r2, r11, 426u); // adds x2, x11, #426 +- __ sub(r14, r29, 716u); // sub x14, x29, #716 +- __ subs(r11, r5, 582u); // subs x11, x5, #582 ++ __ addw(r17, r12, 379u); // add w17, w12, #379 ++ __ addsw(r30, r1, 22u); // adds w30, w1, #22 ++ __ subw(r29, r5, 126u); // sub w29, w5, #126 ++ __ subsw(r6, r24, 960u); // subs w6, w24, #960 ++ __ add(r0, r13, 104u); // add x0, x13, #104 ++ __ adds(r8, r6, 663u); // adds x8, x6, #663 ++ __ sub(r10, r5, 516u); // sub x10, x5, #516 ++ __ subs(r1, r3, 1012u); // subs x1, x3, #1012 + + // LogicalImmOp +- __ andw(r23, r22, 32768ul); // and w23, w22, #0x8000 +- __ orrw(r4, r10, 4042322160ul); // orr w4, w10, #0xf0f0f0f0 +- __ eorw(r0, r24, 4042322160ul); // eor w0, w24, #0xf0f0f0f0 +- __ andsw(r19, r29, 2139127680ul); // ands w19, w29, #0x7f807f80 +- __ andr(r5, r10, 4503599627354112ul); // and x5, x10, #0xfffffffffc000 +- __ orr(r12, r30, 18445618178097414144ul); // orr x12, x30, #0xfffc0000fffc0000 +- __ eor(r30, r5, 262128ul); // eor x30, x5, #0x3fff0 +- __ ands(r26, r23, 4194300ul); // ands x26, x23, #0x3ffffc ++ __ andw(r6, r11, 4294049777ull); // and w6, w11, #0xfff1fff1 ++ __ orrw(r28, r5, 4294966791ull); // orr w28, w5, #0xfffffe07 ++ __ eorw(r1, r20, 134217216ull); // eor w1, w20, #0x7fffe00 ++ __ andsw(r7, r18, 1048576ull); // ands w7, w18, #0x100000 ++ __ andr(r14, r12, 9223372036854775808ull); // and x14, x12, #0x8000000000000000 ++ __ orr(r9, r11, 562675075514368ull); // orr x9, x11, #0x1ffc000000000 ++ __ eor(r17, r0, 18014398509481728ull); // eor x17, x0, #0x3fffffffffff00 ++ __ ands(r1, r8, 18446744073705357315ull); // ands x1, x8, #0xffffffffffc00003 + + // AbsOp +- __ b(__ pc()); // b . +- __ b(back); // b back +- __ b(forth); // b forth +- __ bl(__ pc()); // bl . +- __ bl(back); // bl back +- __ bl(forth); // bl forth ++ __ b(__ pc()); // b . ++ __ b(back); // b back ++ __ b(forth); // b forth ++ __ bl(__ pc()); // bl . ++ __ bl(back); // bl back ++ __ bl(forth); // bl forth + + // RegAndAbsOp +- __ cbzw(r12, __ pc()); // cbz w12, . +- __ cbzw(r12, back); // cbz w12, back +- __ cbzw(r12, forth); // cbz w12, forth +- __ cbnzw(r20, __ pc()); // cbnz w20, . +- __ cbnzw(r20, back); // cbnz w20, back +- __ cbnzw(r20, forth); // cbnz w20, forth +- __ cbz(r12, __ pc()); // cbz x12, . +- __ cbz(r12, back); // cbz x12, back +- __ cbz(r12, forth); // cbz x12, forth +- __ cbnz(r24, __ pc()); // cbnz x24, . +- __ cbnz(r24, back); // cbnz x24, back +- __ cbnz(r24, forth); // cbnz x24, forth +- __ adr(r6, __ pc()); // adr x6, . +- __ adr(r6, back); // adr x6, back +- __ adr(r6, forth); // adr x6, forth +- __ _adrp(r21, __ pc()); // adrp x21, . ++ __ cbzw(r10, __ pc()); // cbz w10, . ++ __ cbzw(r10, back); // cbz w10, back ++ __ cbzw(r10, forth); // cbz w10, forth ++ __ cbnzw(r8, __ pc()); // cbnz w8, . ++ __ cbnzw(r8, back); // cbnz w8, back ++ __ cbnzw(r8, forth); // cbnz w8, forth ++ __ cbz(r11, __ pc()); // cbz x11, . 
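// Hedged aside, not part of the patch: everything in entry() between the
// `back` and `forth` labels is machine-generated -- one call per instruction
// form -- and the hex dump replaced further down in this file is the golden
// objdump output those emissions are checked against. A word-by-word check
// of that flavor (the names `generated`, `golden`, `count` are illustrative
// only, not taken from this file) might look like:
#include <cassert>
#include <cstddef>
static void check_generated_words(const unsigned int *generated,
                                  const unsigned int *golden, size_t count) {
  for (size_t i = 0; i < count; i++) {
    // Each AArch64 instruction is one little-endian 32-bit word; any
    // mismatch means an encoder regression relative to the golden dump.
    assert(generated[i] == golden[i]);
  }
}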
++ __ cbz(r11, back); // cbz x11, back ++ __ cbz(r11, forth); // cbz x11, forth ++ __ cbnz(r29, __ pc()); // cbnz x29, . ++ __ cbnz(r29, back); // cbnz x29, back ++ __ cbnz(r29, forth); // cbnz x29, forth ++ __ adr(r19, __ pc()); // adr x19, . ++ __ adr(r19, back); // adr x19, back ++ __ adr(r19, forth); // adr x19, forth ++ __ _adrp(r19, __ pc()); // adrp x19, . + + // RegImmAbsOp +- __ tbz(r1, 1, __ pc()); // tbz x1, #1, . +- __ tbz(r1, 1, back); // tbz x1, #1, back +- __ tbz(r1, 1, forth); // tbz x1, #1, forth +- __ tbnz(r8, 9, __ pc()); // tbnz x8, #9, . +- __ tbnz(r8, 9, back); // tbnz x8, #9, back +- __ tbnz(r8, 9, forth); // tbnz x8, #9, forth ++ __ tbz(r22, 6, __ pc()); // tbz x22, #6, . ++ __ tbz(r22, 6, back); // tbz x22, #6, back ++ __ tbz(r22, 6, forth); // tbz x22, #6, forth ++ __ tbnz(r12, 11, __ pc()); // tbnz x12, #11, . ++ __ tbnz(r12, 11, back); // tbnz x12, #11, back ++ __ tbnz(r12, 11, forth); // tbnz x12, #11, forth + + // MoveWideImmOp +- __ movnw(r12, 23175, 0); // movn w12, #23175, lsl 0 +- __ movzw(r11, 20476, 16); // movz w11, #20476, lsl 16 +- __ movkw(r21, 3716, 0); // movk w21, #3716, lsl 0 +- __ movn(r29, 28661, 48); // movn x29, #28661, lsl 48 +- __ movz(r3, 6927, 0); // movz x3, #6927, lsl 0 +- __ movk(r22, 9828, 16); // movk x22, #9828, lsl 16 ++ __ movnw(r0, 6301, 0); // movn w0, #6301, lsl 0 ++ __ movzw(r7, 20886, 0); // movz w7, #20886, lsl 0 ++ __ movkw(r27, 18617, 0); // movk w27, #18617, lsl 0 ++ __ movn(r12, 22998, 16); // movn x12, #22998, lsl 16 ++ __ movz(r20, 1532, 16); // movz x20, #1532, lsl 16 ++ __ movk(r8, 5167, 32); // movk x8, #5167, lsl 32 + + // BitfieldOp +- __ sbfm(r12, r8, 6, 22); // sbfm x12, x8, #6, #22 +- __ bfmw(r19, r25, 25, 19); // bfm w19, w25, #25, #19 +- __ ubfmw(r9, r12, 29, 15); // ubfm w9, w12, #29, #15 +- __ sbfm(r28, r25, 16, 16); // sbfm x28, x25, #16, #16 +- __ bfm(r12, r5, 4, 25); // bfm x12, x5, #4, #25 +- __ ubfm(r0, r10, 6, 8); // ubfm x0, x10, #6, #8 ++ __ sbfm(r15, r17, 24, 28); // sbfm x15, x17, #24, #28 ++ __ bfmw(r15, r9, 14, 25); // bfm w15, w9, #14, #25 ++ __ ubfmw(r27, r25, 6, 31); // ubfm w27, w25, #6, #31 ++ __ sbfm(r19, r2, 23, 31); // sbfm x19, x2, #23, #31 ++ __ bfm(r12, r21, 10, 6); // bfm x12, x21, #10, #6 ++ __ ubfm(r22, r0, 26, 16); // ubfm x22, x0, #26, #16 + + // ExtractOp +- __ extrw(r4, r13, r26, 24); // extr w4, w13, w26, #24 +- __ extr(r23, r30, r24, 31); // extr x23, x30, x24, #31 ++ __ extrw(r3, r3, r20, 27); // extr w3, w3, w20, #27 ++ __ extr(r8, r30, r3, 54); // extr x8, x30, x3, #54 + + // CondBranchOp +- __ br(Assembler::EQ, __ pc()); // b.EQ . +- __ br(Assembler::EQ, back); // b.EQ back +- __ br(Assembler::EQ, forth); // b.EQ forth +- __ br(Assembler::NE, __ pc()); // b.NE . +- __ br(Assembler::NE, back); // b.NE back +- __ br(Assembler::NE, forth); // b.NE forth +- __ br(Assembler::HS, __ pc()); // b.HS . +- __ br(Assembler::HS, back); // b.HS back +- __ br(Assembler::HS, forth); // b.HS forth +- __ br(Assembler::CS, __ pc()); // b.CS . +- __ br(Assembler::CS, back); // b.CS back +- __ br(Assembler::CS, forth); // b.CS forth +- __ br(Assembler::LO, __ pc()); // b.LO . +- __ br(Assembler::LO, back); // b.LO back +- __ br(Assembler::LO, forth); // b.LO forth +- __ br(Assembler::CC, __ pc()); // b.CC . +- __ br(Assembler::CC, back); // b.CC back +- __ br(Assembler::CC, forth); // b.CC forth +- __ br(Assembler::MI, __ pc()); // b.MI . +- __ br(Assembler::MI, back); // b.MI back +- __ br(Assembler::MI, forth); // b.MI forth +- __ br(Assembler::PL, __ pc()); // b.PL . 
+- __ br(Assembler::PL, back); // b.PL back +- __ br(Assembler::PL, forth); // b.PL forth +- __ br(Assembler::VS, __ pc()); // b.VS . +- __ br(Assembler::VS, back); // b.VS back +- __ br(Assembler::VS, forth); // b.VS forth +- __ br(Assembler::VC, __ pc()); // b.VC . +- __ br(Assembler::VC, back); // b.VC back +- __ br(Assembler::VC, forth); // b.VC forth +- __ br(Assembler::HI, __ pc()); // b.HI . +- __ br(Assembler::HI, back); // b.HI back +- __ br(Assembler::HI, forth); // b.HI forth +- __ br(Assembler::LS, __ pc()); // b.LS . +- __ br(Assembler::LS, back); // b.LS back +- __ br(Assembler::LS, forth); // b.LS forth +- __ br(Assembler::GE, __ pc()); // b.GE . +- __ br(Assembler::GE, back); // b.GE back +- __ br(Assembler::GE, forth); // b.GE forth +- __ br(Assembler::LT, __ pc()); // b.LT . +- __ br(Assembler::LT, back); // b.LT back +- __ br(Assembler::LT, forth); // b.LT forth +- __ br(Assembler::GT, __ pc()); // b.GT . +- __ br(Assembler::GT, back); // b.GT back +- __ br(Assembler::GT, forth); // b.GT forth +- __ br(Assembler::LE, __ pc()); // b.LE . +- __ br(Assembler::LE, back); // b.LE back +- __ br(Assembler::LE, forth); // b.LE forth +- __ br(Assembler::AL, __ pc()); // b.AL . +- __ br(Assembler::AL, back); // b.AL back +- __ br(Assembler::AL, forth); // b.AL forth +- __ br(Assembler::NV, __ pc()); // b.NV . +- __ br(Assembler::NV, back); // b.NV back +- __ br(Assembler::NV, forth); // b.NV forth ++ __ br(Assembler::EQ, __ pc()); // b.EQ . ++ __ br(Assembler::EQ, back); // b.EQ back ++ __ br(Assembler::EQ, forth); // b.EQ forth ++ __ br(Assembler::NE, __ pc()); // b.NE . ++ __ br(Assembler::NE, back); // b.NE back ++ __ br(Assembler::NE, forth); // b.NE forth ++ __ br(Assembler::HS, __ pc()); // b.HS . ++ __ br(Assembler::HS, back); // b.HS back ++ __ br(Assembler::HS, forth); // b.HS forth ++ __ br(Assembler::CS, __ pc()); // b.CS . ++ __ br(Assembler::CS, back); // b.CS back ++ __ br(Assembler::CS, forth); // b.CS forth ++ __ br(Assembler::LO, __ pc()); // b.LO . ++ __ br(Assembler::LO, back); // b.LO back ++ __ br(Assembler::LO, forth); // b.LO forth ++ __ br(Assembler::CC, __ pc()); // b.CC . ++ __ br(Assembler::CC, back); // b.CC back ++ __ br(Assembler::CC, forth); // b.CC forth ++ __ br(Assembler::MI, __ pc()); // b.MI . ++ __ br(Assembler::MI, back); // b.MI back ++ __ br(Assembler::MI, forth); // b.MI forth ++ __ br(Assembler::PL, __ pc()); // b.PL . ++ __ br(Assembler::PL, back); // b.PL back ++ __ br(Assembler::PL, forth); // b.PL forth ++ __ br(Assembler::VS, __ pc()); // b.VS . ++ __ br(Assembler::VS, back); // b.VS back ++ __ br(Assembler::VS, forth); // b.VS forth ++ __ br(Assembler::VC, __ pc()); // b.VC . ++ __ br(Assembler::VC, back); // b.VC back ++ __ br(Assembler::VC, forth); // b.VC forth ++ __ br(Assembler::HI, __ pc()); // b.HI . ++ __ br(Assembler::HI, back); // b.HI back ++ __ br(Assembler::HI, forth); // b.HI forth ++ __ br(Assembler::LS, __ pc()); // b.LS . ++ __ br(Assembler::LS, back); // b.LS back ++ __ br(Assembler::LS, forth); // b.LS forth ++ __ br(Assembler::GE, __ pc()); // b.GE . ++ __ br(Assembler::GE, back); // b.GE back ++ __ br(Assembler::GE, forth); // b.GE forth ++ __ br(Assembler::LT, __ pc()); // b.LT . ++ __ br(Assembler::LT, back); // b.LT back ++ __ br(Assembler::LT, forth); // b.LT forth ++ __ br(Assembler::GT, __ pc()); // b.GT . ++ __ br(Assembler::GT, back); // b.GT back ++ __ br(Assembler::GT, forth); // b.GT forth ++ __ br(Assembler::LE, __ pc()); // b.LE . 
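// Hedged aside, not part of the patch: the CondBranchOp block exercises HS
// next to CS and LO next to CC because each pair is two spellings of a
// single condition encoding (0b0010 and 0b0011); that is why the golden
// listing later in this patch prints b.cs and b.cc for both spellings. A
// minimal illustration (enum values are the architectural cond fields):
enum MiniCond { MC_EQ = 0x0, MC_NE = 0x1, MC_HS = 0x2, MC_CS = 0x2,
                MC_LO = 0x3, MC_CC = 0x3 };
static_assert(MC_HS == MC_CS && MC_LO == MC_CC,
              "alias pairs share a single encoding");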
++ __ br(Assembler::LE, back); // b.LE back ++ __ br(Assembler::LE, forth); // b.LE forth ++ __ br(Assembler::AL, __ pc()); // b.AL . ++ __ br(Assembler::AL, back); // b.AL back ++ __ br(Assembler::AL, forth); // b.AL forth ++ __ br(Assembler::NV, __ pc()); // b.NV . ++ __ br(Assembler::NV, back); // b.NV back ++ __ br(Assembler::NV, forth); // b.NV forth + + // ImmOp +- __ svc(12729); // svc #12729 +- __ hvc(6788); // hvc #6788 +- __ smc(1535); // smc #1535 +- __ brk(16766); // brk #16766 +- __ hlt(9753); // hlt #9753 ++ __ svc(12999); // svc #12999 ++ __ hvc(2665); // hvc #2665 ++ __ smc(9002); // smc #9002 ++ __ brk(14843); // brk #14843 ++ __ hlt(25964); // hlt #25964 + + // Op +- __ nop(); // nop +- __ eret(); // eret +- __ drps(); // drps +- __ isb(); // isb ++ __ nop(); // nop ++ __ eret(); // eret ++ __ drps(); // drps ++ __ isb(); // isb + + // SystemOp +- __ dsb(Assembler::SY); // dsb SY +- __ dmb(Assembler::ISHST); // dmb ISHST ++ __ dsb(Assembler::ST); // dsb ST ++ __ dmb(Assembler::OSHST); // dmb OSHST + + // OneRegOp +- __ br(r2); // br x2 +- __ blr(r5); // blr x5 ++ __ br(r16); // br x16 ++ __ blr(r20); // blr x20 + + // LoadStoreExclusiveOp +- __ stxr(r20, r21, r2); // stxr w20, x21, [x2] +- __ stlxr(r5, r29, r7); // stlxr w5, x29, [x7] +- __ ldxr(r5, r16); // ldxr x5, [x16] +- __ ldaxr(r27, r29); // ldaxr x27, [x29] +- __ stlr(r0, r29); // stlr x0, [x29] +- __ ldar(r21, r28); // ldar x21, [x28] ++ __ stxr(r10, r27, r8); // stxr w10, x27, [x8] ++ __ stlxr(r0, r1, r21); // stlxr w0, x1, [x21] ++ __ ldxr(r17, r29); // ldxr x17, [x29] ++ __ ldaxr(r29, r28); // ldaxr x29, [x28] ++ __ stlr(r1, r23); // stlr x1, [x23] ++ __ ldar(r21, r20); // ldar x21, [x20] + + // LoadStoreExclusiveOp +- __ stxrw(r21, r24, r7); // stxr w21, w24, [x7] +- __ stlxrw(r21, r26, r28); // stlxr w21, w26, [x28] +- __ ldxrw(r21, r6); // ldxr w21, [x6] +- __ ldaxrw(r15, r30); // ldaxr w15, [x30] +- __ stlrw(r19, r3); // stlr w19, [x3] +- __ ldarw(r22, r2); // ldar w22, [x2] ++ __ stxrw(r22, r27, r19); // stxr w22, w27, [x19] ++ __ stlxrw(r11, r16, r6); // stlxr w11, w16, [x6] ++ __ ldxrw(r18, r0); // ldxr w18, [x0] ++ __ ldaxrw(r4, r10); // ldaxr w4, [x10] ++ __ stlrw(r24, r22); // stlr w24, [x22] ++ __ ldarw(r10, r19); // ldar w10, [x19] + + // LoadStoreExclusiveOp +- __ stxrh(r18, r15, r0); // stxrh w18, w15, [x0] +- __ stlxrh(r11, r5, r28); // stlxrh w11, w5, [x28] +- __ ldxrh(r29, r6); // ldxrh w29, [x6] +- __ ldaxrh(r18, r7); // ldaxrh w18, [x7] +- __ stlrh(r25, r28); // stlrh w25, [x28] +- __ ldarh(r2, r19); // ldarh w2, [x19] ++ __ stxrh(r1, r5, r30); // stxrh w1, w5, [x30] ++ __ stlxrh(r8, r12, r17); // stlxrh w8, w12, [x17] ++ __ ldxrh(r9, r14); // ldxrh w9, [x14] ++ __ ldaxrh(r7, r1); // ldaxrh w7, [x1] ++ __ stlrh(r5, r16); // stlrh w5, [x16] ++ __ ldarh(r2, r12); // ldarh w2, [x12] + + // LoadStoreExclusiveOp +- __ stxrb(r10, r30, r1); // stxrb w10, w30, [x1] +- __ stlxrb(r20, r21, r22); // stlxrb w20, w21, [x22] +- __ ldxrb(r25, r2); // ldxrb w25, [x2] +- __ ldaxrb(r24, r5); // ldaxrb w24, [x5] +- __ stlrb(r16, r3); // stlrb w16, [x3] +- __ ldarb(r22, r29); // ldarb w22, [x29] ++ __ stxrb(r10, r12, r3); // stxrb w10, w12, [x3] ++ __ stlxrb(r28, r14, r26); // stlxrb w28, w14, [x26] ++ __ ldxrb(r30, r10); // ldxrb w30, [x10] ++ __ ldaxrb(r14, r21); // ldaxrb w14, [x21] ++ __ stlrb(r13, r9); // stlrb w13, [x9] ++ __ ldarb(r22, r27); // ldarb w22, [x27] + + // LoadStoreExclusiveOp +- __ ldxp(r8, r2, r19); // ldxp x8, x2, [x19] +- __ ldaxp(r7, r19, r14); // ldaxp x7, x19, [x14] +- __ stxp(r8, r27, 
r28, r5); // stxp w8, x27, x28, [x5] +- __ stlxp(r5, r8, r14, r6); // stlxp w5, x8, x14, [x6] ++ __ ldxp(r28, r19, r11); // ldxp x28, x19, [x11] ++ __ ldaxp(r30, r19, r2); // ldaxp x30, x19, [x2] ++ __ stxp(r2, r23, r1, r0); // stxp w2, x23, x1, [x0] ++ __ stlxp(r12, r16, r13, r15); // stlxp w12, x16, x13, [x15] + + // LoadStoreExclusiveOp +- __ ldxpw(r25, r4, r22); // ldxp w25, w4, [x22] +- __ ldaxpw(r13, r14, r15); // ldaxp w13, w14, [x15] +- __ stxpw(r20, r26, r8, r10); // stxp w20, w26, w8, [x10] +- __ stlxpw(r23, r18, r18, r18); // stlxp w23, w18, w18, [x18] ++ __ ldxpw(r18, r21, r13); // ldxp w18, w21, [x13] ++ __ ldaxpw(r11, r30, r8); // ldaxp w11, w30, [x8] ++ __ stxpw(r24, r13, r11, r1); // stxp w24, w13, w11, [x1] ++ __ stlxpw(r26, r21, r27, r13); // stlxp w26, w21, w27, [x13] + +-// base_plus_unscaled_offset ++// base_plus_unscaled_offset + // LoadStoreOp +- __ str(r30, Address(r11, 99)); // str x30, [x11, 99] +- __ strw(r23, Address(r25, -77)); // str w23, [x25, -77] +- __ strb(r2, Address(r14, 3)); // strb w2, [x14, 3] +- __ strh(r9, Address(r10, 5)); // strh w9, [x10, 5] +- __ ldr(r20, Address(r15, 57)); // ldr x20, [x15, 57] +- __ ldrw(r12, Address(r16, -78)); // ldr w12, [x16, -78] +- __ ldrb(r22, Address(r26, -3)); // ldrb w22, [x26, -3] +- __ ldrh(r30, Address(r19, -47)); // ldrh w30, [x19, -47] +- __ ldrsb(r9, Address(r10, -12)); // ldrsb x9, [x10, -12] +- __ ldrsh(r28, Address(r17, 14)); // ldrsh x28, [x17, 14] +- __ ldrshw(r3, Address(r5, 10)); // ldrsh w3, [x5, 10] +- __ ldrsw(r17, Address(r17, -91)); // ldrsw x17, [x17, -91] +- __ ldrd(v2, Address(r20, -17)); // ldr d2, [x20, -17] +- __ ldrs(v22, Address(r7, -10)); // ldr s22, [x7, -10] +- __ strd(v30, Address(r18, -223)); // str d30, [x18, -223] +- __ strs(v13, Address(r22, 21)); // str s13, [x22, 21] +- +-// pre ++ __ str(r11, Address(r20, -103)); // str x11, [x20, -103] ++ __ strw(r28, Address(r16, 62)); // str w28, [x16, 62] ++ __ strb(r27, Address(r9, -9)); // strb w27, [x9, -9] ++ __ strh(r2, Address(r25, -50)); // strh w2, [x25, -50] ++ __ ldr(r4, Address(r2, -241)); // ldr x4, [x2, -241] ++ __ ldrw(r30, Address(r20, -31)); // ldr w30, [x20, -31] ++ __ ldrb(r18, Address(r23, -23)); // ldrb w18, [x23, -23] ++ __ ldrh(r29, Address(r26, -1)); // ldrh w29, [x26, -1] ++ __ ldrsb(r1, Address(r9, 6)); // ldrsb x1, [x9, 6] ++ __ ldrsh(r11, Address(r12, 19)); // ldrsh x11, [x12, 19] ++ __ ldrshw(r11, Address(r1, -50)); // ldrsh w11, [x1, -50] ++ __ ldrsw(r19, Address(r24, 41)); // ldrsw x19, [x24, 41] ++ __ ldrd(v24, Address(r24, 95)); // ldr d24, [x24, 95] ++ __ ldrs(v15, Address(r5, -43)); // ldr s15, [x5, -43] ++ __ strd(v21, Address(r27, 1)); // str d21, [x27, 1] ++ __ strs(v23, Address(r13, -107)); // str s23, [x13, -107] ++ ++// pre + // LoadStoreOp +- __ str(r9, Address(__ pre(r18, -112))); // str x9, [x18, -112]! +- __ strw(r29, Address(__ pre(r23, 11))); // str w29, [x23, 11]! +- __ strb(r18, Address(__ pre(r12, -1))); // strb w18, [x12, -1]! +- __ strh(r16, Address(__ pre(r20, -23))); // strh w16, [x20, -23]! +- __ ldr(r3, Address(__ pre(r29, 9))); // ldr x3, [x29, 9]! +- __ ldrw(r25, Address(__ pre(r3, 19))); // ldr w25, [x3, 19]! +- __ ldrb(r1, Address(__ pre(r29, -1))); // ldrb w1, [x29, -1]! +- __ ldrh(r8, Address(__ pre(r29, -57))); // ldrh w8, [x29, -57]! +- __ ldrsb(r5, Address(__ pre(r14, -13))); // ldrsb x5, [x14, -13]! +- __ ldrsh(r10, Address(__ pre(r27, 1))); // ldrsh x10, [x27, 1]! +- __ ldrshw(r11, Address(__ pre(r10, 25))); // ldrsh w11, [x10, 25]! 
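// Hedged aside, not part of the patch: Address(__ pre(base, imm)) encodes
// pre-indexing "[base, #imm]!" (base is written back first, then the updated
// address is accessed), while Address(__ post(base, imm)) encodes
// post-indexing "[base], #imm" (the old base is accessed, then written
// back). A tiny semantic model of the write-back, with illustrative names:
struct MiniBase { long r; };
// pre-index: write back first, then access the updated address
static inline long pre_index(MiniBase &b, long imm)  { b.r += imm; return b.r; }
// post-index: access the old address, then write back
static inline long post_index(MiniBase &b, long imm) { long a = b.r; b.r += imm; return a; }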
+- __ ldrsw(r4, Address(__ pre(r22, -92))); // ldrsw x4, [x22, -92]! +- __ ldrd(v11, Address(__ pre(r23, 8))); // ldr d11, [x23, 8]! +- __ ldrs(v25, Address(__ pre(r19, 54))); // ldr s25, [x19, 54]! +- __ strd(v1, Address(__ pre(r7, -174))); // str d1, [x7, -174]! +- __ strs(v8, Address(__ pre(r25, 54))); // str s8, [x25, 54]! +- +-// post ++ __ str(r11, Address(__ pre(r0, 8))); // str x11, [x0, 8]! ++ __ strw(r3, Address(__ pre(r0, 29))); // str w3, [x0, 29]! ++ __ strb(r11, Address(__ pre(r14, 9))); // strb w11, [x14, 9]! ++ __ strh(r29, Address(__ pre(r24, -3))); // strh w29, [x24, -3]! ++ __ ldr(r13, Address(__ pre(r17, -144))); // ldr x13, [x17, -144]! ++ __ ldrw(r12, Address(__ pre(r22, -6))); // ldr w12, [x22, -6]! ++ __ ldrb(r13, Address(__ pre(r12, -10))); // ldrb w13, [x12, -10]! ++ __ ldrh(r0, Address(__ pre(r21, -21))); // ldrh w0, [x21, -21]! ++ __ ldrsb(r23, Address(__ pre(r7, 4))); // ldrsb x23, [x7, 4]! ++ __ ldrsh(r3, Address(__ pre(r7, -53))); // ldrsh x3, [x7, -53]! ++ __ ldrshw(r28, Address(__ pre(r5, -7))); // ldrsh w28, [x5, -7]! ++ __ ldrsw(r24, Address(__ pre(r9, -18))); // ldrsw x24, [x9, -18]! ++ __ ldrd(v14, Address(__ pre(r11, 12))); // ldr d14, [x11, 12]! ++ __ ldrs(v19, Address(__ pre(r12, -67))); // ldr s19, [x12, -67]! ++ __ strd(v20, Address(__ pre(r0, -253))); // str d20, [x0, -253]! ++ __ strs(v8, Address(__ pre(r0, 64))); // str s8, [x0, 64]! ++ ++// post + // LoadStoreOp +- __ str(r5, Address(__ post(r11, 37))); // str x5, [x11], 37 +- __ strw(r24, Address(__ post(r15, 19))); // str w24, [x15], 19 +- __ strb(r15, Address(__ post(r26, -1))); // strb w15, [x26], -1 +- __ strh(r18, Address(__ post(r18, -6))); // strh w18, [x18], -6 +- __ ldr(r7, Address(__ post(r2, -230))); // ldr x7, [x2], -230 +- __ ldrw(r27, Address(__ post(r11, -27))); // ldr w27, [x11], -27 +- __ ldrb(r18, Address(__ post(r3, -25))); // ldrb w18, [x3], -25 +- __ ldrh(r10, Address(__ post(r24, -32))); // ldrh w10, [x24], -32 +- __ ldrsb(r22, Address(__ post(r10, 4))); // ldrsb x22, [x10], 4 +- __ ldrsh(r17, Address(__ post(r12, 25))); // ldrsh x17, [x12], 25 +- __ ldrshw(r8, Address(__ post(r7, -62))); // ldrsh w8, [x7], -62 +- __ ldrsw(r23, Address(__ post(r22, -51))); // ldrsw x23, [x22], -51 +- __ ldrd(v24, Address(__ post(r25, 48))); // ldr d24, [x25], 48 +- __ ldrs(v21, Address(__ post(r12, -10))); // ldr s21, [x12], -10 +- __ strd(v18, Address(__ post(r13, -222))); // str d18, [x13], -222 +- __ strs(v16, Address(__ post(r1, -41))); // str s16, [x1], -41 +- +-// base_plus_reg ++ __ str(r4, Address(__ post(r28, -94))); // str x4, [x28], -94 ++ __ strw(r12, Address(__ post(r7, -54))); // str w12, [x7], -54 ++ __ strb(r27, Address(__ post(r10, -24))); // strb w27, [x10], -24 ++ __ strh(r6, Address(__ post(r8, 27))); // strh w6, [x8], 27 ++ __ ldr(r14, Address(__ post(r10, -202))); // ldr x14, [x10], -202 ++ __ ldrw(r16, Address(__ post(r5, -41))); // ldr w16, [x5], -41 ++ __ ldrb(r2, Address(__ post(r14, 9))); // ldrb w2, [x14], 9 ++ __ ldrh(r28, Address(__ post(r13, -20))); // ldrh w28, [x13], -20 ++ __ ldrsb(r9, Address(__ post(r13, -31))); // ldrsb x9, [x13], -31 ++ __ ldrsh(r3, Address(__ post(r24, -36))); // ldrsh x3, [x24], -36 ++ __ ldrshw(r20, Address(__ post(r3, 6))); // ldrsh w20, [x3], 6 ++ __ ldrsw(r7, Address(__ post(r19, -1))); // ldrsw x7, [x19], -1 ++ __ ldrd(v30, Address(__ post(r8, -130))); // ldr d30, [x8], -130 ++ __ ldrs(v25, Address(__ post(r15, 21))); // ldr s25, [x15], 21 ++ __ strd(v14, Address(__ post(r23, 90))); // str d14, [x23], 90 ++ __ strs(v8, 
Address(__ post(r0, -33))); // str s8, [x0], -33 ++ ++// base_plus_reg + // LoadStoreOp +- __ str(r2, Address(r22, r15, Address::sxtw(0))); // str x2, [x22, w15, sxtw #0] +- __ strw(r2, Address(r16, r29, Address::lsl(0))); // str w2, [x16, x29, lsl #0] +- __ strb(r20, Address(r18, r14, Address::uxtw(0))); // strb w20, [x18, w14, uxtw #0] +- __ strh(r6, Address(r19, r20, Address::sxtx(1))); // strh w6, [x19, x20, sxtx #1] +- __ ldr(r14, Address(r29, r14, Address::sxtw(0))); // ldr x14, [x29, w14, sxtw #0] +- __ ldrw(r16, Address(r20, r12, Address::sxtw(2))); // ldr w16, [x20, w12, sxtw #2] +- __ ldrb(r9, Address(r12, r0, Address::sxtw(0))); // ldrb w9, [x12, w0, sxtw #0] +- __ ldrh(r12, Address(r17, r3, Address::lsl(1))); // ldrh w12, [x17, x3, lsl #1] +- __ ldrsb(r2, Address(r17, r3, Address::sxtx(0))); // ldrsb x2, [x17, x3, sxtx #0] +- __ ldrsh(r7, Address(r1, r17, Address::uxtw(1))); // ldrsh x7, [x1, w17, uxtw #1] +- __ ldrshw(r25, Address(r15, r18, Address::sxtw(1))); // ldrsh w25, [x15, w18, sxtw #1] +- __ ldrsw(r23, Address(r21, r12, Address::lsl(0))); // ldrsw x23, [x21, x12, lsl #0] +- __ ldrd(v5, Address(r13, r8, Address::lsl(3))); // ldr d5, [x13, x8, lsl #3] +- __ ldrs(v3, Address(r10, r22, Address::lsl(2))); // ldr s3, [x10, x22, lsl #2] +- __ strd(v14, Address(r2, r27, Address::sxtw(0))); // str d14, [x2, w27, sxtw #0] +- __ strs(v20, Address(r6, r25, Address::lsl(0))); // str s20, [x6, x25, lsl #0] +- +-// base_plus_scaled_offset ++ __ str(r10, Address(r18, r21, Address::sxtw(3))); // str x10, [x18, w21, sxtw #3] ++ __ strw(r4, Address(r13, r22, Address::sxtw(2))); // str w4, [x13, w22, sxtw #2] ++ __ strb(r13, Address(r0, r19, Address::uxtw(0))); // strb w13, [x0, w19, uxtw #0] ++ __ strh(r12, Address(r27, r6, Address::sxtw(0))); // strh w12, [x27, w6, sxtw #0] ++ __ ldr(r0, Address(r8, r16, Address::lsl(0))); // ldr x0, [x8, x16, lsl #0] ++ __ ldrw(r0, Address(r4, r26, Address::sxtx(0))); // ldr w0, [x4, x26, sxtx #0] ++ __ ldrb(r14, Address(r25, r5, Address::sxtw(0))); // ldrb w14, [x25, w5, sxtw #0] ++ __ ldrh(r9, Address(r4, r18, Address::uxtw(0))); // ldrh w9, [x4, w18, uxtw #0] ++ __ ldrsb(r27, Address(r4, r7, Address::lsl(0))); // ldrsb x27, [x4, x7, lsl #0] ++ __ ldrsh(r15, Address(r17, r30, Address::sxtw(0))); // ldrsh x15, [x17, w30, sxtw #0] ++ __ ldrshw(r16, Address(r0, r22, Address::sxtw(0))); // ldrsh w16, [x0, w22, sxtw #0] ++ __ ldrsw(r22, Address(r10, r30, Address::sxtx(2))); // ldrsw x22, [x10, x30, sxtx #2] ++ __ ldrd(v29, Address(r21, r10, Address::sxtx(3))); // ldr d29, [x21, x10, sxtx #3] ++ __ ldrs(v3, Address(r11, r19, Address::uxtw(0))); // ldr s3, [x11, w19, uxtw #0] ++ __ strd(v13, Address(r28, r29, Address::uxtw(3))); // str d13, [x28, w29, uxtw #3] ++ __ strs(v23, Address(r29, r5, Address::sxtx(2))); // str s23, [x29, x5, sxtx #2] ++ ++// base_plus_scaled_offset + // LoadStoreOp +- __ str(r30, Address(r7, 16256)); // str x30, [x7, 16256] +- __ strw(r15, Address(r8, 7588)); // str w15, [x8, 7588] +- __ strb(r11, Address(r0, 1866)); // strb w11, [x0, 1866] +- __ strh(r3, Address(r17, 3734)); // strh w3, [x17, 3734] +- __ ldr(r2, Address(r7, 14224)); // ldr x2, [x7, 14224] +- __ ldrw(r5, Address(r9, 7396)); // ldr w5, [x9, 7396] +- __ ldrb(r28, Address(r9, 1721)); // ldrb w28, [x9, 1721] +- __ ldrh(r2, Address(r20, 3656)); // ldrh w2, [x20, 3656] +- __ ldrsb(r22, Address(r14, 1887)); // ldrsb x22, [x14, 1887] +- __ ldrsh(r8, Address(r0, 4080)); // ldrsh x8, [x0, 4080] +- __ ldrshw(r0, Address(r30, 3916)); // ldrsh w0, [x30, 3916] +- __ ldrsw(r24, 
Address(r19, 6828)); // ldrsw x24, [x19, 6828] +- __ ldrd(v24, Address(r12, 13032)); // ldr d24, [x12, 13032] +- __ ldrs(v8, Address(r8, 7452)); // ldr s8, [x8, 7452] +- __ strd(v10, Address(r15, 15992)); // str d10, [x15, 15992] +- __ strs(v26, Address(r19, 6688)); // str s26, [x19, 6688] +- +-// pcrel ++ __ str(r5, Address(r8, 12600)); // str x5, [x8, 12600] ++ __ strw(r29, Address(r24, 7880)); // str w29, [x24, 7880] ++ __ strb(r19, Address(r17, 1566)); // strb w19, [x17, 1566] ++ __ strh(r13, Address(r19, 3984)); // strh w13, [x19, 3984] ++ __ ldr(r19, Address(r23, 13632)); // ldr x19, [x23, 13632] ++ __ ldrw(r23, Address(r29, 6264)); // ldr w23, [x29, 6264] ++ __ ldrb(r22, Address(r11, 2012)); // ldrb w22, [x11, 2012] ++ __ ldrh(r3, Address(r10, 3784)); // ldrh w3, [x10, 3784] ++ __ ldrsb(r8, Address(r16, 1951)); // ldrsb x8, [x16, 1951] ++ __ ldrsh(r23, Address(r20, 3346)); // ldrsh x23, [x20, 3346] ++ __ ldrshw(r2, Address(r1, 3994)); // ldrsh w2, [x1, 3994] ++ __ ldrsw(r4, Address(r17, 7204)); // ldrsw x4, [x17, 7204] ++ __ ldrd(v20, Address(r27, 14400)); // ldr d20, [x27, 14400] ++ __ ldrs(v25, Address(r14, 8096)); // ldr s25, [x14, 8096] ++ __ strd(v26, Address(r10, 15024)); // str d26, [x10, 15024] ++ __ strs(v9, Address(r3, 6936)); // str s9, [x3, 6936] ++ ++// pcrel + // LoadStoreOp +- __ ldr(r10, forth); // ldr x10, forth +- __ ldrw(r3, __ pc()); // ldr w3, . ++ __ ldr(r27, forth); // ldr x27, forth ++ __ ldrw(r11, __ pc()); // ldr w11, . + + // LoadStoreOp +- __ prfm(Address(r23, 9)); // prfm PLDL1KEEP, [x23, 9] ++ __ prfm(Address(r3, -187)); // prfm PLDL1KEEP, [x3, -187] + + // LoadStoreOp +- __ prfm(back); // prfm PLDL1KEEP, back ++ __ prfm(__ pc()); // prfm PLDL1KEEP, . + + // LoadStoreOp +- __ prfm(Address(r3, r8, Address::uxtw(0))); // prfm PLDL1KEEP, [x3, w8, uxtw #0] ++ __ prfm(Address(r29, r14, Address::lsl(0))); // prfm PLDL1KEEP, [x29, x14, lsl #0] + + // LoadStoreOp +- __ prfm(Address(r11, 15080)); // prfm PLDL1KEEP, [x11, 15080] ++ __ prfm(Address(r4, 13312)); // prfm PLDL1KEEP, [x4, 13312] + + // AddSubCarryOp +- __ adcw(r13, r9, r28); // adc w13, w9, w28 +- __ adcsw(r27, r19, r28); // adcs w27, w19, w28 +- __ sbcw(r19, r18, r6); // sbc w19, w18, w6 +- __ sbcsw(r14, r20, r3); // sbcs w14, w20, w3 +- __ adc(r16, r14, r8); // adc x16, x14, x8 +- __ adcs(r0, r29, r8); // adcs x0, x29, x8 +- __ sbc(r8, r24, r20); // sbc x8, x24, x20 +- __ sbcs(r12, r28, r0); // sbcs x12, x28, x0 ++ __ adcw(r21, r1, r7); // adc w21, w1, w7 ++ __ adcsw(r8, r5, r7); // adcs w8, w5, w7 ++ __ sbcw(r7, r27, r14); // sbc w7, w27, w14 ++ __ sbcsw(r27, r4, r17); // sbcs w27, w4, w17 ++ __ adc(r0, r28, r0); // adc x0, x28, x0 ++ __ adcs(r12, r24, r30); // adcs x12, x24, x30 ++ __ sbc(r0, r25, r15); // sbc x0, x25, x15 ++ __ sbcs(r1, r24, r3); // sbcs x1, x24, x3 + + // AddSubExtendedOp +- __ addw(r23, r6, r16, ext::uxtb, 4); // add w23, w6, w16, uxtb #4 +- __ addsw(r25, r25, r23, ext::sxth, 2); // adds w25, w25, w23, sxth #2 +- __ sub(r26, r22, r4, ext::uxtx, 1); // sub x26, x22, x4, uxtx #1 +- __ subsw(r17, r29, r19, ext::sxtx, 3); // subs w17, w29, w19, sxtx #3 +- __ add(r11, r30, r21, ext::uxtb, 3); // add x11, x30, x21, uxtb #3 +- __ adds(r16, r19, r0, ext::sxtb, 2); // adds x16, x19, x0, sxtb #2 +- __ sub(r11, r9, r25, ext::sxtx, 1); // sub x11, x9, x25, sxtx #1 +- __ subs(r17, r20, r12, ext::sxtb, 4); // subs x17, x20, x12, sxtb #4 ++ __ addw(r18, r24, r20, ext::uxtb, 2); // add w18, w24, w20, uxtb #2 ++ __ addsw(r13, r28, r10, ext::uxth, 1); // adds w13, w28, w10, uxth #1 ++ __ 
sub(r15, r16, r2, ext::sxth, 2); // sub x15, x16, x2, sxth #2 ++ __ subsw(r29, r13, r13, ext::uxth, 2); // subs w29, w13, w13, uxth #2 ++ __ add(r12, r20, r12, ext::sxtw, 3); // add x12, x20, x12, sxtw #3 ++ __ adds(r30, r27, r11, ext::sxtb, 1); // adds x30, x27, x11, sxtb #1 ++ __ sub(r14, r7, r1, ext::sxtw, 2); // sub x14, x7, x1, sxtw #2 ++ __ subs(r29, r3, r27, ext::sxth, 1); // subs x29, x3, x27, sxth #1 + + // ConditionalCompareOp +- __ ccmnw(r13, r11, 3u, Assembler::LE); // ccmn w13, w11, #3, LE +- __ ccmpw(r13, r12, 2u, Assembler::HI); // ccmp w13, w12, #2, HI +- __ ccmn(r3, r2, 12u, Assembler::NE); // ccmn x3, x2, #12, NE +- __ ccmp(r7, r21, 3u, Assembler::VS); // ccmp x7, x21, #3, VS ++ __ ccmnw(r0, r13, 14u, Assembler::MI); // ccmn w0, w13, #14, MI ++ __ ccmpw(r22, r18, 6u, Assembler::CC); // ccmp w22, w18, #6, CC ++ __ ccmn(r18, r30, 14u, Assembler::VS); // ccmn x18, x30, #14, VS ++ __ ccmp(r10, r19, 12u, Assembler::HI); // ccmp x10, x19, #12, HI + + // ConditionalCompareImmedOp +- __ ccmnw(r2, 14, 4, Assembler::CC); // ccmn w2, #14, #4, CC +- __ ccmpw(r17, 17, 6, Assembler::PL); // ccmp w17, #17, #6, PL +- __ ccmn(r10, 12, 0, Assembler::CS); // ccmn x10, #12, #0, CS +- __ ccmp(r21, 18, 14, Assembler::GE); // ccmp x21, #18, #14, GE ++ __ ccmnw(r6, 18, 2, Assembler::LE); // ccmn w6, #18, #2, LE ++ __ ccmpw(r9, 13, 4, Assembler::HI); // ccmp w9, #13, #4, HI ++ __ ccmn(r21, 11, 11, Assembler::LO); // ccmn x21, #11, #11, LO ++ __ ccmp(r4, 13, 2, Assembler::VC); // ccmp x4, #13, #2, VC + + // ConditionalSelectOp +- __ cselw(r21, r13, r12, Assembler::GT); // csel w21, w13, w12, GT +- __ csincw(r10, r27, r15, Assembler::LS); // csinc w10, w27, w15, LS +- __ csinvw(r0, r13, r9, Assembler::HI); // csinv w0, w13, w9, HI +- __ csnegw(r18, r4, r26, Assembler::VS); // csneg w18, w4, w26, VS +- __ csel(r12, r29, r7, Assembler::LS); // csel x12, x29, x7, LS +- __ csinc(r6, r7, r20, Assembler::VC); // csinc x6, x7, x20, VC +- __ csinv(r22, r21, r3, Assembler::LE); // csinv x22, x21, x3, LE +- __ csneg(r19, r12, r27, Assembler::LS); // csneg x19, x12, x27, LS ++ __ cselw(r12, r2, r22, Assembler::HI); // csel w12, w2, w22, HI ++ __ csincw(r24, r16, r17, Assembler::HS); // csinc w24, w16, w17, HS ++ __ csinvw(r6, r7, r16, Assembler::LT); // csinv w6, w7, w16, LT ++ __ csnegw(r11, r27, r22, Assembler::LS); // csneg w11, w27, w22, LS ++ __ csel(r10, r3, r29, Assembler::LT); // csel x10, x3, x29, LT ++ __ csinc(r12, r26, r27, Assembler::CC); // csinc x12, x26, x27, CC ++ __ csinv(r15, r10, r21, Assembler::GT); // csinv x15, x10, x21, GT ++ __ csneg(r30, r23, r9, Assembler::GT); // csneg x30, x23, x9, GT + + // TwoRegOp +- __ rbitw(r0, r16); // rbit w0, w16 +- __ rev16w(r17, r23); // rev16 w17, w23 +- __ revw(r17, r14); // rev w17, w14 +- __ clzw(r24, r30); // clz w24, w30 +- __ clsw(r24, r22); // cls w24, w22 +- __ rbit(r3, r17); // rbit x3, x17 +- __ rev16(r12, r13); // rev16 x12, x13 +- __ rev32(r9, r22); // rev32 x9, x22 +- __ rev(r0, r0); // rev x0, x0 +- __ clz(r5, r16); // clz x5, x16 +- __ cls(r25, r22); // cls x25, x22 ++ __ rbitw(r30, r10); // rbit w30, w10 ++ __ rev16w(r29, r15); // rev16 w29, w15 ++ __ revw(r29, r30); // rev w29, w30 ++ __ clzw(r25, r21); // clz w25, w21 ++ __ clsw(r4, r0); // cls w4, w0 ++ __ rbit(r18, r21); // rbit x18, x21 ++ __ rev16(r29, r16); // rev16 x29, x16 ++ __ rev32(r21, r20); // rev32 x21, x20 ++ __ rev(r6, r19); // rev x6, x19 ++ __ clz(r30, r3); // clz x30, x3 ++ __ cls(r21, r19); // cls x21, x19 + + // ThreeRegOp +- __ udivw(r29, r4, r0); // udiv w29, w4, 
w0 +- __ sdivw(r0, r29, r29); // sdiv w0, w29, w29 +- __ lslvw(r5, r17, r21); // lslv w5, w17, w21 +- __ lsrvw(r9, r9, r18); // lsrv w9, w9, w18 +- __ asrvw(r1, r27, r8); // asrv w1, w27, w8 +- __ rorvw(r18, r20, r13); // rorv w18, w20, w13 +- __ udiv(r8, r25, r12); // udiv x8, x25, x12 +- __ sdiv(r7, r5, r28); // sdiv x7, x5, x28 +- __ lslv(r5, r17, r27); // lslv x5, x17, x27 +- __ lsrv(r23, r26, r20); // lsrv x23, x26, x20 +- __ asrv(r28, r8, r28); // asrv x28, x8, x28 +- __ rorv(r3, r29, r4); // rorv x3, x29, x4 ++ __ udivw(r11, r24, r0); // udiv w11, w24, w0 ++ __ sdivw(r27, r25, r14); // sdiv w27, w25, w14 ++ __ lslvw(r3, r14, r18); // lslv w3, w14, w18 ++ __ lsrvw(r7, r15, r24); // lsrv w7, w15, w24 ++ __ asrvw(r28, r17, r25); // asrv w28, w17, w25 ++ __ rorvw(r2, r26, r28); // rorv w2, w26, w28 ++ __ udiv(r5, r25, r26); // udiv x5, x25, x26 ++ __ sdiv(r27, r16, r18); // sdiv x27, x16, x18 ++ __ lslv(r6, r21, r12); // lslv x6, x21, x12 ++ __ lsrv(r0, r4, r12); // lsrv x0, x4, x12 ++ __ asrv(r27, r17, r28); // asrv x27, x17, x28 ++ __ rorv(r28, r2, r18); // rorv x28, x2, x18 + + // FourRegMulOp +- __ maddw(r17, r14, r26, r21); // madd w17, w14, w26, w21 +- __ msubw(r1, r30, r11, r11); // msub w1, w30, w11, w11 +- __ madd(r1, r17, r6, r28); // madd x1, x17, x6, x28 +- __ msub(r30, r6, r30, r8); // msub x30, x6, x30, x8 +- __ smaddl(r21, r6, r14, r8); // smaddl x21, w6, w14, x8 +- __ smsubl(r10, r10, r24, r19); // smsubl x10, w10, w24, x19 +- __ umaddl(r20, r18, r14, r24); // umaddl x20, w18, w14, x24 +- __ umsubl(r18, r2, r5, r5); // umsubl x18, w2, w5, x5 ++ __ maddw(r10, r15, r14, r14); // madd w10, w15, w14, w14 ++ __ msubw(r3, r25, r15, r19); // msub w3, w25, w15, w19 ++ __ madd(r14, r5, r16, r4); // madd x14, x5, x16, x4 ++ __ msub(r26, r25, r4, r2); // msub x26, x25, x4, x2 ++ __ smaddl(r2, r12, r29, r17); // smaddl x2, w12, w29, x17 ++ __ smsubl(r8, r7, r3, r4); // smsubl x8, w7, w3, x4 ++ __ umaddl(r25, r4, r26, r25); // umaddl x25, w4, w26, x25 ++ __ umsubl(r4, r17, r0, r26); // umsubl x4, w17, w0, x26 + + // ThreeRegFloatOp +- __ fmuls(v8, v18, v13); // fmul s8, s18, s13 +- __ fdivs(v2, v14, v28); // fdiv s2, s14, s28 +- __ fadds(v15, v12, v28); // fadd s15, s12, s28 +- __ fsubs(v0, v12, v1); // fsub s0, s12, s1 +- __ fmuls(v15, v29, v4); // fmul s15, s29, s4 +- __ fmuld(v12, v1, v23); // fmul d12, d1, d23 +- __ fdivd(v27, v8, v18); // fdiv d27, d8, d18 +- __ faddd(v23, v20, v11); // fadd d23, d20, d11 +- __ fsubd(v8, v12, v18); // fsub d8, d12, d18 +- __ fmuld(v26, v24, v23); // fmul d26, d24, d23 ++ __ fmuls(v17, v23, v15); // fmul s17, s23, s15 ++ __ fdivs(v21, v28, v17); // fdiv s21, s28, s17 ++ __ fadds(v27, v10, v3); // fadd s27, s10, s3 ++ __ fsubs(v0, v7, v25); // fsub s0, s7, s25 ++ __ fmuls(v9, v6, v15); // fmul s9, s6, s15 ++ __ fmuld(v29, v15, v10); // fmul d29, d15, d10 ++ __ fdivd(v2, v17, v7); // fdiv d2, d17, d7 ++ __ faddd(v11, v11, v23); // fadd d11, d11, d23 ++ __ fsubd(v7, v29, v23); // fsub d7, d29, d23 ++ __ fmuld(v14, v27, v11); // fmul d14, d27, d11 + + // FourRegFloatOp +- __ fmadds(v21, v23, v13, v25); // fmadd s21, s23, s13, s25 +- __ fmsubs(v22, v10, v1, v14); // fmsub s22, s10, s1, s14 +- __ fnmadds(v14, v20, v2, v30); // fnmadd s14, s20, s2, s30 +- __ fnmadds(v7, v29, v22, v22); // fnmadd s7, s29, s22, s22 +- __ fmaddd(v13, v5, v15, v5); // fmadd d13, d5, d15, d5 +- __ fmsubd(v14, v12, v5, v10); // fmsub d14, d12, d5, d10 +- __ fnmaddd(v10, v19, v0, v1); // fnmadd d10, d19, d0, d1 +- __ fnmaddd(v20, v2, v2, v0); // fnmadd d20, d2, d2, d0 ++ __ 
fmadds(v11, v4, v24, v12); // fmadd s11, s4, s24, s12 ++ __ fmsubs(v15, v14, v20, v11); // fmsub s15, s14, s20, s11 ++ __ fnmadds(v28, v13, v11, v12); // fnmadd s28, s13, s11, s12 ++ __ fnmadds(v23, v30, v26, v14); // fnmadd s23, s30, s26, s14 ++ __ fmaddd(v9, v13, v10, v7); // fmadd d9, d13, d10, d7 ++ __ fmsubd(v5, v29, v15, v3); // fmsub d5, d29, d15, d3 ++ __ fnmaddd(v11, v12, v15, v30); // fnmadd d11, d12, d15, d30 ++ __ fnmaddd(v30, v17, v19, v20); // fnmadd d30, d17, d19, d20 + + // TwoRegFloatOp +- __ fmovs(v25, v9); // fmov s25, s9 +- __ fabss(v20, v4); // fabs s20, s4 +- __ fnegs(v3, v27); // fneg s3, s27 +- __ fsqrts(v1, v2); // fsqrt s1, s2 +- __ fcvts(v30, v0); // fcvt d30, s0 +- __ fmovd(v12, v4); // fmov d12, d4 +- __ fabsd(v1, v27); // fabs d1, d27 +- __ fnegd(v8, v22); // fneg d8, d22 +- __ fsqrtd(v11, v11); // fsqrt d11, d11 +- __ fcvtd(v22, v28); // fcvt s22, d28 ++ __ fmovs(v27, v7); // fmov s27, s7 ++ __ fabss(v9, v21); // fabs s9, s21 ++ __ fnegs(v2, v9); // fneg s2, s9 ++ __ fsqrts(v27, v7); // fsqrt s27, s7 ++ __ fcvts(v29, v30); // fcvt d29, s30 ++ __ fmovd(v17, v1); // fmov d17, d1 ++ __ fabsd(v2, v6); // fabs d2, d6 ++ __ fnegd(v10, v3); // fneg d10, d3 ++ __ fsqrtd(v24, v11); // fsqrt d24, d11 ++ __ fcvtd(v7, v1); // fcvt s7, d1 + + // FloatConvertOp +- __ fcvtzsw(r28, v22); // fcvtzs w28, s22 +- __ fcvtzs(r20, v27); // fcvtzs x20, s27 +- __ fcvtzdw(r14, v0); // fcvtzs w14, d0 +- __ fcvtzd(r26, v11); // fcvtzs x26, d11 +- __ scvtfws(v28, r22); // scvtf s28, w22 +- __ scvtfs(v16, r10); // scvtf s16, x10 +- __ scvtfwd(v8, r21); // scvtf d8, w21 +- __ scvtfd(v21, r28); // scvtf d21, x28 +- __ fmovs(r24, v24); // fmov w24, s24 +- __ fmovd(r8, v19); // fmov x8, d19 +- __ fmovs(v8, r12); // fmov s8, w12 +- __ fmovd(v6, r7); // fmov d6, x7 ++ __ fcvtzsw(r11, v0); // fcvtzs w11, s0 ++ __ fcvtzs(r3, v18); // fcvtzs x3, s18 ++ __ fcvtzdw(r28, v6); // fcvtzs w28, d6 ++ __ fcvtzd(r22, v6); // fcvtzs x22, d6 ++ __ scvtfws(v0, r27); // scvtf s0, w27 ++ __ scvtfs(v26, r2); // scvtf s26, x2 ++ __ scvtfwd(v5, r7); // scvtf d5, w7 ++ __ scvtfd(v28, r11); // scvtf d28, x11 ++ __ fmovs(r25, v13); // fmov w25, s13 ++ __ fmovd(r11, v23); // fmov x11, d23 ++ __ fmovs(v19, r8); // fmov s19, w8 ++ __ fmovd(v18, r21); // fmov d18, x21 + + // TwoRegFloatOp +- __ fcmps(v30, v16); // fcmp s30, s16 +- __ fcmpd(v25, v11); // fcmp d25, d11 +- __ fcmps(v11, 0.0); // fcmp s11, #0.0 +- __ fcmpd(v11, 0.0); // fcmp d11, #0.0 ++ __ fcmps(v25, v20); // fcmp s25, s20 ++ __ fcmpd(v19, v18); // fcmp d19, d18 ++ __ fcmps(v2, 0.0); // fcmp s2, #0.0 ++ __ fcmpd(v29, 0.0); // fcmp d29, #0.0 + + // LoadStorePairOp +- __ stpw(r29, r12, Address(r17, 128)); // stp w29, w12, [x17, #128] +- __ ldpw(r22, r18, Address(r14, -96)); // ldp w22, w18, [x14, #-96] +- __ ldpsw(r11, r16, Address(r1, 64)); // ldpsw x11, x16, [x1, #64] +- __ stp(r0, r11, Address(r26, 112)); // stp x0, x11, [x26, #112] +- __ ldp(r7, r1, Address(r26, 16)); // ldp x7, x1, [x26, #16] ++ __ stpw(r8, r21, Address(r19, 16)); // stp w8, w21, [x19, #16] ++ __ ldpw(r6, r15, Address(r20, 0)); // ldp w6, w15, [x20, #0] ++ __ ldpsw(r27, r14, Address(r3, -208)); // ldpsw x27, x14, [x3, #-208] ++ __ stp(r10, r12, Address(r11, -80)); // stp x10, x12, [x11, #-80] ++ __ ldp(r7, r14, Address(r7, -32)); // ldp x7, x14, [x7, #-32] + + // LoadStorePairOp +- __ stpw(r10, r7, Address(__ pre(r24, 0))); // stp w10, w7, [x24, #0]! +- __ ldpw(r7, r28, Address(__ pre(r24, -256))); // ldp w7, w28, [x24, #-256]! 
+- __ ldpsw(r25, r28, Address(__ pre(r21, -240))); // ldpsw x25, x28, [x21, #-240]! +- __ stp(r20, r18, Address(__ pre(r14, -16))); // stp x20, x18, [x14, #-16]! +- __ ldp(r8, r10, Address(__ pre(r13, 80))); // ldp x8, x10, [x13, #80]! ++ __ stpw(r0, r22, Address(__ pre(r12, 112))); // stp w0, w22, [x12, #112]! ++ __ ldpw(r14, r7, Address(__ pre(r8, 48))); // ldp w14, w7, [x8, #48]! ++ __ ldpsw(r16, r2, Address(__ pre(r9, 0))); // ldpsw x16, x2, [x9, #0]! ++ __ stp(r20, r29, Address(__ pre(r1, -64))); // stp x20, x29, [x1, #-64]! ++ __ ldp(r21, r12, Address(__ pre(r5, 80))); // ldp x21, x12, [x5, #80]! + + // LoadStorePairOp +- __ stpw(r26, r24, Address(__ post(r2, -128))); // stp w26, w24, [x2], #-128 +- __ ldpw(r2, r25, Address(__ post(r21, -192))); // ldp w2, w25, [x21], #-192 +- __ ldpsw(r17, r2, Address(__ post(r21, -144))); // ldpsw x17, x2, [x21], #-144 +- __ stp(r12, r10, Address(__ post(r11, 96))); // stp x12, x10, [x11], #96 +- __ ldp(r24, r6, Address(__ post(r17, -32))); // ldp x24, x6, [x17], #-32 ++ __ stpw(r24, r24, Address(__ post(r27, -112))); // stp w24, w24, [x27], #-112 ++ __ ldpw(r28, r22, Address(__ post(r18, 16))); // ldp w28, w22, [x18], #16 ++ __ ldpsw(r17, r6, Address(__ post(r13, -96))); // ldpsw x17, x6, [x13], #-96 ++ __ stp(r28, r26, Address(__ post(r5, -160))); // stp x28, x26, [x5], #-160 ++ __ ldp(r6, r21, Address(__ post(r26, -240))); // ldp x6, x21, [x26], #-240 + + // LoadStorePairOp +- __ stnpw(r3, r30, Address(r14, -224)); // stnp w3, w30, [x14, #-224] +- __ ldnpw(r15, r20, Address(r26, -144)); // ldnp w15, w20, [x26, #-144] +- __ stnp(r22, r25, Address(r12, -128)); // stnp x22, x25, [x12, #-128] +- __ ldnp(r27, r22, Address(r17, -176)); // ldnp x27, x22, [x17, #-176] ++ __ stnpw(r13, r20, Address(r30, 32)); // stnp w13, w20, [x30, #32] ++ __ ldnpw(r17, r11, Address(r5, 96)); // ldnp w17, w11, [x5, #96] ++ __ stnp(r13, r20, Address(r26, -96)); // stnp x13, x20, [x26, #-96] ++ __ ldnp(r29, r12, Address(r23, -80)); // ldnp x29, x12, [x23, #-80] ++ ++// SpecialCases ++ __ sve_cpy(z0, __ S, p0, v1); // mov z0.s, p0/m, s1 ++ __ sve_inc(r0, __ S); // incw x0 ++ __ sve_dec(r1, __ H); // dech x1 ++ __ sve_lsl(z0, __ B, z1, 7); // lsl z0.b, z1.b, #7 ++ __ sve_lsl(z21, __ H, z1, 15); // lsl z21.h, z1.h, #15 ++ __ sve_lsl(z0, __ S, z1, 31); // lsl z0.s, z1.s, #31 ++ __ sve_lsl(z0, __ D, z1, 63); // lsl z0.d, z1.d, #63 ++ __ sve_lsr(z0, __ B, z1, 7); // lsr z0.b, z1.b, #7 ++ __ sve_asr(z0, __ H, z11, 15); // asr z0.h, z11.h, #15 ++ __ sve_lsr(z30, __ S, z1, 31); // lsr z30.s, z1.s, #31 ++ __ sve_asr(z0, __ D, z1, 63); // asr z0.d, z1.d, #63 ++ __ sve_addvl(sp, r0, 31); // addvl sp, x0, #31 ++ __ sve_addpl(r1, sp, -32); // addpl x1, sp, -32 ++ __ sve_cntp(r8, __ B, p0, p1); // cntp x8, p0, p1.b ++ __ sve_dup(z0, __ B, 127); // dup z0.b, 127 ++ __ sve_dup(z1, __ H, -128); // dup z1.h, -128 ++ __ sve_dup(z2, __ S, 32512); // dup z2.s, 32512 ++ __ sve_dup(z7, __ D, -32768); // dup z7.d, -32768 ++ __ sve_ld1b(z0, __ B, p0, Address(sp)); // ld1b {z0.b}, p0/z, [sp] ++ __ sve_ld1h(z10, __ H, p1, Address(sp, -8)); // ld1h {z10.h}, p1/z, [sp, #-8, MUL VL] ++ __ sve_ld1w(z20, __ S, p2, Address(r0, 7)); // ld1w {z20.s}, p2/z, [x0, #7, MUL VL] ++ __ sve_ld1b(z30, __ B, p3, Address(sp, r8)); // ld1b {z30.b}, p3/z, [sp, x8] ++ __ sve_ld1w(z0, __ S, p4, Address(sp, r28)); // ld1w {z0.s}, p4/z, [sp, x28, LSL #2] ++ __ sve_ld1d(z11, __ D, p5, Address(r0, r1)); // ld1d {z11.d}, p5/z, [x0, x1, LSL #3] ++ __ sve_st1b(z22, __ B, p6, Address(sp)); // st1b {z22.b}, p6, [sp] ++ __ 
sve_st1b(z31, __ B, p7, Address(sp, -8)); // st1b {z31.b}, p7, [sp, #-8, MUL VL] ++ __ sve_st1w(z0, __ S, p1, Address(r0, 7)); // st1w {z0.s}, p1, [x0, #7, MUL VL] ++ __ sve_st1b(z0, __ B, p2, Address(sp, r1)); // st1b {z0.b}, p2, [sp, x1] ++ __ sve_st1h(z0, __ H, p3, Address(sp, r8)); // st1h {z0.h}, p3, [sp, x8, LSL #1] ++ __ sve_st1d(z0, __ D, p4, Address(r0, r18)); // st1d {z0.d}, p4, [x0, x18, LSL #3] ++ __ sve_ldr(z0, Address(sp)); // ldr z0, [sp] ++ __ sve_ldr(z31, Address(sp, -256)); // ldr z31, [sp, #-256, MUL VL] ++ __ sve_str(z8, Address(r8, 255)); // str z8, [x8, #255, MUL VL] + + // FloatImmediateOp +- __ fmovd(v0, 2.0); // fmov d0, #2.0 +- __ fmovd(v0, 2.125); // fmov d0, #2.125 +- __ fmovd(v0, 4.0); // fmov d0, #4.0 +- __ fmovd(v0, 4.25); // fmov d0, #4.25 +- __ fmovd(v0, 8.0); // fmov d0, #8.0 +- __ fmovd(v0, 8.5); // fmov d0, #8.5 +- __ fmovd(v0, 16.0); // fmov d0, #16.0 +- __ fmovd(v0, 17.0); // fmov d0, #17.0 +- __ fmovd(v0, 0.125); // fmov d0, #0.125 +- __ fmovd(v0, 0.1328125); // fmov d0, #0.1328125 +- __ fmovd(v0, 0.25); // fmov d0, #0.25 +- __ fmovd(v0, 0.265625); // fmov d0, #0.265625 +- __ fmovd(v0, 0.5); // fmov d0, #0.5 +- __ fmovd(v0, 0.53125); // fmov d0, #0.53125 +- __ fmovd(v0, 1.0); // fmov d0, #1.0 +- __ fmovd(v0, 1.0625); // fmov d0, #1.0625 +- __ fmovd(v0, -2.0); // fmov d0, #-2.0 +- __ fmovd(v0, -2.125); // fmov d0, #-2.125 +- __ fmovd(v0, -4.0); // fmov d0, #-4.0 +- __ fmovd(v0, -4.25); // fmov d0, #-4.25 +- __ fmovd(v0, -8.0); // fmov d0, #-8.0 +- __ fmovd(v0, -8.5); // fmov d0, #-8.5 +- __ fmovd(v0, -16.0); // fmov d0, #-16.0 +- __ fmovd(v0, -17.0); // fmov d0, #-17.0 +- __ fmovd(v0, -0.125); // fmov d0, #-0.125 +- __ fmovd(v0, -0.1328125); // fmov d0, #-0.1328125 +- __ fmovd(v0, -0.25); // fmov d0, #-0.25 +- __ fmovd(v0, -0.265625); // fmov d0, #-0.265625 +- __ fmovd(v0, -0.5); // fmov d0, #-0.5 +- __ fmovd(v0, -0.53125); // fmov d0, #-0.53125 +- __ fmovd(v0, -1.0); // fmov d0, #-1.0 +- __ fmovd(v0, -1.0625); // fmov d0, #-1.0625 ++ __ fmovd(v0, 2.0); // fmov d0, #2.0 ++ __ fmovd(v0, 2.125); // fmov d0, #2.125 ++ __ fmovd(v0, 4.0); // fmov d0, #4.0 ++ __ fmovd(v0, 4.25); // fmov d0, #4.25 ++ __ fmovd(v0, 8.0); // fmov d0, #8.0 ++ __ fmovd(v0, 8.5); // fmov d0, #8.5 ++ __ fmovd(v0, 16.0); // fmov d0, #16.0 ++ __ fmovd(v0, 17.0); // fmov d0, #17.0 ++ __ fmovd(v0, 0.125); // fmov d0, #0.125 ++ __ fmovd(v0, 0.1328125); // fmov d0, #0.1328125 ++ __ fmovd(v0, 0.25); // fmov d0, #0.25 ++ __ fmovd(v0, 0.265625); // fmov d0, #0.265625 ++ __ fmovd(v0, 0.5); // fmov d0, #0.5 ++ __ fmovd(v0, 0.53125); // fmov d0, #0.53125 ++ __ fmovd(v0, 1.0); // fmov d0, #1.0 ++ __ fmovd(v0, 1.0625); // fmov d0, #1.0625 ++ __ fmovd(v0, -2.0); // fmov d0, #-2.0 ++ __ fmovd(v0, -2.125); // fmov d0, #-2.125 ++ __ fmovd(v0, -4.0); // fmov d0, #-4.0 ++ __ fmovd(v0, -4.25); // fmov d0, #-4.25 ++ __ fmovd(v0, -8.0); // fmov d0, #-8.0 ++ __ fmovd(v0, -8.5); // fmov d0, #-8.5 ++ __ fmovd(v0, -16.0); // fmov d0, #-16.0 ++ __ fmovd(v0, -17.0); // fmov d0, #-17.0 ++ __ fmovd(v0, -0.125); // fmov d0, #-0.125 ++ __ fmovd(v0, -0.1328125); // fmov d0, #-0.1328125 ++ __ fmovd(v0, -0.25); // fmov d0, #-0.25 ++ __ fmovd(v0, -0.265625); // fmov d0, #-0.265625 ++ __ fmovd(v0, -0.5); // fmov d0, #-0.5 ++ __ fmovd(v0, -0.53125); // fmov d0, #-0.53125 ++ __ fmovd(v0, -1.0); // fmov d0, #-1.0 ++ __ fmovd(v0, -1.0625); // fmov d0, #-1.0625 ++ ++// SVEVectorOp ++ __ sve_add(z14, __ S, z16, z27); // add z14.s, z16.s, z27.s ++ __ sve_sub(z0, __ S, z6, z26); // sub z0.s, z6.s, z26.s ++ __ sve_fadd(z27, __ 
S, z12, z6); // fadd z27.s, z12.s, z6.s ++ __ sve_fmul(z30, __ S, z4, z19); // fmul z30.s, z4.s, z19.s ++ __ sve_fsub(z11, __ D, z16, z2); // fsub z11.d, z16.d, z2.d ++ __ sve_abs(z15, __ D, p0, z12); // abs z15.d, p0/m, z12.d ++ __ sve_add(z9, __ B, p5, z23); // add z9.b, p5/m, z9.b, z23.b ++ __ sve_asr(z30, __ S, p0, z26); // asr z30.s, p0/m, z30.s, z26.s ++ __ sve_cnt(z4, __ H, p2, z18); // cnt z4.h, p2/m, z18.h ++ __ sve_lsl(z25, __ S, p1, z11); // lsl z25.s, p1/m, z25.s, z11.s ++ __ sve_lsr(z10, __ B, p6, z8); // lsr z10.b, p6/m, z10.b, z8.b ++ __ sve_mul(z4, __ B, p5, z17); // mul z4.b, p5/m, z4.b, z17.b ++ __ sve_neg(z30, __ S, p3, z9); // neg z30.s, p3/m, z9.s ++ __ sve_not(z0, __ D, p3, z20); // not z0.d, p3/m, z20.d ++ __ sve_smax(z23, __ H, p7, z3); // smax z23.h, p7/m, z23.h, z3.h ++ __ sve_smin(z0, __ H, p2, z11); // smin z0.h, p2/m, z0.h, z11.h ++ __ sve_sub(z11, __ D, p6, z5); // sub z11.d, p6/m, z11.d, z5.d ++ __ sve_fabs(z16, __ S, p2, z17); // fabs z16.s, p2/m, z17.s ++ __ sve_fadd(z15, __ S, p0, z26); // fadd z15.s, p0/m, z15.s, z26.s ++ __ sve_fdiv(z10, __ S, p7, z19); // fdiv z10.s, p7/m, z10.s, z19.s ++ __ sve_fmax(z24, __ D, p0, z17); // fmax z24.d, p0/m, z24.d, z17.d ++ __ sve_fmin(z26, __ D, p4, z15); // fmin z26.d, p4/m, z26.d, z15.d ++ __ sve_fmul(z24, __ D, p2, z17); // fmul z24.d, p2/m, z24.d, z17.d ++ __ sve_fneg(z30, __ S, p5, z29); // fneg z30.s, p5/m, z29.s ++ __ sve_frintm(z18, __ S, p5, z10); // frintm z18.s, p5/m, z10.s ++ __ sve_frintn(z30, __ D, p2, z30); // frintn z30.d, p2/m, z30.d ++ __ sve_frintp(z6, __ S, p6, z30); // frintp z6.s, p6/m, z30.s ++ __ sve_fsqrt(z20, __ D, p6, z2); // fsqrt z20.d, p6/m, z2.d ++ __ sve_fsub(z9, __ S, p5, z29); // fsub z9.s, p5/m, z9.s, z29.s ++ __ sve_fmla(z18, __ D, p2, z3, z22); // fmla z18.d, p2/m, z3.d, z22.d ++ __ sve_fmls(z15, __ D, p2, z13, z12); // fmls z15.d, p2/m, z13.d, z12.d ++ __ sve_fnmla(z12, __ S, p0, z30, z30); // fnmla z12.s, p0/m, z30.s, z30.s ++ __ sve_fnmls(z7, __ D, p3, z21, z0); // fnmls z7.d, p3/m, z21.d, z0.d ++ __ sve_mla(z19, __ H, p2, z26, z20); // mla z19.h, p2/m, z26.h, z20.h ++ __ sve_mls(z16, __ D, p7, z1, z21); // mls z16.d, p7/m, z1.d, z21.d ++ __ sve_and(z21, z4, z18); // and z21.d, z4.d, z18.d ++ __ sve_eor(z12, z18, z7); // eor z12.d, z18.d, z7.d ++ __ sve_orr(z25, z15, z13); // orr z25.d, z15.d, z13.d ++ ++// SVEReductionOp ++ __ sve_andv(v11, __ D, p4, z7); // andv d11, p4, z7.d ++ __ sve_orv(v11, __ D, p1, z9); // orv d11, p1, z9.d ++ __ sve_eorv(v28, __ D, p7, z0); // eorv d28, p7, z0.d ++ __ sve_smaxv(v16, __ H, p0, z7); // smaxv h16, p0, z7.h ++ __ sve_sminv(v12, __ B, p3, z29); // sminv b12, p3, z29.b ++ __ sve_fminv(v21, __ S, p6, z11); // fminv s21, p6, z11.s ++ __ sve_fmaxv(v6, __ D, p2, z4); // fmaxv d6, p2, z4.d ++ __ sve_fadda(v7, __ D, p0, z7); // fadda d7, p0, d7, z7.d ++ __ sve_uaddv(v12, __ B, p7, z29); // uaddv d12, p7, z29.b + + __ bind(forth); + +@@ -633,542 +719,642 @@ aarch64ops.o: file format elf64-littleaarch64 + Disassembly of section .text: + + 0000000000000000 : +- 0: 8b0772d3 add x19, x22, x7, lsl #28 +- 4: cb4a3570 sub x16, x11, x10, lsr #13 +- 8: ab9c09bb adds x27, x13, x28, asr #2 +- c: eb9aa794 subs x20, x28, x26, asr #41 +- 10: 0b934e68 add w8, w19, w19, asr #19 +- 14: 4b0a3924 sub w4, w9, w10, lsl #14 +- 18: 2b1e3568 adds w8, w11, w30, lsl #13 +- 1c: 6b132720 subs w0, w25, w19, lsl #9 +- 20: 8a154c14 and x20, x0, x21, lsl #19 +- 24: aa1445d5 orr x21, x14, x20, lsl #17 +- 28: ca01cf99 eor x25, x28, x1, lsl #51 +- 2c: ea8b3f6a ands x10, x27, x11, asr 
#15 +- 30: 0a8c5cb9 and w25, w5, w12, asr #23 +- 34: 2a4a11d2 orr w18, w14, w10, lsr #4 +- 38: 4a855aa4 eor w4, w21, w5, asr #22 +- 3c: 6a857415 ands w21, w0, w5, asr #29 +- 40: 8aa697da bic x26, x30, x6, asr #37 +- 44: aa6d7423 orn x3, x1, x13, lsr #29 +- 48: ca29bf80 eon x0, x28, x9, lsl #47 +- 4c: ea3cb8bd bics x29, x5, x28, lsl #46 +- 50: 0a675249 bic w9, w18, w7, lsr #20 +- 54: 2ab961ba orn w26, w13, w25, asr #24 +- 58: 4a331899 eon w25, w4, w19, lsl #6 +- 5c: 6a646345 bics w5, w26, w4, lsr #24 +- 60: 11055267 add w7, w19, #0x154 +- 64: 31064408 adds w8, w0, #0x191 +- 68: 51028e9d sub w29, w20, #0xa3 +- 6c: 710bdee8 subs w8, w23, #0x2f7 +- 70: 91082d81 add x1, x12, #0x20b +- 74: b106a962 adds x2, x11, #0x1aa +- 78: d10b33ae sub x14, x29, #0x2cc +- 7c: f10918ab subs x11, x5, #0x246 +- 80: 121102d7 and w23, w22, #0x8000 +- 84: 3204cd44 orr w4, w10, #0xf0f0f0f0 +- 88: 5204cf00 eor w0, w24, #0xf0f0f0f0 +- 8c: 72099fb3 ands w19, w29, #0x7f807f80 +- 90: 92729545 and x5, x10, #0xfffffffffc000 +- 94: b20e37cc orr x12, x30, #0xfffc0000fffc0000 +- 98: d27c34be eor x30, x5, #0x3fff0 +- 9c: f27e4efa ands x26, x23, #0x3ffffc +- a0: 14000000 b a0 +- a4: 17ffffd7 b 0 +- a8: 1400017f b 6a4 +- ac: 94000000 bl ac +- b0: 97ffffd4 bl 0 +- b4: 9400017c bl 6a4 +- b8: 3400000c cbz w12, b8 +- bc: 34fffa2c cbz w12, 0 +- c0: 34002f2c cbz w12, 6a4 +- c4: 35000014 cbnz w20, c4 +- c8: 35fff9d4 cbnz w20, 0 +- cc: 35002ed4 cbnz w20, 6a4 +- d0: b400000c cbz x12, d0 +- d4: b4fff96c cbz x12, 0 +- d8: b4002e6c cbz x12, 6a4 +- dc: b5000018 cbnz x24, dc +- e0: b5fff918 cbnz x24, 0 +- e4: b5002e18 cbnz x24, 6a4 +- e8: 10000006 adr x6, e8 +- ec: 10fff8a6 adr x6, 0 +- f0: 10002da6 adr x6, 6a4 +- f4: 90000015 adrp x21, 0 +- f8: 36080001 tbz w1, #1, f8 +- fc: 360ff821 tbz w1, #1, 0 +- 100: 36082d21 tbz w1, #1, 6a4 +- 104: 37480008 tbnz w8, #9, 104 +- 108: 374ff7c8 tbnz w8, #9, 0 +- 10c: 37482cc8 tbnz w8, #9, 6a4 +- 110: 128b50ec movn w12, #0x5a87 +- 114: 52a9ff8b movz w11, #0x4ffc, lsl #16 +- 118: 7281d095 movk w21, #0xe84 +- 11c: 92edfebd movn x29, #0x6ff5, lsl #48 +- 120: d28361e3 movz x3, #0x1b0f +- 124: f2a4cc96 movk x22, #0x2664, lsl #16 +- 128: 9346590c sbfx x12, x8, #6, #17 +- 12c: 33194f33 bfi w19, w25, #7, #20 +- 130: 531d3d89 ubfiz w9, w12, #3, #16 +- 134: 9350433c sbfx x28, x25, #16, #1 +- 138: b34464ac bfxil x12, x5, #4, #22 +- 13c: d3462140 ubfx x0, x10, #6, #3 +- 140: 139a61a4 extr w4, w13, w26, #24 +- 144: 93d87fd7 extr x23, x30, x24, #31 +- 148: 54000000 b.eq 148 +- 14c: 54fff5a0 b.eq 0 +- 150: 54002aa0 b.eq 6a4 +- 154: 54000001 b.ne 154 +- 158: 54fff541 b.ne 0 +- 15c: 54002a41 b.ne 6a4 +- 160: 54000002 b.cs 160 +- 164: 54fff4e2 b.cs 0 +- 168: 540029e2 b.cs 6a4 +- 16c: 54000002 b.cs 16c +- 170: 54fff482 b.cs 0 +- 174: 54002982 b.cs 6a4 +- 178: 54000003 b.cc 178 +- 17c: 54fff423 b.cc 0 +- 180: 54002923 b.cc 6a4 +- 184: 54000003 b.cc 184 +- 188: 54fff3c3 b.cc 0 +- 18c: 540028c3 b.cc 6a4 +- 190: 54000004 b.mi 190 +- 194: 54fff364 b.mi 0 +- 198: 54002864 b.mi 6a4 +- 19c: 54000005 b.pl 19c +- 1a0: 54fff305 b.pl 0 +- 1a4: 54002805 b.pl 6a4 +- 1a8: 54000006 b.vs 1a8 +- 1ac: 54fff2a6 b.vs 0 +- 1b0: 540027a6 b.vs 6a4 +- 1b4: 54000007 b.vc 1b4 +- 1b8: 54fff247 b.vc 0 +- 1bc: 54002747 b.vc 6a4 +- 1c0: 54000008 b.hi 1c0 +- 1c4: 54fff1e8 b.hi 0 +- 1c8: 540026e8 b.hi 6a4 +- 1cc: 54000009 b.ls 1cc +- 1d0: 54fff189 b.ls 0 +- 1d4: 54002689 b.ls 6a4 +- 1d8: 5400000a b.ge 1d8 +- 1dc: 54fff12a b.ge 0 +- 1e0: 5400262a b.ge 6a4 +- 1e4: 5400000b b.lt 1e4 +- 1e8: 54fff0cb b.lt 0 +- 1ec: 540025cb b.lt 6a4 +- 1f0: 5400000c b.gt 1f0 +- 
1f4: 54fff06c b.gt 0 +- 1f8: 5400256c b.gt 6a4 +- 1fc: 5400000d b.le 1fc +- 200: 54fff00d b.le 0 +- 204: 5400250d b.le 6a4 +- 208: 5400000e b.al 208 +- 20c: 54ffefae b.al 0 +- 210: 540024ae b.al 6a4 +- 214: 5400000f b.nv 214 +- 218: 54ffef4f b.nv 0 +- 21c: 5400244f b.nv 6a4 +- 220: d4063721 svc #0x31b9 +- 224: d4035082 hvc #0x1a84 +- 228: d400bfe3 smc #0x5ff +- 22c: d4282fc0 brk #0x417e +- 230: d444c320 hlt #0x2619 +- 234: d503201f nop +- 238: d69f03e0 eret +- 23c: d6bf03e0 drps +- 240: d5033fdf isb +- 244: d5033f9f dsb sy +- 248: d5033abf dmb ishst +- 24c: d61f0040 br x2 +- 250: d63f00a0 blr x5 +- 254: c8147c55 stxr w20, x21, [x2] +- 258: c805fcfd stlxr w5, x29, [x7] +- 25c: c85f7e05 ldxr x5, [x16] +- 260: c85fffbb ldaxr x27, [x29] +- 264: c89fffa0 stlr x0, [x29] +- 268: c8dfff95 ldar x21, [x28] +- 26c: 88157cf8 stxr w21, w24, [x7] +- 270: 8815ff9a stlxr w21, w26, [x28] +- 274: 885f7cd5 ldxr w21, [x6] +- 278: 885fffcf ldaxr w15, [x30] +- 27c: 889ffc73 stlr w19, [x3] +- 280: 88dffc56 ldar w22, [x2] +- 284: 48127c0f stxrh w18, w15, [x0] +- 288: 480bff85 stlxrh w11, w5, [x28] +- 28c: 485f7cdd ldxrh w29, [x6] +- 290: 485ffcf2 ldaxrh w18, [x7] +- 294: 489fff99 stlrh w25, [x28] +- 298: 48dffe62 ldarh w2, [x19] +- 29c: 080a7c3e stxrb w10, w30, [x1] +- 2a0: 0814fed5 stlxrb w20, w21, [x22] +- 2a4: 085f7c59 ldxrb w25, [x2] +- 2a8: 085ffcb8 ldaxrb w24, [x5] +- 2ac: 089ffc70 stlrb w16, [x3] +- 2b0: 08dfffb6 ldarb w22, [x29] +- 2b4: c87f0a68 ldxp x8, x2, [x19] +- 2b8: c87fcdc7 ldaxp x7, x19, [x14] +- 2bc: c82870bb stxp w8, x27, x28, [x5] +- 2c0: c825b8c8 stlxp w5, x8, x14, [x6] +- 2c4: 887f12d9 ldxp w25, w4, [x22] +- 2c8: 887fb9ed ldaxp w13, w14, [x15] +- 2cc: 8834215a stxp w20, w26, w8, [x10] +- 2d0: 8837ca52 stlxp w23, w18, w18, [x18] +- 2d4: f806317e str x30, [x11,#99] +- 2d8: b81b3337 str w23, [x25,#-77] +- 2dc: 39000dc2 strb w2, [x14,#3] +- 2e0: 78005149 strh w9, [x10,#5] +- 2e4: f84391f4 ldr x20, [x15,#57] +- 2e8: b85b220c ldr w12, [x16,#-78] +- 2ec: 385fd356 ldrb w22, [x26,#-3] +- 2f0: 785d127e ldrh w30, [x19,#-47] +- 2f4: 389f4149 ldrsb x9, [x10,#-12] +- 2f8: 79801e3c ldrsh x28, [x17,#14] +- 2fc: 79c014a3 ldrsh w3, [x5,#10] +- 300: b89a5231 ldrsw x17, [x17,#-91] +- 304: fc5ef282 ldr d2, [x20,#-17] +- 308: bc5f60f6 ldr s22, [x7,#-10] +- 30c: fc12125e str d30, [x18,#-223] +- 310: bc0152cd str s13, [x22,#21] +- 314: f8190e49 str x9, [x18,#-112]! +- 318: b800befd str w29, [x23,#11]! +- 31c: 381ffd92 strb w18, [x12,#-1]! +- 320: 781e9e90 strh w16, [x20,#-23]! +- 324: f8409fa3 ldr x3, [x29,#9]! +- 328: b8413c79 ldr w25, [x3,#19]! +- 32c: 385fffa1 ldrb w1, [x29,#-1]! +- 330: 785c7fa8 ldrh w8, [x29,#-57]! +- 334: 389f3dc5 ldrsb x5, [x14,#-13]! +- 338: 78801f6a ldrsh x10, [x27,#1]! +- 33c: 78c19d4b ldrsh w11, [x10,#25]! +- 340: b89a4ec4 ldrsw x4, [x22,#-92]! +- 344: fc408eeb ldr d11, [x23,#8]! +- 348: bc436e79 ldr s25, [x19,#54]! +- 34c: fc152ce1 str d1, [x7,#-174]! +- 350: bc036f28 str s8, [x25,#54]! 
+- 354: f8025565 str x5, [x11],#37 +- 358: b80135f8 str w24, [x15],#19 +- 35c: 381ff74f strb w15, [x26],#-1 +- 360: 781fa652 strh w18, [x18],#-6 +- 364: f851a447 ldr x7, [x2],#-230 +- 368: b85e557b ldr w27, [x11],#-27 +- 36c: 385e7472 ldrb w18, [x3],#-25 +- 370: 785e070a ldrh w10, [x24],#-32 +- 374: 38804556 ldrsb x22, [x10],#4 +- 378: 78819591 ldrsh x17, [x12],#25 +- 37c: 78dc24e8 ldrsh w8, [x7],#-62 +- 380: b89cd6d7 ldrsw x23, [x22],#-51 +- 384: fc430738 ldr d24, [x25],#48 +- 388: bc5f6595 ldr s21, [x12],#-10 +- 38c: fc1225b2 str d18, [x13],#-222 +- 390: bc1d7430 str s16, [x1],#-41 +- 394: f82fcac2 str x2, [x22,w15,sxtw] +- 398: b83d6a02 str w2, [x16,x29] +- 39c: 382e5a54 strb w20, [x18,w14,uxtw #0] +- 3a0: 7834fa66 strh w6, [x19,x20,sxtx #1] +- 3a4: f86ecbae ldr x14, [x29,w14,sxtw] +- 3a8: b86cda90 ldr w16, [x20,w12,sxtw #2] +- 3ac: 3860d989 ldrb w9, [x12,w0,sxtw #0] +- 3b0: 78637a2c ldrh w12, [x17,x3,lsl #1] +- 3b4: 38a3fa22 ldrsb x2, [x17,x3,sxtx #0] +- 3b8: 78b15827 ldrsh x7, [x1,w17,uxtw #1] +- 3bc: 78f2d9f9 ldrsh w25, [x15,w18,sxtw #1] +- 3c0: b8ac6ab7 ldrsw x23, [x21,x12] +- 3c4: fc6879a5 ldr d5, [x13,x8,lsl #3] +- 3c8: bc767943 ldr s3, [x10,x22,lsl #2] +- 3cc: fc3bc84e str d14, [x2,w27,sxtw] +- 3d0: bc3968d4 str s20, [x6,x25] +- 3d4: f91fc0fe str x30, [x7,#16256] +- 3d8: b91da50f str w15, [x8,#7588] +- 3dc: 391d280b strb w11, [x0,#1866] +- 3e0: 791d2e23 strh w3, [x17,#3734] +- 3e4: f95bc8e2 ldr x2, [x7,#14224] +- 3e8: b95ce525 ldr w5, [x9,#7396] +- 3ec: 395ae53c ldrb w28, [x9,#1721] +- 3f0: 795c9282 ldrh w2, [x20,#3656] +- 3f4: 399d7dd6 ldrsb x22, [x14,#1887] +- 3f8: 799fe008 ldrsh x8, [x0,#4080] +- 3fc: 79de9bc0 ldrsh w0, [x30,#3916] +- 400: b99aae78 ldrsw x24, [x19,#6828] +- 404: fd597598 ldr d24, [x12,#13032] +- 408: bd5d1d08 ldr s8, [x8,#7452] +- 40c: fd1f3dea str d10, [x15,#15992] +- 410: bd1a227a str s26, [x19,#6688] +- 414: 5800148a ldr x10, 6a4 +- 418: 18000003 ldr w3, 418 +- 41c: f88092e0 prfm pldl1keep, [x23,#9] +- 420: d8ffdf00 prfm pldl1keep, 0 +- 424: f8a84860 prfm pldl1keep, [x3,w8,uxtw] +- 428: f99d7560 prfm pldl1keep, [x11,#15080] +- 42c: 1a1c012d adc w13, w9, w28 +- 430: 3a1c027b adcs w27, w19, w28 +- 434: 5a060253 sbc w19, w18, w6 +- 438: 7a03028e sbcs w14, w20, w3 +- 43c: 9a0801d0 adc x16, x14, x8 +- 440: ba0803a0 adcs x0, x29, x8 +- 444: da140308 sbc x8, x24, x20 +- 448: fa00038c sbcs x12, x28, x0 +- 44c: 0b3010d7 add w23, w6, w16, uxtb #4 +- 450: 2b37ab39 adds w25, w25, w23, sxth #2 +- 454: cb2466da sub x26, x22, x4, uxtx #1 +- 458: 6b33efb1 subs w17, w29, w19, sxtx #3 +- 45c: 8b350fcb add x11, x30, w21, uxtb #3 +- 460: ab208a70 adds x16, x19, w0, sxtb #2 +- 464: cb39e52b sub x11, x9, x25, sxtx #1 +- 468: eb2c9291 subs x17, x20, w12, sxtb #4 +- 46c: 3a4bd1a3 ccmn w13, w11, #0x3, le +- 470: 7a4c81a2 ccmp w13, w12, #0x2, hi +- 474: ba42106c ccmn x3, x2, #0xc, ne +- 478: fa5560e3 ccmp x7, x21, #0x3, vs +- 47c: 3a4e3844 ccmn w2, #0xe, #0x4, cc +- 480: 7a515a26 ccmp w17, #0x11, #0x6, pl +- 484: ba4c2940 ccmn x10, #0xc, #0x0, cs +- 488: fa52aaae ccmp x21, #0x12, #0xe, ge +- 48c: 1a8cc1b5 csel w21, w13, w12, gt +- 490: 1a8f976a csinc w10, w27, w15, ls +- 494: 5a8981a0 csinv w0, w13, w9, hi +- 498: 5a9a6492 csneg w18, w4, w26, vs +- 49c: 9a8793ac csel x12, x29, x7, ls +- 4a0: 9a9474e6 csinc x6, x7, x20, vc +- 4a4: da83d2b6 csinv x22, x21, x3, le +- 4a8: da9b9593 csneg x19, x12, x27, ls +- 4ac: 5ac00200 rbit w0, w16 +- 4b0: 5ac006f1 rev16 w17, w23 +- 4b4: 5ac009d1 rev w17, w14 +- 4b8: 5ac013d8 clz w24, w30 +- 4bc: 5ac016d8 cls w24, w22 +- 4c0: dac00223 rbit x3, x17 +- 
4c4: dac005ac rev16 x12, x13 +- 4c8: dac00ac9 rev32 x9, x22 +- 4cc: dac00c00 rev x0, x0 +- 4d0: dac01205 clz x5, x16 +- 4d4: dac016d9 cls x25, x22 +- 4d8: 1ac0089d udiv w29, w4, w0 +- 4dc: 1add0fa0 sdiv w0, w29, w29 +- 4e0: 1ad52225 lsl w5, w17, w21 +- 4e4: 1ad22529 lsr w9, w9, w18 +- 4e8: 1ac82b61 asr w1, w27, w8 +- 4ec: 1acd2e92 ror w18, w20, w13 +- 4f0: 9acc0b28 udiv x8, x25, x12 +- 4f4: 9adc0ca7 sdiv x7, x5, x28 +- 4f8: 9adb2225 lsl x5, x17, x27 +- 4fc: 9ad42757 lsr x23, x26, x20 +- 500: 9adc291c asr x28, x8, x28 +- 504: 9ac42fa3 ror x3, x29, x4 +- 508: 1b1a55d1 madd w17, w14, w26, w21 +- 50c: 1b0bafc1 msub w1, w30, w11, w11 +- 510: 9b067221 madd x1, x17, x6, x28 +- 514: 9b1ea0de msub x30, x6, x30, x8 +- 518: 9b2e20d5 smaddl x21, w6, w14, x8 +- 51c: 9b38cd4a smsubl x10, w10, w24, x19 +- 520: 9bae6254 umaddl x20, w18, w14, x24 +- 524: 9ba59452 umsubl x18, w2, w5, x5 +- 528: 1e2d0a48 fmul s8, s18, s13 +- 52c: 1e3c19c2 fdiv s2, s14, s28 +- 530: 1e3c298f fadd s15, s12, s28 +- 534: 1e213980 fsub s0, s12, s1 +- 538: 1e240baf fmul s15, s29, s4 +- 53c: 1e77082c fmul d12, d1, d23 +- 540: 1e72191b fdiv d27, d8, d18 +- 544: 1e6b2a97 fadd d23, d20, d11 +- 548: 1e723988 fsub d8, d12, d18 +- 54c: 1e770b1a fmul d26, d24, d23 +- 550: 1f0d66f5 fmadd s21, s23, s13, s25 +- 554: 1f01b956 fmsub s22, s10, s1, s14 +- 558: 1f227a8e fnmadd s14, s20, s2, s30 +- 55c: 1f365ba7 fnmadd s7, s29, s22, s22 +- 560: 1f4f14ad fmadd d13, d5, d15, d5 +- 564: 1f45a98e fmsub d14, d12, d5, d10 +- 568: 1f60066a fnmadd d10, d19, d0, d1 +- 56c: 1f620054 fnmadd d20, d2, d2, d0 +- 570: 1e204139 fmov s25, s9 +- 574: 1e20c094 fabs s20, s4 +- 578: 1e214363 fneg s3, s27 +- 57c: 1e21c041 fsqrt s1, s2 +- 580: 1e22c01e fcvt d30, s0 +- 584: 1e60408c fmov d12, d4 +- 588: 1e60c361 fabs d1, d27 +- 58c: 1e6142c8 fneg d8, d22 +- 590: 1e61c16b fsqrt d11, d11 +- 594: 1e624396 fcvt s22, d28 +- 598: 1e3802dc fcvtzs w28, s22 +- 59c: 9e380374 fcvtzs x20, s27 +- 5a0: 1e78000e fcvtzs w14, d0 +- 5a4: 9e78017a fcvtzs x26, d11 +- 5a8: 1e2202dc scvtf s28, w22 +- 5ac: 9e220150 scvtf s16, x10 +- 5b0: 1e6202a8 scvtf d8, w21 +- 5b4: 9e620395 scvtf d21, x28 +- 5b8: 1e260318 fmov w24, s24 +- 5bc: 9e660268 fmov x8, d19 +- 5c0: 1e270188 fmov s8, w12 +- 5c4: 9e6700e6 fmov d6, x7 +- 5c8: 1e3023c0 fcmp s30, s16 +- 5cc: 1e6b2320 fcmp d25, d11 +- 5d0: 1e202168 fcmp s11, #0.0 +- 5d4: 1e602168 fcmp d11, #0.0 +- 5d8: 2910323d stp w29, w12, [x17,#128] +- 5dc: 297449d6 ldp w22, w18, [x14,#-96] +- 5e0: 6948402b ldpsw x11, x16, [x1,#64] +- 5e4: a9072f40 stp x0, x11, [x26,#112] +- 5e8: a9410747 ldp x7, x1, [x26,#16] +- 5ec: 29801f0a stp w10, w7, [x24,#0]! +- 5f0: 29e07307 ldp w7, w28, [x24,#-256]! +- 5f4: 69e272b9 ldpsw x25, x28, [x21,#-240]! +- 5f8: a9bf49d4 stp x20, x18, [x14,#-16]! +- 5fc: a9c529a8 ldp x8, x10, [x13,#80]! 
+- 600: 28b0605a stp w26, w24, [x2],#-128 +- 604: 28e866a2 ldp w2, w25, [x21],#-192 +- 608: 68ee0ab1 ldpsw x17, x2, [x21],#-144 +- 60c: a886296c stp x12, x10, [x11],#96 +- 610: a8fe1a38 ldp x24, x6, [x17],#-32 +- 614: 282479c3 stnp w3, w30, [x14,#-224] +- 618: 286e534f ldnp w15, w20, [x26,#-144] +- 61c: a8386596 stnp x22, x25, [x12,#-128] +- 620: a8755a3b ldnp x27, x22, [x17,#-176] +- 624: 1e601000 fmov d0, #2.000000000000000000e+00 +- 628: 1e603000 fmov d0, #2.125000000000000000e+00 +- 62c: 1e621000 fmov d0, #4.000000000000000000e+00 +- 630: 1e623000 fmov d0, #4.250000000000000000e+00 +- 634: 1e641000 fmov d0, #8.000000000000000000e+00 +- 638: 1e643000 fmov d0, #8.500000000000000000e+00 +- 63c: 1e661000 fmov d0, #1.600000000000000000e+01 +- 640: 1e663000 fmov d0, #1.700000000000000000e+01 +- 644: 1e681000 fmov d0, #1.250000000000000000e-01 +- 648: 1e683000 fmov d0, #1.328125000000000000e-01 +- 64c: 1e6a1000 fmov d0, #2.500000000000000000e-01 +- 650: 1e6a3000 fmov d0, #2.656250000000000000e-01 +- 654: 1e6c1000 fmov d0, #5.000000000000000000e-01 +- 658: 1e6c3000 fmov d0, #5.312500000000000000e-01 +- 65c: 1e6e1000 fmov d0, #1.000000000000000000e+00 +- 660: 1e6e3000 fmov d0, #1.062500000000000000e+00 +- 664: 1e701000 fmov d0, #-2.000000000000000000e+00 +- 668: 1e703000 fmov d0, #-2.125000000000000000e+00 +- 66c: 1e721000 fmov d0, #-4.000000000000000000e+00 +- 670: 1e723000 fmov d0, #-4.250000000000000000e+00 +- 674: 1e741000 fmov d0, #-8.000000000000000000e+00 +- 678: 1e743000 fmov d0, #-8.500000000000000000e+00 +- 67c: 1e761000 fmov d0, #-1.600000000000000000e+01 +- 680: 1e763000 fmov d0, #-1.700000000000000000e+01 +- 684: 1e781000 fmov d0, #-1.250000000000000000e-01 +- 688: 1e783000 fmov d0, #-1.328125000000000000e-01 +- 68c: 1e7a1000 fmov d0, #-2.500000000000000000e-01 +- 690: 1e7a3000 fmov d0, #-2.656250000000000000e-01 +- 694: 1e7c1000 fmov d0, #-5.000000000000000000e-01 +- 698: 1e7c3000 fmov d0, #-5.312500000000000000e-01 +- 69c: 1e7e1000 fmov d0, #-1.000000000000000000e+00 +- 6a0: 1e7e3000 fmov d0, #-1.062500000000000000e+00 ++ 0: 8b0d82fa add x26, x23, x13, lsl #32 ++ 4: cb49970c sub x12, x24, x9, lsr #37 ++ 8: ab889dfc adds x28, x15, x8, asr #39 ++ c: eb9ee787 subs x7, x28, x30, asr #57 ++ 10: 0b9b3ec9 add w9, w22, w27, asr #15 ++ 14: 4b9279a3 sub w3, w13, w18, asr #30 ++ 18: 2b88474e adds w14, w26, w8, asr #17 ++ 1c: 6b8c56c0 subs w0, w22, w12, asr #21 ++ 20: 8a1a51e0 and x0, x15, x26, lsl #20 ++ 24: aa11f4ba orr x26, x5, x17, lsl #61 ++ 28: ca0281b8 eor x24, x13, x2, lsl #32 ++ 2c: ea918c7c ands x28, x3, x17, asr #35 ++ 30: 0a5d4a19 and w25, w16, w29, lsr #18 ++ 34: 2a4b264d orr w13, w18, w11, lsr #9 ++ 38: 4a523ca5 eor w5, w5, w18, lsr #15 ++ 3c: 6a9b6ae2 ands w2, w23, w27, asr #26 ++ 40: 8a70b79b bic x27, x28, x16, lsr #45 ++ 44: aaba9728 orn x8, x25, x26, asr #37 ++ 48: ca6dfe3d eon x29, x17, x13, lsr #63 ++ 4c: ea627f1c bics x28, x24, x2, lsr #31 ++ 50: 0aa70f53 bic w19, w26, w7, asr #3 ++ 54: 2aaa0f06 orn w6, w24, w10, asr #3 ++ 58: 4a6176a4 eon w4, w21, w1, lsr #29 ++ 5c: 6a604eb0 bics w16, w21, w0, lsr #19 ++ 60: 1105ed91 add w17, w12, #0x17b ++ 64: 3100583e adds w30, w1, #0x16 ++ 68: 5101f8bd sub w29, w5, #0x7e ++ 6c: 710f0306 subs w6, w24, #0x3c0 ++ 70: 9101a1a0 add x0, x13, #0x68 ++ 74: b10a5cc8 adds x8, x6, #0x297 ++ 78: d10810aa sub x10, x5, #0x204 ++ 7c: f10fd061 subs x1, x3, #0x3f4 ++ 80: 120cb166 and w6, w11, #0xfff1fff1 ++ 84: 321764bc orr w28, w5, #0xfffffe07 ++ 88: 52174681 eor w1, w20, #0x7fffe00 ++ 8c: 720c0247 ands w7, w18, #0x100000 ++ 90: 9241018e and x14, 
x12, #0x8000000000000000 ++ 94: b25a2969 orr x9, x11, #0x1ffc000000000 ++ 98: d278b411 eor x17, x0, #0x3fffffffffff00 ++ 9c: f26aad01 ands x1, x8, #0xffffffffffc00003 ++ a0: 14000000 b a0 ++ a4: 17ffffd7 b 0 ++ a8: 140001cf b 7e4 ++ ac: 94000000 bl ac ++ b0: 97ffffd4 bl 0 ++ b4: 940001cc bl 7e4 ++ b8: 3400000a cbz w10, b8 ++ bc: 34fffa2a cbz w10, 0 ++ c0: 3400392a cbz w10, 7e4 ++ c4: 35000008 cbnz w8, c4 ++ c8: 35fff9c8 cbnz w8, 0 ++ cc: 350038c8 cbnz w8, 7e4 ++ d0: b400000b cbz x11, d0 ++ d4: b4fff96b cbz x11, 0 ++ d8: b400386b cbz x11, 7e4 ++ dc: b500001d cbnz x29, dc ++ e0: b5fff91d cbnz x29, 0 ++ e4: b500381d cbnz x29, 7e4 ++ e8: 10000013 adr x19, e8 ++ ec: 10fff8b3 adr x19, 0 ++ f0: 100037b3 adr x19, 7e4 ++ f4: 90000013 adrp x19, 0 ++ f8: 36300016 tbz w22, #6, f8 ++ fc: 3637f836 tbz w22, #6, 0 ++ 100: 36303736 tbz w22, #6, 7e4 ++ 104: 3758000c tbnz w12, #11, 104 ++ 108: 375ff7cc tbnz w12, #11, 0 ++ 10c: 375836cc tbnz w12, #11, 7e4 ++ 110: 128313a0 mov w0, #0xffffe762 // #-6302 ++ 114: 528a32c7 mov w7, #0x5196 // #20886 ++ 118: 7289173b movk w27, #0x48b9 ++ 11c: 92ab3acc mov x12, #0xffffffffa629ffff // #-1507196929 ++ 120: d2a0bf94 mov x20, #0x5fc0000 // #100401152 ++ 124: f2c285e8 movk x8, #0x142f, lsl #32 ++ 128: 9358722f sbfx x15, x17, #24, #5 ++ 12c: 330e652f bfxil w15, w9, #14, #12 ++ 130: 53067f3b lsr w27, w25, #6 ++ 134: 93577c53 sbfx x19, x2, #23, #9 ++ 138: b34a1aac bfi x12, x21, #54, #7 ++ 13c: d35a4016 ubfiz x22, x0, #38, #17 ++ 140: 13946c63 extr w3, w3, w20, #27 ++ 144: 93c3dbc8 extr x8, x30, x3, #54 ++ 148: 54000000 b.eq 148 // b.none ++ 14c: 54fff5a0 b.eq 0 // b.none ++ 150: 540034a0 b.eq 7e4 // b.none ++ 154: 54000001 b.ne 154 // b.any ++ 158: 54fff541 b.ne 0 // b.any ++ 15c: 54003441 b.ne 7e4 // b.any ++ 160: 54000002 b.cs 160 // b.hs, b.nlast ++ 164: 54fff4e2 b.cs 0 // b.hs, b.nlast ++ 168: 540033e2 b.cs 7e4 // b.hs, b.nlast ++ 16c: 54000002 b.cs 16c // b.hs, b.nlast ++ 170: 54fff482 b.cs 0 // b.hs, b.nlast ++ 174: 54003382 b.cs 7e4 // b.hs, b.nlast ++ 178: 54000003 b.cc 178 // b.lo, b.ul, b.last ++ 17c: 54fff423 b.cc 0 // b.lo, b.ul, b.last ++ 180: 54003323 b.cc 7e4 // b.lo, b.ul, b.last ++ 184: 54000003 b.cc 184 // b.lo, b.ul, b.last ++ 188: 54fff3c3 b.cc 0 // b.lo, b.ul, b.last ++ 18c: 540032c3 b.cc 7e4 // b.lo, b.ul, b.last ++ 190: 54000004 b.mi 190 // b.first ++ 194: 54fff364 b.mi 0 // b.first ++ 198: 54003264 b.mi 7e4 // b.first ++ 19c: 54000005 b.pl 19c // b.nfrst ++ 1a0: 54fff305 b.pl 0 // b.nfrst ++ 1a4: 54003205 b.pl 7e4 // b.nfrst ++ 1a8: 54000006 b.vs 1a8 ++ 1ac: 54fff2a6 b.vs 0 ++ 1b0: 540031a6 b.vs 7e4 ++ 1b4: 54000007 b.vc 1b4 ++ 1b8: 54fff247 b.vc 0 ++ 1bc: 54003147 b.vc 7e4 ++ 1c0: 54000008 b.hi 1c0 // b.pmore ++ 1c4: 54fff1e8 b.hi 0 // b.pmore ++ 1c8: 540030e8 b.hi 7e4 // b.pmore ++ 1cc: 54000009 b.ls 1cc // b.plast ++ 1d0: 54fff189 b.ls 0 // b.plast ++ 1d4: 54003089 b.ls 7e4 // b.plast ++ 1d8: 5400000a b.ge 1d8 // b.tcont ++ 1dc: 54fff12a b.ge 0 // b.tcont ++ 1e0: 5400302a b.ge 7e4 // b.tcont ++ 1e4: 5400000b b.lt 1e4 // b.tstop ++ 1e8: 54fff0cb b.lt 0 // b.tstop ++ 1ec: 54002fcb b.lt 7e4 // b.tstop ++ 1f0: 5400000c b.gt 1f0 ++ 1f4: 54fff06c b.gt 0 ++ 1f8: 54002f6c b.gt 7e4 ++ 1fc: 5400000d b.le 1fc ++ 200: 54fff00d b.le 0 ++ 204: 54002f0d b.le 7e4 ++ 208: 5400000e b.al 208 ++ 20c: 54ffefae b.al 0 ++ 210: 54002eae b.al 7e4 ++ 214: 5400000f b.nv 214 ++ 218: 54ffef4f b.nv 0 ++ 21c: 54002e4f b.nv 7e4 ++ 220: d40658e1 svc #0x32c7 ++ 224: d4014d22 hvc #0xa69 ++ 228: d4046543 smc #0x232a ++ 22c: d4273f60 brk #0x39fb ++ 230: d44cad80 hlt #0x656c ++ 234: 
d503201f nop ++ 238: d69f03e0 eret ++ 23c: d6bf03e0 drps ++ 240: d5033fdf isb ++ 244: d5033e9f dsb st ++ 248: d50332bf dmb oshst ++ 24c: d61f0200 br x16 ++ 250: d63f0280 blr x20 ++ 254: c80a7d1b stxr w10, x27, [x8] ++ 258: c800fea1 stlxr w0, x1, [x21] ++ 25c: c85f7fb1 ldxr x17, [x29] ++ 260: c85fff9d ldaxr x29, [x28] ++ 264: c89ffee1 stlr x1, [x23] ++ 268: c8dffe95 ldar x21, [x20] ++ 26c: 88167e7b stxr w22, w27, [x19] ++ 270: 880bfcd0 stlxr w11, w16, [x6] ++ 274: 885f7c12 ldxr w18, [x0] ++ 278: 885ffd44 ldaxr w4, [x10] ++ 27c: 889ffed8 stlr w24, [x22] ++ 280: 88dffe6a ldar w10, [x19] ++ 284: 48017fc5 stxrh w1, w5, [x30] ++ 288: 4808fe2c stlxrh w8, w12, [x17] ++ 28c: 485f7dc9 ldxrh w9, [x14] ++ 290: 485ffc27 ldaxrh w7, [x1] ++ 294: 489ffe05 stlrh w5, [x16] ++ 298: 48dffd82 ldarh w2, [x12] ++ 29c: 080a7c6c stxrb w10, w12, [x3] ++ 2a0: 081cff4e stlxrb w28, w14, [x26] ++ 2a4: 085f7d5e ldxrb w30, [x10] ++ 2a8: 085ffeae ldaxrb w14, [x21] ++ 2ac: 089ffd2d stlrb w13, [x9] ++ 2b0: 08dfff76 ldarb w22, [x27] ++ 2b4: c87f4d7c ldxp x28, x19, [x11] ++ 2b8: c87fcc5e ldaxp x30, x19, [x2] ++ 2bc: c8220417 stxp w2, x23, x1, [x0] ++ 2c0: c82cb5f0 stlxp w12, x16, x13, [x15] ++ 2c4: 887f55b2 ldxp w18, w21, [x13] ++ 2c8: 887ff90b ldaxp w11, w30, [x8] ++ 2cc: 88382c2d stxp w24, w13, w11, [x1] ++ 2d0: 883aedb5 stlxp w26, w21, w27, [x13] ++ 2d4: f819928b stur x11, [x20, #-103] ++ 2d8: b803e21c stur w28, [x16, #62] ++ 2dc: 381f713b sturb w27, [x9, #-9] ++ 2e0: 781ce322 sturh w2, [x25, #-50] ++ 2e4: f850f044 ldur x4, [x2, #-241] ++ 2e8: b85e129e ldur w30, [x20, #-31] ++ 2ec: 385e92f2 ldurb w18, [x23, #-23] ++ 2f0: 785ff35d ldurh w29, [x26, #-1] ++ 2f4: 39801921 ldrsb x1, [x9, #6] ++ 2f8: 7881318b ldursh x11, [x12, #19] ++ 2fc: 78dce02b ldursh w11, [x1, #-50] ++ 300: b8829313 ldursw x19, [x24, #41] ++ 304: fc45f318 ldur d24, [x24, #95] ++ 308: bc5d50af ldur s15, [x5, #-43] ++ 30c: fc001375 stur d21, [x27, #1] ++ 310: bc1951b7 stur s23, [x13, #-107] ++ 314: f8008c0b str x11, [x0, #8]! ++ 318: b801dc03 str w3, [x0, #29]! ++ 31c: 38009dcb strb w11, [x14, #9]! ++ 320: 781fdf1d strh w29, [x24, #-3]! ++ 324: f8570e2d ldr x13, [x17, #-144]! ++ 328: b85faecc ldr w12, [x22, #-6]! ++ 32c: 385f6d8d ldrb w13, [x12, #-10]! ++ 330: 785ebea0 ldrh w0, [x21, #-21]! ++ 334: 38804cf7 ldrsb x23, [x7, #4]! ++ 338: 789cbce3 ldrsh x3, [x7, #-53]! ++ 33c: 78df9cbc ldrsh w28, [x5, #-7]! ++ 340: b89eed38 ldrsw x24, [x9, #-18]! ++ 344: fc40cd6e ldr d14, [x11, #12]! ++ 348: bc5bdd93 ldr s19, [x12, #-67]! ++ 34c: fc103c14 str d20, [x0, #-253]! ++ 350: bc040c08 str s8, [x0, #64]! 
++ 354: f81a2784 str x4, [x28], #-94 ++ 358: b81ca4ec str w12, [x7], #-54 ++ 35c: 381e855b strb w27, [x10], #-24 ++ 360: 7801b506 strh w6, [x8], #27 ++ 364: f853654e ldr x14, [x10], #-202 ++ 368: b85d74b0 ldr w16, [x5], #-41 ++ 36c: 384095c2 ldrb w2, [x14], #9 ++ 370: 785ec5bc ldrh w28, [x13], #-20 ++ 374: 389e15a9 ldrsb x9, [x13], #-31 ++ 378: 789dc703 ldrsh x3, [x24], #-36 ++ 37c: 78c06474 ldrsh w20, [x3], #6 ++ 380: b89ff667 ldrsw x7, [x19], #-1 ++ 384: fc57e51e ldr d30, [x8], #-130 ++ 388: bc4155f9 ldr s25, [x15], #21 ++ 38c: fc05a6ee str d14, [x23], #90 ++ 390: bc1df408 str s8, [x0], #-33 ++ 394: f835da4a str x10, [x18, w21, sxtw #3] ++ 398: b836d9a4 str w4, [x13, w22, sxtw #2] ++ 39c: 3833580d strb w13, [x0, w19, uxtw #0] ++ 3a0: 7826cb6c strh w12, [x27, w6, sxtw] ++ 3a4: f8706900 ldr x0, [x8, x16] ++ 3a8: b87ae880 ldr w0, [x4, x26, sxtx] ++ 3ac: 3865db2e ldrb w14, [x25, w5, sxtw #0] ++ 3b0: 78724889 ldrh w9, [x4, w18, uxtw] ++ 3b4: 38a7789b ldrsb x27, [x4, x7, lsl #0] ++ 3b8: 78beca2f ldrsh x15, [x17, w30, sxtw] ++ 3bc: 78f6c810 ldrsh w16, [x0, w22, sxtw] ++ 3c0: b8bef956 ldrsw x22, [x10, x30, sxtx #2] ++ 3c4: fc6afabd ldr d29, [x21, x10, sxtx #3] ++ 3c8: bc734963 ldr s3, [x11, w19, uxtw] ++ 3cc: fc3d5b8d str d13, [x28, w29, uxtw #3] ++ 3d0: bc25fbb7 str s23, [x29, x5, sxtx #2] ++ 3d4: f9189d05 str x5, [x8, #12600] ++ 3d8: b91ecb1d str w29, [x24, #7880] ++ 3dc: 39187a33 strb w19, [x17, #1566] ++ 3e0: 791f226d strh w13, [x19, #3984] ++ 3e4: f95aa2f3 ldr x19, [x23, #13632] ++ 3e8: b9587bb7 ldr w23, [x29, #6264] ++ 3ec: 395f7176 ldrb w22, [x11, #2012] ++ 3f0: 795d9143 ldrh w3, [x10, #3784] ++ 3f4: 399e7e08 ldrsb x8, [x16, #1951] ++ 3f8: 799a2697 ldrsh x23, [x20, #3346] ++ 3fc: 79df3422 ldrsh w2, [x1, #3994] ++ 400: b99c2624 ldrsw x4, [x17, #7204] ++ 404: fd5c2374 ldr d20, [x27, #14400] ++ 408: bd5fa1d9 ldr s25, [x14, #8096] ++ 40c: fd1d595a str d26, [x10, #15024] ++ 410: bd1b1869 str s9, [x3, #6936] ++ 414: 58001e9b ldr x27, 7e4 ++ 418: 1800000b ldr w11, 418 ++ 41c: f8945060 prfum pldl1keep, [x3, #-187] ++ 420: d8000000 prfm pldl1keep, 420 ++ 424: f8ae6ba0 prfm pldl1keep, [x29, x14] ++ 428: f99a0080 prfm pldl1keep, [x4, #13312] ++ 42c: 1a070035 adc w21, w1, w7 ++ 430: 3a0700a8 adcs w8, w5, w7 ++ 434: 5a0e0367 sbc w7, w27, w14 ++ 438: 7a11009b sbcs w27, w4, w17 ++ 43c: 9a000380 adc x0, x28, x0 ++ 440: ba1e030c adcs x12, x24, x30 ++ 444: da0f0320 sbc x0, x25, x15 ++ 448: fa030301 sbcs x1, x24, x3 ++ 44c: 0b340b12 add w18, w24, w20, uxtb #2 ++ 450: 2b2a278d adds w13, w28, w10, uxth #1 ++ 454: cb22aa0f sub x15, x16, w2, sxth #2 ++ 458: 6b2d29bd subs w29, w13, w13, uxth #2 ++ 45c: 8b2cce8c add x12, x20, w12, sxtw #3 ++ 460: ab2b877e adds x30, x27, w11, sxtb #1 ++ 464: cb21c8ee sub x14, x7, w1, sxtw #2 ++ 468: eb3ba47d subs x29, x3, w27, sxth #1 ++ 46c: 3a4d400e ccmn w0, w13, #0xe, mi // mi = first ++ 470: 7a5232c6 ccmp w22, w18, #0x6, cc // cc = lo, ul, last ++ 474: ba5e624e ccmn x18, x30, #0xe, vs ++ 478: fa53814c ccmp x10, x19, #0xc, hi // hi = pmore ++ 47c: 3a52d8c2 ccmn w6, #0x12, #0x2, le ++ 480: 7a4d8924 ccmp w9, #0xd, #0x4, hi // hi = pmore ++ 484: ba4b3aab ccmn x21, #0xb, #0xb, cc // cc = lo, ul, last ++ 488: fa4d7882 ccmp x4, #0xd, #0x2, vc ++ 48c: 1a96804c csel w12, w2, w22, hi // hi = pmore ++ 490: 1a912618 csinc w24, w16, w17, cs // cs = hs, nlast ++ 494: 5a90b0e6 csinv w6, w7, w16, lt // lt = tstop ++ 498: 5a96976b csneg w11, w27, w22, ls // ls = plast ++ 49c: 9a9db06a csel x10, x3, x29, lt // lt = tstop ++ 4a0: 9a9b374c csinc x12, x26, x27, cc // cc = lo, ul, last ++ 4a4: 
da95c14f csinv x15, x10, x21, gt ++ 4a8: da89c6fe csneg x30, x23, x9, gt ++ 4ac: 5ac0015e rbit w30, w10 ++ 4b0: 5ac005fd rev16 w29, w15 ++ 4b4: 5ac00bdd rev w29, w30 ++ 4b8: 5ac012b9 clz w25, w21 ++ 4bc: 5ac01404 cls w4, w0 ++ 4c0: dac002b2 rbit x18, x21 ++ 4c4: dac0061d rev16 x29, x16 ++ 4c8: dac00a95 rev32 x21, x20 ++ 4cc: dac00e66 rev x6, x19 ++ 4d0: dac0107e clz x30, x3 ++ 4d4: dac01675 cls x21, x19 ++ 4d8: 1ac00b0b udiv w11, w24, w0 ++ 4dc: 1ace0f3b sdiv w27, w25, w14 ++ 4e0: 1ad221c3 lsl w3, w14, w18 ++ 4e4: 1ad825e7 lsr w7, w15, w24 ++ 4e8: 1ad92a3c asr w28, w17, w25 ++ 4ec: 1adc2f42 ror w2, w26, w28 ++ 4f0: 9ada0b25 udiv x5, x25, x26 ++ 4f4: 9ad20e1b sdiv x27, x16, x18 ++ 4f8: 9acc22a6 lsl x6, x21, x12 ++ 4fc: 9acc2480 lsr x0, x4, x12 ++ 500: 9adc2a3b asr x27, x17, x28 ++ 504: 9ad22c5c ror x28, x2, x18 ++ 508: 1b0e39ea madd w10, w15, w14, w14 ++ 50c: 1b0fcf23 msub w3, w25, w15, w19 ++ 510: 9b1010ae madd x14, x5, x16, x4 ++ 514: 9b048b3a msub x26, x25, x4, x2 ++ 518: 9b3d4582 smaddl x2, w12, w29, x17 ++ 51c: 9b2390e8 smsubl x8, w7, w3, x4 ++ 520: 9bba6499 umaddl x25, w4, w26, x25 ++ 524: 9ba0ea24 umsubl x4, w17, w0, x26 ++ 528: 1e2f0af1 fmul s17, s23, s15 ++ 52c: 1e311b95 fdiv s21, s28, s17 ++ 530: 1e23295b fadd s27, s10, s3 ++ 534: 1e3938e0 fsub s0, s7, s25 ++ 538: 1e2f08c9 fmul s9, s6, s15 ++ 53c: 1e6a09fd fmul d29, d15, d10 ++ 540: 1e671a22 fdiv d2, d17, d7 ++ 544: 1e77296b fadd d11, d11, d23 ++ 548: 1e773ba7 fsub d7, d29, d23 ++ 54c: 1e6b0b6e fmul d14, d27, d11 ++ 550: 1f18308b fmadd s11, s4, s24, s12 ++ 554: 1f14adcf fmsub s15, s14, s20, s11 ++ 558: 1f2b31bc fnmadd s28, s13, s11, s12 ++ 55c: 1f3a3bd7 fnmadd s23, s30, s26, s14 ++ 560: 1f4a1da9 fmadd d9, d13, d10, d7 ++ 564: 1f4f8fa5 fmsub d5, d29, d15, d3 ++ 568: 1f6f798b fnmadd d11, d12, d15, d30 ++ 56c: 1f73523e fnmadd d30, d17, d19, d20 ++ 570: 1e2040fb fmov s27, s7 ++ 574: 1e20c2a9 fabs s9, s21 ++ 578: 1e214122 fneg s2, s9 ++ 57c: 1e21c0fb fsqrt s27, s7 ++ 580: 1e22c3dd fcvt d29, s30 ++ 584: 1e604031 fmov d17, d1 ++ 588: 1e60c0c2 fabs d2, d6 ++ 58c: 1e61406a fneg d10, d3 ++ 590: 1e61c178 fsqrt d24, d11 ++ 594: 1e624027 fcvt s7, d1 ++ 598: 1e38000b fcvtzs w11, s0 ++ 59c: 9e380243 fcvtzs x3, s18 ++ 5a0: 1e7800dc fcvtzs w28, d6 ++ 5a4: 9e7800d6 fcvtzs x22, d6 ++ 5a8: 1e220360 scvtf s0, w27 ++ 5ac: 9e22005a scvtf s26, x2 ++ 5b0: 1e6200e5 scvtf d5, w7 ++ 5b4: 9e62017c scvtf d28, x11 ++ 5b8: 1e2601b9 fmov w25, s13 ++ 5bc: 9e6602eb fmov x11, d23 ++ 5c0: 1e270113 fmov s19, w8 ++ 5c4: 9e6702b2 fmov d18, x21 ++ 5c8: 1e342320 fcmp s25, s20 ++ 5cc: 1e722260 fcmp d19, d18 ++ 5d0: 1e202048 fcmp s2, #0.0 ++ 5d4: 1e6023a8 fcmp d29, #0.0 ++ 5d8: 29025668 stp w8, w21, [x19, #16] ++ 5dc: 29403e86 ldp w6, w15, [x20] ++ 5e0: 6966387b ldpsw x27, x14, [x3, #-208] ++ 5e4: a93b316a stp x10, x12, [x11, #-80] ++ 5e8: a97e38e7 ldp x7, x14, [x7, #-32] ++ 5ec: 298e5980 stp w0, w22, [x12, #112]! ++ 5f0: 29c61d0e ldp w14, w7, [x8, #48]! ++ 5f4: 69c00930 ldpsw x16, x2, [x9, #0]! ++ 5f8: a9bc7434 stp x20, x29, [x1, #-64]! ++ 5fc: a9c530b5 ldp x21, x12, [x5, #80]! 
++ 600: 28b26378 stp w24, w24, [x27], #-112 ++ 604: 28c25a5c ldp w28, w22, [x18], #16 ++ 608: 68f419b1 ldpsw x17, x6, [x13], #-96 ++ 60c: a8b668bc stp x28, x26, [x5], #-160 ++ 610: a8f15746 ldp x6, x21, [x26], #-240 ++ 614: 280453cd stnp w13, w20, [x30, #32] ++ 618: 284c2cb1 ldnp w17, w11, [x5, #96] ++ 61c: a83a534d stnp x13, x20, [x26, #-96] ++ 620: a87b32fd ldnp x29, x12, [x23, #-80] ++ 624: 05a08020 mov z0.s, p0/m, s1 ++ 628: 04b0e3e0 incw x0 ++ 62c: 0470e7e1 dech x1 ++ 630: 042f9c20 lsl z0.b, z1.b, #7 ++ 634: 043f9c35 lsl z21.h, z1.h, #15 ++ 638: 047f9c20 lsl z0.s, z1.s, #31 ++ 63c: 04ff9c20 lsl z0.d, z1.d, #63 ++ 640: 04299420 lsr z0.b, z1.b, #7 ++ 644: 04319160 asr z0.h, z11.h, #15 ++ 648: 0461943e lsr z30.s, z1.s, #31 ++ 64c: 04a19020 asr z0.d, z1.d, #63 ++ 650: 042053ff addvl sp, x0, #31 ++ 654: 047f5401 addpl x1, sp, #-32 ++ 658: 25208028 cntp x8, p0, p1.b ++ 65c: 2538cfe0 mov z0.b, #127 ++ 660: 2578d001 mov z1.h, #-128 ++ 664: 25b8efe2 mov z2.s, #32512 ++ 668: 25f8f007 mov z7.d, #-32768 ++ 66c: a400a3e0 ld1b {z0.b}, p0/z, [sp] ++ 670: a4a8a7ea ld1h {z10.h}, p1/z, [sp, #-8, mul vl] ++ 674: a547a814 ld1w {z20.s}, p2/z, [x0, #7, mul vl] ++ 678: a4084ffe ld1b {z30.b}, p3/z, [sp, x8] ++ 67c: a55c53e0 ld1w {z0.s}, p4/z, [sp, x28, lsl #2] ++ 680: a5e1540b ld1d {z11.d}, p5/z, [x0, x1, lsl #3] ++ 684: e400fbf6 st1b {z22.b}, p6, [sp] ++ 688: e408ffff st1b {z31.b}, p7, [sp, #-8, mul vl] ++ 68c: e547e400 st1w {z0.s}, p1, [x0, #7, mul vl] ++ 690: e4014be0 st1b {z0.b}, p2, [sp, x1] ++ 694: e4a84fe0 st1h {z0.h}, p3, [sp, x8, lsl #1] ++ 698: e5f25000 st1d {z0.d}, p4, [x0, x18, lsl #3] ++ 69c: 858043e0 ldr z0, [sp] ++ 6a0: 85a043ff ldr z31, [sp, #-256, mul vl] ++ 6a4: e59f5d08 str z8, [x8, #255, mul vl] ++ 6a8: 1e601000 fmov d0, #2.000000000000000000e+00 ++ 6ac: 1e603000 fmov d0, #2.125000000000000000e+00 ++ 6b0: 1e621000 fmov d0, #4.000000000000000000e+00 ++ 6b4: 1e623000 fmov d0, #4.250000000000000000e+00 ++ 6b8: 1e641000 fmov d0, #8.000000000000000000e+00 ++ 6bc: 1e643000 fmov d0, #8.500000000000000000e+00 ++ 6c0: 1e661000 fmov d0, #1.600000000000000000e+01 ++ 6c4: 1e663000 fmov d0, #1.700000000000000000e+01 ++ 6c8: 1e681000 fmov d0, #1.250000000000000000e-01 ++ 6cc: 1e683000 fmov d0, #1.328125000000000000e-01 ++ 6d0: 1e6a1000 fmov d0, #2.500000000000000000e-01 ++ 6d4: 1e6a3000 fmov d0, #2.656250000000000000e-01 ++ 6d8: 1e6c1000 fmov d0, #5.000000000000000000e-01 ++ 6dc: 1e6c3000 fmov d0, #5.312500000000000000e-01 ++ 6e0: 1e6e1000 fmov d0, #1.000000000000000000e+00 ++ 6e4: 1e6e3000 fmov d0, #1.062500000000000000e+00 ++ 6e8: 1e701000 fmov d0, #-2.000000000000000000e+00 ++ 6ec: 1e703000 fmov d0, #-2.125000000000000000e+00 ++ 6f0: 1e721000 fmov d0, #-4.000000000000000000e+00 ++ 6f4: 1e723000 fmov d0, #-4.250000000000000000e+00 ++ 6f8: 1e741000 fmov d0, #-8.000000000000000000e+00 ++ 6fc: 1e743000 fmov d0, #-8.500000000000000000e+00 ++ 700: 1e761000 fmov d0, #-1.600000000000000000e+01 ++ 704: 1e763000 fmov d0, #-1.700000000000000000e+01 ++ 708: 1e781000 fmov d0, #-1.250000000000000000e-01 ++ 70c: 1e783000 fmov d0, #-1.328125000000000000e-01 ++ 710: 1e7a1000 fmov d0, #-2.500000000000000000e-01 ++ 714: 1e7a3000 fmov d0, #-2.656250000000000000e-01 ++ 718: 1e7c1000 fmov d0, #-5.000000000000000000e-01 ++ 71c: 1e7c3000 fmov d0, #-5.312500000000000000e-01 ++ 720: 1e7e1000 fmov d0, #-1.000000000000000000e+00 ++ 724: 1e7e3000 fmov d0, #-1.062500000000000000e+00 ++ 728: 04bb020e add z14.s, z16.s, z27.s ++ 72c: 04ba04c0 sub z0.s, z6.s, z26.s ++ 730: 6586019b fadd z27.s, z12.s, z6.s ++ 734: 6593089e fmul 
z30.s, z4.s, z19.s ++ 738: 65c2060b fsub z11.d, z16.d, z2.d ++ 73c: 04d6a18f abs z15.d, p0/m, z12.d ++ 740: 040016e9 add z9.b, p5/m, z9.b, z23.b ++ 744: 0490835e asr z30.s, p0/m, z30.s, z26.s ++ 748: 045aaa44 cnt z4.h, p2/m, z18.h ++ 74c: 04938579 lsl z25.s, p1/m, z25.s, z11.s ++ 750: 0411990a lsr z10.b, p6/m, z10.b, z8.b ++ 754: 04101624 mul z4.b, p5/m, z4.b, z17.b ++ 758: 0497ad3e neg z30.s, p3/m, z9.s ++ 75c: 04deae80 not z0.d, p3/m, z20.d ++ 760: 04481c77 smax z23.h, p7/m, z23.h, z3.h ++ 764: 044a0960 smin z0.h, p2/m, z0.h, z11.h ++ 768: 04c118ab sub z11.d, p6/m, z11.d, z5.d ++ 76c: 049caa30 fabs z16.s, p2/m, z17.s ++ 770: 6580834f fadd z15.s, p0/m, z15.s, z26.s ++ 774: 658d9e6a fdiv z10.s, p7/m, z10.s, z19.s ++ 778: 65c68238 fmax z24.d, p0/m, z24.d, z17.d ++ 77c: 65c791fa fmin z26.d, p4/m, z26.d, z15.d ++ 780: 65c28a38 fmul z24.d, p2/m, z24.d, z17.d ++ 784: 049db7be fneg z30.s, p5/m, z29.s ++ 788: 6582b552 frintm z18.s, p5/m, z10.s ++ 78c: 65c0abde frintn z30.d, p2/m, z30.d ++ 790: 6581bbc6 frintp z6.s, p6/m, z30.s ++ 794: 65cdb854 fsqrt z20.d, p6/m, z2.d ++ 798: 658197a9 fsub z9.s, p5/m, z9.s, z29.s ++ 79c: 65f60872 fmla z18.d, p2/m, z3.d, z22.d ++ 7a0: 65ec29af fmls z15.d, p2/m, z13.d, z12.d ++ 7a4: 65be43cc fnmla z12.s, p0/m, z30.s, z30.s ++ 7a8: 65e06ea7 fnmls z7.d, p3/m, z21.d, z0.d ++ 7ac: 04544b53 mla z19.h, p2/m, z26.h, z20.h ++ 7b0: 04d57c30 mls z16.d, p7/m, z1.d, z21.d ++ 7b4: 04323095 and z21.d, z4.d, z18.d ++ 7b8: 04a7324c eor z12.d, z18.d, z7.d ++ 7bc: 046d31f9 orr z25.d, z15.d, z13.d ++ 7c0: 04da30eb andv d11, p4, z7.d ++ 7c4: 04d8252b orv d11, p1, z9.d ++ 7c8: 04d93c1c eorv d28, p7, z0.d ++ 7cc: 044820f0 smaxv h16, p0, z7.h ++ 7d0: 040a2fac sminv b12, p3, z29.b ++ 7d4: 65873975 fminv s21, p6, z11.s ++ 7d8: 65c62886 fmaxv d6, p2, z4.d ++ 7dc: 65d820e7 fadda d7, p0, d7, z7.d ++ 7e0: 04013fac uaddv d12, p7, z29.b + */ + + static const unsigned int insns[] = + { +- 0x8b0772d3, 0xcb4a3570, 0xab9c09bb, 0xeb9aa794, +- 0x0b934e68, 0x4b0a3924, 0x2b1e3568, 0x6b132720, +- 0x8a154c14, 0xaa1445d5, 0xca01cf99, 0xea8b3f6a, +- 0x0a8c5cb9, 0x2a4a11d2, 0x4a855aa4, 0x6a857415, +- 0x8aa697da, 0xaa6d7423, 0xca29bf80, 0xea3cb8bd, +- 0x0a675249, 0x2ab961ba, 0x4a331899, 0x6a646345, +- 0x11055267, 0x31064408, 0x51028e9d, 0x710bdee8, +- 0x91082d81, 0xb106a962, 0xd10b33ae, 0xf10918ab, +- 0x121102d7, 0x3204cd44, 0x5204cf00, 0x72099fb3, +- 0x92729545, 0xb20e37cc, 0xd27c34be, 0xf27e4efa, +- 0x14000000, 0x17ffffd7, 0x1400017f, 0x94000000, +- 0x97ffffd4, 0x9400017c, 0x3400000c, 0x34fffa2c, +- 0x34002f2c, 0x35000014, 0x35fff9d4, 0x35002ed4, +- 0xb400000c, 0xb4fff96c, 0xb4002e6c, 0xb5000018, +- 0xb5fff918, 0xb5002e18, 0x10000006, 0x10fff8a6, +- 0x10002da6, 0x90000015, 0x36080001, 0x360ff821, +- 0x36082d21, 0x37480008, 0x374ff7c8, 0x37482cc8, +- 0x128b50ec, 0x52a9ff8b, 0x7281d095, 0x92edfebd, +- 0xd28361e3, 0xf2a4cc96, 0x9346590c, 0x33194f33, +- 0x531d3d89, 0x9350433c, 0xb34464ac, 0xd3462140, +- 0x139a61a4, 0x93d87fd7, 0x54000000, 0x54fff5a0, +- 0x54002aa0, 0x54000001, 0x54fff541, 0x54002a41, +- 0x54000002, 0x54fff4e2, 0x540029e2, 0x54000002, +- 0x54fff482, 0x54002982, 0x54000003, 0x54fff423, +- 0x54002923, 0x54000003, 0x54fff3c3, 0x540028c3, +- 0x54000004, 0x54fff364, 0x54002864, 0x54000005, +- 0x54fff305, 0x54002805, 0x54000006, 0x54fff2a6, +- 0x540027a6, 0x54000007, 0x54fff247, 0x54002747, +- 0x54000008, 0x54fff1e8, 0x540026e8, 0x54000009, +- 0x54fff189, 0x54002689, 0x5400000a, 0x54fff12a, +- 0x5400262a, 0x5400000b, 0x54fff0cb, 0x540025cb, +- 0x5400000c, 0x54fff06c, 0x5400256c, 0x5400000d, +- 
0x54fff00d, 0x5400250d, 0x5400000e, 0x54ffefae, +- 0x540024ae, 0x5400000f, 0x54ffef4f, 0x5400244f, +- 0xd4063721, 0xd4035082, 0xd400bfe3, 0xd4282fc0, +- 0xd444c320, 0xd503201f, 0xd69f03e0, 0xd6bf03e0, +- 0xd5033fdf, 0xd5033f9f, 0xd5033abf, 0xd61f0040, +- 0xd63f00a0, 0xc8147c55, 0xc805fcfd, 0xc85f7e05, +- 0xc85fffbb, 0xc89fffa0, 0xc8dfff95, 0x88157cf8, +- 0x8815ff9a, 0x885f7cd5, 0x885fffcf, 0x889ffc73, +- 0x88dffc56, 0x48127c0f, 0x480bff85, 0x485f7cdd, +- 0x485ffcf2, 0x489fff99, 0x48dffe62, 0x080a7c3e, +- 0x0814fed5, 0x085f7c59, 0x085ffcb8, 0x089ffc70, +- 0x08dfffb6, 0xc87f0a68, 0xc87fcdc7, 0xc82870bb, +- 0xc825b8c8, 0x887f12d9, 0x887fb9ed, 0x8834215a, +- 0x8837ca52, 0xf806317e, 0xb81b3337, 0x39000dc2, +- 0x78005149, 0xf84391f4, 0xb85b220c, 0x385fd356, +- 0x785d127e, 0x389f4149, 0x79801e3c, 0x79c014a3, +- 0xb89a5231, 0xfc5ef282, 0xbc5f60f6, 0xfc12125e, +- 0xbc0152cd, 0xf8190e49, 0xb800befd, 0x381ffd92, +- 0x781e9e90, 0xf8409fa3, 0xb8413c79, 0x385fffa1, +- 0x785c7fa8, 0x389f3dc5, 0x78801f6a, 0x78c19d4b, +- 0xb89a4ec4, 0xfc408eeb, 0xbc436e79, 0xfc152ce1, +- 0xbc036f28, 0xf8025565, 0xb80135f8, 0x381ff74f, +- 0x781fa652, 0xf851a447, 0xb85e557b, 0x385e7472, +- 0x785e070a, 0x38804556, 0x78819591, 0x78dc24e8, +- 0xb89cd6d7, 0xfc430738, 0xbc5f6595, 0xfc1225b2, +- 0xbc1d7430, 0xf82fcac2, 0xb83d6a02, 0x382e5a54, +- 0x7834fa66, 0xf86ecbae, 0xb86cda90, 0x3860d989, +- 0x78637a2c, 0x38a3fa22, 0x78b15827, 0x78f2d9f9, +- 0xb8ac6ab7, 0xfc6879a5, 0xbc767943, 0xfc3bc84e, +- 0xbc3968d4, 0xf91fc0fe, 0xb91da50f, 0x391d280b, +- 0x791d2e23, 0xf95bc8e2, 0xb95ce525, 0x395ae53c, +- 0x795c9282, 0x399d7dd6, 0x799fe008, 0x79de9bc0, +- 0xb99aae78, 0xfd597598, 0xbd5d1d08, 0xfd1f3dea, +- 0xbd1a227a, 0x5800148a, 0x18000003, 0xf88092e0, +- 0xd8ffdf00, 0xf8a84860, 0xf99d7560, 0x1a1c012d, +- 0x3a1c027b, 0x5a060253, 0x7a03028e, 0x9a0801d0, +- 0xba0803a0, 0xda140308, 0xfa00038c, 0x0b3010d7, +- 0x2b37ab39, 0xcb2466da, 0x6b33efb1, 0x8b350fcb, +- 0xab208a70, 0xcb39e52b, 0xeb2c9291, 0x3a4bd1a3, +- 0x7a4c81a2, 0xba42106c, 0xfa5560e3, 0x3a4e3844, +- 0x7a515a26, 0xba4c2940, 0xfa52aaae, 0x1a8cc1b5, +- 0x1a8f976a, 0x5a8981a0, 0x5a9a6492, 0x9a8793ac, +- 0x9a9474e6, 0xda83d2b6, 0xda9b9593, 0x5ac00200, +- 0x5ac006f1, 0x5ac009d1, 0x5ac013d8, 0x5ac016d8, +- 0xdac00223, 0xdac005ac, 0xdac00ac9, 0xdac00c00, +- 0xdac01205, 0xdac016d9, 0x1ac0089d, 0x1add0fa0, +- 0x1ad52225, 0x1ad22529, 0x1ac82b61, 0x1acd2e92, +- 0x9acc0b28, 0x9adc0ca7, 0x9adb2225, 0x9ad42757, +- 0x9adc291c, 0x9ac42fa3, 0x1b1a55d1, 0x1b0bafc1, +- 0x9b067221, 0x9b1ea0de, 0x9b2e20d5, 0x9b38cd4a, +- 0x9bae6254, 0x9ba59452, 0x1e2d0a48, 0x1e3c19c2, +- 0x1e3c298f, 0x1e213980, 0x1e240baf, 0x1e77082c, +- 0x1e72191b, 0x1e6b2a97, 0x1e723988, 0x1e770b1a, +- 0x1f0d66f5, 0x1f01b956, 0x1f227a8e, 0x1f365ba7, +- 0x1f4f14ad, 0x1f45a98e, 0x1f60066a, 0x1f620054, +- 0x1e204139, 0x1e20c094, 0x1e214363, 0x1e21c041, +- 0x1e22c01e, 0x1e60408c, 0x1e60c361, 0x1e6142c8, +- 0x1e61c16b, 0x1e624396, 0x1e3802dc, 0x9e380374, +- 0x1e78000e, 0x9e78017a, 0x1e2202dc, 0x9e220150, +- 0x1e6202a8, 0x9e620395, 0x1e260318, 0x9e660268, +- 0x1e270188, 0x9e6700e6, 0x1e3023c0, 0x1e6b2320, +- 0x1e202168, 0x1e602168, 0x2910323d, 0x297449d6, +- 0x6948402b, 0xa9072f40, 0xa9410747, 0x29801f0a, +- 0x29e07307, 0x69e272b9, 0xa9bf49d4, 0xa9c529a8, +- 0x28b0605a, 0x28e866a2, 0x68ee0ab1, 0xa886296c, +- 0xa8fe1a38, 0x282479c3, 0x286e534f, 0xa8386596, +- 0xa8755a3b, 0x1e601000, 0x1e603000, 0x1e621000, +- 0x1e623000, 0x1e641000, 0x1e643000, 0x1e661000, +- 0x1e663000, 0x1e681000, 0x1e683000, 0x1e6a1000, +- 0x1e6a3000, 0x1e6c1000, 0x1e6c3000, 
0x1e6e1000, +- 0x1e6e3000, 0x1e701000, 0x1e703000, 0x1e721000, +- 0x1e723000, 0x1e741000, 0x1e743000, 0x1e761000, +- 0x1e763000, 0x1e781000, 0x1e783000, 0x1e7a1000, +- 0x1e7a3000, 0x1e7c1000, 0x1e7c3000, 0x1e7e1000, +- 0x1e7e3000, ++ 0x8b0d82fa, 0xcb49970c, 0xab889dfc, 0xeb9ee787, ++ 0x0b9b3ec9, 0x4b9279a3, 0x2b88474e, 0x6b8c56c0, ++ 0x8a1a51e0, 0xaa11f4ba, 0xca0281b8, 0xea918c7c, ++ 0x0a5d4a19, 0x2a4b264d, 0x4a523ca5, 0x6a9b6ae2, ++ 0x8a70b79b, 0xaaba9728, 0xca6dfe3d, 0xea627f1c, ++ 0x0aa70f53, 0x2aaa0f06, 0x4a6176a4, 0x6a604eb0, ++ 0x1105ed91, 0x3100583e, 0x5101f8bd, 0x710f0306, ++ 0x9101a1a0, 0xb10a5cc8, 0xd10810aa, 0xf10fd061, ++ 0x120cb166, 0x321764bc, 0x52174681, 0x720c0247, ++ 0x9241018e, 0xb25a2969, 0xd278b411, 0xf26aad01, ++ 0x14000000, 0x17ffffd7, 0x140001cf, 0x94000000, ++ 0x97ffffd4, 0x940001cc, 0x3400000a, 0x34fffa2a, ++ 0x3400392a, 0x35000008, 0x35fff9c8, 0x350038c8, ++ 0xb400000b, 0xb4fff96b, 0xb400386b, 0xb500001d, ++ 0xb5fff91d, 0xb500381d, 0x10000013, 0x10fff8b3, ++ 0x100037b3, 0x90000013, 0x36300016, 0x3637f836, ++ 0x36303736, 0x3758000c, 0x375ff7cc, 0x375836cc, ++ 0x128313a0, 0x528a32c7, 0x7289173b, 0x92ab3acc, ++ 0xd2a0bf94, 0xf2c285e8, 0x9358722f, 0x330e652f, ++ 0x53067f3b, 0x93577c53, 0xb34a1aac, 0xd35a4016, ++ 0x13946c63, 0x93c3dbc8, 0x54000000, 0x54fff5a0, ++ 0x540034a0, 0x54000001, 0x54fff541, 0x54003441, ++ 0x54000002, 0x54fff4e2, 0x540033e2, 0x54000002, ++ 0x54fff482, 0x54003382, 0x54000003, 0x54fff423, ++ 0x54003323, 0x54000003, 0x54fff3c3, 0x540032c3, ++ 0x54000004, 0x54fff364, 0x54003264, 0x54000005, ++ 0x54fff305, 0x54003205, 0x54000006, 0x54fff2a6, ++ 0x540031a6, 0x54000007, 0x54fff247, 0x54003147, ++ 0x54000008, 0x54fff1e8, 0x540030e8, 0x54000009, ++ 0x54fff189, 0x54003089, 0x5400000a, 0x54fff12a, ++ 0x5400302a, 0x5400000b, 0x54fff0cb, 0x54002fcb, ++ 0x5400000c, 0x54fff06c, 0x54002f6c, 0x5400000d, ++ 0x54fff00d, 0x54002f0d, 0x5400000e, 0x54ffefae, ++ 0x54002eae, 0x5400000f, 0x54ffef4f, 0x54002e4f, ++ 0xd40658e1, 0xd4014d22, 0xd4046543, 0xd4273f60, ++ 0xd44cad80, 0xd503201f, 0xd69f03e0, 0xd6bf03e0, ++ 0xd5033fdf, 0xd5033e9f, 0xd50332bf, 0xd61f0200, ++ 0xd63f0280, 0xc80a7d1b, 0xc800fea1, 0xc85f7fb1, ++ 0xc85fff9d, 0xc89ffee1, 0xc8dffe95, 0x88167e7b, ++ 0x880bfcd0, 0x885f7c12, 0x885ffd44, 0x889ffed8, ++ 0x88dffe6a, 0x48017fc5, 0x4808fe2c, 0x485f7dc9, ++ 0x485ffc27, 0x489ffe05, 0x48dffd82, 0x080a7c6c, ++ 0x081cff4e, 0x085f7d5e, 0x085ffeae, 0x089ffd2d, ++ 0x08dfff76, 0xc87f4d7c, 0xc87fcc5e, 0xc8220417, ++ 0xc82cb5f0, 0x887f55b2, 0x887ff90b, 0x88382c2d, ++ 0x883aedb5, 0xf819928b, 0xb803e21c, 0x381f713b, ++ 0x781ce322, 0xf850f044, 0xb85e129e, 0x385e92f2, ++ 0x785ff35d, 0x39801921, 0x7881318b, 0x78dce02b, ++ 0xb8829313, 0xfc45f318, 0xbc5d50af, 0xfc001375, ++ 0xbc1951b7, 0xf8008c0b, 0xb801dc03, 0x38009dcb, ++ 0x781fdf1d, 0xf8570e2d, 0xb85faecc, 0x385f6d8d, ++ 0x785ebea0, 0x38804cf7, 0x789cbce3, 0x78df9cbc, ++ 0xb89eed38, 0xfc40cd6e, 0xbc5bdd93, 0xfc103c14, ++ 0xbc040c08, 0xf81a2784, 0xb81ca4ec, 0x381e855b, ++ 0x7801b506, 0xf853654e, 0xb85d74b0, 0x384095c2, ++ 0x785ec5bc, 0x389e15a9, 0x789dc703, 0x78c06474, ++ 0xb89ff667, 0xfc57e51e, 0xbc4155f9, 0xfc05a6ee, ++ 0xbc1df408, 0xf835da4a, 0xb836d9a4, 0x3833580d, ++ 0x7826cb6c, 0xf8706900, 0xb87ae880, 0x3865db2e, ++ 0x78724889, 0x38a7789b, 0x78beca2f, 0x78f6c810, ++ 0xb8bef956, 0xfc6afabd, 0xbc734963, 0xfc3d5b8d, ++ 0xbc25fbb7, 0xf9189d05, 0xb91ecb1d, 0x39187a33, ++ 0x791f226d, 0xf95aa2f3, 0xb9587bb7, 0x395f7176, ++ 0x795d9143, 0x399e7e08, 0x799a2697, 0x79df3422, ++ 0xb99c2624, 0xfd5c2374, 0xbd5fa1d9, 0xfd1d595a, ++ 
0xbd1b1869, 0x58001e9b, 0x1800000b, 0xf8945060, ++ 0xd8000000, 0xf8ae6ba0, 0xf99a0080, 0x1a070035, ++ 0x3a0700a8, 0x5a0e0367, 0x7a11009b, 0x9a000380, ++ 0xba1e030c, 0xda0f0320, 0xfa030301, 0x0b340b12, ++ 0x2b2a278d, 0xcb22aa0f, 0x6b2d29bd, 0x8b2cce8c, ++ 0xab2b877e, 0xcb21c8ee, 0xeb3ba47d, 0x3a4d400e, ++ 0x7a5232c6, 0xba5e624e, 0xfa53814c, 0x3a52d8c2, ++ 0x7a4d8924, 0xba4b3aab, 0xfa4d7882, 0x1a96804c, ++ 0x1a912618, 0x5a90b0e6, 0x5a96976b, 0x9a9db06a, ++ 0x9a9b374c, 0xda95c14f, 0xda89c6fe, 0x5ac0015e, ++ 0x5ac005fd, 0x5ac00bdd, 0x5ac012b9, 0x5ac01404, ++ 0xdac002b2, 0xdac0061d, 0xdac00a95, 0xdac00e66, ++ 0xdac0107e, 0xdac01675, 0x1ac00b0b, 0x1ace0f3b, ++ 0x1ad221c3, 0x1ad825e7, 0x1ad92a3c, 0x1adc2f42, ++ 0x9ada0b25, 0x9ad20e1b, 0x9acc22a6, 0x9acc2480, ++ 0x9adc2a3b, 0x9ad22c5c, 0x1b0e39ea, 0x1b0fcf23, ++ 0x9b1010ae, 0x9b048b3a, 0x9b3d4582, 0x9b2390e8, ++ 0x9bba6499, 0x9ba0ea24, 0x1e2f0af1, 0x1e311b95, ++ 0x1e23295b, 0x1e3938e0, 0x1e2f08c9, 0x1e6a09fd, ++ 0x1e671a22, 0x1e77296b, 0x1e773ba7, 0x1e6b0b6e, ++ 0x1f18308b, 0x1f14adcf, 0x1f2b31bc, 0x1f3a3bd7, ++ 0x1f4a1da9, 0x1f4f8fa5, 0x1f6f798b, 0x1f73523e, ++ 0x1e2040fb, 0x1e20c2a9, 0x1e214122, 0x1e21c0fb, ++ 0x1e22c3dd, 0x1e604031, 0x1e60c0c2, 0x1e61406a, ++ 0x1e61c178, 0x1e624027, 0x1e38000b, 0x9e380243, ++ 0x1e7800dc, 0x9e7800d6, 0x1e220360, 0x9e22005a, ++ 0x1e6200e5, 0x9e62017c, 0x1e2601b9, 0x9e6602eb, ++ 0x1e270113, 0x9e6702b2, 0x1e342320, 0x1e722260, ++ 0x1e202048, 0x1e6023a8, 0x29025668, 0x29403e86, ++ 0x6966387b, 0xa93b316a, 0xa97e38e7, 0x298e5980, ++ 0x29c61d0e, 0x69c00930, 0xa9bc7434, 0xa9c530b5, ++ 0x28b26378, 0x28c25a5c, 0x68f419b1, 0xa8b668bc, ++ 0xa8f15746, 0x280453cd, 0x284c2cb1, 0xa83a534d, ++ 0xa87b32fd, 0x05a08020, 0x04b0e3e0, 0x0470e7e1, ++ 0x042f9c20, 0x043f9c35, 0x047f9c20, 0x04ff9c20, ++ 0x04299420, 0x04319160, 0x0461943e, 0x04a19020, ++ 0x042053ff, 0x047f5401, 0x25208028, 0x2538cfe0, ++ 0x2578d001, 0x25b8efe2, 0x25f8f007, 0xa400a3e0, ++ 0xa4a8a7ea, 0xa547a814, 0xa4084ffe, 0xa55c53e0, ++ 0xa5e1540b, 0xe400fbf6, 0xe408ffff, 0xe547e400, ++ 0xe4014be0, 0xe4a84fe0, 0xe5f25000, 0x858043e0, ++ 0x85a043ff, 0xe59f5d08, 0x1e601000, 0x1e603000, ++ 0x1e621000, 0x1e623000, 0x1e641000, 0x1e643000, ++ 0x1e661000, 0x1e663000, 0x1e681000, 0x1e683000, ++ 0x1e6a1000, 0x1e6a3000, 0x1e6c1000, 0x1e6c3000, ++ 0x1e6e1000, 0x1e6e3000, 0x1e701000, 0x1e703000, ++ 0x1e721000, 0x1e723000, 0x1e741000, 0x1e743000, ++ 0x1e761000, 0x1e763000, 0x1e781000, 0x1e783000, ++ 0x1e7a1000, 0x1e7a3000, 0x1e7c1000, 0x1e7c3000, ++ 0x1e7e1000, 0x1e7e3000, 0x04bb020e, 0x04ba04c0, ++ 0x6586019b, 0x6593089e, 0x65c2060b, 0x04d6a18f, ++ 0x040016e9, 0x0490835e, 0x045aaa44, 0x04938579, ++ 0x0411990a, 0x04101624, 0x0497ad3e, 0x04deae80, ++ 0x04481c77, 0x044a0960, 0x04c118ab, 0x049caa30, ++ 0x6580834f, 0x658d9e6a, 0x65c68238, 0x65c791fa, ++ 0x65c28a38, 0x049db7be, 0x6582b552, 0x65c0abde, ++ 0x6581bbc6, 0x65cdb854, 0x658197a9, 0x65f60872, ++ 0x65ec29af, 0x65be43cc, 0x65e06ea7, 0x04544b53, ++ 0x04d57c30, 0x04323095, 0x04a7324c, 0x046d31f9, ++ 0x04da30eb, 0x04d8252b, 0x04d93c1c, 0x044820f0, ++ 0x040a2fac, 0x65873975, 0x65c62886, 0x65d820e7, ++ 0x04013fac, + }; + // END Generated code -- do not edit + +diff --git a/src/hotspot/cpu/aarch64/assembler_aarch64.hpp b/src/hotspot/cpu/aarch64/assembler_aarch64.hpp +index 80ddb9b31..f554b5e15 100644 +--- a/src/hotspot/cpu/aarch64/assembler_aarch64.hpp ++++ b/src/hotspot/cpu/aarch64/assembler_aarch64.hpp +@@ -139,6 +139,9 @@ REGISTER_DECLARATION(Register, rdispatch, r21); + // Java stack pointer + REGISTER_DECLARATION(Register, esp, r20); 
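The SVE memory forms added to assembler_aarch64.hpp below address memory as [Xn, #imm, MUL VL]: the immediate counts whole vector registers rather than bytes, which is why the new Address::offset_ok_for_sve_immed helper divides the byte offset by the vector length before range-checking it. A minimal standalone sketch of that arithmetic (illustrative only, not HotSpot code), using a 256-bit implementation (32-byte registers) and the 4-bit signed immediate of the predicated LD1/ST1 forms:

#include <cassert>

// 'shift' is the width of the signed immediate field (4 for LD1/ST1,
// giving -8 .. 7); 'vl' is the SVE vector length in bytes.
static bool offset_ok_for_sve_immed(long offset, int shift, int vl) {
  if (offset % vl != 0) return false;        // must be a multiple of VL
  long sve_offset = offset / vl;             // the MUL VL immediate
  return -(1L << (shift - 1)) <= sve_offset &&
         sve_offset < (1L << (shift - 1));   // must fit the signed field
}

int main() {
  assert(offset_ok_for_sve_immed(96, 4, 32));    // 96 = 3 * VL, imm = 3
  assert(!offset_ok_for_sve_immed(100, 4, 32));  // not a multiple of VL
  assert(!offset_ok_for_sve_immed(512, 4, 32));  // imm 16 exceeds -8 .. 7
  return 0;
}

This matches the generated tests above, where the encodable extremes appear as [sp, #-8, mul vl] and [x0, #7, mul vl].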
+ ++// Preserved predicate register with all elements set TRUE. ++REGISTER_DECLARATION(PRegister, ptrue, p7); ++ + #define assert_cond(ARG1) assert(ARG1, #ARG1) + + namespace asm_util { +@@ -273,6 +276,14 @@ public: + f(r->encoding_nocheck(), lsb + 4, lsb); + } + ++ void prf(PRegister r, int lsb) { ++ f(r->encoding_nocheck(), lsb + 3, lsb); ++ } ++ ++ void pgrf(PRegister r, int lsb) { ++ f(r->encoding_nocheck(), lsb + 2, lsb); ++ } ++ + unsigned get(int msb = 31, int lsb = 0) { + int nbits = msb - lsb + 1; + unsigned mask = ((1U << nbits) - 1) << lsb; +@@ -554,6 +565,18 @@ class Address { + void lea(MacroAssembler *, Register) const; + + static bool offset_ok_for_immed(long offset, uint shift); ++ ++ static bool offset_ok_for_sve_immed(long offset, int shift, int vl /* sve vector length */) { ++ if (offset % vl == 0) { ++ // Convert address offset into sve imm offset (MUL VL). ++ int sve_offset = offset / vl; ++ if (((-(1 << (shift - 1))) <= sve_offset) && (sve_offset < (1 << (shift - 1)))) { ++ // sve_offset can be encoded ++ return true; ++ } ++ } ++ return false; ++ } + }; + + // Convience classes +@@ -596,7 +619,9 @@ class InternalAddress: public Address { + InternalAddress(address target) : Address(target, relocInfo::internal_word_type) {} + }; + +-const int FPUStateSizeInWords = 32 * 2; ++const int FPUStateSizeInWords = FloatRegisterImpl::number_of_registers * ++ FloatRegisterImpl::save_slots_per_register; ++ + typedef enum { + PLDL1KEEP = 0b00000, PLDL1STRM, PLDL2KEEP, PLDL2STRM, PLDL3KEEP, PLDL3STRM, + PSTL1KEEP = 0b10000, PSTL1STRM, PSTL2KEEP, PSTL2STRM, PSTL3KEEP, PSTL3STRM, +@@ -667,6 +692,12 @@ public: + void rf(FloatRegister reg, int lsb) { + current->rf(reg, lsb); + } ++ void prf(PRegister reg, int lsb) { ++ current->prf(reg, lsb); ++ } ++ void pgrf(PRegister reg, int lsb) { ++ current->pgrf(reg, lsb); ++ } + void fixed(unsigned value, unsigned mask) { + current->fixed(value, mask); + } +@@ -2228,21 +2259,27 @@ public: + + #undef INSN + +-#define INSN(NAME, opc, opc2) \ ++#define INSN(NAME, opc, opc2, accepted) \ + void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn) { \ ++ guarantee(T != T1Q && T != T1D, "incorrect arrangement"); \ ++ if (accepted < 3) guarantee(T != T2D, "incorrect arrangement"); \ ++ if (accepted < 2) guarantee(T != T2S, "incorrect arrangement"); \ ++ if (accepted < 1) guarantee(T == T8B || T == T16B, "incorrect arrangement"); \ + starti; \ + f(0, 31), f((int)T & 1, 30), f(opc, 29), f(0b01110, 28, 24); \ + f((int)T >> 1, 23, 22), f(opc2, 21, 10); \ + rf(Vn, 5), rf(Vd, 0); \ + } + +- INSN(absr, 0, 0b100000101110); +- INSN(negr, 1, 0b100000101110); +- INSN(notr, 1, 0b100000010110); +- INSN(addv, 0, 0b110001101110); +- INSN(cls, 0, 0b100000010010); +- INSN(clz, 1, 0b100000010010); +- INSN(cnt, 0, 0b100000010110); ++ INSN(absr, 0, 0b100000101110, 3); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D ++ INSN(negr, 1, 0b100000101110, 3); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D ++ INSN(notr, 1, 0b100000010110, 0); // accepted arrangements: T8B, T16B ++ INSN(addv, 0, 0b110001101110, 1); // accepted arrangements: T8B, T16B, T4H, T8H, T4S ++ INSN(cls, 0, 0b100000010010, 2); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S ++ INSN(clz, 1, 0b100000010010, 2); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S ++ INSN(cnt, 0, 0b100000010110, 0); // accepted arrangements: T8B, T16B ++ INSN(uaddlp, 1, 0b100000001010, 2); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S ++ INSN(uaddlv, 1, 0b110000001110, 1); 
// accepted arrangements: T8B, T16B, T4H, T8H, T4S + + #undef INSN + +@@ -2376,13 +2413,18 @@ public: + f(sidx<<(int)T, 14, 11), f(1, 10), rf(Vn, 5), rf(Vd, 0); + } + +- void umov(Register Rd, FloatRegister Vn, SIMD_RegVariant T, int idx) { +- starti; +- f(0, 31), f(T==D ? 1:0, 30), f(0b001110000, 29, 21); +- f(((idx<<1)|1)<<(int)T, 20, 16), f(0b001111, 15, 10); +- rf(Vn, 5), rf(Rd, 0); ++#define INSN(NAME, op) \ ++ void NAME(Register Rd, FloatRegister Vn, SIMD_RegVariant T, int idx) { \ ++ starti; \ ++ f(0, 31), f(T==D ? 1:0, 30), f(0b001110000, 29, 21); \ ++ f(((idx<<1)|1)<<(int)T, 20, 16), f(op, 15, 10); \ ++ rf(Vn, 5), rf(Rd, 0); \ + } + ++ INSN(umov, 0b001111); ++ INSN(smov, 0b001011); ++#undef INSN ++ + #define INSN(NAME, opc, opc2, isSHR) \ + void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, int shift){ \ + starti; \ +@@ -2582,13 +2624,299 @@ public: + #undef INSN + + void ext(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm, int index) +- { ++{ ++ starti; ++ assert(T == T8B || T == T16B, "invalid arrangement"); ++ assert((T == T8B && index <= 0b0111) || (T == T16B && index <= 0b1111), "Invalid index value"); ++ f(0, 31), f((int)T & 1, 30), f(0b101110000, 29, 21); ++ rf(Vm, 16), f(0, 15), f(index, 14, 11); ++ f(0, 10), rf(Vn, 5), rf(Vd, 0); ++} ++ ++// SVE arithmetics - unpredicated ++#define INSN(NAME, opcode) \ ++ void NAME(FloatRegister Zd, SIMD_RegVariant T, FloatRegister Zn, FloatRegister Zm) { \ ++ starti; \ ++ assert(T != Q, "invalid register variant"); \ ++ f(0b00000100, 31, 24), f(T, 23, 22), f(1, 21), \ ++ rf(Zm, 16), f(0, 15, 13), f(opcode, 12, 10), rf(Zn, 5), rf(Zd, 0); \ ++ } ++ INSN(sve_add, 0b000); ++ INSN(sve_sub, 0b001); ++#undef INSN ++ ++// SVE floating-point arithmetic - unpredicated ++#define INSN(NAME, opcode) \ ++ void NAME(FloatRegister Zd, SIMD_RegVariant T, FloatRegister Zn, FloatRegister Zm) { \ ++ starti; \ ++ assert(T == S || T == D, "invalid register variant"); \ ++ f(0b01100101, 31, 24), f(T, 23, 22), f(0, 21), \ ++ rf(Zm, 16), f(0, 15, 13), f(opcode, 12, 10), rf(Zn, 5), rf(Zd, 0); \ ++ } ++ ++ INSN(sve_fadd, 0b000); ++ INSN(sve_fmul, 0b010); ++ INSN(sve_fsub, 0b001); ++#undef INSN ++ ++private: ++ void sve_predicate_reg_insn(unsigned op24, unsigned op13, ++ FloatRegister Zd_or_Vd, SIMD_RegVariant T, ++ PRegister Pg, FloatRegister Zn_or_Vn) { ++ starti; ++ f(op24, 31, 24), f(T, 23, 22), f(op13, 21, 13); ++ pgrf(Pg, 10), rf(Zn_or_Vn, 5), rf(Zd_or_Vd, 0); ++ } ++ ++public: ++ ++// SVE integer arithmetics - predicate ++#define INSN(NAME, op1, op2) \ ++ void NAME(FloatRegister Zdn_or_Zd_or_Vd, SIMD_RegVariant T, PRegister Pg, FloatRegister Znm_or_Vn) { \ ++ assert(T != Q, "invalid register variant"); \ ++ sve_predicate_reg_insn(op1, op2, Zdn_or_Zd_or_Vd, T, Pg, Znm_or_Vn); \ ++ } ++ ++ INSN(sve_abs, 0b00000100, 0b010110101); // vector abs, unary ++ INSN(sve_add, 0b00000100, 0b000000000); // vector add ++ INSN(sve_andv, 0b00000100, 0b011010001); // bitwise and reduction to scalar ++ INSN(sve_asr, 0b00000100, 0b010000100); // vector arithmetic shift right ++ INSN(sve_cnt, 0b00000100, 0b011010101) // count non-zero bits ++ INSN(sve_cpy, 0b00000101, 0b100000100); // copy scalar to each active vector element ++ INSN(sve_eorv, 0b00000100, 0b011001001); // bitwise xor reduction to scalar ++ INSN(sve_lsl, 0b00000100, 0b010011100); // vector logical shift left ++ INSN(sve_lsr, 0b00000100, 0b010001100); // vector logical shift right ++ INSN(sve_mul, 0b00000100, 0b010000000); // vector mul ++ INSN(sve_neg, 0b00000100, 
0b010111101); // vector neg, unary ++ INSN(sve_not, 0b00000100, 0b011110101); // bitwise invert vector, unary ++ INSN(sve_orv, 0b00000100, 0b011000001); // bitwise or reduction to scalar ++ INSN(sve_smax, 0b00000100, 0b001000000); // signed maximum vectors ++ INSN(sve_smaxv, 0b00000100, 0b001000001); // signed maximum reduction to scalar ++ INSN(sve_smin, 0b00000100, 0b001010000); // signed minimum vectors ++ INSN(sve_sminv, 0b00000100, 0b001010001); // signed minimum reduction to scalar ++ INSN(sve_sub, 0b00000100, 0b000001000); // vector sub ++ INSN(sve_uaddv, 0b00000100, 0b000001001); // unsigned add reduction to scalar ++#undef INSN ++ ++// SVE floating-point arithmetics - predicate ++#define INSN(NAME, op1, op2) \ ++ void NAME(FloatRegister Zd_or_Zdn_or_Vd, SIMD_RegVariant T, PRegister Pg, FloatRegister Zn_or_Zm) { \ ++ assert(T == S || T == D, "invalid register variant"); \ ++ sve_predicate_reg_insn(op1, op2, Zd_or_Zdn_or_Vd, T, Pg, Zn_or_Zm); \ ++ } ++ ++ INSN(sve_fabs, 0b00000100, 0b011100101); ++ INSN(sve_fadd, 0b01100101, 0b000000100); ++ INSN(sve_fadda, 0b01100101, 0b011000001); // add strictly-ordered reduction to scalar Vd ++ INSN(sve_fdiv, 0b01100101, 0b001101100); ++ INSN(sve_fmax, 0b01100101, 0b000110100); // floating-point maximum ++ INSN(sve_fmaxv, 0b01100101, 0b000110001); // floating-point maximum recursive reduction to scalar ++ INSN(sve_fmin, 0b01100101, 0b000111100); // floating-point minimum ++ INSN(sve_fminv, 0b01100101, 0b000111001); // floating-point minimum recursive reduction to scalar ++ INSN(sve_fmul, 0b01100101, 0b000010100); ++ INSN(sve_fneg, 0b00000100, 0b011101101); ++ INSN(sve_frintm, 0b01100101, 0b000010101); // floating-point round to integral value, toward minus infinity ++ INSN(sve_frintn, 0b01100101, 0b000000101); // floating-point round to integral value, nearest with ties to even ++ INSN(sve_frintp, 0b01100101, 0b000001101); // floating-point round to integral value, toward plus infinity ++ INSN(sve_fsqrt, 0b01100101, 0b001101101); ++ INSN(sve_fsub, 0b01100101, 0b000001100); ++#undef INSN ++ ++ // SVE multiple-add/sub - predicated ++#define INSN(NAME, op0, op1, op2) \ ++ void NAME(FloatRegister Zda, SIMD_RegVariant T, PRegister Pg, FloatRegister Zn, FloatRegister Zm) { \ ++ starti; \ ++ assert(T != Q, "invalid size"); \ ++ f(op0, 31, 24), f(T, 23, 22), f(op1, 21), rf(Zm, 16); \ ++ f(op2, 15, 13), pgrf(Pg, 10), rf(Zn, 5), rf(Zda, 0); \ ++ } ++ ++ INSN(sve_fmla, 0b01100101, 1, 0b000); // floating-point fused multiply-add: Zda = Zda + Zn * Zm ++ INSN(sve_fmls, 0b01100101, 1, 0b001); // floating-point fused multiply-subtract: Zda = Zda + -Zn * Zm ++ INSN(sve_fnmla, 0b01100101, 1, 0b010); // floating-point negated fused multiply-add: Zda = -Zda + -Zn * Zm ++ INSN(sve_fnmls, 0b01100101, 1, 0b011); // floating-point negated fused multiply-subtract: Zda = -Zda + Zn * Zm ++ INSN(sve_mla, 0b00000100, 0, 0b010); // multiply-add: Zda = Zda + Zn*Zm ++ INSN(sve_mls, 0b00000100, 0, 0b011); // multiply-subtract: Zda = Zda + -Zn*Zm ++#undef INSN ++ ++// SVE bitwise logical - unpredicated ++#define INSN(NAME, opc) \ ++ void NAME(FloatRegister Zd, FloatRegister Zn, FloatRegister Zm) { \ ++ starti; \ ++ f(0b00000100, 31, 24), f(opc, 23, 22), f(1, 21), \ ++ rf(Zm, 16), f(0b001100, 15, 10), rf(Zn, 5), rf(Zd, 0); \ ++ } ++ INSN(sve_and, 0b00); ++ INSN(sve_eor, 0b10); ++ INSN(sve_orr, 0b01); ++#undef INSN ++ ++// SVE shift immediate - unpredicated ++#define INSN(NAME, opc, isSHR) \ ++ void NAME(FloatRegister Zd, SIMD_RegVariant T, FloatRegister Zn, int shift) { \ ++ 
starti; \ ++ /* The encodings for the tszh:tszl:imm3 fields (bits 23:22 20:19 18:16) \ ++ * for shift right is calculated as: \ ++ * 0001 xxx B, shift = 16 - UInt(tszh:tszl:imm3) \ ++ * 001x xxx H, shift = 32 - UInt(tszh:tszl:imm3) \ ++ * 01xx xxx S, shift = 64 - UInt(tszh:tszl:imm3) \ ++ * 1xxx xxx D, shift = 128 - UInt(tszh:tszl:imm3) \ ++ * for shift left is calculated as: \ ++ * 0001 xxx B, shift = UInt(tszh:tszl:imm3) - 8 \ ++ * 001x xxx H, shift = UInt(tszh:tszl:imm3) - 16 \ ++ * 01xx xxx S, shift = UInt(tszh:tszl:imm3) - 32 \ ++ * 1xxx xxx D, shift = UInt(tszh:tszl:imm3) - 64 \ ++ */ \ ++ assert(T != Q, "Invalid register variant"); \ ++ if (isSHR) { \ ++ assert(((1 << (T + 3)) >= shift) && (shift > 0) , "Invalid shift value"); \ ++ } else { \ ++ assert(((1 << (T + 3)) > shift) && (shift >= 0) , "Invalid shift value"); \ ++ } \ ++ int cVal = (1 << ((T + 3) + (isSHR ? 1 : 0))); \ ++ int encodedShift = isSHR ? cVal - shift : cVal + shift; \ ++ int tszh = encodedShift >> 5; \ ++ int tszl_imm = encodedShift & 0x1f; \ ++ f(0b00000100, 31, 24); \ ++ f(tszh, 23, 22), f(1,21), f(tszl_imm, 20, 16); \ ++ f(0b100, 15, 13), f(opc, 12, 10), rf(Zn, 5), rf(Zd, 0); \ ++ } ++ ++ INSN(sve_asr, 0b100, /* isSHR = */ true); ++ INSN(sve_lsl, 0b111, /* isSHR = */ false); ++ INSN(sve_lsr, 0b101, /* isSHR = */ true); ++#undef INSN ++ ++private: ++ ++ // Scalar base + immediate index ++ void sve_ld_st1(FloatRegister Zt, Register Xn, int imm, PRegister Pg, ++ SIMD_RegVariant T, int op1, int type, int op2) { ++ starti; ++ assert_cond(T >= type); ++ f(op1, 31, 25), f(type, 24, 23), f(T, 22, 21); ++ f(0, 20), sf(imm, 19, 16), f(op2, 15, 13); ++ pgrf(Pg, 10), srf(Xn, 5), rf(Zt, 0); ++ } ++ ++ // Scalar base + scalar index ++ void sve_ld_st1(FloatRegister Zt, Register Xn, Register Xm, PRegister Pg, ++ SIMD_RegVariant T, int op1, int type, int op2) { ++ starti; ++ assert_cond(T >= type); ++ f(op1, 31, 25), f(type, 24, 23), f(T, 22, 21); ++ rf(Xm, 16), f(op2, 15, 13); ++ pgrf(Pg, 10), srf(Xn, 5), rf(Zt, 0); ++ } ++ ++ void sve_ld_st1(FloatRegister Zt, PRegister Pg, ++ SIMD_RegVariant T, const Address &a, ++ int op1, int type, int imm_op2, int scalar_op2) { ++ switch (a.getMode()) { ++ case Address::base_plus_offset: ++ sve_ld_st1(Zt, a.base(), a.offset(), Pg, T, op1, type, imm_op2); ++ break; ++ case Address::base_plus_offset_reg: ++ sve_ld_st1(Zt, a.base(), a.index(), Pg, T, op1, type, scalar_op2); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } ++ ++public: ++ ++// SVE load/store - predicated ++#define INSN(NAME, op1, type, imm_op2, scalar_op2) \ ++ void NAME(FloatRegister Zt, SIMD_RegVariant T, PRegister Pg, const Address &a) { \ ++ assert(T != Q, "invalid register variant"); \ ++ sve_ld_st1(Zt, Pg, T, a, op1, type, imm_op2, scalar_op2); \ ++ } ++ ++ INSN(sve_ld1b, 0b1010010, 0b00, 0b101, 0b010); ++ INSN(sve_st1b, 0b1110010, 0b00, 0b111, 0b010); ++ INSN(sve_ld1h, 0b1010010, 0b01, 0b101, 0b010); ++ INSN(sve_st1h, 0b1110010, 0b01, 0b111, 0b010); ++ INSN(sve_ld1w, 0b1010010, 0b10, 0b101, 0b010); ++ INSN(sve_st1w, 0b1110010, 0b10, 0b111, 0b010); ++ INSN(sve_ld1d, 0b1010010, 0b11, 0b101, 0b010); ++ INSN(sve_st1d, 0b1110010, 0b11, 0b111, 0b010); ++#undef INSN ++ ++// SVE load/store - unpredicated ++#define INSN(NAME, op1) \ ++ void NAME(FloatRegister Zt, const Address &a) { \ ++ starti; \ ++ assert(a.index() == noreg, "invalid address variant"); \ ++ f(op1, 31, 29), f(0b0010110, 28, 22), sf(a.offset() >> 3, 21, 16), \ ++ f(0b010, 15, 13), f(a.offset() & 0x7, 12, 10), srf(a.base(), 5), rf(Zt, 0); \ ++ } ++ ++ 
INSN(sve_ldr, 0b100); // LDR (vector) ++ INSN(sve_str, 0b111); // STR (vector) ++#undef INSN ++ ++#define INSN(NAME, op) \ ++ void NAME(Register Xd, Register Xn, int imm6) { \ ++ starti; \ ++ f(0b000001000, 31, 23), f(op, 22, 21); \ ++ srf(Xn, 16), f(0b01010, 15, 11), sf(imm6, 10, 5), srf(Xd, 0); \ ++ } ++ ++ INSN(sve_addvl, 0b01); ++ INSN(sve_addpl, 0b11); ++#undef INSN ++ ++// SVE inc/dec register by element count ++#define INSN(NAME, op) \ ++ void NAME(Register Xdn, SIMD_RegVariant T, unsigned imm4 = 1, int pattern = 0b11111) { \ ++ starti; \ ++ assert(T != Q, "invalid size"); \ ++ f(0b00000100,31, 24), f(T, 23, 22), f(0b11, 21, 20); \ ++ f(imm4 - 1, 19, 16), f(0b11100, 15, 11), f(op, 10), f(pattern, 9, 5), rf(Xdn, 0); \ ++ } ++ ++ INSN(sve_inc, 0); ++ INSN(sve_dec, 1); ++#undef INSN ++ ++ // SVE predicate count ++ void sve_cntp(Register Xd, SIMD_RegVariant T, PRegister Pg, PRegister Pn) { ++ starti; ++ assert(T != Q, "invalid size"); ++ f(0b00100101, 31, 24), f(T, 23, 22), f(0b10000010, 21, 14); ++ prf(Pg, 10), f(0, 9), prf(Pn, 5), rf(Xd, 0); ++ } ++ ++ // SVE dup scalar ++ void sve_dup(FloatRegister Zd, SIMD_RegVariant T, Register Rn) { ++ starti; ++ assert(T != Q, "invalid size"); ++ f(0b00000101, 31, 24), f(T, 23, 22), f(0b100000001110, 21, 10); ++ srf(Rn, 5), rf(Zd, 0); ++ } ++ ++ // SVE dup imm ++ void sve_dup(FloatRegister Zd, SIMD_RegVariant T, int imm8) { ++ starti; ++ assert(T != Q, "invalid size"); ++ int sh = 0; ++ if (imm8 <= 127 && imm8 >= -128) { ++ sh = 0; ++ } else if (T != B && imm8 <= 32512 && imm8 >= -32768 && (imm8 & 0xff) == 0) { ++ sh = 1; ++ imm8 = (imm8 >> 8); ++ } else { ++ guarantee(false, "invalid immediate"); ++ } ++ f(0b00100101, 31, 24), f(T, 23, 22), f(0b11100011, 21, 14); ++ f(sh, 13), sf(imm8, 12, 5), rf(Zd, 0); ++ } ++ ++ void sve_ptrue(PRegister pd, SIMD_RegVariant esize, int pattern = 0b11111) { + starti; +- assert(T == T8B || T == T16B, "invalid arrangement"); +- assert((T == T8B && index <= 0b0111) || (T == T16B && index <= 0b1111), "Invalid index value"); +- f(0, 31), f((int)T & 1, 30), f(0b101110000, 29, 21); +- rf(Vm, 16), f(0, 15), f(index, 14, 11); +- f(0, 10), rf(Vn, 5), rf(Vd, 0); ++ f(0b00100101, 31, 24), f(esize, 23, 22), f(0b011000111000, 21, 10); ++ f(pattern, 9, 5), f(0b0, 4), prf(pd, 0); + } + + Assembler(CodeBuffer* code) : AbstractAssembler(code) { +diff --git a/src/hotspot/cpu/aarch64/gc/z/zBarrierSetAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/gc/z/zBarrierSetAssembler_aarch64.cpp +index 6ac54f257..a258528ea 100644 +--- a/src/hotspot/cpu/aarch64/gc/z/zBarrierSetAssembler_aarch64.cpp ++++ b/src/hotspot/cpu/aarch64/gc/z/zBarrierSetAssembler_aarch64.cpp +@@ -456,8 +456,12 @@ void ZBarrierSetAssembler::generate_c2_load_barrier_stub(MacroAssembler* masm, Z + ZSetupArguments setup_arguments(masm, stub); + __ mov(rscratch1, stub->slow_path()); + __ blr(rscratch1); ++ if (UseSVE > 0) { ++ // Reinitialize the ptrue predicate register, in case the external runtime ++ // call clobbers ptrue reg, as we may return to SVE compiled code. 
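The comment above states the key invariant: the patch reserves p7 (declared as ptrue earlier in assembler_aarch64.hpp) as an always-all-true predicate that SVE-compiled code may use without re-materializing it, while a foreign runtime call is free to clobber any predicate register, so p7 must be re-established on return. The verify_ptrue() check added to macroAssembler_aarch64.cpp further down counts the active lanes of p7 with sve_cntp and subtracts the vector length with sve_dec; a standalone sketch of that arithmetic (illustrative only, not JVM code):

#include <cassert>

// With B-sized elements, an all-true predicate has one active lane per
// byte of vector length, so CNTP followed by DEC (subtract VL) leaves
// zero exactly when p7 is still all true.
static bool ptrue_intact(long active_lanes, long vector_length_bytes) {
  return active_lanes - vector_length_bytes == 0;
}

int main() {
  assert(ptrue_intact(32, 32));   // 256-bit SVE, p7 untouched
  assert(!ptrue_intact(24, 32));  // clobbered lanes would hit the stop()
  return 0;
}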
++ __ reinitialize_ptrue(); + } +- + // Stub exit + __ b(*stub->continuation()); + } +diff --git a/src/hotspot/cpu/aarch64/globals_aarch64.hpp b/src/hotspot/cpu/aarch64/globals_aarch64.hpp +index 071845e5b..f26ea2a8b 100644 +--- a/src/hotspot/cpu/aarch64/globals_aarch64.hpp ++++ b/src/hotspot/cpu/aarch64/globals_aarch64.hpp +@@ -112,6 +112,9 @@ define_pd_global(intx, InlineSmallCode, 1000); + "Avoid generating unaligned memory accesses") \ + product(bool, UseLSE, false, \ + "Use LSE instructions") \ ++ product(uint, UseSVE, 0, \ ++ "Highest supported SVE instruction set version") \ ++ range(0, 2) \ + product(bool, UseBlockZeroing, true, \ + "Use DC ZVA for block zeroing") \ + product(intx, BlockZeroingLowLimit, 256, \ +diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp +index 241197075..431c5f005 100644 +--- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp ++++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp +@@ -50,6 +50,9 @@ + #include "runtime/jniHandles.inline.hpp" + #include "runtime/sharedRuntime.hpp" + #include "runtime/thread.hpp" ++#ifdef COMPILER2 ++#include "opto/matcher.hpp" ++#endif + + #ifdef PRODUCT + #define BLOCK_COMMENT(str) /* nothing */ +@@ -2098,8 +2098,17 @@ int MacroAssembler::pop(unsigned int bitset, Register stack) { + } + + // Push lots of registers in the bit set supplied. Don't push sp. +-// Return the number of words pushed ++// Return the number of dwords pushed + int MacroAssembler::push_fp(unsigned int bitset, Register stack) { ++ int words_pushed = 0; ++ bool use_sve = false; ++ int sve_vector_size_in_bytes = 0; ++ ++#ifdef COMPILER2 ++ use_sve = Matcher::supports_scalable_vector(); ++ sve_vector_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE); ++#endif ++ + // Scan bitset to accumulate register pairs + unsigned char regs[32]; + int count = 0; +@@ -2114,8 +2123,18 @@ int MacroAssembler::push_fp(unsigned int bitset, Register stack) { + return 0; + } + ++ // SVE ++ if (use_sve && sve_vector_size_in_bytes > 16) { ++ sub(stack, stack, sve_vector_size_in_bytes * count); ++ for (int i = 0; i < count; i++) { ++ sve_str(as_FloatRegister(regs[i]), Address(stack, i)); ++ } ++ return count * sve_vector_size_in_bytes / 8; ++ } ++ + add(stack, stack, -count * wordSize * 2); + ++ // NEON + if (count & 1) { + strq(as_FloatRegister(regs[0]), Address(stack)); + i += 1; +@@ -2128,7 +2147,16 @@ int MacroAssembler::push_fp(unsigned int bitset, Register stack) { + return count; + } + ++// Return the number of dwords popped + int MacroAssembler::pop_fp(unsigned int bitset, Register stack) { ++ int words_pushed = 0; ++ bool use_sve = false; ++ int sve_vector_size_in_bytes = 0; ++ ++#ifdef COMPILER2 ++ use_sve = Matcher::supports_scalable_vector(); ++ sve_vector_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE); ++#endif + // Scan bitset to accumulate register pairs + unsigned char regs[32]; + int count = 0; +@@ -2143,6 +2171,16 @@ int MacroAssembler::pop_fp(unsigned int bitset, Register stack) { + return 0; + } + ++ // SVE ++ if (use_sve && sve_vector_size_in_bytes > 16) { ++ for (int i = count - 1; i >= 0; i--) { ++ sve_ldr(as_FloatRegister(regs[i]), Address(stack, i)); ++ } ++ add(stack, stack, sve_vector_size_in_bytes * count); ++ return count * sve_vector_size_in_bytes / 8; ++ } ++ ++ // NEON + if (count & 1) { + ldrq(as_FloatRegister(regs[0]), Address(stack)); + i += 1; +@@ -2616,23 +2654,39 @@ void MacroAssembler::pop_call_clobbered_registers() { + pop(RegSet::range(r0, r18) - 
RegSet::of(rscratch1, rscratch2), sp); + } + +-void MacroAssembler::push_CPU_state(bool save_vectors) { +- int step = (save_vectors ? 8 : 4) * wordSize; ++void MacroAssembler::push_CPU_state(bool save_vectors, bool use_sve, ++ int sve_vector_size_in_bytes) { + push(0x3fffffff, sp); // integer registers except lr & sp +- mov(rscratch1, -step); +- sub(sp, sp, step); +- for (int i = 28; i >= 4; i -= 4) { +- st1(as_FloatRegister(i), as_FloatRegister(i+1), as_FloatRegister(i+2), +- as_FloatRegister(i+3), save_vectors ? T2D : T1D, Address(post(sp, rscratch1))); ++ if (save_vectors && use_sve && sve_vector_size_in_bytes > 16) { ++ sub(sp, sp, sve_vector_size_in_bytes * FloatRegisterImpl::number_of_registers); ++ for (int i = 0; i < FloatRegisterImpl::number_of_registers; i++) { ++ sve_str(as_FloatRegister(i), Address(sp, i)); ++ } ++ } else { ++ int step = (save_vectors ? 8 : 4) * wordSize; ++ mov(rscratch1, -step); ++ sub(sp, sp, step); ++ for (int i = 28; i >= 4; i -= 4) { ++ st1(as_FloatRegister(i), as_FloatRegister(i+1), as_FloatRegister(i+2), ++ as_FloatRegister(i+3), save_vectors ? T2D : T1D, Address(post(sp, rscratch1))); ++ } ++ st1(v0, v1, v2, v3, save_vectors ? T2D : T1D, sp); + } +- st1(v0, v1, v2, v3, save_vectors ? T2D : T1D, sp); + } + +-void MacroAssembler::pop_CPU_state(bool restore_vectors) { +- int step = (restore_vectors ? 8 : 4) * wordSize; +- for (int i = 0; i <= 28; i += 4) +- ld1(as_FloatRegister(i), as_FloatRegister(i+1), as_FloatRegister(i+2), +- as_FloatRegister(i+3), restore_vectors ? T2D : T1D, Address(post(sp, step))); ++void MacroAssembler::pop_CPU_state(bool restore_vectors, bool use_sve, ++ int sve_vector_size_in_bytes) { ++ if (restore_vectors && use_sve && sve_vector_size_in_bytes > 16) { ++ for (int i = FloatRegisterImpl::number_of_registers - 1; i >= 0; i--) { ++ sve_ldr(as_FloatRegister(i), Address(sp, i)); ++ } ++ add(sp, sp, sve_vector_size_in_bytes * FloatRegisterImpl::number_of_registers); ++ } else { ++ int step = (restore_vectors ? 8 : 4) * wordSize; ++ for (int i = 0; i <= 28; i += 4) ++ ld1(as_FloatRegister(i), as_FloatRegister(i+1), as_FloatRegister(i+2), ++ as_FloatRegister(i+3), restore_vectors ? T2D : T1D, Address(post(sp, step))); ++ } + pop(0x3fffffff, sp); // integer registers except lr & sp + } + +@@ -2681,6 +2735,21 @@ Address MacroAssembler::spill_address(int size, int offset, Register tmp) + return Address(base, offset); + } + ++Address MacroAssembler::sve_spill_address(int sve_reg_size_in_bytes, int offset, Register tmp) { ++ assert(offset >= 0, "spill to negative address?"); ++ ++ Register base = sp; ++ ++ // An immediate offset in the range 0 to 255 which is multiplied ++ // by the current vector or predicate register size in bytes. ++ if (offset % sve_reg_size_in_bytes == 0 && offset < ((1<<8)*sve_reg_size_in_bytes)) { ++ return Address(base, offset / sve_reg_size_in_bytes); ++ } ++ ++ add(tmp, base, offset); ++ return Address(tmp); ++} ++ + // Checks whether offset is aligned. + // Returns true if it is, else false. 
+ bool MacroAssembler::merge_alignment_check(Register base, +@@ -5843,3 +5912,24 @@ void MacroAssembler::get_thread(Register dst) { + + pop(saved_regs, sp); + } ++ ++void MacroAssembler::verify_sve_vector_length() { ++ Label verify_ok; ++ assert(UseSVE > 0, "should only be used for SVE"); ++ movw(rscratch1, zr); ++ sve_inc(rscratch1, B); ++ subsw(zr, rscratch1, VM_Version::get_initial_sve_vector_length()); ++ br(EQ, verify_ok); ++ stop("Error: SVE vector length has changed since jvm startup"); ++ bind(verify_ok); ++} ++ ++void MacroAssembler::verify_ptrue() { ++ Label verify_ok; ++ assert(UseSVE > 0, "should only be used for SVE"); ++ sve_cntp(rscratch1, B, ptrue, ptrue); // get true elements count. ++ sve_dec(rscratch1, B); ++ cbz(rscratch1, verify_ok); ++ stop("Error: the preserved predicate register (p7) elements are not all true"); ++ bind(verify_ok); ++} +diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp +index 014a4d3c6..9fb98c010 100644 +--- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp ++++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp +@@ -862,8 +862,10 @@ public: + + DEBUG_ONLY(void verify_heapbase(const char* msg);) + +- void push_CPU_state(bool save_vectors = false); +- void pop_CPU_state(bool restore_vectors = false) ; ++ void push_CPU_state(bool save_vectors = false, bool use_sve = false, ++ int sve_vector_size_in_bytes = 0); ++ void pop_CPU_state(bool restore_vectors = false, bool use_sve = false, ++ int sve_vector_size_in_bytes = 0); + + // Round up to a power of two + void round_to(Register reg, int modulus); +@@ -938,6 +940,11 @@ public: + + Address argument_address(RegisterOrConstant arg_slot, int extra_slot_offset = 0); + ++ void verify_sve_vector_length(); ++ void reinitialize_ptrue() { ++ sve_ptrue(ptrue, B); ++ } ++ void verify_ptrue(); + + // Debugging + +@@ -1307,6 +1314,7 @@ private: + // Returns an address on the stack which is reachable with a ldr/str of size + // Uses rscratch2 if the address is not directly reachable + Address spill_address(int size, int offset, Register tmp=rscratch2); ++ Address sve_spill_address(int sve_reg_size_in_bytes, int offset, Register tmp=rscratch2); + + bool merge_alignment_check(Register base, size_t size, long cur_offset, long prev_offset) const; + +@@ -1330,6 +1338,9 @@ public: + void spill(FloatRegister Vx, SIMD_RegVariant T, int offset) { + str(Vx, T, spill_address(1 << (int)T, offset)); + } ++ void spill_sve_vector(FloatRegister Zx, int offset, int vector_reg_size_in_bytes) { ++ sve_str(Zx, sve_spill_address(vector_reg_size_in_bytes, offset)); ++ } + void unspill(Register Rx, bool is64, int offset) { + if (is64) { + ldr(Rx, spill_address(8, offset)); +@@ -1340,6 +1351,9 @@ public: + void unspill(FloatRegister Vx, SIMD_RegVariant T, int offset) { + ldr(Vx, T, spill_address(1 << (int)T, offset)); + } ++ void unspill_sve_vector(FloatRegister Zx, int offset, int vector_reg_size_in_bytes) { ++ sve_ldr(Zx, sve_spill_address(vector_reg_size_in_bytes, offset)); ++ } + void spill_copy128(int src_offset, int dst_offset, + Register tmp1=rscratch1, Register tmp2=rscratch2) { + if (src_offset < 512 && (src_offset & 7) == 0 && +@@ -1353,6 +1367,15 @@ public: + spill(tmp1, true, dst_offset+8); + } + } ++ void spill_copy_sve_vector_stack_to_stack(int src_offset, int dst_offset, ++ int sve_vec_reg_size_in_bytes) { ++ assert(sve_vec_reg_size_in_bytes % 16 == 0, "unexpected sve vector reg size"); ++ for (int i = 0; i < sve_vec_reg_size_in_bytes / 16; i++) { ++ 
spill_copy128(src_offset, dst_offset); ++ src_offset += 16; ++ dst_offset += 16; ++ } ++ } + }; + + #ifdef ASSERT +diff --git a/src/hotspot/cpu/aarch64/register_aarch64.cpp b/src/hotspot/cpu/aarch64/register_aarch64.cpp +index 30924e8a5..3db8e8337 100644 +--- a/src/hotspot/cpu/aarch64/register_aarch64.cpp ++++ b/src/hotspot/cpu/aarch64/register_aarch64.cpp +@@ -1,6 +1,6 @@ + /* +- * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. +- * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -26,10 +26,15 @@ + #include "precompiled.hpp" + #include "register_aarch64.hpp" + +-const int ConcreteRegisterImpl::max_gpr = RegisterImpl::number_of_registers << 1; ++const int ConcreteRegisterImpl::max_gpr = RegisterImpl::number_of_registers * ++ RegisterImpl::max_slots_per_register; + + const int ConcreteRegisterImpl::max_fpr +- = ConcreteRegisterImpl::max_gpr + (FloatRegisterImpl::number_of_registers << 1); ++ = ConcreteRegisterImpl::max_gpr + ++ FloatRegisterImpl::number_of_registers * FloatRegisterImpl::max_slots_per_register; ++ ++const int ConcreteRegisterImpl::max_pr ++ = ConcreteRegisterImpl::max_fpr + PRegisterImpl::number_of_registers; + + const char* RegisterImpl::name() const { + const char* names[number_of_registers] = { +@@ -52,3 +57,10 @@ const char* FloatRegisterImpl::name() const { + }; + return is_valid() ? names[encoding()] : "noreg"; + } ++ ++const char* PRegisterImpl::name() const { ++ const char* names[number_of_registers] = { ++ "p0", "p1", "p2", "p3", "p4", "p5", "p6", "p7" ++ }; ++ return is_valid() ? names[encoding()] : "noreg"; ++} +diff --git a/src/hotspot/cpu/aarch64/register_aarch64.hpp b/src/hotspot/cpu/aarch64/register_aarch64.hpp +index 5f7662c89..c211b39ee 100644 +--- a/src/hotspot/cpu/aarch64/register_aarch64.hpp ++++ b/src/hotspot/cpu/aarch64/register_aarch64.hpp +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * +@@ -44,7 +44,8 @@ class RegisterImpl: public AbstractRegisterImpl { + enum { + number_of_registers = 32, + number_of_byte_registers = 32, +- number_of_registers_for_jvmci = 34 // Including SP and ZR. ++ number_of_registers_for_jvmci = 34, // Including SP and ZR. ++ max_slots_per_register = 2 + }; + + // derived registers, offsets, and addresses +@@ -127,7 +128,11 @@ inline FloatRegister as_FloatRegister(int encoding) { + class FloatRegisterImpl: public AbstractRegisterImpl { + public: + enum { +- number_of_registers = 32 ++ number_of_registers = 32, ++ max_slots_per_register = 8, ++ save_slots_per_register = 2, ++ slots_per_neon_register = 4, ++ extra_save_slots_per_neon_register = slots_per_neon_register - save_slots_per_register + }; + + // construction +@@ -183,6 +188,80 @@ CONSTANT_REGISTER_DECLARATION(FloatRegister, v29 , (29)); + CONSTANT_REGISTER_DECLARATION(FloatRegister, v30 , (30)); + CONSTANT_REGISTER_DECLARATION(FloatRegister, v31 , (31)); + ++// SVE vector registers, shared with the SIMD&FP v0-v31. Vn maps to Zn[127:0]. 
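// Each Zn is VL bits wide, where VL is an implementation-defined multiple
// of 128 bits up to 2048; only the low 128 bits of Zn alias the NEON
// register Vn. Reusing FloatRegister with identical encodings for z0-z31
// avoids introducing a second register file: whether an instruction touches
// the V-view or the Z-view is decided by its opcode, not its operand class.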
++CONSTANT_REGISTER_DECLARATION(FloatRegister, z0 , ( 0)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, z1 , ( 1)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, z2 , ( 2)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, z3 , ( 3)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, z4 , ( 4)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, z5 , ( 5)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, z6 , ( 6)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, z7 , ( 7)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, z8 , ( 8)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, z9 , ( 9)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, z10 , (10)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, z11 , (11)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, z12 , (12)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, z13 , (13)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, z14 , (14)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, z15 , (15)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, z16 , (16)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, z17 , (17)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, z18 , (18)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, z19 , (19)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, z20 , (20)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, z21 , (21)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, z22 , (22)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, z23 , (23)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, z24 , (24)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, z25 , (25)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, z26 , (26)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, z27 , (27)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, z28 , (28)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, z29 , (29)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, z30 , (30)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, z31 , (31)); ++ ++ ++class PRegisterImpl; ++typedef PRegisterImpl* PRegister; ++inline PRegister as_PRegister(int encoding) { ++ return (PRegister)(intptr_t)encoding; ++} ++ ++// The implementation of predicate registers for the architecture ++class PRegisterImpl: public AbstractRegisterImpl { ++ public: ++ enum { ++ number_of_registers = 8, ++ max_slots_per_register = 1 ++ }; ++ ++ // construction ++ inline friend PRegister as_PRegister(int encoding); ++ ++ VMReg as_VMReg(); ++ ++ // derived registers, offsets, and addresses ++ PRegister successor() const { return as_PRegister(encoding() + 1); } ++ ++ // accessors ++ int encoding() const { assert(is_valid(), "invalid register"); return (intptr_t)this; } ++ int encoding_nocheck() const { return (intptr_t)this; } ++ bool is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; } ++ const char* name() const; ++}; ++ ++// The predicate registers of SVE. ++CONSTANT_REGISTER_DECLARATION(PRegister, p0, ( 0)); ++CONSTANT_REGISTER_DECLARATION(PRegister, p1, ( 1)); ++CONSTANT_REGISTER_DECLARATION(PRegister, p2, ( 2)); ++CONSTANT_REGISTER_DECLARATION(PRegister, p3, ( 3)); ++CONSTANT_REGISTER_DECLARATION(PRegister, p4, ( 4)); ++CONSTANT_REGISTER_DECLARATION(PRegister, p5, ( 5)); ++CONSTANT_REGISTER_DECLARATION(PRegister, p6, ( 6)); ++CONSTANT_REGISTER_DECLARATION(PRegister, p7, ( 7)); ++ + // Need to know the total number of registers of all sorts for SharedInfo. + // Define a class that exports it. 
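// With the constants above, the total below works out to
// 32 * 2 (integer) + 32 * 8 (float/vector) + 8 * 1 (predicate) + 1 (flags)
// = 329 32-bit VMReg slots, with max_gpr, max_fpr and max_pr (defined in
// register_aarch64.cpp) marking the section boundaries inside that space.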
+ class ConcreteRegisterImpl : public AbstractRegisterImpl { +@@ -193,14 +272,16 @@ class ConcreteRegisterImpl : public AbstractRegisterImpl { + // There is no requirement that any ordering here matches any ordering c2 gives + // it's optoregs. + +- number_of_registers = (2 * RegisterImpl::number_of_registers + +- 4 * FloatRegisterImpl::number_of_registers + ++ number_of_registers = (RegisterImpl::max_slots_per_register * RegisterImpl::number_of_registers + ++ FloatRegisterImpl::max_slots_per_register * FloatRegisterImpl::number_of_registers + ++ PRegisterImpl::max_slots_per_register * PRegisterImpl::number_of_registers + + 1) // flags + }; + + // added to make it compile + static const int max_gpr; + static const int max_fpr; ++ static const int max_pr; + }; + + // A set of registers +diff --git a/src/hotspot/cpu/aarch64/register_definitions_aarch64.cpp b/src/hotspot/cpu/aarch64/register_definitions_aarch64.cpp +index c18109087..e337f582a 100644 +--- a/src/hotspot/cpu/aarch64/register_definitions_aarch64.cpp ++++ b/src/hotspot/cpu/aarch64/register_definitions_aarch64.cpp +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2002, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2002, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * +@@ -154,3 +154,47 @@ REGISTER_DEFINITION(Register, rthread); + REGISTER_DEFINITION(Register, rheapbase); + + REGISTER_DEFINITION(Register, r31_sp); ++ ++REGISTER_DEFINITION(FloatRegister, z0); ++REGISTER_DEFINITION(FloatRegister, z1); ++REGISTER_DEFINITION(FloatRegister, z2); ++REGISTER_DEFINITION(FloatRegister, z3); ++REGISTER_DEFINITION(FloatRegister, z4); ++REGISTER_DEFINITION(FloatRegister, z5); ++REGISTER_DEFINITION(FloatRegister, z6); ++REGISTER_DEFINITION(FloatRegister, z7); ++REGISTER_DEFINITION(FloatRegister, z8); ++REGISTER_DEFINITION(FloatRegister, z9); ++REGISTER_DEFINITION(FloatRegister, z10); ++REGISTER_DEFINITION(FloatRegister, z11); ++REGISTER_DEFINITION(FloatRegister, z12); ++REGISTER_DEFINITION(FloatRegister, z13); ++REGISTER_DEFINITION(FloatRegister, z14); ++REGISTER_DEFINITION(FloatRegister, z15); ++REGISTER_DEFINITION(FloatRegister, z16); ++REGISTER_DEFINITION(FloatRegister, z17); ++REGISTER_DEFINITION(FloatRegister, z18); ++REGISTER_DEFINITION(FloatRegister, z19); ++REGISTER_DEFINITION(FloatRegister, z20); ++REGISTER_DEFINITION(FloatRegister, z21); ++REGISTER_DEFINITION(FloatRegister, z22); ++REGISTER_DEFINITION(FloatRegister, z23); ++REGISTER_DEFINITION(FloatRegister, z24); ++REGISTER_DEFINITION(FloatRegister, z25); ++REGISTER_DEFINITION(FloatRegister, z26); ++REGISTER_DEFINITION(FloatRegister, z27); ++REGISTER_DEFINITION(FloatRegister, z28); ++REGISTER_DEFINITION(FloatRegister, z29); ++REGISTER_DEFINITION(FloatRegister, z30); ++REGISTER_DEFINITION(FloatRegister, z31); ++ ++REGISTER_DEFINITION(PRegister, p0); ++REGISTER_DEFINITION(PRegister, p1); ++REGISTER_DEFINITION(PRegister, p2); ++REGISTER_DEFINITION(PRegister, p3); ++REGISTER_DEFINITION(PRegister, p4); ++REGISTER_DEFINITION(PRegister, p5); ++REGISTER_DEFINITION(PRegister, p6); ++REGISTER_DEFINITION(PRegister, p7); ++ ++REGISTER_DEFINITION(PRegister, ptrue); +diff --git a/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp b/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp +index da2bc6b05..05cc32e7e 100644 +--- a/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp ++++ b/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp +@@ -98,42 +98,60 
@@ class RegisterSaver { + // Capture info about frame layout + enum layout { + fpu_state_off = 0, +- fpu_state_end = fpu_state_off+FPUStateSizeInWords-1, ++ fpu_state_end = fpu_state_off + FPUStateSizeInWords - 1, + // The frame sender code expects that rfp will be in + // the "natural" place and will override any oopMap + // setting for it. We must therefore force the layout + // so that it agrees with the frame sender code. +- r0_off = fpu_state_off+FPUStateSizeInWords, +- rfp_off = r0_off + 30 * 2, +- return_off = rfp_off + 2, // slot for return address +- reg_save_size = return_off + 2}; ++ r0_off = fpu_state_off + FPUStateSizeInWords, ++ rfp_off = r0_off + (RegisterImpl::number_of_registers - 2) * RegisterImpl::max_slots_per_register, ++ return_off = rfp_off + RegisterImpl::max_slots_per_register, // slot for return address ++ reg_save_size = return_off + RegisterImpl::max_slots_per_register}; + + }; + + OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors) { ++ bool use_sve = false; ++ int sve_vector_size_in_bytes = 0; ++ int sve_vector_size_in_slots = 0; ++ ++#ifdef COMPILER2 ++ use_sve = Matcher::supports_scalable_vector(); ++ sve_vector_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE); ++ sve_vector_size_in_slots = Matcher::scalable_vector_reg_size(T_FLOAT); ++#endif ++ + #if COMPILER2_OR_JVMCI + if (save_vectors) { ++ int vect_words = 0; ++ int extra_save_slots_per_register = 0; + // Save upper half of vector registers +- int vect_words = 32 * 8 / wordSize; ++ if (use_sve) { ++ extra_save_slots_per_register = sve_vector_size_in_slots - FloatRegisterImpl::save_slots_per_register; ++ } else { ++ extra_save_slots_per_register = FloatRegisterImpl::extra_save_slots_per_neon_register; ++ } ++ vect_words = FloatRegisterImpl::number_of_registers * extra_save_slots_per_register / ++ VMRegImpl::slots_per_word; + additional_frame_words += vect_words; + } + #else + assert(!save_vectors, "vectors are generated only by C2 and JVMCI"); + #endif + +- int frame_size_in_bytes = align_up(additional_frame_words*wordSize + +- reg_save_size*BytesPerInt, 16); ++ int frame_size_in_bytes = align_up(additional_frame_words * wordSize + ++ reg_save_size * BytesPerInt, 16); + // OopMap frame size is in compiler stack slots (jint's) not bytes or words + int frame_size_in_slots = frame_size_in_bytes / BytesPerInt; + // The caller will allocate additional_frame_words +- int additional_frame_slots = additional_frame_words*wordSize / BytesPerInt; ++ int additional_frame_slots = additional_frame_words * wordSize / BytesPerInt; + // CodeBlob frame size is in words. + int frame_size_in_words = frame_size_in_bytes / wordSize; + *total_frame_words = frame_size_in_words; + + // Save Integer and Float registers. + __ enter(); +- __ push_CPU_state(save_vectors); ++ __ push_CPU_state(save_vectors, use_sve, sve_vector_size_in_bytes); + + // Set an oopmap for the call site. This oopmap will map all + // oop-registers and debug-info registers as callee-saved. This +@@ -146,10 +164,10 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_ + for (int i = 0; i < RegisterImpl::number_of_registers; i++) { + Register r = as_Register(i); + if (r < rheapbase && r != rscratch1 && r != rscratch2) { +- int sp_offset = 2 * (i + 32); // SP offsets are in 4-byte words, +- // register slots are 8 bytes +- // wide, 32 floating-point +- // registers ++ // SP offsets are in 4-byte words. 
++ // Register slots are 8 bytes wide, 32 floating-point registers. ++ int sp_offset = RegisterImpl::max_slots_per_register * i + ++ FloatRegisterImpl::save_slots_per_register * FloatRegisterImpl::number_of_registers; + oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset + additional_frame_slots), + r->as_VMReg()); + } +@@ -157,7 +175,13 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_ + + for (int i = 0; i < FloatRegisterImpl::number_of_registers; i++) { + FloatRegister r = as_FloatRegister(i); +- int sp_offset = save_vectors ? (4 * i) : (2 * i); ++ int sp_offset = 0; ++ if (save_vectors) { ++ sp_offset = use_sve ? (sve_vector_size_in_slots * i) : ++ (FloatRegisterImpl::slots_per_neon_register * i); ++ } else { ++ sp_offset = FloatRegisterImpl::save_slots_per_register * i; ++ } + oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset), + r->as_VMReg()); + } +@@ -166,10 +190,15 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_ + } + + void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_vectors) { +-#ifndef COMPILER2 ++#ifdef COMPILER2 ++ __ pop_CPU_state(restore_vectors, Matcher::supports_scalable_vector(), ++ Matcher::scalable_vector_reg_size(T_BYTE)); ++#else ++#if !INCLUDE_JVMCI + assert(!restore_vectors, "vectors are generated only by C2 and JVMCI"); + #endif + __ pop_CPU_state(restore_vectors); ++#endif + __ leave(); + } + +@@ -1855,6 +1884,11 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, + __ strw(rscratch1, Address(rthread, JavaThread::thread_state_offset())); + } + ++ if (UseSVE > 0) { ++ // Make sure that jni code does not change SVE vector length. ++ __ verify_sve_vector_length(); ++ } ++ + // check for safepoint operation in progress and/or pending suspend requests + Label safepoint_in_progress, safepoint_in_progress_done; + { +@@ -2785,6 +2819,12 @@ SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_t + __ maybe_isb(); + __ membar(Assembler::LoadLoad | Assembler::LoadStore); + ++ if (UseSVE > 0 && save_vectors) { ++ // Reinitialize the ptrue predicate register, in case the external runtime ++ // call clobbers ptrue reg, as we may return to SVE compiled code. ++ __ reinitialize_ptrue(); ++ } ++ + __ ldr(rscratch1, Address(rthread, Thread::pending_exception_offset())); + __ cbz(rscratch1, noException); + +diff --git a/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp b/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp +index 0310463ac..979ff51f8 100644 +--- a/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp ++++ b/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp +@@ -486,6 +486,11 @@ class StubGenerator: public StubCodeGenerator { + __ call_VM_leaf(CAST_FROM_FN_PTR(address, + SharedRuntime::exception_handler_for_return_address), + rthread, c_rarg1); ++ if (UseSVE > 0 ) { ++ // Reinitialize the ptrue predicate register, in case the external runtime ++ // call clobbers ptrue reg, as we may return to SVE compiled code. ++ __ reinitialize_ptrue(); ++ } + // we should not really care that lr is no longer the callee + // address. we saved the value the handler needs in r19 so we can + // just copy it to r3. 
however, the C2 handler will push its own +@@ -4804,6 +4809,12 @@ class StubGenerator: public StubCodeGenerator { + __ reset_last_Java_frame(true); + __ maybe_isb(); + ++ if (UseSVE > 0) { ++ // Reinitialize the ptrue predicate register, in case the external runtime ++ // call clobbers ptrue reg, as we may return to SVE compiled code. ++ __ reinitialize_ptrue(); ++ } ++ + __ leave(); + + // check for pending exceptions +diff --git a/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp b/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp +index 6e4eb1a7a..1bb12d24f 100644 +--- a/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp ++++ b/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp +@@ -1377,6 +1377,11 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { + __ push(dtos); + __ push(ltos); + ++ if (UseSVE > 0) { ++ // Make sure that jni code does not change SVE vector length. ++ __ verify_sve_vector_length(); ++ } ++ + // change thread state + __ mov(rscratch1, _thread_in_native_trans); + __ lea(rscratch2, Address(rthread, JavaThread::thread_state_offset())); +diff --git a/src/hotspot/cpu/aarch64/vm_version_aarch64.cpp b/src/hotspot/cpu/aarch64/vm_version_aarch64.cpp +index 04ae1167d..8f2c95e8b 100644 +--- a/src/hotspot/cpu/aarch64/vm_version_aarch64.cpp ++++ b/src/hotspot/cpu/aarch64/vm_version_aarch64.cpp +@@ -29,13 +29,15 @@ + #include "memory/resourceArea.hpp" + #include "runtime/java.hpp" + #include "runtime/stubCodeGenerator.hpp" ++#include "utilities/formatBuffer.hpp" + #include "utilities/macros.hpp" + #include "vm_version_aarch64.hpp" + + #include OS_HEADER_INLINE(os) + +-#include + #include ++#include ++#include + + #ifndef HWCAP_AES + #define HWCAP_AES (1<<3) +@@ -61,12 +63,27 @@ + #define HWCAP_ATOMICS (1<<8) + #endif + ++#ifndef HWCAP_SVE ++#define HWCAP_SVE (1 << 22) ++#endif ++ ++#ifndef HWCAP2_SVE2 ++#define HWCAP2_SVE2 (1 << 1) ++#endif ++ ++#ifndef PR_SVE_GET_VL ++// For old toolchains which do not have SVE related macros defined. ++#define PR_SVE_SET_VL 50 ++#define PR_SVE_GET_VL 51 ++#endif ++ + int VM_Version::_cpu; + int VM_Version::_model; + int VM_Version::_model2; + int VM_Version::_variant; + int VM_Version::_revision; + int VM_Version::_stepping; ++int VM_Version::_initial_sve_vector_length; + VM_Version::PsrInfo VM_Version::_psr_info = { 0, }; + + static BufferBlob* stub_blob; +@@ -160,6 +177,7 @@ void VM_Version::get_processor_features() { + } + + unsigned long auxv = getauxval(AT_HWCAP); ++ unsigned long auxv2 = getauxval(AT_HWCAP2); + + char buf[512]; + +@@ -250,6 +268,8 @@ void VM_Version::get_processor_features() { + if (auxv & HWCAP_SHA1) strcat(buf, ", sha1"); + if (auxv & HWCAP_SHA2) strcat(buf, ", sha256"); + if (auxv & HWCAP_ATOMICS) strcat(buf, ", lse"); ++ if (auxv & HWCAP_SVE) strcat(buf, ", sve"); ++ if (auxv2 & HWCAP2_SVE2) strcat(buf, ", sve2"); + + _features_string = os::strdup(buf); + +@@ -379,6 +399,18 @@ void VM_Version::get_processor_features() { + FLAG_SET_DEFAULT(UseBlockZeroing, false); + } + ++ if (auxv & HWCAP_SVE) { ++ if (FLAG_IS_DEFAULT(UseSVE)) { ++ FLAG_SET_DEFAULT(UseSVE, (auxv2 & HWCAP2_SVE2) ? 2 : 1); ++ } ++ if (UseSVE > 0) { ++ _initial_sve_vector_length = prctl(PR_SVE_GET_VL); ++ } ++ } else if (UseSVE > 0) { ++ warning("UseSVE specified, but not supported on current CPU. 
Disabling SVE."); ++ FLAG_SET_DEFAULT(UseSVE, 0); ++ } ++ + // This machine allows unaligned memory accesses + if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) { + FLAG_SET_DEFAULT(UseUnalignedAccesses, true); +@@ -411,6 +443,50 @@ void VM_Version::get_processor_features() { + UseMontgomerySquareIntrinsic = true; + } + ++ if (UseSVE > 0) { ++ if (FLAG_IS_DEFAULT(MaxVectorSize)) { ++ MaxVectorSize = _initial_sve_vector_length; ++ } else if (MaxVectorSize < 16) { ++ warning("SVE does not support vector length less than 16 bytes. Disabling SVE."); ++ UseSVE = 0; ++ } else if ((MaxVectorSize % 16) == 0 && is_power_of_2(MaxVectorSize)) { ++ int new_vl = prctl(PR_SVE_SET_VL, MaxVectorSize); ++ _initial_sve_vector_length = new_vl; ++ // If MaxVectorSize is larger than system largest supported SVE vector length, above prctl() ++ // call will set task vector length to the system largest supported value. So, we also update ++ // MaxVectorSize to that largest supported value. ++ if (new_vl < 0) { ++ vm_exit_during_initialization( ++ err_msg("Current system does not support SVE vector length for MaxVectorSize: %d", ++ (int)MaxVectorSize)); ++ } else if (new_vl != MaxVectorSize) { ++ warning("Current system only supports max SVE vector length %d. Set MaxVectorSize to %d", ++ new_vl, new_vl); ++ } ++ MaxVectorSize = new_vl; ++ } else { ++ vm_exit_during_initialization(err_msg("Unsupported MaxVectorSize: %d", (int)MaxVectorSize)); ++ } ++ } ++ ++ if (UseSVE == 0) { // NEON ++ int min_vector_size = 8; ++ int max_vector_size = 16; ++ if (!FLAG_IS_DEFAULT(MaxVectorSize)) { ++ if (!is_power_of_2(MaxVectorSize)) { ++ vm_exit_during_initialization(err_msg("Unsupported MaxVectorSize: %d", (int)MaxVectorSize)); ++ } else if (MaxVectorSize < min_vector_size) { ++ warning("MaxVectorSize must be at least %i on this platform", min_vector_size); ++ FLAG_SET_DEFAULT(MaxVectorSize, min_vector_size); ++ } else if (MaxVectorSize > max_vector_size) { ++ warning("MaxVectorSize must be at most %i on this platform", max_vector_size); ++ FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size); ++ } ++ } else { ++ FLAG_SET_DEFAULT(MaxVectorSize, 16); ++ } ++ } ++ + #ifdef COMPILER2 + if (FLAG_IS_DEFAULT(OptoScheduling)) { + OptoScheduling = true; +diff --git a/src/hotspot/cpu/aarch64/vm_version_aarch64.hpp b/src/hotspot/cpu/aarch64/vm_version_aarch64.hpp +index 0a17f3e73..23c3c1338 100644 +--- a/src/hotspot/cpu/aarch64/vm_version_aarch64.hpp ++++ b/src/hotspot/cpu/aarch64/vm_version_aarch64.hpp +@@ -40,6 +40,7 @@ protected: + static int _variant; + static int _revision; + static int _stepping; ++ static int _initial_sve_vector_length; + + struct PsrInfo { + uint32_t dczid_el0; +@@ -101,6 +102,7 @@ public: + static int cpu_model2() { return _model2; } + static int cpu_variant() { return _variant; } + static int cpu_revision() { return _revision; } ++ static int get_initial_sve_vector_length() { return _initial_sve_vector_length; }; + static ByteSize dczid_el0_offset() { return byte_offset_of(PsrInfo, dczid_el0); } + static ByteSize ctr_el0_offset() { return byte_offset_of(PsrInfo, ctr_el0); } + static bool is_zva_enabled() { +diff --git a/src/hotspot/cpu/aarch64/vmreg_aarch64.cpp b/src/hotspot/cpu/aarch64/vmreg_aarch64.cpp +index 9fd20be0f..35d0adf5b 100644 +--- a/src/hotspot/cpu/aarch64/vmreg_aarch64.cpp ++++ b/src/hotspot/cpu/aarch64/vmreg_aarch64.cpp +@@ -33,15 +33,17 @@ void VMRegImpl::set_regName() { + Register reg = ::as_Register(0); + int i; + for (i = 0; i < ConcreteRegisterImpl::max_gpr ; ) { +- regName[i++] = reg->name(); +- 
regName[i++] = reg->name(); ++ for (int j = 0 ; j < RegisterImpl::max_slots_per_register ; j++) { ++ regName[i++] = reg->name(); ++ } + reg = reg->successor(); + } + + FloatRegister freg = ::as_FloatRegister(0); + for ( ; i < ConcreteRegisterImpl::max_fpr ; ) { +- regName[i++] = freg->name(); +- regName[i++] = freg->name(); ++ for (int j = 0 ; j < FloatRegisterImpl::max_slots_per_register ; j++) { ++ regName[i++] = freg->name(); ++ } + freg = freg->successor(); + } + +diff --git a/src/hotspot/cpu/aarch64/vmreg_aarch64.hpp b/src/hotspot/cpu/aarch64/vmreg_aarch64.hpp +index 0b1d000bb..c249c26a8 100644 +--- a/src/hotspot/cpu/aarch64/vmreg_aarch64.hpp ++++ b/src/hotspot/cpu/aarch64/vmreg_aarch64.hpp +@@ -38,13 +38,14 @@ inline Register as_Register() { + + assert( is_Register(), "must be"); + // Yuk +- return ::as_Register(value() >> 1); ++ return ::as_Register(value() / RegisterImpl::max_slots_per_register); + } + + inline FloatRegister as_FloatRegister() { + assert( is_FloatRegister() && is_even(value()), "must be" ); + // Yuk +- return ::as_FloatRegister((value() - ConcreteRegisterImpl::max_gpr) >> 1); ++ return ::as_FloatRegister((value() - ConcreteRegisterImpl::max_gpr) / ++ FloatRegisterImpl::max_slots_per_register); + } + + inline bool is_concrete() { +diff --git a/src/hotspot/cpu/aarch64/vmreg_aarch64.inline.hpp b/src/hotspot/cpu/aarch64/vmreg_aarch64.inline.hpp +index 145f9797f..dde7a7a91 100644 +--- a/src/hotspot/cpu/aarch64/vmreg_aarch64.inline.hpp ++++ b/src/hotspot/cpu/aarch64/vmreg_aarch64.inline.hpp +@@ -1,6 +1,6 @@ + /* +- * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. +- * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * Copyright (c) 2006, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -28,11 +28,16 @@ + + inline VMReg RegisterImpl::as_VMReg() { + if( this==noreg ) return VMRegImpl::Bad(); +- return VMRegImpl::as_VMReg(encoding() << 1 ); ++ return VMRegImpl::as_VMReg(encoding() * RegisterImpl::max_slots_per_register); + } + + inline VMReg FloatRegisterImpl::as_VMReg() { +- return VMRegImpl::as_VMReg((encoding() << 1) + ConcreteRegisterImpl::max_gpr); ++ return VMRegImpl::as_VMReg((encoding() * FloatRegisterImpl::max_slots_per_register) + ++ ConcreteRegisterImpl::max_gpr); ++} ++ ++inline VMReg PRegisterImpl::as_VMReg() { ++ return VMRegImpl::as_VMReg(encoding() + ConcreteRegisterImpl::max_fpr); + } + + #endif // CPU_AARCH64_VM_VMREG_AARCH64_INLINE_HPP +diff --git a/src/hotspot/cpu/arm/arm.ad b/src/hotspot/cpu/arm/arm.ad +index 18e81bdc5..87e5f331b 100644 +--- a/src/hotspot/cpu/arm/arm.ad ++++ b/src/hotspot/cpu/arm/arm.ad +@@ -1093,7 +1093,7 @@ const bool Matcher::match_rule_supported(int opcode) { + return true; // Per default match rules are supported. 
+ } + +-const bool Matcher::match_rule_supported_vector(int opcode, int vlen) { ++const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) { + + // TODO + // identify extra cases that we might want to provide match rules for +@@ -1121,6 +1121,14 @@ const int Matcher::vector_width_in_bytes(BasicType bt) { + return MaxVectorSize; + } + ++const bool Matcher::supports_scalable_vector() { ++ return false; ++} ++ ++const int Matcher::scalable_vector_reg_size(const BasicType bt) { ++ return -1; ++} ++ + // Vector ideal reg corresponding to specified size in bytes + const uint Matcher::vector_ideal_reg(int size) { + assert(MaxVectorSize >= size, ""); +diff --git a/src/hotspot/cpu/ppc/ppc.ad b/src/hotspot/cpu/ppc/ppc.ad +index 07bda6d71..4cbe2cf5c 100644 +--- a/src/hotspot/cpu/ppc/ppc.ad ++++ b/src/hotspot/cpu/ppc/ppc.ad +@@ -2242,7 +2242,7 @@ const bool Matcher::match_rule_supported(int opcode) { + return true; // Per default match rules are supported. + } + +-const bool Matcher::match_rule_supported_vector(int opcode, int vlen) { ++const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) { + + // TODO + // identify extra cases that we might want to provide match rules for +@@ -2310,6 +2310,14 @@ const int Matcher::min_vector_size(const BasicType bt) { + return max_vector_size(bt); // Same as max. + } + ++const bool Matcher::supports_scalable_vector() { ++ return false; ++} ++ ++const int Matcher::scalable_vector_reg_size(const BasicType bt) { ++ return -1; ++} ++ + // PPC implementation uses VSX load/store instructions (if + // SuperwordUseVSX) which support 4 byte but not arbitrary alignment + const bool Matcher::misaligned_vectors_ok() { +diff --git a/src/hotspot/cpu/s390/s390.ad b/src/hotspot/cpu/s390/s390.ad +index 96c231b0a..782c1c7c4 100644 +--- a/src/hotspot/cpu/s390/s390.ad ++++ b/src/hotspot/cpu/s390/s390.ad +@@ -1522,7 +1522,7 @@ const bool Matcher::match_rule_supported(int opcode) { + // BUT: make sure match rule is not disabled by a false predicate! + } + +-const bool Matcher::match_rule_supported_vector(int opcode, int vlen) { ++const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) { + // TODO + // Identify extra cases that we might want to provide match rules for + // e.g. Op_ vector nodes and other intrinsics while guarding with vlen. +@@ -1573,6 +1573,14 @@ const int Matcher::min_vector_size(const BasicType bt) { + return max_vector_size(bt); // Same as max. + } + ++const bool Matcher::supports_scalable_vector() { ++ return false; ++} ++ ++const int Matcher::scalable_vector_reg_size(const BasicType bt) { ++ return -1; ++} ++ + const uint Matcher::vector_shift_count_ideal_reg(int size) { + fatal("vector shift is not supported"); + return Node::NotAMachineReg; +diff --git a/src/hotspot/cpu/sparc/sparc.ad b/src/hotspot/cpu/sparc/sparc.ad +index a09c795c9..3b1b1046e 100644 +--- a/src/hotspot/cpu/sparc/sparc.ad ++++ b/src/hotspot/cpu/sparc/sparc.ad +@@ -1710,7 +1710,7 @@ const bool Matcher::match_rule_supported(int opcode) { + return true; // Per default match rules are supported. 
+ } + +-const bool Matcher::match_rule_supported_vector(int opcode, int vlen) { ++const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) { + + // TODO + // identify extra cases that we might want to provide match rules for +diff --git a/src/hotspot/cpu/x86/x86.ad b/src/hotspot/cpu/x86/x86.ad +index 8fb9a3e34..dc5f1ecf9 100644 +--- a/src/hotspot/cpu/x86/x86.ad ++++ b/src/hotspot/cpu/x86/x86.ad +@@ -1,5 +1,5 @@ + // +-// Copyright (c) 2011, 2018, Oracle and/or its affiliates. All rights reserved. ++// Copyright (c) 2011, 2020, Oracle and/or its affiliates. All rights reserved. + // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + // + // This code is free software; you can redistribute it and/or modify it +@@ -1341,7 +1341,7 @@ const bool Matcher::match_rule_supported(int opcode) { + return ret_value; // Per default match rules are supported. + } + +-const bool Matcher::match_rule_supported_vector(int opcode, int vlen) { ++const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) { + // identify extra cases that we might want to provide match rules for + // e.g. Op_ vector nodes and other intrinsics while guarding with vlen + bool ret_value = match_rule_supported(opcode); +@@ -1468,6 +1468,14 @@ const int Matcher::min_vector_size(const BasicType bt) { + return MIN2(size,max_size); + } + ++const bool Matcher::supports_scalable_vector() { ++ return false; ++} ++ ++const int Matcher::scalable_vector_reg_size(const BasicType bt) { ++ return -1; ++} ++ + // Vector ideal reg corresponding to specified size in bytes + const uint Matcher::vector_ideal_reg(int size) { + assert(MaxVectorSize >= size, ""); +diff --git a/src/hotspot/cpu/x86/x86_64.ad b/src/hotspot/cpu/x86/x86_64.ad +index c2d1aca0c..0db8e6a14 100644 +--- a/src/hotspot/cpu/x86/x86_64.ad ++++ b/src/hotspot/cpu/x86/x86_64.ad +@@ -2887,7 +2887,7 @@ frame + RAX_H_num // Op_RegL + }; + // Excluded flags and vector registers. +- assert(ARRAY_SIZE(hi) == _last_machine_leaf - 6, "missing type"); ++ assert(ARRAY_SIZE(hi) == _last_machine_leaf - 8, "missing type"); + return OptoRegPair(hi[ideal_reg], lo[ideal_reg]); + %} + %} +diff --git a/src/hotspot/share/adlc/archDesc.cpp b/src/hotspot/share/adlc/archDesc.cpp +index ba61aa4c0..9e41b2dc6 100644 +--- a/src/hotspot/share/adlc/archDesc.cpp ++++ b/src/hotspot/share/adlc/archDesc.cpp +@@ -1,5 +1,5 @@ + // +-// Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. ++// Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. + // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + // + // This code is free software; you can redistribute it and/or modify it +@@ -929,6 +929,7 @@ const char *ArchDesc::getIdealType(const char *idealOp) { + // Match Vector types. + if (strncmp(idealOp, "Vec",3)==0) { + switch(last_char) { ++ case 'A': return "TypeVect::VECTA"; + case 'S': return "TypeVect::VECTS"; + case 'D': return "TypeVect::VECTD"; + case 'X': return "TypeVect::VECTX"; +@@ -939,6 +940,10 @@ const char *ArchDesc::getIdealType(const char *idealOp) { + } + } + ++ if (strncmp(idealOp, "RegVMask", 8) == 0) { ++ return "Type::BOTTOM"; ++ } ++ + // !!!!! 
+ switch(last_char) { + case 'I': return "TypeInt::INT"; +diff --git a/src/hotspot/share/adlc/formssel.cpp b/src/hotspot/share/adlc/formssel.cpp +index 5ba1fdc57..45826d3b2 100644 +--- a/src/hotspot/share/adlc/formssel.cpp ++++ b/src/hotspot/share/adlc/formssel.cpp +@@ -3946,6 +3946,8 @@ bool MatchRule::is_base_register(FormDict &globals) const { + strcmp(opType,"RegL")==0 || + strcmp(opType,"RegF")==0 || + strcmp(opType,"RegD")==0 || ++ strcmp(opType,"RegVMask")==0 || ++ strcmp(opType,"VecA")==0 || + strcmp(opType,"VecS")==0 || + strcmp(opType,"VecD")==0 || + strcmp(opType,"VecX")==0 || +diff --git a/src/hotspot/share/opto/chaitin.cpp b/src/hotspot/share/opto/chaitin.cpp +index 914dc43f6..710af9de8 100644 +--- a/src/hotspot/share/opto/chaitin.cpp ++++ b/src/hotspot/share/opto/chaitin.cpp +@@ -77,6 +77,7 @@ void LRG::dump() const { + if( _is_oop ) tty->print("Oop "); + if( _is_float ) tty->print("Float "); + if( _is_vector ) tty->print("Vector "); ++ if( _is_scalable ) tty->print("Scalable "); + if( _was_spilled1 ) tty->print("Spilled "); + if( _was_spilled2 ) tty->print("Spilled2 "); + if( _direct_conflict ) tty->print("Direct_conflict "); +@@ -646,7 +647,15 @@ void PhaseChaitin::Register_Allocate() { + // Live ranges record the highest register in their mask. + // We want the low register for the AD file writer's convenience. + OptoReg::Name hi = lrg.reg(); // Get hi register +- OptoReg::Name lo = OptoReg::add(hi, (1-lrg.num_regs())); // Find lo ++ int num_regs = lrg.num_regs(); ++ if (lrg.is_scalable() && OptoReg::is_stack(hi)) { ++ // For scalable vector registers, when they are allocated in physical ++ // registers, num_regs is RegMask::SlotsPerVecA for reg mask of scalable ++ // vector. If they are allocated on stack, we need to get the actual ++ // num_regs, which reflects the physical length of scalable registers. ++ num_regs = lrg.scalable_reg_slots(); ++ } ++ OptoReg::Name lo = OptoReg::add(hi, (1-num_regs)); // Find lo + // We have to use pair [lo,lo+1] even for wide vectors because + // the rest of code generation works only with pairs. It is safe + // since for registers encoding only 'lo' is used. +@@ -801,8 +810,19 @@ void PhaseChaitin::gather_lrg_masks( bool after_aggressive ) { + // Check for vector live range (only if vector register is used). + // On SPARC vector uses RegD which could be misaligned so it is not + // processes as vector in RA. +- if (RegMask::is_vector(ireg)) ++ if (RegMask::is_vector(ireg)) { + lrg._is_vector = 1; ++ if (ireg == Op_VecA) { ++ assert(Matcher::supports_scalable_vector(), "scalable vector should be supported"); ++ lrg._is_scalable = 1; ++ // For scalable vector, when it is allocated in physical register, ++ // num_regs is RegMask::SlotsPerVecA for reg mask, ++ // which may not be the actual physical register size. ++ // If it is allocated in stack, we need to get the actual ++ // physical length of scalable vector register. 
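// For example, on a 512-bit SVE machine scalable_vector_reg_size(T_FLOAT)
// yields 64 bytes / 4 = 16 slots, the size a spilled VecA live range
// really occupies, independent of the fixed RegMask::SlotsPerVecA width
// used while it sits in a register.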
++ lrg.set_scalable_reg_slots(Matcher::scalable_vector_reg_size(T_FLOAT)); ++ } ++ } + assert(n_type->isa_vect() == NULL || lrg._is_vector || ireg == Op_RegD || ireg == Op_RegL, + "vector must be in vector registers"); + +@@ -912,6 +932,13 @@ void PhaseChaitin::gather_lrg_masks( bool after_aggressive ) { + lrg.set_reg_pressure(1); + #endif + break; ++ case Op_VecA: ++ assert(Matcher::supports_scalable_vector(), "does not support scalable vector"); ++ assert(RegMask::num_registers(Op_VecA) == RegMask::SlotsPerVecA, "sanity"); ++ assert(lrgmask.is_aligned_sets(RegMask::SlotsPerVecA), "vector should be aligned"); ++ lrg.set_num_regs(RegMask::SlotsPerVecA); ++ lrg.set_reg_pressure(1); ++ break; + case Op_VecS: + assert(Matcher::vector_size_supported(T_BYTE,4), "sanity"); + assert(RegMask::num_registers(Op_VecS) == RegMask::SlotsPerVecS, "sanity"); +@@ -1358,6 +1385,46 @@ static bool is_legal_reg(LRG &lrg, OptoReg::Name reg, int chunk) { + return false; + } + ++static OptoReg::Name find_first_set(LRG &lrg, RegMask mask, int chunk) { ++ int num_regs = lrg.num_regs(); ++ OptoReg::Name assigned = mask.find_first_set(lrg, num_regs); ++ ++ if (lrg.is_scalable()) { ++ // a physical register is found ++ if (chunk == 0 && OptoReg::is_reg(assigned)) { ++ return assigned; ++ } ++ ++ // find available stack slots for scalable register ++ if (lrg._is_vector) { ++ num_regs = lrg.scalable_reg_slots(); ++ // if actual scalable vector register is exactly SlotsPerVecA * 32 bits ++ if (num_regs == RegMask::SlotsPerVecA) { ++ return assigned; ++ } ++ ++ // mask has been cleared out by clear_to_sets(SlotsPerVecA) before choose_color, but it ++ // does not work for scalable size. We have to find adjacent scalable_reg_slots() bits ++ // instead of SlotsPerVecA bits. ++ assigned = mask.find_first_set(lrg, num_regs); // find highest valid reg ++ while (OptoReg::is_valid(assigned) && RegMask::can_represent(assigned)) { ++ // Verify the found reg has scalable_reg_slots() bits set. ++ if (mask.is_valid_reg(assigned, num_regs)) { ++ return assigned; ++ } else { ++ // Remove more for each iteration ++ mask.Remove(assigned - num_regs + 1); // Unmask the lowest reg ++ mask.clear_to_sets(RegMask::SlotsPerVecA); // Align by SlotsPerVecA bits ++ assigned = mask.find_first_set(lrg, num_regs); ++ } ++ } ++ return OptoReg::Bad; // will cause chunk change, and retry next chunk ++ } ++ } ++ ++ return assigned; ++} ++ + // Choose a color using the biasing heuristic + OptoReg::Name PhaseChaitin::bias_color( LRG &lrg, int chunk ) { + +@@ -1391,7 +1458,7 @@ OptoReg::Name PhaseChaitin::bias_color( LRG &lrg, int chunk ) { + RegMask tempmask = lrg.mask(); + tempmask.AND(lrgs(copy_lrg).mask()); + tempmask.clear_to_sets(lrg.num_regs()); +- OptoReg::Name reg = tempmask.find_first_set(lrg.num_regs()); ++ OptoReg::Name reg = find_first_set(lrg, tempmask, chunk); + if (OptoReg::is_valid(reg)) + return reg; + } +@@ -1400,7 +1467,7 @@ OptoReg::Name PhaseChaitin::bias_color( LRG &lrg, int chunk ) { + // If no bias info exists, just go with the register selection ordering + if (lrg._is_vector || lrg.num_regs() == 2) { + // Find an aligned set +- return OptoReg::add(lrg.mask().find_first_set(lrg.num_regs()),chunk); ++ return OptoReg::add(find_first_set(lrg, lrg.mask(), chunk), chunk); + } + + // CNC - Fun hack. Alternate 1st and 2nd selection. 
Enables post-allocate +@@ -1455,7 +1522,6 @@ uint PhaseChaitin::Select( ) { + LRG *lrg = &lrgs(lidx); + _simplified = lrg->_next; + +- + #ifndef PRODUCT + if (trace_spilling()) { + ttyLocker ttyl; +@@ -1539,7 +1605,6 @@ uint PhaseChaitin::Select( ) { + // Bump register mask up to next stack chunk + chunk += RegMask::CHUNK_SIZE; + lrg->Set_All(); +- + goto retry_next_chunk; + } + +@@ -1564,12 +1629,21 @@ uint PhaseChaitin::Select( ) { + int n_regs = lrg->num_regs(); + assert(!lrg->_is_vector || !lrg->_fat_proj, "sanity"); + if (n_regs == 1 || !lrg->_fat_proj) { +- assert(!lrg->_is_vector || n_regs <= RegMask::SlotsPerVecZ, "sanity"); ++ if (Matcher::supports_scalable_vector()) { ++ assert(!lrg->_is_vector || n_regs <= RegMask::SlotsPerVecA, "sanity"); ++ } else { ++ assert(!lrg->_is_vector || n_regs <= RegMask::SlotsPerVecZ, "sanity"); ++ } + lrg->Clear(); // Clear the mask + lrg->Insert(reg); // Set regmask to match selected reg + // For vectors and pairs, also insert the low bit of the pair +- for (int i = 1; i < n_regs; i++) ++ // We always choose the high bit, then mask the low bits by register size ++ if (lrg->is_scalable() && OptoReg::is_stack(lrg->reg())) { // stack ++ n_regs = lrg->scalable_reg_slots(); ++ } ++ for (int i = 1; i < n_regs; i++) { + lrg->Insert(OptoReg::add(reg,-i)); ++ } + lrg->set_mask_size(n_regs); + } else { // Else fatproj + // mask must be equal to fatproj bits, by definition +diff --git a/src/hotspot/share/opto/chaitin.hpp b/src/hotspot/share/opto/chaitin.hpp +index 776e3cf63..674791c64 100644 +--- a/src/hotspot/share/opto/chaitin.hpp ++++ b/src/hotspot/share/opto/chaitin.hpp +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -115,7 +115,9 @@ public: + _msize_valid=1; + if (_is_vector) { + assert(!_fat_proj, "sanity"); +- _mask.verify_sets(_num_regs); ++ if (!(_is_scalable && OptoReg::is_stack(_reg))) { ++ _mask.verify_sets(_num_regs); ++ } + } else if (_num_regs == 2 && !_fat_proj) { + _mask.verify_pairs(); + } +@@ -139,14 +141,37 @@ public: + void clear_to_pairs() { _mask.clear_to_pairs(); debug_only(_msize_valid=0;) } + void clear_to_sets() { _mask.clear_to_sets(_num_regs); debug_only(_msize_valid=0;) } + +- // Number of registers this live range uses when it colors + private: ++ // Number of registers this live range uses when it colors + uint16_t _num_regs; // 2 for Longs and Doubles, 1 for all else + // except _num_regs is kill count for fat_proj ++ ++ // For scalable register, num_regs may not be the actual physical register size. ++ // We need to get the actual physical length of scalable register when scalable ++ // register is spilled. The size of one slot is 32-bit. ++ uint _scalable_reg_slots; // Actual scalable register length of slots. ++ // Meaningful only when _is_scalable is true. 
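// (With 1024-bit vectors, for instance, a spilled VecA live range covers
// 1024 / 32 = 32 stack slots even though _num_regs keeps the fixed
// RegMask::SlotsPerVecA value that coloring works with.)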
+ public: + int num_regs() const { return _num_regs; } + void set_num_regs( int reg ) { assert( _num_regs == reg || !_num_regs, "" ); _num_regs = reg; } + ++ uint scalable_reg_slots() { return _scalable_reg_slots; } ++ void set_scalable_reg_slots(uint slots) { ++ assert(_is_scalable, "scalable register"); ++ assert(slots > 0, "slots of scalable register is not valid"); ++ _scalable_reg_slots = slots; ++ } ++ ++ bool is_scalable() { ++#ifdef ASSERT ++ if (_is_scalable) { ++ // Should only be a vector for now, but it could also be a RegVMask in future. ++ assert(_is_vector && (_num_regs == RegMask::SlotsPerVecA), "unexpected scalable reg"); ++ } ++#endif ++ return _is_scalable; ++ } ++ + private: + // Number of physical registers this live range uses when it colors + // Architecture and register-set dependent +@@ -172,6 +197,8 @@ public: + uint _is_oop:1, // Live-range holds an oop + _is_float:1, // True if in float registers + _is_vector:1, // True if in vector registers ++ _is_scalable:1, // True if register size is scalable ++ // e.g. Arm SVE vector/predicate registers. + _was_spilled1:1, // True if prior spilling on def + _was_spilled2:1, // True if twice prior spilling on def + _is_bound:1, // live range starts life with no +diff --git a/src/hotspot/share/opto/matcher.cpp b/src/hotspot/share/opto/matcher.cpp +index 05fdab21e..14e5425b8 100644 +--- a/src/hotspot/share/opto/matcher.cpp ++++ b/src/hotspot/share/opto/matcher.cpp +@@ -84,6 +84,7 @@ Matcher::Matcher() + idealreg2spillmask [Op_RegF] = NULL; + idealreg2spillmask [Op_RegD] = NULL; + idealreg2spillmask [Op_RegP] = NULL; ++ idealreg2spillmask [Op_VecA] = NULL; + idealreg2spillmask [Op_VecS] = NULL; + idealreg2spillmask [Op_VecD] = NULL; + idealreg2spillmask [Op_VecX] = NULL; +@@ -97,6 +98,7 @@ Matcher::Matcher() + idealreg2debugmask [Op_RegF] = NULL; + idealreg2debugmask [Op_RegD] = NULL; + idealreg2debugmask [Op_RegP] = NULL; ++ idealreg2debugmask [Op_VecA] = NULL; + idealreg2debugmask [Op_VecS] = NULL; + idealreg2debugmask [Op_VecD] = NULL; + idealreg2debugmask [Op_VecX] = NULL; +@@ -110,6 +112,7 @@ Matcher::Matcher() + idealreg2mhdebugmask[Op_RegF] = NULL; + idealreg2mhdebugmask[Op_RegD] = NULL; + idealreg2mhdebugmask[Op_RegP] = NULL; ++ idealreg2mhdebugmask[Op_VecA] = NULL; + idealreg2mhdebugmask[Op_VecS] = NULL; + idealreg2mhdebugmask[Op_VecD] = NULL; + idealreg2mhdebugmask[Op_VecX] = NULL; +@@ -417,6 +420,8 @@ static RegMask *init_input_masks( uint size, RegMask &ret_adr, RegMask &fp ) { + return rms; + } + ++#define NOF_STACK_MASKS (3*6+6) ++ + //---------------------------init_first_stack_mask----------------------------- + // Create the initial stack mask used by values spilling to the stack. + // Disallow any debug info in outgoing argument areas by setting the +@@ -424,7 +429,12 @@ static RegMask *init_input_masks( uint size, RegMask &ret_adr, RegMask &fp ) { + void Matcher::init_first_stack_mask() { + + // Allocate storage for spill masks as masks for the appropriate load type. 
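// NOF_STACK_MASKS is 3 * 6 + 6 = 24: three tables (spill, debug and
// mhdebug masks) over the six scalar ideal types RegN/RegI/RegL/RegF/
// RegD/RegP, plus one spill mask for each of VecA/VecS/VecD/VecX/VecY/VecZ.
// That is one mask more than the old 3*6+5 allocation, since VecA is new.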
+- RegMask *rms = (RegMask*)C->comp_arena()->Amalloc_D(sizeof(RegMask) * (3*6+5)); ++ RegMask *rms = (RegMask*)C->comp_arena()->Amalloc_D(sizeof(RegMask) * NOF_STACK_MASKS); ++ ++ // Initialize empty placeholder masks into the newly allocated arena ++ for (int i = 0; i < NOF_STACK_MASKS; i++) { ++ new (rms + i) RegMask(); ++ } + + idealreg2spillmask [Op_RegN] = &rms[0]; + idealreg2spillmask [Op_RegI] = &rms[1]; +@@ -447,11 +457,12 @@ void Matcher::init_first_stack_mask() { + idealreg2mhdebugmask[Op_RegD] = &rms[16]; + idealreg2mhdebugmask[Op_RegP] = &rms[17]; + +- idealreg2spillmask [Op_VecS] = &rms[18]; +- idealreg2spillmask [Op_VecD] = &rms[19]; +- idealreg2spillmask [Op_VecX] = &rms[20]; +- idealreg2spillmask [Op_VecY] = &rms[21]; +- idealreg2spillmask [Op_VecZ] = &rms[22]; ++ idealreg2spillmask [Op_VecA] = &rms[18]; ++ idealreg2spillmask [Op_VecS] = &rms[19]; ++ idealreg2spillmask [Op_VecD] = &rms[20]; ++ idealreg2spillmask [Op_VecX] = &rms[21]; ++ idealreg2spillmask [Op_VecY] = &rms[22]; ++ idealreg2spillmask [Op_VecZ] = &rms[23]; + + OptoReg::Name i; + +@@ -478,6 +489,7 @@ void Matcher::init_first_stack_mask() { + // Keep spill masks aligned. + aligned_stack_mask.clear_to_pairs(); + assert(aligned_stack_mask.is_AllStack(), "should be infinite stack"); ++ RegMask scalable_stack_mask = aligned_stack_mask; + + *idealreg2spillmask[Op_RegP] = *idealreg2regmask[Op_RegP]; + #ifdef _LP64 +@@ -548,28 +560,48 @@ void Matcher::init_first_stack_mask() { + *idealreg2spillmask[Op_VecZ] = *idealreg2regmask[Op_VecZ]; + idealreg2spillmask[Op_VecZ]->OR(aligned_stack_mask); + } +- if (UseFPUForSpilling) { +- // This mask logic assumes that the spill operations are +- // symmetric and that the registers involved are the same size. +- // On sparc for instance we may have to use 64 bit moves will +- // kill 2 registers when used with F0-F31. +- idealreg2spillmask[Op_RegI]->OR(*idealreg2regmask[Op_RegF]); +- idealreg2spillmask[Op_RegF]->OR(*idealreg2regmask[Op_RegI]); ++ ++ if (Matcher::supports_scalable_vector()) { ++ int k = 1; ++ OptoReg::Name in = OptoReg::add(_in_arg_limit, -1); ++ // Exclude last input arg stack slots to avoid spilling vector register there, ++ // otherwise vector spills could stomp over stack slots in caller frame. ++ for (; (in >= init_in) && (k < scalable_vector_reg_size(T_FLOAT)); k++) { ++ scalable_stack_mask.Remove(in); ++ in = OptoReg::add(in, -1); ++ } ++ ++ // For VecA ++ scalable_stack_mask.clear_to_sets(RegMask::SlotsPerVecA); ++ assert(scalable_stack_mask.is_AllStack(), "should be infinite stack"); ++ *idealreg2spillmask[Op_VecA] = *idealreg2regmask[Op_VecA]; ++ idealreg2spillmask[Op_VecA]->OR(scalable_stack_mask); ++ } else { ++ *idealreg2spillmask[Op_VecA] = RegMask::Empty; ++ } ++ ++ if (UseFPUForSpilling) { ++ // This mask logic assumes that the spill operations are ++ // symmetric and that the registers involved are the same size. ++ // On sparc for instance we may have to use 64 bit moves will ++ // kill 2 registers when used with F0-F31. 
++ idealreg2spillmask[Op_RegI]->OR(*idealreg2regmask[Op_RegF]); ++ idealreg2spillmask[Op_RegF]->OR(*idealreg2regmask[Op_RegI]); + #ifdef _LP64 +- idealreg2spillmask[Op_RegN]->OR(*idealreg2regmask[Op_RegF]); +- idealreg2spillmask[Op_RegL]->OR(*idealreg2regmask[Op_RegD]); +- idealreg2spillmask[Op_RegD]->OR(*idealreg2regmask[Op_RegL]); +- idealreg2spillmask[Op_RegP]->OR(*idealreg2regmask[Op_RegD]); ++ idealreg2spillmask[Op_RegN]->OR(*idealreg2regmask[Op_RegF]); ++ idealreg2spillmask[Op_RegL]->OR(*idealreg2regmask[Op_RegD]); ++ idealreg2spillmask[Op_RegD]->OR(*idealreg2regmask[Op_RegL]); ++ idealreg2spillmask[Op_RegP]->OR(*idealreg2regmask[Op_RegD]); + #else +- idealreg2spillmask[Op_RegP]->OR(*idealreg2regmask[Op_RegF]); ++ idealreg2spillmask[Op_RegP]->OR(*idealreg2regmask[Op_RegF]); + #ifdef ARM +- // ARM has support for moving 64bit values between a pair of +- // integer registers and a double register +- idealreg2spillmask[Op_RegL]->OR(*idealreg2regmask[Op_RegD]); +- idealreg2spillmask[Op_RegD]->OR(*idealreg2regmask[Op_RegL]); ++ // ARM has support for moving 64bit values between a pair of ++ // integer registers and a double register ++ idealreg2spillmask[Op_RegL]->OR(*idealreg2regmask[Op_RegD]); ++ idealreg2spillmask[Op_RegD]->OR(*idealreg2regmask[Op_RegL]); + #endif + #endif +- } ++ } + + // Make up debug masks. Any spill slot plus callee-save registers. + // Caller-save registers are assumed to be trashable by the various +@@ -872,6 +904,10 @@ void Matcher::init_spill_mask( Node *ret ) { + idealreg2regmask[Op_RegP] = &spillP->out_RegMask(); + + // Vector regmasks. ++ if (Matcher::supports_scalable_vector()) { ++ MachNode *spillVectA = match_tree(new LoadVectorNode(NULL,mem,fp,atp,TypeVect::VECTA)); ++ idealreg2regmask[Op_VecA] = &spillVectA->out_RegMask(); ++ } + if (Matcher::vector_size_supported(T_BYTE,4)) { + TypeVect::VECTS = TypeVect::make(T_BYTE, 4); + MachNode *spillVectS = match_tree(new LoadVectorNode(NULL,mem,fp,atp,TypeVect::VECTS)); +@@ -1573,7 +1609,6 @@ Node* Matcher::Label_Root(const Node* n, State* svec, Node* control, Node*& mem) + } + } + +- + // Call DFA to match this node, and return + svec->DFA( n->Opcode(), n ); + +diff --git a/src/hotspot/share/opto/matcher.hpp b/src/hotspot/share/opto/matcher.hpp +index 244e3d1f8..9a8307102 100644 +--- a/src/hotspot/share/opto/matcher.hpp ++++ b/src/hotspot/share/opto/matcher.hpp +@@ -310,7 +310,7 @@ public: + + // identify extra cases that we might want to provide match rules for + // e.g. Op_ vector nodes and other intrinsics while guarding with vlen +- static const bool match_rule_supported_vector(int opcode, int vlen); ++ static const bool match_rule_supported_vector(int opcode, int vlen, BasicType bt); + + // Some microarchitectures have mask registers used on vectors + static const bool has_predicated_vectors(void); +@@ -333,6 +333,10 @@ public: + Matcher::min_vector_size(bt) <= size); + } + ++ static const bool supports_scalable_vector(); ++ // Actual max scalable vector register length. ++ static const int scalable_vector_reg_size(const BasicType bt); ++ + // Vector ideal reg + static const uint vector_ideal_reg(int len); + static const uint vector_shift_count_ideal_reg(int len); +diff --git a/src/hotspot/share/opto/opcodes.cpp b/src/hotspot/share/opto/opcodes.cpp +index e31e8d847..1a826d8ba 100644 +--- a/src/hotspot/share/opto/opcodes.cpp ++++ b/src/hotspot/share/opto/opcodes.cpp +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 1998, 2012, Oracle and/or its affiliates. All rights reserved. 
++ * Copyright (c) 1998, 2020, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -38,12 +38,14 @@ const char *NodeClassNames[] = { + "RegF", + "RegD", + "RegL", +- "RegFlags", ++ "VecA", + "VecS", + "VecD", + "VecX", + "VecY", + "VecZ", ++ "RegVMask", ++ "RegFlags", + "_last_machine_leaf", + #include "classes.hpp" + "_last_class_name", +diff --git a/src/hotspot/share/opto/opcodes.hpp b/src/hotspot/share/opto/opcodes.hpp +index ae3d61ce0..ec96ba055 100644 +--- a/src/hotspot/share/opto/opcodes.hpp ++++ b/src/hotspot/share/opto/opcodes.hpp +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -37,11 +37,13 @@ enum Opcodes { + macro(RegF) // Machine float register + macro(RegD) // Machine double register + macro(RegL) // Machine long register ++ macro(VecA) // Machine vectora register + macro(VecS) // Machine vectors register + macro(VecD) // Machine vectord register + macro(VecX) // Machine vectorx register + macro(VecY) // Machine vectory register + macro(VecZ) // Machine vectorz register ++ macro(RegVMask) // Vector mask/predicate register + macro(RegFlags) // Machine flags register + _last_machine_leaf, // Split between regular opcodes and machine + #include "classes.hpp" +diff --git a/src/hotspot/share/opto/postaloc.cpp b/src/hotspot/share/opto/postaloc.cpp +index d572ac9fe..3514b37bc 100644 +--- a/src/hotspot/share/opto/postaloc.cpp ++++ b/src/hotspot/share/opto/postaloc.cpp +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 1998, 2016, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 1998, 2020, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -266,9 +266,9 @@ int PhaseChaitin::elide_copy( Node *n, int k, Block *current_block, Node_List &v + Node *val = skip_copies(n->in(k)); + if (val == x) return blk_adjust; // No progress? + +- int n_regs = RegMask::num_registers(val->ideal_reg()); + uint val_idx = _lrg_map.live_range_id(val); + OptoReg::Name val_reg = lrgs(val_idx).reg(); ++ int n_regs = RegMask::num_registers(val->ideal_reg(), lrgs(val_idx)); + + // See if it happens to already be in the correct register! + // (either Phi's direct register, or the common case of the name +@@ -305,8 +305,26 @@ int PhaseChaitin::elide_copy( Node *n, int k, Block *current_block, Node_List &v + } + + Node *vv = value[reg]; ++ // For scalable register, number of registers may be inconsistent between ++ // "val_reg" and "reg". For example, when "val" resides in register ++ // but "reg" is located in stack. 
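// The code that follows implements the distinction described in the comment
// above. A self-contained sketch with stand-in types (FakeLRG is
// hypothetical; the SlotsPerVecA value of 8 is taken from the regmask.hpp
// hunk further down):
#include <cassert>

const int SlotsPerVecA = 8;   // fixed regmask granularity for a VecA operand

struct FakeLRG {
  bool scalable;              // models lrg.is_scalable()
  int  scalable_reg_slots;    // models lrg.scalable_reg_slots()
};

// A scalable live range always occupies SlotsPerVecA mask bits while it sits
// in a register, but once spilled its stack footprint depends on the actual
// hardware vector length, which is only known at run time.
int slots_for(const FakeLRG& lrg, bool assigned_to_stack) {
  return (lrg.scalable && assigned_to_stack) ? lrg.scalable_reg_slots
                                             : SlotsPerVecA;
}

int main() {
  FakeLRG v = {true, 16};               // e.g. a 512-bit SVE vector register
  assert(slots_for(v, false) == 8);     // in a register: mask granularity
  assert(slots_for(v, true) == 16);     // on the stack: real slot count
  return 0;
}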
++ if (lrgs(val_idx).is_scalable()) { ++ assert(val->ideal_reg() == Op_VecA, "scalable vector register"); ++ if (OptoReg::is_stack(reg)) { ++ n_regs = lrgs(val_idx).scalable_reg_slots(); ++ } else { ++ n_regs = RegMask::SlotsPerVecA; ++ } ++ } + if (n_regs > 1) { // Doubles and vectors check for aligned-adjacent set +- uint last = (n_regs-1); // Looking for the last part of a set ++ uint last; ++ if (lrgs(val_idx).is_scalable()) { ++ assert(val->ideal_reg() == Op_VecA, "scalable vector register"); ++ // For scalable vector register, regmask is always SlotsPerVecA bits aligned ++ last = RegMask::SlotsPerVecA - 1; ++ } else { ++ last = (n_regs-1); // Looking for the last part of a set ++ } + if ((reg&last) != last) continue; // Wrong part of a set + if (!register_contains_value(vv, reg, n_regs, value)) continue; // Different value + } +@@ -591,7 +609,7 @@ void PhaseChaitin::post_allocate_copy_removal() { + uint k; + Node *phi = block->get_node(j); + uint pidx = _lrg_map.live_range_id(phi); +- OptoReg::Name preg = lrgs(_lrg_map.live_range_id(phi)).reg(); ++ OptoReg::Name preg = lrgs(pidx).reg(); + + // Remove copies remaining on edges. Check for junk phi. + Node *u = NULL; +@@ -619,7 +637,7 @@ void PhaseChaitin::post_allocate_copy_removal() { + if( pidx ) { + value.map(preg,phi); + regnd.map(preg,phi); +- int n_regs = RegMask::num_registers(phi->ideal_reg()); ++ int n_regs = RegMask::num_registers(phi->ideal_reg(), lrgs(pidx)); + for (int l = 1; l < n_regs; l++) { + OptoReg::Name preg_lo = OptoReg::add(preg,-l); + value.map(preg_lo,phi); +@@ -663,7 +681,7 @@ void PhaseChaitin::post_allocate_copy_removal() { + regnd.map(ureg, def); + // Record other half of doubles + uint def_ideal_reg = def->ideal_reg(); +- int n_regs = RegMask::num_registers(def_ideal_reg); ++ int n_regs = RegMask::num_registers(def_ideal_reg, lrgs(_lrg_map.live_range_id(def))); + for (int l = 1; l < n_regs; l++) { + OptoReg::Name ureg_lo = OptoReg::add(ureg,-l); + if (!value[ureg_lo] && +@@ -707,7 +725,7 @@ void PhaseChaitin::post_allocate_copy_removal() { + } + + uint n_ideal_reg = n->ideal_reg(); +- int n_regs = RegMask::num_registers(n_ideal_reg); ++ int n_regs = RegMask::num_registers(n_ideal_reg, lrgs(lidx)); + if (n_regs == 1) { + // If Node 'n' does not change the value mapped by the register, + // then 'n' is a useless copy. Do not update the register->node +diff --git a/src/hotspot/share/opto/regmask.cpp b/src/hotspot/share/opto/regmask.cpp +index 2e04c42eb..dd9b5476b 100644 +--- a/src/hotspot/share/opto/regmask.cpp ++++ b/src/hotspot/share/opto/regmask.cpp +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it +@@ -24,6 +24,7 @@ + + #include "precompiled.hpp" + #include "opto/ad.hpp" ++#include "opto/chaitin.hpp" + #include "opto/compile.hpp" + #include "opto/matcher.hpp" + #include "opto/node.hpp" +@@ -116,30 +117,47 @@ const RegMask RegMask::Empty( + + //============================================================================= + bool RegMask::is_vector(uint ireg) { +- return (ireg == Op_VecS || ireg == Op_VecD || ++ return (ireg == Op_VecA || ireg == Op_VecS || ireg == Op_VecD || + ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ); + } + + int RegMask::num_registers(uint ireg) { + switch(ireg) { + case Op_VecZ: +- return 16; ++ return SlotsPerVecZ; + case Op_VecY: +- return 8; ++ return SlotsPerVecY; + case Op_VecX: +- return 4; ++ return SlotsPerVecX; + case Op_VecD: ++ return SlotsPerVecD; + case Op_RegD: + case Op_RegL: + #ifdef _LP64 + case Op_RegP: + #endif + return 2; ++ case Op_VecA: ++ assert(Matcher::supports_scalable_vector(), "does not support scalable vector"); ++ return SlotsPerVecA; + } + // Op_VecS and the rest ideal registers. + return 1; + } + ++int RegMask::num_registers(uint ireg, LRG &lrg) { ++ int n_regs = num_registers(ireg); ++ ++ // assigned is OptoReg which is selected by register allocator ++ OptoReg::Name assigned = lrg.reg(); ++ assert(OptoReg::is_valid(assigned), "should be valid opto register"); ++ ++ if (lrg.is_scalable() && OptoReg::is_stack(assigned)) { ++ n_regs = lrg.scalable_reg_slots(); ++ } ++ return n_regs; ++} ++ + //------------------------------find_first_pair-------------------------------- + // Find the lowest-numbered register pair in the mask. Return the + // HIGHEST register number in the pair, or BAD if no pairs. +@@ -238,14 +256,30 @@ int RegMask::is_bound_pair() const { + return true; + } + ++// Check that whether given reg number with size is valid ++// for current regmask, where reg is the highest number. ++bool RegMask::is_valid_reg(OptoReg::Name reg, const int size) const { ++ for (int i = 0; i < size; i++) { ++ if (!Member(reg - i)) { ++ return false; ++ } ++ } ++ return true; ++} ++ + // only indicies of power 2 are accessed, so index 3 is only filled in for storage. + static int low_bits[5] = { 0x55555555, 0x11111111, 0x01010101, 0x00000000, 0x00010001 }; + //------------------------------find_first_set--------------------------------- + // Find the lowest-numbered register set in the mask. Return the + // HIGHEST register number in the set, or BAD if no sets. + // Works also for size 1. +-OptoReg::Name RegMask::find_first_set(const int size) const { +- verify_sets(size); ++OptoReg::Name RegMask::find_first_set(LRG &lrg, const int size) const { ++ if (lrg.is_scalable()) { ++ // For scalable vector register, regmask is SlotsPerVecA bits aligned. ++ assert(is_aligned_sets(SlotsPerVecA), "mask is not aligned, adjacent sets"); ++ } else { ++ assert(is_aligned_sets(size), "mask is not aligned, adjacent sets"); ++ } + for (int i = 0; i < RM_SIZE; i++) { + if (_A[i]) { // Found some bits + int bit = _A[i] & -_A[i]; // Extract low bit +@@ -325,12 +359,16 @@ bool RegMask::is_aligned_sets(const int size) const { + while (bits) { // Check bits for pairing + int bit = bits & -bits; // Extract low bit + // Low bit is not odd means its mis-aligned. +- if ((bit & low_bits_mask) == 0) return false; ++ if ((bit & low_bits_mask) == 0) { ++ return false; ++ } + // Do extra work since (bit << size) may overflow. 
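// The overflow-safe set construction on the next lines is easiest to check
// with concrete numbers; a standalone sketch (the values are arbitrary
// examples, not taken from a real regmask):
#include <cassert>

int main() {
  // x & -x isolates the least-significant set bit of x.
  int bits = 0x34;                  // 0b110100
  int bit  = bits & -bits;          // 0b000100
  assert(bit == 0x04);

  // Build the mask covering the whole size-slot set that starts at 'bit',
  // without ever shifting a bit past the top of the word:
  int size   = 4;
  int hi_bit = bit << (size - 1);   // highest bit of the set: 0b100000
  int set    = hi_bit + ((hi_bit - 1) & ~(bit - 1));
  assert(set == 0x3c);              // 0b111100, all four slots of the set
  return 0;
}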
+ int hi_bit = bit << (size-1); // high bit + int set = hi_bit + ((hi_bit-1) & ~(bit-1)); + // Check for aligned adjacent bits in this set +- if ((bits & set) != set) return false; ++ if ((bits & set) != set) { ++ return false; ++ } + bits -= set; // Remove this set + } + } +diff --git a/src/hotspot/share/opto/regmask.hpp b/src/hotspot/share/opto/regmask.hpp +index c64d08795..b733b87ad 100644 +--- a/src/hotspot/share/opto/regmask.hpp ++++ b/src/hotspot/share/opto/regmask.hpp +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -28,6 +28,8 @@ + #include "code/vmreg.hpp" + #include "opto/optoreg.hpp" + ++class LRG; ++ + // Some fun naming (textual) substitutions: + // + // RegMask::get_low_elem() ==> RegMask::find_first_elem() +@@ -95,11 +97,13 @@ public: + // requirement is internal to the allocator, and independent of any + // particular platform. + enum { SlotsPerLong = 2, ++ SlotsPerVecA = 8, + SlotsPerVecS = 1, + SlotsPerVecD = 2, + SlotsPerVecX = 4, + SlotsPerVecY = 8, +- SlotsPerVecZ = 16 }; ++ SlotsPerVecZ = 16, ++ }; + + // A constructor only used by the ADLC output. All mask fields are filled + // in directly. Calls to this look something like RM(1,2,3,4); +@@ -204,10 +208,14 @@ public: + return false; + } + ++ // Check that whether given reg number with size is valid ++ // for current regmask, where reg is the highest number. ++ bool is_valid_reg(OptoReg::Name reg, const int size) const; ++ + // Find the lowest-numbered register set in the mask. Return the + // HIGHEST register number in the set, or BAD if no sets. + // Assert that the mask contains only bit sets. +- OptoReg::Name find_first_set(const int size) const; ++ OptoReg::Name find_first_set(LRG &lrg, const int size) const; + + // Clear out partial bits; leave only aligned adjacent bit sets of size. + void clear_to_sets(const int size); +@@ -226,6 +234,7 @@ public: + + static bool is_vector(uint ireg); + static int num_registers(uint ireg); ++ static int num_registers(uint ireg, LRG &lrg); + + // Fast overlap test. Non-zero if any registers in common. + int overlap( const RegMask &rm ) const { +diff --git a/src/hotspot/share/opto/superword.cpp b/src/hotspot/share/opto/superword.cpp +index e7714ba3e..a6a62ea4a 100644 +--- a/src/hotspot/share/opto/superword.cpp ++++ b/src/hotspot/share/opto/superword.cpp +@@ -93,8 +93,11 @@ SuperWord::SuperWord(PhaseIdealLoop* phase) : + //------------------------------transform_loop--------------------------- + void SuperWord::transform_loop(IdealLoopTree* lpt, bool do_optimization) { + assert(UseSuperWord, "should be"); +- // Do vectors exist on this architecture? +- if (Matcher::vector_width_in_bytes(T_BYTE) < 2) return; ++ // SuperWord only works with power of two vector sizes. 
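// The guard added below relies on the usual single-bit test for powers of
// two. A sketch; is_pow2 stands in for HotSpot's is_power_of_2, and the
// 384-bit width is a hypothetical SVE configuration, not a measured one:
#include <cassert>

static bool is_pow2(int x) {
  // A positive x is a power of two iff exactly one bit is set.
  return x > 0 && (x & (x - 1)) == 0;
}

int main() {
  assert(is_pow2(16) && is_pow2(32) && is_pow2(64));
  // SVE allows any vector length that is a multiple of 128 bits, so a
  // 384-bit machine would report 48 bytes; SuperWord must bail out rather
  // than mis-size its packs.
  assert(!is_pow2(48));
  return 0;
}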
++ int vector_width = Matcher::vector_width_in_bytes(T_BYTE); ++ if (vector_width < 2 || !is_power_of_2(vector_width)) { ++ return; ++ } + + assert(lpt->_head->is_CountedLoop(), "must be"); + CountedLoopNode *cl = lpt->_head->as_CountedLoop(); +diff --git a/src/hotspot/share/opto/type.cpp b/src/hotspot/share/opto/type.cpp +index 8898a3f00..37ec81995 100644 +--- a/src/hotspot/share/opto/type.cpp ++++ b/src/hotspot/share/opto/type.cpp +@@ -79,6 +79,7 @@ const Type::TypeInfo Type::_type_info[Type::lastype] = { + { Bad, T_ILLEGAL, "vectory:", false, 0, relocInfo::none }, // VectorY + { Bad, T_ILLEGAL, "vectorz:", false, 0, relocInfo::none }, // VectorZ + #else // all other ++ { Bad, T_ILLEGAL, "vectora:", false, Op_VecA, relocInfo::none }, // VectorA. + { Bad, T_ILLEGAL, "vectors:", false, Op_VecS, relocInfo::none }, // VectorS + { Bad, T_ILLEGAL, "vectord:", false, Op_VecD, relocInfo::none }, // VectorD + { Bad, T_ILLEGAL, "vectorx:", false, Op_VecX, relocInfo::none }, // VectorX +@@ -649,6 +650,10 @@ void Type::Initialize_shared(Compile* current) { + // get_zero_type() should not happen for T_CONFLICT + _zero_type[T_CONFLICT]= NULL; + ++ if (Matcher::supports_scalable_vector()) { ++ TypeVect::VECTA = TypeVect::make(T_BYTE, Matcher::scalable_vector_reg_size(T_BYTE)); ++ } ++ + // Vector predefined types, it needs initialized _const_basic_type[]. + if (Matcher::vector_size_supported(T_BYTE,4)) { + TypeVect::VECTS = TypeVect::make(T_BYTE,4); +@@ -665,6 +670,8 @@ void Type::Initialize_shared(Compile* current) { + if (Matcher::vector_size_supported(T_FLOAT,16)) { + TypeVect::VECTZ = TypeVect::make(T_FLOAT,16); + } ++ ++ mreg2type[Op_VecA] = TypeVect::VECTA; + mreg2type[Op_VecS] = TypeVect::VECTS; + mreg2type[Op_VecD] = TypeVect::VECTD; + mreg2type[Op_VecX] = TypeVect::VECTX; +@@ -984,6 +991,7 @@ const Type::TYPES Type::dual_type[Type::lastype] = { + + Bad, // Tuple - handled in v-call + Bad, // Array - handled in v-call ++ Bad, // VectorA - handled in v-call + Bad, // VectorS - handled in v-call + Bad, // VectorD - handled in v-call + Bad, // VectorX - handled in v-call +@@ -1880,7 +1888,6 @@ const TypeTuple *TypeTuple::LONG_PAIR; + const TypeTuple *TypeTuple::INT_CC_PAIR; + const TypeTuple *TypeTuple::LONG_CC_PAIR; + +- + //------------------------------make------------------------------------------- + // Make a TypeTuple from the range of a method signature + const TypeTuple *TypeTuple::make_range(ciSignature* sig) { +@@ -2252,6 +2259,7 @@ bool TypeAry::ary_must_be_exact() const { + + //==============================TypeVect======================================= + // Convenience common pre-built types. 
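// The pre-built types declared next start out as NULL statics and are only
// constructed when the matcher reports support, so consumers must tolerate a
// missing type. A sketch of that pattern with stand-in names (VectType and
// supports_scalable_vector here are illustrations, not the HotSpot symbols):
#include <cassert>
#include <cstddef>

struct VectType { int length; };

static const VectType* VECTA_ = NULL;          // mirrors TypeVect::VECTA

static bool supports_scalable_vector() { return false; }  // demo assumption

static void initialize_shared() {
  if (supports_scalable_vector()) {
    static VectType t = { 32 };
    VECTA_ = &t;
  }
}

int main() {
  initialize_shared();
  // On a machine without scalable vectors VECTA_ stays NULL; every path
  // that would dereference it is guarded by the same capability check.
  assert(VECTA_ == NULL);
  return 0;
}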
++const TypeVect *TypeVect::VECTA = NULL; // vector length agnostic + const TypeVect *TypeVect::VECTS = NULL; // 32-bit vectors + const TypeVect *TypeVect::VECTD = NULL; // 64-bit vectors + const TypeVect *TypeVect::VECTX = NULL; // 128-bit vectors +@@ -2262,10 +2270,11 @@ const TypeVect *TypeVect::VECTZ = NULL; // 512-bit vectors + const TypeVect* TypeVect::make(const Type *elem, uint length) { + BasicType elem_bt = elem->array_element_basic_type(); + assert(is_java_primitive(elem_bt), "only primitive types in vector"); +- assert(length > 1 && is_power_of_2(length), "vector length is power of 2"); + assert(Matcher::vector_size_supported(elem_bt, length), "length in range"); + int size = length * type2aelembytes(elem_bt); + switch (Matcher::vector_ideal_reg(size)) { ++ case Op_VecA: ++ return (TypeVect*)(new TypeVectA(elem, length))->hashcons(); + case Op_VecS: + return (TypeVect*)(new TypeVectS(elem, length))->hashcons(); + case Op_RegL: +@@ -2297,7 +2306,7 @@ const Type *TypeVect::xmeet( const Type *t ) const { + + default: // All else is a mistake + typerr(t); +- ++ case VectorA: + case VectorS: + case VectorD: + case VectorX: +@@ -2352,6 +2361,8 @@ bool TypeVect::empty(void) const { + #ifndef PRODUCT + void TypeVect::dump2(Dict &d, uint depth, outputStream *st) const { + switch (base()) { ++ case VectorA: ++ st->print("vectora["); break; + case VectorS: + st->print("vectors["); break; + case VectorD: +diff --git a/src/hotspot/share/opto/type.hpp b/src/hotspot/share/opto/type.hpp +index 6c8194670..ca92fe3ab 100644 +--- a/src/hotspot/share/opto/type.hpp ++++ b/src/hotspot/share/opto/type.hpp +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -53,6 +53,7 @@ class TypeNarrowKlass; + class TypeAry; + class TypeTuple; + class TypeVect; ++class TypeVectA; + class TypeVectS; + class TypeVectD; + class TypeVectX; +@@ -87,6 +88,7 @@ public: + + Tuple, // Method signature or object layout + Array, // Array types ++ VectorA, // (Scalable) Vector types for vector length agnostic + VectorS, // 32bit Vector types + VectorD, // 64bit Vector types + VectorX, // 128bit Vector types +@@ -754,6 +756,7 @@ public: + virtual const Type *xmeet( const Type *t) const; + virtual const Type *xdual() const; // Compute dual right now. + ++ static const TypeVect *VECTA; + static const TypeVect *VECTS; + static const TypeVect *VECTD; + static const TypeVect *VECTX; +@@ -765,6 +768,11 @@ public: + #endif + }; + ++class TypeVectA : public TypeVect { ++ friend class TypeVect; ++ TypeVectA(const Type* elem, uint length) : TypeVect(VectorA, elem, length) {} ++}; ++ + class TypeVectS : public TypeVect { + friend class TypeVect; + TypeVectS(const Type* elem, uint length) : TypeVect(VectorS, elem, length) {} +@@ -1611,12 +1619,12 @@ inline const TypeAry *Type::is_ary() const { + } + + inline const TypeVect *Type::is_vect() const { +- assert( _base >= VectorS && _base <= VectorZ, "Not a Vector" ); ++ assert( _base >= VectorA && _base <= VectorZ, "Not a Vector" ); + return (TypeVect*)this; + } + + inline const TypeVect *Type::isa_vect() const { +- return (_base >= VectorS && _base <= VectorZ) ? (TypeVect*)this : NULL; ++ return (_base >= VectorA && _base <= VectorZ) ? 
(TypeVect*)this : NULL; + } + + inline const TypePtr *Type::is_ptr() const { +diff --git a/src/hotspot/share/opto/vectornode.cpp b/src/hotspot/share/opto/vectornode.cpp +index fae147fa8..3a0a42513 100644 +--- a/src/hotspot/share/opto/vectornode.cpp ++++ b/src/hotspot/share/opto/vectornode.cpp +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2007, 2017, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2007, 2020, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -221,7 +221,7 @@ bool VectorNode::implemented(int opc, uint vlen, BasicType bt) { + (vlen > 1) && is_power_of_2(vlen) && + Matcher::vector_size_supported(bt, vlen)) { + int vopc = VectorNode::opcode(opc, bt); +- return vopc > 0 && Matcher::match_rule_supported_vector(vopc, vlen); ++ return vopc > 0 && Matcher::match_rule_supported_vector(vopc, vlen, bt); + } + return false; + } +@@ -608,7 +608,7 @@ bool ReductionNode::implemented(int opc, uint vlen, BasicType bt) { + (vlen > 1) && is_power_of_2(vlen) && + Matcher::vector_size_supported(bt, vlen)) { + int vopc = ReductionNode::opcode(opc, bt); +- return vopc != opc && Matcher::match_rule_supported(vopc); ++ return vopc != opc && Matcher::match_rule_supported_vector(vopc, vlen, bt); + } + return false; + } +diff --git a/test/hotspot/jtreg/compiler/c2/aarch64/TestSVEWithJNI.java b/test/hotspot/jtreg/compiler/c2/aarch64/TestSVEWithJNI.java +new file mode 100644 +index 000000000..dc15ca800 +--- /dev/null ++++ b/test/hotspot/jtreg/compiler/c2/aarch64/TestSVEWithJNI.java +@@ -0,0 +1,128 @@ ++/* ++* Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. ++* Copyright (c) 2020, Arm Limited. All rights reserved. ++* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++* ++* This code is free software; you can redistribute it and/or modify it ++* under the terms of the GNU General Public License version 2 only, as ++* published by the Free Software Foundation. ++* ++* This code is distributed in the hope that it will be useful, but WITHOUT ++* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++* version 2 for more details (a copy is included in the LICENSE file that ++* accompanied this code). ++* ++* You should have received a copy of the GNU General Public License version ++* 2 along with this work; if not, write to the Free Software Foundation, ++* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++* ++* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++* or visit www.oracle.com if you need additional information or have any ++* questions. ++* ++*/ ++ ++/** ++ * @test ++ * ++ * @requires os.arch == "aarch64" & vm.compiler2.enabled ++ * @summary Verify VM SVE checking behavior ++ * @library /test/lib ++ * @run main/othervm/native compiler.c2.aarch64.TestSVEWithJNI ++ * ++ */ ++ ++package compiler.c2.aarch64; ++ ++import java.util.ArrayList; ++import java.util.Collections; ++import java.util.List; ++import jdk.test.lib.process.ProcessTools; ++import jdk.test.lib.process.OutputAnalyzer; ++ ++public class TestSVEWithJNI { ++ static { ++ System.loadLibrary("TestSVEWithJNI"); ++ } ++ ++ static final int EXIT_CODE = 99; ++ // Returns a nonnegative on success, or a negative value on error. 
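// The two natives declared next wrap the Linux prctl(2) interface. A reduced
// standalone sketch of what the companion C library does (aarch64 Linux
// only; the PR_SVE_* values are the patch's own fallback defines, and the
// 0xffff length mask is an assumption about the kernel's return encoding):
#include <cstdio>
#include <sys/prctl.h>

#ifndef PR_SVE_GET_VL
#define PR_SVE_SET_VL 50
#define PR_SVE_GET_VL 51
#endif

int main() {
  // Returns the current thread's SVE vector control value, or -1 on error
  // (for example on a kernel or CPU without SVE).
  int vl = prctl(PR_SVE_GET_VL);
  if (vl < 0) {
    std::puts("SVE not available on this kernel/CPU");
    return 0;
  }
  // The low 16 bits carry the vector length in bytes; higher bits are flags.
  std::printf("current vector length: %d bytes\n", vl & 0xffff);
  return 0;
}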
++ public static native int setVectorLength(int arg); ++ // Returns a nonnegative value on success, or a negative value on error. ++ public static native int getVectorLength(); ++ ++ public static final String MSG = "Current Vector Size: "; ++ public static void testNormal() { ++ int vlen = getVectorLength(); ++ System.out.println(MSG + vlen); ++ // Should be fine if no vector length changed. ++ if (setVectorLength(vlen) < 0) { ++ throw new Error("Error in setting vector length."); ++ } ++ } ++ ++ public static void testAbort() { ++ int vlen = getVectorLength(); ++ if (vlen <= 16) { ++ throw new Error("Error: unsupported vector length."); ++ } ++ if (setVectorLength(16) < 0) { ++ throw new Error("Error: setting vector length failed."); ++ } ++ } ++ ++ public static ProcessBuilder createProcessBuilder(String [] args, String mode) { ++ List vmopts = new ArrayList<>(); ++ String testjdkPath = System.getProperty("test.jdk"); ++ Collections.addAll(vmopts, "-Dtest.jdk=" + testjdkPath); ++ Collections.addAll(vmopts, args); ++ Collections.addAll(vmopts, TestSVEWithJNI.class.getName(), mode); ++ return ProcessTools.createJavaProcessBuilder(vmopts.toArray(new String[vmopts.size()])); ++ } ++ ++ public static void main(String [] args) throws Exception { ++ if (args.length == 0) { ++ int vlen = getVectorLength(); ++ if (vlen < 0) { ++ return; ++ } ++ String [][] testOpts = { ++ {"-Xint", "-XX:UseSVE=1"}, ++ {"-Xcomp", "-XX:UseSVE=1"}, ++ }; ++ ProcessBuilder pb; ++ OutputAnalyzer output; ++ for (String [] opts : testOpts) { ++ pb = createProcessBuilder(opts, "normal"); ++ output = new OutputAnalyzer(pb.start()); ++ output.shouldHaveExitValue(EXIT_CODE); ++ ++ pb = createProcessBuilder(opts, "abort"); ++ output = new OutputAnalyzer(pb.start()); ++ output.shouldNotHaveExitValue(EXIT_CODE); ++ output.shouldMatch("(error|Error|ERROR)"); ++ } ++ ++ // Verify MaxVectorSize ++ ++ // Any SVE architecture should support 128-bit vector size. ++ pb = createProcessBuilder(new String []{"-XX:UseSVE=1", "-XX:MaxVectorSize=16"}, "normal"); ++ output = new OutputAnalyzer(pb.start()); ++ output.shouldHaveExitValue(EXIT_CODE); ++ output.shouldContain(MSG + 16); ++ ++ // An unsupported large vector size value. ++ pb = createProcessBuilder(new String []{"-XX:UseSVE=1", "-XX:MaxVectorSize=512"}, "normal"); ++ output = new OutputAnalyzer(pb.start()); ++ output.shouldHaveExitValue(EXIT_CODE); ++ output.shouldContain("warning"); ++ } else if (args[0].equals("normal")) { ++ testNormal(); ++ System.exit(EXIT_CODE); ++ } else if (args[0].equals("abort")) { ++ testAbort(); ++ System.exit(EXIT_CODE); ++ } ++ } ++} +diff --git a/test/hotspot/jtreg/compiler/c2/aarch64/libTestSVEWithJNI.c b/test/hotspot/jtreg/compiler/c2/aarch64/libTestSVEWithJNI.c +new file mode 100644 +index 000000000..0cb3ab0b5 +--- /dev/null ++++ b/test/hotspot/jtreg/compiler/c2/aarch64/libTestSVEWithJNI.c +@@ -0,0 +1,68 @@ ++/* ++* Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. ++* Copyright (c) 2020, Arm Limited. All rights reserved. ++* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++* ++* This code is free software; you can redistribute it and/or modify it ++* under the terms of the GNU General Public License version 2 only, as ++* published by the Free Software Foundation. ++* ++* This code is distributed in the hope that it will be useful, but WITHOUT ++* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++* FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License ++* version 2 for more details (a copy is included in the LICENSE file that ++* accompanied this code). ++* ++* You should have received a copy of the GNU General Public License version ++* 2 along with this work; if not, write to the Free Software Foundation, ++* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++* ++* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++* or visit www.oracle.com if you need additional information or have any ++* questions. ++* ++*/ ++ ++#ifdef __aarch64__ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#ifndef PR_SVE_GET_VL ++// For old toolchains which do not have SVE related macros defined. ++#define PR_SVE_SET_VL 50 ++#define PR_SVE_GET_VL 51 ++#endif ++ ++int get_current_thread_vl() { ++ return prctl(PR_SVE_GET_VL); ++} ++ ++int set_current_thread_vl(unsigned long arg) { ++ return prctl(PR_SVE_SET_VL, arg); ++} ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++JNIEXPORT jint JNICALL Java_compiler_c2_aarch64_TestSVEWithJNI_setVectorLength ++(JNIEnv * env, jclass clz, jint length) { ++ return set_current_thread_vl(length); ++} ++ ++JNIEXPORT jint JNICALL Java_compiler_c2_aarch64_TestSVEWithJNI_getVectorLength ++(JNIEnv *env, jclass clz) { ++ return get_current_thread_vl(); ++} ++ ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#endif +-- +2.19.1 + diff --git a/add-integerCache-feature.patch b/add-integerCache-feature.patch new file mode 100755 index 0000000000000000000000000000000000000000..0665764676cbfd8605656c3966fbb6c4f4985989 --- /dev/null +++ b/add-integerCache-feature.patch @@ -0,0 +1,413 @@ +diff --git a/src/hotspot/share/prims/unsafe.cpp b/src/hotspot/share/prims/unsafe.cpp +index d8f1679b4..18ea89b85 100644 +--- a/src/hotspot/share/prims/unsafe.cpp ++++ b/src/hotspot/share/prims/unsafe.cpp +@@ -1018,7 +1018,11 @@ UNSAFE_ENTRY(jint, Unsafe_GetLoadAverage0(JNIEnv *env, jobject unsafe, jdoubleAr + return ret; + } UNSAFE_END + ++UNSAFE_ENTRY(jboolean, Unsafe_GetUseHashMapIntegerCache(JNIEnv *env, jobject unsafe)) { ++ return UseHashMapIntegerCache; ++} ++UNSAFE_END + + UNSAFE_ENTRY(jboolean, Unsafe_GetUseFastSerializer(JNIEnv *env, jobject unsafe)) { + return UseFastSerializer; + } +@@ -1108,6 +1113,7 @@ static JNINativeMethod jdk_internal_misc_Unsafe_methods[] = { + {CC "fullFence", CC "()V", FN_PTR(Unsafe_FullFence)}, + + {CC "isBigEndian0", CC "()Z", FN_PTR(Unsafe_isBigEndian0)}, ++ {CC "getUseHashMapIntegerCache", CC "()Z", FN_PTR(Unsafe_GetUseHashMapIntegerCache)}, + {CC "getUseFastSerializer", CC "()Z", FN_PTR(Unsafe_GetUseFastSerializer)}, + {CC "unalignedAccess0", CC "()Z", FN_PTR(Unsafe_unalignedAccess0)} + }; +diff --git a/src/hotspot/share/runtime/globals.hpp b/src/hotspot/share/runtime/globals.hpp +index 47458b6c1..b8c0bec40 100644 +--- a/src/hotspot/share/runtime/globals.hpp ++++ b/src/hotspot/share/runtime/globals.hpp +@@ -2677,6 +2677,11 @@ define_pd_global(uint64_t,MaxRAM, 1ULL*G); + JFR_ONLY(product(ccstr, StartFlightRecording, NULL, \ + "Start flight recording with options")) \ + \ ++ experimental(bool, UseHashMapIntegerCache, false, \ ++ "The integer cache is an array of references to objects of" \ ++ "the HashMap Value type, indexed by the unboxed int key value." 
\ ++ "faster in execution, higher in memory consumption.") \ ++ \ + experimental(bool, UseFastSerializer, false, \ + "Cache-based serialization.It is extremely fast, but it can only" \ + "be effective in certain scenarios.") \ +diff --git a/src/java.base/share/classes/java/util/HashMap.java b/src/java.base/share/classes/java/util/HashMap.java +index df303031a..c260b61fd 100644 +--- a/src/java.base/share/classes/java/util/HashMap.java ++++ b/src/java.base/share/classes/java/util/HashMap.java +@@ -35,6 +35,7 @@ import java.util.function.BiFunction; + import java.util.function.Consumer; + import java.util.function.Function; + import jdk.internal.misc.SharedSecrets; ++import jdk.internal.misc.Unsafe; + + /** + * Hash table based implementation of the {@code Map} interface. This +@@ -272,6 +273,28 @@ public class HashMap extends AbstractMap + */ + static final int MIN_TREEIFY_CAPACITY = 64; + ++ /** ++ * Used to get the commandline option: UseHashMapIntegerCache. ++ */ ++ private static final Unsafe UNSAFE = Unsafe.getUnsafe(); ++ ++ /** ++ * Indicate integerCache can be performed. disable if HashMap.Node.setValue ++ * is directly used to update Node value. ++ */ ++ private static boolean enableIntegerCache = UNSAFE.getUseHashMapIntegerCache(); ++ ++ /** ++ * The smallest table size for create integer cache. ++ */ ++ private static final int MIN_INTEGER_CACHE = 2048; ++ ++ /** ++ * The factor used in create integer cache to guarantee most Key are ++ * Integer and in range. ++ */ ++ private static final float INTEGER_CACHE_FACTOR = 0.95f; ++ + /** + * Basic hash bin node, used for most entries. (See below for + * TreeNode subclass, and in LinkedHashMap for its Entry subclass.) +@@ -298,6 +321,10 @@ public class HashMap extends AbstractMap + } + + public final V setValue(V newValue) { ++ // Disable integerCache in all HashMap instance. ++ if (key != null && key instanceof Integer) { ++ enableIntegerCache = false; ++ } + V oldValue = value; + value = newValue; + return oldValue; +@@ -390,6 +417,12 @@ public class HashMap extends AbstractMap + */ + transient Node[] table; + ++ /** ++ * Cache Value> Map ++ * integerCache[key->intValue] = V ++ */ ++ transient Object[] integerCache; ++ + /** + * Holds cached entrySet(). Note that AbstractMap fields are used + * for keySet() and values(). +@@ -547,7 +580,20 @@ public class HashMap extends AbstractMap + * + * @see #put(Object, Object) + */ ++ @SuppressWarnings("unchecked") + public V get(Object key) { ++ if (integerCache != null) { ++ if (enableIntegerCache == false) { ++ integerCache = null; ++ } ++ else if (key != null && key instanceof Integer) { ++ int val = ((Integer)key).intValue(); ++ if (val >= 0 && val < integerCache.length) { ++ return (V)integerCache[val]; ++ } ++ } ++ } ++ + Node e; + return (e = getNode(hash(key), key)) == null ? null : e.value; + } +@@ -588,6 +634,17 @@ public class HashMap extends AbstractMap + * key. 
+ */ + public boolean containsKey(Object key) { ++ if (integerCache != null) { ++ if (enableIntegerCache == false) { ++ integerCache = null; ++ } ++ else if (key != null && key instanceof Integer) { ++ int val = ((Integer)key).intValue(); ++ if (val >= 0 && val < integerCache.length && integerCache[val] != null) { ++ return true; ++ } ++ } ++ } + return getNode(hash(key), key) != null; + } + +@@ -620,6 +677,11 @@ public class HashMap extends AbstractMap + final V putVal(int hash, K key, V value, boolean onlyIfAbsent, + boolean evict) { + Node[] tab; Node p; int n, i; ++ ++ if (integerCache != null) { ++ updateIntegerCache(key, value, onlyIfAbsent); ++ } ++ + if ((tab = table) == null || (n = tab.length) == 0) + n = (tab = resize()).length; + if ((p = tab[i = (n - 1) & hash]) == null) +@@ -740,6 +802,8 @@ public class HashMap extends AbstractMap + } + } + } ++ ++ createIntegerCache(); + return newTab; + } + +@@ -839,6 +903,10 @@ public class HashMap extends AbstractMap + p.next = node.next; + ++modCount; + --size; ++ ++ if (integerCache != null) { ++ updateIntegerCache(node.key, null, false); ++ } + afterNodeRemoval(node); + return node; + } +@@ -858,6 +926,7 @@ public class HashMap extends AbstractMap + for (int i = 0; i < tab.length; ++i) + tab[i] = null; + } ++ integerCache = null; + } + + /** +@@ -882,6 +951,82 @@ public class HashMap extends AbstractMap + return false; + } + ++ /** ++ * 1. iterator all Keys and statistic ++ * Integer Key count, total count is size ++ * Integer Key count in range [0, table.length], get Max value. ++ * ++ * 2. Create integer cache ++ */ ++ @SuppressWarnings({"unchecked"}) ++ private final void createIntegerCache() { ++ int n = table.length; ++ int intKeyCount = 0; ++ int intKeyCountInrange = 0; ++ int maxIntKey = 0; ++ if (n < MIN_INTEGER_CACHE || (enableIntegerCache == false)) { ++ integerCache = null; ++ return; ++ } ++ Iterator it = this.keySet().iterator(); ++ while (it.hasNext()) { ++ K key = it.next(); ++ if (key != null && key instanceof Integer) { ++ intKeyCount++; ++ int val = ((Integer)key).intValue(); ++ if (val >= 0 && val < n) { ++ intKeyCountInrange++; ++ if (val > maxIntKey) ++ maxIntKey = val; ++ } ++ } ++ } ++ float keyIntRation = ((float)intKeyCount) / size; ++ float keyIntInRangeRation = ((float)intKeyCountInrange) / size; ++ if (keyIntRation >= INTEGER_CACHE_FACTOR && ++ keyIntInRangeRation >= INTEGER_CACHE_FACTOR) { ++ // compute integerCache size ++ int cacheMapSize = n < (2 * maxIntKey) ? n : (2 * maxIntKey); ++ integerCache = new Object[cacheMapSize]; ++ Iterator> entries = this.entrySet().iterator(); ++ while (entries.hasNext()) { ++ Map.Entry thisEntry = entries.next(); ++ K key = thisEntry.getKey(); ++ V value = thisEntry.getValue(); ++ if (key != null && key instanceof Integer) { ++ int val = ((Integer)key).intValue(); ++ if (val >= 0 && val < integerCache.length) { ++ integerCache[val] = value; ++ } ++ } ++ } ++ } else { ++ integerCache = null; ++ } ++ } ++ ++ /** ++ * put if integerCache null check outside of this call ++ * JIT will not inline this method (not hot) when HashMap is not Integer Key intensive. ++ * Otherwise it will always inline updateIntegerCache method. 
++ * ++ */ ++ private final void updateIntegerCache(K key, V value, boolean onlyIfAbsent) { ++ if (enableIntegerCache == false) { ++ integerCache = null; ++ return; ++ } ++ if (key != null && key instanceof Integer) { ++ int val = ((Integer)key).intValue(); ++ if (val >= 0 && val < integerCache.length) { ++ if (onlyIfAbsent && integerCache[val] != null) { ++ return; ++ } ++ integerCache[val] = value; ++ } ++ } ++ } ++ + /** + * Returns a {@link Set} view of the keys contained in this map. + * The set is backed by the map, so changes to the map are +@@ -1047,7 +1192,19 @@ public class HashMap extends AbstractMap + // Overrides of JDK8 Map extension methods + + @Override ++ @SuppressWarnings("unchecked") + public V getOrDefault(Object key, V defaultValue) { ++ if (integerCache != null) { ++ if (enableIntegerCache == false) { ++ integerCache = null; ++ } else if (key != null && key instanceof Integer) { ++ V value; ++ int val = ((Integer)key).intValue(); ++ if (val >= 0 && val < integerCache.length && (value = (V)integerCache[val]) != null) { ++ return value; ++ } ++ } ++ } + Node e; + return (e = getNode(hash(key), key)) == null ? defaultValue : e.value; + } +@@ -1068,6 +1225,9 @@ public class HashMap extends AbstractMap + if ((e = getNode(hash(key), key)) != null && + ((v = e.value) == oldValue || (v != null && v.equals(oldValue)))) { + e.value = newValue; ++ if (integerCache != null) { ++ updateIntegerCache(key, newValue, false); ++ } + afterNodeAccess(e); + return true; + } +@@ -1080,6 +1240,9 @@ public class HashMap extends AbstractMap + if ((e = getNode(hash(key), key)) != null) { + V oldValue = e.value; + e.value = value; ++ if (integerCache != null) { ++ updateIntegerCache(key, value, false); ++ } + afterNodeAccess(e); + return oldValue; + } +@@ -1136,6 +1299,9 @@ public class HashMap extends AbstractMap + return null; + } else if (old != null) { + old.value = v; ++ if (integerCache != null) { ++ updateIntegerCache(key, v, false); ++ } + afterNodeAccess(old); + return v; + } +@@ -1146,6 +1312,11 @@ public class HashMap extends AbstractMap + if (binCount >= TREEIFY_THRESHOLD - 1) + treeifyBin(tab, hash); + } ++ ++ if (integerCache != null) { ++ updateIntegerCache(key, v, false); ++ } ++ + modCount = mc + 1; + ++size; + afterNodeInsertion(true); +@@ -1176,6 +1347,9 @@ public class HashMap extends AbstractMap + if (mc != modCount) { throw new ConcurrentModificationException(); } + if (v != null) { + e.value = v; ++ if (integerCache != null) { ++ updateIntegerCache(key, v, false); ++ } + afterNodeAccess(e); + return v; + } +@@ -1230,6 +1404,9 @@ public class HashMap extends AbstractMap + if (old != null) { + if (v != null) { + old.value = v; ++ if (integerCache != null) { ++ updateIntegerCache(key, v, false); ++ } + afterNodeAccess(old); + } + else +@@ -1243,6 +1420,9 @@ public class HashMap extends AbstractMap + if (binCount >= TREEIFY_THRESHOLD - 1) + treeifyBin(tab, hash); + } ++ if (integerCache != null) { ++ updateIntegerCache(key, v, false); ++ } + modCount = mc + 1; + ++size; + afterNodeInsertion(true); +@@ -1303,6 +1483,9 @@ public class HashMap extends AbstractMap + } + if (v != null) { + old.value = v; ++ if (integerCache != null) { ++ updateIntegerCache(key, v, false); ++ } + afterNodeAccess(old); + } + else +@@ -1317,6 +1500,9 @@ public class HashMap extends AbstractMap + if (binCount >= TREEIFY_THRESHOLD - 1) + treeifyBin(tab, hash); + } ++ if (integerCache != null) { ++ updateIntegerCache(key, value, false); ++ } + ++modCount; + ++size; + afterNodeInsertion(true); +@@ 
-1350,6 +1536,9 @@ public class HashMap extends AbstractMap + for (Node e : tab) { + for (; e != null; e = e.next) { + e.value = function.apply(e.key, e.value); ++ if (integerCache != null) { ++ updateIntegerCache(e.key, e.value, false); ++ } + } + } + if (modCount != mc) +@@ -1823,6 +2012,7 @@ public class HashMap extends AbstractMap + modCount = 0; + threshold = 0; + size = 0; ++ integerCache = null; + } + + // Callbacks to allow LinkedHashMap post-actions +diff --git a/src/java.base/share/classes/jdk/internal/misc/Unsafe.java b/src/java.base/share/classes/jdk/internal/misc/Unsafe.java +index d78caabdc..4d71e671e 100644 +--- a/src/java.base/share/classes/jdk/internal/misc/Unsafe.java ++++ b/src/java.base/share/classes/jdk/internal/misc/Unsafe.java +@@ -3702,7 +3702,7 @@ public final class Unsafe { + private static int convEndian(boolean big, int n) { return big == BE ? n : Integer.reverseBytes(n) ; } + private static long convEndian(boolean big, long n) { return big == BE ? n : Long.reverseBytes(n) ; } + +- ++ public native boolean getUseHashMapIntegerCache(); + public native boolean getUseFastSerializer(); + private native long allocateMemory0(long bytes); + private native long reallocateMemory0(long address, long bytes); +-- +2.19.1 + diff --git a/add-zgc-parameter-adaptation-feature.patch b/add-zgc-parameter-adaptation-feature.patch new file mode 100755 index 0000000000000000000000000000000000000000..a9e8a087ef0ae1521f4cec235665fc8a1b9cb202 --- /dev/null +++ b/add-zgc-parameter-adaptation-feature.patch @@ -0,0 +1,328 @@ +diff --git a/src/hotspot/os/linux/os_linux.cpp b/src/hotspot/os/linux/os_linux.cpp +index fa2b2ddea..b357a36cd 100644 +--- a/src/hotspot/os/linux/os_linux.cpp ++++ b/src/hotspot/os/linux/os_linux.cpp +@@ -5273,7 +5273,6 @@ void os::Linux::numa_init() { + ls.print(" %d", node); + } + } +- log_info(gc, heap)("UseNUMA enabled for G1"); + } + } + +diff --git a/src/hotspot/share/gc/z/zDriver.cpp b/src/hotspot/share/gc/z/zDriver.cpp +index b3056bfcc..a9b905341 100644 +--- a/src/hotspot/share/gc/z/zDriver.cpp ++++ b/src/hotspot/share/gc/z/zDriver.cpp +@@ -324,6 +324,17 @@ public: + } + }; + ++class ZDriverRelocationScopeHelper : public StackObj { ++public: ++ ZDriverRelocationScopeHelper() { ++ ZStatRelocationRate::at_start(); ++ } ++ ++ ~ZDriverRelocationScopeHelper() { ++ ZStatRelocationRate::at_end(); ++ } ++}; ++ + void ZDriver::run_gc_cycle(GCCause::Cause cause) { + ZDriverCycleScope scope(cause); + +@@ -381,12 +392,12 @@ void ZDriver::run_gc_cycle(GCCause::Cause cause) { + + // Phase 9: Pause Relocate Start + { ++ ZDriverRelocationScopeHelper scope_helper; + ZRelocateStartClosure cl; + vm_operation(&cl); +- } + +- // Phase 10: Concurrent Relocate +- { ++ // Phase 10: Concurrent Relocate ++ + ZStatTimer timer(ZPhaseConcurrentRelocated); + ZHeap::heap()->relocate(); + } +diff --git a/src/hotspot/share/gc/z/zRelocationSetSelector.cpp b/src/hotspot/share/gc/z/zRelocationSetSelector.cpp +index 222529f14..780415498 100644 +--- a/src/hotspot/share/gc/z/zRelocationSetSelector.cpp ++++ b/src/hotspot/share/gc/z/zRelocationSetSelector.cpp +@@ -26,27 +26,52 @@ + #include "gc/z/zPage.inline.hpp" + #include "gc/z/zRelocationSet.hpp" + #include "gc/z/zRelocationSetSelector.hpp" ++#include "gc/z/zStat.hpp" + #include "logging/log.hpp" + #include "runtime/globals.hpp" + #include "utilities/debug.hpp" + ++// means the probability is 1 in 1000 that a sample is outside of the confidence interval.
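// The constant defined next is the two-sided normal quantile for a 99.9%
// confidence interval: about 1 sample in 1000 falls outside avg plus or
// minus 3.29 standard deviations. A worked sketch of the prediction it
// feeds (the numbers are invented stand-ins, not measured ZGC statistics):
#include <cassert>
#include <cstdio>

int main() {
  const double one_in_1000 = 3.290527;

  double rate_avg = 64.0 * 1024 * 1024;  // B/s, decayed small-page reloc rate
  double rate_sd  = 8.0 * 1024 * 1024;   // B/s, its standard deviation
  double spike    = 2.0;                 // plays ZAllocationSpikeTolerance
  double dur_avg  = 0.05;                // s, decayed relocation duration
  double dur_sd   = 0.01;                // s, its standard deviation

  // Upper confidence bounds on rate and duration, multiplied together, give
  // a conservative estimate of how many bytes one relocation cycle covers.
  double max_rate     = rate_avg * spike + rate_sd * one_in_1000;
  double max_duration = dur_avg + dur_sd * one_in_1000;
  double predicted    = max_rate * max_duration;

  std::printf("predicted relocation size: %.0f bytes\n", predicted);
  assert(predicted > 0.0);
  return 0;
}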
++const double ZRelocationSetSelectorGroup::one_in_1000 = 3.290527; ++ + ZRelocationSetSelectorGroup::ZRelocationSetSelectorGroup(const char* name, + size_t page_size, + size_t object_size_limit) : + _name(name), + _page_size(page_size), + _object_size_limit(object_size_limit), +- _fragmentation_limit(page_size * (ZFragmentationLimit / 100)), ++ _fragmentation_limit(ZAdatpivePageRelcaim ? (page_size * (ZPageMinWastePercent / 100)) : ++ (page_size * (ZFragmentationLimit / 100))), ++ _predication_relocation_size(0), + _registered_pages(), + _sorted_pages(NULL), + _nselected(0), + _relocating(0), +- _fragmentation(0) {} ++ _fragmentation(0) { ++ if (is_fragment_limit_adatpive() && page_size == ZPageSizeSmall) { ++ // Predicate the relocation rate ++ double max_relocation_rate = 0.0; ++ max_relocation_rate = (ZStatRelocationRate::small_avg() * ZAllocationSpikeTolerance) + ++ (ZStatRelocationRate::small_avg_sd() * one_in_1000); ++ ++ // The decay average time ++ const AbsSeq& duration_of_relocation = ZStatRelocationRate::duration(); ++ const double max_duration_of_relocation = ++ duration_of_relocation.davg() + (duration_of_relocation.dsd() * one_in_1000); ++ ++ _predication_relocation_size = (size_t)max_relocation_rate * max_duration_of_relocation; ++ log_info(gc, reloc)("Predication Relocation size: " SIZE_FORMAT, _predication_relocation_size); ++ } ++} + + ZRelocationSetSelectorGroup::~ZRelocationSetSelectorGroup() { + FREE_C_HEAP_ARRAY(const ZPage*, _sorted_pages); + } + ++bool ZRelocationSetSelectorGroup::is_fragment_limit_adatpive() { ++ return ZAdatpivePageRelcaim && ZStatCycle::ncycles() >= 3; // warm up needs 2 cycles ++} ++ + void ZRelocationSetSelectorGroup::register_live_page(const ZPage* page, size_t garbage) { + if (garbage > _fragmentation_limit) { + _registered_pages.add(page); +@@ -104,12 +129,25 @@ void ZRelocationSetSelectorGroup::select() { + size_t selected_from = 0; + size_t selected_to = 0; + size_t from_size = 0; ++ bool is_abortable_selection = false; ++ size_t cur_page_live_bytes = 0; ++ double page_min_live_percent = 100 - ZPageMaxWastePercent; + + semi_sort(); + + for (size_t from = 1; from <= npages; from++) { + // Add page to the candidate relocation set +- from_size += _sorted_pages[from - 1]->live_bytes(); ++ cur_page_live_bytes = _sorted_pages[from - 1]->live_bytes(); ++ from_size += cur_page_live_bytes; ++ // Abortable selection for relocation ++ if (is_fragment_limit_adatpive() && _page_size == ZPageSizeSmall && ++ from_size >= _predication_relocation_size && ++ percent_of(cur_page_live_bytes, ZPageSizeSmall) > page_min_live_percent) { ++ // Get really relocation bytes ++ from_size -= cur_page_live_bytes; ++ is_abortable_selection = true; ++ break; ++ } + + // Calculate the maximum number of pages needed by the candidate relocation set. + // By subtracting the object size limit from the pages size we get the maximum +@@ -130,8 +168,13 @@ void ZRelocationSetSelectorGroup::select() { + } + + log_trace(gc, reloc)("Candidate Relocation Set (%s Pages): " +- SIZE_FORMAT "->" SIZE_FORMAT ", %.1f%% relative defragmentation, %s", +- _name, from, to, diff_reclaimable, (selected_from == from) ? "Selected" : "Rejected"); ++ SIZE_FORMAT "->" SIZE_FORMAT ", %.1f%% relative defragmentation, %s", ++ _name, from, to, diff_reclaimable, (selected_from == from) ? 
"Selected" : "Rejected"); ++ } ++ ++ if (is_abortable_selection) { ++ log_info(gc, reloc)("Abortable selection for Small Page really relocation byte is: " SIZE_FORMAT ++ ", predication relocation byte is: " SIZE_FORMAT, from_size, _predication_relocation_size); + } + + // Finalize selection +@@ -146,6 +189,22 @@ void ZRelocationSetSelectorGroup::select() { + + log_debug(gc, reloc)("Relocation Set (%s Pages): " SIZE_FORMAT "->" SIZE_FORMAT ", " SIZE_FORMAT " skipped", + _name, selected_from, selected_to, npages - _nselected); ++ ++ calculate_live_bytes(); ++} ++ ++void ZRelocationSetSelectorGroup::calculate_live_bytes() { ++ if (_page_size != ZPageSizeSmall) { ++ return; ++ } ++ ++ if ((!ZAdatpivePageRelcaim) && (ZStatCycle::ncycles() >= 3)) { ++ return; ++ } ++ ++ for (size_t from = 0; from < _nselected; from++) { ++ ZStatRelocation::_small_page_live_bytes += _sorted_pages[from]->live_bytes(); ++ } + } + + const ZPage* const* ZRelocationSetSelectorGroup::selected() const { +diff --git a/src/hotspot/share/gc/z/zRelocationSetSelector.hpp b/src/hotspot/share/gc/z/zRelocationSetSelector.hpp +index 1aa1142d5..92d2a6f39 100644 +--- a/src/hotspot/share/gc/z/zRelocationSetSelector.hpp ++++ b/src/hotspot/share/gc/z/zRelocationSetSelector.hpp +@@ -32,10 +32,13 @@ class ZRelocationSet; + + class ZRelocationSetSelectorGroup { + private: ++ static const double one_in_1000; ++ + const char* const _name; + const size_t _page_size; + const size_t _object_size_limit; + const size_t _fragmentation_limit; ++ size_t _predication_relocation_size; + + ZArray _registered_pages; + const ZPage** _sorted_pages; +@@ -43,6 +46,7 @@ private: + size_t _relocating; + size_t _fragmentation; + ++ bool is_fragment_limit_adatpive(); + void semi_sort(); + + public: +@@ -53,6 +57,7 @@ public: + + void register_live_page(const ZPage* page, size_t garbage); + void select(); ++ void calculate_live_bytes(); + + const ZPage* const* selected() const; + size_t nselected() const; +diff --git a/src/hotspot/share/gc/z/zStat.cpp b/src/hotspot/share/gc/z/zStat.cpp +index 3b55bdadd..a586e2897 100644 +--- a/src/hotspot/share/gc/z/zStat.cpp ++++ b/src/hotspot/share/gc/z/zStat.cpp +@@ -1101,6 +1101,7 @@ void ZStatMark::print() { + // Stat relocation + // + size_t ZStatRelocation::_relocating; ++size_t ZStatRelocation::_small_page_live_bytes; + bool ZStatRelocation::_success; + + void ZStatRelocation::set_at_select_relocation_set(size_t relocating) { +@@ -1119,6 +1120,42 @@ void ZStatRelocation::print() { + } + } + ++// ++// Stat relcoation rate ++// ++Ticks ZStatRelocationRate::_start_of_last; ++Ticks ZStatRelocationRate::_end_of_last; ++NumberSeq ZStatRelocationRate::_duration(0.3 /* alpha */); ++TruncatedSeq ZStatRelocationRate::_small_rate; ++TruncatedSeq ZStatRelocationRate::_small_rate_avg; ++ ++void ZStatRelocationRate::at_start() { ++ _start_of_last = Ticks::now(); ++} ++ ++void ZStatRelocationRate::at_end() { ++ _end_of_last = Ticks::now(); ++ ++ const double duration = (_end_of_last - _start_of_last).seconds() + 1.0; // Add 1.0 to avoid the duration close zero ++ _duration.add(duration); ++ ++ const double small_bytes_per_second = ZStatRelocation::_small_page_live_bytes / duration; ++ _small_rate.add(small_bytes_per_second); ++ _small_rate_avg.add(_small_rate.avg()); ++} ++ ++const AbsSeq& ZStatRelocationRate::duration() { ++ return _duration; ++} ++ ++double ZStatRelocationRate::small_avg() { ++ return _small_rate.avg(); ++} ++ ++double ZStatRelocationRate::small_avg_sd() { ++ return _small_rate_avg.sd(); ++} ++ + // + // Stat 
nmethods + // +diff --git a/src/hotspot/share/gc/z/zStat.hpp b/src/hotspot/share/gc/z/zStat.hpp +index 3390466e6..f826378f3 100644 +--- a/src/hotspot/share/gc/z/zStat.hpp ++++ b/src/hotspot/share/gc/z/zStat.hpp +@@ -392,12 +392,33 @@ private: + static bool _success; + + public: ++ static size_t _small_page_live_bytes; + static void set_at_select_relocation_set(size_t relocating); + static void set_at_relocate_end(bool success); +- + static void print(); + }; + ++// ++// Stat relocation rate ++// ++class ZStatRelocationRate : public AllStatic { ++private: ++ static Ticks _start_of_last; ++ static Ticks _end_of_last; ++ static NumberSeq _duration; ++ static TruncatedSeq _small_rate; // B/s ++ static TruncatedSeq _small_rate_avg; // B/s ++ ++public: ++ static void at_start(); ++ static void at_end(); ++ ++ static const AbsSeq& duration(); ++ ++ static double small_avg(); ++ static double small_avg_sd(); ++}; ++ + // + // Stat nmethods + // +diff --git a/src/hotspot/share/gc/z/z_globals.hpp b/src/hotspot/share/gc/z/z_globals.hpp +index 8cee59be7..326e8ec40 100644 +--- a/src/hotspot/share/gc/z/z_globals.hpp ++++ b/src/hotspot/share/gc/z/z_globals.hpp +@@ -49,6 +49,17 @@ + product(double, ZFragmentationLimit, 25.0, \ + "Maximum allowed heap fragmentation") \ + \ ++ experimental(bool, ZAdatpivePageRelcaim, false, \ ++ "Adaptive page reclaim at relocation phase") \ ++ \ ++ develop(double, ZPageMinWastePercent, 5.0, \ ++ "Amount of space, expressed as a percentage of the page size, " \ ++ "that ZGC is willing not to collect to avoid expensive GCs.") \ ++ range(0.0, 100.0) \ ++ \ ++ product(double, ZPageMaxWastePercent, 30.0, "Adaptive small page " \ ++ "select minimum pages percent.") range(0.0, 100.0) \ ++ \ + product(bool, ZStallOnOutOfMemory, true, \ + "Allow Java threads to stall and wait for GC to complete " \ + "instead of immediately throwing an OutOfMemoryError") \ +-- +2.19.1 + diff --git a/java-11-openjdk.spec b/java-11-openjdk.spec index 48adb12a3c0cf8051efb0193dd71da9516d1bb67..936ad972f3a4f38e38757a80105c70e3a7b18739 100644 --- a/java-11-openjdk.spec +++ b/java-11-openjdk.spec @@ -735,7 +735,7 @@ Provides: java-src%{?1} = %{epoch}:%{version}-%{release} Name: java-%{javaver}-%{origin} Version: %{newjavaver}.%{buildver} -Release: 4 +Release: 10 # java-1.5.0-ibm from jpackage.org set Epoch to 1 for unknown reasons # and this change was brought into RHEL-4. java-1.5.0-ibm packages # also included the epoch in their virtual provides.
This created a @@ -836,7 +836,13 @@ Patch50: 8248336-AArch64-C2-offset-overflow-in-BoxLockNode-em.patch # 11.0.9 Patch51: 8255781-Bump-patch-update-version-for-OpenJDK-jdk-11.0.9.1.patch Patch52: 8250861-Crash-in-MinINode-Ideal.patch - +Patch53: 8236512-PKCS11-Connection-closed-after-Cipher-doFinal-and-NoPadding.patch +Patch54: 8207160-ClassReader-adjustMethodParams-can-potentially-return-null-if-the-args-list-is-empty.patch +Patch55: 8215047-Task-terminators-do-not-complete-termination-in-consistent-state.patch +Patch56: 8247766-aarch64-guarantee-val-1U--nbits-failed-Field-too-big-for-insn.patch +Patch57: add-zgc-parameter-adaptation-feature.patch +Patch58: add-integerCache-feature.patch +Patch59: add-SVE-backend-feature.patch BuildRequires: autoconf BuildRequires: alsa-lib-devel @@ -1102,6 +1108,13 @@ pushd %{top_level_dir_name} %patch50 -p1 %patch51 -p1 %patch52 -p1 +%patch53 -p1 +%patch54 -p1 +%patch55 -p1 +%patch56 -p1 +%patch57 -p1 +%patch58 -p1 +%patch59 -p1 popd # openjdk %patch1000 @@ -1604,6 +1617,25 @@ require "copy_jdk_configs.lua" %changelog +* Thu Dec 24 2020 kuenking - 1:11.0.9.11-10 +- add add-SVE-backend-feature.patch + +* Thu Dec 24 2020 kuenking - 1:11.0.9.11-9 +- add add-integerCache-feature.patch + +* Thu Dec 24 2020 kuenking - 1:11.0.9.11-8 +- add add-zgc-parameter-adaptation-feature.patch + +* Wed Dec 23 2020 alapha - 1:11.0.9.11-7 +- add 8215047-Task-terminators-do-not-complete-termination-in-consistent-state.patch +- add 8247766-aarch64-guarantee-val-1U--nbits-failed-Field-too-big-for-insn.patch + +* Wed Dec 23 2020 eapen - 1:11.0.9.11-6 +- add 8207160-ClassReader-adjustMethodParams-can-potentially-return-null-if-the-args-list-is-empty.patch + +* Tue Dec 22 2020 aijm - 1:11.0.9.11-5 +- add 8236512-PKCS11-Connection-closed-after-Cipher-doFinal-and-NoPadding.patch + * Mon Dec 21 2020 noah - 1:11.0.9.11-4 - add a license to this repo