From b43ff34ef7cd1e458555c9449672b2c539acbcac Mon Sep 17 00:00:00 2001 From: zhoulu Date: Wed, 6 Aug 2025 15:41:18 +0800 Subject: [PATCH] Backport (riscv): Add AES implementation based on Zvkned Signed-off-by: liuqingtao --- ...d-AES-implementation-based-on-Zvkned.patch | 5209 +++++++++++++++++ openssl.spec | 11 +- 2 files changed, 5219 insertions(+), 1 deletion(-) create mode 100644 Backport-riscv-Add-AES-implementation-based-on-Zvkned.patch diff --git a/Backport-riscv-Add-AES-implementation-based-on-Zvkned.patch b/Backport-riscv-Add-AES-implementation-based-on-Zvkned.patch new file mode 100644 index 0000000..814c7e5 --- /dev/null +++ b/Backport-riscv-Add-AES-implementation-based-on-Zvkned.patch @@ -0,0 +1,5209 @@ +From 1b4730838806d862006c22c9b28a3c7c9ecadb49 Mon Sep 17 00:00:00 2001 +From: zhoulu +Date: Fri, 1 Aug 2025 12:24:18 +0800 +Subject: [PATCH] Backport (riscv): Add AES implementation based on Zvkned +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Reference:https://github.com/openssl/openssl/commit/5c03d5ddce0f3183a24868eb515bf615bc515314 +Add riscv64 asm_arch to linux64-riscv64 target +Reviewed-by: Philipp Tomsich +Signed-off-by: Henry Brausen + +Reviewed-by: Tomas Mraz +Reviewed-by: Paul Dale +(Merged from #18275) + +(cherry picked from commit cb2764f) + +Reference:https://github.com/openssl/openssl/commit/b3504b600c028a00f36cdbfedc928a48df9818ff +Add AES implementation in generic riscv64 asm + +Reviewed-by: Philipp Tomsich +Signed-off-by: Henry Brausen + +Reviewed-by: Tomas Mraz +Reviewed-by: Paul Dale +(Merged from #17640) + +Reference:https://github.com/openssl/openssl/commit/94474e02fa217c037ece9d819a9b12025f65cdb9 + +riscv: Implement AES-192 + +Signed-off-by: Ard Biesheuvel +Signed-off-by: Christoph Müllner + +Reviewed-by: Tomas Mraz +Reviewed-by: Paul Dale +Reviewed-by: Hugo Landau +(Merged from + +Signed-off-by: Christoph Müllner + +Reviewed-by: Tomas Mraz +Reviewed-by: Paul Dale +Reviewed-by: Hugo 
Landau +(Merged from #21923) + +Reference:https://github.com/openssl/openssl/commit/751a22194e4dc52532d2aedd49fe62dbea0badc8 + +riscv: Provide vector crypto implementation of AES-ECB mode. + +Signed-off-by: Phoebe Chen + +Reviewed-by: Tomas Mraz +Reviewed-by: Paul Dale +Reviewed-by: Hugo Landau +(Merged from #21923) + +Signed-off-by: liuqingtao +--- + Configurations/10-main.conf | 1 + + crypto/aes/asm/aes-riscv64-zvkned.pl | 1376 +++++++++++++ + crypto/aes/asm/aes-riscv64.pl | 1709 +++++++++++++++++ + crypto/aes/build.info | 6 + + crypto/build.info | 3 + + crypto/perlasm/riscv.pm | 1076 +++++++++++ + crypto/riscv64cpuid.pl | 105 + + crypto/riscvcap.c | 145 ++ + doc/man7/openssl-env.pod | 4 +- + include/crypto/aes_platform.h | 30 + + include/crypto/riscv_arch.def | 61 + + include/crypto/riscv_arch.h | 125 ++ + .../ciphers/cipher_aes_ccm_hw_rv64i.inc | 71 + + .../implementations/ciphers/cipher_aes_hw.c | 8 +- + .../ciphers/cipher_aes_hw_rv64i.inc | 83 + + .../ciphers/cipher_aes_ocb_hw.c | 32 + + .../ciphers/cipher_aes_xts_hw.c | 36 + + .../30-test_evp_data/evpciph_aes_common.txt | 47 +- + 18 files changed, 4913 insertions(+), 5 deletions(-) + create mode 100644 crypto/aes/asm/aes-riscv64-zvkned.pl + create mode 100644 crypto/aes/asm/aes-riscv64.pl + create mode 100644 crypto/perlasm/riscv.pm + create mode 100644 crypto/riscv64cpuid.pl + create mode 100644 crypto/riscvcap.c + create mode 100644 include/crypto/riscv_arch.def + create mode 100644 include/crypto/riscv_arch.h + create mode 100644 providers/implementations/ciphers/cipher_aes_ccm_hw_rv64i.inc + create mode 100644 providers/implementations/ciphers/cipher_aes_hw_rv64i.inc + +diff --git a/Configurations/10-main.conf b/Configurations/10-main.conf +index 280a75b..3f4c9a8 100644 +--- a/Configurations/10-main.conf ++++ b/Configurations/10-main.conf +@@ -819,6 +819,7 @@ my %targets = ( + "linux64-riscv64" => { + inherit_from => [ "linux-generic64"], + perlasm_scheme => "linux64", ++ asm_arch => 'riscv64', + }, + + 
# loongarch64 below refers to contemporary LoongArch Architecture +diff --git a/crypto/aes/asm/aes-riscv64-zvkned.pl b/crypto/aes/asm/aes-riscv64-zvkned.pl +new file mode 100644 +index 0000000..4c02927 +--- /dev/null ++++ b/crypto/aes/asm/aes-riscv64-zvkned.pl +@@ -0,0 +1,1376 @@ ++#! /usr/bin/env perl ++# This file is dual-licensed, meaning that you can use it under your ++# choice of either of the following two licenses: ++# ++# Copyright 2023 The OpenSSL Project Authors. All Rights Reserved. ++# ++# Licensed under the Apache License 2.0 (the "License"). You can obtain ++# a copy in the file LICENSE in the source distribution or at ++# https://www.openssl.org/source/license.html ++# ++# or ++# ++# Copyright (c) 2023, Christoph Müllner ++# Copyright (c) 2023, Phoebe Chen ++# All rights reserved. ++# ++# Redistribution and use in source and binary forms, with or without ++# modification, are permitted provided that the following conditions ++# are met: ++# 1. Redistributions of source code must retain the above copyright ++# notice, this list of conditions and the following disclaimer. ++# 2. Redistributions in binary form must reproduce the above copyright ++# notice, this list of conditions and the following disclaimer in the ++# documentation and/or other materials provided with the distribution. ++# ++# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ++# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT ++# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR ++# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT ++# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, ++# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT ++# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, ++# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY ++# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT ++# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE ++# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ++ ++# - RV64I ++# - RISC-V Vector ('V') with VLEN >= 128 ++# - RISC-V Vector AES block cipher extension ('Zvkned') ++ ++use strict; ++use warnings; ++ ++use FindBin qw($Bin); ++use lib "$Bin"; ++use lib "$Bin/../../perlasm"; ++use riscv; ++ ++# $output is the last argument if it looks like a file (it has an extension) ++# $flavour is the first argument if it doesn't look like a file ++my $output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef; ++my $flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef; ++ ++$output and open STDOUT,">$output"; ++ ++my $code=<<___; ++.text ++___ ++ ++my ($V0, $V1, $V2, $V3, $V4, $V5, $V6, $V7, ++ $V8, $V9, $V10, $V11, $V12, $V13, $V14, $V15, ++ $V16, $V17, $V18, $V19, $V20, $V21, $V22, $V23, ++ $V24, $V25, $V26, $V27, $V28, $V29, $V30, $V31, ++) = map("v$_",(0..31)); ++ ++# Load all 11 round keys to v1-v11 registers. 
++sub aes_128_load_key { ++ my $KEYP = shift; ++ ++ my $code=<<___; ++ @{[vsetivli "zero", 4, "e32", "m1", "ta", "ma"]} ++ @{[vle32_v $V1, $KEYP]} ++ addi $KEYP, $KEYP, 16 ++ @{[vle32_v $V2, $KEYP]} ++ addi $KEYP, $KEYP, 16 ++ @{[vle32_v $V3, $KEYP]} ++ addi $KEYP, $KEYP, 16 ++ @{[vle32_v $V4, $KEYP]} ++ addi $KEYP, $KEYP, 16 ++ @{[vle32_v $V5, $KEYP]} ++ addi $KEYP, $KEYP, 16 ++ @{[vle32_v $V6, $KEYP]} ++ addi $KEYP, $KEYP, 16 ++ @{[vle32_v $V7, $KEYP]} ++ addi $KEYP, $KEYP, 16 ++ @{[vle32_v $V8, $KEYP]} ++ addi $KEYP, $KEYP, 16 ++ @{[vle32_v $V9, $KEYP]} ++ addi $KEYP, $KEYP, 16 ++ @{[vle32_v $V10, $KEYP]} ++ addi $KEYP, $KEYP, 16 ++ @{[vle32_v $V11, $KEYP]} ++___ ++ ++ return $code; ++} ++ ++# Load all 13 round keys to v1-v13 registers. ++sub aes_192_load_key { ++ my $KEYP = shift; ++ ++ my $code=<<___; ++ @{[vsetivli "zero", 4, "e32", "m1", "ta", "ma"]} ++ @{[vle32_v $V1, $KEYP]} ++ addi $KEYP, $KEYP, 16 ++ @{[vle32_v $V2, $KEYP]} ++ addi $KEYP, $KEYP, 16 ++ @{[vle32_v $V3, $KEYP]} ++ addi $KEYP, $KEYP, 16 ++ @{[vle32_v $V4, $KEYP]} ++ addi $KEYP, $KEYP, 16 ++ @{[vle32_v $V5, $KEYP]} ++ addi $KEYP, $KEYP, 16 ++ @{[vle32_v $V6, $KEYP]} ++ addi $KEYP, $KEYP, 16 ++ @{[vle32_v $V7, $KEYP]} ++ addi $KEYP, $KEYP, 16 ++ @{[vle32_v $V8, $KEYP]} ++ addi $KEYP, $KEYP, 16 ++ @{[vle32_v $V9, $KEYP]} ++ addi $KEYP, $KEYP, 16 ++ @{[vle32_v $V10, $KEYP]} ++ addi $KEYP, $KEYP, 16 ++ @{[vle32_v $V11, $KEYP]} ++ addi $KEYP, $KEYP, 16 ++ @{[vle32_v $V12, $KEYP]} ++ addi $KEYP, $KEYP, 16 ++ @{[vle32_v $V13, $KEYP]} ++___ ++ ++ return $code; ++} ++ ++# Load all 15 round keys to v1-v15 registers. 
++sub aes_256_load_key { ++ my $KEYP = shift; ++ ++ my $code=<<___; ++ @{[vsetivli "zero", 4, "e32", "m1", "ta", "ma"]} ++ @{[vle32_v $V1, $KEYP]} ++ addi $KEYP, $KEYP, 16 ++ @{[vle32_v $V2, $KEYP]} ++ addi $KEYP, $KEYP, 16 ++ @{[vle32_v $V3, $KEYP]} ++ addi $KEYP, $KEYP, 16 ++ @{[vle32_v $V4, $KEYP]} ++ addi $KEYP, $KEYP, 16 ++ @{[vle32_v $V5, $KEYP]} ++ addi $KEYP, $KEYP, 16 ++ @{[vle32_v $V6, $KEYP]} ++ addi $KEYP, $KEYP, 16 ++ @{[vle32_v $V7, $KEYP]} ++ addi $KEYP, $KEYP, 16 ++ @{[vle32_v $V8, $KEYP]} ++ addi $KEYP, $KEYP, 16 ++ @{[vle32_v $V9, $KEYP]} ++ addi $KEYP, $KEYP, 16 ++ @{[vle32_v $V10, $KEYP]} ++ addi $KEYP, $KEYP, 16 ++ @{[vle32_v $V11, $KEYP]} ++ addi $KEYP, $KEYP, 16 ++ @{[vle32_v $V12, $KEYP]} ++ addi $KEYP, $KEYP, 16 ++ @{[vle32_v $V13, $KEYP]} ++ addi $KEYP, $KEYP, 16 ++ @{[vle32_v $V14, $KEYP]} ++ addi $KEYP, $KEYP, 16 ++ @{[vle32_v $V15, $KEYP]} ++___ ++ ++ return $code; ++} ++ ++# aes-128 encryption with round keys v1-v11 ++sub aes_128_encrypt { ++ my $code=<<___; ++ @{[vaesz_vs $V24, $V1]} # with round key w[ 0, 3] ++ @{[vaesem_vs $V24, $V2]} # with round key w[ 4, 7] ++ @{[vaesem_vs $V24, $V3]} # with round key w[ 8,11] ++ @{[vaesem_vs $V24, $V4]} # with round key w[12,15] ++ @{[vaesem_vs $V24, $V5]} # with round key w[16,19] ++ @{[vaesem_vs $V24, $V6]} # with round key w[20,23] ++ @{[vaesem_vs $V24, $V7]} # with round key w[24,27] ++ @{[vaesem_vs $V24, $V8]} # with round key w[28,31] ++ @{[vaesem_vs $V24, $V9]} # with round key w[32,35] ++ @{[vaesem_vs $V24, $V10]} # with round key w[36,39] ++ @{[vaesef_vs $V24, $V11]} # with round key w[40,43] ++___ ++ ++ return $code; ++} ++ ++# aes-128 decryption with round keys v1-v11 ++sub aes_128_decrypt { ++ my $code=<<___; ++ @{[vaesz_vs $V24, $V11]} # with round key w[40,43] ++ @{[vaesdm_vs $V24, $V10]} # with round key w[36,39] ++ @{[vaesdm_vs $V24, $V9]} # with round key w[32,35] ++ @{[vaesdm_vs $V24, $V8]} # with round key w[28,31] ++ @{[vaesdm_vs $V24, $V7]} # with round key w[24,27] ++ 
@{[vaesdm_vs $V24, $V6]} # with round key w[20,23] ++ @{[vaesdm_vs $V24, $V5]} # with round key w[16,19] ++ @{[vaesdm_vs $V24, $V4]} # with round key w[12,15] ++ @{[vaesdm_vs $V24, $V3]} # with round key w[ 8,11] ++ @{[vaesdm_vs $V24, $V2]} # with round key w[ 4, 7] ++ @{[vaesdf_vs $V24, $V1]} # with round key w[ 0, 3] ++___ ++ ++ return $code; ++} ++ ++# aes-192 encryption with round keys v1-v13 ++sub aes_192_encrypt { ++ my $code=<<___; ++ @{[vaesz_vs $V24, $V1]} # with round key w[ 0, 3] ++ @{[vaesem_vs $V24, $V2]} # with round key w[ 4, 7] ++ @{[vaesem_vs $V24, $V3]} # with round key w[ 8,11] ++ @{[vaesem_vs $V24, $V4]} # with round key w[12,15] ++ @{[vaesem_vs $V24, $V5]} # with round key w[16,19] ++ @{[vaesem_vs $V24, $V6]} # with round key w[20,23] ++ @{[vaesem_vs $V24, $V7]} # with round key w[24,27] ++ @{[vaesem_vs $V24, $V8]} # with round key w[28,31] ++ @{[vaesem_vs $V24, $V9]} # with round key w[32,35] ++ @{[vaesem_vs $V24, $V10]} # with round key w[36,39] ++ @{[vaesem_vs $V24, $V11]} # with round key w[40,43] ++ @{[vaesem_vs $V24, $V12]} # with round key w[44,47] ++ @{[vaesef_vs $V24, $V13]} # with round key w[48,51] ++___ ++ ++ return $code; ++} ++ ++# aes-192 decryption with round keys v1-v13 ++sub aes_192_decrypt { ++ my $code=<<___; ++ @{[vaesz_vs $V24, $V13]} # with round key w[48,51] ++ @{[vaesdm_vs $V24, $V12]} # with round key w[44,47] ++ @{[vaesdm_vs $V24, $V11]} # with round key w[40,43] ++ @{[vaesdm_vs $V24, $V10]} # with round key w[36,39] ++ @{[vaesdm_vs $V24, $V9]} # with round key w[32,35] ++ @{[vaesdm_vs $V24, $V8]} # with round key w[28,31] ++ @{[vaesdm_vs $V24, $V7]} # with round key w[24,27] ++ @{[vaesdm_vs $V24, $V6]} # with round key w[20,23] ++ @{[vaesdm_vs $V24, $V5]} # with round key w[16,19] ++ @{[vaesdm_vs $V24, $V4]} # with round key w[12,15] ++ @{[vaesdm_vs $V24, $V3]} # with round key w[ 8,11] ++ @{[vaesdm_vs $V24, $V2]} # with round key w[ 4, 7] ++ @{[vaesdf_vs $V24, $V1]} # with round key w[ 0, 3] ++___ ++ ++ return 
$code; ++} ++ ++# aes-256 encryption with round keys v1-v15 ++sub aes_256_encrypt { ++ my $code=<<___; ++ @{[vaesz_vs $V24, $V1]} # with round key w[ 0, 3] ++ @{[vaesem_vs $V24, $V2]} # with round key w[ 4, 7] ++ @{[vaesem_vs $V24, $V3]} # with round key w[ 8,11] ++ @{[vaesem_vs $V24, $V4]} # with round key w[12,15] ++ @{[vaesem_vs $V24, $V5]} # with round key w[16,19] ++ @{[vaesem_vs $V24, $V6]} # with round key w[20,23] ++ @{[vaesem_vs $V24, $V7]} # with round key w[24,27] ++ @{[vaesem_vs $V24, $V8]} # with round key w[28,31] ++ @{[vaesem_vs $V24, $V9]} # with round key w[32,35] ++ @{[vaesem_vs $V24, $V10]} # with round key w[36,39] ++ @{[vaesem_vs $V24, $V11]} # with round key w[40,43] ++ @{[vaesem_vs $V24, $V12]} # with round key w[44,47] ++ @{[vaesem_vs $V24, $V13]} # with round key w[48,51] ++ @{[vaesem_vs $V24, $V14]} # with round key w[52,55] ++ @{[vaesef_vs $V24, $V15]} # with round key w[56,59] ++___ ++ ++ return $code; ++} ++ ++# aes-256 decryption with round keys v1-v15 ++sub aes_256_decrypt { ++ my $code=<<___; ++ @{[vaesz_vs $V24, $V15]} # with round key w[56,59] ++ @{[vaesdm_vs $V24, $V14]} # with round key w[52,55] ++ @{[vaesdm_vs $V24, $V13]} # with round key w[48,51] ++ @{[vaesdm_vs $V24, $V12]} # with round key w[44,47] ++ @{[vaesdm_vs $V24, $V11]} # with round key w[40,43] ++ @{[vaesdm_vs $V24, $V10]} # with round key w[36,39] ++ @{[vaesdm_vs $V24, $V9]} # with round key w[32,35] ++ @{[vaesdm_vs $V24, $V8]} # with round key w[28,31] ++ @{[vaesdm_vs $V24, $V7]} # with round key w[24,27] ++ @{[vaesdm_vs $V24, $V6]} # with round key w[20,23] ++ @{[vaesdm_vs $V24, $V5]} # with round key w[16,19] ++ @{[vaesdm_vs $V24, $V4]} # with round key w[12,15] ++ @{[vaesdm_vs $V24, $V3]} # with round key w[ 8,11] ++ @{[vaesdm_vs $V24, $V2]} # with round key w[ 4, 7] ++ @{[vaesdf_vs $V24, $V1]} # with round key w[ 0, 3] ++___ ++ ++ return $code; ++} ++ ++{ ++############################################################################### ++# void 
rv64i_zvkned_cbc_encrypt(const unsigned char *in, unsigned char *out, ++# size_t length, const AES_KEY *key, ++# unsigned char *ivec, const int enc); ++my ($INP, $OUTP, $LEN, $KEYP, $IVP, $ENC) = ("a0", "a1", "a2", "a3", "a4", "a5"); ++my ($T0, $T1, $ROUNDS) = ("t0", "t1", "t2"); ++ ++$code .= <<___; ++.p2align 3 ++.globl rv64i_zvkned_cbc_encrypt ++.type rv64i_zvkned_cbc_encrypt,\@function ++rv64i_zvkned_cbc_encrypt: ++ # check whether the length is a multiple of 16 and >= 16 ++ li $T1, 16 ++ blt $LEN, $T1, L_end ++ andi $T1, $LEN, 15 ++ bnez $T1, L_end ++ ++ # Load number of rounds ++ lwu $ROUNDS, 240($KEYP) ++ ++ # Get proper routine for key size ++ li $T0, 10 ++ beq $ROUNDS, $T0, L_cbc_enc_128 ++ ++ li $T0, 12 ++ beq $ROUNDS, $T0, L_cbc_enc_192 ++ ++ li $T0, 14 ++ beq $ROUNDS, $T0, L_cbc_enc_256 ++ ++ ret ++.size rv64i_zvkned_cbc_encrypt,.-rv64i_zvkned_cbc_encrypt ++___ ++ ++$code .= <<___; ++.p2align 3 ++L_cbc_enc_128: ++ # Load all 11 round keys to v1-v11 registers. ++ @{[aes_128_load_key $KEYP]} ++ ++ # Load IV. ++ @{[vle32_v $V16, $IVP]} ++ ++ @{[vle32_v $V24, $INP]} ++ @{[vxor_vv $V24, $V24, $V16]} ++ j 2f ++ ++1: ++ @{[vle32_v $V17, $INP]} ++ @{[vxor_vv $V24, $V24, $V17]} ++ ++2: ++ # AES body ++ @{[aes_128_encrypt]} ++ ++ @{[vse32_v $V24, $OUTP]} ++ ++ addi $INP, $INP, 16 ++ addi $OUTP, $OUTP, 16 ++ addi $LEN, $LEN, -16 ++ ++ bnez $LEN, 1b ++ ++ @{[vse32_v $V24, $IVP]} ++ ++ ret ++.size L_cbc_enc_128,.-L_cbc_enc_128 ++___ ++ ++$code .= <<___; ++.p2align 3 ++L_cbc_enc_192: ++ # Load all 13 round keys to v1-v13 registers. ++ @{[aes_192_load_key $KEYP]} ++ ++ # Load IV. 
++ @{[vle32_v $V16, $IVP]} ++ ++ @{[vle32_v $V24, $INP]} ++ @{[vxor_vv $V24, $V24, $V16]} ++ j 2f ++ ++1: ++ @{[vle32_v $V17, $INP]} ++ @{[vxor_vv $V24, $V24, $V17]} ++ ++2: ++ # AES body ++ @{[aes_192_encrypt]} ++ ++ @{[vse32_v $V24, $OUTP]} ++ ++ addi $INP, $INP, 16 ++ addi $OUTP, $OUTP, 16 ++ addi $LEN, $LEN, -16 ++ ++ bnez $LEN, 1b ++ ++ @{[vse32_v $V24, $IVP]} ++ ++ ret ++.size L_cbc_enc_192,.-L_cbc_enc_192 ++___ ++ ++$code .= <<___; ++.p2align 3 ++L_cbc_enc_256: ++ # Load all 15 round keys to v1-v15 registers. ++ @{[aes_256_load_key $KEYP]} ++ ++ # Load IV. ++ @{[vle32_v $V16, $IVP]} ++ ++ @{[vle32_v $V24, $INP]} ++ @{[vxor_vv $V24, $V24, $V16]} ++ j 2f ++ ++1: ++ @{[vle32_v $V17, $INP]} ++ @{[vxor_vv $V24, $V24, $V17]} ++ ++2: ++ # AES body ++ @{[aes_256_encrypt]} ++ ++ @{[vse32_v $V24, $OUTP]} ++ ++ addi $INP, $INP, 16 ++ addi $OUTP, $OUTP, 16 ++ addi $LEN, $LEN, -16 ++ ++ bnez $LEN, 1b ++ ++ @{[vse32_v $V24, $IVP]} ++ ++ ret ++.size L_cbc_enc_256,.-L_cbc_enc_256 ++___ ++ ++############################################################################### ++# void rv64i_zvkned_cbc_decrypt(const unsigned char *in, unsigned char *out, ++# size_t length, const AES_KEY *key, ++# unsigned char *ivec, const int enc); ++ ++$code .= <<___; ++.p2align 3 ++.globl rv64i_zvkned_cbc_decrypt ++.type rv64i_zvkned_cbc_decrypt,\@function ++rv64i_zvkned_cbc_decrypt: ++ # check whether the length is a multiple of 16 and >= 16 ++ li $T1, 16 ++ blt $LEN, $T1, L_end ++ andi $T1, $LEN, 15 ++ bnez $T1, L_end ++ ++ # Load number of rounds ++ lwu $ROUNDS, 240($KEYP) ++ ++ # Get proper routine for key size ++ li $T0, 10 ++ beq $ROUNDS, $T0, L_cbc_dec_128 ++ ++ li $T0, 12 ++ beq $ROUNDS, $T0, L_cbc_dec_192 ++ ++ li $T0, 14 ++ beq $ROUNDS, $T0, L_cbc_dec_256 ++ ++ ret ++.size rv64i_zvkned_cbc_decrypt,.-rv64i_zvkned_cbc_decrypt ++___ ++ ++$code .= <<___; ++.p2align 3 ++L_cbc_dec_128: ++ # Load all 11 round keys to v1-v11 registers. ++ @{[aes_128_load_key $KEYP]} ++ ++ # Load IV. 
++ @{[vle32_v $V16, $IVP]} ++ ++ @{[vle32_v $V24, $INP]} ++ @{[vmv_v_v $V17, $V24]} ++ j 2f ++ ++1: ++ @{[vle32_v $V24, $INP]} ++ @{[vmv_v_v $V17, $V24]} ++ addi $OUTP, $OUTP, 16 ++ ++2: ++ # AES body ++ @{[aes_128_decrypt]} ++ ++ @{[vxor_vv $V24, $V24, $V16]} ++ @{[vse32_v $V24, $OUTP]} ++ @{[vmv_v_v $V16, $V17]} ++ ++ addi $LEN, $LEN, -16 ++ addi $INP, $INP, 16 ++ ++ bnez $LEN, 1b ++ ++ @{[vse32_v $V16, $IVP]} ++ ++ ret ++.size L_cbc_dec_128,.-L_cbc_dec_128 ++___ ++ ++$code .= <<___; ++.p2align 3 ++L_cbc_dec_192: ++ # Load all 13 round keys to v1-v13 registers. ++ @{[aes_192_load_key $KEYP]} ++ ++ # Load IV. ++ @{[vle32_v $V16, $IVP]} ++ ++ @{[vle32_v $V24, $INP]} ++ @{[vmv_v_v $V17, $V24]} ++ j 2f ++ ++1: ++ @{[vle32_v $V24, $INP]} ++ @{[vmv_v_v $V17, $V24]} ++ addi $OUTP, $OUTP, 16 ++ ++2: ++ # AES body ++ @{[aes_192_decrypt]} ++ ++ @{[vxor_vv $V24, $V24, $V16]} ++ @{[vse32_v $V24, $OUTP]} ++ @{[vmv_v_v $V16, $V17]} ++ ++ addi $LEN, $LEN, -16 ++ addi $INP, $INP, 16 ++ ++ bnez $LEN, 1b ++ ++ @{[vse32_v $V16, $IVP]} ++ ++ ret ++.size L_cbc_dec_192,.-L_cbc_dec_192 ++___ ++ ++$code .= <<___; ++.p2align 3 ++L_cbc_dec_256: ++ # Load all 15 round keys to v1-v15 registers. ++ @{[aes_256_load_key $KEYP]} ++ ++ # Load IV. 
++ @{[vle32_v $V16, $IVP]} ++ ++ @{[vle32_v $V24, $INP]} ++ @{[vmv_v_v $V17, $V24]} ++ j 2f ++ ++1: ++ @{[vle32_v $V24, $INP]} ++ @{[vmv_v_v $V17, $V24]} ++ addi $OUTP, $OUTP, 16 ++ ++2: ++ # AES body ++ @{[aes_256_decrypt]} ++ ++ @{[vxor_vv $V24, $V24, $V16]} ++ @{[vse32_v $V24, $OUTP]} ++ @{[vmv_v_v $V16, $V17]} ++ ++ addi $LEN, $LEN, -16 ++ addi $INP, $INP, 16 ++ ++ bnez $LEN, 1b ++ ++ @{[vse32_v $V16, $IVP]} ++ ++ ret ++.size L_cbc_dec_256,.-L_cbc_dec_256 ++___ ++} ++ ++{ ++############################################################################### ++# void rv64i_zvkned_ecb_encrypt(const unsigned char *in, unsigned char *out, ++# size_t length, const AES_KEY *key, ++# const int enc); ++my ($INP, $OUTP, $LEN, $KEYP, $ENC) = ("a0", "a1", "a2", "a3", "a4"); ++my ($REMAIN_LEN) = ("a5"); ++my ($VL) = ("a6"); ++my ($T0, $T1, $ROUNDS) = ("t0", "t1", "t2"); ++my ($LEN32) = ("t3"); ++ ++$code .= <<___; ++.p2align 3 ++.globl rv64i_zvkned_ecb_encrypt ++.type rv64i_zvkned_ecb_encrypt,\@function ++rv64i_zvkned_ecb_encrypt: ++ # Make the LEN become e32 length. ++ srli $LEN32, $LEN, 2 ++ ++ # Load number of rounds ++ lwu $ROUNDS, 240($KEYP) ++ ++ # Get proper routine for key size ++ li $T0, 10 ++ beq $ROUNDS, $T0, L_ecb_enc_128 ++ ++ li $T0, 12 ++ beq $ROUNDS, $T0, L_ecb_enc_192 ++ ++ li $T0, 14 ++ beq $ROUNDS, $T0, L_ecb_enc_256 ++ ++ ret ++.size rv64i_zvkned_ecb_encrypt,.-rv64i_zvkned_ecb_encrypt ++___ ++ ++$code .= <<___; ++.p2align 3 ++L_ecb_enc_128: ++ # Load all 11 round keys to v1-v11 registers. ++ @{[aes_128_load_key $KEYP]} ++ ++1: ++ @{[vsetvli $VL, $LEN32, "e32", "m4", "ta", "ma"]} ++ slli $T0, $VL, 2 ++ sub $LEN32, $LEN32, $VL ++ ++ @{[vle32_v $V24, $INP]} ++ ++ # AES body ++ @{[aes_128_encrypt]} ++ ++ @{[vse32_v $V24, $OUTP]} ++ ++ add $INP, $INP, $T0 ++ add $OUTP, $OUTP, $T0 ++ ++ bnez $LEN32, 1b ++ ++ ret ++.size L_ecb_enc_128,.-L_ecb_enc_128 ++___ ++ ++$code .= <<___; ++.p2align 3 ++L_ecb_enc_192: ++ # Load all 13 round keys to v1-v13 registers. 
++ @{[aes_192_load_key $KEYP]} ++ ++1: ++ @{[vsetvli $VL, $LEN32, "e32", "m4", "ta", "ma"]} ++ slli $T0, $VL, 2 ++ sub $LEN32, $LEN32, $VL ++ ++ @{[vle32_v $V24, $INP]} ++ ++ # AES body ++ @{[aes_192_encrypt]} ++ ++ @{[vse32_v $V24, $OUTP]} ++ ++ add $INP, $INP, $T0 ++ add $OUTP, $OUTP, $T0 ++ ++ bnez $LEN32, 1b ++ ++ ret ++.size L_ecb_enc_192,.-L_ecb_enc_192 ++___ ++ ++$code .= <<___; ++.p2align 3 ++L_ecb_enc_256: ++ # Load all 15 round keys to v1-v15 registers. ++ @{[aes_256_load_key $KEYP]} ++ ++1: ++ @{[vsetvli $VL, $LEN32, "e32", "m4", "ta", "ma"]} ++ slli $T0, $VL, 2 ++ sub $LEN32, $LEN32, $VL ++ ++ @{[vle32_v $V24, $INP]} ++ ++ # AES body ++ @{[aes_256_encrypt]} ++ ++ @{[vse32_v $V24, $OUTP]} ++ ++ add $INP, $INP, $T0 ++ add $OUTP, $OUTP, $T0 ++ ++ bnez $LEN32, 1b ++ ++ ret ++.size L_ecb_enc_256,.-L_ecb_enc_256 ++___ ++ ++############################################################################### ++# void rv64i_zvkned_ecb_decrypt(const unsigned char *in, unsigned char *out, ++# size_t length, const AES_KEY *key, ++# const int enc); ++ ++$code .= <<___; ++.p2align 3 ++.globl rv64i_zvkned_ecb_decrypt ++.type rv64i_zvkned_ecb_decrypt,\@function ++rv64i_zvkned_ecb_decrypt: ++ # Make the LEN become e32 length. ++ srli $LEN32, $LEN, 2 ++ ++ # Load number of rounds ++ lwu $ROUNDS, 240($KEYP) ++ ++ # Get proper routine for key size ++ li $T0, 10 ++ beq $ROUNDS, $T0, L_ecb_dec_128 ++ ++ li $T0, 12 ++ beq $ROUNDS, $T0, L_ecb_dec_192 ++ ++ li $T0, 14 ++ beq $ROUNDS, $T0, L_ecb_dec_256 ++ ++ ret ++.size rv64i_zvkned_ecb_decrypt,.-rv64i_zvkned_ecb_decrypt ++___ ++ ++$code .= <<___; ++.p2align 3 ++L_ecb_dec_128: ++ # Load all 11 round keys to v1-v11 registers. 
++ @{[aes_128_load_key $KEYP]} ++ ++1: ++ @{[vsetvli $VL, $LEN32, "e32", "m4", "ta", "ma"]} ++ slli $T0, $VL, 2 ++ sub $LEN32, $LEN32, $VL ++ ++ @{[vle32_v $V24, $INP]} ++ ++ # AES body ++ @{[aes_128_decrypt]} ++ ++ @{[vse32_v $V24, $OUTP]} ++ ++ add $INP, $INP, $T0 ++ add $OUTP, $OUTP, $T0 ++ ++ bnez $LEN32, 1b ++ ++ ret ++.size L_ecb_dec_128,.-L_ecb_dec_128 ++___ ++ ++$code .= <<___; ++.p2align 3 ++L_ecb_dec_192: ++ # Load all 13 round keys to v1-v13 registers. ++ @{[aes_192_load_key $KEYP]} ++ ++1: ++ @{[vsetvli $VL, $LEN32, "e32", "m4", "ta", "ma"]} ++ slli $T0, $VL, 2 ++ sub $LEN32, $LEN32, $VL ++ ++ @{[vle32_v $V24, $INP]} ++ ++ # AES body ++ @{[aes_192_decrypt]} ++ ++ @{[vse32_v $V24, $OUTP]} ++ ++ add $INP, $INP, $T0 ++ add $OUTP, $OUTP, $T0 ++ ++ bnez $LEN32, 1b ++ ++ ret ++.size L_ecb_dec_192,.-L_ecb_dec_192 ++___ ++ ++$code .= <<___; ++.p2align 3 ++L_ecb_dec_256: ++ # Load all 15 round keys to v1-v15 registers. ++ @{[aes_256_load_key $KEYP]} ++ ++1: ++ @{[vsetvli $VL, $LEN32, "e32", "m4", "ta", "ma"]} ++ slli $T0, $VL, 2 ++ sub $LEN32, $LEN32, $VL ++ ++ @{[vle32_v $V24, $INP]} ++ ++ # AES body ++ @{[aes_256_decrypt]} ++ ++ @{[vse32_v $V24, $OUTP]} ++ ++ add $INP, $INP, $T0 ++ add $OUTP, $OUTP, $T0 ++ ++ bnez $LEN32, 1b ++ ++ ret ++.size L_ecb_dec_256,.-L_ecb_dec_256 ++___ ++ ++} ++ ++{ ++################################################################################ ++# int rv64i_zvkned_set_encrypt_key(const unsigned char *userKey, const int bits, ++# AES_KEY *key) ++# int rv64i_zvkned_set_decrypt_key(const unsigned char *userKey, const int bits, ++# AES_KEY *key) ++my ($UKEY,$BITS,$KEYP) = ("a0", "a1", "a2"); ++my ($T0,$T1,$T4) = ("t1", "t2", "t4"); ++ ++$code .= <<___; ++.p2align 3 ++.globl rv64i_zvkned_set_encrypt_key ++.type rv64i_zvkned_set_encrypt_key,\@function ++rv64i_zvkned_set_encrypt_key: ++ beqz $UKEY, L_fail_m1 ++ beqz $KEYP, L_fail_m1 ++ ++ # Get proper routine for key size ++ li $T0, 256 ++ beq $BITS, $T0, L_set_key_256 ++ li $T0, 128 ++ 
beq $BITS, $T0, L_set_key_128 ++ ++ j L_fail_m2 ++ ++.size rv64i_zvkned_set_encrypt_key,.-rv64i_zvkned_set_encrypt_key ++___ ++ ++$code .= <<___; ++.p2align 3 ++.globl rv64i_zvkned_set_decrypt_key ++.type rv64i_zvkned_set_decrypt_key,\@function ++rv64i_zvkned_set_decrypt_key: ++ beqz $UKEY, L_fail_m1 ++ beqz $KEYP, L_fail_m1 ++ ++ # Get proper routine for key size ++ li $T0, 256 ++ beq $BITS, $T0, L_set_key_256 ++ li $T0, 128 ++ beq $BITS, $T0, L_set_key_128 ++ ++ j L_fail_m2 ++ ++.size rv64i_zvkned_set_decrypt_key,.-rv64i_zvkned_set_decrypt_key ++___ ++ ++$code .= <<___; ++.p2align 3 ++L_set_key_128: ++ # Store the number of rounds ++ li $T1, 10 ++ sw $T1, 240($KEYP) ++ ++ @{[vsetivli__x0_4_e32_m1_tu_mu]} ++ ++ # Load the key ++ @{[vle32_v $V10, ($UKEY)]} ++ ++ # Generate keys for round 2-11 into registers v11-v20. ++ @{[vaeskf1_vi $V11, $V10, 1]} # v11 <- rk2 (w[ 4, 7]) ++ @{[vaeskf1_vi $V12, $V11, 2]} # v12 <- rk3 (w[ 8,11]) ++ @{[vaeskf1_vi $V13, $V12, 3]} # v13 <- rk4 (w[12,15]) ++ @{[vaeskf1_vi $V14, $V13, 4]} # v14 <- rk5 (w[16,19]) ++ @{[vaeskf1_vi $V15, $V14, 5]} # v15 <- rk6 (w[20,23]) ++ @{[vaeskf1_vi $V16, $V15, 6]} # v16 <- rk7 (w[24,27]) ++ @{[vaeskf1_vi $V17, $V16, 7]} # v17 <- rk8 (w[28,31]) ++ @{[vaeskf1_vi $V18, $V17, 8]} # v18 <- rk9 (w[32,35]) ++ @{[vaeskf1_vi $V19, $V18, 9]} # v19 <- rk10 (w[36,39]) ++ @{[vaeskf1_vi $V20, $V19, 10]} # v20 <- rk11 (w[40,43]) ++ ++ # Store the round keys ++ @{[vse32_v $V10, $KEYP]} ++ addi $KEYP, $KEYP, 16 ++ @{[vse32_v $V11, $KEYP]} ++ addi $KEYP, $KEYP, 16 ++ @{[vse32_v $V12, $KEYP]} ++ addi $KEYP, $KEYP, 16 ++ @{[vse32_v $V13, $KEYP]} ++ addi $KEYP, $KEYP, 16 ++ @{[vse32_v $V14, $KEYP]} ++ addi $KEYP, $KEYP, 16 ++ @{[vse32_v $V15, $KEYP]} ++ addi $KEYP, $KEYP, 16 ++ @{[vse32_v $V16, $KEYP]} ++ addi $KEYP, $KEYP, 16 ++ @{[vse32_v $V17, $KEYP]} ++ addi $KEYP, $KEYP, 16 ++ @{[vse32_v $V18, $KEYP]} ++ addi $KEYP, $KEYP, 16 ++ @{[vse32_v $V19, $KEYP]} ++ addi $KEYP, $KEYP, 16 ++ @{[vse32_v $V20, $KEYP]} ++ ++ li 
a0, 1 ++ ret ++.size L_set_key_128,.-L_set_key_128 ++___ ++ ++$code .= <<___; ++.p2align 3 ++L_set_key_256: ++ # Store the number of rounds ++ li $T1, 14 ++ sw $T1, 240($KEYP) ++ ++ @{[vsetivli__x0_4_e32_m1_tu_mu]} ++ ++ # Load the key ++ @{[vle32_v $V10, ($UKEY)]} ++ addi $UKEY, $UKEY, 16 ++ @{[vle32_v $V11, ($UKEY)]} ++ ++ @{[vmv_v_v $V12, $V10]} ++ @{[vaeskf2_vi $V12, $V11, 2]} ++ @{[vmv_v_v $V13, $V11]} ++ @{[vaeskf2_vi $V13, $V12, 3]} ++ @{[vmv_v_v $V14, $V12]} ++ @{[vaeskf2_vi $V14, $V13, 4]} ++ @{[vmv_v_v $V15, $V13]} ++ @{[vaeskf2_vi $V15, $V14, 5]} ++ @{[vmv_v_v $V16, $V14]} ++ @{[vaeskf2_vi $V16, $V15, 6]} ++ @{[vmv_v_v $V17, $V15]} ++ @{[vaeskf2_vi $V17, $V16, 7]} ++ @{[vmv_v_v $V18, $V16]} ++ @{[vaeskf2_vi $V18, $V17, 8]} ++ @{[vmv_v_v $V19, $V17]} ++ @{[vaeskf2_vi $V19, $V18, 9]} ++ @{[vmv_v_v $V20, $V18]} ++ @{[vaeskf2_vi $V20, $V19, 10]} ++ @{[vmv_v_v $V21, $V19]} ++ @{[vaeskf2_vi $V21, $V20, 11]} ++ @{[vmv_v_v $V22, $V20]} ++ @{[vaeskf2_vi $V22, $V21, 12]} ++ @{[vmv_v_v $V23, $V21]} ++ @{[vaeskf2_vi $V23, $V22, 13]} ++ @{[vmv_v_v $V24, $V22]} ++ @{[vaeskf2_vi $V24, $V23, 14]} ++ ++ @{[vse32_v $V10, $KEYP]} ++ addi $KEYP, $KEYP, 16 ++ @{[vse32_v $V11, $KEYP]} ++ addi $KEYP, $KEYP, 16 ++ @{[vse32_v $V12, $KEYP]} ++ addi $KEYP, $KEYP, 16 ++ @{[vse32_v $V13, $KEYP]} ++ addi $KEYP, $KEYP, 16 ++ @{[vse32_v $V14, $KEYP]} ++ addi $KEYP, $KEYP, 16 ++ @{[vse32_v $V15, $KEYP]} ++ addi $KEYP, $KEYP, 16 ++ @{[vse32_v $V16, $KEYP]} ++ addi $KEYP, $KEYP, 16 ++ @{[vse32_v $V17, $KEYP]} ++ addi $KEYP, $KEYP, 16 ++ @{[vse32_v $V18, $KEYP]} ++ addi $KEYP, $KEYP, 16 ++ @{[vse32_v $V19, $KEYP]} ++ addi $KEYP, $KEYP, 16 ++ @{[vse32_v $V20, $KEYP]} ++ addi $KEYP, $KEYP, 16 ++ @{[vse32_v $V21, $KEYP]} ++ addi $KEYP, $KEYP, 16 ++ @{[vse32_v $V22, $KEYP]} ++ addi $KEYP, $KEYP, 16 ++ @{[vse32_v $V23, $KEYP]} ++ addi $KEYP, $KEYP, 16 ++ @{[vse32_v $V24, $KEYP]} ++ ++ li a0, 1 ++ ret ++.size L_set_key_256,.-L_set_key_256 ++___ ++} ++ ++{ 
++################################################################################ ++# void rv64i_zvkned_encrypt(const unsigned char *in, unsigned char *out, ++# const AES_KEY *key); ++my ($INP,$OUTP,$KEYP) = ("a0", "a1", "a2"); ++my ($T0,$T1, $ROUNDS, $T6) = ("a3", "a4", "t5", "t6"); ++ ++$code .= <<___; ++.p2align 3 ++.globl rv64i_zvkned_encrypt ++.type rv64i_zvkned_encrypt,\@function ++rv64i_zvkned_encrypt: ++ # Load number of rounds ++ lwu $ROUNDS, 240($KEYP) ++ ++ # Get proper routine for key size ++ li $T6, 14 ++ beq $ROUNDS, $T6, L_enc_256 ++ li $T6, 10 ++ beq $ROUNDS, $T6, L_enc_128 ++ li $T6, 12 ++ beq $ROUNDS, $T6, L_enc_192 ++ ++ j L_fail_m2 ++.size rv64i_zvkned_encrypt,.-rv64i_zvkned_encrypt ++___ ++ ++$code .= <<___; ++.p2align 3 ++L_enc_128: ++ @{[vsetivli "zero", 4, "e32", "m1", "ta", "ma"]} ++ ++ @{[vle32_v $V1, $INP]} ++ ++ @{[vle32_v $V10, $KEYP]} ++ @{[vaesz_vs $V1, $V10]} # with round key w[ 0, 3] ++ addi $KEYP, $KEYP, 16 ++ @{[vle32_v $V11, $KEYP]} ++ @{[vaesem_vs $V1, $V11]} # with round key w[ 4, 7] ++ addi $KEYP, $KEYP, 16 ++ @{[vle32_v $V12, $KEYP]} ++ @{[vaesem_vs $V1, $V12]} # with round key w[ 8,11] ++ addi $KEYP, $KEYP, 16 ++ @{[vle32_v $V13, $KEYP]} ++ @{[vaesem_vs $V1, $V13]} # with round key w[12,15] ++ addi $KEYP, $KEYP, 16 ++ @{[vle32_v $V14, $KEYP]} ++ @{[vaesem_vs $V1, $V14]} # with round key w[16,19] ++ addi $KEYP, $KEYP, 16 ++ @{[vle32_v $V15, $KEYP]} ++ @{[vaesem_vs $V1, $V15]} # with round key w[20,23] ++ addi $KEYP, $KEYP, 16 ++ @{[vle32_v $V16, $KEYP]} ++ @{[vaesem_vs $V1, $V16]} # with round key w[24,27] ++ addi $KEYP, $KEYP, 16 ++ @{[vle32_v $V17, $KEYP]} ++ @{[vaesem_vs $V1, $V17]} # with round key w[28,31] ++ addi $KEYP, $KEYP, 16 ++ @{[vle32_v $V18, $KEYP]} ++ @{[vaesem_vs $V1, $V18]} # with round key w[32,35] ++ addi $KEYP, $KEYP, 16 ++ @{[vle32_v $V19, $KEYP]} ++ @{[vaesem_vs $V1, $V19]} # with round key w[36,39] ++ addi $KEYP, $KEYP, 16 ++ @{[vle32_v $V20, $KEYP]} ++ @{[vaesef_vs $V1, $V20]} # with round key 
w[40,43] ++ ++ @{[vse32_v $V1, $OUTP]} ++ ++ ret ++.size L_enc_128,.-L_enc_128 ++___ ++ ++$code .= <<___; ++.p2align 3 ++L_enc_192: ++ @{[vsetivli "zero", 4, "e32", "m1", "ta", "ma"]} ++ ++ @{[vle32_v $V1, $INP]} ++ ++ @{[vle32_v $V10, $KEYP]} ++ @{[vaesz_vs $V1, $V10]} # with round key w[ 0, 3] ++ addi $KEYP, $KEYP, 16 ++ @{[vle32_v $V11, $KEYP]} ++ @{[vaesem_vs $V1, $V11]} ++ addi $KEYP, $KEYP, 16 ++ @{[vle32_v $V12, $KEYP]} ++ @{[vaesem_vs $V1, $V12]} ++ addi $KEYP, $KEYP, 16 ++ @{[vle32_v $V13, $KEYP]} ++ @{[vaesem_vs $V1, $V13]} ++ addi $KEYP, $KEYP, 16 ++ @{[vle32_v $V14, $KEYP]} ++ @{[vaesem_vs $V1, $V14]} ++ addi $KEYP, $KEYP, 16 ++ @{[vle32_v $V15, $KEYP]} ++ @{[vaesem_vs $V1, $V15]} ++ addi $KEYP, $KEYP, 16 ++ @{[vle32_v $V16, $KEYP]} ++ @{[vaesem_vs $V1, $V16]} ++ addi $KEYP, $KEYP, 16 ++ @{[vle32_v $V17, $KEYP]} ++ @{[vaesem_vs $V1, $V17]} ++ addi $KEYP, $KEYP, 16 ++ @{[vle32_v $V18, $KEYP]} ++ @{[vaesem_vs $V1, $V18]} ++ addi $KEYP, $KEYP, 16 ++ @{[vle32_v $V19, $KEYP]} ++ @{[vaesem_vs $V1, $V19]} ++ addi $KEYP, $KEYP, 16 ++ @{[vle32_v $V20, $KEYP]} ++ @{[vaesem_vs $V1, $V20]} ++ addi $KEYP, $KEYP, 16 ++ @{[vle32_v $V21, $KEYP]} ++ @{[vaesem_vs $V1, $V21]} ++ addi $KEYP, $KEYP, 16 ++ @{[vle32_v $V22, $KEYP]} ++ @{[vaesef_vs $V1, $V22]} ++ ++ @{[vse32_v $V1, $OUTP]} ++ ret ++.size L_enc_192,.-L_enc_192 ++___ ++ ++$code .= <<___; ++.p2align 3 ++L_enc_256: ++ @{[vsetivli "zero", 4, "e32", "m1", "ta", "ma"]} ++ ++ @{[vle32_v $V1, $INP]} ++ ++ @{[vle32_v $V10, $KEYP]} ++ @{[vaesz_vs $V1, $V10]} # with round key w[ 0, 3] ++ addi $KEYP, $KEYP, 16 ++ @{[vle32_v $V11, $KEYP]} ++ @{[vaesem_vs $V1, $V11]} ++ addi $KEYP, $KEYP, 16 ++ @{[vle32_v $V12, $KEYP]} ++ @{[vaesem_vs $V1, $V12]} ++ addi $KEYP, $KEYP, 16 ++ @{[vle32_v $V13, $KEYP]} ++ @{[vaesem_vs $V1, $V13]} ++ addi $KEYP, $KEYP, 16 ++ @{[vle32_v $V14, $KEYP]} ++ @{[vaesem_vs $V1, $V14]} ++ addi $KEYP, $KEYP, 16 ++ @{[vle32_v $V15, $KEYP]} ++ @{[vaesem_vs $V1, $V15]} ++ addi $KEYP, $KEYP, 16 ++ @{[vle32_v 
$V16, $KEYP]} ++ @{[vaesem_vs $V1, $V16]} ++ addi $KEYP, $KEYP, 16 ++ @{[vle32_v $V17, $KEYP]} ++ @{[vaesem_vs $V1, $V17]} ++ addi $KEYP, $KEYP, 16 ++ @{[vle32_v $V18, $KEYP]} ++ @{[vaesem_vs $V1, $V18]} ++ addi $KEYP, $KEYP, 16 ++ @{[vle32_v $V19, $KEYP]} ++ @{[vaesem_vs $V1, $V19]} ++ addi $KEYP, $KEYP, 16 ++ @{[vle32_v $V20, $KEYP]} ++ @{[vaesem_vs $V1, $V20]} ++ addi $KEYP, $KEYP, 16 ++ @{[vle32_v $V21, $KEYP]} ++ @{[vaesem_vs $V1, $V21]} ++ addi $KEYP, $KEYP, 16 ++ @{[vle32_v $V22, $KEYP]} ++ @{[vaesem_vs $V1, $V22]} ++ addi $KEYP, $KEYP, 16 ++ @{[vle32_v $V23, $KEYP]} ++ @{[vaesem_vs $V1, $V23]} ++ addi $KEYP, $KEYP, 16 ++ @{[vle32_v $V24, $KEYP]} ++ @{[vaesef_vs $V1, $V24]} ++ ++ @{[vse32_v $V1, $OUTP]} ++ ret ++.size L_enc_256,.-L_enc_256 ++___ ++ ++################################################################################ ++# void rv64i_zvkned_decrypt(const unsigned char *in, unsigned char *out, ++# const AES_KEY *key); ++ ++$code .= <<___; ++.p2align 3 ++.globl rv64i_zvkned_decrypt ++.type rv64i_zvkned_decrypt,\@function ++rv64i_zvkned_decrypt: ++ # Load number of rounds ++ lwu $ROUNDS, 240($KEYP) ++ ++ # Get proper routine for key size ++ li $T6, 14 ++ beq $ROUNDS, $T6, L_dec_256 ++ li $T6, 10 ++ beq $ROUNDS, $T6, L_dec_128 ++ li $T6, 12 ++ beq $ROUNDS, $T6, L_dec_192 ++ ++ j L_fail_m2 ++.size rv64i_zvkned_decrypt,.-rv64i_zvkned_decrypt ++___ ++ ++$code .= <<___; ++.p2align 3 ++L_dec_128: ++ @{[vsetivli "zero", 4, "e32", "m1", "ta", "ma"]} ++ ++ @{[vle32_v $V1, $INP]} ++ ++ addi $KEYP, $KEYP, 160 ++ @{[vle32_v $V20, $KEYP]} ++ @{[vaesz_vs $V1, $V20]} # with round key w[40,43] ++ addi $KEYP, $KEYP, -16 ++ @{[vle32_v $V19, $KEYP]} ++ @{[vaesdm_vs $V1, $V19]} # with round key w[36,39] ++ addi $KEYP, $KEYP, -16 ++ @{[vle32_v $V18, $KEYP]} ++ @{[vaesdm_vs $V1, $V18]} # with round key w[32,35] ++ addi $KEYP, $KEYP, -16 ++ @{[vle32_v $V17, $KEYP]} ++ @{[vaesdm_vs $V1, $V17]} # with round key w[28,31] ++ addi $KEYP, $KEYP, -16 ++ @{[vle32_v $V16, $KEYP]} 
++ @{[vaesdm_vs $V1, $V16]} # with round key w[24,27] ++ addi $KEYP, $KEYP, -16 ++ @{[vle32_v $V15, $KEYP]} ++ @{[vaesdm_vs $V1, $V15]} # with round key w[20,23] ++ addi $KEYP, $KEYP, -16 ++ @{[vle32_v $V14, $KEYP]} ++ @{[vaesdm_vs $V1, $V14]} # with round key w[16,19] ++ addi $KEYP, $KEYP, -16 ++ @{[vle32_v $V13, $KEYP]} ++ @{[vaesdm_vs $V1, $V13]} # with round key w[12,15] ++ addi $KEYP, $KEYP, -16 ++ @{[vle32_v $V12, $KEYP]} ++ @{[vaesdm_vs $V1, $V12]} # with round key w[ 8,11] ++ addi $KEYP, $KEYP, -16 ++ @{[vle32_v $V11, $KEYP]} ++ @{[vaesdm_vs $V1, $V11]} # with round key w[ 4, 7] ++ addi $KEYP, $KEYP, -16 ++ @{[vle32_v $V10, $KEYP]} ++ @{[vaesdf_vs $V1, $V10]} # with round key w[ 0, 3] ++ ++ @{[vse32_v $V1, $OUTP]} ++ ++ ret ++.size L_dec_128,.-L_dec_128 ++___ ++ ++$code .= <<___; ++.p2align 3 ++L_dec_192: ++ @{[vsetivli "zero", 4, "e32", "m1", "ta", "ma"]} ++ ++ @{[vle32_v $V1, $INP]} ++ ++ addi $KEYP, $KEYP, 192 ++ @{[vle32_v $V22, $KEYP]} ++ @{[vaesz_vs $V1, $V22]} # with round key w[48,51] ++ addi $KEYP, $KEYP, -16 ++ @{[vle32_v $V21, $KEYP]} ++ @{[vaesdm_vs $V1, $V21]} # with round key w[44,47] ++ addi $KEYP, $KEYP, -16 ++ @{[vle32_v $V20, $KEYP]} ++ @{[vaesdm_vs $V1, $V20]} # with round key w[40,43] ++ addi $KEYP, $KEYP, -16 ++ @{[vle32_v $V19, $KEYP]} ++ @{[vaesdm_vs $V1, $V19]} # with round key w[36,39] ++ addi $KEYP, $KEYP, -16 ++ @{[vle32_v $V18, $KEYP]} ++ @{[vaesdm_vs $V1, $V18]} # with round key w[32,35] ++ addi $KEYP, $KEYP, -16 ++ @{[vle32_v $V17, $KEYP]} ++ @{[vaesdm_vs $V1, $V17]} # with round key w[28,31] ++ addi $KEYP, $KEYP, -16 ++ @{[vle32_v $V16, $KEYP]} ++ @{[vaesdm_vs $V1, $V16]} # with round key w[24,27] ++ addi $KEYP, $KEYP, -16 ++ @{[vle32_v $V15, $KEYP]} ++ @{[vaesdm_vs $V1, $V15]} # with round key w[20,23] ++ addi $KEYP, $KEYP, -16 ++ @{[vle32_v $V14, $KEYP]} ++ @{[vaesdm_vs $V1, $V14]} # with round key w[16,19] ++ addi $KEYP, $KEYP, -16 ++ @{[vle32_v $V13, $KEYP]} ++ @{[vaesdm_vs $V1, $V13]} # with round key w[12,15] ++ addi 
$KEYP, $KEYP, -16 ++ @{[vle32_v $V12, $KEYP]} ++ @{[vaesdm_vs $V1, $V12]} # with round key w[ 8,11] ++ addi $KEYP, $KEYP, -16 ++ @{[vle32_v $V11, $KEYP]} ++ @{[vaesdm_vs $V1, $V11]} # with round key w[ 4, 7] ++ addi $KEYP, $KEYP, -16 ++ @{[vle32_v $V10, $KEYP]} ++ @{[vaesdf_vs $V1, $V10]} # with round key w[ 0, 3] ++ ++ @{[vse32_v $V1, $OUTP]} ++ ++ ret ++.size L_dec_192,.-L_dec_192 ++___ ++ ++$code .= <<___; ++.p2align 3 ++L_dec_256: ++ @{[vsetivli "zero", 4, "e32", "m1", "ta", "ma"]} ++ ++ @{[vle32_v $V1, $INP]} ++ ++ addi $KEYP, $KEYP, 224 ++ @{[vle32_v $V24, $KEYP]} ++ @{[vaesz_vs $V1, $V24]} # with round key w[56,59] ++ addi $KEYP, $KEYP, -16 ++ @{[vle32_v $V23, $KEYP]} ++ @{[vaesdm_vs $V1, $V23]} # with round key w[52,55] ++ addi $KEYP, $KEYP, -16 ++ @{[vle32_v $V22, $KEYP]} ++ @{[vaesdm_vs $V1, $V22]} # with round key w[48,51] ++ addi $KEYP, $KEYP, -16 ++ @{[vle32_v $V21, $KEYP]} ++ @{[vaesdm_vs $V1, $V21]} # with round key w[44,47] ++ addi $KEYP, $KEYP, -16 ++ @{[vle32_v $V20, $KEYP]} ++ @{[vaesdm_vs $V1, $V20]} # with round key w[40,43] ++ addi $KEYP, $KEYP, -16 ++ @{[vle32_v $V19, $KEYP]} ++ @{[vaesdm_vs $V1, $V19]} # with round key w[36,39] ++ addi $KEYP, $KEYP, -16 ++ @{[vle32_v $V18, $KEYP]} ++ @{[vaesdm_vs $V1, $V18]} # with round key w[32,35] ++ addi $KEYP, $KEYP, -16 ++ @{[vle32_v $V17, $KEYP]} ++ @{[vaesdm_vs $V1, $V17]} # with round key w[28,31] ++ addi $KEYP, $KEYP, -16 ++ @{[vle32_v $V16, $KEYP]} ++ @{[vaesdm_vs $V1, $V16]} # with round key w[24,27] ++ addi $KEYP, $KEYP, -16 ++ @{[vle32_v $V15, $KEYP]} ++ @{[vaesdm_vs $V1, $V15]} # with round key w[20,23] ++ addi $KEYP, $KEYP, -16 ++ @{[vle32_v $V14, $KEYP]} ++ @{[vaesdm_vs $V1, $V14]} # with round key w[16,19] ++ addi $KEYP, $KEYP, -16 ++ @{[vle32_v $V13, $KEYP]} ++ @{[vaesdm_vs $V1, $V13]} # with round key w[12,15] ++ addi $KEYP, $KEYP, -16 ++ @{[vle32_v $V12, $KEYP]} ++ @{[vaesdm_vs $V1, $V12]} # with round key w[ 8,11] ++ addi $KEYP, $KEYP, -16 ++ @{[vle32_v $V11, $KEYP]} ++ @{[vaesdm_vs 
$V1, $V11]} # with round key w[ 4, 7] ++ addi $KEYP, $KEYP, -16 ++ @{[vle32_v $V10, $KEYP]} ++ @{[vaesdf_vs $V1, $V10]} # with round key w[ 0, 3] ++ ++ @{[vse32_v $V1, $OUTP]} ++ ++ ret ++.size L_dec_256,.-L_dec_256 ++___ ++} ++ ++$code .= <<___; ++L_fail_m1: ++ li a0, -1 ++ ret ++.size L_fail_m1,.-L_fail_m1 ++ ++L_fail_m2: ++ li a0, -2 ++ ret ++.size L_fail_m2,.-L_fail_m2 ++ ++L_end: ++ ret ++.size L_end,.-L_end ++___ ++ ++print $code; ++ ++close STDOUT or die "error closing STDOUT: $!"; +diff --git a/crypto/aes/asm/aes-riscv64.pl b/crypto/aes/asm/aes-riscv64.pl +new file mode 100644 +index 0000000..46b418c +--- /dev/null ++++ b/crypto/aes/asm/aes-riscv64.pl +@@ -0,0 +1,1709 @@ ++#! /usr/bin/env perl ++# Copyright 2022 The OpenSSL Project Authors. All Rights Reserved. ++# ++# Licensed under the Apache License 2.0 (the "License"). You may not use ++# this file except in compliance with the License. You can obtain a copy ++# in the file LICENSE in the source distribution or at ++# https://www.openssl.org/source/license.html ++ ++# $output is the last argument if it looks like a file (it has an extension) ++# $flavour is the first argument if it doesn't look like a file ++$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef; ++$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef; ++ ++$output and open STDOUT,">$output"; ++ ++################################################################################ ++# Utility functions to help with keeping track of which registers to stack/ ++# unstack when entering / exiting routines. 
++################################################################################ ++{ ++ # Callee-saved registers ++ my @callee_saved = map("x$_",(2,8,9,18..27)); ++ # Caller-saved registers ++ my @caller_saved = map("x$_",(1,5..7,10..17,28..31)); ++ my @must_save; ++ sub use_reg { ++ my $reg = shift; ++ if (grep(/^$reg$/, @callee_saved)) { ++ push(@must_save, $reg); ++ } elsif (!grep(/^$reg$/, @caller_saved)) { ++ # Register is not usable! ++ die("Unusable register ".$reg); ++ } ++ return $reg; ++ } ++ sub use_regs { ++ return map(use_reg("x$_"), @_); ++ } ++ sub save_regs { ++ my $ret = ''; ++ my $stack_reservation = ($#must_save + 1) * 8; ++ my $stack_offset = $stack_reservation; ++ if ($stack_reservation % 16) { ++ $stack_reservation += 8; ++ } ++ $ret.=" addi sp,sp,-$stack_reservation\n"; ++ foreach (@must_save) { ++ $stack_offset -= 8; ++ $ret.=" sd $_,$stack_offset(sp)\n"; ++ } ++ return $ret; ++ } ++ sub load_regs { ++ my $ret = ''; ++ my $stack_reservation = ($#must_save + 1) * 8; ++ my $stack_offset = $stack_reservation; ++ if ($stack_reservation % 16) { ++ $stack_reservation += 8; ++ } ++ foreach (@must_save) { ++ $stack_offset -= 8; ++ $ret.=" ld $_,$stack_offset(sp)\n"; ++ } ++ $ret.=" addi sp,sp,$stack_reservation\n"; ++ return $ret; ++ } ++ sub clear_regs { ++ @must_save = (); ++ } ++} ++ ++################################################################################ ++# Register assignment for AES_encrypt and AES_decrypt ++################################################################################ ++ ++# Registers to hold AES state (called s0-s3 or y0-y3 elsewhere) ++my ($Q0,$Q1,$Q2,$Q3) = use_regs(6..9); ++ ++# Function arguments (x10-x12 are a0-a2 in the ABI) ++# Input block pointer, output block pointer, key pointer ++my ($INP,$OUTP,$KEYP) = use_regs(10..12); ++ ++# Temporaries ++my ($T0,$T1,$T2,$T3) = use_regs(13..16); ++my ($T4,$T5,$T6,$T7,$T8,$T9,$T10,$T11) = use_regs(17..24); ++my ($T12,$T13,$T14,$T15) = use_regs(25..28); ++ ++# 
Register to hold table offset ++my ($I0) = use_regs(29); ++ ++# Loop counter ++my ($loopcntr) = use_regs(30); ++ ++# Lookup table address register ++my ($TBL) = use_regs(31); ++ ++# Lookup table mask register ++my ($MSK) = use_regs(5); ++ ++# Aliases for readability ++my $K0 = $loopcntr; ++my $K1 = $KEYP; ++ ++################################################################################ ++# Table lookup utility functions for AES_encrypt and AES_decrypt ++################################################################################ ++ ++# do_lookup([destination regs], [state regs], [temporary regs], shamt) ++# do_lookup loads four entries from an AES encryption/decryption table ++# and stores the result in the specified destination register set ++# Ds->[0] = Table[Qs->[0] >> shamt] ++# Ds->[1] = Table[Qs->[1] >> shamt] ++# Ds->[2] = Table[Qs->[2] >> shamt] ++# Ds->[3] = Table[Qs->[3] >> shamt] ++# Four temporary regs are used to generate these lookups. The temporary regs ++# can be equal to the destination regs, but only if they appear in the same ++# order. I.e. do_lookup([A,B,C,D],[...],[A,B,C,D],...) is OK ++sub do_lookup { ++ # (destination regs, state regs, temporary regs, shift amount) ++ my ($Ds, $Qs, $Ts, $shamt) = @_; ++ ++ my $ret = ''; ++ ++ # AES encryption/decryption table entries have word-sized (4-byte) entries. 
++ # To convert the table index into a byte offset, we compute ++ # ((Qs->[i] >> shamt) & 0xFF) << 2 ++ # However, to save work, we compute the equivalent expression ++ # (Qs->[i] >> (shamt-2)) & 0x3FC ++ if ($shamt < 2) { ++$ret .= <<___; ++ ++ slli $Ts->[0],$Qs->[0],$shamt+2 ++ slli $Ts->[1],$Qs->[1],$shamt+2 ++ slli $Ts->[2],$Qs->[2],$shamt+2 ++ slli $Ts->[3],$Qs->[3],$shamt+2 ++___ ++ } else { ++$ret .= <<___; ++ ++ srli $Ts->[0],$Qs->[0],$shamt-2 ++ srli $Ts->[1],$Qs->[1],$shamt-2 ++ srli $Ts->[2],$Qs->[2],$shamt-2 ++ srli $Ts->[3],$Qs->[3],$shamt-2 ++___ ++ } ++ ++$ret .= <<___; ++ ++ andi $Ts->[0],$Ts->[0],0x3FC ++ andi $Ts->[1],$Ts->[1],0x3FC ++ andi $Ts->[2],$Ts->[2],0x3FC ++ andi $Ts->[3],$Ts->[3],0x3FC ++ ++ # Index into table. ++ add $I0,$TBL,$Ts->[0] ++ lwu $Ds->[0],0($I0) ++ add $I0,$TBL,$Ts->[1] ++ lwu $Ds->[1],0($I0) ++ add $I0,$TBL,$Ts->[2] ++ lwu $Ds->[2],0($I0) ++ add $I0,$TBL,$Ts->[3] ++ lwu $Ds->[3],0($I0) ++ ++___ ++ ++ return $ret; ++} ++ ++# Identical to do_lookup(), but loads only a single byte into each destination ++# register (replaces lwu with lbu). Used in the final round of AES_encrypt. ++sub do_lookup_byte { ++ my $ret = do_lookup(@_); ++ $ret =~ s/lwu/lbu/g; ++ return $ret; ++} ++ ++# do_lookup_Td4([destination regs], [state regs], [temporary regs]) ++# Used in final phase of AES_decrypt ++# Ds->[0] = Table[(Qs->[0]) &0xFF] ++# Ds->[1] = Table[(Qs->[1] >> 8 )&0xFF] ++# Ds->[2] = Table[(Qs->[2] >> 16)&0xFF] ++# Ds->[3] = Table[(Qs->[3] >> 24)&0xFF] ++# Four temporary regs are used to generate these lookups. The temporary regs ++# can be equal to the destination regs, but only if they appear in the same ++# order. I.e. do_lookup([A,B,C,D],[...],[A,B,C,D],...) 
is OK ++sub do_lookup_Td4 { ++ my ($Ds, $Qs, $Ts) = @_; ++ ++ my $ret = ''; ++ ++$ret .= <<___; ++ srli $Ts->[1],$Qs->[1],8 ++ srli $Ts->[2],$Qs->[2],16 ++ srli $Ts->[3],$Qs->[3],24 ++ ++ andi $Ts->[0],$Qs->[0],0xFF ++ andi $Ts->[1],$Ts->[1],0xFF ++ andi $Ts->[2],$Ts->[2],0xFF ++ andi $Ts->[3],$Ts->[3],0xFF ++ ++ add $I0,$TBL,$Ts->[0] ++ lbu $Ds->[0],0($I0) ++ add $I0,$TBL,$Ts->[1] ++ lbu $Ds->[1],0($I0) ++ add $I0,$TBL,$Ts->[2] ++ lbu $Ds->[2],0($I0) ++ add $I0,$TBL,$Ts->[3] ++ lbu $Ds->[3],0($I0) ++ ++___ ++ ++ return $ret; ++} ++ ++################################################################################ ++# void AES_encrypt(const unsigned char *in, unsigned char *out, ++# const AES_KEY *key); ++################################################################################ ++my $code .= <<___; ++.text ++.balign 16 ++.globl AES_encrypt ++.type AES_encrypt,\@function ++AES_encrypt: ++___ ++ ++$code .= save_regs(); ++ ++$code .= <<___; ++ ++ # Load input to block cipher ++ ld $Q0,0($INP) ++ ld $Q2,8($INP) ++ ++ ++ # Load key ++ ld $T0,0($KEYP) ++ ld $T2,8($KEYP) ++ ++ ++ # Load number of rounds ++ lwu $loopcntr,240($KEYP) ++ ++ # Load address of substitution table and wrap-around mask ++ la $TBL,AES_Te0 ++ li $MSK,~0xFFF ++ ++ # y = n xor k, stored in Q0-Q3 ++ ++ xor $Q0,$Q0,$T0 ++ xor $Q2,$Q2,$T2 ++ srli $Q1,$Q0,32 ++ srli $Q3,$Q2,32 ++ ++ # The main loop only executes the first N-1 rounds. 
++ add $loopcntr,$loopcntr,-1 ++ ++ # Do Nr - 1 rounds (final round is special) ++ ++1: ++___ ++ ++# Lookup in table Te0 ++$code .= do_lookup( ++ [$T4,$T5,$T6,$T7], # Destination registers ++ [$Q0,$Q1,$Q2,$Q3], # State registers ++ [$T0,$T1,$T2,$T3], # Temporaries ++ 0 # Shift amount ++); ++ ++$code .= <<___; ++ add $TBL,$TBL,1024 ++___ ++ ++# Lookup in table Te1 ++$code .= do_lookup( ++ [$T8,$T9,$T10,$T11], ++ [$Q1,$Q2,$Q3,$Q0], ++ [$T0,$T1,$T2,$T3], ++ 8 ++); ++ ++$code .= <<___; ++ add $TBL,$TBL,1024 ++___ ++ ++# Lookup in table Te2 ++$code .= do_lookup( ++ [$T12,$T13,$T14,$T15], ++ [$Q2,$Q3,$Q0,$Q1], ++ [$T0,$T1,$T2,$T3], ++ 16 ++); ++ ++$code .= <<___; ++ add $TBL,$TBL,1024 ++___ ++ ++# Lookup in table Te3 ++$code .= do_lookup( ++ [$T0,$T1,$T2,$T3], ++ [$Q3,$Q0,$Q1,$Q2], ++ [$T0,$T1,$T2,$T3], ++ 24 ++); ++ ++$code .= <<___; ++ ++ # Combine table lookups ++ xor $T4,$T4,$T8 ++ xor $T5,$T5,$T9 ++ xor $T6,$T6,$T10 ++ xor $T7,$T7,$T11 ++ ++ xor $T4,$T4,$T12 ++ xor $T5,$T5,$T13 ++ xor $T6,$T6,$T14 ++ xor $T7,$T7,$T15 ++ ++ xor $T0,$T0,$T4 ++ xor $T1,$T1,$T5 ++ xor $T2,$T2,$T6 ++ xor $T3,$T3,$T7 ++ ++ # Update key ptr to point to next key in schedule ++ add $KEYP,$KEYP,16 ++ ++ # Grab next key in schedule ++ ld $T4,0($KEYP) ++ ld $T6,8($KEYP) ++ ++ # Round TBL back to 4k boundary ++ and $TBL,$TBL,$MSK ++ ++ add $loopcntr,$loopcntr,-1 ++ ++ xor $Q0,$T0,$T4 ++ xor $Q2,$T2,$T6 ++ srli $T5,$T4,32 ++ xor $Q1,$T1,$T5 ++ srli $T7,$T6,32 ++ xor $Q3,$T3,$T7 ++ ++ bgtz $loopcntr,1b ++ ++#================================FINAL ROUND==================================== ++ ++# In the final round, all lookup table accesses would appear as follows: ++# ++# ... compute index I0 ++# add I0,TBL,T0 ++# lbu T0,1(I0) ++# ++# Instead of indexing with a 1 offset, we can add 1 to the TBL pointer, and use ++# a 0 offset when indexing in the following code. This enables some instruction ++# fusion opportunities. 
++ ++ add $TBL,$TBL,1 ++ ++ ld $K0,16($KEYP) ++ ld $K1,24($KEYP) ++___ ++ ++$code .= do_lookup_byte( ++ [$T4,$T5,$T6,$T7], ++ [$Q0,$Q1,$Q2,$Q3], ++ [$T0,$T1,$T2,$T3], ++ 0 ++); ++ ++$code .= do_lookup_byte( ++ [$T8,$T9,$T10,$T11], ++ [$Q1,$Q2,$Q3,$Q0], ++ [$T0,$T1,$T2,$T3], ++ 8 ++); ++ ++$code .= do_lookup_byte( ++ [$T12,$T13,$T14,$T15], ++ [$Q2,$Q3,$Q0,$Q1], ++ [$T0,$T1,$T2,$T3], ++ 16 ++); ++ ++$code .= do_lookup_byte( ++ [$T0,$T1,$T2,$T3], ++ [$Q3,$Q0,$Q1,$Q2], ++ [$T0,$T1,$T2,$T3], ++ 24 ++); ++ ++$code .= <<___; ++ ++ # Combine table lookups into T0 and T2 ++ ++ slli $T5,$T5,32 ++ slli $T7,$T7,32 ++ slli $T8,$T8,8 ++ slli $T9,$T9,8+32 ++ slli $T10,$T10,8 ++ slli $T11,$T11,8+32 ++ slli $T12,$T12,16 ++ slli $T13,$T13,16+32 ++ slli $T14,$T14,16 ++ slli $T15,$T15,16+32 ++ ++ slli $T0,$T0,24 ++ slli $T1,$T1,24+32 ++ slli $T2,$T2,24 ++ slli $T3,$T3,24+32 ++ ++ xor $T4,$T4,$T0 ++ xor $T5,$T5,$T1 ++ xor $T6,$T6,$T2 ++ xor $T7,$T7,$T3 ++ ++ xor $T8,$T8,$T12 ++ xor $T9,$T9,$T13 ++ xor $T10,$T10,$T14 ++ xor $T11,$T11,$T15 ++ ++ xor $T0,$T4,$T8 ++ xor $T1,$T5,$T9 ++ xor $T2,$T6,$T10 ++ xor $T3,$T7,$T11 ++ ++ ++ xor $T0,$T0,$T1 ++ # T0 = [T1 T13 T9 T5 T0 T12 T8 T4] ++ xor $T0,$T0,$K0 # XOR in key ++ ++ xor $T2,$T2,$T3 ++ # T2 = [T3 T15 T11 T7 T2 T14 T10 T6] ++ xor $T2,$T2,$K1 # XOR in key ++ ++ sd $T0,0($OUTP) ++ sd $T2,8($OUTP) ++ ++ # Pop registers and return ++2: ++___ ++ ++$code .= load_regs(); ++ ++$code .= <<___; ++ ret ++___ ++ ++################################################################################ ++# void AES_decrypt(const unsigned char *in, unsigned char *out, ++# const AES_KEY *key); ++################################################################################ ++$code .= <<___; ++.text ++.balign 16 ++.globl AES_decrypt ++.type AES_decrypt,\@function ++AES_decrypt: ++___ ++ ++$code .= save_regs(); ++ ++$code .= <<___; ++ ++ # Load input to block cipher ++ ld $Q0,0($INP) ++ ld $Q2,8($INP) ++ ++ # Load key ++ # Note that key is assumed in BE byte 
order ++ # (This routine was written against a key scheduling implementation that ++ # placed keys in BE byte order.) ++ ld $T0,0($KEYP) ++ ld $T2,8($KEYP) ++ ++ # Load number of rounds ++ lwu $loopcntr,240($KEYP) ++ ++ # Load address of substitution table and wrap-around mask ++ la $TBL,AES_Td0 ++ li $MSK,~0xFFF ++ ++ xor $Q0,$Q0,$T0 ++ xor $Q2,$Q2,$T2 ++ srli $Q1,$Q0,32 ++ srli $Q3,$Q2,32 ++ ++ # The main loop only executes the first N-1 rounds. ++ add $loopcntr,$loopcntr,-1 ++ ++ # Do Nr - 1 rounds (final round is special) ++1: ++___ ++ ++# Lookup in Td0 ++$code .= do_lookup( ++ [$T4,$T5,$T6,$T7], # Destination registers ++ [$Q0,$Q1,$Q2,$Q3], # State registers ++ [$T0,$T1,$T2,$T3], # Temporaries ++ 0 # Shift amount ++); ++ ++$code .= <<___; ++ add $TBL,$TBL,1024 ++___ ++ ++# Lookup in Td1 ++$code .= do_lookup( ++ [$T8,$T9,$T10,$T11], ++ [$Q3,$Q0,$Q1,$Q2], ++ [$T0,$T1,$T2,$T3], ++ 8 ++); ++ ++$code .= <<___; ++ add $TBL,$TBL,1024 ++___ ++ ++# Lookup in Td2 ++$code .= do_lookup( ++ [$T12,$T13,$T14,$T15], ++ [$Q2,$Q3,$Q0,$Q1], ++ [$T0,$T1,$T2,$T3], ++ 16 ++); ++ ++$code .= <<___; ++ add $TBL,$TBL,1024 ++___ ++ ++# Lookup in Td3 ++$code .= do_lookup( ++ [$T0,$T1,$T2,$T3], ++ [$Q1,$Q2,$Q3,$Q0], ++ [$T0,$T1,$T2,$T3], ++ 24 ++); ++ ++$code .= <<___; ++ xor $T4,$T4,$T8 ++ xor $T5,$T5,$T9 ++ xor $T6,$T6,$T10 ++ xor $T7,$T7,$T11 ++ ++ xor $T4,$T4,$T12 ++ xor $T5,$T5,$T13 ++ xor $T6,$T6,$T14 ++ xor $T7,$T7,$T15 ++ ++ xor $T0,$T0,$T4 ++ xor $T1,$T1,$T5 ++ xor $T2,$T2,$T6 ++ xor $T3,$T3,$T7 ++ ++ # Update key ptr to point to next key in schedule ++ add $KEYP,$KEYP,16 ++ ++ # Grab next key in schedule ++ ld $T4,0($KEYP) ++ ld $T6,8($KEYP) ++ ++ # Round TBL back to 4k boundary ++ and $TBL,$TBL,$MSK ++ ++ add $loopcntr,$loopcntr,-1 ++ ++ xor $Q0,$T0,$T4 ++ xor $Q2,$T2,$T6 ++ srli $T5,$T4,32 ++ xor $Q1,$T1,$T5 ++ srli $T7,$T6,32 ++ xor $Q3,$T3,$T7 ++ ++ bgtz $loopcntr,1b ++ ++#================================FINAL ROUND==================================== ++ ++ la $TBL,AES_Td4 
++ ++ # K0,K1 are aliases for loopcntr,KEYP ++ # As these registers will no longer be used after these loads, reuse them ++ # to store the final key in the schedule. ++ ld $K0,16($KEYP) ++ ld $K1,24($KEYP) ++___ ++ ++$code .= do_lookup_Td4( ++ [$T4,$T5,$T6,$T7], ++ [$Q0,$Q3,$Q2,$Q1], ++ [$T0,$T1,$T2,$T3] ++); ++ ++$code .= do_lookup_Td4( ++ [$T8,$T9,$T10,$T11], ++ [$Q1,$Q0,$Q3,$Q2], ++ [$T0,$T1,$T2,$T3] ++); ++ ++$code .= do_lookup_Td4( ++ [$T12,$T13,$T14,$T15], ++ [$Q2,$Q1,$Q0,$Q3], ++ [$T0,$T1,$T2,$T3] ++); ++ ++$code .= do_lookup_Td4( ++ [$T0,$T1,$T2,$T3], ++ [$Q3,$Q2,$Q1,$Q0], ++ [$T0,$T1,$T2,$T3] ++); ++ ++$code .= <<___; ++ ++ # T0-T15 now contain the decrypted block, minus xoring with the final round ++ # key. We pack T0-T15 into the two 64-bit registers T0 and T4, then xor ++ # in the key and store. ++ ++ slli $T5,$T5,8 ++ slli $T6,$T6,16 ++ slli $T7,$T7,24 ++ slli $T8,$T8,32 ++ slli $T9,$T9,8+32 ++ slli $T10,$T10,16+32 ++ slli $T11,$T11,32+24 ++ slli $T13,$T13,8 ++ slli $T14,$T14,16 ++ slli $T15,$T15,24 ++ slli $T0,$T0,32 ++ slli $T1,$T1,8+32 ++ slli $T2,$T2,16+32 ++ slli $T3,$T3,24+32 ++ ++ xor $T4,$T4,$T5 ++ xor $T6,$T6,$T7 ++ xor $T8,$T8,$T9 ++ xor $T10,$T10,$T11 ++ ++ xor $T12,$T12,$T13 ++ xor $T14,$T14,$T15 ++ xor $T0,$T0,$T1 ++ xor $T2,$T2,$T3 ++ ++ xor $T4,$T4,$T6 ++ xor $T8,$T8,$T10 ++ xor $T12,$T12,$T14 ++ xor $T0,$T0,$T2 ++ ++ xor $T4,$T4,$T8 ++ # T4 = [T11 T10 T9 T8 T7 T6 T5 T4] ++ xor $T4,$T4,$K0 # xor in key ++ ++ xor $T0,$T0,$T12 ++ # T0 = [T3 T2 T1 T0 T15 T14 T13 T12] ++ xor $T0,$T0,$K1 # xor in key ++ ++ sd $T4,0($OUTP) ++ sd $T0,8($OUTP) ++ ++ # Pop registers and return ++___ ++ ++$code .= load_regs(); ++ ++$code .= <<___; ++ ret ++___ ++ ++clear_regs(); ++ ++################################################################################ ++# Register assignment for AES_set_encrypt_key ++################################################################################ ++ ++# Function arguments (x10-x12 are a0-a2 in the ABI) ++# Pointer to 
user key, number of bits in key, key pointer ++my ($UKEY,$BITS,$KEYP) = use_regs(10..12); ++ ++# Temporaries ++my ($T0,$T1,$T2,$T3) = use_regs(6..8,13); ++my ($T4,$T5,$T6,$T7,$T8,$T9,$T10,$T11) = use_regs(14..17,28..31); ++ ++# Pointer into rcon table ++my ($RCON) = use_regs(9); ++ ++# Register to hold table offset and used as a temporary ++my ($I0) = use_regs(18); ++ ++# Loop counter ++my ($loopcntr) = use_regs(19); ++ ++# Lookup table address register ++my ($TBL) = use_regs(20); ++ ++# Calculates dest = [ ++# S[(in>>shifts[3])&0xFF], ++# S[(in>>shifts[2])&0xFF], ++# S[(in>>shifts[1])&0xFF], ++# S[(in>>shifts[0])&0xFF] ++# ] ++# This routine spreads accesses across Te0-Te3 to help bring those tables ++# into cache, in anticipation of running AES_[en/de]crypt. ++sub do_enc_lookup { ++ # (destination reg, input reg, shifts array, temporary regs) ++ my ($dest, $in, $shifts, $Ts) = @_; ++ ++ my $ret = ''; ++ ++$ret .= <<___; ++ ++ # Round TBL back to 4k boundary ++ srli $TBL,$TBL,12 ++ slli $TBL,$TBL,12 ++ ++ # Offset by 1 byte, since Te0[x] = S[x].[03, 01, 01, 02] ++ # So that, later on, a 0-offset lbu yields S[x].01 == S[x] ++ addi $TBL,$TBL,1 ++___ ++ ++ for ($i = 0; $i < 4; $i++) { ++ if ($shifts->[$i] < 2) { ++ $ret .= " slli $Ts->[$i],$in,2-$shifts->[$i]\n"; ++ } else { ++ $ret .= " srli $Ts->[$i],$in,$shifts->[$i]-2\n"; ++ } ++ } ++ ++$ret .= <<___; ++ ++ andi $Ts->[0],$Ts->[0],0x3FC ++ andi $Ts->[1],$Ts->[1],0x3FC ++ andi $Ts->[2],$Ts->[2],0x3FC ++ andi $Ts->[3],$Ts->[3],0x3FC ++ ++ # Index into tables Te0-Te3 (spread access across tables to help bring ++ # them into cache for later) ++ ++ add $I0,$TBL,$Ts->[0] ++ lbu $Ts->[0],0($I0) ++ ++ add $TBL,$TBL,1025 # yes, 1025 ++ add $I0,$TBL,$Ts->[1] ++ lbu $Ts->[1],0($I0) ++ ++ add $TBL,$TBL,1025 ++ add $I0,$TBL,$Ts->[2] ++ lbu $Ts->[2],0($I0) ++ ++ add $TBL,$TBL,1022 ++ add $I0,$TBL,$Ts->[3] ++ lbu $Ts->[3],0($I0) ++ ++ slli $Ts->[1],$Ts->[1],8 ++ slli $Ts->[2],$Ts->[2],16 ++ slli $Ts->[3],$Ts->[3],24 ++ ++ xor 
$Ts->[0],$Ts->[0],$Ts->[1] ++ xor $Ts->[2],$Ts->[2],$Ts->[3] ++ xor $dest,$Ts->[0],$Ts->[2] ++___ ++ ++ return $ret; ++} ++ ++################################################################################ ++# void AES_set_encrypt_key(const unsigned char *userKey, const int bits, ++# AES_KEY *key) ++################################################################################ ++$code .= <<___; ++.text ++.balign 16 ++.globl AES_set_encrypt_key ++.type AES_set_encrypt_key,\@function ++AES_set_encrypt_key: ++___ ++$code .= save_regs(); ++$code .= <<___; ++ bnez $UKEY,1f # if (!userKey || !key) return -1; ++ bnez $KEYP,1f ++ li a0,-1 ++ ret ++1: ++ la $RCON,AES_rcon ++ la $TBL,AES_Te0 ++ li $T8,128 ++ li $T9,192 ++ li $T10,256 ++ ++ # Determine number of rounds from key size in bits ++ bne $BITS,$T8,1f ++ li $T3,10 # key->rounds = 10 if bits == 128 ++ j 3f ++1: ++ bne $BITS,$T9,2f ++ li $T3,12 # key->rounds = 12 if bits == 192 ++ j 3f ++2: ++ li $T3,14 # key->rounds = 14 if bits == 256 ++ beq $BITS,$T10,3f ++ li a0,-2 # If bits != 128, 192, or 256, return -2 ++ j 5f ++3: ++ ld $T0,0($UKEY) ++ ld $T2,8($UKEY) ++ ++ sw $T3,240($KEYP) ++ ++ li $loopcntr,0 # == i*4 ++ ++ srli $T1,$T0,32 ++ srli $T3,$T2,32 ++ ++ sd $T0,0($KEYP) ++ sd $T2,8($KEYP) ++ ++ # if bits == 128 ++ # jump into loop ++ beq $BITS,$T8,1f ++ ++ ld $T4,16($UKEY) ++ srli $T5,$T4,32 ++ sd $T4,16($KEYP) ++ ++ # if bits == 192 ++ # jump into loop ++ beq $BITS,$T9,2f ++ ++ ld $T6,24($UKEY) ++ srli $T7,$T6,32 ++ sd $T6,24($KEYP) ++ ++ # bits == 256 ++ j 3f ++___ ++ ++$code .= <<___; ++1: ++ addi $KEYP,$KEYP,16 ++1: ++___ ++$code .= do_enc_lookup($T4,$T3,[8,16,24,0],[$T4,$T5,$T6,$T7]); ++ ++$code .= <<___; ++ add $T5,$RCON,$loopcntr # rcon[i] (i increments by 4 so it can double as ++ # a word offset) ++ lwu $T5,0($T5) ++ ++ addi $loopcntr,$loopcntr,4 ++ li $I0,10*4 ++ ++ xor $T0,$T0,$T4 ++ xor $T0,$T0,$T5 ++ xor $T1,$T1,$T0 ++ xor $T2,$T2,$T1 ++ xor $T3,$T3,$T2 ++ ++ sw $T0,0($KEYP) ++ sw $T1,4($KEYP) ++ sw 
$T2,8($KEYP) ++ sw $T3,12($KEYP) ++ ++ addi $KEYP,$KEYP,16 ++ ++ ++ bne $loopcntr,$I0,1b ++ j 4f ++___ ++$code .= <<___; ++2: ++ addi $KEYP,$KEYP,24 ++2: ++___ ++$code .= do_enc_lookup($T6,$T5,[8,16,24,0],[$T6,$T7,$T8,$T9]); ++ ++$code .= <<___; ++ add $T7,$RCON,$loopcntr # rcon[i] (i increments by 4 so it can double as ++ # a word offset) ++ lwu $T7,0($T7) ++ ++ addi $loopcntr,$loopcntr,4 ++ li $I0,8*4 ++ ++ xor $T0,$T0,$T6 ++ xor $T0,$T0,$T7 ++ xor $T1,$T1,$T0 ++ xor $T2,$T2,$T1 ++ xor $T3,$T3,$T2 ++ ++ sw $T0,0($KEYP) ++ sw $T1,4($KEYP) ++ sw $T2,8($KEYP) ++ sw $T3,12($KEYP) ++ ++ beq $loopcntr,$I0,4f ++ ++ xor $T4,$T4,$T3 ++ xor $T5,$T5,$T4 ++ sw $T4,16($KEYP) ++ sw $T5,20($KEYP) ++ ++ addi $KEYP,$KEYP,24 ++ j 2b ++___ ++$code .= <<___; ++3: ++ addi $KEYP,$KEYP,32 ++3: ++___ ++$code .= do_enc_lookup($T8,$T7,[8,16,24,0],[$T8,$T9,$T10,$T11]); ++ ++$code .= <<___; ++ add $T9,$RCON,$loopcntr # rcon[i] (i increments by 4 so it can double as ++ # a word offset) ++ lwu $T9,0($T9) ++ ++ addi $loopcntr,$loopcntr,4 ++ li $I0,7*4 ++ ++ xor $T0,$T0,$T8 ++ xor $T0,$T0,$T9 ++ xor $T1,$T1,$T0 ++ xor $T2,$T2,$T1 ++ xor $T3,$T3,$T2 ++ ++ sw $T0,0($KEYP) ++ sw $T1,4($KEYP) ++ sw $T2,8($KEYP) ++ sw $T3,12($KEYP) ++ ++ beq $loopcntr,$I0,4f ++___ ++$code .= do_enc_lookup($T8,$T3,[0,8,16,24],[$T8,$T9,$T10,$T11]); ++$code .= <<___; ++ xor $T4,$T4,$T8 ++ xor $T5,$T5,$T4 ++ xor $T6,$T6,$T5 ++ xor $T7,$T7,$T6 ++ sw $T4,16($KEYP) ++ sw $T5,20($KEYP) ++ sw $T6,24($KEYP) ++ sw $T7,28($KEYP) ++ ++ addi $KEYP,$KEYP,32 ++ j 3b ++ ++4: # return 0 ++ li a0,0 ++5: # return a0 ++___ ++$code .= load_regs(); ++$code .= <<___; ++ ret ++___ ++ ++clear_regs(); ++ ++################################################################################ ++# Register assignment for AES_set_decrypt_key ++################################################################################ ++ ++# Function arguments (x10-x12 are a0-a2 in the ABI) ++# Pointer to user key, number of bits in key, key pointer ++my 
($UKEY,$BITS,$KEYP) = use_regs(10..12); ++ ++# Temporaries ++my ($T0,$T1,$T2,$T3) = use_regs(6..8,9); ++my ($T4,$T5,$T6,$T7,$T8) = use_regs(13..17); ++ ++my ($I1) = use_regs(18); ++ ++# Register to hold table offset and used as a temporary ++my ($I0) = use_regs(19); ++ ++# Loop counter ++my ($loopcntr) = use_regs(20); ++ ++# Lookup table address register ++my ($TBL) = use_regs(21); ++ ++# Calculates dest = [ ++# Td0[Te1[(in >> 24) & 0xff] & 0xff] ^ ++# Td1[Te1[(in >> 16) & 0xff] & 0xff] ^ ++# Td2[Te1[(in >> 8) & 0xff] & 0xff] ^ ++# Td3[Te1[(in ) & 0xff] & 0xff] ++# ] ++sub do_dec_lookup { ++ # (destination reg, input reg, temporary regs) ++ my ($dest, $in, $Ts) = @_; ++ ++ my $ret = ''; ++ ++$ret .= <<___; ++ ++ la $TBL,AES_Te2 ++ ++ slli $Ts->[0],$in,2 ++ srli $Ts->[1],$in,8-2 ++ srli $Ts->[2],$in,16-2 ++ srli $Ts->[3],$in,24-2 ++ ++ andi $Ts->[0],$Ts->[0],0x3FC ++ andi $Ts->[1],$Ts->[1],0x3FC ++ andi $Ts->[2],$Ts->[2],0x3FC ++ andi $Ts->[3],$Ts->[3],0x3FC ++ ++ # Index into table Te2 ++ ++ add $I0,$TBL,$Ts->[0] ++ lwu $Ts->[0],0($I0) ++ ++ add $I0,$TBL,$Ts->[1] ++ lwu $Ts->[1],0($I0) ++ ++ add $I0,$TBL,$Ts->[2] ++ lwu $Ts->[2],0($I0) ++ ++ add $I0,$TBL,$Ts->[3] ++ lwu $Ts->[3],0($I0) ++ ++ andi $Ts->[0],$Ts->[0],0xFF ++ andi $Ts->[1],$Ts->[1],0xFF ++ andi $Ts->[2],$Ts->[2],0xFF ++ andi $Ts->[3],$Ts->[3],0xFF ++ ++ slli $Ts->[0],$Ts->[0],2 ++ slli $Ts->[1],$Ts->[1],2 ++ slli $Ts->[2],$Ts->[2],2 ++ slli $Ts->[3],$Ts->[3],2 ++ ++ la $TBL,AES_Td0 ++ ++ # Lookup in Td0-Td3 ++ ++ add $I0,$TBL,$Ts->[0] ++ lwu $Ts->[0],0($I0) ++ ++ add $TBL,$TBL,1024 ++ add $I0,$TBL,$Ts->[1] ++ lwu $Ts->[1],0($I0) ++ ++ add $TBL,$TBL,1024 ++ add $I0,$TBL,$Ts->[2] ++ lwu $Ts->[2],0($I0) ++ ++ add $TBL,$TBL,1024 ++ add $I0,$TBL,$Ts->[3] ++ lwu $Ts->[3],0($I0) ++ ++ xor $Ts->[0],$Ts->[0],$Ts->[1] ++ xor $Ts->[2],$Ts->[2],$Ts->[3] ++ xor $dest,$Ts->[0],$Ts->[2] ++___ ++ ++ return $ret; ++} ++ ++################################################################################ ++# void 
AES_set_decrypt_key(const unsigned char *userKey, const int bits, ++# AES_KEY *key) ++################################################################################ ++$code .= <<___; ++.text ++.balign 16 ++.globl AES_set_decrypt_key ++.type AES_set_decrypt_key,\@function ++AES_set_decrypt_key: ++ # Call AES_set_encrypt_key first ++ addi sp,sp,-16 ++ sd $KEYP,0(sp) # We need to hold onto this! ++ sd ra,8(sp) ++ jal ra,AES_set_encrypt_key ++ ld $KEYP,0(sp) ++ ld ra,8(sp) ++ addi sp,sp,16 ++ bgez a0,1f # If error, return error ++ ret ++1: ++___ ++$code .= save_regs(); ++$code .= <<___; ++ ++ li $T4,0 ++ lwu $T8,240($KEYP) ++ slli $T5,$T8,4 ++ # Invert order of round keys ++1: ++ add $I0,$KEYP,$T4 ++ ld $T0,0($I0) ++ ld $T1,8($I0) ++ add $I1,$KEYP,$T5 ++ ld $T2,0($I1) ++ ld $T3,8($I1) ++ addi $T4,$T4,16 ++ addi $T5,$T5,-16 ++ sd $T0,0($I1) ++ sd $T1,8($I1) ++ sd $T2,0($I0) ++ sd $T3,8($I0) ++ blt $T4,$T5,1b ++ ++ li $loopcntr,1 ++ ++1: ++ addi $KEYP,$KEYP,16 ++ lwu $T0,0($KEYP) ++ lwu $T1,4($KEYP) ++ lwu $T2,8($KEYP) ++ lwu $T3,12($KEYP) ++___ ++$code .= do_dec_lookup($T0,$T0,[$T4,$T5,$T6,$T7]); ++$code .= do_dec_lookup($T1,$T1,[$T4,$T5,$T6,$T7]); ++$code .= do_dec_lookup($T2,$T2,[$T4,$T5,$T6,$T7]); ++$code .= do_dec_lookup($T3,$T3,[$T4,$T5,$T6,$T7]); ++$code .= <<___; ++ sw $T0,0($KEYP) ++ sw $T1,4($KEYP) ++ sw $T2,8($KEYP) ++ sw $T3,12($KEYP) ++ addi $loopcntr,$loopcntr,1 ++ blt $loopcntr,$T8,1b ++___ ++$code .= load_regs(); ++$code .= <<___; ++ li a0,0 ++ ret ++___ ++$code .= <<___; ++ ++.section .rodata ++.p2align 12 ++.type AES_Te0,\@object ++AES_Te0: ++.word 0xa56363c6U, 0x847c7cf8U, 0x997777eeU, 0x8d7b7bf6U ++.word 0x0df2f2ffU, 0xbd6b6bd6U, 0xb16f6fdeU, 0x54c5c591U ++.word 0x50303060U, 0x03010102U, 0xa96767ceU, 0x7d2b2b56U ++.word 0x19fefee7U, 0x62d7d7b5U, 0xe6abab4dU, 0x9a7676ecU ++.word 0x45caca8fU, 0x9d82821fU, 0x40c9c989U, 0x877d7dfaU ++.word 0x15fafaefU, 0xeb5959b2U, 0xc947478eU, 0x0bf0f0fbU ++.word 0xecadad41U, 0x67d4d4b3U, 0xfda2a25fU, 0xeaafaf45U 
++.word 0xbf9c9c23U, 0xf7a4a453U, 0x967272e4U, 0x5bc0c09bU ++.word 0xc2b7b775U, 0x1cfdfde1U, 0xae93933dU, 0x6a26264cU ++.word 0x5a36366cU, 0x413f3f7eU, 0x02f7f7f5U, 0x4fcccc83U ++.word 0x5c343468U, 0xf4a5a551U, 0x34e5e5d1U, 0x08f1f1f9U ++.word 0x937171e2U, 0x73d8d8abU, 0x53313162U, 0x3f15152aU ++.word 0x0c040408U, 0x52c7c795U, 0x65232346U, 0x5ec3c39dU ++.word 0x28181830U, 0xa1969637U, 0x0f05050aU, 0xb59a9a2fU ++.word 0x0907070eU, 0x36121224U, 0x9b80801bU, 0x3de2e2dfU ++.word 0x26ebebcdU, 0x6927274eU, 0xcdb2b27fU, 0x9f7575eaU ++.word 0x1b090912U, 0x9e83831dU, 0x742c2c58U, 0x2e1a1a34U ++.word 0x2d1b1b36U, 0xb26e6edcU, 0xee5a5ab4U, 0xfba0a05bU ++.word 0xf65252a4U, 0x4d3b3b76U, 0x61d6d6b7U, 0xceb3b37dU ++.word 0x7b292952U, 0x3ee3e3ddU, 0x712f2f5eU, 0x97848413U ++.word 0xf55353a6U, 0x68d1d1b9U, 0x00000000U, 0x2cededc1U ++.word 0x60202040U, 0x1ffcfce3U, 0xc8b1b179U, 0xed5b5bb6U ++.word 0xbe6a6ad4U, 0x46cbcb8dU, 0xd9bebe67U, 0x4b393972U ++.word 0xde4a4a94U, 0xd44c4c98U, 0xe85858b0U, 0x4acfcf85U ++.word 0x6bd0d0bbU, 0x2aefefc5U, 0xe5aaaa4fU, 0x16fbfbedU ++.word 0xc5434386U, 0xd74d4d9aU, 0x55333366U, 0x94858511U ++.word 0xcf45458aU, 0x10f9f9e9U, 0x06020204U, 0x817f7ffeU ++.word 0xf05050a0U, 0x443c3c78U, 0xba9f9f25U, 0xe3a8a84bU ++.word 0xf35151a2U, 0xfea3a35dU, 0xc0404080U, 0x8a8f8f05U ++.word 0xad92923fU, 0xbc9d9d21U, 0x48383870U, 0x04f5f5f1U ++.word 0xdfbcbc63U, 0xc1b6b677U, 0x75dadaafU, 0x63212142U ++.word 0x30101020U, 0x1affffe5U, 0x0ef3f3fdU, 0x6dd2d2bfU ++.word 0x4ccdcd81U, 0x140c0c18U, 0x35131326U, 0x2fececc3U ++.word 0xe15f5fbeU, 0xa2979735U, 0xcc444488U, 0x3917172eU ++.word 0x57c4c493U, 0xf2a7a755U, 0x827e7efcU, 0x473d3d7aU ++.word 0xac6464c8U, 0xe75d5dbaU, 0x2b191932U, 0x957373e6U ++.word 0xa06060c0U, 0x98818119U, 0xd14f4f9eU, 0x7fdcdca3U ++.word 0x66222244U, 0x7e2a2a54U, 0xab90903bU, 0x8388880bU ++.word 0xca46468cU, 0x29eeeec7U, 0xd3b8b86bU, 0x3c141428U ++.word 0x79dedea7U, 0xe25e5ebcU, 0x1d0b0b16U, 0x76dbdbadU ++.word 0x3be0e0dbU, 0x56323264U, 0x4e3a3a74U, 
0x1e0a0a14U ++.word 0xdb494992U, 0x0a06060cU, 0x6c242448U, 0xe45c5cb8U ++.word 0x5dc2c29fU, 0x6ed3d3bdU, 0xefacac43U, 0xa66262c4U ++.word 0xa8919139U, 0xa4959531U, 0x37e4e4d3U, 0x8b7979f2U ++.word 0x32e7e7d5U, 0x43c8c88bU, 0x5937376eU, 0xb76d6ddaU ++.word 0x8c8d8d01U, 0x64d5d5b1U, 0xd24e4e9cU, 0xe0a9a949U ++.word 0xb46c6cd8U, 0xfa5656acU, 0x07f4f4f3U, 0x25eaeacfU ++.word 0xaf6565caU, 0x8e7a7af4U, 0xe9aeae47U, 0x18080810U ++.word 0xd5baba6fU, 0x887878f0U, 0x6f25254aU, 0x722e2e5cU ++.word 0x241c1c38U, 0xf1a6a657U, 0xc7b4b473U, 0x51c6c697U ++.word 0x23e8e8cbU, 0x7cdddda1U, 0x9c7474e8U, 0x211f1f3eU ++.word 0xdd4b4b96U, 0xdcbdbd61U, 0x868b8b0dU, 0x858a8a0fU ++.word 0x907070e0U, 0x423e3e7cU, 0xc4b5b571U, 0xaa6666ccU ++.word 0xd8484890U, 0x05030306U, 0x01f6f6f7U, 0x120e0e1cU ++.word 0xa36161c2U, 0x5f35356aU, 0xf95757aeU, 0xd0b9b969U ++.word 0x91868617U, 0x58c1c199U, 0x271d1d3aU, 0xb99e9e27U ++.word 0x38e1e1d9U, 0x13f8f8ebU, 0xb398982bU, 0x33111122U ++.word 0xbb6969d2U, 0x70d9d9a9U, 0x898e8e07U, 0xa7949433U ++.word 0xb69b9b2dU, 0x221e1e3cU, 0x92878715U, 0x20e9e9c9U ++.word 0x49cece87U, 0xff5555aaU, 0x78282850U, 0x7adfdfa5U ++.word 0x8f8c8c03U, 0xf8a1a159U, 0x80898909U, 0x170d0d1aU ++.word 0xdabfbf65U, 0x31e6e6d7U, 0xc6424284U, 0xb86868d0U ++.word 0xc3414182U, 0xb0999929U, 0x772d2d5aU, 0x110f0f1eU ++.word 0xcbb0b07bU, 0xfc5454a8U, 0xd6bbbb6dU, 0x3a16162cU ++ ++.type AES_Te1,\@object ++AES_Te1: ++.word 0x6363c6a5U, 0x7c7cf884U, 0x7777ee99U, 0x7b7bf68dU ++.word 0xf2f2ff0dU, 0x6b6bd6bdU, 0x6f6fdeb1U, 0xc5c59154U ++.word 0x30306050U, 0x01010203U, 0x6767cea9U, 0x2b2b567dU ++.word 0xfefee719U, 0xd7d7b562U, 0xabab4de6U, 0x7676ec9aU ++.word 0xcaca8f45U, 0x82821f9dU, 0xc9c98940U, 0x7d7dfa87U ++.word 0xfafaef15U, 0x5959b2ebU, 0x47478ec9U, 0xf0f0fb0bU ++.word 0xadad41ecU, 0xd4d4b367U, 0xa2a25ffdU, 0xafaf45eaU ++.word 0x9c9c23bfU, 0xa4a453f7U, 0x7272e496U, 0xc0c09b5bU ++.word 0xb7b775c2U, 0xfdfde11cU, 0x93933daeU, 0x26264c6aU ++.word 0x36366c5aU, 0x3f3f7e41U, 0xf7f7f502U, 0xcccc834fU 
++.word 0x3434685cU, 0xa5a551f4U, 0xe5e5d134U, 0xf1f1f908U ++.word 0x7171e293U, 0xd8d8ab73U, 0x31316253U, 0x15152a3fU ++.word 0x0404080cU, 0xc7c79552U, 0x23234665U, 0xc3c39d5eU ++.word 0x18183028U, 0x969637a1U, 0x05050a0fU, 0x9a9a2fb5U ++.word 0x07070e09U, 0x12122436U, 0x80801b9bU, 0xe2e2df3dU ++.word 0xebebcd26U, 0x27274e69U, 0xb2b27fcdU, 0x7575ea9fU ++.word 0x0909121bU, 0x83831d9eU, 0x2c2c5874U, 0x1a1a342eU ++.word 0x1b1b362dU, 0x6e6edcb2U, 0x5a5ab4eeU, 0xa0a05bfbU ++.word 0x5252a4f6U, 0x3b3b764dU, 0xd6d6b761U, 0xb3b37dceU ++.word 0x2929527bU, 0xe3e3dd3eU, 0x2f2f5e71U, 0x84841397U ++.word 0x5353a6f5U, 0xd1d1b968U, 0x00000000U, 0xededc12cU ++.word 0x20204060U, 0xfcfce31fU, 0xb1b179c8U, 0x5b5bb6edU ++.word 0x6a6ad4beU, 0xcbcb8d46U, 0xbebe67d9U, 0x3939724bU ++.word 0x4a4a94deU, 0x4c4c98d4U, 0x5858b0e8U, 0xcfcf854aU ++.word 0xd0d0bb6bU, 0xefefc52aU, 0xaaaa4fe5U, 0xfbfbed16U ++.word 0x434386c5U, 0x4d4d9ad7U, 0x33336655U, 0x85851194U ++.word 0x45458acfU, 0xf9f9e910U, 0x02020406U, 0x7f7ffe81U ++.word 0x5050a0f0U, 0x3c3c7844U, 0x9f9f25baU, 0xa8a84be3U ++.word 0x5151a2f3U, 0xa3a35dfeU, 0x404080c0U, 0x8f8f058aU ++.word 0x92923fadU, 0x9d9d21bcU, 0x38387048U, 0xf5f5f104U ++.word 0xbcbc63dfU, 0xb6b677c1U, 0xdadaaf75U, 0x21214263U ++.word 0x10102030U, 0xffffe51aU, 0xf3f3fd0eU, 0xd2d2bf6dU ++.word 0xcdcd814cU, 0x0c0c1814U, 0x13132635U, 0xececc32fU ++.word 0x5f5fbee1U, 0x979735a2U, 0x444488ccU, 0x17172e39U ++.word 0xc4c49357U, 0xa7a755f2U, 0x7e7efc82U, 0x3d3d7a47U ++.word 0x6464c8acU, 0x5d5dbae7U, 0x1919322bU, 0x7373e695U ++.word 0x6060c0a0U, 0x81811998U, 0x4f4f9ed1U, 0xdcdca37fU ++.word 0x22224466U, 0x2a2a547eU, 0x90903babU, 0x88880b83U ++.word 0x46468ccaU, 0xeeeec729U, 0xb8b86bd3U, 0x1414283cU ++.word 0xdedea779U, 0x5e5ebce2U, 0x0b0b161dU, 0xdbdbad76U ++.word 0xe0e0db3bU, 0x32326456U, 0x3a3a744eU, 0x0a0a141eU ++.word 0x494992dbU, 0x06060c0aU, 0x2424486cU, 0x5c5cb8e4U ++.word 0xc2c29f5dU, 0xd3d3bd6eU, 0xacac43efU, 0x6262c4a6U ++.word 0x919139a8U, 0x959531a4U, 0xe4e4d337U, 
0x7979f28bU ++.word 0xe7e7d532U, 0xc8c88b43U, 0x37376e59U, 0x6d6ddab7U ++.word 0x8d8d018cU, 0xd5d5b164U, 0x4e4e9cd2U, 0xa9a949e0U ++.word 0x6c6cd8b4U, 0x5656acfaU, 0xf4f4f307U, 0xeaeacf25U ++.word 0x6565caafU, 0x7a7af48eU, 0xaeae47e9U, 0x08081018U ++.word 0xbaba6fd5U, 0x7878f088U, 0x25254a6fU, 0x2e2e5c72U ++.word 0x1c1c3824U, 0xa6a657f1U, 0xb4b473c7U, 0xc6c69751U ++.word 0xe8e8cb23U, 0xdddda17cU, 0x7474e89cU, 0x1f1f3e21U ++.word 0x4b4b96ddU, 0xbdbd61dcU, 0x8b8b0d86U, 0x8a8a0f85U ++.word 0x7070e090U, 0x3e3e7c42U, 0xb5b571c4U, 0x6666ccaaU ++.word 0x484890d8U, 0x03030605U, 0xf6f6f701U, 0x0e0e1c12U ++.word 0x6161c2a3U, 0x35356a5fU, 0x5757aef9U, 0xb9b969d0U ++.word 0x86861791U, 0xc1c19958U, 0x1d1d3a27U, 0x9e9e27b9U ++.word 0xe1e1d938U, 0xf8f8eb13U, 0x98982bb3U, 0x11112233U ++.word 0x6969d2bbU, 0xd9d9a970U, 0x8e8e0789U, 0x949433a7U ++.word 0x9b9b2db6U, 0x1e1e3c22U, 0x87871592U, 0xe9e9c920U ++.word 0xcece8749U, 0x5555aaffU, 0x28285078U, 0xdfdfa57aU ++.word 0x8c8c038fU, 0xa1a159f8U, 0x89890980U, 0x0d0d1a17U ++.word 0xbfbf65daU, 0xe6e6d731U, 0x424284c6U, 0x6868d0b8U ++.word 0x414182c3U, 0x999929b0U, 0x2d2d5a77U, 0x0f0f1e11U ++.word 0xb0b07bcbU, 0x5454a8fcU, 0xbbbb6dd6U, 0x16162c3aU ++ ++.type AES_Te2,\@object ++AES_Te2: ++.word 0x63c6a563U, 0x7cf8847cU, 0x77ee9977U, 0x7bf68d7bU ++.word 0xf2ff0df2U, 0x6bd6bd6bU, 0x6fdeb16fU, 0xc59154c5U ++.word 0x30605030U, 0x01020301U, 0x67cea967U, 0x2b567d2bU ++.word 0xfee719feU, 0xd7b562d7U, 0xab4de6abU, 0x76ec9a76U ++.word 0xca8f45caU, 0x821f9d82U, 0xc98940c9U, 0x7dfa877dU ++.word 0xfaef15faU, 0x59b2eb59U, 0x478ec947U, 0xf0fb0bf0U ++.word 0xad41ecadU, 0xd4b367d4U, 0xa25ffda2U, 0xaf45eaafU ++.word 0x9c23bf9cU, 0xa453f7a4U, 0x72e49672U, 0xc09b5bc0U ++.word 0xb775c2b7U, 0xfde11cfdU, 0x933dae93U, 0x264c6a26U ++.word 0x366c5a36U, 0x3f7e413fU, 0xf7f502f7U, 0xcc834fccU ++.word 0x34685c34U, 0xa551f4a5U, 0xe5d134e5U, 0xf1f908f1U ++.word 0x71e29371U, 0xd8ab73d8U, 0x31625331U, 0x152a3f15U ++.word 0x04080c04U, 0xc79552c7U, 0x23466523U, 0xc39d5ec3U 
++.word 0x18302818U, 0x9637a196U, 0x050a0f05U, 0x9a2fb59aU ++.word 0x070e0907U, 0x12243612U, 0x801b9b80U, 0xe2df3de2U ++.word 0xebcd26ebU, 0x274e6927U, 0xb27fcdb2U, 0x75ea9f75U ++.word 0x09121b09U, 0x831d9e83U, 0x2c58742cU, 0x1a342e1aU ++.word 0x1b362d1bU, 0x6edcb26eU, 0x5ab4ee5aU, 0xa05bfba0U ++.word 0x52a4f652U, 0x3b764d3bU, 0xd6b761d6U, 0xb37dceb3U ++.word 0x29527b29U, 0xe3dd3ee3U, 0x2f5e712fU, 0x84139784U ++.word 0x53a6f553U, 0xd1b968d1U, 0x00000000U, 0xedc12cedU ++.word 0x20406020U, 0xfce31ffcU, 0xb179c8b1U, 0x5bb6ed5bU ++.word 0x6ad4be6aU, 0xcb8d46cbU, 0xbe67d9beU, 0x39724b39U ++.word 0x4a94de4aU, 0x4c98d44cU, 0x58b0e858U, 0xcf854acfU ++.word 0xd0bb6bd0U, 0xefc52aefU, 0xaa4fe5aaU, 0xfbed16fbU ++.word 0x4386c543U, 0x4d9ad74dU, 0x33665533U, 0x85119485U ++.word 0x458acf45U, 0xf9e910f9U, 0x02040602U, 0x7ffe817fU ++.word 0x50a0f050U, 0x3c78443cU, 0x9f25ba9fU, 0xa84be3a8U ++.word 0x51a2f351U, 0xa35dfea3U, 0x4080c040U, 0x8f058a8fU ++.word 0x923fad92U, 0x9d21bc9dU, 0x38704838U, 0xf5f104f5U ++.word 0xbc63dfbcU, 0xb677c1b6U, 0xdaaf75daU, 0x21426321U ++.word 0x10203010U, 0xffe51affU, 0xf3fd0ef3U, 0xd2bf6dd2U ++.word 0xcd814ccdU, 0x0c18140cU, 0x13263513U, 0xecc32fecU ++.word 0x5fbee15fU, 0x9735a297U, 0x4488cc44U, 0x172e3917U ++.word 0xc49357c4U, 0xa755f2a7U, 0x7efc827eU, 0x3d7a473dU ++.word 0x64c8ac64U, 0x5dbae75dU, 0x19322b19U, 0x73e69573U ++.word 0x60c0a060U, 0x81199881U, 0x4f9ed14fU, 0xdca37fdcU ++.word 0x22446622U, 0x2a547e2aU, 0x903bab90U, 0x880b8388U ++.word 0x468cca46U, 0xeec729eeU, 0xb86bd3b8U, 0x14283c14U ++.word 0xdea779deU, 0x5ebce25eU, 0x0b161d0bU, 0xdbad76dbU ++.word 0xe0db3be0U, 0x32645632U, 0x3a744e3aU, 0x0a141e0aU ++.word 0x4992db49U, 0x060c0a06U, 0x24486c24U, 0x5cb8e45cU ++.word 0xc29f5dc2U, 0xd3bd6ed3U, 0xac43efacU, 0x62c4a662U ++.word 0x9139a891U, 0x9531a495U, 0xe4d337e4U, 0x79f28b79U ++.word 0xe7d532e7U, 0xc88b43c8U, 0x376e5937U, 0x6ddab76dU ++.word 0x8d018c8dU, 0xd5b164d5U, 0x4e9cd24eU, 0xa949e0a9U ++.word 0x6cd8b46cU, 0x56acfa56U, 0xf4f307f4U, 
0xeacf25eaU ++.word 0x65caaf65U, 0x7af48e7aU, 0xae47e9aeU, 0x08101808U ++.word 0xba6fd5baU, 0x78f08878U, 0x254a6f25U, 0x2e5c722eU ++.word 0x1c38241cU, 0xa657f1a6U, 0xb473c7b4U, 0xc69751c6U ++.word 0xe8cb23e8U, 0xdda17cddU, 0x74e89c74U, 0x1f3e211fU ++.word 0x4b96dd4bU, 0xbd61dcbdU, 0x8b0d868bU, 0x8a0f858aU ++.word 0x70e09070U, 0x3e7c423eU, 0xb571c4b5U, 0x66ccaa66U ++.word 0x4890d848U, 0x03060503U, 0xf6f701f6U, 0x0e1c120eU ++.word 0x61c2a361U, 0x356a5f35U, 0x57aef957U, 0xb969d0b9U ++.word 0x86179186U, 0xc19958c1U, 0x1d3a271dU, 0x9e27b99eU ++.word 0xe1d938e1U, 0xf8eb13f8U, 0x982bb398U, 0x11223311U ++.word 0x69d2bb69U, 0xd9a970d9U, 0x8e07898eU, 0x9433a794U ++.word 0x9b2db69bU, 0x1e3c221eU, 0x87159287U, 0xe9c920e9U ++.word 0xce8749ceU, 0x55aaff55U, 0x28507828U, 0xdfa57adfU ++.word 0x8c038f8cU, 0xa159f8a1U, 0x89098089U, 0x0d1a170dU ++.word 0xbf65dabfU, 0xe6d731e6U, 0x4284c642U, 0x68d0b868U ++.word 0x4182c341U, 0x9929b099U, 0x2d5a772dU, 0x0f1e110fU ++.word 0xb07bcbb0U, 0x54a8fc54U, 0xbb6dd6bbU, 0x162c3a16U ++ ++.type AES_Te3,\@object ++AES_Te3: ++.word 0xc6a56363U, 0xf8847c7cU, 0xee997777U, 0xf68d7b7bU ++.word 0xff0df2f2U, 0xd6bd6b6bU, 0xdeb16f6fU, 0x9154c5c5U ++.word 0x60503030U, 0x02030101U, 0xcea96767U, 0x567d2b2bU ++.word 0xe719fefeU, 0xb562d7d7U, 0x4de6ababU, 0xec9a7676U ++.word 0x8f45cacaU, 0x1f9d8282U, 0x8940c9c9U, 0xfa877d7dU ++.word 0xef15fafaU, 0xb2eb5959U, 0x8ec94747U, 0xfb0bf0f0U ++.word 0x41ecadadU, 0xb367d4d4U, 0x5ffda2a2U, 0x45eaafafU ++.word 0x23bf9c9cU, 0x53f7a4a4U, 0xe4967272U, 0x9b5bc0c0U ++.word 0x75c2b7b7U, 0xe11cfdfdU, 0x3dae9393U, 0x4c6a2626U ++.word 0x6c5a3636U, 0x7e413f3fU, 0xf502f7f7U, 0x834fccccU ++.word 0x685c3434U, 0x51f4a5a5U, 0xd134e5e5U, 0xf908f1f1U ++.word 0xe2937171U, 0xab73d8d8U, 0x62533131U, 0x2a3f1515U ++.word 0x080c0404U, 0x9552c7c7U, 0x46652323U, 0x9d5ec3c3U ++.word 0x30281818U, 0x37a19696U, 0x0a0f0505U, 0x2fb59a9aU ++.word 0x0e090707U, 0x24361212U, 0x1b9b8080U, 0xdf3de2e2U ++.word 0xcd26ebebU, 0x4e692727U, 0x7fcdb2b2U, 0xea9f7575U 
++.word 0x121b0909U, 0x1d9e8383U, 0x58742c2cU, 0x342e1a1aU ++.word 0x362d1b1bU, 0xdcb26e6eU, 0xb4ee5a5aU, 0x5bfba0a0U ++.word 0xa4f65252U, 0x764d3b3bU, 0xb761d6d6U, 0x7dceb3b3U ++.word 0x527b2929U, 0xdd3ee3e3U, 0x5e712f2fU, 0x13978484U ++.word 0xa6f55353U, 0xb968d1d1U, 0x00000000U, 0xc12cededU ++.word 0x40602020U, 0xe31ffcfcU, 0x79c8b1b1U, 0xb6ed5b5bU ++.word 0xd4be6a6aU, 0x8d46cbcbU, 0x67d9bebeU, 0x724b3939U ++.word 0x94de4a4aU, 0x98d44c4cU, 0xb0e85858U, 0x854acfcfU ++.word 0xbb6bd0d0U, 0xc52aefefU, 0x4fe5aaaaU, 0xed16fbfbU ++.word 0x86c54343U, 0x9ad74d4dU, 0x66553333U, 0x11948585U ++.word 0x8acf4545U, 0xe910f9f9U, 0x04060202U, 0xfe817f7fU ++.word 0xa0f05050U, 0x78443c3cU, 0x25ba9f9fU, 0x4be3a8a8U ++.word 0xa2f35151U, 0x5dfea3a3U, 0x80c04040U, 0x058a8f8fU ++.word 0x3fad9292U, 0x21bc9d9dU, 0x70483838U, 0xf104f5f5U ++.word 0x63dfbcbcU, 0x77c1b6b6U, 0xaf75dadaU, 0x42632121U ++.word 0x20301010U, 0xe51affffU, 0xfd0ef3f3U, 0xbf6dd2d2U ++.word 0x814ccdcdU, 0x18140c0cU, 0x26351313U, 0xc32fececU ++.word 0xbee15f5fU, 0x35a29797U, 0x88cc4444U, 0x2e391717U ++.word 0x9357c4c4U, 0x55f2a7a7U, 0xfc827e7eU, 0x7a473d3dU ++.word 0xc8ac6464U, 0xbae75d5dU, 0x322b1919U, 0xe6957373U ++.word 0xc0a06060U, 0x19988181U, 0x9ed14f4fU, 0xa37fdcdcU ++.word 0x44662222U, 0x547e2a2aU, 0x3bab9090U, 0x0b838888U ++.word 0x8cca4646U, 0xc729eeeeU, 0x6bd3b8b8U, 0x283c1414U ++.word 0xa779dedeU, 0xbce25e5eU, 0x161d0b0bU, 0xad76dbdbU ++.word 0xdb3be0e0U, 0x64563232U, 0x744e3a3aU, 0x141e0a0aU ++.word 0x92db4949U, 0x0c0a0606U, 0x486c2424U, 0xb8e45c5cU ++.word 0x9f5dc2c2U, 0xbd6ed3d3U, 0x43efacacU, 0xc4a66262U ++.word 0x39a89191U, 0x31a49595U, 0xd337e4e4U, 0xf28b7979U ++.word 0xd532e7e7U, 0x8b43c8c8U, 0x6e593737U, 0xdab76d6dU ++.word 0x018c8d8dU, 0xb164d5d5U, 0x9cd24e4eU, 0x49e0a9a9U ++.word 0xd8b46c6cU, 0xacfa5656U, 0xf307f4f4U, 0xcf25eaeaU ++.word 0xcaaf6565U, 0xf48e7a7aU, 0x47e9aeaeU, 0x10180808U ++.word 0x6fd5babaU, 0xf0887878U, 0x4a6f2525U, 0x5c722e2eU ++.word 0x38241c1cU, 0x57f1a6a6U, 0x73c7b4b4U, 
0x9751c6c6U ++.word 0xcb23e8e8U, 0xa17cddddU, 0xe89c7474U, 0x3e211f1fU ++.word 0x96dd4b4bU, 0x61dcbdbdU, 0x0d868b8bU, 0x0f858a8aU ++.word 0xe0907070U, 0x7c423e3eU, 0x71c4b5b5U, 0xccaa6666U ++.word 0x90d84848U, 0x06050303U, 0xf701f6f6U, 0x1c120e0eU ++.word 0xc2a36161U, 0x6a5f3535U, 0xaef95757U, 0x69d0b9b9U ++.word 0x17918686U, 0x9958c1c1U, 0x3a271d1dU, 0x27b99e9eU ++.word 0xd938e1e1U, 0xeb13f8f8U, 0x2bb39898U, 0x22331111U ++.word 0xd2bb6969U, 0xa970d9d9U, 0x07898e8eU, 0x33a79494U ++.word 0x2db69b9bU, 0x3c221e1eU, 0x15928787U, 0xc920e9e9U ++.word 0x8749ceceU, 0xaaff5555U, 0x50782828U, 0xa57adfdfU ++.word 0x038f8c8cU, 0x59f8a1a1U, 0x09808989U, 0x1a170d0dU ++.word 0x65dabfbfU, 0xd731e6e6U, 0x84c64242U, 0xd0b86868U ++.word 0x82c34141U, 0x29b09999U, 0x5a772d2dU, 0x1e110f0fU ++.word 0x7bcbb0b0U, 0xa8fc5454U, 0x6dd6bbbbU, 0x2c3a1616U ++ ++.p2align 12 ++.type AES_Td0,\@object ++AES_Td0: ++.word 0x50a7f451U, 0x5365417eU, 0xc3a4171aU, 0x965e273aU ++.word 0xcb6bab3bU, 0xf1459d1fU, 0xab58faacU, 0x9303e34bU ++.word 0x55fa3020U, 0xf66d76adU, 0x9176cc88U, 0x254c02f5U ++.word 0xfcd7e54fU, 0xd7cb2ac5U, 0x80443526U, 0x8fa362b5U ++.word 0x495ab1deU, 0x671bba25U, 0x980eea45U, 0xe1c0fe5dU ++.word 0x02752fc3U, 0x12f04c81U, 0xa397468dU, 0xc6f9d36bU ++.word 0xe75f8f03U, 0x959c9215U, 0xeb7a6dbfU, 0xda595295U ++.word 0x2d83bed4U, 0xd3217458U, 0x2969e049U, 0x44c8c98eU ++.word 0x6a89c275U, 0x78798ef4U, 0x6b3e5899U, 0xdd71b927U ++.word 0xb64fe1beU, 0x17ad88f0U, 0x66ac20c9U, 0xb43ace7dU ++.word 0x184adf63U, 0x82311ae5U, 0x60335197U, 0x457f5362U ++.word 0xe07764b1U, 0x84ae6bbbU, 0x1ca081feU, 0x942b08f9U ++.word 0x58684870U, 0x19fd458fU, 0x876cde94U, 0xb7f87b52U ++.word 0x23d373abU, 0xe2024b72U, 0x578f1fe3U, 0x2aab5566U ++.word 0x0728ebb2U, 0x03c2b52fU, 0x9a7bc586U, 0xa50837d3U ++.word 0xf2872830U, 0xb2a5bf23U, 0xba6a0302U, 0x5c8216edU ++.word 0x2b1ccf8aU, 0x92b479a7U, 0xf0f207f3U, 0xa1e2694eU ++.word 0xcdf4da65U, 0xd5be0506U, 0x1f6234d1U, 0x8afea6c4U ++.word 0x9d532e34U, 0xa055f3a2U, 0x32e18a05U, 
0x75ebf6a4U ++.word 0x39ec830bU, 0xaaef6040U, 0x069f715eU, 0x51106ebdU ++.word 0xf98a213eU, 0x3d06dd96U, 0xae053eddU, 0x46bde64dU ++.word 0xb58d5491U, 0x055dc471U, 0x6fd40604U, 0xff155060U ++.word 0x24fb9819U, 0x97e9bdd6U, 0xcc434089U, 0x779ed967U ++.word 0xbd42e8b0U, 0x888b8907U, 0x385b19e7U, 0xdbeec879U ++.word 0x470a7ca1U, 0xe90f427cU, 0xc91e84f8U, 0x00000000U ++.word 0x83868009U, 0x48ed2b32U, 0xac70111eU, 0x4e725a6cU ++.word 0xfbff0efdU, 0x5638850fU, 0x1ed5ae3dU, 0x27392d36U ++.word 0x64d90f0aU, 0x21a65c68U, 0xd1545b9bU, 0x3a2e3624U ++.word 0xb1670a0cU, 0x0fe75793U, 0xd296eeb4U, 0x9e919b1bU ++.word 0x4fc5c080U, 0xa220dc61U, 0x694b775aU, 0x161a121cU ++.word 0x0aba93e2U, 0xe52aa0c0U, 0x43e0223cU, 0x1d171b12U ++.word 0x0b0d090eU, 0xadc78bf2U, 0xb9a8b62dU, 0xc8a91e14U ++.word 0x8519f157U, 0x4c0775afU, 0xbbdd99eeU, 0xfd607fa3U ++.word 0x9f2601f7U, 0xbcf5725cU, 0xc53b6644U, 0x347efb5bU ++.word 0x7629438bU, 0xdcc623cbU, 0x68fcedb6U, 0x63f1e4b8U ++.word 0xcadc31d7U, 0x10856342U, 0x40229713U, 0x2011c684U ++.word 0x7d244a85U, 0xf83dbbd2U, 0x1132f9aeU, 0x6da129c7U ++.word 0x4b2f9e1dU, 0xf330b2dcU, 0xec52860dU, 0xd0e3c177U ++.word 0x6c16b32bU, 0x99b970a9U, 0xfa489411U, 0x2264e947U ++.word 0xc48cfca8U, 0x1a3ff0a0U, 0xd82c7d56U, 0xef903322U ++.word 0xc74e4987U, 0xc1d138d9U, 0xfea2ca8cU, 0x360bd498U ++.word 0xcf81f5a6U, 0x28de7aa5U, 0x268eb7daU, 0xa4bfad3fU ++.word 0xe49d3a2cU, 0x0d927850U, 0x9bcc5f6aU, 0x62467e54U ++.word 0xc2138df6U, 0xe8b8d890U, 0x5ef7392eU, 0xf5afc382U ++.word 0xbe805d9fU, 0x7c93d069U, 0xa92dd56fU, 0xb31225cfU ++.word 0x3b99acc8U, 0xa77d1810U, 0x6e639ce8U, 0x7bbb3bdbU ++.word 0x097826cdU, 0xf418596eU, 0x01b79aecU, 0xa89a4f83U ++.word 0x656e95e6U, 0x7ee6ffaaU, 0x08cfbc21U, 0xe6e815efU ++.word 0xd99be7baU, 0xce366f4aU, 0xd4099feaU, 0xd67cb029U ++.word 0xafb2a431U, 0x31233f2aU, 0x3094a5c6U, 0xc066a235U ++.word 0x37bc4e74U, 0xa6ca82fcU, 0xb0d090e0U, 0x15d8a733U ++.word 0x4a9804f1U, 0xf7daec41U, 0x0e50cd7fU, 0x2ff69117U ++.word 0x8dd64d76U, 0x4db0ef43U, 
0x544daaccU, 0xdf0496e4U ++.word 0xe3b5d19eU, 0x1b886a4cU, 0xb81f2cc1U, 0x7f516546U ++.word 0x04ea5e9dU, 0x5d358c01U, 0x737487faU, 0x2e410bfbU ++.word 0x5a1d67b3U, 0x52d2db92U, 0x335610e9U, 0x1347d66dU ++.word 0x8c61d79aU, 0x7a0ca137U, 0x8e14f859U, 0x893c13ebU ++.word 0xee27a9ceU, 0x35c961b7U, 0xede51ce1U, 0x3cb1477aU ++.word 0x59dfd29cU, 0x3f73f255U, 0x79ce1418U, 0xbf37c773U ++.word 0xeacdf753U, 0x5baafd5fU, 0x146f3ddfU, 0x86db4478U ++.word 0x81f3afcaU, 0x3ec468b9U, 0x2c342438U, 0x5f40a3c2U ++.word 0x72c31d16U, 0x0c25e2bcU, 0x8b493c28U, 0x41950dffU ++.word 0x7101a839U, 0xdeb30c08U, 0x9ce4b4d8U, 0x90c15664U ++.word 0x6184cb7bU, 0x70b632d5U, 0x745c6c48U, 0x4257b8d0U ++ ++.type AES_Td1,\@object ++AES_Td1: ++.word 0xa7f45150U, 0x65417e53U, 0xa4171ac3U, 0x5e273a96U ++.word 0x6bab3bcbU, 0x459d1ff1U, 0x58faacabU, 0x03e34b93U ++.word 0xfa302055U, 0x6d76adf6U, 0x76cc8891U, 0x4c02f525U ++.word 0xd7e54ffcU, 0xcb2ac5d7U, 0x44352680U, 0xa362b58fU ++.word 0x5ab1de49U, 0x1bba2567U, 0x0eea4598U, 0xc0fe5de1U ++.word 0x752fc302U, 0xf04c8112U, 0x97468da3U, 0xf9d36bc6U ++.word 0x5f8f03e7U, 0x9c921595U, 0x7a6dbfebU, 0x595295daU ++.word 0x83bed42dU, 0x217458d3U, 0x69e04929U, 0xc8c98e44U ++.word 0x89c2756aU, 0x798ef478U, 0x3e58996bU, 0x71b927ddU ++.word 0x4fe1beb6U, 0xad88f017U, 0xac20c966U, 0x3ace7db4U ++.word 0x4adf6318U, 0x311ae582U, 0x33519760U, 0x7f536245U ++.word 0x7764b1e0U, 0xae6bbb84U, 0xa081fe1cU, 0x2b08f994U ++.word 0x68487058U, 0xfd458f19U, 0x6cde9487U, 0xf87b52b7U ++.word 0xd373ab23U, 0x024b72e2U, 0x8f1fe357U, 0xab55662aU ++.word 0x28ebb207U, 0xc2b52f03U, 0x7bc5869aU, 0x0837d3a5U ++.word 0x872830f2U, 0xa5bf23b2U, 0x6a0302baU, 0x8216ed5cU ++.word 0x1ccf8a2bU, 0xb479a792U, 0xf207f3f0U, 0xe2694ea1U ++.word 0xf4da65cdU, 0xbe0506d5U, 0x6234d11fU, 0xfea6c48aU ++.word 0x532e349dU, 0x55f3a2a0U, 0xe18a0532U, 0xebf6a475U ++.word 0xec830b39U, 0xef6040aaU, 0x9f715e06U, 0x106ebd51U ++.word 0x8a213ef9U, 0x06dd963dU, 0x053eddaeU, 0xbde64d46U ++.word 0x8d5491b5U, 0x5dc47105U, 0xd406046fU, 
0x155060ffU ++.word 0xfb981924U, 0xe9bdd697U, 0x434089ccU, 0x9ed96777U ++.word 0x42e8b0bdU, 0x8b890788U, 0x5b19e738U, 0xeec879dbU ++.word 0x0a7ca147U, 0x0f427ce9U, 0x1e84f8c9U, 0x00000000U ++.word 0x86800983U, 0xed2b3248U, 0x70111eacU, 0x725a6c4eU ++.word 0xff0efdfbU, 0x38850f56U, 0xd5ae3d1eU, 0x392d3627U ++.word 0xd90f0a64U, 0xa65c6821U, 0x545b9bd1U, 0x2e36243aU ++.word 0x670a0cb1U, 0xe757930fU, 0x96eeb4d2U, 0x919b1b9eU ++.word 0xc5c0804fU, 0x20dc61a2U, 0x4b775a69U, 0x1a121c16U ++.word 0xba93e20aU, 0x2aa0c0e5U, 0xe0223c43U, 0x171b121dU ++.word 0x0d090e0bU, 0xc78bf2adU, 0xa8b62db9U, 0xa91e14c8U ++.word 0x19f15785U, 0x0775af4cU, 0xdd99eebbU, 0x607fa3fdU ++.word 0x2601f79fU, 0xf5725cbcU, 0x3b6644c5U, 0x7efb5b34U ++.word 0x29438b76U, 0xc623cbdcU, 0xfcedb668U, 0xf1e4b863U ++.word 0xdc31d7caU, 0x85634210U, 0x22971340U, 0x11c68420U ++.word 0x244a857dU, 0x3dbbd2f8U, 0x32f9ae11U, 0xa129c76dU ++.word 0x2f9e1d4bU, 0x30b2dcf3U, 0x52860decU, 0xe3c177d0U ++.word 0x16b32b6cU, 0xb970a999U, 0x489411faU, 0x64e94722U ++.word 0x8cfca8c4U, 0x3ff0a01aU, 0x2c7d56d8U, 0x903322efU ++.word 0x4e4987c7U, 0xd138d9c1U, 0xa2ca8cfeU, 0x0bd49836U ++.word 0x81f5a6cfU, 0xde7aa528U, 0x8eb7da26U, 0xbfad3fa4U ++.word 0x9d3a2ce4U, 0x9278500dU, 0xcc5f6a9bU, 0x467e5462U ++.word 0x138df6c2U, 0xb8d890e8U, 0xf7392e5eU, 0xafc382f5U ++.word 0x805d9fbeU, 0x93d0697cU, 0x2dd56fa9U, 0x1225cfb3U ++.word 0x99acc83bU, 0x7d1810a7U, 0x639ce86eU, 0xbb3bdb7bU ++.word 0x7826cd09U, 0x18596ef4U, 0xb79aec01U, 0x9a4f83a8U ++.word 0x6e95e665U, 0xe6ffaa7eU, 0xcfbc2108U, 0xe815efe6U ++.word 0x9be7bad9U, 0x366f4aceU, 0x099fead4U, 0x7cb029d6U ++.word 0xb2a431afU, 0x233f2a31U, 0x94a5c630U, 0x66a235c0U ++.word 0xbc4e7437U, 0xca82fca6U, 0xd090e0b0U, 0xd8a73315U ++.word 0x9804f14aU, 0xdaec41f7U, 0x50cd7f0eU, 0xf691172fU ++.word 0xd64d768dU, 0xb0ef434dU, 0x4daacc54U, 0x0496e4dfU ++.word 0xb5d19ee3U, 0x886a4c1bU, 0x1f2cc1b8U, 0x5165467fU ++.word 0xea5e9d04U, 0x358c015dU, 0x7487fa73U, 0x410bfb2eU ++.word 0x1d67b35aU, 0xd2db9252U, 
0x5610e933U, 0x47d66d13U ++.word 0x61d79a8cU, 0x0ca1377aU, 0x14f8598eU, 0x3c13eb89U ++.word 0x27a9ceeeU, 0xc961b735U, 0xe51ce1edU, 0xb1477a3cU ++.word 0xdfd29c59U, 0x73f2553fU, 0xce141879U, 0x37c773bfU ++.word 0xcdf753eaU, 0xaafd5f5bU, 0x6f3ddf14U, 0xdb447886U ++.word 0xf3afca81U, 0xc468b93eU, 0x3424382cU, 0x40a3c25fU ++.word 0xc31d1672U, 0x25e2bc0cU, 0x493c288bU, 0x950dff41U ++.word 0x01a83971U, 0xb30c08deU, 0xe4b4d89cU, 0xc1566490U ++.word 0x84cb7b61U, 0xb632d570U, 0x5c6c4874U, 0x57b8d042U ++ ++.type AES_Td2,\@object ++AES_Td2: ++.word 0xf45150a7U, 0x417e5365U, 0x171ac3a4U, 0x273a965eU ++.word 0xab3bcb6bU, 0x9d1ff145U, 0xfaacab58U, 0xe34b9303U ++.word 0x302055faU, 0x76adf66dU, 0xcc889176U, 0x02f5254cU ++.word 0xe54ffcd7U, 0x2ac5d7cbU, 0x35268044U, 0x62b58fa3U ++.word 0xb1de495aU, 0xba25671bU, 0xea45980eU, 0xfe5de1c0U ++.word 0x2fc30275U, 0x4c8112f0U, 0x468da397U, 0xd36bc6f9U ++.word 0x8f03e75fU, 0x9215959cU, 0x6dbfeb7aU, 0x5295da59U ++.word 0xbed42d83U, 0x7458d321U, 0xe0492969U, 0xc98e44c8U ++.word 0xc2756a89U, 0x8ef47879U, 0x58996b3eU, 0xb927dd71U ++.word 0xe1beb64fU, 0x88f017adU, 0x20c966acU, 0xce7db43aU ++.word 0xdf63184aU, 0x1ae58231U, 0x51976033U, 0x5362457fU ++.word 0x64b1e077U, 0x6bbb84aeU, 0x81fe1ca0U, 0x08f9942bU ++.word 0x48705868U, 0x458f19fdU, 0xde94876cU, 0x7b52b7f8U ++.word 0x73ab23d3U, 0x4b72e202U, 0x1fe3578fU, 0x55662aabU ++.word 0xebb20728U, 0xb52f03c2U, 0xc5869a7bU, 0x37d3a508U ++.word 0x2830f287U, 0xbf23b2a5U, 0x0302ba6aU, 0x16ed5c82U ++.word 0xcf8a2b1cU, 0x79a792b4U, 0x07f3f0f2U, 0x694ea1e2U ++.word 0xda65cdf4U, 0x0506d5beU, 0x34d11f62U, 0xa6c48afeU ++.word 0x2e349d53U, 0xf3a2a055U, 0x8a0532e1U, 0xf6a475ebU ++.word 0x830b39ecU, 0x6040aaefU, 0x715e069fU, 0x6ebd5110U ++.word 0x213ef98aU, 0xdd963d06U, 0x3eddae05U, 0xe64d46bdU ++.word 0x5491b58dU, 0xc471055dU, 0x06046fd4U, 0x5060ff15U ++.word 0x981924fbU, 0xbdd697e9U, 0x4089cc43U, 0xd967779eU ++.word 0xe8b0bd42U, 0x8907888bU, 0x19e7385bU, 0xc879dbeeU ++.word 0x7ca1470aU, 0x427ce90fU, 0x84f8c91eU, 
0x00000000U ++.word 0x80098386U, 0x2b3248edU, 0x111eac70U, 0x5a6c4e72U ++.word 0x0efdfbffU, 0x850f5638U, 0xae3d1ed5U, 0x2d362739U ++.word 0x0f0a64d9U, 0x5c6821a6U, 0x5b9bd154U, 0x36243a2eU ++.word 0x0a0cb167U, 0x57930fe7U, 0xeeb4d296U, 0x9b1b9e91U ++.word 0xc0804fc5U, 0xdc61a220U, 0x775a694bU, 0x121c161aU ++.word 0x93e20abaU, 0xa0c0e52aU, 0x223c43e0U, 0x1b121d17U ++.word 0x090e0b0dU, 0x8bf2adc7U, 0xb62db9a8U, 0x1e14c8a9U ++.word 0xf1578519U, 0x75af4c07U, 0x99eebbddU, 0x7fa3fd60U ++.word 0x01f79f26U, 0x725cbcf5U, 0x6644c53bU, 0xfb5b347eU ++.word 0x438b7629U, 0x23cbdcc6U, 0xedb668fcU, 0xe4b863f1U ++.word 0x31d7cadcU, 0x63421085U, 0x97134022U, 0xc6842011U ++.word 0x4a857d24U, 0xbbd2f83dU, 0xf9ae1132U, 0x29c76da1U ++.word 0x9e1d4b2fU, 0xb2dcf330U, 0x860dec52U, 0xc177d0e3U ++.word 0xb32b6c16U, 0x70a999b9U, 0x9411fa48U, 0xe9472264U ++.word 0xfca8c48cU, 0xf0a01a3fU, 0x7d56d82cU, 0x3322ef90U ++.word 0x4987c74eU, 0x38d9c1d1U, 0xca8cfea2U, 0xd498360bU ++.word 0xf5a6cf81U, 0x7aa528deU, 0xb7da268eU, 0xad3fa4bfU ++.word 0x3a2ce49dU, 0x78500d92U, 0x5f6a9bccU, 0x7e546246U ++.word 0x8df6c213U, 0xd890e8b8U, 0x392e5ef7U, 0xc382f5afU ++.word 0x5d9fbe80U, 0xd0697c93U, 0xd56fa92dU, 0x25cfb312U ++.word 0xacc83b99U, 0x1810a77dU, 0x9ce86e63U, 0x3bdb7bbbU ++.word 0x26cd0978U, 0x596ef418U, 0x9aec01b7U, 0x4f83a89aU ++.word 0x95e6656eU, 0xffaa7ee6U, 0xbc2108cfU, 0x15efe6e8U ++.word 0xe7bad99bU, 0x6f4ace36U, 0x9fead409U, 0xb029d67cU ++.word 0xa431afb2U, 0x3f2a3123U, 0xa5c63094U, 0xa235c066U ++.word 0x4e7437bcU, 0x82fca6caU, 0x90e0b0d0U, 0xa73315d8U ++.word 0x04f14a98U, 0xec41f7daU, 0xcd7f0e50U, 0x91172ff6U ++.word 0x4d768dd6U, 0xef434db0U, 0xaacc544dU, 0x96e4df04U ++.word 0xd19ee3b5U, 0x6a4c1b88U, 0x2cc1b81fU, 0x65467f51U ++.word 0x5e9d04eaU, 0x8c015d35U, 0x87fa7374U, 0x0bfb2e41U ++.word 0x67b35a1dU, 0xdb9252d2U, 0x10e93356U, 0xd66d1347U ++.word 0xd79a8c61U, 0xa1377a0cU, 0xf8598e14U, 0x13eb893cU ++.word 0xa9ceee27U, 0x61b735c9U, 0x1ce1ede5U, 0x477a3cb1U ++.word 0xd29c59dfU, 0xf2553f73U, 
0x141879ceU, 0xc773bf37U ++.word 0xf753eacdU, 0xfd5f5baaU, 0x3ddf146fU, 0x447886dbU ++.word 0xafca81f3U, 0x68b93ec4U, 0x24382c34U, 0xa3c25f40U ++.word 0x1d1672c3U, 0xe2bc0c25U, 0x3c288b49U, 0x0dff4195U ++.word 0xa8397101U, 0x0c08deb3U, 0xb4d89ce4U, 0x566490c1U ++.word 0xcb7b6184U, 0x32d570b6U, 0x6c48745cU, 0xb8d04257U ++ ++.type AES_Td3,\@object ++AES_Td3: ++.word 0x5150a7f4U, 0x7e536541U, 0x1ac3a417U, 0x3a965e27U ++.word 0x3bcb6babU, 0x1ff1459dU, 0xacab58faU, 0x4b9303e3U ++.word 0x2055fa30U, 0xadf66d76U, 0x889176ccU, 0xf5254c02U ++.word 0x4ffcd7e5U, 0xc5d7cb2aU, 0x26804435U, 0xb58fa362U ++.word 0xde495ab1U, 0x25671bbaU, 0x45980eeaU, 0x5de1c0feU ++.word 0xc302752fU, 0x8112f04cU, 0x8da39746U, 0x6bc6f9d3U ++.word 0x03e75f8fU, 0x15959c92U, 0xbfeb7a6dU, 0x95da5952U ++.word 0xd42d83beU, 0x58d32174U, 0x492969e0U, 0x8e44c8c9U ++.word 0x756a89c2U, 0xf478798eU, 0x996b3e58U, 0x27dd71b9U ++.word 0xbeb64fe1U, 0xf017ad88U, 0xc966ac20U, 0x7db43aceU ++.word 0x63184adfU, 0xe582311aU, 0x97603351U, 0x62457f53U ++.word 0xb1e07764U, 0xbb84ae6bU, 0xfe1ca081U, 0xf9942b08U ++.word 0x70586848U, 0x8f19fd45U, 0x94876cdeU, 0x52b7f87bU ++.word 0xab23d373U, 0x72e2024bU, 0xe3578f1fU, 0x662aab55U ++.word 0xb20728ebU, 0x2f03c2b5U, 0x869a7bc5U, 0xd3a50837U ++.word 0x30f28728U, 0x23b2a5bfU, 0x02ba6a03U, 0xed5c8216U ++.word 0x8a2b1ccfU, 0xa792b479U, 0xf3f0f207U, 0x4ea1e269U ++.word 0x65cdf4daU, 0x06d5be05U, 0xd11f6234U, 0xc48afea6U ++.word 0x349d532eU, 0xa2a055f3U, 0x0532e18aU, 0xa475ebf6U ++.word 0x0b39ec83U, 0x40aaef60U, 0x5e069f71U, 0xbd51106eU ++.word 0x3ef98a21U, 0x963d06ddU, 0xddae053eU, 0x4d46bde6U ++.word 0x91b58d54U, 0x71055dc4U, 0x046fd406U, 0x60ff1550U ++.word 0x1924fb98U, 0xd697e9bdU, 0x89cc4340U, 0x67779ed9U ++.word 0xb0bd42e8U, 0x07888b89U, 0xe7385b19U, 0x79dbeec8U ++.word 0xa1470a7cU, 0x7ce90f42U, 0xf8c91e84U, 0x00000000U ++.word 0x09838680U, 0x3248ed2bU, 0x1eac7011U, 0x6c4e725aU ++.word 0xfdfbff0eU, 0x0f563885U, 0x3d1ed5aeU, 0x3627392dU ++.word 0x0a64d90fU, 0x6821a65cU, 0x9bd1545bU, 
0x243a2e36U ++.word 0x0cb1670aU, 0x930fe757U, 0xb4d296eeU, 0x1b9e919bU ++.word 0x804fc5c0U, 0x61a220dcU, 0x5a694b77U, 0x1c161a12U ++.word 0xe20aba93U, 0xc0e52aa0U, 0x3c43e022U, 0x121d171bU ++.word 0x0e0b0d09U, 0xf2adc78bU, 0x2db9a8b6U, 0x14c8a91eU ++.word 0x578519f1U, 0xaf4c0775U, 0xeebbdd99U, 0xa3fd607fU ++.word 0xf79f2601U, 0x5cbcf572U, 0x44c53b66U, 0x5b347efbU ++.word 0x8b762943U, 0xcbdcc623U, 0xb668fcedU, 0xb863f1e4U ++.word 0xd7cadc31U, 0x42108563U, 0x13402297U, 0x842011c6U ++.word 0x857d244aU, 0xd2f83dbbU, 0xae1132f9U, 0xc76da129U ++.word 0x1d4b2f9eU, 0xdcf330b2U, 0x0dec5286U, 0x77d0e3c1U ++.word 0x2b6c16b3U, 0xa999b970U, 0x11fa4894U, 0x472264e9U ++.word 0xa8c48cfcU, 0xa01a3ff0U, 0x56d82c7dU, 0x22ef9033U ++.word 0x87c74e49U, 0xd9c1d138U, 0x8cfea2caU, 0x98360bd4U ++.word 0xa6cf81f5U, 0xa528de7aU, 0xda268eb7U, 0x3fa4bfadU ++.word 0x2ce49d3aU, 0x500d9278U, 0x6a9bcc5fU, 0x5462467eU ++.word 0xf6c2138dU, 0x90e8b8d8U, 0x2e5ef739U, 0x82f5afc3U ++.word 0x9fbe805dU, 0x697c93d0U, 0x6fa92dd5U, 0xcfb31225U ++.word 0xc83b99acU, 0x10a77d18U, 0xe86e639cU, 0xdb7bbb3bU ++.word 0xcd097826U, 0x6ef41859U, 0xec01b79aU, 0x83a89a4fU ++.word 0xe6656e95U, 0xaa7ee6ffU, 0x2108cfbcU, 0xefe6e815U ++.word 0xbad99be7U, 0x4ace366fU, 0xead4099fU, 0x29d67cb0U ++.word 0x31afb2a4U, 0x2a31233fU, 0xc63094a5U, 0x35c066a2U ++.word 0x7437bc4eU, 0xfca6ca82U, 0xe0b0d090U, 0x3315d8a7U ++.word 0xf14a9804U, 0x41f7daecU, 0x7f0e50cdU, 0x172ff691U ++.word 0x768dd64dU, 0x434db0efU, 0xcc544daaU, 0xe4df0496U ++.word 0x9ee3b5d1U, 0x4c1b886aU, 0xc1b81f2cU, 0x467f5165U ++.word 0x9d04ea5eU, 0x015d358cU, 0xfa737487U, 0xfb2e410bU ++.word 0xb35a1d67U, 0x9252d2dbU, 0xe9335610U, 0x6d1347d6U ++.word 0x9a8c61d7U, 0x377a0ca1U, 0x598e14f8U, 0xeb893c13U ++.word 0xceee27a9U, 0xb735c961U, 0xe1ede51cU, 0x7a3cb147U ++.word 0x9c59dfd2U, 0x553f73f2U, 0x1879ce14U, 0x73bf37c7U ++.word 0x53eacdf7U, 0x5f5baafdU, 0xdf146f3dU, 0x7886db44U ++.word 0xca81f3afU, 0xb93ec468U, 0x382c3424U, 0xc25f40a3U ++.word 0x1672c31dU, 0xbc0c25e2U, 
0x288b493cU, 0xff41950dU ++.word 0x397101a8U, 0x08deb30cU, 0xd89ce4b4U, 0x6490c156U ++.word 0x7b6184cbU, 0xd570b632U, 0x48745c6cU, 0xd04257b8U ++ ++.type AES_Td4,\@object ++AES_Td4: ++.byte 0x52U, 0x09U, 0x6aU, 0xd5U, 0x30U, 0x36U, 0xa5U, 0x38U ++.byte 0xbfU, 0x40U, 0xa3U, 0x9eU, 0x81U, 0xf3U, 0xd7U, 0xfbU ++.byte 0x7cU, 0xe3U, 0x39U, 0x82U, 0x9bU, 0x2fU, 0xffU, 0x87U ++.byte 0x34U, 0x8eU, 0x43U, 0x44U, 0xc4U, 0xdeU, 0xe9U, 0xcbU ++.byte 0x54U, 0x7bU, 0x94U, 0x32U, 0xa6U, 0xc2U, 0x23U, 0x3dU ++.byte 0xeeU, 0x4cU, 0x95U, 0x0bU, 0x42U, 0xfaU, 0xc3U, 0x4eU ++.byte 0x08U, 0x2eU, 0xa1U, 0x66U, 0x28U, 0xd9U, 0x24U, 0xb2U ++.byte 0x76U, 0x5bU, 0xa2U, 0x49U, 0x6dU, 0x8bU, 0xd1U, 0x25U ++.byte 0x72U, 0xf8U, 0xf6U, 0x64U, 0x86U, 0x68U, 0x98U, 0x16U ++.byte 0xd4U, 0xa4U, 0x5cU, 0xccU, 0x5dU, 0x65U, 0xb6U, 0x92U ++.byte 0x6cU, 0x70U, 0x48U, 0x50U, 0xfdU, 0xedU, 0xb9U, 0xdaU ++.byte 0x5eU, 0x15U, 0x46U, 0x57U, 0xa7U, 0x8dU, 0x9dU, 0x84U ++.byte 0x90U, 0xd8U, 0xabU, 0x00U, 0x8cU, 0xbcU, 0xd3U, 0x0aU ++.byte 0xf7U, 0xe4U, 0x58U, 0x05U, 0xb8U, 0xb3U, 0x45U, 0x06U ++.byte 0xd0U, 0x2cU, 0x1eU, 0x8fU, 0xcaU, 0x3fU, 0x0fU, 0x02U ++.byte 0xc1U, 0xafU, 0xbdU, 0x03U, 0x01U, 0x13U, 0x8aU, 0x6bU ++.byte 0x3aU, 0x91U, 0x11U, 0x41U, 0x4fU, 0x67U, 0xdcU, 0xeaU ++.byte 0x97U, 0xf2U, 0xcfU, 0xceU, 0xf0U, 0xb4U, 0xe6U, 0x73U ++.byte 0x96U, 0xacU, 0x74U, 0x22U, 0xe7U, 0xadU, 0x35U, 0x85U ++.byte 0xe2U, 0xf9U, 0x37U, 0xe8U, 0x1cU, 0x75U, 0xdfU, 0x6eU ++.byte 0x47U, 0xf1U, 0x1aU, 0x71U, 0x1dU, 0x29U, 0xc5U, 0x89U ++.byte 0x6fU, 0xb7U, 0x62U, 0x0eU, 0xaaU, 0x18U, 0xbeU, 0x1bU ++.byte 0xfcU, 0x56U, 0x3eU, 0x4bU, 0xc6U, 0xd2U, 0x79U, 0x20U ++.byte 0x9aU, 0xdbU, 0xc0U, 0xfeU, 0x78U, 0xcdU, 0x5aU, 0xf4U ++.byte 0x1fU, 0xddU, 0xa8U, 0x33U, 0x88U, 0x07U, 0xc7U, 0x31U ++.byte 0xb1U, 0x12U, 0x10U, 0x59U, 0x27U, 0x80U, 0xecU, 0x5fU ++.byte 0x60U, 0x51U, 0x7fU, 0xa9U, 0x19U, 0xb5U, 0x4aU, 0x0dU ++.byte 0x2dU, 0xe5U, 0x7aU, 0x9fU, 0x93U, 0xc9U, 0x9cU, 0xefU ++.byte 0xa0U, 0xe0U, 0x3bU, 0x4dU, 0xaeU, 0x2aU, 
0xf5U, 0xb0U ++.byte 0xc8U, 0xebU, 0xbbU, 0x3cU, 0x83U, 0x53U, 0x99U, 0x61U ++.byte 0x17U, 0x2bU, 0x04U, 0x7eU, 0xbaU, 0x77U, 0xd6U, 0x26U ++.byte 0xe1U, 0x69U, 0x14U, 0x63U, 0x55U, 0x21U, 0x0cU, 0x7dU ++ ++.type AES_rcon,\@object ++AES_rcon: ++.word 0x00000001U, 0x00000002U, 0x00000004U, 0x00000008U ++.word 0x00000010U, 0x00000020U, 0x00000040U, 0x00000080U ++.word 0x0000001BU, 0x00000036U ++___ ++ ++print $code; ++close STDOUT or die "error closing STDOUT: $!"; +\ No newline at end of file +diff --git a/crypto/aes/build.info b/crypto/aes/build.info +index b250903..77cdaca 100644 +--- a/crypto/aes/build.info ++++ b/crypto/aes/build.info +@@ -47,6 +47,9 @@ IF[{- !$disabled{asm} -}] + # aes-c64xplus.s implements AES_ctr32_encrypt + $AESDEF_c64xplus=AES_ASM AES_CTR_ASM + ++ $AESASM_riscv64=aes_cbc.c aes-riscv64.s aes-riscv64-zvkned.s ++ $AESDEF_riscv64=AES_ASM ++ + # Now that we have defined all the arch specific variables, use the + # appropriate one, and define the appropriate macros + IF[$AESASM_{- $target{asm_arch} -}] +@@ -113,6 +116,9 @@ GENERATE[aes-parisc.s]=asm/aes-parisc.pl + GENERATE[aes-mips.S]=asm/aes-mips.pl + INCLUDE[aes-mips.o]=.. + ++GENERATE[aes-riscv64.s]=asm/aes-riscv64.pl ++GENERATE[aes-riscv64-zvkned.s]=asm/aes-riscv64-zvkned.pl ++ + GENERATE[aesv8-armx.S]=asm/aesv8-armx.pl + INCLUDE[aesv8-armx.o]=.. + GENERATE[vpaes-armv8.S]=asm/vpaes-armv8.pl +diff --git a/crypto/build.info b/crypto/build.info +index c04db55..f1dd411 100644 +--- a/crypto/build.info ++++ b/crypto/build.info +@@ -51,6 +51,8 @@ IF[{- !$disabled{asm} && $config{processor} ne '386' -}] + + $CPUIDASM_c64xplus=c64xpluscpuid.s + ++ $CPUIDASM_riscv64=riscvcap.c riscv64cpuid.s ++ + # Now that we have defined all the arch specific variables, use the + # appropriate one, and define the appropriate macros + IF[$CPUIDASM_{- $target{asm_arch} -}] +@@ -130,6 +132,7 @@ GENERATE[armv4cpuid.S]=armv4cpuid.pl + INCLUDE[armv4cpuid.o]=. 
+ GENERATE[s390xcpuid.S]=s390xcpuid.pl + INCLUDE[s390xcpuid.o]=. ++GENERATE[riscv64cpuid.s]=riscv64cpuid.pl + + IF[{- $config{target} =~ /^(?:Cygwin|mingw|VC-|BC-)/ -}] + SHARED_SOURCE[../libcrypto]=dllmain.c +diff --git a/crypto/perlasm/riscv.pm b/crypto/perlasm/riscv.pm +new file mode 100644 +index 0000000..69a47c5 +--- /dev/null ++++ b/crypto/perlasm/riscv.pm +@@ -0,0 +1,1076 @@ ++#! /usr/bin/env perl ++# This file is dual-licensed, meaning that you can use it under your ++# choice of either of the following two licenses: ++# ++# Copyright 2023-2025 The OpenSSL Project Authors. All Rights Reserved. ++# ++# Licensed under the Apache License 2.0 (the "License"). You can obtain ++# a copy in the file LICENSE in the source distribution or at ++# https://www.openssl.org/source/license.html ++# ++# or ++# ++# Copyright (c) 2023, Christoph Müllner ++# Copyright (c) 2023, Jerry Shih ++# Copyright (c) 2023, Phoebe Chen ++# Copyright (c) 2025, Julian Zhu ++# All rights reserved. ++# ++# Redistribution and use in source and binary forms, with or without ++# modification, are permitted provided that the following conditions ++# are met: ++# 1. Redistributions of source code must retain the above copyright ++# notice, this list of conditions and the following disclaimer. ++# 2. Redistributions in binary form must reproduce the above copyright ++# notice, this list of conditions and the following disclaimer in the ++# documentation and/or other materials provided with the distribution. ++# ++# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ++# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT ++# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR ++# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT ++# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, ++# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT ++# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, ++# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY ++# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT ++# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE ++# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ++ ++use strict; ++use warnings; ++ ++# Set $have_stacktrace to 1 if we have Devel::StackTrace ++my $have_stacktrace = 0; ++if (eval {require Devel::StackTrace;1;}) { ++ $have_stacktrace = 1; ++} ++ ++my @regs = map("x$_",(0..31)); ++# Mapping from the RISC-V psABI ABI mnemonic names to the register number. ++my @regaliases = ('zero','ra','sp','gp','tp','t0','t1','t2','s0','s1', ++ map("a$_",(0..7)), ++ map("s$_",(2..11)), ++ map("t$_",(3..6)) ++); ++ ++my %reglookup; ++@reglookup{@regs} = @regs; ++@reglookup{@regaliases} = @regs; ++ ++# Takes a register name, possibly an alias, and converts it to a register index ++# from 0 to 31 ++sub read_reg { ++ my $reg = lc shift; ++ if (!exists($reglookup{$reg})) { ++ my $trace = ""; ++ if ($have_stacktrace) { ++ $trace = Devel::StackTrace->new->as_string; ++ } ++ die("Unknown register ".$reg."\n".$trace); ++ } ++ my $regstr = $reglookup{$reg}; ++ if (!($regstr =~ /^x([0-9]+)$/)) { ++ my $trace = ""; ++ if ($have_stacktrace) { ++ $trace = Devel::StackTrace->new->as_string; ++ } ++ die("Could not process register ".$reg."\n".$trace); ++ } ++ return $1; ++} ++ ++# Read the sew setting(8, 16, 32 and 64) and convert to vsew encoding. 
++sub read_sew { ++ my $sew_setting = shift; ++ ++ if ($sew_setting eq "e8") { ++ return 0; ++ } elsif ($sew_setting eq "e16") { ++ return 1; ++ } elsif ($sew_setting eq "e32") { ++ return 2; ++ } elsif ($sew_setting eq "e64") { ++ return 3; ++ } else { ++ my $trace = ""; ++ if ($have_stacktrace) { ++ $trace = Devel::StackTrace->new->as_string; ++ } ++ die("Unsupported SEW setting:".$sew_setting."\n".$trace); ++ } ++} ++ ++# Read the LMUL settings and convert to vlmul encoding. ++sub read_lmul { ++ my $lmul_setting = shift; ++ ++ if ($lmul_setting eq "mf8") { ++ return 5; ++ } elsif ($lmul_setting eq "mf4") { ++ return 6; ++ } elsif ($lmul_setting eq "mf2") { ++ return 7; ++ } elsif ($lmul_setting eq "m1") { ++ return 0; ++ } elsif ($lmul_setting eq "m2") { ++ return 1; ++ } elsif ($lmul_setting eq "m4") { ++ return 2; ++ } elsif ($lmul_setting eq "m8") { ++ return 3; ++ } else { ++ my $trace = ""; ++ if ($have_stacktrace) { ++ $trace = Devel::StackTrace->new->as_string; ++ } ++ die("Unsupported LMUL setting:".$lmul_setting."\n".$trace); ++ } ++} ++ ++# Read the tail policy settings and convert to vta encoding. ++sub read_tail_policy { ++ my $tail_setting = shift; ++ ++ if ($tail_setting eq "ta") { ++ return 1; ++ } elsif ($tail_setting eq "tu") { ++ return 0; ++ } else { ++ my $trace = ""; ++ if ($have_stacktrace) { ++ $trace = Devel::StackTrace->new->as_string; ++ } ++ die("Unsupported tail policy setting:".$tail_setting."\n".$trace); ++ } ++} ++ ++# Read the mask policy settings and convert to vma encoding. 
++sub read_mask_policy { ++ my $mask_setting = shift; ++ ++ if ($mask_setting eq "ma") { ++ return 1; ++ } elsif ($mask_setting eq "mu") { ++ return 0; ++ } else { ++ my $trace = ""; ++ if ($have_stacktrace) { ++ $trace = Devel::StackTrace->new->as_string; ++ } ++ die("Unsupported mask policy setting:".$mask_setting."\n".$trace); ++ } ++} ++ ++my @vregs = map("v$_",(0..31)); ++my %vreglookup; ++@vreglookup{@vregs} = @vregs; ++ ++sub read_vreg { ++ my $vreg = lc shift; ++ if (!exists($vreglookup{$vreg})) { ++ my $trace = ""; ++ if ($have_stacktrace) { ++ $trace = Devel::StackTrace->new->as_string; ++ } ++ die("Unknown vector register ".$vreg."\n".$trace); ++ } ++ if (!($vreg =~ /^v([0-9]+)$/)) { ++ my $trace = ""; ++ if ($have_stacktrace) { ++ $trace = Devel::StackTrace->new->as_string; ++ } ++ die("Could not process vector register ".$vreg."\n".$trace); ++ } ++ return $1; ++} ++ ++# Read the vm settings and convert to mask encoding. ++sub read_mask_vreg { ++ my $vreg = shift; ++ # The default value is unmasked. ++ my $mask_bit = 1; ++ ++ if (defined($vreg)) { ++ my $reg_id = read_vreg $vreg; ++ if ($reg_id == 0) { ++ $mask_bit = 0; ++ } else { ++ my $trace = ""; ++ if ($have_stacktrace) { ++ $trace = Devel::StackTrace->new->as_string; ++ } ++ die("The ".$vreg." 
is not the mask register v0.\n".$trace); ++ } ++ } ++ return $mask_bit; ++} ++ ++# Helper functions ++ ++sub brev8_rv64i { ++ # brev8 without `brev8` instruction (only in Zbkb) ++ # Bit-reverses the first argument and needs two scratch registers ++ my $val = shift; ++ my $t0 = shift; ++ my $t1 = shift; ++ my $brev8_const = shift; ++ my $seq = <<___; ++ la $brev8_const, Lbrev8_const ++ ++ ld $t0, 0($brev8_const) # 0xAAAAAAAAAAAAAAAA ++ slli $t1, $val, 1 ++ and $t1, $t1, $t0 ++ and $val, $val, $t0 ++ srli $val, $val, 1 ++ or $val, $t1, $val ++ ++ ld $t0, 8($brev8_const) # 0xCCCCCCCCCCCCCCCC ++ slli $t1, $val, 2 ++ and $t1, $t1, $t0 ++ and $val, $val, $t0 ++ srli $val, $val, 2 ++ or $val, $t1, $val ++ ++ ld $t0, 16($brev8_const) # 0xF0F0F0F0F0F0F0F0 ++ slli $t1, $val, 4 ++ and $t1, $t1, $t0 ++ and $val, $val, $t0 ++ srli $val, $val, 4 ++ or $val, $t1, $val ++___ ++ return $seq; ++} ++ ++sub sd_rev8_rv64i { ++ # rev8 without `rev8` instruction (only in Zbb or Zbkb) ++ # Stores the given value byte-reversed and needs one scratch register ++ my $val = shift; ++ my $addr = shift; ++ my $off = shift; ++ my $tmp = shift; ++ my $off0 = ($off + 0); ++ my $off1 = ($off + 1); ++ my $off2 = ($off + 2); ++ my $off3 = ($off + 3); ++ my $off4 = ($off + 4); ++ my $off5 = ($off + 5); ++ my $off6 = ($off + 6); ++ my $off7 = ($off + 7); ++ my $seq = <<___; ++ sb $val, $off7($addr) ++ srli $tmp, $val, 8 ++ sb $tmp, $off6($addr) ++ srli $tmp, $val, 16 ++ sb $tmp, $off5($addr) ++ srli $tmp, $val, 24 ++ sb $tmp, $off4($addr) ++ srli $tmp, $val, 32 ++ sb $tmp, $off3($addr) ++ srli $tmp, $val, 40 ++ sb $tmp, $off2($addr) ++ srli $tmp, $val, 48 ++ sb $tmp, $off1($addr) ++ srli $tmp, $val, 56 ++ sb $tmp, $off0($addr) ++___ ++ return $seq; ++} ++ ++sub roriw_rv64i { ++ my ( ++ $rd, $rs, $tmp1, $tmp2, $imm, ++ ) = @_; ++ my $code=<<___; ++ srliw $tmp1, $rs, $imm ++ slliw $tmp2, $rs, (32-$imm) ++ or $rd, $tmp1, $tmp2 ++___ ++ return $code; ++} ++ ++# Scalar crypto instructions ++ ++sub aes64ds { 
++ # Encoding for aes64ds rd, rs1, rs2 instruction on RV64 ++ # XXXXXXX_ rs2 _ rs1 _XXX_ rd _XXXXXXX ++ my $template = 0b0011101_00000_00000_000_00000_0110011; ++ my $rd = read_reg shift; ++ my $rs1 = read_reg shift; ++ my $rs2 = read_reg shift; ++ return ".word ".($template | ($rs2 << 20) | ($rs1 << 15) | ($rd << 7)); ++} ++ ++sub aes64dsm { ++ # Encoding for aes64dsm rd, rs1, rs2 instruction on RV64 ++ # XXXXXXX_ rs2 _ rs1 _XXX_ rd _XXXXXXX ++ my $template = 0b0011111_00000_00000_000_00000_0110011; ++ my $rd = read_reg shift; ++ my $rs1 = read_reg shift; ++ my $rs2 = read_reg shift; ++ return ".word ".($template | ($rs2 << 20) | ($rs1 << 15) | ($rd << 7)); ++} ++ ++sub aes64es { ++ # Encoding for aes64es rd, rs1, rs2 instruction on RV64 ++ # XXXXXXX_ rs2 _ rs1 _XXX_ rd _XXXXXXX ++ my $template = 0b0011001_00000_00000_000_00000_0110011; ++ my $rd = read_reg shift; ++ my $rs1 = read_reg shift; ++ my $rs2 = read_reg shift; ++ return ".word ".($template | ($rs2 << 20) | ($rs1 << 15) | ($rd << 7)); ++} ++ ++sub aes64esm { ++ # Encoding for aes64esm rd, rs1, rs2 instruction on RV64 ++ # XXXXXXX_ rs2 _ rs1 _XXX_ rd _XXXXXXX ++ my $template = 0b0011011_00000_00000_000_00000_0110011; ++ my $rd = read_reg shift; ++ my $rs1 = read_reg shift; ++ my $rs2 = read_reg shift; ++ return ".word ".($template | ($rs2 << 20) | ($rs1 << 15) | ($rd << 7)); ++} ++ ++sub aes64im { ++ # Encoding for aes64im rd, rs1 instruction on RV64 ++ # XXXXXXXXXXXX_ rs1 _XXX_ rd _XXXXXXX ++ my $template = 0b001100000000_00000_001_00000_0010011; ++ my $rd = read_reg shift; ++ my $rs1 = read_reg shift; ++ return ".word ".($template | ($rs1 << 15) | ($rd << 7)); ++} ++ ++sub aes64ks1i { ++ # Encoding for aes64ks1i rd, rs1, rnum instruction on RV64 ++ # XXXXXXXX_rnum_ rs1 _XXX_ rd _XXXXXXX ++ my $template = 0b00110001_0000_00000_001_00000_0010011; ++ my $rd = read_reg shift; ++ my $rs1 = read_reg shift; ++ my $rnum = shift; ++ return ".word ".($template | ($rnum << 20) | ($rs1 << 15) | ($rd << 7)); ++} ++ 
++sub aes64ks2 { ++ # Encoding for aes64ks2 rd, rs1, rs2 instruction on RV64 ++ # XXXXXXX_ rs2 _ rs1 _XXX_ rd _XXXXXXX ++ my $template = 0b0111111_00000_00000_000_00000_0110011; ++ my $rd = read_reg shift; ++ my $rs1 = read_reg shift; ++ my $rs2 = read_reg shift; ++ return ".word ".($template | ($rs2 << 20) | ($rs1 << 15) | ($rd << 7)); ++} ++ ++sub brev8 { ++ # brev8 rd, rs ++ my $template = 0b011010000111_00000_101_00000_0010011; ++ my $rd = read_reg shift; ++ my $rs = read_reg shift; ++ return ".word ".($template | ($rs << 15) | ($rd << 7)); ++} ++ ++sub clmul { ++ # Encoding for clmul rd, rs1, rs2 instruction on RV64 ++ # XXXXXXX_ rs2 _ rs1 _XXX_ rd _XXXXXXX ++ my $template = 0b0000101_00000_00000_001_00000_0110011; ++ my $rd = read_reg shift; ++ my $rs1 = read_reg shift; ++ my $rs2 = read_reg shift; ++ return ".word ".($template | ($rs2 << 20) | ($rs1 << 15) | ($rd << 7)); ++} ++ ++sub clmulh { ++ # Encoding for clmulh rd, rs1, rs2 instruction on RV64 ++ # XXXXXXX_ rs2 _ rs1 _XXX_ rd _XXXXXXX ++ my $template = 0b0000101_00000_00000_011_00000_0110011; ++ my $rd = read_reg shift; ++ my $rs1 = read_reg shift; ++ my $rs2 = read_reg shift; ++ return ".word ".($template | ($rs2 << 20) | ($rs1 << 15) | ($rd << 7)); ++} ++ ++sub rev8 { ++ # Encoding for rev8 rd, rs instruction on RV64 ++ # XXXXXXXXXXXXX_ rs _XXX_ rd _XXXXXXX ++ my $template = 0b011010111000_00000_101_00000_0010011; ++ my $rd = read_reg shift; ++ my $rs = read_reg shift; ++ return ".word ".($template | ($rs << 15) | ($rd << 7)); ++} ++ ++sub rori { ++ # Encoding for rori rd, rs1, shamt instruction on RV64 ++ # XXXXXXX_ shamt _ rs1 _XXX_ rd _XXXXXXX ++ my $template = 0b0110000_00000_00000_101_00000_0010011; ++ my $rd = read_reg shift; ++ my $rs1 = read_reg shift; ++ my $shamt = shift; ++ return ".word ".($template | ($shamt << 20) | ($rs1 << 15) | ($rd << 7)); ++} ++ ++sub roriw { ++ # Encoding for roriw rd, rs1, shamt instruction on RV64 ++ # XXXXXXX_ shamt _ rs1 _XXX_ rd _XXXXXXX ++ my $template = 
0b0110000_00000_00000_101_00000_0011011; ++ my $rd = read_reg shift; ++ my $rs1 = read_reg shift; ++ my $shamt = shift; ++ return ".word ".($template | ($shamt << 20) | ($rs1 << 15) | ($rd << 7)); ++} ++ ++sub maxu { ++ # Encoding for maxu rd, rs1, rs2 instruction on RV64 ++ # XXXXXXX_ rs2 _ rs1 _XXX_ rd _XXXXXXX ++ my $template = 0b0000101_00000_00000_111_00000_0110011; ++ my $rd = read_reg shift; ++ my $rs1 = read_reg shift; ++ my $rs2 = read_reg shift; ++ return ".word ".($template | ($rs2 << 20) | ($rs1 << 15) | ($rd << 7)); ++} ++ ++sub minu { ++ # Encoding for minu rd, rs1, rs2 instruction on RV64 ++ # XXXXXXX_ rs2 _ rs1 _XXX_ rd _XXXXXXX ++ my $template = 0b0000101_00000_00000_101_00000_0110011; ++ my $rd = read_reg shift; ++ my $rs1 = read_reg shift; ++ my $rs2 = read_reg shift; ++ return ".word ".($template | ($rs2 << 20) | ($rs1 << 15) | ($rd << 7)); ++} ++ ++# Vector instructions ++ ++sub vadd_vv { ++ # vadd.vv vd, vs2, vs1, vm ++ my $template = 0b000000_0_00000_00000_000_00000_1010111; ++ my $vd = read_vreg shift; ++ my $vs2 = read_vreg shift; ++ my $vs1 = read_vreg shift; ++ my $vm = read_mask_vreg shift; ++ return ".word ".($template | ($vm << 25) | ($vs2 << 20) | ($vs1 << 15) | ($vd << 7)); ++} ++ ++sub vadd_vx { ++ # vadd.vx vd, vs2, rs1, vm ++ my $template = 0b000000_0_00000_00000_100_00000_1010111; ++ my $vd = read_vreg shift; ++ my $vs2 = read_vreg shift; ++ my $rs1 = read_reg shift; ++ my $vm = read_mask_vreg shift; ++ return ".word ".($template | ($vm << 25) | ($vs2 << 20) | ($rs1 << 15) | ($vd << 7)); ++} ++ ++sub vsub_vv { ++ # vsub.vv vd, vs2, vs1, vm ++ my $template = 0b000010_0_00000_00000_000_00000_1010111; ++ my $vd = read_vreg shift; ++ my $vs2 = read_vreg shift; ++ my $vs1 = read_vreg shift; ++ my $vm = read_mask_vreg shift; ++ return ".word ".($template | ($vm << 25) | ($vs2 << 20) | ($vs1 << 15) | ($vd << 7)); ++} ++ ++sub vsub_vx { ++ # vsub.vx vd, vs2, rs1, vm ++ my $template = 0b000010_0_00000_00000_100_00000_1010111; ++ my $vd = 
read_vreg shift; ++ my $vs2 = read_vreg shift; ++ my $rs1 = read_reg shift; ++ my $vm = read_mask_vreg shift; ++ return ".word ".($template | ($vm << 25) | ($vs2 << 20) | ($rs1 << 15) | ($vd << 7)); ++} ++ ++sub vid_v { ++ # vid.v vd ++ my $template = 0b0101001_00000_10001_010_00000_1010111; ++ my $vd = read_vreg shift; ++ return ".word ".($template | ($vd << 7)); ++} ++ ++sub viota_m { ++ # viota.m vd, vs2, vm ++ my $template = 0b010100_0_00000_10000_010_00000_1010111; ++ my $vd = read_vreg shift; ++ my $vs2 = read_vreg shift; ++ my $vm = read_mask_vreg shift; ++ return ".word ".($template | ($vm << 25) | ($vs2 << 20) | ($vd << 7)); ++} ++ ++sub vle8_v { ++ # vle8.v vd, (rs1), vm ++ my $template = 0b000000_0_00000_00000_000_00000_0000111; ++ my $vd = read_vreg shift; ++ my $rs1 = read_reg shift; ++ my $vm = read_mask_vreg shift; ++ return ".word ".($template | ($vm << 25) | ($rs1 << 15) | ($vd << 7)); ++} ++ ++sub vle32_v { ++ # vle32.v vd, (rs1), vm ++ my $template = 0b000000_0_00000_00000_110_00000_0000111; ++ my $vd = read_vreg shift; ++ my $rs1 = read_reg shift; ++ my $vm = read_mask_vreg shift; ++ return ".word ".($template | ($vm << 25) | ($rs1 << 15) | ($vd << 7)); ++} ++ ++sub vle64_v { ++ # vle64.v vd, (rs1) ++ my $template = 0b0000001_00000_00000_111_00000_0000111; ++ my $vd = read_vreg shift; ++ my $rs1 = read_reg shift; ++ return ".word ".($template | ($rs1 << 15) | ($vd << 7)); ++} ++ ++sub vlse32_v { ++ # vlse32.v vd, (rs1), rs2 ++ my $template = 0b0000101_00000_00000_110_00000_0000111; ++ my $vd = read_vreg shift; ++ my $rs1 = read_reg shift; ++ my $rs2 = read_reg shift; ++ return ".word ".($template | ($rs2 << 20) | ($rs1 << 15) | ($vd << 7)); ++} ++ ++sub vlsseg_nf_e32_v { ++ # vlssege32.v vd, (rs1), rs2 ++ my $template = 0b0000101_00000_00000_110_00000_0000111; ++ my $nf = shift; ++ $nf -= 1; ++ my $vd = read_vreg shift; ++ my $rs1 = read_reg shift; ++ my $rs2 = read_reg shift; ++ return ".word ".($template | ($nf << 29) | ($rs2 << 20) | ($rs1 << 
15) | ($vd << 7)); ++} ++ ++sub vlse64_v { ++ # vlse64.v vd, (rs1), rs2 ++ my $template = 0b0000101_00000_00000_111_00000_0000111; ++ my $vd = read_vreg shift; ++ my $rs1 = read_reg shift; ++ my $rs2 = read_reg shift; ++ return ".word ".($template | ($rs2 << 20) | ($rs1 << 15) | ($vd << 7)); ++} ++ ++sub vluxei8_v { ++ # vluxei8.v vd, (rs1), vs2, vm ++ my $template = 0b000001_0_00000_00000_000_00000_0000111; ++ my $vd = read_vreg shift; ++ my $rs1 = read_reg shift; ++ my $vs2 = read_vreg shift; ++ my $vm = read_mask_vreg shift; ++ return ".word ".($template | ($vm << 25) | ($vs2 << 20) | ($rs1 << 15) | ($vd << 7)); ++} ++ ++sub vmerge_vim { ++ # vmerge.vim vd, vs2, imm, v0 ++ my $template = 0b0101110_00000_00000_011_00000_1010111; ++ my $vd = read_vreg shift; ++ my $vs2 = read_vreg shift; ++ my $imm = shift; ++ return ".word ".($template | ($vs2 << 20) | ($imm << 15) | ($vd << 7)); ++} ++ ++sub vmerge_vvm { ++ # vmerge.vvm vd vs2 vs1 ++ my $template = 0b0101110_00000_00000_000_00000_1010111; ++ my $vd = read_vreg shift; ++ my $vs2 = read_vreg shift; ++ my $vs1 = read_vreg shift; ++ return ".word ".($template | ($vs2 << 20) | ($vs1 << 15) | ($vd << 7)) ++} ++ ++sub vmseq_vi { ++ # vmseq.vi vd vs1, imm ++ my $template = 0b0110001_00000_00000_011_00000_1010111; ++ my $vd = read_vreg shift; ++ my $vs1 = read_vreg shift; ++ my $imm = shift; ++ return ".word ".($template | ($vs1 << 20) | ($imm << 15) | ($vd << 7)) ++} ++ ++sub vmsgtu_vx { ++ # vmsgtu.vx vd vs2, rs1, vm ++ my $template = 0b011110_0_00000_00000_100_00000_1010111; ++ my $vd = read_vreg shift; ++ my $vs2 = read_vreg shift; ++ my $rs1 = read_reg shift; ++ my $vm = read_mask_vreg shift; ++ return ".word ".($template | ($vm << 25) | ($vs2 << 20) | ($rs1 << 15) | ($vd << 7)) ++} ++ ++sub vmv_v_i { ++ # vmv.v.i vd, imm ++ my $template = 0b0101111_00000_00000_011_00000_1010111; ++ my $vd = read_vreg shift; ++ my $imm = shift; ++ return ".word ".($template | ($imm << 15) | ($vd << 7)); ++} ++ ++sub vmv_v_x { ++ # 
vmv.v.x vd, rs1 ++ my $template = 0b0101111_00000_00000_100_00000_1010111; ++ my $vd = read_vreg shift; ++ my $rs1 = read_reg shift; ++ return ".word ".($template | ($rs1 << 15) | ($vd << 7)); ++} ++ ++sub vmv_v_v { ++ # vmv.v.v vd, vs1 ++ my $template = 0b0101111_00000_00000_000_00000_1010111; ++ my $vd = read_vreg shift; ++ my $vs1 = read_vreg shift; ++ return ".word ".($template | ($vs1 << 15) | ($vd << 7)); ++} ++ ++sub vor_vv { ++ # vor.vv vd, vs2, vs1 ++ my $template = 0b0010101_00000_00000_000_00000_1010111; ++ my $vd = read_vreg shift; ++ my $vs2 = read_vreg shift; ++ my $vs1 = read_vreg shift; ++ return ".word ".($template | ($vs2 << 20) | ($vs1 << 15) | ($vd << 7)); ++} ++ ++sub vor_vv_v0t { ++ # vor.vv vd, vs2, vs1, v0.t ++ my $template = 0b0010100_00000_00000_000_00000_1010111; ++ my $vd = read_vreg shift; ++ my $vs2 = read_vreg shift; ++ my $vs1 = read_vreg shift; ++ return ".word ".($template | ($vs2 << 20) | ($vs1 << 15) | ($vd << 7)); ++} ++ ++sub vse8_v { ++ # vse8.v vd, (rs1), vm ++ my $template = 0b000000_0_00000_00000_000_00000_0100111; ++ my $vd = read_vreg shift; ++ my $rs1 = read_reg shift; ++ my $vm = read_mask_vreg shift; ++ return ".word ".($template | ($vm << 25) | ($rs1 << 15) | ($vd << 7)); ++} ++ ++sub vse32_v { ++ # vse32.v vd, (rs1), vm ++ my $template = 0b000000_0_00000_00000_110_00000_0100111; ++ my $vd = read_vreg shift; ++ my $rs1 = read_reg shift; ++ my $vm = read_mask_vreg shift; ++ return ".word ".($template | ($vm << 25) | ($rs1 << 15) | ($vd << 7)); ++} ++ ++sub vssseg_nf_e32_v { ++ # vsssege32.v vs3, (rs1), rs2 ++ my $template = 0b0000101_00000_00000_110_00000_0100111; ++ my $nf = shift; ++ $nf -= 1; ++ my $vs3 = read_vreg shift; ++ my $rs1 = read_reg shift; ++ my $rs2 = read_reg shift; ++ return ".word ".($template | ($nf << 29) | ($rs2 << 20) | ($rs1 << 15) | ($vs3 << 7)); ++} ++ ++sub vsuxei8_v { ++ # vsuxei8.v vs3, (rs1), vs2, vm ++ my $template = 0b000001_0_00000_00000_000_00000_0100111; ++ my $vs3 = read_vreg shift; 
++ my $rs1 = read_reg shift; ++ my $vs2 = read_vreg shift; ++ my $vm = read_mask_vreg shift; ++ return ".word ".($template | ($vm << 25) | ($vs2 << 20) | ($rs1 << 15) | ($vs3 << 7)); ++} ++ ++sub vse64_v { ++ # vse64.v vd, (rs1) ++ my $template = 0b0000001_00000_00000_111_00000_0100111; ++ my $vd = read_vreg shift; ++ my $rs1 = read_reg shift; ++ return ".word ".($template | ($rs1 << 15) | ($vd << 7)); ++} ++ ++sub vsetivli__x0_2_e64_m1_tu_mu { ++ # vsetivli x0, 2, e64, m1, tu, mu ++ return ".word 0xc1817057"; ++} ++ ++sub vsetivli__x0_4_e32_m1_tu_mu { ++ # vsetivli x0, 4, e32, m1, tu, mu ++ return ".word 0xc1027057"; ++} ++ ++sub vsetivli__x0_4_e64_m1_tu_mu { ++ # vsetivli x0, 4, e64, m1, tu, mu ++ return ".word 0xc1827057"; ++} ++ ++sub vsetivli__x0_8_e32_m1_tu_mu { ++ # vsetivli x0, 8, e32, m1, tu, mu ++ return ".word 0xc1047057"; ++} ++ ++sub vsetvli { ++ # vsetvli rd, rs1, vtypei ++ my $template = 0b0_00000000000_00000_111_00000_1010111; ++ my $rd = read_reg shift; ++ my $rs1 = read_reg shift; ++ my $sew = read_sew shift; ++ my $lmul = read_lmul shift; ++ my $tail_policy = read_tail_policy shift; ++ my $mask_policy = read_mask_policy shift; ++ my $vtypei = ($mask_policy << 7) | ($tail_policy << 6) | ($sew << 3) | $lmul; ++ ++ return ".word ".($template | ($vtypei << 20) | ($rs1 << 15) | ($rd << 7)); ++} ++ ++sub vsetivli { ++ # vsetvli rd, uimm, vtypei ++ my $template = 0b11_0000000000_00000_111_00000_1010111; ++ my $rd = read_reg shift; ++ my $uimm = shift; ++ my $sew = read_sew shift; ++ my $lmul = read_lmul shift; ++ my $tail_policy = read_tail_policy shift; ++ my $mask_policy = read_mask_policy shift; ++ my $vtypei = ($mask_policy << 7) | ($tail_policy << 6) | ($sew << 3) | $lmul; ++ ++ return ".word ".($template | ($vtypei << 20) | ($uimm << 15) | ($rd << 7)); ++} ++ ++sub vslidedown_vi { ++ # vslidedown.vi vd, vs2, uimm ++ my $template = 0b0011111_00000_00000_011_00000_1010111; ++ my $vd = read_vreg shift; ++ my $vs2 = read_vreg shift; ++ my $uimm = 
shift; ++ return ".word ".($template | ($vs2 << 20) | ($uimm << 15) | ($vd << 7)); ++} ++ ++sub vslidedown_vx { ++ # vslidedown.vx vd, vs2, rs1 ++ my $template = 0b0011111_00000_00000_100_00000_1010111; ++ my $vd = read_vreg shift; ++ my $vs2 = read_vreg shift; ++ my $rs1 = read_reg shift; ++ return ".word ".($template | ($vs2 << 20) | ($rs1 << 15) | ($vd << 7)); ++} ++ ++sub vslideup_vi_v0t { ++ # vslideup.vi vd, vs2, uimm, v0.t ++ my $template = 0b0011100_00000_00000_011_00000_1010111; ++ my $vd = read_vreg shift; ++ my $vs2 = read_vreg shift; ++ my $uimm = shift; ++ return ".word ".($template | ($vs2 << 20) | ($uimm << 15) | ($vd << 7)); ++} ++ ++sub vslideup_vi { ++ # vslideup.vi vd, vs2, uimm ++ my $template = 0b0011101_00000_00000_011_00000_1010111; ++ my $vd = read_vreg shift; ++ my $vs2 = read_vreg shift; ++ my $uimm = shift; ++ return ".word ".($template | ($vs2 << 20) | ($uimm << 15) | ($vd << 7)); ++} ++ ++sub vsll_vi { ++ # vsll.vi vd, vs2, uimm, vm ++ my $template = 0b1001011_00000_00000_011_00000_1010111; ++ my $vd = read_vreg shift; ++ my $vs2 = read_vreg shift; ++ my $uimm = shift; ++ return ".word ".($template | ($vs2 << 20) | ($uimm << 15) | ($vd << 7)); ++} ++ ++sub vsrl_vi { ++ # vsrl.vi vd, vs2, uimm, vm ++ my $template = 0b1010001_00000_00000_011_00000_1010111; ++ my $vd = read_vreg shift; ++ my $vs2 = read_vreg shift; ++ my $uimm = shift; ++ return ".word ".($template | ($vs2 << 20) | ($uimm << 15) | ($vd << 7)); ++} ++ ++sub vsrl_vx { ++ # vsrl.vx vd, vs2, rs1 ++ my $template = 0b1010001_00000_00000_100_00000_1010111; ++ my $vd = read_vreg shift; ++ my $vs2 = read_vreg shift; ++ my $rs1 = read_reg shift; ++ return ".word ".($template | ($vs2 << 20) | ($rs1 << 15) | ($vd << 7)); ++} ++ ++sub vsse32_v { ++ # vse32.v vs3, (rs1), rs2 ++ my $template = 0b0000101_00000_00000_110_00000_0100111; ++ my $vs3 = read_vreg shift; ++ my $rs1 = read_reg shift; ++ my $rs2 = read_reg shift; ++ return ".word ".($template | ($rs2 << 20) | ($rs1 << 15) | ($vs3 
<< 7)); ++} ++ ++sub vsse64_v { ++ # vsse64.v vs3, (rs1), rs2 ++ my $template = 0b0000101_00000_00000_111_00000_0100111; ++ my $vs3 = read_vreg shift; ++ my $rs1 = read_reg shift; ++ my $rs2 = read_reg shift; ++ return ".word ".($template | ($rs2 << 20) | ($rs1 << 15) | ($vs3 << 7)); ++} ++ ++sub vxor_vv_v0t { ++ # vxor.vv vd, vs2, vs1, v0.t ++ my $template = 0b0010110_00000_00000_000_00000_1010111; ++ my $vd = read_vreg shift; ++ my $vs2 = read_vreg shift; ++ my $vs1 = read_vreg shift; ++ return ".word ".($template | ($vs2 << 20) | ($vs1 << 15) | ($vd << 7)); ++} ++ ++sub vxor_vv { ++ # vxor.vv vd, vs2, vs1 ++ my $template = 0b0010111_00000_00000_000_00000_1010111; ++ my $vd = read_vreg shift; ++ my $vs2 = read_vreg shift; ++ my $vs1 = read_vreg shift; ++ return ".word ".($template | ($vs2 << 20) | ($vs1 << 15) | ($vd << 7)); ++} ++ ++sub vzext_vf2 { ++ # vzext.vf2 vd, vs2, vm ++ my $template = 0b010010_0_00000_00110_010_00000_1010111; ++ my $vd = read_vreg shift; ++ my $vs2 = read_vreg shift; ++ my $vm = read_mask_vreg shift; ++ return ".word ".($template | ($vm << 25) | ($vs2 << 20) | ($vd << 7)); ++} ++ ++# Vector crypto instructions ++ ++## Zvbb and Zvkb instructions ++## ++## vandn (also in zvkb) ++## vbrev ++## vbrev8 (also in zvkb) ++## vrev8 (also in zvkb) ++## vclz ++## vctz ++## vcpop ++## vrol (also in zvkb) ++## vror (also in zvkb) ++## vwsll ++ ++sub vbrev8_v { ++ # vbrev8.v vd, vs2, vm ++ my $template = 0b010010_0_00000_01000_010_00000_1010111; ++ my $vd = read_vreg shift; ++ my $vs2 = read_vreg shift; ++ my $vm = read_mask_vreg shift; ++ return ".word ".($template | ($vm << 25) | ($vs2 << 20) | ($vd << 7)); ++} ++ ++sub vrev8_v { ++ # vrev8.v vd, vs2, vm ++ my $template = 0b010010_0_00000_01001_010_00000_1010111; ++ my $vd = read_vreg shift; ++ my $vs2 = read_vreg shift; ++ my $vm = read_mask_vreg shift; ++ return ".word ".($template | ($vm << 25) | ($vs2 << 20) | ($vd << 7)); ++} ++ ++sub vror_vi { ++ # vror.vi vd, vs2, uimm ++ my $template = 
0b01010_0_1_00000_00000_011_00000_1010111; ++ my $vd = read_vreg shift; ++ my $vs2 = read_vreg shift; ++ my $uimm = shift; ++ my $uimm_i5 = $uimm >> 5; ++ my $uimm_i4_0 = $uimm & 0b11111; ++ ++ return ".word ".($template | ($uimm_i5 << 26) | ($vs2 << 20) | ($uimm_i4_0 << 15) | ($vd << 7)); ++} ++ ++sub vwsll_vv { ++ # vwsll.vv vd, vs2, vs1, vm ++ my $template = 0b110101_0_00000_00000_000_00000_1010111; ++ my $vd = read_vreg shift; ++ my $vs2 = read_vreg shift; ++ my $vs1 = read_vreg shift; ++ my $vm = read_mask_vreg shift; ++ return ".word ".($template | ($vm << 25) | ($vs2 << 20) | ($vs1 << 15) | ($vd << 7)); ++} ++ ++## Zvbc instructions ++ ++sub vclmulh_vx { ++ # vclmulh.vx vd, vs2, rs1 ++ my $template = 0b0011011_00000_00000_110_00000_1010111; ++ my $vd = read_vreg shift; ++ my $vs2 = read_vreg shift; ++ my $rs1 = read_reg shift; ++ return ".word ".($template | ($vs2 << 20) | ($rs1 << 15) | ($vd << 7)); ++} ++ ++sub vclmul_vx_v0t { ++ # vclmul.vx vd, vs2, rs1, v0.t ++ my $template = 0b0011000_00000_00000_110_00000_1010111; ++ my $vd = read_vreg shift; ++ my $vs2 = read_vreg shift; ++ my $rs1 = read_reg shift; ++ return ".word ".($template | ($vs2 << 20) | ($rs1 << 15) | ($vd << 7)); ++} ++ ++sub vclmul_vx { ++ # vclmul.vx vd, vs2, rs1 ++ my $template = 0b0011001_00000_00000_110_00000_1010111; ++ my $vd = read_vreg shift; ++ my $vs2 = read_vreg shift; ++ my $rs1 = read_reg shift; ++ return ".word ".($template | ($vs2 << 20) | ($rs1 << 15) | ($vd << 7)); ++} ++ ++## Zvkg instructions ++ ++sub vghsh_vv { ++ # vghsh.vv vd, vs2, vs1 ++ my $template = 0b1011001_00000_00000_010_00000_1110111; ++ my $vd = read_vreg shift; ++ my $vs2 = read_vreg shift; ++ my $vs1 = read_vreg shift; ++ return ".word ".($template | ($vs2 << 20) | ($vs1 << 15) | ($vd << 7)); ++} ++ ++sub vgmul_vv { ++ # vgmul.vv vd, vs2 ++ my $template = 0b1010001_00000_10001_010_00000_1110111; ++ my $vd = read_vreg shift; ++ my $vs2 = read_vreg shift; ++ return ".word ".($template | ($vs2 << 20) | ($vd << 
7)); ++} ++ ++## Zvkned instructions ++ ++sub vaesdf_vs { ++ # vaesdf.vs vd, vs2 ++ my $template = 0b101001_1_00000_00001_010_00000_1110111; ++ my $vd = read_vreg shift; ++ my $vs2 = read_vreg shift; ++ return ".word ".($template | ($vs2 << 20) | ($vd << 7)); ++} ++ ++sub vaesdm_vs { ++ # vaesdm.vs vd, vs2 ++ my $template = 0b101001_1_00000_00000_010_00000_1110111; ++ my $vd = read_vreg shift; ++ my $vs2 = read_vreg shift; ++ return ".word ".($template | ($vs2 << 20) | ($vd << 7)); ++} ++ ++sub vaesef_vs { ++ # vaesef.vs vd, vs2 ++ my $template = 0b101001_1_00000_00011_010_00000_1110111; ++ my $vd = read_vreg shift; ++ my $vs2 = read_vreg shift; ++ return ".word ".($template | ($vs2 << 20) | ($vd << 7)); ++} ++ ++sub vaesem_vs { ++ # vaesem.vs vd, vs2 ++ my $template = 0b101001_1_00000_00010_010_00000_1110111; ++ my $vd = read_vreg shift; ++ my $vs2 = read_vreg shift; ++ return ".word ".($template | ($vs2 << 20) | ($vd << 7)); ++} ++ ++sub vaeskf1_vi { ++ # vaeskf1.vi vd, vs2, uimmm ++ my $template = 0b100010_1_00000_00000_010_00000_1110111; ++ my $vd = read_vreg shift; ++ my $vs2 = read_vreg shift; ++ my $uimm = shift; ++ return ".word ".($template | ($uimm << 15) | ($vs2 << 20) | ($vd << 7)); ++} ++ ++sub vaeskf2_vi { ++ # vaeskf2.vi vd, vs2, uimm ++ my $template = 0b101010_1_00000_00000_010_00000_1110111; ++ my $vd = read_vreg shift; ++ my $vs2 = read_vreg shift; ++ my $uimm = shift; ++ return ".word ".($template | ($vs2 << 20) | ($uimm << 15) | ($vd << 7)); ++} ++ ++sub vaesz_vs { ++ # vaesz.vs vd, vs2 ++ my $template = 0b101001_1_00000_00111_010_00000_1110111; ++ my $vd = read_vreg shift; ++ my $vs2 = read_vreg shift; ++ return ".word ".($template | ($vs2 << 20) | ($vd << 7)); ++} ++ ++## Zvknha and Zvknhb instructions ++ ++sub vsha2ms_vv { ++ # vsha2ms.vv vd, vs2, vs1 ++ my $template = 0b1011011_00000_00000_010_00000_1110111; ++ my $vd = read_vreg shift; ++ my $vs2 = read_vreg shift; ++ my $vs1 = read_vreg shift; ++ return ".word ".($template | ($vs2 << 20)| 
($vs1 << 15 )| ($vd << 7)); ++} ++ ++sub vsha2ch_vv { ++ # vsha2ch.vv vd, vs2, vs1 ++ my $template = 0b101110_10000_00000_001_00000_01110111; ++ my $vd = read_vreg shift; ++ my $vs2 = read_vreg shift; ++ my $vs1 = read_vreg shift; ++ return ".word ".($template | ($vs2 << 20)| ($vs1 << 15 )| ($vd << 7)); ++} ++ ++sub vsha2cl_vv { ++ # vsha2cl.vv vd, vs2, vs1 ++ my $template = 0b101111_10000_00000_001_00000_01110111; ++ my $vd = read_vreg shift; ++ my $vs2 = read_vreg shift; ++ my $vs1 = read_vreg shift; ++ return ".word ".($template | ($vs2 << 20)| ($vs1 << 15 )| ($vd << 7)); ++} ++ ++## Zvksed instructions ++ ++sub vsm4k_vi { ++ # vsm4k.vi vd, vs2, uimm ++ my $template = 0b1000011_00000_00000_010_00000_1110111; ++ my $vd = read_vreg shift; ++ my $vs2 = read_vreg shift; ++ my $uimm = shift; ++ return ".word ".($template | ($vs2 << 20) | ($uimm << 15) | ($vd << 7)); ++} ++ ++sub vsm4r_vs { ++ # vsm4r.vs vd, vs2 ++ my $template = 0b1010011_00000_10000_010_00000_1110111; ++ my $vd = read_vreg shift; ++ my $vs2 = read_vreg shift; ++ return ".word ".($template | ($vs2 << 20) | ($vd << 7)); ++} ++ ++## zvksh instructions ++ ++sub vsm3c_vi { ++ # vsm3c.vi vd, vs2, uimm ++ my $template = 0b1010111_00000_00000_010_00000_1110111; ++ my $vd = read_vreg shift; ++ my $vs2 = read_vreg shift; ++ my $uimm = shift; ++ return ".word ".($template | ($vs2 << 20) | ($uimm << 15 ) | ($vd << 7)); ++} ++ ++sub vsm3me_vv { ++ # vsm3me.vv vd, vs2, vs1 ++ my $template = 0b1000001_00000_00000_010_00000_1110111; ++ my $vd = read_vreg shift; ++ my $vs2 = read_vreg shift; ++ my $vs1 = read_vreg shift; ++ return ".word ".($template | ($vs2 << 20) | ($vs1 << 15 ) | ($vd << 7)); ++} ++ ++1; +diff --git a/crypto/riscv64cpuid.pl b/crypto/riscv64cpuid.pl +new file mode 100644 +index 0000000..bc806af +--- /dev/null ++++ b/crypto/riscv64cpuid.pl +@@ -0,0 +1,105 @@ ++#! /usr/bin/env perl ++# Copyright 2022 The OpenSSL Project Authors. All Rights Reserved. 
++# ++# Licensed under the Apache License 2.0 (the "License"). You may not use ++# this file except in compliance with the License. You can obtain a copy ++# in the file LICENSE in the source distribution or at ++# https://www.openssl.org/source/license.html ++ ++ ++# $output is the last argument if it looks like a file (it has an extension) ++# $flavour is the first argument if it doesn't look like a file ++$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef; ++$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef; ++ ++$output and open STDOUT,">$output"; ++ ++{ ++my ($in_a,$in_b,$len,$x,$temp1,$temp2) = ('a0','a1','a2','t0','t1','t2'); ++$code.=<<___; ++################################################################################ ++# int CRYPTO_memcmp(const void * in_a, const void * in_b, size_t len) ++################################################################################ ++.text ++.balign 16 ++.globl CRYPTO_memcmp ++.type CRYPTO_memcmp,\@function ++CRYPTO_memcmp: ++ li $x,0 ++ beqz $len,2f # len == 0 ++1: ++ lbu $temp1,0($in_a) ++ lbu $temp2,0($in_b) ++ addi $in_a,$in_a,1 ++ addi $in_b,$in_b,1 ++ addi $len,$len,-1 ++ xor $temp1,$temp1,$temp2 ++ or $x,$x,$temp1 ++ bgtz $len,1b ++2: ++ mv a0,$x ++ ret ++___ ++} ++{ ++my ($ptr,$len,$temp1,$temp2) = ('a0','a1','t0','t1'); ++$code.=<<___; ++################################################################################ ++# void OPENSSL_cleanse(void *ptr, size_t len) ++################################################################################ ++.text ++.balign 16 ++.globl OPENSSL_cleanse ++.type OPENSSL_cleanse,\@function ++OPENSSL_cleanse: ++ beqz $len,2f # len == 0, return ++ srli $temp1,$len,4 ++ bnez $temp1,3f # len > 15 ++ ++1: # Store <= 15 individual bytes ++ sb x0,0($ptr) ++ addi $ptr,$ptr,1 ++ addi $len,$len,-1 ++ bnez $len,1b ++2: ++ ret ++ ++3: # Store individual bytes until we are aligned ++ andi $temp1,$ptr,0x7 ++ beqz $temp1,4f ++ sb x0,0($ptr) ++ addi $ptr,$ptr,1 
++ addi $len,$len,-1 ++ j 3b ++ ++4: # Store aligned dwords ++ li $temp2,8 ++4: ++ sd x0,0($ptr) ++ addi $ptr,$ptr,8 ++ addi $len,$len,-8 ++ bge $len,$temp2,4b # if len>=8 loop ++ bnez $len,1b # if len<8 and len != 0, store remaining bytes ++ ret ++___ ++} ++ ++{ ++my ($ret) = ('a0'); ++$code .= <<___; ++################################################################################ ++# size_t riscv_vlen_asm(void) ++# Return VLEN (i.e. the length of a vector register in bits). ++.p2align 3 ++.globl riscv_vlen_asm ++.type riscv_vlen_asm,\@function ++riscv_vlen_asm: ++ csrr $ret, vlenb ++ slli $ret, $ret, 3 ++ ret ++.size riscv_vlen_asm,.-riscv_vlen_asm ++___ ++} ++ ++print $code; ++close STDOUT or die "error closing STDOUT: $!"; +\ No newline at end of file +diff --git a/crypto/riscvcap.c b/crypto/riscvcap.c +new file mode 100644 +index 0000000..0c44b93 +--- /dev/null ++++ b/crypto/riscvcap.c +@@ -0,0 +1,145 @@ ++/* ++ * Copyright 2022-2025 The OpenSSL Project Authors. All Rights Reserved. ++ * ++ * Licensed under the Apache License 2.0 (the "License"). You may not use ++ * this file except in compliance with the License. 
You can obtain a copy ++ * in the file LICENSE in the source distribution or at ++ * https://www.openssl.org/source/license.html ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include "internal/cryptlib.h" ++ ++#define OPENSSL_RISCVCAP_IMPL ++#include "crypto/riscv_arch.h" ++ ++#ifdef OSSL_RISCV_HWPROBE ++# include ++# include ++# include ++# include ++#endif ++ ++extern size_t riscv_vlen_asm(void); ++ ++static void parse_env(const char *envstr); ++static void strtoupper(char *str); ++ ++static size_t vlen = 0; ++ ++#ifdef OSSL_RISCV_HWPROBE ++unsigned int OPENSSL_riscv_hwcap_P = 0; ++#endif ++ ++uint32_t OPENSSL_rdtsc(void) ++{ ++ return 0; ++} ++ ++size_t OPENSSL_instrument_bus(unsigned int *out, size_t cnt) ++{ ++ return 0; ++} ++ ++size_t OPENSSL_instrument_bus2(unsigned int *out, size_t cnt, size_t max) ++{ ++ return 0; ++} ++ ++static void strtoupper(char *str) ++{ ++ for (char *x = str; *x; ++x) ++ *x = toupper((unsigned char)*x); ++} ++ ++/* parse_env() parses a RISC-V architecture string. An example of such a string ++ * is "rv64gc_zba_zbb_zbc_zbs". Currently, the rv64gc part is ignored ++ * and we simply search for "_[extension]" in the arch string to see if we ++ * should enable a given extension. 
++ */ ++#define BUFLEN 256 ++static void parse_env(const char *envstr) ++{ ++ char envstrupper[BUFLEN]; ++ char buf[BUFLEN]; ++ ++ /* Convert env str to all uppercase */ ++ OPENSSL_strlcpy(envstrupper, envstr, sizeof(envstrupper)); ++ strtoupper(envstrupper); ++ ++ for (size_t i = 0; i < kRISCVNumCaps; ++i) { ++ /* Prefix capability with underscore in preparation for search */ ++ BIO_snprintf(buf, BUFLEN, "_%s", RISCV_capabilities[i].name); ++ if (strstr(envstrupper, buf) != NULL) { ++ /* Match, set relevant bit in OPENSSL_riscvcap_P[] */ ++ OPENSSL_riscvcap_P[RISCV_capabilities[i].index] |= ++ (1 << RISCV_capabilities[i].bit_offset); ++ } ++ } ++} ++ ++#ifdef OSSL_RISCV_HWPROBE ++static long riscv_hwprobe(struct riscv_hwprobe *pairs, size_t pair_count, ++ size_t cpu_count, unsigned long *cpus, ++ unsigned int flags) ++{ ++ return syscall(__NR_riscv_hwprobe, pairs, pair_count, cpu_count, cpus, flags); ++} ++ ++static void hwprobe_to_cap(void) ++{ ++ long ret; ++ struct riscv_hwprobe pairs[OSSL_RISCV_HWPROBE_PAIR_COUNT] = { ++ OSSL_RISCV_HWPROBE_PAIR_CONTENT ++ }; ++ ++ ret = riscv_hwprobe(pairs, OSSL_RISCV_HWPROBE_PAIR_COUNT, 0, NULL, 0); ++ /* if hwprobe syscall does not exist, ret would be -ENOSYS */ ++ if (ret == 0) { ++ for (size_t i = 0; i < kRISCVNumCaps; ++i) { ++ for (size_t j = 0; j != OSSL_RISCV_HWPROBE_PAIR_COUNT; ++j) { ++ if (pairs[j].key == RISCV_capabilities[i].hwprobe_key ++ && (pairs[j].value & RISCV_capabilities[i].hwprobe_value) ++ != 0) ++ if (!IS_IN_DEPEND_VECTOR(RISCV_capabilities[i].bit_offset) || VECTOR_CAPABLE) ++ /* Match, set relevant bit in OPENSSL_riscvcap_P[] */ ++ OPENSSL_riscvcap_P[RISCV_capabilities[i].index] |= ++ (1 << RISCV_capabilities[i].bit_offset); ++ } ++ } ++ } ++} ++#endif /* OSSL_RISCV_HWPROBE */ ++ ++size_t riscv_vlen(void) ++{ ++ return vlen; ++} ++ ++void OPENSSL_cpuid_setup(void) ++{ ++ char *e; ++ static int trigger = 0; ++ ++ if (trigger != 0) ++ return; ++ trigger = 1; ++ ++ if ((e = getenv("OPENSSL_riscvcap"))) { 
++ parse_env(e); ++ } ++#ifdef OSSL_RISCV_HWPROBE ++ else { ++ OPENSSL_riscv_hwcap_P = getauxval(AT_HWCAP); ++ hwprobe_to_cap(); ++ } ++#endif ++ ++ if (RISCV_HAS_V()) { ++ vlen = riscv_vlen_asm(); ++ } ++} +diff --git a/doc/man7/openssl-env.pod b/doc/man7/openssl-env.pod +index a2443d5..922d3c1 100644 +--- a/doc/man7/openssl-env.pod ++++ b/doc/man7/openssl-env.pod +@@ -74,7 +74,7 @@ See L. + + Additional arguments for the L command. + +-=item B, B, B, B, B ++=item B, B, B, B, B, B + + OpenSSL supports a number of different algorithm implementations for + various machines and, by default, it determines which to use based on the +@@ -91,7 +91,7 @@ See L. + + =head1 COPYRIGHT + +-Copyright 2019-2021 The OpenSSL Project Authors. All Rights Reserved. ++Copyright 2019-2022 The OpenSSL Project Authors. All Rights Reserved. + + Licensed under the Apache License 2.0 (the "License"). You may not use + this file except in compliance with the License. You can obtain a copy +diff --git a/include/crypto/aes_platform.h b/include/crypto/aes_platform.h +index e95ad5a..c47ce4e 100644 +--- a/include/crypto/aes_platform.h ++++ b/include/crypto/aes_platform.h +@@ -396,6 +396,36 @@ void aes256_t4_xts_decrypt(const unsigned char *in, unsigned char *out, + + /* Convert key size to function code: [16,24,32] -> [18,19,20]. */ + # define S390X_AES_FC(keylen) (S390X_AES_128 + ((((keylen) << 3) - 128) >> 6)) ++ ++# elif defined(OPENSSL_CPUID_OBJ) && defined(__riscv) && __riscv_xlen == 64 ++/* RISC-V 64 support */ ++# include "riscv_arch.h" ++ ++/* Zvkned extension (vector crypto AES). 
*/ ++int rv64i_zvkned_set_encrypt_key(const unsigned char *userKey, const int bits, ++ AES_KEY *key); ++int rv64i_zvkned_set_decrypt_key(const unsigned char *userKey, const int bits, ++ AES_KEY *key); ++void rv64i_zvkned_encrypt(const unsigned char *in, unsigned char *out, ++ const AES_KEY *key); ++void rv64i_zvkned_decrypt(const unsigned char *in, unsigned char *out, ++ const AES_KEY *key); ++ ++void rv64i_zvkned_cbc_encrypt(const unsigned char *in, unsigned char *out, ++ size_t length, const AES_KEY *key, ++ unsigned char *ivec, const int enc); ++ ++void rv64i_zvkned_cbc_decrypt(const unsigned char *in, unsigned char *out, ++ size_t length, const AES_KEY *key, ++ unsigned char *ivec, const int enc); ++ ++void rv64i_zvkned_ecb_encrypt(const unsigned char *in, unsigned char *out, ++ size_t length, const AES_KEY *key, ++ const int enc); ++ ++void rv64i_zvkned_ecb_decrypt(const unsigned char *in, unsigned char *out, ++ size_t length, const AES_KEY *key, ++ const int enc); + # endif + + # if defined(HWAES_CAPABLE) +diff --git a/include/crypto/riscv_arch.def b/include/crypto/riscv_arch.def +new file mode 100644 +index 0000000..32147d0 +--- /dev/null ++++ b/include/crypto/riscv_arch.def +@@ -0,0 +1,61 @@ ++/* ++ * Copyright 2022-2024 The OpenSSL Project Authors. All Rights Reserved. ++ * ++ * Licensed under the Apache License 2.0 (the "License"). You may not use ++ * this file except in compliance with the License. You can obtain a copy ++ * in the file LICENSE in the source distribution or at ++ * https://www.openssl.org/source/license.html ++ */ ++ ++/* X Macro Definitions for Specification of RISC-V Arch Capabilities */ ++ ++/* ++ * Each RISC-V capability ends up encoded as a single set bit in an array of ++ * words. 
When specifying a new capability, write a new RISCV_DEFINE_CAP ++ * statement, with an argument as the extension name in all-caps, ++ * second argument as the index in the array where the capability will be stored ++ * and third argument as the index of the bit to be used to encode the ++ * capability. ++ * ++ * The fourth and the fifth arguments are copied from linux header asm/hwprobe.h. ++ * Directly coping values instead of macro names comes from the fact ++ * that an old version may lack definition of some macro. ++ * When there is no hwprobe key/value pair for a capability, the key is set to -1 ++ * and the value is set to 0, as when the hwprobe syscall returns a key of -1, ++ * the value is set to 0 and the corresponding capability would not be enabled. ++ * ++ * RISCV_DEFINE_CAP(EXTENSION NAME, array index, bit index, hwprobe key, hwprobe value) */ ++ ++RISCV_DEFINE_CAP(ZBA, 0, 0, 4, (1 << 3)) ++RISCV_DEFINE_CAP(ZBB, 0, 1, 4, (1 << 4)) ++RISCV_DEFINE_CAP(ZBC, 0, 2, 4, (1 << 7)) ++RISCV_DEFINE_CAP(ZBS, 0, 3, 4, (1 << 5)) ++RISCV_DEFINE_CAP(ZBKB, 0, 4, 4, (1 << 8)) ++RISCV_DEFINE_CAP(ZBKC, 0, 5, 4, (1 << 9)) ++RISCV_DEFINE_CAP(ZBKX, 0, 6, 4, (1 << 10)) ++RISCV_DEFINE_CAP(ZKND, 0, 7, 4, (1 << 11)) ++RISCV_DEFINE_CAP(ZKNE, 0, 8, 4, (1 << 12)) ++RISCV_DEFINE_CAP(ZKNH, 0, 9, 4, (1 << 13)) ++RISCV_DEFINE_CAP(ZKSED, 0, 10, 4, (1 << 14)) ++RISCV_DEFINE_CAP(ZKSH, 0, 11, 4, (1 << 15)) ++RISCV_DEFINE_CAP(ZKR, 0, 12, -1, 0) ++RISCV_DEFINE_CAP(ZKT, 0, 13, 4, (1 << 16)) ++RISCV_DEFINE_CAP(V, 0, 14, 4, (1 << 2)) ++RISCV_DEFINE_CAP(ZVBB, 0, 15, 4, (1 << 17)) ++RISCV_DEFINE_CAP(ZVBC, 0, 16, 4, (1 << 18)) ++RISCV_DEFINE_CAP(ZVKB, 0, 17, 4, (1 << 19)) ++RISCV_DEFINE_CAP(ZVKG, 0, 18, 4, (1 << 20)) ++RISCV_DEFINE_CAP(ZVKNED, 0, 19, 4, (1 << 21)) ++RISCV_DEFINE_CAP(ZVKNHA, 0, 20, 4, (1 << 22)) ++RISCV_DEFINE_CAP(ZVKNHB, 0, 21, 4, (1 << 23)) ++RISCV_DEFINE_CAP(ZVKSED, 0, 22, 4, (1 << 24)) ++RISCV_DEFINE_CAP(ZVKSH, 0, 23, 4, (1 << 25)) ++ ++/* ++ * In the future ... 
++ * RISCV_DEFINE_CAP(ZFOO, 0, 31, ..., ...) ++ * RISCV_DEFINE_CAP(ZBAR, 1, 0, ..., ...) ++ * ... and so on. ++ */ ++ ++#undef RISCV_DEFINE_CAP +diff --git a/include/crypto/riscv_arch.h b/include/crypto/riscv_arch.h +new file mode 100644 +index 0000000..1d78eb7 +--- /dev/null ++++ b/include/crypto/riscv_arch.h +@@ -0,0 +1,125 @@ ++/* ++ * Copyright 2022-2025 The OpenSSL Project Authors. All Rights Reserved. ++ * ++ * Licensed under the Apache License 2.0 (the "License"). You may not use ++ * this file except in compliance with the License. You can obtain a copy ++ * in the file LICENSE in the source distribution or at ++ * https://www.openssl.org/source/license.html ++ */ ++ ++#ifndef OSSL_CRYPTO_RISCV_ARCH_H ++# define OSSL_CRYPTO_RISCV_ARCH_H ++ ++# include ++# include ++ ++# if defined(OPENSSL_SYS_LINUX) && !defined(FIPS_MODULE) ++# if __has_include() ++# include ++# /* ++ * Some environments using musl are reported to have the hwprobe.h include ++ * file but not have the __NR_riscv_hwprobe define. 
++ */ ++# ifdef __NR_riscv_hwprobe ++# define OSSL_RISCV_HWPROBE ++# include ++extern unsigned int OPENSSL_riscv_hwcap_P; ++# define VECTOR_CAPABLE (OPENSSL_riscv_hwcap_P & COMPAT_HWCAP_ISA_V) ++# define ZVX_MIN 15 ++# define ZVX_MAX 23 ++# define IS_IN_DEPEND_VECTOR(offset) ((offset >= ZVX_MIN) && (offset <= ZVX_MAX)) ++# endif ++# endif ++# endif ++ ++# define RISCV_DEFINE_CAP(NAME, INDEX, BIT_INDEX, \ ++ HWPROBE_KEY, HWPROBE_VALUE) +1 ++extern uint32_t OPENSSL_riscvcap_P[ (( ++# include "riscv_arch.def" ++) + sizeof(uint32_t) - 1) / sizeof(uint32_t) ]; ++ ++# ifdef OPENSSL_RISCVCAP_IMPL ++# define RISCV_DEFINE_CAP(NAME, INDEX, BIT_INDEX, \ ++ HWPROBE_KEY, HWPROBE_VALUE) +1 ++uint32_t OPENSSL_riscvcap_P[ (( ++# include "riscv_arch.def" ++) + sizeof(uint32_t) - 1) / sizeof(uint32_t) ]; ++# endif ++ ++# define RISCV_DEFINE_CAP(NAME, INDEX, BIT_INDEX, \ ++ HWPROBE_KEY, HWPROBE_VALUE) \ ++ static inline int RISCV_HAS_##NAME(void) \ ++ { \ ++ return (OPENSSL_riscvcap_P[INDEX] & (1 << BIT_INDEX)) != 0; \ ++ } ++# include "riscv_arch.def" ++ ++struct RISCV_capability_s { ++ const char *name; ++ size_t index; ++ size_t bit_offset; ++# ifdef OSSL_RISCV_HWPROBE ++ int32_t hwprobe_key; ++ uint64_t hwprobe_value; ++# endif ++}; ++ ++# define RISCV_DEFINE_CAP(NAME, INDEX, BIT_INDEX, \ ++ OSSL_RISCV_HWPROBE_KEY, OSSL_RISCV_HWPROBE_VALUE) +1 ++extern const struct RISCV_capability_s RISCV_capabilities[ ++# include "riscv_arch.def" ++]; ++ ++# ifdef OPENSSL_RISCVCAP_IMPL ++# ifdef OSSL_RISCV_HWPROBE ++# define RISCV_DEFINE_CAP(NAME, INDEX, BIT_INDEX, \ ++ HWPROBE_KEY, HWPROBE_VALUE) \ ++ { #NAME, INDEX, BIT_INDEX, HWPROBE_KEY, HWPROBE_VALUE }, ++# else ++# define RISCV_DEFINE_CAP(NAME, INDEX, BIT_INDEX, \ ++ HWPROBE_KEY, HWPROBE_VALUE) \ ++ { #NAME, INDEX, BIT_INDEX }, ++# endif ++const struct RISCV_capability_s RISCV_capabilities[] = { ++# include "riscv_arch.def" ++}; ++# endif ++ ++# define RISCV_DEFINE_CAP(NAME, INDEX, BIT_INDEX, \ ++ HWPROBE_KEY, HWPROBE_VALUE) +1 ++static 
const size_t kRISCVNumCaps = ++# include "riscv_arch.def" ++; ++ ++# ifdef OSSL_RISCV_HWPROBE ++/* ++ * Content is an array of { hwprobe_key, 0 } where ++ * hwprobe_key is copied from asm/hwprobe.h. ++ * It should be updated along with riscv_arch.def. ++ */ ++# define OSSL_RISCV_HWPROBE_PAIR_COUNT 1 ++# define OSSL_RISCV_HWPROBE_PAIR_CONTENT \ ++ { 4, 0 }, ++# endif ++ ++/* Extension combination tests. */ ++#define RISCV_HAS_ZBB_AND_ZBC() (RISCV_HAS_ZBB() && RISCV_HAS_ZBC()) ++#define RISCV_HAS_ZBKB_AND_ZKND_AND_ZKNE() (RISCV_HAS_ZBKB() && RISCV_HAS_ZKND() && RISCV_HAS_ZKNE()) ++#define RISCV_HAS_ZKND_AND_ZKNE() (RISCV_HAS_ZKND() && RISCV_HAS_ZKNE()) ++/* ++ * The ZVBB is the superset of ZVKB extension. We use macro here to replace the ++ * `RISCV_HAS_ZVKB()` with `RISCV_HAS_ZVBB() || RISCV_HAS_ZVKB()`. ++ */ ++#define RISCV_HAS_ZVKB() (RISCV_HAS_ZVBB() || RISCV_HAS_ZVKB()) ++#define RISCV_HAS_ZVKB_AND_ZVKNHA() (RISCV_HAS_ZVKB() && RISCV_HAS_ZVKNHA()) ++#define RISCV_HAS_ZVKB_AND_ZVKNHB() (RISCV_HAS_ZVKB() && RISCV_HAS_ZVKNHB()) ++#define RISCV_HAS_ZVKB_AND_ZVKSED() (RISCV_HAS_ZVKB() && RISCV_HAS_ZVKSED()) ++#define RISCV_HAS_ZVKB_AND_ZVKSH() (RISCV_HAS_ZVKB() && RISCV_HAS_ZVKSH()) ++ ++/* ++ * Get the size of a vector register in bits (VLEN). ++ * If RISCV_HAS_V() is false, then this returns 0. ++ */ ++size_t riscv_vlen(void); ++ ++#endif +diff --git a/providers/implementations/ciphers/cipher_aes_ccm_hw_rv64i.inc b/providers/implementations/ciphers/cipher_aes_ccm_hw_rv64i.inc +new file mode 100644 +index 0000000..b14c10a +--- /dev/null ++++ b/providers/implementations/ciphers/cipher_aes_ccm_hw_rv64i.inc +@@ -0,0 +1,71 @@ ++/* ++ * Copyright 2022-2023 The OpenSSL Project Authors. All Rights Reserved. ++ * ++ * Licensed under the Apache License 2.0 (the "License"). You may not use ++ * this file except in compliance with the License. 
You can obtain a copy ++ * in the file LICENSE in the source distribution or at ++ * https://www.openssl.org/source/license.html ++ */ ++ ++/*- ++ * RISC-V 64 ZKND ZKNE support for AES CCM. ++ * This file is included by cipher_aes_ccm_hw.c ++ */ ++ ++static int ccm_rv64i_zknd_zkne_initkey(PROV_CCM_CTX *ctx, const unsigned char *key, ++ size_t keylen) ++{ ++ PROV_AES_CCM_CTX *actx = (PROV_AES_CCM_CTX *)ctx; ++ ++ AES_HW_CCM_SET_KEY_FN(rv64i_zkne_set_encrypt_key, rv64i_zkne_encrypt, ++ NULL, NULL); ++ return 1; ++} ++ ++static const PROV_CCM_HW rv64i_zknd_zkne_ccm = { ++ ccm_rv64i_zknd_zkne_initkey, ++ ossl_ccm_generic_setiv, ++ ossl_ccm_generic_setaad, ++ ossl_ccm_generic_auth_encrypt, ++ ossl_ccm_generic_auth_decrypt, ++ ossl_ccm_generic_gettag ++}; ++ ++/*- ++ * RISC-V RV64 ZVKNED support for AES CCM. ++ * This file is included by cipher_aes_ccm_hw.c ++ */ ++ ++static int ccm_rv64i_zvkned_initkey(PROV_CCM_CTX *ctx, const unsigned char *key, ++ size_t keylen) ++{ ++ PROV_AES_CCM_CTX *actx = (PROV_AES_CCM_CTX *)ctx; ++ ++ /* Zvkned only supports 128 and 256 bit keys for key schedule generation. 
*/ ++ if (keylen * 8 == 128 || keylen * 8 == 256) { ++ AES_HW_CCM_SET_KEY_FN(rv64i_zvkned_set_encrypt_key, rv64i_zvkned_encrypt, ++ NULL, NULL); ++ } else { ++ AES_HW_CCM_SET_KEY_FN(AES_set_encrypt_key, rv64i_zvkned_encrypt, NULL, NULL) ++ } ++ return 1; ++} ++ ++static const PROV_CCM_HW rv64i_zvkned_ccm = { ++ ccm_rv64i_zvkned_initkey, ++ ossl_ccm_generic_setiv, ++ ossl_ccm_generic_setaad, ++ ossl_ccm_generic_auth_encrypt, ++ ossl_ccm_generic_auth_decrypt, ++ ossl_ccm_generic_gettag ++}; ++ ++const PROV_CCM_HW *ossl_prov_aes_hw_ccm(size_t keybits) ++{ ++ if (RISCV_HAS_ZVKNED() && riscv_vlen() >= 128) ++ return &rv64i_zvkned_ccm; ++ else if (RISCV_HAS_ZKND_AND_ZKNE()) ++ return &rv64i_zknd_zkne_ccm; ++ else ++ return &aes_ccm; ++} +\ No newline at end of file +diff --git a/providers/implementations/ciphers/cipher_aes_hw.c b/providers/implementations/ciphers/cipher_aes_hw.c +index 596cdba..f1e462f 100644 +--- a/providers/implementations/ciphers/cipher_aes_hw.c ++++ b/providers/implementations/ciphers/cipher_aes_hw.c +@@ -1,5 +1,5 @@ + /* +- * Copyright 2001-2021 The OpenSSL Project Authors. All Rights Reserved. ++ * Copyright 2001-2023 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. 
You can obtain a copy +@@ -142,6 +142,10 @@ const PROV_CIPHER_HW *ossl_prov_cipher_hw_aes_##mode(size_t keybits) \ + # include "cipher_aes_hw_t4.inc" + #elif defined(S390X_aes_128_CAPABLE) + # include "cipher_aes_hw_s390x.inc" ++#elif defined(__riscv) && __riscv_xlen == 64 ++# include "cipher_aes_hw_rv64i.inc" ++#elif defined(__riscv) && __riscv_xlen == 32 ++# include "cipher_aes_hw_rv32i.inc" + #else + /* The generic case */ + # define PROV_CIPHER_HW_declare(mode) +@@ -154,4 +158,4 @@ PROV_CIPHER_HW_aes_mode(ofb128) + PROV_CIPHER_HW_aes_mode(cfb128) + PROV_CIPHER_HW_aes_mode(cfb1) + PROV_CIPHER_HW_aes_mode(cfb8) +-PROV_CIPHER_HW_aes_mode(ctr) ++PROV_CIPHER_HW_aes_mode(ctr) +\ No newline at end of file +diff --git a/providers/implementations/ciphers/cipher_aes_hw_rv64i.inc b/providers/implementations/ciphers/cipher_aes_hw_rv64i.inc +new file mode 100644 +index 0000000..5e9a4ce +--- /dev/null ++++ b/providers/implementations/ciphers/cipher_aes_hw_rv64i.inc +@@ -0,0 +1,83 @@ ++/* ++ * Copyright 2022-2023 The OpenSSL Project Authors. All Rights Reserved. ++ * ++ * Licensed under the Apache License 2.0 (the "License"). You may not use ++ * this file except in compliance with the License. You can obtain a copy ++ * in the file LICENSE in the source distribution or at ++ * https://www.openssl.org/source/license.html ++ */ ++ ++/*- ++ * RISC-V RV64 ZVKNED support for AES modes ecb, cbc, ofb, cfb, ctr. 
++ * This file is included by cipher_aes_hw.c ++ */ ++ ++#define cipher_hw_rv64i_zvkned_cbc ossl_cipher_hw_generic_cbc ++#define cipher_hw_rv64i_zvkned_ecb ossl_cipher_hw_generic_ecb ++#define cipher_hw_rv64i_zvkned_ofb128 ossl_cipher_hw_generic_ofb128 ++#define cipher_hw_rv64i_zvkned_cfb128 ossl_cipher_hw_generic_cfb128 ++#define cipher_hw_rv64i_zvkned_cfb8 ossl_cipher_hw_generic_cfb8 ++#define cipher_hw_rv64i_zvkned_cfb1 ossl_cipher_hw_generic_cfb1 ++#define cipher_hw_rv64i_zvkned_ctr ossl_cipher_hw_generic_ctr ++ ++static int cipher_hw_rv64i_zvkned_initkey(PROV_CIPHER_CTX *dat, ++ const unsigned char *key, ++ size_t keylen) ++{ ++ int ret; ++ PROV_AES_CTX *adat = (PROV_AES_CTX *)dat; ++ AES_KEY *ks = &adat->ks.ks; ++ ++ dat->ks = ks; ++ ++ /* ++ * Zvkned only supports 128 and 256 bit keys for key schedule generation. ++ * For AES-192 case, we could fallback to `AES_set_encrypt_key`. ++ * All Zvkned-based implementations use the same `encrypt-key` scheduling ++ * for both encryption and decryption. ++ */ ++ if (keylen * 8 == 128 || keylen * 8 == 256) { ++ ret = rv64i_zvkned_set_encrypt_key(key, keylen * 8, ks); ++ } else { ++ ret = AES_set_encrypt_key(key, keylen * 8, ks); ++ } ++ ++ if (dat->mode == EVP_CIPH_CBC_MODE) { ++ if (dat->enc) { ++ dat->stream.cbc = (cbc128_f) rv64i_zvkned_cbc_encrypt; ++ } else { ++ dat->stream.cbc = (cbc128_f) rv64i_zvkned_cbc_decrypt; ++ } ++ } else if (dat->mode == EVP_CIPH_ECB_MODE) { ++ if (dat->enc) { ++ dat->stream.ecb = (ecb128_f) rv64i_zvkned_ecb_encrypt; ++ } else { ++ dat->stream.ecb = (ecb128_f) rv64i_zvkned_ecb_decrypt; ++ } ++ } ++ ++ /* Zvkned supports aes-128/192/256 encryption and decryption. 
*/ ++ if ((dat->mode == EVP_CIPH_ECB_MODE || dat->mode == EVP_CIPH_CBC_MODE) && ++ !dat->enc) { ++ dat->block = (block128_f) rv64i_zvkned_decrypt; ++ } else { ++ dat->block = (block128_f) rv64i_zvkned_encrypt; ++ } ++ ++ if (ret < 0) { ++ ERR_raise(ERR_LIB_PROV, PROV_R_KEY_SETUP_FAILED); ++ return 0; ++ } ++ ++ return 1; ++} ++ ++#define PROV_CIPHER_HW_declare(mode) \ ++static const PROV_CIPHER_HW rv64i_zvkned_##mode = { \ ++ cipher_hw_rv64i_zvkned_initkey, \ ++ cipher_hw_rv64i_zvkned_##mode, \ ++ cipher_hw_aes_copyctx \ ++}; ++#define PROV_CIPHER_HW_select(mode) \ ++if (RISCV_HAS_ZVKNED() && riscv_vlen() >= 128) \ ++ return &rv64i_zvkned_##mode; +diff --git a/providers/implementations/ciphers/cipher_aes_ocb_hw.c b/providers/implementations/ciphers/cipher_aes_ocb_hw.c +index 7aa97dc..e5ce4b0 100644 +--- a/providers/implementations/ciphers/cipher_aes_ocb_hw.c ++++ b/providers/implementations/ciphers/cipher_aes_ocb_hw.c +@@ -103,6 +103,38 @@ static const PROV_CIPHER_HW aes_t4_ocb = { \ + # define PROV_CIPHER_HW_select() \ + if (SPARC_AES_CAPABLE) \ + return &aes_t4_ocb; ++ ++#elif defined(__riscv) && __riscv_xlen == 64 ++ ++static int cipher_hw_aes_ocb_rv64i_zvkned_initkey(PROV_CIPHER_CTX *vctx, ++ const unsigned char *key, ++ size_t keylen) ++{ ++ PROV_AES_OCB_CTX *ctx = (PROV_AES_OCB_CTX *)vctx; ++ ++ /* Zvkned only supports 128 and 256 bit keys. 
*/ ++ if (keylen * 8 == 128 || keylen * 8 == 256) { ++ OCB_SET_KEY_FN(rv64i_zvkned_set_encrypt_key, ++ rv64i_zvkned_set_decrypt_key, ++ rv64i_zvkned_encrypt, rv64i_zvkned_decrypt, ++ NULL, NULL); ++ } else { ++ OCB_SET_KEY_FN(AES_set_encrypt_key, AES_set_encrypt_key, ++ rv64i_zvkned_encrypt, rv64i_zvkned_decrypt, ++ NULL, NULL); ++ } ++ return 1; ++} ++ ++# define PROV_CIPHER_HW_declare() \ ++static const PROV_CIPHER_HW aes_rv64i_zvkned_ocb = { \ ++ cipher_hw_aes_ocb_rv64i_zvkned_initkey, \ ++ NULL \ ++}; ++# define PROV_CIPHER_HW_select() \ ++ if (RISCV_HAS_ZVKNED() && riscv_vlen() >= 128) \ ++ return &aes_rv64i_zvkned_ocb; ++ + #else + # define PROV_CIPHER_HW_declare() + # define PROV_CIPHER_HW_select() +diff --git a/providers/implementations/ciphers/cipher_aes_xts_hw.c b/providers/implementations/ciphers/cipher_aes_xts_hw.c +index c71492f..b371c5b 100644 +--- a/providers/implementations/ciphers/cipher_aes_xts_hw.c ++++ b/providers/implementations/ciphers/cipher_aes_xts_hw.c +@@ -158,6 +158,42 @@ static const PROV_CIPHER_HW aes_xts_t4 = { \ + # define PROV_CIPHER_HW_select_xts() \ + if (SPARC_AES_CAPABLE) \ + return &aes_xts_t4; ++ ++#elif defined(__riscv) && __riscv_xlen == 64 ++ ++static int cipher_hw_aes_xts_rv64i_zvkned_initkey(PROV_CIPHER_CTX *ctx, ++ const unsigned char *key, ++ size_t keylen) ++{ ++ PROV_AES_XTS_CTX *xctx = (PROV_AES_XTS_CTX *)ctx; ++ OSSL_xts_stream_fn stream_enc = NULL; ++ OSSL_xts_stream_fn stream_dec = NULL; ++ ++ /* Zvkned only supports 128 and 256 bit keys. 
*/ ++ if (keylen * 8 == 128 || keylen * 8 == 256) { ++ XTS_SET_KEY_FN(rv64i_zvkned_set_encrypt_key, ++ rv64i_zvkned_set_decrypt_key, ++ rv64i_zvkned_encrypt, rv64i_zvkned_decrypt, ++ stream_enc, stream_dec); ++ } else { ++ XTS_SET_KEY_FN(AES_set_encrypt_key, AES_set_encrypt_key, ++ rv64i_zvkned_encrypt, rv64i_zvkned_decrypt, ++ stream_enc, stream_dec); ++ } ++ return 1; ++} ++ ++# define PROV_CIPHER_HW_declare_xts() \ ++static const PROV_CIPHER_HW aes_xts_rv64i_zvkned = { \ ++ cipher_hw_aes_xts_rv64i_zvkned_initkey, \ ++ NULL, \ ++ cipher_hw_aes_xts_copyctx \ ++}; ++ ++# define PROV_CIPHER_HW_select_xts() \ ++if (RISCV_HAS_ZVKNED() && riscv_vlen() >= 128) \ ++ return &aes_xts_rv64i_zvkned; ++ + # else + /* The generic case */ + # define PROV_CIPHER_HW_declare_xts() +diff --git a/test/recipes/30-test_evp_data/evpciph_aes_common.txt b/test/recipes/30-test_evp_data/evpciph_aes_common.txt +index 484147b..3d977b5 100644 +--- a/test/recipes/30-test_evp_data/evpciph_aes_common.txt ++++ b/test/recipes/30-test_evp_data/evpciph_aes_common.txt +@@ -171,6 +171,22 @@ Plaintext = F69F2445DF4F9B17AD2B417BE66C3710 + Ciphertext = 3FF1CAA1681FAC09120ECA307586E1A7 + NextIV = 3ff1caa1681fac09120eca307586e1a7 + ++# 128 bytes plaintext ++Cipher = AES-128-CBC ++Key = cea7f6d343bb1ef40998ecb90aca8249 ++IV = ced2c3991da2a1d485990e2e9c9246f3 ++Plaintext = f836bb4afd5c1d8efca719467030ed53ff17949b7a2037189eff2acd6f4b97c978522edaa998d88293ff203637c8b91e076cd03cb37af4203e1ebb276fd81f18ae144e203fb9484d8b90109aa8eff494c701f322ca02c9834f8c4ca91372df8d08e27fe8b8cdf670845d295660d2e87ded8fcd4b913fa0d102e87313ee3c31e5 ++Ciphertext = ab73a74b8b6d44747741c363748b2571ff46102c90913b83a3326faf5f6f949d20a9bbab222f8ce102d3d70c0867d0690241e73401b9b9c6af3280dd49969f22a7eade6db00c6c4b2764c9373a6c24e25c107e730da20317849065d2b3770840084fbf941e64c7e9b08440b83f7fc0afaa4ee0981626d71126bb8155290ba7b5 ++NextIV = aa4ee0981626d71126bb8155290ba7b5 ++ ++# 1024 bytes plaintext ++Cipher = AES-128-CBC ++Key = 
0dd82568e0a0ad0938150ae5c0944f36 ++IV = 8b32cb9ef61ee47acdb5299f7fb4c705 ++Plaintext = 16ebe2213bdbd704e6c56e836abdaa5f8b59fed9935e113a58f2f5452a865c58e6661cf0774baf0b76a2630a8e587c644f1389ba3d537f43ce310d1d6939f8eb4e2317aac67a37777ee940be3d5aed78ccbb24622bcb57c6d63d34b62167121a91cbbf1483d2dd175d90cc6f35cd7a2b37d03caf5d4e9da880694ba8d69abe2af0f560f32f19ad2e40813aaf139cdbd008bd314a71691d4a5ff5aaf09cb1d26438e92b0d1924fdf504b83010d8963a30bf1e20a3aa4e42135af0845559f24b8f0666bb0a93908939210ba7aaaaa1194ebbfc2d1efa0b872fc7936b1b6696a738d51b75e6258fa0e1e3e046e891a07517a65ffdbc2b0c373a5d205181185df8aa9174e6e0cee685f7757c3f4c3a504fb3fa9e9aa682c4ddaca0c409ca1c46040c24f7d66b8af996a076d140caafad52a91cec864588f9daac1e9032ad57c9f0f5da2025104a1823cb36ca3ea53dc42250ffdb74267c28c47e00a60dd3d593ce1029ffe0802ef1b57745ee35a42bcc0c363dbbf98da6e086f8b5fdd02b9eb813b86bfb31f4f205f5a5f396a0a0397953c7229111f51acedd819985aaeb94a18a0ceda5079f2a52687e6cb6f82f38552a27c6533de6e865ab30cd753335a27727fd8ad79eb3cb48f4cd8371478abde70040c8b7e6e4a17a19136859e231004e9f1a30471da8b387177de9bd30d2163ef6bfe88275b47c441c0deb733c2522727ae03e35f2a318acdcd22f685f5e5aabcc6570433fc4d81cb0978e03fc36e8eea9d2786c16a399e8764f4460c7956498519d4789cee47cfce766b04022747163127bae1cd919a490620bc221b5c6ff8c9e31e4975d70e917034ecf1620de0d2315a4bf2c0c7786bd706961c04fb4c39baac8c839b7a6363585f96e35bed0b109158f41923200ad1e7fb4bcc920f1459d9a0e2448d295ab969e87a2b7007e8d1e2dc19b1b9bd1c1a412bcf5acbf8640d4d9ea90925ce18b552d3f73dfec70ed19267552135db92a93e4a9437386729c172e36f218d041242f232e160ff5310f614a70b821b1c52a3c8d398d7df30b42125b829f89fc050325796e0b3b4d8e0504556741082a81d8096b449ddf8b619e2aba017ed5d28e6572c96c2121baf24df096457747a076935677221cd9ce7ba81a83974a2c0cac102ce10a828c4eeea507ecca8bf62cbc868be2a75afc62763624a32e05294ad07666fd145ad54be4476d7c889174ada4156ba7f5e95138865ff1fff5693bad8317fbfee1651319b42550a0570da8e75c92962c44aefc6f3567467fb91f9b3e759f0bc67f54b82c4f91245373d2adba70e876cf1f7fa4797150d805f358f0d59d2706050fcc7ad558e
37564082a33c8ae0acc5ab76b755896996a383474849e8e6e34e51279f7477dfd0a0bfacceef9d53610f9d7492cc26e3d207b84326638fe822234c06cabc1b38f98754f ++Ciphertext = b6a9e1349db543a56129d6b15d7300e88da351280c75e7fa9697a3362565dc6c26a4bf234a7a161101ab527d5c71be6cef8f1ee2f11980f8c20e9c99b0d205ef2c3ea8b42d9d91475299bf7dc08e3200966d7398a965d2dc5b1532d1e5d1bfc94e89f685e148eca99c42ecdc2c2ded3f2ff738c9fc727844892eb761e067b24d61a5af9cfd83d1538c7b60dc6e9f4bef5da49d725b5028d5e3e4d359622ecc93b06df46c2ad9caf9290521b888e58aa18a93ab85097a754fbbc2b66ee0934729859b967d2b7859dea7dca660462357a615c5e0c0b9f968555bb5137b7579951c45310a669f4c0a4042f6c6c354afbe465cf8762e0db473a089c1d2a17dda09b7c9b78b9597b3222042c7a74347601e32f0fb982e855719f47210ec5e1d3f0aa4ebe7ab6c40c95ce894d83b93a093c5cee53ce656912461af6ba1811840140fff2d66814d137202b8f6de9cd3eb3e30bcef8b7633a91e343a7433b9a0ee493a98b92f48385f04b5e51f056a7b0d46c25c18ae88ad83d902a49f61eaf9c1a22b38e4d2472623a33a1604b892306663f710d02eccd03a3e25bbbf0f651d3e126f9abd1711f5258d0d4d8b197ef6d3807c12fcf4afea926c2d8a5402319a8d4ab441359196661a8ef6e33599094bdb92326884e5d3a837b923462b61eefd22e535f5109f33901b09b5bb9652a3535b1ce5a2a565cccc9b05aaa35989de9564e2dddf28170be012c87a58825b545200f3a3780367799a4908b9151f1047ad7811865a1950b485910f62af12d65d84bfe3091968334baf8c2bf3247bf545a0b9e38075ddc76680e7ca2c1d8c0fcdb4f7c92afe2dfd7993b5786e794877942222ae568528e03b1383bd983d19436d6fc3290168fedc4144ea7285575b1faaab868106841ed5c51baeb7540c90fdc23c49138353856e4462c9ae30266fa9c46588b3a66e3d56b0b984f8c7a6d23ef0c9d7127032e779b4f57fc3dc5ad066f1f9fb8f1ebd3d837483370f04aa04221d4dedaf76831adba87d8f7b193cf077361344d2c587c965607b745575cc8db320622cacb5c057039100acf950cc8a4f87f5cb55dcec6adf4c1082bba9c5b1bf1b967ab070de5350559c14cae80a614e6ed25e3b5b80682a7877eb3a42029787c55d50fd53b47a51aa32020815666e83df67471f5fd99139b91db9d046b21c16cc43f039fb012a3c28d0d754e8892fc0ee254b3d7727c15c9d8f64c8889efc5e42c073a4b36d982858652d7b7f3644d55bf1ed96a4f9ab3105c83cf4dc0085637439615fd346f1ce56247c62af2443cbd07c8
42e71013a0a298edb5e1ffb57711443a6f29dab16949cc6232ffe5f1630891cee4e3b689dc18686b7ae4f257f34b711ac28412340d77bb24ed009e9344d73e6db52e7acbdb21a732cc0538e9b0fc26aa3f5a6c43a1150826949ca7b602614dae349b8e5 ++NextIV = 3a1150826949ca7b602614dae349b8e5 ++ + # CBC-AES192.Encrypt and CBC-AES192.Decrypt + Cipher = AES-192-CBC + Key = 8E73B0F7DA0E6452C810F32B809079E562F8EAD2522C6B7B +@@ -1221,6 +1237,36 @@ IV = 00000000000000000000000000000000 + Plaintext = 000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f404142434445464748494a4b4c4d4e4f505152535455565758595a5b5c5d5e5f606162636465666768696a6b6c6d6e6f707172737475767778797a7b7c7d7e7f808182838485868788898a8b8c8d8e8f909192939495969798999a9b9c9d9e9fa0a1a2a3a4a5a6a7a8a9aaabacadaeafb0b1b2b3b4b5b6b7b8b9babbbcbdbebfc0c1 + Ciphertext = 27A7479BEFA1D476489F308CD4CFA6E2A96E4BBE3208FF25287DD3819616E89CC78CF7F5E543445F8333D8FA7F56000005279FA5D8B5E4AD40E736DDB4D35412328063FD2AAB53E5EA1E0A9F332500A5DF9487D07A5C92CC512C8866C7E860CE93FDF166A24912B422976146AE20CE846BB7DC9BA94A767AAEF20C0D61AD02655EA92DC4C4E41A8952C651D33174BE51A10C421110E6D81588EDE82103A252D8A750E8768DEFFFED9122810AAEB99F910409B03D164E727C31290FD4E039500872AF + ++Cipher = aes-256-xts ++Key = 27182818284590452353602874713526624977572470936999595749669676273141592653589793238462643383279502884197169399375105820974944592 ++IV = 00000000000000000000000000000000 ++Plaintext = 000102030405060708090a0b0c0d0e0f10 ++Ciphertext = b5f737852b18e4cf31d7353ff220ca5e3a ++ ++Cipher = aes-256-xts ++Key = 27182818284590452353602874713526624977572470936999595749669676273141592653589793238462643383279502884197169399375105820974944592 ++IV = 00000000000000000000000000000000 ++Plaintext = 000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f404142434445464748494a4b4c4d4e4f505152535455565758595a5b5c5d5e5f ++Ciphertext = 
3a060a8cad115a6f44572e3759e43c8fcad8bfcb233ff6ad71b7c1e7ca651508860aed34ec9506d368aa50274a31c16d2deae4d64c2a8096091c093f3820fb6d21089bcedaac361e3febe706cafe14b96bffbd29ff8fb716d4ed8fb39ccfb667 ++ ++Cipher = aes-256-xts ++Key = 27182818284590452353602874713526624977572470936999595749669676273141592653589793238462643383279502884197169399375105820974944592 ++IV = 00000000000000000000000000000000 ++Plaintext = 000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f404142434445464748494a4b4c4d4e4f505152535455565758595a5b5c5d5e5f6061 ++Ciphertext = 3a060a8cad115a6f44572e3759e43c8fcad8bfcb233ff6ad71b7c1e7ca651508860aed34ec9506d368aa50274a31c16d2deae4d64c2a8096091c093f3820fb6d21089bcedaac361e3febe706cafe14b9bc89de34258d32ec3ad59fd2986a401e6bff ++ ++Cipher = aes-256-xts ++Key = 27182818284590452353602874713526624977572470936999595749669676273141592653589793238462643383279502884197169399375105820974944592 ++IV = 00000000000000000000000000000000 ++Plaintext = 000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f404142434445464748494a4b4c4d4e4f505152535455565758595a5b5c5d5e5f606162636465666768696a6b6c6d6e6f ++Ciphertext = 3a060a8cad115a6f44572e3759e43c8fcad8bfcb233ff6ad71b7c1e7ca651508860aed34ec9506d368aa50274a31c16d2deae4d64c2a8096091c093f3820fb6d21089bcedaac361e3febe706cafe14b96bffbd29ff8fb716d4ed8fb39ccfb667a6b985d89b9c862780185c839f60307a ++ ++Cipher = aes-256-xts ++Key = 27182818284590452353602874713526624977572470936999595749669676273141592653589793238462643383279502884197169399375105820974944592 ++IV = 00000000000000000000000000000000 ++Plaintext = 000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f404142434445464748494a4b4c4d4e4f505152535455565758595a5b5c5d5e5f606162636465666768696a6b6c6d6e6f7071 ++Ciphertext = 
3a060a8cad115a6f44572e3759e43c8fcad8bfcb233ff6ad71b7c1e7ca651508860aed34ec9506d368aa50274a31c16d2deae4d64c2a8096091c093f3820fb6d21089bcedaac361e3febe706cafe14b96bffbd29ff8fb716d4ed8fb39ccfb6671ae1fd167248af55dc14646bd7e2e8c3a6b9 ++ + Title = AES XTS Non standard test vectors - generated from reference implementation + + Cipher = aes-128-xts +@@ -1302,4 +1348,3 @@ Tag = ab6e47d42cec13bdf53a67b21257bddf + Plaintext = 00000000000000000000000000000000 + Ciphertext = 0388dace60b6a392f328c2b971b2fe78 + NextIV = 000000000000000000000000 +- +-- +2.27.0 + diff --git a/openssl.spec b/openssl.spec index df851e7..7dec8dc 100644 --- a/openssl.spec +++ b/openssl.spec @@ -2,7 +2,7 @@ Name: openssl Epoch: 1 Version: 3.0.12 -Release: 17 +Release: 22 Summary: Cryptography and SSL/TLS Toolkit License: OpenSSL and SSLeay URL: https://www.openssl.org/ @@ -76,6 +76,7 @@ Patch63: backport-Refactor-OSSL_LIB_CTX-to-avoid-using-CRYPTO_EX_DATA.patch Patch64: backport-Release-the-drbg-in-the-global-default-context-befor.patch Patch65: backport-params-provide-a-faster-TRIE-based-param-lookup.patch Patch66: backport-CVE-2024-13176-Fix-timing-side-channel.patch +Patch72: Backport-riscv-Add-AES-implementation-based-on-Zvkned.patch Patch9000: add-FIPS_mode_set-support.patch Patch9001: backport-CVE-2024-9143-Harden-BN_GF2m_poly2arr-against-misuse.patch @@ -141,6 +142,11 @@ sslarch=%{_os}64-%{_target_cpu} sslflags="--libdir=%{_libdir}" %endif +%ifarch riscv64 + RPM_OPT_FLAGS="$RPM_OPT_FLAGS -O3 -march=rv64gcv_zba_zbb_zbc_zbs_zkt_zvbb_zvbc_zvkb_zvkg_zvkned_zvknhb_zvksed_zvksh" + RPM_LD_FLAGS="$RPM_LD_FLAGS -Wl,--build-id" +%endif + %ifarch x86_64 aarch64 sslflags=enable-ec_nistp_64_gcc_128 %endif @@ -281,6 +287,9 @@ make test || : %ldconfig_scriptlets libs %changelog +* Fri Aug 1 2025 liuqingtao - 1:3.0.12-22 +- Backport riscv Add AES implementation based on Zvkned + * Mon Mar 10 2025 mahailiang - 1:3.0.12-17 - add sw_64 support -- Gitee