diff --git a/add-sw_64-support.patch b/add-sw_64-support.patch
new file mode 100644
index 0000000000000000000000000000000000000000..6eccddb05346c49761cb91296a22abd99eef6c72
--- /dev/null
+++ b/add-sw_64-support.patch
@@ -0,0 +1,1520 @@
+From dfb837f21d55b9cf0bf78e6b3d5dc02567672400 Mon Sep 17 00:00:00 2001
+From: mahailiang
+Date: Sun, 29 Sep 2024 17:26:05 +0800
+Subject: [PATCH] add sw_64 support
+
+---
+ Configurations/10-main.conf | 12 +
+ crypto/bn/asm/sw_64-mont.pl | 328 ++++++++++++++++++++++
+ crypto/bn/bn_local.h | 2 +-
+ crypto/bn/build.info | 2 +-
+ crypto/modes/asm/ghash-sw_64.pl | 467 ++++++++++++++++++++++++++++++++
+ crypto/sha/asm/sha1-sw_64.pl | 329 ++++++++++++++++++++++
+ crypto/sha/build.info | 2 +-
+ crypto/sw_64cpuid.pl | 273 +++++++++++++++++++
+ include/crypto/md32_common.h | 2 +-
+ 9 files changed, 1413 insertions(+), 4 deletions(-)
+ create mode 100644 crypto/bn/asm/sw_64-mont.pl
+ create mode 100644 crypto/modes/asm/ghash-sw_64.pl
+ create mode 100644 crypto/sha/asm/sha1-sw_64.pl
+ create mode 100644 crypto/sw_64cpuid.pl
+
+diff --git a/Configurations/10-main.conf b/Configurations/10-main.conf
+index 915e7dd..33fd760 100644
+--- a/Configurations/10-main.conf
++++ b/Configurations/10-main.conf
+@@ -984,6 +984,18 @@ my %targets = (
+ asm_arch => 'alpha',
+ perlasm_scheme => "void",
+ },
++ "linux-sw_64-gcc" => {
++ inherit_from => [ "linux-generic64" ],
++ lib_cppflags => add("-DL_ENDIAN"),
++ bn_ops => "SIXTY_FOUR_BIT_LONG",
++ },
++ "linux-sw_64" => {
++ inherit_from => [ "linux-generic64" ],
++ cflags => add("-DL_ENDIAN"),
++ bn_ops => "SIXTY_FOUR_BIT_LONG",
++ perlasm_scheme => "elf",
++ multilib => "64",
++ },
+ "linux-c64xplus" => {
+ inherit_from => [ "BASE_unix" ],
+ # TI_CGT_C6000_7.3.x is a requirement
+diff --git a/crypto/bn/asm/sw_64-mont.pl b/crypto/bn/asm/sw_64-mont.pl
+new file mode 100644
+index 0000000..348b903
+--- /dev/null
++++ b/crypto/bn/asm/sw_64-mont.pl
+@@ -0,0 +1,328 @@
++#! /usr/bin/env perl
++# Copyright 2006-2020 The OpenSSL Project Authors. All Rights Reserved.
++#
++# Licensed under the OpenSSL license (the "License"). You may not use
++# this file except in compliance with the License. You can obtain a copy
++# in the file LICENSE in the source distribution or at
++# https://www.openssl.org/source/license.html
++
++#
++# ====================================================================
++# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
++# project. The module is, however, dual licensed under OpenSSL and
++# CRYPTOGAMS licenses depending on where you obtain it. For further
++# details see http://www.openssl.org/~appro/cryptogams/.
++# ====================================================================
++#
++# On 21264 RSA sign performance improves by 70/35/20/15 percent for
++# 512/1024/2048/4096 bit key lengths. This is against vendor compiler
++# instructed to '-tune host' code with in-line assembler. Other
++# benchmarks improve by 15-20%. To anchor it to something else, the
++# code provides approximately the same performance per GHz as AMD64.
++# I.e. if you compare 1GHz 21264 and 2GHz Opteron, you'll observe ~2x
++# difference.
++
++$output=pop;
++open STDOUT,">$output";
++
++# int bn_mul_mont(
++$rp="a0"; # BN_ULONG *rp,
++$ap="a1"; # const BN_ULONG *ap,
++$bp="a2"; # const BN_ULONG *bp,
++$np="a3"; # const BN_ULONG *np,
++$n0="a4"; # const BN_ULONG *n0,
++$num="a5"; # int num);
++
++$lo0="t0";
++$hi0="t1";
++$lo1="t2";
++$hi1="t3";
++$aj="t4";
++$bi="t5";
++$nj="t6";
++$tp="t7";
++$alo="t8";
++$ahi="t9";
++$nlo="t10";
++$nhi="t11";
++$tj="t12";
++$i="s3";
++$j="s4";
++$m1="s5";
++
++$code=<<___;
++#ifdef __linux__
++#include <asm/regdef.h>
++#else
++#include <asm.h>
++#include <regdef.h>
++#endif
++
++.text
++
++.set noat
++.set noreorder
++
++.globl bn_mul_mont
++.align 5
++.ent bn_mul_mont
++bn_mul_mont:
++ ldi sp,-48(sp)
++ stl ra,0(sp)
++ stl s3,8(sp)
++ stl s4,16(sp)
++ stl s5,24(sp)
++ stl fp,32(sp)
++ mov sp,fp
++ .mask 0x0400f000,-48
++ .frame fp,48,ra
++ .prologue 0
++
++ .align 4
++ .set reorder
++ sextl $num,$num
++ mov 0,v0
++ cmplt $num,4,AT
++ bne AT,.Lexit
++
++ ldl $hi0,0($ap) # ap[0]
++ s8addl $num,16,AT
++ ldl $aj,8($ap)
++ subl sp,AT,sp
++ ldl $bi,0($bp) # bp[0]
++ ldi AT,-4096(zero) # mov -4096,AT
++ ldl $n0,0($n0)
++ and sp,AT,sp
++
++ mull $hi0,$bi,$lo0
++ ldl $hi1,0($np) # np[0]
++ umulh $hi0,$bi,$hi0
++ ldl $nj,8($np)
++
++ mull $lo0,$n0,$m1
++
++ mull $hi1,$m1,$lo1
++ umulh $hi1,$m1,$hi1
++
++ addl $lo1,$lo0,$lo1
++ cmpult $lo1,$lo0,AT
++ addl $hi1,AT,$hi1
++
++ mull $aj,$bi,$alo
++ mov 2,$j
++ umulh $aj,$bi,$ahi
++ mov sp,$tp
++
++ mull $nj,$m1,$nlo
++ s8addl $j,$ap,$aj
++ umulh $nj,$m1,$nhi
++ s8addl $j,$np,$nj
++.align 4
++.L1st:
++ .set noreorder
++ ldl $aj,0($aj)
++ addw $j,1,$j
++ ldl $nj,0($nj)
++ ldi $tp,8($tp)
++
++ addl $alo,$hi0,$lo0
++ mull $aj,$bi,$alo
++ cmpult $lo0,$hi0,AT
++ addl $nlo,$hi1,$lo1
++
++ mull $nj,$m1,$nlo
++ addl $ahi,AT,$hi0
++ cmpult $lo1,$hi1,v0
++ cmplt $j,$num,$tj
++
++ umulh $aj,$bi,$ahi
++ addl $nhi,v0,$hi1
++ addl $lo1,$lo0,$lo1
++ s8addl $j,$ap,$aj
++
++ umulh $nj,$m1,$nhi
++ cmpult $lo1,$lo0,v0
++ addl $hi1,v0,$hi1
++ s8addl $j,$np,$nj
++
++ stl $lo1,-8($tp)
++ nop
++ unop
++ bne $tj,.L1st
++ .set reorder
++
++ addl $alo,$hi0,$lo0
++ addl $nlo,$hi1,$lo1
++ cmpult $lo0,$hi0,AT
++ cmpult $lo1,$hi1,v0
++ addl $ahi,AT,$hi0
++ addl $nhi,v0,$hi1
++
++ addl $lo1,$lo0,$lo1
++ cmpult $lo1,$lo0,v0
++ addl $hi1,v0,$hi1
++
++ stl $lo1,0($tp)
++
++ addl $hi1,$hi0,$hi1
++ cmpult $hi1,$hi0,AT
++ stl $hi1,8($tp)
++ stl AT,16($tp)
++
++ mov 1,$i
++.align 4
++.Louter:
++ s8addl $i,$bp,$bi
++ ldl $hi0,0($ap)
++ ldl $aj,8($ap)
++ ldl $bi,0($bi)
++ ldl $hi1,0($np)
++ ldl $nj,8($np)
++ ldl $tj,0(sp)
++
++ mull $hi0,$bi,$lo0
++ umulh $hi0,$bi,$hi0
++
++ addl $lo0,$tj,$lo0
++ cmpult $lo0,$tj,AT
++ addl $hi0,AT,$hi0
++
++ mull $lo0,$n0,$m1
++
++ mull $hi1,$m1,$lo1
++ umulh $hi1,$m1,$hi1
++
++ addl $lo1,$lo0,$lo1
++ cmpult $lo1,$lo0,AT
++ mov 2,$j
++ addl $hi1,AT,$hi1
++
++ mull $aj,$bi,$alo
++ mov sp,$tp
++ umulh $aj,$bi,$ahi
++
++ mull $nj,$m1,$nlo
++ s8addl $j,$ap,$aj
++ umulh $nj,$m1,$nhi
++.align 4
++.Linner:
++ .set noreorder
++ ldl $tj,8($tp) #L0
++ nop #U1
++ ldl $aj,0($aj) #L1
++ s8addl $j,$np,$nj #U0
++
++ ldl $nj,0($nj) #L0
++ nop #U1
++ addl $alo,$hi0,$lo0 #L1
++ ldi $tp,8($tp)
++
++ mull $aj,$bi,$alo #U1
++ cmpult $lo0,$hi0,AT #L0
++ addl $nlo,$hi1,$lo1 #L1
++ addw $j,1,$j
++
++ mull $nj,$m1,$nlo #U1
++ addl $ahi,AT,$hi0 #L0
++ addl $lo0,$tj,$lo0 #L1
++ cmpult $lo1,$hi1,v0 #U0
++
++ umulh $aj,$bi,$ahi #U1
++ cmpult $lo0,$tj,AT #L0
++ addl $lo1,$lo0,$lo1 #L1
++ addl $nhi,v0,$hi1 #U0
++
++ umulh $nj,$m1,$nhi #U1
++ s8addl $j,$ap,$aj #L0
++ cmpult $lo1,$lo0,v0 #L1
++ cmplt $j,$num,$tj #U0 # borrow $tj
++
++ addl $hi0,AT,$hi0 #L0
++ addl $hi1,v0,$hi1 #U1
++ stl $lo1,-8($tp) #L1
++ bne $tj,.Linner #U0
++ .set reorder
++
++ ldl $tj,8($tp)
++ addl $alo,$hi0,$lo0
++ addl $nlo,$hi1,$lo1
++ cmpult $lo0,$hi0,AT
++ cmpult $lo1,$hi1,v0
++ addl $ahi,AT,$hi0
++ addl $nhi,v0,$hi1
++
++ addl $lo0,$tj,$lo0
++ cmpult $lo0,$tj,AT
++ addl $hi0,AT,$hi0
++
++ ldl $tj,16($tp)
++ addl $lo1,$lo0,$j
++ cmpult $j,$lo0,v0
++ addl $hi1,v0,$hi1
++
++ addl $hi1,$hi0,$lo1
++ stl $j,0($tp)
++ cmpult $lo1,$hi0,$hi1
++ addl $lo1,$tj,$lo1
++ cmpult $lo1,$tj,AT
++ addw $i,1,$i
++ addl $hi1,AT,$hi1
++ stl $lo1,8($tp)
++ cmplt $i,$num,$tj # borrow $tj
++ stl $hi1,16($tp)
++ bne $tj,.Louter
++
++ s8addl $num,sp,$tj # &tp[num]
++ mov $rp,$bp # put rp aside
++ mov sp,$tp
++ mov sp,$ap
++ mov 0,$hi0 # clear borrow bit
++
++.align 4
++.Lsub: ldl $lo0,0($tp)
++ ldl $lo1,0($np)
++ ldi $tp,8($tp)
++ ldi $np,8($np)
++ subl $lo0,$lo1,$lo1 # tp[i]-np[i]
++ cmpult $lo0,$lo1,AT
++ subl $lo1,$hi0,$lo0
++ cmpult $lo1,$lo0,$hi0
++ or $hi0,AT,$hi0
++ stl $lo0,0($rp)
++ cmpult $tp,$tj,v0
++ ldi $rp,8($rp)
++ bne v0,.Lsub
++
++ subl $hi1,$hi0,$hi0 # handle upmost overflow bit
++ mov sp,$tp
++ mov $bp,$rp # restore rp
++
++.align 4
++.Lcopy: ldl $aj,0($tp) # conditional copy
++ ldl $nj,0($rp)
++ ldi $tp,8($tp)
++ ldi $rp,8($rp)
++ seleq $hi0,$nj,$aj
++ stl zero,-8($tp) # zap tp
++ cmpult $tp,$tj,AT
++ stl $aj,-8($rp)
++ bne AT,.Lcopy
++ mov 1,v0
++
++.Lexit:
++ .set noreorder
++ mov fp,sp
++ /*ldl ra,0(sp)*/
++ ldl s3,8(sp)
++ ldl s4,16(sp)
++ ldl s5,24(sp)
++ ldl fp,32(sp)
++ ldi sp,48(sp)
++ ret (ra)
++.end bn_mul_mont
++.ascii "Montgomery Multiplication for Sw_64, CRYPTOGAMS by <appro@openssl.org>"
++.align 2
++___
++
++print $code;
++close STDOUT or die "error closing STDOUT: $!";
+diff --git a/crypto/bn/bn_local.h b/crypto/bn/bn_local.h
+index 50e9d26..e73bd3f 100644
+--- a/crypto/bn/bn_local.h
++++ b/crypto/bn/bn_local.h
+@@ -387,7 +387,7 @@ struct bn_gencb_st {
+ # define BN_UMULT_LOHI(low,high,a,b) ({ \
+ uint128_t ret=(uint128_t)(a)*(b); \
+ (high)=ret>>64; (low)=ret; })
+-# elif defined(__alpha) && (defined(SIXTY_FOUR_BIT_LONG) || defined(SIXTY_FOUR_BIT))
++# elif (defined(__alpha) || defined(__sw_64)) && (defined(SIXTY_FOUR_BIT_LONG) || defined(SIXTY_FOUR_BIT))
+ # if defined(__DECC)
+ # include <c_asm.h>
+ # define BN_UMULT_HIGH(a,b) (BN_ULONG)asm("umulh %a0,%a1,%v0",(a),(b))
+diff --git a/crypto/bn/build.info b/crypto/bn/build.info
+index c4ba51b..b598423 100644
+--- a/crypto/bn/build.info
++++ b/crypto/bn/build.info
+@@ -168,7 +168,7 @@ GENERATE[ppc-mont.s]=asm/ppc-mont.pl
+ GENERATE[ppc64-mont.s]=asm/ppc64-mont.pl
+
+ GENERATE[alpha-mont.S]=asm/alpha-mont.pl
+-
++GENERATE[sw_64-mont.S]=asm/sw_64-mont.pl
+ GENERATE[armv4-mont.S]=asm/armv4-mont.pl
+ INCLUDE[armv4-mont.o]=..
+ GENERATE[armv4-gf2m.S]=asm/armv4-gf2m.pl
+diff --git a/crypto/modes/asm/ghash-sw_64.pl b/crypto/modes/asm/ghash-sw_64.pl
+new file mode 100644
+index 0000000..59b5596
+--- /dev/null
++++ b/crypto/modes/asm/ghash-sw_64.pl
+@@ -0,0 +1,467 @@
++#! /usr/bin/env perl
++# Copyright 2010-2020 The OpenSSL Project Authors. All Rights Reserved.
++#
++# Licensed under the OpenSSL license (the "License"). You may not use
++# this file except in compliance with the License. You can obtain a copy
++# in the file LICENSE in the source distribution or at
++# https://www.openssl.org/source/license.html
++
++#
++# ====================================================================
++# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
++# project. The module is, however, dual licensed under OpenSSL and
++# CRYPTOGAMS licenses depending on where you obtain it. For further
++# details see http://www.openssl.org/~appro/cryptogams/.
++# ====================================================================
++#
++# March 2010
++#
++# The module implements "4-bit" GCM GHASH function and underlying
++# single multiplication operation in GF(2^128). "4-bit" means that it
++# uses 256 bytes per-key table [+128 bytes shared table]. Even though
++# loops are aggressively modulo-scheduled in respect to references to
++# Htbl and Z.hi updates for 8 cycles per byte, measured performance is
++# ~12 cycles per processed byte on 21264 CPU. It seems to be a dynamic
++# scheduling "glitch," because uprofile(1) indicates uniform sample
++# distribution, as if all instruction bundles execute in 1.5 cycles.
++# Meaning that it could have been even faster, yet 12 cycles is ~60%
++# better than gcc-generated code and ~80% than code generated by vendor
++# compiler.
++
++$cnt="v0"; # $0
++$t0="t0";
++$t1="t1";
++$t2="t2";
++$Thi0="t3"; # $4
++$Tlo0="t4";
++$Thi1="t5";
++$Tlo1="t6";
++$rem="t7"; # $8
++#################
++$Xi="a0"; # $16, input argument block
++$Htbl="a1";
++$inp="a2";
++$len="a3";
++$nlo="a4"; # $20
++$nhi="a5";
++$Zhi="t8";
++$Zlo="t9";
++$Xhi="t10"; # $24
++$Xlo="t11";
++$remp="t12";
++$rem_4bit="AT"; # $28
++
++{ my $N;
++ sub loop() {
++
++ $N++;
++$code.=<<___;
++.align 4
++ extlb $Xlo,7,$nlo
++ and $nlo,0xf0,$nhi
++ sll $nlo,4,$nlo
++ and $nlo,0xf0,$nlo
++
++ addl $nlo,$Htbl,$nlo
++ ldl $Zlo,8($nlo)
++ addl $nhi,$Htbl,$nhi
++ ldl $Zhi,0($nlo)
++
++ and $Zlo,0x0f,$remp
++ sll $Zhi,60,$t0
++ ldi $cnt,6(zero)
++ extlb $Xlo,6,$nlo
++
++ ldl $Tlo1,8($nhi)
++ s8addl $remp,$rem_4bit,$remp
++ ldl $Thi1,0($nhi)
++ srl $Zlo,4,$Zlo
++
++ ldl $rem,0($remp)
++ srl $Zhi,4,$Zhi
++ xor $t0,$Zlo,$Zlo
++ and $nlo,0xf0,$nhi
++
++ xor $Tlo1,$Zlo,$Zlo
++ sll $nlo,4,$nlo
++ xor $Thi1,$Zhi,$Zhi
++ and $nlo,0xf0,$nlo
++
++ addl $nlo,$Htbl,$nlo
++ ldl $Tlo0,8($nlo)
++ addl $nhi,$Htbl,$nhi
++ ldl $Thi0,0($nlo)
++
++.Looplo$N:
++ and $Zlo,0x0f,$remp
++ sll $Zhi,60,$t0
++ subl $cnt,1,$cnt
++ srl $Zlo,4,$Zlo
++
++ ldl $Tlo1,8($nhi)
++ xor $rem,$Zhi,$Zhi
++ ldl $Thi1,0($nhi)
++ s8addl $remp,$rem_4bit,$remp
++
++ ldl $rem,0($remp)
++ srl $Zhi,4,$Zhi
++ xor $t0,$Zlo,$Zlo
++ extlb $Xlo,$cnt,$nlo
++
++ and $nlo,0xf0,$nhi
++ xor $Thi0,$Zhi,$Zhi
++ xor $Tlo0,$Zlo,$Zlo
++ sll $nlo,4,$nlo
++
++
++ and $Zlo,0x0f,$remp
++ sll $Zhi,60,$t0
++ and $nlo,0xf0,$nlo
++ srl $Zlo,4,$Zlo
++
++ s8addl $remp,$rem_4bit,$remp
++ xor $rem,$Zhi,$Zhi
++ addl $nlo,$Htbl,$nlo
++ addl $nhi,$Htbl,$nhi
++
++ ldl $rem,0($remp)
++ srl $Zhi,4,$Zhi
++ ldl $Tlo0,8($nlo)
++ xor $t0,$Zlo,$Zlo
++
++ xor $Tlo1,$Zlo,$Zlo
++ xor $Thi1,$Zhi,$Zhi
++ ldl $Thi0,0($nlo)
++ bne $cnt,.Looplo$N
++
++
++ and $Zlo,0x0f,$remp
++ sll $Zhi,60,$t0
++ ldi $cnt,7(zero)
++ srl $Zlo,4,$Zlo
++
++ ldl $Tlo1,8($nhi)
++ xor $rem,$Zhi,$Zhi
++ ldl $Thi1,0($nhi)
++ s8addl $remp,$rem_4bit,$remp
++
++ ldl $rem,0($remp)
++ srl $Zhi,4,$Zhi
++ xor $t0,$Zlo,$Zlo
++ extlb $Xhi,$cnt,$nlo
++
++ and $nlo,0xf0,$nhi
++ xor $Thi0,$Zhi,$Zhi
++ xor $Tlo0,$Zlo,$Zlo
++ sll $nlo,4,$nlo
++
++ and $Zlo,0x0f,$remp
++ sll $Zhi,60,$t0
++ and $nlo,0xf0,$nlo
++ srl $Zlo,4,$Zlo
++
++ s8addl $remp,$rem_4bit,$remp
++ xor $rem,$Zhi,$Zhi
++ addl $nlo,$Htbl,$nlo
++ addl $nhi,$Htbl,$nhi
++
++ ldl $rem,0($remp)
++ srl $Zhi,4,$Zhi
++ ldl $Tlo0,8($nlo)
++ xor $t0,$Zlo,$Zlo
++
++ xor $Tlo1,$Zlo,$Zlo
++ xor $Thi1,$Zhi,$Zhi
++ ldl $Thi0,0($nlo)
++ unop
++
++
++.Loophi$N:
++ and $Zlo,0x0f,$remp
++ sll $Zhi,60,$t0
++ subl $cnt,1,$cnt
++ srl $Zlo,4,$Zlo
++
++ ldl $Tlo1,8($nhi)
++ xor $rem,$Zhi,$Zhi
++ ldl $Thi1,0($nhi)
++ s8addl $remp,$rem_4bit,$remp
++
++ ldl $rem,0($remp)
++ srl $Zhi,4,$Zhi
++ xor $t0,$Zlo,$Zlo
++ extlb $Xhi,$cnt,$nlo
++
++ and $nlo,0xf0,$nhi
++ xor $Thi0,$Zhi,$Zhi
++ xor $Tlo0,$Zlo,$Zlo
++ sll $nlo,4,$nlo
++
++
++ and $Zlo,0x0f,$remp
++ sll $Zhi,60,$t0
++ and $nlo,0xf0,$nlo
++ srl $Zlo,4,$Zlo
++
++ s8addl $remp,$rem_4bit,$remp
++ xor $rem,$Zhi,$Zhi
++ addl $nlo,$Htbl,$nlo
++ addl $nhi,$Htbl,$nhi
++
++ ldl $rem,0($remp)
++ srl $Zhi,4,$Zhi
++ ldl $Tlo0,8($nlo)
++ xor $t0,$Zlo,$Zlo
++
++ xor $Tlo1,$Zlo,$Zlo
++ xor $Thi1,$Zhi,$Zhi
++ ldl $Thi0,0($nlo)
++ bne $cnt,.Loophi$N
++
++
++ and $Zlo,0x0f,$remp
++ sll $Zhi,60,$t0
++ srl $Zlo,4,$Zlo
++
++ ldl $Tlo1,8($nhi)
++ xor $rem,$Zhi,$Zhi
++ ldl $Thi1,0($nhi)
++ s8addl $remp,$rem_4bit,$remp
++
++ ldl $rem,0($remp)
++ srl $Zhi,4,$Zhi
++ xor $t0,$Zlo,$Zlo
++
++ xor $Tlo0,$Zlo,$Zlo
++ xor $Thi0,$Zhi,$Zhi
++
++ and $Zlo,0x0f,$remp
++ sll $Zhi,60,$t0
++ srl $Zlo,4,$Zlo
++
++ s8addl $remp,$rem_4bit,$remp
++ xor $rem,$Zhi,$Zhi
++
++ ldl $rem,0($remp)
++ srl $Zhi,4,$Zhi
++ xor $Tlo1,$Zlo,$Zlo
++ xor $Thi1,$Zhi,$Zhi
++ xor $t0,$Zlo,$Zlo
++ xor $rem,$Zhi,$Zhi
++___
++}}
++
++$code=<<___;
++#ifdef __linux__
++#include <asm/regdef.h>
++#else
++#include <asm.h>
++#include <regdef.h>
++#endif
++
++.text
++
++.set noat
++.set noreorder
++.globl gcm_gmult_4bit
++.align 4
++.ent gcm_gmult_4bit
++gcm_gmult_4bit:
++ .frame sp,0,ra
++ .prologue 0
++
++ ldl $Xlo,8($Xi)
++ ldl $Xhi,0($Xi)
++
++ bsr $t0,picmeup
++ nop
++___
++
++ &loop();
++
++$code.=<<___;
++ srl $Zlo,24,$t0 # byte swap
++ srl $Zlo,8,$t1
++
++ sll $Zlo,8,$t2
++ sll $Zlo,24,$Zlo
++ zapnot $t0,0x11,$t0
++ zapnot $t1,0x22,$t1
++
++ zapnot $Zlo,0x88,$Zlo
++ or $t0,$t1,$t0
++ zapnot $t2,0x44,$t2
++
++ or $Zlo,$t0,$Zlo
++ srl $Zhi,24,$t0
++ srl $Zhi,8,$t1
++
++ or $Zlo,$t2,$Zlo
++ sll $Zhi,8,$t2
++ sll $Zhi,24,$Zhi
++
++ srl $Zlo,32,$Xlo
++ sll $Zlo,32,$Zlo
++
++ zapnot $t0,0x11,$t0
++ zapnot $t1,0x22,$t1
++ or $Zlo,$Xlo,$Xlo
++
++ zapnot $Zhi,0x88,$Zhi
++ or $t0,$t1,$t0
++ zapnot $t2,0x44,$t2
++
++ or $Zhi,$t0,$Zhi
++ or $Zhi,$t2,$Zhi
++
++ srl $Zhi,32,$Xhi
++ sll $Zhi,32,$Zhi
++
++ or $Zhi,$Xhi,$Xhi
++ stl $Xlo,8($Xi)
++ stl $Xhi,0($Xi)
++
++ ret (ra)
++.end gcm_gmult_4bit
++___
++
++$inhi="s0";
++$inlo="s1";
++
++$code.=<<___;
++.globl gcm_ghash_4bit
++.align 4
++.ent gcm_ghash_4bit
++gcm_ghash_4bit:
++ ldi sp,-32(sp)
++ stl ra,0(sp)
++ stl s0,8(sp)
++ stl s1,16(sp)
++ .mask 0x04000600,-32
++ .frame sp,32,ra
++ .prologue 0
++
++ ldl_u $inhi,0($inp)
++ ldl_u $Thi0,7($inp)
++ ldl_u $inlo,8($inp)
++ ldl_u $Tlo0,15($inp)
++ ldl $Xhi,0($Xi)
++ ldl $Xlo,8($Xi)
++
++ bsr $t0,picmeup
++ nop
++
++.Louter:
++ extll $inhi,$inp,$inhi
++ exthl $Thi0,$inp,$Thi0
++ or $inhi,$Thi0,$inhi
++ ldi $inp,16($inp)
++
++ extll $inlo,$inp,$inlo
++ exthl $Tlo0,$inp,$Tlo0
++ or $inlo,$Tlo0,$inlo
++ subl $len,16,$len
++
++ xor $Xlo,$inlo,$Xlo
++ xor $Xhi,$inhi,$Xhi
++___
++
++ &loop();
++
++$code.=<<___;
++ srl $Zlo,24,$t0 # byte swap
++ srl $Zlo,8,$t1
++
++ sll $Zlo,8,$t2
++ sll $Zlo,24,$Zlo
++ zapnot $t0,0x11,$t0
++ zapnot $t1,0x22,$t1
++
++ zapnot $Zlo,0x88,$Zlo
++ or $t0,$t1,$t0
++ zapnot $t2,0x44,$t2
++
++ or $Zlo,$t0,$Zlo
++ srl $Zhi,24,$t0
++ srl $Zhi,8,$t1
++
++ or $Zlo,$t2,$Zlo
++ sll $Zhi,8,$t2
++ sll $Zhi,24,$Zhi
++
++ srl $Zlo,32,$Xlo
++ sll $Zlo,32,$Zlo
++ beq $len,.Ldone
++
++ zapnot $t0,0x11,$t0
++ zapnot $t1,0x22,$t1
++ or $Zlo,$Xlo,$Xlo
++ ldl_u $inhi,0($inp)
++
++ zapnot $Zhi,0x88,$Zhi
++ or $t0,$t1,$t0
++ zapnot $t2,0x44,$t2
++ ldl_u $Thi0,7($inp)
++
++ or $Zhi,$t0,$Zhi
++ or $Zhi,$t2,$Zhi
++ ldl_u $inlo,8($inp)
++ ldl_u $Tlo0,15($inp)
++
++ srl $Zhi,32,$Xhi
++ sll $Zhi,32,$Zhi
++
++ or $Zhi,$Xhi,$Xhi
++ br zero,.Louter
++
++.Ldone:
++ zapnot $t0,0x11,$t0
++ zapnot $t1,0x22,$t1
++ or $Zlo,$Xlo,$Xlo
++
++ zapnot $Zhi,0x88,$Zhi
++ or $t0,$t1,$t0
++ zapnot $t2,0x44,$t2
++
++ or $Zhi,$t0,$Zhi
++ or $Zhi,$t2,$Zhi
++
++ srl $Zhi,32,$Xhi
++ sll $Zhi,32,$Zhi
++
++ or $Zhi,$Xhi,$Xhi
++
++ stl $Xlo,8($Xi)
++ stl $Xhi,0($Xi)
++
++ .set noreorder
++ /*ldl ra,0(sp)*/
++ ldl s0,8(sp)
++ ldl s1,16(sp)
++ ldi sp,32(sp)
++ ret (ra)
++.end gcm_ghash_4bit
++
++.align 4
++.ent picmeup
++picmeup:
++ .frame sp,0,$t0
++ .prologue 0
++ br $rem_4bit,.Lpic
++.Lpic: ldi $rem_4bit,12($rem_4bit)
++ ret ($t0)
++.end picmeup
++ nop
++rem_4bit:
++ .long 0,0x0000<<16, 0,0x1C20<<16, 0,0x3840<<16, 0,0x2460<<16
++ .long 0,0x7080<<16, 0,0x6CA0<<16, 0,0x48C0<<16, 0,0x54E0<<16
++ .long 0,0xE100<<16, 0,0xFD20<<16, 0,0xD940<<16, 0,0xC560<<16
++ .long 0,0x9180<<16, 0,0x8DA0<<16, 0,0xA9C0<<16, 0,0xB5E0<<16
++.ascii "GHASH for Sw_64, CRYPTOGAMS by <appro@openssl.org>"
++.align 4
++
++___
++$output=pop and open STDOUT,">$output";
++print $code;
++close STDOUT or die "error closing STDOUT: $!";
++
+diff --git a/crypto/sha/asm/sha1-sw_64.pl b/crypto/sha/asm/sha1-sw_64.pl
+new file mode 100644
+index 0000000..cce4015
+--- /dev/null
++++ b/crypto/sha/asm/sha1-sw_64.pl
+@@ -0,0 +1,329 @@
++#! /usr/bin/env perl
++# Copyright 2009-2020 The OpenSSL Project Authors. All Rights Reserved.
++#
++# Licensed under the OpenSSL license (the "License"). You may not use
++# this file except in compliance with the License. You can obtain a copy
++# in the file LICENSE in the source distribution or at
++# https://www.openssl.org/source/license.html
++
++
++# ====================================================================
++# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
++# project. The module is, however, dual licensed under OpenSSL and
++# CRYPTOGAMS licenses depending on where you obtain it. For further
++# details see http://www.openssl.org/~appro/cryptogams/.
++# ====================================================================
++
++# SHA1 block procedure for Sw_64.
++
++# On 21264 performance is 33% better than code generated by vendor
++# compiler, and 75% better than GCC [3.4], and in absolute terms is
++# 8.7 cycles per processed byte. Implementation features vectorized
++# byte swap, but not Xupdate.
++
++@X=( "\$0", "\$1", "\$2", "\$3", "\$4", "\$5", "\$6", "\$7",
++ "\$8", "\$9", "\$10", "\$11", "\$12", "\$13", "\$14", "\$15");
++$ctx="a0"; # $16
++$inp="a1";
++$num="a2";
++$A="a3";
++$B="a4"; # 20
++$C="a5";
++$D="t8";
++$E="t9"; @V=($A,$B,$C,$D,$E);
++$t0="t10"; # 24
++$t1="t11";
++$t2="ra";
++$t3="t12";
++$K="AT"; # 28
++
++sub BODY_00_19 {
++my ($i,$a,$b,$c,$d,$e)=@_;
++my $j=$i+1;
++$code.=<<___ if ($i==0);
++ ldl_u @X[0],0+0($inp)
++ ldl_u @X[1],0+7($inp)
++___
++$code.=<<___ if (!($i&1) && $i<14);
++ ldl_u @X[$i+2],($i+2)*4+0($inp)
++ ldl_u @X[$i+3],($i+2)*4+7($inp)
++___
++$code.=<<___ if (!($i&1) && $i<15);
++ extll @X[$i],$inp,@X[$i]
++ exthl @X[$i+1],$inp,@X[$i+1]
++
++ or @X[$i+1],@X[$i],@X[$i] # pair of 32-bit values are fetched
++
++ srl @X[$i],24,$t0 # vectorized byte swap
++ srl @X[$i],8,$t2
++
++ sll @X[$i],8,$t3
++ sll @X[$i],24,@X[$i]
++ zapnot $t0,0x11,$t0
++ zapnot $t2,0x22,$t2
++
++ zapnot @X[$i],0x88,@X[$i]
++ or $t0,$t2,$t0
++ zapnot $t3,0x44,$t3
++ sll $a,5,$t1
++
++ or @X[$i],$t0,@X[$i]
++ addw $K,$e,$e
++ and $b,$c,$t2
++ zapnot $a,0xf,$a
++
++ or @X[$i],$t3,@X[$i]
++ srl $a,27,$t0
++ bic $d,$b,$t3
++ sll $b,30,$b
++
++ extll @X[$i],4,@X[$i+1] # extract upper half
++ or $t2,$t3,$t2
++ addw @X[$i],$e,$e
++
++ addw $t1,$e,$e
++ srl $b,32,$t3
++ zapnot @X[$i],0xf,@X[$i]
++
++ addw $t0,$e,$e
++ addw $t2,$e,$e
++ or $t3,$b,$b
++___
++$code.=<<___ if (($i&1) && $i<15);
++ sll $a,5,$t1
++ addw $K,$e,$e
++ and $b,$c,$t2
++ zapnot $a,0xf,$a
++
++ srl $a,27,$t0
++ addw @X[$i%16],$e,$e
++ bic $d,$b,$t3
++ sll $b,30,$b
++
++ or $t2,$t3,$t2
++ addw $t1,$e,$e
++ srl $b,32,$t3
++ zapnot @X[$i],0xf,@X[$i]
++
++ addw $t0,$e,$e
++ addw $t2,$e,$e
++ or $t3,$b,$b
++___
++$code.=<<___ if ($i>=15); # with forward Xupdate
++ sll $a,5,$t1
++ addw $K,$e,$e
++ and $b,$c,$t2
++ xor @X[($j+2)%16],@X[$j%16],@X[$j%16]
++
++ zapnot $a,0xf,$a
++ addw @X[$i%16],$e,$e
++ bic $d,$b,$t3
++ xor @X[($j+8)%16],@X[$j%16],@X[$j%16]
++
++ srl $a,27,$t0
++ addw $t1,$e,$e
++ or $t2,$t3,$t2
++ xor @X[($j+13)%16],@X[$j%16],@X[$j%16]
++
++ sll $b,30,$b
++ addw $t0,$e,$e
++ srl @X[$j%16],31,$t1
++
++ addw $t2,$e,$e
++ srl $b,32,$t3
++ addw @X[$j%16],@X[$j%16],@X[$j%16]
++
++ or $t3,$b,$b
++ zapnot @X[$i%16],0xf,@X[$i%16]
++ or $t1,@X[$j%16],@X[$j%16]
++___
++}
++
++sub BODY_20_39 {
++my ($i,$a,$b,$c,$d,$e)=@_;
++my $j=$i+1;
++$code.=<<___ if ($i<79); # with forward Xupdate
++ sll $a,5,$t1
++ addw $K,$e,$e
++ zapnot $a,0xf,$a
++ xor @X[($j+2)%16],@X[$j%16],@X[$j%16]
++
++ sll $b,30,$t3
++ addw $t1,$e,$e
++ xor $b,$c,$t2
++ xor @X[($j+8)%16],@X[$j%16],@X[$j%16]
++
++ srl $b,2,$b
++ addw @X[$i%16],$e,$e
++ xor $d,$t2,$t2
++ xor @X[($j+13)%16],@X[$j%16],@X[$j%16]
++
++ srl @X[$j%16],31,$t1
++ addw $t2,$e,$e
++ srl $a,27,$t0
++ addw @X[$j%16],@X[$j%16],@X[$j%16]
++
++ or $t3,$b,$b
++ addw $t0,$e,$e
++ or $t1,@X[$j%16],@X[$j%16]
++___
++$code.=<<___ if ($i<77);
++ zapnot @X[$i%16],0xf,@X[$i%16]
++___
++$code.=<<___ if ($i==79); # with context fetch
++ sll $a,5,$t1
++ addw $K,$e,$e
++ zapnot $a,0xf,$a
++ ldw @X[0],0($ctx)
++
++ sll $b,30,$t3
++ addw $t1,$e,$e
++ xor $b,$c,$t2
++ ldw @X[1],4($ctx)
++
++ srl $b,2,$b
++ addw @X[$i%16],$e,$e
++ xor $d,$t2,$t2
++ ldw @X[2],8($ctx)
++
++ srl $a,27,$t0
++ addw $t2,$e,$e
++ ldw @X[3],12($ctx)
++
++ or $t3,$b,$b
++ addw $t0,$e,$e
++ ldw @X[4],16($ctx)
++___
++}
++
++sub BODY_40_59 {
++my ($i,$a,$b,$c,$d,$e)=@_;
++my $j=$i+1;
++$code.=<<___; # with forward Xupdate
++ sll $a,5,$t1
++ addw $K,$e,$e
++ zapnot $a,0xf,$a
++ xor @X[($j+2)%16],@X[$j%16],@X[$j%16]
++
++ srl $a,27,$t0
++ and $b,$c,$t2
++ and $b,$d,$t3
++ xor @X[($j+8)%16],@X[$j%16],@X[$j%16]
++
++ sll $b,30,$b
++ addw $t1,$e,$e
++ xor @X[($j+13)%16],@X[$j%16],@X[$j%16]
++
++ srl @X[$j%16],31,$t1
++ addw $t0,$e,$e
++ or $t2,$t3,$t2
++ and $c,$d,$t3
++
++ or $t2,$t3,$t2
++ srl $b,32,$t3
++ addw @X[$i%16],$e,$e
++ addw @X[$j%16],@X[$j%16],@X[$j%16]
++
++ or $t3,$b,$b
++ addw $t2,$e,$e
++ or $t1,@X[$j%16],@X[$j%16]
++ zapnot @X[$i%16],0xf,@X[$i%16]
++___
++}
++
++$code=<<___;
++#ifdef __linux__
++#include <asm/regdef.h>
++#else
++#include <asm.h>
++#include <regdef.h>
++#endif
++
++.text
++
++.set noat
++.set noreorder
++.globl sha1_block_data_order
++.align 5
++.ent sha1_block_data_order
++sha1_block_data_order:
++ ldi sp,-64(sp)
++ stl ra,0(sp)
++ stl s0,8(sp)
++ stl s1,16(sp)
++ stl s2,24(sp)
++ stl s3,32(sp)
++ stl s4,40(sp)
++ stl s5,48(sp)
++ stl fp,56(sp)
++ .mask 0x0400fe00,-64
++ .frame sp,64,ra
++ .prologue 0
++
++ ldw $A,0($ctx)
++ ldw $B,4($ctx)
++ sll $num,6,$num
++ ldw $C,8($ctx)
++ ldw $D,12($ctx)
++ ldw $E,16($ctx)
++ addl $inp,$num,$num
++
++.Lloop:
++ .set noreorder
++ ldih $K,23170(zero)
++ zapnot $B,0xf,$B
++ ldi $K,31129($K) # K_00_19
++___
++for ($i=0;$i<20;$i++) { &BODY_00_19($i,@V); unshift(@V,pop(@V)); }
++
++$code.=<<___;
++ ldih $K,28378(zero)
++ ldi $K,-5215($K) # K_20_39
++___
++for (;$i<40;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); }
++
++$code.=<<___;
++ ldih $K,-28900(zero)
++ ldi $K,-17188($K) # K_40_59
++___
++for (;$i<60;$i++) { &BODY_40_59($i,@V); unshift(@V,pop(@V)); }
++
++$code.=<<___;
++ ldih $K,-13725(zero)
++ ldi $K,-15914($K) # K_60_79
++___
++for (;$i<80;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); }
++
++$code.=<<___;
++ addw @X[0],$A,$A
++ addw @X[1],$B,$B
++ addw @X[2],$C,$C
++ addw @X[3],$D,$D
++ addw @X[4],$E,$E
++ stw $A,0($ctx)
++ stw $B,4($ctx)
++ addl $inp,64,$inp
++ stw $C,8($ctx)
++ stw $D,12($ctx)
++ stw $E,16($ctx)
++ cmpult $inp,$num,$t1
++ bne $t1,.Lloop
++
++ .set noreorder
++ ldl ra,0(sp)
++ ldl s0,8(sp)
++ ldl s1,16(sp)
++ ldl s2,24(sp)
++ ldl s3,32(sp)
++ ldl s4,40(sp)
++ ldl s5,48(sp)
++ ldl fp,56(sp)
++ ldi sp,64(sp)
++ ret (ra)
++.end sha1_block_data_order
++.ascii "SHA1 block transform for Sw_64, CRYPTOGAMS by <appro@openssl.org>"
++.align 2
++___
++$output=pop and open STDOUT,">$output";
++print $code;
++close STDOUT or die "error closing STDOUT: $!";
+diff --git a/crypto/sha/build.info b/crypto/sha/build.info
+index 556a658..9c29460 100644
+--- a/crypto/sha/build.info
++++ b/crypto/sha/build.info
+@@ -103,7 +103,7 @@ GENERATE[sha256-ia64.s]=asm/sha512-ia64.pl
+ GENERATE[sha512-ia64.s]=asm/sha512-ia64.pl
+
+ GENERATE[sha1-alpha.S]=asm/sha1-alpha.pl
+-
++GENERATE[sha1-sw_64.S]=asm/sha1-sw_64.pl
+ GENERATE[sha1-x86_64.s]=asm/sha1-x86_64.pl
+ GENERATE[sha1-mb-x86_64.s]=asm/sha1-mb-x86_64.pl
+ GENERATE[sha256-x86_64.s]=asm/sha512-x86_64.pl
+diff --git a/crypto/sw_64cpuid.pl b/crypto/sw_64cpuid.pl
+new file mode 100644
+index 0000000..0f2d44a
+--- /dev/null
++++ b/crypto/sw_64cpuid.pl
+@@ -0,0 +1,273 @@
++#! /usr/bin/env perl
++# Copyright 2010-2016 The OpenSSL Project Authors. All Rights Reserved.
++#
++# Licensed under the OpenSSL license (the "License"). You may not use
++# this file except in compliance with the License. You can obtain a copy
++# in the file LICENSE in the source distribution or at
++# https://www.openssl.org/source/license.html
++
++
++$output = pop;
++open STDOUT,">$output";
++
++print <<'___';
++.text
++
++.set noat
++
++.globl OPENSSL_cpuid_setup
++.ent OPENSSL_cpuid_setup
++OPENSSL_cpuid_setup:
++ .frame $30,0,$26
++ .prologue 0
++ ret ($26)
++.end OPENSSL_cpuid_setup
++
++.globl OPENSSL_wipe_cpu
++.ent OPENSSL_wipe_cpu
++OPENSSL_wipe_cpu:
++ .frame $30,0,$26
++ .prologue 0
++ clr $1
++ clr $2
++ clr $3
++ clr $4
++ clr $5
++ clr $6
++ clr $7
++ clr $8
++ clr $16
++ clr $17
++ clr $18
++ clr $19
++ clr $20
++ clr $21
++ clr $22
++ clr $23
++ clr $24
++ clr $25
++ clr $27
++ clr $at
++ clr $29
++ fclr $f0
++ fclr $f1
++ fclr $f10
++ fclr $f11
++ fclr $f12
++ fclr $f13
++ fclr $f14
++ fclr $f15
++ fclr $f16
++ fclr $f17
++ fclr $f18
++ fclr $f19
++ fclr $f20
++ fclr $f21
++ fclr $f22
++ fclr $f23
++ fclr $f24
++ fclr $f25
++ fclr $f26
++ fclr $f27
++ fclr $f28
++ fclr $f29
++ fclr $f30
++ mov $sp,$0
++ ret ($26)
++.end OPENSSL_wipe_cpu
++
++.globl OPENSSL_atomic_add
++.ent OPENSSL_atomic_add
++OPENSSL_atomic_add:
++ .frame $30,0,$26
++ .prologue 0
++1: lldw $0,0($16)
++ ldi $1,1
++ wr_f $1
++ addw $0,$17,$1
++ lstw $1,0($16)
++ rd_f $1
++ beq $1,1b
++ addw $0,$17,$0
++ ret ($26)
++.end OPENSSL_atomic_add
++
++.globl OPENSSL_rdtsc
++.ent OPENSSL_rdtsc
++OPENSSL_rdtsc:
++ .frame $30,0,$26
++ .prologue 0
++ rtc $0
++ ret ($26)
++.end OPENSSL_rdtsc
++
++.globl OPENSSL_cleanse
++.ent OPENSSL_cleanse
++OPENSSL_cleanse:
++ .frame $30,0,$26
++ .prologue 0
++ beq $17,.Ldone
++ and $16,7,$0
++ bic $17,7,$at
++ beq $at,.Little
++ beq $0,.Laligned
++
++.Little:
++ subl $0,8,$0
++ ldl_u $1,0($16)
++ mov $16,$2
++.Lalign:
++ masklb $1,$16,$1
++ ldi $16,1($16)
++ subl $17,1,$17
++ addl $0,1,$0
++ beq $17,.Lout
++ bne $0,.Lalign
++.Lout: stl_u $1,0($2)
++ beq $17,.Ldone
++ bic $17,7,$at
++ beq $at,.Little
++
++.Laligned:
++ stl $31,0($16)
++ subl $17,8,$17
++ ldi $16,8($16)
++ bic $17,7,$at
++ bne $at,.Laligned
++ bne $17,.Little
++.Ldone: ret ($26)
++.end OPENSSL_cleanse
++
++.globl CRYPTO_memcmp
++.ent CRYPTO_memcmp
++CRYPTO_memcmp:
++ .frame $30,0,$26
++ .prologue 0
++ xor $0,$0,$0
++ beq $18,.Lno_data
++
++ xor $1,$1,$1
++ nop
++.Loop_cmp:
++ ldl_u $2,0($16)
++ subl $18,1,$18
++ ldl_u $3,0($17)
++ extlb $2,$16,$2
++ ldi $16,1($16)
++ extlb $3,$17,$3
++ ldi $17,1($17)
++ xor $3,$2,$2
++ or $2,$0,$0
++ bne $18,.Loop_cmp
++
++ subl $31,$0,$0
++ srl $0,63,$0
++.Lno_data:
++ ret ($26)
++.end CRYPTO_memcmp
++___
++{
++my ($out,$cnt,$max)=("\$16","\$17","\$18");
++my ($tick,$lasttick)=("\$19","\$20");
++my ($diff,$lastdiff)=("\$21","\$22");
++my ($lock1,$lock2)=("\$23","\$24");
++my ($v0,$ra,$sp,$zero)=("\$0","\$26","\$30","\$31");
++
++print <<___;
++.globl OPENSSL_instrument_bus
++.ent OPENSSL_instrument_bus
++OPENSSL_instrument_bus:
++ .frame $sp,0,$ra
++ .prologue 0
++ mov $cnt,$v0
++
++ rtc $lasttick
++ mov 0,$diff
++
++ #ecb ($out)
++ lldw $tick,0($out)
++ ldi $lock1,1
++ wr_f $lock1
++ addw $diff,$tick,$tick
++ mov $tick,$diff
++ lstw $tick,0($out)
++ rd_f $tick
++ stw $diff,0($out)
++
++.Loop: rtc $tick
++ subl $tick,$lasttick,$diff
++ mov $tick,$lasttick
++
++ #ecb ($out)
++ lldw $tick,0($out)
++ ldi $lock1,1
++ wr_f $lock1
++ addw $diff,$tick,$tick
++ mov $tick,$diff
++ lstw $tick,0($out)
++ rd_f $tick
++ stw $diff,0($out)
++
++ subw $cnt,1,$cnt
++ ldi $out,4($out)
++ bne $cnt,.Loop
++
++ ret ($ra)
++.end OPENSSL_instrument_bus
++
++.globl OPENSSL_instrument_bus2
++.ent OPENSSL_instrument_bus2
++OPENSSL_instrument_bus2: ++ .frame $sp,0,$ra ++ .prologue 0 ++ mov $cnt,$v0 ++ ++ rtc $lasttick ++ mov 0,$diff ++ ++ #ecb ($out) ++ lldw $tick,0($out) ++ ldi $lock1,1 ++ wr_f $lock1 ++ addw $diff,$tick,$tick ++ mov $tick,$diff ++ lstw $tick,0($out) ++ rd_f $tick ++ stw $diff,0($out) ++ ++ rtc $tick ++ subl $tick,$lasttick,$diff ++ mov $tick,$lasttick ++ mov $diff,$lastdiff ++.Loop2: ++ #ecb ($out) ++ lldw $tick,0($out) ++ ldi $lock1,1 ++ wr_f $lock1 ++ addw $diff,$tick,$tick ++ mov $tick,$diff ++ lstw $tick,0($out) ++ rd_f $tick ++ stw $diff,0($out) ++ ++ subw $max,1,$max ++ beq $max,.Ldone2 ++ ++ rtc $tick ++ subl $tick,$lasttick,$diff ++ mov $tick,$lasttick ++ subl $lastdiff,$diff,$tick ++ mov $diff,$lastdiff ++ selne $tick,1,$tick ++ subw $cnt,$tick,$cnt ++ s4addl $tick,$out,$out ++ bne $cnt,.Loop2 ++ ++.Ldone2: ++ subw $v0,$cnt,$v0 ++ ret ($ra) ++.end OPENSSL_instrument_bus2 ++___ ++} ++ ++close STDOUT; +diff --git a/include/crypto/md32_common.h b/include/crypto/md32_common.h +index 3b16f1b..84dc45a 100644 +--- a/include/crypto/md32_common.h ++++ b/include/crypto/md32_common.h +@@ -226,7 +226,7 @@ int HASH_FINAL(unsigned char *md, HASH_CTX *c) + } + + #ifndef MD32_REG_T +-# if defined(__alpha) || defined(__sparcv9) || defined(__mips) ++# if defined(__alpha) || defined(__sw_64) || defined(__sparcv9) || defined(__mips) + # define MD32_REG_T long + /* + * This comment was originally written for MD5, which is why it +-- +2.27.0 + diff --git a/openssl.spec b/openssl.spec index aa2ea1b13e8225f054e65e23464d8279e569d556..df851e7b5645106be2f0344e2cb8f129024bb744 100644 --- a/openssl.spec +++ b/openssl.spec @@ -2,7 +2,7 @@ Name: openssl Epoch: 1 Version: 3.0.12 -Release: 16 +Release: 17 Summary: Cryptography and SSL/TLS Toolkit License: OpenSSL and SSLeay URL: https://www.openssl.org/ @@ -80,6 +80,7 @@ Patch66: backport-CVE-2024-13176-Fix-timing-side-channel.patch Patch9000: add-FIPS_mode_set-support.patch Patch9001: backport-CVE-2024-9143-Harden-BN_GF2m_poly2arr-against-misuse.patch Patch9002: Fix-build-error-for-ppc64le.patch +Patch9003: add-sw_64-support.patch BuildRequires: gcc gcc-c++ perl make lksctp-tools-devel coreutils util-linux zlib-devel Requires: coreutils %{name}-libs%{?_isa} = %{epoch}:%{version}-%{release} @@ -280,6 +281,9 @@ make test || : %ldconfig_scriptlets libs %changelog +* Mon Mar 10 2025 mahailiang - 1:3.0.12-17 +- add sw_64 support + * Sat Feb 8 2025 jinlun - 1:3.0.12-16 - fix CVE-2024-13176