diff --git a/add-sw_64-support.patch b/add-sw_64-support.patch
new file mode 100644
index 0000000000000000000000000000000000000000..6eccddb05346c49761cb91296a22abd99eef6c72
--- /dev/null
+++ b/add-sw_64-support.patch
@@ -0,0 +1,1520 @@
+From dfb837f21d55b9cf0bf78e6b3d5dc02567672400 Mon Sep 17 00:00:00 2001
+From: mahailiang
+Date: Sun, 29 Sep 2024 17:26:05 +0800
+Subject: [PATCH] add sw_64 support
+
+---
+ Configurations/10-main.conf | 12 +
+ crypto/bn/asm/sw_64-mont.pl | 328 ++++++++++++++++++++++
+ crypto/bn/bn_local.h | 2 +-
+ crypto/bn/build.info | 2 +-
+ crypto/modes/asm/ghash-sw_64.pl | 467 ++++++++++++++++++++++++++++++++
+ crypto/sha/asm/sha1-sw_64.pl | 329 ++++++++++++++++++++++
+ crypto/sha/build.info | 2 +-
+ crypto/sw_64cpuid.pl | 273 +++++++++++++++++++
+ include/crypto/md32_common.h | 2 +-
+ 9 files changed, 1413 insertions(+), 4 deletions(-)
+ create mode 100644 crypto/bn/asm/sw_64-mont.pl
+ create mode 100644 crypto/modes/asm/ghash-sw_64.pl
+ create mode 100644 crypto/sha/asm/sha1-sw_64.pl
+ create mode 100644 crypto/sw_64cpuid.pl
+
+diff --git a/Configurations/10-main.conf b/Configurations/10-main.conf
+index 915e7dd..33fd760 100644
+--- a/Configurations/10-main.conf
++++ b/Configurations/10-main.conf
+@@ -984,6 +984,18 @@ my %targets = (
+ asm_arch => 'alpha',
+ perlasm_scheme => "void",
+ },
++ "linux-sw_64-gcc" => {
++ inherit_from => [ "linux-generic64" ],
++ lib_cppflags => add("-DL_ENDIAN"),
++ bn_ops => "SIXTY_FOUR_BIT_LONG",
++ },
++ "linux-sw_64" => {
++ inherit_from => [ "linux-generic64" ],
++ cflags => add("-DL_ENDIAN"),
++ bn_ops => "SIXTY_FOUR_BIT_LONG",
++ perlasm_scheme => "elf",
++ multilib => "64",
++ },
+ "linux-c64xplus" => {
+ inherit_from => [ "BASE_unix" ],
+ # TI_CGT_C6000_7.3.x is a requirement
+diff --git a/crypto/bn/asm/sw_64-mont.pl b/crypto/bn/asm/sw_64-mont.pl
+new file mode 100644
+index 0000000..348b903
+--- /dev/null
++++ b/crypto/bn/asm/sw_64-mont.pl
+@@ -0,0 +1,328 @@
++#! /usr/bin/env perl
++# Copyright 2006-2020 The OpenSSL Project Authors. All Rights Reserved.
++#
++# Licensed under the OpenSSL license (the "License"). You may not use
++# this file except in compliance with the License. You can obtain a copy
++# in the file LICENSE in the source distribution or at
++# https://www.openssl.org/source/license.html
++
++#
++# ====================================================================
++# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
++# project. The module is, however, dual licensed under OpenSSL and
++# CRYPTOGAMS licenses depending on where you obtain it. For further
++# details see http://www.openssl.org/~appro/cryptogams/.
++# ====================================================================
++#
++# On 21264 RSA sign performance improves by 70/35/20/15 percent for
++# 512/1024/2048/4096 bit key lengths. This is against vendor compiler
++# instructed to '-tune host' code with in-line assembler. Other
++# benchmarks improve by 15-20%. To anchor it to something else, the
++# code provides approximately the same performance per GHz as AMD64.
++# I.e. if you compare 1GHz 21264 and 2GHz Opteron, you'll observe ~2x
++# difference.
++
++$output=pop;
++open STDOUT,">$output";
++
++# int bn_mul_mont(
++$rp="a0"; # BN_ULONG *rp,
++$ap="a1"; # const BN_ULONG *ap,
++$bp="a2"; # const BN_ULONG *bp,
++$np="a3"; # const BN_ULONG *np,
++$n0="a4"; # const BN_ULONG *n0,
++$num="a5"; # int num);
++
++$lo0="t0";
++$hi0="t1";
++$lo1="t2";
++$hi1="t3";
++$aj="t4";
++$bi="t5";
++$nj="t6";
++$tp="t7";
++$alo="t8";
++$ahi="t9";
++$nlo="t10";
++$nhi="t11";
++$tj="t12";
++$i="s3";
++$j="s4";
++$m1="s5";
++
++$code=<<___;
++#ifdef __linux__
++#include <asm/regdef.h>
++#else
++#include <asm.h>
++#include <regdef.h>
++#endif
++
++.text
++
++.set noat
++.set noreorder
++
++.globl bn_mul_mont
++.align 5
++.ent bn_mul_mont
++bn_mul_mont:
++ ldi sp,-48(sp)
++ stl ra,0(sp)
++ stl s3,8(sp)
++ stl s4,16(sp)
++ stl s5,24(sp)
++ stl fp,32(sp)
++ mov sp,fp
++ .mask 0x0400f000,-48
++ .frame fp,48,ra
++ .prologue 0
++
++ .align 4
++ .set reorder
++ sextl $num,$num
++ mov 0,v0
++ cmplt $num,4,AT
++ bne AT,.Lexit
++
++ ldl $hi0,0($ap) # ap[0]
++ s8addl $num,16,AT
++ ldl $aj,8($ap)
++ subl sp,AT,sp
++ ldl $bi,0($bp) # bp[0]
++ ldi AT,-4096(zero) # mov -4096,AT
++ ldl $n0,0($n0)
++ and sp,AT,sp
++
++ mull $hi0,$bi,$lo0
++ ldl $hi1,0($np) # np[0]
++ umulh $hi0,$bi,$hi0
++ ldl $nj,8($np)
++
++ mull $lo0,$n0,$m1
++
++ mull $hi1,$m1,$lo1
++ umulh $hi1,$m1,$hi1
++
++ addl $lo1,$lo0,$lo1
++ cmpult $lo1,$lo0,AT
++ addl $hi1,AT,$hi1
++
++ mull $aj,$bi,$alo
++ mov 2,$j
++ umulh $aj,$bi,$ahi
++ mov sp,$tp
++
++ mull $nj,$m1,$nlo
++ s8addl $j,$ap,$aj
++ umulh $nj,$m1,$nhi
++ s8addl $j,$np,$nj
++.align 4
++.L1st:
++ .set noreorder
++ ldl $aj,0($aj)
++ addw $j,1,$j
++ ldl $nj,0($nj)
++ ldi $tp,8($tp)
++
++ addl $alo,$hi0,$lo0
++ mull $aj,$bi,$alo
++ cmpult $lo0,$hi0,AT
++ addl $nlo,$hi1,$lo1
++
++ mull $nj,$m1,$nlo
++ addl $ahi,AT,$hi0
++ cmpult $lo1,$hi1,v0
++ cmplt $j,$num,$tj
++
++ umulh $aj,$bi,$ahi
++ addl $nhi,v0,$hi1
++ addl $lo1,$lo0,$lo1
++ s8addl $j,$ap,$aj
++
++ umulh $nj,$m1,$nhi
++ cmpult $lo1,$lo0,v0
++ addl $hi1,v0,$hi1
++ s8addl $j,$np,$nj
++
++ stl $lo1,-8($tp)
++ nop
++ unop
++ bne $tj,.L1st
++ .set reorder
++
++ addl $alo,$hi0,$lo0
++ addl $nlo,$hi1,$lo1
++ cmpult $lo0,$hi0,AT
++ cmpult $lo1,$hi1,v0
++ addl $ahi,AT,$hi0
++ addl $nhi,v0,$hi1
++
++ addl $lo1,$lo0,$lo1
++ cmpult $lo1,$lo0,v0
++ addl $hi1,v0,$hi1
++
++ stl $lo1,0($tp)
++
++ addl $hi1,$hi0,$hi1
++ cmpult $hi1,$hi0,AT
++ stl $hi1,8($tp)
++ stl AT,16($tp)
++
++ mov 1,$i
++.align 4
++.Louter:
++ s8addl $i,$bp,$bi
++ ldl $hi0,0($ap)
++ ldl $aj,8($ap)
++ ldl $bi,0($bi)
++ ldl $hi1,0($np)
++ ldl $nj,8($np)
++ ldl $tj,0(sp)
++
++ mull $hi0,$bi,$lo0
++ umulh $hi0,$bi,$hi0
++
++ addl $lo0,$tj,$lo0
++ cmpult $lo0,$tj,AT
++ addl $hi0,AT,$hi0
++
++ mull $lo0,$n0,$m1
++
++ mull $hi1,$m1,$lo1
++ umulh $hi1,$m1,$hi1
++
++ addl $lo1,$lo0,$lo1
++ cmpult $lo1,$lo0,AT
++ mov 2,$j
++ addl $hi1,AT,$hi1
++
++ mull $aj,$bi,$alo
++ mov sp,$tp
++ umulh $aj,$bi,$ahi
++
++ mull $nj,$m1,$nlo
++ s8addl $j,$ap,$aj
++ umulh $nj,$m1,$nhi
++.align 4
++.Linner:
++ .set noreorder
++ ldl $tj,8($tp) #L0
++ nop #U1
++ ldl $aj,0($aj) #L1
++ s8addl $j,$np,$nj #U0
++
++ ldl $nj,0($nj) #L0
++ nop #U1
++ addl $alo,$hi0,$lo0 #L1
++ ldi $tp,8($tp)
++
++ mull $aj,$bi,$alo #U1
++ cmpult $lo0,$hi0,AT #L0
++ addl $nlo,$hi1,$lo1 #L1
++ addw $j,1,$j
++
++ mull $nj,$m1,$nlo #U1
++ addl $ahi,AT,$hi0 #L0
++ addl $lo0,$tj,$lo0 #L1
++ cmpult $lo1,$hi1,v0 #U0
++
++ umulh $aj,$bi,$ahi #U1
++ cmpult $lo0,$tj,AT #L0
++ addl $lo1,$lo0,$lo1 #L1
++ addl $nhi,v0,$hi1 #U0
++
++ umulh $nj,$m1,$nhi #U1
++ s8addl $j,$ap,$aj #L0
++ cmpult $lo1,$lo0,v0 #L1
++ cmplt $j,$num,$tj #U0 # borrow $tj
++
++ addl $hi0,AT,$hi0 #L0
++ addl $hi1,v0,$hi1 #U1
++ stl $lo1,-8($tp) #L1
++ bne $tj,.Linner #U0
++ .set reorder
++
++ ldl $tj,8($tp)
++ addl $alo,$hi0,$lo0
++ addl $nlo,$hi1,$lo1
++ cmpult $lo0,$hi0,AT
++ cmpult $lo1,$hi1,v0
++ addl $ahi,AT,$hi0
++ addl $nhi,v0,$hi1
++
++ addl $lo0,$tj,$lo0
++ cmpult $lo0,$tj,AT
++ addl $hi0,AT,$hi0
++
++ ldl $tj,16($tp)
++ addl $lo1,$lo0,$j
++ cmpult $j,$lo0,v0
++ addl $hi1,v0,$hi1
++
++ addl $hi1,$hi0,$lo1
++ stl $j,0($tp)
++ cmpult $lo1,$hi0,$hi1
++ addl $lo1,$tj,$lo1
++ cmpult $lo1,$tj,AT
++ addw $i,1,$i
++ addl $hi1,AT,$hi1
++ stl $lo1,8($tp)
++ cmplt $i,$num,$tj # borrow $tj
++ stl $hi1,16($tp)
++ bne $tj,.Louter
++
++ s8addl $num,sp,$tj # &tp[num]
++ mov $rp,$bp # put rp aside
++ mov sp,$tp
++ mov sp,$ap
++ mov 0,$hi0 # clear borrow bit
++
++.align 4
++.Lsub: ldl $lo0,0($tp)
++ ldl $lo1,0($np)
++ ldi $tp,8($tp)
++ ldi $np,8($np)
++ subl $lo0,$lo1,$lo1 # tp[i]-np[i]
++ cmpult $lo0,$lo1,AT
++ subl $lo1,$hi0,$lo0
++ cmpult $lo1,$lo0,$hi0
++ or $hi0,AT,$hi0
++ stl $lo0,0($rp)
++ cmpult $tp,$tj,v0
++ ldi $rp,8($rp)
++ bne v0,.Lsub
++
++ subl $hi1,$hi0,$hi0 # handle upmost overflow bit
++ mov sp,$tp
++ mov $bp,$rp # restore rp
++
++.align 4
++.Lcopy: ldl $aj,0($tp) # conditional copy
++ ldl $nj,0($rp)
++ ldi $tp,8($tp)
++ ldi $rp,8($rp)
++ seleq $hi0,$nj,$aj
++ stl zero,-8($tp) # zap tp
++ cmpult $tp,$tj,AT
++ stl $aj,-8($rp)
++ bne AT,.Lcopy
++ mov 1,v0
++
++.Lexit:
++ .set noreorder
++ mov fp,sp
++ /*ldl ra,0(sp)*/
++ ldl s3,8(sp)
++ ldl s4,16(sp)
++ ldl s5,24(sp)
++ ldl fp,32(sp)
++ ldi sp,48(sp)
++ ret (ra)
++.end bn_mul_mont
++.ascii "Montgomery Multiplication for Sw_64, CRYPTOGAMS by <appro@openssl.org>"
++.align 2
++___
++
++print $code;
++close STDOUT or die "error closing STDOUT: $!";
+diff --git a/crypto/bn/bn_local.h b/crypto/bn/bn_local.h
+index 50e9d26..e73bd3f 100644
+--- a/crypto/bn/bn_local.h
++++ b/crypto/bn/bn_local.h
+@@ -387,7 +387,7 @@ struct bn_gencb_st {
+ # define BN_UMULT_LOHI(low,high,a,b) ({ \
+ uint128_t ret=(uint128_t)(a)*(b); \
+ (high)=ret>>64; (low)=ret; })
+-# elif defined(__alpha) && (defined(SIXTY_FOUR_BIT_LONG) || defined(SIXTY_FOUR_BIT))
++# elif (defined(__alpha) || defined(__sw_64)) && (defined(SIXTY_FOUR_BIT_LONG) || defined(SIXTY_FOUR_BIT))
+ # if defined(__DECC)
+ # include <c_asm.h>
+ # define BN_UMULT_HIGH(a,b) (BN_ULONG)asm("umulh %a0,%a1,%v0",(a),(b))
+diff --git a/crypto/bn/build.info b/crypto/bn/build.info
+index c4ba51b..b598423 100644
+--- a/crypto/bn/build.info
++++ b/crypto/bn/build.info
+@@ -168,7 +168,7 @@ GENERATE[ppc-mont.s]=asm/ppc-mont.pl
+ GENERATE[ppc64-mont.s]=asm/ppc64-mont.pl
+
+ GENERATE[alpha-mont.S]=asm/alpha-mont.pl
+-
++GENERATE[sw_64-mont.S]=asm/sw_64-mont.pl
+ GENERATE[armv4-mont.S]=asm/armv4-mont.pl
+ INCLUDE[armv4-mont.o]=..
+ GENERATE[armv4-gf2m.S]=asm/armv4-gf2m.pl
+diff --git a/crypto/modes/asm/ghash-sw_64.pl b/crypto/modes/asm/ghash-sw_64.pl
+new file mode 100644
+index 0000000..59b5596
+--- /dev/null
++++ b/crypto/modes/asm/ghash-sw_64.pl
+@@ -0,0 +1,467 @@
++#! /usr/bin/env perl
++# Copyright 2010-2020 The OpenSSL Project Authors. All Rights Reserved.
++#
++# Licensed under the OpenSSL license (the "License"). You may not use
++# this file except in compliance with the License. You can obtain a copy
++# in the file LICENSE in the source distribution or at
++# https://www.openssl.org/source/license.html
++
++#
++# ====================================================================
++# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
++# project. The module is, however, dual licensed under OpenSSL and
++# CRYPTOGAMS licenses depending on where you obtain it. For further
++# details see http://www.openssl.org/~appro/cryptogams/.
++# ====================================================================
++#
++# March 2010
++#
++# The module implements "4-bit" GCM GHASH function and underlying
++# single multiplication operation in GF(2^128). "4-bit" means that it
++# uses 256 bytes per-key table [+128 bytes shared table]. Even though
++# loops are aggressively modulo-scheduled in respect to references to
++# Htbl and Z.hi updates for 8 cycles per byte, measured performance is
++# ~12 cycles per processed byte on 21264 CPU. It seems to be a dynamic
++# scheduling "glitch," because uprofile(1) indicates uniform sample
++# distribution, as if all instruction bundles execute in 1.5 cycles.
++# Meaning that it could have been even faster, yet 12 cycles is ~60%
++# better than gcc-generated code and ~80% than code generated by vendor
++# compiler.
++
++$cnt="v0"; # $0
++$t0="t0";
++$t1="t1";
++$t2="t2";
++$Thi0="t3"; # $4
++$Tlo0="t4";
++$Thi1="t5";
++$Tlo1="t6";
++$rem="t7"; # $8
++#################
++$Xi="a0"; # $16, input argument block
++$Htbl="a1";
++$inp="a2";
++$len="a3";
++$nlo="a4"; # $20
++$nhi="a5";
++$Zhi="t8";
++$Zlo="t9";
++$Xhi="t10"; # $24
++$Xlo="t11";
++$remp="t12";
++$rem_4bit="AT"; # $28
++
++{ my $N;
++ sub loop() {
++
++ $N++;
++$code.=<<___;
++.align 4
++ extlb $Xlo,7,$nlo
++ and $nlo,0xf0,$nhi
++ sll $nlo,4,$nlo
++ and $nlo,0xf0,$nlo
++
++ addl $nlo,$Htbl,$nlo
++ ldl $Zlo,8($nlo)
++ addl $nhi,$Htbl,$nhi
++ ldl $Zhi,0($nlo)
++
++ and $Zlo,0x0f,$remp
++ sll $Zhi,60,$t0
++ ldi $cnt,6(zero)
++ extlb $Xlo,6,$nlo
++
++ ldl $Tlo1,8($nhi)
++ s8addl $remp,$rem_4bit,$remp
++ ldl $Thi1,0($nhi)
++ srl $Zlo,4,$Zlo
++
++ ldl $rem,0($remp)
++ srl $Zhi,4,$Zhi
++ xor $t0,$Zlo,$Zlo
++ and $nlo,0xf0,$nhi
++
++ xor $Tlo1,$Zlo,$Zlo
++ sll $nlo,4,$nlo
++ xor $Thi1,$Zhi,$Zhi
++ and $nlo,0xf0,$nlo
++
++ addl $nlo,$Htbl,$nlo
++ ldl $Tlo0,8($nlo)
++ addl $nhi,$Htbl,$nhi
++ ldl $Thi0,0($nlo)
++
++.Looplo$N:
++ and $Zlo,0x0f,$remp
++ sll $Zhi,60,$t0
++ subl $cnt,1,$cnt
++ srl $Zlo,4,$Zlo
++
++ ldl $Tlo1,8($nhi)
++ xor $rem,$Zhi,$Zhi
++ ldl $Thi1,0($nhi)
++ s8addl $remp,$rem_4bit,$remp
++
++ ldl $rem,0($remp)
++ srl $Zhi,4,$Zhi
++ xor $t0,$Zlo,$Zlo
++ extlb $Xlo,$cnt,$nlo
++
++ and $nlo,0xf0,$nhi
++ xor $Thi0,$Zhi,$Zhi
++ xor $Tlo0,$Zlo,$Zlo
++ sll $nlo,4,$nlo
++
++
++ and $Zlo,0x0f,$remp
++ sll $Zhi,60,$t0
++ and $nlo,0xf0,$nlo
++ srl $Zlo,4,$Zlo
++
++ s8addl $remp,$rem_4bit,$remp
++ xor $rem,$Zhi,$Zhi
++ addl $nlo,$Htbl,$nlo
++ addl $nhi,$Htbl,$nhi
++
++ ldl $rem,0($remp)
++ srl $Zhi,4,$Zhi
++ ldl $Tlo0,8($nlo)
++ xor $t0,$Zlo,$Zlo
++
++ xor $Tlo1,$Zlo,$Zlo
++ xor $Thi1,$Zhi,$Zhi
++ ldl $Thi0,0($nlo)
++ bne $cnt,.Looplo$N
++
++
++ and $Zlo,0x0f,$remp
++ sll $Zhi,60,$t0
++ ldi $cnt,7(zero)
++ srl $Zlo,4,$Zlo
++
++ ldl $Tlo1,8($nhi)
++ xor $rem,$Zhi,$Zhi
++ ldl $Thi1,0($nhi)
++ s8addl $remp,$rem_4bit,$remp
++
++ ldl $rem,0($remp)
++ srl $Zhi,4,$Zhi
++ xor $t0,$Zlo,$Zlo
++ extlb $Xhi,$cnt,$nlo
++
++ and $nlo,0xf0,$nhi
++ xor $Thi0,$Zhi,$Zhi
++ xor $Tlo0,$Zlo,$Zlo
++ sll $nlo,4,$nlo
++
++ and $Zlo,0x0f,$remp
++ sll $Zhi,60,$t0
++ and $nlo,0xf0,$nlo
++ srl $Zlo,4,$Zlo
++
++ s8addl $remp,$rem_4bit,$remp
++ xor $rem,$Zhi,$Zhi
++ addl $nlo,$Htbl,$nlo
++ addl $nhi,$Htbl,$nhi
++
++ ldl $rem,0($remp)
++ srl $Zhi,4,$Zhi
++ ldl $Tlo0,8($nlo)
++ xor $t0,$Zlo,$Zlo
++
++ xor $Tlo1,$Zlo,$Zlo
++ xor $Thi1,$Zhi,$Zhi
++ ldl $Thi0,0($nlo)
++ unop
++
++
++.Loophi$N:
++ and $Zlo,0x0f,$remp
++ sll $Zhi,60,$t0
++ subl $cnt,1,$cnt
++ srl $Zlo,4,$Zlo
++
++ ldl $Tlo1,8($nhi)
++ xor $rem,$Zhi,$Zhi
++ ldl $Thi1,0($nhi)
++ s8addl $remp,$rem_4bit,$remp
++
++ ldl $rem,0($remp)
++ srl $Zhi,4,$Zhi
++ xor $t0,$Zlo,$Zlo
++ extlb $Xhi,$cnt,$nlo
++
++ and $nlo,0xf0,$nhi
++ xor $Thi0,$Zhi,$Zhi
++ xor $Tlo0,$Zlo,$Zlo
++ sll $nlo,4,$nlo
++
++
++ and $Zlo,0x0f,$remp
++ sll $Zhi,60,$t0
++ and $nlo,0xf0,$nlo
++ srl $Zlo,4,$Zlo
++
++ s8addl $remp,$rem_4bit,$remp
++ xor $rem,$Zhi,$Zhi
++ addl $nlo,$Htbl,$nlo
++ addl $nhi,$Htbl,$nhi
++
++ ldl $rem,0($remp)
++ srl $Zhi,4,$Zhi
++ ldl $Tlo0,8($nlo)
++ xor $t0,$Zlo,$Zlo
++
++ xor $Tlo1,$Zlo,$Zlo
++ xor $Thi1,$Zhi,$Zhi
++ ldl $Thi0,0($nlo)
++ bne $cnt,.Loophi$N
++
++
++ and $Zlo,0x0f,$remp
++ sll $Zhi,60,$t0
++ srl $Zlo,4,$Zlo
++
++ ldl $Tlo1,8($nhi)
++ xor $rem,$Zhi,$Zhi
++ ldl $Thi1,0($nhi)
++ s8addl $remp,$rem_4bit,$remp
++
++ ldl $rem,0($remp)
++ srl $Zhi,4,$Zhi
++ xor $t0,$Zlo,$Zlo
++
++ xor $Tlo0,$Zlo,$Zlo
++ xor $Thi0,$Zhi,$Zhi
++
++ and $Zlo,0x0f,$remp
++ sll $Zhi,60,$t0
++ srl $Zlo,4,$Zlo
++
++ s8addl $remp,$rem_4bit,$remp
++ xor $rem,$Zhi,$Zhi
++
++ ldl $rem,0($remp)
++ srl $Zhi,4,$Zhi
++ xor $Tlo1,$Zlo,$Zlo
++ xor $Thi1,$Zhi,$Zhi
++ xor $t0,$Zlo,$Zlo
++ xor $rem,$Zhi,$Zhi
++___
++}}
++
++$code=<<___;
++#ifdef __linux__
++#include <asm/regdef.h>
++#else
++#include <asm.h>
++#include <regdef.h>
++#endif
++
++.text
++
++.set noat
++.set noreorder
++.globl gcm_gmult_4bit
++.align 4
++.ent gcm_gmult_4bit
++gcm_gmult_4bit:
++ .frame sp,0,ra
++ .prologue 0
++
++ ldl $Xlo,8($Xi)
++ ldl $Xhi,0($Xi)
++
++ bsr $t0,picmeup
++ nop
++___
++
++ &loop();
++
++$code.=<<___;
++ srl $Zlo,24,$t0 # byte swap
++ srl $Zlo,8,$t1
++
++ sll $Zlo,8,$t2
++ sll $Zlo,24,$Zlo
++ zapnot $t0,0x11,$t0
++ zapnot $t1,0x22,$t1
++
++ zapnot $Zlo,0x88,$Zlo
++ or $t0,$t1,$t0
++ zapnot $t2,0x44,$t2
++
++ or $Zlo,$t0,$Zlo
++ srl $Zhi,24,$t0
++ srl $Zhi,8,$t1
++
++ or $Zlo,$t2,$Zlo
++ sll $Zhi,8,$t2
++ sll $Zhi,24,$Zhi
++
++ srl $Zlo,32,$Xlo
++ sll $Zlo,32,$Zlo
++
++ zapnot $t0,0x11,$t0
++ zapnot $t1,0x22,$t1
++ or $Zlo,$Xlo,$Xlo
++
++ zapnot $Zhi,0x88,$Zhi
++ or $t0,$t1,$t0
++ zapnot $t2,0x44,$t2
++
++ or $Zhi,$t0,$Zhi
++ or $Zhi,$t2,$Zhi
++
++ srl $Zhi,32,$Xhi
++ sll $Zhi,32,$Zhi
++
++ or $Zhi,$Xhi,$Xhi
++ stl $Xlo,8($Xi)
++ stl $Xhi,0($Xi)
++
++ ret (ra)
++.end gcm_gmult_4bit
++___
++
++$inhi="s0";
++$inlo="s1";
++
++$code.=<<___;
++.globl gcm_ghash_4bit
++.align 4
++.ent gcm_ghash_4bit
++gcm_ghash_4bit:
++ ldi sp,-32(sp)
++ stl ra,0(sp)
++ stl s0,8(sp)
++ stl s1,16(sp)
++ .mask 0x04000600,-32
++ .frame sp,32,ra
++ .prologue 0
++
++ ldl_u $inhi,0($inp)
++ ldl_u $Thi0,7($inp)
++ ldl_u $inlo,8($inp)
++ ldl_u $Tlo0,15($inp)
++ ldl $Xhi,0($Xi)
++ ldl $Xlo,8($Xi)
++
++ bsr $t0,picmeup
++ nop
++
++.Louter:
++ extll $inhi,$inp,$inhi
++ exthl $Thi0,$inp,$Thi0
++ or $inhi,$Thi0,$inhi
++ ldi $inp,16($inp)
++
++ extll $inlo,$inp,$inlo
++ exthl $Tlo0,$inp,$Tlo0
++ or $inlo,$Tlo0,$inlo
++ subl $len,16,$len
++
++ xor $Xlo,$inlo,$Xlo
++ xor $Xhi,$inhi,$Xhi
++___
++
++ &loop();
++
++$code.=<<___;
++ srl $Zlo,24,$t0 # byte swap
++ srl $Zlo,8,$t1
++
++ sll $Zlo,8,$t2
++ sll $Zlo,24,$Zlo
++ zapnot $t0,0x11,$t0
++ zapnot $t1,0x22,$t1
++
++ zapnot $Zlo,0x88,$Zlo
++ or $t0,$t1,$t0
++ zapnot $t2,0x44,$t2
++
++ or $Zlo,$t0,$Zlo
++ srl $Zhi,24,$t0
++ srl $Zhi,8,$t1
++
++ or $Zlo,$t2,$Zlo
++ sll $Zhi,8,$t2
++ sll $Zhi,24,$Zhi
++
++ srl $Zlo,32,$Xlo
++ sll $Zlo,32,$Zlo
++ beq $len,.Ldone
++
++ zapnot $t0,0x11,$t0
++ zapnot $t1,0x22,$t1
++ or $Zlo,$Xlo,$Xlo
++ ldl_u $inhi,0($inp)
++
++ zapnot $Zhi,0x88,$Zhi
++ or $t0,$t1,$t0
++ zapnot $t2,0x44,$t2
++ ldl_u $Thi0,7($inp)
++
++ or $Zhi,$t0,$Zhi
++ or $Zhi,$t2,$Zhi
++ ldl_u $inlo,8($inp)
++ ldl_u $Tlo0,15($inp)
++
++ srl $Zhi,32,$Xhi
++ sll $Zhi,32,$Zhi
++
++ or $Zhi,$Xhi,$Xhi
++ br zero,.Louter
++
++.Ldone:
++ zapnot $t0,0x11,$t0
++ zapnot $t1,0x22,$t1
++ or $Zlo,$Xlo,$Xlo
++
++ zapnot $Zhi,0x88,$Zhi
++ or $t0,$t1,$t0
++ zapnot $t2,0x44,$t2
++
++ or $Zhi,$t0,$Zhi
++ or $Zhi,$t2,$Zhi
++
++ srl $Zhi,32,$Xhi
++ sll $Zhi,32,$Zhi
++
++ or $Zhi,$Xhi,$Xhi
++
++ stl $Xlo,8($Xi)
++ stl $Xhi,0($Xi)
++
++ .set noreorder
++ /*ldl ra,0(sp)*/
++ ldl s0,8(sp)
++ ldl s1,16(sp)
++ ldi sp,32(sp)
++ ret (ra)
++.end gcm_ghash_4bit
++
++.align 4
++.ent picmeup
++picmeup:
++ .frame sp,0,$t0
++ .prologue 0
++ br $rem_4bit,.Lpic
++.Lpic: ldi $rem_4bit,12($rem_4bit)
++ ret ($t0)
++.end picmeup
++ nop
++rem_4bit:
++ .long 0,0x0000<<16, 0,0x1C20<<16, 0,0x3840<<16, 0,0x2460<<16
++ .long 0,0x7080<<16, 0,0x6CA0<<16, 0,0x48C0<<16, 0,0x54E0<<16
++ .long 0,0xE100<<16, 0,0xFD20<<16, 0,0xD940<<16, 0,0xC560<<16
++ .long 0,0x9180<<16, 0,0x8DA0<<16, 0,0xA9C0<<16, 0,0xB5E0<<16
++.ascii "GHASH for Sw_64, CRYPTOGAMS by <appro@openssl.org>"
++.align 4
++
++___
++$output=pop and open STDOUT,">$output";
++print $code;
++close STDOUT or die "error closing STDOUT: $!";
++
+diff --git a/crypto/sha/asm/sha1-sw_64.pl b/crypto/sha/asm/sha1-sw_64.pl
+new file mode 100644
+index 0000000..cce4015
+--- /dev/null
++++ b/crypto/sha/asm/sha1-sw_64.pl
+@@ -0,0 +1,329 @@
++#! /usr/bin/env perl
++# Copyright 2009-2020 The OpenSSL Project Authors. All Rights Reserved.
++#
++# Licensed under the OpenSSL license (the "License"). You may not use
++# this file except in compliance with the License. You can obtain a copy
++# in the file LICENSE in the source distribution or at
++# https://www.openssl.org/source/license.html
++
++
++# ====================================================================
++# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
++# project. The module is, however, dual licensed under OpenSSL and
++# CRYPTOGAMS licenses depending on where you obtain it. For further
++# details see http://www.openssl.org/~appro/cryptogams/.
++# ====================================================================
++
++# SHA1 block procedure for Sw_64.
++
++# On 21264 performance is 33% better than code generated by vendor
++# compiler, and 75% better than GCC [3.4], and in absolute terms is
++# 8.7 cycles per processed byte. Implementation features vectorized
++# byte swap, but not Xupdate.
++
++@X=( "\$0", "\$1", "\$2", "\$3", "\$4", "\$5", "\$6", "\$7",
++ "\$8", "\$9", "\$10", "\$11", "\$12", "\$13", "\$14", "\$15");
++$ctx="a0"; # $16
++$inp="a1";
++$num="a2";
++$A="a3";
++$B="a4"; # 20
++$C="a5";
++$D="t8";
++$E="t9"; @V=($A,$B,$C,$D,$E);
++$t0="t10"; # 24
++$t1="t11";
++$t2="ra";
++$t3="t12";
++$K="AT"; # 28
++
++sub BODY_00_19 {
++my ($i,$a,$b,$c,$d,$e)=@_;
++my $j=$i+1;
++$code.=<<___ if ($i==0);
++ ldl_u @X[0],0+0($inp)
++ ldl_u @X[1],0+7($inp)
++___
++$code.=<<___ if (!($i&1) && $i<14);
++ ldl_u @X[$i+2],($i+2)*4+0($inp)
++ ldl_u @X[$i+3],($i+2)*4+7($inp)
++___
++$code.=<<___ if (!($i&1) && $i<15);
++ extll @X[$i],$inp,@X[$i]
++ exthl @X[$i+1],$inp,@X[$i+1]
++
++ or @X[$i+1],@X[$i],@X[$i] # pair of 32-bit values are fetched
++
++ srl @X[$i],24,$t0 # vectorized byte swap
++ srl @X[$i],8,$t2
++
++ sll @X[$i],8,$t3
++ sll @X[$i],24,@X[$i]
++ zapnot $t0,0x11,$t0
++ zapnot $t2,0x22,$t2
++
++ zapnot @X[$i],0x88,@X[$i]
++ or $t0,$t2,$t0
++ zapnot $t3,0x44,$t3
++ sll $a,5,$t1
++
++ or @X[$i],$t0,@X[$i]
++ addw $K,$e,$e
++ and $b,$c,$t2
++ zapnot $a,0xf,$a
++
++ or @X[$i],$t3,@X[$i]
++ srl $a,27,$t0
++ bic $d,$b,$t3
++ sll $b,30,$b
++
++ extll @X[$i],4,@X[$i+1] # extract upper half
++ or $t2,$t3,$t2
++ addw @X[$i],$e,$e
++
++ addw $t1,$e,$e
++ srl $b,32,$t3
++ zapnot @X[$i],0xf,@X[$i]
++
++ addw $t0,$e,$e
++ addw $t2,$e,$e
++ or $t3,$b,$b
++___
++$code.=<<___ if (($i&1) && $i<15);
++ sll $a,5,$t1
++ addw $K,$e,$e
++ and $b,$c,$t2
++ zapnot $a,0xf,$a
++
++ srl $a,27,$t0
++ addw @X[$i%16],$e,$e
++ bic $d,$b,$t3
++ sll $b,30,$b
++
++ or $t2,$t3,$t2
++ addw $t1,$e,$e
++ srl $b,32,$t3
++ zapnot @X[$i],0xf,@X[$i]
++
++ addw $t0,$e,$e
++ addw $t2,$e,$e
++ or $t3,$b,$b
++___
++$code.=<<___ if ($i>=15); # with forward Xupdate
++ sll $a,5,$t1
++ addw $K,$e,$e
++ and $b,$c,$t2
++ xor @X[($j+2)%16],@X[$j%16],@X[$j%16]
++
++ zapnot $a,0xf,$a
++ addw @X[$i%16],$e,$e
++ bic $d,$b,$t3
++ xor @X[($j+8)%16],@X[$j%16],@X[$j%16]
++
++ srl $a,27,$t0
++ addw $t1,$e,$e
++ or $t2,$t3,$t2
++ xor @X[($j+13)%16],@X[$j%16],@X[$j%16]
++
++ sll $b,30,$b
++ addw $t0,$e,$e
++ srl @X[$j%16],31,$t1
++
++ addw $t2,$e,$e
++ srl $b,32,$t3
++ addw @X[$j%16],@X[$j%16],@X[$j%16]
++
++ or $t3,$b,$b
++ zapnot @X[$i%16],0xf,@X[$i%16]
++ or $t1,@X[$j%16],@X[$j%16]
++___
++}
++
++sub BODY_20_39 {
++my ($i,$a,$b,$c,$d,$e)=@_;
++my $j=$i+1;
++$code.=<<___ if ($i<79); # with forward Xupdate
++ sll $a,5,$t1
++ addw $K,$e,$e
++ zapnot $a,0xf,$a
++ xor @X[($j+2)%16],@X[$j%16],@X[$j%16]
++
++ sll $b,30,$t3
++ addw $t1,$e,$e
++ xor $b,$c,$t2
++ xor @X[($j+8)%16],@X[$j%16],@X[$j%16]
++
++ srl $b,2,$b
++ addw @X[$i%16],$e,$e
++ xor $d,$t2,$t2
++ xor @X[($j+13)%16],@X[$j%16],@X[$j%16]
++
++ srl @X[$j%16],31,$t1
++ addw $t2,$e,$e
++ srl $a,27,$t0
++ addw @X[$j%16],@X[$j%16],@X[$j%16]
++
++ or $t3,$b,$b
++ addw $t0,$e,$e
++ or $t1,@X[$j%16],@X[$j%16]
++___
++$code.=<<___ if ($i<77);
++ zapnot @X[$i%16],0xf,@X[$i%16]
++___
++$code.=<<___ if ($i==79); # with context fetch
++ sll $a,5,$t1
++ addw $K,$e,$e
++ zapnot $a,0xf,$a
++ ldw @X[0],0($ctx)
++
++ sll $b,30,$t3
++ addw $t1,$e,$e
++ xor $b,$c,$t2
++ ldw @X[1],4($ctx)
++
++ srl $b,2,$b
++ addw @X[$i%16],$e,$e
++ xor $d,$t2,$t2
++ ldw @X[2],8($ctx)
++
++ srl $a,27,$t0
++ addw $t2,$e,$e
++ ldw @X[3],12($ctx)
++
++ or $t3,$b,$b
++ addw $t0,$e,$e
++ ldw @X[4],16($ctx)
++___
++}
++
++sub BODY_40_59 {
++my ($i,$a,$b,$c,$d,$e)=@_;
++my $j=$i+1;
++$code.=<<___; # with forward Xupdate
++ sll $a,5,$t1
++ addw $K,$e,$e
++ zapnot $a,0xf,$a
++ xor @X[($j+2)%16],@X[$j%16],@X[$j%16]
++
++ srl $a,27,$t0
++ and $b,$c,$t2
++ and $b,$d,$t3
++ xor @X[($j+8)%16],@X[$j%16],@X[$j%16]
++
++ sll $b,30,$b
++ addw $t1,$e,$e
++ xor @X[($j+13)%16],@X[$j%16],@X[$j%16]
++
++ srl @X[$j%16],31,$t1
++ addw $t0,$e,$e
++ or $t2,$t3,$t2
++ and $c,$d,$t3
++
++ or $t2,$t3,$t2
++ srl $b,32,$t3
++ addw @X[$i%16],$e,$e
++ addw @X[$j%16],@X[$j%16],@X[$j%16]
++
++ or $t3,$b,$b
++ addw $t2,$e,$e
++ or $t1,@X[$j%16],@X[$j%16]
++ zapnot @X[$i%16],0xf,@X[$i%16]
++___
++}
++
++$code=<<___;
++#ifdef __linux__
++#include <asm/regdef.h>
++#else
++#include <asm.h>
++#include <regdef.h>
++#endif
++
++.text
++
++.set noat
++.set noreorder
++.globl sha1_block_data_order
++.align 5
++.ent sha1_block_data_order
++sha1_block_data_order:
++ ldi sp,-64(sp)
++ stl ra,0(sp)
++ stl s0,8(sp)
++ stl s1,16(sp)
++ stl s2,24(sp)
++ stl s3,32(sp)
++ stl s4,40(sp)
++ stl s5,48(sp)
++ stl fp,56(sp)
++ .mask 0x0400fe00,-64
++ .frame sp,64,ra
++ .prologue 0
++
++ ldw $A,0($ctx)
++ ldw $B,4($ctx)
++ sll $num,6,$num
++ ldw $C,8($ctx)
++ ldw $D,12($ctx)
++ ldw $E,16($ctx)
++ addl $inp,$num,$num
++
++.Lloop:
++ .set noreorder
++ ldih $K,23170(zero)
++ zapnot $B,0xf,$B
++ ldi $K,31129($K) # K_00_19
++___
++for ($i=0;$i<20;$i++) { &BODY_00_19($i,@V); unshift(@V,pop(@V)); }
++
++$code.=<<___;
++ ldih $K,28378(zero)
++ ldi $K,-5215($K) # K_20_39
++___
++for (;$i<40;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); }
++
++$code.=<<___;
++ ldih $K,-28900(zero)
++ ldi $K,-17188($K) # K_40_59
++___
++for (;$i<60;$i++) { &BODY_40_59($i,@V); unshift(@V,pop(@V)); }
++
++$code.=<<___;
++ ldih $K,-13725(zero)
++ ldi $K,-15914($K) # K_60_79
++___
++for (;$i<80;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); }
++
++$code.=<<___;
++ addw @X[0],$A,$A
++ addw @X[1],$B,$B
++ addw @X[2],$C,$C
++ addw @X[3],$D,$D
++ addw @X[4],$E,$E
++ stw $A,0($ctx)
++ stw $B,4($ctx)
++ addl $inp,64,$inp
++ stw $C,8($ctx)
++ stw $D,12($ctx)
++ stw $E,16($ctx)
++ cmpult $inp,$num,$t1
++ bne $t1,.Lloop
++
++ .set noreorder
++ ldl ra,0(sp)
++ ldl s0,8(sp)
++ ldl s1,16(sp)
++ ldl s2,24(sp)
++ ldl s3,32(sp)
++ ldl s4,40(sp)
++ ldl s5,48(sp)
++ ldl fp,56(sp)
++ ldi sp,64(sp)
++ ret (ra)
++.end sha1_block_data_order
++.ascii "SHA1 block transform for Sw_64, CRYPTOGAMS by <appro@openssl.org>"
++.align 2
++___
++$output=pop and open STDOUT,">$output";
++print $code;
++close STDOUT or die "error closing STDOUT: $!";
+diff --git a/crypto/sha/build.info b/crypto/sha/build.info
+index 556a658..9c29460 100644
+--- a/crypto/sha/build.info
++++ b/crypto/sha/build.info
+@@ -103,7 +103,7 @@ GENERATE[sha256-ia64.s]=asm/sha512-ia64.pl
+ GENERATE[sha512-ia64.s]=asm/sha512-ia64.pl
+
+ GENERATE[sha1-alpha.S]=asm/sha1-alpha.pl
+-
++GENERATE[sha1-sw_64.S]=asm/sha1-sw_64.pl
+ GENERATE[sha1-x86_64.s]=asm/sha1-x86_64.pl
+ GENERATE[sha1-mb-x86_64.s]=asm/sha1-mb-x86_64.pl
+ GENERATE[sha256-x86_64.s]=asm/sha512-x86_64.pl
+diff --git a/crypto/sw_64cpuid.pl b/crypto/sw_64cpuid.pl
+new file mode 100644
+index 0000000..0f2d44a
+--- /dev/null
++++ b/crypto/sw_64cpuid.pl
+@@ -0,0 +1,273 @@
++#! /usr/bin/env perl
++# Copyright 2010-2016 The OpenSSL Project Authors. All Rights Reserved.
++#
++# Licensed under the OpenSSL license (the "License"). You may not use
++# this file except in compliance with the License. You can obtain a copy
++# in the file LICENSE in the source distribution or at
++# https://www.openssl.org/source/license.html
++
++
++$output = pop;
++open STDOUT,">$output";
++
++print <<'___';
++.text
++
++.set noat
++
++.globl OPENSSL_cpuid_setup
++.ent OPENSSL_cpuid_setup
++OPENSSL_cpuid_setup:
++ .frame $30,0,$26
++ .prologue 0
++ ret ($26)
++.end OPENSSL_cpuid_setup
++
++.globl OPENSSL_wipe_cpu
++.ent OPENSSL_wipe_cpu
++OPENSSL_wipe_cpu:
++ .frame $30,0,$26
++ .prologue 0
++ clr $1
++ clr $2
++ clr $3
++ clr $4
++ clr $5
++ clr $6
++ clr $7
++ clr $8
++ clr $16
++ clr $17
++ clr $18
++ clr $19
++ clr $20
++ clr $21
++ clr $22
++ clr $23
++ clr $24
++ clr $25
++ clr $27
++ clr $at
++ clr $29
++ fclr $f0
++ fclr $f1
++ fclr $f10
++ fclr $f11
++ fclr $f12
++ fclr $f13
++ fclr $f14
++ fclr $f15
++ fclr $f16
++ fclr $f17
++ fclr $f18
++ fclr $f19
++ fclr $f20
++ fclr $f21
++ fclr $f22
++ fclr $f23
++ fclr $f24
++ fclr $f25
++ fclr $f26
++ fclr $f27
++ fclr $f28
++ fclr $f29
++ fclr $f30
++ mov $sp,$0
++ ret ($26)
++.end OPENSSL_wipe_cpu
++
++.globl OPENSSL_atomic_add
++.ent OPENSSL_atomic_add
++OPENSSL_atomic_add:
++ .frame $30,0,$26
++ .prologue 0
++1: lldw $0,0($16)
++ ldi $1,1
++ wr_f $1
++ addw $0,$17,$1
++ lstw $1,0($16)
++ rd_f $1
++ beq $1,1b
++ addw $0,$17,$0
++ ret ($26)
++.end OPENSSL_atomic_add
++
++.globl OPENSSL_rdtsc
++.ent OPENSSL_rdtsc
++OPENSSL_rdtsc:
++ .frame $30,0,$26
++ .prologue 0
++ rtc $0
++ ret ($26)
++.end OPENSSL_rdtsc
++
++.globl OPENSSL_cleanse
++.ent OPENSSL_cleanse
++OPENSSL_cleanse:
++ .frame $30,0,$26
++ .prologue 0
++ beq $17,.Ldone
++ and $16,7,$0
++ bic $17,7,$at
++ beq $at,.Little
++ beq $0,.Laligned
++
++.Little:
++ subl $0,8,$0
++ ldl_u $1,0($16)
++ mov $16,$2
++.Lalign:
++ masklb $1,$16,$1
++ ldi $16,1($16)
++ subl $17,1,$17
++ addl $0,1,$0
++ beq $17,.Lout
++ bne $0,.Lalign
++.Lout: stl_u $1,0($2)
++ beq $17,.Ldone
++ bic $17,7,$at
++ beq $at,.Little
++
++.Laligned:
++ stl $31,0($16)
++ subl $17,8,$17
++ ldi $16,8($16)
++ bic $17,7,$at
++ bne $at,.Laligned
++ bne $17,.Little
++.Ldone: ret ($26)
++.end OPENSSL_cleanse
++
++.globl CRYPTO_memcmp
++.ent CRYPTO_memcmp
++CRYPTO_memcmp:
++ .frame $30,0,$26
++ .prologue 0
++ xor $0,$0,$0
++ beq $18,.Lno_data
++
++ xor $1,$1,$1
++ nop
++.Loop_cmp:
++ ldl_u $2,0($16)
++ subl $18,1,$18
++ ldl_u $3,0($17)
++ extlb $2,$16,$2
++ ldi $16,1($16)
++ extlb $3,$17,$3
++ ldi $17,1($17)
++ xor $3,$2,$2
++ or $2,$0,$0
++ bne $18,.Loop_cmp
++
++ subl $31,$0,$0
++ srl $0,63,$0
++.Lno_data:
++ ret ($26)
++.end CRYPTO_memcmp
++___
++{
++my ($out,$cnt,$max)=("\$16","\$17","\$18");
++my ($tick,$lasttick)=("\$19","\$20");
++my ($diff,$lastdiff)=("\$21","\$22");
++my ($lock1,$lock2)=("\$23","\$24");
++my ($v0,$ra,$sp,$zero)=("\$0","\$26","\$30","\$31");
++
++print <<___;
++.globl OPENSSL_instrument_bus
++.ent OPENSSL_instrument_bus
++OPENSSL_instrument_bus:
++ .frame $sp,0,$ra
++ .prologue 0
++ mov $cnt,$v0
++
++ rtc $lasttick
++ mov 0,$diff
++
++ #ecb ($out)
++ lldw $tick,0($out)
++ ldi $lock1,1
++ wr_f $lock1
++ addw $diff,$tick,$tick
++ mov $tick,$diff
++ lstw $tick,0($out)
++ rd_f $tick
++ stw $diff,0($out)
++
++.Loop: rtc $tick
++ subl $tick,$lasttick,$diff
++ mov $tick,$lasttick
++
++ #ecb ($out)
++ lldw $tick,0($out)
++ ldi $lock1,1
++ wr_f $lock1
++ addw $diff,$tick,$tick
++ mov $tick,$diff
++ lstw $tick,0($out)
++ rd_f $tick
++ stw $diff,0($out)
++
++ subw $cnt,1,$cnt
++ ldi $out,4($out)
++ bne $cnt,.Loop
++
++ ret ($ra)
++.end OPENSSL_instrument_bus
++
++.globl OPENSSL_instrument_bus2
++.ent OPENSSL_instrument_bus2
++OPENSSL_instrument_bus2: ++ .frame $sp,0,$ra ++ .prologue 0 ++ mov $cnt,$v0 ++ ++ rtc $lasttick ++ mov 0,$diff ++ ++ #ecb ($out) ++ lldw $tick,0($out) ++ ldi $lock1,1 ++ wr_f $lock1 ++ addw $diff,$tick,$tick ++ mov $tick,$diff ++ lstw $tick,0($out) ++ rd_f $tick ++ stw $diff,0($out) ++ ++ rtc $tick ++ subl $tick,$lasttick,$diff ++ mov $tick,$lasttick ++ mov $diff,$lastdiff ++.Loop2: ++ #ecb ($out) ++ lldw $tick,0($out) ++ ldi $lock1,1 ++ wr_f $lock1 ++ addw $diff,$tick,$tick ++ mov $tick,$diff ++ lstw $tick,0($out) ++ rd_f $tick ++ stw $diff,0($out) ++ ++ subw $max,1,$max ++ beq $max,.Ldone2 ++ ++ rtc $tick ++ subl $tick,$lasttick,$diff ++ mov $tick,$lasttick ++ subl $lastdiff,$diff,$tick ++ mov $diff,$lastdiff ++ selne $tick,1,$tick ++ subw $cnt,$tick,$cnt ++ s4addl $tick,$out,$out ++ bne $cnt,.Loop2 ++ ++.Ldone2: ++ subw $v0,$cnt,$v0 ++ ret ($ra) ++.end OPENSSL_instrument_bus2 ++___ ++} ++ ++close STDOUT; +diff --git a/include/crypto/md32_common.h b/include/crypto/md32_common.h +index 3b16f1b..84dc45a 100644 +--- a/include/crypto/md32_common.h ++++ b/include/crypto/md32_common.h +@@ -226,7 +226,7 @@ int HASH_FINAL(unsigned char *md, HASH_CTX *c) + } + + #ifndef MD32_REG_T +-# if defined(__alpha) || defined(__sparcv9) || defined(__mips) ++# if defined(__alpha) || defined(__sw_64) || defined(__sparcv9) || defined(__mips) + # define MD32_REG_T long + /* + * This comment was originally written for MD5, which is why it +-- +2.27.0 + diff --git a/openssl.spec b/openssl.spec index aa2ea1b13e8225f054e65e23464d8279e569d556..df851e7b5645106be2f0344e2cb8f129024bb744 100644 --- a/openssl.spec +++ b/openssl.spec @@ -2,7 +2,7 @@ Name: openssl Epoch: 1 Version: 3.0.12 -Release: 16 +Release: 17 Summary: Cryptography and SSL/TLS Toolkit License: OpenSSL and SSLeay URL: https://www.openssl.org/ @@ -80,6 +80,7 @@ Patch66: backport-CVE-2024-13176-Fix-timing-side-channel.patch Patch9000: add-FIPS_mode_set-support.patch Patch9001: backport-CVE-2024-9143-Harden-BN_GF2m_poly2arr-against-misuse.patch Patch9002: Fix-build-error-for-ppc64le.patch +Patch9003: add-sw_64-support.patch BuildRequires: gcc gcc-c++ perl make lksctp-tools-devel coreutils util-linux zlib-devel Requires: coreutils %{name}-libs%{?_isa} = %{epoch}:%{version}-%{release} @@ -280,6 +281,9 @@ make test || : %ldconfig_scriptlets libs %changelog +* Mon Mar 10 2025 mahailiang - 1:3.0.12-17 +- add sw_64 support + * Sat Feb 8 2025 jinlun - 1:3.0.12-16 - fix CVE-2024-13176