diff --git a/Feature-Add-Montgomery-implementation-in-generic-riscv64-asm.patch b/Feature-Add-Montgomery-implementation-in-generic-riscv64-asm.patch
new file mode 100644
index 0000000000000000000000000000000000000000..276765b0009f2657d707d156d6df56498996d452
--- /dev/null
+++ b/Feature-Add-Montgomery-implementation-in-generic-riscv64-asm.patch
@@ -0,0 +1,528 @@
+From 9da74c11fdde1962c3e68c4efac4a32ab0e23a77 Mon Sep 17 00:00:00 2001
+From: fengpengbo
+Date: Fri, 25 Jul 2025 10:07:10 +0800
+Subject: [PATCH] Implement Montgomery multiplication assembly optimization for
+ RV64GC
+
+Reference: https://github.com/openssl/openssl/pull/28012
+
+This PR introduces an RV64GC-optimized assembly implementation of Montgomery multiplication, significantly accelerating BN modular exponentiation and RSA operations.
+
+Test Verification
+```
+make test
+
+All tests successful.
+Files=251, Tests=2890, 7753 wallclock secs (178.34 usr 51.01 sys + 4480.51 cusr 2970.78 csys = 7680.64 CPU)
+Result: PASS
+```
+
+Performance Improvement
+```
+openssl speed rsa2048
+```
+Benchmark results on qemu_openEuler_24.03 (LTS-SP2):
+
+| Test Item | Baseline | ASM Optimized | Optimized/Baseline |
+| --------- | -------- | ------------- | ------------------ |
+| sign/s    | 43.6     | 115.3         | 264.45%            |
+| verify/s  | 2399.3   | 4426.9        | 184.51%            |
+
+Signed-off-by: liuqingtao
+---
+ crypto/bn/asm/riscv64-mont.pl | 460 ++++++++++++++++++++++++++++++++++
+ crypto/bn/build.info          |   5 +
+ 2 files changed, 465 insertions(+)
+ create mode 100644 crypto/bn/asm/riscv64-mont.pl
+
+diff --git a/crypto/bn/asm/riscv64-mont.pl b/crypto/bn/asm/riscv64-mont.pl
+new file mode 100644
+index 0000000..a80b766
+--- /dev/null
++++ b/crypto/bn/asm/riscv64-mont.pl
+@@ -0,0 +1,460 @@
++#! /usr/bin/env perl
++# Copyright 2025 The OpenSSL Project Authors. All Rights Reserved.
++#
++# Licensed under the Apache License 2.0 (the "License"). You may not use
++# this file except in compliance with the License. You can obtain a copy
++# in the file LICENSE in the source distribution or at
++# https://www.openssl.org/source/license.html
++
++
++# $output is the last argument if it looks like a file (it has an extension)
++# $flavour is the first argument if it doesn't look like a file
++use strict;
++use warnings;
++
++use FindBin qw($Bin);
++use lib "$Bin";
++use lib "$Bin/../../perlasm";
++use riscv;
++
++my $output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
++my $flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;
++
++$output and open STDOUT,">$output";
++
++################################################################################
++# Utility functions to help with keeping track of which registers to stack/
++# unstack when entering / exiting routines.
++################################################################################
++{
++    # Callee-saved registers
++    my @callee_saved = map("x$_",(2,8,9,18..27));
++    # Caller-saved registers
++    my @caller_saved = map("x$_",(1,5..7,10..17,28..31));
++    my @must_save;
++    sub use_reg {
++        my $reg = shift;
++        if (grep(/^$reg$/, @callee_saved)) {
++            push(@must_save, $reg);
++        } elsif (!grep(/^$reg$/, @caller_saved)) {
++            # Register is not usable!
++            die("Unusable register ".$reg);
++        }
++        return $reg;
++    }
++    sub use_regs {
++        return map(use_reg("x$_"), @_);
++    }
++    sub save_regs {
++        my $ret = '';
++        my $stack_reservation = ($#must_save + 1) * 8;
++        my $stack_offset = $stack_reservation;
++        if ($stack_reservation % 16) {
++            $stack_reservation += 8;
++        }
++        $ret.="    addi    sp,sp,-$stack_reservation\n";
++        foreach (@must_save) {
++            $stack_offset -= 8;
++            $ret.="    sd      $_,$stack_offset(sp)\n";
++        }
++        return $ret;
++    }
++    sub load_regs {
++        my $ret = '';
++        my $stack_reservation = ($#must_save + 1) * 8;
++        my $stack_offset = $stack_reservation;
++        if ($stack_reservation % 16) {
++            $stack_reservation += 8;
++        }
++        foreach (@must_save) {
++            $stack_offset -= 8;
++            $ret.="    ld      $_,$stack_offset(sp)\n";
++        }
++        $ret.="    addi    sp,sp,$stack_reservation\n";
++        return $ret;
++    }
++    sub clear_regs {
++        @must_save = ();
++    }
++}
++
++################################################################################
++# Register assignment
++################################################################################
++
++# Function arguments
++#          RISC-V  ABI
++# $rp      x10     a0      # BN_ULONG *rp
++# $ap      x11     a1      # const BN_ULONG *ap
++# $bp      x12     a2      # const BN_ULONG *bp
++# $np      x13     a3      # const BN_ULONG *np
++# $n0      x14     a4      # const BN_ULONG *n0
++# $num     x15     a5      # int num
++my ($rp,$ap,$bp,$np,$n0,$num) = use_regs(10,11,12,13,14,15);
++
++# Return address and Frame pointer
++#          RISC-V  ABI
++# $ra      x1      ra
++# $fp      x8      s0
++my ($ra,$fp) = use_regs(1,8);
++
++# Temporary variable allocation
++#          RISC-V  ABI
++# $lo0     x5      t0      the sum of partial products of a and b
++# $hi0     x6      t1      the high word of a partial product of a and b + carry
++# $aj      x7      t2      ap[j]
++# $m0      x28     t3      bp[i]
++# $alo     x29     t4      the low word of a partial product
++# $ahi     x30     t5      the high word of a partial product
++# $lo1     x31     t6      partial product + reduction term
++# $hi1     x18     s2      the high word of a reduction term + carry
++# $nj      x19     s3      np[j], modulus
++# $m1      x20     s4      Montgomery reduction coefficient
++# $nlo     x21     s5      the low word of a reduction term
++# $nhi     x22     s6      the high word of a reduction term
++# $ovf     x23     s7      highest carry bit, overflow flag
++# $i       x24     s8      outer loop index
++# $j       x25     s9      inner loop index
++# $tp      x26     s10     temporary result storage
++# $tj      x27     s11     tp[j], temporary result value
++# $temp    x9      s1
++my ($lo0,$hi0,$aj,$m0,$alo,$ahi,$lo1,$hi1,$nj,$m1,$nlo,$nhi,$ovf,$i,$j,$tp,$tj,$temp) = use_regs(5..7,28..31,18..27,9);
++
++# Carry variables
++# $carry1  x16     a6
++# $carry2  x17     a7
++my ($carry1,$carry2) = use_regs(16,17);
++
++my $code .= <<___;
++.text
++.balign 32
++.globl bn_mul_mont
++.type bn_mul_mont,\@function
++bn_mul_mont:
++___
++
++$code .= save_regs();
++
++$code .= <<___;
++    mv      $fp, sp
++___
++
++$code .= <<___;
++    ld      $m0, 0($bp) # bp[0]
++    addi    $bp, $bp, 8
++    ld      $hi0, 0($ap) # ap[0]
++    ld      $aj, 8($ap) # ap[1]
++    addi    $ap, $ap, 16
++    ld      $n0, 0($n0) # n0, precomputed modular inverse
++    ld      $hi1, 0($np) # np[0]
++    ld      $nj, 8($np) # np[1]
++    addi    $np, $np, 16
++
++    slli    $num, $num, 3
++    sub     $tp, sp, $num
++    andi    $tp, $tp, -16 # address alignment
++    mv      sp, $tp # alloca
++
++    addi    $j, $num, -16 # $j=(num-2)*8
++
++    mul     $lo0, $hi0, $m0 # ap[0]*bp[0]
++    mulhu   $hi0, $hi0, $m0
++    mul     $alo, $aj, $m0 # ap[1]*bp[0]
++    mulhu   $ahi, $aj, $m0
++
++    mul     $m1, $lo0, $n0 # Montgomery reduction coefficient tp[0]*n0
++    # Montgomery optimization: m1 ensures (np[0]*m1+lo0) has a zero low word,
++    # so only the carry status is needed, not the full lo1 result;
++    # eliminating the mul/adds instructions below → saves cycles & power
++    # mul $lo1, $hi1, $m1 // np[0]*m1
++    # adds $lo1, $lo1, $lo0 // discarded
++    mulhu   $hi1, $hi1, $m1
++    snez    $carry1, $lo0
++    add     $hi1, $hi1, $carry1
++    mul     $nlo, $nj, $m1 # np[1]*m1
++    mulhu   $nhi, $nj, $m1
++    beqz    $j, .L1st_last_entry
++
++.L1st:
++    ld      $aj, 0($ap)
++    addi    $ap, $ap, 8
++
++    # compute the sum of partial products of a and b
++    add     $lo0, $alo, $hi0 # {ap[j-1]*bp[0],low}+{ap[j-2]*bp[0],high}, j ranges from 2 to num-1
++    sltu    $carry1, $lo0, $alo
++    add     $hi0, $ahi, $carry1 # {ap[j-1]*bp[0],high}+C_lo0, j ranges from 2 to num-1
++
++    addi    $j, $j, -8 # $j--, $j ranges from (num-2)*8 to 0
++    ld      $nj, 0($np)
++    addi    $np, $np, 8
++
++    # compute the sum of reduction term
++    add     $lo1, $nlo, $hi1 # {np[j-1]*m1,low}+{np[j-2]*m1,high}, j ranges from 2 to num-1
++    sltu    $carry1, $lo1, $nlo
++    add     $hi1, $nhi, $carry1 # {np[j-1]*m1,high}+C_lo1, j ranges from 2 to num-1
++
++    # partial product + reduction term
++    add     $temp, $lo1, $lo0
++    sltu    $carry1, $temp, $lo1
++    mv      $lo1, $temp
++    add     $hi1, $hi1, $carry1
++
++    sd      $lo1, 0($tp) # tp[j-2], j ranges from 2 to num-1
++    addi    $tp, $tp, 8
++
++    mul     $alo, $aj, $m0 # ap[j]*bp[0], j ranges from 2 to num-1
++    mulhu   $ahi, $aj, $m0
++    mul     $nlo, $nj, $m1 # np[j]*m1, j ranges from 2 to num-1
++    mulhu   $nhi, $nj, $m1
++    bnez    $j, .L1st
++
++.L1st_last_entry:
++    # last partial product
++    add     $lo0, $alo, $hi0 # {ap[j]*bp[0],low}+{ap[j-1]*bp[0],high}, j is num-1
++    sltu    $carry1, $lo0, $alo
++    add     $hi0, $ahi, $carry1 # {ap[j]*bp[0],high}+C_lo0, j is num-1
++
++    sub     $ap, $ap, $num # rewind $ap
++    sub     $np, $np, $num # rewind $np
++
++    # last reduction term
++    add     $lo1, $nlo, $hi1 # {np[j]*m1,low}+{np[j-1]*m1,high}, j is num-1
++    sltu    $carry1, $lo1, $nlo
++    add     $hi1, $nhi, $carry1 # {np[j]*m1,high}+C_lo1, j is num-1
++
++    # last partial product + last reduction term
++    add     $lo1, $lo1, $lo0
++    sltu    $carry1, $lo1, $lo0
++
++    add     $temp, $hi1, $hi0
++    sltu    $carry2, $temp, $hi1
++    add     $hi1, $temp, $carry1
++    sltu    $ovf, $hi1, $temp
++    or      $carry1, $carry2, $ovf # carry2 and ovf are mutually exclusive; both cannot be 1 simultaneously
++    mv      $ovf, $carry1 # uppermost overflow bit
++
++    addi    $i, $num, -8 # $i=(num-1)*8
++
++    sd      $lo1, 0($tp) # tp[j-1], j is num-1
++    sd      $hi1, 8($tp) # tp[j], j is num-1
++
++.Louter:
++    ld      $m0, 0($bp) # bp[i], i ranges from 1 to num-1
++    addi    $bp, $bp, 8
++    ld      $hi0, 0($ap)
++    ld      $aj, 8($ap)
++    addi    $ap, $ap, 16
++    ld      $tj, 0(sp) # tp[0]
++    addi    $tp, sp, 8 # tp[1]
++
++    mul     $lo0, $hi0, $m0 # ap[0]*bp[i], i ranges from 1 to num-1
++    mulhu   $hi0, $hi0, $m0
++
++    addi    $j, $num, -16 # $j=(num-2)*8
++    ld      $hi1, 0($np)
++    ld      $nj, 8($np)
++    addi    $np, $np, 16
++
++    mul     $alo, $aj, $m0 # ap[1]*bp[i], i ranges from 1 to num-1
++    mulhu   $ahi, $aj, $m0
++
++    add     $lo0, $lo0, $tj # ap[0]*bp[i] + previous tp[0], i ranges from 1 to num-1
++    sltu    $carry1, $lo0, $tj
++    add     $hi0, $hi0, $carry1 # $hi0 will not overflow
++
++    # compute the modular reduction coefficient
++    mul     $m1, $lo0, $n0
++
++    addi    $i, $i, -8 # $i--, $i ranges from (num-1)*8 to 0
++
++    # mul $lo1, $hi1, $m1 # discarded
++    # adds $lo1, $lo1, $lo0 # discarded
++    mulhu   $hi1, $hi1, $m1
++    snez    $carry1, $lo0
++    mul     $nlo, $nj, $m1 # np[1]*m1
++    mulhu   $nhi, $nj, $m1
++
++    beqz    $j, .Linner_last_entry
++
++.Linner:
++    ld      $aj, 0($ap)
++    addi    $ap, $ap, 8
++    add     $hi1, $hi1, $carry1
++
++    ld      $tj, 0($tp) # tp[j-1], j ranges from 2 to num-1
++    addi    $tp, $tp, 8
++
++    # compute the sum of partial products of a and b
++    add     $lo0, $alo, $hi0 # {ap[j-1]*bp[i],low}+{ap[j-2]*bp[i],high}, j ranges from 2 to num-1, i ranges from 1 to num-1
++    sltu    $carry1, $lo0, $alo
++    add     $hi0, $ahi, $carry1 # {ap[j-1]*bp[i],high}+C_lo0, j ranges from 2 to num-1, i ranges from 1 to num-1
++
++    addi    $j, $j, -8 # $j--, $j ranges from (num-2)*8 to 0
++
++    # compute the sum of reduction term
++    add     $lo1, $nlo, $hi1 # {np[j-1]*m1,low}+{np[j-2]*m1,high}, j ranges from 2 to num-1
++    sltu    $carry1, $lo1, $nlo
++    add     $hi1, $nhi, $carry1 # {np[j-1]*m1,high}+C_lo1, j ranges from 2 to num-1
++
++    ld      $nj, 0($np)
++    addi    $np, $np, 8
++
++    # partial product + reduction term
++    add     $lo0, $lo0, $tj
++    sltu    $carry1, $lo0, $tj
++    add     $hi0, $hi0, $carry1
++
++    add     $lo1, $lo1, $lo0
++    sltu    $carry1, $lo1, $lo0
++
++    sd      $lo1, -16($tp) # tp[j-2], j ranges from 2 to num-1
++
++    mul     $alo, $aj, $m0 # ap[j]*bp[i], j ranges from 2 to num-1, i ranges from 1 to num-1
++    mulhu   $ahi, $aj, $m0
++    mul     $nlo, $nj, $m1 # np[j]*m1, j ranges from 2 to num-1
++    mulhu   $nhi, $nj, $m1
++
++    bnez    $j, .Linner
++
++.Linner_last_entry:
++    ld      $tj, 0($tp) # tp[j], j is num-1
++    addi    $tp, $tp, 8
++    add     $hi1, $hi1, $carry1
++
++    # last partial product
++    add     $lo0, $alo, $hi0 # {ap[j]*bp[i],low}+{ap[j-1]*bp[i],high}, j is num-1, i ranges from 1 to num-1
++    sltu    $carry1, $lo0, $alo
++    add     $hi0, $ahi, $carry1 # {ap[j]*bp[i],high}+C_lo0, j is num-1, i ranges from 1 to num-1
++
++    sub     $ap, $ap, $num # rewind $ap
++    sub     $np, $np, $num # rewind $np
++
++    # last reduction term
++    add     $lo1, $nlo, $hi1 # {np[j]*m1,low}+{np[j-1]*m1,high}, j is num-1
++    sltu    $carry1, $lo1, $nlo
++    add     $temp, $nhi, $ovf
++    sltu    $carry2, $temp, $nhi
++    add     $hi1, $temp, $carry1 # {np[j]*m1,high}+C_lo1, j is num-1
++    sltu    $ovf, $hi1, $temp
++    or      $carry1, $carry2, $ovf
++    mv      $ovf, $carry1 # update the uppermost overflow bit
++
++    # last partial product + last reduction term
++    add     $lo0, $lo0, $tj
++    sltu    $carry1, $lo0, $tj
++    add     $hi0, $hi0, $carry1
++
++    add     $lo1, $lo1, $lo0
++    sltu    $carry1, $lo1, $lo0
++    add     $temp, $hi1, $hi0
++    sltu    $carry2, $temp, $hi1
++    add     $hi1, $temp, $carry1
++    sltu    $carry1, $hi1, $temp
++    or      $carry1, $carry2, $carry1
++
++    add     $ovf, $ovf, $carry1 # uppermost overflow bit
++
++    sd      $lo1, -16($tp) # tp[j-1], j is num-1
++    sd      $hi1, -8($tp) # tp[j], j is num-1
++    bnez    $i, .Louter
++
++    ld      $tj, 0(sp) # tp[0]
++    addi    $tp, sp, 8
++    ld      $nj, 0($np) # np[0]
++    addi    $np, $np, 8
++    addi    $j, $num, -8 # $j=(num-1)*8 and clear borrow
++    sltu    $carry1, $num, 8
++    xori    $carry1, $carry1, 1
++    mv      $ap, $rp
++.Lsub:
++    # tp[j]-np[j], j ranges from 0 to num-2, setting the borrow flag
++    xori    $carry1, $carry1, 1
++    sub     $temp, $tj, $nj
++    sltu    $carry2, $tj, $temp
++    sub     $aj, $temp, $carry1
++    sltu    $carry1, $temp, $aj
++    or      $carry1, $carry2, $carry1
++    xori    $carry1, $carry1, 1
++
++    ld      $tj, 0($tp) # tp[j], j ranges from 1 to num-1
++    addi    $tp, $tp, 8
++    addi    $j, $j, -8 # $j--, $j ranges from (num-1)*8 to 0
++    ld      $nj, 0($np)
++    addi    $np, $np, 8
++
++    sd      $aj, 0($ap) # rp[j]=tp[j]-np[j], j ranges from 0 to num-2
++    addi    $ap, $ap, 8
++    bnez    $j, .Lsub
++
++    # process the last word, tp[j]-np[j], j is num-1
++    xori    $carry1, $carry1, 1
++    sub     $temp, $tj, $nj
++    sltu    $carry2, $tj, $temp
++    sub     $aj, $temp, $carry1
++    sltu    $carry1, $temp, $aj
++    or      $carry1, $carry2, $carry1
++    xori    $carry1, $carry1, 1
++
++    # propagate the borrow into the overflow word $ovf
++    xori    $carry1, $carry1, 1
++    sub     $temp, $ovf, zero
++    sltu    $carry2, $ovf, $temp
++    sub     $ovf, $temp, $carry1
++    sltu    $carry1, $temp, $ovf
++    or      $carry1, $carry2, $carry1
++    xori    $carry1, $carry1, 1
++
++    sd      $aj, 0($ap) # rp[j], j is num-1
++    addi    $ap, $ap, 8
++
++    # conditional result copying and cleanup
++    ld      $tj, 0(sp) # tp[0]
++    addi    $tp, sp, 8
++    ld      $aj, 0($rp) # rp[0]
++    addi    $rp, $rp, 8
++    addi    $num, $num, -8 # num--
++    nop
++
++.Lcond_copy:
++    addi    $num, $num, -8 # num--
++    # conditionally select the value based on the borrow flag:
++    # when a borrow occurred (borrow flag set): nj = tj (the original tp value)
++    snez    $carry1, $carry1
++    sub     $carry1, zero, $carry1
++    xor     $nj, $tj, $aj
++    and     $nj, $nj, $carry1
++    xor     $nj, $tj, $nj
++
++    ld      $tj, 0($tp)
++    addi    $tp, $tp, 8
++    ld      $aj, 0($rp)
++    addi    $rp, $rp, 8
++    sd      zero, -16($tp) # wipe tp
++    sd      $nj, -16($rp) # result
++    bnez    $num, .Lcond_copy
++
++    # process the last word
++    snez    $carry1, $carry1
++    sub     $carry1, zero, $carry1
++    xor     $nj, $tj, $aj
++    and     $nj, $nj, $carry1
++    xor     $nj, $tj, $nj
++
++    sd      zero, -8($tp) # wipe tp
++    sd      $nj, -8($rp)
++___
++
++$code .= <<___;
++    mv      sp, $fp
++    li      $rp, 1
++___
++
++$code .= load_regs();
++
++$code .= <<___;
++    ret
++.size bn_mul_mont,.-bn_mul_mont
++___
++
++print $code;
++close STDOUT or die "error closing STDOUT: $!";
+\ No newline at end of file
+diff --git a/crypto/bn/build.info b/crypto/bn/build.info
+index c4ba51b..01acf8d 100644
+--- a/crypto/bn/build.info
++++ b/crypto/bn/build.info
+@@ -72,6 +72,9 @@ IF[{- !$disabled{asm} -}]
+   $BNASM_aarch64=bn_asm.c armv8-mont.S
+   $BNDEF_aarch64=OPENSSL_BN_ASM_MONT
+ 
++  $BNASM_riscv64=bn_asm.c riscv64-mont.S
++  $BNDEF_riscv64=OPENSSL_BN_ASM_MONT
++
+   $BNASM_parisc11=bn_asm.c parisc-mont.s
+   $BNDEF_parisc11=OPENSSL_BN_ASM_MONT
+   $BNASM_parisc20_64=$BNASM_parisc11
+@@ -175,3 +178,5 @@ GENERATE[armv4-gf2m.S]=asm/armv4-gf2m.pl
+ INCLUDE[armv4-gf2m.o]=..
+ GENERATE[armv8-mont.S]=asm/armv8-mont.pl
+ INCLUDE[armv8-mont.o]=..
++GENERATE[riscv64-mont.S]=asm/riscv64-mont.pl
++INCLUDE[riscv64-mont.o]=..
+\ No newline at end of file
+--
+2.27.0
+
diff --git a/openssl.spec b/openssl.spec
index df851e7b5645106be2f0344e2cb8f129024bb744..1c5a713bd5950f0c3d75a2b0cd231739a1e72984 100644
--- a/openssl.spec
+++ b/openssl.spec
@@ -2,7 +2,7 @@
 Name: openssl
 Epoch: 1
 Version: 3.0.12
-Release: 17
+Release: 21
 Summary: Cryptography and SSL/TLS Toolkit
 License: OpenSSL and SSLeay
 URL: https://www.openssl.org/
@@ -77,6 +77,10 @@
 Patch64: backport-Release-the-drbg-in-the-global-default-context-befor.patch
 Patch65: backport-params-provide-a-faster-TRIE-based-param-lookup.patch
 Patch66: backport-CVE-2024-13176-Fix-timing-side-channel.patch
+
+
+Patch70: Feature-Add-Montgomery-implementation-in-generic-riscv64-asm.patch
+
 Patch9000: add-FIPS_mode_set-support.patch
 Patch9001: backport-CVE-2024-9143-Harden-BN_GF2m_poly2arr-against-misuse.patch
 Patch9002: Fix-build-error-for-ppc64le.patch
@@ -281,6 +285,9 @@ make test || :
 %ldconfig_scriptlets libs
 
 %changelog
+* Fri Jul 25 2025 liuqingtao - 1:3.0.12-21
+- add Montgomery multiplication assembly optimization for RV64GC
+
 * Mon Mar 10 2025 mahailiang - 1:3.0.12-17
 - add sw_64 support
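Note for reviewers (commentary, not part of the patch): bn_mul_mont above is a word-by-word (CIOS-style) Montgomery multiplication. Each outer iteration accumulates ap[]*bp[i] into the temporary tp[], immediately folds in one reduction step with the coefficient m1 = tp[0]*n0 mod 2^64, and the routine finishes with a conditional subtraction of the modulus. The C sketch below is a minimal reference model of that flow, assuming a 64-bit BN_ULONG, compiler support for unsigned __int128 and num >= 1; the name mont_mul_ref and passing n0 by value are illustrative and do not match the bn_asm.c interface, which takes n0 through a pointer.

```
#include <stdint.h>
#include <string.h>

/* Reference model: rp = ap*bp*R^-1 mod np, with R = 2^(64*num).
 * n0 is -np[0]^-1 mod 2^64, precomputed by the caller (BN_MONT_CTX). */
static void mont_mul_ref(uint64_t *rp, const uint64_t *ap, const uint64_t *bp,
                         const uint64_t *np, uint64_t n0, int num)
{
    uint64_t t[num + 2];                 /* tp[0..num] plus an overflow word (C99 VLA) */
    unsigned __int128 acc;
    memset(t, 0, sizeof(t));

    for (int i = 0; i < num; i++) {
        /* partial products: t += ap[] * bp[i] */
        uint64_t carry = 0;
        for (int j = 0; j < num; j++) {
            acc = (unsigned __int128)ap[j] * bp[i] + t[j] + carry;
            t[j] = (uint64_t)acc;
            carry = (uint64_t)(acc >> 64);
        }
        acc = (unsigned __int128)t[num] + carry;
        t[num] = (uint64_t)acc;
        t[num + 1] = (uint64_t)(acc >> 64);

        /* one reduction step: m1 makes t divisible by 2^64, then shift words down */
        uint64_t m1 = t[0] * n0;
        acc = (unsigned __int128)np[0] * m1 + t[0];   /* low word is 0 by construction */
        carry = (uint64_t)(acc >> 64);
        for (int j = 1; j < num; j++) {
            acc = (unsigned __int128)np[j] * m1 + t[j] + carry;
            t[j - 1] = (uint64_t)acc;
            carry = (uint64_t)(acc >> 64);
        }
        acc = (unsigned __int128)t[num] + carry;
        t[num - 1] = (uint64_t)acc;
        t[num] = t[num + 1] + (uint64_t)(acc >> 64);  /* running overflow bit */
    }

    /* conditional final subtraction: rp = t - np if t >= np, else rp = t */
    uint64_t borrow = 0;
    for (int j = 0; j < num; j++) {
        uint64_t d = t[j] - np[j];
        uint64_t b = d > t[j];
        uint64_t d2 = d - borrow;
        b |= d2 > d;
        rp[j] = d2;
        borrow = b;
    }
    if (t[num] < borrow)                 /* t < np: keep the unsubtracted value */
        memcpy(rp, t, sizeof(uint64_t) * num);
}
```

Unlike the plain `if` in this sketch, the assembly performs the final selection branch-free with an xor/and/xor mask in .Lcond_copy, and it also wipes the tp[] scratch area on the stack as it copies the result out.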