diff --git a/Feature-add-SM4-ECB-mode-for-armv8.patch b/Feature-add-SM4-ECB-mode-for-armv8.patch new file mode 100644 index 0000000000000000000000000000000000000000..f568d93a38876a5f6522c8628929c9c8f446e453 --- /dev/null +++ b/Feature-add-SM4-ECB-mode-for-armv8.patch @@ -0,0 +1,196 @@ +From: Xu Yizhou + +--- +crypto/evp/e_sm4.c | 17 ++++- +crypto/sm4/asm/vpsm4_ex-armv8.pl | 104 +++++++++++++++++++++++++++++++ +include/openssl/modes.h | 4 ++ +3 files changed, 123 insertions(+), 2 deletions(-) + +diff --git a/crypto/evp/e_sm4.c b/crypto/evp/e_sm4.c +index d2ef74e..bbf8cb7 100644 +--- a/crypto/evp/e_sm4.c ++++ b/crypto/evp/e_sm4.c +@@ -32,6 +32,9 @@ typedef struct { + SM4_KEY ks; + } ks; + block128_f block; ++ union { ++ ecb128_f ecb; ++ } stream; +} EVP_SM4_KEY; + +#ifdef VPSM4_EX_CAPABLE +@@ -39,6 +42,8 @@ void vpsm4_ex_set_encrypt_key(const unsigned char *userKey, SM4_KEY *key); +void vpsm4_ex_set_decrypt_key(const unsigned char *userKey, SM4_KEY *key); +#define vpsm4_ex_encrypt SM4_encrypt +#define vpsm4_ex_decrypt SM4_encrypt ++void vpsm4_ex_ecb_encrypt( ++ const unsigned char *in, unsigned char *out, size_t length, const SM4_KEY *key, const int enc); +#endif + +# define BLOCK_CIPHER_generic(nid,blocksize,ivlen,nmode,mode,MODE,flags) \ +@@ -72,6 +77,8 @@ static int sm4_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, + if (VPSM4_EX_CAPABLE) { + vpsm4_ex_set_decrypt_key(key, &dat->ks.ks); + dat->block = (block128_f) vpsm4_ex_decrypt; ++ if (mode == EVP_CIPH_ECB_MODE) ++ dat->stream.ecb = (ecb128_f) vpsm4_ex_ecb_encrypt; + } else +#endif + { +@@ -83,6 +90,8 @@ static int sm4_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, + if (VPSM4_EX_CAPABLE) { + vpsm4_ex_set_encrypt_key(key, &dat->ks.ks); + dat->block = (block128_f) vpsm4_ex_encrypt; ++ if (mode == EVP_CIPH_ECB_MODE) ++ dat->stream.ecb = (ecb128_f) vpsm4_ex_ecb_encrypt; + } else +#endif + { +@@ -131,8 +140,12 @@ static int sm4_ecb_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, + if (len < bl){ + return 1; + } +- for (i = 0, len -= bl; i <= len; i += bl) +- (*dat->block) (in + i, out + i, &dat->ks.ks); ++ if (dat->stream.ecb != NULL) ++ (*dat->stream.ecb) (in, out, len, &dat->ks.ks, ++ EVP_CIPHER_CTX_encrypting(ctx)); ++ else ++ for (i = 0, len -= bl; i <= len; i += bl) ++ (*dat->block) (in + i, out + i, &dat->ks.ks); + return 1; +} + +diff --git a/crypto/sm4/asm/vpsm4_ex-armv8.pl b/crypto/sm4/asm/vpsm4_ex-armv8.pl +index 59c3081..5cdd5b6 100644 +--- a/crypto/sm4/asm/vpsm4_ex-armv8.pl ++++ b/crypto/sm4/asm/vpsm4_ex-armv8.pl +@@ -572,6 +572,110 @@ ${prefix}_set_decrypt_key: +___ +}}} + ++ ++{{{ ++ ++$code.=<<___; ++.globl ${prefix}_ecb_encrypt ++.type ${prefix}_ecb_encrypt,%function ++.align 5 ++${prefix}_ecb_encrypt: ++ stp d8,d9,[sp,#-0x10]! ++ stp d10,d11,[sp,#-0x10]! ++ stp d12,d13,[sp,#-0x10]! ++ stp d14,d15,[sp,#-0x10]! ++ stp x16,x17,[sp,#-0x10]! ++ stp x29,x30,[sp,#-0x10]! 
++___ ++ &load_sbox_matrix(); ++$code.=<<___; ++ // convert length into blocks ++ lsr x2,x2,4 ++.Lecb_8_blocks_process: ++ cmp $blocks,#8 ++ b.lt .Lecb_4_blocks_process ++ ld4 {@data[0].4s,@data[1].4s,@data[2].4s,@data[3].4s},[$inp],#64 ++ ld4 {@datax[0].4s,$datax[1].4s,@datax[2].4s,@datax[3].4s},[$inp],#64 ++___ ++ &rev32(@data[0],@data[0]); ++ &rev32(@data[1],@data[1]); ++ &rev32(@data[2],@data[2]); ++ &rev32(@data[3],@data[3]); ++ &rev32(@datax[0],@datax[0]); ++ &rev32(@datax[1],@datax[1]); ++ &rev32(@datax[2],@datax[2]); ++ &rev32(@datax[3],@datax[3]); ++$code.=<<___; ++ bl ${prefix}_enc_8blks ++ st4 {@vtmp[0].4s,@vtmp[1].4s,@vtmp[2].4s,@vtmp[3].4s},[$outp],#64 ++ st4 {@data[0].4s,@data[1].4s,@data[2].4s,@data[3].4s},[$outp],#64 ++ subs $blocks,$blocks,#8 ++ b.gt .Lecb_8_blocks_process ++ b 100f ++.Lecb_4_blocks_process: ++ cmp $blocks,#4 ++ b.lt 1f ++ ld4 {@data[0].4s,@data[1].4s,@data[2].4s,@data[3].4s},[$inp],#64 ++___ ++ &rev32(@data[0],@data[0]); ++ &rev32(@data[1],@data[1]); ++ &rev32(@data[2],@data[2]); ++ &rev32(@data[3],@data[3]); ++$code.=<<___; ++ bl ${prefix}_enc_4blks ++ st4 {@vtmp[0].4s,@vtmp[1].4s,@vtmp[2].4s,@vtmp[3].4s},[$outp],#64 ++ sub $blocks,$blocks,#4 ++1: ++ // process last block ++ cmp $blocks,#1 ++ b.lt 100f ++ b.gt 1f ++ ld1 {@data[0].4s},[$inp] ++___ ++ &rev32(@data[0],@data[0]); ++ &encrypt_1blk(@data[0],$rks1); ++$code.=<<___; ++ st1 {@data[0].4s},[$outp] ++ b 100f ++1: // process last 2 blocks ++ ld4 {@data[0].s,@data[1].s,@data[2].s,@data[3].s}[0],[$inp],#16 ++ ld4 {@data[0].s,@data[1].s,@data[2].s,@data[3].s}[1],[$inp],#16 ++ cmp $blocks,#2 ++ b.gt 1f ++___ ++ &rev32(@data[0],@data[0]); ++ &rev32(@data[1],@data[1]); ++ &rev32(@data[2],@data[2]); ++ &rev32(@data[3],@data[3]); ++$code.=<<___; ++ bl ${prefix}_enc_4blks ++ st4 {@vtmp[0].s-@vtmp[3].s}[0],[$outp],#16 ++ st4 {@vtmp[0].s-@vtmp[3].s}[1],[$outp] ++ b 100f ++1: // process last 3 blocks ++ ld4 {@data[0].s,@data[1].s,@data[2].s,@data[3].s}[2],[$inp],#16 ++___ ++ &rev32(@data[0],@data[0]); ++ &rev32(@data[1],@data[1]); ++ &rev32(@data[2],@data[2]); ++ &rev32(@data[3],@data[3]); ++$code.=<<___; ++ bl ${prefix}_enc_4blks ++ st4 {@vtmp[0].s-@vtmp[3].s}[0],[$outp],#16 ++ st4 {@vtmp[0].s-@vtmp[3].s}[1],[$outp],#16 ++ st4 {@vtmp[0].s-@vtmp[3].s}[2],[$outp] ++100: ++ ldp x29,x30,[sp],#0x10 ++ ldp x16,x17,[sp],#0x10 ++ ldp d14,d15,[sp],#0x10 ++ ldp d12,d13,[sp],#0x10 ++ ldp d10,d11,[sp],#0x10 ++ ldp d8,d9,[sp],#0x10 ++ ret ++.size ${prefix}_ecb_encrypt,.-${prefix}_ecb_encrypt ++___ ++}}} ++ +######################################## +open SELF,$0; +while() { +diff --git a/include/openssl/modes.h b/include/openssl/modes.h +index d544f98..ddabe57 100644 +--- a/include/openssl/modes.h ++++ b/include/openssl/modes.h +@@ -22,6 +22,10 @@ typedef void (*cbc128_f) (const unsigned char *in, unsigned char *out, + size_t len, const void *key, + unsigned char ivec[16], int enc); + ++typedef void (*ecb128_f) (const unsigned char *in, unsigned char *out, ++ size_t len, const void *key, ++ int enc); ++ +typedef void (*ctr128_f) (const unsigned char *in, unsigned char *out, + size_t blocks, const void *key, + const unsigned char ivec[16]); +-- +2.27.0 \ No newline at end of file diff --git a/Feature-add-Sbox-optimized-by-AESE.patch b/Feature-add-Sbox-optimized-by-AESE.patch new file mode 100644 index 0000000000000000000000000000000000000000..c3969733e73f2e12fea08328adf5104f389c3187 --- /dev/null +++ b/Feature-add-Sbox-optimized-by-AESE.patch @@ -0,0 +1,127 @@ +From: Xu Yizhou + +--- +crypto/sm4/asm/vpsm4_ex-armv8.pl | 108 
+++++++++++++++++++++++++++++++ +1 file changed, 108 insertions(+) + +diff --git a/crypto/sm4/asm/vpsm4_ex-armv8.pl b/crypto/sm4/asm/vpsm4_ex-armv8.pl +index bee9499..ae73f72 100644 +--- a/crypto/sm4/asm/vpsm4_ex-armv8.pl ++++ b/crypto/sm4/asm/vpsm4_ex-armv8.pl +@@ -91,6 +91,114 @@ $code.=<<___; +___ +} + ++# sbox operation for one single word ++sub sbox_1word () { ++ my $word = shift; ++ ++$code.=<<___; ++ mov @vtmp[3].s[0],$word ++ // optimize sbox using AESE instruction ++ tbl @vtmp[0].16b, {@vtmp[3].16b}, $MaskV.16b ++___ ++ &mul_matrix(@vtmp[0], $TAHMatV, $TALMatV, @vtmp[2]); ++$code.=<<___; ++ eor @vtmp[1].16b, @vtmp[1].16b, @vtmp[1].16b ++ aese @vtmp[0].16b,@vtmp[1].16b ++___ ++ &mul_matrix(@vtmp[0], $ATAHMatV, $ATALMatV, @vtmp[2]); ++$code.=<<___; ++ ++ mov $wtmp0,@vtmp[0].s[0] ++ eor $word,$wtmp0,$wtmp0,ror #32-2 ++ eor $word,$word,$wtmp0,ror #32-10 ++ eor $word,$word,$wtmp0,ror #32-18 ++ eor $word,$word,$wtmp0,ror #32-24 ++___ ++} ++ ++# sbox operation for 4-lane of words ++sub sbox() { ++ my $dat = shift; ++ ++$code.=<<___; ++ // optimize sbox using AESE instruction ++ tbl @vtmp[0].16b, {$dat.16b}, $MaskV.16b ++___ ++ &mul_matrix(@vtmp[0], $TAHMatV, $TALMatV, $vtmp4); ++$code.=<<___; ++ eor @vtmp[1].16b, @vtmp[1].16b, @vtmp[1].16b ++ aese @vtmp[0].16b,@vtmp[1].16b ++___ ++ &mul_matrix(@vtmp[0], $ATAHMatV, $ATALMatV, $vtmp4); ++$code.=<<___; ++ mov $dat.16b,@vtmp[0].16b ++ ++ // linear transformation ++ ushr @vtmp[0].4s,$dat.4s,32-2 ++ ushr @vtmp[1].4s,$dat.4s,32-10 ++ ushr @vtmp[2].4s,$dat.4s,32-18 ++ ushr @vtmp[3].4s,$dat.4s,32-24 ++ sli @vtmp[0].4s,$dat.4s,2 ++ sli @vtmp[1].4s,$dat.4s,10 ++ sli @vtmp[2].4s,$dat.4s,18 ++ sli @vtmp[3].4s,$dat.4s,24 ++ eor $vtmp4.16b,@vtmp[0].16b,$dat.16b ++ eor $vtmp4.16b,$vtmp4.16b,$vtmp[1].16b ++ eor $dat.16b,@vtmp[2].16b,@vtmp[3].16b ++ eor $dat.16b,$dat.16b,$vtmp4.16b ++___ ++} ++ ++# sbox operation for 8-lane of words ++sub sbox_double() { ++ my $dat = shift; ++ my $datx = shift; ++ ++$code.=<<___; ++ // optimize sbox using AESE instruction ++ tbl @vtmp[0].16b, {$dat.16b}, $MaskV.16b ++ tbl @vtmp[1].16b, {$datx.16b}, $MaskV.16b ++___ ++ &mul_matrix(@vtmp[0], $TAHMatV, $TALMatV, $vtmp4); ++ &mul_matrix(@vtmp[1], $TAHMatV, $TALMatV, $vtmp4); ++$code.=<<___; ++ eor $vtmp5.16b, $vtmp5.16b, $vtmp5.16b ++ aese @vtmp[0].16b,$vtmp5.16b ++ aese @vtmp[1].16b,$vtmp5.16b ++___ ++ &mul_matrix(@vtmp[0], $ATAHMatV, $ATALMatV,$vtmp4); ++ &mul_matrix(@vtmp[1], $ATAHMatV, $ATALMatV,$vtmp4); ++$code.=<<___; ++ mov $dat.16b,@vtmp[0].16b ++ mov $datx.16b,@vtmp[1].16b ++ ++ // linear transformation ++ ushr @vtmp[0].4s,$dat.4s,32-2 ++ ushr $vtmp5.4s,$datx.4s,32-2 ++ ushr @vtmp[1].4s,$dat.4s,32-10 ++ ushr @vtmp[2].4s,$dat.4s,32-18 ++ ushr @vtmp[3].4s,$dat.4s,32-24 ++ sli @vtmp[0].4s,$dat.4s,2 ++ sli $vtmp5.4s,$datx.4s,2 ++ sli @vtmp[1].4s,$dat.4s,10 ++ sli @vtmp[2].4s,$dat.4s,18 ++ sli @vtmp[3].4s,$dat.4s,24 ++ eor $vtmp4.16b,@vtmp[0].16b,$dat.16b ++ eor $vtmp4.16b,$vtmp4.16b,@vtmp[1].16b ++ eor $dat.16b,@vtmp[2].16b,@vtmp[3].16b ++ eor $dat.16b,$dat.16b,$vtmp4.16b ++ ushr @vtmp[1].4s,$datx.4s,32-10 ++ ushr @vtmp[2].4s,$datx.4s,32-18 ++ ushr @vtmp[3].4s,$datx.4s,32-24 ++ sli @vtmp[1].4s,$datx.4s,10 ++ sli @vtmp[2].4s,$datx.4s,18 ++ sli @vtmp[3].4s,$datx.4s,24 ++ eor $vtmp4.16b,$vtmp5.16b,$datx.16b ++ eor $vtmp4.16b,$vtmp4.16b,@vtmp[1].16b ++ eor $datx.16b,@vtmp[2].16b,@vtmp[3].16b ++ eor $datx.16b,$datx.16b,$vtmp4.16b ++___ ++} +$code=<<___; +#include "arm_arch.h" +.arch armv8-a+crypto +-- +2.27.0 \ No newline at end of file diff --git 
a/Feature-add-sm4-enc-for-1-block.patch b/Feature-add-sm4-enc-for-1-block.patch new file mode 100644 index 0000000000000000000000000000000000000000..f0a8d24d616ab383677dac7e678873f7ef5ee68d --- /dev/null +++ b/Feature-add-sm4-enc-for-1-block.patch @@ -0,0 +1,92 @@ +From: Xu Yizhou + +--- +crypto/sm4/asm/vpsm4_ex-armv8.pl | 73 ++++++++++++++++++++++++++++++++ +1 file changed, 73 insertions(+) + +diff --git a/crypto/sm4/asm/vpsm4_ex-armv8.pl b/crypto/sm4/asm/vpsm4_ex-armv8.pl +index ae73f72..8917a47 100644 +--- a/crypto/sm4/asm/vpsm4_ex-armv8.pl ++++ b/crypto/sm4/asm/vpsm4_ex-armv8.pl +@@ -199,6 +199,79 @@ $code.=<<___; + eor $datx.16b,$datx.16b,$vtmp4.16b +___ +} ++ ++# sm4 for one block of data, in scalar registers word0/word1/word2/word3 ++sub sm4_1blk () { ++ my $kptr = shift; ++ ++$code.=<<___; ++ ldp $wtmp0,$wtmp1,[$kptr],8 ++ /* B0 ^= SBOX(B1 ^ B2 ^ B3 ^ RK0) */ ++ eor $wtmp3,$word2,$word3 ++ eor $wtmp2,$wtmp0,$word1 ++ eor $wtmp3,$wtmp3,$wtmp2 ++___ ++ &sbox_1word($wtmp3); ++$code.=<<___; ++ eor $word0,$word0,$wtmp3 ++ /* B1 ^= SBOX(B0 ^ B2 ^ B3 ^ RK1) */ ++ eor $wtmp3,$word2,$word3 ++ eor $wtmp2,$word0,$wtmp1 ++ eor $wtmp3,$wtmp3,$wtmp2 ++___ ++ &sbox_1word($wtmp3); ++$code.=<<___; ++ ldp $wtmp0,$wtmp1,[$kptr],8 ++ eor $word1,$word1,$wtmp3 ++ /* B2 ^= SBOX(B0 ^ B1 ^ B3 ^ RK2) */ ++ eor $wtmp3,$word0,$word1 ++ eor $wtmp2,$wtmp0,$word3 ++ eor $wtmp3,$wtmp3,$wtmp2 ++___ ++ &sbox_1word($wtmp3); ++$code.=<<___; ++ eor $word2,$word2,$wtmp3 ++ /* B3 ^= SBOX(B0 ^ B1 ^ B2 ^ RK3) */ ++ eor $wtmp3,$word0,$word1 ++ eor $wtmp2,$word2,$wtmp1 ++ eor $wtmp3,$wtmp3,$wtmp2 ++___ ++ &sbox_1word($wtmp3); ++$code.=<<___; ++ eor $word3,$word3,$wtmp3 ++___ ++} ++ ++sub encrypt_1blk_norev() { ++ my $dat = shift; ++ my $rks = shift; ++$code.=<<___; ++ mov $ptr,$rks ++ mov $counter,#8 ++ mov $word0,$dat.s[0] ++ mov $word1,$dat.s[1] ++ mov $word2,$dat.s[2] ++ mov $word3,$dat.s[3] ++10: ++___ ++ &sm4_1blk($ptr); ++$code.=<<___; ++ subs $counter,$counter,#1 ++ b.ne 10b ++ mov $dat.s[0],$word3 ++ mov $dat.s[1],$word2 ++ mov $dat.s[2],$word1 ++ mov $dat.s[3],$word0 ++___ ++} ++ ++sub encrypt_1blk() { ++ my $dat = shift; ++ my $rks = shift; ++ ++ &encrypt_1blk_norev($dat,$rks); ++ &rev32($dat,$dat); ++} +$code=<<___; +#include "arm_arch.h" +.arch armv8-a+crypto +-- +2.27.0 \ No newline at end of file diff --git a/Feature-add-sm4-enc-for-4-blocks.patch b/Feature-add-sm4-enc-for-4-blocks.patch new file mode 100644 index 0000000000000000000000000000000000000000..ec98aeba22df1515cec20367d23d12fa3b80ec0e --- /dev/null +++ b/Feature-add-sm4-enc-for-4-blocks.patch @@ -0,0 +1,110 @@ +From: Xu Yizhou + +--- +crypto/sm4/asm/vpsm4_ex-armv8.pl | 77 ++++++++++++++++++++++++++++++++ +1 file changed, 77 insertions(+) + +diff --git a/crypto/sm4/asm/vpsm4_ex-armv8.pl b/crypto/sm4/asm/vpsm4_ex-armv8.pl +index 8917a47..1298179 100644 +--- a/crypto/sm4/asm/vpsm4_ex-armv8.pl ++++ b/crypto/sm4/asm/vpsm4_ex-armv8.pl +@@ -242,6 +242,54 @@ $code.=<<___; +___ +} + ++# sm4 for 4-lanes of data, in neon registers data0/data1/data2/data3 ++sub sm4_4blks () { ++ my $kptr = shift; ++ ++$code.=<<___; ++ ldp $wtmp0,$wtmp1,[$kptr],8 ++ dup $rk0.4s,$wtmp0 ++ dup $rk1.4s,$wtmp1 ++ ++ /* B0 ^= SBOX(B1 ^ B2 ^ B3 ^ RK0) */ ++ eor $rka.16b,@data[2].16b,@data[3].16b ++ eor $rk0.16b,@data[1].16b,$rk0.16b ++ eor $rk0.16b,$rka.16b,$rk0.16b ++___ ++ &sbox($rk0); ++$code.=<<___; ++ eor @data[0].16b,@data[0].16b,$rk0.16b ++ ++ /* B1 ^= SBOX(B0 ^ B2 ^ B3 ^ RK1) */ ++ eor $rka.16b,$rka.16b,@data[0].16b ++ eor $rk1.16b,$rka.16b,$rk1.16b ++___ ++ &sbox($rk1); 
++$code.=<<___; ++ ldp $wtmp0,$wtmp1,[$kptr],8 ++ eor @data[1].16b,@data[1].16b,$rk1.16b ++ ++ dup $rk0.4s,$wtmp0 ++ dup $rk1.4s,$wtmp1 ++ ++ /* B2 ^= SBOX(B0 ^ B1 ^ B3 ^ RK2) */ ++ eor $rka.16b,@data[0].16b,@data[1].16b ++ eor $rk0.16b,@data[3].16b,$rk0.16b ++ eor $rk0.16b,$rka.16b,$rk0.16b ++___ ++ &sbox($rk0); ++$code.=<<___; ++ eor @data[2].16b,@data[2].16b,$rk0.16b ++ ++ /* B3 ^= SBOX(B0 ^ B1 ^ B2 ^ RK3) */ ++ eor $rka.16b,$rka.16b,@data[2].16b ++ eor $rk1.16b,$rka.16b,$rk1.16b ++___ ++ &sbox($rk1); ++$code.=<<___; ++ eor @data[3].16b,@data[3].16b,$rk1.16b ++___ ++} +sub encrypt_1blk_norev() { + my $dat = shift; + my $rks = shift; +@@ -272,6 +320,23 @@ sub encrypt_1blk() { + &encrypt_1blk_norev($dat,$rks); + &rev32($dat,$dat); +} ++ ++sub encrypt_4blks() { ++$code.=<<___; ++ mov $ptr,$rks1 ++ mov $counter,#8 ++10: ++___ ++ &sm4_4blks($ptr); ++$code.=<<___; ++ subs $counter,$counter,#1 ++ b.ne 10b ++___ ++ &rev32(@vtmp[3],@data[0]); ++ &rev32(@vtmp[2],@data[1]); ++ &rev32(@vtmp[1],@data[2]); ++ &rev32(@vtmp[0],@data[3]); ++} +$code=<<___; +#include "arm_arch.h" +.arch armv8-a+crypto +@@ -363,6 +428,18 @@ ___ +}}} + + ++{{{ ++$code.=<<___; ++.type ${prefix}_enc_4blks,%function ++.align 4 ++${prefix}_enc_4blks: ++___ ++ &encrypt_4blks(); ++$code.=<<___; ++ ret ++.size ${prefix}_enc_4blks,.-${prefix}_enc_4blks ++___ ++}}} +{{{ +my ($key,$keys)=("x0","x1"); +$code.=<<___; +-- +2.27.0 \ No newline at end of file diff --git a/Feature-add-sm4-enc-for-8-blocks.patch b/Feature-add-sm4-enc-for-8-blocks.patch new file mode 100644 index 0000000000000000000000000000000000000000..863a77649ac9d8950a0d2e3ae44131a825b1b163 --- /dev/null +++ b/Feature-add-sm4-enc-for-8-blocks.patch @@ -0,0 +1,133 @@ +From: Xu Yizhou + +--- +crypto/sm4/asm/vpsm4_ex-armv8.pl | 100 +++++++++++++++++++++++++++++++ +1 file changed, 100 insertions(+) + +diff --git a/crypto/sm4/asm/vpsm4_ex-armv8.pl b/crypto/sm4/asm/vpsm4_ex-armv8.pl +index 1298179..59c3081 100644 +--- a/crypto/sm4/asm/vpsm4_ex-armv8.pl ++++ b/crypto/sm4/asm/vpsm4_ex-armv8.pl +@@ -290,6 +290,69 @@ $code.=<<___; + eor @data[3].16b,@data[3].16b,$rk1.16b +___ +} ++ ++# sm4 for 8 lanes of data, in neon registers ++# data0/data1/data2/data3 datax0/datax1/datax2/datax3 ++sub sm4_8blks () { ++ my $kptr = shift; ++ ++$code.=<<___; ++ ldp $wtmp0,$wtmp1,[$kptr],8 ++ /* B0 ^= SBOX(B1 ^ B2 ^ B3 ^ RK0) */ ++ dup $rk0.4s,$wtmp0 ++ eor $rka.16b,@data[2].16b,@data[3].16b ++ eor $rkb.16b,@datax[2].16b,@datax[3].16b ++ eor @vtmp[0].16b,@data[1].16b,$rk0.16b ++ eor @vtmp[1].16b,@datax[1].16b,$rk0.16b ++ eor $rk0.16b,$rka.16b,@vtmp[0].16b ++ eor $rk1.16b,$rkb.16b,@vtmp[1].16b ++___ ++ &sbox_double($rk0,$rk1); ++$code.=<<___; ++ eor @data[0].16b,@data[0].16b,$rk0.16b ++ eor @datax[0].16b,@datax[0].16b,$rk1.16b ++ ++ /* B1 ^= SBOX(B0 ^ B2 ^ B3 ^ RK1) */ ++ dup $rk1.4s,$wtmp1 ++ eor $rka.16b,$rka.16b,@data[0].16b ++ eor $rkb.16b,$rkb.16b,@datax[0].16b ++ eor $rk0.16b,$rka.16b,$rk1.16b ++ eor $rk1.16b,$rkb.16b,$rk1.16b ++___ ++ &sbox_double($rk0,$rk1); ++$code.=<<___; ++ ldp $wtmp0,$wtmp1,[$kptr],8 ++ eor @data[1].16b,@data[1].16b,$rk0.16b ++ eor @datax[1].16b,@datax[1].16b,$rk1.16b ++ ++ /* B2 ^= SBOX(B0 ^ B1 ^ B3 ^ RK2) */ ++ dup $rk0.4s,$wtmp0 ++ eor $rka.16b,@data[0].16b,@data[1].16b ++ eor $rkb.16b,@datax[0].16b,@datax[1].16b ++ eor @vtmp[0].16b,@data[3].16b,$rk0.16b ++ eor @vtmp[1].16b,@datax[3].16b,$rk0.16b ++ eor $rk0.16b,$rka.16b,@vtmp[0].16b ++ eor $rk1.16b,$rkb.16b,@vtmp[1].16b ++___ ++ &sbox_double($rk0,$rk1); ++$code.=<<___; ++ eor @data[2].16b,@data[2].16b,$rk0.16b ++ 
eor @datax[2].16b,@datax[2].16b,$rk1.16b ++ ++ /* B3 ^= SBOX(B0 ^ B1 ^ B2 ^ RK3) */ ++ dup $rk1.4s,$wtmp1 ++ eor $rka.16b,$rka.16b,@data[2].16b ++ eor $rkb.16b,$rkb.16b,@datax[2].16b ++ eor $rk0.16b,$rka.16b,$rk1.16b ++ eor $rk1.16b,$rkb.16b,$rk1.16b ++___ ++ &sbox_double($rk0,$rk1); ++$code.=<<___; ++ eor @data[3].16b,@data[3].16b,$rk0.16b ++ eor @datax[3].16b,@datax[3].16b,$rk1.16b ++___ ++} ++ +sub encrypt_1blk_norev() { + my $dat = shift; + my $rks = shift; +@@ -337,6 +400,29 @@ ___ + &rev32(@vtmp[1],@data[2]); + &rev32(@vtmp[0],@data[3]); +} ++ ++sub encrypt_8blks() { ++ my $rks = shift; ++$code.=<<___; ++ mov $ptr,$rks ++ mov $counter,#8 ++10: ++___ ++ &sm4_8blks($ptr); ++$code.=<<___; ++ subs $counter,$counter,#1 ++ b.ne 10b ++___ ++ &rev32(@vtmp[3],@data[0]); ++ &rev32(@vtmp[2],@data[1]); ++ &rev32(@vtmp[1],@data[2]); ++ &rev32(@vtmp[0],@data[3]); ++ &rev32(@data[3],@datax[0]); ++ &rev32(@data[2],@datax[1]); ++ &rev32(@data[1],@datax[2]); ++ &rev32(@data[0],@datax[3]); ++} ++ +$code=<<___; +#include "arm_arch.h" +.arch armv8-a+crypto +@@ -440,6 +526,20 @@ $code.=<<___; +.size ${prefix}_enc_4blks,.-${prefix}_enc_4blks +___ +}}} ++ ++{{{ ++$code.=<<___; ++.type ${prefix}_enc_8blks,%function ++.align 4 ++${prefix}_enc_8blks: ++___ ++ &encrypt_8blks($rks1); ++$code.=<<___; ++ ret ++.size ${prefix}_enc_8blks,.-${prefix}_enc_8blks ++___ ++}}} ++ +{{{ +my ($key,$keys)=("x0","x1"); +$code.=<<___; +-- +2.27.0 \ No newline at end of file diff --git a/Feature-add-sm4-set-enc-dec-key-for-armv8.patch b/Feature-add-sm4-set-enc-dec-key-for-armv8.patch new file mode 100644 index 0000000000000000000000000000000000000000..011c186b0e6f5b0eeb8149f7702835c82a1d10f6 --- /dev/null +++ b/Feature-add-sm4-set-enc-dec-key-for-armv8.patch @@ -0,0 +1,170 @@ +From: Xu Yizhou + +--- +Configurations/00-base-templates.conf | 1 + +Configure | 4 +++ +crypto/evp/e_sm4.c | 40 ++++++++++++++++++++++++--- +crypto/sm4/asm/vpsm4_ex-armv8.pl | 33 ++++++++++++++++++++++ +crypto/sm4/build.info | 5 +++- +5 files changed, 78 insertions(+), 5 deletions(-) + +diff --git a/Configurations/00-base-templates.conf b/Configurations/00-base-templates.conf +index e01dc63..1d35012 100644 +--- a/Configurations/00-base-templates.conf ++++ b/Configurations/00-base-templates.conf +@@ -321,6 +321,7 @@ my %targets=( + chacha_asm_src => "chacha-armv8.S", + poly1305_asm_src=> "poly1305-armv8.S", + keccak1600_asm_src => "keccak1600-armv8.S", ++ sm4_asm_src => "vpsm4_ex-armv8.S", + }, + parisc11_asm => { + template => 1, +diff --git a/Configure b/Configure +index a41c897..3bfe360 100755 +--- a/Configure ++++ b/Configure +@@ -1420,6 +1420,9 @@ unless ($disabled{asm}) { + if ($target{poly1305_asm_src} ne "") { + push @{$config{lib_defines}}, "POLY1305_ASM"; + } ++ if ($target{sm4_asm_src} ne "") { ++ push @{$config{lib_defines}}, "VPSM4_EX_ASM"; ++ } +} + +my %predefined_C = compiler_predefined($config{CROSS_COMPILE}.$config{CC}); +@@ -3375,6 +3378,7 @@ sub print_table_entry + "mtoutflag", + "multilib", + "build_scheme", ++ "sm4_asm_src", + ); + + if ($type eq "TABLE") { +diff --git a/crypto/evp/e_sm4.c b/crypto/evp/e_sm4.c +index 630dc6b..d2ef74e 100644 +--- a/crypto/evp/e_sm4.c ++++ b/crypto/evp/e_sm4.c +@@ -17,6 +17,15 @@ +# include "crypto/evp.h" +# include "evp_local.h" + ++#if defined(OPENSSL_CPUID_OBJ) && (defined(__arm__) || defined(__arm) || defined(__aarch64__)) ++# include "arm_arch.h" ++# if __ARM_MAX_ARCH__>=7 ++# if defined(VPSM4_EX_ASM) ++# define VPSM4_EX_CAPABLE (OPENSSL_armcap_P & ARMV8_AES) ++# endif ++# endif ++#endif ++ +typedef 
struct { + union { + double align; +@@ -25,6 +34,13 @@ typedef struct { + block128_f block; +} EVP_SM4_KEY; + ++#ifdef VPSM4_EX_CAPABLE ++void vpsm4_ex_set_encrypt_key(const unsigned char *userKey, SM4_KEY *key); ++void vpsm4_ex_set_decrypt_key(const unsigned char *userKey, SM4_KEY *key); ++#define vpsm4_ex_encrypt SM4_encrypt ++#define vpsm4_ex_decrypt SM4_encrypt ++#endif ++ +# define BLOCK_CIPHER_generic(nid,blocksize,ivlen,nmode,mode,MODE,flags) \ +static const EVP_CIPHER sm4_##mode = { \ + nid##_##nmode,blocksize,128/8,ivlen, \ +@@ -52,11 +68,27 @@ static int sm4_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, + + mode = EVP_CIPHER_CTX_mode(ctx); + if ((mode == EVP_CIPH_ECB_MODE || mode == EVP_CIPH_CBC_MODE) && !enc) { +- dat->block = (block128_f)SM4_decrypt; +- SM4_set_key(key, EVP_CIPHER_CTX_get_cipher_data(ctx)); ++#ifdef VPSM4_EX_CAPABLE ++ if (VPSM4_EX_CAPABLE) { ++ vpsm4_ex_set_decrypt_key(key, &dat->ks.ks); ++ dat->block = (block128_f) vpsm4_ex_decrypt; ++ } else ++#endif ++ { ++ dat->block = (block128_f)SM4_decrypt; ++ SM4_set_key(key, EVP_CIPHER_CTX_get_cipher_data(ctx)); ++ } + } else { +- dat->block = (block128_f)SM4_encrypt; +- SM4_set_key(key, EVP_CIPHER_CTX_get_cipher_data(ctx)); ++#ifdef VPSM4_EX_CAPABLE ++ if (VPSM4_EX_CAPABLE) { ++ vpsm4_ex_set_encrypt_key(key, &dat->ks.ks); ++ dat->block = (block128_f) vpsm4_ex_encrypt; ++ } else ++#endif ++ { ++ dat->block = (block128_f)SM4_encrypt; ++ SM4_set_key(key, EVP_CIPHER_CTX_get_cipher_data(ctx)); ++ } + } + return 1; +} +diff --git a/crypto/sm4/asm/vpsm4_ex-armv8.pl b/crypto/sm4/asm/vpsm4_ex-armv8.pl +index 038b6e2..bee9499 100644 +--- a/crypto/sm4/asm/vpsm4_ex-armv8.pl ++++ b/crypto/sm4/asm/vpsm4_ex-armv8.pl +@@ -181,6 +181,39 @@ $code.=<<___; +___ +}}} + ++ ++{{{ ++my ($key,$keys)=("x0","x1"); ++$code.=<<___; ++.globl ${prefix}_set_encrypt_key ++.type ${prefix}_set_encrypt_key,%function ++.align 5 ++${prefix}_set_encrypt_key: ++ stp x29,x30,[sp,#-16]! ++ mov w2,1 ++ bl ${prefix}_set_key ++ ldp x29,x30,[sp],#16 ++ ret ++.size ${prefix}_set_encrypt_key,.-${prefix}_set_encrypt_key ++___ ++}}} ++ ++{{{ ++my ($key,$keys)=("x0","x1"); ++$code.=<<___; ++.globl ${prefix}_set_decrypt_key ++.type ${prefix}_set_decrypt_key,%function ++.align 5 ++${prefix}_set_decrypt_key: ++ stp x29,x30,[sp,#-16]! ++ mov w2,0 ++ bl ${prefix}_set_key ++ ldp x29,x30,[sp],#16 ++ ret ++.size ${prefix}_set_decrypt_key,.-${prefix}_set_decrypt_key ++___ ++}}} ++ +######################################## +open SELF,$0; +while() { +diff --git a/crypto/sm4/build.info b/crypto/sm4/build.info +index b65a7d1..bb042c5 100644 +--- a/crypto/sm4/build.info ++++ b/crypto/sm4/build.info +@@ -1,4 +1,7 @@ +LIBS=../../libcrypto +SOURCE[../../libcrypto]=\ +- sm4.c ++ sm4.c {- $target{sm4_asm_src} -} + ++ ++GENERATE[vpsm4_ex-armv8.S]=asm/vpsm4_ex-armv8.pl $(PERLASM_SCHEME) ++INCLUDE[vpsm4_ex-armv8.o]=.. 
+\ No newline at end of file +-- +2.27.0 \ No newline at end of file diff --git a/Feature-add-sm4-set-key-for-armv8.patch b/Feature-add-sm4-set-key-for-armv8.patch new file mode 100644 index 0000000000000000000000000000000000000000..b35faa410d25adbbe6f0ea52efe0efcdc631342b --- /dev/null +++ b/Feature-add-sm4-set-key-for-armv8.patch @@ -0,0 +1,213 @@ +From: Xu Yizhou + +--- +crypto/sm4/asm/vpsm4_ex-armv8.pl | 198 +++++++++++++++++++++++++++++++ +1 file changed, 198 insertions(+) +create mode 100644 crypto/sm4/asm/vpsm4_ex-armv8.pl + +diff --git a/crypto/sm4/asm/vpsm4_ex-armv8.pl b/crypto/sm4/asm/vpsm4_ex-armv8.pl +new file mode 100644 +index 0000000..038b6e2 +--- /dev/null ++++ b/crypto/sm4/asm/vpsm4_ex-armv8.pl +@@ -0,0 +1,198 @@ ++#! /usr/bin/env perl ++# Copyright 2022 The OpenSSL Project Authors. All Rights Reserved. ++# ++# Licensed under the Apache License 2.0 (the "License"). You may not use ++# this file except in compliance with the License. You can obtain a copy ++# in the file LICENSE in the source distribution or at ++# https://www.openssl.org/source/license.html ++ ++# ++# This module implements SM4 with ASIMD and AESE on AARCH64 ++# ++# Feb 2022 ++# ++ ++# $output is the last argument if it looks like a file (it has an extension) ++# $flavour is the first argument if it doesn't look like a file ++$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef; ++$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef; ++ ++$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; ++( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or ++( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or ++die "can't locate arm-xlate.pl"; ++ ++open OUT,"| \"$^X\" $xlate $flavour \"$output\"" ++ or die "can't call $xlate: $!"; ++*STDOUT=*OUT; ++ ++$prefix="vpsm4_ex"; ++my ($inp,$outp,$rks1,$rks2,$ivp,$enc)=("x0","x1","x3","x4","x5","x6"); ++my ($blocks,$len)=("x2","x2"); ++my $remain=("x7"); ++my ($ptr,$counter)=("x12","w13"); ++my ($wtmp0,$wtmp1,$wtmp2,$wtmp3)=("w8","w9","w10","w11"); ++my ($xtmp0,$xtmp1,$xtmp2,$xtmp3)=("x8","x9","x10","x11"); ++my ($word0,$word1,$word2,$word3)=("w14","w15","w16","w17"); ++ ++my @qtmp=map("q$_",(8..11)); ++my @vtmp=map("v$_",(8..11)); ++my ($rk0,$rk1)=("v12","v13"); ++my ($rka,$rkb)=("v14","v15"); ++my @data=map("v$_",(16..19)); ++my @datax=map("v$_",(20..23)); ++my ($vtmp4,$vtmp5)=("v24","v25"); ++my ($MaskV,$TAHMatV,$TALMatV,$ATAHMatV,$ATALMatV,$ANDMaskV)=("v26","v27","v28","v29","v30","v31"); ++my ($MaskQ,$TAHMatQ,$TALMatQ,$ATAHMatQ,$ATALMatQ,$ANDMaskQ)=("q26","q27","q28","q29","q30","q31"); ++ ++sub rev32() { ++ my $dst = shift; ++ my $src = shift; ++ ++ if ($src and ("$src" ne "$dst")) { ++$code.=<<___; ++#ifndef __ARMEB__ ++ rev32 $dst.16b,$src.16b ++#else ++ mov $dst.16b,$src.16b ++#endif ++___ ++ } else { ++$code.=<<___; ++#ifndef __ARMEB__ ++ rev32 $dst.16b,$dst.16b ++#endif ++___ ++ } ++} ++ ++sub load_sbox_matrix () { ++$code.=<<___; ++ ldr $MaskQ, =0x0306090c0f0205080b0e0104070a0d00 ++ ldr $TAHMatQ, =0x22581a6002783a4062185a2042387a00 ++ ldr $TALMatQ, =0xc10bb67c4a803df715df62a89e54e923 ++ ldr $ATAHMatQ, =0x1407c6d56c7fbeadb9aa6b78c1d21300 ++ ldr $ATALMatQ, =0xe383c1a1fe9edcbc6404462679195b3b ++ ldr $ANDMaskQ, =0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f ++___ ++} ++# matrix multiplication Mat*x = (lowerMat*x) ^ (higherMat*x) ++sub mul_matrix() { ++ my $x = shift; ++ my $higherMat = shift; ++ my $lowerMat = shift; ++ my $tmp = shift; ++$code.=<<___; ++ ushr $tmp.16b, $x.16b, 4 ++ and $x.16b, $x.16b, $ANDMaskV.16b ++ tbl $x.16b, {$lowerMat.16b}, $x.16b ++ tbl 
$tmp.16b, {$higherMat.16b}, $tmp.16b ++ eor $x.16b, $x.16b, $tmp.16b ++___ ++} ++ ++$code=<<___; ++#include "arm_arch.h" ++.arch armv8-a+crypto ++.text ++ ++.type ${prefix}_consts,%object ++.align 7 ++${prefix}_consts: ++.Lck: ++ .long 0x00070E15, 0x1C232A31, 0x383F464D, 0x545B6269 ++ .long 0x70777E85, 0x8C939AA1, 0xA8AFB6BD, 0xC4CBD2D9 ++ .long 0xE0E7EEF5, 0xFC030A11, 0x181F262D, 0x343B4249 ++ .long 0x50575E65, 0x6C737A81, 0x888F969D, 0xA4ABB2B9 ++ .long 0xC0C7CED5, 0xDCE3EAF1, 0xF8FF060D, 0x141B2229 ++ .long 0x30373E45, 0x4C535A61, 0x686F767D, 0x848B9299 ++ .long 0xA0A7AEB5, 0xBCC3CAD1, 0xD8DFE6ED, 0xF4FB0209 ++ .long 0x10171E25, 0x2C333A41, 0x484F565D, 0x646B7279 ++.Lfk: ++ .long 0xa3b1bac6, 0x56aa3350, 0x677d9197, 0xb27022dc ++.Lshuffles: ++ .long 0x07060504, 0x0B0A0908, 0x0F0E0D0C, 0x03020100 ++ ++.size ${prefix}_consts,.-${prefix}_consts ++___ ++ ++{{{ ++my ($userKey,$roundKey,$enc)=("x0","x1","w2"); ++my ($pointer,$schedules,$wtmp,$roundkey)=("x5","x6","w7","w8"); ++my ($vkey,$vfk,$vmap)=("v5","v6","v7"); ++$code.=<<___; ++.type ${prefix}_set_key,%function ++.align 4 ++${prefix}_set_key: ++ ld1 {$vkey.4s},[$userKey] ++___ ++ &load_sbox_matrix(); ++ &rev32($vkey,$vkey); ++$code.=<<___; ++ adr $pointer,.Lshuffles ++ ld1 {$vmap.4s},[$pointer] ++ adr $pointer,.Lfk ++ ld1 {$vfk.4s},[$pointer] ++ eor $vkey.16b,$vkey.16b,$vfk.16b ++ mov $schedules,#32 ++ adr $pointer,.Lck ++ movi @vtmp[0].16b,#64 ++ cbnz $enc,1f ++ add $roundKey,$roundKey,124 ++1: ++ mov $wtmp,$vkey.s[1] ++ ldr $roundkey,[$pointer],#4 ++ eor $roundkey,$roundkey,$wtmp ++ mov $wtmp,$vkey.s[2] ++ eor $roundkey,$roundkey,$wtmp ++ mov $wtmp,$vkey.s[3] ++ eor $roundkey,$roundkey,$wtmp ++ ++ // optimize sbox using AESE instruction ++ mov @data[0].s[0],$roundkey ++ tbl @vtmp[0].16b, {@data[0].16b}, $MaskV.16b ++___ ++ &mul_matrix(@vtmp[0], $TAHMatV, $TALMatV, @vtmp[2]); ++$code.=<<___; ++ eor @vtmp[1].16b, @vtmp[1].16b, @vtmp[1].16b ++ aese @vtmp[0].16b,@vtmp[1].16b ++___ ++ &mul_matrix(@vtmp[0], $ATAHMatV, $ATALMatV, @vtmp[2]); ++$code.=<<___; ++ mov $wtmp,@vtmp[0].s[0] ++ ++ // linear transformation ++ eor $roundkey,$wtmp,$wtmp,ror #19 ++ eor $roundkey,$roundkey,$wtmp,ror #9 ++ mov $wtmp,$vkey.s[0] ++ eor $roundkey,$roundkey,$wtmp ++ mov $vkey.s[0],$roundkey ++ cbz $enc,2f ++ str $roundkey,[$roundKey],#4 ++ b 3f ++2: ++ str $roundkey,[$roundKey],#-4 ++3: ++ tbl $vkey.16b,{$vkey.16b},$vmap.16b ++ subs $schedules,$schedules,#1 ++ b.ne 1b ++ ret ++.size ${prefix}_set_key,.-${prefix}_set_key ++___ ++}}} ++ ++######################################## ++open SELF,$0; ++while() { ++ next if (/^#!/); ++ last if (!s/^#/\/\// and !/^$/); ++ print; ++} ++close SELF; ++ ++foreach(split("\n",$code)) { ++ s/\`([^\`]*)\`/eval($1)/ge; ++ print $_,"\n"; ++} ++ ++close STDOUT or die "error closing STDOUT: $!"; +-- +2.27.0 \ No newline at end of file diff --git a/Feature-refactor-sm4-evp-implementation.patch b/Feature-refactor-sm4-evp-implementation.patch new file mode 100644 index 0000000000000000000000000000000000000000..a09e665336e13febd555254dea3bc43fa7c13562 --- /dev/null +++ b/Feature-refactor-sm4-evp-implementation.patch @@ -0,0 +1,189 @@ +From: Xu Yizhou + +--- +crypto/evp/e_sm4.c | 141 +++++++++++++++++++++++++++++---------------- +1 file changed, 90 insertions(+), 51 deletions(-) + +diff --git a/crypto/evp/e_sm4.c b/crypto/evp/e_sm4.c +index fce3279..630dc6b 100644 +--- a/crypto/evp/e_sm4.c ++++ b/crypto/evp/e_sm4.c +@@ -15,86 +15,125 @@ +# include +# include "crypto/sm4.h" +# include "crypto/evp.h" ++# include "evp_local.h" + +typedef 
struct { +- SM4_KEY ks; ++ union { ++ double align; ++ SM4_KEY ks; ++ } ks; ++ block128_f block; +} EVP_SM4_KEY; + ++# define BLOCK_CIPHER_generic(nid,blocksize,ivlen,nmode,mode,MODE,flags) \ ++static const EVP_CIPHER sm4_##mode = { \ ++ nid##_##nmode,blocksize,128/8,ivlen, \ ++ flags|EVP_CIPH_##MODE##_MODE, \ ++ sm4_init_key, \ ++ sm4_##mode##_cipher, \ ++ NULL, \ ++ sizeof(EVP_SM4_KEY), \ ++ NULL,NULL,NULL,NULL }; \ ++const EVP_CIPHER *EVP_sm4_##mode(void) \ ++{ return &sm4_##mode; } ++ ++#define BLOCK_CIPHER_generic_pack(nid,flags) \ ++ BLOCK_CIPHER_generic(nid,16,16,cbc,cbc,CBC,flags|EVP_CIPH_FLAG_DEFAULT_ASN1) \ ++ BLOCK_CIPHER_generic(nid,16,0,ecb,ecb,ECB,flags|EVP_CIPH_FLAG_DEFAULT_ASN1) \ ++ BLOCK_CIPHER_generic(nid,1,16,ofb128,ofb,OFB,flags|EVP_CIPH_FLAG_DEFAULT_ASN1) \ ++ BLOCK_CIPHER_generic(nid,1,16,cfb128,cfb,CFB,flags|EVP_CIPH_FLAG_DEFAULT_ASN1) \ ++ BLOCK_CIPHER_generic(nid,1,16,ctr,ctr,CTR,flags) ++ +static int sm4_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, + const unsigned char *iv, int enc) +{ +- SM4_set_key(key, EVP_CIPHER_CTX_get_cipher_data(ctx)); ++ int mode; ++ EVP_SM4_KEY *dat = EVP_C_DATA(EVP_SM4_KEY, ctx); ++ ++ mode = EVP_CIPHER_CTX_mode(ctx); ++ if ((mode == EVP_CIPH_ECB_MODE || mode == EVP_CIPH_CBC_MODE) && !enc) { ++ dat->block = (block128_f)SM4_decrypt; ++ SM4_set_key(key, EVP_CIPHER_CTX_get_cipher_data(ctx)); ++ } else { ++ dat->block = (block128_f)SM4_encrypt; ++ SM4_set_key(key, EVP_CIPHER_CTX_get_cipher_data(ctx)); ++ } + return 1; +} + +-static void sm4_cbc_encrypt(const unsigned char *in, unsigned char *out, +- size_t len, const SM4_KEY *key, +- unsigned char *ivec, const int enc) ++static int sm4_cbc_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, ++ const unsigned char *in, size_t len) +{ +- if (enc) +- CRYPTO_cbc128_encrypt(in, out, len, key, ivec, +- (block128_f)SM4_encrypt); ++ EVP_SM4_KEY *dat = EVP_C_DATA(EVP_SM4_KEY,ctx); ++ ++ if (EVP_CIPHER_CTX_encrypting(ctx)) ++ CRYPTO_cbc128_encrypt(in, out, len, &dat->ks.ks, ++ EVP_CIPHER_CTX_iv_noconst(ctx), dat->block); + else +- CRYPTO_cbc128_decrypt(in, out, len, key, ivec, +- (block128_f)SM4_decrypt); ++ CRYPTO_cbc128_decrypt(in, out, len, &dat->ks.ks, ++ EVP_CIPHER_CTX_iv_noconst(ctx), dat->block); ++ return 1; +} + +-static void sm4_cfb128_encrypt(const unsigned char *in, unsigned char *out, +- size_t length, const SM4_KEY *key, +- unsigned char *ivec, int *num, const int enc) ++static int sm4_cfb_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, ++ const unsigned char *in, size_t len) +{ +- CRYPTO_cfb128_encrypt(in, out, length, key, ivec, num, enc, +- (block128_f)SM4_encrypt); +-} ++ EVP_SM4_KEY *dat = EVP_C_DATA(EVP_SM4_KEY,ctx); ++ int num = EVP_CIPHER_CTX_num(ctx); + +-static void sm4_ecb_encrypt(const unsigned char *in, unsigned char *out, +- const SM4_KEY *key, const int enc) +-{ +- if (enc) +- SM4_encrypt(in, out, key); +- else +- SM4_decrypt(in, out, key); ++ CRYPTO_cfb128_encrypt(in, out, len, &dat->ks.ks, ++ ctx->iv, &num, ++ EVP_CIPHER_CTX_encrypting(ctx), dat->block); ++ EVP_CIPHER_CTX_set_num(ctx, num); ++ ++ return 1; +} + +-static void sm4_ofb128_encrypt(const unsigned char *in, unsigned char *out, +- size_t length, const SM4_KEY *key, +- unsigned char *ivec, int *num) ++static int sm4_ecb_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, ++ const unsigned char *in, size_t len) +{ +- CRYPTO_ofb128_encrypt(in, out, length, key, ivec, num, +- (block128_f)SM4_encrypt); +-} ++ size_t bl = EVP_CIPHER_CTX_block_size(ctx); ++ size_t i; ++ EVP_SM4_KEY *dat = 
EVP_C_DATA(EVP_SM4_KEY,ctx);
+
+-IMPLEMENT_BLOCK_CIPHER(sm4, ks, sm4, EVP_SM4_KEY, NID_sm4,
+-                       16, 16, 16, 128, EVP_CIPH_FLAG_DEFAULT_ASN1,
+-                       sm4_init_key, 0, 0, 0, 0)
++    if (len < bl){
++        return 1;
++    }
++    for (i = 0, len -= bl; i <= len; i += bl)
++        (*dat->block) (in + i, out + i, &dat->ks.ks);
++    return 1;
++}
+
+-static int sm4_ctr_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
++static int sm4_ofb_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
+                           const unsigned char *in, size_t len)
+ {
+-    unsigned int num = EVP_CIPHER_CTX_num(ctx);
+-    EVP_SM4_KEY *dat = EVP_C_DATA(EVP_SM4_KEY, ctx);
++    EVP_SM4_KEY *dat = EVP_C_DATA(EVP_SM4_KEY,ctx);
++    int num = EVP_CIPHER_CTX_num(ctx);
+
+-    CRYPTO_ctr128_encrypt(in, out, len, &dat->ks,
+-                          EVP_CIPHER_CTX_iv_noconst(ctx),
+-                          EVP_CIPHER_CTX_buf_noconst(ctx), &num,
+-                          (block128_f)SM4_encrypt);
++    CRYPTO_ofb128_encrypt(in, out, len, &dat->ks.ks,
++                          ctx->iv, &num, dat->block);
+     EVP_CIPHER_CTX_set_num(ctx, num);
+     return 1;
+ }
+
+-static const EVP_CIPHER sm4_ctr_mode = {
+-    NID_sm4_ctr, 1, 16, 16,
+-    EVP_CIPH_CTR_MODE,
+-    sm4_init_key,
+-    sm4_ctr_cipher,
+-    NULL,
+-    sizeof(EVP_SM4_KEY),
+-    NULL, NULL, NULL, NULL
+-};
+-
+-const EVP_CIPHER *EVP_sm4_ctr(void)
++static int sm4_ctr_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
++                          const unsigned char *in, size_t len)
+ {
+-    return &sm4_ctr_mode;
++    int n = EVP_CIPHER_CTX_num(ctx);
++    unsigned int num;
++    EVP_SM4_KEY *dat = EVP_C_DATA(EVP_SM4_KEY,ctx);
++
++    if (n < 0)
++        return 0;
++    num = (unsigned int)n;
++
++    CRYPTO_ctr128_encrypt(in, out, len, &dat->ks.ks,
++                          ctx->iv,
++                          EVP_CIPHER_CTX_buf_noconst(ctx), &num,
++                          dat->block);
++    EVP_CIPHER_CTX_set_num(ctx, num);
++    return 1;
+ }
+
++BLOCK_CIPHER_generic_pack(NID_sm4, 0)
+ #endif
+--
+2.27.0
diff --git a/openssl.spec b/openssl.spec
index a6b45e03ce56dad0f69bc084b663c50ed7689058..1854823030257e48dbbd89c00a63fe53c493b1e7 100644
--- a/openssl.spec
+++ b/openssl.spec
@@ -2,7 +2,7 @@
 Name: openssl
 Epoch: 1
 Version: 1.1.1m
-Release: 6
+Release: 7
 Summary: Cryptography and SSL/TLS Toolkit
 License: OpenSSL and SSLeay
 URL: https://www.openssl.org/
@@ -32,6 +32,14 @@
 Patch21: Feature-Support-TLCP-protocol.patch
 Patch22: Feature-X509-command-supports-SM2-certificate-signing-with-default-sm2id.patch
 Patch23: CVE-2022-2068-Fix-file-operations-in-c_rehash.patch
 Patch24: CVE-2022-2097-Fix-AES-OCB-encrypt-decrypt-for-x86-AES-NI.patch
+Patch25: Feature-refactor-sm4-evp-implementation.patch
+Patch26: Feature-add-sm4-set-key-for-armv8.patch
+Patch27: Feature-add-sm4-set-enc-dec-key-for-armv8.patch
+Patch28: Feature-add-Sbox-optimized-by-AESE.patch
+Patch29: Feature-add-sm4-enc-for-1-block.patch
+Patch30: Feature-add-sm4-enc-for-4-blocks.patch
+Patch31: Feature-add-sm4-enc-for-8-blocks.patch
+Patch32: Feature-add-SM4-ECB-mode-for-armv8.patch
 BuildRequires: gcc perl make lksctp-tools-devel coreutils util-linux zlib-devel
 Requires: coreutils %{name}-libs%{?_isa} = %{epoch}:%{version}-%{release}
@@ -228,6 +236,9 @@
 make test || :
 %ldconfig_scriptlets libs
 %changelog
+* Mon Oct 17 2022 xuyizhou - 1:1.1.1m-7
+- add ARMv8 optimization of SM4-ECB
+
 * Tue Jul 12 2022 wangcheng - 1:1.1.1m-6
 - fix CVE-2022-2097
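
For reviewers who want to exercise the new code path end to end, the sketch below (not part of the patch series; the all-zero key and buffer sizes are purely illustrative) drives SM4-ECB through the public EVP interface of the patched library. At run time sm4_init_key() installs vpsm4_ex_ecb_encrypt in dat->stream.ecb when VPSM4_EX_CAPABLE is true (AArch64 with the AES extension), and otherwise falls back to the block-at-a-time loop, so the same program covers both paths.

```c
#include <stdio.h>
#include <openssl/evp.h>

/* Minimal sketch: encrypt two 16-byte blocks with SM4-ECB via EVP.
 * Whether the vpsm4_ex assembly or the C fallback runs is decided
 * inside sm4_init_key() at run time; the caller code is identical. */
int main(void)
{
    unsigned char key[16] = {0};            /* illustrative all-zero key */
    unsigned char in[32] = {0}, out[48];
    int outl = 0, tmplen = 0, ok = 0;
    EVP_CIPHER_CTX *ctx = EVP_CIPHER_CTX_new();

    if (ctx == NULL)
        return 1;
    if (EVP_EncryptInit_ex(ctx, EVP_sm4_ecb(), NULL, key, NULL)
            && EVP_CIPHER_CTX_set_padding(ctx, 0)   /* input is block-aligned */
            && EVP_EncryptUpdate(ctx, out, &outl, in, (int)sizeof(in))
            && EVP_EncryptFinal_ex(ctx, out + outl, &tmplen))
        ok = 1;
    if (ok)
        printf("SM4-ECB produced %d ciphertext bytes\n", outl + tmplen);
    EVP_CIPHER_CTX_free(ctx);
    return ok ? 0 : 1;
}
```

The same path should also be reachable from the command line with "openssl speed -evp sm4-ecb", which gives a quick throughput comparison on an ARMv8 machine with and without Patch32 applied.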