diff --git a/Feature-add-ARMv8-implementations-of-SM4-in-ECB-and-XTS.patch b/Feature-add-ARMv8-implementations-of-SM4-in-ECB-and-XTS.patch new file mode 100644 index 0000000000000000000000000000000000000000..1deb0abab8b714390966fc5c9bf2c95c2e990067 --- /dev/null +++ b/Feature-add-ARMv8-implementations-of-SM4-in-ECB-and-XTS.patch @@ -0,0 +1,2225 @@ +From df56c1da16d705fb6f471651feb77a69171af9e3 Mon Sep 17 00:00:00 2001 +From: Xu Yizhou +Date: Wed, 19 Oct 2022 13:28:58 +0800 +Subject: [PATCH] add ARMv8 implementations of SM4 in ECB and XTS + +--- + Configurations/00-base-templates.conf | 1 + + Configure | 4 + + crypto/evp/c_allc.c | 1 + + crypto/evp/e_sm4.c | 352 ++++++- + crypto/modes/build.info | 2 +- + crypto/modes/xts128gb.c | 204 ++++ + crypto/objects/obj_dat.h | 15 +- + crypto/objects/obj_mac.num | 1 + + crypto/objects/objects.txt | 1 + + crypto/sm4/asm/vpsm4_ex-armv8.pl | 1173 +++++++++++++++++++++ + crypto/sm4/build.info | 5 +- + doc/man3/EVP_sm4_xts.pod | 67 ++ + fuzz/oids.txt | 1 + + include/openssl/evp.h | 4 + + include/openssl/modes.h | 9 + + include/openssl/obj_mac.h | 5 + + test/evp_test.c | 17 +- + test/recipes/30-test_evp_data/evpciph.txt | 22 + + util/libcrypto.num | 2 + + 19 files changed, 1832 insertions(+), 54 deletions(-) + create mode 100644 crypto/modes/xts128gb.c + create mode 100644 crypto/sm4/asm/vpsm4_ex-armv8.pl + create mode 100644 doc/man3/EVP_sm4_xts.pod + +diff --git a/Configurations/00-base-templates.conf b/Configurations/00-base-templates.conf +index e01dc63..1d35012 100644 +--- a/Configurations/00-base-templates.conf ++++ b/Configurations/00-base-templates.conf +@@ -321,6 +321,7 @@ my %targets=( + chacha_asm_src => "chacha-armv8.S", + poly1305_asm_src=> "poly1305-armv8.S", + keccak1600_asm_src => "keccak1600-armv8.S", ++ sm4_asm_src => "vpsm4_ex-armv8.S", + }, + parisc11_asm => { + template => 1, +diff --git a/Configure b/Configure +index a41c897..3bfe360 100755 +--- a/Configure ++++ b/Configure +@@ -1420,6 +1420,9 @@ unless ($disabled{asm}) { + if ($target{poly1305_asm_src} ne "") { + push @{$config{lib_defines}}, "POLY1305_ASM"; + } ++ if ($target{sm4_asm_src} ne "") { ++ push @{$config{lib_defines}}, "VPSM4_EX_ASM"; ++ } + } + + my %predefined_C = compiler_predefined($config{CROSS_COMPILE}.$config{CC}); +@@ -3375,6 +3378,7 @@ sub print_table_entry + "mtoutflag", + "multilib", + "build_scheme", ++ "sm4_asm_src", + ); + + if ($type eq "TABLE") { +diff --git a/crypto/evp/c_allc.c b/crypto/evp/c_allc.c +index 22fdcc4..01b0d1f 100644 +--- a/crypto/evp/c_allc.c ++++ b/crypto/evp/c_allc.c +@@ -85,6 +85,7 @@ void openssl_add_all_ciphers_int(void) + EVP_add_cipher(EVP_sm4_cfb()); + EVP_add_cipher(EVP_sm4_ofb()); + EVP_add_cipher(EVP_sm4_ctr()); ++ EVP_add_cipher(EVP_sm4_xts()); + EVP_add_cipher_alias(SN_sm4_cbc, "SM4"); + EVP_add_cipher_alias(SN_sm4_cbc, "sm4"); + #endif +diff --git a/crypto/evp/e_sm4.c b/crypto/evp/e_sm4.c +index fce3279..169d6c7 100644 +--- a/crypto/evp/e_sm4.c ++++ b/crypto/evp/e_sm4.c +@@ -15,86 +15,346 @@ + # include + # include "crypto/sm4.h" + # include "crypto/evp.h" ++# include "evp_local.h" ++# include "modes_local.h" ++ ++#if defined(OPENSSL_CPUID_OBJ) && (defined(__arm__) || defined(__arm) || defined(__aarch64__)) ++# include "arm_arch.h" ++# if __ARM_MAX_ARCH__>=7 ++# if defined(VPSM4_EX_ASM) ++# define VPSM4_EX_CAPABLE (OPENSSL_armcap_P & ARMV8_AES) ++# endif ++# endif ++#endif + + typedef struct { +- SM4_KEY ks; ++ union { ++ double align; ++ SM4_KEY ks; ++ } ks; ++ block128_f block; ++ union { ++ ecb128_f ecb; ++ } stream; + } 
EVP_SM4_KEY; + ++#ifdef VPSM4_EX_CAPABLE ++void vpsm4_ex_set_encrypt_key(const unsigned char *userKey, SM4_KEY *key); ++void vpsm4_ex_set_decrypt_key(const unsigned char *userKey, SM4_KEY *key); ++#define vpsm4_ex_encrypt SM4_encrypt ++#define vpsm4_ex_decrypt SM4_encrypt ++void vpsm4_ex_ecb_encrypt( ++ const unsigned char *in, unsigned char *out, size_t length, const SM4_KEY *key, const int enc); ++/* xts mode in GB/T 17964-2021 */ ++void vpsm4_ex_xts_encrypt_gb(const unsigned char *in, unsigned char *out, size_t length, const SM4_KEY *key1, ++ const SM4_KEY *key2, const uint8_t iv[16]); ++void vpsm4_ex_xts_decrypt_gb(const unsigned char *in, unsigned char *out, size_t length, const SM4_KEY *key1, ++ const SM4_KEY *key2, const uint8_t iv[16]); ++/* xts mode in IEEE Std 1619-2007 */ ++void vpsm4_ex_xts_encrypt(const unsigned char *in, unsigned char *out, size_t length, const SM4_KEY *key1, ++ const SM4_KEY *key2, const uint8_t iv[16]); ++void vpsm4_ex_xts_decrypt(const unsigned char *in, unsigned char *out, size_t length, const SM4_KEY *key1, ++ const SM4_KEY *key2, const uint8_t iv[16]); ++#endif ++ ++# define BLOCK_CIPHER_generic(nid,blocksize,ivlen,nmode,mode,MODE,flags) \ ++static const EVP_CIPHER sm4_##mode = { \ ++ nid##_##nmode,blocksize,128/8,ivlen, \ ++ flags|EVP_CIPH_##MODE##_MODE, \ ++ sm4_init_key, \ ++ sm4_##mode##_cipher, \ ++ NULL, \ ++ sizeof(EVP_SM4_KEY), \ ++ NULL,NULL,NULL,NULL }; \ ++const EVP_CIPHER *EVP_sm4_##mode(void) \ ++{ return &sm4_##mode; } ++ ++#define BLOCK_CIPHER_generic_pack(nid,flags) \ ++ BLOCK_CIPHER_generic(nid,16,16,cbc,cbc,CBC,flags|EVP_CIPH_FLAG_DEFAULT_ASN1) \ ++ BLOCK_CIPHER_generic(nid,16,0,ecb,ecb,ECB,flags|EVP_CIPH_FLAG_DEFAULT_ASN1) \ ++ BLOCK_CIPHER_generic(nid,1,16,ofb128,ofb,OFB,flags|EVP_CIPH_FLAG_DEFAULT_ASN1) \ ++ BLOCK_CIPHER_generic(nid,1,16,cfb128,cfb,CFB,flags|EVP_CIPH_FLAG_DEFAULT_ASN1) \ ++ BLOCK_CIPHER_generic(nid,1,16,ctr,ctr,CTR,flags) ++ + static int sm4_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, + const unsigned char *iv, int enc) + { +- SM4_set_key(key, EVP_CIPHER_CTX_get_cipher_data(ctx)); ++ int mode; ++ EVP_SM4_KEY *dat = EVP_C_DATA(EVP_SM4_KEY, ctx); ++ ++ mode = EVP_CIPHER_CTX_mode(ctx); ++ if ((mode == EVP_CIPH_ECB_MODE || mode == EVP_CIPH_CBC_MODE) && !enc) { ++#ifdef VPSM4_EX_CAPABLE ++ if (VPSM4_EX_CAPABLE) { ++ vpsm4_ex_set_decrypt_key(key, &dat->ks.ks); ++ dat->block = (block128_f) vpsm4_ex_decrypt; ++ if (mode == EVP_CIPH_ECB_MODE) ++ dat->stream.ecb = (ecb128_f) vpsm4_ex_ecb_encrypt; ++ } else ++#endif ++ { ++ dat->block = (block128_f)SM4_decrypt; ++ SM4_set_key(key, EVP_CIPHER_CTX_get_cipher_data(ctx)); ++ } ++ } else { ++#ifdef VPSM4_EX_CAPABLE ++ if (VPSM4_EX_CAPABLE) { ++ vpsm4_ex_set_encrypt_key(key, &dat->ks.ks); ++ dat->block = (block128_f) vpsm4_ex_encrypt; ++ if (mode == EVP_CIPH_ECB_MODE) ++ dat->stream.ecb = (ecb128_f) vpsm4_ex_ecb_encrypt; ++ } else ++#endif ++ { ++ dat->block = (block128_f)SM4_encrypt; ++ SM4_set_key(key, EVP_CIPHER_CTX_get_cipher_data(ctx)); ++ } ++ } + return 1; + } + +-static void sm4_cbc_encrypt(const unsigned char *in, unsigned char *out, +- size_t len, const SM4_KEY *key, +- unsigned char *ivec, const int enc) ++static int sm4_cbc_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, ++ const unsigned char *in, size_t len) + { +- if (enc) +- CRYPTO_cbc128_encrypt(in, out, len, key, ivec, +- (block128_f)SM4_encrypt); ++ EVP_SM4_KEY *dat = EVP_C_DATA(EVP_SM4_KEY,ctx); ++ ++ if (EVP_CIPHER_CTX_encrypting(ctx)) ++ CRYPTO_cbc128_encrypt(in, out, len, &dat->ks.ks, ++ 
EVP_CIPHER_CTX_iv_noconst(ctx), dat->block); + else +- CRYPTO_cbc128_decrypt(in, out, len, key, ivec, +- (block128_f)SM4_decrypt); ++ CRYPTO_cbc128_decrypt(in, out, len, &dat->ks.ks, ++ EVP_CIPHER_CTX_iv_noconst(ctx), dat->block); ++ return 1; + } + +-static void sm4_cfb128_encrypt(const unsigned char *in, unsigned char *out, +- size_t length, const SM4_KEY *key, +- unsigned char *ivec, int *num, const int enc) ++static int sm4_cfb_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, ++ const unsigned char *in, size_t len) + { +- CRYPTO_cfb128_encrypt(in, out, length, key, ivec, num, enc, +- (block128_f)SM4_encrypt); ++ EVP_SM4_KEY *dat = EVP_C_DATA(EVP_SM4_KEY,ctx); ++ int num = EVP_CIPHER_CTX_num(ctx); ++ ++ CRYPTO_cfb128_encrypt(in, out, len, &dat->ks.ks, ++ ctx->iv, &num, ++ EVP_CIPHER_CTX_encrypting(ctx), dat->block); ++ EVP_CIPHER_CTX_set_num(ctx, num); ++ ++ return 1; + } + +-static void sm4_ecb_encrypt(const unsigned char *in, unsigned char *out, +- const SM4_KEY *key, const int enc) ++static int sm4_ecb_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, ++ const unsigned char *in, size_t len) + { +- if (enc) +- SM4_encrypt(in, out, key); ++ size_t bl = EVP_CIPHER_CTX_block_size(ctx); ++ size_t i; ++ EVP_SM4_KEY *dat = EVP_C_DATA(EVP_SM4_KEY,ctx); ++ ++ if (len < bl){ ++ return 1; ++ } ++ if (dat->stream.ecb != NULL) ++ (*dat->stream.ecb) (in, out, len, &dat->ks.ks, ++ EVP_CIPHER_CTX_encrypting(ctx)); + else +- SM4_decrypt(in, out, key); ++ for (i = 0, len -= bl; i <= len; i += bl) ++ (*dat->block) (in + i, out + i, &dat->ks.ks); ++ return 1; + } + +-static void sm4_ofb128_encrypt(const unsigned char *in, unsigned char *out, +- size_t length, const SM4_KEY *key, +- unsigned char *ivec, int *num) ++static int sm4_ofb_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, ++ const unsigned char *in, size_t len) + { +- CRYPTO_ofb128_encrypt(in, out, length, key, ivec, num, +- (block128_f)SM4_encrypt); +-} ++ EVP_SM4_KEY *dat = EVP_C_DATA(EVP_SM4_KEY,ctx); ++ int num = EVP_CIPHER_CTX_num(ctx); + +-IMPLEMENT_BLOCK_CIPHER(sm4, ks, sm4, EVP_SM4_KEY, NID_sm4, +- 16, 16, 16, 128, EVP_CIPH_FLAG_DEFAULT_ASN1, +- sm4_init_key, 0, 0, 0, 0) ++ CRYPTO_ofb128_encrypt(in, out, len, &dat->ks.ks, ++ ctx->iv, &num, dat->block); ++ EVP_CIPHER_CTX_set_num(ctx, num); ++ return 1; ++} + + static int sm4_ctr_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, + const unsigned char *in, size_t len) + { +- unsigned int num = EVP_CIPHER_CTX_num(ctx); +- EVP_SM4_KEY *dat = EVP_C_DATA(EVP_SM4_KEY, ctx); ++ int n = EVP_CIPHER_CTX_num(ctx); ++ unsigned int num; ++ EVP_SM4_KEY *dat = EVP_C_DATA(EVP_SM4_KEY,ctx); ++ ++ if (n < 0) ++ return 0; ++ num = (unsigned int)n; + +- CRYPTO_ctr128_encrypt(in, out, len, &dat->ks, +- EVP_CIPHER_CTX_iv_noconst(ctx), +- EVP_CIPHER_CTX_buf_noconst(ctx), &num, +- (block128_f)SM4_encrypt); ++ CRYPTO_ctr128_encrypt(in, out, len, &dat->ks.ks, ++ ctx->iv, ++ EVP_CIPHER_CTX_buf_noconst(ctx), &num, ++ dat->block); + EVP_CIPHER_CTX_set_num(ctx, num); + return 1; + } + +-static const EVP_CIPHER sm4_ctr_mode = { +- NID_sm4_ctr, 1, 16, 16, +- EVP_CIPH_CTR_MODE, +- sm4_init_key, +- sm4_ctr_cipher, +- NULL, +- sizeof(EVP_SM4_KEY), +- NULL, NULL, NULL, NULL +-}; ++BLOCK_CIPHER_generic_pack(NID_sm4, 0) + +-const EVP_CIPHER *EVP_sm4_ctr(void) ++typedef struct { ++ union { ++ double align; ++ SM4_KEY ks; ++ } ks1, ks2; /* sm4 key schedules to use */ ++ XTS128_CONTEXT xts; ++ int std; /* 0 for xts mode in GB/T 17964-2021 */ ++ /* 1 for xts mode in IEEE Std 1619-2007 */ ++ void (*stream_gb) (const unsigned char *in, 
++ unsigned char *out, size_t length, ++ const SM4_KEY *key1, const SM4_KEY *key2, ++ const unsigned char iv[16]); /* stream for xts mode in GB/T 17964-2021 */ ++ void (*stream) (const unsigned char *in, ++ unsigned char *out, size_t length, ++ const SM4_KEY *key1, const SM4_KEY *key2, ++ const unsigned char iv[16]); /* stream for xts mode in IEEE Std 1619-2007 */ ++} EVP_SM4_XTS_CTX; ++ ++static int sm4_xts_ctrl(EVP_CIPHER_CTX *c, int type, int arg, void *ptr) ++{ ++ EVP_SM4_XTS_CTX *xctx = EVP_C_DATA(EVP_SM4_XTS_CTX, c); ++ ++ if (type == EVP_CTRL_COPY) { ++ EVP_CIPHER_CTX *out = ptr; ++ EVP_SM4_XTS_CTX *xctx_out = EVP_C_DATA(EVP_SM4_XTS_CTX,out); ++ ++ if (xctx->xts.key1) { ++ if (xctx->xts.key1 != &xctx->ks1) ++ return 0; ++ xctx_out->xts.key1 = &xctx_out->ks1; ++ } ++ if (xctx->xts.key2) { ++ if (xctx->xts.key2 != &xctx->ks2) ++ return 0; ++ xctx_out->xts.key2 = &xctx_out->ks2; ++ } ++ return 1; ++ } else if (type == EVP_CTRL_XTS_STANDARD) { ++ if ((arg < 0) || (arg > 1)) ++ return 0; ++ xctx->std = arg; ++ return 1; ++ } else if (type != EVP_CTRL_INIT) ++ return -1; ++ /* key1 and key2 are used as an indicator both key and IV are set */ ++ xctx->xts.key1 = NULL; ++ xctx->xts.key2 = NULL; ++ return 1; ++} ++ ++static int sm4_xts_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, ++ const unsigned char *iv, int enc) ++{ ++ EVP_SM4_XTS_CTX *xctx = EVP_C_DATA(EVP_SM4_XTS_CTX,ctx); ++ ++ if (!iv && !key) ++ return 1; ++ ++ if (key) ++ do { ++ /* The key is two half length keys in reality */ ++ const int bytes = EVP_CIPHER_CTX_key_length(ctx) / 2; ++ xctx->stream_gb = NULL; ++ xctx->stream = NULL; ++#ifdef VPSM4_EX_CAPABLE ++ if (VPSM4_EX_CAPABLE) { ++ if (enc) { ++ vpsm4_ex_set_encrypt_key(key, &xctx->ks1.ks); ++ xctx->xts.block1 = (block128_f) vpsm4_ex_encrypt; ++ xctx->stream_gb = vpsm4_ex_xts_encrypt_gb; ++ xctx->stream = vpsm4_ex_xts_encrypt; ++ } else { ++ vpsm4_ex_set_decrypt_key(key, &xctx->ks1.ks); ++ xctx->xts.block1 = (block128_f) vpsm4_ex_decrypt; ++ xctx->stream_gb = vpsm4_ex_xts_decrypt_gb; ++ xctx->stream = vpsm4_ex_xts_decrypt; ++ } ++ vpsm4_ex_set_encrypt_key(key + bytes, &xctx->ks2.ks); ++ xctx->xts.block2 = (block128_f) vpsm4_ex_encrypt; ++ ++ xctx->xts.key1 = &xctx->ks1; ++ break; ++ } else ++#endif ++ (void)0; /* terminate potentially open 'else' */ ++ ++ if (enc) { ++ SM4_set_key(key, &xctx->ks1.ks); ++ xctx->xts.block1 = (block128_f) SM4_encrypt; ++ } else { ++ SM4_set_key(key, &xctx->ks1.ks); ++ xctx->xts.block1 = (block128_f) SM4_decrypt; ++ } ++ ++ SM4_set_key(key + bytes, &xctx->ks2.ks); ++ xctx->xts.block2 = (block128_f) SM4_encrypt; ++ ++ xctx->xts.key1 = &xctx->ks1; ++ } while (0); ++ ++ if (iv) { ++ xctx->xts.key2 = &xctx->ks2; ++ memcpy(EVP_CIPHER_CTX_iv_noconst(ctx), iv, 16); ++ } ++ ++ return 1; ++} ++ ++static int sm4_xts_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, ++ const unsigned char *in, size_t len) ++{ ++ EVP_SM4_XTS_CTX *xctx = EVP_C_DATA(EVP_SM4_XTS_CTX,ctx); ++ if (!xctx->xts.key1 || !xctx->xts.key2) ++ return 0; ++ if (!out || !in || len < SM4_BLOCK_SIZE) ++ return 0; ++ if (xctx->std) { ++ if (xctx->stream) ++ (*xctx->stream) (in, out, len, ++ xctx->xts.key1, xctx->xts.key2, ++ EVP_CIPHER_CTX_iv_noconst(ctx)); ++ else if (CRYPTO_xts128_encrypt(&xctx->xts, EVP_CIPHER_CTX_iv_noconst(ctx), ++ in, out, len, ++ EVP_CIPHER_CTX_encrypting(ctx))) ++ return 0; ++ } else { ++ if (xctx->stream_gb) ++ (*xctx->stream_gb) (in, out, len, ++ xctx->xts.key1, xctx->xts.key2, ++ EVP_CIPHER_CTX_iv_noconst(ctx)); ++ else if 
(CRYPTO_xts128gb_encrypt(&xctx->xts, EVP_CIPHER_CTX_iv_noconst(ctx), ++ in, out, len, ++ EVP_CIPHER_CTX_encrypting(ctx))) ++ return 0; ++ } ++ return 1; ++} ++ ++#define SM4_XTS_BLOCK_SIZE 1 ++#define SM4_XTS_IV_LENGTH 16 ++#define SM4_XTS_KEY_LENGTH 32 ++ ++#define XTS_FLAGS (EVP_CIPH_FLAG_DEFAULT_ASN1 | EVP_CIPH_CUSTOM_IV \ ++ | EVP_CIPH_ALWAYS_CALL_INIT | EVP_CIPH_CTRL_INIT \ ++ | EVP_CIPH_CUSTOM_COPY | EVP_CIPH_XTS_MODE) ++ ++static const EVP_CIPHER sm4_xts_mode = { ++ NID_sm4_xts, ++ SM4_XTS_BLOCK_SIZE, ++ SM4_XTS_KEY_LENGTH, ++ SM4_XTS_IV_LENGTH, ++ XTS_FLAGS, ++ sm4_xts_init_key, ++ sm4_xts_cipher, ++ NULL, ++ sizeof(EVP_SM4_XTS_CTX), ++ NULL, NULL, sm4_xts_ctrl, NULL ++}; ++ ++const EVP_CIPHER *EVP_sm4_xts(void) + { +- return &sm4_ctr_mode; ++ return &sm4_xts_mode; + } + + #endif +diff --git a/crypto/modes/build.info b/crypto/modes/build.info +index 821340e..f974b04 100644 +--- a/crypto/modes/build.info ++++ b/crypto/modes/build.info +@@ -1,7 +1,7 @@ + LIBS=../../libcrypto + SOURCE[../../libcrypto]=\ + cbc128.c ctr128.c cts128.c cfb128.c ofb128.c gcm128.c \ +- ccm128.c xts128.c wrap128.c ocb128.c \ ++ ccm128.c xts128.c xts128gb.c wrap128.c ocb128.c \ + {- $target{modes_asm_src} -} + + INCLUDE[gcm128.o]=.. +diff --git a/crypto/modes/xts128gb.c b/crypto/modes/xts128gb.c +new file mode 100644 +index 0000000..8f57cc5 +--- /dev/null ++++ b/crypto/modes/xts128gb.c +@@ -0,0 +1,204 @@ ++/* ++ * Copyright 2011-2020 The OpenSSL Project Authors. All Rights Reserved. ++ * ++ * Licensed under the OpenSSL license (the "License"). You may not use ++ * this file except in compliance with the License. You can obtain a copy ++ * in the file LICENSE in the source distribution or at ++ * https://www.openssl.org/source/license.html ++ */ ++ ++// This is the xts mode in GB/T 17964-2021 ++#include ++#include "modes_local.h" ++#include ++ ++#ifndef STRICT_ALIGNMENT ++# ifdef __GNUC__ ++typedef u64 u64_a1 __attribute((__aligned__(1))); ++# else ++typedef u64 u64_a1; ++# endif ++#endif ++ ++int CRYPTO_xts128gb_encrypt(const XTS128_CONTEXT *ctx, ++ const unsigned char iv[16], ++ const unsigned char *inp, unsigned char *out, ++ size_t len, int enc) ++{ ++ const union { ++ long one; ++ char little; ++ } is_endian = { ++ 1 ++ }; ++ union { ++ u64 u[2]; ++ u32 d[4]; ++ u8 c[16]; ++ } tweak, scratch; ++ unsigned int i; ++ ++ if (len < 16) ++ return -1; ++ ++ memcpy(tweak.c, iv, 16); ++ ++ (*ctx->block2) (tweak.c, tweak.c, ctx->key2); ++ ++ if (!enc && (len % 16)) ++ len -= 16; ++ ++ while (len >= 16) { ++#if defined(STRICT_ALIGNMENT) ++ memcpy(scratch.c, inp, 16); ++ scratch.u[0] ^= tweak.u[0]; ++ scratch.u[1] ^= tweak.u[1]; ++#else ++ scratch.u[0] = ((u64_a1 *)inp)[0] ^ tweak.u[0]; ++ scratch.u[1] = ((u64_a1 *)inp)[1] ^ tweak.u[1]; ++#endif ++ (*ctx->block1) (scratch.c, scratch.c, ctx->key1); ++#if defined(STRICT_ALIGNMENT) ++ scratch.u[0] ^= tweak.u[0]; ++ scratch.u[1] ^= tweak.u[1]; ++ memcpy(out, scratch.c, 16); ++#else ++ ((u64_a1 *)out)[0] = scratch.u[0] ^= tweak.u[0]; ++ ((u64_a1 *)out)[1] = scratch.u[1] ^= tweak.u[1]; ++#endif ++ inp += 16; ++ out += 16; ++ len -= 16; ++ ++ if (len == 0) ++ return 0; ++ ++ if (is_endian.little) { ++ u8 res; ++ u64 hi, lo; ++#ifdef BSWAP8 ++ hi = BSWAP8(tweak.u[0]); ++ lo = BSWAP8(tweak.u[1]); ++#else ++ u8 *p = tweak.c; ++ ++ hi = (u64)GETU32(p) << 32 | GETU32(p + 4); ++ lo = (u64)GETU32(p + 8) << 32 | GETU32(p + 12); ++#endif ++ res = (u8)lo & 1; ++ tweak.u[0] = (lo >> 1) | (hi << 63); ++ tweak.u[1] = hi >> 1; ++ if (res) ++ tweak.c[15] ^= 0xe1; ++#ifdef BSWAP8 ++ hi = 
BSWAP8(tweak.u[0]); ++ lo = BSWAP8(tweak.u[1]); ++#else ++ p = tweak.c; ++ ++ hi = (u64)GETU32(p) << 32 | GETU32(p + 4); ++ lo = (u64)GETU32(p + 8) << 32 | GETU32(p + 12); ++#endif ++ tweak.u[0] = lo; ++ tweak.u[1] = hi; ++ } else { ++ u8 Cin, Cout; ++ Cin = 0; ++ for (i = 0; i < 16; ++i) { ++ Cout = (tweak.c[i] << 7) & 0x80; ++ tweak.c[i] = ((tweak.c[i] >> 1) + Cin) & 0xff; ++ Cin = Cout; ++ } ++ if (Cout) ++ tweak.c[0] ^= 0xe1; ++ } ++ } ++ if (enc) { ++ for (i = 0; i < len; ++i) { ++ u8 c = inp[i]; ++ out[i] = scratch.c[i]; ++ scratch.c[i] = c; ++ } ++ scratch.u[0] ^= tweak.u[0]; ++ scratch.u[1] ^= tweak.u[1]; ++ (*ctx->block1) (scratch.c, scratch.c, ctx->key1); ++ scratch.u[0] ^= tweak.u[0]; ++ scratch.u[1] ^= tweak.u[1]; ++ memcpy(out - 16, scratch.c, 16); ++ } else { ++ union { ++ u64 u[2]; ++ u8 c[16]; ++ } tweak1; ++ ++ if (is_endian.little) { ++ u8 res; ++ u64 hi, lo; ++#ifdef BSWAP8 ++ hi = BSWAP8(tweak.u[0]); ++ lo = BSWAP8(tweak.u[1]); ++#else ++ u8 *p = tweak.c; ++ ++ hi = (u64)GETU32(p) << 32 | GETU32(p + 4); ++ lo = (u64)GETU32(p + 8) << 32 | GETU32(p + 12); ++#endif ++ res = (u8)lo & 1; ++ tweak1.u[0] = (lo >> 1) | (hi << 63); ++ tweak1.u[1] = hi >> 1; ++ if (res) ++ tweak1.c[15] ^= 0xe1; ++#ifdef BSWAP8 ++ hi = BSWAP8(tweak1.u[0]); ++ lo = BSWAP8(tweak1.u[1]); ++#else ++ p = tweak1.c; ++ ++ hi = (u64)GETU32(p) << 32 | GETU32(p + 4); ++ lo = (u64)GETU32(p + 8) << 32 | GETU32(p + 12); ++#endif ++ tweak1.u[0] = lo; ++ tweak1.u[1] = hi; ++ } else { ++ u8 Cin, Cout; ++ Cin = 0; ++ for ( i = 0; i < 16; ++i ) { ++ Cout = (tweak.c[i] << 7) & 0x80; ++ tweak1.c[i] = ((tweak.c[i] >> 1) + Cin) & 0xff; ++ Cin = Cout; ++ } ++ if (Cout) ++ tweak1.c[0] ^= 0xe1; ++ } ++#if defined(STRICT_ALIGNMENT) ++ memcpy(scratch.c, inp, 16); ++ scratch.u[0] ^= tweak1.u[0]; ++ scratch.u[1] ^= tweak1.u[1]; ++#else ++ scratch.u[0] = ((u64_a1 *)inp)[0] ^ tweak1.u[0]; ++ scratch.u[1] = ((u64_a1 *)inp)[1] ^ tweak1.u[1]; ++#endif ++ (*ctx->block1) (scratch.c, scratch.c, ctx->key1); ++ scratch.u[0] ^= tweak1.u[0]; ++ scratch.u[1] ^= tweak1.u[1]; ++ ++ for (i = 0; i < len; ++i) { ++ u8 c = inp[16 + i]; ++ out[16 + i] = scratch.c[i]; ++ scratch.c[i] = c; ++ } ++ scratch.u[0] ^= tweak.u[0]; ++ scratch.u[1] ^= tweak.u[1]; ++ (*ctx->block1) (scratch.c, scratch.c, ctx->key1); ++#if defined(STRICT_ALIGNMENT) ++ scratch.u[0] ^= tweak.u[0]; ++ scratch.u[1] ^= tweak.u[1]; ++ memcpy(out, scratch.c, 16); ++#else ++ ((u64_a1 *)out)[0] = scratch.u[0] ^ tweak.u[0]; ++ ((u64_a1 *)out)[1] = scratch.u[1] ^ tweak.u[1]; ++#endif ++ } ++ ++ return 0; ++} +diff --git a/crypto/objects/obj_dat.h b/crypto/objects/obj_dat.h +index eb4cce4..6d60f87 100644 +--- a/crypto/objects/obj_dat.h ++++ b/crypto/objects/obj_dat.h +@@ -10,7 +10,7 @@ + */ + + /* Serialized OID's */ +-static const unsigned char so[7770] = { ++static const unsigned char so[7778] = { + 0x2A,0x86,0x48,0x86,0xF7,0x0D, /* [ 0] OBJ_rsadsi */ + 0x2A,0x86,0x48,0x86,0xF7,0x0D,0x01, /* [ 6] OBJ_pkcs */ + 0x2A,0x86,0x48,0x86,0xF7,0x0D,0x02,0x02, /* [ 13] OBJ_md2 */ +@@ -1077,9 +1077,10 @@ static const unsigned char so[7770] = { + 0x2A,0x86,0x48,0x86,0xF7,0x0D,0x02,0x0C, /* [ 7745] OBJ_hmacWithSHA512_224 */ + 0x2A,0x86,0x48,0x86,0xF7,0x0D,0x02,0x0D, /* [ 7753] OBJ_hmacWithSHA512_256 */ + 0x2A,0x81,0x1C,0xCF,0x55,0x01,0x83,0x75, /* [ 7761] OBJ_SM2_with_SM3 */ ++ 0x2A,0x81,0x1C,0xCF,0x55,0x01,0x68,0x0A, /* [ 7769] OBJ_sm4_xts */ + }; + +-#define NUM_NID 1196 ++#define NUM_NID 1197 + static const ASN1_OBJECT nid_objs[NUM_NID] = { + {"UNDEF", "undefined", NID_undef}, + {"rsadsi", 
"RSA Data Security, Inc.", NID_rsadsi, 6, &so[0]}, +@@ -2277,9 +2278,10 @@ static const ASN1_OBJECT nid_objs[NUM_NID] = { + {"hmacWithSHA512-224", "hmacWithSHA512-224", NID_hmacWithSHA512_224, 8, &so[7745]}, + {"hmacWithSHA512-256", "hmacWithSHA512-256", NID_hmacWithSHA512_256, 8, &so[7753]}, + {"SM2-SM3", "SM2-with-SM3", NID_SM2_with_SM3, 8, &so[7761]}, ++ {"SM4-XTS", "sm4-xts", NID_sm4_xts, 8, &so[7769]}, + }; + +-#define NUM_SN 1187 ++#define NUM_SN 1188 + static const unsigned int sn_objs[NUM_SN] = { + 364, /* "AD_DVCS" */ + 419, /* "AES-128-CBC" */ +@@ -2554,6 +2556,7 @@ static const unsigned int sn_objs[NUM_SN] = { + 1139, /* "SM4-CTR" */ + 1133, /* "SM4-ECB" */ + 1135, /* "SM4-OFB" */ ++ 1196, /* "SM4-XTS" */ + 188, /* "SMIME" */ + 167, /* "SMIME-CAPS" */ + 100, /* "SN" */ +@@ -3470,7 +3473,7 @@ static const unsigned int sn_objs[NUM_SN] = { + 1093, /* "x509ExtAdmission" */ + }; + +-#define NUM_LN 1187 ++#define NUM_LN 1188 + static const unsigned int ln_objs[NUM_LN] = { + 363, /* "AD Time Stamping" */ + 405, /* "ANSI X9.62" */ +@@ -4613,6 +4616,7 @@ static const unsigned int ln_objs[NUM_LN] = { + 1139, /* "sm4-ctr" */ + 1133, /* "sm4-ecb" */ + 1135, /* "sm4-ofb" */ ++ 1196, /* "sm4-xts" */ + 16, /* "stateOrProvinceName" */ + 660, /* "streetAddress" */ + 498, /* "subtreeMaximumQuality" */ +@@ -4661,7 +4665,7 @@ static const unsigned int ln_objs[NUM_LN] = { + 125, /* "zlib compression" */ + }; + +-#define NUM_OBJ 1072 ++#define NUM_OBJ 1073 + static const unsigned int obj_objs[NUM_OBJ] = { + 0, /* OBJ_undef 0 */ + 181, /* OBJ_iso 1 */ +@@ -5128,6 +5132,7 @@ static const unsigned int obj_objs[NUM_OBJ] = { + 1136, /* OBJ_sm4_cfb1 1 2 156 10197 1 104 5 */ + 1138, /* OBJ_sm4_cfb8 1 2 156 10197 1 104 6 */ + 1139, /* OBJ_sm4_ctr 1 2 156 10197 1 104 7 */ ++ 1196, /* OBJ_sm4_xts 1 2 156 10197 1 104 10 */ + 1172, /* OBJ_sm2 1 2 156 10197 1 301 */ + 1143, /* OBJ_sm3 1 2 156 10197 1 401 */ + 1195, /* OBJ_SM2_with_SM3 1 2 156 10197 1 501 */ +diff --git a/crypto/objects/obj_mac.num b/crypto/objects/obj_mac.num +index 8b797b0..77ad385 100644 +--- a/crypto/objects/obj_mac.num ++++ b/crypto/objects/obj_mac.num +@@ -1193,3 +1193,4 @@ magma_mac 1192 + hmacWithSHA512_224 1193 + hmacWithSHA512_256 1194 + SM2_with_SM3 1195 ++sm4_xts 1196 +diff --git a/crypto/objects/objects.txt b/crypto/objects/objects.txt +index be9da47..5713fae 100644 +--- a/crypto/objects/objects.txt ++++ b/crypto/objects/objects.txt +@@ -1520,6 +1520,7 @@ sm-scheme 104 4 : SM4-CFB : sm4-cfb + sm-scheme 104 5 : SM4-CFB1 : sm4-cfb1 + sm-scheme 104 6 : SM4-CFB8 : sm4-cfb8 + sm-scheme 104 7 : SM4-CTR : sm4-ctr ++sm-scheme 104 10 : SM4-XTS : sm4-xts + + # There is no OID that just denotes "HMAC" oddly enough... + +diff --git a/crypto/sm4/asm/vpsm4_ex-armv8.pl b/crypto/sm4/asm/vpsm4_ex-armv8.pl +new file mode 100644 +index 0000000..86a6f89 +--- /dev/null ++++ b/crypto/sm4/asm/vpsm4_ex-armv8.pl +@@ -0,0 +1,1173 @@ ++#! /usr/bin/env perl ++# Copyright 2022 The OpenSSL Project Authors. All Rights Reserved. ++# ++# Licensed under the Apache License 2.0 (the "License"). You may not use ++# this file except in compliance with the License. 
You can obtain a copy ++# in the file LICENSE in the source distribution or at ++# https://www.openssl.org/source/license.html ++ ++# ++# This module implements SM4 with ASIMD and AESE on AARCH64 ++# ++# Feb 2022 ++# ++ ++# $output is the last argument if it looks like a file (it has an extension) ++# $flavour is the first argument if it doesn't look like a file ++$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef; ++$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef; ++ ++$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; ++( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or ++( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or ++die "can't locate arm-xlate.pl"; ++ ++open OUT,"| \"$^X\" $xlate $flavour \"$output\"" ++ or die "can't call $xlate: $!"; ++*STDOUT=*OUT; ++ ++$prefix="vpsm4_ex"; ++my ($inp,$outp,$rks1,$rks2,$ivp,$enc)=("x0","x1","x3","x4","x5","x6"); ++my ($blocks,$len)=("x2","x2"); ++my $remain=("x7"); ++my ($ptr,$counter)=("x12","w13"); ++my ($wtmp0,$wtmp1,$wtmp2,$wtmp3)=("w8","w9","w10","w11"); ++my ($xtmp0,$xtmp1,$xtmp2,$xtmp3)=("x8","x9","x10","x11"); ++my ($word0,$word1,$word2,$word3)=("w14","w15","w16","w17"); ++my @twx=map("x$_",(14..29)); ++my $lastBlk=("x26"); ++ ++my @tweak=map("v$_",(0..7)); ++my @qtmp=map("q$_",(8..11)); ++my @vtmp=map("v$_",(8..11)); ++my ($rk0,$rk1)=("v12","v13"); ++my ($rka,$rkb)=("v14","v15"); ++my @data=map("v$_",(16..19)); ++my @datax=map("v$_",(20..23)); ++my ($vtmp4,$vtmp5)=("v24","v25"); ++my $lastTweak=("v25"); ++my ($MaskV,$TAHMatV,$TALMatV,$ATAHMatV,$ATALMatV,$ANDMaskV)=("v26","v27","v28","v29","v30","v31"); ++my ($MaskQ,$TAHMatQ,$TALMatQ,$ATAHMatQ,$ATALMatQ,$ANDMaskQ)=("q26","q27","q28","q29","q30","q31"); ++ ++sub rev32() { ++ my $dst = shift; ++ my $src = shift; ++ ++ if ($src and ("$src" ne "$dst")) { ++$code.=<<___; ++#ifndef __ARMEB__ ++ rev32 $dst.16b,$src.16b ++#else ++ mov $dst.16b,$src.16b ++#endif ++___ ++ } else { ++$code.=<<___; ++#ifndef __ARMEB__ ++ rev32 $dst.16b,$dst.16b ++#endif ++___ ++ } ++} ++ ++sub rev32_armeb() { ++ my $dst = shift; ++ my $src = shift; ++ ++ if ($src and ("$src" ne "$dst")) { ++$code.=<<___; ++#ifdef __ARMEB__ ++ rev32 $dst.16b,$src.16b ++#else ++ mov $dst.16b,$src.16b ++#endif ++___ ++ } else { ++$code.=<<___; ++#ifdef __ARMEB__ ++ rev32 $dst.16b,$dst.16b ++#endif ++___ ++ } ++} ++ ++sub transpose() { ++ my ($dat0,$dat1,$dat2,$dat3,$vt0,$vt1,$vt2,$vt3) = @_; ++ ++$code.=<<___; ++ zip1 $vt0.4s,$dat0.4s,$dat1.4s ++ zip2 $vt1.4s,$dat0.4s,$dat1.4s ++ zip1 $vt2.4s,$dat2.4s,$dat3.4s ++ zip2 $vt3.4s,$dat2.4s,$dat3.4s ++ zip1 $dat0.2d,$vt0.2d,$vt2.2d ++ zip2 $dat1.2d,$vt0.2d,$vt2.2d ++ zip1 $dat2.2d,$vt1.2d,$vt3.2d ++ zip2 $dat3.2d,$vt1.2d,$vt3.2d ++___ ++} ++ ++sub load_sbox_matrix () { ++$code.=<<___; ++ ldr $MaskQ, =0x0306090c0f0205080b0e0104070a0d00 ++ ldr $TAHMatQ, =0x22581a6002783a4062185a2042387a00 ++ ldr $TALMatQ, =0xc10bb67c4a803df715df62a89e54e923 ++ ldr $ATAHMatQ, =0x1407c6d56c7fbeadb9aa6b78c1d21300 ++ ldr $ATALMatQ, =0xe383c1a1fe9edcbc6404462679195b3b ++ ldr $ANDMaskQ, =0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f ++___ ++} ++# matrix multiplication Mat*x = (lowerMat*x) ^ (higherMat*x) ++sub mul_matrix() { ++ my $x = shift; ++ my $higherMat = shift; ++ my $lowerMat = shift; ++ my $tmp = shift; ++$code.=<<___; ++ ushr $tmp.16b, $x.16b, 4 ++ and $x.16b, $x.16b, $ANDMaskV.16b ++ tbl $x.16b, {$lowerMat.16b}, $x.16b ++ tbl $tmp.16b, {$higherMat.16b}, $tmp.16b ++ eor $x.16b, $x.16b, $tmp.16b ++___ ++} ++ ++# sbox operation for one single word ++sub sbox_1word () { ++ my $word = shift; ++ 
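++# Note on the technique: instead of a byte-wise lookup table, the SM4 S-box is
++# evaluated with the AES extension. MaskV pre-permutes the lane bytes in
++# inverse-ShiftRows order, TAHMat/TALMat apply a nibble-wise input affine map
++# (via TBL), AESE with an all-zero round key then reduces to AES SubBytes (its
++# ShiftRows is cancelled by the pre-permutation), and ATAHMat/ATALMat map the
++# result back so that the whole composition equals the SM4 S-box.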
++$code.=<<___; ++ mov @vtmp[3].s[0],$word ++ // optimize sbox using AESE instruction ++ tbl @vtmp[0].16b, {@vtmp[3].16b}, $MaskV.16b ++___ ++ &mul_matrix(@vtmp[0], $TAHMatV, $TALMatV, @vtmp[2]); ++$code.=<<___; ++ eor @vtmp[1].16b, @vtmp[1].16b, @vtmp[1].16b ++ aese @vtmp[0].16b,@vtmp[1].16b ++___ ++ &mul_matrix(@vtmp[0], $ATAHMatV, $ATALMatV, @vtmp[2]); ++$code.=<<___; ++ ++ mov $wtmp0,@vtmp[0].s[0] ++ eor $word,$wtmp0,$wtmp0,ror #32-2 ++ eor $word,$word,$wtmp0,ror #32-10 ++ eor $word,$word,$wtmp0,ror #32-18 ++ eor $word,$word,$wtmp0,ror #32-24 ++___ ++} ++ ++# sbox operation for 4-lane of words ++sub sbox() { ++ my $dat = shift; ++ ++$code.=<<___; ++ // optimize sbox using AESE instruction ++ tbl @vtmp[0].16b, {$dat.16b}, $MaskV.16b ++___ ++ &mul_matrix(@vtmp[0], $TAHMatV, $TALMatV, $vtmp4); ++$code.=<<___; ++ eor @vtmp[1].16b, @vtmp[1].16b, @vtmp[1].16b ++ aese @vtmp[0].16b,@vtmp[1].16b ++___ ++ &mul_matrix(@vtmp[0], $ATAHMatV, $ATALMatV, $vtmp4); ++$code.=<<___; ++ mov $dat.16b,@vtmp[0].16b ++ ++ // linear transformation ++ ushr @vtmp[0].4s,$dat.4s,32-2 ++ ushr @vtmp[1].4s,$dat.4s,32-10 ++ ushr @vtmp[2].4s,$dat.4s,32-18 ++ ushr @vtmp[3].4s,$dat.4s,32-24 ++ sli @vtmp[0].4s,$dat.4s,2 ++ sli @vtmp[1].4s,$dat.4s,10 ++ sli @vtmp[2].4s,$dat.4s,18 ++ sli @vtmp[3].4s,$dat.4s,24 ++ eor $vtmp4.16b,@vtmp[0].16b,$dat.16b ++ eor $vtmp4.16b,$vtmp4.16b,$vtmp[1].16b ++ eor $dat.16b,@vtmp[2].16b,@vtmp[3].16b ++ eor $dat.16b,$dat.16b,$vtmp4.16b ++___ ++} ++ ++# sbox operation for 8-lane of words ++sub sbox_double() { ++ my $dat = shift; ++ my $datx = shift; ++ ++$code.=<<___; ++ // optimize sbox using AESE instruction ++ tbl @vtmp[0].16b, {$dat.16b}, $MaskV.16b ++ tbl @vtmp[1].16b, {$datx.16b}, $MaskV.16b ++___ ++ &mul_matrix(@vtmp[0], $TAHMatV, $TALMatV, $vtmp4); ++ &mul_matrix(@vtmp[1], $TAHMatV, $TALMatV, $vtmp4); ++$code.=<<___; ++ eor $vtmp5.16b, $vtmp5.16b, $vtmp5.16b ++ aese @vtmp[0].16b,$vtmp5.16b ++ aese @vtmp[1].16b,$vtmp5.16b ++___ ++ &mul_matrix(@vtmp[0], $ATAHMatV, $ATALMatV,$vtmp4); ++ &mul_matrix(@vtmp[1], $ATAHMatV, $ATALMatV,$vtmp4); ++$code.=<<___; ++ mov $dat.16b,@vtmp[0].16b ++ mov $datx.16b,@vtmp[1].16b ++ ++ // linear transformation ++ ushr @vtmp[0].4s,$dat.4s,32-2 ++ ushr $vtmp5.4s,$datx.4s,32-2 ++ ushr @vtmp[1].4s,$dat.4s,32-10 ++ ushr @vtmp[2].4s,$dat.4s,32-18 ++ ushr @vtmp[3].4s,$dat.4s,32-24 ++ sli @vtmp[0].4s,$dat.4s,2 ++ sli $vtmp5.4s,$datx.4s,2 ++ sli @vtmp[1].4s,$dat.4s,10 ++ sli @vtmp[2].4s,$dat.4s,18 ++ sli @vtmp[3].4s,$dat.4s,24 ++ eor $vtmp4.16b,@vtmp[0].16b,$dat.16b ++ eor $vtmp4.16b,$vtmp4.16b,@vtmp[1].16b ++ eor $dat.16b,@vtmp[2].16b,@vtmp[3].16b ++ eor $dat.16b,$dat.16b,$vtmp4.16b ++ ushr @vtmp[1].4s,$datx.4s,32-10 ++ ushr @vtmp[2].4s,$datx.4s,32-18 ++ ushr @vtmp[3].4s,$datx.4s,32-24 ++ sli @vtmp[1].4s,$datx.4s,10 ++ sli @vtmp[2].4s,$datx.4s,18 ++ sli @vtmp[3].4s,$datx.4s,24 ++ eor $vtmp4.16b,$vtmp5.16b,$datx.16b ++ eor $vtmp4.16b,$vtmp4.16b,@vtmp[1].16b ++ eor $datx.16b,@vtmp[2].16b,@vtmp[3].16b ++ eor $datx.16b,$datx.16b,$vtmp4.16b ++___ ++} ++ ++# sm4 for one block of data, in scalar registers word0/word1/word2/word3 ++sub sm4_1blk () { ++ my $kptr = shift; ++ ++$code.=<<___; ++ ldp $wtmp0,$wtmp1,[$kptr],8 ++ /* B0 ^= SBOX(B1 ^ B2 ^ B3 ^ RK0) */ ++ eor $wtmp3,$word2,$word3 ++ eor $wtmp2,$wtmp0,$word1 ++ eor $wtmp3,$wtmp3,$wtmp2 ++___ ++ &sbox_1word($wtmp3); ++$code.=<<___; ++ eor $word0,$word0,$wtmp3 ++ /* B1 ^= SBOX(B0 ^ B2 ^ B3 ^ RK1) */ ++ eor $wtmp3,$word2,$word3 ++ eor $wtmp2,$word0,$wtmp1 ++ eor $wtmp3,$wtmp3,$wtmp2 ++___ ++ &sbox_1word($wtmp3); 
++$code.=<<___; ++ ldp $wtmp0,$wtmp1,[$kptr],8 ++ eor $word1,$word1,$wtmp3 ++ /* B2 ^= SBOX(B0 ^ B1 ^ B3 ^ RK2) */ ++ eor $wtmp3,$word0,$word1 ++ eor $wtmp2,$wtmp0,$word3 ++ eor $wtmp3,$wtmp3,$wtmp2 ++___ ++ &sbox_1word($wtmp3); ++$code.=<<___; ++ eor $word2,$word2,$wtmp3 ++ /* B3 ^= SBOX(B0 ^ B1 ^ B2 ^ RK3) */ ++ eor $wtmp3,$word0,$word1 ++ eor $wtmp2,$word2,$wtmp1 ++ eor $wtmp3,$wtmp3,$wtmp2 ++___ ++ &sbox_1word($wtmp3); ++$code.=<<___; ++ eor $word3,$word3,$wtmp3 ++___ ++} ++ ++# sm4 for 4-lanes of data, in neon registers data0/data1/data2/data3 ++sub sm4_4blks () { ++ my $kptr = shift; ++ ++$code.=<<___; ++ ldp $wtmp0,$wtmp1,[$kptr],8 ++ dup $rk0.4s,$wtmp0 ++ dup $rk1.4s,$wtmp1 ++ ++ /* B0 ^= SBOX(B1 ^ B2 ^ B3 ^ RK0) */ ++ eor $rka.16b,@data[2].16b,@data[3].16b ++ eor $rk0.16b,@data[1].16b,$rk0.16b ++ eor $rk0.16b,$rka.16b,$rk0.16b ++___ ++ &sbox($rk0); ++$code.=<<___; ++ eor @data[0].16b,@data[0].16b,$rk0.16b ++ ++ /* B1 ^= SBOX(B0 ^ B2 ^ B3 ^ RK1) */ ++ eor $rka.16b,$rka.16b,@data[0].16b ++ eor $rk1.16b,$rka.16b,$rk1.16b ++___ ++ &sbox($rk1); ++$code.=<<___; ++ ldp $wtmp0,$wtmp1,[$kptr],8 ++ eor @data[1].16b,@data[1].16b,$rk1.16b ++ ++ dup $rk0.4s,$wtmp0 ++ dup $rk1.4s,$wtmp1 ++ ++ /* B2 ^= SBOX(B0 ^ B1 ^ B3 ^ RK2) */ ++ eor $rka.16b,@data[0].16b,@data[1].16b ++ eor $rk0.16b,@data[3].16b,$rk0.16b ++ eor $rk0.16b,$rka.16b,$rk0.16b ++___ ++ &sbox($rk0); ++$code.=<<___; ++ eor @data[2].16b,@data[2].16b,$rk0.16b ++ ++ /* B3 ^= SBOX(B0 ^ B1 ^ B2 ^ RK3) */ ++ eor $rka.16b,$rka.16b,@data[2].16b ++ eor $rk1.16b,$rka.16b,$rk1.16b ++___ ++ &sbox($rk1); ++$code.=<<___; ++ eor @data[3].16b,@data[3].16b,$rk1.16b ++___ ++} ++ ++# sm4 for 8 lanes of data, in neon registers ++# data0/data1/data2/data3 datax0/datax1/datax2/datax3 ++sub sm4_8blks () { ++ my $kptr = shift; ++ ++$code.=<<___; ++ ldp $wtmp0,$wtmp1,[$kptr],8 ++ /* B0 ^= SBOX(B1 ^ B2 ^ B3 ^ RK0) */ ++ dup $rk0.4s,$wtmp0 ++ eor $rka.16b,@data[2].16b,@data[3].16b ++ eor $rkb.16b,@datax[2].16b,@datax[3].16b ++ eor @vtmp[0].16b,@data[1].16b,$rk0.16b ++ eor @vtmp[1].16b,@datax[1].16b,$rk0.16b ++ eor $rk0.16b,$rka.16b,@vtmp[0].16b ++ eor $rk1.16b,$rkb.16b,@vtmp[1].16b ++___ ++ &sbox_double($rk0,$rk1); ++$code.=<<___; ++ eor @data[0].16b,@data[0].16b,$rk0.16b ++ eor @datax[0].16b,@datax[0].16b,$rk1.16b ++ ++ /* B1 ^= SBOX(B0 ^ B2 ^ B3 ^ RK1) */ ++ dup $rk1.4s,$wtmp1 ++ eor $rka.16b,$rka.16b,@data[0].16b ++ eor $rkb.16b,$rkb.16b,@datax[0].16b ++ eor $rk0.16b,$rka.16b,$rk1.16b ++ eor $rk1.16b,$rkb.16b,$rk1.16b ++___ ++ &sbox_double($rk0,$rk1); ++$code.=<<___; ++ ldp $wtmp0,$wtmp1,[$kptr],8 ++ eor @data[1].16b,@data[1].16b,$rk0.16b ++ eor @datax[1].16b,@datax[1].16b,$rk1.16b ++ ++ /* B2 ^= SBOX(B0 ^ B1 ^ B3 ^ RK2) */ ++ dup $rk0.4s,$wtmp0 ++ eor $rka.16b,@data[0].16b,@data[1].16b ++ eor $rkb.16b,@datax[0].16b,@datax[1].16b ++ eor @vtmp[0].16b,@data[3].16b,$rk0.16b ++ eor @vtmp[1].16b,@datax[3].16b,$rk0.16b ++ eor $rk0.16b,$rka.16b,@vtmp[0].16b ++ eor $rk1.16b,$rkb.16b,@vtmp[1].16b ++___ ++ &sbox_double($rk0,$rk1); ++$code.=<<___; ++ eor @data[2].16b,@data[2].16b,$rk0.16b ++ eor @datax[2].16b,@datax[2].16b,$rk1.16b ++ ++ /* B3 ^= SBOX(B0 ^ B1 ^ B2 ^ RK3) */ ++ dup $rk1.4s,$wtmp1 ++ eor $rka.16b,$rka.16b,@data[2].16b ++ eor $rkb.16b,$rkb.16b,@datax[2].16b ++ eor $rk0.16b,$rka.16b,$rk1.16b ++ eor $rk1.16b,$rkb.16b,$rk1.16b ++___ ++ &sbox_double($rk0,$rk1); ++$code.=<<___; ++ eor @data[3].16b,@data[3].16b,$rk0.16b ++ eor @datax[3].16b,@datax[3].16b,$rk1.16b ++___ ++} ++ ++sub encrypt_1blk_norev() { ++ my $dat = shift; ++ my $rks = shift; ++$code.=<<___; 
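++	// 8 loop iterations x 4 rounds per sm4_1blk call give the full 32 SM4
++	// rounds; the final word swap applies the output reverse transform R.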
++ mov $ptr,$rks ++ mov $counter,#8 ++ mov $word0,$dat.s[0] ++ mov $word1,$dat.s[1] ++ mov $word2,$dat.s[2] ++ mov $word3,$dat.s[3] ++10: ++___ ++ &sm4_1blk($ptr); ++$code.=<<___; ++ subs $counter,$counter,#1 ++ b.ne 10b ++ mov $dat.s[0],$word3 ++ mov $dat.s[1],$word2 ++ mov $dat.s[2],$word1 ++ mov $dat.s[3],$word0 ++___ ++} ++ ++sub encrypt_1blk() { ++ my $dat = shift; ++ my $rks = shift; ++ ++ &encrypt_1blk_norev($dat,$rks); ++ &rev32($dat,$dat); ++} ++ ++sub encrypt_4blks() { ++$code.=<<___; ++ mov $ptr,$rks1 ++ mov $counter,#8 ++10: ++___ ++ &sm4_4blks($ptr); ++$code.=<<___; ++ subs $counter,$counter,#1 ++ b.ne 10b ++___ ++ &rev32(@vtmp[3],@data[0]); ++ &rev32(@vtmp[2],@data[1]); ++ &rev32(@vtmp[1],@data[2]); ++ &rev32(@vtmp[0],@data[3]); ++} ++ ++sub encrypt_8blks() { ++ my $rks = shift; ++$code.=<<___; ++ mov $ptr,$rks ++ mov $counter,#8 ++10: ++___ ++ &sm4_8blks($ptr); ++$code.=<<___; ++ subs $counter,$counter,#1 ++ b.ne 10b ++___ ++ &rev32(@vtmp[3],@data[0]); ++ &rev32(@vtmp[2],@data[1]); ++ &rev32(@vtmp[1],@data[2]); ++ &rev32(@vtmp[0],@data[3]); ++ &rev32(@data[3],@datax[0]); ++ &rev32(@data[2],@datax[1]); ++ &rev32(@data[1],@datax[2]); ++ &rev32(@data[0],@datax[3]); ++} ++ ++sub mov_reg_to_vec() { ++ my $src0 = shift; ++ my $src1 = shift; ++ my $desv = shift; ++$code.=<<___; ++ mov $desv.d[0],$src0 ++ mov $desv.d[1],$src1 ++#ifdef __ARMEB__ ++ rev32 $desv.16b,$desv.16b ++#endif ++___ ++} ++ ++sub mov_vec_to_reg() { ++ my $srcv = shift; ++ my $des0 = shift; ++ my $des1 = shift; ++$code.=<<___; ++ mov $des0,$srcv.d[0] ++ mov $des1,$srcv.d[1] ++___ ++} ++ ++sub compute_tweak() { ++ my $src0 = shift; ++ my $src1 = shift; ++ my $des0 = shift; ++ my $des1 = shift; ++$code.=<<___; ++ mov $wtmp0,0x87 ++ extr $xtmp2,$src1,$src1,#32 ++ extr $des1,$src1,$src0,#63 ++ and $wtmp1,$wtmp0,$wtmp2,asr#31 ++ eor $des0,$xtmp1,$src0,lsl#1 ++___ ++} ++ ++sub compute_tweak_vec() { ++ my $src = shift; ++ my $des = shift; ++ &rbit(@vtmp[2],$src); ++$code.=<<___; ++ ldr @qtmp[0], =0x01010101010101010101010101010187 ++ shl $des.16b, @vtmp[2].16b, #1 ++ ext @vtmp[1].16b, @vtmp[2].16b, @vtmp[2].16b,#15 ++ ushr @vtmp[1].16b, @vtmp[1].16b, #7 ++ mul @vtmp[1].16b, @vtmp[1].16b, @vtmp[0].16b ++ eor $des.16b, $des.16b, @vtmp[1].16b ++___ ++ &rbit($des,$des); ++} ++ ++sub mov_en_to_enc(){ ++ my $en = shift; ++ if ($en eq "en") { ++$code.=<<___; ++ mov $enc,1 ++___ ++ } else { ++$code.=<<___; ++ mov $enc,0 ++___ ++ } ++} ++ ++sub rbit() { ++ my $dst = shift; ++ my $src = shift; ++ ++ if ($src and ("$src" ne "$dst")) { ++ if ($standard eq "_gb") { ++$code.=<<___; ++ rbit $dst.16b,$src.16b ++___ ++ } else { ++$code.=<<___; ++ mov $dst.16b,$src.16b ++___ ++ } ++ } else { ++ if ($standard eq "_gb") { ++$code.=<<___; ++ rbit $dst.16b,$src.16b ++___ ++ } ++ } ++} ++ ++$code=<<___; ++#include "arm_arch.h" ++.arch armv8-a+crypto ++.text ++ ++.type ${prefix}_consts,%object ++.align 7 ++${prefix}_consts: ++.Lck: ++ .long 0x00070E15, 0x1C232A31, 0x383F464D, 0x545B6269 ++ .long 0x70777E85, 0x8C939AA1, 0xA8AFB6BD, 0xC4CBD2D9 ++ .long 0xE0E7EEF5, 0xFC030A11, 0x181F262D, 0x343B4249 ++ .long 0x50575E65, 0x6C737A81, 0x888F969D, 0xA4ABB2B9 ++ .long 0xC0C7CED5, 0xDCE3EAF1, 0xF8FF060D, 0x141B2229 ++ .long 0x30373E45, 0x4C535A61, 0x686F767D, 0x848B9299 ++ .long 0xA0A7AEB5, 0xBCC3CAD1, 0xD8DFE6ED, 0xF4FB0209 ++ .long 0x10171E25, 0x2C333A41, 0x484F565D, 0x646B7279 ++.Lfk: ++ .long 0xa3b1bac6, 0x56aa3350, 0x677d9197, 0xb27022dc ++.Lshuffles: ++ .long 0x07060504, 0x0B0A0908, 0x0F0E0D0C, 0x03020100 ++ ++.size 
${prefix}_consts,.-${prefix}_consts ++___ ++ ++{{{ ++my ($userKey,$roundKey,$enc)=("x0","x1","w2"); ++my ($pointer,$schedules,$wtmp,$roundkey)=("x5","x6","w7","w8"); ++my ($vkey,$vfk,$vmap)=("v5","v6","v7"); ++$code.=<<___; ++.type ${prefix}_set_key,%function ++.align 4 ++${prefix}_set_key: ++ ld1 {$vkey.4s},[$userKey] ++___ ++ &load_sbox_matrix(); ++ &rev32($vkey,$vkey); ++$code.=<<___; ++ adr $pointer,.Lshuffles ++ ld1 {$vmap.4s},[$pointer] ++ adr $pointer,.Lfk ++ ld1 {$vfk.4s},[$pointer] ++ eor $vkey.16b,$vkey.16b,$vfk.16b ++ mov $schedules,#32 ++ adr $pointer,.Lck ++ movi @vtmp[0].16b,#64 ++ cbnz $enc,1f ++ add $roundKey,$roundKey,124 ++1: ++ mov $wtmp,$vkey.s[1] ++ ldr $roundkey,[$pointer],#4 ++ eor $roundkey,$roundkey,$wtmp ++ mov $wtmp,$vkey.s[2] ++ eor $roundkey,$roundkey,$wtmp ++ mov $wtmp,$vkey.s[3] ++ eor $roundkey,$roundkey,$wtmp ++ ++ // optimize sbox using AESE instruction ++ mov @data[0].s[0],$roundkey ++ tbl @vtmp[0].16b, {@data[0].16b}, $MaskV.16b ++___ ++ &mul_matrix(@vtmp[0], $TAHMatV, $TALMatV, @vtmp[2]); ++$code.=<<___; ++ eor @vtmp[1].16b, @vtmp[1].16b, @vtmp[1].16b ++ aese @vtmp[0].16b,@vtmp[1].16b ++___ ++ &mul_matrix(@vtmp[0], $ATAHMatV, $ATALMatV, @vtmp[2]); ++$code.=<<___; ++ mov $wtmp,@vtmp[0].s[0] ++ ++ // linear transformation ++ eor $roundkey,$wtmp,$wtmp,ror #19 ++ eor $roundkey,$roundkey,$wtmp,ror #9 ++ mov $wtmp,$vkey.s[0] ++ eor $roundkey,$roundkey,$wtmp ++ mov $vkey.s[0],$roundkey ++ cbz $enc,2f ++ str $roundkey,[$roundKey],#4 ++ b 3f ++2: ++ str $roundkey,[$roundKey],#-4 ++3: ++ tbl $vkey.16b,{$vkey.16b},$vmap.16b ++ subs $schedules,$schedules,#1 ++ b.ne 1b ++ ret ++.size ${prefix}_set_key,.-${prefix}_set_key ++___ ++}}} ++ ++ ++{{{ ++$code.=<<___; ++.type ${prefix}_enc_4blks,%function ++.align 4 ++${prefix}_enc_4blks: ++___ ++ &encrypt_4blks(); ++$code.=<<___; ++ ret ++.size ${prefix}_enc_4blks,.-${prefix}_enc_4blks ++___ ++}}} ++ ++{{{ ++$code.=<<___; ++.type ${prefix}_enc_8blks,%function ++.align 4 ++${prefix}_enc_8blks: ++___ ++ &encrypt_8blks($rks1); ++$code.=<<___; ++ ret ++.size ${prefix}_enc_8blks,.-${prefix}_enc_8blks ++___ ++}}} ++ ++{{{ ++my ($key,$keys)=("x0","x1"); ++$code.=<<___; ++.globl ${prefix}_set_encrypt_key ++.type ${prefix}_set_encrypt_key,%function ++.align 5 ++${prefix}_set_encrypt_key: ++ stp x29,x30,[sp,#-16]! ++ mov w2,1 ++ bl ${prefix}_set_key ++ ldp x29,x30,[sp],#16 ++ ret ++.size ${prefix}_set_encrypt_key,.-${prefix}_set_encrypt_key ++___ ++}}} ++ ++{{{ ++my ($key,$keys)=("x0","x1"); ++$code.=<<___; ++.globl ${prefix}_set_decrypt_key ++.type ${prefix}_set_decrypt_key,%function ++.align 5 ++${prefix}_set_decrypt_key: ++ stp x29,x30,[sp,#-16]! ++ mov w2,0 ++ bl ${prefix}_set_key ++ ldp x29,x30,[sp],#16 ++ ret ++.size ${prefix}_set_decrypt_key,.-${prefix}_set_decrypt_key ++___ ++}}} ++ ++ ++{{{ ++ ++$code.=<<___; ++.globl ${prefix}_ecb_encrypt ++.type ${prefix}_ecb_encrypt,%function ++.align 5 ++${prefix}_ecb_encrypt: ++ stp d8,d9,[sp,#-0x10]! ++ stp d10,d11,[sp,#-0x10]! ++ stp d12,d13,[sp,#-0x10]! ++ stp d14,d15,[sp,#-0x10]! ++ stp x16,x17,[sp,#-0x10]! ++ stp x29,x30,[sp,#-0x10]! 
++___ ++ &load_sbox_matrix(); ++$code.=<<___; ++ // convert length into blocks ++ lsr x2,x2,4 ++.Lecb_8_blocks_process: ++ cmp $blocks,#8 ++ b.lt .Lecb_4_blocks_process ++ ld4 {@data[0].4s,@data[1].4s,@data[2].4s,@data[3].4s},[$inp],#64 ++ ld4 {@datax[0].4s,$datax[1].4s,@datax[2].4s,@datax[3].4s},[$inp],#64 ++___ ++ &rev32(@data[0],@data[0]); ++ &rev32(@data[1],@data[1]); ++ &rev32(@data[2],@data[2]); ++ &rev32(@data[3],@data[3]); ++ &rev32(@datax[0],@datax[0]); ++ &rev32(@datax[1],@datax[1]); ++ &rev32(@datax[2],@datax[2]); ++ &rev32(@datax[3],@datax[3]); ++$code.=<<___; ++ bl ${prefix}_enc_8blks ++ st4 {@vtmp[0].4s,@vtmp[1].4s,@vtmp[2].4s,@vtmp[3].4s},[$outp],#64 ++ st4 {@data[0].4s,@data[1].4s,@data[2].4s,@data[3].4s},[$outp],#64 ++ subs $blocks,$blocks,#8 ++ b.gt .Lecb_8_blocks_process ++ b 100f ++.Lecb_4_blocks_process: ++ cmp $blocks,#4 ++ b.lt 1f ++ ld4 {@data[0].4s,@data[1].4s,@data[2].4s,@data[3].4s},[$inp],#64 ++___ ++ &rev32(@data[0],@data[0]); ++ &rev32(@data[1],@data[1]); ++ &rev32(@data[2],@data[2]); ++ &rev32(@data[3],@data[3]); ++$code.=<<___; ++ bl ${prefix}_enc_4blks ++ st4 {@vtmp[0].4s,@vtmp[1].4s,@vtmp[2].4s,@vtmp[3].4s},[$outp],#64 ++ sub $blocks,$blocks,#4 ++1: ++ // process last block ++ cmp $blocks,#1 ++ b.lt 100f ++ b.gt 1f ++ ld1 {@data[0].4s},[$inp] ++___ ++ &rev32(@data[0],@data[0]); ++ &encrypt_1blk(@data[0],$rks1); ++$code.=<<___; ++ st1 {@data[0].4s},[$outp] ++ b 100f ++1: // process last 2 blocks ++ ld4 {@data[0].s,@data[1].s,@data[2].s,@data[3].s}[0],[$inp],#16 ++ ld4 {@data[0].s,@data[1].s,@data[2].s,@data[3].s}[1],[$inp],#16 ++ cmp $blocks,#2 ++ b.gt 1f ++___ ++ &rev32(@data[0],@data[0]); ++ &rev32(@data[1],@data[1]); ++ &rev32(@data[2],@data[2]); ++ &rev32(@data[3],@data[3]); ++$code.=<<___; ++ bl ${prefix}_enc_4blks ++ st4 {@vtmp[0].s-@vtmp[3].s}[0],[$outp],#16 ++ st4 {@vtmp[0].s-@vtmp[3].s}[1],[$outp] ++ b 100f ++1: // process last 3 blocks ++ ld4 {@data[0].s,@data[1].s,@data[2].s,@data[3].s}[2],[$inp],#16 ++___ ++ &rev32(@data[0],@data[0]); ++ &rev32(@data[1],@data[1]); ++ &rev32(@data[2],@data[2]); ++ &rev32(@data[3],@data[3]); ++$code.=<<___; ++ bl ${prefix}_enc_4blks ++ st4 {@vtmp[0].s-@vtmp[3].s}[0],[$outp],#16 ++ st4 {@vtmp[0].s-@vtmp[3].s}[1],[$outp],#16 ++ st4 {@vtmp[0].s-@vtmp[3].s}[2],[$outp] ++100: ++ ldp x29,x30,[sp],#0x10 ++ ldp x16,x17,[sp],#0x10 ++ ldp d14,d15,[sp],#0x10 ++ ldp d12,d13,[sp],#0x10 ++ ldp d10,d11,[sp],#0x10 ++ ldp d8,d9,[sp],#0x10 ++ ret ++.size ${prefix}_ecb_encrypt,.-${prefix}_ecb_encrypt ++___ ++}}} ++ ++{{{ ++sub gen_xts_do_cipher() { ++$code.=<<___; ++.globl ${prefix}_xts_do_cipher${standard} ++.type ${prefix}_xts_do_cipher${standard},%function ++.align 5 ++${prefix}_xts_do_cipher${standard}: ++ stp x29,x30,[sp,#-16]! 
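++	// Initial tweak T0 = E_K2(IV): load the IV and encrypt it with the second
++	// key schedule ($rks2). Later tweaks are derived by multiplying by x in
++	// GF(2^128); the GB/T 17964-2021 variant works on a bit-reversed
++	// representation, hence the rbit calls around the tweak updates.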
++ ld1 {@tweak[0].4s}, [$ivp] ++___ ++ &load_sbox_matrix(); ++ &rev32(@tweak[0],@tweak[0]); ++ &encrypt_1blk(@tweak[0],$rks2); ++$code.=<<___; ++ and $remain,$len,#0x0F ++ // convert length into blocks ++ lsr $blocks,$len,4 ++ cmp $blocks,#1 ++ b.lt .return${standard} ++ ++ cmp $remain,0 ++ // If the encryption/decryption Length is N times of 16, ++ // the all blocks are encrypted/decrypted in .xts_encrypt_blocks${standard} ++ b.eq .xts_encrypt_blocks${standard} ++ ++ // If the encryption/decryption length is not N times of 16, ++ // the last two blocks are encrypted/decrypted in .last_2blks_tweak${standard} or .only_2blks_tweak${standard} ++ // the other blocks are encrypted/decrypted in .xts_encrypt_blocks${standard} ++ subs $blocks,$blocks,#1 ++ b.eq .only_2blks_tweak${standard} ++.xts_encrypt_blocks${standard}: ++___ ++ &rbit(@tweak[0],@tweak[0]); ++ &rev32_armeb(@tweak[0],@tweak[0]); ++ &mov_vec_to_reg(@tweak[0],@twx[0],@twx[1]); ++ &compute_tweak(@twx[0],@twx[1],@twx[2],@twx[3]); ++ &compute_tweak(@twx[2],@twx[3],@twx[4],@twx[5]); ++ &compute_tweak(@twx[4],@twx[5],@twx[6],@twx[7]); ++ &compute_tweak(@twx[6],@twx[7],@twx[8],@twx[9]); ++ &compute_tweak(@twx[8],@twx[9],@twx[10],@twx[11]); ++ &compute_tweak(@twx[10],@twx[11],@twx[12],@twx[13]); ++ &compute_tweak(@twx[12],@twx[13],@twx[14],@twx[15]); ++$code.=<<___; ++.Lxts_8_blocks_process${standard}: ++ cmp $blocks,#8 ++___ ++ &mov_reg_to_vec(@twx[0],@twx[1],@tweak[0]); ++ &compute_tweak(@twx[14],@twx[15],@twx[0],@twx[1]); ++ &mov_reg_to_vec(@twx[2],@twx[3],@tweak[1]); ++ &compute_tweak(@twx[0],@twx[1],@twx[2],@twx[3]); ++ &mov_reg_to_vec(@twx[4],@twx[5],@tweak[2]); ++ &compute_tweak(@twx[2],@twx[3],@twx[4],@twx[5]); ++ &mov_reg_to_vec(@twx[6],@twx[7],@tweak[3]); ++ &compute_tweak(@twx[4],@twx[5],@twx[6],@twx[7]); ++ &mov_reg_to_vec(@twx[8],@twx[9],@tweak[4]); ++ &compute_tweak(@twx[6],@twx[7],@twx[8],@twx[9]); ++ &mov_reg_to_vec(@twx[10],@twx[11],@tweak[5]); ++ &compute_tweak(@twx[8],@twx[9],@twx[10],@twx[11]); ++ &mov_reg_to_vec(@twx[12],@twx[13],@tweak[6]); ++ &compute_tweak(@twx[10],@twx[11],@twx[12],@twx[13]); ++ &mov_reg_to_vec(@twx[14],@twx[15],@tweak[7]); ++ &compute_tweak(@twx[12],@twx[13],@twx[14],@twx[15]); ++$code.=<<___; ++ b.lt .Lxts_4_blocks_process${standard} ++ ld1 {@data[0].4s,@data[1].4s,@data[2].4s,@data[3].4s},[$inp],#64 ++___ ++ &rbit(@tweak[0],@tweak[0]); ++ &rbit(@tweak[1],@tweak[1]); ++ &rbit(@tweak[2],@tweak[2]); ++ &rbit(@tweak[3],@tweak[3]); ++$code.=<<___; ++ eor @data[0].16b, @data[0].16b, @tweak[0].16b ++ eor @data[1].16b, @data[1].16b, @tweak[1].16b ++ eor @data[2].16b, @data[2].16b, @tweak[2].16b ++ eor @data[3].16b, @data[3].16b, @tweak[3].16b ++ ld1 {@datax[0].4s,$datax[1].4s,@datax[2].4s,@datax[3].4s},[$inp],#64 ++___ ++ &rbit(@tweak[4],@tweak[4]); ++ &rbit(@tweak[5],@tweak[5]); ++ &rbit(@tweak[6],@tweak[6]); ++ &rbit(@tweak[7],@tweak[7]); ++$code.=<<___; ++ eor @datax[0].16b, @datax[0].16b, @tweak[4].16b ++ eor @datax[1].16b, @datax[1].16b, @tweak[5].16b ++ eor @datax[2].16b, @datax[2].16b, @tweak[6].16b ++ eor @datax[3].16b, @datax[3].16b, @tweak[7].16b ++___ ++ &rev32(@data[0],@data[0]); ++ &rev32(@data[1],@data[1]); ++ &rev32(@data[2],@data[2]); ++ &rev32(@data[3],@data[3]); ++ &rev32(@datax[0],@datax[0]); ++ &rev32(@datax[1],@datax[1]); ++ &rev32(@datax[2],@datax[2]); ++ &rev32(@datax[3],@datax[3]); ++ &transpose(@data,@vtmp); ++ &transpose(@datax,@vtmp); ++$code.=<<___; ++ bl ${prefix}_enc_8blks ++___ ++ &transpose(@vtmp,@datax); ++ &transpose(@data,@datax); ++$code.=<<___; ++ eor @vtmp[0].16b, 
@vtmp[0].16b, @tweak[0].16b ++ eor @vtmp[1].16b, @vtmp[1].16b, @tweak[1].16b ++ eor @vtmp[2].16b, @vtmp[2].16b, @tweak[2].16b ++ eor @vtmp[3].16b, @vtmp[3].16b, @tweak[3].16b ++ eor @data[0].16b, @data[0].16b, @tweak[4].16b ++ eor @data[1].16b, @data[1].16b, @tweak[5].16b ++ eor @data[2].16b, @data[2].16b, @tweak[6].16b ++ eor @data[3].16b, @data[3].16b, @tweak[7].16b ++ ++ // save the last tweak ++ mov $lastTweak.16b,@tweak[7].16b ++ st1 {@vtmp[0].4s,@vtmp[1].4s,@vtmp[2].4s,@vtmp[3].4s},[$outp],#64 ++ st1 {@data[0].4s,@data[1].4s,@data[2].4s,@data[3].4s},[$outp],#64 ++ subs $blocks,$blocks,#8 ++ b.gt .Lxts_8_blocks_process${standard} ++ b 100f ++.Lxts_4_blocks_process${standard}: ++ cmp $blocks,#4 ++ b.lt 1f ++ ld1 {@data[0].4s,@data[1].4s,@data[2].4s,@data[3].4s},[$inp],#64 ++___ ++ &rbit(@tweak[0],@tweak[0]); ++ &rbit(@tweak[1],@tweak[1]); ++ &rbit(@tweak[2],@tweak[2]); ++ &rbit(@tweak[3],@tweak[3]); ++$code.=<<___; ++ eor @data[0].16b, @data[0].16b, @tweak[0].16b ++ eor @data[1].16b, @data[1].16b, @tweak[1].16b ++ eor @data[2].16b, @data[2].16b, @tweak[2].16b ++ eor @data[3].16b, @data[3].16b, @tweak[3].16b ++___ ++ &rev32(@data[0],@data[0]); ++ &rev32(@data[1],@data[1]); ++ &rev32(@data[2],@data[2]); ++ &rev32(@data[3],@data[3]); ++ &transpose(@data,@vtmp); ++$code.=<<___; ++ bl ${prefix}_enc_4blks ++___ ++ &transpose(@vtmp,@data); ++$code.=<<___; ++ eor @vtmp[0].16b, @vtmp[0].16b, @tweak[0].16b ++ eor @vtmp[1].16b, @vtmp[1].16b, @tweak[1].16b ++ eor @vtmp[2].16b, @vtmp[2].16b, @tweak[2].16b ++ eor @vtmp[3].16b, @vtmp[3].16b, @tweak[3].16b ++ st1 {@vtmp[0].4s,@vtmp[1].4s,@vtmp[2].4s,@vtmp[3].4s},[$outp],#64 ++ sub $blocks,$blocks,#4 ++ mov @tweak[0].16b,@tweak[4].16b ++ mov @tweak[1].16b,@tweak[5].16b ++ mov @tweak[2].16b,@tweak[6].16b ++ // save the last tweak ++ mov $lastTweak.16b,@tweak[3].16b ++1: ++ // process last block ++ cmp $blocks,#1 ++ b.lt 100f ++ b.gt 1f ++ ld1 {@data[0].4s},[$inp],#16 ++___ ++ &rbit(@tweak[0],@tweak[0]); ++$code.=<<___; ++ eor @data[0].16b, @data[0].16b, @tweak[0].16b ++___ ++ &rev32(@data[0],@data[0]); ++ &encrypt_1blk(@data[0],$rks1); ++$code.=<<___; ++ eor @data[0].16b, @data[0].16b, @tweak[0].16b ++ st1 {@data[0].4s},[$outp],#16 ++ // save the last tweak ++ mov $lastTweak.16b,@tweak[0].16b ++ b 100f ++1: // process last 2 blocks ++ cmp $blocks,#2 ++ b.gt 1f ++ ld1 {@data[0].4s,@data[1].4s},[$inp],#32 ++___ ++ &rbit(@tweak[0],@tweak[0]); ++ &rbit(@tweak[1],@tweak[1]); ++$code.=<<___; ++ eor @data[0].16b, @data[0].16b, @tweak[0].16b ++ eor @data[1].16b, @data[1].16b, @tweak[1].16b ++___ ++ &rev32(@data[0],@data[0]); ++ &rev32(@data[1],@data[1]); ++ &transpose(@data,@vtmp); ++$code.=<<___; ++ bl ${prefix}_enc_4blks ++___ ++ &transpose(@vtmp,@data); ++$code.=<<___; ++ eor @vtmp[0].16b, @vtmp[0].16b, @tweak[0].16b ++ eor @vtmp[1].16b, @vtmp[1].16b, @tweak[1].16b ++ st1 {@vtmp[0].4s,@vtmp[1].4s},[$outp],#32 ++ // save the last tweak ++ mov $lastTweak.16b,@tweak[1].16b ++ b 100f ++1: // process last 3 blocks ++ ld1 {@data[0].4s,@data[1].4s,@data[2].4s},[$inp],#48 ++___ ++ &rbit(@tweak[0],@tweak[0]); ++ &rbit(@tweak[1],@tweak[1]); ++ &rbit(@tweak[2],@tweak[2]); ++$code.=<<___; ++ eor @data[0].16b, @data[0].16b, @tweak[0].16b ++ eor @data[1].16b, @data[1].16b, @tweak[1].16b ++ eor @data[2].16b, @data[2].16b, @tweak[2].16b ++___ ++ &rev32(@data[0],@data[0]); ++ &rev32(@data[1],@data[1]); ++ &rev32(@data[2],@data[2]); ++ &transpose(@data,@vtmp); ++$code.=<<___; ++ bl ${prefix}_enc_4blks ++___ ++ &transpose(@vtmp,@data); ++$code.=<<___; ++ eor @vtmp[0].16b, 
@vtmp[0].16b, @tweak[0].16b ++ eor @vtmp[1].16b, @vtmp[1].16b, @tweak[1].16b ++ eor @vtmp[2].16b, @vtmp[2].16b, @tweak[2].16b ++ st1 {@vtmp[0].4s,@vtmp[1].4s,@vtmp[2].4s},[$outp],#48 ++ // save the last tweak ++ mov $lastTweak.16b,@tweak[2].16b ++100: ++ cmp $remain,0 ++ b.eq .return${standard} ++ ++// This brance calculates the last two tweaks, ++// while the encryption/decryption length is larger than 32 ++.last_2blks_tweak${standard}: ++___ ++ &rev32_armeb($lastTweak,$lastTweak); ++ &compute_tweak_vec($lastTweak,@tweak[1]); ++ &compute_tweak_vec(@tweak[1],@tweak[2]); ++$code.=<<___; ++ b .check_dec${standard} ++ ++ ++// This brance calculates the last two tweaks, ++// while the encryption/decryption length is equal to 32, who only need two tweaks ++.only_2blks_tweak${standard}: ++ mov @tweak[1].16b,@tweak[0].16b ++___ ++ &rev32_armeb(@tweak[1],@tweak[1]); ++ &compute_tweak_vec(@tweak[1],@tweak[2]); ++$code.=<<___; ++ b .check_dec${standard} ++ ++ ++// Determine whether encryption or decryption is required. ++// The last two tweaks need to be swapped for decryption. ++.check_dec${standard}: ++ // encryption:1 decryption:0 ++ cmp $enc,1 ++ b.eq .prcess_last_2blks${standard} ++ mov @vtmp[0].16B,@tweak[1].16b ++ mov @tweak[1].16B,@tweak[2].16b ++ mov @tweak[2].16B,@vtmp[0].16b ++ ++.prcess_last_2blks${standard}: ++___ ++ &rev32_armeb(@tweak[1],@tweak[1]); ++ &rev32_armeb(@tweak[2],@tweak[2]); ++$code.=<<___; ++ ld1 {@data[0].4s},[$inp],#16 ++ eor @data[0].16b, @data[0].16b, @tweak[1].16b ++___ ++ &rev32(@data[0],@data[0]); ++ &encrypt_1blk(@data[0],$rks1); ++$code.=<<___; ++ eor @data[0].16b, @data[0].16b, @tweak[1].16b ++ st1 {@data[0].4s},[$outp],#16 ++ ++ sub $lastBlk,$outp,16 ++ .loop${standard}: ++ subs $remain,$remain,1 ++ ldrb $wtmp0,[$lastBlk,$remain] ++ ldrb $wtmp1,[$inp,$remain] ++ strb $wtmp1,[$lastBlk,$remain] ++ strb $wtmp0,[$outp,$remain] ++ b.gt .loop${standard} ++ ld1 {@data[0].4s}, [$lastBlk] ++ eor @data[0].16b, @data[0].16b, @tweak[2].16b ++___ ++ &rev32(@data[0],@data[0]); ++ &encrypt_1blk(@data[0],$rks1); ++$code.=<<___; ++ eor @data[0].16b, @data[0].16b, @tweak[2].16b ++ st1 {@data[0].4s}, [$lastBlk] ++.return${standard}: ++ ldp x29,x30,[sp],#16 ++ ret ++.size ${prefix}_xts_do_cipher${standard},.-${prefix}_xts_do_cipher${standard} ++___ ++} #end of gen_xts_do_cipher ++ ++}}} ++ ++{{{ ++sub gen_xts_cipher() { ++ my $en = shift; ++ ++$code.=<<___; ++.globl ${prefix}_xts_${en}crypt${standard} ++.type ${prefix}_xts_${en}crypt${standard},%function ++.align 5 ++${prefix}_xts_${en}crypt${standard}: ++ stp x15, x16, [sp, #-0x10]! ++ stp x17, x18, [sp, #-0x10]! ++ stp x19, x20, [sp, #-0x10]! ++ stp x21, x22, [sp, #-0x10]! ++ stp x23, x24, [sp, #-0x10]! ++ stp x25, x26, [sp, #-0x10]! ++ stp x27, x28, [sp, #-0x10]! ++ stp x29, x30, [sp, #-0x10]! ++ stp d8, d9, [sp, #-0x10]! ++ stp d10, d11, [sp, #-0x10]! ++ stp d12, d13, [sp, #-0x10]! ++ stp d14, d15, [sp, #-0x10]! 
++___ ++ &mov_en_to_enc($en); ++$code.=<<___; ++ bl ${prefix}_xts_do_cipher${standard} ++ ldp d14, d15, [sp], #0x10 ++ ldp d12, d13, [sp], #0x10 ++ ldp d10, d11, [sp], #0x10 ++ ldp d8, d9, [sp], #0x10 ++ ldp x29, x30, [sp], #0x10 ++ ldp x27, x28, [sp], #0x10 ++ ldp x25, x26, [sp], #0x10 ++ ldp x23, x24, [sp], #0x10 ++ ldp x21, x22, [sp], #0x10 ++ ldp x19, x20, [sp], #0x10 ++ ldp x17, x18, [sp], #0x10 ++ ldp x15, x16, [sp], #0x10 ++ ret ++.size ${prefix}_xts_${en}crypt${standard},.-${prefix}_xts_${en}crypt${standard} ++___ ++ ++} # end of gen_xts_cipher ++$standard="_gb"; ++&gen_xts_do_cipher(); ++&gen_xts_cipher("en"); ++&gen_xts_cipher("de"); ++$standard=""; ++&gen_xts_do_cipher(); ++&gen_xts_cipher("en"); ++&gen_xts_cipher("de"); ++}}} ++ ++######################################## ++open SELF,$0; ++while() { ++ next if (/^#!/); ++ last if (!s/^#/\/\// and !/^$/); ++ print; ++} ++close SELF; ++ ++foreach(split("\n",$code)) { ++ s/\`([^\`]*)\`/eval($1)/ge; ++ print $_,"\n"; ++} ++ ++close STDOUT or die "error closing STDOUT: $!"; +diff --git a/crypto/sm4/build.info b/crypto/sm4/build.info +index b65a7d1..bb042c5 100644 +--- a/crypto/sm4/build.info ++++ b/crypto/sm4/build.info +@@ -1,4 +1,7 @@ + LIBS=../../libcrypto + SOURCE[../../libcrypto]=\ +- sm4.c ++ sm4.c {- $target{sm4_asm_src} -} + ++ ++GENERATE[vpsm4_ex-armv8.S]=asm/vpsm4_ex-armv8.pl $(PERLASM_SCHEME) ++INCLUDE[vpsm4_ex-armv8.o]=.. +\ No newline at end of file +diff --git a/doc/man3/EVP_sm4_xts.pod b/doc/man3/EVP_sm4_xts.pod +new file mode 100644 +index 0000000..09ca3fb +--- /dev/null ++++ b/doc/man3/EVP_sm4_xts.pod +@@ -0,0 +1,67 @@ ++=pod ++ ++=head1 NAME ++ ++EVP_sm4_xts, ++- EVP SM4 cipher ++ ++=head1 SYNOPSIS ++ ++ #include ++ ++ const EVP_CIPHER *EVP_sm4_xts(void); ++ ++=head1 DESCRIPTION ++ ++The XTS mode of operation (GB/T 17964-2021) for SM4 block cipher. ++ ++=over 4 ++ ++=item EVP_sm4_xts(), ++ ++The SM4 blockcipher with a 256-bit key in XTS mode. This mode use a key length of 256 bits and acts on blocks of 128 bits. ++ ++The B parameter to L or L is the XTS first "tweak" value. XTS mode has two implementations to calculate the following tweak values, one is standardized in IEEE Std. 1619-2007 and has been widely used (e.g., XTS AES), the other is proposed recently (GB/T 17964-2021 implemented in May 2022) and is currently only used in SM4. ++ ++Assume that the input data (B, B, and B) are consistent, the following tweak values are inconsistent due to different standards. As a result, the first ciphertext block are consistent, but the subsequent ciphertext blocks (if any) are different. ++ ++By default, EVP_sm4_xts is standardized in GB/T 17964-2021, and can be changed by EVP_CIPHER_CTX_ctrl. The following Is is supported in XTS mode for SM4. ++ ++=over 4 ++ ++=item EVP_CIPHER_CTX_ctrl(ctx, EVP_CTRL_XTS_STANDARD, std, NULL) ++ ++Sets the standard of EVP_sm4_xts to B. This must be one of 0 or 1, 0 for XTS mode in GB/T 17964-2021, 1 for XTS mode in IEEE Std 1619-2007. ++ ++=back ++ ++The XTS implementation in OpenSSL does not support streaming. That is there must ++only be one L call per L call (and ++similarly with the "Decrypt" functions). ++ ++=back ++ ++=head1 RETURN VALUES ++ ++These functions return a B structure that contains the ++implementation of the symmetric cipher. See L for ++details of the B structure. ++ ++=head1 SEE ALSO ++ ++L, ++L, ++L ++ ++=head1 COPYRIGHT ++ ++Copyright 2022 The OpenSSL Project Authors. All Rights Reserved. ++Copyright 2022 Ribose Inc. All Rights Reserved. 
++
++Licensed under the OpenSSL license (the "License"). You may not use
++this file except in compliance with the License. You can obtain a copy
++in the file LICENSE in the source distribution or at
++L<https://www.openssl.org/source/license.html>.
++
++=cut
++
+diff --git a/fuzz/oids.txt b/fuzz/oids.txt
+index 8dfdea9..d1f98a8 100644
+--- a/fuzz/oids.txt
++++ b/fuzz/oids.txt
+@@ -1064,3 +1064,4 @@ OBJ_id_tc26_gost_3410_2012_256_paramSetD="\x2A\x85\x03\x07\x01\x02\x01\x01\x04"
+ OBJ_hmacWithSHA512_224="\x2A\x86\x48\x86\xF7\x0D\x02\x0C"
+ OBJ_hmacWithSHA512_256="\x2A\x86\x48\x86\xF7\x0D\x02\x0D"
+ OBJ_SM2_with_SM3="\x2A\x81\x1C\xCF\x55\x01\x83\x75"
++OBJ_sm4_xts="\x2A\x81\x1C\xCF\x55\x01\x68\x0A"
+diff --git a/include/openssl/evp.h b/include/openssl/evp.h
+index 3116c1b..69326bc 100644
+--- a/include/openssl/evp.h
++++ b/include/openssl/evp.h
+@@ -353,6 +353,9 @@ int (*EVP_CIPHER_meth_get_ctrl(const EVP_CIPHER *cipher))(EVP_CIPHER_CTX *,
+ 
+ # define EVP_CTRL_GET_IVLEN 0x25
+ 
++/* Set the XTS mode standard, SM4 only */
++# define EVP_CTRL_XTS_STANDARD 0x26
++
+ /* Padding modes */
+ #define EVP_PADDING_PKCS7 1
+ #define EVP_PADDING_ISO7816_4 2
+@@ -937,6 +940,7 @@ const EVP_CIPHER *EVP_sm4_cfb128(void);
+ # define EVP_sm4_cfb EVP_sm4_cfb128
+ const EVP_CIPHER *EVP_sm4_ofb(void);
+ const EVP_CIPHER *EVP_sm4_ctr(void);
++const EVP_CIPHER *EVP_sm4_xts(void);
+ # endif
+ 
+ # if OPENSSL_API_COMPAT < 0x10100000L
+diff --git a/include/openssl/modes.h b/include/openssl/modes.h
+index d544f98..dea324f 100644
+--- a/include/openssl/modes.h
++++ b/include/openssl/modes.h
+@@ -22,6 +22,10 @@ typedef void (*cbc128_f) (const unsigned char *in, unsigned char *out,
+ size_t len, const void *key,
+ unsigned char ivec[16], int enc);
+ 
++typedef void (*ecb128_f) (const unsigned char *in, unsigned char *out,
++ size_t len, const void *key,
++ int enc);
++
+ typedef void (*ctr128_f) (const unsigned char *in, unsigned char *out,
+ size_t blocks, const void *key,
+ const unsigned char ivec[16]);
+@@ -153,6 +157,11 @@ int CRYPTO_xts128_encrypt(const XTS128_CONTEXT *ctx,
+ const unsigned char *inp, unsigned char *out,
+ size_t len, int enc);
+ 
++int CRYPTO_xts128gb_encrypt(const XTS128_CONTEXT *ctx,
++ const unsigned char iv[16],
++ const unsigned char *inp, unsigned char *out,
++ size_t len, int enc);
++
+ size_t CRYPTO_128_wrap(void *key, const unsigned char *iv,
+ unsigned char *out,
+ const unsigned char *in, size_t inlen,
+diff --git a/include/openssl/obj_mac.h b/include/openssl/obj_mac.h
+index 9b125c1..edfc87d 100644
+--- a/include/openssl/obj_mac.h
++++ b/include/openssl/obj_mac.h
+@@ -4772,6 +4772,11 @@
+ #define NID_sm4_ctr 1139
+ #define OBJ_sm4_ctr OBJ_sm_scheme,104L,7L
+ 
++#define SN_sm4_xts "SM4-XTS"
++#define LN_sm4_xts "sm4-xts"
++#define NID_sm4_xts 1196
++#define OBJ_sm4_xts OBJ_sm_scheme,104L,10L
++
+ #define SN_hmac "HMAC"
+ #define LN_hmac "hmac"
+ #define NID_hmac 855
+diff --git a/test/evp_test.c b/test/evp_test.c
+index 62f20ec..3c65ce9 100644
+--- a/test/evp_test.c
++++ b/test/evp_test.c
+@@ -485,6 +485,8 @@ typedef struct cipher_data_st {
+ unsigned char *tag;
+ size_t tag_len;
+ int tag_late;
++ /* SM4 XTS only */
++ int std;
+ } CIPHER_DATA;
+ 
+ static int cipher_test_init(EVP_TEST *t, const char *alg)
+@@ -568,6 +570,15 @@ static int cipher_test_parse(EVP_TEST *t, const char *keyword,
+ return -1;
+ return 1;
+ }
++ if (strcmp(keyword, "Standard") == 0) {
++ if (strcmp(value, "GB") == 0)
++ cdat->std = 0;
++ else if (strcmp(value, "IEEE") == 0)
++ cdat->std = 1;
++ else
++ return -1;
++ return 1;
++ }
+ return 0;
+ }
+ 
+@@ 
-707,7 +718,11 @@ static int cipher_test_enc(EVP_TEST *t, int enc, + goto err; + } + } +- ++ if (expected->std) { ++ if (!EVP_CIPHER_CTX_ctrl(ctx, EVP_CTRL_XTS_STANDARD, expected->std, NULL)) { ++ goto err; ++ }; ++ } + EVP_CIPHER_CTX_set_padding(ctx, 0); + t->err = "CIPHERUPDATE_ERROR"; + tmplen = 0; +diff --git a/test/recipes/30-test_evp_data/evpciph.txt b/test/recipes/30-test_evp_data/evpciph.txt +index 76c839b..a3687bc 100644 +--- a/test/recipes/30-test_evp_data/evpciph.txt ++++ b/test/recipes/30-test_evp_data/evpciph.txt +@@ -2132,6 +2132,28 @@ IV = 0123456789ABCDEFFEDCBA9876543210 + Plaintext = AAAAAAAAAAAAAAAABBBBBBBBBBBBBBBBCCCCCCCCCCCCCCCCDDDDDDDDDDDDDDDDEEEEEEEEEEEEEEEEFFFFFFFFFFFFFFFFEEEEEEEEEEEEEEEEAAAAAAAAAAAAAAAA + Ciphertext = C2B4759E78AC3CF43D0852F4E8D5F9FD7256E8A5FCB65A350EE00630912E44492A0B17E1B85B060D0FBA612D8A95831638B361FD5FFACD942F081485A83CA35D + ++Title = SM4 XTS test vectors, the XTS mode is standardized in GB/T 17964-2021 by default ++Cipher = SM4-XTS ++Key = 2B7E151628AED2A6ABF7158809CF4F3C000102030405060708090A0B0C0D0E0F ++IV = F0F1F2F3F4F5F6F7F8F9FAFBFCFDFEFF ++Plaintext = 6BC1BEE22E409F96E93D7E117393172AAE2D8A571E03AC9C9EB76FAC45AF8E5130C81C46A35CE411E5FBC1191A0A52EFF69F2445DF4F9B17 ++Ciphertext = E9538251C71D7B80BBE4483FEF497BD12C5C581BD6242FC51E08964FB4F60FDB0BA42F63499279213D318D2C11F6886E903BE7F93A1B3479 ++ ++Title = SM4 test vectors for XTS mode in GB/T 17964-2021 and IEEE Std 1619-2007 ++Cipher = SM4-XTS ++Key = 2B7E151628AED2A6ABF7158809CF4F3C000102030405060708090A0B0C0D0E0F ++IV = F0F1F2F3F4F5F6F7F8F9FAFBFCFDFEFF ++Plaintext = 6BC1BEE22E409F96E93D7E117393172AAE2D8A571E03AC9C9EB76FAC45AF8E5130C81C46A35CE411E5FBC1191A0A52EFF69F2445DF4F9B17 ++Ciphertext = E9538251C71D7B80BBE4483FEF497BD12C5C581BD6242FC51E08964FB4F60FDB0BA42F63499279213D318D2C11F6886E903BE7F93A1B3479 ++Standard = GB ++ ++Cipher = SM4-XTS ++Key = 2B7E151628AED2A6ABF7158809CF4F3C000102030405060708090A0B0C0D0E0F ++IV = F0F1F2F3F4F5F6F7F8F9FAFBFCFDFEFF ++Plaintext = 6BC1BEE22E409F96E93D7E117393172AAE2D8A571E03AC9C9EB76FAC45AF8E5130C81C46A35CE411E5FBC1191A0A52EFF69F2445DF4F9B17 ++Ciphertext = E9538251C71D7B80BBE4483FEF497BD1B3DB1A3E60408C575D63FF7DB39F83260869F9E2585FEC9F0B863BF8FD784B8627D16C0DB6D2CFC7 ++Standard = IEEE ++ + Title = ARIA test vectors from RFC5794 (and others) + + Cipher = ARIA-128-ECB +diff --git a/util/libcrypto.num b/util/libcrypto.num +index 95bccf9..62e2ea2 100644 +--- a/util/libcrypto.num ++++ b/util/libcrypto.num +@@ -4632,3 +4632,5 @@ X509_REQ_get0_sm2_id 6385 1_1_1m EXIST::FUNCTION:SM2 + X509_REQ_set0_sm2_id 6386 1_1_1m EXIST::FUNCTION:SM2 + EVP_PKEY_is_sm2 6387 1_1_1m EXIST::FUNCTION:SM2 + SM2_compute_key 6388 1_1_1m EXIST::FUNCTION: ++EVP_sm4_xts 6389 1_1_1m EXIST::FUNCTION:SM4 ++CRYPTO_xts128gb_encrypt 6390 1_1_1m EXIST::FUNCTION: +\ No newline at end of file +-- +2.36.1 + diff --git a/openssl.spec b/openssl.spec index 70823d0f7cd4c0a994913359ecc26ab6e8f2c31f..d51bbd0696fe20fcd9ea20581447252f0023df62 100644 --- a/openssl.spec +++ b/openssl.spec @@ -2,7 +2,7 @@ Name: openssl Epoch: 1 Version: 1.1.1m -Release: 13 +Release: 14 Summary: Cryptography and SSL/TLS Toolkit License: OpenSSL and SSLeay URL: https://www.openssl.org/ @@ -37,6 +37,7 @@ Patch26: Feature-Support-TLCP-protocol.patch Patch27: Feature-X509-command-supports-SM2-certificate-signing-with-default-sm2id.patch Patch28: Feature-PKCS7-sign-and-verify-support-SM2-algorithm.patch Patch29: backport-Update-further-expiring-certificates-that-affect-tes.patch +Patch30: 
Feature-add-ARMv8-implementations-of-SM4-in-ECB-and-XTS.patch BuildRequires: gcc perl make lksctp-tools-devel coreutils util-linux zlib-devel Requires: coreutils %{name}-libs%{?_isa} = %{epoch}:%{version}-%{release} @@ -239,6 +240,9 @@ make test || : %ldconfig_scriptlets libs %changelog +* Sat Oct 29 2022 Xu Yizhou - 1:1.1.1m-14 +- add ARMv8 implementations of SM4 in ECB and XTS + * Fri Oct 28 2022 zhujianwei - 1:1.1.1m-13 - update further expiring certificates
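
For reference, the sketch below shows how the EVP interface added by this patch is meant to be driven from application code, matching the behaviour described in the new EVP_sm4_xts.pod and exercised by the "Standard = GB/IEEE" keyword added to evp_test.c. It is illustrative only and not part of the patch or the package: the helper name sm4_xts_encrypt_oneshot and its error handling are hypothetical, and it assumes the patched OpenSSL 1.1.1 headers, a 32-byte key (key1 || key2), a 16-byte tweak IV, and an input of at least 16 bytes passed in a single update call, since XTS does not support streaming.

/*
 * Illustrative only: minimal one-shot SM4-XTS encryption using the
 * EVP_sm4_xts() cipher and the EVP_CTRL_XTS_STANDARD ctrl added above.
 */
#include <openssl/evp.h>

/* key: 32 bytes (key1 || key2), iv: 16-byte tweak, inlen >= 16.
 * Returns the number of bytes written to out, or 0 on error. */
static int sm4_xts_encrypt_oneshot(const unsigned char key[32],
                                   const unsigned char iv[16],
                                   const unsigned char *in, int inlen,
                                   unsigned char *out, int use_ieee_tweak)
{
    EVP_CIPHER_CTX *ctx = EVP_CIPHER_CTX_new();
    int outl = 0, finl = 0, ret = 0;

    if (ctx == NULL)
        return 0;
    if (!EVP_EncryptInit_ex(ctx, EVP_sm4_xts(), NULL, key, iv))
        goto done;
    /* Default tweak calculation is GB/T 17964-2021 (std = 0);
     * pass std = 1 to switch to IEEE Std 1619-2007. */
    if (use_ieee_tweak
        && !EVP_CIPHER_CTX_ctrl(ctx, EVP_CTRL_XTS_STANDARD, 1, NULL))
        goto done;
    /* XTS does not stream: the whole input goes into one update call. */
    if (!EVP_EncryptUpdate(ctx, out, &outl, in, inlen))
        goto done;
    if (!EVP_EncryptFinal_ex(ctx, out + outl, &finl))
        goto done;
    ret = outl + finl;
done:
    EVP_CIPHER_CTX_free(ctx);
    return ret;
}

Leaving the ctrl out keeps the GB/T 17964-2021 default, which is what the first block of SM4-XTS test vectors added to evpciph.txt relies on; the vectors tagged "Standard = IEEE" correspond to passing std = 1.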