Merge tag 'v5.19-p1' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6

Pull crypto updates from Herbert Xu:
 "API:
   - Test in-place en/decryption with two sglists in testmgr
   - Fix process vs softirq race in cryptd

  Algorithms:
   - Add arm64 acceleration for sm4
   - Add s390 acceleration for chacha20

  Drivers:
   - Add polarfire soc hwrng support in mpsf
   - Add support for TI SoC AM62x in sa2ul
   - Add support for ATSHA204 cryptochip in atmel-sha204a
   - Add support for PRNG in caam
   - Restore support for storage encryption in qat
   - Restore support for storage encryption in hisilicon/sec"

* tag 'v5.19-p1' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: (116 commits)
  hwrng: omap3-rom - fix using wrong clk_disable() in omap_rom_rng_runtime_resume()
  crypto: hisilicon/sec - delete the flag CRYPTO_ALG_ALLOCATES_MEMORY
  crypto: qat - add support for 401xx devices
  crypto: qat - re-enable registration of algorithms
  crypto: qat - honor CRYPTO_TFM_REQ_MAY_SLEEP flag
  crypto: qat - add param check for DH
  crypto: qat - add param check for RSA
  crypto: qat - remove dma_free_coherent() for DH
  crypto: qat - remove dma_free_coherent() for RSA
  crypto: qat - fix memory leak in RSA
  crypto: qat - add backlog mechanism
  crypto: qat - refactor submission logic
  crypto: qat - use pre-allocated buffers in datapath
  crypto: qat - set to zero DH parameters before free
  crypto: s390 - add crypto library interface for ChaCha20
  crypto: talitos - Uniform coding style with defined variable
  crypto: octeontx2 - simplify the return expression of otx2_cpt_aead_cbc_aes_sha_setkey()
  crypto: cryptd - Protect per-CPU resource by disabling BH.
  crypto: sun8i-ce - do not fallback if cryptlen is less than sg length
  crypto: sun8i-ce - rework debugging
  ...
author: Linus Torvalds 2022-05-27 18:06:49 -0700
committer: Linus Torvalds 2022-05-27 18:06:49 -0700
commit: d075c0c1be279c5f4c6688ac0442fff6494e56bc (patch)
tree: c3e3ab6b35139229ad0a5096ccea0c00eb97998b /crypto
parent: bf272460d744112bacd4c4d562592decbf0edf64 (diff)
parent: e4e62bbc6aba49a5edb3156ec65f6698ff37d228 (diff)
8 files changed, 525 insertions, 36 deletions
diff --git a/crypto/Kconfig b/crypto/Kconfig
index 41068811fd0e..19197469cfab 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -274,7 +274,7 @@ config CRYPTO_ECRDSA
 
 config CRYPTO_SM2
 	tristate "SM2 algorithm"
-	select CRYPTO_LIB_SM3
+	select CRYPTO_SM3
 	select CRYPTO_AKCIPHER
 	select CRYPTO_MANAGER
 	select MPILIB
@@ -1010,9 +1010,12 @@ config CRYPTO_SHA3
 	  http://keccak.noekeon.org/
 
 config CRYPTO_SM3
+	tristate
+
+config CRYPTO_SM3_GENERIC
 	tristate "SM3 digest algorithm"
 	select CRYPTO_HASH
-	select CRYPTO_LIB_SM3
+	select CRYPTO_SM3
 	help
 	  SM3 secure hash function as defined by OSCCA GM/T 0004-2012 SM3).
 	  It is part of the Chinese Commercial Cryptography suite.
@@ -1025,7 +1028,7 @@ config CRYPTO_SM3_AVX_X86_64
 	tristate "SM3 digest algorithm (x86_64/AVX)"
 	depends on X86 && 64BIT
 	select CRYPTO_HASH
-	select CRYPTO_LIB_SM3
+	select CRYPTO_SM3
 	help
 	  SM3 secure hash function as defined by OSCCA GM/T 0004-2012 SM3).
 	  It is part of the Chinese Commercial Cryptography suite. This is
@@ -1572,9 +1575,12 @@ config CRYPTO_SERPENT_AVX2_X86_64
 	  <https://www.cl.cam.ac.uk/~rja14/serpent.html>
 
 config CRYPTO_SM4
+	tristate
+
+config CRYPTO_SM4_GENERIC
 	tristate "SM4 cipher algorithm"
 	select CRYPTO_ALGAPI
-	select CRYPTO_LIB_SM4
+	select CRYPTO_SM4
 	help
 	  SM4 cipher algorithms (OSCCA GB/T 32907-2016).
 
@@ -1603,7 +1609,7 @@ config CRYPTO_SM4_AESNI_AVX_X86_64
 	select CRYPTO_SKCIPHER
 	select CRYPTO_SIMD
 	select CRYPTO_ALGAPI
-	select CRYPTO_LIB_SM4
+	select CRYPTO_SM4
 	help
 	  SM4 cipher algorithms (OSCCA GB/T 32907-2016) (x86_64/AES-NI/AVX).
 
@@ -1624,7 +1630,7 @@ config CRYPTO_SM4_AESNI_AVX2_X86_64
 	select CRYPTO_SKCIPHER
 	select CRYPTO_SIMD
 	select CRYPTO_ALGAPI
-	select CRYPTO_LIB_SM4
+	select CRYPTO_SM4
 	select CRYPTO_SM4_AESNI_AVX_X86_64
 	help
 	  SM4 cipher algorithms (OSCCA GB/T 32907-2016) (x86_64/AES-NI/AVX2).
diff --git a/crypto/Makefile b/crypto/Makefile
index f754c4d17d6b..43bc33e247d1 100644
--- a/crypto/Makefile
+++ b/crypto/Makefile
@@ -78,7 +78,8 @@ obj-$(CONFIG_CRYPTO_SHA1) += sha1_generic.o
 obj-$(CONFIG_CRYPTO_SHA256) += sha256_generic.o
 obj-$(CONFIG_CRYPTO_SHA512) += sha512_generic.o
 obj-$(CONFIG_CRYPTO_SHA3) += sha3_generic.o
-obj-$(CONFIG_CRYPTO_SM3) += sm3_generic.o
+obj-$(CONFIG_CRYPTO_SM3) += sm3.o
+obj-$(CONFIG_CRYPTO_SM3_GENERIC) += sm3_generic.o
 obj-$(CONFIG_CRYPTO_STREEBOG) += streebog_generic.o
 obj-$(CONFIG_CRYPTO_WP512) += wp512.o
 CFLAGS_wp512.o := $(call cc-option,-fno-schedule-insns)  # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=79149
@@ -134,7 +135,8 @@ obj-$(CONFIG_CRYPTO_SERPENT) += serpent_generic.o
 CFLAGS_serpent_generic.o := $(call cc-option,-fsched-pressure)  # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=79149
 obj-$(CONFIG_CRYPTO_AES) += aes_generic.o
 CFLAGS_aes_generic.o := $(call cc-option,-fno-code-hoisting) # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=83356
-obj-$(CONFIG_CRYPTO_SM4) += sm4_generic.o
+obj-$(CONFIG_CRYPTO_SM4) += sm4.o
+obj-$(CONFIG_CRYPTO_SM4_GENERIC) += sm4_generic.o
 obj-$(CONFIG_CRYPTO_AES_TI) += aes_ti.o
 obj-$(CONFIG_CRYPTO_CAMELLIA) += camellia_generic.o
 obj-$(CONFIG_CRYPTO_CAST_COMMON) += cast_common.o
diff --git a/crypto/cryptd.c b/crypto/cryptd.c
index a1bea0f4baa8..668095eca0fa 100644
--- a/crypto/cryptd.c
+++ b/crypto/cryptd.c
@@ -39,6 +39,10 @@ struct cryptd_cpu_queue {
 };
 
 struct cryptd_queue {
+	/*
+	 * Protected by disabling BH to allow enqueueing from softinterrupt and
+	 * dequeuing from kworker (cryptd_queue_worker()).
+	 */
 	struct cryptd_cpu_queue __percpu *cpu_queue;
 };
 
@@ -125,28 +129,28 @@ static void cryptd_fini_queue(struct cryptd_queue *queue)
 static int cryptd_enqueue_request(struct cryptd_queue *queue,
 				  struct crypto_async_request *request)
 {
-	int cpu, err;
+	int err;
 	struct cryptd_cpu_queue *cpu_queue;
 	refcount_t *refcnt;
 
-	cpu = get_cpu();
+	local_bh_disable();
 	cpu_queue = this_cpu_ptr(queue->cpu_queue);
 	err = crypto_enqueue_request(&cpu_queue->queue, request);
 
 	refcnt = crypto_tfm_ctx(request->tfm);
 
 	if (err == -ENOSPC)
-		goto out_put_cpu;
+		goto out;
 
-	queue_work_on(cpu, cryptd_wq, &cpu_queue->work);
+	queue_work_on(smp_processor_id(), cryptd_wq, &cpu_queue->work);
 
 	if (!refcount_read(refcnt))
-		goto out_put_cpu;
+		goto out;
 
 	refcount_inc(refcnt);
 
-out_put_cpu:
-	put_cpu();
+out:
+	local_bh_enable();
 
 	return err;
 }
@@ -162,15 +166,10 @@ static void cryptd_queue_worker(struct work_struct *work)
 	cpu_queue = container_of(work, struct cryptd_cpu_queue, work);
 	/*
 	 * Only handle one request at a time to avoid hogging crypto workqueue.
-	 * preempt_disable/enable is used to prevent being preempted by
-	 * cryptd_enqueue_request(). local_bh_disable/enable is used to prevent
-	 * cryptd_enqueue_request() being accessed from software interrupts.
 	 */
 	local_bh_disable();
-	preempt_disable();
 	backlog = crypto_get_backlog(&cpu_queue->queue);
 	req = crypto_dequeue_request(&cpu_queue->queue);
-	preempt_enable();
 	local_bh_enable();
 
 	if (!req)
diff --git a/crypto/crypto_engine.c b/crypto/crypto_engine.c
index 6056a990c9f2..bb8e77077f02 100644
--- a/crypto/crypto_engine.c
+++ b/crypto/crypto_engine.c
@@ -253,6 +253,7 @@ static void crypto_pump_work(struct kthread_work *work)
  * crypto_transfer_request - transfer the new request into the engine queue
  * @engine: the hardware engine
  * @req: the request need to be listed into the engine queue
+ * @need_pump: indicates whether to queue the pump of the request to kthread_work
  */
 static int crypto_transfer_request(struct crypto_engine *engine,
 				   struct crypto_async_request *req,
diff --git a/crypto/ecrdsa.c b/crypto/ecrdsa.c
index b32ffcaad9ad..f3c6b5e15e75 100644
--- a/crypto/ecrdsa.c
+++ b/crypto/ecrdsa.c
@@ -113,15 +113,15 @@ static int ecrdsa_verify(struct akcipher_request *req)
 
 	/* Step 1: verify that 0 < r < q, 0 < s < q */
 	if (vli_is_zero(r, ndigits) ||
-	    vli_cmp(r, ctx->curve->n, ndigits) == 1 ||
+	    vli_cmp(r, ctx->curve->n, ndigits) >= 0 ||
 	    vli_is_zero(s, ndigits) ||
-	    vli_cmp(s, ctx->curve->n, ndigits) == 1)
+	    vli_cmp(s, ctx->curve->n, ndigits) >= 0)
 		return -EKEYREJECTED;
 
 	/* Step 2: calculate hash (h) of the message (passed as input) */
 	/* Step 3: calculate e = h \mod q */
 	vli_from_le64(e, digest, ndigits);
-	if (vli_cmp(e, ctx->curve->n, ndigits) == 1)
+	if (vli_cmp(e, ctx->curve->n, ndigits) >= 0)
 		vli_sub(e, e, ctx->curve->n, ndigits);
 	if (vli_is_zero(e, ndigits))
 		e[0] = 1;
@@ -137,7 +137,7 @@ static int ecrdsa_verify(struct akcipher_request *req)
 	/* Step 6: calculate point C = z_1P + z_2Q, and R = x_c \mod q */
 	ecc_point_mult_shamir(&cc, z1, &ctx->curve->g, z2, &ctx->pub_key,
 			      ctx->curve);
-	if (vli_cmp(cc.x, ctx->curve->n, ndigits) == 1)
+	if (vli_cmp(cc.x, ctx->curve->n, ndigits) >= 0)
 		vli_sub(cc.x, cc.x, ctx->curve->n, ndigits);
 
 	/* Step 7: if R == r signature is valid */
diff --git a/crypto/sm3.c b/crypto/sm3.c
new file mode 100644
index 000000000000..d473e358a873
--- /dev/null
+++ b/crypto/sm3.c
@@ -0,0 +1,246 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * SM3 secure hash, as specified by OSCCA GM/T 0004-2012 SM3 and described
+ * at https://datatracker.ietf.org/doc/html/draft-sca-cfrg-sm3-02
+ *
+ * Copyright (C) 2017 ARM Limited or its affiliates.
+ * Copyright (C) 2017 Gilad Ben-Yossef <gilad@benyossef.com>
+ * Copyright (C) 2021 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
+ */
+
+#include <linux/module.h>
+#include <asm/unaligned.h>
+#include <crypto/sm3.h>
+
+static const u32 ____cacheline_aligned K[64] = {
+	0x79cc4519, 0xf3988a32, 0xe7311465, 0xce6228cb,
+	0x9cc45197, 0x3988a32f, 0x7311465e, 0xe6228cbc,
+	0xcc451979, 0x988a32f3, 0x311465e7, 0x6228cbce,
+	0xc451979c, 0x88a32f39, 0x11465e73, 0x228cbce6,
+	0x9d8a7a87, 0x3b14f50f, 0x7629ea1e, 0xec53d43c,
+	0xd8a7a879, 0xb14f50f3, 0x629ea1e7, 0xc53d43ce,
+	0x8a7a879d, 0x14f50f3b, 0x29ea1e76, 0x53d43cec,
+	0xa7a879d8, 0x4f50f3b1, 0x9ea1e762, 0x3d43cec5,
+	0x7a879d8a, 0xf50f3b14, 0xea1e7629, 0xd43cec53,
+	0xa879d8a7, 0x50f3b14f, 0xa1e7629e, 0x43cec53d,
+	0x879d8a7a, 0x0f3b14f5, 0x1e7629ea, 0x3cec53d4,
+	0x79d8a7a8, 0xf3b14f50, 0xe7629ea1, 0xcec53d43,
+	0x9d8a7a87, 0x3b14f50f, 0x7629ea1e, 0xec53d43c,
+	0xd8a7a879, 0xb14f50f3, 0x629ea1e7, 0xc53d43ce,
+	0x8a7a879d, 0x14f50f3b, 0x29ea1e76, 0x53d43cec,
+	0xa7a879d8, 0x4f50f3b1, 0x9ea1e762, 0x3d43cec5
+};
+
+/*
+ * Transform the message X which consists of 16 32-bit-words. See
+ * GM/T 0004-2012 for details.
+ */
+#define R(i, a, b, c, d, e, f, g, h, t, w1, w2)			\
+	do {							\
+		ss1 = rol32((rol32((a), 12) + (e) + (t)), 7);	\
+		ss2 = ss1 ^ rol32((a), 12);			\
+		d += FF ## i(a, b, c) + ss2 + ((w1) ^ (w2));	\
+		h += GG ## i(e, f, g) + ss1 + (w1);		\
+		b = rol32((b), 9);				\
+		f = rol32((f), 19);				\
+		h = P0((h));					\
+	} while (0)
+
+#define R1(a, b, c, d, e, f, g, h, t, w1, w2) \
+	R(1, a, b, c, d, e, f, g, h, t, w1, w2)
+#define R2(a, b, c, d, e, f, g, h, t, w1, w2) \
+	R(2, a, b, c, d, e, f, g, h, t, w1, w2)
+
+#define FF1(x, y, z)  (x ^ y ^ z)
+#define FF2(x, y, z)  ((x & y) | (x & z) | (y & z))
+
+#define GG1(x, y, z)  FF1(x, y, z)
+#define GG2(x, y, z)  ((x & y) | (~x & z))
+
+/* Message expansion */
+#define P0(x) ((x) ^ rol32((x), 9) ^ rol32((x), 17))
+#define P1(x) ((x) ^ rol32((x), 15) ^ rol32((x), 23))
+#define I(i)  (W[i] = get_unaligned_be32(data + i * 4))
+#define W1(i) (W[i & 0x0f])
+#define W2(i) (W[i & 0x0f] =				\
+		P1(W[i & 0x0f]				\
+			^ W[(i-9) & 0x0f]		\
+			^ rol32(W[(i-3) & 0x0f], 15))	\
+		^ rol32(W[(i-13) & 0x0f], 7)		\
+		^ W[(i-6) & 0x0f])
+
+static void sm3_transform(struct sm3_state *sctx, u8 const *data, u32 W[16])
+{
+	u32 a, b, c, d, e, f, g, h, ss1, ss2;
+
+	a = sctx->state[0];
+	b = sctx->state[1];
+	c = sctx->state[2];
+	d = sctx->state[3];
+	e = sctx->state[4];
+	f = sctx->state[5];
+	g = sctx->state[6];
+	h = sctx->state[7];
+
+	R1(a, b, c, d, e, f, g, h, K[0], I(0), I(4));
+	R1(d, a, b, c, h, e, f, g, K[1], I(1), I(5));
+	R1(c, d, a, b, g, h, e, f, K[2], I(2), I(6));
+	R1(b, c, d, a, f, g, h, e, K[3], I(3), I(7));
+	R1(a, b, c, d, e, f, g, h, K[4], W1(4), I(8));
+	R1(d, a, b, c, h, e, f, g, K[5], W1(5), I(9));
+	R1(c, d, a, b, g, h, e, f, K[6], W1(6), I(10));
+	R1(b, c, d, a, f, g, h, e, K[7], W1(7), I(11));
+	R1(a, b, c, d, e, f, g, h, K[8], W1(8), I(12));
+	R1(d, a, b, c, h, e, f, g, K[9], W1(9), I(13));
+	R1(c, d, a, b, g, h, e, f, K[10], W1(10), I(14));
+	R1(b, c, d, a, f, g, h, e, K[11], W1(11), I(15));
+	R1(a, b, c, d, e, f, g, h, K[12], W1(12), W2(16));
+	R1(d, a, b, c, h, e, f, g, K[13], W1(13), W2(17));
+	R1(c, d, a, b, g, h, e, f, K[14], W1(14), W2(18));
+	R1(b, c, d, a, f, g, h, e, K[15], W1(15), W2(19));
+
+	R2(a, b, c, d, e, f, g, h, K[16], W1(16), W2(20));
+	R2(d, a, b, c, h, e, f, g, K[17], W1(17), W2(21));
+	R2(c, d, a, b, g, h, e, f, K[18], W1(18), W2(22));
+	R2(b, c, d, a, f, g, h, e, K[19], W1(19), W2(23));
+	R2(a, b, c, d, e, f, g, h, K[20], W1(20), W2(24));
+	R2(d, a, b, c, h, e, f, g, K[21], W1(21), W2(25));
+	R2(c, d, a, b, g, h, e, f, K[22], W1(22), W2(26));
+	R2(b, c, d, a, f, g, h, e, K[23], W1(23), W2(27));
+	R2(a, b, c, d, e, f, g, h, K[24], W1(24), W2(28));
+	R2(d, a, b, c, h, e, f, g, K[25], W1(25), W2(29));
+	R2(c, d, a, b, g, h, e, f, K[26], W1(26), W2(30));
+	R2(b, c, d, a, f, g, h, e, K[27], W1(27), W2(31));
+	R2(a, b, c, d, e, f, g, h, K[28], W1(28), W2(32));
+	R2(d, a, b, c, h, e, f, g, K[29], W1(29), W2(33));
+	R2(c, d, a, b, g, h, e, f, K[30], W1(30), W2(34));
+	R2(b, c, d, a, f, g, h, e, K[31], W1(31), W2(35));
+
+	R2(a, b, c, d, e, f, g, h, K[32], W1(32), W2(36));
+	R2(d, a, b, c, h, e, f, g, K[33], W1(33), W2(37));
+	R2(c, d, a, b, g, h, e, f, K[34], W1(34), W2(38));
+	R2(b, c, d, a, f, g, h, e, K[35], W1(35), W2(39));
+	R2(a, b, c, d, e, f, g, h, K[36], W1(36), W2(40));
+	R2(d, a, b, c, h, e, f, g, K[37], W1(37), W2(41));
+	R2(c, d, a, b, g, h, e, f, K[38], W1(38), W2(42));
+	R2(b, c, d, a, f, g, h, e, K[39], W1(39), W2(43));
+	R2(a, b, c, d, e, f, g, h, K[40], W1(40), W2(44));
+	R2(d, a, b, c, h, e, f, g, K[41], W1(41), W2(45));
+	R2(c, d, a, b, g, h, e, f, K[42], W1(42), W2(46));
+	R2(b, c, d, a, f, g, h, e, K[43], W1(43), W2(47));
+	R2(a, b, c, d, e, f, g, h, K[44], W1(44), W2(48));
+	R2(d, a, b, c, h, e, f, g, K[45], W1(45), W2(49));
+	R2(c, d, a, b, g, h, e, f, K[46], W1(46), W2(50));
+	R2(b, c, d, a, f, g, h, e, K[47], W1(47), W2(51));
+
+	R2(a, b, c, d, e, f, g, h, K[48], W1(48), W2(52));
+	R2(d, a, b, c, h, e, f, g, K[49], W1(49), W2(53));
+	R2(c, d, a, b, g, h, e, f, K[50], W1(50), W2(54));
+	R2(b, c, d, a, f, g, h, e, K[51], W1(51), W2(55));
+	R2(a, b, c, d, e, f, g, h, K[52], W1(52), W2(56));
+	R2(d, a, b, c, h, e, f, g, K[53], W1(53), W2(57));
+	R2(c, d, a, b, g, h, e, f, K[54], W1(54), W2(58));
+	R2(b, c, d, a, f, g, h, e, K[55], W1(55), W2(59));
+	R2(a, b, c, d, e, f, g, h, K[56], W1(56), W2(60));
+	R2(d, a, b, c, h, e, f, g, K[57], W1(57), W2(61));
+	R2(c, d, a, b, g, h, e, f, K[58], W1(58), W2(62));
+	R2(b, c, d, a, f, g, h, e, K[59], W1(59), W2(63));
+	R2(a, b, c, d, e, f, g, h, K[60], W1(60), W2(64));
+	R2(d, a, b, c, h, e, f, g, K[61], W1(61), W2(65));
+	R2(c, d, a, b, g, h, e, f, K[62], W1(62), W2(66));
+	R2(b, c, d, a, f, g, h, e, K[63], W1(63), W2(67));
+
+	sctx->state[0] ^= a;
+	sctx->state[1] ^= b;
+	sctx->state[2] ^= c;
+	sctx->state[3] ^= d;
+	sctx->state[4] ^= e;
+	sctx->state[5] ^= f;
+	sctx->state[6] ^= g;
+	sctx->state[7] ^= h;
+}
+#undef R
+#undef R1
+#undef R2
+#undef I
+#undef W1
+#undef W2
+
+static inline void sm3_block(struct sm3_state *sctx,
+		u8 const *data, int blocks, u32 W[16])
+{
+	while (blocks--) {
+		sm3_transform(sctx, data, W);
+		data += SM3_BLOCK_SIZE;
+	}
+}
+
+void sm3_update(struct sm3_state *sctx, const u8 *data, unsigned int len)
+{
+	unsigned int partial = sctx->count % SM3_BLOCK_SIZE;
+	u32 W[16];
+
+	sctx->count += len;
+
+	if ((partial + len) >= SM3_BLOCK_SIZE) {
+		int blocks;
+
+		if (partial) {
+			int p = SM3_BLOCK_SIZE - partial;
+
+			memcpy(sctx->buffer + partial, data, p);
+			data += p;
+			len -= p;
+
+			sm3_block(sctx, sctx->buffer, 1, W);
+		}
+
+		blocks = len / SM3_BLOCK_SIZE;
+		len %= SM3_BLOCK_SIZE;
+
+		if (blocks) {
+			sm3_block(sctx, data, blocks, W);
+			data += blocks * SM3_BLOCK_SIZE;
+		}
+
+		memzero_explicit(W, sizeof(W));
+
+		partial = 0;
+	}
+	if (len)
+		memcpy(sctx->buffer + partial, data, len);
+}
+EXPORT_SYMBOL_GPL(sm3_update);
+
+void sm3_final(struct sm3_state *sctx, u8 *out)
+{
+	const int bit_offset = SM3_BLOCK_SIZE - sizeof(u64);
+	__be64 *bits = (__be64 *)(sctx->buffer + bit_offset);
+	__be32 *digest = (__be32 *)out;
+	unsigned int partial = sctx->count % SM3_BLOCK_SIZE;
+	u32 W[16];
+	int i;
+
+	sctx->buffer[partial++] = 0x80;
+	if (partial > bit_offset) {
+		memset(sctx->buffer + partial, 0, SM3_BLOCK_SIZE - partial);
+		partial = 0;
+
+		sm3_block(sctx, sctx->buffer, 1, W);
+	}
+
+	memset(sctx->buffer + partial, 0, bit_offset - partial);
+	*bits = cpu_to_be64(sctx->count << 3);
+	sm3_block(sctx, sctx->buffer, 1, W);
+
+	for (i = 0; i < 8; i++)
+		put_unaligned_be32(sctx->state[i], digest++);
+
+	/* Zeroize sensitive information. */
+	memzero_explicit(W, sizeof(W));
+	memzero_explicit(sctx, sizeof(*sctx));
+}
+EXPORT_SYMBOL_GPL(sm3_final);
+
+MODULE_DESCRIPTION("Generic SM3 library");
+MODULE_LICENSE("GPL v2");
diff --git a/crypto/sm4.c b/crypto/sm4.c
new file mode 100644
index 000000000000..2c44193bc27e
--- /dev/null
+++ b/crypto/sm4.c
@@ -0,0 +1,184 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * SM4, as specified in
+ * https://tools.ietf.org/id/draft-ribose-cfrg-sm4-10.html
+ *
+ * Copyright (C) 2018 ARM Limited or its affiliates.
+ * Copyright (c) 2021 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
+ */
+
+#include <linux/module.h>
+#include <asm/unaligned.h>
+#include <crypto/sm4.h>
+
+static const u32 ____cacheline_aligned fk[4] = {
+	0xa3b1bac6, 0x56aa3350, 0x677d9197, 0xb27022dc
+};
+
+static const u32 ____cacheline_aligned ck[32] = {
+	0x00070e15, 0x1c232a31, 0x383f464d, 0x545b6269,
+	0x70777e85, 0x8c939aa1, 0xa8afb6bd, 0xc4cbd2d9,
+	0xe0e7eef5, 0xfc030a11, 0x181f262d, 0x343b4249,
+	0x50575e65, 0x6c737a81, 0x888f969d, 0xa4abb2b9,
+	0xc0c7ced5, 0xdce3eaf1, 0xf8ff060d, 0x141b2229,
+	0x30373e45, 0x4c535a61, 0x686f767d, 0x848b9299,
+	0xa0a7aeb5, 0xbcc3cad1, 0xd8dfe6ed, 0xf4fb0209,
+	0x10171e25, 0x2c333a41, 0x484f565d, 0x646b7279
+};
+
+static const u8 ____cacheline_aligned sbox[256] = {
+	0xd6, 0x90, 0xe9, 0xfe, 0xcc, 0xe1, 0x3d, 0xb7,
+	0x16, 0xb6, 0x14, 0xc2, 0x28, 0xfb, 0x2c, 0x05,
+	0x2b, 0x67, 0x9a, 0x76, 0x2a, 0xbe, 0x04, 0xc3,
+	0xaa, 0x44, 0x13, 0x26, 0x49, 0x86, 0x06, 0x99,
+	0x9c, 0x42, 0x50, 0xf4, 0x91, 0xef, 0x98, 0x7a,
+	0x33, 0x54, 0x0b, 0x43, 0xed, 0xcf, 0xac, 0x62,
+	0xe4, 0xb3, 0x1c, 0xa9, 0xc9, 0x08, 0xe8, 0x95,
+	0x80, 0xdf, 0x94, 0xfa, 0x75, 0x8f, 0x3f, 0xa6,
+	0x47, 0x07, 0xa7, 0xfc, 0xf3, 0x73, 0x17, 0xba,
+	0x83, 0x59, 0x3c, 0x19, 0xe6, 0x85, 0x4f, 0xa8,
+	0x68, 0x6b, 0x81, 0xb2, 0x71, 0x64, 0xda, 0x8b,
+	0xf8, 0xeb, 0x0f, 0x4b, 0x70, 0x56, 0x9d, 0x35,
+	0x1e, 0x24, 0x0e, 0x5e, 0x63, 0x58, 0xd1, 0xa2,
+	0x25, 0x22, 0x7c, 0x3b, 0x01, 0x21, 0x78, 0x87,
+	0xd4, 0x00, 0x46, 0x57, 0x9f, 0xd3, 0x27, 0x52,
+	0x4c, 0x36, 0x02, 0xe7, 0xa0, 0xc4, 0xc8, 0x9e,
+	0xea, 0xbf, 0x8a, 0xd2, 0x40, 0xc7, 0x38, 0xb5,
+	0xa3, 0xf7, 0xf2, 0xce, 0xf9, 0x61, 0x15, 0xa1,
+	0xe0, 0xae, 0x5d, 0xa4, 0x9b, 0x34, 0x1a, 0x55,
+	0xad, 0x93, 0x32, 0x30, 0xf5, 0x8c, 0xb1, 0xe3,
+	0x1d, 0xf6, 0xe2, 0x2e, 0x82, 0x66, 0xca, 0x60,
+	0xc0, 0x29, 0x23, 0xab, 0x0d, 0x53, 0x4e, 0x6f,
+	0xd5, 0xdb, 0x37, 0x45, 0xde, 0xfd, 0x8e, 0x2f,
+	0x03, 0xff, 0x6a, 0x72, 0x6d, 0x6c, 0x5b, 0x51,
+	0x8d, 0x1b, 0xaf, 0x92, 0xbb, 0xdd, 0xbc, 0x7f,
+	0x11, 0xd9, 0x5c, 0x41, 0x1f, 0x10, 0x5a, 0xd8,
+	0x0a, 0xc1, 0x31, 0x88, 0xa5, 0xcd, 0x7b, 0xbd,
+	0x2d, 0x74, 0xd0, 0x12, 0xb8, 0xe5, 0xb4, 0xb0,
+	0x89, 0x69, 0x97, 0x4a, 0x0c, 0x96, 0x77, 0x7e,
+	0x65, 0xb9, 0xf1, 0x09, 0xc5, 0x6e, 0xc6, 0x84,
+	0x18, 0xf0, 0x7d, 0xec, 0x3a, 0xdc, 0x4d, 0x20,
+	0x79, 0xee, 0x5f, 0x3e, 0xd7, 0xcb, 0x39, 0x48
+};
+
+extern const u32 crypto_sm4_fk[4] __alias(fk);
+extern const u32 crypto_sm4_ck[32] __alias(ck);
+extern const u8 crypto_sm4_sbox[256] __alias(sbox);
+
+EXPORT_SYMBOL(crypto_sm4_fk);
+EXPORT_SYMBOL(crypto_sm4_ck);
+EXPORT_SYMBOL(crypto_sm4_sbox);
+
+static inline u32 sm4_t_non_lin_sub(u32 x)
+{
+	u32 out;
+
+	out  = (u32)sbox[x & 0xff];
+	out |= (u32)sbox[(x >> 8) & 0xff] << 8;
+	out |= (u32)sbox[(x >> 16) & 0xff] << 16;
+	out |= (u32)sbox[(x >> 24) & 0xff] << 24;
+
+	return out;
+}
+
+static inline u32 sm4_key_lin_sub(u32 x)
+{
+	return x ^ rol32(x, 13) ^ rol32(x, 23);
+}
+
+static inline u32 sm4_enc_lin_sub(u32 x)
+{
+	return x ^ rol32(x, 2) ^ rol32(x, 10) ^ rol32(x, 18) ^ rol32(x, 24);
+}
+
+static inline u32 sm4_key_sub(u32 x)
+{
+	return sm4_key_lin_sub(sm4_t_non_lin_sub(x));
+}
+
+static inline u32 sm4_enc_sub(u32 x)
+{
+	return sm4_enc_lin_sub(sm4_t_non_lin_sub(x));
+}
+
+static inline u32 sm4_round(u32 x0, u32 x1, u32 x2, u32 x3, u32 rk)
+{
+	return x0 ^ sm4_enc_sub(x1 ^ x2 ^ x3 ^ rk);
+}
+
+
+/**
+ * sm4_expandkey - Expands the SM4 key as described in GB/T 32907-2016
+ * @ctx:	The location where the computed key will be stored.
+ * @in_key:	The supplied key.
+ * @key_len:	The length of the supplied key.
+ *
+ * Returns 0 on success. The function fails (with -EINVAL) only if an invalid
+ * key size is supplied; the pointers are not validated.
+ */
+int sm4_expandkey(struct sm4_ctx *ctx, const u8 *in_key,
+			  unsigned int key_len)
+{
+	u32 rk[4];
+	const u32 *key = (u32 *)in_key;
+	int i;
+
+	if (key_len != SM4_KEY_SIZE)
+		return -EINVAL;
+
+	rk[0] = get_unaligned_be32(&key[0]) ^ fk[0];
+	rk[1] = get_unaligned_be32(&key[1]) ^ fk[1];
+	rk[2] = get_unaligned_be32(&key[2]) ^ fk[2];
+	rk[3] = get_unaligned_be32(&key[3]) ^ fk[3];
+
+	for (i = 0; i < 32; i += 4) {
+		rk[0] ^= sm4_key_sub(rk[1] ^ rk[2] ^ rk[3] ^ ck[i + 0]);
+		rk[1] ^= sm4_key_sub(rk[2] ^ rk[3] ^ rk[0] ^ ck[i + 1]);
+		rk[2] ^= sm4_key_sub(rk[3] ^ rk[0] ^ rk[1] ^ ck[i + 2]);
+		rk[3] ^= sm4_key_sub(rk[0] ^ rk[1] ^ rk[2] ^ ck[i + 3]);
+
+		ctx->rkey_enc[i + 0] = rk[0];
+		ctx->rkey_enc[i + 1] = rk[1];
+		ctx->rkey_enc[i + 2] = rk[2];
+		ctx->rkey_enc[i + 3] = rk[3];
+		ctx->rkey_dec[31 - 0 - i] = rk[0];
+		ctx->rkey_dec[31 - 1 - i] = rk[1];
+		ctx->rkey_dec[31 - 2 - i] = rk[2];
+		ctx->rkey_dec[31 - 3 - i] = rk[3];
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(sm4_expandkey);
+
+/**
+ * sm4_crypt_block - Encrypt or decrypt a single SM4 block
+ * @rk:		The rkey_enc for encrypt or rkey_dec for decrypt
+ * @out:	Buffer to store output data
+ * @in: 	Buffer containing the input data
+ */
+void sm4_crypt_block(const u32 *rk, u8 *out, const u8 *in)
+{
+	u32 x[4], i;
+
+	x[0] = get_unaligned_be32(in + 0 * 4);
+	x[1] = get_unaligned_be32(in + 1 * 4);
+	x[2] = get_unaligned_be32(in + 2 * 4);
+	x[3] = get_unaligned_be32(in + 3 * 4);
+
+	for (i = 0; i < 32; i += 4) {
+		x[0] = sm4_round(x[0], x[1], x[2], x[3], rk[i + 0]);
+		x[1] = sm4_round(x[1], x[2], x[3], x[0], rk[i + 1]);
+		x[2] = sm4_round(x[2], x[3], x[0], x[1], rk[i + 2]);
+		x[3] = sm4_round(x[3], x[0], x[1], x[2], rk[i + 3]);
+	}
+
+	put_unaligned_be32(x[3 - 0], out + 0 * 4);
+	put_unaligned_be32(x[3 - 1], out + 1 * 4);
+	put_unaligned_be32(x[3 - 2], out + 2 * 4);
+	put_unaligned_be32(x[3 - 3], out + 3 * 4);
+}
+EXPORT_SYMBOL_GPL(sm4_crypt_block);
+
+MODULE_DESCRIPTION("Generic SM4 library");
+MODULE_LICENSE("GPL v2");
diff --git a/crypto/testmgr.c b/crypto/testmgr.c
index 4948201065cc..5801a8f9f713 100644
--- a/crypto/testmgr.c
+++ b/crypto/testmgr.c
@@ -232,6 +232,20 @@ enum finalization_type {
 	FINALIZATION_TYPE_DIGEST,	/* use digest() */
 };
 
+/*
+ * Whether the crypto operation will occur in-place, and if so whether the
+ * source and destination scatterlist pointers will coincide (req->src ==
+ * req->dst), or whether they'll merely point to two separate scatterlists
+ * (req->src != req->dst) that reference the same underlying memory.
+ *
+ * This is only relevant for algorithm types that support in-place operation.
+ */
+enum inplace_mode {
+	OUT_OF_PLACE,
+	INPLACE_ONE_SGLIST,
+	INPLACE_TWO_SGLISTS,
+};
+
 #define TEST_SG_TOTAL	10000
 
 /**
@@ -265,7 +279,7 @@ struct test_sg_division {
  * crypto test vector can be tested.
  *
  * @name: name of this config, logged for debugging purposes if a test fails
- * @inplace: operate on the data in-place, if applicable for the algorithm type?
+ * @inplace_mode: whether and how to operate on the data in-place, if applicable
  * @req_flags: extra request_flags, e.g. CRYPTO_TFM_REQ_MAY_SLEEP
  * @src_divs: description of how to arrange the source scatterlist
  * @dst_divs: description of how to arrange the dst scatterlist, if applicable
@@ -282,7 +296,7 @@ struct test_sg_division {
  */
 struct testvec_config {
 	const char *name;
-	bool inplace;
+	enum inplace_mode inplace_mode;
 	u32 req_flags;
 	struct test_sg_division src_divs[XBUFSIZE];
 	struct test_sg_division dst_divs[XBUFSIZE];
@@ -307,11 +321,16 @@ struct testvec_config {
 /* Configs for skciphers and aeads */
 static const struct testvec_config default_cipher_testvec_configs[] = {
 	{
-		.name = "in-place",
-		.inplace = true,
+		.name = "in-place (one sglist)",
+		.inplace_mode = INPLACE_ONE_SGLIST,
+		.src_divs = { { .proportion_of_total = 10000 } },
+	}, {
+		.name = "in-place (two sglists)",
+		.inplace_mode = INPLACE_TWO_SGLISTS,
 		.src_divs = { { .proportion_of_total = 10000 } },
 	}, {
 		.name = "out-of-place",
+		.inplace_mode = OUT_OF_PLACE,
 		.src_divs = { { .proportion_of_total = 10000 } },
 	}, {
 		.name = "unaligned buffer, offset=1",
@@ -349,7 +368,7 @@ static const struct testvec_config default_cipher_testvec_configs[] = {
 		.key_offset = 3,
 	}, {
 		.name = "misaligned splits crossing pages, inplace",
-		.inplace = true,
+		.inplace_mode = INPLACE_ONE_SGLIST,
 		.src_divs = {
 			{
 				.proportion_of_total = 7500,
@@ -749,18 +768,39 @@ static int build_cipher_test_sglists(struct cipher_test_sglists *tsgls,
 
 	iov_iter_kvec(&input, WRITE, inputs, nr_inputs, src_total_len);
 	err = build_test_sglist(&tsgls->src, cfg->src_divs, alignmask,
-				cfg->inplace ?
+				cfg->inplace_mode != OUT_OF_PLACE ?
 					max(dst_total_len, src_total_len) :
 					src_total_len,
 				&input, NULL);
 	if (err)
 		return err;
 
-	if (cfg->inplace) {
+	/*
+	 * In-place crypto operations can use the same scatterlist for both the
+	 * source and destination (req->src == req->dst), or can use separate
+	 * scatterlists (req->src != req->dst) which point to the same
+	 * underlying memory.  Make sure to test both cases.
+	 */
+	if (cfg->inplace_mode == INPLACE_ONE_SGLIST) {
 		tsgls->dst.sgl_ptr = tsgls->src.sgl;
 		tsgls->dst.nents = tsgls->src.nents;
 		return 0;
 	}
+	if (cfg->inplace_mode == INPLACE_TWO_SGLISTS) {
+		/*
+		 * For now we keep it simple and only test the case where the
+		 * two scatterlists have identical entries, rather than
+		 * different entries that split up the same memory differently.
+		 */
+		memcpy(tsgls->dst.sgl, tsgls->src.sgl,
+		       tsgls->src.nents * sizeof(tsgls->src.sgl[0]));
+		memcpy(tsgls->dst.sgl_saved, tsgls->src.sgl,
+		       tsgls->src.nents * sizeof(tsgls->src.sgl[0]));
+		tsgls->dst.sgl_ptr = tsgls->dst.sgl;
+		tsgls->dst.nents = tsgls->src.nents;
+		return 0;
+	}
+	/* Out of place */
 	return build_test_sglist(&tsgls->dst,
 				 cfg->dst_divs[0].proportion_of_total ?
 					cfg->dst_divs : cfg->src_divs,
@@ -995,9 +1035,19 @@ static void generate_random_testvec_config(struct testvec_config *cfg,
 
 	p += scnprintf(p, end - p, "random:");
 
-	if (prandom_u32() % 2 == 0) {
-		cfg->inplace = true;
-		p += scnprintf(p, end - p, " inplace");
+	switch (prandom_u32() % 4) {
+	case 0:
+	case 1:
+		cfg->inplace_mode = OUT_OF_PLACE;
+		break;
+	case 2:
+		cfg->inplace_mode = INPLACE_ONE_SGLIST;
+		p += scnprintf(p, end - p, " inplace_one_sglist");
+		break;
+	default:
+		cfg->inplace_mode = INPLACE_TWO_SGLISTS;
+		p += scnprintf(p, end - p, " inplace_two_sglists");
+		break;
 	}
 
 	if (prandom_u32() % 2 == 0) {
@@ -1034,7 +1084,7 @@ static void generate_random_testvec_config(struct testvec_config *cfg,
 					  cfg->req_flags);
 	p += scnprintf(p, end - p, "]");
 
-	if (!cfg->inplace && prandom_u32() % 2 == 0) {
+	if (cfg->inplace_mode == OUT_OF_PLACE && prandom_u32() % 2 == 0) {
 		p += scnprintf(p, end - p, " dst_divs=[");
 		p = generate_random_sgl_divisions(cfg->dst_divs,
 						  ARRAY_SIZE(cfg->dst_divs),
@@ -2085,7 +2135,8 @@ static int test_aead_vec_cfg(int enc, const struct aead_testvec *vec,
 	/* Check for the correct output (ciphertext or plaintext) */
 	err = verify_correct_output(&tsgls->dst, enc ? vec->ctext : vec->ptext,
 				    enc ? vec->clen : vec->plen,
-				    vec->alen, enc || !cfg->inplace);
+				    vec->alen,
+				    enc || cfg->inplace_mode == OUT_OF_PLACE);
 	if (err == -EOVERFLOW) {
 		pr_err("alg: aead: %s %s overran dst buffer on test vector %s, cfg=\"%s\"\n",
 		       driver, op, vec_name, cfg->name);
author	Linus Torvalds	2022-05-27 18:06:49 -0700
committer	Linus Torvalds	2022-05-27 18:06:49 -0700
commit	d075c0c1be279c5f4c6688ac0442fff6494e56bc (patch)
tree	c3e3ab6b35139229ad0a5096ccea0c00eb97998b /crypto
parent	bf272460d744112bacd4c4d562592decbf0edf64 (diff)
parent	e4e62bbc6aba49a5edb3156ec65f6698ff37d228 (diff)