diff --git a/0001-uadk-fix-build-issue-of-pthread_atfork.patch b/0001-uadk-fix-build-issue-of-pthread_atfork.patch deleted file mode 100644 index 5e1931d..0000000 --- a/0001-uadk-fix-build-issue-of-pthread_atfork.patch +++ /dev/null @@ -1,39 +0,0 @@ -From 7ef31a9e64706d8312f7137b771c47e1658d4f6d Mon Sep 17 00:00:00 2001 -From: Zhangfei Gao -Date: Wed, 10 Jan 2024 10:24:19 +0000 -Subject: [PATCH 1/8] uadk: fix build issue of pthread_atfork - -In ubuntu 16.04 and 18.04, build uadk fail -log: -/bin/bash ../libtool --tag=CC --mode=link aarch64-linux-gnu-gcc -Wall -O0 -Werror -fno-strict-aliasing -I../include -I.. -g -O2 -Wl,-rpath,'/usr/local/lib' -pthread -o wd_mempool_test wd_mempool_test.o -L../.libs -lwd -ldl -lwd_crypto -lnuma -libtool: link: aarch64-linux-gnu-gcc -Wall -O0 -Werror -fno-strict-aliasing -I../include -I.. -g -O2 -Wl,-rpath -Wl,/usr/local/lib -pthread -o .libs/wd_mempool_test wd_mempool_test.o -L../.libs /mnt/uadk/.libs/libwd.so -ldl /mnt/uadk/.libs/libwd_crypto.so -lnuma -lpthread -/usr/bin/ld: .libs/wd_mempool_test: hidden symbol `pthread_atfork' in /usr/lib/aarch64-linux-gnu/libpthread_nonshared.a(pthread_atfork.oS) is referenced by DSO - -Reason is pthread_atfork in wd_xxx.c, missing -lpthread. 
- -Signed-off-by: Zhangfei Gao ---- - Makefile.am | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/Makefile.am b/Makefile.am -index d81e8cc..a369b7e 100644 ---- a/Makefile.am -+++ b/Makefile.am -@@ -114,11 +114,11 @@ libwd_la_LDFLAGS=$(UADK_VERSION) $(UADK_WD_SYMBOL) $(UADK_V1_SYMBOL) - libwd_la_LIBADD= -ldl -lnuma - - libwd_comp_la_LIBADD= -lwd -ldl -lnuma --libwd_comp_la_LDFLAGS=$(UADK_VERSION) $(UADK_COMP_SYMBOL) -+libwd_comp_la_LDFLAGS=$(UADK_VERSION) $(UADK_COMP_SYMBOL) -lpthread - libwd_comp_la_DEPENDENCIES= libwd.la - - libwd_crypto_la_LIBADD= -lwd -ldl -lnuma --libwd_crypto_la_LDFLAGS=$(UADK_VERSION) $(UADK_CRYPTO_SYMBOL) -+libwd_crypto_la_LDFLAGS=$(UADK_VERSION) $(UADK_CRYPTO_SYMBOL) -lpthread - libwd_crypto_la_DEPENDENCIES= libwd.la - - libhisi_zip_la_LIBADD= -lwd -ldl -lwd_comp --- -2.25.1 - diff --git a/0001-uadk_tool-benchmark-skip-sm4-benchmark-if-openssl-wi.patch b/0001-uadk_tool-benchmark-skip-sm4-benchmark-if-openssl-wi.patch new file mode 100644 index 0000000..14c7423 --- /dev/null +++ b/0001-uadk_tool-benchmark-skip-sm4-benchmark-if-openssl-wi.patch @@ -0,0 +1,52 @@ +From 92eed6318fc0951e71c2dfc3b72826bd7e1eb4c8 Mon Sep 17 00:00:00 2001 +From: Rongqi Sun +Date: Thu, 13 Jun 2024 07:07:02 +0000 +Subject: [PATCH 01/16] uadk_tool/benchmark: skip sm4 benchmark if openssl with + 'no-sm4' + +When openssl is with 'no-sm4', which cause macro 'OPENSSL_NO_SM4' is defined. So sm4 benchmark should check the macro too, otherwise build would be failed. 
+ +Signed-off-by: Rongqi Sun +--- + uadk_tool/benchmark/sec_soft_benchmark.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/uadk_tool/benchmark/sec_soft_benchmark.c b/uadk_tool/benchmark/sec_soft_benchmark.c +index 3a38cbd..84dab63 100644 +--- a/uadk_tool/benchmark/sec_soft_benchmark.c ++++ b/uadk_tool/benchmark/sec_soft_benchmark.c +@@ -280,6 +280,7 @@ static int sec_soft_param_parse(soft_thread *tddata, struct acc_option *options) + mode = WD_CIPHER_CBC; + tddata->evp_cipher = EVP_des_ede3_cbc(); + break; ++#ifndef OPENSSL_NO_SM4 + case SM4_128_ECB: + keysize = 16; + mode = WD_CIPHER_ECB; +@@ -309,6 +310,7 @@ static int sec_soft_param_parse(soft_thread *tddata, struct acc_option *options) + keysize = 16; + mode = WD_CIPHER_XTS; + break; ++#endif + case AES_128_CCM: + keysize = 16; + mode = WD_CIPHER_CCM; +@@ -354,6 +356,7 @@ static int sec_soft_param_parse(soft_thread *tddata, struct acc_option *options) + mode = WD_CIPHER_CBC; + tddata->evp_cipher = EVP_aes_256_cbc(); + break; ++#ifndef OPENSSL_NO_SM4 + case SM4_128_CCM: + keysize = 16; + mode = WD_CIPHER_CCM; +@@ -362,6 +365,7 @@ static int sec_soft_param_parse(soft_thread *tddata, struct acc_option *options) + keysize = 16; + mode = WD_CIPHER_GCM; + break; ++#endif + case SM3_ALG: // digest mode is optype + keysize = 4; + mode = optype; +-- +2.25.1 + diff --git a/0002-uadk-fix-static-build-error.patch b/0002-uadk-fix-static-build-error.patch deleted file mode 100644 index e455b9e..0000000 --- a/0002-uadk-fix-static-build-error.patch +++ /dev/null @@ -1,36 +0,0 @@ -From e50c7b8ab28696c904afa86741a4b6951c90b0b3 Mon Sep 17 00:00:00 2001 -From: Zhangfei Gao -Date: Wed, 10 Jan 2024 13:33:18 +0000 -Subject: [PATCH 2/8] uadk: fix static build error - -static build fail on both ubuntu and openEuler -/usr/bin/ld: ../.libs/libwd_comp.a(wd_comp.o): relocation R_AARCH64_ADR_PREL_PG_HI21 against symbol `stderr@@GLIBC_2.17' which may bind externally can not be used when making a shared object; recompile with 
-fPIC -/usr/bin/ld: ../.libs/libwd_comp.a(wd_comp.o)(.text+0x84): unresolvable R_AARCH64_ADR_PREL_PG_HI21 relocation against symbol `stderr@@GLIBC_2.17' - -Solved by adding -fPIC - -The -fPIC option in GCC stands for "Position Independent Code." -When this option is used, the generated machine code is not dependent -on being located at a specific address in order to work - -Signed-off-by: Zhangfei Gao ---- - Makefile.am | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/Makefile.am b/Makefile.am -index a369b7e..51691cb 100644 ---- a/Makefile.am -+++ b/Makefile.am -@@ -86,7 +86,7 @@ libhisi_sec_la_SOURCES=drv/hisi_sec.c drv/hisi_qm_udrv.c \ - libhisi_hpre_la_SOURCES=drv/hisi_hpre.c drv/hisi_qm_udrv.c \ - hisi_qm_udrv.h wd_hpre_drv.h - if WD_STATIC_DRV --AM_CFLAGS += -DWD_STATIC_DRV -+AM_CFLAGS += -DWD_STATIC_DRV -fPIC - AM_CFLAGS += -DWD_NO_LOG - - libwd_la_LIBADD = $(libwd_la_OBJECTS) -ldl -lnuma --- -2.25.1 - diff --git a/0002-uadk-v1-hpre-remove-redundant-comments.patch b/0002-uadk-v1-hpre-remove-redundant-comments.patch new file mode 100644 index 0000000..53588c9 --- /dev/null +++ b/0002-uadk-v1-hpre-remove-redundant-comments.patch @@ -0,0 +1,78 @@ +From 1e0bacb328334ebf0f0bc9cf951a11b971f3ab03 Mon Sep 17 00:00:00 2001 +From: Zhiqi Song +Date: Tue, 23 Jul 2024 19:37:19 +0800 +Subject: [PATCH 02/16] uadk/v1/hpre: remove redundant comments + +Remove redundant comments, as the API name clearly +describes the function. And fix code wrap. 
+ +Signed-off-by: Zhiqi Song +Signed-off-by: Qi Tao +--- + v1/drv/hisi_hpre_udrv.c | 14 +++----------- + 1 file changed, 3 insertions(+), 11 deletions(-) + +diff --git a/v1/drv/hisi_hpre_udrv.c b/v1/drv/hisi_hpre_udrv.c +index 84ec710..05518ab 100644 +--- a/v1/drv/hisi_hpre_udrv.c ++++ b/v1/drv/hisi_hpre_udrv.c +@@ -490,12 +490,10 @@ int qm_fill_rsa_sqe(void *message, struct qm_queue_info *info, __u16 i) + return -WD_EINVAL; + hw_msg->task_len1 = msg->key_bytes / BYTE_BITS - 0x1; + +- /* prepare rsa key */ + ret = qm_rsa_prepare_key(msg, q, hw_msg, &va, &size); + if (unlikely(ret)) + return ret; + +- /* prepare in/out put */ + ret = qm_rsa_prepare_iot(msg, q, hw_msg); + if (unlikely(ret)) { + rsa_key_unmap(msg, q, hw_msg, va, size); +@@ -576,13 +574,11 @@ static int fill_dh_g_param(struct wd_queue *q, struct wcrypto_dh_msg *msg, + int ret; + + ret = qm_crypto_bin_to_hpre_bin((char *)msg->g, +- (const char *)msg->g, msg->key_bytes, +- msg->gbytes, "dh g"); ++ (const char *)msg->g, msg->key_bytes, msg->gbytes, "dh g"); + if (unlikely(ret)) + return ret; + +- phy = (uintptr_t)drv_iova_map(q, (void *)msg->g, +- msg->key_bytes); ++ phy = (uintptr_t)drv_iova_map(q, (void *)msg->g, msg->key_bytes); + if (unlikely(!phy)) { + WD_ERR("Get dh g parameter dma address fail!\n"); + return -WD_ENOMEM; +@@ -1338,8 +1334,7 @@ static int qm_ecc_prepare_in(struct wcrypto_ecc_msg *msg, + hw_msg->bd_rsv2 = 1; /* fall through */ + case WCRYPTO_ECXDH_GEN_KEY: /* fall through */ + case WCRYPTO_SM2_KG: +- ret = ecc_prepare_dh_gen_in((void *)in, +- data); ++ ret = ecc_prepare_dh_gen_in((void *)in, data); + break; + case WCRYPTO_ECXDH_COMPUTE_KEY: + /* +@@ -1667,17 +1662,14 @@ static int qm_fill_ecc_sqe_general(void *message, struct qm_queue_info *info, + memset(hw_msg, 0, sizeof(struct hisi_hpre_sqe)); + hw_msg->task_len1 = ((msg->key_bytes) >> BYTE_BITS_SHIFT) - 0x1; + +- /* prepare algorithm */ + ret = qm_ecc_prepare_alg(hw_msg, msg); + if (unlikely(ret)) + return ret; + +- /* prepare key 
*/ + ret = qm_ecc_prepare_key(msg, q, hw_msg, &va, &size); + if (unlikely(ret)) + return ret; + +- /* prepare in/out put */ + ret = qm_ecc_prepare_iot(msg, q, hw_msg); + if (unlikely(ret)) + goto map_key_fail; +-- +2.25.1 + diff --git a/0003-uadk-add-secure-compilation-option.patch b/0003-uadk-add-secure-compilation-option.patch deleted file mode 100644 index 71c2903..0000000 --- a/0003-uadk-add-secure-compilation-option.patch +++ /dev/null @@ -1,70 +0,0 @@ -From 4cd0b3e82205767ac151835e69736c61aca4eda8 Mon Sep 17 00:00:00 2001 -From: Qi Tao -Date: Thu, 18 Jan 2024 21:07:26 +0800 -Subject: [PATCH 3/8] uadk: add secure compilation option -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Add PIE, PIC, BIND_NOW, SP, NO Rpath/RunPath, FS, -Ftrapv and Strip compilation options. - -PIC(-fPIC): - Generate position-Independent-Code and andomly - load dynamic libraries. -PIE(-fPIE -pie): - Generate location-independent executables,which - reduces the probability of fixed address attacks - and buffer overflow attacks. -BIND_NOW(-Wl,-z,relro,-z,now): - GOT table redirects all read-only,which defends - against ret2plt attacks. -SP(-fstack-protector-strong/all): - Determine whether an overflow attack occurs. -Strip(-Wl,-s): - Deleting symbol tables defends against hacker - attacks and reduces the file size. -FS(-D_FORTIFY_SOURCE=2 -O2): - Provides access checks for fixed-size buffers - at compile time and at run time. -Ftrapv(-ftrapv): - Detects integer overflow. -NO Rpath/RunPath(hardcode_into_libs=no): - Eliminates dynamic library search paths, - which defense against attacks by replacing - dynamic libraries with the same name. 
- -Signed-off-by: Qi Tao ---- - Makefile.am | 2 ++ - configure.ac | 1 + - 2 files changed, 3 insertions(+) - -diff --git a/Makefile.am b/Makefile.am -index 51691cb..64cfa44 100644 ---- a/Makefile.am -+++ b/Makefile.am -@@ -1,6 +1,8 @@ - ACLOCAL_AMFLAGS = -I m4 -I./include - AUTOMAKE_OPTIONS = foreign subdir-objects - AM_CFLAGS=-Wall -Werror -fno-strict-aliasing -I$(top_srcdir)/include -+AM_CFLAGS+=-fPIC -fPIE -pie -fstack-protector-strong -D_FORTIFY_SOURCE=2 \ -+ -O2 -ftrapv -Wl,-z,relro,-z,now -Wl,-s - CLEANFILES = - - if WITH_LOG_FILE -diff --git a/configure.ac b/configure.ac -index 2692175..b198417 100644 ---- a/configure.ac -+++ b/configure.ac -@@ -18,6 +18,7 @@ AM_PROG_AR - AC_PROG_LIBTOOL - AM_PROG_LIBTOOL - LT_INIT -+AC_SUBST([hardcode_into_libs], [no]) - AM_PROG_CC_C_O - - AC_ARG_ENABLE([debug-log], --- -2.25.1 - diff --git a/0003-uadk-v1-fix-for-atomic-memory-order.patch b/0003-uadk-v1-fix-for-atomic-memory-order.patch new file mode 100644 index 0000000..af2855a --- /dev/null +++ b/0003-uadk-v1-fix-for-atomic-memory-order.patch @@ -0,0 +1,34 @@ +From 7b1afc72dc9ffe79d60dad4c395b0f7c7010ec22 Mon Sep 17 00:00:00 2001 +From: Wenkai Lin +Date: Tue, 23 Jul 2024 19:39:06 +0800 +Subject: [PATCH 03/16] uadk/v1: fix for atomic memory order + +If the memory order of the atomic operation is used improperly, +an exception occurs in multiple threads. +The load operation should use __ATOMIC_ACQUIRE, all memory access +operations of the current thread cannot be reordered before this +instruction, which is visible to the store operation (release) +of other threads. 
+ +Signed-off-by: Wenkai Lin +Signed-off-by: Qi Tao +--- + v1/wd_util.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/v1/wd_util.c b/v1/wd_util.c +index d441805..f44da99 100644 +--- a/v1/wd_util.c ++++ b/v1/wd_util.c +@@ -25,7 +25,7 @@ + void wd_spinlock(struct wd_lock *lock) + { + while (__atomic_test_and_set(&lock->lock, __ATOMIC_ACQUIRE)) +- while (__atomic_load_n(&lock->lock, __ATOMIC_RELAXED)) ++ while (__atomic_load_n(&lock->lock, __ATOMIC_ACQUIRE)) + ; + } + +-- +2.25.1 + diff --git a/0004-uadk-replace-wd_lock-to-pthread_spinlock.patch b/0004-uadk-replace-wd_lock-to-pthread_spinlock.patch new file mode 100644 index 0000000..c9e677b --- /dev/null +++ b/0004-uadk-replace-wd_lock-to-pthread_spinlock.patch @@ -0,0 +1,271 @@ +From a9ed7daa2d67398d0aa785601bca2a5829550c14 Mon Sep 17 00:00:00 2001 +From: Wenkai Lin +Date: Tue, 23 Jul 2024 19:40:29 +0800 +Subject: [PATCH 04/16] uadk: replace wd_lock to pthread_spinlock + +pthread_spinlock was proved to be more reliable than +self-implemented lock, so we replaced wd_lock. 
+ +Signed-off-by: Wenkai Lin +Signed-off-by: Qi Tao +--- + wd_mempool.c | 74 +++++++++++++++++++++++++--------------------------- + 1 file changed, 35 insertions(+), 39 deletions(-) + +diff --git a/wd_mempool.c b/wd_mempool.c +index 2d21a0d..22db843 100644 +--- a/wd_mempool.c ++++ b/wd_mempool.c +@@ -36,21 +36,6 @@ + #define WD_HUNDRED 100 + #define PAGE_SIZE_OFFSET 10 + +-struct wd_lock { +- __u32 lock; +-}; +- +-static inline void wd_spinlock(struct wd_lock *lock) +-{ +- while (__atomic_test_and_set(&lock->lock, __ATOMIC_ACQUIRE)) +- while (__atomic_load_n(&lock->lock, __ATOMIC_RELAXED)); +-} +- +-static inline void wd_unspinlock(struct wd_lock *lock) +-{ +- __atomic_clear(&lock->lock, __ATOMIC_RELEASE); +-} +- + struct wd_ref { + __u32 ref; + }; +@@ -127,7 +112,7 @@ struct blkpool { + struct mempool *mp; + struct memzone_list mz_list; + unsigned long free_block_num; +- struct wd_lock lock; ++ pthread_spinlock_t lock; + struct wd_ref ref; + }; + +@@ -161,8 +146,7 @@ struct mempool { + size_t size; + size_t real_size; + struct bitmap *bitmap; +- /* use self-define lock to avoid to use pthread lib in libwd */ +- struct wd_lock lock; ++ pthread_spinlock_t lock; + struct wd_ref ref; + struct sys_hugepage_list hp_list; + unsigned long free_blk_num; +@@ -314,16 +298,16 @@ void *wd_block_alloc(handle_t blkpool) + return NULL; + } + +- wd_spinlock(&bp->lock); ++ pthread_spin_lock(&bp->lock); + if (bp->top > 0) { + bp->top--; + bp->free_block_num--; + p = bp->blk_elem[bp->top]; +- wd_unspinlock(&bp->lock); ++ pthread_spin_unlock(&bp->lock); + return p; + } + +- wd_unspinlock(&bp->lock); ++ pthread_spin_unlock(&bp->lock); + wd_atomic_sub(&bp->ref, 1); + + return NULL; +@@ -336,17 +320,17 @@ void wd_block_free(handle_t blkpool, void *addr) + if (!bp || !addr) + return; + +- wd_spinlock(&bp->lock); ++ pthread_spin_lock(&bp->lock); + if (bp->top < bp->depth) { + bp->blk_elem[bp->top] = addr; + bp->top++; + bp->free_block_num++; +- wd_unspinlock(&bp->lock); ++ 
pthread_spin_unlock(&bp->lock); + wd_atomic_sub(&bp->ref, 1); + return; + } + +- wd_unspinlock(&bp->lock); ++ pthread_spin_unlock(&bp->lock); + } + + static int alloc_memzone(struct blkpool *bp, void *addr, size_t blk_num, +@@ -392,9 +376,9 @@ static void free_mem_to_mempool(struct blkpool *bp) + { + struct mempool *mp = bp->mp; + +- wd_spinlock(&mp->lock); ++ pthread_spin_lock(&mp->lock); + free_mem_to_mempool_nolock(bp); +- wd_unspinlock(&mp->lock); ++ pthread_spin_unlock(&mp->lock); + } + + static int check_mempool_real_size(struct mempool *mp, struct blkpool *bp) +@@ -455,7 +439,7 @@ static int alloc_mem_multi_in_one(struct mempool *mp, struct blkpool *bp) + int ret = -WD_ENOMEM; + int pos = 0; + +- wd_spinlock(&mp->lock); ++ pthread_spin_lock(&mp->lock); + if (check_mempool_real_size(mp, bp)) + goto err_check_size; + +@@ -471,13 +455,13 @@ static int alloc_mem_multi_in_one(struct mempool *mp, struct blkpool *bp) + pos = ret; + } + +- wd_unspinlock(&mp->lock); ++ pthread_spin_unlock(&mp->lock); + return 0; + + err_free_memzone: + free_mem_to_mempool_nolock(bp); + err_check_size: +- wd_unspinlock(&mp->lock); ++ pthread_spin_unlock(&mp->lock); + return ret; + } + +@@ -493,7 +477,7 @@ static int alloc_mem_one_need_multi(struct mempool *mp, struct blkpool *bp) + int ret = -WD_ENOMEM; + int pos = 0; + +- wd_spinlock(&mp->lock); ++ pthread_spin_lock(&mp->lock); + if (check_mempool_real_size(mp, bp)) + goto err_check_size; + +@@ -509,13 +493,13 @@ static int alloc_mem_one_need_multi(struct mempool *mp, struct blkpool *bp) + mp->real_size -= mp->blk_size * mem_combined_num; + } + +- wd_unspinlock(&mp->lock); ++ pthread_spin_unlock(&mp->lock); + return 0; + + err_free_memzone: + free_mem_to_mempool_nolock(bp); + err_check_size: +- wd_unspinlock(&mp->lock); ++ pthread_spin_unlock(&mp->lock); + return ret; + } + +@@ -576,10 +560,13 @@ handle_t wd_blockpool_create(handle_t mempool, size_t block_size, + bp->blk_size = block_size; + bp->free_block_num = block_num; + bp->mp = 
mp; ++ ret = pthread_spin_init(&bp->lock, PTHREAD_PROCESS_PRIVATE); ++ if (ret < 0) ++ goto err_free_bp; + + ret = alloc_mem_from_mempool(mp, bp); + if (ret < 0) +- goto err_free_bp; ++ goto err_uninit_lock; + + ret = init_blkpool_elem(bp); + if (ret < 0) +@@ -590,6 +577,8 @@ handle_t wd_blockpool_create(handle_t mempool, size_t block_size, + + err_free_mem: + free_mem_to_mempool(bp); ++err_uninit_lock: ++ pthread_spin_destroy(&bp->lock); + err_free_bp: + free(bp); + err_sub_ref: +@@ -613,6 +602,7 @@ void wd_blockpool_destroy(handle_t blkpool) + sched_yield(); + + free_mem_to_mempool(bp); ++ pthread_spin_destroy(&bp->lock); + free(bp->blk_elem); + free(bp); + wd_atomic_sub(&mp->ref, 1); +@@ -919,10 +909,13 @@ handle_t wd_mempool_create(size_t size, int node) + mp->node = node; + mp->size = tmp; + mp->blk_size = WD_MEMPOOL_BLOCK_SIZE; ++ ret = pthread_spin_init(&mp->lock, PTHREAD_PROCESS_PRIVATE); ++ if (ret < 0) ++ goto free_pool; + + ret = alloc_mem_from_hugepage(mp); + if (ret < 0) +- goto free_pool; ++ goto uninit_lock; + + ret = init_mempool(mp); + if (ret < 0) +@@ -933,6 +926,8 @@ handle_t wd_mempool_create(size_t size, int node) + + free_pool_memory: + free_hugepage_mem(mp); ++uninit_lock: ++ pthread_spin_destroy(&mp->lock); + free_pool: + free(mp); + return (handle_t)(-WD_ENOMEM); +@@ -951,6 +946,7 @@ void wd_mempool_destroy(handle_t mempool) + while(wd_atomic_load(&mp->ref)); + uninit_mempool(mp); + free_hugepage_mem(mp); ++ pthread_spin_destroy(&mp->lock); + free(mp); + } + +@@ -968,7 +964,7 @@ void wd_mempool_stats(handle_t mempool, struct wd_mempool_stats *stats) + return; + } + +- wd_spinlock(&mp->lock); ++ pthread_spin_lock(&mp->lock); + + stats->page_type = mp->page_type; + stats->page_size = mp->page_size; +@@ -979,7 +975,7 @@ void wd_mempool_stats(handle_t mempool, struct wd_mempool_stats *stats) + stats->blk_usage_rate = (stats->blk_num - mp->free_blk_num) / + stats->blk_num * WD_HUNDRED; + +- wd_unspinlock(&mp->lock); ++ 
pthread_spin_unlock(&mp->lock); + } + + void wd_blockpool_stats(handle_t blkpool, struct wd_blockpool_stats *stats) +@@ -993,7 +989,7 @@ void wd_blockpool_stats(handle_t blkpool, struct wd_blockpool_stats *stats) + return; + } + +- wd_spinlock(&bp->lock); ++ pthread_spin_lock(&bp->lock); + + stats->block_size = bp->blk_size; + stats->block_num = bp->depth; +@@ -1006,12 +1002,12 @@ void wd_blockpool_stats(handle_t blkpool, struct wd_blockpool_stats *stats) + + if (!size) { + WD_ERR("invalid: blkpool size is zero!\n"); +- wd_unspinlock(&bp->lock); ++ pthread_spin_unlock(&bp->lock); + return; + } + + stats->mem_waste_rate = (size - bp->blk_size * bp->depth) / + size * WD_HUNDRED; + +- wd_unspinlock(&bp->lock); ++ pthread_spin_unlock(&bp->lock); + } +-- +2.25.1 + diff --git a/0004-uadk_tool-fix-build-error.patch b/0004-uadk_tool-fix-build-error.patch deleted file mode 100644 index 7214660..0000000 --- a/0004-uadk_tool-fix-build-error.patch +++ /dev/null @@ -1,153 +0,0 @@ -From 797031c0562786591f9837650c1521573dee356c Mon Sep 17 00:00:00 2001 -From: Zhangfei Gao -Date: Wed, 24 Jan 2024 04:00:50 +0000 -Subject: [PATCH 4/8] uadk_tool: fix build error - -Fix build errors, like -"overlapping comparisons always evaluate to true", -"used uninitialized" etc. 
- -Signed-off-by: Zhangfei Gao -Signed-off-by: 15859157387 <977713017@qq.com> ---- - uadk_tool/benchmark/hpre_uadk_benchmark.c | 4 ++-- - uadk_tool/benchmark/hpre_wd_benchmark.c | 4 ++-- - uadk_tool/benchmark/sec_uadk_benchmark.c | 8 ++++---- - uadk_tool/benchmark/sec_wd_benchmark.c | 8 ++++---- - uadk_tool/benchmark/uadk_benchmark.c | 12 ------------ - uadk_tool/benchmark/uadk_benchmark.h | 1 - - uadk_tool/test/test_sec.c | 1 - - 7 files changed, 12 insertions(+), 26 deletions(-) - -diff --git a/uadk_tool/benchmark/hpre_uadk_benchmark.c b/uadk_tool/benchmark/hpre_uadk_benchmark.c -index 028e102..0cbbdf2 100644 ---- a/uadk_tool/benchmark/hpre_uadk_benchmark.c -+++ b/uadk_tool/benchmark/hpre_uadk_benchmark.c -@@ -2130,7 +2130,7 @@ static void *ecc_uadk_sync_run(void *arg) - memset(&req, 0, sizeof(req)); - - memset(&setup, 0, sizeof(setup)); -- if (subtype != X448_TYPE || subtype != X25519_TYPE) { -+ if (subtype != X448_TYPE && subtype != X25519_TYPE) { - ret = get_ecc_curve(&setup, cid); - if (ret) - return NULL; -@@ -2289,7 +2289,7 @@ static void *ecc_uadk_async_run(void *arg) - memset(&req, 0, sizeof(req)); - - memset(&setup, 0, sizeof(setup)); -- if (subtype != X448_TYPE || subtype != X25519_TYPE) { -+ if (subtype != X448_TYPE && subtype != X25519_TYPE) { - ret = get_ecc_curve(&setup, cid); - if (ret) - return NULL; -diff --git a/uadk_tool/benchmark/hpre_wd_benchmark.c b/uadk_tool/benchmark/hpre_wd_benchmark.c -index 67d57c6..2873ffd 100644 ---- a/uadk_tool/benchmark/hpre_wd_benchmark.c -+++ b/uadk_tool/benchmark/hpre_wd_benchmark.c -@@ -2090,7 +2090,7 @@ static void *ecc_wd_sync_run(void *arg) - queue = g_thread_queue.bd_res[pdata->td_id].queue; - - memset(&setup, 0, sizeof(setup)); -- if (subtype != X448_TYPE || subtype != X25519_TYPE) { -+ if (subtype != X448_TYPE && subtype != X25519_TYPE) { - ret = get_ecc_curve(&setup, cid); - if (ret) - return NULL; -@@ -2248,7 +2248,7 @@ static void *ecc_wd_async_run(void *arg) - queue = 
g_thread_queue.bd_res[pdata->td_id].queue; - - memset(&setup, 0, sizeof(setup)); -- if (subtype != X448_TYPE || subtype != X25519_TYPE) { -+ if (subtype != X448_TYPE && subtype != X25519_TYPE) { - ret = get_ecc_curve(&setup, cid); - if (ret) - return NULL; -diff --git a/uadk_tool/benchmark/sec_uadk_benchmark.c b/uadk_tool/benchmark/sec_uadk_benchmark.c -index f1ae18b..c99ae89 100644 ---- a/uadk_tool/benchmark/sec_uadk_benchmark.c -+++ b/uadk_tool/benchmark/sec_uadk_benchmark.c -@@ -148,10 +148,10 @@ static int sec_uadk_param_parse(thread_data *tddata, struct acc_option *options) - bool is_union = false; - u8 keysize = 0; - u8 ivsize = 0; -- u8 dmode; -- u8 dalg; -- u8 mode; -- u8 alg; -+ u8 dmode = 0; -+ u8 dalg = 0; -+ u8 mode = 0; -+ u8 alg = 0; - - switch(algtype) { - case AES_128_ECB: -diff --git a/uadk_tool/benchmark/sec_wd_benchmark.c b/uadk_tool/benchmark/sec_wd_benchmark.c -index 6e5c8a0..aa03db8 100644 ---- a/uadk_tool/benchmark/sec_wd_benchmark.c -+++ b/uadk_tool/benchmark/sec_wd_benchmark.c -@@ -214,10 +214,10 @@ static int sec_wd_param_parse(thread_data *tddata, struct acc_option *options) - bool is_union = false; - u8 keysize = 0; - u8 ivsize = 0; -- u8 dmode; -- u8 dalg; -- u8 mode; -- u8 alg; -+ u8 dmode = 0; -+ u8 dalg = 0; -+ u8 mode = 0; -+ u8 alg = 0; - - switch(algtype) { - case AES_128_ECB: -diff --git a/uadk_tool/benchmark/uadk_benchmark.c b/uadk_tool/benchmark/uadk_benchmark.c -index 6d5d009..cf3a93c 100644 ---- a/uadk_tool/benchmark/uadk_benchmark.c -+++ b/uadk_tool/benchmark/uadk_benchmark.c -@@ -364,18 +364,6 @@ int get_pid_cpu_time(u32 *ptime) - return 0; - } - --void mdelay(u32 ms) --{ -- int clock_tcy = 2600000000; // 2.6Ghz CPU; -- int i; -- -- while(ms) { -- i++; -- if (i == clock_tcy) -- ms--; -- } --} -- - static void alarm_end(int sig) - { - if (sig == SIGALRM) { -diff --git a/uadk_tool/benchmark/uadk_benchmark.h b/uadk_tool/benchmark/uadk_benchmark.h -index 1cce63d..1752948 100644 ---- a/uadk_tool/benchmark/uadk_benchmark.h -+++ 
b/uadk_tool/benchmark/uadk_benchmark.h -@@ -198,7 +198,6 @@ enum test_alg { - ALG_MAX, - }; - --extern void mdelay(u32 ms); - extern int get_pid_cpu_time(u32 *ptime); - extern void cal_perfermance_data(struct acc_option *option, u32 sttime); - extern void time_start(u32 seconds); -diff --git a/uadk_tool/test/test_sec.c b/uadk_tool/test/test_sec.c -index b00a933..16feaf0 100644 ---- a/uadk_tool/test/test_sec.c -+++ b/uadk_tool/test/test_sec.c -@@ -60,7 +60,6 @@ static unsigned int g_ctxnum; - static unsigned int g_data_fmt = WD_FLAT_BUF; - static unsigned int g_sgl_num = 0; - static unsigned int g_init; --static pthread_spinlock_t lock = 0; - - static struct hash_testvec g_long_hash_tv; - --- -2.25.1 - diff --git a/0005-uadk-v1-fix-for-wd_lock-implementation.patch b/0005-uadk-v1-fix-for-wd_lock-implementation.patch new file mode 100644 index 0000000..c0676e5 --- /dev/null +++ b/0005-uadk-v1-fix-for-wd_lock-implementation.patch @@ -0,0 +1,50 @@ +From ab4b2b3beb56847ad42e07abd9f5834d36034448 Mon Sep 17 00:00:00 2001 +From: Wenkai Lin +Date: Tue, 23 Jul 2024 19:42:35 +0800 +Subject: [PATCH 05/16] uadk/v1: fix for wd_lock implementation + +struct wd_lock has synchronization problems, therefore, +modify wd_spinlock by referring to the implementation +of pthread_spin_lock. 
+ +Signed-off-by: Wenkai Lin +Signed-off-by: Qi Tao +--- + v1/wd_util.c | 17 +++++++++++++---- + 1 file changed, 13 insertions(+), 4 deletions(-) + +diff --git a/v1/wd_util.c b/v1/wd_util.c +index f44da99..29f6579 100644 +--- a/v1/wd_util.c ++++ b/v1/wd_util.c +@@ -24,14 +24,23 @@ + + void wd_spinlock(struct wd_lock *lock) + { +- while (__atomic_test_and_set(&lock->lock, __ATOMIC_ACQUIRE)) +- while (__atomic_load_n(&lock->lock, __ATOMIC_ACQUIRE)) +- ; ++ int val = 0; ++ ++ if (__atomic_compare_exchange_n(&lock->lock, &val, 1, 1, ++ __ATOMIC_ACQUIRE, __ATOMIC_RELAXED)) ++ return; ++ ++ do { ++ do { ++ val = __atomic_load_n(&lock->lock, __ATOMIC_RELAXED); ++ } while (val != 0); ++ } while (!__atomic_compare_exchange_n(&lock->lock, &val, 1, 1, ++ __ATOMIC_ACQUIRE, __ATOMIC_RELAXED)); + } + + void wd_unspinlock(struct wd_lock *lock) + { +- __atomic_clear(&lock->lock, __ATOMIC_RELEASE); ++ __atomic_store_n(&lock->lock, 0, __ATOMIC_RELEASE); + } + + void *drv_iova_map(struct wd_queue *q, void *va, size_t sz) +-- +2.25.1 + diff --git a/0005-v1-fix-build-error.patch b/0005-v1-fix-build-error.patch deleted file mode 100644 index 50db7ea..0000000 --- a/0005-v1-fix-build-error.patch +++ /dev/null @@ -1,134 +0,0 @@ -From cb909f390823ec007cfe62aae9cf0dc750c0de2b Mon Sep 17 00:00:00 2001 -From: Zhangfei Gao -Date: Wed, 24 Jan 2024 04:06:04 +0000 -Subject: [PATCH 5/8] v1: fix build error - -Fix build errors, like -"'struct wcrypto_rsa_prikey1' not at the end of a struct or class", -"__TEST_WD_MEM_H not match" etc. 
- -Signed-off-by: Zhangfei Gao -Signed-off-by: 15859157387 <977713017@qq.com> ---- - v1/test/hisi_zip_test_sgl/zip_alg_sgl.h | 2 +- - v1/test/test_mm/test_wd_mem.c | 5 ----- - v1/test/test_mm/test_wd_mem.h | 2 +- - v1/wd_rsa.c | 18 ++++++++++-------- - 4 files changed, 12 insertions(+), 15 deletions(-) - -diff --git a/v1/test/hisi_zip_test_sgl/zip_alg_sgl.h b/v1/test/hisi_zip_test_sgl/zip_alg_sgl.h -index 0baa35b..1e35069 100644 ---- a/v1/test/hisi_zip_test_sgl/zip_alg_sgl.h -+++ b/v1/test/hisi_zip_test_sgl/zip_alg_sgl.h -@@ -14,7 +14,7 @@ - * limitations under the License. - */ - --#ifndef __WD_ZIP_ALG_SGL__H__ -+#ifndef __WD_ZIP_ALG_SGL_H__ - #define __WD_ZIP_ALG_SGL_H__ - - #include -diff --git a/v1/test/test_mm/test_wd_mem.c b/v1/test/test_mm/test_wd_mem.c -index ba873f9..09824b9 100644 ---- a/v1/test/test_mm/test_wd_mem.c -+++ b/v1/test/test_mm/test_wd_mem.c -@@ -75,11 +75,6 @@ static inline unsigned long long va_to_pa(struct wd_queue *q, void *va) - return (unsigned long long)wd_iova_map(q, va, 0); - } - --static inline void *pa_to_va(struct wd_queue *q, unsigned long long pa) --{ -- return wd_dma_to_va(q, (void *)pa); --} -- - struct mmt_queue_mempool *mmt_test_mempool_create(struct wd_queue *q, - unsigned int block_size, unsigned int block_num) - { -diff --git a/v1/test/test_mm/test_wd_mem.h b/v1/test/test_mm/test_wd_mem.h -index 8a7f561..0962b44 100644 ---- a/v1/test/test_mm/test_wd_mem.h -+++ b/v1/test/test_mm/test_wd_mem.h -@@ -15,7 +15,7 @@ - */ - - #ifndef __TEST_WD_MEM_H --#define ___TEST_WD_MEM_H -+#define __TEST_WD_MEM_H - - #include - #include -diff --git a/v1/wd_rsa.c b/v1/wd_rsa.c -index bda8d31..2c8692b 100644 ---- a/v1/wd_rsa.c -+++ b/v1/wd_rsa.c -@@ -105,8 +105,10 @@ struct wcrypto_rsa_prikey2 { - }; - - struct wcrypto_rsa_prikey { -- struct wcrypto_rsa_prikey1 pkey1; -- struct wcrypto_rsa_prikey2 pkey2; -+ union { -+ struct wcrypto_rsa_prikey1 pkey1; -+ struct wcrypto_rsa_prikey2 pkey2; -+ } pkey; - }; - - /* RSA CRT private key parameter 
types */ -@@ -444,7 +446,7 @@ static int create_ctx_key(struct wcrypto_rsa_ctx_setup *setup, - WD_ERR("alloc prikey2 fail!\n"); - return -WD_ENOMEM; - } -- pkey2 = &ctx->prikey->pkey2; -+ pkey2 = &ctx->prikey->pkey.pkey2; - memset(ctx->prikey, 0, len); - init_pkey2(pkey2, ctx->key_size); - } else { -@@ -459,7 +461,7 @@ static int create_ctx_key(struct wcrypto_rsa_ctx_setup *setup, - WD_ERR("alloc prikey1 fail!\n"); - return -WD_ENOMEM; - } -- pkey1 = &ctx->prikey->pkey1; -+ pkey1 = &ctx->prikey->pkey.pkey1; - memset(ctx->prikey, 0, len); - init_pkey1(pkey1, ctx->key_size); - } -@@ -716,7 +718,7 @@ int wcrypto_set_rsa_prikey_params(void *ctx, struct wd_dtb *d, struct wd_dtb *n) - WD_ERR("ctx err in set rsa private key1!\n"); - return -WD_EINVAL; - } -- pkey1 = &c->prikey->pkey1; -+ pkey1 = &c->prikey->pkey.pkey1; - if (d) { - if (d->dsize > pkey1->key_size || !d->data) { - WD_ERR("d err in set rsa private key1!\n"); -@@ -750,7 +752,7 @@ void wcrypto_get_rsa_prikey_params(struct wcrypto_rsa_prikey *pvk, struct wd_dtb - return; - } - -- pkey1 = &pvk->pkey1; -+ pkey1 = &pvk->pkey.pkey1; - - if (d) - *d = &pkey1->d; -@@ -825,7 +827,7 @@ int wcrypto_set_rsa_crt_prikey_params(void *ctx, struct wd_dtb *dq, - return ret; - } - -- pkey2 = &c->prikey->pkey2; -+ pkey2 = &c->prikey->pkey.pkey2; - ret = rsa_prikey2_param_set(pkey2, dq, WD_CRT_PRIKEY_DQ); - if (ret) { - WD_ERR("dq err in set rsa private key2!\n"); -@@ -871,7 +873,7 @@ void wcrypto_get_rsa_crt_prikey_params(struct wcrypto_rsa_prikey *pvk, - return; - } - -- pkey2 = &pvk->pkey2; -+ pkey2 = &pvk->pkey.pkey2; - - if (dq) - *dq = &pkey2->dq; --- -2.25.1 - diff --git a/0006-uadk-fix-for-env-uninit-segment-fault.patch b/0006-uadk-fix-for-env-uninit-segment-fault.patch new file mode 100644 index 0000000..198849e --- /dev/null +++ b/0006-uadk-fix-for-env-uninit-segment-fault.patch @@ -0,0 +1,28 @@ +From 21d1a0a1f57487a8d643f45150774b42273a56d5 Mon Sep 17 00:00:00 2001 +From: Wenkai Lin +Date: Tue, 23 Jul 2024 19:43:46 
+0800 +Subject: [PATCH 06/16] uadk: fix for env uninit segment fault + +config ctx_config should not be set if init failed. + +Signed-off-by: Wenkai Lin +Signed-off-by: Qi Tao +--- + wd_util.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/wd_util.c b/wd_util.c +index 99be973..76548c9 100644 +--- a/wd_util.c ++++ b/wd_util.c +@@ -1212,6 +1212,7 @@ err_free_ctxs: + free(ctx_config->ctxs); + err_free_ctx_config: + free(ctx_config); ++ config->ctx_config = NULL; + return ret; + } + +-- +2.25.1 + diff --git a/0006-wd_mempool-fix-build-error.patch b/0006-wd_mempool-fix-build-error.patch deleted file mode 100644 index 6d5e71b..0000000 --- a/0006-wd_mempool-fix-build-error.patch +++ /dev/null @@ -1,43 +0,0 @@ -From d335549b1d076a22735bb7211823c2f4140c62af Mon Sep 17 00:00:00 2001 -From: Zhangfei Gao -Date: Wed, 24 Jan 2024 04:15:46 +0000 -Subject: [PATCH 6/8] wd_mempool: fix build error - -Fix build errors like -"passing 'int *' to parameter of type '__u32 *' (aka 'unsigned int *') -converts between pointers to integer types with different sign" - -Signed-off-by: Zhangfei Gao -Signed-off-by: 15859157387 <977713017@qq.com> ---- - wd_mempool.c | 7 +------ - 1 file changed, 1 insertion(+), 6 deletions(-) - -diff --git a/wd_mempool.c b/wd_mempool.c -index ed107d1..47ad36b 100644 ---- a/wd_mempool.c -+++ b/wd_mempool.c -@@ -71,7 +71,7 @@ static inline int wd_atomic_test_add(struct wd_ref *ref, int a, int u) - c = __atomic_load_n(&ref->ref, __ATOMIC_RELAXED); - if (c == u) - break; -- } while (! 
__atomic_compare_exchange_n(&ref->ref, &c, c + a, true, -+ } while (!__atomic_compare_exchange_n(&ref->ref, (__u32 *)&c, c + a, true, - __ATOMIC_RELAXED, __ATOMIC_RELAXED)); - - return c; -@@ -299,11 +299,6 @@ static int test_bit(struct bitmap *bm, unsigned int nr) - return !(*p & mask); - } - --inline static size_t wd_get_page_size(void) --{ -- return sysconf(_SC_PAGESIZE); --} -- - void *wd_block_alloc(handle_t blkpool) - { - struct blkpool *bp = (struct blkpool*)blkpool; --- -2.25.1 - diff --git a/0007-uadk-v1-drv-hisi_zip_udrv-fix-the-wrong-literal-buff.patch b/0007-uadk-v1-drv-hisi_zip_udrv-fix-the-wrong-literal-buff.patch new file mode 100644 index 0000000..e8258ca --- /dev/null +++ b/0007-uadk-v1-drv-hisi_zip_udrv-fix-the-wrong-literal-buff.patch @@ -0,0 +1,62 @@ +From aa977445de462fad116e90f5242f84e4fdb1f1fb Mon Sep 17 00:00:00 2001 +From: Yang Shen +Date: Tue, 23 Jul 2024 19:45:27 +0800 +Subject: [PATCH 07/16] uadk/v1/drv: hisi_zip_udrv - fix the wrong literal + buffer size + +The driver reserves more 16 bytes for literal output buffer needed by +hardware. But it forgets to add the offset to the beginning of the +sequence. So the literal and sequence buffers have 16 bytes of overlap. +In some case, the sequence data will be overwrited. 
+ +Signed-off-by: Yang Shen +Signed-off-by: Qi Tao +--- + v1/drv/hisi_zip_udrv.c | 14 +++++++------- + 1 file changed, 7 insertions(+), 7 deletions(-) + +diff --git a/v1/drv/hisi_zip_udrv.c b/v1/drv/hisi_zip_udrv.c +index 9c9694b..cc55ef5 100644 +--- a/v1/drv/hisi_zip_udrv.c ++++ b/v1/drv/hisi_zip_udrv.c +@@ -177,13 +177,11 @@ int qm_fill_zip_sqe(void *smsg, struct qm_queue_info *info, __u16 i) + return -WD_EINVAL; + } + +- if (unlikely(msg->data_fmt != WD_SGL_BUF && +- msg->in_size > MAX_BUFFER_SIZE)) { ++ if (unlikely(msg->data_fmt != WD_SGL_BUF && msg->in_size > MAX_BUFFER_SIZE)) { + WD_ERR("The in_len is out of range in_len(%u)!\n", msg->in_size); + return -WD_EINVAL; + } +- if (unlikely(msg->data_fmt != WD_SGL_BUF && +- msg->avail_out > MAX_BUFFER_SIZE)) { ++ if (unlikely(msg->data_fmt != WD_SGL_BUF && msg->avail_out > MAX_BUFFER_SIZE)) { + WD_ERR("warning: avail_out is out of range (%u), will set 8MB size max!\n", + msg->avail_out); + msg->avail_out = MAX_BUFFER_SIZE; +@@ -500,8 +498,10 @@ static int fill_zip_addr_lz77_zstd(void *ssqe, + } else { + sqe->cipher_key_addr_l = lower_32_bits((__u64)addr.dest_addr); + sqe->cipher_key_addr_h = upper_32_bits((__u64)addr.dest_addr); +- sqe->dest_addr_l = lower_32_bits((__u64)addr.dest_addr + msg->in_size); +- sqe->dest_addr_h = upper_32_bits((__u64)addr.dest_addr + msg->in_size); ++ sqe->dest_addr_l = lower_32_bits((__u64)addr.dest_addr + ++ msg->in_size + ZSTD_LIT_RSV_SIZE); ++ sqe->dest_addr_h = upper_32_bits((__u64)addr.dest_addr + ++ msg->in_size + ZSTD_LIT_RSV_SIZE); + } + + sqe->stream_ctx_addr_l = lower_32_bits((__u64)addr.ctxbuf_addr); +@@ -671,7 +671,7 @@ static void fill_priv_lz77_zstd(void *ssqe, struct wcrypto_comp_msg *recv_msg) + format->sequences_start = zstd_out->sequence; + } else { + format->literals_start = recv_msg->dst; +- format->sequences_start = recv_msg->dst + recv_msg->in_size; ++ format->sequences_start = recv_msg->dst + recv_msg->in_size + ZSTD_LIT_RSV_SIZE; + format->freq = (void 
*)(&format->lit_length_overflow_pos + 1); + } + +-- +2.25.1 + diff --git a/0007-wd_rsa-fix-build-error.patch b/0007-wd_rsa-fix-build-error.patch deleted file mode 100644 index 4de8d82..0000000 --- a/0007-wd_rsa-fix-build-error.patch +++ /dev/null @@ -1,101 +0,0 @@ -From 6077f4317961f8e308ecad2f02c3cdbb09aa707a Mon Sep 17 00:00:00 2001 -From: Zhangfei Gao -Date: Wed, 24 Jan 2024 04:18:57 +0000 -Subject: [PATCH 7/8] wd_rsa: fix build error - -Fix build errors, like -"field 'pkey1' with variable sized type 'struct wd_rsa_prikey1' -not at the end of a struct or class is a GNU extension" - -Signed-off-by: Zhangfei Gao -Signed-off-by: 15859157387 <977713017@qq.com> ---- - wd_rsa.c | 22 ++++++++++++---------- - 1 file changed, 12 insertions(+), 10 deletions(-) - -diff --git a/wd_rsa.c b/wd_rsa.c -index a27f061..de0b796 100644 ---- a/wd_rsa.c -+++ b/wd_rsa.c -@@ -45,8 +45,10 @@ struct wd_rsa_prikey2 { - }; - - struct wd_rsa_prikey { -- struct wd_rsa_prikey1 pkey1; -- struct wd_rsa_prikey2 pkey2; -+ union { -+ struct wd_rsa_prikey1 pkey1; -+ struct wd_rsa_prikey2 pkey2; -+ } pkey; - }; - - /* RSA private key parameter types */ -@@ -830,7 +832,7 @@ static int create_sess_key(struct wd_rsa_sess_setup *setup, - WD_ERR("failed to alloc sess prikey2!\n"); - return -WD_ENOMEM; - } -- pkey2 = &sess->prikey->pkey2; -+ pkey2 = &sess->prikey->pkey.pkey2; - memset(sess->prikey, 0, len); - init_pkey2(pkey2, sess->key_size); - } else { -@@ -841,7 +843,7 @@ static int create_sess_key(struct wd_rsa_sess_setup *setup, - WD_ERR("failed to alloc sess prikey1!\n"); - return -WD_ENOMEM; - } -- pkey1 = &sess->prikey->pkey1; -+ pkey1 = &sess->prikey->pkey.pkey1; - memset(sess->prikey, 0, len); - init_pkey1(pkey1, sess->key_size); - } -@@ -872,9 +874,9 @@ static void del_sess_key(struct wd_rsa_sess *sess) - } - - if (sess->setup.is_crt) -- wd_memset_zero(prk->pkey2.data, CRT_PARAMS_SZ(sess->key_size)); -+ wd_memset_zero(prk->pkey.pkey2.data, CRT_PARAMS_SZ(sess->key_size)); - else -- 
wd_memset_zero(prk->pkey1.data, GEN_PARAMS_SZ(sess->key_size)); -+ wd_memset_zero(prk->pkey.pkey1.data, GEN_PARAMS_SZ(sess->key_size)); - free(sess->prikey); - free(sess->pubkey); - } -@@ -1027,7 +1029,7 @@ int wd_rsa_set_prikey_params(handle_t sess, struct wd_dtb *d, struct wd_dtb *n) - WD_ERR("invalid: sess err in set rsa private key1!\n"); - return -WD_EINVAL; - } -- pkey1 = &c->prikey->pkey1; -+ pkey1 = &c->prikey->pkey.pkey1; - if (d) { - if (!d->dsize || !d->data || d->dsize > pkey1->key_size) { - WD_ERR("invalid: d err in set rsa private key1!\n"); -@@ -1062,7 +1064,7 @@ void wd_rsa_get_prikey_params(struct wd_rsa_prikey *pvk, struct wd_dtb **d, - return; - } - -- pkey1 = &pvk->pkey1; -+ pkey1 = &pvk->pkey.pkey1; - - if (d) - *d = &pkey1->d; -@@ -1134,7 +1136,7 @@ int wd_rsa_set_crt_prikey_params(handle_t sess, struct wd_dtb *dq, - return ret; - } - -- pkey2 = &c->prikey->pkey2; -+ pkey2 = &c->prikey->pkey.pkey2; - ret = rsa_prikey2_param_set(pkey2, dq, WD_CRT_PRIKEY_DQ); - if (ret) { - WD_ERR("failed to set dq for rsa private key2!\n"); -@@ -1180,7 +1182,7 @@ void wd_rsa_get_crt_prikey_params(struct wd_rsa_prikey *pvk, - return; - } - -- pkey2 = &pvk->pkey2; -+ pkey2 = &pvk->pkey.pkey2; - - if (dq) - *dq = &pkey2->dq; --- -2.25.1 - diff --git a/0008-test-fix-build-error.patch b/0008-test-fix-build-error.patch deleted file mode 100644 index 7bc0986..0000000 --- a/0008-test-fix-build-error.patch +++ /dev/null @@ -1,45 +0,0 @@ -From 4a451be8acc77467d6ffec9506b8f89ef92def8a Mon Sep 17 00:00:00 2001 -From: Zhangfei Gao -Date: Wed, 24 Jan 2024 04:34:16 +0000 -Subject: [PATCH 8/8] test: fix build error - -Fix build errors like "unused variable" - -Signed-off-by: Zhangfei Gao -Signed-off-by: 15859157387 <977713017@qq.com> ---- - test/wd_mempool_test.c | 4 +--- - 1 file changed, 1 insertion(+), 3 deletions(-) - -diff --git a/test/wd_mempool_test.c b/test/wd_mempool_test.c -index ad04636..6e28b46 100644 ---- a/test/wd_mempool_test.c -+++ b/test/wd_mempool_test.c -@@ 
-644,7 +644,7 @@ static void *sva_sec_cipher_async(void *arg) - int cnt = g_times; - handle_t h_sess; - int ret; -- int j, i; -+ int j; - - setup->alg = WD_CIPHER_AES; - setup->mode = WD_CIPHER_CBC; -@@ -658,7 +658,6 @@ static void *sva_sec_cipher_async(void *arg) - SEC_TST_PRT("test sec cipher set key is failed!\n"); - goto out;; - } -- i = cnt; - /* run task */ - do { - try_do_again: -@@ -666,7 +665,6 @@ try_do_again: - req->src = pdata->bd_pool->bds[j].src; - req->dst = pdata->bd_pool->bds[j].dst; - ret = wd_do_cipher_async(h_sess, req); -- i--; - if (ret == -EBUSY) { // busy - usleep(100); - goto try_do_again; --- -2.25.1 - diff --git a/0008-uadk-v1-replace-wd_spinlock-to-pthread_spin_lock.patch b/0008-uadk-v1-replace-wd_spinlock-to-pthread_spin_lock.patch new file mode 100644 index 0000000..2cbe518 --- /dev/null +++ b/0008-uadk-v1-replace-wd_spinlock-to-pthread_spin_lock.patch @@ -0,0 +1,289 @@ +From f7c2a7a3e5116dc0ce4af539070f2ed93bb18af8 Mon Sep 17 00:00:00 2001 +From: Wenkai Lin +Date: Tue, 23 Jul 2024 19:47:09 +0800 +Subject: [PATCH 08/16] uadk/v1: replace wd_spinlock to pthread_spin_lock + +Due to memory differences, using wd_spinlock may +cause synchronization problems, it is better to use +the standard pthread spin lock of glibc. 
+ +Signed-off-by: Wenkai Lin +Signed-off-by: Qi Tao +--- + v1/drv/hisi_qm_udrv.c | 51 +++++++++++++++++++++++++++++------------- + v1/drv/hisi_qm_udrv.h | 4 ++-- + v1/drv/hisi_rng_udrv.c | 25 +++++++++++++++------ + v1/drv/hisi_rng_udrv.h | 2 +- + 4 files changed, 56 insertions(+), 26 deletions(-) + +diff --git a/v1/drv/hisi_qm_udrv.c b/v1/drv/hisi_qm_udrv.c +index 175a5c4..1d4f1d8 100644 +--- a/v1/drv/hisi_qm_udrv.c ++++ b/v1/drv/hisi_qm_udrv.c +@@ -20,6 +20,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -458,6 +459,11 @@ static int qm_init_queue_info(struct wd_queue *q) + struct hisi_qp_ctx qp_ctx = {0}; + int ret; + ++ if (!info->sqe_size) { ++ WD_ERR("invalid: sqe size is 0!\n"); ++ return -WD_EINVAL; ++ } ++ + info->sq_tail_index = 0; + info->cq_head_index = 0; + info->cqc_phase = 1; +@@ -502,11 +508,6 @@ static int qm_set_queue_info(struct wd_queue *q) + ret = qm_set_queue_regions(q); + if (ret) + return -WD_EINVAL; +- if (!info->sqe_size) { +- WD_ERR("sqe size =%d err!\n", info->sqe_size); +- ret = -WD_EINVAL; +- goto err_with_regions; +- } + info->cq_base = (void *)((uintptr_t)info->sq_base + + info->sqe_size * info->sq_depth); + +@@ -534,8 +535,24 @@ static int qm_set_queue_info(struct wd_queue *q) + goto err_with_regions; + } + ++ ret = pthread_spin_init(&info->sd_lock, PTHREAD_PROCESS_PRIVATE); ++ if (ret) { ++ WD_ERR("failed to init qinfo sd_lock!\n"); ++ goto free_cache; ++ } ++ ++ ret = pthread_spin_init(&info->rc_lock, PTHREAD_PROCESS_PRIVATE); ++ if (ret) { ++ WD_ERR("failed to init qinfo rc_lock!\n"); ++ goto uninit_lock; ++ } ++ + return 0; + ++uninit_lock: ++ pthread_spin_destroy(&info->sd_lock); ++free_cache: ++ free(info->req_cache); + err_with_regions: + qm_unset_queue_regions(q); + return ret; +@@ -576,8 +593,10 @@ void qm_uninit_queue(struct wd_queue *q) + struct q_info *qinfo = q->qinfo; + struct qm_queue_info *info = qinfo->priv; + +- qm_unset_queue_regions(q); ++ pthread_spin_destroy(&info->rc_lock); 
++ pthread_spin_destroy(&info->sd_lock); + free(info->req_cache); ++ qm_unset_queue_regions(q); + free(qinfo->priv); + qinfo->priv = NULL; + } +@@ -605,10 +624,10 @@ int qm_send(struct wd_queue *q, void **req, __u32 num) + int ret; + __u32 i; + +- wd_spinlock(&info->sd_lock); ++ pthread_spin_lock(&info->sd_lock); + if (unlikely((__u32)__atomic_load_n(&info->used, __ATOMIC_RELAXED) > + info->sq_depth - num - 1)) { +- wd_unspinlock(&info->sd_lock); ++ pthread_spin_unlock(&info->sd_lock); + WD_ERR("queue is full!\n"); + return -WD_EBUSY; + } +@@ -617,7 +636,7 @@ int qm_send(struct wd_queue *q, void **req, __u32 num) + ret = info->sqe_fill[qinfo->atype](req[i], qinfo->priv, + info->sq_tail_index); + if (unlikely(ret != WD_SUCCESS)) { +- wd_unspinlock(&info->sd_lock); ++ pthread_spin_unlock(&info->sd_lock); + WD_ERR("sqe fill error, ret %d!\n", ret); + return -WD_EINVAL; + } +@@ -629,7 +648,7 @@ int qm_send(struct wd_queue *q, void **req, __u32 num) + } + + ret = qm_tx_update(info, num); +- wd_unspinlock(&info->sd_lock); ++ pthread_spin_unlock(&info->sd_lock); + + return ret; + } +@@ -662,9 +681,9 @@ static int check_ds_rx_base(struct qm_queue_info *info, + return 0; + + if (before) { +- wd_spinlock(&info->rc_lock); ++ pthread_spin_lock(&info->rc_lock); + qm_rx_from_cache(info, resp, num); +- wd_unspinlock(&info->rc_lock); ++ pthread_spin_unlock(&info->rc_lock); + WD_ERR("wd queue hw error happened before qm receive!\n"); + } else { + WD_ERR("wd queue hw error happened after qm receive!\n"); +@@ -705,7 +724,7 @@ int qm_recv(struct wd_queue *q, void **resp, __u32 num) + if (unlikely(ret)) + return ret; + +- wd_spinlock(&info->rc_lock); ++ pthread_spin_lock(&info->rc_lock); + for (i = 0; i < num; i++) { + cqe = info->cq_base + info->cq_head_index * sizeof(struct cqe); + if (info->cqc_phase != CQE_PHASE(cqe)) +@@ -714,7 +733,7 @@ int qm_recv(struct wd_queue *q, void **resp, __u32 num) + mb(); /* make sure the data is all in memory before read */ + sq_head = 
CQE_SQ_HEAD_INDEX(cqe); + if (unlikely(sq_head >= info->sq_depth)) { +- wd_unspinlock(&info->rc_lock); ++ pthread_spin_unlock(&info->rc_lock); + WD_ERR("CQE_SQ_HEAD_INDEX(%u) error\n", sq_head); + return -WD_EIO; + } +@@ -726,7 +745,7 @@ int qm_recv(struct wd_queue *q, void **resp, __u32 num) + if (!ret) { + break; + } else if (ret < 0) { +- wd_unspinlock(&info->rc_lock); ++ pthread_spin_unlock(&info->rc_lock); + WD_ERR("recv sqe error %u\n", sq_head); + return ret; + } +@@ -747,7 +766,7 @@ int qm_recv(struct wd_queue *q, void **resp, __u32 num) + ret = i; + } + +- wd_unspinlock(&info->rc_lock); ++ pthread_spin_unlock(&info->rc_lock); + + return ret; + } +diff --git a/v1/drv/hisi_qm_udrv.h b/v1/drv/hisi_qm_udrv.h +index 4d54cf6..06ac66a 100644 +--- a/v1/drv/hisi_qm_udrv.h ++++ b/v1/drv/hisi_qm_udrv.h +@@ -166,8 +166,8 @@ struct qm_queue_info { + qm_sqe_parse sqe_parse[WCRYPTO_MAX_ALG]; + hisi_qm_sqe_fill_priv sqe_fill_priv; + hisi_qm_sqe_parse_priv sqe_parse_priv; +- struct wd_lock sd_lock; +- struct wd_lock rc_lock; ++ pthread_spinlock_t sd_lock; ++ pthread_spinlock_t rc_lock; + struct wd_queue *q; + int (*sgl_info)(struct hw_sgl_info *info); + int (*sgl_init)(void *pool, struct wd_sgl *sgl); +diff --git a/v1/drv/hisi_rng_udrv.c b/v1/drv/hisi_rng_udrv.c +index 86a20cb..605ef27 100644 +--- a/v1/drv/hisi_rng_udrv.c ++++ b/v1/drv/hisi_rng_udrv.c +@@ -17,6 +17,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -34,6 +35,7 @@ int rng_init_queue(struct wd_queue *q) + { + struct q_info *qinfo = q->qinfo; + struct rng_queue_info *info; ++ int ret; + + info = calloc(1, sizeof(*info)); + if (!info) { +@@ -41,12 +43,20 @@ int rng_init_queue(struct wd_queue *q) + return -ENOMEM; + } + ++ ret = pthread_spin_init(&info->lock, PTHREAD_PROCESS_PRIVATE); ++ if (ret) { ++ free(info); ++ WD_ERR("failed to init rng qinfo lock!\n"); ++ return ret; ++ } ++ + qinfo->priv = info; + info->mmio_base = wd_drv_mmap_qfr(q, WD_UACCE_QFRT_MMIO, 0); + if 
(info->mmio_base == MAP_FAILED) { + info->mmio_base = NULL; +- free(qinfo->priv); + qinfo->priv = NULL; ++ pthread_spin_destroy(&info->lock); ++ free(info); + WD_ERR("mmap trng mmio fail\n"); + return -ENOMEM; + } +@@ -63,6 +73,7 @@ void rng_uninit_queue(struct wd_queue *q) + + free(qinfo->priv); + qinfo->priv = NULL; ++ pthread_spin_destroy(&info->lock); + } + + int rng_send(struct wd_queue *q, void **req, __u32 num) +@@ -70,14 +81,14 @@ int rng_send(struct wd_queue *q, void **req, __u32 num) + struct q_info *qinfo = q->qinfo; + struct rng_queue_info *info = qinfo->priv; + +- wd_spinlock(&info->lock); ++ pthread_spin_lock(&info->lock); + if (!info->req_cache[info->send_idx]) { + info->req_cache[info->send_idx] = req[0]; + info->send_idx++; +- wd_unspinlock(&info->lock); ++ pthread_spin_unlock(&info->lock); + return 0; + } +- wd_unspinlock(&info->lock); ++ pthread_spin_unlock(&info->lock); + + WD_ERR("queue is full!\n"); + return -WD_EBUSY; +@@ -128,16 +139,16 @@ int rng_recv(struct wd_queue *q, void **resp, __u32 num) + struct wcrypto_cb_tag *tag; + __u32 currsize = 0; + +- wd_spinlock(&info->lock); ++ pthread_spin_lock(&info->lock); + msg = info->req_cache[info->recv_idx]; + if (!msg) { +- wd_unspinlock(&info->lock); ++ pthread_spin_unlock(&info->lock); + return 0; + } + + info->req_cache[info->recv_idx] = NULL; + info->recv_idx++; +- wd_unspinlock(&info->lock); ++ pthread_spin_unlock(&info->lock); + + tag = (void *)(uintptr_t)msg->usr_tag; + if (usr && tag->ctx_id != usr) +diff --git a/v1/drv/hisi_rng_udrv.h b/v1/drv/hisi_rng_udrv.h +index 56814a4..3efa10e 100644 +--- a/v1/drv/hisi_rng_udrv.h ++++ b/v1/drv/hisi_rng_udrv.h +@@ -29,7 +29,7 @@ struct rng_queue_info { + void *req_cache[TRNG_Q_DEPTH]; + __u8 send_idx; + __u8 recv_idx; +- struct wd_lock lock; ++ pthread_spinlock_t lock; + }; + + int rng_init_queue(struct wd_queue *q); +-- +2.25.1 + diff --git a/0009-uadk-sec-move-function-to-wd_digest_drv.h.patch b/0009-uadk-sec-move-function-to-wd_digest_drv.h.patch 
deleted file mode 100644 index 783ea8d..0000000 --- a/0009-uadk-sec-move-function-to-wd_digest_drv.h.patch +++ /dev/null @@ -1,201 +0,0 @@ -From b1eeb7ddb8305466cdfb4e49cc68b0b4264d4a43 Mon Sep 17 00:00:00 2001 -From: Weili Qian -Date: Mon, 5 Feb 2024 17:24:21 +0800 -Subject: [PATCH 09/44] uadk/sec: move function to wd_digest_drv.h - -Since function get_hash_bd_type() will be used in multiple files, -move it to wd_digest_drv.h. And rename get_hash_bd_type to -get_hash_msg_type to make the function generic. - -Signed-off-by: Weili Qian ---- - drv/hisi_sec.c | 52 ++++++++++--------------------------- - include/drv/wd_digest_drv.h | 27 ++++++++++++++++++- - 2 files changed, 39 insertions(+), 40 deletions(-) - -diff --git a/drv/hisi_sec.c b/drv/hisi_sec.c -index 5b114f6..9da21a8 100644 ---- a/drv/hisi_sec.c -+++ b/drv/hisi_sec.c -@@ -200,13 +200,6 @@ enum sec_c_width { - C_WIDTH_CS3 = 0x3, - }; - --enum hash_bd_type { -- HASH_SINGLE_BD, -- HASH_FRIST_BD, -- HASH_MIDDLE_BD, -- HASH_END_BD, --}; -- - struct hisi_sec_ctx { - struct wd_ctx_config_internal config; - }; -@@ -1549,29 +1542,10 @@ static int long_hash_param_check(handle_t h_qp, struct wd_digest_msg *msg) - return 0; - } - --static enum hash_bd_type get_hash_bd_type(struct wd_digest_msg *msg) --{ -- /* -- * [has_next , iv_bytes] -- * [ 1 , 0 ] = long hash(frist bd) -- * [ 1 , 1 ] = long hash(middle bd) -- * [ 0 , 1 ] = long hash(end bd) -- * [ 0 , 0 ] = block hash(single bd) -- */ -- if (msg->has_next && !msg->iv_bytes) -- return HASH_FRIST_BD; -- else if (msg->has_next && msg->iv_bytes) -- return HASH_MIDDLE_BD; -- else if (!msg->has_next && msg->iv_bytes) -- return HASH_END_BD; -- else -- return HASH_SINGLE_BD; --} -- - static int fill_digest_long_hash(handle_t h_qp, struct wd_digest_msg *msg, - struct hisi_sec_sqe *sqe) - { -- enum hash_bd_type bd_type = get_hash_bd_type(msg); -+ enum hash_block_type block_type = get_hash_block_type(msg); - __u64 total_bits; - int ret; - -@@ -1579,20 +1553,20 @@ static int 
fill_digest_long_hash(handle_t h_qp, struct wd_digest_msg *msg, - if (ret) - return ret; - -- if (bd_type == HASH_FRIST_BD) { -+ if (block_type == HASH_FRIST_BLOCK) { - /* Long hash first */ - sqe->ai_apd_cs = AI_GEN_INNER; - sqe->ai_apd_cs |= AUTHPAD_NOPAD << AUTHPAD_OFFSET; - } - -- if (bd_type == HASH_MIDDLE_BD) { -+ if (block_type == HASH_MIDDLE_BLOCK) { - /* Long hash middle */ - sqe->ai_apd_cs = AI_GEN_IVIN_ADDR; - sqe->ai_apd_cs |= AUTHPAD_NOPAD << AUTHPAD_OFFSET; - sqe->type2.a_ivin_addr = sqe->type2.mac_addr; - } - -- if (bd_type == HASH_END_BD) { -+ if (block_type == HASH_END_BLOCK) { - /* Long hash end */ - sqe->ai_apd_cs = AI_GEN_IVIN_ADDR; - sqe->ai_apd_cs |= AUTHPAD_PAD << AUTHPAD_OFFSET; -@@ -1658,16 +1632,16 @@ static int digest_long_bd_align_check(struct wd_digest_msg *msg) - - static int digest_bd2_type_check(struct wd_digest_msg *msg) - { -- enum hash_bd_type type = get_hash_bd_type(msg); -+ enum hash_block_type type = get_hash_block_type(msg); - - /* Long hash first and middle bd */ -- if (type == HASH_FRIST_BD || type == HASH_MIDDLE_BD) { -+ if (type == HASH_FRIST_BLOCK || type == HASH_MIDDLE_BLOCK) { - WD_ERR("hardware v2 not supports 0 size in long hash!\n"); - return -WD_EINVAL; - } - - /* Block mode hash bd */ -- if (type == HASH_SINGLE_BD) { -+ if (type == HASH_SINGLE_BLOCK) { - WD_ERR("hardware v2 not supports 0 size in block hash!\n"); - return -WD_EINVAL; - } -@@ -1677,9 +1651,9 @@ static int digest_bd2_type_check(struct wd_digest_msg *msg) - - static int digest_bd3_type_check(struct wd_digest_msg *msg) - { -- enum hash_bd_type type = get_hash_bd_type(msg); -+ enum hash_block_type type = get_hash_block_type(msg); - /* Long hash first and middle bd */ -- if (type == HASH_FRIST_BD || type == HASH_MIDDLE_BD) { -+ if (type == HASH_FRIST_BLOCK || type == HASH_MIDDLE_BLOCK) { - WD_ERR("invalid: hardware v3 not supports 0 size in long hash!\n"); - return -WD_EINVAL; - } -@@ -1920,7 +1894,7 @@ static int aes_auth_long_hash_check(struct 
wd_digest_msg *msg) - static int fill_digest_long_hash3(handle_t h_qp, struct wd_digest_msg *msg, - struct hisi_sec_sqe3 *sqe) - { -- enum hash_bd_type bd_type = get_hash_bd_type(msg); -+ enum hash_block_type block_type = get_hash_block_type(msg); - __u64 total_bits; - int ret; - -@@ -1932,20 +1906,20 @@ static int fill_digest_long_hash3(handle_t h_qp, struct wd_digest_msg *msg, - if (ret) - return ret; - -- if (bd_type == HASH_FRIST_BD) { -+ if (block_type == HASH_FRIST_BLOCK) { - /* Long hash first */ - sqe->auth_mac_key |= AI_GEN_INNER << SEC_AI_GEN_OFFSET_V3; - sqe->stream_scene.stream_auth_pad = AUTHPAD_NOPAD; - } - -- if (bd_type == HASH_MIDDLE_BD) { -+ if (block_type == HASH_MIDDLE_BLOCK) { - /* Long hash middle */ - sqe->auth_mac_key |= AI_GEN_IVIN_ADDR << SEC_AI_GEN_OFFSET_V3; - sqe->stream_scene.stream_auth_pad = AUTHPAD_NOPAD; - sqe->auth_ivin.a_ivin_addr = sqe->mac_addr; - } - -- if (bd_type == HASH_END_BD) { -+ if (block_type == HASH_END_BLOCK) { - /* Long hash end */ - sqe->auth_mac_key |= AI_GEN_IVIN_ADDR << SEC_AI_GEN_OFFSET_V3; - sqe->stream_scene.stream_auth_pad = AUTHPAD_PAD; -diff --git a/include/drv/wd_digest_drv.h b/include/drv/wd_digest_drv.h -index 3c4477d..304b506 100644 ---- a/include/drv/wd_digest_drv.h -+++ b/include/drv/wd_digest_drv.h -@@ -10,7 +10,13 @@ - extern "C" { - #endif - --/* fixme wd_digest_msg */ -+enum hash_block_type { -+ HASH_FRIST_BLOCK, -+ HASH_MIDDLE_BLOCK, -+ HASH_END_BLOCK, -+ HASH_SINGLE_BLOCK, -+}; -+ - struct wd_digest_msg { - struct wd_digest_req req; - /* request identifier */ -@@ -51,6 +57,25 @@ struct wd_digest_msg { - __u64 long_data_len; - }; - -+static inline enum hash_block_type get_hash_block_type(struct wd_digest_msg *msg) -+{ -+ /* -+ * [has_next , iv_bytes] -+ * [ 1 , 0 ] = long hash(frist bd) -+ * [ 1 , 1 ] = long hash(middle bd) -+ * [ 0 , 1 ] = long hash(end bd) -+ * [ 0 , 0 ] = block hash(single bd) -+ */ -+ if (msg->has_next && !msg->iv_bytes) -+ return HASH_FRIST_BLOCK; -+ else if (msg->has_next 
&& msg->iv_bytes) -+ return HASH_MIDDLE_BLOCK; -+ else if (!msg->has_next && msg->iv_bytes) -+ return HASH_END_BLOCK; -+ else -+ return HASH_SINGLE_BLOCK; -+} -+ - struct wd_digest_msg *wd_digest_get_msg(__u32 idx, __u32 tag); - - #ifdef __cplusplus --- -2.25.1 - diff --git a/0009-uadk_tools-add-segfault-locating-function.patch b/0009-uadk_tools-add-segfault-locating-function.patch new file mode 100644 index 0000000..1b94515 --- /dev/null +++ b/0009-uadk_tools-add-segfault-locating-function.patch @@ -0,0 +1,181 @@ +From 4beed871afcdd1d9f1a50f89bba960d91181f3b5 Mon Sep 17 00:00:00 2001 +From: Longfang Liu +Date: Tue, 23 Jul 2024 19:48:13 +0800 +Subject: [PATCH 09/16] uadk_tools: add segfault locating function + +When a segfault occurs within a test thread, it is generally +difficult to locate the problem. In order to improve the efficiency +of problem location, a segmentation fault capture entry is added +to each business thread entry. +And register a segfault callback handler function. As long as a +segfault occurs within the thread, the callback is triggered and +the segfault error message is output. 
+ +Signed-off-by: Longfang Liu +Signed-off-by: Qi Tao +--- + uadk_tool/benchmark/hpre_uadk_benchmark.c | 1 + + uadk_tool/benchmark/hpre_wd_benchmark.c | 1 + + uadk_tool/benchmark/sec_soft_benchmark.c | 1 + + uadk_tool/benchmark/sec_uadk_benchmark.c | 1 + + uadk_tool/benchmark/sec_wd_benchmark.c | 1 + + uadk_tool/benchmark/trng_wd_benchmark.c | 1 + + uadk_tool/benchmark/uadk_benchmark.c | 15 +++++++++++++++ + uadk_tool/benchmark/uadk_benchmark.h | 3 +++ + uadk_tool/benchmark/zip_uadk_benchmark.c | 1 + + uadk_tool/benchmark/zip_wd_benchmark.c | 1 + + 10 files changed, 26 insertions(+) + +diff --git a/uadk_tool/benchmark/hpre_uadk_benchmark.c b/uadk_tool/benchmark/hpre_uadk_benchmark.c +index 0148e56..5dd6a39 100644 +--- a/uadk_tool/benchmark/hpre_uadk_benchmark.c ++++ b/uadk_tool/benchmark/hpre_uadk_benchmark.c +@@ -2706,6 +2706,7 @@ int hpre_uadk_benchmark(struct acc_option *options) + u32 ptime; + int ret; + ++ signal(SIGSEGV, segmentfault_handler); + g_thread_num = options->threads; + g_ctxnum = options->ctxnums; + +diff --git a/uadk_tool/benchmark/hpre_wd_benchmark.c b/uadk_tool/benchmark/hpre_wd_benchmark.c +index 5545ad8..0196e62 100644 +--- a/uadk_tool/benchmark/hpre_wd_benchmark.c ++++ b/uadk_tool/benchmark/hpre_wd_benchmark.c +@@ -2564,6 +2564,7 @@ int hpre_wd_benchmark(struct acc_option *options) + u32 ptime; + int ret; + ++ signal(SIGSEGV, segmentfault_handler); + g_thread_num = options->threads; + + if (options->optype >= (WCRYPTO_EC_OP_MAX - WCRYPTO_ECDSA_VERIFY)) { +diff --git a/uadk_tool/benchmark/sec_soft_benchmark.c b/uadk_tool/benchmark/sec_soft_benchmark.c +index 84dab63..8fa523c 100644 +--- a/uadk_tool/benchmark/sec_soft_benchmark.c ++++ b/uadk_tool/benchmark/sec_soft_benchmark.c +@@ -1277,6 +1277,7 @@ int sec_soft_benchmark(struct acc_option *options) + u32 ptime; + int ret; + ++ signal(SIGSEGV, segmentfault_handler); + g_thread_num = options->threads; + g_pktlen = options->pktlen; + g_jobsnum = options->ctxnums; +diff --git 
a/uadk_tool/benchmark/sec_uadk_benchmark.c b/uadk_tool/benchmark/sec_uadk_benchmark.c +index 56f4fa6..41b7416 100644 +--- a/uadk_tool/benchmark/sec_uadk_benchmark.c ++++ b/uadk_tool/benchmark/sec_uadk_benchmark.c +@@ -1777,6 +1777,7 @@ int sec_uadk_benchmark(struct acc_option *options) + u32 ptime; + int ret; + ++ signal(SIGSEGV, segmentfault_handler); + g_thread_num = options->threads; + g_pktlen = options->pktlen; + g_ctxnum = options->ctxnums; +diff --git a/uadk_tool/benchmark/sec_wd_benchmark.c b/uadk_tool/benchmark/sec_wd_benchmark.c +index bb47d61..e022dcb 100644 +--- a/uadk_tool/benchmark/sec_wd_benchmark.c ++++ b/uadk_tool/benchmark/sec_wd_benchmark.c +@@ -1630,6 +1630,7 @@ int sec_wd_benchmark(struct acc_option *options) + u32 ptime; + int ret; + ++ signal(SIGSEGV, segmentfault_handler); + g_alg = options->subtype; + g_algtype = options->algtype; + g_optype = options->optype; +diff --git a/uadk_tool/benchmark/trng_wd_benchmark.c b/uadk_tool/benchmark/trng_wd_benchmark.c +index 3ce329a..2f058d4 100644 +--- a/uadk_tool/benchmark/trng_wd_benchmark.c ++++ b/uadk_tool/benchmark/trng_wd_benchmark.c +@@ -312,6 +312,7 @@ int trng_wd_benchmark(struct acc_option *options) + u32 ptime; + int ret; + ++ signal(SIGSEGV, segmentfault_handler); + g_thread_num = options->threads; + + ret = init_trng_wd_queue(options); +diff --git a/uadk_tool/benchmark/uadk_benchmark.c b/uadk_tool/benchmark/uadk_benchmark.c +index 1bf9fee..0f01fdf 100644 +--- a/uadk_tool/benchmark/uadk_benchmark.c ++++ b/uadk_tool/benchmark/uadk_benchmark.c +@@ -331,6 +331,21 @@ void cal_avg_latency(u32 count) + ACC_TST_PRT("thread<%lu> avg latency: %.1fus\n", gettid(), latency); + } + ++void segmentfault_handler(int sig) ++{ ++#define BUF_SZ 64 ++ void *array[BUF_SZ]; ++ size_t size; ++ ++ /* Get void*'s for all entries on the stack */ ++ size = backtrace(array, BUF_SZ); ++ ++ /* Print out all the frames to stderr */ ++ fprintf(stderr, "Error: signal %d:\n", sig); ++ backtrace_symbols_fd(array, size, 
STDERR_FILENO); ++ exit(1); ++} ++ + /*-------------------------------------main code------------------------------------------------------*/ + static void parse_alg_param(struct acc_option *option) + { +diff --git a/uadk_tool/benchmark/uadk_benchmark.h b/uadk_tool/benchmark/uadk_benchmark.h +index c493ac3..0def4b9 100644 +--- a/uadk_tool/benchmark/uadk_benchmark.h ++++ b/uadk_tool/benchmark/uadk_benchmark.h +@@ -4,6 +4,7 @@ + + #include + #include ++#include + #include + #include + #include +@@ -15,6 +16,7 @@ + #include + #include + #include ++#include + #include + + #define ACC_TST_PRT printf +@@ -217,6 +219,7 @@ extern void add_send_complete(void); + extern u32 get_recv_time(void); + extern void cal_avg_latency(u32 count); + extern int get_alg_name(int alg, char *alg_name); ++extern void segmentfault_handler(int sig); + + int acc_cmd_parse(int argc, char *argv[], struct acc_option *option); + int acc_default_case(struct acc_option *option); +diff --git a/uadk_tool/benchmark/zip_uadk_benchmark.c b/uadk_tool/benchmark/zip_uadk_benchmark.c +index ecb688f..22aa916 100644 +--- a/uadk_tool/benchmark/zip_uadk_benchmark.c ++++ b/uadk_tool/benchmark/zip_uadk_benchmark.c +@@ -1331,6 +1331,7 @@ int zip_uadk_benchmark(struct acc_option *options) + u32 ptime; + int ret; + ++ signal(SIGSEGV, segmentfault_handler); + g_thread_num = options->threads; + g_pktlen = options->pktlen; + g_ctxnum = options->ctxnums; +diff --git a/uadk_tool/benchmark/zip_wd_benchmark.c b/uadk_tool/benchmark/zip_wd_benchmark.c +index cbe07fc..8ad3e96 100644 +--- a/uadk_tool/benchmark/zip_wd_benchmark.c ++++ b/uadk_tool/benchmark/zip_wd_benchmark.c +@@ -1162,6 +1162,7 @@ int zip_wd_benchmark(struct acc_option *options) + u32 ptime; + int ret; + ++ signal(SIGSEGV, segmentfault_handler); + g_thread_num = options->threads; + g_pktlen = options->pktlen; + +-- +2.25.1 + diff --git a/0010-uadk-bugfix-CE-driver-initialization-problem.patch b/0010-uadk-bugfix-CE-driver-initialization-problem.patch new file mode 
100644 index 0000000..6d14513 --- /dev/null +++ b/0010-uadk-bugfix-CE-driver-initialization-problem.patch @@ -0,0 +1,71 @@ +From fe6638d807d10d94ebee234a80e07c498c129fbc Mon Sep 17 00:00:00 2001 +From: Longfang Liu +Date: Tue, 23 Jul 2024 19:49:15 +0800 +Subject: [PATCH 10/16] uadk: bugfix CE driver initialization problem + +When using UADK provider, using the default business type TASK_MIX +will cause driver initialization to fail. +Analysis found that the CE driver will be initialized by fallback, +and NULL will be passed to the input parameter during initialization. +This NULL parameter will cause a segmentation fault during CE driver +initialization. +Therefore, initialization is skipped for NULL parameters in the CE driver. + +Signed-off-by: Longfang Liu +Signed-off-by: Qi Tao +--- + drv/hash_mb/hash_mb.c | 4 ++++ + drv/isa_ce_sm3.c | 6 +++++- + drv/isa_ce_sm4.c | 4 ++++ + 3 files changed, 13 insertions(+), 1 deletion(-) + +diff --git a/drv/hash_mb/hash_mb.c b/drv/hash_mb/hash_mb.c +index a73c698..e4a9564 100644 +--- a/drv/hash_mb/hash_mb.c ++++ b/drv/hash_mb/hash_mb.c +@@ -192,6 +192,10 @@ static int hash_mb_init(struct wd_alg_driver *drv, void *conf) + struct hash_mb_ctx *priv; + int ret; + ++ /* Fallback init is NULL */ ++ if (!drv || !conf) ++ return 0; ++ + priv = malloc(sizeof(struct hash_mb_ctx)); + if (!priv) + return -WD_ENOMEM; +diff --git a/drv/isa_ce_sm3.c b/drv/isa_ce_sm3.c +index 0309861..59f3940 100644 +--- a/drv/isa_ce_sm3.c ++++ b/drv/isa_ce_sm3.c +@@ -375,7 +375,11 @@ static int sm3_ce_drv_init(struct wd_alg_driver *drv, void *conf) + struct wd_ctx_config_internal *config = (struct wd_ctx_config_internal *)conf; + struct sm3_ce_drv_ctx *sctx = (struct sm3_ce_drv_ctx *)drv->priv; + +- config->epoll_en = false; ++ /* Fallback init is NULL */ ++ if (!drv || !conf) ++ return 0; ++ ++ config->epoll_en = 0; + + /* return if already inited */ + if (sctx) +diff --git a/drv/isa_ce_sm4.c b/drv/isa_ce_sm4.c +index 6961471..e937893 100644 +--- 
a/drv/isa_ce_sm4.c ++++ b/drv/isa_ce_sm4.c +@@ -36,6 +36,10 @@ static int isa_ce_init(struct wd_alg_driver *drv, void *conf) + struct wd_ctx_config_internal *config = conf; + struct sm4_ce_drv_ctx *sctx = drv->priv; + ++ /* Fallback init is NULL */ ++ if (!drv || !conf) ++ return 0; ++ + config->epoll_en = 0; + memcpy(&sctx->config, config, sizeof(struct wd_ctx_config_internal)); + +-- +2.25.1 + diff --git a/0010-uadk-digest-add-partial_block-to-store-partial-data.patch b/0010-uadk-digest-add-partial_block-to-store-partial-data.patch deleted file mode 100644 index 70e5a46..0000000 --- a/0010-uadk-digest-add-partial_block-to-store-partial-data.patch +++ /dev/null @@ -1,149 +0,0 @@ -From 6ad149cab59176faf05e65233b4986916a1f7c8d Mon Sep 17 00:00:00 2001 -From: Weili Qian -Date: Mon, 5 Feb 2024 17:27:07 +0800 -Subject: [PATCH 10/44] uadk/digest: add partial_block to store partial data - -For the long hash first blcok and middle block, if the size of -the data is not aligned with the block size, the partial data is -stored in partial_block and combined with the next block to form -an aligned length for calculation. Currently, partial_block is -added to struct wd_digest_sess to store partial data. 
- -Signed-off-by: Weili Qian ---- - include/drv/wd_digest_drv.h | 4 +++ - wd_digest.c | 50 ++++++++++++++++++++++++------------- - 2 files changed, 36 insertions(+), 18 deletions(-) - -diff --git a/include/drv/wd_digest_drv.h b/include/drv/wd_digest_drv.h -index 304b506..8a4aa0b 100644 ---- a/include/drv/wd_digest_drv.h -+++ b/include/drv/wd_digest_drv.h -@@ -44,6 +44,8 @@ struct wd_digest_msg { - __u32 in_bytes; - /* out_bytes */ - __u32 out_bytes; -+ /* partial bytes for stream mode */ -+ __u32 partial_bytes; - - /* input key pointer */ - __u8 *key; -@@ -53,6 +55,8 @@ struct wd_digest_msg { - __u8 *in; - /* output data pointer */ - __u8 *out; -+ /* partial pointer for stream mode */ -+ __u8 *partial_block; - /* total of data for stream mode */ - __u64 long_data_len; - }; -diff --git a/wd_digest.c b/wd_digest.c -index acf341a..dba2f95 100644 ---- a/wd_digest.c -+++ b/wd_digest.c -@@ -11,6 +11,7 @@ - #include "wd_digest.h" - - #define GMAC_IV_LEN 16 -+#define MAX_BLOCK_SIZE 128 - - static __u32 g_digest_mac_len[WD_DIGEST_TYPE_MAX] = { - WD_DIGEST_SM3_LEN, WD_DIGEST_MD5_LEN, WD_DIGEST_SHA1_LEN, -@@ -45,6 +46,19 @@ struct wd_digest_setting { - void *dlh_list; - } wd_digest_setting; - -+struct wd_digest_stream_data { -+ /* Long hash mode, first and middle block misaligned data */ -+ unsigned char partial_block[MAX_BLOCK_SIZE]; -+ __u32 partial_bytes; -+ /* Total data length for stream mode */ -+ __u64 long_data_len; -+ /* -+ * Notify the stream message state, zero is frist message, -+ * non-zero is middle or final message. -+ */ -+ int msg_state; -+}; -+ - struct wd_digest_sess { - char *alg_name; - enum wd_digest_type alg; -@@ -53,14 +67,7 @@ struct wd_digest_sess { - unsigned char key[MAX_HMAC_KEY_SIZE]; - __u32 key_bytes; - void *sched_key; -- /* -- * Notify the stream message state, zero is frist message, -- * non-zero is middle or final message. 
-- */ -- int msg_state; -- -- /* Total data length for stream mode */ -- __u64 long_data_len; -+ struct wd_digest_stream_data stream_data; - }; - - struct wd_env_config wd_digest_env_config; -@@ -536,12 +543,12 @@ static void fill_request_msg(struct wd_digest_msg *msg, - memcpy(&msg->req, req, sizeof(struct wd_digest_req)); - - if (unlikely(req->has_next == WD_DIGEST_STREAM_END)) { -- sess->long_data_len = req->long_data_len; -- sess->msg_state = WD_DIGEST_DOING; -+ sess->stream_data.long_data_len = req->long_data_len; -+ sess->stream_data.msg_state = WD_DIGEST_DOING; - req->has_next = WD_DIGEST_END; - } else if (unlikely(req->has_next == WD_DIGEST_STREAM_DOING)) { -- sess->long_data_len = req->long_data_len; -- sess->msg_state = WD_DIGEST_DOING; -+ sess->stream_data.long_data_len = req->long_data_len; -+ sess->stream_data.msg_state = WD_DIGEST_DOING; - req->has_next = WD_DIGEST_DOING; - } - -@@ -557,10 +564,12 @@ static void fill_request_msg(struct wd_digest_msg *msg, - msg->out_bytes = req->out_bytes; - msg->data_fmt = req->data_fmt; - msg->has_next = req->has_next; -- msg->long_data_len = sess->long_data_len + req->in_bytes; -+ msg->long_data_len = sess->stream_data.long_data_len + req->in_bytes; -+ msg->partial_block = sess->stream_data.partial_block; -+ msg->partial_bytes = sess->stream_data.partial_bytes; - - /* Use iv_bytes to store the stream message state */ -- msg->iv_bytes = sess->msg_state; -+ msg->iv_bytes = sess->stream_data.msg_state; - } - - static int send_recv_sync(struct wd_ctx_internal *ctx, struct wd_digest_sess *dsess, -@@ -579,17 +588,22 @@ static int send_recv_sync(struct wd_ctx_internal *ctx, struct wd_digest_sess *ds - if (unlikely(ret)) - return ret; - -- /* After a stream mode job was done, update session long_data_len */ -+ /* -+ * After a stream mode job was done, update session -+ * long_data_len and partial_bytes. 
-+ */ - if (msg->has_next) { - /* Long hash(first and middle message) */ -- dsess->long_data_len += msg->in_bytes; -+ dsess->stream_data.long_data_len += msg->in_bytes; -+ dsess->stream_data.partial_bytes = msg->partial_bytes; - } else if (msg->iv_bytes) { - /* Long hash(final message) */ -- dsess->long_data_len = 0; -+ dsess->stream_data.long_data_len = 0; -+ dsess->stream_data.partial_bytes = 0; - } - - /* Update session message state */ -- dsess->msg_state = msg->has_next; -+ dsess->stream_data.msg_state = msg->has_next; - - return 0; - } --- -2.25.1 - diff --git a/0011-uadk-digest-add-wd_ctx_spin_lock-function.patch b/0011-uadk-digest-add-wd_ctx_spin_lock-function.patch deleted file mode 100644 index 6af2e7e..0000000 --- a/0011-uadk-digest-add-wd_ctx_spin_lock-function.patch +++ /dev/null @@ -1,77 +0,0 @@ -From b06161de909136e59ecd7f148ef7e8ba72652e34 Mon Sep 17 00:00:00 2001 -From: Weili Qian -Date: Mon, 5 Feb 2024 17:27:17 +0800 -Subject: [PATCH 11/44] uadk/digest: add wd_ctx_spin_lock function - -In synchronous mode, to protect hardware queue resources and -prevent multiple threads from sending packets to the same queue -at the same time, lock is added before packets are sent in function -send_recv_sync(). - -In non-hard computing scenarios, the resources are independent, -and multiple synchronization threads can process at the same time. -If lock is added before packets are sent, the multi-thread performance -deteriorates. Therefore, the wd_ctx_spin_lock and wd_ctx_spin_unlock -interfaces are added. In non-hard computing scenarios, the lock -is not added. 
- -Signed-off-by: Weili Qian ---- - include/wd_util.h | 23 +++++++++++++++++++++++ - wd_digest.c | 4 ++-- - 2 files changed, 25 insertions(+), 2 deletions(-) - -diff --git a/include/wd_util.h b/include/wd_util.h -index 3059ac1..f217f0f 100644 ---- a/include/wd_util.h -+++ b/include/wd_util.h -@@ -527,6 +527,29 @@ static inline void wd_dfx_msg_cnt(struct wd_ctx_config_internal *config, - config->msg_cnt[sqn]++; - } - -+/** -+ * wd_ctx_spin_lock() - Lock interface, which is used in the synchronization process. -+ * @ctx: queue context. -+ * @type: the type of the driver. -+ * -+ * If the drvier type is not UADK_ALG_HW, the lock is not required. -+ */ -+static inline void wd_ctx_spin_lock(struct wd_ctx_internal *ctx, int type) -+{ -+ if (type != UADK_ALG_HW) -+ return; -+ -+ pthread_spin_lock(&ctx->lock); -+} -+ -+static inline void wd_ctx_spin_unlock(struct wd_ctx_internal *ctx, int type) -+{ -+ if (type != UADK_ALG_HW) -+ return; -+ -+ pthread_spin_unlock(&ctx->lock); -+} -+ - #ifdef __cplusplus - } - #endif -diff --git a/wd_digest.c b/wd_digest.c -index dba2f95..c59184d 100644 ---- a/wd_digest.c -+++ b/wd_digest.c -@@ -581,10 +581,10 @@ static int send_recv_sync(struct wd_ctx_internal *ctx, struct wd_digest_sess *ds - msg_handle.send = wd_digest_setting.driver->send; - msg_handle.recv = wd_digest_setting.driver->recv; - -- pthread_spin_lock(&ctx->lock); -+ wd_ctx_spin_lock(ctx, wd_digest_setting.driver->calc_type); - ret = wd_handle_msg_sync(wd_digest_setting.driver, &msg_handle, ctx->ctx, - msg, NULL, wd_digest_setting.config.epoll_en); -- pthread_spin_unlock(&ctx->lock); -+ wd_ctx_spin_unlock(ctx, wd_digest_setting.driver->calc_type); - if (unlikely(ret)) - return ret; - --- -2.25.1 - diff --git a/0011-uadk-v1-fix-for-sec_dump_bd.patch b/0011-uadk-v1-fix-for-sec_dump_bd.patch new file mode 100644 index 0000000..aad73fc --- /dev/null +++ b/0011-uadk-v1-fix-for-sec_dump_bd.patch @@ -0,0 +1,68 @@ +From ae15a43b77e69b87e464bc350a6768ae51d253f3 Mon Sep 17 00:00:00 
2001 +From: Wenkai Lin +Date: Tue, 20 Aug 2024 10:46:50 +0800 +Subject: [PATCH 11/16] uadk/v1: fix for sec_dump_bd + +Fix for uninit parameters and wrong data type. + +Signed-off-by: Wenkai Lin +Signed-off-by: Qi Tao +--- + v1/drv/hisi_sec_udrv.c | 13 +++++-------- + 1 file changed, 5 insertions(+), 8 deletions(-) + +diff --git a/v1/drv/hisi_sec_udrv.c b/v1/drv/hisi_sec_udrv.c +index 36b93e6..ac6df5f 100644 +--- a/v1/drv/hisi_sec_udrv.c ++++ b/v1/drv/hisi_sec_udrv.c +@@ -1738,7 +1738,7 @@ int qm_fill_digest_bd3_sqe(void *message, struct qm_queue_info *info, __u16 i) + info->req_cache[i] = msg; + + #ifdef DEBUG_LOG +- sec_dump_bd((unsigned int *)sqe, SQE_BYTES_NUMS); ++ sec_dump_bd((unsigned char *)temp, SQE_BYTES_NUMS); + #endif + + return WD_SUCCESS; +@@ -1961,10 +1961,7 @@ int qm_parse_cipher_bd3_sqe(void *msg, const struct qm_queue_info *info, + } + + #ifdef DEBUG_LOG +- if (sqe3->type == BD_TYPE3) +- sec_dump_bd((unsigned char *)sqe3, SQE_BYTES_NUMS); +- else +- sec_dump_bd((unsigned char *)sqe, SQE_BYTES_NUMS); ++ sec_dump_bd((unsigned char *)msg, SQE_BYTES_NUMS); + #endif + + return 1; +@@ -2515,7 +2512,7 @@ int qm_fill_aead_bd3_sqe(void *message, struct qm_queue_info *info, __u16 i) + info->req_cache[i] = msg; + + #ifdef DEBUG_LOG +- sec_dump_bd((unsigned char *)sqe, SQE_BYTES_NUMS); ++ sec_dump_bd((unsigned char *)temp, SQE_BYTES_NUMS); + #endif + + return ret; +@@ -2602,7 +2599,7 @@ int qm_parse_aead_bd3_sqe(void *msg, const struct qm_queue_info *info, + } + + #ifdef DEBUG_LOG +- sec_dump_bd((unsigned char *)sqe, SQE_BYTES_NUMS); ++ sec_dump_bd((unsigned char *)msg, SQE_BYTES_NUMS); + #endif + + return 1; +@@ -2669,7 +2666,7 @@ int qm_parse_digest_bd3_sqe(void *msg, const struct qm_queue_info *info, + } + + #ifdef DEBUG_LOG +- sec_dump_bd((unsigned int *)sqe, SQE_BYTES_NUMS); ++ sec_dump_bd((unsigned char *)msg, SQE_BYTES_NUMS); + #endif + + return 1; +-- +2.25.1 + diff --git a/0012-uadk-remove-redundant-header-file-in-makefile.patch 
b/0012-uadk-remove-redundant-header-file-in-makefile.patch deleted file mode 100644 index e07651b..0000000 --- a/0012-uadk-remove-redundant-header-file-in-makefile.patch +++ /dev/null @@ -1,28 +0,0 @@ -From 415b2d379fd74e1e115c9f15b86e976f5c5addb7 Mon Sep 17 00:00:00 2001 -From: Zhiqi Song -Date: Tue, 12 Mar 2024 11:38:46 +0800 -Subject: [PATCH 12/44] uadk: remove redundant header file in makefile - -Remove wrong 'wd_hpre_udrv.h' of hpre. - -Signed-off-by: Zhiqi Song ---- - Makefile.am | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/Makefile.am b/Makefile.am -index 64cfa44..25853eb 100644 ---- a/Makefile.am -+++ b/Makefile.am -@@ -86,7 +86,7 @@ libhisi_sec_la_SOURCES=drv/hisi_sec.c drv/hisi_qm_udrv.c \ - hisi_qm_udrv.h wd_cipher_drv.h wd_aead_drv.h aes.h galois.h - - libhisi_hpre_la_SOURCES=drv/hisi_hpre.c drv/hisi_qm_udrv.c \ -- hisi_qm_udrv.h wd_hpre_drv.h -+ hisi_qm_udrv.h - if WD_STATIC_DRV - AM_CFLAGS += -DWD_STATIC_DRV -fPIC - AM_CFLAGS += -DWD_NO_LOG --- -2.25.1 - diff --git a/0012-uadk-v1-fix-for-wd_recv_sync-print.patch b/0012-uadk-v1-fix-for-wd_recv_sync-print.patch new file mode 100644 index 0000000..4c277b2 --- /dev/null +++ b/0012-uadk-v1-fix-for-wd_recv_sync-print.patch @@ -0,0 +1,46 @@ +From 31e9ab6950e4b4f68f528120e500f83a8150b7e0 Mon Sep 17 00:00:00 2001 +From: Wenkai Lin +Date: Tue, 20 Aug 2024 10:47:59 +0800 +Subject: [PATCH 12/16] uadk/v1: fix for wd_recv_sync print + +wd_recv_sync should print more information when return. 
+ +Signed-off-by: Wenkai Lin +Signed-off-by: Qi Tao +--- + v1/wd.c | 11 ++++++++--- + 1 file changed, 8 insertions(+), 3 deletions(-) + +diff --git a/v1/wd.c b/v1/wd.c +index 4286bbe..02bc49c 100644 +--- a/v1/wd.c ++++ b/v1/wd.c +@@ -688,8 +688,10 @@ int wd_wait(struct wd_queue *q, __u16 ms) + fds[0].events = POLLIN; + + ret = poll(fds, 1, ms); +- if (unlikely(ret < 0)) ++ if (unlikely(ret < 0)) { ++ WD_ERR("failed to poll a queue!\n"); + return -WD_ENODEV; ++ } + + /* return 0 for no data, 1 for new message */ + return ret; +@@ -700,8 +702,11 @@ int wd_recv_sync(struct wd_queue *q, void **resp, __u16 ms) + int ret; + + ret = wd_wait(q, ms); +- if (likely(ret > 0)) +- return wd_recv(q, resp); ++ if (likely(ret > 0)) { ++ ret = wd_recv(q, resp); ++ if (unlikely(!ret)) ++ WD_ERR("failed to recv data after poll!\n"); ++ } + + return ret; + } +-- +2.25.1 + diff --git a/0013-uadk-isa-ce-support-sm3-ce-instruction.patch b/0013-uadk-isa-ce-support-sm3-ce-instruction.patch deleted file mode 100644 index 4705b71..0000000 --- a/0013-uadk-isa-ce-support-sm3-ce-instruction.patch +++ /dev/null @@ -1,1888 +0,0 @@ -From da5f058d30f6d7eb28b4afbe27633d7664ba0961 Mon Sep 17 00:00:00 2001 -From: Zhiqi Song -Date: Mon, 11 Mar 2024 18:07:22 +0800 -Subject: [PATCH 13/44] uadk/isa-ce: support sm3 ce instruction - -Support sync sm3 ce instruction, users can use ce -instruction to accelerate sm3 sync task through init2 -related functions. - -This patch also includes: -1. Add compile parameter and related file to support -isa-ce library. -2. Check whether the platform supports the CE instruction -in alg driver register process. -3. Make HW driver and INSTR driver of the same alg can -be requested at the same time. -4. Support sm3 ce block mode and stream mode for sm3-normal -and hmac-sm3. 
- -Signed-off-by: Zhiqi Song ---- - Makefile.am | 15 +- - configure.ac | 3 + - drv/isa_ce_sm3.c | 401 ++++++++++++++++++++ - drv/isa_ce_sm3.h | 86 +++++ - drv/isa_ce_sm3_armv8.S | 765 ++++++++++++++++++++++++++++++++++++++ - include/drv/arm_arch_ce.h | 199 ++++++++++ - include/wd_alg.h | 43 +++ - wd_alg.c | 32 +- - wd_digest.c | 2 +- - wd_sched.c | 2 +- - wd_util.c | 87 ++++- - 11 files changed, 1616 insertions(+), 19 deletions(-) - create mode 100644 drv/isa_ce_sm3.c - create mode 100644 drv/isa_ce_sm3.h - create mode 100644 drv/isa_ce_sm3_armv8.S - create mode 100644 include/drv/arm_arch_ce.h - -diff --git a/Makefile.am b/Makefile.am -index 25853eb..19eab30 100644 ---- a/Makefile.am -+++ b/Makefile.am -@@ -43,7 +43,8 @@ nobase_pkginclude_HEADERS = v1/wd.h v1/wd_cipher.h v1/wd_aead.h v1/uacce.h v1/wd - lib_LTLIBRARIES=libwd.la libwd_comp.la libwd_crypto.la - - uadk_driversdir=$(libdir)/uadk --uadk_drivers_LTLIBRARIES=libhisi_sec.la libhisi_hpre.la libhisi_zip.la -+uadk_drivers_LTLIBRARIES=libhisi_sec.la libhisi_hpre.la libhisi_zip.la \ -+ libisa_ce.la - - libwd_la_SOURCES=wd.c wd_mempool.c wd.h wd_alg.c wd_alg.h \ - v1/wd.c v1/wd.h v1/wd_adapter.c v1/wd_adapter.h \ -@@ -87,6 +88,10 @@ libhisi_sec_la_SOURCES=drv/hisi_sec.c drv/hisi_qm_udrv.c \ - - libhisi_hpre_la_SOURCES=drv/hisi_hpre.c drv/hisi_qm_udrv.c \ - hisi_qm_udrv.h -+ -+libisa_ce_la_SOURCES=drv/isa_ce_sm3.c drv/isa_ce_sm3_armv8.S arm_arch_ce.h \ -+ drv/isa_ce_sm3.h -+ - if WD_STATIC_DRV - AM_CFLAGS += -DWD_STATIC_DRV -fPIC - AM_CFLAGS += -DWD_NO_LOG -@@ -106,6 +111,10 @@ libhisi_sec_la_DEPENDENCIES = libwd.la libwd_crypto.la - - libhisi_hpre_la_LIBADD = $(libwd_la_OBJECTS) $(libwd_crypto_la_OBJECTS) - libhisi_hpre_la_DEPENDENCIES = libwd.la libwd_crypto.la -+ -+libisa_ce_la_LIBADD = $(libwd_la_OBJECTS) $(libwd_crypto_la_OBJECTS) -+libisa_ce_la_DEPENDENCIES = libwd.la libwd_crypto.la -+ - else - UADK_WD_SYMBOL= -Wl,--version-script,$(top_srcdir)/libwd.map - UADK_CRYPTO_SYMBOL= 
-Wl,--version-script,$(top_srcdir)/libwd_crypto.map -@@ -134,6 +143,10 @@ libhisi_sec_la_DEPENDENCIES= libwd.la libwd_crypto.la - libhisi_hpre_la_LIBADD= -lwd -lwd_crypto - libhisi_hpre_la_LDFLAGS=$(UADK_VERSION) - libhisi_hpre_la_DEPENDENCIES= libwd.la libwd_crypto.la -+ -+libisa_ce_la_LIBADD= -lwd -lwd_crypto -+libisa_ce_la_LDFLAGS=$(UADK_VERSION) -+libisa_ce_la_DEPENDENCIES= libwd.la libwd_crypto.la - endif # WD_STATIC_DRV - - pkgconfigdir = $(libdir)/pkgconfig -diff --git a/configure.ac b/configure.ac -index b198417..4ed111e 100644 ---- a/configure.ac -+++ b/configure.ac -@@ -21,6 +21,9 @@ LT_INIT - AC_SUBST([hardcode_into_libs], [no]) - AM_PROG_CC_C_O - -+# Support assembler -+AM_PROG_AS -+ - AC_ARG_ENABLE([debug-log], - AS_HELP_STRING([--enable-debug-log], [enable debug logging globally]), - [ AS_IF([test "x$enable_debug_log" = "xyes"], -diff --git a/drv/isa_ce_sm3.c b/drv/isa_ce_sm3.c -new file mode 100644 -index 0000000..f16bdd3 ---- /dev/null -+++ b/drv/isa_ce_sm3.c -@@ -0,0 +1,401 @@ -+// SPDX-License-Identifier: Apache-2.0 -+/* -+ * Copyright 2011-2022 The OpenSSL Project Authors. All Rights Reserved. -+ * -+ * Licensed under the Apache License 2.0 (the "License"). You may not use -+ * this file except in compliance with the License. You can obtain a copy -+ * in the file LICENSE in the source distribution or at -+ * https://www.openssl.org/source/license.html -+ */ -+/* -+ * Copyright 2023 Huawei Technologies Co.,Ltd. All rights reserved. 
-+ */ -+ -+#include -+#include -+#include -+#include "drv/isa_ce_sm3.h" -+#include "drv/wd_digest_drv.h" -+#include "wd_digest.h" -+#include "wd_util.h" -+ -+typedef void (sm3_ce_block_fn)(__u32 word_reg[SM3_STATE_WORDS], -+ const unsigned char *src, size_t blocks); -+ -+static int sm3_ce_drv_init(struct wd_alg_driver *drv, void *conf); -+static void sm3_ce_drv_exit(struct wd_alg_driver *drv); -+static int sm3_ce_drv_send(struct wd_alg_driver *drv, handle_t ctx, void *digest_msg); -+static int sm3_ce_drv_recv(struct wd_alg_driver *drv, handle_t ctx, void *digest_msg); -+static int sm3_ce_get_usage(void *param); -+ -+static struct wd_alg_driver sm3_ce_alg_driver = { -+ .drv_name = "isa_ce_sm3", -+ .alg_name = "sm3", -+ .calc_type = UADK_ALG_CE_INSTR, -+ .priority = 200, -+ .queue_num = 1, -+ .op_type_num = 1, -+ .fallback = 0, -+ .init = sm3_ce_drv_init, -+ .exit = sm3_ce_drv_exit, -+ .send = sm3_ce_drv_send, -+ .recv = sm3_ce_drv_recv, -+ .get_usage = sm3_ce_get_usage, -+}; -+ -+static void __attribute__((constructor)) sm3_ce_probe(void) -+{ -+ int ret; -+ -+ WD_INFO("Info: register SM3 CE alg driver!\n"); -+ ret = wd_alg_driver_register(&sm3_ce_alg_driver); -+ if (ret && ret != -WD_ENODEV) -+ WD_ERR("Error: register SM3 CE failed!\n"); -+} -+ -+static void __attribute__((destructor)) sm3_ce_remove(void) -+{ -+ wd_alg_driver_unregister(&sm3_ce_alg_driver); -+} -+ -+static int sm3_ce_get_usage(void *param) -+{ -+ return WD_SUCCESS; -+} -+ -+static inline void sm3_ce_init(struct sm3_ce_ctx *sctx) -+{ -+ sctx->word_reg[0] = SM3_IVA; -+ sctx->word_reg[1] = SM3_IVB; -+ sctx->word_reg[2] = SM3_IVC; -+ sctx->word_reg[3] = SM3_IVD; -+ sctx->word_reg[4] = SM3_IVE; -+ sctx->word_reg[5] = SM3_IVF; -+ sctx->word_reg[6] = SM3_IVG; -+ sctx->word_reg[7] = SM3_IVH; -+} -+ -+static void trans_output_result(__u8 *out_digest, __u32 *word_reg) -+{ -+ size_t i; -+ -+ for (i = 0; i < SM3_STATE_WORDS; i++) -+ PUTU32_TO_U8(out_digest + i * WORD_TO_CHAR_OFFSET, word_reg[i]); -+} -+ 
-+static void sm3_ce_init_ex(struct sm3_ce_ctx *sctx, __u8 *iv, __u16 iv_bytes) -+{ -+ size_t i; -+ -+ if (iv_bytes != SM3_DIGEST_SIZE) { -+ WD_ERR("invalid iv size: %u\n", iv_bytes); -+ return; -+ } -+ -+ for (i = 0; i < SM3_STATE_WORDS; i++) -+ PUTU8_TO_U32(sctx->word_reg[i], iv + i * WORD_TO_CHAR_OFFSET); -+} -+ -+static void sm3_ce_update(struct sm3_ce_ctx *sctx, const __u8 *data, -+ size_t data_len, sm3_ce_block_fn *block_fn) -+{ -+ size_t remain_data_len, blk_num; -+ -+ /* Get the data num that need compute currently */ -+ sctx->num &= (SM3_BLOCK_SIZE - 1); -+ -+ if (sctx->num) { -+ remain_data_len = SM3_BLOCK_SIZE - sctx->num; -+ /* If data_len does not enough a block size, then leave it to final */ -+ if (data_len < remain_data_len) { -+ memcpy(sctx->block + sctx->num, data, data_len); -+ sctx->num += data_len; -+ return; -+ } -+ -+ memcpy(sctx->block + sctx->num, data, remain_data_len); -+ block_fn(sctx->word_reg, sctx->block, 1); -+ sctx->nblocks++; -+ data += remain_data_len; -+ data_len -= remain_data_len; -+ } -+ -+ /* Group the filled msg by 512-bits (64-bytes) */ -+ blk_num = data_len / SM3_BLOCK_SIZE; -+ if (blk_num) { -+ block_fn(sctx->word_reg, data, blk_num); -+ sctx->nblocks += blk_num; -+ data += SM3_BLOCK_SIZE * blk_num; -+ data_len -= SM3_BLOCK_SIZE * blk_num; -+ } -+ -+ sctx->num = data_len; -+ if (data_len) -+ memcpy(sctx->block, data, data_len); -+} -+ -+static void sm3_ce_final(struct sm3_ce_ctx *sctx, __u8 *md, -+ sm3_ce_block_fn *block_fn) -+{ -+ size_t i, offset1, offset2; -+ __u64 nh, nl; -+ -+ sctx->num &= (SM3_BLOCK_SIZE - 1); -+ sctx->block[sctx->num] = SM3_PADDING_BYTE; -+ -+ if (sctx->num <= SM3_BLOCK_SIZE - BIT_TO_BLOCK_OFFSET) { -+ memset(sctx->block + sctx->num + 1, 0, SM3_BLOCK_SIZE - sctx->num - 9); -+ } else { -+ memset(sctx->block + sctx->num + 1, 0, SM3_BLOCK_SIZE - sctx->num - 1); -+ block_fn(sctx->word_reg, sctx->block, 1); -+ memset(sctx->block, 0, SM3_BLOCK_SIZE - 8); -+ } -+ -+ /* -+ * Put the length of the message 
in bits into the last -+ * 64-bits (penultimate two words). -+ */ -+ offset2 = SM3_BLOCK_SIZE - WORD_TO_CHAR_OFFSET * 2; -+ offset1 = SM3_BLOCK_SIZE - WORD_TO_CHAR_OFFSET; -+ nh = sctx->nblocks >> NH_OFFSET; -+ nl = (sctx->nblocks << BIT_TO_BLOCK_OFFSET) + (sctx->num << BIT_TO_BYTE_OFFSET); -+ PUTU32_TO_U8(sctx->block + offset2 , nh); -+ PUTU32_TO_U8(sctx->block + offset1, nl); -+ -+ block_fn(sctx->word_reg, sctx->block, 1); -+ for (i = 0; i < SM3_STATE_WORDS; i++) -+ PUTU32_TO_U8(md + i * WORD_TO_CHAR_OFFSET, sctx->word_reg[i]); -+} -+ -+static int do_sm3_ce(struct wd_digest_msg *msg, __u8 *out_digest) -+{ -+ enum hash_block_type block_type; -+ struct sm3_ce_ctx sctx = {0}; -+ size_t data_len, iv_len; -+ __u8 *data, *iv; -+ -+ block_type = get_hash_block_type(msg); -+ data_len = msg->in_bytes; -+ data = msg->in; -+ iv_len = SM3_DIGEST_SIZE; -+ /* Use last output as the iv in current cycle */ -+ iv = msg->out; -+ -+ switch(block_type) { -+ case HASH_SINGLE_BLOCK: -+ sm3_ce_init(&sctx); -+ sm3_ce_update(&sctx, data, data_len, sm3_ce_block_compress); -+ sm3_ce_final(&sctx, out_digest, sm3_ce_block_compress); -+ break; -+ case HASH_FRIST_BLOCK: -+ sm3_ce_init(&sctx); -+ sm3_ce_update(&sctx, data, data_len, sm3_ce_block_compress); -+ trans_output_result(out_digest, sctx.word_reg); -+ break; -+ case HASH_MIDDLE_BLOCK: -+ sm3_ce_init_ex(&sctx, iv, iv_len); -+ sm3_ce_update(&sctx, data, data_len, sm3_ce_block_compress); -+ /* Transform the middle result without final padding */ -+ trans_output_result(out_digest, sctx.word_reg); -+ break; -+ case HASH_END_BLOCK: -+ sm3_ce_init_ex(&sctx, iv, iv_len); -+ sm3_ce_update(&sctx, data, data_len, sm3_ce_block_compress); -+ /* Put the whole message length in last 64-bits */ -+ sctx.nblocks = msg->long_data_len / SM3_BLOCK_SIZE; -+ sm3_ce_final(&sctx, out_digest, sm3_ce_block_compress); -+ break; -+ default: -+ WD_ERR("Invalid block type!\n"); -+ return -WD_EINVAL; -+ } -+ -+ if (msg->out_bytes < SM3_DIGEST_SIZE) -+ memcpy(msg->out, 
out_digest, msg->out_bytes); -+ else -+ memcpy(msg->out, out_digest, SM3_DIGEST_SIZE); -+ -+ memset(&sctx, 0, sizeof(struct sm3_ce_ctx)); -+ -+ return WD_SUCCESS; -+} -+ -+static void sm3_hmac_key_padding(struct hmac_sm3_ctx *hctx, -+ const __u8 *key, size_t key_len) -+{ -+ size_t i; -+ -+ if (key_len <= SM3_BLOCK_SIZE) { -+ memcpy(hctx->key, key, key_len); -+ memset(hctx->key + key_len, 0, SM3_BLOCK_SIZE - key_len); -+ } else { -+ sm3_ce_init(&hctx->sctx); -+ sm3_ce_update(&hctx->sctx, key, key_len, sm3_ce_block_compress); -+ sm3_ce_final(&hctx->sctx, hctx->key, sm3_ce_block_compress); -+ /* Pad key to SM3_BLOCK_SIZE after hash */ -+ memset(hctx->key + SM3_DIGEST_SIZE, 0, -+ SM3_BLOCK_SIZE - SM3_DIGEST_SIZE); -+ } -+ -+ for (i = 0; i < SM3_BLOCK_SIZE; i++) { -+ hctx->key[i] ^= IPAD_DATA; -+ } -+} -+ -+static void sm3_ce_hmac_init(struct hmac_sm3_ctx *hctx, const __u8 *key, size_t key_len) -+{ -+ sm3_hmac_key_padding(hctx, key, key_len); -+ -+ /* Ipadded key is the first block to hash in first cycle */ -+ sm3_ce_init(&hctx->sctx); -+ sm3_ce_update(&hctx->sctx, hctx->key, SM3_BLOCK_SIZE, sm3_ce_block_compress); -+} -+ -+static void sm3_ce_hmac_update(struct hmac_sm3_ctx *hctx, const __u8 *data, size_t data_len) -+{ -+ sm3_ce_update(&hctx->sctx, data, data_len, sm3_ce_block_compress); -+} -+ -+static void sm3_ce_hmac_final(struct hmac_sm3_ctx *hctx, __u8 *out_hmac) -+{ -+ __u8 digest[SM3_DIGEST_SIZE] = {0}; -+ size_t i; -+ -+ for (i = 0; i < SM3_BLOCK_SIZE; i++) { -+ hctx->key[i] ^= (IPAD_DATA ^ OPAD_DATA); -+ } -+ -+ /* Compute the last data from update process */ -+ sm3_ce_final(&hctx->sctx, digest, sm3_ce_block_compress); -+ -+ /* Opadded key is the first block to hash in second cycle */ -+ memset(&hctx->sctx, 0, sizeof(struct sm3_ce_ctx)); -+ sm3_ce_init(&hctx->sctx); -+ sm3_ce_update(&hctx->sctx, hctx->key, SM3_BLOCK_SIZE, sm3_ce_block_compress); -+ -+ /* Compute the the first cycle result */ -+ sm3_ce_update(&hctx->sctx, digest, SM3_DIGEST_SIZE, 
sm3_ce_block_compress); -+ sm3_ce_final(&hctx->sctx, out_hmac, sm3_ce_block_compress); -+} -+ -+static int do_hmac_sm3_ce(struct wd_digest_msg *msg, __u8 *out_hmac) -+{ -+ size_t data_len, key_len, iv_len; -+ enum hash_block_type block_type; -+ struct hmac_sm3_ctx hctx = {0}; -+ __u8 *data, *key, *iv; -+ -+ data_len = msg->in_bytes; -+ data = msg->in; -+ key = msg->key; -+ key_len = msg->key_bytes; -+ iv_len = SM3_DIGEST_SIZE; -+ /* Use last output as the iv in current cycle */ -+ iv = msg->out; -+ -+ if (!key_len) { -+ WD_ERR("invalid hmac key_len is 0!\n"); -+ return -WD_EINVAL; -+ } -+ -+ block_type = get_hash_block_type(msg); -+ switch(block_type) { -+ case HASH_SINGLE_BLOCK: -+ sm3_ce_hmac_init(&hctx, key, key_len); -+ sm3_ce_hmac_update(&hctx, data, data_len); -+ sm3_ce_hmac_final(&hctx, out_hmac); -+ break; -+ case HASH_FRIST_BLOCK: -+ sm3_ce_hmac_init(&hctx, key, key_len); -+ sm3_ce_hmac_update(&hctx, data, data_len); -+ trans_output_result(out_hmac, hctx.sctx.word_reg); -+ break; -+ case HASH_MIDDLE_BLOCK: -+ sm3_ce_init_ex(&(hctx.sctx), iv, iv_len); -+ sm3_ce_hmac_update(&hctx, data, data_len); -+ trans_output_result(out_hmac, hctx.sctx.word_reg); -+ break; -+ case HASH_END_BLOCK: -+ sm3_hmac_key_padding(&hctx, key, key_len); -+ sm3_ce_init_ex(&(hctx.sctx), iv, iv_len); -+ sm3_ce_hmac_update(&hctx, data, data_len); -+ hctx.sctx.nblocks = msg->long_data_len / SM3_BLOCK_SIZE + KEY_BLOCK_NUM; -+ sm3_ce_hmac_final(&hctx, out_hmac); -+ break; -+ default: -+ WD_ERR("Invalid block type!\n"); -+ return -WD_EINVAL; -+ } -+ -+ if (msg->out_bytes < SM3_DIGEST_SIZE) -+ memcpy(msg->out, out_hmac, msg->out_bytes); -+ else -+ memcpy(msg->out, out_hmac, SM3_DIGEST_SIZE); -+ -+ memset(&hctx, 0, sizeof(struct hmac_sm3_ctx)); -+ -+ return WD_SUCCESS; -+} -+ -+static int sm3_ce_drv_send(struct wd_alg_driver *drv, handle_t ctx, void *digest_msg) -+{ -+ struct wd_digest_msg *msg = (struct wd_digest_msg *)digest_msg; -+ __u8 digest[SM3_DIGEST_SIZE] = {0}; -+ int ret; -+ -+ if 
(!msg) { -+ WD_ERR("invalid: digest_msg is NULL!\n"); -+ return -WD_EINVAL; -+ } -+ -+ if (msg->data_fmt == WD_SGL_BUF) { -+ WD_ERR("invalid: SM3 CE driver do not support sgl data format!\n"); -+ return -WD_EINVAL; -+ } -+ -+ if (msg->mode == WD_DIGEST_NORMAL) { -+ ret = do_sm3_ce(msg, digest); -+ } else if (msg->mode == WD_DIGEST_HMAC) { -+ ret = do_hmac_sm3_ce(msg, digest); -+ } else { -+ WD_ERR("invalid digest mode!\n"); -+ ret = -WD_EINVAL; -+ } -+ -+ return ret; -+} -+ -+static int sm3_ce_drv_recv(struct wd_alg_driver *drv, handle_t ctx, void *digest_msg) -+{ -+ return WD_SUCCESS; -+} -+ -+static int sm3_ce_drv_init(struct wd_alg_driver *drv, void *conf) -+{ -+ struct wd_ctx_config_internal *config = (struct wd_ctx_config_internal *)conf; -+ struct sm3_ce_drv_ctx *sctx = (struct sm3_ce_drv_ctx *)drv->priv; -+ -+ config->epoll_en = false; -+ -+ /* return if already inited */ -+ if (sctx) -+ return WD_SUCCESS; -+ sctx = malloc(sizeof(struct sm3_ce_drv_ctx)); -+ if (!sctx) -+ return -WD_EINVAL; -+ -+ memcpy(&sctx->config, config, sizeof(struct wd_ctx_config_internal)); -+ -+ return WD_SUCCESS; -+} -+ -+static void sm3_ce_drv_exit(struct wd_alg_driver *drv) -+{ -+ struct sm3_ce_drv_ctx *sctx = (struct sm3_ce_drv_ctx *)drv->priv; -+ -+ if (!sctx) -+ return; -+ -+ free(sctx); -+ drv->priv = NULL; -+} -diff --git a/drv/isa_ce_sm3.h b/drv/isa_ce_sm3.h -new file mode 100644 -index 0000000..13edb0a ---- /dev/null -+++ b/drv/isa_ce_sm3.h -@@ -0,0 +1,86 @@ -+/* SPDX-License-Identifier: Apache-2.0 */ -+/* Copyright 2020-2021 Huawei Technologies Co.,Ltd. All rights reserved. 
*/ -+#ifndef __ISA_CE_SM3_H -+#define __ISA_CE_SM3_H -+ -+#include "wd_alg_common.h" -+ -+#ifdef __cplusplus -+extern "C" { -+#endif -+ -+#define SM3_DIGEST_SIZE 32 -+#define SM3_BLOCK_SIZE 64 -+#define SM3_STATE_WORDS 8 -+#define HMAC_BLOCK_SIZE 64 -+#define WORD_TO_CHAR_OFFSET 4 -+#define SM3_PADDING_BYTE 0x80 -+#define NH_OFFSET 23 -+#define BIT_TO_BLOCK_OFFSET 9 -+#define BIT_TO_BYTE_OFFSET 3 -+#define IPAD_DATA 0x36 -+#define OPAD_DATA 0x5c -+#define KEY_BLOCK_NUM 1 -+ -+#define SM3_IVA 0x7380166f -+#define SM3_IVB 0x4914b2b9 -+#define SM3_IVC 0x172442d7 -+#define SM3_IVD 0xda8a0600 -+#define SM3_IVE 0xa96f30bc -+#define SM3_IVF 0x163138aa -+#define SM3_IVG 0xe38dee4d -+#define SM3_IVH 0xb0fb0e4e -+ -+#define PUTU32_TO_U8(dst, src) \ -+ ((dst)[0] = (__u8)((src) >> 24), \ -+ (dst)[1] = (__u8)((src) >> 16), \ -+ (dst)[2] = (__u8)((src) >> 8), \ -+ (dst)[3] = (__u8)(src)) -+ -+#define PUTU8_TO_U32(dst, src) \ -+ ((dst) = (((__u32)(src)[0]) << 24) + \ -+ (((__u32)(src)[1]) << 16) + \ -+ (((__u32)(src)[2]) << 8) + \ -+ ((__u32)(src)[3])) -+ -+struct sm3_ce_ctx { -+ /* -+ * Use an array to represent the eight 32-bits word registers, -+ * SM3_IVA, SM3_IVB, ..., SM3_IVH, save IV and the final digest. -+ */ -+ __u32 word_reg[SM3_STATE_WORDS]; -+ /* -+ * The length (in bits) of all the msg fragments, the length of the -+ * whole msg should less than 2^64 bit, a msg block is 512-bits, -+ * make a 64-bits number in two parts, low 32-bits - 'Nl' and -+ * high 32-bits - 'Nh'. -+ */ -+ __u64 nblocks; -+ /* -+ * Message block, a msg block is 512-bits, use sixteen __u32 type -+ * element to store it, used in B(i) = W0||W1||W2||...||W15. -+ * Use a __u8 array to replace the 32-bit array. -+ */ -+ __u8 block[SM3_BLOCK_SIZE]; -+ /* The number of msg that need to compute in current cycle or turn. 
*/ -+ size_t num; -+}; -+ -+struct hmac_sm3_ctx { -+ struct sm3_ce_ctx sctx; -+ /* Save user key */ -+ __u8 key[SM3_BLOCK_SIZE]; -+}; -+ -+struct sm3_ce_drv_ctx { -+ struct wd_ctx_config_internal config; -+}; -+ -+void sm3_ce_block_compress(__u32 word_reg[SM3_STATE_WORDS], -+ const __u8 *src, size_t blocks); -+ -+#ifdef __cplusplus -+} -+#endif -+ -+#endif /* __ISA_CE_SM3_H */ -diff --git a/drv/isa_ce_sm3_armv8.S b/drv/isa_ce_sm3_armv8.S -new file mode 100644 -index 0000000..3d08e2d ---- /dev/null -+++ b/drv/isa_ce_sm3_armv8.S -@@ -0,0 +1,765 @@ -+/* SPDX-License-Identifier: Apache-2.0 */ -+/* -+ * Copyright 2011-2022 The OpenSSL Project Authors. All Rights Reserved. -+ * -+ * Licensed under the Apache License 2.0 (the "License"). You may not use -+ * this file except in compliance with the License. You can obtain a copy -+ * in the file LICENSE in the source distribution or at -+ * https://www.openssl.org/source/license.html -+ */ -+ -+#include "../include/drv/arm_arch_ce.h" -+ -+.arch armv8.2-a -+.text -+.globl sm3_ce_block_compress -+.type sm3_ce_block_compress,%function -+.align 5 -+sm3_ce_block_compress: -+ AARCH64_VALID_CALL_TARGET -+/* Loads state */ -+ /* -+ * Loads multiple single-element structures from memory(X0 register) and -+ * writes result to two SIMD&FP registers(v5.4s and v6.4s). -+ */ -+ ld1 {v5.4s,v6.4s}, [x0] /* 4s -- 4 * 32bit */ -+ /* -+ * Reverses the order of 32-bit(type:s) elements in each doubleword of the -+ * vector in the src SIMD&FP register(v5), places the result into a vector -+ * and writes the vector to the dst SIDM&FP register(v5). -+ */ -+ rev64 v5.4s, v5.4s -+ rev64 v6.4s, v6.4s -+ /* -+ * Extracts the lowest vector elements from the second src SIMD&FP register, -+ * and highest vector elements from the first source SIMD&FP register, -+ * concatenates the result into a vector, and writes the vector to the -+ * dst SIMD&FP register vector. #8 means the numbered byte element to be extracted. 
-+ * Format: ext , , , -+ * #imm: immediate data. -+ */ -+ ext v5.16b, v5.16b, v5.16b, #8 /* 16b -- 16 * 8bit */ -+ ext v6.16b, v6.16b, v6.16b, #8 -+ /* From PC-relative address adds an immediate value to form a PC-relative -+ * address, and writes the result to the dst register. -+ */ -+ adr x8, .Tj /* 'Tj' is the constant defined in SM3 protocol */ -+ /* Loads pair of register calculates an address from a base register value -+ * and an immediate offset, loads two 32-bit words from memory, and writes -+ * them to two registers. */ -+ ldp s16, s17, [x8] /* 'sn' is the scalar register, 'vn' is the vector register */ -+ -+.Loop: -+/* Loads input */ -+ /* -+ * Loads multipule single-element structrue to four registers. -+ * #64 is the immediate offset variant, it is the post-index immediate offset. -+ * Loads the input src data, msg to be hashed. -+ */ -+ ld1 {v0.16b,v1.16b,v2.16b,v3.16b}, [x1], #64 -+ /* -+ * Substracts an optionally-shifted immediate value from a register value, -+ * and writes the result to the dst register. -+ */ -+ sub w2, w2, #1 -+ -+ /* Copies the value in a src register to the dst register. 
*/ -+ mov v18.16b, v5.16b -+ mov v19.16b, v6.16b -+ -+#ifndef __ARMEB__ -+ rev32 v0.16b, v0.16b -+ rev32 v1.16b, v1.16b -+ rev32 v2.16b, v2.16b -+ rev32 v3.16b, v3.16b -+#endif -+ -+ ext v20.16b, v16.16b, v16.16b, #4 -+ /* s4 = w7 | w8 | w9 | w10 */ -+ ext v4.16b, v1.16b, v2.16b, #12 -+ /* vtmp1 = w3 | w4 | w5 | w6 */ -+ ext v22.16b, v0.16b, v1.16b, #12 -+ /* vtmp2 = w10 | w11 | w12 | w13 */ -+ ext v23.16b, v2.16b, v3.16b, #8 -+ /* sm3partw1 v4.4s, v0.4s, v3.4s */ -+.inst 0xce63c004 -+ /* sm3partw2 v4.4s, v23.4s, v22.4s */ -+.inst 0xce76c6e4 -+ eor v22.16b, v0.16b, v1.16b -+ /* sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s */ -+.inst 0xce5418b7 -+ shl v21.4s, v20.4s, #1 -+ sri v21.4s, v20.4s, #31 -+ /* sm3tt1a v5.4s, v23.4s, v22.4s[0] */ -+.inst 0xce5682e5 -+ /* sm3tt2a v6.4s, v23.4s, v0.4s[0] */ -+.inst 0xce408ae6 -+ /* sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s */ -+.inst 0xce5518b7 -+ shl v20.4s, v21.4s, #1 -+ sri v20.4s, v21.4s, #31 -+ /* sm3tt1a v5.4s, v23.4s, v22.4s[1] */ -+.inst 0xce5692e5 -+ /* sm3tt2a v6.4s, v23.4s, v0.4s[1] */ -+.inst 0xce409ae6 -+ /* sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s */ -+.inst 0xce5418b7 -+ shl v21.4s, v20.4s, #1 -+ sri v21.4s, v20.4s, #31 -+ /* sm3tt1a v5.4s, v23.4s, v22.4s[2] */ -+.inst 0xce56a2e5 -+ /* sm3tt2a v6.4s, v23.4s, v0.4s[2] */ -+.inst 0xce40aae6 -+ /* sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s */ -+.inst 0xce5518b7 -+ shl v20.4s, v21.4s, #1 -+ sri v20.4s, v21.4s, #31 -+ /* sm3tt1a v5.4s, v23.4s, v22.4s[3] */ -+.inst 0xce56b2e5 -+ /* sm3tt2a v6.4s, v23.4s, v0.4s[3] */ -+.inst 0xce40bae6 -+ /* s4 = w7 | w8 | w9 | w10 */ -+ ext v0.16b, v2.16b, v3.16b, #12 -+ /* vtmp1 = w3 | w4 | w5 | w6 */ -+ ext v22.16b, v1.16b, v2.16b, #12 -+ /* vtmp2 = w10 | w11 | w12 | w13 */ -+ ext v23.16b, v3.16b, v4.16b, #8 -+ /* sm3partw1 v0.4s, v1.4s, v4.4s */ -+.inst 0xce64c020 -+ /* sm3partw2 v0.4s, v23.4s, v22.4s */ -+.inst 0xce76c6e0 -+ eor v22.16b, v1.16b, v2.16b -+ /* sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s */ -+.inst 0xce5418b7 -+ shl v21.4s, v20.4s, #1 -+ sri v21.4s, 
v20.4s, #31 -+ /* sm3tt1a v5.4s, v23.4s, v22.4s[0] */ -+.inst 0xce5682e5 -+ /* sm3tt2a v6.4s, v23.4s, v1.4s[0] */ -+.inst 0xce418ae6 -+ /* sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s */ -+.inst 0xce5518b7 -+ shl v20.4s, v21.4s, #1 -+ sri v20.4s, v21.4s, #31 -+ /* sm3tt1a v5.4s, v23.4s, v22.4s[1] */ -+.inst 0xce5692e5 -+ /* sm3tt2a v6.4s, v23.4s, v1.4s[1] */ -+.inst 0xce419ae6 -+ /* sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s */ -+.inst 0xce5418b7 -+ shl v21.4s, v20.4s, #1 -+ sri v21.4s, v20.4s, #31 -+ /* sm3tt1a v5.4s, v23.4s, v22.4s[2] */ -+.inst 0xce56a2e5 -+ /* sm3tt2a v6.4s, v23.4s, v1.4s[2] */ -+.inst 0xce41aae6 -+ /* sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s */ -+.inst 0xce5518b7 -+ shl v20.4s, v21.4s, #1 -+ sri v20.4s, v21.4s, #31 -+ /* sm3tt1a v5.4s, v23.4s, v22.4s[3] */ -+.inst 0xce56b2e5 -+ /* sm3tt2a v6.4s, v23.4s, v1.4s[3] */ -+.inst 0xce41bae6 -+ /* s4 = w7 | w8 | w9 | w10 */ -+ ext v1.16b, v3.16b, v4.16b, #12 -+ /* vtmp1 = w3 | w4 | w5 | w6 */ -+ ext v22.16b, v2.16b, v3.16b, #12 -+ /* vtmp2 = w10 | w11 | w12 | w13 */ -+ ext v23.16b, v4.16b, v0.16b, #8 -+ /* sm3partw1 v1.4s, v2.4s, v0.4s */ -+.inst 0xce60c041 -+ /* sm3partw2 v1.4s, v23.4s, v22.4s */ -+.inst 0xce76c6e1 -+ eor v22.16b, v2.16b, v3.16b -+ /* sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s */ -+.inst 0xce5418b7 -+ shl v21.4s, v20.4s, #1 -+ sri v21.4s, v20.4s, #31 -+ /* sm3tt1a v5.4s, v23.4s, v22.4s[0] */ -+.inst 0xce5682e5 -+ /* sm3tt2a v6.4s, v23.4s, v2.4s[0] */ -+.inst 0xce428ae6 -+ /* sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s */ -+.inst 0xce5518b7 -+ shl v20.4s, v21.4s, #1 -+ sri v20.4s, v21.4s, #31 -+ /* sm3tt1a v5.4s, v23.4s, v22.4s[1] */ -+.inst 0xce5692e5 -+ /* sm3tt2a v6.4s, v23.4s, v2.4s[1] */ -+.inst 0xce429ae6 -+ /* sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s */ -+.inst 0xce5418b7 -+ shl v21.4s, v20.4s, #1 -+ sri v21.4s, v20.4s, #31 -+ /* sm3tt1a v5.4s, v23.4s, v22.4s[2] */ -+.inst 0xce56a2e5 -+ /* sm3tt2a v6.4s, v23.4s, v2.4s[2] */ -+.inst 0xce42aae6 -+ /* sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s */ -+.inst 0xce5518b7 -+ shl 
v20.4s, v21.4s, #1 -+ sri v20.4s, v21.4s, #31 -+ /* sm3tt1a v5.4s, v23.4s, v22.4s[3] */ -+.inst 0xce56b2e5 -+ /* sm3tt2a v6.4s, v23.4s, v2.4s[3] */ -+.inst 0xce42bae6 -+ /* s4 = w7 | w8 | w9 | w10 */ -+ ext v2.16b, v4.16b, v0.16b, #12 -+ /* vtmp1 = w3 | w4 | w5 | w6 */ -+ ext v22.16b, v3.16b, v4.16b, #12 -+ /* vtmp2 = w10 | w11 | w12 | w13 */ -+ ext v23.16b, v0.16b, v1.16b, #8 -+ /* sm3partw1 v2.4s, v3.4s, v1.4s */ -+.inst 0xce61c062 -+ /* sm3partw2 v2.4s, v23.4s, v22.4s */ -+.inst 0xce76c6e2 -+ eor v22.16b, v3.16b, v4.16b -+ /* sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s */ -+.inst 0xce5418b7 -+ shl v21.4s, v20.4s, #1 -+ sri v21.4s, v20.4s, #31 -+ /* sm3tt1a v5.4s, v23.4s, v22.4s[0] */ -+.inst 0xce5682e5 -+ /* sm3tt2a v6.4s, v23.4s, v3.4s[0] */ -+.inst 0xce438ae6 -+ /* sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s */ -+.inst 0xce5518b7 -+ shl v20.4s, v21.4s, #1 -+ sri v20.4s, v21.4s, #31 -+ /* sm3tt1a v5.4s, v23.4s, v22.4s[1] */ -+.inst 0xce5692e5 -+ /* sm3tt2a v6.4s, v23.4s, v3.4s[1] */ -+.inst 0xce439ae6 -+ /* sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s */ -+.inst 0xce5418b7 -+ shl v21.4s, v20.4s, #1 -+ sri v21.4s, v20.4s, #31 -+ /* sm3tt1a v5.4s, v23.4s, v22.4s[2] */ -+.inst 0xce56a2e5 -+ /* sm3tt2a v6.4s, v23.4s, v3.4s[2] */ -+.inst 0xce43aae6 -+ /* sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s */ -+.inst 0xce5518b7 -+ shl v20.4s, v21.4s, #1 -+ sri v20.4s, v21.4s, #31 -+ /* sm3tt1a v5.4s, v23.4s, v22.4s[3] */ -+.inst 0xce56b2e5 -+ /* sm3tt2a v6.4s, v23.4s, v3.4s[3] */ -+.inst 0xce43bae6 -+ ext v20.16b, v17.16b, v17.16b, #4 -+ /* s4 = w7 | w8 | w9 | w10 */ -+ ext v3.16b, v0.16b, v1.16b, #12 -+ /* vtmp1 = w3 | w4 | w5 | w6 */ -+ ext v22.16b, v4.16b, v0.16b, #12 -+ /* vtmp2 = w10 | w11 | w12 | w13 */ -+ ext v23.16b, v1.16b, v2.16b, #8 -+ /* sm3partw1 v3.4s, v4.4s, v2.4s */ -+.inst 0xce62c083 -+ /* sm3partw2 v3.4s, v23.4s, v22.4s */ -+.inst 0xce76c6e3 -+ eor v22.16b, v4.16b, v0.16b -+ /* sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s */ -+.inst 0xce5418b7 -+ shl v21.4s, v20.4s, #1 -+ sri v21.4s, v20.4s, #31 -+ 
/* sm3tt1b v5.4s, v23.4s, v22.4s[0] */ -+.inst 0xce5686e5 -+ /* sm3tt2b v6.4s, v23.4s, v4.4s[0] */ -+.inst 0xce448ee6 -+ /* sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s */ -+.inst 0xce5518b7 -+ shl v20.4s, v21.4s, #1 -+ sri v20.4s, v21.4s, #31 -+ /* sm3tt1b v5.4s, v23.4s, v22.4s[1] */ -+.inst 0xce5696e5 -+ /* sm3tt2b v6.4s, v23.4s, v4.4s[1] */ -+.inst 0xce449ee6 -+ /* sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s */ -+.inst 0xce5418b7 -+ shl v21.4s, v20.4s, #1 -+ sri v21.4s, v20.4s, #31 -+ /* sm3tt1b v5.4s, v23.4s, v22.4s[2] */ -+.inst 0xce56a6e5 -+ /* sm3tt2b v6.4s, v23.4s, v4.4s[2] */ -+.inst 0xce44aee6 -+ /* sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s */ -+.inst 0xce5518b7 -+ shl v20.4s, v21.4s, #1 -+ sri v20.4s, v21.4s, #31 -+ /* sm3tt1b v5.4s, v23.4s, v22.4s[3] */ -+.inst 0xce56b6e5 -+ /* sm3tt2b v6.4s, v23.4s, v4.4s[3] */ -+.inst 0xce44bee6 -+ /* s4 = w7 | w8 | w9 | w10 */ -+ ext v4.16b, v1.16b, v2.16b, #12 -+ /* vtmp1 = w3 | w4 | w5 | w6 */ -+ ext v22.16b, v0.16b, v1.16b, #12 -+ /* vtmp2 = w10 | w11 | w12 | w13 */ -+ ext v23.16b, v2.16b, v3.16b, #8 -+ /* sm3partw1 v4.4s, v0.4s, v3.4s */ -+.inst 0xce63c004 -+ /* sm3partw2 v4.4s, v23.4s, v22.4s */ -+.inst 0xce76c6e4 -+ eor v22.16b, v0.16b, v1.16b -+ /* sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s */ -+.inst 0xce5418b7 -+ shl v21.4s, v20.4s, #1 -+ sri v21.4s, v20.4s, #31 -+ /* sm3tt1b v5.4s, v23.4s, v22.4s[0] */ -+.inst 0xce5686e5 -+ /* sm3tt2b v6.4s, v23.4s, v0.4s[0] */ -+.inst 0xce408ee6 -+ /* sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s */ -+.inst 0xce5518b7 -+ shl v20.4s, v21.4s, #1 -+ sri v20.4s, v21.4s, #31 -+ /* sm3tt1b v5.4s, v23.4s, v22.4s[1] */ -+.inst 0xce5696e5 -+ /* sm3tt2b v6.4s, v23.4s, v0.4s[1] */ -+.inst 0xce409ee6 -+ /* sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s */ -+.inst 0xce5418b7 -+ shl v21.4s, v20.4s, #1 -+ sri v21.4s, v20.4s, #31 -+ /* sm3tt1b v5.4s, v23.4s, v22.4s[2] */ -+.inst 0xce56a6e5 -+ /* sm3tt2b v6.4s, v23.4s, v0.4s[2] */ -+.inst 0xce40aee6 -+ /* sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s */ -+.inst 0xce5518b7 -+ shl v20.4s, v21.4s, #1 
-+ sri v20.4s, v21.4s, #31 -+ /* sm3tt1b v5.4s, v23.4s, v22.4s[3] */ -+.inst 0xce56b6e5 -+ /* sm3tt2b v6.4s, v23.4s, v0.4s[3] */ -+.inst 0xce40bee6 -+ /* s4 = w7 | w8 | w9 | w10 */ -+ ext v0.16b, v2.16b, v3.16b, #12 -+ /* vtmp1 = w3 | w4 | w5 | w6 */ -+ ext v22.16b, v1.16b, v2.16b, #12 -+ /* vtmp2 = w10 | w11 | w12 | w13 */ -+ ext v23.16b, v3.16b, v4.16b, #8 -+ /* sm3partw1 v0.4s, v1.4s, v4.4s */ -+.inst 0xce64c020 -+ /* sm3partw2 v0.4s, v23.4s, v22.4s */ -+.inst 0xce76c6e0 -+ eor v22.16b, v1.16b, v2.16b -+ /* sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s */ -+.inst 0xce5418b7 -+ shl v21.4s, v20.4s, #1 -+ sri v21.4s, v20.4s, #31 -+ /* sm3tt1b v5.4s, v23.4s, v22.4s[0] */ -+.inst 0xce5686e5 -+ /* sm3tt2b v6.4s, v23.4s, v1.4s[0] */ -+.inst 0xce418ee6 -+ /* sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s */ -+.inst 0xce5518b7 -+ shl v20.4s, v21.4s, #1 -+ sri v20.4s, v21.4s, #31 -+ /* sm3tt1b v5.4s, v23.4s, v22.4s[1] */ -+.inst 0xce5696e5 -+ /* sm3tt2b v6.4s, v23.4s, v1.4s[1] */ -+.inst 0xce419ee6 -+ /* sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s */ -+.inst 0xce5418b7 -+ shl v21.4s, v20.4s, #1 -+ sri v21.4s, v20.4s, #31 -+ /* sm3tt1b v5.4s, v23.4s, v22.4s[2] */ -+.inst 0xce56a6e5 -+ /* sm3tt2b v6.4s, v23.4s, v1.4s[2] */ -+.inst 0xce41aee6 -+ /* sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s */ -+.inst 0xce5518b7 -+ shl v20.4s, v21.4s, #1 -+ sri v20.4s, v21.4s, #31 -+ /* sm3tt1b v5.4s, v23.4s, v22.4s[3] */ -+.inst 0xce56b6e5 -+ /* sm3tt2b v6.4s, v23.4s, v1.4s[3] */ -+.inst 0xce41bee6 -+ /* s4 = w7 | w8 | w9 | w10 */ -+ ext v1.16b, v3.16b, v4.16b, #12 -+ /* vtmp1 = w3 | w4 | w5 | w6 */ -+ ext v22.16b, v2.16b, v3.16b, #12 -+ /* vtmp2 = w10 | w11 | w12 | w13 */ -+ ext v23.16b, v4.16b, v0.16b, #8 -+ /* sm3partw1 v1.4s, v2.4s, v0.4s */ -+.inst 0xce60c041 -+ /* sm3partw2 v1.4s, v23.4s, v22.4s */ -+.inst 0xce76c6e1 -+ eor v22.16b, v2.16b, v3.16b -+ /* sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s */ -+.inst 0xce5418b7 -+ shl v21.4s, v20.4s, #1 -+ sri v21.4s, v20.4s, #31 -+ /* sm3tt1b v5.4s, v23.4s, v22.4s[0] */ -+.inst 
0xce5686e5 -+ /* sm3tt2b v6.4s, v23.4s, v2.4s[0] */ -+.inst 0xce428ee6 -+ /* sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s */ -+.inst 0xce5518b7 -+ shl v20.4s, v21.4s, #1 -+ sri v20.4s, v21.4s, #31 -+ /* sm3tt1b v5.4s, v23.4s, v22.4s[1] */ -+.inst 0xce5696e5 -+ /* sm3tt2b v6.4s, v23.4s, v2.4s[1] */ -+.inst 0xce429ee6 -+ /* sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s */ -+.inst 0xce5418b7 -+ shl v21.4s, v20.4s, #1 -+ sri v21.4s, v20.4s, #31 -+ /* sm3tt1b v5.4s, v23.4s, v22.4s[2] */ -+.inst 0xce56a6e5 -+ /* sm3tt2b v6.4s, v23.4s, v2.4s[2] */ -+.inst 0xce42aee6 -+ /* sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s */ -+.inst 0xce5518b7 -+ shl v20.4s, v21.4s, #1 -+ sri v20.4s, v21.4s, #31 -+ /* sm3tt1b v5.4s, v23.4s, v22.4s[3] */ -+.inst 0xce56b6e5 -+ /* sm3tt2b v6.4s, v23.4s, v2.4s[3] */ -+.inst 0xce42bee6 -+ /* s4 = w7 | w8 | w9 | w10 */ -+ ext v2.16b, v4.16b, v0.16b, #12 -+ /* vtmp1 = w3 | w4 | w5 | w6 */ -+ ext v22.16b, v3.16b, v4.16b, #12 -+ /* vtmp2 = w10 | w11 | w12 | w13 */ -+ ext v23.16b, v0.16b, v1.16b, #8 -+ /* sm3partw1 v2.4s, v3.4s, v1.4s */ -+.inst 0xce61c062 -+ /* sm3partw2 v2.4s, v23.4s, v22.4s */ -+.inst 0xce76c6e2 -+ eor v22.16b, v3.16b, v4.16b -+ /* sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s */ -+.inst 0xce5418b7 -+ shl v21.4s, v20.4s, #1 -+ sri v21.4s, v20.4s, #31 -+ /* sm3tt1b v5.4s, v23.4s, v22.4s[0] */ -+.inst 0xce5686e5 -+ /* sm3tt2b v6.4s, v23.4s, v3.4s[0] */ -+.inst 0xce438ee6 -+ /* sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s */ -+.inst 0xce5518b7 -+ shl v20.4s, v21.4s, #1 -+ sri v20.4s, v21.4s, #31 -+ /* sm3tt1b v5.4s, v23.4s, v22.4s[1] */ -+.inst 0xce5696e5 -+ /* sm3tt2b v6.4s, v23.4s, v3.4s[1] */ -+.inst 0xce439ee6 -+ /* sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s */ -+.inst 0xce5418b7 -+ shl v21.4s, v20.4s, #1 -+ sri v21.4s, v20.4s, #31 -+ /* sm3tt1b v5.4s, v23.4s, v22.4s[2] */ -+.inst 0xce56a6e5 -+ /* sm3tt2b v6.4s, v23.4s, v3.4s[2] */ -+.inst 0xce43aee6 -+ /* sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s */ -+.inst 0xce5518b7 -+ shl v20.4s, v21.4s, #1 -+ sri v20.4s, v21.4s, #31 -+ /* sm3tt1b v5.4s, 
v23.4s, v22.4s[3] */ -+.inst 0xce56b6e5 -+ /* sm3tt2b v6.4s, v23.4s, v3.4s[3] */ -+.inst 0xce43bee6 -+ /* s4 = w7 | w8 | w9 | w10 */ -+ ext v3.16b, v0.16b, v1.16b, #12 -+ /* vtmp1 = w3 | w4 | w5 | w6 */ -+ ext v22.16b, v4.16b, v0.16b, #12 -+ /* vtmp2 = w10 | w11 | w12 | w13 */ -+ ext v23.16b, v1.16b, v2.16b, #8 -+ /* sm3partw1 v3.4s, v4.4s, v2.4s */ -+.inst 0xce62c083 -+ /* sm3partw2 v3.4s, v23.4s, v22.4s */ -+.inst 0xce76c6e3 -+ eor v22.16b, v4.16b, v0.16b -+ /* sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s */ -+.inst 0xce5418b7 -+ shl v21.4s, v20.4s, #1 -+ sri v21.4s, v20.4s, #31 -+ /* sm3tt1b v5.4s, v23.4s, v22.4s[0] */ -+.inst 0xce5686e5 -+ /* sm3tt2b v6.4s, v23.4s, v4.4s[0] */ -+.inst 0xce448ee6 -+ /* sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s */ -+.inst 0xce5518b7 -+ shl v20.4s, v21.4s, #1 -+ sri v20.4s, v21.4s, #31 -+ /* sm3tt1b v5.4s, v23.4s, v22.4s[1] */ -+.inst 0xce5696e5 -+ /* sm3tt2b v6.4s, v23.4s, v4.4s[1] */ -+.inst 0xce449ee6 -+ /* sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s */ -+.inst 0xce5418b7 -+ shl v21.4s, v20.4s, #1 -+ sri v21.4s, v20.4s, #31 -+ /* sm3tt1b v5.4s, v23.4s, v22.4s[2] */ -+.inst 0xce56a6e5 -+ /* sm3tt2b v6.4s, v23.4s, v4.4s[2] */ -+.inst 0xce44aee6 -+ /* sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s */ -+.inst 0xce5518b7 -+ shl v20.4s, v21.4s, #1 -+ sri v20.4s, v21.4s, #31 -+ /* sm3tt1b v5.4s, v23.4s, v22.4s[3] */ -+.inst 0xce56b6e5 -+ /* sm3tt2b v6.4s, v23.4s, v4.4s[3] */ -+.inst 0xce44bee6 -+ /* s4 = w7 | w8 | w9 | w10 */ -+ ext v4.16b, v1.16b, v2.16b, #12 -+ /* vtmp1 = w3 | w4 | w5 | w6 */ -+ ext v22.16b, v0.16b, v1.16b, #12 -+ /* vtmp2 = w10 | w11 | w12 | w13 */ -+ ext v23.16b, v2.16b, v3.16b, #8 -+ /* sm3partw1 v4.4s, v0.4s, v3.4s */ -+.inst 0xce63c004 -+ /* sm3partw2 v4.4s, v23.4s, v22.4s */ -+.inst 0xce76c6e4 -+ eor v22.16b, v0.16b, v1.16b -+ /* sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s */ -+.inst 0xce5418b7 -+ shl v21.4s, v20.4s, #1 -+ sri v21.4s, v20.4s, #31 -+ /* sm3tt1b v5.4s, v23.4s, v22.4s[0] */ -+.inst 0xce5686e5 -+ /* sm3tt2b v6.4s, v23.4s, v0.4s[0] */ 
-+.inst 0xce408ee6 -+ /* sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s */ -+.inst 0xce5518b7 -+ shl v20.4s, v21.4s, #1 -+ sri v20.4s, v21.4s, #31 -+ /* sm3tt1b v5.4s, v23.4s, v22.4s[1] */ -+.inst 0xce5696e5 -+ /* sm3tt2b v6.4s, v23.4s, v0.4s[1] */ -+.inst 0xce409ee6 -+ /* sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s */ -+.inst 0xce5418b7 -+ shl v21.4s, v20.4s, #1 -+ sri v21.4s, v20.4s, #31 -+ /* sm3tt1b v5.4s, v23.4s, v22.4s[2] */ -+.inst 0xce56a6e5 -+ /* sm3tt2b v6.4s, v23.4s, v0.4s[2] */ -+.inst 0xce40aee6 -+ /* sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s */ -+.inst 0xce5518b7 -+ shl v20.4s, v21.4s, #1 -+ sri v20.4s, v21.4s, #31 -+ /* sm3tt1b v5.4s, v23.4s, v22.4s[3] */ -+.inst 0xce56b6e5 -+ /* sm3tt2b v6.4s, v23.4s, v0.4s[3] */ -+.inst 0xce40bee6 -+ /* s4 = w7 | w8 | w9 | w10 */ -+ ext v0.16b, v2.16b, v3.16b, #12 -+ /* vtmp1 = w3 | w4 | w5 | w6 */ -+ ext v22.16b, v1.16b, v2.16b, #12 -+ /* vtmp2 = w10 | w11 | w12 | w13 */ -+ ext v23.16b, v3.16b, v4.16b, #8 -+ /* sm3partw1 v0.4s, v1.4s, v4.4s */ -+.inst 0xce64c020 -+ /* sm3partw2 v0.4s, v23.4s, v22.4s */ -+.inst 0xce76c6e0 -+ eor v22.16b, v1.16b, v2.16b -+ /* sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s */ -+.inst 0xce5418b7 -+ shl v21.4s, v20.4s, #1 -+ sri v21.4s, v20.4s, #31 -+ /* sm3tt1b v5.4s, v23.4s, v22.4s[0] */ -+.inst 0xce5686e5 -+ /* sm3tt2b v6.4s, v23.4s, v1.4s[0] */ -+.inst 0xce418ee6 -+ /* sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s */ -+.inst 0xce5518b7 -+ shl v20.4s, v21.4s, #1 -+ sri v20.4s, v21.4s, #31 -+ /* sm3tt1b v5.4s, v23.4s, v22.4s[1] */ -+.inst 0xce5696e5 -+ /* sm3tt2b v6.4s, v23.4s, v1.4s[1] */ -+.inst 0xce419ee6 -+ /* sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s */ -+.inst 0xce5418b7 -+ shl v21.4s, v20.4s, #1 -+ sri v21.4s, v20.4s, #31 -+ /* sm3tt1b v5.4s, v23.4s, v22.4s[2] */ -+.inst 0xce56a6e5 -+ /* sm3tt2b v6.4s, v23.4s, v1.4s[2] */ -+.inst 0xce41aee6 -+ /* sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s */ -+.inst 0xce5518b7 -+ shl v20.4s, v21.4s, #1 -+ sri v20.4s, v21.4s, #31 -+ /* sm3tt1b v5.4s, v23.4s, v22.4s[3] */ -+.inst 0xce56b6e5 -+ /* 
sm3tt2b v6.4s, v23.4s, v1.4s[3] */ -+.inst 0xce41bee6 -+ /* s4 = w7 | w8 | w9 | w10 */ -+ ext v1.16b, v3.16b, v4.16b, #12 -+ /* vtmp1 = w3 | w4 | w5 | w6 */ -+ ext v22.16b, v2.16b, v3.16b, #12 -+ /* vtmp2 = w10 | w11 | w12 | w13 */ -+ ext v23.16b, v4.16b, v0.16b, #8 -+ /* sm3partw1 v1.4s, v2.4s, v0.4s */ -+.inst 0xce60c041 -+ /* sm3partw2 v1.4s, v23.4s, v22.4s */ -+.inst 0xce76c6e1 -+ eor v22.16b, v2.16b, v3.16b -+ /* sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s */ -+.inst 0xce5418b7 -+ shl v21.4s, v20.4s, #1 -+ sri v21.4s, v20.4s, #31 -+ /* sm3tt1b v5.4s, v23.4s, v22.4s[0] */ -+.inst 0xce5686e5 -+ /* sm3tt2b v6.4s, v23.4s, v2.4s[0] */ -+.inst 0xce428ee6 -+ /* sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s */ -+.inst 0xce5518b7 -+ shl v20.4s, v21.4s, #1 -+ sri v20.4s, v21.4s, #31 -+ /* sm3tt1b v5.4s, v23.4s, v22.4s[1] */ -+.inst 0xce5696e5 -+ /* sm3tt2b v6.4s, v23.4s, v2.4s[1] */ -+.inst 0xce429ee6 -+ /* sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s */ -+.inst 0xce5418b7 -+ shl v21.4s, v20.4s, #1 -+ sri v21.4s, v20.4s, #31 -+ /* sm3tt1b v5.4s, v23.4s, v22.4s[2] */ -+.inst 0xce56a6e5 -+ /* sm3tt2b v6.4s, v23.4s, v2.4s[2] */ -+.inst 0xce42aee6 -+ /* sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s */ -+.inst 0xce5518b7 -+ shl v20.4s, v21.4s, #1 -+ sri v20.4s, v21.4s, #31 -+ /* sm3tt1b v5.4s, v23.4s, v22.4s[3] */ -+.inst 0xce56b6e5 -+ /* sm3tt2b v6.4s, v23.4s, v2.4s[3] */ -+.inst 0xce42bee6 -+ eor v22.16b, v3.16b, v4.16b -+ /* sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s */ -+.inst 0xce5418b7 -+ shl v21.4s, v20.4s, #1 -+ sri v21.4s, v20.4s, #31 -+ /* sm3tt1b v5.4s, v23.4s, v22.4s[0] */ -+.inst 0xce5686e5 -+ /* sm3tt2b v6.4s, v23.4s, v3.4s[0] */ -+.inst 0xce438ee6 -+ /* sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s */ -+.inst 0xce5518b7 -+ shl v20.4s, v21.4s, #1 -+ sri v20.4s, v21.4s, #31 -+ /* sm3tt1b v5.4s, v23.4s, v22.4s[1] */ -+.inst 0xce5696e5 -+ /* sm3tt2b v6.4s, v23.4s, v3.4s[1] */ -+.inst 0xce439ee6 -+ /* sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s */ -+.inst 0xce5418b7 -+ shl v21.4s, v20.4s, #1 -+ sri v21.4s, v20.4s, #31 -+ /* 
sm3tt1b v5.4s, v23.4s, v22.4s[2] */ -+.inst 0xce56a6e5 -+ /* sm3tt2b v6.4s, v23.4s, v3.4s[2] */ -+.inst 0xce43aee6 -+ /* sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s */ -+.inst 0xce5518b7 -+ shl v20.4s, v21.4s, #1 -+ sri v20.4s, v21.4s, #31 -+ /* sm3tt1b v5.4s, v23.4s, v22.4s[3] */ -+.inst 0xce56b6e5 -+ /* sm3tt2b v6.4s, v23.4s, v3.4s[3] */ -+.inst 0xce43bee6 -+ eor v22.16b, v4.16b, v0.16b -+ /* sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s */ -+.inst 0xce5418b7 -+ shl v21.4s, v20.4s, #1 -+ sri v21.4s, v20.4s, #31 -+ /* sm3tt1b v5.4s, v23.4s, v22.4s[0] */ -+.inst 0xce5686e5 -+ /* sm3tt2b v6.4s, v23.4s, v4.4s[0] */ -+.inst 0xce448ee6 -+ /* sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s */ -+.inst 0xce5518b7 -+ shl v20.4s, v21.4s, #1 -+ sri v20.4s, v21.4s, #31 -+ /* sm3tt1b v5.4s, v23.4s, v22.4s[1] */ -+.inst 0xce5696e5 -+ /* sm3tt2b v6.4s, v23.4s, v4.4s[1] */ -+.inst 0xce449ee6 -+ /* sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s */ -+.inst 0xce5418b7 -+ shl v21.4s, v20.4s, #1 -+ sri v21.4s, v20.4s, #31 -+ /* sm3tt1b v5.4s, v23.4s, v22.4s[2] */ -+.inst 0xce56a6e5 -+ /* sm3tt2b v6.4s, v23.4s, v4.4s[2] */ -+.inst 0xce44aee6 -+ /* sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s */ -+.inst 0xce5518b7 -+ shl v20.4s, v21.4s, #1 -+ sri v20.4s, v21.4s, #31 -+ /* sm3tt1b v5.4s, v23.4s, v22.4s[3] */ -+.inst 0xce56b6e5 -+ /* sm3tt2b v6.4s, v23.4s, v4.4s[3] */ -+.inst 0xce44bee6 -+ eor v22.16b, v0.16b, v1.16b -+ /* sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s */ -+.inst 0xce5418b7 -+ shl v21.4s, v20.4s, #1 -+ sri v21.4s, v20.4s, #31 -+ /* sm3tt1b v5.4s, v23.4s, v22.4s[0] */ -+.inst 0xce5686e5 -+ /* sm3tt2b v6.4s, v23.4s, v0.4s[0] */ -+.inst 0xce408ee6 -+ /* sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s */ -+.inst 0xce5518b7 -+ shl v20.4s, v21.4s, #1 -+ sri v20.4s, v21.4s, #31 -+ /* sm3tt1b v5.4s, v23.4s, v22.4s[1] */ -+.inst 0xce5696e5 -+ /* sm3tt2b v6.4s, v23.4s, v0.4s[1] */ -+.inst 0xce409ee6 -+ /* sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s */ -+.inst 0xce5418b7 -+ shl v21.4s, v20.4s, #1 -+ sri v21.4s, v20.4s, #31 -+ /* sm3tt1b v5.4s, v23.4s, v22.4s[2] 
*/ -+.inst 0xce56a6e5 -+ /* sm3tt2b v6.4s, v23.4s, v0.4s[2] */ -+.inst 0xce40aee6 -+ /* sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s */ -+.inst 0xce5518b7 -+ shl v20.4s, v21.4s, #1 -+ sri v20.4s, v21.4s, #31 -+ /* sm3tt1b v5.4s, v23.4s, v22.4s[3] */ -+.inst 0xce56b6e5 -+ /* sm3tt2b v6.4s, v23.4s, v0.4s[3] */ -+.inst 0xce40bee6 -+ eor v5.16b, v5.16b, v18.16b -+ eor v6.16b, v6.16b, v19.16b -+ /* -+ * cbnz: compare and branch on Nonzero, compares the value in a register -+ * with zero, and conditionally branches to a label at a PC-relative offset -+ * if the comparison is not equal. -+ * 'w2' is the 32-bit name of the general-purpose register to be tested. -+ * '.Loop' is the program label to be conditionally branched to. -+ */ -+ cbnz w2, .Loop -+ -+ /* save state, it is the result of one cycle */ -+ rev64 v5.4s, v5.4s -+ rev64 v6.4s, v6.4s -+ ext v5.16b, v5.16b, v5.16b, #8 -+ ext v6.16b, v6.16b, v6.16b, #8 -+ st1 {v5.4s,v6.4s}, [x0] -+ ret -+.size sm3_ce_block_compress,.-sm3_ce_block_compress -+ -+.align 3 -+.Tj: -+/* -+ * Inserts a list of 32-bit values as data into the assembly. -+ * In SM3 protocol: -+ * when 0 <= j <= 15, Tj = 0x79cc4519, -+ * when 16 <= j <= 63, Tj = 0x9d8a7a87. -+ */ -+.word 0x79cc4519, 0x9d8a7a87 -diff --git a/include/drv/arm_arch_ce.h b/include/drv/arm_arch_ce.h -new file mode 100644 -index 0000000..3ea81a4 ---- /dev/null -+++ b/include/drv/arm_arch_ce.h -@@ -0,0 +1,199 @@ -+/* SPDX-License-Identifier: Apache-2.0 */ -+/* -+ * Copyright 2011-2022 The OpenSSL Project Authors. All Rights Reserved. -+ * -+ * Licensed under the Apache License 2.0 (the "License"). You may not use -+ * this file except in compliance with the License. 
You can obtain a copy -+ * in the file LICENSE in the source distribution or at -+ * https://www.openssl.org/source/license.html -+ */ -+ -+#ifndef __ARM_ARCH_CE_H -+#define __ARM_ARCH_CE_H -+ -+#ifdef __cplusplus -+extern "C" { -+#endif -+ -+#if !defined(__ARM_ARCH__) -+# if defined(__CC_ARM) -+# define __ARM_ARCH__ __TARGET_ARCH_ARM -+# if defined(__BIG_ENDIAN) -+# define __ARMEB__ -+# else -+# define __ARMEL__ -+# endif -+# elif defined(__GNUC__) -+# if defined(__aarch64__) -+# define __ARM_ARCH__ 8 -+ /* -+ * GCC does not define __ARM_ARCH__, instead it defines -+ * bunch of below macros. See all_architectures[] table in -+ * gcc/config/arm/arm.c. -+ */ -+# elif defined(__ARM_ARCH) -+# define __ARM_ARCH__ __ARM_ARCH -+# elif defined(__ARM_ARCH_8A__) -+# define __ARM_ARCH__ 8 -+# elif defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || \ -+ defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || \ -+ defined(__ARM_ARCH_7EM__) -+# define __ARM_ARCH__ 7 -+# elif defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || \ -+ defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6M__) || \ -+ defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || \ -+ defined(__ARM_ARCH_6T2__) -+# define __ARM_ARCH__ 6 -+# elif defined(__ARM_ARCH_5__) || defined(__ARM_ARCH_5T__) || \ -+ defined(__ARM_ARCH_5E__) || defined(__ARM_ARCH_5TE__) || \ -+ defined(__ARM_ARCH_5TEJ__) -+# define __ARM_ARCH__ 5 -+# elif defined(__ARM_ARCH_4__) || defined(__ARM_ARCH_4T__) -+# define __ARM_ARCH__ 4 -+# else -+# error "unsupported ARM architecture" -+# endif -+# endif -+#endif -+ -+#if !defined(__ARM_MAX_ARCH__) -+# define __ARM_MAX_ARCH__ __ARM_ARCH__ -+#endif -+ -+#if __ARM_MAX_ARCH__ < __ARM_ARCH__ -+# error "__ARM_MAX_ARCH__ can't be less than __ARM_ARCH__" -+#elif __ARM_MAX_ARCH__ != __ARM_ARCH__ -+# if __ARM_ARCH__ < 7 && __ARM_MAX_ARCH__ >= 7 && defined(__ARMEB__) -+# error "can't build universal big-endian binary" -+# endif -+#endif -+ -+#ifndef __ASSEMBLER__ -+extern unsigned int ARMCAP_P; 
-+extern unsigned int ARM_MIDR; -+#endif -+ -+#define ARMV7_NEON (1<<0) -+#define ARMV7_TICK (1<<1) -+#define ARMV8_AES (1<<2) -+#define ARMV8_SHA1 (1<<3) -+#define ARMV8_SHA256 (1<<4) -+#define ARMV8_PMULL (1<<5) -+#define ARMV8_SHA512 (1<<6) -+#define ARMV8_CPUID (1<<7) -+#define ARMV8_RNG (1<<8) -+#define ARMV8_SM3 (1<<9) -+#define ARMV8_SM4 (1<<10) -+#define ARMV8_SHA3 (1<<11) -+#define ARMV8_UNROLL8_EOR3 (1<<12) -+#define ARMV8_SVE (1<<13) -+#define ARMV8_SVE2 (1<<14) -+ -+/* -+ * MIDR_EL1 system register -+ * -+ * 63___ _ ___32_31___ _ ___24_23_____20_19_____16_15__ _ __4_3_______0 -+ * | | | | | | | -+ * |RES0 | Implementer | Variant | Arch | PartNum |Revision| -+ * |____ _ _____|_____ _ _____|_________|_______ _|____ _ ___|________| -+ * -+ */ -+ -+#define ARM_CPU_IMP_ARM 0x41 -+#define HISI_CPU_IMP 0x48 -+ -+#define ARM_CPU_PART_CORTEX_A72 0xD08 -+#define ARM_CPU_PART_N1 0xD0C -+#define ARM_CPU_PART_V1 0xD40 -+#define ARM_CPU_PART_N2 0xD49 -+#define HISI_CPU_PART_KP920 0xD01 -+ -+#define MIDR_PARTNUM_SHIFT 4 -+#define MIDR_PARTNUM_MASK (0xfffU << MIDR_PARTNUM_SHIFT) -+#define MIDR_PARTNUM(midr) \ -+ (((midr) & MIDR_PARTNUM_MASK) >> MIDR_PARTNUM_SHIFT) -+ -+#define MIDR_IMPLEMENTER_SHIFT 24 -+#define MIDR_IMPLEMENTER_MASK (0xffU << MIDR_IMPLEMENTER_SHIFT) -+#define MIDR_IMPLEMENTER(midr) \ -+ (((midr) & MIDR_IMPLEMENTER_MASK) >> MIDR_IMPLEMENTER_SHIFT) -+ -+#define MIDR_ARCHITECTURE_SHIFT 16 -+#define MIDR_ARCHITECTURE_MASK (0xfU << MIDR_ARCHITECTURE_SHIFT) -+#define MIDR_ARCHITECTURE(midr) \ -+ (((midr) & MIDR_ARCHITECTURE_MASK) >> MIDR_ARCHITECTURE_SHIFT) -+ -+#define MIDR_CPU_MODEL_MASK \ -+ (MIDR_IMPLEMENTER_MASK | \ -+ MIDR_PARTNUM_MASK | \ -+ MIDR_ARCHITECTURE_MASK) -+ -+#define MIDR_CPU_MODEL(imp, partnum) \ -+ (((imp) << MIDR_IMPLEMENTER_SHIFT) | \ -+ (0xfU << MIDR_ARCHITECTURE_SHIFT) | \ -+ ((partnum) << MIDR_PARTNUM_SHIFT)) -+ -+#define MIDR_IS_CPU_MODEL(midr, imp, partnum) \ -+ (((midr) & MIDR_CPU_MODEL_MASK) == MIDR_CPU_MODEL(imp, partnum)) -+ 
-+#if defined(__ASSEMBLER__)
-+	/*
-+	 * Support macros for
-+	 * - Armv8.3-A Pointer Authentication and
-+	 * - Armv8.5-A Branch Target Identification
-+	 * features which require emitting a .note.gnu.property section with the
-+	 * appropriate architecture-dependent feature bits set.
-+	 * Read more: "ELF for the Arm(R) 64-bit Architecture"
-+	 */
-+# if defined(__ARM_FEATURE_BTI_DEFAULT) && __ARM_FEATURE_BTI_DEFAULT == 1
-+# define GNU_PROPERTY_AARCH64_BTI (1 << 0) /* Has Branch Target Identification */
-+# define AARCH64_VALID_CALL_TARGET hint #34 /* BTI 'c' */
-+# else
-+# define GNU_PROPERTY_AARCH64_BTI 0 /* No Branch Target Identification */
-+# define AARCH64_VALID_CALL_TARGET
-+# endif
-+
-+# if defined(__ARM_FEATURE_PAC_DEFAULT) && \
-+	(__ARM_FEATURE_PAC_DEFAULT & 1) == 1 /* Signed with A-key */
-+# define GNU_PROPERTY_AARCH64_POINTER_AUTH (1 << 1) /* Has Pointer Authentication */
-+# define AARCH64_SIGN_LINK_REGISTER hint #25 /* PACIASP */
-+# define AARCH64_VALIDATE_LINK_REGISTER hint #29 /* AUTIASP */
-+# elif defined(__ARM_FEATURE_PAC_DEFAULT) && \
-+	(__ARM_FEATURE_PAC_DEFAULT & 2) == 2 /* Signed with B-key */
-+# define GNU_PROPERTY_AARCH64_POINTER_AUTH (1 << 1) /* Has Pointer Authentication */
-+# define AARCH64_SIGN_LINK_REGISTER hint #27 /* PACIBSP */
-+# define AARCH64_VALIDATE_LINK_REGISTER hint #31 /* AUTIBSP */
-+# else
-+# define GNU_PROPERTY_AARCH64_POINTER_AUTH 0 /* No Pointer Authentication */
-+# if GNU_PROPERTY_AARCH64_BTI != 0
-+# define AARCH64_SIGN_LINK_REGISTER AARCH64_VALID_CALL_TARGET
-+# else
-+# define AARCH64_SIGN_LINK_REGISTER
-+# endif
-+# define AARCH64_VALIDATE_LINK_REGISTER
-+# endif
-+
-+# if GNU_PROPERTY_AARCH64_POINTER_AUTH != 0 || GNU_PROPERTY_AARCH64_BTI != 0
-+	.pushsection .note.gnu.property, "a";
-+	.balign 8;
-+	.long 4;
-+	.long 0x10;
-+	.long 0x5;
-+	.asciz "GNU";
-+	.long 0xc0000000; /* GNU_PROPERTY_AARCH64_FEATURE_1_AND */
-+	.long 4;
-+	.long (GNU_PROPERTY_AARCH64_POINTER_AUTH | GNU_PROPERTY_AARCH64_BTI);
-+	.long
0;
-+	.popsection;
-+# endif
-+
-+#endif /* defined __ASSEMBLER__ */
-+
-+#define IS_CPU_SUPPORT_UNROLL8_EOR3() \
-+	(ARMCAP_P & ARMV8_UNROLL8_EOR3)
-+
-+#ifdef __cplusplus
-+}
-+#endif
-+
-+#endif /* __ARM_ARCH_CE_H */
-diff --git a/include/wd_alg.h b/include/wd_alg.h
-index f8b136e..861b7d9 100644
--- a/include/wd_alg.h
-+++ b/include/wd_alg.h
-@@ -19,6 +19,49 @@ extern "C" {
- #define ALG_NAME_SIZE 128
- #define DEV_NAME_LEN 128
- 
-+/*
-+ * Macros related to arm platform:
-+ * ARM puts the feature bits for Crypto Extensions in AT_HWCAP2, whereas
-+ * AArch64 used AT_HWCAP.
-+ */
-+#ifndef AT_HWCAP
-+# define AT_HWCAP 16
-+#endif
-+
-+#ifndef AT_HWCAP2
-+# define AT_HWCAP2 26
-+#endif
-+
-+#if defined(__arm__) || defined(__arm)
-+# define HWCAP AT_HWCAP
-+# define HWCAP_NEON (1 << 12)
-+
-+# define HWCAP_CE AT_HWCAP2
-+# define HWCAP_CE_AES (1 << 0)
-+# define HWCAP_CE_PMULL (1 << 1)
-+# define HWCAP_CE_SHA1 (1 << 2)
-+# define HWCAP_CE_SHA256 (1 << 3)
-+#elif defined(__aarch64__)
-+# define HWCAP AT_HWCAP
-+# define HWCAP_NEON (1 << 1)
-+
-+# define HWCAP_CE HWCAP
-+# define HWCAP_CE_AES (1 << 3)
-+# define HWCAP_CE_PMULL (1 << 4)
-+# define HWCAP_CE_SHA1 (1 << 5)
-+# define HWCAP_CE_SHA256 (1 << 6)
-+# define HWCAP_CPUID (1 << 11)
-+# define HWCAP_SHA3 (1 << 17)
-+# define HWCAP_CE_SM3 (1 << 18)
-+# define HWCAP_CE_SM4 (1 << 19)
-+# define HWCAP_CE_SHA512 (1 << 21)
-+# define HWCAP_SVE (1 << 22)
-+/* AT_HWCAP2 */
-+# define HWCAP2 26
-+# define HWCAP2_SVE2 (1 << 1)
-+# define HWCAP2_RNG (1 << 16)
-+#endif
-+
- enum alg_dev_type {
- 	UADK_ALG_SOFT = 0x0,
- 	UADK_ALG_CE_INSTR = 0x1,
-diff --git a/wd_alg.c b/wd_alg.c
-index 3b111c8..f34a407 100644
--- a/wd_alg.c
-+++ b/wd_alg.c
-@@ -9,6 +9,7 @@
- #include
- #include
- #include
-+#include <sys/auxv.h>
- 
- #include "wd.h"
- #include "wd_alg.h"
-@@ -90,6 +91,29 @@ static bool wd_check_accel_dev(const char *dev_name)
- 	return false;
- }
- 
-+/*
-+ * Report whether the running CPU provides the instructions required by the
-+ * named CE driver ("isa_ce_sm3" or "isa_ce_sm4"), based on the auxv hwcaps.
-+ * HWCAP_CE_SM3/HWCAP_CE_SM4 are only defined for AArch64 in wd_alg.h, so
-+ * every other architecture reports no support.
-+ */
-+static bool wd_check_ce_support(const char *dev_name)
-+{
-+#if defined(__aarch64__)
-+	unsigned long hwcaps = getauxval(AT_HWCAP);
-+
-+	if (!strcmp("isa_ce_sm3", dev_name) && (hwcaps & HWCAP_CE_SM3))
-+		return true;
-+
-+	if (!strcmp("isa_ce_sm4", dev_name) && (hwcaps & HWCAP_CE_SM4))
-+		return true;
-+#else
-+	(void)dev_name;
-+#endif
-+
-+	return false;
-+}
-+
- static bool wd_alg_check_available(int calc_type, const char *dev_name)
- {
- 	bool ret = false;
-@@ -99,6 +123,7 @@ static bool wd_alg_check_available(int calc_type, const char *dev_name)
- 		break;
- 	/* Should find the CPU if not support CE */
- 	case UADK_ALG_CE_INSTR:
-+		ret = wd_check_ce_support(dev_name);
- 		break;
- 	/* Should find the CPU if not support SVE */
- 	case UADK_ALG_SVE_INSTR:
-@@ -280,8 +305,13 @@ struct wd_alg_driver *wd_request_drv(const char *alg_name, bool hw_mask)
- 	struct wd_alg_driver *drv = NULL;
- 	int tmp_priority = -1;
- 
--	if (!pnext || !alg_name) {
--		WD_ERR("invalid: request alg param is error!\n");
-+	if (!pnext) {
-+		WD_ERR("invalid: requset drv pnext is NULL!\n");
-+		return NULL;
-+	}
-+
-+	if (!alg_name) {
-+		WD_ERR("invalid: alg_name is NULL!\n");
- 		return NULL;
- 	}
- 
-@@ -289,7 +319,8 @@ struct wd_alg_driver *wd_request_drv(const char *alg_name, bool hw_mask)
- 	pthread_mutex_lock(&mutex);
- 	while (pnext) {
- 		/* hw_mask true mean not to used hardware dev */
--		if (hw_mask && pnext->drv->calc_type == UADK_ALG_HW) {
-+		if ((hw_mask && pnext->drv->calc_type == UADK_ALG_HW) ||
-+		    (!hw_mask && pnext->drv->calc_type != UADK_ALG_HW)) {
- 			pnext = pnext->next;
- 			continue;
- 		}
-diff --git a/wd_digest.c b/wd_digest.c
-index c59184d..491502a 100644
--- a/wd_digest.c
-+++ b/wd_digest.c
-@@ -222,7 +222,7 @@ static void wd_digest_clear_status(void)
- }
- 
- static int wd_digest_init_nolock(struct wd_ctx_config *config,
--	struct wd_sched *sched)
-+				 struct wd_sched *sched)
- {
- 	int ret;
- 
-diff --git a/wd_sched.c b/wd_sched.c
-index 419280e..b43834d 100644
--- a/wd_sched.c
-+++ b/wd_sched.c
-@@ -453,7 +453,7
@@ static struct wd_sched sched_table[SCHED_POLICY_BUTT] = { - .poll_policy = session_sched_poll_policy, - }, { - .name = "None scheduler", -- .sched_policy = SCHED_POLICY_SINGLE, -+ .sched_policy = SCHED_POLICY_NONE, - .sched_init = sched_none_init, - .pick_next_ctx = sched_none_pick_next_ctx, - .poll_policy = sched_none_poll_policy, -diff --git a/wd_util.c b/wd_util.c -index 6134239..39909ca 100644 ---- a/wd_util.c -+++ b/wd_util.c -@@ -91,6 +91,11 @@ struct acc_alg_item { - char *algtype; - }; - -+struct wd_ce_ctx { -+ char *drv_name; -+ void *priv; -+}; -+ - static struct acc_alg_item alg_options[] = { - {"zlib", "zlib"}, - {"gzip", "gzip"}, -@@ -229,7 +234,6 @@ int wd_init_ctx_config(struct wd_ctx_config_internal *in, - ret = -WD_EINVAL; - goto err_out; - } -- - clone_ctx_to_internal(cfg->ctxs + i, ctxs + i); - ret = pthread_spin_init(&ctxs[i].lock, PTHREAD_PROCESS_SHARED); - if (ret) { -@@ -2612,14 +2616,44 @@ out_freelist: - return ret; - } - -+static int wd_alg_ce_ctx_init(struct wd_init_attrs *attrs) -+{ -+ struct wd_ctx_config *ctx_config = attrs->ctx_config; -+ -+ ctx_config->ctx_num = 1; -+ ctx_config->ctxs = calloc(ctx_config->ctx_num, sizeof(struct wd_ctx)); -+ if (!ctx_config->ctxs) { -+ return -WD_ENOMEM; -+ WD_ERR("failed to alloc ctxs!\n"); -+ } -+ ctx_config->ctxs[0].ctx = (handle_t)calloc(1, sizeof(struct wd_ce_ctx)); -+ -+ return WD_SUCCESS; -+} -+ -+static void wd_alg_ce_ctx_uninit(struct wd_ctx_config *ctx_config) -+{ -+ __u32 i; -+ -+ for (i = 0; i < ctx_config->ctx_num; i++) { -+ if (ctx_config->ctxs[i].ctx) { -+ free((struct wd_ce_ctx *)ctx_config->ctxs[i].ctx); -+ ctx_config->ctxs[i].ctx = 0; -+ } -+ } -+ -+ free(ctx_config->ctxs); -+} -+ - static void wd_alg_ctx_uninit(struct wd_ctx_config *ctx_config) - { - __u32 i; - -- for (i = 0; i < ctx_config->ctx_num; i++) -+ for (i = 0; i < ctx_config->ctx_num; i++) { - if (ctx_config->ctxs[i].ctx) { - wd_release_ctx(ctx_config->ctxs[i].ctx); - ctx_config->ctxs[i].ctx = 0; -+ } - } - - 
free(ctx_config->ctxs); -@@ -2633,9 +2667,9 @@ int wd_alg_attrs_init(struct wd_init_attrs *attrs) - struct wd_ctx_config *ctx_config = NULL; - struct wd_sched *alg_sched = NULL; - char alg_type[CRYPTO_MAX_ALG_NAME]; -- char *alg = attrs->alg; - int driver_type = UADK_ALG_HW; -- int ret; -+ char *alg = attrs->alg; -+ int ret = 0; - - if (!attrs->ctx_params) - return -WD_EINVAL; -@@ -2646,22 +2680,37 @@ int wd_alg_attrs_init(struct wd_init_attrs *attrs) - switch (driver_type) { - case UADK_ALG_SOFT: - case UADK_ALG_CE_INSTR: -- /* No need to alloc resource */ -- if (sched_type != SCHED_POLICY_NONE) -+ /* No need to alloc resource */ -+ if (sched_type != SCHED_POLICY_NONE) { -+ WD_ERR("invalid sched_type\n"); - return -WD_EINVAL; -+ } -+ -+ ctx_config = calloc(1, sizeof(*ctx_config)); -+ if (!ctx_config) { -+ WD_ERR("fail to alloc ctx config\n"); -+ return -WD_ENOMEM; -+ } -+ attrs->ctx_config = ctx_config; - - alg_sched = wd_sched_rr_alloc(SCHED_POLICY_NONE, 1, 1, alg_poll_func); - if (!alg_sched) { - WD_ERR("fail to alloc scheduler\n"); -- return -WD_EINVAL; -+ goto out_ctx_config; - } -+ - attrs->sched = alg_sched; - -- ret = wd_sched_rr_instance(alg_sched, NULL); -+ ret = wd_alg_ce_ctx_init(attrs); - if (ret) { -- WD_ERR("fail to instance scheduler\n"); -+ WD_ERR("fail to init ce ctx\n"); - goto out_freesched; - } -+ -+ ret = alg_init_func(ctx_config, alg_sched); -+ if (ret) -+ goto out_pre_init; -+ - break; - case UADK_ALG_SVE_INSTR: - /* Todo lock cpu core */ -@@ -2720,7 +2769,10 @@ int wd_alg_attrs_init(struct wd_init_attrs *attrs) - return 0; - - out_pre_init: -- wd_alg_ctx_uninit(ctx_config); -+ if (driver_type == UADK_ALG_CE_INSTR || driver_type == UADK_ALG_SOFT) -+ wd_alg_ce_ctx_uninit(ctx_config); -+ else -+ wd_alg_ctx_uninit(ctx_config); - out_freesched: - wd_sched_rr_release(alg_sched); - out_ctx_config: -@@ -2733,10 +2785,19 @@ void wd_alg_attrs_uninit(struct wd_init_attrs *attrs) - { - struct wd_ctx_config *ctx_config = attrs->ctx_config; - struct 
wd_sched *alg_sched = attrs->sched; -+ int driver_type = attrs->driver->calc_type; - -- if (ctx_config) { -- wd_alg_ctx_uninit(ctx_config); -- free(ctx_config); -+ if (driver_type == UADK_ALG_CE_INSTR || driver_type == UADK_ALG_SOFT) { -+ if (ctx_config) { -+ wd_alg_ce_ctx_uninit(ctx_config); -+ free(ctx_config); -+ } -+ } else { -+ if (ctx_config) { -+ wd_alg_ctx_uninit(ctx_config); -+ free(ctx_config); -+ } - } -+ - wd_sched_rr_release(alg_sched); - } --- -2.25.1 - diff --git a/0013-uadk-v1-update-the-symbol-table-for-libraries.patch b/0013-uadk-v1-update-the-symbol-table-for-libraries.patch new file mode 100644 index 0000000..c4dc66d --- /dev/null +++ b/0013-uadk-v1-update-the-symbol-table-for-libraries.patch @@ -0,0 +1,29 @@ +From 4e19126cc3cb4795fc9b3bb65e76bd05ab68377a Mon Sep 17 00:00:00 2001 +From: Wenkai Lin +Date: Tue, 20 Aug 2024 10:49:21 +0800 +Subject: [PATCH 13/16] uadk/v1: update the symbol table for libraries + +hisi_qm_inject_op_register should be visible to the +outside APP, fix it now. + +Signed-off-by: Wenkai Lin +Signed-off-by: Qi Tao +--- + v1/libwd.map | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/v1/libwd.map b/v1/libwd.map +index 4e6b639..d53201b 100644 +--- a/v1/libwd.map ++++ b/v1/libwd.map +@@ -160,6 +160,7 @@ global: + wd_get_sgl_datalen; + wd_get_sge_datalen; + wd_get_sgl_bufsize; ++ hisi_qm_inject_op_register; + + local: *; + }; +-- +2.25.1 + diff --git a/0014-uadk-fix-control-range-of-environmemt-variable.patch b/0014-uadk-fix-control-range-of-environmemt-variable.patch deleted file mode 100644 index 4b90bba..0000000 --- a/0014-uadk-fix-control-range-of-environmemt-variable.patch +++ /dev/null @@ -1,33 +0,0 @@ -From 7869d42227f03754e4117a17751e6959b3f93bca Mon Sep 17 00:00:00 2001 -From: Zhiqi Song -Date: Mon, 11 Mar 2024 18:07:23 +0800 -Subject: [PATCH 14/44] uadk: fix control range of environmemt variable - -Environment variable will not be used in non-hardware -situation to config the ctx num. 
So add an interception -condition to avoid the impact of environment variables -on the initialization of non-hardware situations. - -Signed-off-by: Zhiqi Song ---- - wd_util.c | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/wd_util.c b/wd_util.c -index 39909ca..b8e755c 100644 ---- a/wd_util.c -+++ b/wd_util.c -@@ -2111,8 +2111,9 @@ int wd_ctx_param_init(struct wd_ctx_params *ctx_params, - return -WD_ENOMEM; - } - -+ /* Only hw driver support environment variable */ - var_s = secure_getenv(env_name); -- if (var_s && strlen(var_s)) { -+ if (var_s && strlen(var_s) && driver->calc_type == UADK_ALG_HW) { - /* environment variable has the highest priority */ - ret = wd_env_set_ctx_nums(driver->alg_name, env_name, var_s, - ctx_params, max_op_type); --- -2.25.1 - diff --git a/0014-uadk-modify-address-check.patch b/0014-uadk-modify-address-check.patch new file mode 100644 index 0000000..78b416b --- /dev/null +++ b/0014-uadk-modify-address-check.patch @@ -0,0 +1,174 @@ +From a98d790c195cd41b839c5058c96eb8e10733783a Mon Sep 17 00:00:00 2001 +From: Weili Qian +Date: Tue, 20 Aug 2024 11:37:47 +0800 +Subject: [PATCH 14/16] uadk: modify address check + +When the memory is in SGL format, both wd_check_src_dst and +wd_check_datalist are called for address check. Actually, +only wd_check_datalist needs to be called. + +Signed-off-by: Weili Qian +Signed-off-by: Qi Tao +--- + include/wd_util.h | 2 +- + wd_aead.c | 19 +++++++++---------- + wd_cipher.c | 14 +++++++------- + wd_digest.c | 18 ++++++++++++------ + wd_util.c | 4 ++-- + 5 files changed, 31 insertions(+), 26 deletions(-) + +diff --git a/include/wd_util.h b/include/wd_util.h +index f217f0f..1c4af0b 100644 +--- a/include/wd_util.h ++++ b/include/wd_util.h +@@ -254,7 +254,7 @@ int wd_check_src_dst(void *src, __u32 in_bytes, void *dst, __u32 out_bytes); + * + * Return 0 if the datalist is not less than expected size. 
+ */ +-int wd_check_datalist(struct wd_datalist *head, __u32 size); ++int wd_check_datalist(struct wd_datalist *head, __u64 size); + + + /* +diff --git a/wd_aead.c b/wd_aead.c +index 9c3f1ab..65949f7 100644 +--- a/wd_aead.c ++++ b/wd_aead.c +@@ -374,7 +374,7 @@ void wd_aead_free_sess(handle_t h_sess) + static int wd_aead_param_check(struct wd_aead_sess *sess, + struct wd_aead_req *req) + { +- __u32 len; ++ __u64 len; + int ret; + + if (unlikely(!sess || !req)) { +@@ -410,18 +410,11 @@ static int wd_aead_param_check(struct wd_aead_sess *sess, + return -WD_EINVAL; + } + +- ret = wd_check_src_dst(req->src, req->in_bytes, req->dst, req->out_bytes); +- if (unlikely(ret)) { +- WD_ERR("invalid: src/dst addr is NULL when src/dst size is non-zero!\n"); +- return -WD_EINVAL; +- } +- + if (req->data_fmt == WD_SGL_BUF) { +- len = req->in_bytes + req->assoc_bytes; ++ len = (__u64)req->in_bytes + req->assoc_bytes; + ret = wd_check_datalist(req->list_src, len); + if (unlikely(ret)) { +- WD_ERR("failed to check the src datalist, size = %u\n", +- len); ++ WD_ERR("failed to check the src datalist, size = %llu\n", len); + return -WD_EINVAL; + } + +@@ -431,6 +424,12 @@ static int wd_aead_param_check(struct wd_aead_sess *sess, + req->out_bytes); + return -WD_EINVAL; + } ++ } else { ++ ret = wd_check_src_dst(req->src, req->in_bytes, req->dst, req->out_bytes); ++ if (unlikely(ret)) { ++ WD_ERR("invalid: src/dst addr is NULL when src/dst size is non-zero!\n"); ++ return -WD_EINVAL; ++ } + } + + return 0; +diff --git a/wd_cipher.c b/wd_cipher.c +index 4799213..646aa89 100644 +--- a/wd_cipher.c ++++ b/wd_cipher.c +@@ -560,7 +560,7 @@ static int cipher_iv_len_check(struct wd_cipher_req *req, + + if (!req->iv) { + WD_ERR("invalid: cipher input iv is NULL!\n"); +- ret = -WD_EINVAL; ++ return -WD_EINVAL; + } + + switch (sess->alg) { +@@ -636,12 +636,6 @@ static int wd_cipher_check_params(handle_t h_sess, + if (unlikely(ret)) + return ret; + +- ret = wd_check_src_dst(req->src, req->in_bytes, 
req->dst, req->out_bytes); +- if (unlikely(ret)) { +- WD_ERR("invalid: src/dst addr is NULL when src/dst size is non-zero!\n"); +- return -WD_EINVAL; +- } +- + if (req->data_fmt == WD_SGL_BUF) { + ret = wd_check_datalist(req->list_src, req->in_bytes); + if (unlikely(ret)) { +@@ -657,6 +651,12 @@ static int wd_cipher_check_params(handle_t h_sess, + req->in_bytes); + return -WD_EINVAL; + } ++ } else { ++ ret = wd_check_src_dst(req->src, req->in_bytes, req->dst, req->out_bytes); ++ if (unlikely(ret)) { ++ WD_ERR("invalid: src/dst addr is NULL when src/dst size is non-zero!\n"); ++ return -WD_EINVAL; ++ } + } + + return cipher_iv_len_check(req, sess); +diff --git a/wd_digest.c b/wd_digest.c +index f116aec..943fd8c 100644 +--- a/wd_digest.c ++++ b/wd_digest.c +@@ -548,12 +548,6 @@ static int wd_digest_param_check(struct wd_digest_sess *sess, + return -WD_EINVAL; + } + +- ret = wd_check_src_dst(req->in, req->in_bytes, req->out, req->out_bytes); +- if (unlikely(ret)) { +- WD_ERR("invalid: in/out addr is NULL when in/out size is non-zero!\n"); +- return -WD_EINVAL; +- } +- + if (req->data_fmt == WD_SGL_BUF) { + ret = wd_check_datalist(req->list_in, req->in_bytes); + if (unlikely(ret)) { +@@ -561,6 +555,18 @@ static int wd_digest_param_check(struct wd_digest_sess *sess, + req->in_bytes); + return -WD_EINVAL; + } ++ ++ ret = wd_check_src_dst(NULL, 0, req->out, req->out_bytes); ++ if (unlikely(ret)) { ++ WD_ERR("invalid: out addr is NULL when out size is non-zero!\n"); ++ return -WD_EINVAL; ++ } ++ } else { ++ ret = wd_check_src_dst(req->in, req->in_bytes, req->out, req->out_bytes); ++ if (unlikely(ret)) { ++ WD_ERR("invalid: in/out addr is NULL when in/out size is non-zero!\n"); ++ return -WD_EINVAL; ++ } + } + + return wd_aes_hmac_length_check(sess, req); +diff --git a/wd_util.c b/wd_util.c +index 76548c9..5350f84 100644 +--- a/wd_util.c ++++ b/wd_util.c +@@ -475,10 +475,10 @@ int wd_check_src_dst(void *src, __u32 in_bytes, void *dst, __u32 out_bytes) + return 0; + } + 
+-int wd_check_datalist(struct wd_datalist *head, __u32 size) ++int wd_check_datalist(struct wd_datalist *head, __u64 size) + { + struct wd_datalist *tmp = head; +- __u32 list_size = 0; ++ __u64 list_size = 0; + + while (tmp) { + if (tmp->data) +-- +2.25.1 + diff --git a/0015-uadk-check-calloc-return-value.patch b/0015-uadk-check-calloc-return-value.patch new file mode 100644 index 0000000..8b43220 --- /dev/null +++ b/0015-uadk-check-calloc-return-value.patch @@ -0,0 +1,54 @@ +From a30caaa94e0cc806a4ee64031c300c630f758cd6 Mon Sep 17 00:00:00 2001 +From: Weili Qian +Date: Tue, 20 Aug 2024 11:39:27 +0800 +Subject: [PATCH 15/16] uadk: check calloc return value + +Added the check of the return value of calloc. + +Signed-off-by: Weili Qian +Signed-off-by: Qi Tao +--- + wd_util.c | 10 +++++++--- + 1 file changed, 7 insertions(+), 3 deletions(-) + +diff --git a/wd_util.c b/wd_util.c +index 5350f84..8d362d7 100644 +--- a/wd_util.c ++++ b/wd_util.c +@@ -2632,10 +2632,15 @@ static int wd_alg_ce_ctx_init(struct wd_init_attrs *attrs) + ctx_config->ctx_num = 1; + ctx_config->ctxs = calloc(ctx_config->ctx_num, sizeof(struct wd_ctx)); + if (!ctx_config->ctxs) { +- return -WD_ENOMEM; + WD_ERR("failed to alloc ctxs!\n"); ++ return -WD_ENOMEM; + } ++ + ctx_config->ctxs[0].ctx = (handle_t)calloc(1, sizeof(struct wd_ce_ctx)); ++ if (!ctx_config->ctxs[0].ctx) { ++ free(ctx_config->ctxs); ++ return -WD_ENOMEM; ++ } + + return WD_SUCCESS; + } +@@ -2719,7 +2724,7 @@ int wd_alg_attrs_init(struct wd_init_attrs *attrs) + char alg_type[CRYPTO_MAX_ALG_NAME]; + int driver_type = UADK_ALG_HW; + char *alg = attrs->alg; +- int ret = 0; ++ int ret = -WD_EINVAL; + + if (!attrs->ctx_params) + return -WD_EINVAL; +@@ -2801,7 +2806,6 @@ int wd_alg_attrs_init(struct wd_init_attrs *attrs) + numa_max_node() + 1, alg_poll_func); + if (!alg_sched) { + WD_ERR("fail to instance scheduler\n"); +- ret = -WD_EINVAL; + goto out_ctx_config; + } + attrs->sched = alg_sched; +-- +2.25.1 + diff --git 
a/0015-uadk-util-use-default-sched_type-for-instruction-tas.patch b/0015-uadk-util-use-default-sched_type-for-instruction-tas.patch deleted file mode 100644 index c22b859..0000000 --- a/0015-uadk-util-use-default-sched_type-for-instruction-tas.patch +++ /dev/null @@ -1,72 +0,0 @@ -From 1839b896bbb7cfaddbd8b19d322465c7ef7e185c Mon Sep 17 00:00:00 2001 -From: Zhiqi Song -Date: Mon, 11 Mar 2024 18:07:24 +0800 -Subject: [PATCH 15/44] uadk/util: use default sched_type for instruction task - -To prevent users from perceiving the difference in instruction -acceleration task, no longer check the specific sched_type, just -accept sched_type within the valid range from user, and use -default sched_type inside. - -As sched_type is checked before init2 calls wd_alg_attrs_init(). -Redundancy check is not needed. - -Signed-off-by: Zhiqi Song ---- - include/wd_sched.h | 2 +- - wd_util.c | 12 ++---------- - 2 files changed, 3 insertions(+), 11 deletions(-) - -diff --git a/include/wd_sched.h b/include/wd_sched.h -index b145172..be541c6 100644 ---- a/include/wd_sched.h -+++ b/include/wd_sched.h -@@ -21,7 +21,7 @@ enum sched_policy_type { - SCHED_POLICY_RR = 0, - /* requests will no need ctxs */ - SCHED_POLICY_NONE, -- /* requests will need a fixed ctx */ -+ /* requests will need a fixed ctx */ - SCHED_POLICY_SINGLE, - SCHED_POLICY_BUTT, - }; -diff --git a/wd_util.c b/wd_util.c -index b8e755c..fb58167 100644 ---- a/wd_util.c -+++ b/wd_util.c -@@ -2681,12 +2681,6 @@ int wd_alg_attrs_init(struct wd_init_attrs *attrs) - switch (driver_type) { - case UADK_ALG_SOFT: - case UADK_ALG_CE_INSTR: -- /* No need to alloc resource */ -- if (sched_type != SCHED_POLICY_NONE) { -- WD_ERR("invalid sched_type\n"); -- return -WD_EINVAL; -- } -- - ctx_config = calloc(1, sizeof(*ctx_config)); - if (!ctx_config) { - WD_ERR("fail to alloc ctx config\n"); -@@ -2694,6 +2688,7 @@ int wd_alg_attrs_init(struct wd_init_attrs *attrs) - } - attrs->ctx_config = ctx_config; - -+ /* Use default sched_type to alloc 
scheduler */ - alg_sched = wd_sched_rr_alloc(SCHED_POLICY_NONE, 1, 1, alg_poll_func); - if (!alg_sched) { - WD_ERR("fail to alloc scheduler\n"); -@@ -2714,10 +2709,7 @@ int wd_alg_attrs_init(struct wd_init_attrs *attrs) - - break; - case UADK_ALG_SVE_INSTR: -- /* Todo lock cpu core */ -- if (sched_type != SCHED_POLICY_SINGLE) -- return -WD_EINVAL; -- -+ /* Use default sched_type to alloc scheduler */ - alg_sched = wd_sched_rr_alloc(SCHED_POLICY_SINGLE, 1, 1, alg_poll_func); - if (!alg_sched) { - WD_ERR("fail to alloc scheduler\n"); --- -2.25.1 - diff --git a/0016-drv-hisi-sec-modify-minor-errors-in-hisi_sec.c.patch b/0016-drv-hisi-sec-modify-minor-errors-in-hisi_sec.c.patch new file mode 100644 index 0000000..5b1adee --- /dev/null +++ b/0016-drv-hisi-sec-modify-minor-errors-in-hisi_sec.c.patch @@ -0,0 +1,148 @@ +From 72e907fdf5914a3d157311431858355bd30b573f Mon Sep 17 00:00:00 2001 +From: Weili Qian +Date: Tue, 20 Aug 2024 11:40:26 +0800 +Subject: [PATCH 16/16] drv/hisi/sec: modify minor errors in hisi_sec.c + +1. In AEAD stream mode, iv is update used recv_msg. However, +the iv pointer in recv_msg is a random value in the +asynchronous mode, which may cause address exceptions. +Therefore, temp_msg is used. +2. The AEAD stream mode does not support the SGL format. +The SGL memory soft calculation may fail. +3. The data type is converted to __u64 during addition calculation +to avoid calculation result overflow. 
+ +Signed-off-by: Weili Qian +Signed-off-by: Qi Tao +--- + drv/hisi_sec.c | 59 ++++++++++++++++++++++++++++++-------------------- + 1 file changed, 36 insertions(+), 23 deletions(-) + +diff --git a/drv/hisi_sec.c b/drv/hisi_sec.c +index d9bb7e4..0a1bcc1 100644 +--- a/drv/hisi_sec.c ++++ b/drv/hisi_sec.c +@@ -2255,9 +2255,9 @@ static int aead_len_check(struct wd_aead_msg *msg, enum sec_bd_type type) + } + } + +- if (unlikely(msg->in_bytes + msg->assoc_bytes > MAX_INPUT_DATA_LEN)) { +- WD_ERR("aead input data length is too long, size = %u\n", +- msg->in_bytes + msg->assoc_bytes); ++ if (unlikely((__u64)msg->in_bytes + msg->assoc_bytes > MAX_INPUT_DATA_LEN)) { ++ WD_ERR("aead input data length is too long, size = %llu\n", ++ (__u64)msg->in_bytes + msg->assoc_bytes); + return -WD_EINVAL; + } + +@@ -2516,6 +2516,11 @@ int aead_msg_state_check(struct wd_aead_msg *msg) + } + } + ++ if (unlikely(msg->msg_state != AEAD_MSG_BLOCK && msg->data_fmt == WD_SGL_BUF)) { ++ WD_ERR("invalid: sgl mode not supports stream mode!\n"); ++ return -WD_EINVAL; ++ } ++ + return 0; + } + +@@ -2555,10 +2560,12 @@ static int hisi_sec_aead_send(struct wd_alg_driver *drv, handle_t ctx, void *wd_ + fill_aead_bd2_addr(msg, &sqe); + + ret = fill_stream_bd2(msg, &sqe); +- if (ret == WD_SOFT_COMPUTING) +- return 0; +- else if (unlikely(ret)) +- return ret; ++ if (ret == WD_SOFT_COMPUTING) { ++ ret = 0; ++ goto put_sgl; ++ } else if (unlikely(ret)) { ++ goto put_sgl; ++ } + + hisi_set_msg_id(h_qp, &msg->tag); + sqe.type2.tag = (__u16)msg->tag; +@@ -2568,14 +2575,16 @@ static int hisi_sec_aead_send(struct wd_alg_driver *drv, handle_t ctx, void *wd_ + if (ret != -WD_EBUSY) + WD_ERR("aead send sqe is err(%d)!\n", ret); + +- if (msg->data_fmt == WD_SGL_BUF) +- hisi_sec_put_sgl(h_qp, msg->alg_type, msg->in, +- msg->out); +- +- return ret; ++ goto put_sgl; + } + + return 0; ++ ++put_sgl: ++ if (msg->data_fmt == WD_SGL_BUF) ++ hisi_sec_put_sgl(h_qp, msg->alg_type, msg->in, msg->out); ++ ++ return ret; + } + + 
static void update_stream_counter(struct wd_aead_msg *recv_msg) +@@ -2629,7 +2638,7 @@ static void parse_aead_bd2(struct hisi_qp *qp, struct hisi_sec_sqe *sqe, + temp_msg = recv_msg; + } + +- update_stream_counter(recv_msg); ++ update_stream_counter(temp_msg); + + if (unlikely(recv_msg->result != WD_SUCCESS)) + dump_sec_msg(temp_msg, "aead"); +@@ -2946,10 +2955,12 @@ static int hisi_sec_aead_send_v3(struct wd_alg_driver *drv, handle_t ctx, void * + + fill_aead_bd3_addr(msg, &sqe); + ret = fill_stream_bd3(h_qp, msg, &sqe); +- if (ret == WD_SOFT_COMPUTING) +- return 0; +- else if (unlikely(ret)) +- return ret; ++ if (ret == WD_SOFT_COMPUTING) { ++ ret = 0; ++ goto put_sgl; ++ } else if (unlikely(ret)) { ++ goto put_sgl; ++ } + + hisi_set_msg_id(h_qp, &msg->tag); + sqe.tag = msg->tag; +@@ -2958,14 +2969,16 @@ static int hisi_sec_aead_send_v3(struct wd_alg_driver *drv, handle_t ctx, void * + if (ret != -WD_EBUSY) + WD_ERR("aead send sqe is err(%d)!\n", ret); + +- if (msg->data_fmt == WD_SGL_BUF) +- hisi_sec_put_sgl(h_qp, msg->alg_type, msg->in, +- msg->out); +- +- return ret; ++ goto put_sgl; + } + + return 0; ++ ++put_sgl: ++ if (msg->data_fmt == WD_SGL_BUF) ++ hisi_sec_put_sgl(h_qp, msg->alg_type, msg->in, msg->out); ++ ++ return ret; + } + + static void parse_aead_bd3(struct hisi_qp *qp, struct hisi_sec_sqe3 *sqe, +@@ -3005,7 +3018,7 @@ static void parse_aead_bd3(struct hisi_qp *qp, struct hisi_sec_sqe3 *sqe, + temp_msg = recv_msg; + } + +- update_stream_counter(recv_msg); ++ update_stream_counter(temp_msg); + + if (unlikely(recv_msg->result != WD_SUCCESS)) + dump_sec_msg(temp_msg, "aead"); +-- +2.25.1 + diff --git a/0016-uadk-digest-modify-spelling-errors.patch b/0016-uadk-digest-modify-spelling-errors.patch deleted file mode 100644 index 665f6f1..0000000 --- a/0016-uadk-digest-modify-spelling-errors.patch +++ /dev/null @@ -1,184 +0,0 @@ -From f7d1cbe8850ceae6de4aed1fd5fa81f029da753f Mon Sep 17 00:00:00 2001 -From: Zhiqi Song -Date: Fri, 15 Mar 2024 15:22:06 +0800 
-Subject: [PATCH 16/44] uadk/digest: modify spelling errors - -Modify spelling errors related to digest stream mode. - -Signed-off-by: Zhiqi Song ---- - drv/hisi_sec.c | 8 ++++---- - drv/isa_ce_sm3.c | 4 ++-- - include/drv/wd_digest_drv.h | 6 +++--- - v1/test/hisi_sec_test/test_hisi_sec.c | 5 ++--- - v1/test/hisi_sec_test_sgl/test_hisi_sec_sgl.c | 5 ++--- - wd_digest.c | 2 +- - 6 files changed, 14 insertions(+), 16 deletions(-) - -diff --git a/drv/hisi_sec.c b/drv/hisi_sec.c -index 9da21a8..ac62109 100644 ---- a/drv/hisi_sec.c -+++ b/drv/hisi_sec.c -@@ -1553,7 +1553,7 @@ static int fill_digest_long_hash(handle_t h_qp, struct wd_digest_msg *msg, - if (ret) - return ret; - -- if (block_type == HASH_FRIST_BLOCK) { -+ if (block_type == HASH_FIRST_BLOCK) { - /* Long hash first */ - sqe->ai_apd_cs = AI_GEN_INNER; - sqe->ai_apd_cs |= AUTHPAD_NOPAD << AUTHPAD_OFFSET; -@@ -1635,7 +1635,7 @@ static int digest_bd2_type_check(struct wd_digest_msg *msg) - enum hash_block_type type = get_hash_block_type(msg); - - /* Long hash first and middle bd */ -- if (type == HASH_FRIST_BLOCK || type == HASH_MIDDLE_BLOCK) { -+ if (type == HASH_FIRST_BLOCK || type == HASH_MIDDLE_BLOCK) { - WD_ERR("hardware v2 not supports 0 size in long hash!\n"); - return -WD_EINVAL; - } -@@ -1653,7 +1653,7 @@ static int digest_bd3_type_check(struct wd_digest_msg *msg) - { - enum hash_block_type type = get_hash_block_type(msg); - /* Long hash first and middle bd */ -- if (type == HASH_FRIST_BLOCK || type == HASH_MIDDLE_BLOCK) { -+ if (type == HASH_FIRST_BLOCK || type == HASH_MIDDLE_BLOCK) { - WD_ERR("invalid: hardware v3 not supports 0 size in long hash!\n"); - return -WD_EINVAL; - } -@@ -1906,7 +1906,7 @@ static int fill_digest_long_hash3(handle_t h_qp, struct wd_digest_msg *msg, - if (ret) - return ret; - -- if (block_type == HASH_FRIST_BLOCK) { -+ if (block_type == HASH_FIRST_BLOCK) { - /* Long hash first */ - sqe->auth_mac_key |= AI_GEN_INNER << SEC_AI_GEN_OFFSET_V3; - sqe->stream_scene.stream_auth_pad = 
AUTHPAD_NOPAD; -diff --git a/drv/isa_ce_sm3.c b/drv/isa_ce_sm3.c -index f16bdd3..0309861 100644 ---- a/drv/isa_ce_sm3.c -+++ b/drv/isa_ce_sm3.c -@@ -187,7 +187,7 @@ static int do_sm3_ce(struct wd_digest_msg *msg, __u8 *out_digest) - sm3_ce_update(&sctx, data, data_len, sm3_ce_block_compress); - sm3_ce_final(&sctx, out_digest, sm3_ce_block_compress); - break; -- case HASH_FRIST_BLOCK: -+ case HASH_FIRST_BLOCK: - sm3_ce_init(&sctx); - sm3_ce_update(&sctx, data, data_len, sm3_ce_block_compress); - trans_output_result(out_digest, sctx.word_reg); -@@ -305,7 +305,7 @@ static int do_hmac_sm3_ce(struct wd_digest_msg *msg, __u8 *out_hmac) - sm3_ce_hmac_update(&hctx, data, data_len); - sm3_ce_hmac_final(&hctx, out_hmac); - break; -- case HASH_FRIST_BLOCK: -+ case HASH_FIRST_BLOCK: - sm3_ce_hmac_init(&hctx, key, key_len); - sm3_ce_hmac_update(&hctx, data, data_len); - trans_output_result(out_hmac, hctx.sctx.word_reg); -diff --git a/include/drv/wd_digest_drv.h b/include/drv/wd_digest_drv.h -index 8a4aa0b..a55ef5b 100644 ---- a/include/drv/wd_digest_drv.h -+++ b/include/drv/wd_digest_drv.h -@@ -11,7 +11,7 @@ extern "C" { - #endif - - enum hash_block_type { -- HASH_FRIST_BLOCK, -+ HASH_FIRST_BLOCK, - HASH_MIDDLE_BLOCK, - HASH_END_BLOCK, - HASH_SINGLE_BLOCK, -@@ -65,13 +65,13 @@ static inline enum hash_block_type get_hash_block_type(struct wd_digest_msg *msg - { - /* - * [has_next , iv_bytes] -- * [ 1 , 0 ] = long hash(frist bd) -+ * [ 1 , 0 ] = long hash(first bd) - * [ 1 , 1 ] = long hash(middle bd) - * [ 0 , 1 ] = long hash(end bd) - * [ 0 , 0 ] = block hash(single bd) - */ - if (msg->has_next && !msg->iv_bytes) -- return HASH_FRIST_BLOCK; -+ return HASH_FIRST_BLOCK; - else if (msg->has_next && msg->iv_bytes) - return HASH_MIDDLE_BLOCK; - else if (!msg->has_next && msg->iv_bytes) -diff --git a/v1/test/hisi_sec_test/test_hisi_sec.c b/v1/test/hisi_sec_test/test_hisi_sec.c -index 824fe9e..7d94332 100644 ---- a/v1/test/hisi_sec_test/test_hisi_sec.c -+++ 
b/v1/test/hisi_sec_test/test_hisi_sec.c -@@ -1462,7 +1462,7 @@ static int sec_cipher_async_test(int thread_num, __u64 lcore_mask, - SEC_TST_PRT("%s(): create pool fail!\n", __func__); - return -ENOMEM; - } -- /* frist create the async poll thread! */ -+ /* first create the async poll thread! */ - test_thrds_data[0].pool = pool; - test_thrds_data[0].q = &q; - test_thrds_data[0].thread_num = 1; -@@ -2069,7 +2069,7 @@ static int sec_aead_async_test(int thd_num, __u64 lcore_mask, - SEC_TST_PRT("%s(): create pool fail!\n", __func__); - return -ENOMEM; - } -- /* frist create the async poll thread! */ -+ /* first create the async poll thread! */ - test_thrds_data[0].pool = pool; - test_thrds_data[0].q = &q; - test_thrds_data[0].thread_num = 1; -@@ -2082,7 +2082,6 @@ static int sec_aead_async_test(int thd_num, __u64 lcore_mask, - return ret; - } - -- //Ïß³ÌÊý Óë°óºË - if (_get_one_bits(lcore_mask) == 0 && - _get_one_bits(hcore_mask) == 0) - cnt = thd_num; -diff --git a/v1/test/hisi_sec_test_sgl/test_hisi_sec_sgl.c b/v1/test/hisi_sec_test_sgl/test_hisi_sec_sgl.c -index b7513d1..b13915f 100644 ---- a/v1/test/hisi_sec_test_sgl/test_hisi_sec_sgl.c -+++ b/v1/test/hisi_sec_test_sgl/test_hisi_sec_sgl.c -@@ -1733,7 +1733,7 @@ static int sec_cipher_async_test(int thread_num, __u64 lcore_mask, - SEC_TST_PRT("%s(): create pool fail!\n", __func__); - return -ENOMEM; - } -- /* frist create the async poll thread! */ -+ /* first create the async poll thread! */ - test_thrds_data[0].pool = pool; - test_thrds_data[0].q = &q; - test_thrds_data[0].thread_num = 1; -@@ -2640,7 +2640,7 @@ static int sec_aead_async_test(int thd_num, __u64 lcore_mask, - return -ENOMEM; - } - -- /* frist create the async poll thread! */ -+ /* first create the async poll thread! 
*/ - test_thrds_data[0].pool = pool; - test_thrds_data[0].q = &q; - test_thrds_data[0].thread_num = 1; -@@ -2654,7 +2654,6 @@ static int sec_aead_async_test(int thd_num, __u64 lcore_mask, - return ret; - } - -- //Ïß³ÌÊý Óë°óºË - if (_get_one_bits(lcore_mask) == 0 && - _get_one_bits(hcore_mask) == 0) - cnt = thd_num; -diff --git a/wd_digest.c b/wd_digest.c -index 491502a..10ac080 100644 ---- a/wd_digest.c -+++ b/wd_digest.c -@@ -53,7 +53,7 @@ struct wd_digest_stream_data { - /* Total data length for stream mode */ - __u64 long_data_len; - /* -- * Notify the stream message state, zero is frist message, -+ * Notify the stream message state, zero is first message, - * non-zero is middle or final message. - */ - int msg_state; --- -2.25.1 - diff --git a/0017-uadk-drv-hisi-fix-failed-to-init-drv-after-fork.patch b/0017-uadk-drv-hisi-fix-failed-to-init-drv-after-fork.patch deleted file mode 100644 index 992f1a7..0000000 --- a/0017-uadk-drv-hisi-fix-failed-to-init-drv-after-fork.patch +++ /dev/null @@ -1,152 +0,0 @@ -From 6a6831101e99323fc5e9b63baa7e86ae8ac244ee Mon Sep 17 00:00:00 2001 -From: Yang Shen -Date: Thu, 22 Feb 2024 15:23:33 +0800 -Subject: [PATCH 17/44] uadk: drv/hisi - fix failed to init drv after fork - -The drivers initialization function use 'drv.priv' to forbid reinit. -But if the child process is forked after the parent process has -initialized, it can't work due to the drivers go to wrong branch on -initialization. - -And the algorithms initialization function is already protected -against re-entry. So it is unnecessary to check 'drv.priv' in driver. 
- -Signed-off-by: Yang Shen ---- - drv/hisi_comp.c | 7 +------ - drv/hisi_hpre.c | 34 ++++++++++++++-------------------- - drv/hisi_sec.c | 7 +------ - 3 files changed, 16 insertions(+), 32 deletions(-) - -diff --git a/drv/hisi_comp.c b/drv/hisi_comp.c -index 2cb9a6b..a1af567 100644 ---- a/drv/hisi_comp.c -+++ b/drv/hisi_comp.c -@@ -787,18 +787,13 @@ static void hisi_zip_sqe_ops_adapt(handle_t h_qp) - - static int hisi_zip_init(struct wd_alg_driver *drv, void *conf) - { -- struct hisi_zip_ctx *priv = (struct hisi_zip_ctx *)drv->priv; - struct wd_ctx_config_internal *config = conf; - struct hisi_qm_priv qm_priv; -+ struct hisi_zip_ctx *priv; - handle_t h_qp = 0; - handle_t h_ctx; - __u32 i, j; - -- if (priv) { -- /* return if already inited */ -- return 0; -- } -- - if (!config->ctx_num) { - WD_ERR("invalid: zip init config ctx num is 0!\n"); - return -WD_EINVAL; -diff --git a/drv/hisi_hpre.c b/drv/hisi_hpre.c -index 049e60e..babc795 100644 ---- a/drv/hisi_hpre.c -+++ b/drv/hisi_hpre.c -@@ -527,62 +527,56 @@ out: - static int hpre_rsa_dh_init(struct wd_alg_driver *drv, void *conf) - { - struct wd_ctx_config_internal *config = (struct wd_ctx_config_internal *)conf; -- struct hisi_hpre_ctx *priv = (struct hisi_hpre_ctx *)drv->priv; - struct hisi_qm_priv qm_priv; -+ struct hisi_hpre_ctx *priv; - int ret; - -- if (priv) { -- /* return if already inited */ -- return WD_SUCCESS; -- } -- - if (!config->ctx_num) { - WD_ERR("invalid: hpre rsa/dh init config ctx num is 0!\n"); - return -WD_EINVAL; - } - -- drv->priv = malloc(sizeof(struct hisi_hpre_ctx)); -- if (!drv->priv) -+ priv = malloc(sizeof(struct hisi_hpre_ctx)); -+ if (!priv) - return -WD_EINVAL; - - qm_priv.op_type = HPRE_HW_V2_ALG_TYPE; -- ret = hpre_init_qm_priv(config, drv->priv, &qm_priv); -+ ret = hpre_init_qm_priv(config, priv, &qm_priv); - if (ret) { -- free(drv->priv); -+ free(priv); - return ret; - } - -+ drv->priv = priv; -+ - return WD_SUCCESS; - } - - static int hpre_ecc_init(struct wd_alg_driver *drv, 
void *conf) - { - struct wd_ctx_config_internal *config = (struct wd_ctx_config_internal *)conf; -- struct hisi_hpre_ctx *priv = (struct hisi_hpre_ctx *)drv->priv; - struct hisi_qm_priv qm_priv; -+ struct hisi_hpre_ctx *priv; - int ret; - -- if (priv) { -- /* return if already inited */ -- return WD_SUCCESS; -- } -- - if (!config->ctx_num) { - WD_ERR("invalid: hpre ecc init config ctx num is 0!\n"); - return -WD_EINVAL; - } - -- drv->priv = malloc(sizeof(struct hisi_hpre_ctx)); -- if (!drv->priv) -+ priv = malloc(sizeof(struct hisi_hpre_ctx)); -+ if (!priv) - return -WD_EINVAL; - - qm_priv.op_type = HPRE_HW_V3_ECC_ALG_TYPE; -- ret = hpre_init_qm_priv(config, drv->priv, &qm_priv); -+ ret = hpre_init_qm_priv(config, priv, &qm_priv); - if (ret) { -- free(drv->priv); -+ free(priv); - return ret; - } - -+ drv->priv = priv; -+ - return WD_SUCCESS; - } - -diff --git a/drv/hisi_sec.c b/drv/hisi_sec.c -index ac62109..852340d 100644 ---- a/drv/hisi_sec.c -+++ b/drv/hisi_sec.c -@@ -3041,18 +3041,13 @@ static int hisi_sec_aead_recv_v3(struct wd_alg_driver *drv, handle_t ctx, void * - - static int hisi_sec_init(struct wd_alg_driver *drv, void *conf) - { -- struct hisi_sec_ctx *priv = (struct hisi_sec_ctx *)drv->priv; - struct wd_ctx_config_internal *config = conf; - struct hisi_qm_priv qm_priv; -+ struct hisi_sec_ctx *priv; - handle_t h_qp = 0; - handle_t h_ctx; - __u32 i, j; - -- if (priv) { -- /* return if already inited */ -- return 0; -- } -- - if (!config->ctx_num) { - WD_ERR("invalid: sec init config ctx num is 0!\n"); - return -WD_EINVAL; --- -2.25.1 - diff --git a/0018-wd_rsa-fix-wd_rsa_common_uninit-re-entry.patch b/0018-wd_rsa-fix-wd_rsa_common_uninit-re-entry.patch deleted file mode 100644 index c1a6a98..0000000 --- a/0018-wd_rsa-fix-wd_rsa_common_uninit-re-entry.patch +++ /dev/null @@ -1,33 +0,0 @@ -From 2366ff7e765c5c451ab761cd0f9f9f6fbde4add3 Mon Sep 17 00:00:00 2001 -From: Zhangfei Gao -Date: Thu, 1 Feb 2024 14:25:14 +0000 -Subject: [PATCH 18/44] wd_rsa: fix 
wd_rsa_common_uninit re-entry - -Fix wd_rsa_common_uninit re-entry - -Fixs: 3fc344a drivers alloc and free resources by themself -Signed-off-by: Zhangfei Gao ---- - wd_rsa.c | 6 ++++++ - 1 file changed, 6 insertions(+) - -diff --git a/wd_rsa.c b/wd_rsa.c -index de0b796..8e51177 100644 ---- a/wd_rsa.c -+++ b/wd_rsa.c -@@ -167,6 +167,12 @@ out_clear_ctx_config: - - static int wd_rsa_common_uninit(void) - { -+ enum wd_status status; -+ -+ wd_alg_get_init(&wd_rsa_setting.status, &status); -+ if (status == WD_UNINIT) -+ return -WD_EINVAL; -+ - /* uninit async request pool */ - wd_uninit_async_request_pool(&wd_rsa_setting.pool); - --- -2.25.1 - diff --git a/0019-wd_dh-Fix-wd_aead_uninit-re-entry.patch b/0019-wd_dh-Fix-wd_aead_uninit-re-entry.patch deleted file mode 100644 index 5f547ed..0000000 --- a/0019-wd_dh-Fix-wd_aead_uninit-re-entry.patch +++ /dev/null @@ -1,32 +0,0 @@ -From 72d2f8d98ee7322463f66be3aa8dea7a9e0b0811 Mon Sep 17 00:00:00 2001 -From: Zhangfei Gao -Date: Tue, 19 Mar 2024 02:37:51 +0000 -Subject: [PATCH 19/44] wd_dh: Fix wd_aead_uninit re-entry - -Check status for the re-entry - -Signed-off-by: Zhangfei Gao ---- - wd_dh.c | 6 ++++++ - 1 file changed, 6 insertions(+) - -diff --git a/wd_dh.c b/wd_dh.c -index d23bb61..4d08de6 100644 ---- a/wd_dh.c -+++ b/wd_dh.c -@@ -127,6 +127,12 @@ out_clear_ctx_config: - - static int wd_dh_common_uninit(void) - { -+ enum wd_status status; -+ -+ wd_alg_get_init(&wd_dh_setting.status, &status); -+ if (status == WD_UNINIT) -+ return -WD_EINVAL; -+ - /* uninit async request pool */ - wd_uninit_async_request_pool(&wd_dh_setting.pool); - --- -2.25.1 - diff --git a/0020-wd_ecc-Fix-wd_ecc_uninit-re-entry.patch b/0020-wd_ecc-Fix-wd_ecc_uninit-re-entry.patch deleted file mode 100644 index 6d0e453..0000000 --- a/0020-wd_ecc-Fix-wd_ecc_uninit-re-entry.patch +++ /dev/null @@ -1,32 +0,0 @@ -From 105fec19d2f5008009504e9e051dc2aec42bd113 Mon Sep 17 00:00:00 2001 -From: Zhangfei Gao -Date: Tue, 19 Mar 2024 02:40:09 +0000 -Subject: [PATCH 
20/44] wd_ecc: Fix wd_ecc_uninit re-entry - -Check status for the re-entry - -Signed-off-by: Zhangfei Gao ---- - wd_ecc.c | 6 ++++++ - 1 file changed, 6 insertions(+) - -diff --git a/wd_ecc.c b/wd_ecc.c -index 9aa1519..e75bca0 100644 ---- a/wd_ecc.c -+++ b/wd_ecc.c -@@ -190,6 +190,12 @@ out_clear_ctx_config: - - static int wd_ecc_common_uninit(void) - { -+ enum wd_status status; -+ -+ wd_alg_get_init(&wd_ecc_setting.status, &status); -+ if (status == WD_UNINIT) -+ return -WD_EINVAL; -+ - /* uninit async request pool */ - wd_uninit_async_request_pool(&wd_ecc_setting.pool); - --- -2.25.1 - diff --git a/0021-wd_digest-uninit-check-status-in-one-func.patch b/0021-wd_digest-uninit-check-status-in-one-func.patch deleted file mode 100644 index 23bf5b6..0000000 --- a/0021-wd_digest-uninit-check-status-in-one-func.patch +++ /dev/null @@ -1,71 +0,0 @@ -From f690d2e248be5270b9cdda6f2b8af18af580ab49 Mon Sep 17 00:00:00 2001 -From: Zhangfei Gao -Date: Tue, 19 Mar 2024 02:42:39 +0000 -Subject: [PATCH 21/44] wd_digest: uninit check status in one func - -To simplify code, checking status in one func - -Signed-off-by: Zhangfei Gao ---- - wd_digest.c | 23 ++++++++++++++--------- - 1 file changed, 14 insertions(+), 9 deletions(-) - -diff --git a/wd_digest.c b/wd_digest.c -index 10ac080..0df7204 100644 ---- a/wd_digest.c -+++ b/wd_digest.c -@@ -296,23 +296,29 @@ out_clear_init: - return ret; - } - --static void wd_digest_uninit_nolock(void) -+static int wd_digest_uninit_nolock(void) - { -+ enum wd_status status; -+ -+ wd_alg_get_init(&wd_digest_setting.status, &status); -+ if (status == WD_UNINIT) -+ return -WD_EINVAL; -+ - wd_uninit_async_request_pool(&wd_digest_setting.pool); - wd_clear_sched(&wd_digest_setting.sched); - wd_alg_uninit_driver(&wd_digest_setting.config, - wd_digest_setting.driver); -+ return 0; - } - - void wd_digest_uninit(void) - { -- enum wd_status status; -+ int ret; - -- wd_alg_get_init(&wd_digest_setting.status, &status); -- if (status == WD_UNINIT) -+ ret = 
wd_digest_uninit_nolock(); -+ if (ret) - return; - -- wd_digest_uninit_nolock(); - wd_digest_close_driver(); - wd_alg_clear_init(&wd_digest_setting.status); - } -@@ -419,13 +425,12 @@ out_uninit: - - void wd_digest_uninit2(void) - { -- enum wd_status status; -+ int ret; - -- wd_alg_get_init(&wd_digest_setting.status, &status); -- if (status == WD_UNINIT) -+ ret = wd_digest_uninit_nolock(); -+ if (ret) - return; - -- wd_digest_uninit_nolock(); - wd_alg_attrs_uninit(&wd_digest_init_attrs); - wd_alg_drv_unbind(wd_digest_setting.driver); - wd_dlclose_drv(wd_digest_setting.dlh_list); --- -2.25.1 - diff --git a/0022-wd_aead-uninit-check-status-in-one-func.patch b/0022-wd_aead-uninit-check-status-in-one-func.patch deleted file mode 100644 index e5e6a53..0000000 --- a/0022-wd_aead-uninit-check-status-in-one-func.patch +++ /dev/null @@ -1,72 +0,0 @@ -From e726680f9c8c9bfcf143d529be34d5b7ce2157be Mon Sep 17 00:00:00 2001 -From: Zhangfei Gao -Date: Tue, 19 Mar 2024 02:44:21 +0000 -Subject: [PATCH 22/44] wd_aead: uninit check status in one func - -To simplify code, checking status in one func - -Signed-off-by: Zhangfei Gao ---- - wd_aead.c | 24 +++++++++++++++--------- - 1 file changed, 15 insertions(+), 9 deletions(-) - -diff --git a/wd_aead.c b/wd_aead.c -index 34a3b86..57daa80 100644 ---- a/wd_aead.c -+++ b/wd_aead.c -@@ -485,23 +485,30 @@ out_clear_init: - return ret; - } - --static void wd_aead_uninit_nolock(void) -+static int wd_aead_uninit_nolock(void) - { -+ enum wd_status status; -+ -+ wd_alg_get_init(&wd_aead_setting.status, &status); -+ if (status == WD_UNINIT) -+ return -WD_EINVAL; -+ - wd_uninit_async_request_pool(&wd_aead_setting.pool); - wd_clear_sched(&wd_aead_setting.sched); - wd_alg_uninit_driver(&wd_aead_setting.config, - wd_aead_setting.driver); -+ -+ return 0; - } - - void wd_aead_uninit(void) - { -- enum wd_status status; -+ int ret; - -- wd_alg_get_init(&wd_aead_setting.status, &status); -- if (status == WD_UNINIT) -+ ret = wd_aead_uninit_nolock(); -+ if 
(ret) - return; - -- wd_aead_uninit_nolock(); - wd_aead_close_driver(); - wd_alg_clear_init(&wd_aead_setting.status); - } -@@ -614,13 +621,12 @@ out_uninit: - - void wd_aead_uninit2(void) - { -- enum wd_status status; -+ int ret; - -- wd_alg_get_init(&wd_aead_setting.status, &status); -- if (status == WD_UNINIT) -+ ret = wd_aead_uninit_nolock(); -+ if (ret) - return; - -- wd_aead_uninit_nolock(); - wd_alg_attrs_uninit(&wd_aead_init_attrs); - wd_alg_drv_unbind(wd_aead_setting.driver); - wd_dlclose_drv(wd_aead_setting.dlh_list); --- -2.25.1 - diff --git a/0023-makefile-install-wd_zlibwrapper.h-to-system.patch b/0023-makefile-install-wd_zlibwrapper.h-to-system.patch deleted file mode 100644 index 7f29306..0000000 --- a/0023-makefile-install-wd_zlibwrapper.h-to-system.patch +++ /dev/null @@ -1,39 +0,0 @@ -From 9d4a68db517d42ac3cb9ae66aabfb2ea73303344 Mon Sep 17 00:00:00 2001 -From: Zhangfei Gao -Date: Sun, 10 Mar 2024 13:57:53 +0000 -Subject: [PATCH 23/44] makefile: install wd_zlibwrapper.h to system - -wd_zlibwrapper.h is requird by other sub-system, so move -it to system header folder like /usr/local/include/uadk/ - -Signed-off-by: Zhangfei Gao ---- - Makefile.am | 5 +++-- - 1 file changed, 3 insertions(+), 2 deletions(-) - -diff --git a/Makefile.am b/Makefile.am -index 19eab30..cd3d7e5 100644 ---- a/Makefile.am -+++ b/Makefile.am -@@ -35,7 +35,8 @@ AM_CFLAGS+= -DUADK_RELEASED_TIME="\"Released ${MONTH} ${DAY}, ${YEAR}\"" - pkginclude_HEADERS = include/wd.h include/wd_cipher.h include/wd_aead.h \ - include/wd_comp.h include/wd_dh.h include/wd_digest.h \ - include/wd_rsa.h include/uacce.h include/wd_alg_common.h \ -- include/wd_ecc.h include/wd_sched.h include/wd_alg.h -+ include/wd_ecc.h include/wd_sched.h include/wd_alg.h \ -+ include/wd_zlibwrapper.h - - nobase_pkginclude_HEADERS = v1/wd.h v1/wd_cipher.h v1/wd_aead.h v1/uacce.h v1/wd_dh.h \ - v1/wd_digest.h v1/wd_rsa.h v1/wd_bmm.h -@@ -67,7 +68,7 @@ libwd_la_SOURCES=wd.c wd_mempool.c wd.h wd_alg.c wd_alg.h \ - 
v1/drv/hisi_rng_udrv.c v1/drv/hisi_rng_udrv.h - - libwd_comp_la_SOURCES=wd_comp.c wd_comp.h wd_comp_drv.h wd_util.c wd_util.h \ -- wd_sched.c wd_sched.h wd.c wd.h wd_zlibwrapper.c wd_zlibwrapper.h -+ wd_sched.c wd_sched.h wd.c wd.h wd_zlibwrapper.c - - libhisi_zip_la_SOURCES=drv/hisi_comp.c hisi_comp.h drv/hisi_qm_udrv.c \ - hisi_qm_udrv.h wd_comp_drv.h --- -2.25.1 - diff --git a/0024-conf-fix-includedir.patch b/0024-conf-fix-includedir.patch deleted file mode 100644 index 002989d..0000000 --- a/0024-conf-fix-includedir.patch +++ /dev/null @@ -1,33 +0,0 @@ -From 9590cd2df74a0bf82fa4d3420e851792195f782a Mon Sep 17 00:00:00 2001 -From: Zhangfei Gao -Date: Sun, 10 Mar 2024 14:02:03 +0000 -Subject: [PATCH 24/44] conf: fix includedir - -pkgincludedir already appended $(PACKAGE) [1], so no need -adding "uadk". Otherwise, header files will be installed to -"/usr/local/include/uadk/uadk/" - -[1] https://www.sourceware.org/autobook/autobook/autobook_76.html -pkgincludedir -This is a convenience variable whose value is -"$(includedir)/$(PACKAGE)". 
- -Signed-off-by: Zhangfei Gao ---- - conf.sh | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/conf.sh b/conf.sh -index 59af821..c361fbc 100755 ---- a/conf.sh -+++ b/conf.sh -@@ -16,5 +16,5 @@ ac_cv_func_malloc_0_nonnull=yes ac_cv_func_realloc_0_nonnull=yes ./configure \ - --enable-perf=yes \ - --host aarch64-linux-gnu \ - --target aarch64-linux-gnu \ -- --includedir=/usr/local/include/uadk \ -+ --includedir=/usr/local/include/ \ - $COMPILE_TYPE --- -2.25.1 - diff --git a/0025-cipher-add-support-for-SM4-CBC-and-CTR-modes-in-CE-i.patch b/0025-cipher-add-support-for-SM4-CBC-and-CTR-modes-in-CE-i.patch deleted file mode 100644 index 7fbb8f4..0000000 --- a/0025-cipher-add-support-for-SM4-CBC-and-CTR-modes-in-CE-i.patch +++ /dev/null @@ -1,1170 +0,0 @@ -From eec2accd50fffe1399151112f53f4061b0eef2f0 Mon Sep 17 00:00:00 2001 -From: Wenkai Lin -Date: Wed, 20 Mar 2024 16:11:22 +0800 -Subject: [PATCH 25/44] cipher: add support for SM4 CBC and CTR modes in CE - instruction - -This patch implements the CE instruction using SM4 CBC and CTR modes, -and includes the necessary logic for mode-specific operations, -such as generating initialization vectors (IV) and handling chaining -and counter values. 
- -Signed-off-by: Wenkai Lin -Signed-off-by: Qi Tao ---- - Makefile.am | 5 +- - drv/isa_ce_sm4.c | 235 +++++++++++++ - drv/isa_ce_sm4.h | 38 ++ - drv/isa_ce_sm4_armv8.S | 774 +++++++++++++++++++++++++++++++++++++++++ - v1/wd.c | 3 +- - v1/wd_rng.c | 4 +- - wd_cipher.c | 4 +- - 7 files changed, 1056 insertions(+), 7 deletions(-) - create mode 100644 drv/isa_ce_sm4.c - create mode 100644 drv/isa_ce_sm4.h - create mode 100644 drv/isa_ce_sm4_armv8.S - -diff --git a/Makefile.am b/Makefile.am -index cd3d7e5..f78ad14 100644 ---- a/Makefile.am -+++ b/Makefile.am -@@ -78,6 +78,7 @@ libwd_crypto_la_SOURCES=wd_cipher.c wd_cipher.h wd_cipher_drv.h \ - wd_rsa.c wd_rsa.h wd_rsa_drv.h \ - wd_dh.c wd_dh.h wd_dh_drv.h \ - wd_ecc.c wd_ecc.h wd_ecc_drv.h \ -+ arm_arch_ce.h isa_ce_sm3.h isa_ce_sm4.h \ - wd_digest.c wd_digest.h wd_digest_drv.h \ - wd_util.c wd_util.h \ - wd_sched.c wd_sched.h \ -@@ -90,8 +91,8 @@ libhisi_sec_la_SOURCES=drv/hisi_sec.c drv/hisi_qm_udrv.c \ - libhisi_hpre_la_SOURCES=drv/hisi_hpre.c drv/hisi_qm_udrv.c \ - hisi_qm_udrv.h - --libisa_ce_la_SOURCES=drv/isa_ce_sm3.c drv/isa_ce_sm3_armv8.S arm_arch_ce.h \ -- drv/isa_ce_sm3.h -+libisa_ce_la_SOURCES=arm_arch_ce.h drv/isa_ce_sm3.c drv/isa_ce_sm3_armv8.S isa_ce_sm3.h \ -+ drv/isa_ce_sm4.c drv/isa_ce_sm4_armv8.S drv/isa_ce_sm4.h - - if WD_STATIC_DRV - AM_CFLAGS += -DWD_STATIC_DRV -fPIC -diff --git a/drv/isa_ce_sm4.c b/drv/isa_ce_sm4.c -new file mode 100644 -index 0000000..e2d81de ---- /dev/null -+++ b/drv/isa_ce_sm4.c -@@ -0,0 +1,235 @@ -+// SPDX-License-Identifier: Apache-2.0 -+/* -+ * Copyright 2011-2022 The OpenSSL Project Authors. All Rights Reserved. -+ * -+ * Licensed under the Apache License 2.0 (the "License"). You may not use -+ * this file except in compliance with the License. You can obtain a copy -+ * in the file LICENSE in the source distribution or at -+ * https://www.openssl.org/source/license.html -+ */ -+/* -+ * Copyright 2024 Huawei Technologies Co.,Ltd. All rights reserved. 
-+ */ -+ -+#include "drv/wd_cipher_drv.h" -+#include "wd_cipher.h" -+#include "isa_ce_sm4.h" -+ -+#define SM4_ENCRYPT 1 -+#define SM4_DECRYPT 0 -+#define MSG_Q_DEPTH 1024 -+#define INCREASE_BYTES 12 -+#define SM4_BLOCK_SIZE 16 -+#define MAX_BLOCK_NUM (1U << 28) -+#define CTR96_SHIFT_BITS 8 -+ -+#define GETU32(p) \ -+ ((__u32)(p)[0] << 24 | (__u32)(p)[1] << 16 | (__u32)(p)[2] << 8 | (__u32)(p)[3]) -+#define PUTU32(p, v) \ -+ ((p)[0] = (__u8)((v) >> 24), (p)[1] = (__u8)((v) >> 16), \ -+ (p)[2] = (__u8)((v) >> 8), (p)[3] = (__u8)(v)) -+ -+static int isa_ce_init(struct wd_alg_driver *drv, void *conf) -+{ -+ struct wd_ctx_config_internal *config = conf; -+ struct sm4_ce_drv_ctx *sctx = drv->priv; -+ -+ config->epoll_en = 0; -+ memcpy(&sctx->config, config, sizeof(struct wd_ctx_config_internal)); -+ -+ return 0; -+} -+ -+static void isa_ce_exit(struct wd_alg_driver *drv) -+{ -+} -+ -+/* increment upper 96 bits of 128-bit counter by 1 */ -+static void ctr96_inc(__u8 *counter) -+{ -+ __u32 n = INCREASE_BYTES; -+ __u32 c = 1; -+ -+ do { -+ --n; -+ c += counter[n]; -+ counter[n] = (__u8)c; -+ c >>= CTR96_SHIFT_BITS; -+ } while (n); -+} -+ -+static void sm4_v8_ctr32_encrypt(__u8 *in, __u8 *out, -+ __u64 len, const struct SM4_KEY *key, __u8 *iv) -+{ -+ __u8 ecount_buf[SM4_BLOCK_SIZE] = {0}; -+ __u64 blocks, offset; -+ __u32 ctr32; -+ __u32 n = 0; -+ -+ ctr32 = GETU32(iv + INCREASE_BYTES); -+ while (len >= SM4_BLOCK_SIZE) { -+ blocks = len / SM4_BLOCK_SIZE; -+ /* -+ * 1<<28 is just a not-so-small yet not-so-large number... -+ * Below condition is practically never met, but it has to -+ * be checked for code correctness. -+ */ -+ if (blocks > MAX_BLOCK_NUM) -+ blocks = MAX_BLOCK_NUM; -+ /* -+ * As (*func) operates on 32-bit counter, caller -+ * has to handle overflow. 'if' below detects the -+ * overflow, which is then handled by limiting the -+ * amount of blocks to the exact overflow point... 
-+ */ -+ ctr32 += (__u32)blocks; -+ if (ctr32 < blocks) { -+ blocks -= ctr32; -+ ctr32 = 0; -+ } -+ sm4_v8_ctr32_encrypt_blocks(in, out, blocks, key, iv); -+ /* (*ctr) does not update iv, caller does: */ -+ PUTU32(iv + INCREASE_BYTES, ctr32); -+ /* ... overflow was detected, propagate carry. */ -+ if (ctr32 == 0) -+ ctr96_inc(iv); -+ offset = blocks * SM4_BLOCK_SIZE; -+ len -= offset; -+ out += offset; -+ in += offset; -+ } -+ if (len) { -+ sm4_v8_ctr32_encrypt_blocks(ecount_buf, ecount_buf, 1, key, iv); -+ ++ctr32; -+ PUTU32(iv + INCREASE_BYTES, ctr32); -+ if (ctr32 == 0) -+ ctr96_inc(iv); -+ while (len--) { -+ out[n] = in[n] ^ ecount_buf[n]; -+ ++n; -+ } -+ } -+} -+ -+static void sm4_ctr_encrypt(struct wd_cipher_msg *msg, const struct SM4_KEY *rkey_enc) -+{ -+ sm4_v8_ctr32_encrypt(msg->in, msg->out, msg->in_bytes, rkey_enc, msg->iv); -+} -+ -+static void sm4_cbc_encrypt(struct wd_cipher_msg *msg, const struct SM4_KEY *rkey_enc) -+{ -+ sm4_v8_cbc_encrypt(msg->in, msg->out, msg->in_bytes, rkey_enc, msg->iv, SM4_ENCRYPT); -+} -+ -+static void sm4_cbc_decrypt(struct wd_cipher_msg *msg, const struct SM4_KEY *rkey_dec) -+{ -+ sm4_v8_cbc_encrypt(msg->in, msg->out, msg->in_bytes, rkey_dec, msg->iv, SM4_DECRYPT); -+} -+ -+void sm4_set_encrypt_key(const __u8 *userKey, struct SM4_KEY *key) -+{ -+ sm4_v8_set_encrypt_key(userKey, key); -+} -+ -+void sm4_set_decrypt_key(const __u8 *userKey, struct SM4_KEY *key) -+{ -+ sm4_v8_set_decrypt_key(userKey, key); -+} -+ -+static int isa_ce_cipher_send(struct wd_alg_driver *drv, handle_t ctx, void *wd_msg) -+{ -+ struct wd_cipher_msg *msg = wd_msg; -+ struct SM4_KEY rkey; -+ -+ if (!msg) { -+ WD_ERR("invalid: input sm4 msg is NULL!\n"); -+ return -WD_EINVAL; -+ } -+ -+ if (msg->data_fmt == WD_SGL_BUF) { -+ WD_ERR("invalid: SM4 CE driver do not support sgl data format!\n"); -+ return -WD_EINVAL; -+ } -+ -+ if (msg->op_type == WD_CIPHER_ENCRYPTION || msg->mode == WD_CIPHER_CTR) -+ sm4_set_encrypt_key(msg->key, &rkey); -+ else -+ 
sm4_set_decrypt_key(msg->key, &rkey); -+ -+ switch (msg->mode) { -+ case WD_CIPHER_CBC: -+ if (msg->op_type == WD_CIPHER_ENCRYPTION) -+ sm4_cbc_encrypt(msg, &rkey); -+ else -+ sm4_cbc_decrypt(msg, &rkey); -+ break; -+ case WD_CIPHER_CTR: -+ sm4_ctr_encrypt(msg, &rkey); -+ break; -+ default: -+ WD_ERR("The current block cipher mode is not supported!\n"); -+ return -WD_EINVAL; -+ } -+ -+ return 0; -+} -+ -+static int isa_ce_cipher_recv(struct wd_alg_driver *drv, handle_t ctx, void *wd_msg) -+{ -+ return 0; -+} -+ -+static int cipher_send(struct wd_alg_driver *drv, handle_t ctx, void *msg) -+{ -+ return isa_ce_cipher_send(drv, ctx, msg); -+} -+ -+static int cipher_recv(struct wd_alg_driver *drv, handle_t ctx, void *msg) -+{ -+ return isa_ce_cipher_recv(drv, ctx, msg); -+} -+ -+#define GEN_CE_ALG_DRIVER(ce_alg_name, alg_type) \ -+{\ -+ .drv_name = "isa_ce_sm4",\ -+ .alg_name = (ce_alg_name),\ -+ .calc_type = UADK_ALG_CE_INSTR,\ -+ .priority = 200,\ -+ .op_type_num = 1,\ -+ .fallback = 0,\ -+ .init = isa_ce_init,\ -+ .exit = isa_ce_exit,\ -+ .send = alg_type##_send,\ -+ .recv = alg_type##_recv,\ -+} -+ -+static struct wd_alg_driver cipher_alg_driver[] = { -+ GEN_CE_ALG_DRIVER("cbc(sm4)", cipher), -+ GEN_CE_ALG_DRIVER("ctr(sm4)", cipher), -+}; -+ -+static void __attribute__((constructor)) isa_ce_probe(void) -+{ -+ __u32 alg_num, i; -+ int ret; -+ -+ WD_INFO("Info: register SM4 CE alg drivers!\n"); -+ -+ alg_num = ARRAY_SIZE(cipher_alg_driver); -+ for (i = 0; i < alg_num; i++) { -+ ret = wd_alg_driver_register(&cipher_alg_driver[i]); -+ if (ret && ret != -WD_ENODEV) -+ WD_ERR("Error: register SM4 CE %s failed!\n", -+ cipher_alg_driver[i].alg_name); -+ } -+} -+ -+static void __attribute__((destructor)) isa_ce_remove(void) -+{ -+ __u32 alg_num, i; -+ -+ WD_INFO("Info: unregister SM4 CE alg drivers!\n"); -+ alg_num = ARRAY_SIZE(cipher_alg_driver); -+ for (i = 0; i < alg_num; i++) -+ wd_alg_driver_unregister(&cipher_alg_driver[i]); -+} -diff --git a/drv/isa_ce_sm4.h 
b/drv/isa_ce_sm4.h -new file mode 100644 -index 0000000..0bc074d ---- /dev/null -+++ b/drv/isa_ce_sm4.h -@@ -0,0 +1,38 @@ -+/* SPDX-License-Identifier: Apache-2.0 */ -+/* Copyright 2024 Huawei Technologies Co.,Ltd. All rights reserved. */ -+ -+#ifndef __SM4_CE_DRV_H -+#define __SM4_CE_DRV_H -+ -+#pragma once -+#include -+#include "wd_alg_common.h" -+ -+#ifdef __cplusplus -+extern "C" { -+#endif -+ -+#define SM4_KEY_SCHEDULE 32 -+ -+struct SM4_KEY { -+ __u32 rk[SM4_KEY_SCHEDULE]; -+}; -+ -+struct sm4_ce_drv_ctx { -+ struct wd_ctx_config_internal config; -+}; -+ -+ -+void sm4_v8_set_encrypt_key(const unsigned char *userKey, struct SM4_KEY *key); -+void sm4_v8_set_decrypt_key(const unsigned char *userKey, struct SM4_KEY *key); -+void sm4_v8_cbc_encrypt(const unsigned char *in, unsigned char *out, -+ size_t length, const struct SM4_KEY *key, -+ unsigned char *ivec, const int enc); -+void sm4_v8_ctr32_encrypt_blocks(const unsigned char *in, unsigned char *out, -+ size_t len, const void *key, const unsigned char ivec[16]); -+ -+#ifdef __cplusplus -+} -+#endif -+ -+#endif /* __SM4_CE_DRV_H */ -diff --git a/drv/isa_ce_sm4_armv8.S b/drv/isa_ce_sm4_armv8.S -new file mode 100644 -index 0000000..d7d172a ---- /dev/null -+++ b/drv/isa_ce_sm4_armv8.S -@@ -0,0 +1,774 @@ -+/* SPDX-License-Identifier: Apache-2.0 */ -+/* -+ * Copyright 2011-2022 The OpenSSL Project Authors. All Rights Reserved. -+ * -+ * Licensed under the Apache License 2.0 (the "License"). You may not use -+ * this file except in compliance with the License. 
You can obtain a copy -+ * in the file LICENSE in the source distribution or at -+ * https://www.openssl.org/source/license.html -+ */ -+#include "../include/drv/arm_arch_ce.h" -+ -+.arch armv8-a+crypto -+ -+.irp b, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, \ -+ 16, 17, 18, 19, 20, 21, 22, 23 24, 25, 26, 27, 28, 29, 30, 31 -+ .set .Lv\b\().4s, \b -+.endr -+ -+.macro sm4e, vd, vn -+ .inst 0xcec08400 | (.L\vn << 5) | .L\vd -+.endm -+ -+.macro sm4ekey, vd, vn, vm -+ .inst 0xce60c800 | (.L\vm << 16) | (.L\vn << 5) | .L\vd -+.endm -+ -+.text -+.align 6 -+.Lck: -+.long 0x00070E15, 0x1C232A31, 0x383F464D, 0x545B6269 -+.long 0x70777E85, 0x8C939AA1, 0xA8AFB6BD, 0xC4CBD2D9 -+.long 0xE0E7EEF5, 0xFC030A11, 0x181F262D, 0x343B4249 -+.long 0x50575E65, 0x6C737A81, 0x888F969D, 0xA4ABB2B9 -+.long 0xC0C7CED5, 0xDCE3EAF1, 0xF8FF060D, 0x141B2229 -+.long 0x30373E45, 0x4C535A61, 0x686F767D, 0x848B9299 -+.long 0xA0A7AEB5, 0xBCC3CAD1, 0xD8DFE6ED, 0xF4FB0209 -+.long 0x10171E25, 0x2C333A41, 0x484F565D, 0x646B7279 -+.Lfk: -+.long 0xa3b1bac6, 0x56aa3350, 0x677d9197, 0xb27022dc -+.globl sm4_v8_set_encrypt_key -+.type sm4_v8_set_encrypt_key,%function -+.align 5 -+sm4_v8_set_encrypt_key: -+ AARCH64_VALID_CALL_TARGET -+ ld1 {v0.4s},[x0] -+ adr x2,.Lfk -+ ld1 {v24.4s},[x2] -+ adr x2,.Lck -+ ld1 {v16.4s,v17.4s,v18.4s,v19.4s},[x2],64 -+#ifndef __ARMEB__ -+ rev32 v0.16b,v0.16b -+#endif -+ ld1 {v20.4s,v21.4s,v22.4s,v23.4s},[x2] -+ eor v0.16b,v0.16b,v24.16b; -+ sm4ekey v0.4s,v0.4s,v16.4s; -+ sm4ekey v1.4s,v0.4s,v17.4s; -+ sm4ekey v2.4s,v1.4s,v18.4s; -+ sm4ekey v3.4s,v2.4s,v19.4s; -+ sm4ekey v4.4s,v3.4s,v20.4s; -+ st1 {v0.4s,v1.4s,v2.4s,v3.4s},[x1],64 -+ sm4ekey v5.4s,v4.4s,v21.4s; -+ sm4ekey v6.4s,v5.4s,v22.4s; -+ sm4ekey v7.4s,v6.4s,v23.4s; -+ st1 {v4.4s,v5.4s,v6.4s,v7.4s},[x1] -+ ret -+.size sm4_v8_set_encrypt_key,.-sm4_v8_set_encrypt_key -+.globl sm4_v8_set_decrypt_key -+.type sm4_v8_set_decrypt_key,%function -+.align 5 -+sm4_v8_set_decrypt_key: -+ AARCH64_VALID_CALL_TARGET -+ ld1 
{v7.4s},[x0] -+ adr x2,.Lfk -+ ld1 {v24.4s},[x2] -+ adr x2, .Lck -+ ld1 {v16.4s,v17.4s,v18.4s,v19.4s},[x2],64 -+#ifndef __ARMEB__ -+ rev32 v7.16b,v7.16b -+#endif -+ ld1 {v20.4s,v21.4s,v22.4s,v23.4s},[x2] -+ eor v7.16b, v7.16b,v24.16b; -+ sm4ekey v7.4s,v7.4s,v16.4s; -+ sm4ekey v6.4s,v7.4s,v17.4s; -+ sm4ekey v5.4s,v6.4s,v18.4s; -+ rev64 v7.4s,v7.4s -+ rev64 v6.4s,v6.4s -+ ext v7.16b,v7.16b,v7.16b,#8 -+ ext v6.16b,v6.16b,v6.16b,#8 -+ sm4ekey v4.4s,v5.4s,v19.4s; -+ sm4ekey v3.4s,v4.4s,v20.4s; -+ rev64 v5.4s,v5.4s -+ rev64 v4.4s,v4.4s -+ ext v5.16b,v5.16b,v5.16b,#8 -+ ext v4.16b,v4.16b,v4.16b,#8 -+ sm4ekey v2.4s,v3.4s,v21.4s; -+ sm4ekey v1.4s,v2.4s,v22.4s; -+ rev64 v3.4s,v3.4s -+ rev64 v2.4s,v2.4s -+ ext v3.16b,v3.16b,v3.16b,#8 -+ ext v2.16b,v2.16b,v2.16b,#8 -+ sm4ekey v0.4s,v1.4s,v23.4s; -+ rev64 v1.4s, v1.4s -+ rev64 v0.4s, v0.4s -+ ext v1.16b,v1.16b,v1.16b,#8 -+ ext v0.16b,v0.16b,v0.16b,#8 -+ st1 {v0.4s,v1.4s,v2.4s,v3.4s},[x1],64 -+ st1 {v4.4s,v5.4s,v6.4s,v7.4s},[x1] -+ ret -+.size sm4_v8_set_decrypt_key,.-sm4_v8_set_decrypt_key -+.globl sm4_v8_cbc_encrypt -+.type sm4_v8_cbc_encrypt,%function -+.align 5 -+sm4_v8_cbc_encrypt: -+ AARCH64_VALID_CALL_TARGET -+ stp d8,d9,[sp, #-16]! 
-+ -+ ld1 {v0.4s,v1.4s,v2.4s,v3.4s},[x3],#64 -+ ld1 {v4.4s,v5.4s,v6.4s,v7.4s},[x3] -+ ld1 {v8.4s},[x4] -+ cmp w5,#0 -+ b.eq .Ldec -+1: -+ cmp x2, #64 -+ b.lt 1f -+ ld1 {v16.4s,v17.4s,v18.4s,v19.4s},[x0],#64 -+ eor v16.16b,v16.16b,v8.16b -+#ifndef __ARMEB__ -+ rev32 v17.16b,v17.16b -+#endif -+#ifndef __ARMEB__ -+ rev32 v16.16b,v16.16b -+#endif -+#ifndef __ARMEB__ -+ rev32 v18.16b,v18.16b -+#endif -+#ifndef __ARMEB__ -+ rev32 v19.16b,v19.16b -+#endif -+ sm4e v16.4s,v0.4s; -+ sm4e v16.4s,v1.4s; -+ sm4e v16.4s,v2.4s; -+ sm4e v16.4s,v3.4s; -+ sm4e v16.4s,v4.4s; -+ sm4e v16.4s,v5.4s; -+ sm4e v16.4s,v6.4s; -+ sm4e v16.4s,v7.4s; -+ rev64 v16.4s,v16.4s -+ ext v16.16b,v16.16b,v16.16b,#8 -+ eor v17.16b,v17.16b,v16.16b -+ sm4e v17.4s,v0.4s; -+ sm4e v17.4s,v1.4s; -+ sm4e v17.4s,v2.4s; -+ sm4e v17.4s,v3.4s; -+ sm4e v17.4s,v4.4s; -+ sm4e v17.4s,v5.4s; -+ sm4e v17.4s,v6.4s; -+ sm4e v17.4s,v7.4s; -+ rev64 v17.4s,v17.4s -+ ext v17.16b,v17.16b,v17.16b,#8 -+#ifndef __ARMEB__ -+ rev32 v16.16b,v16.16b -+#endif -+ eor v18.16b,v18.16b,v17.16b -+ sm4e v18.4s,v0.4s; -+ sm4e v18.4s,v1.4s; -+ sm4e v18.4s,v2.4s; -+ sm4e v18.4s,v3.4s; -+ sm4e v18.4s,v4.4s; -+ sm4e v18.4s,v5.4s; -+ sm4e v18.4s,v6.4s; -+ sm4e v18.4s,v7.4s; -+ rev64 v18.4s,v18.4s -+ ext v18.16b,v18.16b,v18.16b,#8 -+#ifndef __ARMEB__ -+ rev32 v17.16b,v17.16b -+#endif -+ eor v19.16b,v19.16b,v18.16b -+ sm4e v19.4s,v0.4s; -+ sm4e v19.4s,v1.4s; -+ sm4e v19.4s,v2.4s; -+ sm4e v19.4s,v3.4s; -+ sm4e v19.4s,v4.4s; -+ sm4e v19.4s,v5.4s; -+ sm4e v19.4s,v6.4s; -+ sm4e v19.4s,v7.4s; -+ rev64 v19.4s,v19.4s -+ ext v19.16b,v19.16b,v19.16b,#8 -+#ifndef __ARMEB__ -+ rev32 v18.16b,v18.16b -+#endif -+#ifndef __ARMEB__ -+ rev32 v19.16b,v19.16b -+#endif -+ mov v8.16b,v19.16b -+ st1 {v16.4s,v17.4s,v18.4s,v19.4s},[x1],#64 -+ subs x2,x2,#64 -+ b.ne 1b -+1: -+ subs x2,x2,#16 -+ b.lt 3f -+ ld1 {v16.4s},[x0],#16 -+ eor v8.16b,v8.16b,v16.16b -+#ifndef __ARMEB__ -+ rev32 v8.16b,v8.16b -+#endif -+ sm4e v8.4s,v0.4s; -+ sm4e v8.4s,v1.4s; -+ sm4e v8.4s,v2.4s; -+ 
sm4e v8.4s,v3.4s; -+ sm4e v8.4s,v4.4s; -+ sm4e v8.4s,v5.4s; -+ sm4e v8.4s,v6.4s; -+ sm4e v8.4s,v7.4s; -+ rev64 v8.4s,v8.4s -+ ext v8.16b,v8.16b,v8.16b,#8 -+#ifndef __ARMEB__ -+ rev32 v8.16b,v8.16b -+#endif -+ st1 {v8.16b},[x1],#16 -+ b.ne 1b -+ b 3f -+.Ldec: -+1: -+ cmp x2, #64 -+ b.lt 1f -+ ld1 {v16.4s,v17.4s,v18.4s,v19.4s},[x0] -+ ld1 {v24.4s,v25.4s,v26.4s,v27.4s},[x0],#64 -+ cmp x2,#128 -+ b.lt 2f -+ // 8 blocks mode -+ ld1 {v20.4s,v21.4s,v22.4s,v23.4s},[x0] -+ ld1 {v28.4s,v29.4s,v30.4s,v31.4s},[x0],#64 -+#ifndef __ARMEB__ -+ rev32 v16.16b,v16.16b -+#endif -+#ifndef __ARMEB__ -+ rev32 v17.16b,v17.16b -+#endif -+#ifndef __ARMEB__ -+ rev32 v18.16b,v18.16b -+#endif -+#ifndef __ARMEB__ -+ rev32 v19.16b,v19.16b -+#endif -+#ifndef __ARMEB__ -+ rev32 v20.16b,v20.16b -+#endif -+#ifndef __ARMEB__ -+ rev32 v21.16b,v21.16b -+#endif -+#ifndef __ARMEB__ -+ rev32 v22.16b,v22.16b -+#endif -+#ifndef __ARMEB__ -+ rev32 v23.16b,v23.16b -+#endif -+ sm4e v16.4s,v0.4s; -+ sm4e v17.4s,v0.4s; -+ sm4e v18.4s,v0.4s; -+ sm4e v19.4s,v0.4s; -+ -+ sm4e v16.4s,v1.4s; -+ sm4e v17.4s,v1.4s; -+ sm4e v18.4s,v1.4s; -+ sm4e v19.4s,v1.4s; -+ -+ sm4e v16.4s,v2.4s; -+ sm4e v17.4s,v2.4s; -+ sm4e v18.4s,v2.4s; -+ sm4e v19.4s,v2.4s; -+ -+ sm4e v16.4s,v3.4s; -+ sm4e v17.4s,v3.4s; -+ sm4e v18.4s,v3.4s; -+ sm4e v19.4s,v3.4s; -+ -+ sm4e v16.4s,v4.4s; -+ sm4e v17.4s,v4.4s; -+ sm4e v18.4s,v4.4s; -+ sm4e v19.4s,v4.4s; -+ -+ sm4e v16.4s,v5.4s; -+ sm4e v17.4s,v5.4s; -+ sm4e v18.4s,v5.4s; -+ sm4e v19.4s,v5.4s; -+ -+ sm4e v16.4s,v6.4s; -+ sm4e v17.4s,v6.4s; -+ sm4e v18.4s,v6.4s; -+ sm4e v19.4s,v6.4s; -+ -+ sm4e v16.4s,v7.4s; -+ rev64 v16.4s,v16.4s -+ sm4e v17.4s,v7.4s; -+ ext v16.16b,v16.16b,v16.16b,#8 -+ rev64 v17.4s,v17.4s -+ sm4e v18.4s,v7.4s; -+ ext v17.16b,v17.16b,v17.16b,#8 -+ rev64 v18.4s,v18.4s -+ sm4e v19.4s,v7.4s; -+ ext v18.16b,v18.16b,v18.16b,#8 -+ rev64 v19.4s,v19.4s -+ ext v19.16b,v19.16b,v19.16b,#8 -+ sm4e v20.4s,v0.4s; -+ sm4e v21.4s,v0.4s; -+ sm4e v22.4s,v0.4s; -+ sm4e v23.4s,v0.4s; -+ -+ sm4e 
v20.4s,v1.4s; -+ sm4e v21.4s,v1.4s; -+ sm4e v22.4s,v1.4s; -+ sm4e v23.4s,v1.4s; -+ -+ sm4e v20.4s,v2.4s; -+ sm4e v21.4s,v2.4s; -+ sm4e v22.4s,v2.4s; -+ sm4e v23.4s,v2.4s; -+ -+ sm4e v20.4s,v3.4s; -+ sm4e v21.4s,v3.4s; -+ sm4e v22.4s,v3.4s; -+ sm4e v23.4s,v3.4s; -+ -+ sm4e v20.4s,v4.4s; -+ sm4e v21.4s,v4.4s; -+ sm4e v22.4s,v4.4s; -+ sm4e v23.4s,v4.4s; -+ -+ sm4e v20.4s,v5.4s; -+ sm4e v21.4s,v5.4s; -+ sm4e v22.4s,v5.4s; -+ sm4e v23.4s,v5.4s; -+ -+ sm4e v20.4s,v6.4s; -+ sm4e v21.4s,v6.4s; -+ sm4e v22.4s,v6.4s; -+ sm4e v23.4s,v6.4s; -+ -+ sm4e v20.4s,v7.4s; -+ rev64 v20.4s,v20.4s -+ sm4e v21.4s,v7.4s; -+ ext v20.16b,v20.16b,v20.16b,#8 -+ rev64 v21.4s,v21.4s -+ sm4e v22.4s,v7.4s; -+ ext v21.16b,v21.16b,v21.16b,#8 -+ rev64 v22.4s,v22.4s -+ sm4e v23.4s,v7.4s; -+ ext v22.16b,v22.16b,v22.16b,#8 -+ rev64 v23.4s,v23.4s -+ ext v23.16b,v23.16b,v23.16b,#8 -+#ifndef __ARMEB__ -+ rev32 v16.16b,v16.16b -+#endif -+#ifndef __ARMEB__ -+ rev32 v17.16b,v17.16b -+#endif -+#ifndef __ARMEB__ -+ rev32 v18.16b,v18.16b -+#endif -+#ifndef __ARMEB__ -+ rev32 v19.16b,v19.16b -+#endif -+#ifndef __ARMEB__ -+ rev32 v20.16b,v20.16b -+#endif -+#ifndef __ARMEB__ -+ rev32 v21.16b,v21.16b -+#endif -+#ifndef __ARMEB__ -+ rev32 v22.16b,v22.16b -+#endif -+#ifndef __ARMEB__ -+ rev32 v23.16b,v23.16b -+#endif -+ eor v16.16b,v16.16b,v8.16b -+ eor v17.16b,v17.16b,v24.16b -+ eor v18.16b,v18.16b,v25.16b -+ mov v8.16b,v31.16b -+ eor v19.16b,v19.16b,v26.16b -+ eor v20.16b,v20.16b,v27.16b -+ eor v21.16b,v21.16b,v28.16b -+ eor v22.16b,v22.16b,v29.16b -+ eor v23.16b,v23.16b,v30.16b -+ st1 {v16.4s,v17.4s,v18.4s,v19.4s},[x1],#64 -+ st1 {v20.4s,v21.4s,v22.4s,v23.4s},[x1],#64 -+ subs x2,x2,128 -+ b.gt 1b -+ b 3f -+ // 4 blocks mode -+2: -+#ifndef __ARMEB__ -+ rev32 v16.16b,v16.16b -+#endif -+#ifndef __ARMEB__ -+ rev32 v17.16b,v17.16b -+#endif -+#ifndef __ARMEB__ -+ rev32 v18.16b,v18.16b -+#endif -+#ifndef __ARMEB__ -+ rev32 v19.16b,v19.16b -+#endif -+ sm4e v16.4s,v0.4s; -+ sm4e v17.4s,v0.4s; -+ sm4e v18.4s,v0.4s; -+ sm4e 
v19.4s,v0.4s; -+ -+ sm4e v16.4s,v1.4s; -+ sm4e v17.4s,v1.4s; -+ sm4e v18.4s,v1.4s; -+ sm4e v19.4s,v1.4s; -+ -+ sm4e v16.4s,v2.4s; -+ sm4e v17.4s,v2.4s; -+ sm4e v18.4s,v2.4s; -+ sm4e v19.4s,v2.4s; -+ -+ sm4e v16.4s,v3.4s; -+ sm4e v17.4s,v3.4s; -+ sm4e v18.4s,v3.4s; -+ sm4e v19.4s,v3.4s; -+ -+ sm4e v16.4s,v4.4s; -+ sm4e v17.4s,v4.4s; -+ sm4e v18.4s,v4.4s; -+ sm4e v19.4s,v4.4s; -+ -+ sm4e v16.4s,v5.4s; -+ sm4e v17.4s,v5.4s; -+ sm4e v18.4s,v5.4s; -+ sm4e v19.4s,v5.4s; -+ -+ sm4e v16.4s,v6.4s; -+ sm4e v17.4s,v6.4s; -+ sm4e v18.4s,v6.4s; -+ sm4e v19.4s,v6.4s; -+ -+ sm4e v16.4s,v7.4s; -+ rev64 v16.4s,v16.4s -+ sm4e v17.4s,v7.4s; -+ ext v16.16b,v16.16b,v16.16b,#8 -+ rev64 v17.4s,v17.4s -+ sm4e v18.4s,v7.4s; -+ ext v17.16b,v17.16b,v17.16b,#8 -+ rev64 v18.4s,v18.4s -+ sm4e v19.4s,v7.4s; -+ ext v18.16b,v18.16b,v18.16b,#8 -+ rev64 v19.4s,v19.4s -+ ext v19.16b,v19.16b,v19.16b,#8 -+#ifndef __ARMEB__ -+ rev32 v16.16b,v16.16b -+#endif -+#ifndef __ARMEB__ -+ rev32 v17.16b,v17.16b -+#endif -+#ifndef __ARMEB__ -+ rev32 v18.16b,v18.16b -+#endif -+#ifndef __ARMEB__ -+ rev32 v19.16b,v19.16b -+#endif -+ eor v16.16b,v16.16b,v8.16b -+ eor v17.16b,v17.16b,v24.16b -+ mov v8.16b,v27.16b -+ eor v18.16b,v18.16b,v25.16b -+ eor v19.16b,v19.16b,v26.16b -+ st1 {v16.4s,v17.4s,v18.4s,v19.4s},[x1],#64 -+ subs x2,x2,#64 -+ b.gt 1b -+1: -+ subs x2,x2,#16 -+ b.lt 3f -+ ld1 {v16.4s},[x0],#16 -+ mov v24.16b,v16.16b -+#ifndef __ARMEB__ -+ rev32 v16.16b,v16.16b -+#endif -+ sm4e v16.4s,v0.4s; -+ sm4e v16.4s,v1.4s; -+ sm4e v16.4s,v2.4s; -+ sm4e v16.4s,v3.4s; -+ sm4e v16.4s,v4.4s; -+ sm4e v16.4s,v5.4s; -+ sm4e v16.4s,v6.4s; -+ sm4e v16.4s,v7.4s; -+ rev64 v16.4s,v16.4s -+ ext v16.16b,v16.16b,v16.16b,#8 -+#ifndef __ARMEB__ -+ rev32 v16.16b,v16.16b -+#endif -+ eor v16.16b,v16.16b,v8.16b -+ mov v8.16b,v24.16b -+ st1 {v16.16b},[x1],#16 -+ b.ne 1b -+3: -+ // save back IV -+ st1 {v8.16b},[x4] -+ ldp d8,d9,[sp],#16 -+ ret -+.size sm4_v8_cbc_encrypt,.-sm4_v8_cbc_encrypt -+.globl sm4_v8_ctr32_encrypt_blocks -+.type 
sm4_v8_ctr32_encrypt_blocks,%function -+.align 5 -+sm4_v8_ctr32_encrypt_blocks: -+ AARCH64_VALID_CALL_TARGET -+ stp d8,d9,[sp, #-16]! -+ -+ ld1 {v8.4s},[x4] -+ ld1 {v0.4s,v1.4s,v2.4s,v3.4s},[x3],64 -+ ld1 {v4.4s,v5.4s,v6.4s,v7.4s},[x3] -+#ifndef __ARMEB__ -+ rev32 v8.16b,v8.16b -+#endif -+ mov w5,v8.s[3] -+1: -+ cmp x2,#4 -+ b.lt 1f -+ ld1 {v24.4s,v25.4s,v26.4s,v27.4s},[x0],#64 -+ mov v16.16b,v8.16b -+ mov v17.16b,v8.16b -+ mov v18.16b,v8.16b -+ mov v19.16b,v8.16b -+ add w5,w5,#1 -+ mov v17.s[3],w5 -+ add w5,w5,#1 -+ mov v18.s[3],w5 -+ add w5,w5,#1 -+ mov v19.s[3],w5 -+ cmp x2,#8 -+ b.lt 2f -+ ld1 {v28.4s,v29.4s,v30.4s,v31.4s},[x0],#64 -+ mov v20.16b,v8.16b -+ mov v21.16b,v8.16b -+ mov v22.16b,v8.16b -+ mov v23.16b,v8.16b -+ add w5,w5,#1 -+ mov v20.s[3],w5 -+ add w5,w5,#1 -+ mov v21.s[3],w5 -+ add w5,w5,#1 -+ mov v22.s[3],w5 -+ add w5,w5,#1 -+ mov v23.s[3],w5 -+ sm4e v16.4s,v0.4s; -+ sm4e v17.4s,v0.4s; -+ sm4e v18.4s,v0.4s; -+ sm4e v19.4s,v0.4s; -+ -+ sm4e v16.4s,v1.4s; -+ sm4e v17.4s,v1.4s; -+ sm4e v18.4s,v1.4s; -+ sm4e v19.4s,v1.4s; -+ -+ sm4e v16.4s,v2.4s; -+ sm4e v17.4s,v2.4s; -+ sm4e v18.4s,v2.4s; -+ sm4e v19.4s,v2.4s; -+ -+ sm4e v16.4s,v3.4s; -+ sm4e v17.4s,v3.4s; -+ sm4e v18.4s,v3.4s; -+ sm4e v19.4s,v3.4s; -+ -+ sm4e v16.4s,v4.4s; -+ sm4e v17.4s,v4.4s; -+ sm4e v18.4s,v4.4s; -+ sm4e v19.4s,v4.4s; -+ -+ sm4e v16.4s,v5.4s; -+ sm4e v17.4s,v5.4s; -+ sm4e v18.4s,v5.4s; -+ sm4e v19.4s,v5.4s; -+ -+ sm4e v16.4s,v6.4s; -+ sm4e v17.4s,v6.4s; -+ sm4e v18.4s,v6.4s; -+ sm4e v19.4s,v6.4s; -+ -+ sm4e v16.4s,v7.4s; -+ rev64 v16.4s,v16.4s -+ sm4e v17.4s,v7.4s; -+ ext v16.16b,v16.16b,v16.16b,#8 -+ rev64 v17.4s,v17.4s -+ sm4e v18.4s,v7.4s; -+ ext v17.16b,v17.16b,v17.16b,#8 -+ rev64 v18.4s,v18.4s -+ sm4e v19.4s,v7.4s; -+ ext v18.16b,v18.16b,v18.16b,#8 -+ rev64 v19.4s,v19.4s -+ ext v19.16b,v19.16b,v19.16b,#8 -+ sm4e v20.4s,v0.4s; -+ sm4e v21.4s,v0.4s; -+ sm4e v22.4s,v0.4s; -+ sm4e v23.4s,v0.4s; -+ -+ sm4e v20.4s,v1.4s; -+ sm4e v21.4s,v1.4s; -+ sm4e v22.4s,v1.4s; -+ sm4e 
v23.4s,v1.4s; -+ -+ sm4e v20.4s,v2.4s; -+ sm4e v21.4s,v2.4s; -+ sm4e v22.4s,v2.4s; -+ sm4e v23.4s,v2.4s; -+ -+ sm4e v20.4s,v3.4s; -+ sm4e v21.4s,v3.4s; -+ sm4e v22.4s,v3.4s; -+ sm4e v23.4s,v3.4s; -+ -+ sm4e v20.4s,v4.4s; -+ sm4e v21.4s,v4.4s; -+ sm4e v22.4s,v4.4s; -+ sm4e v23.4s,v4.4s; -+ -+ sm4e v20.4s,v5.4s; -+ sm4e v21.4s,v5.4s; -+ sm4e v22.4s,v5.4s; -+ sm4e v23.4s,v5.4s; -+ -+ sm4e v20.4s,v6.4s; -+ sm4e v21.4s,v6.4s; -+ sm4e v22.4s,v6.4s; -+ sm4e v23.4s,v6.4s; -+ -+ sm4e v20.4s,v7.4s; -+ rev64 v20.4s,v20.4s -+ sm4e v21.4s,v7.4s; -+ ext v20.16b,v20.16b,v20.16b,#8 -+ rev64 v21.4s,v21.4s -+ sm4e v22.4s,v7.4s; -+ ext v21.16b,v21.16b,v21.16b,#8 -+ rev64 v22.4s,v22.4s -+ sm4e v23.4s,v7.4s; -+ ext v22.16b,v22.16b,v22.16b,#8 -+ rev64 v23.4s,v23.4s -+ ext v23.16b,v23.16b,v23.16b,#8 -+#ifndef __ARMEB__ -+ rev32 v16.16b,v16.16b -+#endif -+#ifndef __ARMEB__ -+ rev32 v17.16b,v17.16b -+#endif -+#ifndef __ARMEB__ -+ rev32 v18.16b,v18.16b -+#endif -+#ifndef __ARMEB__ -+ rev32 v19.16b,v19.16b -+#endif -+#ifndef __ARMEB__ -+ rev32 v20.16b,v20.16b -+#endif -+#ifndef __ARMEB__ -+ rev32 v21.16b,v21.16b -+#endif -+#ifndef __ARMEB__ -+ rev32 v22.16b,v22.16b -+#endif -+#ifndef __ARMEB__ -+ rev32 v23.16b,v23.16b -+#endif -+ eor v16.16b,v16.16b,v24.16b -+ eor v17.16b,v17.16b,v25.16b -+ eor v18.16b,v18.16b,v26.16b -+ eor v19.16b,v19.16b,v27.16b -+ eor v20.16b,v20.16b,v28.16b -+ eor v21.16b,v21.16b,v29.16b -+ eor v22.16b,v22.16b,v30.16b -+ eor v23.16b,v23.16b,v31.16b -+ st1 {v16.4s,v17.4s,v18.4s,v19.4s},[x1],#64 -+ st1 {v20.4s,v21.4s,v22.4s,v23.4s},[x1],#64 -+ subs x2,x2,#8 -+ b.eq 3f -+ add w5,w5,#1 -+ mov v8.s[3],w5 -+ b 1b -+2: -+ sm4e v16.4s,v0.4s; -+ sm4e v17.4s,v0.4s; -+ sm4e v18.4s,v0.4s; -+ sm4e v19.4s,v0.4s; -+ -+ sm4e v16.4s,v1.4s; -+ sm4e v17.4s,v1.4s; -+ sm4e v18.4s,v1.4s; -+ sm4e v19.4s,v1.4s; -+ -+ sm4e v16.4s,v2.4s; -+ sm4e v17.4s,v2.4s; -+ sm4e v18.4s,v2.4s; -+ sm4e v19.4s,v2.4s; -+ -+ sm4e v16.4s,v3.4s; -+ sm4e v17.4s,v3.4s; -+ sm4e v18.4s,v3.4s; -+ sm4e v19.4s,v3.4s; -+ 
-+ sm4e v16.4s,v4.4s; -+ sm4e v17.4s,v4.4s; -+ sm4e v18.4s,v4.4s; -+ sm4e v19.4s,v4.4s; -+ -+ sm4e v16.4s,v5.4s; -+ sm4e v17.4s,v5.4s; -+ sm4e v18.4s,v5.4s; -+ sm4e v19.4s,v5.4s; -+ -+ sm4e v16.4s,v6.4s; -+ sm4e v17.4s,v6.4s; -+ sm4e v18.4s,v6.4s; -+ sm4e v19.4s,v6.4s; -+ -+ sm4e v16.4s,v7.4s; -+ rev64 v16.4s,v16.4s -+ sm4e v17.4s,v7.4s; -+ ext v16.16b,v16.16b,v16.16b,#8 -+ rev64 v17.4s,v17.4s -+ sm4e v18.4s,v7.4s; -+ ext v17.16b,v17.16b,v17.16b,#8 -+ rev64 v18.4s,v18.4s -+ sm4e v19.4s,v7.4s; -+ ext v18.16b,v18.16b,v18.16b,#8 -+ rev64 v19.4s,v19.4s -+ ext v19.16b,v19.16b,v19.16b,#8 -+#ifndef __ARMEB__ -+ rev32 v16.16b,v16.16b -+#endif -+#ifndef __ARMEB__ -+ rev32 v17.16b,v17.16b -+#endif -+#ifndef __ARMEB__ -+ rev32 v18.16b,v18.16b -+#endif -+#ifndef __ARMEB__ -+ rev32 v19.16b,v19.16b -+#endif -+ eor v16.16b,v16.16b,v24.16b -+ eor v17.16b,v17.16b,v25.16b -+ eor v18.16b,v18.16b,v26.16b -+ eor v19.16b,v19.16b,v27.16b -+ st1 {v16.4s,v17.4s,v18.4s,v19.4s},[x1],#64 -+ subs x2,x2,#4 -+ b.eq 3f -+ add w5,w5,#1 -+ mov v8.s[3],w5 -+ b 1b -+1: -+ subs x2,x2,#1 -+ b.lt 3f -+ mov v16.16b,v8.16b -+ ld1 {v24.4s},[x0],#16 -+ sm4e v16.4s,v0.4s; -+ sm4e v16.4s,v1.4s; -+ sm4e v16.4s,v2.4s; -+ sm4e v16.4s,v3.4s; -+ sm4e v16.4s,v4.4s; -+ sm4e v16.4s,v5.4s; -+ sm4e v16.4s,v6.4s; -+ sm4e v16.4s,v7.4s; -+ rev64 v16.4s,v16.4s -+ ext v16.16b,v16.16b,v16.16b,#8 -+#ifndef __ARMEB__ -+ rev32 v16.16b,v16.16b -+#endif -+ eor v16.16b,v16.16b,v24.16b -+ st1 {v16.4s},[x1],#16 -+ b.eq 3f -+ add w5,w5,#1 -+ mov v8.s[3],w5 -+ b 1b -+3: -+ ldp d8,d9,[sp],#16 -+ ret -+.size sm4_v8_ctr32_encrypt_blocks,.-sm4_v8_ctr32_encrypt_blocks -diff --git a/v1/wd.c b/v1/wd.c -index 26e7af3..4286bbe 100644 ---- a/v1/wd.c -+++ b/v1/wd.c -@@ -88,7 +88,8 @@ static int get_raw_attr(const char *dev_root, const char *attr, - if (ptrRet == NULL) - return -WD_ENODEV; - -- /* The attr_file = "/sys/class/uacce/xxx" -+ /* -+ * The attr_file = "/sys/class/uacce/xxx" - * It's the Internal Definition File Node - */ - fd = 
open(attr_path, O_RDONLY, 0); -diff --git a/v1/wd_rng.c b/v1/wd_rng.c -index 24a4b7a..7a89cd1 100644 ---- a/v1/wd_rng.c -+++ b/v1/wd_rng.c -@@ -57,7 +57,7 @@ static int wcrypto_setup_qinfo(struct wcrypto_rng_ctx_setup *setup, - WD_ERR("algorithm mismatch!\n"); - return ret; - } -- qinfo = q->qinfo; -+ qinfo = q->qinfo; - /* lock at ctx creating */ - wd_spinlock(&qinfo->qlock); - if (qinfo->ctx_num >= WD_MAX_CTX_NUM) { -@@ -120,7 +120,7 @@ void *wcrypto_create_rng_ctx(struct wd_queue *q, - return ctx; - - free_ctx_id: -- qinfo = q->qinfo; -+ qinfo = q->qinfo; - wd_spinlock(&qinfo->qlock); - qinfo->ctx_num--; - wd_free_id(qinfo->ctx_id, WD_MAX_CTX_NUM, ctx_id, WD_MAX_CTX_NUM); -diff --git a/wd_cipher.c b/wd_cipher.c -index f35ce6f..63ec362 100644 ---- a/wd_cipher.c -+++ b/wd_cipher.c -@@ -622,10 +622,10 @@ static int send_recv_sync(struct wd_ctx_internal *ctx, - msg_handle.send = wd_cipher_setting.driver->send; - msg_handle.recv = wd_cipher_setting.driver->recv; - -- pthread_spin_lock(&ctx->lock); -+ wd_ctx_spin_lock(ctx, wd_cipher_setting.driver->calc_type); - ret = wd_handle_msg_sync(wd_cipher_setting.driver, &msg_handle, ctx->ctx, - msg, NULL, wd_cipher_setting.config.epoll_en); -- pthread_spin_unlock(&ctx->lock); -+ wd_ctx_spin_unlock(ctx, wd_cipher_setting.driver->calc_type); - - return ret; - } --- -2.25.1 - diff --git a/0026-cipher-add-support-for-SM4-CFB-and-XTS-modes-in-CE-i.patch b/0026-cipher-add-support-for-SM4-CFB-and-XTS-modes-in-CE-i.patch deleted file mode 100644 index a28822a..0000000 --- a/0026-cipher-add-support-for-SM4-CFB-and-XTS-modes-in-CE-i.patch +++ /dev/null @@ -1,1348 +0,0 @@ -From 091bbf55057370ab571d8a84cc33465ad145e1a9 Mon Sep 17 00:00:00 2001 -From: Yuzeng Zhuang -Date: Wed, 20 Mar 2024 16:12:48 +0800 -Subject: [PATCH 26/44] cipher: add support for SM4 CFB and XTS modes in CE - instruction - -This patch implements the CE instruction using SM4 CFB and XTS modes. 
- -Signed-off-by: Yuzeng Zhuang -Signed-off-by: Qi Tao ---- - drv/isa_ce_sm4.c | 115 +++- - drv/isa_ce_sm4.h | 14 + - drv/isa_ce_sm4_armv8.S | 1126 ++++++++++++++++++++++++++++++++++++++++ - 3 files changed, 1253 insertions(+), 2 deletions(-) - -diff --git a/drv/isa_ce_sm4.c b/drv/isa_ce_sm4.c -index e2d81de..466b060 100644 ---- a/drv/isa_ce_sm4.c -+++ b/drv/isa_ce_sm4.c -@@ -22,6 +22,8 @@ - #define SM4_BLOCK_SIZE 16 - #define MAX_BLOCK_NUM (1U << 28) - #define CTR96_SHIFT_BITS 8 -+#define SM4_BYTES2BLKS(nbytes) ((nbytes) >> 4) -+#define SM4_KEY_SIZE 16 - - #define GETU32(p) \ - ((__u32)(p)[0] << 24 | (__u32)(p)[1] << 16 | (__u32)(p)[2] << 8 | (__u32)(p)[3]) -@@ -136,10 +138,104 @@ void sm4_set_decrypt_key(const __u8 *userKey, struct SM4_KEY *key) - sm4_v8_set_decrypt_key(userKey, key); - } - -+static void sm4_cfb_crypt(struct wd_cipher_msg *msg, const struct SM4_KEY *rkey, const int enc) -+{ -+ unsigned char keydata[SM4_BLOCK_SIZE]; -+ const unsigned char *src = msg->in; -+ unsigned char *dst = msg->out; -+ __u32 nbytes = msg->in_bytes; -+ __u32 blocks, bbytes; -+ __u32 i = 0; -+ -+ blocks = SM4_BYTES2BLKS(nbytes); -+ if (blocks) { -+ if (enc == SM4_ENCRYPT) -+ sm4_v8_cfb_encrypt_blocks(src, dst, blocks, rkey, msg->iv); -+ else -+ sm4_v8_cfb_decrypt_blocks(src, dst, blocks, rkey, msg->iv); -+ -+ bbytes = blocks * SM4_BLOCK_SIZE; -+ dst += bbytes; -+ src += bbytes; -+ nbytes -= bbytes; -+ } -+ -+ if (nbytes == 0) -+ return; -+ -+ sm4_v8_crypt_block(msg->iv, keydata, rkey); -+ while (nbytes > 0) { -+ *dst++ = *src++ ^ keydata[i++]; -+ nbytes--; -+ } -+ -+ /* store new IV */ -+ if (enc == SM4_ENCRYPT) { -+ if (msg->out_bytes >= msg->iv_bytes) -+ memcpy(msg->iv, msg->out + msg->out_bytes - -+ msg->iv_bytes, msg->iv_bytes); -+ else -+ memcpy(msg->iv, msg->out, msg->out_bytes); -+ } else { -+ if (msg->in_bytes >= msg->iv_bytes) -+ memcpy(msg->iv, msg->in + msg->in_bytes - -+ msg->iv_bytes, msg->iv_bytes); -+ else -+ memcpy(msg->iv, msg->in, msg->in_bytes); -+ } -+} -+ 
-+static void sm4_cfb_encrypt(struct wd_cipher_msg *msg, const struct SM4_KEY *rkey_enc) -+{ -+ sm4_cfb_crypt(msg, rkey_enc, SM4_ENCRYPT); -+} -+ -+static void sm4_cfb_decrypt(struct wd_cipher_msg *msg, const struct SM4_KEY *rkey_dec) -+{ -+ sm4_cfb_crypt(msg, rkey_dec, SM4_DECRYPT); -+} -+ -+static int sm4_xts_encrypt(struct wd_cipher_msg *msg, const struct SM4_KEY *rkey) -+{ -+ struct SM4_KEY rkey2; -+ -+ if (msg->in_bytes < SM4_BLOCK_SIZE) { -+ WD_ERR("invalid: cipher input length is wrong!\n"); -+ return -WD_EINVAL; -+ } -+ -+ /* set key for tweak */ -+ sm4_set_encrypt_key(msg->key + SM4_KEY_SIZE, &rkey2); -+ -+ sm4_v8_xts_encrypt(msg->in, msg->out, msg->in_bytes, -+ rkey, msg->iv, &rkey2); -+ -+ return 0; -+} -+ -+static int sm4_xts_decrypt(struct wd_cipher_msg *msg, const struct SM4_KEY *rkey) -+{ -+ struct SM4_KEY rkey2; -+ -+ if (msg->in_bytes < SM4_BLOCK_SIZE) { -+ WD_ERR("invalid: cipher input length is wrong!\n"); -+ return -WD_EINVAL; -+ } -+ -+ /* set key for tweak */ -+ sm4_set_encrypt_key(msg->key + SM4_KEY_SIZE, &rkey2); -+ -+ sm4_v8_xts_decrypt(msg->in, msg->out, msg->in_bytes, -+ rkey, msg->iv, &rkey2); -+ -+ return 0; -+} -+ - static int isa_ce_cipher_send(struct wd_alg_driver *drv, handle_t ctx, void *wd_msg) - { - struct wd_cipher_msg *msg = wd_msg; - struct SM4_KEY rkey; -+ int ret = 0; - - if (!msg) { - WD_ERR("invalid: input sm4 msg is NULL!\n"); -@@ -151,7 +247,8 @@ static int isa_ce_cipher_send(struct wd_alg_driver *drv, handle_t ctx, void *wd_ - return -WD_EINVAL; - } - -- if (msg->op_type == WD_CIPHER_ENCRYPTION || msg->mode == WD_CIPHER_CTR) -+ if (msg->op_type == WD_CIPHER_ENCRYPTION || msg->mode == WD_CIPHER_CTR -+ || msg->mode == WD_CIPHER_CFB) - sm4_set_encrypt_key(msg->key, &rkey); - else - sm4_set_decrypt_key(msg->key, &rkey); -@@ -166,12 +263,24 @@ static int isa_ce_cipher_send(struct wd_alg_driver *drv, handle_t ctx, void *wd_ - case WD_CIPHER_CTR: - sm4_ctr_encrypt(msg, &rkey); - break; -+ case WD_CIPHER_CFB: -+ if 
(msg->op_type == WD_CIPHER_ENCRYPTION) -+ sm4_cfb_encrypt(msg, &rkey); -+ else -+ sm4_cfb_decrypt(msg, &rkey); -+ break; -+ case WD_CIPHER_XTS: -+ if (msg->op_type == WD_CIPHER_ENCRYPTION) -+ ret = sm4_xts_encrypt(msg, &rkey); -+ else -+ ret = sm4_xts_decrypt(msg, &rkey); -+ break; - default: - WD_ERR("The current block cipher mode is not supported!\n"); - return -WD_EINVAL; - } - -- return 0; -+ return ret; - } - - static int isa_ce_cipher_recv(struct wd_alg_driver *drv, handle_t ctx, void *wd_msg) -@@ -206,6 +315,8 @@ static int cipher_recv(struct wd_alg_driver *drv, handle_t ctx, void *msg) - static struct wd_alg_driver cipher_alg_driver[] = { - GEN_CE_ALG_DRIVER("cbc(sm4)", cipher), - GEN_CE_ALG_DRIVER("ctr(sm4)", cipher), -+ GEN_CE_ALG_DRIVER("cfb(sm4)", cipher), -+ GEN_CE_ALG_DRIVER("xts(sm4)", cipher), - }; - - static void __attribute__((constructor)) isa_ce_probe(void) -diff --git a/drv/isa_ce_sm4.h b/drv/isa_ce_sm4.h -index 0bc074d..d92069f 100644 ---- a/drv/isa_ce_sm4.h -+++ b/drv/isa_ce_sm4.h -@@ -31,6 +31,20 @@ void sm4_v8_cbc_encrypt(const unsigned char *in, unsigned char *out, - void sm4_v8_ctr32_encrypt_blocks(const unsigned char *in, unsigned char *out, - size_t len, const void *key, const unsigned char ivec[16]); - -+void sm4_v8_cfb_encrypt_blocks(const unsigned char *in, unsigned char *out, -+ size_t length, const struct SM4_KEY *key, unsigned char *ivec); -+void sm4_v8_cfb_decrypt_blocks(const unsigned char *in, unsigned char *out, -+ size_t length, const struct SM4_KEY *key, unsigned char *ivec); -+void sm4_v8_crypt_block(const unsigned char *in, unsigned char *out, -+ const struct SM4_KEY *key); -+ -+int sm4_v8_xts_encrypt(const unsigned char *in, unsigned char *out, size_t length, -+ const struct SM4_KEY *key, unsigned char *ivec, -+ const struct SM4_KEY *key2); -+int sm4_v8_xts_decrypt(const unsigned char *in, unsigned char *out, size_t length, -+ const struct SM4_KEY *key, unsigned char *ivec, -+ const struct SM4_KEY *key2); -+ - #ifdef 
__cplusplus - } - #endif -diff --git a/drv/isa_ce_sm4_armv8.S b/drv/isa_ce_sm4_armv8.S -index d7d172a..342dfa5 100644 ---- a/drv/isa_ce_sm4_armv8.S -+++ b/drv/isa_ce_sm4_armv8.S -@@ -37,6 +37,14 @@ - .long 0x10171E25, 0x2C333A41, 0x484F565D, 0x646B7279 - .Lfk: - .long 0xa3b1bac6, 0x56aa3350, 0x677d9197, 0xb27022dc -+.align 4 -+.cts_permute_table: -+.byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff -+.byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff -+.byte 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7 -+.byte 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf -+.byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff -+.byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff - .globl sm4_v8_set_encrypt_key - .type sm4_v8_set_encrypt_key,%function - .align 5 -@@ -772,3 +780,1121 @@ sm4_v8_ctr32_encrypt_blocks: - ldp d8,d9,[sp],#16 - ret - .size sm4_v8_ctr32_encrypt_blocks,.-sm4_v8_ctr32_encrypt_blocks -+ -+.globl sm4_v8_crypt_block -+.type sm4_v8_crypt_block,%function -+.align 5 -+sm4_v8_crypt_block: -+ /* parameters: -+ * x0: src -+ * x1: dst -+ * x2: key -+ */ -+ AARCH64_VALID_CALL_TARGET -+ -+ ld1 {v0.16b-v3.16b}, [x2], #64 -+ ld1 {v4.16b-v7.16b}, [x2] -+ -+ ld1 {v16.4s},[x0] -+ -+ rev32 v16.16b, v16.16b -+ sm4e v16.4s, v0.4s -+ sm4e v16.4s, v1.4s -+ sm4e v16.4s, v2.4s -+ sm4e v16.4s, v3.4s -+ sm4e v16.4s, v4.4s -+ sm4e v16.4s, v5.4s -+ sm4e v16.4s, v6.4s -+ sm4e v16.4s, v7.4s -+ rev64 v16.4s, v16.4s -+ ext v16.16b, v16.16b, v16.16b, #8 -+ rev32 v16.16b, v16.16b -+ -+ st1 {v16.16b}, [x1]; -+ -+ ret -+.size sm4_v8_crypt_block,.-sm4_v8_crypt_block -+ -+.globl sm4_v8_cfb_encrypt_blocks -+.type sm4_v8_cfb_encrypt_blocks,%function -+.align 5 -+sm4_v8_cfb_encrypt_blocks: -+ /* parameters: -+ * x0: src -+ * x1: dst -+ * w2: nblocks -+ * x3: key -+ * x4: iv -+ */ -+ AARCH64_VALID_CALL_TARGET -+ stp d8,d9,[sp, #-16]! 
-+ -+ ld1 {v0.4s-v3.4s}, [x3], #64 -+ ld1 {v4.4s-v7.4s}, [x3] -+ -+ ld1 {v8.4s},[x4] -+ -+.loop_cfb_enc_4block: -+ cmp w2, #4 -+ blt .loob_cfb_enc_1block -+ -+ sub w2, w2, #4 -+ -+ ld1 {v16.4s-v19.4s}, [x0], #64 -+ -+ rev32 v8.16b, v8.16b -+ sm4e v8.4s, v0.4s -+ sm4e v8.4s, v1.4s -+ sm4e v8.4s, v2.4s -+ sm4e v8.4s, v3.4s -+ sm4e v8.4s, v4.4s -+ sm4e v8.4s, v5.4s -+ sm4e v8.4s, v6.4s -+ sm4e v8.4s, v7.4s -+ rev64 v8.4s, v8.4s -+ ext v8.16b, v8.16b, v8.16b, #8 -+ rev32 v8.16b, v8.16b -+ eor v16.16b, v16.16b, v8.16b -+ -+ rev32 v8.16b, v16.16b -+ sm4e v8.4s, v0.4s -+ sm4e v8.4s, v1.4s -+ sm4e v8.4s, v2.4s -+ sm4e v8.4s, v3.4s -+ sm4e v8.4s, v4.4s -+ sm4e v8.4s, v5.4s -+ sm4e v8.4s, v6.4s -+ sm4e v8.4s, v7.4s -+ rev64 v8.4s, v8.4s -+ ext v8.16b, v8.16b, v8.16b, #8 -+ rev32 v8.16b, v8.16b -+ eor v17.16b, v17.16b, v8.16b -+ -+ rev32 v8.16b, v17.16b -+ sm4e v8.4s, v0.4s -+ sm4e v8.4s, v1.4s -+ sm4e v8.4s, v2.4s -+ sm4e v8.4s, v3.4s -+ sm4e v8.4s, v4.4s -+ sm4e v8.4s, v5.4s -+ sm4e v8.4s, v6.4s -+ sm4e v8.4s, v7.4s -+ rev64 v8.4s, v8.4s -+ ext v8.16b, v8.16b, v8.16b, #8 -+ rev32 v8.16b, v8.16b -+ eor v18.16b, v18.16b, v8.16b -+ -+ rev32 v8.16b, v18.16b -+ sm4e v8.4s, v0.4s -+ sm4e v8.4s, v1.4s -+ sm4e v8.4s, v2.4s -+ sm4e v8.4s, v3.4s -+ sm4e v8.4s, v4.4s -+ sm4e v8.4s, v5.4s -+ sm4e v8.4s, v6.4s -+ sm4e v8.4s, v7.4s -+ rev64 v8.4s, v8.4s -+ ext v8.16b, v8.16b, v8.16b, #8 -+ rev32 v8.16b, v8.16b -+ eor v19.16b, v19.16b, v8.16b -+ -+ st1 {v16.4s-v19.4s}, [x1], #64 -+ mov v8.16b, v19.16b -+ -+ cbz w2, .end_cfb_enc -+ b .loop_cfb_enc_4block -+ -+.loob_cfb_enc_1block: -+ sub w2, w2, #1 -+ -+ ld1 {v16.4s}, [x0], #16 -+ -+ rev32 v8.16b, v8.16b -+ sm4e v8.4s, v0.4s -+ sm4e v8.4s, v1.4s -+ sm4e v8.4s, v2.4s -+ sm4e v8.4s, v3.4s -+ sm4e v8.4s, v4.4s -+ sm4e v8.4s, v5.4s -+ sm4e v8.4s, v6.4s -+ sm4e v8.4s, v7.4s -+ rev64 v8.4s, v8.4s -+ ext v8.16b, v8.16b, v8.16b, #8 -+ rev32 v8.16b, v8.16b -+ eor v8.16b, v8.16b, v16.16b -+ -+ st1 {v8.4s}, [x1], #16 -+ -+ cbnz w2, 
.loob_cfb_enc_1block -+ -+.end_cfb_enc: -+ st1 {v8.4s}, [x4] -+ -+ ldp d8,d9,[sp],#16 -+ ret -+.size sm4_v8_cfb_encrypt_blocks,.-sm4_v8_cfb_encrypt_blocks -+ -+.globl sm4_v8_cfb_decrypt_blocks -+.type sm4_v8_cfb_decrypt_blocks,%function -+.align 5 -+sm4_v8_cfb_decrypt_blocks: -+ /* parameters: -+ * x0: src -+ * x1: dst -+ * w2: nblocks -+ * x3: key -+ * x4: iv -+ */ -+ AARCH64_VALID_CALL_TARGET -+ stp d8,d9,[sp, #-16]! -+ -+ ld1 {v0.4s-v3.4s}, [x3], #64 -+ ld1 {v4.4s-v7.4s}, [x3] -+ -+ ld1 {v8.4s},[x4] -+ -+.loop_cfb_dec_8block: -+ cmp w2, #8 -+ blt .cfb_dec_4block -+ -+ sub w2, w2, #8 -+ -+ ld1 {v12.4s-v15.4s}, [x0], #64 -+ ld1 {v16.4s-v19.4s}, [x0], #64 -+ -+ rev32 v20.16b, v8.16b -+ rev32 v21.16b, v12.16b -+ rev32 v22.16b, v13.16b -+ rev32 v23.16b, v14.16b -+ rev32 v24.16b, v15.16b -+ rev32 v25.16b, v16.16b -+ rev32 v26.16b, v17.16b -+ rev32 v27.16b, v18.16b -+ sm4e v20.4s, v0.4s -+ sm4e v21.4s, v0.4s -+ sm4e v22.4s, v0.4s -+ sm4e v23.4s, v0.4s -+ sm4e v24.4s, v0.4s -+ sm4e v25.4s, v0.4s -+ sm4e v26.4s, v0.4s -+ sm4e v27.4s, v0.4s -+ sm4e v20.4s, v1.4s -+ sm4e v21.4s, v1.4s -+ sm4e v22.4s, v1.4s -+ sm4e v23.4s, v1.4s -+ sm4e v24.4s, v1.4s -+ sm4e v25.4s, v1.4s -+ sm4e v26.4s, v1.4s -+ sm4e v27.4s, v1.4s -+ sm4e v20.4s, v2.4s -+ sm4e v21.4s, v2.4s -+ sm4e v22.4s, v2.4s -+ sm4e v23.4s, v2.4s -+ sm4e v24.4s, v2.4s -+ sm4e v25.4s, v2.4s -+ sm4e v26.4s, v2.4s -+ sm4e v27.4s, v2.4s -+ sm4e v20.4s, v3.4s -+ sm4e v21.4s, v3.4s -+ sm4e v22.4s, v3.4s -+ sm4e v23.4s, v3.4s -+ sm4e v24.4s, v3.4s -+ sm4e v25.4s, v3.4s -+ sm4e v26.4s, v3.4s -+ sm4e v27.4s, v3.4s -+ sm4e v20.4s, v4.4s -+ sm4e v21.4s, v4.4s -+ sm4e v22.4s, v4.4s -+ sm4e v23.4s, v4.4s -+ sm4e v24.4s, v4.4s -+ sm4e v25.4s, v4.4s -+ sm4e v26.4s, v4.4s -+ sm4e v27.4s, v4.4s -+ sm4e v20.4s, v5.4s -+ sm4e v21.4s, v5.4s -+ sm4e v22.4s, v5.4s -+ sm4e v23.4s, v5.4s -+ sm4e v24.4s, v5.4s -+ sm4e v25.4s, v5.4s -+ sm4e v26.4s, v5.4s -+ sm4e v27.4s, v5.4s -+ sm4e v20.4s, v6.4s -+ sm4e v21.4s, v6.4s -+ sm4e v22.4s, v6.4s -+ 
sm4e v23.4s, v6.4s -+ sm4e v24.4s, v6.4s -+ sm4e v25.4s, v6.4s -+ sm4e v26.4s, v6.4s -+ sm4e v27.4s, v6.4s -+ sm4e v20.4s, v7.4s -+ sm4e v21.4s, v7.4s -+ sm4e v22.4s, v7.4s -+ sm4e v23.4s, v7.4s -+ sm4e v24.4s, v7.4s -+ sm4e v25.4s, v7.4s -+ sm4e v26.4s, v7.4s -+ sm4e v27.4s, v7.4s -+ rev64 v20.4s, v20.4s -+ rev64 v21.4s, v21.4s -+ rev64 v22.4s, v22.4s -+ rev64 v23.4s, v23.4s -+ rev64 v24.4s, v24.4s -+ rev64 v25.4s, v25.4s -+ rev64 v26.4s, v26.4s -+ rev64 v27.4s, v27.4s -+ ext v20.16b, v20.16b, v20.16b, #8 -+ ext v21.16b, v21.16b, v21.16b, #8 -+ ext v22.16b, v22.16b, v22.16b, #8 -+ ext v23.16b, v23.16b, v23.16b, #8 -+ ext v24.16b, v24.16b, v24.16b, #8 -+ ext v25.16b, v25.16b, v25.16b, #8 -+ ext v26.16b, v26.16b, v26.16b, #8 -+ ext v27.16b, v27.16b, v27.16b, #8 -+ rev32 v20.16b, v20.16b -+ rev32 v21.16b, v21.16b -+ rev32 v22.16b, v22.16b -+ rev32 v23.16b, v23.16b -+ rev32 v24.16b, v24.16b -+ rev32 v25.16b, v25.16b -+ rev32 v26.16b, v26.16b -+ rev32 v27.16b, v27.16b -+ -+ mov v8.16b, v19.16b //Modify IV -+ -+ eor v20.16b, v20.16b, v12.16b -+ eor v21.16b, v21.16b, v13.16b -+ eor v22.16b, v22.16b, v14.16b -+ eor v23.16b, v23.16b, v15.16b -+ eor v24.16b, v24.16b, v16.16b -+ eor v25.16b, v25.16b, v17.16b -+ eor v26.16b, v26.16b, v18.16b -+ eor v27.16b, v27.16b, v19.16b -+ -+ st1 {v20.4s-v23.4s}, [x1], #64 -+ st1 {v24.4s-v27.4s}, [x1], #64 -+ -+ cbz w2, .end_cfb_dec -+ b .loop_cfb_dec_8block -+ -+.cfb_dec_4block: -+ cmp w2, #4 -+ blt .loop_cfb_dec_1block -+ -+ sub w2, w2, #4 -+ -+ ld1 {v12.4s-v15.4s}, [x0], #64 -+ -+ rev32 v20.16b, v8.16b -+ rev32 v21.16b, v12.16b -+ rev32 v22.16b, v13.16b -+ rev32 v23.16b, v14.16b -+ sm4e v20.4s, v0.4s -+ sm4e v21.4s, v0.4s -+ sm4e v22.4s, v0.4s -+ sm4e v23.4s, v0.4s -+ sm4e v20.4s, v1.4s -+ sm4e v21.4s, v1.4s -+ sm4e v22.4s, v1.4s -+ sm4e v23.4s, v1.4s -+ sm4e v20.4s, v2.4s -+ sm4e v21.4s, v2.4s -+ sm4e v22.4s, v2.4s -+ sm4e v23.4s, v2.4s -+ sm4e v20.4s, v3.4s -+ sm4e v21.4s, v3.4s -+ sm4e v22.4s, v3.4s -+ sm4e v23.4s, v3.4s -+ sm4e 
v20.4s, v4.4s -+ sm4e v21.4s, v4.4s -+ sm4e v22.4s, v4.4s -+ sm4e v23.4s, v4.4s -+ sm4e v20.4s, v5.4s -+ sm4e v21.4s, v5.4s -+ sm4e v22.4s, v5.4s -+ sm4e v23.4s, v5.4s -+ sm4e v20.4s, v6.4s -+ sm4e v21.4s, v6.4s -+ sm4e v22.4s, v6.4s -+ sm4e v23.4s, v6.4s -+ sm4e v20.4s, v7.4s -+ sm4e v21.4s, v7.4s -+ sm4e v22.4s, v7.4s -+ sm4e v23.4s, v7.4s -+ rev64 v20.4s, v20.4s -+ rev64 v21.4s, v21.4s -+ rev64 v22.4s, v22.4s -+ rev64 v23.4s, v23.4s -+ ext v20.16b, v20.16b, v20.16b, #8 -+ ext v21.16b, v21.16b, v21.16b, #8 -+ ext v22.16b, v22.16b, v22.16b, #8 -+ ext v23.16b, v23.16b, v23.16b, #8 -+ rev32 v20.16b, v20.16b -+ rev32 v21.16b, v21.16b -+ rev32 v22.16b, v22.16b -+ rev32 v23.16b, v23.16b -+ -+ mov v8.16b, v15.16b //Modify IV -+ -+ eor v20.16b, v20.16b, v12.16b -+ eor v21.16b, v21.16b, v13.16b -+ eor v22.16b, v22.16b, v14.16b -+ eor v23.16b, v23.16b, v15.16b -+ -+ st1 {v20.4s-v23.4s}, [x1], #64 -+ -+ cbz w2, .end_cfb_dec -+ -+.loop_cfb_dec_1block: -+ sub w2, w2, #1 -+ -+ ld1 {v12.4s}, [x0], #16 -+ -+ rev32 v20.16b, v8.16b -+ sm4e v20.4s, v0.4s -+ sm4e v20.4s, v1.4s -+ sm4e v20.4s, v2.4s -+ sm4e v20.4s, v3.4s -+ sm4e v20.4s, v4.4s -+ sm4e v20.4s, v5.4s -+ sm4e v20.4s, v6.4s -+ sm4e v20.4s, v7.4s -+ rev64 v20.4s, v20.4s -+ ext v20.16b, v20.16b, v20.16b, #8 -+ rev32 v20.16b, v20.16b -+ -+ eor v20.16b, v20.16b, v12.16b -+ st1 {v20.4s}, [x1], #16 -+ -+ mov v8.16b, v12.16b //Modify IV -+ -+ cbnz w2, .loop_cfb_dec_1block -+ -+.end_cfb_dec: -+ /* store new IV */ -+ st1 {v8.4s}, [x4] -+ -+ ldp d8,d9,[sp],#16 -+ ret -+.size sm4_v8_cfb_decrypt_blocks,.-sm4_v8_cfb_decrypt_blocks -+ -+#define tweak_calc(out, in, MSK, TMP) \ -+ sshr TMP.2d, in.2d, #63; \ -+ and TMP.16b, TMP.16b, MSK.16b; \ -+ add out.2d, in.2d, in.2d; \ -+ ext TMP.16b, TMP.16b, TMP.16b, #8; \ -+ eor out.16b, out.16b, TMP.16b; -+ -+.globl sm4_v8_xts_encrypt -+.type sm4_v8_xts_encrypt,%function -+.align 5 -+sm4_v8_xts_encrypt: -+ /* parameters: -+ * x0: src -+ * x1: dst -+ * w2: nbytes -+ * x3: key -+ * x4: tweak -+ * 
x5: key array for tweak -+ */ -+ AARCH64_VALID_CALL_TARGET -+ stp d8,d9,[sp, #-16]! -+ -+ ld1 {v8.16b}, [x4] -+ -+ cbz x5, .enc_xts_nokey2 -+ -+ /* load round key array for tweak */ -+ ld1 {v0.16b-v3.16b}, [x5], #64 -+ ld1 {v4.16b-v7.16b}, [x5] -+ -+ /* first tweak */ -+ rev32 v8.16b, v8.16b -+ sm4e v8.4s, v0.4s -+ sm4e v8.4s, v1.4s -+ sm4e v8.4s, v2.4s -+ sm4e v8.4s, v3.4s -+ sm4e v8.4s, v4.4s -+ sm4e v8.4s, v5.4s -+ sm4e v8.4s, v6.4s -+ sm4e v8.4s, v7.4s -+ rev64 v8.4s, v8.4s -+ ext v8.16b, v8.16b, v8.16b, #8 -+ rev32 v8.16b, v8.16b -+ -+.enc_xts_nokey2: -+ /* load key array */ -+ ld1 {v0.16b-v3.16b}, [x3], #64 -+ ld1 {v4.16b-v7.16b}, [x3] -+ -+ and w5, w2, #15 -+ lsr w2, w2, #4 -+ cbz w5, .enc_xts_mask -+ /* leave the last block for tail */ -+ sub w2, w2, #1 -+ -+.enc_xts_mask: -+ /* init mask */ -+ movi v31.2s, #0x1 -+ movi v16.2s, #0x87 -+ uzp1 v31.4s, v31.4s, v16.4s -+ -+ cbz w2, .enc_xts_tail -+ -+.enc_xts_8block: -+ sub w2, w2, #8 -+ tbnz w2, #31, .enc_xts_4block -+ -+ tweak_calc(v9, v8, v31, v16) -+ tweak_calc(v10, v9, v31, v17) -+ tweak_calc(v11, v10, v31, v18) -+ tweak_calc(v12, v11, v31, v19) -+ tweak_calc(v13, v12, v31, v16) -+ tweak_calc(v14, v13, v31, v17) -+ tweak_calc(v15, v14, v31, v18) -+ -+ ld1 {v20.16b-v23.16b}, [x0], #64 -+ ld1 {v24.16b-v27.16b}, [x0], #64 -+ eor v20.16b, v20.16b, v8.16b -+ eor v21.16b, v21.16b, v9.16b -+ eor v22.16b, v22.16b, v10.16b -+ eor v23.16b, v23.16b, v11.16b -+ eor v24.16b, v24.16b, v12.16b -+ eor v25.16b, v25.16b, v13.16b -+ eor v26.16b, v26.16b, v14.16b -+ eor v27.16b, v27.16b, v15.16b -+ -+ rev32 v20.16b, v20.16b -+ rev32 v21.16b, v21.16b -+ rev32 v22.16b, v22.16b -+ rev32 v23.16b, v23.16b -+ rev32 v24.16b, v24.16b -+ rev32 v25.16b, v25.16b -+ rev32 v26.16b, v26.16b -+ rev32 v27.16b, v27.16b -+ sm4e v20.4s, v0.4s -+ sm4e v21.4s, v0.4s -+ sm4e v22.4s, v0.4s -+ sm4e v23.4s, v0.4s -+ sm4e v24.4s, v0.4s -+ sm4e v25.4s, v0.4s -+ sm4e v26.4s, v0.4s -+ sm4e v27.4s, v0.4s -+ sm4e v20.4s, v1.4s -+ sm4e v21.4s, v1.4s -+ sm4e 
v22.4s, v1.4s -+ sm4e v23.4s, v1.4s -+ sm4e v24.4s, v1.4s -+ sm4e v25.4s, v1.4s -+ sm4e v26.4s, v1.4s -+ sm4e v27.4s, v1.4s -+ sm4e v20.4s, v2.4s -+ sm4e v21.4s, v2.4s -+ sm4e v22.4s, v2.4s -+ sm4e v23.4s, v2.4s -+ sm4e v24.4s, v2.4s -+ sm4e v25.4s, v2.4s -+ sm4e v26.4s, v2.4s -+ sm4e v27.4s, v2.4s -+ sm4e v20.4s, v3.4s -+ sm4e v21.4s, v3.4s -+ sm4e v22.4s, v3.4s -+ sm4e v23.4s, v3.4s -+ sm4e v24.4s, v3.4s -+ sm4e v25.4s, v3.4s -+ sm4e v26.4s, v3.4s -+ sm4e v27.4s, v3.4s -+ sm4e v20.4s, v4.4s -+ sm4e v21.4s, v4.4s -+ sm4e v22.4s, v4.4s -+ sm4e v23.4s, v4.4s -+ sm4e v24.4s, v4.4s -+ sm4e v25.4s, v4.4s -+ sm4e v26.4s, v4.4s -+ sm4e v27.4s, v4.4s -+ sm4e v20.4s, v5.4s -+ sm4e v21.4s, v5.4s -+ sm4e v22.4s, v5.4s -+ sm4e v23.4s, v5.4s -+ sm4e v24.4s, v5.4s -+ sm4e v25.4s, v5.4s -+ sm4e v26.4s, v5.4s -+ sm4e v27.4s, v5.4s -+ sm4e v20.4s, v6.4s -+ sm4e v21.4s, v6.4s -+ sm4e v22.4s, v6.4s -+ sm4e v23.4s, v6.4s -+ sm4e v24.4s, v6.4s -+ sm4e v25.4s, v6.4s -+ sm4e v26.4s, v6.4s -+ sm4e v27.4s, v6.4s -+ sm4e v20.4s, v7.4s -+ sm4e v21.4s, v7.4s -+ sm4e v22.4s, v7.4s -+ sm4e v23.4s, v7.4s -+ sm4e v24.4s, v7.4s -+ sm4e v25.4s, v7.4s -+ sm4e v26.4s, v7.4s -+ sm4e v27.4s, v7.4s -+ rev64 v20.4s, v20.4s -+ rev64 v21.4s, v21.4s -+ rev64 v22.4s, v22.4s -+ rev64 v23.4s, v23.4s -+ rev64 v24.4s, v24.4s -+ rev64 v25.4s, v25.4s -+ rev64 v26.4s, v26.4s -+ rev64 v27.4s, v27.4s -+ ext v20.16b, v20.16b, v20.16b, #8 -+ ext v21.16b, v21.16b, v21.16b, #8 -+ ext v22.16b, v22.16b, v22.16b, #8 -+ ext v23.16b, v23.16b, v23.16b, #8 -+ ext v24.16b, v24.16b, v24.16b, #8 -+ ext v25.16b, v25.16b, v25.16b, #8 -+ ext v26.16b, v26.16b, v26.16b, #8 -+ ext v27.16b, v27.16b, v27.16b, #8 -+ rev32 v20.16b, v20.16b -+ rev32 v21.16b, v21.16b -+ rev32 v22.16b, v22.16b -+ rev32 v23.16b, v23.16b -+ rev32 v24.16b, v24.16b -+ rev32 v25.16b, v25.16b -+ rev32 v26.16b, v26.16b -+ rev32 v27.16b, v27.16b -+ -+ eor v20.16b, v20.16b, v8.16b -+ eor v21.16b, v21.16b, v9.16b -+ eor v22.16b, v22.16b, v10.16b -+ eor v23.16b, 
v23.16b, v11.16b -+ eor v24.16b, v24.16b, v12.16b -+ eor v25.16b, v25.16b, v13.16b -+ eor v26.16b, v26.16b, v14.16b -+ eor v27.16b, v27.16b, v15.16b -+ st1 {v20.16b-v23.16b}, [x1], #64 -+ st1 {v24.16b-v27.16b}, [x1], #64 -+ -+ tweak_calc(v8, v15, v31, v19) -+ -+ cbz w2, .enc_xts_tail -+ b .enc_xts_8block -+ -+.enc_xts_4block: -+ add w2, w2, #8 -+ cmp w2, #4 -+ blt .enc_xts_1block -+ -+ sub w2, w2, #4 -+ -+ tweak_calc(v9, v8, v31, v16) -+ tweak_calc(v10, v9, v31, v17) -+ tweak_calc(v11, v10, v31, v18) -+ -+ ld1 {v20.16b-v23.16b}, [x0], #64 -+ eor v20.16b, v20.16b, v8.16b -+ eor v21.16b, v21.16b, v9.16b -+ eor v22.16b, v22.16b, v10.16b -+ eor v23.16b, v23.16b, v11.16b -+ -+ rev32 v20.16b, v20.16b -+ rev32 v21.16b, v21.16b -+ rev32 v22.16b, v22.16b -+ rev32 v23.16b, v23.16b -+ sm4e v20.4s, v0.4s -+ sm4e v21.4s, v0.4s -+ sm4e v22.4s, v0.4s -+ sm4e v23.4s, v0.4s -+ sm4e v20.4s, v1.4s -+ sm4e v21.4s, v1.4s -+ sm4e v22.4s, v1.4s -+ sm4e v23.4s, v1.4s -+ sm4e v20.4s, v2.4s -+ sm4e v21.4s, v2.4s -+ sm4e v22.4s, v2.4s -+ sm4e v23.4s, v2.4s -+ sm4e v20.4s, v3.4s -+ sm4e v21.4s, v3.4s -+ sm4e v22.4s, v3.4s -+ sm4e v23.4s, v3.4s -+ sm4e v20.4s, v4.4s -+ sm4e v21.4s, v4.4s -+ sm4e v22.4s, v4.4s -+ sm4e v23.4s, v4.4s -+ sm4e v20.4s, v5.4s -+ sm4e v21.4s, v5.4s -+ sm4e v22.4s, v5.4s -+ sm4e v23.4s, v5.4s -+ sm4e v20.4s, v6.4s -+ sm4e v21.4s, v6.4s -+ sm4e v22.4s, v6.4s -+ sm4e v23.4s, v6.4s -+ sm4e v20.4s, v7.4s -+ sm4e v21.4s, v7.4s -+ sm4e v22.4s, v7.4s -+ sm4e v23.4s, v7.4s -+ rev64 v20.4s, v20.4s -+ rev64 v21.4s, v21.4s -+ rev64 v22.4s, v22.4s -+ rev64 v23.4s, v23.4s -+ ext v20.16b, v20.16b, v20.16b, #8 -+ ext v21.16b, v21.16b, v21.16b, #8 -+ ext v22.16b, v22.16b, v22.16b, #8 -+ ext v23.16b, v23.16b, v23.16b, #8 -+ rev32 v20.16b, v20.16b -+ rev32 v21.16b, v21.16b -+ rev32 v22.16b, v22.16b -+ rev32 v23.16b, v23.16b -+ -+ eor v20.16b, v20.16b, v8.16b -+ eor v21.16b, v21.16b, v9.16b -+ eor v22.16b, v22.16b, v10.16b -+ eor v23.16b, v23.16b, v11.16b -+ st1 {v20.16b-v23.16b}, [x1], 
#64 -+ -+ tweak_calc(v8, v11, v31, v19) -+ -+ cbz w2, .enc_xts_tail -+ -+.enc_xts_1block: -+ sub w2, w2, #1 -+ -+ ld1 {v20.16b}, [x0], #16 -+ eor v20.16b, v20.16b, v8.16b -+ -+ rev32 v20.16b, v20.16b -+ sm4e v20.4s, v0.4s -+ sm4e v20.4s, v1.4s -+ sm4e v20.4s, v2.4s -+ sm4e v20.4s, v3.4s -+ sm4e v20.4s, v4.4s -+ sm4e v20.4s, v5.4s -+ sm4e v20.4s, v6.4s -+ sm4e v20.4s, v7.4s -+ rev64 v20.4s, v20.4s -+ ext v20.16b, v20.16b, v20.16b, #8 -+ rev32 v20.16b, v20.16b -+ -+ eor v20.16b, v20.16b, v8.16b -+ st1 {v20.16b}, [x1], #16 -+ -+ tweak_calc(v8, v8, v31, v16) -+ -+ cbnz w2, .enc_xts_1block -+ -+.enc_xts_tail: -+ uxtw x5, w5 -+ cbz x5, .enc_xts_end -+ -+ tweak_calc(v9, v8, v31, v16) -+ ld1 {v20.16b}, [x0] -+ eor v20.16b, v20.16b, v8.16b -+ rev32 v20.16b, v20.16b -+ sm4e v20.4s, v0.4s -+ sm4e v20.4s, v1.4s -+ sm4e v20.4s, v2.4s -+ sm4e v20.4s, v3.4s -+ sm4e v20.4s, v4.4s -+ sm4e v20.4s, v5.4s -+ sm4e v20.4s, v6.4s -+ sm4e v20.4s, v7.4s -+ rev64 v20.4s, v20.4s -+ ext v20.16b, v20.16b, v20.16b, #8 -+ rev32 v20.16b, v20.16b -+ eor v20.16b, v20.16b, v8.16b -+ -+ adr x6, .cts_permute_table -+ add x7, x6, #32 -+ add x6, x6, x5 -+ sub x7, x7, x5 -+ ld1 {v23.16b}, [x6] -+ ld1 {v24.16b}, [x7] -+ -+ add x0, x0, x5 -+ ld1 {v21.16b}, [x0] -+ -+ tbl v22.16b, {v20.16b}, v23.16b -+ tbx v20.16b, {v21.16b}, v24.16b -+ -+ eor v20.16b, v20.16b, v9.16b -+ rev32 v20.16b, v20.16b -+ sm4e v20.4s, v0.4s -+ sm4e v20.4s, v1.4s -+ sm4e v20.4s, v2.4s -+ sm4e v20.4s, v3.4s -+ sm4e v20.4s, v4.4s -+ sm4e v20.4s, v5.4s -+ sm4e v20.4s, v6.4s -+ sm4e v20.4s, v7.4s -+ rev64 v20.4s, v20.4s -+ ext v20.16b, v20.16b, v20.16b, #8 -+ rev32 v20.16b, v20.16b -+ eor v20.16b, v20.16b, v9.16b -+ -+ add x5, x1, x5 -+ st1 {v22.16b}, [x5] -+ st1 {v20.16b}, [x1] -+ -+ b .enc_xts_ret -+ -+.enc_xts_end: -+ /* new tweak */ -+ st1 {v8.16b}, [x4] -+ -+.enc_xts_ret: -+ ldp d8,d9,[sp],#16 -+ ret -+.size sm4_v8_xts_encrypt,.-sm4_v8_xts_encrypt -+ -+.globl sm4_v8_xts_decrypt -+.type sm4_v8_xts_decrypt,%function -+.align 5 
-+sm4_v8_xts_decrypt: -+ /* parameters: -+ * x0: src -+ * x1: dst -+ * w2: nbytes -+ * x3: key -+ * x4: tweak -+ * x5: key array for tweak -+ */ -+ AARCH64_VALID_CALL_TARGET -+ stp d8,d9,[sp, #-16]! -+ -+ ld1 {v8.16b}, [x4] -+ -+ cbz x5, .dec_xts_nokey2 -+ -+ /* load round key array for tweak */ -+ ld1 {v0.16b-v3.16b}, [x5], #64 -+ ld1 {v4.16b-v7.16b}, [x5] -+ -+ /* first tweak */ -+ rev32 v8.16b, v8.16b -+ sm4e v8.4s, v0.4s -+ sm4e v8.4s, v1.4s -+ sm4e v8.4s, v2.4s -+ sm4e v8.4s, v3.4s -+ sm4e v8.4s, v4.4s -+ sm4e v8.4s, v5.4s -+ sm4e v8.4s, v6.4s -+ sm4e v8.4s, v7.4s -+ rev64 v8.4s, v8.4s -+ ext v8.16b, v8.16b, v8.16b, #8 -+ rev32 v8.16b, v8.16b -+ -+.dec_xts_nokey2: -+ ld1 {v0.16b-v3.16b}, [x3], #64 -+ ld1 {v4.16b-v7.16b}, [x3] -+ -+ and w5, w2, #15 -+ lsr w2, w2, #4 -+ cbz w5, .dec_xts_mask -+ /* leave the last block for tail */ -+ sub w2, w2, #1 -+ -+.dec_xts_mask: -+ /* init mask */ -+ movi v31.2s, #0x1 -+ movi v16.2s, #0x87 -+ uzp1 v31.4s, v31.4s, v16.4s -+ -+ cbz w2, .dec_xts_tail -+ -+.dec_xts_8block: -+ sub w2, w2, #8 -+ tbnz w2, #31, .dec_xts_4block -+ -+ tweak_calc(v9, v8, v31, v16) -+ tweak_calc(v10, v9, v31, v17) -+ tweak_calc(v11, v10, v31, v18) -+ tweak_calc(v12, v11, v31, v19) -+ tweak_calc(v13, v12, v31, v16) -+ tweak_calc(v14, v13, v31, v17) -+ tweak_calc(v15, v14, v31, v18) -+ -+ ld1 {v20.16b-v23.16b}, [x0], #64 -+ ld1 {v24.16b-v27.16b}, [x0], #64 -+ eor v20.16b, v20.16b, v8.16b -+ eor v21.16b, v21.16b, v9.16b -+ eor v22.16b, v22.16b, v10.16b -+ eor v23.16b, v23.16b, v11.16b -+ eor v24.16b, v24.16b, v12.16b -+ eor v25.16b, v25.16b, v13.16b -+ eor v26.16b, v26.16b, v14.16b -+ eor v27.16b, v27.16b, v15.16b -+ -+ rev32 v20.16b, v20.16b -+ rev32 v21.16b, v21.16b -+ rev32 v22.16b, v22.16b -+ rev32 v23.16b, v23.16b -+ rev32 v24.16b, v24.16b -+ rev32 v25.16b, v25.16b -+ rev32 v26.16b, v26.16b -+ rev32 v27.16b, v27.16b -+ sm4e v20.4s, v0.4s -+ sm4e v21.4s, v0.4s -+ sm4e v22.4s, v0.4s -+ sm4e v23.4s, v0.4s -+ sm4e v24.4s, v0.4s -+ sm4e v25.4s, v0.4s -+ 
sm4e v26.4s, v0.4s -+ sm4e v27.4s, v0.4s -+ sm4e v20.4s, v1.4s -+ sm4e v21.4s, v1.4s -+ sm4e v22.4s, v1.4s -+ sm4e v23.4s, v1.4s -+ sm4e v24.4s, v1.4s -+ sm4e v25.4s, v1.4s -+ sm4e v26.4s, v1.4s -+ sm4e v27.4s, v1.4s -+ sm4e v20.4s, v2.4s -+ sm4e v21.4s, v2.4s -+ sm4e v22.4s, v2.4s -+ sm4e v23.4s, v2.4s -+ sm4e v24.4s, v2.4s -+ sm4e v25.4s, v2.4s -+ sm4e v26.4s, v2.4s -+ sm4e v27.4s, v2.4s -+ sm4e v20.4s, v3.4s -+ sm4e v21.4s, v3.4s -+ sm4e v22.4s, v3.4s -+ sm4e v23.4s, v3.4s -+ sm4e v24.4s, v3.4s -+ sm4e v25.4s, v3.4s -+ sm4e v26.4s, v3.4s -+ sm4e v27.4s, v3.4s -+ sm4e v20.4s, v4.4s -+ sm4e v21.4s, v4.4s -+ sm4e v22.4s, v4.4s -+ sm4e v23.4s, v4.4s -+ sm4e v24.4s, v4.4s -+ sm4e v25.4s, v4.4s -+ sm4e v26.4s, v4.4s -+ sm4e v27.4s, v4.4s -+ sm4e v20.4s, v5.4s -+ sm4e v21.4s, v5.4s -+ sm4e v22.4s, v5.4s -+ sm4e v23.4s, v5.4s -+ sm4e v24.4s, v5.4s -+ sm4e v25.4s, v5.4s -+ sm4e v26.4s, v5.4s -+ sm4e v27.4s, v5.4s -+ sm4e v20.4s, v6.4s -+ sm4e v21.4s, v6.4s -+ sm4e v22.4s, v6.4s -+ sm4e v23.4s, v6.4s -+ sm4e v24.4s, v6.4s -+ sm4e v25.4s, v6.4s -+ sm4e v26.4s, v6.4s -+ sm4e v27.4s, v6.4s -+ sm4e v20.4s, v7.4s -+ sm4e v21.4s, v7.4s -+ sm4e v22.4s, v7.4s -+ sm4e v23.4s, v7.4s -+ sm4e v24.4s, v7.4s -+ sm4e v25.4s, v7.4s -+ sm4e v26.4s, v7.4s -+ sm4e v27.4s, v7.4s -+ rev64 v20.4s, v20.4s -+ rev64 v21.4s, v21.4s -+ rev64 v22.4s, v22.4s -+ rev64 v23.4s, v23.4s -+ rev64 v24.4s, v24.4s -+ rev64 v25.4s, v25.4s -+ rev64 v26.4s, v26.4s -+ rev64 v27.4s, v27.4s -+ ext v20.16b, v20.16b, v20.16b, #8 -+ ext v21.16b, v21.16b, v21.16b, #8 -+ ext v22.16b, v22.16b, v22.16b, #8 -+ ext v23.16b, v23.16b, v23.16b, #8 -+ ext v24.16b, v24.16b, v24.16b, #8 -+ ext v25.16b, v25.16b, v25.16b, #8 -+ ext v26.16b, v26.16b, v26.16b, #8 -+ ext v27.16b, v27.16b, v27.16b, #8 -+ rev32 v20.16b, v20.16b -+ rev32 v21.16b, v21.16b -+ rev32 v22.16b, v22.16b -+ rev32 v23.16b, v23.16b -+ rev32 v24.16b, v24.16b -+ rev32 v25.16b, v25.16b -+ rev32 v26.16b, v26.16b -+ rev32 v27.16b, v27.16b -+ -+ eor v20.16b, v20.16b, 
v8.16b -+ eor v21.16b, v21.16b, v9.16b -+ eor v22.16b, v22.16b, v10.16b -+ eor v23.16b, v23.16b, v11.16b -+ eor v24.16b, v24.16b, v12.16b -+ eor v25.16b, v25.16b, v13.16b -+ eor v26.16b, v26.16b, v14.16b -+ eor v27.16b, v27.16b, v15.16b -+ st1 {v20.16b-v23.16b}, [x1], #64 -+ st1 {v24.16b-v27.16b}, [x1], #64 -+ -+ tweak_calc(v8, v15, v31, v19) -+ -+ cbz w2, .dec_xts_tail -+ b .dec_xts_8block -+ -+.dec_xts_4block: -+ add w2, w2, #8 -+ cmp w2, #4 -+ blt .dec_xts_1block -+ -+ sub w2, w2, #4 -+ -+ tweak_calc(v9, v8, v31, v16) -+ tweak_calc(v10, v9, v31, v17) -+ tweak_calc(v11, v10, v31, v18) -+ -+ ld1 {v20.16b-v23.16b}, [x0], #64 -+ eor v20.16b, v20.16b, v8.16b -+ eor v21.16b, v21.16b, v9.16b -+ eor v22.16b, v22.16b, v10.16b -+ eor v23.16b, v23.16b, v11.16b -+ -+ rev32 v20.16b, v20.16b -+ rev32 v21.16b, v21.16b -+ rev32 v22.16b, v22.16b -+ rev32 v23.16b, v23.16b -+ sm4e v20.4s, v0.4s -+ sm4e v21.4s, v0.4s -+ sm4e v22.4s, v0.4s -+ sm4e v23.4s, v0.4s -+ sm4e v20.4s, v1.4s -+ sm4e v21.4s, v1.4s -+ sm4e v22.4s, v1.4s -+ sm4e v23.4s, v1.4s -+ sm4e v20.4s, v2.4s -+ sm4e v21.4s, v2.4s -+ sm4e v22.4s, v2.4s -+ sm4e v23.4s, v2.4s -+ sm4e v20.4s, v3.4s -+ sm4e v21.4s, v3.4s -+ sm4e v22.4s, v3.4s -+ sm4e v23.4s, v3.4s -+ sm4e v20.4s, v4.4s -+ sm4e v21.4s, v4.4s -+ sm4e v22.4s, v4.4s -+ sm4e v23.4s, v4.4s -+ sm4e v20.4s, v5.4s -+ sm4e v21.4s, v5.4s -+ sm4e v22.4s, v5.4s -+ sm4e v23.4s, v5.4s -+ sm4e v20.4s, v6.4s -+ sm4e v21.4s, v6.4s -+ sm4e v22.4s, v6.4s -+ sm4e v23.4s, v6.4s -+ sm4e v20.4s, v7.4s -+ sm4e v21.4s, v7.4s -+ sm4e v22.4s, v7.4s -+ sm4e v23.4s, v7.4s -+ rev64 v20.4s, v20.4s -+ rev64 v21.4s, v21.4s -+ rev64 v22.4s, v22.4s -+ rev64 v23.4s, v23.4s -+ ext v20.16b, v20.16b, v20.16b, #8 -+ ext v21.16b, v21.16b, v21.16b, #8 -+ ext v22.16b, v22.16b, v22.16b, #8 -+ ext v23.16b, v23.16b, v23.16b, #8 -+ rev32 v20.16b, v20.16b -+ rev32 v21.16b, v21.16b -+ rev32 v22.16b, v22.16b -+ rev32 v23.16b, v23.16b -+ -+ eor v20.16b, v20.16b, v8.16b -+ eor v21.16b, v21.16b, v9.16b -+ eor 
v22.16b, v22.16b, v10.16b -+ eor v23.16b, v23.16b, v11.16b -+ st1 {v20.16b-v23.16b}, [x1], #64 -+ -+ tweak_calc(v8, v11, v31, v19) -+ -+ cbz w2, .dec_xts_tail -+ -+.dec_xts_1block: -+ sub w2, w2, #1 -+ -+ ld1 {v20.16b}, [x0], #16 -+ eor v20.16b, v20.16b, v8.16b -+ -+ rev32 v20.16b, v20.16b -+ sm4e v20.4s, v0.4s -+ sm4e v20.4s, v1.4s -+ sm4e v20.4s, v2.4s -+ sm4e v20.4s, v3.4s -+ sm4e v20.4s, v4.4s -+ sm4e v20.4s, v5.4s -+ sm4e v20.4s, v6.4s -+ sm4e v20.4s, v7.4s -+ rev64 v20.4s, v20.4s -+ ext v20.16b, v20.16b, v20.16b, #8 -+ rev32 v20.16b, v20.16b -+ -+ eor v20.16b, v20.16b, v8.16b -+ st1 {v20.16b}, [x1], #16 -+ -+ tweak_calc(v8, v8, v31, v16) -+ -+ cbnz w2, .dec_xts_1block -+ -+.dec_xts_tail: -+ uxtw x5, w5 -+ cbz x5, .dec_xts_end -+ -+ tweak_calc(v9, v8, v31, v16) -+ ld1 {v20.16b}, [x0] -+ eor v20.16b, v20.16b, v9.16b -+ rev32 v20.16b, v20.16b -+ sm4e v20.4s, v0.4s -+ sm4e v20.4s, v1.4s -+ sm4e v20.4s, v2.4s -+ sm4e v20.4s, v3.4s -+ sm4e v20.4s, v4.4s -+ sm4e v20.4s, v5.4s -+ sm4e v20.4s, v6.4s -+ sm4e v20.4s, v7.4s -+ rev64 v20.4s, v20.4s -+ ext v20.16b, v20.16b, v20.16b, #8 -+ rev32 v20.16b, v20.16b -+ eor v20.16b, v20.16b, v9.16b -+ -+ adr x6, .cts_permute_table -+ add x7, x6, #32 -+ add x6, x6, x5 -+ sub x7, x7, x5 -+ ld1 {v23.16b}, [x6] -+ ld1 {v24.16b}, [x7] -+ -+ add x0, x0, x5 -+ ld1 {v21.16b}, [x0] -+ -+ tbl v22.16b, {v20.16b}, v23.16b -+ tbx v20.16b, {v21.16b}, v24.16b -+ -+ eor v20.16b, v20.16b, v8.16b -+ rev32 v20.16b, v20.16b -+ sm4e v20.4s, v0.4s -+ sm4e v20.4s, v1.4s -+ sm4e v20.4s, v2.4s -+ sm4e v20.4s, v3.4s -+ sm4e v20.4s, v4.4s -+ sm4e v20.4s, v5.4s -+ sm4e v20.4s, v6.4s -+ sm4e v20.4s, v7.4s -+ rev64 v20.4s, v20.4s -+ ext v20.16b, v20.16b, v20.16b, #8 -+ rev32 v20.16b, v20.16b -+ eor v20.16b, v20.16b, v8.16b -+ -+ add x5, x1, x5 -+ st1 {v22.16b}, [x5] -+ st1 {v20.16b}, [x1] -+ -+ b .dec_xts_ret -+ -+.dec_xts_end: -+ /* new tweak */ -+ st1 {v8.16b}, [x4] -+ -+.dec_xts_ret: -+ ldp d8,d9,[sp],#16 -+ ret -+.size 
sm4_v8_xts_decrypt,.-sm4_v8_xts_decrypt --- -2.25.1 - diff --git a/0027-cipher-add-support-for-SM4-ECB-algorithm-in-CE-instr.patch b/0027-cipher-add-support-for-SM4-ECB-algorithm-in-CE-instr.patch deleted file mode 100644 index 2deb528..0000000 --- a/0027-cipher-add-support-for-SM4-ECB-algorithm-in-CE-instr.patch +++ /dev/null @@ -1,348 +0,0 @@ -From 6e66b445df0d39b9e796d1a4afcbe617197278de Mon Sep 17 00:00:00 2001 -From: Qi Tao -Date: Wed, 20 Mar 2024 16:13:45 +0800 -Subject: [PATCH 27/44] cipher: add support for SM4(ECB) algorithm in CE - instruction - -Provides the CE acceleration instruction (Crypto-Extension) -to accelerate the execution of the SM4(ECB) algorithm. - -Signed-off-by: Qi Tao ---- - drv/isa_ce_sm4.c | 17 +++ - drv/isa_ce_sm4.h | 2 + - drv/isa_ce_sm4_armv8.S | 263 +++++++++++++++++++++++++++++++++++++++++ - 3 files changed, 282 insertions(+) - -diff --git a/drv/isa_ce_sm4.c b/drv/isa_ce_sm4.c -index 466b060..ccab8fb 100644 ---- a/drv/isa_ce_sm4.c -+++ b/drv/isa_ce_sm4.c -@@ -128,6 +128,16 @@ static void sm4_cbc_decrypt(struct wd_cipher_msg *msg, const struct SM4_KEY *rke - sm4_v8_cbc_encrypt(msg->in, msg->out, msg->in_bytes, rkey_dec, msg->iv, SM4_DECRYPT); - } - -+static void sm4_ecb_encrypt(struct wd_cipher_msg *msg, const struct SM4_KEY *rkey_enc) -+{ -+ sm4_v8_ecb_encrypt(msg->in, msg->out, msg->in_bytes, rkey_enc, SM4_ENCRYPT); -+} -+ -+static void sm4_ecb_decrypt(struct wd_cipher_msg *msg, const struct SM4_KEY *rkey_dec) -+{ -+ sm4_v8_ecb_encrypt(msg->in, msg->out, msg->in_bytes, rkey_dec, SM4_DECRYPT); -+} -+ - void sm4_set_encrypt_key(const __u8 *userKey, struct SM4_KEY *key) - { - sm4_v8_set_encrypt_key(userKey, key); -@@ -254,6 +264,12 @@ static int isa_ce_cipher_send(struct wd_alg_driver *drv, handle_t ctx, void *wd_ - sm4_set_decrypt_key(msg->key, &rkey); - - switch (msg->mode) { -+ case WD_CIPHER_ECB: -+ if (msg->op_type == WD_CIPHER_ENCRYPTION) -+ sm4_ecb_encrypt(msg, &rkey); -+ else -+ sm4_ecb_decrypt(msg, &rkey); -+ break; - case 
WD_CIPHER_CBC: - if (msg->op_type == WD_CIPHER_ENCRYPTION) - sm4_cbc_encrypt(msg, &rkey); -@@ -317,6 +333,7 @@ static struct wd_alg_driver cipher_alg_driver[] = { - GEN_CE_ALG_DRIVER("ctr(sm4)", cipher), - GEN_CE_ALG_DRIVER("cfb(sm4)", cipher), - GEN_CE_ALG_DRIVER("xts(sm4)", cipher), -+ GEN_CE_ALG_DRIVER("ecb(sm4)", cipher), - }; - - static void __attribute__((constructor)) isa_ce_probe(void) -diff --git a/drv/isa_ce_sm4.h b/drv/isa_ce_sm4.h -index d92069f..d10b0af 100644 ---- a/drv/isa_ce_sm4.h -+++ b/drv/isa_ce_sm4.h -@@ -28,6 +28,8 @@ void sm4_v8_set_decrypt_key(const unsigned char *userKey, struct SM4_KEY *key); - void sm4_v8_cbc_encrypt(const unsigned char *in, unsigned char *out, - size_t length, const struct SM4_KEY *key, - unsigned char *ivec, const int enc); -+void sm4_v8_ecb_encrypt(const unsigned char *in, unsigned char *out, -+ size_t length, const struct SM4_KEY *key, const int enc); - void sm4_v8_ctr32_encrypt_blocks(const unsigned char *in, unsigned char *out, - size_t len, const void *key, const unsigned char ivec[16]); - -diff --git a/drv/isa_ce_sm4_armv8.S b/drv/isa_ce_sm4_armv8.S -index 342dfa5..7d84496 100644 ---- a/drv/isa_ce_sm4_armv8.S -+++ b/drv/isa_ce_sm4_armv8.S -@@ -506,6 +506,269 @@ sm4_v8_cbc_encrypt: - ldp d8,d9,[sp],#16 - ret - .size sm4_v8_cbc_encrypt,.-sm4_v8_cbc_encrypt -+.globl sm4_v8_ecb_encrypt -+.type sm4_v8_ecb_encrypt,%function -+.align 5 -+sm4_v8_ecb_encrypt: -+ AARCH64_VALID_CALL_TARGET -+ ld1 {v0.4s,v1.4s,v2.4s,v3.4s},[x3],#64 -+ ld1 {v4.4s,v5.4s,v6.4s,v7.4s},[x3] -+1: -+ cmp x2,#64 -+ b.lt 1f -+ ld1 {v16.4s,v17.4s,v18.4s,v19.4s},[x0],#64 -+ cmp x2,#128 -+ b.lt 2f -+ ld1 {v20.4s,v21.4s,v22.4s,v23.4s},[x0],#64 -+ // 8 blocks -+#ifndef __ARMEB__ -+ rev32 v16.16b,v16.16b -+#endif -+#ifndef __ARMEB__ -+ rev32 v17.16b,v17.16b -+#endif -+#ifndef __ARMEB__ -+ rev32 v18.16b,v18.16b -+#endif -+#ifndef __ARMEB__ -+ rev32 v19.16b,v19.16b -+#endif -+#ifndef __ARMEB__ -+ rev32 v20.16b,v20.16b -+#endif -+#ifndef __ARMEB__ -+ rev32 
v21.16b,v21.16b -+#endif -+#ifndef __ARMEB__ -+ rev32 v22.16b,v22.16b -+#endif -+#ifndef __ARMEB__ -+ rev32 v23.16b,v23.16b -+#endif -+ sm4e v16.4s,v0.4s; -+ sm4e v17.4s,v0.4s; -+ sm4e v18.4s,v0.4s; -+ sm4e v19.4s,v0.4s; -+ -+ sm4e v16.4s,v1.4s; -+ sm4e v17.4s,v1.4s; -+ sm4e v18.4s,v1.4s; -+ sm4e v19.4s,v1.4s; -+ -+ sm4e v16.4s,v2.4s; -+ sm4e v17.4s,v2.4s; -+ sm4e v18.4s,v2.4s; -+ sm4e v19.4s,v2.4s; -+ -+ sm4e v16.4s,v3.4s; -+ sm4e v17.4s,v3.4s; -+ sm4e v18.4s,v3.4s; -+ sm4e v19.4s,v3.4s; -+ -+ sm4e v16.4s,v4.4s; -+ sm4e v17.4s,v4.4s; -+ sm4e v18.4s,v4.4s; -+ sm4e v19.4s,v4.4s; -+ -+ sm4e v16.4s,v5.4s; -+ sm4e v17.4s,v5.4s; -+ sm4e v18.4s,v5.4s; -+ sm4e v19.4s,v5.4s; -+ -+ sm4e v16.4s,v6.4s; -+ sm4e v17.4s,v6.4s; -+ sm4e v18.4s,v6.4s; -+ sm4e v19.4s,v6.4s; -+ -+ sm4e v16.4s,v7.4s; -+ rev64 v16.4S,v16.4S -+ sm4e v17.4s,v7.4s; -+ ext v16.16b,v16.16b,v16.16b,#8 -+ rev64 v17.4S,v17.4S -+ sm4e v18.4s,v7.4s; -+ ext v17.16b,v17.16b,v17.16b,#8 -+ rev64 v18.4S,v18.4S -+ sm4e v19.4s,v7.4s; -+ ext v18.16b,v18.16b,v18.16b,#8 -+ rev64 v19.4S,v19.4S -+ ext v19.16b,v19.16b,v19.16b,#8 -+ sm4e v20.4s,v0.4s; -+ sm4e v21.4s,v0.4s; -+ sm4e v22.4s,v0.4s; -+ sm4e v23.4s,v0.4s; -+ -+ sm4e v20.4s,v1.4s; -+ sm4e v21.4s,v1.4s; -+ sm4e v22.4s,v1.4s; -+ sm4e v23.4s,v1.4s; -+ -+ sm4e v20.4s,v2.4s; -+ sm4e v21.4s,v2.4s; -+ sm4e v22.4s,v2.4s; -+ sm4e v23.4s,v2.4s; -+ -+ sm4e v20.4s,v3.4s; -+ sm4e v21.4s,v3.4s; -+ sm4e v22.4s,v3.4s; -+ sm4e v23.4s,v3.4s; -+ -+ sm4e v20.4s,v4.4s; -+ sm4e v21.4s,v4.4s; -+ sm4e v22.4s,v4.4s; -+ sm4e v23.4s,v4.4s; -+ -+ sm4e v20.4s,v5.4s; -+ sm4e v21.4s,v5.4s; -+ sm4e v22.4s,v5.4s; -+ sm4e v23.4s,v5.4s; -+ -+ sm4e v20.4s,v6.4s; -+ sm4e v21.4s,v6.4s; -+ sm4e v22.4s,v6.4s; -+ sm4e v23.4s,v6.4s; -+ -+ sm4e v20.4s,v7.4s; -+ rev64 v20.4S,v20.4S -+ sm4e v21.4s,v7.4s; -+ ext v20.16b,v20.16b,v20.16b,#8 -+ rev64 v21.4S,v21.4S -+ sm4e v22.4s,v7.4s; -+ ext v21.16b,v21.16b,v21.16b,#8 -+ rev64 v22.4S,v22.4S -+ sm4e v23.4s,v7.4s; -+ ext v22.16b,v22.16b,v22.16b,#8 -+ rev64 
v23.4S,v23.4S -+ ext v23.16b,v23.16b,v23.16b,#8 -+#ifndef __ARMEB__ -+ rev32 v16.16b,v16.16b -+#endif -+#ifndef __ARMEB__ -+ rev32 v17.16b,v17.16b -+#endif -+#ifndef __ARMEB__ -+ rev32 v18.16b,v18.16b -+#endif -+#ifndef __ARMEB__ -+ rev32 v19.16b,v19.16b -+#endif -+#ifndef __ARMEB__ -+ rev32 v20.16b,v20.16b -+#endif -+#ifndef __ARMEB__ -+ rev32 v21.16b,v21.16b -+#endif -+ st1 {v16.4s,v17.4s,v18.4s,v19.4s},[x1],#64 -+#ifndef __ARMEB__ -+ rev32 v22.16b,v22.16b -+#endif -+#ifndef __ARMEB__ -+ rev32 v23.16b,v23.16b -+#endif -+ st1 {v20.4s,v21.4s,v22.4s,v23.4s},[x1],#64 -+ subs x2,x2,#128 -+ b.gt 1b -+ ret -+ // 4 blocks -+2: -+#ifndef __ARMEB__ -+ rev32 v16.16b,v16.16b -+#endif -+#ifndef __ARMEB__ -+ rev32 v17.16b,v17.16b -+#endif -+#ifndef __ARMEB__ -+ rev32 v18.16b,v18.16b -+#endif -+#ifndef __ARMEB__ -+ rev32 v19.16b,v19.16b -+#endif -+ sm4e v16.4s,v0.4s; -+ sm4e v17.4s,v0.4s; -+ sm4e v18.4s,v0.4s; -+ sm4e v19.4s,v0.4s; -+ -+ sm4e v16.4s,v1.4s; -+ sm4e v17.4s,v1.4s; -+ sm4e v18.4s,v1.4s; -+ sm4e v19.4s,v1.4s; -+ -+ sm4e v16.4s,v2.4s; -+ sm4e v17.4s,v2.4s; -+ sm4e v18.4s,v2.4s; -+ sm4e v19.4s,v2.4s; -+ -+ sm4e v16.4s,v3.4s; -+ sm4e v17.4s,v3.4s; -+ sm4e v18.4s,v3.4s; -+ sm4e v19.4s,v3.4s; -+ -+ sm4e v16.4s,v4.4s; -+ sm4e v17.4s,v4.4s; -+ sm4e v18.4s,v4.4s; -+ sm4e v19.4s,v4.4s; -+ -+ sm4e v16.4s,v5.4s; -+ sm4e v17.4s,v5.4s; -+ sm4e v18.4s,v5.4s; -+ sm4e v19.4s,v5.4s; -+ -+ sm4e v16.4s,v6.4s; -+ sm4e v17.4s,v6.4s; -+ sm4e v18.4s,v6.4s; -+ sm4e v19.4s,v6.4s; -+ -+ sm4e v16.4s,v7.4s; -+ rev64 v16.4S,v16.4S -+ sm4e v17.4s,v7.4s; -+ ext v16.16b,v16.16b,v16.16b,#8 -+ rev64 v17.4S,v17.4S -+ sm4e v18.4s,v7.4s; -+ ext v17.16b,v17.16b,v17.16b,#8 -+ rev64 v18.4S,v18.4S -+ sm4e v19.4s,v7.4s; -+ ext v18.16b,v18.16b,v18.16b,#8 -+ rev64 v19.4S,v19.4S -+ ext v19.16b,v19.16b,v19.16b,#8 -+#ifndef __ARMEB__ -+ rev32 v16.16b,v16.16b -+#endif -+#ifndef __ARMEB__ -+ rev32 v17.16b,v17.16b -+#endif -+#ifndef __ARMEB__ -+ rev32 v18.16b,v18.16b -+#endif -+#ifndef __ARMEB__ -+ rev32 
v19.16b,v19.16b -+#endif -+ st1 {v16.4s,v17.4s,v18.4s,v19.4s},[x1],#64 -+ subs x2,x2,#64 -+ b.gt 1b -+1: -+ subs x2,x2,#16 -+ b.lt 1f -+ ld1 {v16.4s},[x0],#16 -+#ifndef __ARMEB__ -+ rev32 v16.16b,v16.16b -+#endif -+ sm4e v16.4s,v0.4s; -+ sm4e v16.4s,v1.4s; -+ sm4e v16.4s,v2.4s; -+ sm4e v16.4s,v3.4s; -+ sm4e v16.4s,v4.4s; -+ sm4e v16.4s,v5.4s; -+ sm4e v16.4s,v6.4s; -+ sm4e v16.4s,v7.4s; -+ rev64 v16.4S,v16.4S -+ ext v16.16b,v16.16b,v16.16b,#8 -+#ifndef __ARMEB__ -+ rev32 v16.16b,v16.16b -+#endif -+ st1 {v16.4s},[x1],#16 -+ b.ne 1b -+1: -+ ret -+.size sm4_v8_ecb_encrypt,.-sm4_v8_ecb_encrypt - .globl sm4_v8_ctr32_encrypt_blocks - .type sm4_v8_ctr32_encrypt_blocks,%function - .align 5 --- -2.25.1 - diff --git a/0028-uadk-cipher-isa_ce-support-SM4-cbc_cts-mode.patch b/0028-uadk-cipher-isa_ce-support-SM4-cbc_cts-mode.patch deleted file mode 100644 index 3ef0b90..0000000 --- a/0028-uadk-cipher-isa_ce-support-SM4-cbc_cts-mode.patch +++ /dev/null @@ -1,337 +0,0 @@ -From 8c23969dacd7b1ae1b77c1118a8f895bec6fd165 Mon Sep 17 00:00:00 2001 -From: Yang Shen -Date: Wed, 20 Mar 2024 16:15:00 +0800 -Subject: [PATCH 28/44] uadk/cipher: isa_ce - support SM4 cbc_cts mode - -This patch implements the CE instruction using SM4 CBC_CTS modes. - -Signed-off-by: Yang Shen -Signed-off-by: Qi Tao ---- - drv/isa_ce_sm4.c | 91 +++++++++++++++++++++++++++- - drv/isa_ce_sm4.h | 24 +++++--- - drv/isa_ce_sm4_armv8.S | 133 +++++++++++++++++++++++++++++++++++++++++ - 3 files changed, 238 insertions(+), 10 deletions(-) - -diff --git a/drv/isa_ce_sm4.c b/drv/isa_ce_sm4.c -index ccab8fb..6961471 100644 ---- a/drv/isa_ce_sm4.c -+++ b/drv/isa_ce_sm4.c -@@ -128,6 +128,82 @@ static void sm4_cbc_decrypt(struct wd_cipher_msg *msg, const struct SM4_KEY *rke - sm4_v8_cbc_encrypt(msg->in, msg->out, msg->in_bytes, rkey_dec, msg->iv, SM4_DECRYPT); - } - -+/* -+ * In some situations, the cts mode can use cbc mode instead to imporve performance. 
-+ */ -+static int sm4_cts_cbc_instead(struct wd_cipher_msg *msg) -+{ -+ if (msg->in_bytes == SM4_BLOCK_SIZE) -+ return true; -+ -+ if (!(msg->in_bytes % SM4_BLOCK_SIZE) && msg->mode != WD_CIPHER_CBC_CS3) -+ return true; -+ -+ return false; -+} -+ -+static void sm4_cts_cs1_mode_adapt(__u8 *cts_in, __u8 *cts_out, -+ const __u32 cts_bytes, const int enc) -+{ -+ __u32 rsv_bytes = cts_bytes % SM4_BLOCK_SIZE; -+ __u8 blocks[SM4_BLOCK_SIZE] = {0}; -+ -+ if (enc == SM4_ENCRYPT) { -+ memcpy(blocks, cts_out, SM4_BLOCK_SIZE); -+ memcpy(cts_out, cts_out + SM4_BLOCK_SIZE, rsv_bytes); -+ memcpy(cts_out + rsv_bytes, blocks, SM4_BLOCK_SIZE); -+ } else { -+ memcpy(blocks, cts_in + rsv_bytes, SM4_BLOCK_SIZE); -+ memcpy(cts_in + SM4_BLOCK_SIZE, cts_in, rsv_bytes); -+ memcpy(cts_in, blocks, SM4_BLOCK_SIZE); -+ } -+} -+ -+static void sm4_cts_cbc_crypt(struct wd_cipher_msg *msg, -+ const struct SM4_KEY *rkey_enc, const int enc) -+{ -+ enum wd_cipher_mode mode = msg->mode; -+ __u32 in_bytes = msg->in_bytes; -+ __u8 *cts_in, *cts_out; -+ __u32 cts_bytes; -+ -+ if (sm4_cts_cbc_instead(msg)) -+ return sm4_v8_cbc_encrypt(msg->in, msg->out, in_bytes, rkey_enc, msg->iv, enc); -+ -+ cts_bytes = in_bytes % SM4_BLOCK_SIZE + SM4_BLOCK_SIZE; -+ if (cts_bytes == SM4_BLOCK_SIZE) -+ cts_bytes += SM4_BLOCK_SIZE; -+ -+ in_bytes -= cts_bytes; -+ if (in_bytes) -+ sm4_v8_cbc_encrypt(msg->in, msg->out, in_bytes, rkey_enc, msg->iv, enc); -+ -+ cts_in = msg->in + in_bytes; -+ cts_out = msg->out + in_bytes; -+ -+ if (enc == SM4_ENCRYPT) { -+ sm4_v8_cbc_cts_encrypt(cts_in, cts_out, cts_bytes, rkey_enc, msg->iv); -+ -+ if (mode == WD_CIPHER_CBC_CS1) -+ sm4_cts_cs1_mode_adapt(cts_in, cts_out, cts_bytes, enc); -+ } else { -+ if (mode == WD_CIPHER_CBC_CS1) -+ sm4_cts_cs1_mode_adapt(cts_in, cts_out, cts_bytes, enc); -+ -+ sm4_v8_cbc_cts_decrypt(cts_in, cts_out, cts_bytes, rkey_enc, msg->iv); -+ } -+} -+ -+static void sm4_cbc_cts_encrypt(struct wd_cipher_msg *msg, const struct SM4_KEY *rkey_enc) -+{ -+ 
sm4_cts_cbc_crypt(msg, rkey_enc, SM4_ENCRYPT); -+} -+ -+static void sm4_cbc_cts_decrypt(struct wd_cipher_msg *msg, const struct SM4_KEY *rkey_enc) -+{ -+ sm4_cts_cbc_crypt(msg, rkey_enc, SM4_DECRYPT); -+} -+ - static void sm4_ecb_encrypt(struct wd_cipher_msg *msg, const struct SM4_KEY *rkey_enc) - { - sm4_v8_ecb_encrypt(msg->in, msg->out, msg->in_bytes, rkey_enc, SM4_ENCRYPT); -@@ -138,12 +214,12 @@ static void sm4_ecb_decrypt(struct wd_cipher_msg *msg, const struct SM4_KEY *rke - sm4_v8_ecb_encrypt(msg->in, msg->out, msg->in_bytes, rkey_dec, SM4_DECRYPT); - } - --void sm4_set_encrypt_key(const __u8 *userKey, struct SM4_KEY *key) -+static void sm4_set_encrypt_key(const __u8 *userKey, struct SM4_KEY *key) - { - sm4_v8_set_encrypt_key(userKey, key); - } - --void sm4_set_decrypt_key(const __u8 *userKey, struct SM4_KEY *key) -+static void sm4_set_decrypt_key(const __u8 *userKey, struct SM4_KEY *key) - { - sm4_v8_set_decrypt_key(userKey, key); - } -@@ -276,6 +352,14 @@ static int isa_ce_cipher_send(struct wd_alg_driver *drv, handle_t ctx, void *wd_ - else - sm4_cbc_decrypt(msg, &rkey); - break; -+ case WD_CIPHER_CBC_CS1: -+ case WD_CIPHER_CBC_CS2: -+ case WD_CIPHER_CBC_CS3: -+ if (msg->op_type == WD_CIPHER_ENCRYPTION) -+ sm4_cbc_cts_encrypt(msg, &rkey); -+ else -+ sm4_cbc_cts_decrypt(msg, &rkey); -+ break; - case WD_CIPHER_CTR: - sm4_ctr_encrypt(msg, &rkey); - break; -@@ -330,6 +414,9 @@ static int cipher_recv(struct wd_alg_driver *drv, handle_t ctx, void *msg) - - static struct wd_alg_driver cipher_alg_driver[] = { - GEN_CE_ALG_DRIVER("cbc(sm4)", cipher), -+ GEN_CE_ALG_DRIVER("cbc-cs1(sm4)", cipher), -+ GEN_CE_ALG_DRIVER("cbc-cs2(sm4)", cipher), -+ GEN_CE_ALG_DRIVER("cbc-cs3(sm4)", cipher), - GEN_CE_ALG_DRIVER("ctr(sm4)", cipher), - GEN_CE_ALG_DRIVER("cfb(sm4)", cipher), - GEN_CE_ALG_DRIVER("xts(sm4)", cipher), -diff --git a/drv/isa_ce_sm4.h b/drv/isa_ce_sm4.h -index d10b0af..308619e 100644 ---- a/drv/isa_ce_sm4.h -+++ b/drv/isa_ce_sm4.h -@@ -25,27 +25,35 @@ struct 
sm4_ce_drv_ctx { - - void sm4_v8_set_encrypt_key(const unsigned char *userKey, struct SM4_KEY *key); - void sm4_v8_set_decrypt_key(const unsigned char *userKey, struct SM4_KEY *key); -+ - void sm4_v8_cbc_encrypt(const unsigned char *in, unsigned char *out, - size_t length, const struct SM4_KEY *key, - unsigned char *ivec, const int enc); -+void sm4_v8_cbc_cts_encrypt(const unsigned char *in, unsigned char *out, -+ size_t len, const void *key, const unsigned char ivec[16]); -+void sm4_v8_cbc_cts_decrypt(const unsigned char *in, unsigned char *out, -+ size_t len, const void *key, const unsigned char ivec[16]); -+ - void sm4_v8_ecb_encrypt(const unsigned char *in, unsigned char *out, - size_t length, const struct SM4_KEY *key, const int enc); -+ - void sm4_v8_ctr32_encrypt_blocks(const unsigned char *in, unsigned char *out, -- size_t len, const void *key, const unsigned char ivec[16]); -+ size_t len, const void *key, const unsigned char ivec[16]); - - void sm4_v8_cfb_encrypt_blocks(const unsigned char *in, unsigned char *out, -- size_t length, const struct SM4_KEY *key, unsigned char *ivec); -+ size_t length, const struct SM4_KEY *key, unsigned char *ivec); - void sm4_v8_cfb_decrypt_blocks(const unsigned char *in, unsigned char *out, -- size_t length, const struct SM4_KEY *key, unsigned char *ivec); -+ size_t length, const struct SM4_KEY *key, unsigned char *ivec); -+ - void sm4_v8_crypt_block(const unsigned char *in, unsigned char *out, -- const struct SM4_KEY *key); -+ const struct SM4_KEY *key); - - int sm4_v8_xts_encrypt(const unsigned char *in, unsigned char *out, size_t length, -- const struct SM4_KEY *key, unsigned char *ivec, -- const struct SM4_KEY *key2); -+ const struct SM4_KEY *key, unsigned char *ivec, -+ const struct SM4_KEY *key2); - int sm4_v8_xts_decrypt(const unsigned char *in, unsigned char *out, size_t length, -- const struct SM4_KEY *key, unsigned char *ivec, -- const struct SM4_KEY *key2); -+ const struct SM4_KEY *key, unsigned char *ivec, -+ 
const struct SM4_KEY *key2); - - #ifdef __cplusplus - } -diff --git a/drv/isa_ce_sm4_armv8.S b/drv/isa_ce_sm4_armv8.S -index 7d84496..6ebf39b 100644 ---- a/drv/isa_ce_sm4_armv8.S -+++ b/drv/isa_ce_sm4_armv8.S -@@ -506,6 +506,139 @@ sm4_v8_cbc_encrypt: - ldp d8,d9,[sp],#16 - ret - .size sm4_v8_cbc_encrypt,.-sm4_v8_cbc_encrypt -+ -+.globl sm4_v8_cbc_cts_encrypt -+.type sm4_v8_cbc_cts_encrypt,%function -+.align 5 -+sm4_v8_cbc_cts_encrypt: -+ AARCH64_VALID_CALL_TARGET -+ ld1 {v0.4s,v1.4s,v2.4s,v3.4s}, [x3], #64 -+ ld1 {v4.4s,v5.4s,v6.4s,v7.4s}, [x3] -+ sub x5, x2, #16 -+ -+ ld1 {v8.4s}, [x4] -+ -+ ld1 {v10.4s}, [x0] -+ eor v8.16b, v8.16b, v10.16b -+ rev32 v8.16b, v8.16b; -+ sm4e v8.4s, v0.4s; -+ sm4e v8.4s, v1.4s; -+ sm4e v8.4s, v2.4s; -+ sm4e v8.4s, v3.4s; -+ sm4e v8.4s, v4.4s; -+ sm4e v8.4s, v5.4s; -+ sm4e v8.4s, v6.4s; -+ sm4e v8.4s, v7.4s; -+ rev64 v8.4s, v8.4s; -+ ext v8.16b, v8.16b, v8.16b, #8; -+ rev32 v8.16b, v8.16b; -+ -+ /* load permute table */ -+ adr x6, .cts_permute_table -+ add x7, x6, #32 -+ add x6, x6, x5 -+ sub x7, x7, x5 -+ ld1 {v13.4s}, [x6] -+ ld1 {v14.4s}, [x7] -+ -+ /* overlapping loads */ -+ add x0, x0, x5 -+ ld1 {v11.4s}, [x0] -+ -+ /* create Cn from En-1 */ -+ tbl v10.16b, {v8.16b}, v13.16b -+ /* padding Pn with zeros */ -+ tbl v11.16b, {v11.16b}, v14.16b -+ -+ eor v11.16b, v11.16b, v8.16b -+ rev32 v11.16b, v11.16b; -+ sm4e v11.4s, v0.4s; -+ sm4e v11.4s, v1.4s; -+ sm4e v11.4s, v2.4s; -+ sm4e v11.4s, v3.4s; -+ sm4e v11.4s, v4.4s; -+ sm4e v11.4s, v5.4s; -+ sm4e v11.4s, v6.4s; -+ sm4e v11.4s, v7.4s; -+ rev64 v11.4s, v11.4s; -+ ext v11.16b, v11.16b, v11.16b, #8; -+ rev32 v11.16b, v11.16b; -+ -+ /* overlapping stores */ -+ add x5, x1, x5 -+ st1 {v10.16b}, [x5] -+ st1 {v11.16b}, [x1] -+ -+ ret -+.size sm4_v8_cbc_cts_encrypt,.-sm4_v8_cbc_cts_encrypt -+ -+.globl sm4_v8_cbc_cts_decrypt -+.type sm4_v8_cbc_cts_decrypt,%function -+.align 5 -+sm4_v8_cbc_cts_decrypt: -+ AARCH64_VALID_CALL_TARGET -+ ld1 {v0.4s,v1.4s,v2.4s,v3.4s}, [x3], #64 -+ ld1 
{v4.4s,v5.4s,v6.4s,v7.4s}, [x3] -+ -+ sub x5, x2, #16 -+ -+ ld1 {v8.4s}, [x4] -+ -+ /* load permute table */ -+ adr x6, .cts_permute_table -+ add x7, x6, #32 -+ add x6, x6, x5 -+ sub x7, x7, x5 -+ ld1 {v13.4s}, [x6] -+ ld1 {v14.4s}, [x7] -+ -+ /* overlapping loads */ -+ ld1 {v10.16b}, [x0], x5 -+ ld1 {v11.16b}, [x0] -+ -+ rev32 v10.16b, v10.16b; -+ sm4e v10.4s, v0.4s; -+ sm4e v10.4s, v1.4s; -+ sm4e v10.4s, v2.4s; -+ sm4e v10.4s, v3.4s; -+ sm4e v10.4s, v4.4s; -+ sm4e v10.4s, v5.4s; -+ sm4e v10.4s, v6.4s; -+ sm4e v10.4s, v7.4s; -+ rev64 v10.4s, v10.4s; -+ ext v10.16b, v10.16b, v10.16b, #8; -+ rev32 v10.16b, v10.16b; -+ -+ /* select the first Ln bytes of Xn to create Pn */ -+ tbl v12.16b, {v10.16b}, v13.16b -+ eor v12.16b, v12.16b, v11.16b -+ -+ /* overwrite the first Ln bytes with Cn to create En-1 */ -+ tbx v10.16b, {v11.16b}, v14.16b -+ -+ rev32 v10.16b, v10.16b; -+ sm4e v10.4s, v0.4s; -+ sm4e v10.4s, v1.4s; -+ sm4e v10.4s, v2.4s; -+ sm4e v10.4s, v3.4s; -+ sm4e v10.4s, v4.4s; -+ sm4e v10.4s, v5.4s; -+ sm4e v10.4s, v6.4s; -+ sm4e v10.4s, v7.4s; -+ rev64 v10.4s, v10.4s; -+ ext v10.16b, v10.16b, v10.16b, #8; -+ rev32 v10.16b, v10.16b; -+ -+ eor v10.16b, v10.16b, v8.16b -+ -+ /* overlapping stores */ -+ add x5, x1, x5 -+ st1 {v12.16b}, [x5] -+ st1 {v10.16b}, [x1] -+ -+ ret -+.size sm4_v8_cbc_cts_decrypt,.-sm4_v8_cbc_cts_decrypt -+ - .globl sm4_v8_ecb_encrypt - .type sm4_v8_ecb_encrypt,%function - .align 5 --- -2.25.1 - diff --git a/0029-uadk-wd_alg-check-whether-the-platform-supports-SVE.patch b/0029-uadk-wd_alg-check-whether-the-platform-supports-SVE.patch deleted file mode 100644 index 0bf5c92..0000000 --- a/0029-uadk-wd_alg-check-whether-the-platform-supports-SVE.patch +++ /dev/null @@ -1,49 +0,0 @@ -From 4163f4f6ff8534b171c8b1b044452b930629576d Mon Sep 17 00:00:00 2001 -From: Weili Qian -Date: Sat, 23 Mar 2024 17:54:16 +0800 -Subject: [PATCH 29/44] uadk/wd_alg: check whether the platform supports SVE - -If the algorithm uses the SVE instruction, check whether -the 
platform supports SVE before algorithm driver registration. -If the platform does not support SVE, do not register the algorithm. - -Signed-off-by: Weili Qian ---- - wd_alg.c | 14 ++++++++++++++ - 1 file changed, 14 insertions(+) - -diff --git a/wd_alg.c b/wd_alg.c -index f34a407..de352d7 100644 ---- a/wd_alg.c -+++ b/wd_alg.c -@@ -109,6 +109,19 @@ static bool wd_check_ce_support(const char *dev_name) - return false; - } - -+static bool wd_check_sve_support(void) -+{ -+ unsigned long hwcaps = 0; -+ -+ #if defined(__aarch64__) -+ hwcaps = getauxval(AT_HWCAP); -+ #endif -+ if (hwcaps & HWCAP_SVE) -+ return true; -+ -+ return false; -+} -+ - static bool wd_alg_check_available(int calc_type, const char *dev_name) - { - bool ret = false; -@@ -122,6 +135,7 @@ static bool wd_alg_check_available(int calc_type, const char *dev_name) - break; - /* Should find the CPU if not support SVE */ - case UADK_ALG_SVE_INSTR: -+ ret = wd_check_sve_support(); - break; - /* Check if the current driver has device support */ - case UADK_ALG_HW: --- -2.25.1 - diff --git a/0030-uadk-sched-fix-async-mode-ctx-id.patch b/0030-uadk-sched-fix-async-mode-ctx-id.patch deleted file mode 100644 index 021a896..0000000 --- a/0030-uadk-sched-fix-async-mode-ctx-id.patch +++ /dev/null @@ -1,34 +0,0 @@ -From 6ab956dc04c04849d2650e08d59b9722522eb201 Mon Sep 17 00:00:00 2001 -From: Weili Qian -Date: Sat, 23 Mar 2024 17:56:17 +0800 -Subject: [PATCH 30/44] uadk/sched: fix async mode ctx id - -In the single scheduler scenario, ctx id 1 is asynchronous ctx, -but the function sched_single_poll_policy() uses ctx id 0. -As a result, packets fail to be received. Change the value of -ctx id to 1. 
- -Signed-off-by: Weili Qian ---- - wd_sched.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/wd_sched.c b/wd_sched.c -index b43834d..6766872 100644 ---- a/wd_sched.c -+++ b/wd_sched.c -@@ -428,9 +428,9 @@ static int sched_single_poll_policy(handle_t h_sched_ctx, - } - - while (loop_times > 0) { -- /* Default async mode use ctx 0 */ -+ /* Default async mode use ctx 1 */ - loop_times--; -- ret = sched_ctx->poll_func(0, 1, &poll_num); -+ ret = sched_ctx->poll_func(1, 1, &poll_num); - if ((ret < 0) && (ret != -EAGAIN)) - return ret; - else if (ret == -EAGAIN) --- -2.25.1 - diff --git a/0031-uadk-initializes-ctx-resources-in-SVE-mode.patch b/0031-uadk-initializes-ctx-resources-in-SVE-mode.patch deleted file mode 100644 index d0fdc9f..0000000 --- a/0031-uadk-initializes-ctx-resources-in-SVE-mode.patch +++ /dev/null @@ -1,169 +0,0 @@ -From 4516c0f35532713548f4ccd016c562359c713eb4 Mon Sep 17 00:00:00 2001 -From: Weili Qian -Date: Sat, 23 Mar 2024 17:57:12 +0800 -Subject: [PATCH 31/44] uadk: initializes ctx resources in SVE mode - -Initializes ctx resources in SVE mode. In addition, when the driver -is released, the config resources need to be released in all modes, -not only UADK_ALG_HW. 
- -Signed-off-by: Weili Qian ---- - include/wd_alg_common.h | 4 ++ - wd_util.c | 95 +++++++++++++++++++++++++++++++++++------ - 2 files changed, 85 insertions(+), 14 deletions(-) - -diff --git a/include/wd_alg_common.h b/include/wd_alg_common.h -index 32b8630..5fee085 100644 ---- a/include/wd_alg_common.h -+++ b/include/wd_alg_common.h -@@ -127,6 +127,10 @@ struct wd_ctx_params { - struct wd_cap_config *cap; - }; - -+struct wd_soft_ctx { -+ void *priv; -+}; -+ - struct wd_ctx_internal { - handle_t ctx; - __u8 op_type; -diff --git a/wd_util.c b/wd_util.c -index fb58167..1e2b190 100644 ---- a/wd_util.c -+++ b/wd_util.c -@@ -28,6 +28,10 @@ - #define US2S(us) ((us) >> 20) - #define WD_INIT_RETRY_TIMEOUT 3 - -+#define WD_SOFT_CTX_NUM 2 -+#define WD_SOFT_SYNC_CTX 0 -+#define WD_SOFT_ASYNC_CTX 1 -+ - #define WD_DRV_LIB_DIR "uadk" - - struct msg_pool { -@@ -1968,8 +1972,7 @@ void wd_alg_uninit_driver(struct wd_ctx_config_internal *config, - - driver->exit(driver); - /* Ctx config just need clear once */ -- if (driver->calc_type == UADK_ALG_HW) -- wd_clear_ctx_config(config); -+ wd_clear_ctx_config(config); - - if (driver->fallback) - wd_alg_uninit_fallback((struct wd_alg_driver *)driver->fallback); -@@ -2660,6 +2663,47 @@ static void wd_alg_ctx_uninit(struct wd_ctx_config *ctx_config) - free(ctx_config->ctxs); - } - -+static int wd_alg_init_sve_ctx(struct wd_ctx_config *ctx_config) -+{ -+ struct wd_soft_ctx *ctx_sync, *ctx_async; -+ -+ ctx_config->ctx_num = WD_SOFT_CTX_NUM; -+ ctx_config->ctxs = calloc(ctx_config->ctx_num, sizeof(struct wd_ctx)); -+ if (!ctx_config->ctxs) -+ return -WD_ENOMEM; -+ -+ ctx_sync = calloc(1, sizeof(struct wd_soft_ctx)); -+ if (!ctx_sync) -+ goto free_ctxs; -+ -+ ctx_config->ctxs[WD_SOFT_SYNC_CTX].op_type = 0; -+ ctx_config->ctxs[WD_SOFT_SYNC_CTX].ctx_mode = CTX_MODE_SYNC; -+ ctx_config->ctxs[WD_SOFT_SYNC_CTX].ctx = (handle_t)ctx_sync; -+ -+ ctx_async = calloc(1, sizeof(struct wd_soft_ctx)); -+ if (!ctx_async) -+ goto free_ctx_sync; -+ -+ 
ctx_config->ctxs[WD_SOFT_ASYNC_CTX].op_type = 0; -+ ctx_config->ctxs[WD_SOFT_ASYNC_CTX].ctx_mode = CTX_MODE_ASYNC; -+ ctx_config->ctxs[WD_SOFT_ASYNC_CTX].ctx = (handle_t)ctx_async; -+ -+ return 0; -+ -+free_ctx_sync: -+ free(ctx_sync); -+free_ctxs: -+ free(ctx_config->ctxs); -+ return -WD_ENOMEM; -+} -+ -+static void wd_alg_uninit_sve_ctx(struct wd_ctx_config *ctx_config) -+{ -+ free((struct wd_soft_ctx *)ctx_config->ctxs[WD_SOFT_ASYNC_CTX].ctx); -+ free((struct wd_soft_ctx *)ctx_config->ctxs[WD_SOFT_SYNC_CTX].ctx); -+ free(ctx_config->ctxs); -+} -+ - int wd_alg_attrs_init(struct wd_init_attrs *attrs) - { - wd_alg_poll_ctx alg_poll_func = attrs->alg_poll_ctx; -@@ -2717,9 +2761,23 @@ int wd_alg_attrs_init(struct wd_init_attrs *attrs) - } - attrs->sched = alg_sched; - -- ret = wd_sched_rr_instance(alg_sched, NULL); -+ ctx_config = calloc(1, sizeof(*ctx_config)); -+ if (!ctx_config) { -+ WD_ERR("fail to alloc ctx config\n"); -+ goto out_freesched; -+ } -+ attrs->ctx_config = ctx_config; -+ -+ ret = wd_alg_init_sve_ctx(ctx_config); - if (ret) { -- WD_ERR("fail to instance scheduler\n"); -+ WD_ERR("fail to init sve ctx!\n"); -+ goto out_freesched; -+ } -+ -+ ctx_config->cap = attrs->ctx_params->cap; -+ ret = alg_init_func(ctx_config, alg_sched); -+ if (ret) { -+ wd_alg_uninit_sve_ctx(ctx_config); - goto out_freesched; - } - break; -@@ -2780,17 +2838,26 @@ void wd_alg_attrs_uninit(struct wd_init_attrs *attrs) - struct wd_sched *alg_sched = attrs->sched; - int driver_type = attrs->driver->calc_type; - -- if (driver_type == UADK_ALG_CE_INSTR || driver_type == UADK_ALG_SOFT) { -- if (ctx_config) { -- wd_alg_ce_ctx_uninit(ctx_config); -- free(ctx_config); -- } -- } else { -- if (ctx_config) { -- wd_alg_ctx_uninit(ctx_config); -- free(ctx_config); -- } -+ if (!ctx_config) { -+ wd_sched_rr_release(alg_sched); -+ return; -+ } -+ -+ switch (driver_type) { -+ case UADK_ALG_SOFT: -+ case UADK_ALG_CE_INSTR: -+ wd_alg_ce_ctx_uninit(ctx_config); -+ break; -+ case UADK_ALG_SVE_INSTR: 
-+ wd_alg_uninit_sve_ctx(ctx_config); -+ break; -+ case UADK_ALG_HW: -+ wd_alg_ctx_uninit(ctx_config); -+ break; -+ default: -+ break; - } - -+ free(ctx_config); - wd_sched_rr_release(alg_sched); - } --- -2.25.1 - diff --git a/0032-uadk-hash_mb-support-multi-buffer-calculation-for-sm.patch b/0032-uadk-hash_mb-support-multi-buffer-calculation-for-sm.patch deleted file mode 100644 index 7089827..0000000 --- a/0032-uadk-hash_mb-support-multi-buffer-calculation-for-sm.patch +++ /dev/null @@ -1,4092 +0,0 @@ -From 5dbc53c96ac4efcf26b4dbcdbbf55d1b5e7a06be Mon Sep 17 00:00:00 2001 -From: Weili Qian -Date: Sat, 23 Mar 2024 18:00:43 +0800 -Subject: [PATCH 32/44] uadk/hash_mb: support multi-buffer calculation for sm3 - and md5 - -Supports sm3 and md5 multi-buffer calculation by using SVE instructions. -If the platform supports SVE instructions, uesrs can choose SVE instructions -to perform sm3 and md5 algorithm calculation. - -The assembly implementation is from isa-l_crypto: -https://github.com/intel/isa-l_crypto.git - -Signed-off-by: Weili Qian ---- - Makefile.am | 15 +- - drv/hash_mb/hash_mb.c | 843 ++++++++++++++++++++++++++++++++++ - drv/hash_mb/hash_mb.h | 62 +++ - drv/hash_mb/md5_mb_asimd_x1.S | 248 ++++++++++ - drv/hash_mb/md5_mb_asimd_x4.S | 526 +++++++++++++++++++++ - drv/hash_mb/md5_mb_sve.S | 158 +++++++ - drv/hash_mb/md5_sve_common.S | 478 +++++++++++++++++++ - drv/hash_mb/sm3_mb_asimd_x1.S | 387 ++++++++++++++++ - drv/hash_mb/sm3_mb_asimd_x4.S | 576 +++++++++++++++++++++++ - drv/hash_mb/sm3_mb_sve.S | 161 +++++++ - drv/hash_mb/sm3_sve_common.S | 505 ++++++++++++++++++++ - 11 files changed, 3958 insertions(+), 1 deletion(-) - create mode 100644 drv/hash_mb/hash_mb.c - create mode 100644 drv/hash_mb/hash_mb.h - create mode 100644 drv/hash_mb/md5_mb_asimd_x1.S - create mode 100644 drv/hash_mb/md5_mb_asimd_x4.S - create mode 100644 drv/hash_mb/md5_mb_sve.S - create mode 100644 drv/hash_mb/md5_sve_common.S - create mode 100644 drv/hash_mb/sm3_mb_asimd_x1.S - create 
mode 100644 drv/hash_mb/sm3_mb_asimd_x4.S - create mode 100644 drv/hash_mb/sm3_mb_sve.S - create mode 100644 drv/hash_mb/sm3_sve_common.S - -diff --git a/Makefile.am b/Makefile.am -index f78ad14..68f3106 100644 ---- a/Makefile.am -+++ b/Makefile.am -@@ -45,7 +45,7 @@ lib_LTLIBRARIES=libwd.la libwd_comp.la libwd_crypto.la - - uadk_driversdir=$(libdir)/uadk - uadk_drivers_LTLIBRARIES=libhisi_sec.la libhisi_hpre.la libhisi_zip.la \ -- libisa_ce.la -+ libisa_ce.la libisa_sve.la - - libwd_la_SOURCES=wd.c wd_mempool.c wd.h wd_alg.c wd_alg.h \ - v1/wd.c v1/wd.h v1/wd_adapter.c v1/wd_adapter.h \ -@@ -94,6 +94,12 @@ libhisi_hpre_la_SOURCES=drv/hisi_hpre.c drv/hisi_qm_udrv.c \ - libisa_ce_la_SOURCES=arm_arch_ce.h drv/isa_ce_sm3.c drv/isa_ce_sm3_armv8.S isa_ce_sm3.h \ - drv/isa_ce_sm4.c drv/isa_ce_sm4_armv8.S drv/isa_ce_sm4.h - -+libisa_sve_la_SOURCES=drv/hash_mb/hash_mb.c wd_digest_drv.h drv/hash_mb/hash_mb.h \ -+ drv/hash_mb/sm3_sve_common.S drv/hash_mb/sm3_mb_asimd_x1.S \ -+ drv/hash_mb/sm3_mb_asimd_x4.S drv/hash_mb/sm3_mb_sve.S \ -+ drv/hash_mb/md5_sve_common.S drv/hash_mb/md5_mb_asimd_x1.S \ -+ drv/hash_mb/md5_mb_asimd_x4.S drv/hash_mb/md5_mb_sve.S -+ - if WD_STATIC_DRV - AM_CFLAGS += -DWD_STATIC_DRV -fPIC - AM_CFLAGS += -DWD_NO_LOG -@@ -117,6 +123,9 @@ libhisi_hpre_la_DEPENDENCIES = libwd.la libwd_crypto.la - libisa_ce_la_LIBADD = $(libwd_la_OBJECTS) $(libwd_crypto_la_OBJECTS) - libisa_ce_la_DEPENDENCIES = libwd.la libwd_crypto.la - -+libisa_sve_la_LIBADD = $(libwd_la_OBJECTS) $(libwd_crypto_la_OBJECTS) -+libisa_sve_la_DEPENDENCIES = libwd.la libwd_crypto.la -+ - else - UADK_WD_SYMBOL= -Wl,--version-script,$(top_srcdir)/libwd.map - UADK_CRYPTO_SYMBOL= -Wl,--version-script,$(top_srcdir)/libwd_crypto.map -@@ -149,6 +158,10 @@ libhisi_hpre_la_DEPENDENCIES= libwd.la libwd_crypto.la - libisa_ce_la_LIBADD= -lwd -lwd_crypto - libisa_ce_la_LDFLAGS=$(UADK_VERSION) - libisa_ce_la_DEPENDENCIES= libwd.la libwd_crypto.la -+ -+libisa_sve_la_LIBADD= -lwd -lwd_crypto 
-+libisa_sve_la_LDFLAGS=$(UADK_VERSION) -+libisa_sve_la_DEPENDENCIES= libwd.la libwd_crypto.la - endif # WD_STATIC_DRV - - pkgconfigdir = $(libdir)/pkgconfig -diff --git a/drv/hash_mb/hash_mb.c b/drv/hash_mb/hash_mb.c -new file mode 100644 -index 0000000..a73c698 ---- /dev/null -+++ b/drv/hash_mb/hash_mb.c -@@ -0,0 +1,843 @@ -+/* SPDX-License-Identifier: Apache-2.0 */ -+/* Copyright 2024 Huawei Technologies Co.,Ltd. All rights reserved. */ -+ -+#include -+#include -+#include -+#include -+#include "hash_mb.h" -+ -+#define MIN(a, b) (((a) > (b)) ? (b) : (a)) -+#define IPAD_VALUE 0x36 -+#define OPAD_VALUE 0x5C -+#define HASH_KEY_LEN 64 -+#define HASH_BLOCK_OFFSET 6 -+#define HASH_BLOCK_SIZE 64 -+#define HASH_PADLENGTHFIELD_SIZE 56 -+#define HASH_PADDING_SIZE 120 -+#define HASH_HIGH_32BITS 32 -+#define HASH_PADDING_BLOCKS 2 -+#define HASH_NENO_PROCESS_JOBS 4 -+#define HASH_TRY_PROCESS_COUNT 16 -+#define BYTES_TO_BITS_OFFSET 3 -+ -+#define MD5_DIGEST_DATA_SIZE 16 -+#define SM3_DIGEST_DATA_SIZE 32 -+#define HASH_MAX_LANES 32 -+#define SM3_MAX_LANES 16 -+ -+#define PUTU32(p, V) \ -+ ((p)[0] = (uint8_t)((V) >> 24), \ -+ (p)[1] = (uint8_t)((V) >> 16), \ -+ (p)[2] = (uint8_t)((V) >> 8), \ -+ (p)[3] = (uint8_t)(V)) -+ -+struct hash_mb_ops { -+ int (*max_lanes)(void); -+ void (*asimd_x4)(struct hash_job *job1, struct hash_job *job2, -+ struct hash_job *job3, struct hash_job *job4, int len); -+ void (*asimd_x1)(struct hash_job *job, int len); -+ void (*sve)(int blocks, int total_lanes, struct hash_job **job_vec); -+ __u8 *iv_data; -+ int iv_bytes; -+ int max_jobs; -+}; -+ -+struct hash_mb_poll_queue { -+ struct hash_job *head; -+ struct hash_job *tail; -+ pthread_spinlock_t s_lock; -+ const struct hash_mb_ops *ops; -+ __u32 job_num; -+}; -+ -+struct hash_mb_queue { -+ struct hash_mb_poll_queue sm3_poll_queue; -+ struct hash_mb_poll_queue md5_poll_queue; -+ pthread_spinlock_t r_lock; -+ struct hash_job *recv_head; -+ struct hash_job *recv_tail; -+ __u32 complete_cnt; -+ __u8 
ctx_mode; -+}; -+ -+struct hash_mb_ctx { -+ struct wd_ctx_config_internal config; -+}; -+ -+static __u8 sm3_iv_data[SM3_DIGEST_DATA_SIZE] = { -+ 0x73, 0x80, 0x16, 0x6f, 0x49, 0x14, 0xb2, 0xb9, -+ 0x17, 0x24, 0x42, 0xd7, 0xda, 0x8a, 0x06, 0x00, -+ 0xa9, 0x6f, 0x30, 0xbc, 0x16, 0x31, 0x38, 0xaa, -+ 0xe3, 0x8d, 0xee, 0x4d, 0xb0, 0xfb, 0x0e, 0x4e, -+}; -+ -+static __u8 md5_iv_data[MD5_DIGEST_DATA_SIZE] = { -+ 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, -+ 0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, 0x32, 0x10, -+}; -+ -+static struct hash_mb_ops md5_ops = { -+ .max_lanes = md5_mb_sve_max_lanes, -+ .asimd_x4 = md5_mb_asimd_x4, -+ .asimd_x1 = md5_mb_asimd_x1, -+ .sve = md5_mb_sve, -+ .iv_data = md5_iv_data, -+ .iv_bytes = MD5_DIGEST_DATA_SIZE, -+ .max_jobs = HASH_MAX_LANES, -+}; -+ -+static struct hash_mb_ops sm3_ops = { -+ .max_lanes = sm3_mb_sve_max_lanes, -+ .asimd_x4 = sm3_mb_asimd_x4, -+ .asimd_x1 = sm3_mb_asimd_x1, -+ .sve = sm3_mb_sve, -+ .iv_data = sm3_iv_data, -+ .iv_bytes = SM3_DIGEST_DATA_SIZE, -+ .max_jobs = SM3_MAX_LANES, -+}; -+ -+static void hash_mb_uninit_poll_queue(struct hash_mb_poll_queue *poll_queue) -+{ -+ pthread_spin_destroy(&poll_queue->s_lock); -+} -+ -+static void hash_mb_queue_uninit(struct wd_ctx_config_internal *config, int ctx_num) -+{ -+ struct hash_mb_queue *mb_queue; -+ struct wd_soft_ctx *ctx; -+ int i; -+ -+ for (i = 0; i < ctx_num; i++) { -+ ctx = (struct wd_soft_ctx *)config->ctxs[i].ctx; -+ mb_queue = ctx->priv; -+ pthread_spin_destroy(&mb_queue->r_lock); -+ hash_mb_uninit_poll_queue(&mb_queue->sm3_poll_queue); -+ hash_mb_uninit_poll_queue(&mb_queue->md5_poll_queue); -+ free(mb_queue); -+ } -+} -+ -+static int hash_mb_init_poll_queue(struct hash_mb_poll_queue *poll_queue) -+{ -+ int ret; -+ -+ ret = pthread_spin_init(&poll_queue->s_lock, PTHREAD_PROCESS_SHARED); -+ if (ret) { -+ WD_ERR("failed to init s_lock!\n"); -+ return ret; -+ } -+ -+ poll_queue->head = NULL; -+ poll_queue->tail = NULL; -+ poll_queue->job_num = 0; -+ -+ return 
WD_SUCCESS; -+} -+ -+static int hash_mb_queue_init(struct wd_ctx_config_internal *config) -+{ -+ struct hash_mb_queue *mb_queue; -+ int ctx_num = config->ctx_num; -+ struct wd_soft_ctx *ctx; -+ int i, ret; -+ -+ for (i = 0; i < ctx_num; i++) { -+ mb_queue = calloc(1, sizeof(struct hash_mb_queue)); -+ if (!mb_queue) { -+ ret = -WD_ENOMEM; -+ goto free_mb_queue; -+ } -+ -+ mb_queue->ctx_mode = config->ctxs[i].ctx_mode; -+ ctx = (struct wd_soft_ctx *)config->ctxs[i].ctx; -+ ctx->priv = mb_queue; -+ ret = hash_mb_init_poll_queue(&mb_queue->sm3_poll_queue); -+ if (ret) -+ goto free_mem; -+ -+ ret = hash_mb_init_poll_queue(&mb_queue->md5_poll_queue); -+ if (ret) -+ goto uninit_sm3_poll; -+ -+ ret = pthread_spin_init(&mb_queue->r_lock, PTHREAD_PROCESS_SHARED); -+ if (ret) { -+ WD_ERR("failed to init r_lock!\n"); -+ goto uninit_md5_poll; -+ } -+ -+ mb_queue->sm3_poll_queue.ops = &sm3_ops; -+ mb_queue->md5_poll_queue.ops = &md5_ops; -+ mb_queue->recv_head = NULL; -+ mb_queue->recv_tail = NULL; -+ mb_queue->complete_cnt = 0; -+ } -+ -+ return WD_SUCCESS; -+ -+uninit_md5_poll: -+ hash_mb_uninit_poll_queue(&mb_queue->md5_poll_queue); -+uninit_sm3_poll: -+ hash_mb_uninit_poll_queue(&mb_queue->sm3_poll_queue); -+free_mem: -+ free(mb_queue); -+free_mb_queue: -+ hash_mb_queue_uninit(config, i); -+ return ret; -+} -+ -+static int hash_mb_init(struct wd_alg_driver *drv, void *conf) -+{ -+ struct wd_ctx_config_internal *config = conf; -+ struct hash_mb_ctx *priv; -+ int ret; -+ -+ priv = malloc(sizeof(struct hash_mb_ctx)); -+ if (!priv) -+ return -WD_ENOMEM; -+ -+ /* multibuff does not use epoll. 
*/ -+ config->epoll_en = 0; -+ memcpy(&priv->config, config, sizeof(struct wd_ctx_config_internal)); -+ -+ ret = hash_mb_queue_init(config); -+ if (ret) { -+ free(priv); -+ return ret; -+ } -+ -+ drv->priv = priv; -+ -+ return WD_SUCCESS; -+} -+ -+static void hash_mb_exit(struct wd_alg_driver *drv) -+{ -+ struct hash_mb_ctx *priv = (struct hash_mb_ctx *)drv->priv; -+ -+ if (!priv) -+ return; -+ -+ hash_mb_queue_uninit(&priv->config, priv->config.ctx_num); -+ free(priv); -+ drv->priv = NULL; -+} -+ -+static void hash_mb_pad_data(struct hash_pad *hash_pad, __u8 *in, __u32 partial, -+ __u64 total_len, bool transfer) -+{ -+ __u64 size = total_len << BYTES_TO_BITS_OFFSET; -+ __u8 *buffer = hash_pad->pad; -+ -+ if (partial) -+ memcpy(buffer, in, partial); -+ -+ buffer[partial++] = 0x80; -+ if (partial <= HASH_PADLENGTHFIELD_SIZE) { -+ memset(buffer + partial, 0, HASH_PADLENGTHFIELD_SIZE - partial); -+ if (transfer) { -+ PUTU32(buffer + HASH_PADLENGTHFIELD_SIZE, size >> HASH_HIGH_32BITS); -+ PUTU32(buffer + HASH_PADLENGTHFIELD_SIZE + sizeof(__u32), size); -+ } else { -+ memcpy(buffer + HASH_PADLENGTHFIELD_SIZE, &size, sizeof(__u64)); -+ } -+ hash_pad->pad_len = 1; -+ } else { -+ memset(buffer + partial, 0, HASH_PADDING_SIZE - partial); -+ if (transfer) { -+ PUTU32(buffer + HASH_PADDING_SIZE, size >> HASH_HIGH_32BITS); -+ PUTU32(buffer + HASH_PADDING_SIZE + sizeof(__u32), size); -+ } else { -+ memcpy(buffer + HASH_PADDING_SIZE, &size, sizeof(__u64)); -+ } -+ hash_pad->pad_len = HASH_PADDING_BLOCKS; -+ } -+} -+ -+static inline void hash_xor(__u8 *key_out, __u8 *key_in, __u32 key_len, __u8 xor_value) -+{ -+ __u32 i; -+ -+ for (i = 0; i < HASH_KEY_LEN; i++) { -+ if (i < key_len) -+ key_out[i] = key_in[i] ^ xor_value; -+ else -+ key_out[i] = xor_value; -+ } -+} -+ -+static int hash_middle_block_process(struct hash_mb_poll_queue *poll_queue, -+ struct wd_digest_msg *d_msg, -+ struct hash_job *job) -+{ -+ __u8 *buffer = d_msg->partial_block + d_msg->partial_bytes; -+ __u64 
length = (__u64)d_msg->partial_bytes + d_msg->in_bytes; -+ -+ if (length < HASH_BLOCK_SIZE) { -+ memcpy(buffer, d_msg->in, d_msg->in_bytes); -+ d_msg->partial_bytes = length; -+ return -WD_EAGAIN; -+ } -+ -+ if (d_msg->partial_bytes) { -+ memcpy(buffer, d_msg->in, HASH_BLOCK_SIZE - d_msg->partial_bytes); -+ job->buffer = d_msg->partial_block; -+ poll_queue->ops->asimd_x1(job, 1); -+ length = d_msg->in_bytes - (HASH_BLOCK_SIZE - d_msg->partial_bytes); -+ buffer = d_msg->in + (HASH_BLOCK_SIZE - d_msg->partial_bytes); -+ } else { -+ buffer = d_msg->in; -+ } -+ -+ job->len = length >> HASH_BLOCK_OFFSET; -+ d_msg->partial_bytes = length & (HASH_BLOCK_SIZE - 1); -+ if (d_msg->partial_bytes) -+ memcpy(d_msg->partial_block, buffer + (job->len << HASH_BLOCK_OFFSET), -+ d_msg->partial_bytes); -+ -+ if (!job->len) { -+ memcpy(d_msg->out, job->result_digest, poll_queue->ops->iv_bytes); -+ return -WD_EAGAIN; -+ } -+ -+ job->buffer = buffer; -+ job->pad.pad_len = 0; -+ -+ return WD_SUCCESS; -+} -+ -+static void hash_signle_block_process(struct wd_digest_msg *d_msg, -+ struct hash_job *job, __u64 total_len) -+{ -+ __u32 hash_partial = d_msg->in_bytes & (HASH_BLOCK_SIZE - 1); -+ __u8 *buffer; -+ -+ job->len = d_msg->in_bytes >> HASH_BLOCK_OFFSET; -+ buffer = d_msg->in + (job->len << HASH_BLOCK_OFFSET); -+ hash_mb_pad_data(&job->pad, buffer, hash_partial, total_len, job->is_transfer); -+ if (!job->len) { -+ job->buffer = job->pad.pad; -+ job->len = job->pad.pad_len; -+ job->pad.pad_len = 0; -+ return; -+ } -+ -+ job->buffer = d_msg->in; -+} -+ -+static void hash_final_block_process(struct hash_mb_poll_queue *poll_queue, -+ struct wd_digest_msg *d_msg, -+ struct hash_job *job) -+{ -+ __u8 *buffer = d_msg->partial_block + d_msg->partial_bytes; -+ __u64 length = (__u64)d_msg->partial_bytes + d_msg->in_bytes; -+ __u32 hash_partial = length & (HASH_BLOCK_SIZE - 1); -+ __u64 total_len = d_msg->long_data_len; -+ -+ if (job->opad.opad_size) -+ total_len += HASH_BLOCK_SIZE; -+ -+ if 
(!d_msg->partial_bytes) { -+ hash_signle_block_process(d_msg, job, total_len); -+ return; -+ } -+ -+ if (length <= HASH_BLOCK_SIZE) { -+ memcpy(buffer, d_msg->in, d_msg->in_bytes); -+ job->len = length >> HASH_BLOCK_OFFSET; -+ buffer = d_msg->partial_block + (job->len << HASH_BLOCK_OFFSET); -+ hash_mb_pad_data(&job->pad, buffer, hash_partial, total_len, job->is_transfer); -+ if (!job->len) { -+ job->buffer = job->pad.pad; -+ job->len = job->pad.pad_len; -+ job->pad.pad_len = 0; -+ return; -+ } -+ -+ job->buffer = d_msg->partial_block; -+ return; -+ } -+ -+ memcpy(buffer, d_msg->in, (HASH_BLOCK_SIZE - d_msg->partial_bytes)); -+ job->buffer = d_msg->partial_block; -+ poll_queue->ops->asimd_x1(job, 1); -+ job->buffer = d_msg->in + (HASH_BLOCK_SIZE - d_msg->partial_bytes); -+ length = d_msg->in_bytes - (HASH_BLOCK_SIZE - d_msg->partial_bytes); -+ job->len = length >> HASH_BLOCK_OFFSET; -+ buffer = job->buffer + (job->len << HASH_BLOCK_OFFSET); -+ hash_partial = length & (HASH_BLOCK_SIZE - 1); -+ hash_mb_pad_data(&job->pad, buffer, hash_partial, total_len, job->is_transfer); -+ if (!job->len) { -+ job->buffer = job->pad.pad; -+ job->len = job->pad.pad_len; -+ job->pad.pad_len = 0; -+ } -+} -+ -+static int hash_first_block_process(struct wd_digest_msg *d_msg, -+ struct hash_job *job, -+ __u32 iv_bytes) -+{ -+ __u8 *buffer; -+ -+ job->len = d_msg->in_bytes >> HASH_BLOCK_OFFSET; -+ d_msg->partial_bytes = d_msg->in_bytes & (HASH_BLOCK_SIZE - 1); -+ if (d_msg->partial_bytes) { -+ buffer = d_msg->in + (job->len << HASH_BLOCK_OFFSET); -+ memcpy(d_msg->partial_block, buffer, d_msg->partial_bytes); -+ } -+ -+ /* -+ * Long hash mode, if first block is less than HASH_BLOCK_SIZE, -+ * copy ikey hash result to out. 
-+ */ -+ if (!job->len) { -+ memcpy(d_msg->out, job->result_digest, iv_bytes); -+ return -WD_EAGAIN; -+ } -+ job->buffer = d_msg->in; -+ job->pad.pad_len = 0; -+ -+ return WD_SUCCESS; -+} -+ -+static int hash_do_partial(struct hash_mb_poll_queue *poll_queue, -+ struct wd_digest_msg *d_msg, struct hash_job *job) -+{ -+ enum hash_block_type bd_type = get_hash_block_type(d_msg); -+ __u64 total_len = d_msg->in_bytes; -+ int ret = WD_SUCCESS; -+ -+ switch (bd_type) { -+ case HASH_FIRST_BLOCK: -+ ret = hash_first_block_process(d_msg, job, poll_queue->ops->iv_bytes); -+ break; -+ case HASH_MIDDLE_BLOCK: -+ ret = hash_middle_block_process(poll_queue, d_msg, job); -+ break; -+ case HASH_END_BLOCK: -+ hash_final_block_process(poll_queue, d_msg, job); -+ break; -+ case HASH_SINGLE_BLOCK: -+ if (job->opad.opad_size) -+ total_len += HASH_BLOCK_SIZE; -+ hash_signle_block_process(d_msg, job, total_len); -+ break; -+ } -+ -+ return ret; -+} -+ -+static void hash_mb_init_iv(struct hash_mb_poll_queue *poll_queue, -+ struct wd_digest_msg *d_msg, struct hash_job *job) -+{ -+ enum hash_block_type bd_type = get_hash_block_type(d_msg); -+ __u8 key_ipad[HASH_KEY_LEN]; -+ __u8 key_opad[HASH_KEY_LEN]; -+ -+ job->opad.opad_size = 0; -+ switch (bd_type) { -+ case HASH_FIRST_BLOCK: -+ memcpy(job->result_digest, poll_queue->ops->iv_data, poll_queue->ops->iv_bytes); -+ if (d_msg->mode != WD_DIGEST_HMAC) -+ return; -+ -+ hash_xor(key_ipad, d_msg->key, d_msg->key_bytes, IPAD_VALUE); -+ job->buffer = key_ipad; -+ poll_queue->ops->asimd_x1(job, 1); -+ break; -+ case HASH_MIDDLE_BLOCK: -+ memcpy(job->result_digest, d_msg->out, poll_queue->ops->iv_bytes); -+ break; -+ case HASH_END_BLOCK: -+ if (d_msg->mode != WD_DIGEST_HMAC) { -+ memcpy(job->result_digest, d_msg->out, poll_queue->ops->iv_bytes); -+ return; -+ } -+ memcpy(job->result_digest, poll_queue->ops->iv_data, poll_queue->ops->iv_bytes); -+ hash_xor(key_opad, d_msg->key, d_msg->key_bytes, OPAD_VALUE); -+ job->buffer = key_opad; -+ 
poll_queue->ops->asimd_x1(job, 1); -+ memcpy(job->opad.opad, job->result_digest, poll_queue->ops->iv_bytes); -+ job->opad.opad_size = poll_queue->ops->iv_bytes; -+ memcpy(job->result_digest, d_msg->out, poll_queue->ops->iv_bytes); -+ break; -+ case HASH_SINGLE_BLOCK: -+ memcpy(job->result_digest, poll_queue->ops->iv_data, poll_queue->ops->iv_bytes); -+ if (d_msg->mode != WD_DIGEST_HMAC) -+ return; -+ -+ hash_xor(key_ipad, d_msg->key, d_msg->key_bytes, IPAD_VALUE); -+ hash_xor(key_opad, d_msg->key, d_msg->key_bytes, OPAD_VALUE); -+ job->buffer = key_opad; -+ poll_queue->ops->asimd_x1(job, 1); -+ memcpy(job->opad.opad, job->result_digest, poll_queue->ops->iv_bytes); -+ job->opad.opad_size = poll_queue->ops->iv_bytes; -+ job->buffer = key_ipad; -+ memcpy(job->result_digest, poll_queue->ops->iv_data, poll_queue->ops->iv_bytes); -+ poll_queue->ops->asimd_x1(job, 1); -+ break; -+ } -+} -+ -+static void hash_do_sync(struct hash_mb_poll_queue *poll_queue, struct hash_job *job) -+{ -+ __u32 iv_bytes = poll_queue->ops->iv_bytes; -+ __u32 length; -+ -+ poll_queue->ops->asimd_x1(job, job->len); -+ -+ if (job->pad.pad_len) { -+ job->buffer = job->pad.pad; -+ poll_queue->ops->asimd_x1(job, job->pad.pad_len); -+ } -+ -+ if (job->opad.opad_size) { -+ job->buffer = job->opad.opad + job->opad.opad_size; -+ memcpy(job->buffer, job->result_digest, iv_bytes); -+ memcpy(job->result_digest, job->opad.opad, iv_bytes); -+ length = HASH_BLOCK_SIZE + iv_bytes; -+ hash_mb_pad_data(&job->pad, job->buffer, iv_bytes, length, job->is_transfer); -+ job->buffer = job->pad.pad; -+ poll_queue->ops->asimd_x1(job, job->pad.pad_len); -+ } -+} -+ -+static void hash_mb_add_job_tail(struct hash_mb_poll_queue *poll_queue, struct hash_job *job) -+{ -+ pthread_spin_lock(&poll_queue->s_lock); -+ if (poll_queue->job_num) { -+ poll_queue->tail->next = job; -+ poll_queue->tail = job; -+ } else { -+ poll_queue->head = job; -+ poll_queue->tail = job; -+ } -+ poll_queue->job_num++; -+ 
pthread_spin_unlock(&poll_queue->s_lock); -+} -+ -+static void hash_mb_add_job_head(struct hash_mb_poll_queue *poll_queue, struct hash_job *job) -+{ -+ pthread_spin_lock(&poll_queue->s_lock); -+ if (poll_queue->job_num) { -+ job->next = poll_queue->head; -+ poll_queue->head = job; -+ } else { -+ poll_queue->head = job; -+ poll_queue->tail = job; -+ } -+ poll_queue->job_num++; -+ pthread_spin_unlock(&poll_queue->s_lock); -+} -+ -+static int hash_mb_check_param(struct hash_mb_queue *mb_queue, struct wd_digest_msg *d_msg) -+{ -+ if (unlikely(mb_queue->ctx_mode == CTX_MODE_ASYNC && d_msg->has_next)) { -+ WD_ERR("invalid: async mode not supports long hash!\n"); -+ return -WD_EINVAL; -+ } -+ -+ if (unlikely(d_msg->data_fmt != WD_FLAT_BUF)) { -+ WD_ERR("invalid: hash multibuffer not supports sgl mode!\n"); -+ return -WD_EINVAL; -+ } -+ -+ return WD_SUCCESS; -+} -+ -+static int hash_mb_send(struct wd_alg_driver *drv, handle_t ctx, void *drv_msg) -+{ -+ struct wd_soft_ctx *s_ctx = (struct wd_soft_ctx *)ctx; -+ struct hash_mb_queue *mb_queue = s_ctx->priv; -+ struct wd_digest_msg *d_msg = drv_msg; -+ struct hash_mb_poll_queue *poll_queue; -+ struct hash_job hash_sync_job; -+ struct hash_job *hash_job; -+ int ret; -+ -+ ret = hash_mb_check_param(mb_queue, d_msg); -+ if (ret) -+ return ret; -+ -+ if (mb_queue->ctx_mode == CTX_MODE_ASYNC) { -+ hash_job = malloc(sizeof(struct hash_job)); -+ if (unlikely(!hash_job)) -+ return -WD_ENOMEM; -+ } else { -+ hash_job = &hash_sync_job; -+ } -+ -+ switch (d_msg->alg) { -+ case WD_DIGEST_SM3: -+ poll_queue = &mb_queue->sm3_poll_queue; -+ hash_job->is_transfer = true; -+ break; -+ case WD_DIGEST_MD5: -+ poll_queue = &mb_queue->md5_poll_queue; -+ hash_job->is_transfer = false; -+ break; -+ default: -+ WD_ERR("invalid: alg type %u not support!\n", d_msg->alg); -+ if (mb_queue->ctx_mode == CTX_MODE_ASYNC) -+ free(hash_job); -+ return -WD_EINVAL; -+ } -+ -+ hash_mb_init_iv(poll_queue, d_msg, hash_job); -+ /* If block not need process, return 
directly. */ -+ ret = hash_do_partial(poll_queue, d_msg, hash_job); -+ if (ret == -WD_EAGAIN) { -+ if (mb_queue->ctx_mode == CTX_MODE_ASYNC) -+ free(hash_job); -+ -+ d_msg->result = WD_SUCCESS; -+ return WD_SUCCESS; -+ } -+ -+ if (mb_queue->ctx_mode == CTX_MODE_SYNC) { -+ hash_do_sync(poll_queue, hash_job); -+ memcpy(d_msg->out, hash_job->result_digest, d_msg->out_bytes); -+ d_msg->result = WD_SUCCESS; -+ return WD_SUCCESS; -+ } -+ -+ hash_job->msg = d_msg; -+ hash_mb_add_job_tail(poll_queue, hash_job); -+ -+ return WD_SUCCESS; -+} -+ -+static struct hash_job *hash_mb_find_complete_job(struct hash_mb_queue *mb_queue) -+{ -+ struct hash_job *job; -+ -+ pthread_spin_lock(&mb_queue->r_lock); -+ if (!mb_queue->complete_cnt) { -+ pthread_spin_unlock(&mb_queue->r_lock); -+ return NULL; -+ } -+ -+ job = mb_queue->recv_head; -+ mb_queue->recv_head = job->next; -+ mb_queue->complete_cnt--; -+ pthread_spin_unlock(&mb_queue->r_lock); -+ -+ return job; -+} -+ -+static int hash_recv_complete_job(struct hash_mb_queue *mb_queue, struct wd_digest_msg *msg) -+{ -+ struct hash_mb_poll_queue *poll_queue; -+ struct hash_job *hash_job; -+ __u32 total_len; -+ -+ hash_job = hash_mb_find_complete_job(mb_queue); -+ if (!hash_job) -+ return -WD_EAGAIN; -+ -+ if (!hash_job->opad.opad_size) { -+ msg->tag = hash_job->msg->tag; -+ memcpy(hash_job->msg->out, hash_job->result_digest, hash_job->msg->out_bytes); -+ free(hash_job); -+ msg->result = WD_SUCCESS; -+ return WD_SUCCESS; -+ } -+ -+ if (hash_job->msg->alg == WD_DIGEST_SM3) -+ poll_queue = &mb_queue->sm3_poll_queue; -+ else -+ poll_queue = &mb_queue->md5_poll_queue; -+ hash_job->buffer = hash_job->opad.opad + poll_queue->ops->iv_bytes; -+ memcpy(hash_job->buffer, hash_job->result_digest, poll_queue->ops->iv_bytes); -+ total_len = poll_queue->ops->iv_bytes + HASH_BLOCK_SIZE; -+ hash_mb_pad_data(&hash_job->pad, hash_job->buffer, poll_queue->ops->iv_bytes, -+ total_len, hash_job->is_transfer); -+ memcpy(hash_job->result_digest, 
hash_job->opad.opad, poll_queue->ops->iv_bytes); -+ hash_job->opad.opad_size = 0; -+ hash_job->buffer = hash_job->pad.pad; -+ hash_job->len = hash_job->pad.pad_len; -+ hash_job->pad.pad_len = 0; -+ -+ hash_mb_add_job_head(poll_queue, hash_job); -+ -+ return -WD_EAGAIN; -+} -+ -+static struct hash_job *hash_mb_get_job(struct hash_mb_poll_queue *poll_queue) -+{ -+ struct hash_job *job; -+ -+ pthread_spin_lock(&poll_queue->s_lock); -+ if (!poll_queue->job_num) { -+ pthread_spin_unlock(&poll_queue->s_lock); -+ return NULL; -+ } -+ -+ job = poll_queue->head; -+ poll_queue->head = job->next; -+ poll_queue->job_num--; -+ pthread_spin_unlock(&poll_queue->s_lock); -+ -+ return job; -+} -+ -+static void hash_mb_add_finish_job(struct hash_mb_queue *mb_queue, struct hash_job *job) -+{ -+ pthread_spin_lock(&mb_queue->r_lock); -+ if (mb_queue->complete_cnt) { -+ mb_queue->recv_tail->next = job; -+ mb_queue->recv_tail = job; -+ } else { -+ mb_queue->recv_head = job; -+ mb_queue->recv_tail = job; -+ } -+ mb_queue->complete_cnt++; -+ pthread_spin_unlock(&mb_queue->r_lock); -+} -+ -+static struct hash_mb_poll_queue *hash_get_poll_queue(struct hash_mb_queue *mb_queue) -+{ -+ if (!mb_queue->sm3_poll_queue.job_num && -+ !mb_queue->md5_poll_queue.job_num) -+ return NULL; -+ -+ if (mb_queue->md5_poll_queue.job_num >= mb_queue->sm3_poll_queue.job_num) -+ return &mb_queue->md5_poll_queue; -+ -+ return &mb_queue->sm3_poll_queue; -+} -+ -+static int hash_mb_do_jobs(struct hash_mb_queue *mb_queue) -+{ -+ struct hash_mb_poll_queue *poll_queue = hash_get_poll_queue(mb_queue); -+ struct hash_job *job_vecs[HASH_MAX_LANES]; -+ __u64 len = 0; -+ int maxjobs; -+ int j = 0; -+ int i = 0; -+ -+ if (!poll_queue) -+ return -WD_EAGAIN; -+ -+ maxjobs = poll_queue->ops->max_lanes(); -+ maxjobs = MIN(maxjobs, poll_queue->ops->max_jobs); -+ while (j < maxjobs) { -+ job_vecs[j] = hash_mb_get_job(poll_queue); -+ if (!job_vecs[j]) -+ break; -+ -+ if (!j) -+ len = job_vecs[j]->len; -+ else -+ len = 
MIN(job_vecs[j]->len, len); -+ j++; -+ } -+ -+ if (!j) -+ return -WD_EAGAIN; -+ -+ if (j > HASH_NENO_PROCESS_JOBS) { -+ poll_queue->ops->sve(len, j, job_vecs); -+ } else if (j == HASH_NENO_PROCESS_JOBS) { -+ poll_queue->ops->asimd_x4(job_vecs[0], job_vecs[1], -+ job_vecs[2], job_vecs[3], len); -+ } else { -+ while (i < j) -+ poll_queue->ops->asimd_x1(job_vecs[i++], len); -+ } -+ -+ for (i = 0; i < j; i++) { -+ if (job_vecs[i]->len == len) { -+ if (!job_vecs[i]->pad.pad_len) { -+ hash_mb_add_finish_job(mb_queue, job_vecs[i]); -+ } else { -+ job_vecs[i]->buffer = job_vecs[i]->pad.pad; -+ job_vecs[i]->len = job_vecs[i]->pad.pad_len; -+ job_vecs[i]->pad.pad_len = 0; -+ hash_mb_add_job_head(poll_queue, job_vecs[i]); -+ } -+ } else { -+ job_vecs[i]->len -= len; -+ job_vecs[i]->buffer += len << HASH_BLOCK_OFFSET; -+ hash_mb_add_job_head(poll_queue, job_vecs[i]); -+ } -+ } -+ -+ return WD_SUCCESS; -+} -+ -+static int hash_mb_recv(struct wd_alg_driver *drv, handle_t ctx, void *drv_msg) -+{ -+ struct wd_soft_ctx *s_ctx = (struct wd_soft_ctx *)ctx; -+ struct hash_mb_queue *mb_queue = s_ctx->priv; -+ struct wd_digest_msg *msg = drv_msg; -+ int ret, i = 0; -+ -+ if (mb_queue->ctx_mode == CTX_MODE_SYNC) -+ return WD_SUCCESS; -+ -+ while (i++ < HASH_TRY_PROCESS_COUNT) { -+ ret = hash_recv_complete_job(mb_queue, msg); -+ if (!ret) -+ return WD_SUCCESS; -+ -+ ret = hash_mb_do_jobs(mb_queue); -+ if (ret) -+ return ret; -+ } -+ -+ return -WD_EAGAIN; -+} -+ -+static int hash_mb_get_usage(void *param) -+{ -+ return 0; -+} -+ -+#define GEN_HASH_ALG_DRIVER(hash_alg_name) \ -+{\ -+ .drv_name = "hash_mb",\ -+ .alg_name = (hash_alg_name),\ -+ .calc_type = UADK_ALG_SVE_INSTR,\ -+ .priority = 100,\ -+ .queue_num = 1,\ -+ .op_type_num = 1,\ -+ .fallback = 0,\ -+ .init = hash_mb_init,\ -+ .exit = hash_mb_exit,\ -+ .send = hash_mb_send,\ -+ .recv = hash_mb_recv,\ -+ .get_usage = hash_mb_get_usage,\ -+} -+ -+static struct wd_alg_driver hash_mb_driver[] = { -+ GEN_HASH_ALG_DRIVER("sm3"), -+ 
GEN_HASH_ALG_DRIVER("md5"), -+}; -+ -+static void __attribute__((constructor)) hash_mb_probe(void) -+{ -+ size_t alg_num = ARRAY_SIZE(hash_mb_driver); -+ size_t i; -+ int ret; -+ -+ WD_INFO("Info: register hash_mb alg drivers!\n"); -+ for (i = 0; i < alg_num; i++) { -+ ret = wd_alg_driver_register(&hash_mb_driver[i]); -+ if (ret && ret != -WD_ENODEV) -+ WD_ERR("Error: register hash multibuff %s failed!\n", -+ hash_mb_driver[i].alg_name); -+ } -+} -+ -+static void __attribute__((destructor)) hash_mb_remove(void) -+{ -+ size_t alg_num = ARRAY_SIZE(hash_mb_driver); -+ size_t i; -+ -+ WD_INFO("Info: unregister hash_mb alg drivers!\n"); -+ for (i = 0; i < alg_num; i++) -+ wd_alg_driver_unregister(&hash_mb_driver[i]); -+} -+ -diff --git a/drv/hash_mb/hash_mb.h b/drv/hash_mb/hash_mb.h -new file mode 100644 -index 0000000..aba5ec9 ---- /dev/null -+++ b/drv/hash_mb/hash_mb.h -@@ -0,0 +1,62 @@ -+/* SPDX-License-Identifier: Apache-2.0 */ -+/* Copyright 2024 Huawei Technologies Co.,Ltd. All rights reserved. 
*/ -+ -+#ifndef __HASH_MB_H -+#define __HASH_MB_H -+ -+#include -+#include -+#include "drv/wd_digest_drv.h" -+#include "wd_digest.h" -+ -+#ifdef __cplusplus -+extern "C" { -+#endif -+ -+#define HASH_BLOCK_SIZE 64 -+#define HASH_DIGEST_NWORDS 32 -+ -+#if __STDC_VERSION__ >= 201112L -+# define __ALIGN_END __attribute__((aligned(64))) -+#else -+# define __ALIGN_END __aligned(64) -+#endif -+ -+struct hash_pad { -+ __u8 pad[HASH_BLOCK_SIZE * 2]; -+ __u32 pad_len; -+}; -+ -+struct hash_opad { -+ __u8 opad[HASH_BLOCK_SIZE]; -+ __u32 opad_size; -+}; -+ -+struct hash_job { -+ void *buffer; -+ __u64 len; -+ __u8 result_digest[HASH_DIGEST_NWORDS] __ALIGN_END; -+ struct hash_pad pad; -+ struct hash_opad opad; -+ struct hash_job *next; -+ struct wd_digest_msg *msg; -+ bool is_transfer; -+}; -+ -+void sm3_mb_sve(int blocks, int total_lanes, struct hash_job **job_vec); -+void sm3_mb_asimd_x4(struct hash_job *job1, struct hash_job *job2, -+ struct hash_job *job3, struct hash_job *job4, int len); -+void sm3_mb_asimd_x1(struct hash_job *job, int len); -+int sm3_mb_sve_max_lanes(void); -+void md5_mb_sve(int blocks, int total_lanes, struct hash_job **job_vec); -+void md5_mb_asimd_x4(struct hash_job *job1, struct hash_job *job2, -+ struct hash_job *job3, struct hash_job *job4, int len); -+void md5_mb_asimd_x1(struct hash_job *job, int len); -+int md5_mb_sve_max_lanes(void); -+ -+#ifdef __cplusplus -+} -+#endif -+ -+#endif /* __HASH_MB_H */ -+ -diff --git a/drv/hash_mb/md5_mb_asimd_x1.S b/drv/hash_mb/md5_mb_asimd_x1.S -new file mode 100644 -index 0000000..27d1124 ---- /dev/null -+++ b/drv/hash_mb/md5_mb_asimd_x1.S -@@ -0,0 +1,248 @@ -+/********************************************************************** -+ Copyright(c) 2020 Arm Corporation All rights reserved. 
-+ -+ Redistribution and use in source and binary forms, with or without -+ modification, are permitted provided that the following conditions -+ are met: -+ * Redistributions of source code must retain the above copyright -+ notice, this list of conditions and the following disclaimer. -+ * Redistributions in binary form must reproduce the above copyright -+ notice, this list of conditions and the following disclaimer in -+ the documentation and/or other materials provided with the -+ distribution. -+ * Neither the name of Arm Corporation nor the names of its -+ contributors may be used to endorse or promote products derived -+ from this software without specific prior written permission. -+ -+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-+**********************************************************************/ -+ .arch armv8-a -+ -+/* -+Macros -+*/ -+ -+.macro declare_var_vector_reg name:req,reg:req -+ q_\name .req q\reg -+ v_\name .req v\reg -+ s_\name .req s\reg -+.endm -+ -+ -+.macro round_0_15 d_a:req,d_b:req,d_c:req,d_d:req,kh:req,kl:req,w:req,r:req -+ eor tmp0,\d_c,\d_d -+ mov k,\kl -+ and tmp0,tmp0,\d_b -+ movk k,\kh,lsl 16 -+ eor tmp0,tmp0,\d_d -+ add tmp1,k,\w -+ add tmp0,tmp1,tmp0 -+ add tmp0,\d_a,tmp0 -+ ror tmp0,tmp0,32 - \r -+ add \d_a,\d_b,tmp0 -+.endm -+ -+.macro round_16_31 d_a:req,d_b:req,d_c:req,d_d:req,kh:req,kl:req,w:req,r:req -+ eor tmp0,\d_b,\d_c -+ mov k,\kl -+ and tmp0,tmp0,\d_d -+ movk k,\kh,lsl 16 -+ eor tmp0,tmp0,\d_c -+ add tmp1,k,\w -+ add tmp0,tmp1,tmp0 -+ add tmp0,\d_a,tmp0 -+ ror tmp0,tmp0,32 - \r -+ add \d_a,\d_b,tmp0 -+.endm -+ -+.macro round_32_47 d_a:req,d_b:req,d_c:req,d_d:req,kh:req,kl:req,w:req,r:req -+ eor tmp0,\d_b,\d_c -+ mov k,\kl -+ eor tmp0,tmp0,\d_d -+ movk k,\kh,lsl 16 -+ add tmp1,k,\w -+ add tmp0,tmp1,tmp0 -+ add tmp0,\d_a,tmp0 -+ ror tmp0,tmp0,32 - \r -+ add \d_a,\d_b,tmp0 -+.endm -+ -+.macro round_48_63 d_a:req,d_b:req,d_c:req,d_d:req,kh:req,kl:req,w:req,r:req -+ orn tmp0,\d_b,\d_d -+ mov k,\kl -+ eor tmp0,tmp0,\d_c -+ movk k,\kh,lsl 16 -+ add tmp1,k,\w -+ add tmp0,tmp1,tmp0 -+ add tmp0,\d_a,tmp0 -+ ror tmp0,tmp0,32 - \r -+ add \d_a,\d_b,tmp0 -+.endm -+/* -+ variables -+*/ -+ job0 .req x0 -+ digest_addr .req x0 -+ len .req w1 -+ end .req x1 -+ -+ buf_adr .req x2 -+ d_a .req w3 -+ d_b .req w4 -+ d_c .req w5 -+ d_d .req w6 -+ k .req w7 -+ m0 .req w8 -+ m1 .req w9 -+ m2 .req w10 -+ m3 .req w11 -+ m4 .req w12 -+ m5 .req w13 -+ m6 .req w14 -+ m7 .req w15 -+ m8 .req w19 -+ m9 .req w20 -+ m10 .req w21 -+ m11 .req w22 -+ m12 .req w23 -+ m13 .req w24 -+ m14 .req w25 -+ m15 .req w26 -+ -+ tmp0 .req w27 -+ tmp1 .req w28 -+ -+ d_a1 .req w8 -+ d_b1 .req w9 -+ d_c1 .req w15 -+ d_d1 .req w19 -+ -+/* -+ void md5_mb_asimd_x1(MD5_JOB * job0,int len) -+*/ -+ .global 
md5_mb_asimd_x1 -+ .type md5_mb_asimd_x1, %function -+md5_mb_asimd_x1: -+ cmp len,0 -+ stp x29, x30, [sp,-96]! -+ ldr buf_adr,[job0],64 -+ stp x19, x20, [sp, 16] -+ add end,buf_adr,end,lsl 6 -+ stp x21, x22, [sp, 32] -+ ldp d_a,d_b,[digest_addr] -+ stp x23, x24, [sp, 48] -+ ldp d_c,d_d,[digest_addr,8] -+ stp x25, x26, [sp, 64] -+ stp x27, x28, [sp, 80] -+ ble .exit -+ -+.loop_start: -+ ldp m0,m1,[buf_adr],8 -+ ldp m2,m3,[buf_adr],8 -+ round_0_15 d_a,d_b,d_c,d_d,0xd76a,0xa478,m0,7 -+ -+ ldp m4,m5,[buf_adr],8 -+ round_0_15 d_d,d_a,d_b,d_c,0xe8c7,0xb756,m1,12 -+ ldp m6,m7,[buf_adr],8 -+ round_0_15 d_c,d_d,d_a,d_b,0x2420,0x70db,m2,17 -+ ldp m8,m9,[buf_adr],8 -+ round_0_15 d_b,d_c,d_d,d_a,0xc1bd,0xceee,m3,22 -+ ldp m10,m11,[buf_adr],8 -+ round_0_15 d_a,d_b,d_c,d_d,0xf57c,0xfaf,m4,7 -+ ldp m12,m13,[buf_adr],8 -+ round_0_15 d_d,d_a,d_b,d_c,0x4787,0xc62a,m5,12 -+ ldp m14,m15,[buf_adr],8 -+ round_0_15 d_c,d_d,d_a,d_b,0xa830,0x4613,m6,17 -+ round_0_15 d_b,d_c,d_d,d_a,0xfd46,0x9501,m7,22 -+ round_0_15 d_a,d_b,d_c,d_d,0x6980,0x98d8,m8,7 -+ round_0_15 d_d,d_a,d_b,d_c,0x8b44,0xf7af,m9,12 -+ round_0_15 d_c,d_d,d_a,d_b,0xffff,0x5bb1,m10,17 -+ round_0_15 d_b,d_c,d_d,d_a,0x895c,0xd7be,m11,22 -+ round_0_15 d_a,d_b,d_c,d_d,0x6b90,0x1122,m12,7 -+ round_0_15 d_d,d_a,d_b,d_c,0xfd98,0x7193,m13,12 -+ round_0_15 d_c,d_d,d_a,d_b,0xa679,0x438e,m14,17 -+ round_0_15 d_b,d_c,d_d,d_a,0x49b4,0x821,m15,22 -+ -+ round_16_31 d_a,d_b,d_c,d_d,0xf61e,0x2562,m1,5 -+ round_16_31 d_d,d_a,d_b,d_c,0xc040,0xb340,m6,9 -+ round_16_31 d_c,d_d,d_a,d_b,0x265e,0x5a51,m11,14 -+ round_16_31 d_b,d_c,d_d,d_a,0xe9b6,0xc7aa,m0,20 -+ round_16_31 d_a,d_b,d_c,d_d,0xd62f,0x105d,m5,5 -+ round_16_31 d_d,d_a,d_b,d_c,0x244,0x1453,m10,9 -+ round_16_31 d_c,d_d,d_a,d_b,0xd8a1,0xe681,m15,14 -+ round_16_31 d_b,d_c,d_d,d_a,0xe7d3,0xfbc8,m4,20 -+ round_16_31 d_a,d_b,d_c,d_d,0x21e1,0xcde6,m9,5 -+ round_16_31 d_d,d_a,d_b,d_c,0xc337,0x7d6,m14,9 -+ round_16_31 d_c,d_d,d_a,d_b,0xf4d5,0xd87,m3,14 -+ round_16_31 
d_b,d_c,d_d,d_a,0x455a,0x14ed,m8,20 -+ round_16_31 d_a,d_b,d_c,d_d,0xa9e3,0xe905,m13,5 -+ round_16_31 d_d,d_a,d_b,d_c,0xfcef,0xa3f8,m2,9 -+ round_16_31 d_c,d_d,d_a,d_b,0x676f,0x2d9,m7,14 -+ round_16_31 d_b,d_c,d_d,d_a,0x8d2a,0x4c8a,m12,20 -+ -+ round_32_47 d_a,d_b,d_c,d_d,0xfffa,0x3942,m5,4 -+ round_32_47 d_d,d_a,d_b,d_c,0x8771,0xf681,m8,11 -+ round_32_47 d_c,d_d,d_a,d_b,0x6d9d,0x6122,m11,16 -+ round_32_47 d_b,d_c,d_d,d_a,0xfde5,0x380c,m14,23 -+ round_32_47 d_a,d_b,d_c,d_d,0xa4be,0xea44,m1,4 -+ round_32_47 d_d,d_a,d_b,d_c,0x4bde,0xcfa9,m4,11 -+ round_32_47 d_c,d_d,d_a,d_b,0xf6bb,0x4b60,m7,16 -+ round_32_47 d_b,d_c,d_d,d_a,0xbebf,0xbc70,m10,23 -+ round_32_47 d_a,d_b,d_c,d_d,0x289b,0x7ec6,m13,4 -+ round_32_47 d_d,d_a,d_b,d_c,0xeaa1,0x27fa,m0,11 -+ round_32_47 d_c,d_d,d_a,d_b,0xd4ef,0x3085,m3,16 -+ round_32_47 d_b,d_c,d_d,d_a,0x488,0x1d05,m6,23 -+ round_32_47 d_a,d_b,d_c,d_d,0xd9d4,0xd039,m9,4 -+ round_32_47 d_d,d_a,d_b,d_c,0xe6db,0x99e5,m12,11 -+ round_32_47 d_c,d_d,d_a,d_b,0x1fa2,0x7cf8,m15,16 -+ round_32_47 d_b,d_c,d_d,d_a,0xc4ac,0x5665,m2,23 -+ -+ round_48_63 d_a,d_b,d_c,d_d,0xf429,0x2244,m0,6 -+ round_48_63 d_d,d_a,d_b,d_c,0x432a,0xff97,m7,10 -+ round_48_63 d_c,d_d,d_a,d_b,0xab94,0x23a7,m14,15 -+ round_48_63 d_b,d_c,d_d,d_a,0xfc93,0xa039,m5,21 -+ round_48_63 d_a,d_b,d_c,d_d,0x655b,0x59c3,m12,6 -+ round_48_63 d_d,d_a,d_b,d_c,0x8f0c,0xcc92,m3,10 -+ round_48_63 d_c,d_d,d_a,d_b,0xffef,0xf47d,m10,15 -+ round_48_63 d_b,d_c,d_d,d_a,0x8584,0x5dd1,m1,21 -+ round_48_63 d_a,d_b,d_c,d_d,0x6fa8,0x7e4f,m8,6 -+ round_48_63 d_d,d_a,d_b,d_c,0xfe2c,0xe6e0,m15,10 -+ round_48_63 d_c,d_d,d_a,d_b,0xa301,0x4314,m6,15 -+ round_48_63 d_b,d_c,d_d,d_a,0x4e08,0x11a1,m13,21 -+ round_48_63 d_a,d_b,d_c,d_d,0xf753,0x7e82,m4,6 -+ ldp d_a1,d_b1,[digest_addr] -+ round_48_63 d_d,d_a,d_b,d_c,0xbd3a,0xf235,m11,10 -+ ldp d_c1,d_d1,[digest_addr,8] -+ round_48_63 d_c,d_d,d_a,d_b,0x2ad7,0xd2bb,m2,15 -+ round_48_63 d_b,d_c,d_d,d_a,0xeb86,0xd391,m9,21 -+ -+ cmp buf_adr,end -+ add d_a,d_a1 ,d_a -+ str 
d_a,[digest_addr] -+ add d_b,d_b1 ,d_b -+ str d_b,[digest_addr,4] -+ add d_c,d_c1 ,d_c -+ str d_c,[digest_addr,8] -+ add d_d,d_d1 ,d_d -+ str d_d,[digest_addr,12] -+ bne .loop_start -+ -+.exit: -+ ldp x19, x20, [sp, 16] -+ ldp x21, x22, [sp, 32] -+ ldp x23, x24, [sp, 48] -+ ldp x25, x26, [sp, 64] -+ ldp x27, x28, [sp, 80] -+ ldp x29, x30, [sp], 96 -+ ret -+ .size md5_mb_asimd_x1, .-md5_mb_asimd_x1 -diff --git a/drv/hash_mb/md5_mb_asimd_x4.S b/drv/hash_mb/md5_mb_asimd_x4.S -new file mode 100644 -index 0000000..5397913 ---- /dev/null -+++ b/drv/hash_mb/md5_mb_asimd_x4.S -@@ -0,0 +1,526 @@ -+/********************************************************************** -+ Copyright(c) 2020 Arm Corporation All rights reserved. -+ -+ Redistribution and use in source and binary forms, with or without -+ modification, are permitted provided that the following conditions -+ are met: -+ * Redistributions of source code must retain the above copyright -+ notice, this list of conditions and the following disclaimer. -+ * Redistributions in binary form must reproduce the above copyright -+ notice, this list of conditions and the following disclaimer in -+ the documentation and/or other materials provided with the -+ distribution. -+ * Neither the name of Arm Corporation nor the names of its -+ contributors may be used to endorse or promote products derived -+ from this software without specific prior written permission. -+ -+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -+ A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT -+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -+**********************************************************************/ -+ .arch armv8-a -+ -+/* -+Macros -+*/ -+ -+.macro declare_var_vector_reg name:req,reg:req -+ q_\name .req q\reg -+ v_\name .req v\reg -+ s_\name .req s\reg -+.endm -+ -+.macro add_key_rol a:req,b:req,k:req,w:req,r:req -+ add v_tmp0.4s,v_\k\().4s,v_\w\().4s -+ add v_tmp1.4s,v_tmp1.4s,v_\a\().4s -+ add v_tmp1.4s,v_tmp1.4s,v_tmp0.4s -+ shl v_tmp0.4s,v_tmp1.4s,\r -+ ushr v_tmp1.4s,v_tmp1.4s,32-\r -+ orr v_tmp0.16b,v_tmp1.16b,v_tmp0.16b -+ -+ add v_\a\().4s,v_\b\().4s,v_tmp0.4s -+.endm -+.macro round_0_15 a:req,b:req,c:req,d:req,k:req,k1:req,w:req,r:req -+ mov v_tmp1.16b, v_\b\().16b -+ bsl v_tmp1.16b, v_\c\().16b, v_\d\().16b -+ ldr q_\k1,[key_adr],16 -+ add_key_rol \a,\b,\k,\w,\r -+.endm -+ -+.macro round_16_31 a:req,b:req,c:req,d:req,k:req,k1:req,w:req,r:req -+ mov v_tmp1.16b, v_\d\().16b -+ bsl v_tmp1.16b, v_\b\().16b, v_\c\().16b -+ ldr q_\k1,[key_adr],16 -+ add_key_rol \a,\b,\k,\w,\r -+.endm -+ -+.macro round_32_47 a:req,b:req,c:req,d:req,k:req,k1:req,w:req,r:req -+ eor v_tmp1.16b,v_\b\().16b,v_\c\().16b -+ eor v_tmp1.16b,v_tmp1.16b,v_\d\().16b -+ ldr q_\k1,[key_adr],16 -+ add_key_rol \a,\b,\k,\w,\r -+.endm -+ -+.macro round_48_63 a:req,b:req,c:req,d:req,k:req,k1,w:req,r:req -+ orn v_tmp1.16b,v_\b\().16b,v_\d\().16b -+ eor v_tmp1.16b,v_tmp1.16b,v_\c\().16b -+ .ifnb \k1 -+ ldr q_\k1,[key_adr],16 -+ .endif -+ add_key_rol \a,\b,\k,\w,\r -+.endm -+/* -+ variables -+*/ 
-+ declare_var_vector_reg tmp0, 0 -+ declare_var_vector_reg tmp1, 1 -+ declare_var_vector_reg k, 2 -+ declare_var_vector_reg k1, 3 -+ declare_var_vector_reg a, 4 -+ declare_var_vector_reg b, 5 -+ declare_var_vector_reg c, 6 -+ declare_var_vector_reg d, 7 -+ declare_var_vector_reg a1, 8 -+ declare_var_vector_reg b1, 9 -+ declare_var_vector_reg c1, 10 -+ declare_var_vector_reg d1, 11 -+ -+ declare_var_vector_reg w0, 16 -+ declare_var_vector_reg w1, 17 -+ declare_var_vector_reg w2, 18 -+ declare_var_vector_reg w3, 19 -+ declare_var_vector_reg w4, 20 -+ declare_var_vector_reg w5, 21 -+ declare_var_vector_reg w6, 22 -+ declare_var_vector_reg w7, 23 -+ declare_var_vector_reg w8, 24 -+ declare_var_vector_reg w9, 25 -+ declare_var_vector_reg w10, 26 -+ declare_var_vector_reg w11, 27 -+ declare_var_vector_reg w12, 28 -+ declare_var_vector_reg w13, 29 -+ declare_var_vector_reg w14, 30 -+ declare_var_vector_reg w15, 31 -+ -+ len .req w4 -+ len_x .req x4 -+ lane0 .req x5 -+ lane1 .req x6 -+ lane2 .req x7 -+ lane3 .req x9 -+ end .req x4 -+ job0 .req x0 -+ job1 .req x1 -+ job2 .req x2 -+ job3 .req x3 -+ key_adr .req x10 -+ -+/* -+ void md5_mb_asimd_x4(MD5_JOB * job0, MD5_JOB * job1, -+ MD5_JOB * job2, MD5_JOB * job3, int len) -+*/ -+ .global md5_mb_asimd_x4 -+ .type md5_mb_asimd_x4, %function -+md5_mb_asimd_x4: -+ stp x29,x30,[sp,-48]! 
-+ ldr lane0,[job0],64 -+ stp d8,d9,[sp,16] -+ ldr lane1,[job1],64 -+ stp d10,d11,[sp,32] -+ ldr lane2,[job2],64 -+ cmp len,0 -+ ldr lane3,[job3],64 -+ ble .exit -+ -+ //load digests -+ ld4 {v_a.s-v_d.s}[0],[job0] -+ add end,lane0,len_x,lsl 6 -+ ld4 {v_a.s-v_d.s}[1],[job1] -+ ld4 {v_a.s-v_d.s}[2],[job2] -+ ld4 {v_a.s-v_d.s}[3],[job3] -+.loop_start: -+ ld1 {v_w0.s}[0],[lane0],4 -+ mov v_a1.16b,v_a.16b -+ ld1 {v_w0.s}[1],[lane1],4 -+ mov v_b1.16b,v_b.16b -+ ld1 {v_w0.s}[2],[lane2],4 -+ mov v_c1.16b,v_c.16b -+ ld1 {v_w0.s}[3],[lane3],4 -+ mov v_d1.16b,v_d.16b -+ -+ ld3 {v_w1.s-v_w3.s}[0],[lane0],12 -+ adrp key_adr,.key_consts -+ ld3 {v_w1.s-v_w3.s}[1],[lane1],12 -+ add key_adr,key_adr,#:lo12:.key_consts -+ ld3 {v_w1.s-v_w3.s}[2],[lane2],12 -+ ldr q_k,[key_adr],16 -+ ld3 {v_w1.s-v_w3.s}[3],[lane3],12 -+ -+ -+ ld4 {v_w4.s-v_w7.s}[0], [lane0],16 -+ -+ round_0_15 a,b,c,d,k,k1,w0,7 -+ -+ ld4 {v_w4.s-v_w7.s}[1], [lane1],16 -+ round_0_15 d,a,b,c,k1,k,w1,12 -+ ld4 {v_w4.s-v_w7.s}[2], [lane2],16 -+ round_0_15 c,d,a,b,k,k1,w2,17 -+ ld4 {v_w4.s-v_w7.s}[3], [lane3],16 -+ round_0_15 b,c,d,a,k1,k,w3,22 -+ ld4 {v_w8.s-v_w11.s}[0],[lane0],16 -+ round_0_15 a,b,c,d,k,k1,w4,7 -+ ld4 {v_w8.s-v_w11.s}[1],[lane1],16 -+ round_0_15 d,a,b,c,k1,k,w5,12 -+ ld4 {v_w8.s-v_w11.s}[2],[lane2],16 -+ round_0_15 c,d,a,b,k,k1,w6,17 -+ ld4 {v_w8.s-v_w11.s}[3],[lane3],16 -+ round_0_15 b,c,d,a,k1,k,w7,22 -+ ld4 {v_w12.s-v_w15.s}[0],[lane0],16 -+ round_0_15 a,b,c,d,k,k1,w8,7 -+ ld4 {v_w12.s-v_w15.s}[1],[lane1],16 -+ round_0_15 d,a,b,c,k1,k,w9,12 -+ ld4 {v_w12.s-v_w15.s}[2],[lane2],16 -+ round_0_15 c,d,a,b,k,k1,w10,17 -+ ld4 {v_w12.s-v_w15.s}[3],[lane3],16 -+ round_0_15 b,c,d,a,k1,k,w11,22 -+ round_0_15 a,b,c,d,k,k1,w12,7 -+ round_0_15 d,a,b,c,k1,k,w13,12 -+ round_0_15 c,d,a,b,k,k1,w14,17 -+ round_0_15 b,c,d,a,k1,k,w15,22 -+ -+ round_16_31 a,b,c,d,k,k1,w1,5 -+ round_16_31 d,a,b,c,k1,k,w6,9 -+ round_16_31 c,d,a,b,k,k1,w11,14 -+ round_16_31 b,c,d,a,k1,k,w0,20 -+ round_16_31 a,b,c,d,k,k1,w5,5 -+ round_16_31 
d,a,b,c,k1,k,w10,9 -+ round_16_31 c,d,a,b,k,k1,w15,14 -+ round_16_31 b,c,d,a,k1,k,w4,20 -+ round_16_31 a,b,c,d,k,k1,w9,5 -+ round_16_31 d,a,b,c,k1,k,w14,9 -+ round_16_31 c,d,a,b,k,k1,w3,14 -+ round_16_31 b,c,d,a,k1,k,w8,20 -+ round_16_31 a,b,c,d,k,k1,w13,5 -+ round_16_31 d,a,b,c,k1,k,w2,9 -+ round_16_31 c,d,a,b,k,k1,w7,14 -+ round_16_31 b,c,d,a,k1,k,w12,20 -+ -+ round_32_47 a,b,c,d,k,k1,w5,4 -+ round_32_47 d,a,b,c,k1,k,w8,11 -+ round_32_47 c,d,a,b,k,k1,w11,16 -+ round_32_47 b,c,d,a,k1,k,w14,23 -+ round_32_47 a,b,c,d,k,k1,w1,4 -+ round_32_47 d,a,b,c,k1,k,w4,11 -+ round_32_47 c,d,a,b,k,k1,w7,16 -+ round_32_47 b,c,d,a,k1,k,w10,23 -+ round_32_47 a,b,c,d,k,k1,w13,4 -+ round_32_47 d,a,b,c,k1,k,w0,11 -+ round_32_47 c,d,a,b,k,k1,w3,16 -+ round_32_47 b,c,d,a,k1,k,w6,23 -+ round_32_47 a,b,c,d,k,k1,w9,4 -+ round_32_47 d,a,b,c,k1,k,w12,11 -+ round_32_47 c,d,a,b,k,k1,w15,16 -+ round_32_47 b,c,d,a,k1,k,w2,23 -+ -+ round_48_63 a,b,c,d,k,k1,w0,6 -+ round_48_63 d,a,b,c,k1,k,w7,10 -+ round_48_63 c,d,a,b,k,k1,w14,15 -+ round_48_63 b,c,d,a,k1,k,w5,21 -+ round_48_63 a,b,c,d,k,k1,w12,6 -+ round_48_63 d,a,b,c,k1,k,w3,10 -+ round_48_63 c,d,a,b,k,k1,w10,15 -+ round_48_63 b,c,d,a,k1,k,w1,21 -+ round_48_63 a,b,c,d,k,k1,w8,6 -+ round_48_63 d,a,b,c,k1,k,w15,10 -+ round_48_63 c,d,a,b,k,k1,w6,15 -+ round_48_63 b,c,d,a,k1,k,w13,21 -+ round_48_63 a,b,c,d,k,k1,w4,6 -+ round_48_63 d,a,b,c,k1,k,w11,10 -+ round_48_63 c,d,a,b,k,k1,w2,15 -+ round_48_63 b,c,d,a,k1, ,w9,21 -+ -+ -+ -+ -+ cmp lane0,end -+ add v_a.4s,v_a1.4s,v_a.4s -+ add v_b.4s,v_b1.4s,v_b.4s -+ add v_c.4s,v_c1.4s,v_c.4s -+ add v_d.4s,v_d1.4s,v_d.4s -+ bne .loop_start -+ -+ st4 {v_a.s-v_d.s}[0],[job0] -+ st4 {v_a.s-v_d.s}[1],[job1] -+ st4 {v_a.s-v_d.s}[2],[job2] -+ st4 {v_a.s-v_d.s}[3],[job3] -+.exit: -+ ldp d8,d9,[sp,16] -+ ldp d10,d11,[sp,32] -+ ldp x29,x30,[sp],48 -+ ret -+.key_consts: -+ .word 0xd76aa478 -+ .word 0xd76aa478 -+ .word 0xd76aa478 -+ .word 0xd76aa478 -+ .word 0xe8c7b756 -+ .word 0xe8c7b756 -+ .word 0xe8c7b756 -+ .word 
0xe8c7b756 -+ .word 0x242070db -+ .word 0x242070db -+ .word 0x242070db -+ .word 0x242070db -+ .word 0xc1bdceee -+ .word 0xc1bdceee -+ .word 0xc1bdceee -+ .word 0xc1bdceee -+ .word 0xf57c0faf -+ .word 0xf57c0faf -+ .word 0xf57c0faf -+ .word 0xf57c0faf -+ .word 0x4787c62a -+ .word 0x4787c62a -+ .word 0x4787c62a -+ .word 0x4787c62a -+ .word 0xa8304613 -+ .word 0xa8304613 -+ .word 0xa8304613 -+ .word 0xa8304613 -+ .word 0xfd469501 -+ .word 0xfd469501 -+ .word 0xfd469501 -+ .word 0xfd469501 -+ .word 0x698098d8 -+ .word 0x698098d8 -+ .word 0x698098d8 -+ .word 0x698098d8 -+ .word 0x8b44f7af -+ .word 0x8b44f7af -+ .word 0x8b44f7af -+ .word 0x8b44f7af -+ .word 0xffff5bb1 -+ .word 0xffff5bb1 -+ .word 0xffff5bb1 -+ .word 0xffff5bb1 -+ .word 0x895cd7be -+ .word 0x895cd7be -+ .word 0x895cd7be -+ .word 0x895cd7be -+ .word 0x6b901122 -+ .word 0x6b901122 -+ .word 0x6b901122 -+ .word 0x6b901122 -+ .word 0xfd987193 -+ .word 0xfd987193 -+ .word 0xfd987193 -+ .word 0xfd987193 -+ .word 0xa679438e -+ .word 0xa679438e -+ .word 0xa679438e -+ .word 0xa679438e -+ .word 0x49b40821 -+ .word 0x49b40821 -+ .word 0x49b40821 -+ .word 0x49b40821 -+ .word 0xf61e2562 -+ .word 0xf61e2562 -+ .word 0xf61e2562 -+ .word 0xf61e2562 -+ .word 0xc040b340 -+ .word 0xc040b340 -+ .word 0xc040b340 -+ .word 0xc040b340 -+ .word 0x265e5a51 -+ .word 0x265e5a51 -+ .word 0x265e5a51 -+ .word 0x265e5a51 -+ .word 0xe9b6c7aa -+ .word 0xe9b6c7aa -+ .word 0xe9b6c7aa -+ .word 0xe9b6c7aa -+ .word 0xd62f105d -+ .word 0xd62f105d -+ .word 0xd62f105d -+ .word 0xd62f105d -+ .word 0x02441453 -+ .word 0x02441453 -+ .word 0x02441453 -+ .word 0x02441453 -+ .word 0xd8a1e681 -+ .word 0xd8a1e681 -+ .word 0xd8a1e681 -+ .word 0xd8a1e681 -+ .word 0xe7d3fbc8 -+ .word 0xe7d3fbc8 -+ .word 0xe7d3fbc8 -+ .word 0xe7d3fbc8 -+ .word 0x21e1cde6 -+ .word 0x21e1cde6 -+ .word 0x21e1cde6 -+ .word 0x21e1cde6 -+ .word 0xc33707d6 -+ .word 0xc33707d6 -+ .word 0xc33707d6 -+ .word 0xc33707d6 -+ .word 0xf4d50d87 -+ .word 0xf4d50d87 -+ .word 0xf4d50d87 -+ .word 
0xf4d50d87 -+ .word 0x455a14ed -+ .word 0x455a14ed -+ .word 0x455a14ed -+ .word 0x455a14ed -+ .word 0xa9e3e905 -+ .word 0xa9e3e905 -+ .word 0xa9e3e905 -+ .word 0xa9e3e905 -+ .word 0xfcefa3f8 -+ .word 0xfcefa3f8 -+ .word 0xfcefa3f8 -+ .word 0xfcefa3f8 -+ .word 0x676f02d9 -+ .word 0x676f02d9 -+ .word 0x676f02d9 -+ .word 0x676f02d9 -+ .word 0x8d2a4c8a -+ .word 0x8d2a4c8a -+ .word 0x8d2a4c8a -+ .word 0x8d2a4c8a -+ .word 0xfffa3942 -+ .word 0xfffa3942 -+ .word 0xfffa3942 -+ .word 0xfffa3942 -+ .word 0x8771f681 -+ .word 0x8771f681 -+ .word 0x8771f681 -+ .word 0x8771f681 -+ .word 0x6d9d6122 -+ .word 0x6d9d6122 -+ .word 0x6d9d6122 -+ .word 0x6d9d6122 -+ .word 0xfde5380c -+ .word 0xfde5380c -+ .word 0xfde5380c -+ .word 0xfde5380c -+ .word 0xa4beea44 -+ .word 0xa4beea44 -+ .word 0xa4beea44 -+ .word 0xa4beea44 -+ .word 0x4bdecfa9 -+ .word 0x4bdecfa9 -+ .word 0x4bdecfa9 -+ .word 0x4bdecfa9 -+ .word 0xf6bb4b60 -+ .word 0xf6bb4b60 -+ .word 0xf6bb4b60 -+ .word 0xf6bb4b60 -+ .word 0xbebfbc70 -+ .word 0xbebfbc70 -+ .word 0xbebfbc70 -+ .word 0xbebfbc70 -+ .word 0x289b7ec6 -+ .word 0x289b7ec6 -+ .word 0x289b7ec6 -+ .word 0x289b7ec6 -+ .word 0xeaa127fa -+ .word 0xeaa127fa -+ .word 0xeaa127fa -+ .word 0xeaa127fa -+ .word 0xd4ef3085 -+ .word 0xd4ef3085 -+ .word 0xd4ef3085 -+ .word 0xd4ef3085 -+ .word 0x04881d05 -+ .word 0x04881d05 -+ .word 0x04881d05 -+ .word 0x04881d05 -+ .word 0xd9d4d039 -+ .word 0xd9d4d039 -+ .word 0xd9d4d039 -+ .word 0xd9d4d039 -+ .word 0xe6db99e5 -+ .word 0xe6db99e5 -+ .word 0xe6db99e5 -+ .word 0xe6db99e5 -+ .word 0x1fa27cf8 -+ .word 0x1fa27cf8 -+ .word 0x1fa27cf8 -+ .word 0x1fa27cf8 -+ .word 0xc4ac5665 -+ .word 0xc4ac5665 -+ .word 0xc4ac5665 -+ .word 0xc4ac5665 -+ .word 0xf4292244 -+ .word 0xf4292244 -+ .word 0xf4292244 -+ .word 0xf4292244 -+ .word 0x432aff97 -+ .word 0x432aff97 -+ .word 0x432aff97 -+ .word 0x432aff97 -+ .word 0xab9423a7 -+ .word 0xab9423a7 -+ .word 0xab9423a7 -+ .word 0xab9423a7 -+ .word 0xfc93a039 -+ .word 0xfc93a039 -+ .word 0xfc93a039 -+ .word 
0xfc93a039 -+ .word 0x655b59c3 -+ .word 0x655b59c3 -+ .word 0x655b59c3 -+ .word 0x655b59c3 -+ .word 0x8f0ccc92 -+ .word 0x8f0ccc92 -+ .word 0x8f0ccc92 -+ .word 0x8f0ccc92 -+ .word 0xffeff47d -+ .word 0xffeff47d -+ .word 0xffeff47d -+ .word 0xffeff47d -+ .word 0x85845dd1 -+ .word 0x85845dd1 -+ .word 0x85845dd1 -+ .word 0x85845dd1 -+ .word 0x6fa87e4f -+ .word 0x6fa87e4f -+ .word 0x6fa87e4f -+ .word 0x6fa87e4f -+ .word 0xfe2ce6e0 -+ .word 0xfe2ce6e0 -+ .word 0xfe2ce6e0 -+ .word 0xfe2ce6e0 -+ .word 0xa3014314 -+ .word 0xa3014314 -+ .word 0xa3014314 -+ .word 0xa3014314 -+ .word 0x4e0811a1 -+ .word 0x4e0811a1 -+ .word 0x4e0811a1 -+ .word 0x4e0811a1 -+ .word 0xf7537e82 -+ .word 0xf7537e82 -+ .word 0xf7537e82 -+ .word 0xf7537e82 -+ .word 0xbd3af235 -+ .word 0xbd3af235 -+ .word 0xbd3af235 -+ .word 0xbd3af235 -+ .word 0x2ad7d2bb -+ .word 0x2ad7d2bb -+ .word 0x2ad7d2bb -+ .word 0x2ad7d2bb -+ .word 0xeb86d391 -+ .word 0xeb86d391 -+ .word 0xeb86d391 -+ .word 0xeb86d391 -+ .size md5_mb_asimd_x4, .-md5_mb_asimd_x4 -diff --git a/drv/hash_mb/md5_mb_sve.S b/drv/hash_mb/md5_mb_sve.S -new file mode 100644 -index 0000000..8d8ecc1 ---- /dev/null -+++ b/drv/hash_mb/md5_mb_sve.S -@@ -0,0 +1,158 @@ -+/********************************************************************** -+ Copyright(c) 2022 Arm Corporation All rights reserved. -+ -+ Redistribution and use in source and binary forms, with or without -+ modification, are permitted provided that the following conditions -+ are met: -+ * Redistributions of source code must retain the above copyright -+ notice, this list of conditions and the following disclaimer. -+ * Redistributions in binary form must reproduce the above copyright -+ notice, this list of conditions and the following disclaimer in -+ the documentation and/or other materials provided with the -+ distribution. 
-+ * Neither the name of Arm Corporation nor the names of its -+ contributors may be used to endorse or promote products derived -+ from this software without specific prior written permission. -+ -+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -+**********************************************************************/ -+ -+ .arch armv8.2-a+sve -+ -+// copying data from sparse memory unto continuous stack space -+// in oroder to gather-load into SVE registers -+.macro copy_mb_16words vecs:req,dest:req -+ mov src,\vecs -+ mov dst,\dest -+ mov counter,total_lanes -+10: -+ ldr tmp,[src],8 -+ ldr tmp,[tmp] -+ add tmp,tmp,block_ctr,lsl 6 -+ ld1 {TMPV0.4s,TMPV1.4s,TMPV2.4s,TMPV3.4s}, [tmp] -+ st1 {TMPV0.4s,TMPV1.4s,TMPV2.4s,TMPV3.4s}, [dst],64 -+ subs counter,counter,1 -+ b.ne 10b -+.endm -+ -+.macro load_init -+ mov tmpw,16 -+ index VOFFS.s,0,tmpw -+ copy_mb_16words job_vec,databuf -+.endm -+ -+.macro load_word pipelines:req,windex:req,zreg0:req,zreg1 -+ add tmp,databuf,\windex * 4 -+ ld1w { \zreg0\().s}, p0/z, [tmp, VOFFS.s, UXTW 2] -+ .if \pipelines > 1 -+ add tmp,tmp,veclen,lsl #6 -+ ld1w {\zreg1\().s}, p1/z, [tmp, VOFFS.s, UXTW 2] -+ .endif -+.endm -+ -+#include "md5_sve_common.S" -+ -+/* int md5_mb_sve_max_lanes() -+ */ -+ .global 
md5_mb_sve_max_lanes -+ .type md5_mb_sve_max_lanes, %function -+md5_mb_sve_max_lanes: -+ cntw x0 -+ add x0,x0,x0 -+ ret -+ .size md5_mb_sve_max_lanes, .-md5_mb_sve_max_lanes -+ -+/* -+ * void md5_mb_sve(int blocks, int total_lanes, MD5_JOB **job_vec) -+ */ -+ num_blocks .req w0 -+ total_lanes .req w1 -+ job_vec .req x2 -+ src .req x5 -+ dst .req x6 -+ tmp .req x8 -+ tmpw .req w8 -+ block_ctr .req x9 -+ block_ctr_w .req w9 -+ savedsp .req x10 -+ databuf .req x11 -+ counter .req w12 -+ veclen .req x13 -+ veclen_w .req w13 -+ abcd_buf .req x14 -+ md5key_adr .req x15 -+ -+ .global md5_mb_sve -+ .type md5_mb_sve, %function -+md5_mb_sve: -+ cbz num_blocks,.return -+ md5_sve_save_stack -+ mov savedsp,sp -+ // reserve (16 * lanes) for abcd buf -+ mov tmpw,total_lanes,lsl 4 -+ sub abcd_buf,sp,tmp -+ // reserve (64 * lanes) for data buf -+ mov tmpw,total_lanes,lsl 6 -+ sub databuf,abcd_buf,tmp -+ mov sp,databuf -+ adr md5key_adr,MD5_CONST_KEYS -+ whilelo p0.s,wzr,total_lanes -+ mov src,job_vec -+ mov dst,abcd_buf -+ mov counter,total_lanes -+.ldr_hash: -+ ldr tmp,[src],8 -+ add tmp,tmp,64 -+ ld1 {v0.16b},[tmp] -+ st1 {v0.16b},[dst],16 -+ subs counter,counter,1 -+ bne .ldr_hash -+ ld4w {VA_0.s,VB_0.s,VC_0.s,VD_0.s},p0/z,[abcd_buf] -+ mov block_ctr,0 -+ cntp veclen,p0,p0.s -+ cmp veclen_w,total_lanes -+ b.eq .loop_1x -+ whilelo p1.s,veclen_w,total_lanes -+ add tmp,abcd_buf,veclen,lsl #4 -+ ld4w {VA_1.s,VB_1.s,VC_1.s,VD_1.s},p1/z,[tmp] -+ b .loop_2x -+.loop_1x: -+ md5_single 1 -+ add block_ctr, block_ctr, 1 -+ cmp block_ctr_w,num_blocks -+ bne .loop_1x -+ st4w {VA_0.s,VB_0.s,VC_0.s,VD_0.s},p0,[abcd_buf] -+ b 1f -+.loop_2x: -+ md5_single 2 -+ add block_ctr, block_ctr, 1 -+ cmp block_ctr_w,num_blocks -+ bne .loop_2x -+ st4w {VA_0.s,VB_0.s,VC_0.s,VD_0.s},p0,[abcd_buf] -+ add tmp,abcd_buf,veclen,lsl #4 -+ st4w {VA_1.s,VB_1.s,VC_1.s,VD_1.s},p1,[tmp] -+1: -+ mov dst,job_vec -+ mov src,abcd_buf -+.str_hash: -+ ld1 {v0.16b},[src],16 -+ ldr tmp,[dst],8 -+ add tmp,tmp,64 -+ st1 
{v0.16b},[tmp] -+ subs total_lanes,total_lanes,1 -+ bne .str_hash -+ mov sp,savedsp -+ md5_sve_restore_stack -+.return: -+ ret -+ .size md5_mb_sve, .-md5_mb_sve -diff --git a/drv/hash_mb/md5_sve_common.S b/drv/hash_mb/md5_sve_common.S -new file mode 100644 -index 0000000..ed81482 ---- /dev/null -+++ b/drv/hash_mb/md5_sve_common.S -@@ -0,0 +1,478 @@ -+/********************************************************************** -+ Copyright(c) 2022 Arm Corporation All rights reserved. -+ -+ Redistribution and use in source and binary forms, with or without -+ modification, are permitted provided that the following conditions -+ are met: -+ * Redistributions of source code must retain the above copyright -+ notice, this list of conditions and the following disclaimer. -+ * Redistributions in binary form must reproduce the above copyright -+ notice, this list of conditions and the following disclaimer in -+ the documentation and/or other materials provided with the -+ distribution. -+ * Neither the name of Arm Corporation nor the names of its -+ contributors may be used to endorse or promote products derived -+ from this software without specific prior written permission. -+ -+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-+**********************************************************************/ -+ VK .req z0 -+ VOFFS .req z1 -+ VA_0 .req z2 -+ VB_0 .req z3 -+ VC_0 .req z4 -+ VD_0 .req z5 -+ VF_0 .req z6 -+ VF_1 .req z7 -+ VA_1 .req z16 -+ VB_1 .req z17 -+ VC_1 .req z18 -+ VD_1 .req z19 -+ MD5WORD0_0 .req z20 -+ MD5WORD1_0 .req z21 -+ MD5WORD0_1 .req z22 -+ MD5WORD1_1 .req z23 -+ TMPV0 .req v20 -+ TMPV1 .req v21 -+ TMPV2 .req v22 -+ TMPV3 .req v23 -+ VTMP_0 .req z24 -+ VAA_0 .req z25 -+ VBB_0 .req z26 -+ VCC_0 .req z27 -+ VDD_0 .req z28 -+ VTMP_1 .req z29 -+ VAA_1 .req z30 -+ VBB_1 .req z31 -+ VCC_1 .req z8 -+ VDD_1 .req z9 -+ TT .req z0 -+ -+.macro rotate_left_x1 out:req,in:req,tmp:req,bits -+ .if \bits == 16 -+ revh \out\().s,p0/m,\in\().s -+ .else -+ .if have_sve2 == 0 -+ lsl \tmp\().s, \in\().s,\bits -+ lsr \out\().s,\in\().s,32-\bits -+ orr \out\().d,\out\().d,\tmp\().d -+ .else -+ movprfx \out\().d,\in\().d -+ xar \out\().s,\out\().s,VZERO.s,32-\bits -+ .endif -+ .endif -+.endm -+ -+.macro rotate_left_x2 out:req,in:req,tmp:req,bits,out1:req,in1:req,tmp1:req,bits1 -+ -+ .if \bits == 16 -+ revh \out\().s,p0/m,\in\().s -+ revh \out1\().s,p0/m,\in1\().s -+ .else -+ .if have_sve2 == 0 -+ lsl \tmp\().s, \in\().s,\bits -+ lsl \tmp1\().s, \in1\().s,\bits1 -+ lsr \out\().s,\in\().s,32-\bits -+ lsr \out1\().s,\in1\().s,32-\bits1 -+ orr \out\().d,\out\().d,\tmp\().d -+ orr \out1\().d,\out1\().d,\tmp1\().d -+ .else -+ movprfx \out\().d,\in\().d -+ xar \out\().s,\out\().s,VZERO.s,32-\bits -+ movprfx \out1\().d,\in1\().d -+ xar \out1\().s,\out1\().s,VZERO.s,32-\bits1 -+ .endif -+ .endif -+.endm -+ -+.macro bsl_x1 ret:req,x:req,y:req,z:req,tmp:req -+ .if have_sve2 == 0 -+ bic \ret\().d,\z\().d,\x\().d -+ and \tmp\().d,\x\().d,\y\().d -+ orr \ret\().d,\ret\().d,\tmp\().d -+ .else -+ movprfx \ret\().d,\x\().d -+ bsl \ret\().d,\ret\().d,\y\().d,\z\().d -+ .endif -+.endm -+ -+.macro bsl_x2 ret:req,x:req,y:req,z:req,tmp:req,ret1:req,x1:req,y1:req,z1:req,tmp1:req -+ .if have_sve2 == 0 -+ bic 
\ret\().d,\z\().d,\x\().d -+ bic \ret1\().d,\z1\().d,\x1\().d -+ and \tmp\().d,\x\().d,\y\().d -+ and \tmp1\().d,\x1\().d,\y1\().d -+ orr \ret\().d,\ret\().d,\tmp\().d -+ orr \ret1\().d,\ret1\().d,\tmp1\().d -+ .else -+ movprfx \ret\().d,\x\().d -+ bsl \ret\().d,\ret\().d,\y\().d,\z\().d -+ movprfx \ret1\().d,\x1\().d -+ bsl \ret1\().d,\ret1\().d,\y1\().d,\z1\().d -+ .endif -+.endm -+ -+ -+// F = D ^ (B and (C xor D)) -+// that is (B and C) or ((not B) and D) -+.macro FUNC_F0_x1 -+ bsl_x1 VF_0,VB_0,VC_0,VD_0,VTMP_0 -+.endm -+ -+.macro FUNC_F0_x2 -+ bsl_x2 VF_0,VB_0,VC_0,VD_0,VTMP_0,VF_1,VB_1,VC_1,VD_1,VTMP_1 -+.endm -+ -+// F = C xor (D and (B xor C)) -+// that is (D and B) or ((not D) and C) -+.macro FUNC_F1_x1 -+ bsl_x1 VF_0,VD_0,VB_0,VC_0,VTMP_0 -+.endm -+ -+.macro FUNC_F1_x2 -+ bsl_x2 VF_0,VD_0,VB_0,VC_0,VTMP_0,VF_1,VD_1,VB_1,VC_1,VTMP_1 -+.endm -+ -+// F := B xor C xor D -+.macro FUNC_F2_x1 -+ .if have_sve2 == 0 -+ eor VF_0.d,VB_0.d,VC_0.d -+ eor VF_0.d,VF_0.d,VD_0.d -+ .else -+ movprfx VF_0.d,VB_0.d -+ eor3 VF_0.d,VF_0.d,VC_0.d,VD_0.d -+ .endif -+.endm -+ -+.macro FUNC_F2_x2 -+ .if have_sve2 == 0 -+ eor VF_0.d,VB_0.d,VC_0.d -+ eor VF_1.d,VB_1.d,VC_1.d -+ eor VF_0.d,VF_0.d,VD_0.d -+ eor VF_1.d,VF_1.d,VD_1.d -+ .else -+ movprfx VF_0.d,VB_0.d -+ eor3 VF_0.d,VF_0.d,VC_0.d,VD_0.d -+ movprfx VF_1.d,VB_1.d -+ eor3 VF_1.d,VF_1.d,VC_1.d,VD_1.d -+ .endif -+.endm -+ -+// F := C xor (B or (not D)) -+.macro FUNC_F3_x1 -+ not VF_0.s,p0/m,VD_0.s -+ orr VF_0.d,VF_0.d,VB_0.d -+ eor VF_0.d,VF_0.d,VC_0.d -+.endm -+ -+.macro FUNC_F3_x2 -+ not VF_0.s,p0/m,VD_0.s -+ not VF_1.s,p0/m,VD_1.s -+ orr VF_0.d,VF_0.d,VB_0.d -+ orr VF_1.d,VF_1.d,VB_1.d -+ eor VF_0.d,VF_0.d,VC_0.d -+ eor VF_1.d,VF_1.d,VC_1.d -+.endm -+ -+.macro SWAP_STATES -+ .unreq TT -+ TT .req VA_0 -+ .unreq VA_0 -+ VA_0 .req VD_0 -+ .unreq VD_0 -+ VD_0 .req VC_0 -+ .unreq VC_0 -+ VC_0 .req VB_0 -+ .unreq VB_0 -+ VB_0 .req TT -+ -+ .unreq TT -+ TT .req VA_1 -+ .unreq VA_1 -+ VA_1 .req VD_1 -+ .unreq VD_1 -+ VD_1 .req 
VC_1 -+ .unreq VC_1 -+ VC_1 .req VB_1 -+ .unreq VB_1 -+ VB_1 .req TT -+.endm -+ -+.macro MD5_STEP_x1 windex:req,mg:req,func_f:req,bits:req -+ ld1rw {VK.s},p0/z,[md5key_adr,windex * 4] -+ \func_f\()_x1 -+ add VTMP_0.s,VA_0.s,\mg\()_0.s -+ add VF_0.s,VF_0.s,VK.s -+ add VF_0.s,VF_0.s,VTMP_0.s -+ rotate_left_x1 VA_0,VF_0,VTMP_0,\bits -+ add VA_0.s,VA_0.s,VB_0.s -+.endm -+ -+.macro MD5_STEP_x2 windex:req,mg:req,func_f:req,bits:req -+ ld1rw {VK.s},p0/z,[md5key_adr,windex * 4] -+ \func_f\()_x2 -+ add VTMP_0.s,VA_0.s,\mg\()_0.s -+ add VTMP_1.s,VA_1.s,\mg\()_1.s -+ add VF_0.s,VF_0.s,VK.s -+ add VF_1.s,VF_1.s,VK.s -+ add VF_0.s,VF_0.s,VTMP_0.s -+ add VF_1.s,VF_1.s,VTMP_1.s -+ rotate_left_x2 VA_0,VF_0,VTMP_0,\bits,VA_1,VF_1,VTMP_1,\bits -+ add VA_0.s,VA_0.s,VB_0.s -+ add VA_1.s,VA_1.s,VB_1.s -+.endm -+ -+.altmacro -+.macro load_words index:req,mg:req -+ load_word %num_pipelines,\index,MD5WORD\mg\()_0,MD5WORD\mg\()_1 -+.endm -+ -+.macro MD5_STEP_WRAPPER pipelines:req,windex:req,gindex:req,mg:req,\ -+ func_f:req,bits:req,gindex_next,mg_next -+ .ifnb \gindex_next -+ load_words \gindex_next,\mg_next -+ .endif -+ MD5_STEP_x\pipelines\() \windex,MD5WORD\mg\(),\func_f,\bits -+.endm -+ -+.macro exec_step windex:req,gindex:req,bits:req,gindex_next -+ .if \windex % 2 == 0 -+ mg=0 -+ mg_next=1 -+ .else -+ mg=1 -+ mg_next=0 -+ .endif -+ -+ .if \windex <= 15 -+ MD5_STEP_WRAPPER %num_pipelines,\windex,\gindex,%mg,\ -+ FUNC_F0,\bits,\gindex_next,%mg_next -+ .endif -+ .if \windex >= 16 && \windex <= 31 -+ MD5_STEP_WRAPPER %num_pipelines,\windex,\gindex,%mg,\ -+ FUNC_F1,\bits,\gindex_next,%mg_next -+ .endif -+ .if \windex >= 32 && \windex <= 47 -+ MD5_STEP_WRAPPER %num_pipelines,\windex,\gindex,%mg,\ -+ FUNC_F2,\bits,\gindex_next,%mg_next -+ .endif -+ .if \windex >= 48 && \windex < 63 -+ MD5_STEP_WRAPPER %num_pipelines,\windex,\gindex,%mg,\ -+ FUNC_F3,\bits,\gindex_next,%mg_next -+ .endif -+ .if \windex == 63 -+ MD5_STEP_WRAPPER %num_pipelines,\windex,\gindex,%mg,FUNC_F3,\bits -+ .endif -+ 
SWAP_STATES -+.endm -+ -+.macro exec_steps -+ exec_step 0,0,7,1 -+ exec_step 1,1,12,2 -+ exec_step 2,2,17,3 -+ exec_step 3,3,22,4 -+ exec_step 4,4,7,5 -+ exec_step 5,5,12,6 -+ exec_step 6,6,17,7 -+ exec_step 7,7,22,8 -+ exec_step 8,8,7,9 -+ exec_step 9,9,12,10 -+ exec_step 10,10,17,11 -+ exec_step 11,11,22,12 -+ exec_step 12,12,7,13 -+ exec_step 13,13,12,14 -+ exec_step 14,14,17,15 -+ exec_step 15,15,22,1 -+ exec_step 16,1,5,6 -+ exec_step 17,6,9,11 -+ exec_step 18,11,14,0 -+ exec_step 19,0,20,5 -+ exec_step 20,5,5,10 -+ exec_step 21,10,9,15 -+ exec_step 22,15,14,4 -+ exec_step 23,4,20,9 -+ exec_step 24,9,5,14 -+ exec_step 25,14,9,3 -+ exec_step 26,3,14,8 -+ exec_step 27,8,20,13 -+ exec_step 28,13,5,2 -+ exec_step 29,2,9,7 -+ exec_step 30,7,14,12 -+ exec_step 31,12,20,5 -+ exec_step 32,5,4,8 -+ exec_step 33,8,11,11 -+ exec_step 34,11,16,14 -+ exec_step 35,14,23,1 -+ exec_step 36,1,4,4 -+ exec_step 37,4,11,7 -+ exec_step 38,7,16,10 -+ exec_step 39,10,23,13 -+ exec_step 40,13,4,0 -+ exec_step 41,0,11,3 -+ exec_step 42,3,16,6 -+ exec_step 43,6,23,9 -+ exec_step 44,9,4,12 -+ exec_step 45,12,11,15 -+ exec_step 46,15,16,2 -+ exec_step 47,2,23,0 -+ exec_step 48,0,6,7 -+ exec_step 49,7,10,14 -+ exec_step 50,14,15,5 -+ exec_step 51,5,21,12 -+ exec_step 52,12,6,3 -+ exec_step 53,3,10,10 -+ exec_step 54,10,15,1 -+ exec_step 55,1,21,8 -+ exec_step 56,8,6,15 -+ exec_step 57,15,10,6 -+ exec_step 58,6,15,13 -+ exec_step 59,13,21,4 -+ exec_step 60,4,6,11 -+ exec_step 61,11,10,2 -+ exec_step 62,2,15,9 -+ exec_step 63,9,21 -+.endm -+ -+.macro prepare_x1 -+ load_words 0,0 -+ orr VAA_0.d,VA_0.d,VA_0.d -+ orr VBB_0.d,VB_0.d,VB_0.d -+ orr VCC_0.d,VC_0.d,VC_0.d -+ orr VDD_0.d,VD_0.d,VD_0.d -+.endm -+ -+.macro prepare_x2 -+ load_words 0,0 -+ orr VAA_0.d,VA_0.d,VA_0.d -+ orr VAA_1.d,VA_1.d,VA_1.d -+ orr VBB_0.d,VB_0.d,VB_0.d -+ orr VBB_1.d,VB_1.d,VB_1.d -+ orr VCC_0.d,VC_0.d,VC_0.d -+ orr VCC_1.d,VC_1.d,VC_1.d -+ orr VDD_0.d,VD_0.d,VD_0.d -+ orr VDD_1.d,VD_1.d,VD_1.d -+.endm -+ -+.macro 
finish_x1 -+ add VA_0.s,VA_0.s,VAA_0.s -+ add VB_0.s,VB_0.s,VBB_0.s -+ add VC_0.s,VC_0.s,VCC_0.s -+ add VD_0.s,VD_0.s,VDD_0.s -+.endm -+ -+.macro finish_x2 -+ add VA_0.s,VA_0.s,VAA_0.s -+ add VA_1.s,VA_1.s,VAA_1.s -+ add VB_0.s,VB_0.s,VBB_0.s -+ add VB_1.s,VB_1.s,VBB_1.s -+ add VC_0.s,VC_0.s,VCC_0.s -+ add VC_1.s,VC_1.s,VCC_1.s -+ add VD_0.s,VD_0.s,VDD_0.s -+ add VD_1.s,VD_1.s,VDD_1.s -+.endm -+ -+.macro md5_single pipelines:req,sve2 -+ .ifnb \sve2 -+ have_sve2=1 -+ eor VZERO.d,VZERO.d,VZERO.d -+ .else -+ have_sve2=0 -+ .endif -+ num_pipelines=\pipelines -+ load_init -+ -+ prepare_x\pipelines\() -+ exec_steps -+ finish_x\pipelines\() -+.endm -+ -+.macro md5_sve_save_stack -+ stp d8,d9,[sp, -48]! -+ stp d10,d11,[sp, 16] -+ stp d12,d13,[sp, 32] -+.endm -+ -+.macro md5_sve_restore_stack -+ ldp d10,d11,[sp, 16] -+ ldp d12,d13,[sp, 32] -+ ldp d8,d9,[sp],48 -+.endm -+ -+ .section .rodata.cst16,"aM",@progbits,16 -+ .align 16 -+ -+MD5_CONST_KEYS: -+ .word 0xd76aa478 -+ .word 0xe8c7b756 -+ .word 0x242070db -+ .word 0xc1bdceee -+ .word 0xf57c0faf -+ .word 0x4787c62a -+ .word 0xa8304613 -+ .word 0xfd469501 -+ .word 0x698098d8 -+ .word 0x8b44f7af -+ .word 0xffff5bb1 -+ .word 0x895cd7be -+ .word 0x6b901122 -+ .word 0xfd987193 -+ .word 0xa679438e -+ .word 0x49b40821 -+ .word 0xf61e2562 -+ .word 0xc040b340 -+ .word 0x265e5a51 -+ .word 0xe9b6c7aa -+ .word 0xd62f105d -+ .word 0x02441453 -+ .word 0xd8a1e681 -+ .word 0xe7d3fbc8 -+ .word 0x21e1cde6 -+ .word 0xc33707d6 -+ .word 0xf4d50d87 -+ .word 0x455a14ed -+ .word 0xa9e3e905 -+ .word 0xfcefa3f8 -+ .word 0x676f02d9 -+ .word 0x8d2a4c8a -+ .word 0xfffa3942 -+ .word 0x8771f681 -+ .word 0x6d9d6122 -+ .word 0xfde5380c -+ .word 0xa4beea44 -+ .word 0x4bdecfa9 -+ .word 0xf6bb4b60 -+ .word 0xbebfbc70 -+ .word 0x289b7ec6 -+ .word 0xeaa127fa -+ .word 0xd4ef3085 -+ .word 0x04881d05 -+ .word 0xd9d4d039 -+ .word 0xe6db99e5 -+ .word 0x1fa27cf8 -+ .word 0xc4ac5665 -+ .word 0xf4292244 -+ .word 0x432aff97 -+ .word 0xab9423a7 -+ .word 0xfc93a039 -+ 
.word 0x655b59c3 -+ .word 0x8f0ccc92 -+ .word 0xffeff47d -+ .word 0x85845dd1 -+ .word 0x6fa87e4f -+ .word 0xfe2ce6e0 -+ .word 0xa3014314 -+ .word 0x4e0811a1 -+ .word 0xf7537e82 -+ .word 0xbd3af235 -+ .word 0x2ad7d2bb -+ .word 0xeb86d391 -diff --git a/drv/hash_mb/sm3_mb_asimd_x1.S b/drv/hash_mb/sm3_mb_asimd_x1.S -new file mode 100644 -index 0000000..c7362de ---- /dev/null -+++ b/drv/hash_mb/sm3_mb_asimd_x1.S -@@ -0,0 +1,387 @@ -+/********************************************************************** -+ Copyright(c) 2020 Arm Corporation All rights reserved. -+ -+ Redistribution and use in source and binary forms, with or without -+ modification, are permitted provided that the following conditions -+ are met: -+ * Redistributions of source code must retain the above copyright -+ notice, this list of conditions and the following disclaimer. -+ * Redistributions in binary form must reproduce the above copyright -+ notice, this list of conditions and the following disclaimer in -+ the documentation and/or other materials provided with the -+ distribution. -+ * Neither the name of Arm Corporation nor the names of its -+ contributors may be used to endorse or promote products derived -+ from this software without specific prior written permission. -+ -+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -+ A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT -+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -+**********************************************************************/ -+ .arch armv8.2-a -+ .text -+ .align 2 -+ .p2align 3,,7 -+ -+.macro declare_var_vector_reg name:req,reg:req -+ q\name\() .req q\reg -+ v\name\() .req v\reg -+ s\name\() .req s\reg -+.endm -+ -+ job .req x0 -+ len .req x1 -+ data .req x2 -+ digest .req x0 -+ -+ msg0 .req w3 -+ msg1 .req w4 -+ msg2 .req w5 -+ msg3 .req w6 -+ msg4 .req w7 -+ -+ msg .req w9 -+ msgP .req w10 -+ SS1 .req w11 -+ SS2 .req w12 -+ TT1 .req w13 -+ TT2 .req w14 -+ Tj .req w15 -+ tmp0 .req w19 -+ tmp1 .req w20 -+ dig_A .req w21 -+ dig_B .req w22 -+ dig_C .req w23 -+ dig_D .req w24 -+ dig_E .req w25 -+ dig_F .req w26 -+ dig_G .req w27 -+ dig_H .req w28 -+ -+ declare_var_vector_reg dig0,0 -+ declare_var_vector_reg dig1,1 -+ declare_var_vector_reg dig0_bak,2 -+ declare_var_vector_reg dig1_bak,3 -+ declare_var_vector_reg vect_msg0,4 -+ declare_var_vector_reg vect_msg1,5 -+ declare_var_vector_reg vect_msg2,6 -+ declare_var_vector_reg vect_msg3,7 -+ -+ declare_var_vector_reg vect_msgP0,16 -+ declare_var_vector_reg vect_msgP1,17 -+ declare_var_vector_reg vect_msgP2,18 -+ -+ -+ -+ -+ -+ -+// round 0-11 -+.macro sm3_round_0 round:req -+ ldr msg, [sp,msg_off+4*\round\()] -+ ldr msgP,[sp,wp_off +4*\round\()] -+ add SS1,dig_E,Tj -+ ror TT1,dig_A,32-12 -+ add SS1,SS1,TT1 -+ ror SS1,SS1,32-7 //SS1 done -+ eor SS2,SS1,TT1 //SS2 done -+ eor TT1,dig_A,dig_B -+ eor TT2,dig_E,dig_F -+ add SS2,SS2,msgP 
-+ eor TT2,TT2,dig_G -+ add SS1,SS1,msg -+ eor TT1,TT1,dig_C -+ add SS2,SS2,dig_D -+ add SS1,SS1,dig_H -+ add TT1,TT1,SS2 -+ add TT2,TT2,SS1 -+ mov dig_D,dig_C -+ ror dig_C,dig_B,32-9 -+ mov dig_B,dig_A -+ mov dig_A,TT1 -+ eor TT1,TT2,TT2,ror (32-17) -+ mov dig_H,dig_G -+ ror dig_G,dig_F,32-19 -+ mov dig_F,dig_E -+ eor dig_E,TT1,TT2,ror(32-9) -+ ror Tj,Tj,(32-1) -+.endm -+ -+//round 12-15 -+.macro sm3_round_12 round:req -+ ldr msg, [sp,msg_off+4*((\round\())%17)] -+ ldr msg0,[sp,msg_off+4*((\round\()+4 - 16)%17)] -+ ldr msg1,[sp,msg_off+4*((\round\()+4 - 9)%17)] -+ add SS1,dig_E,Tj -+ ror TT1,dig_A,32-12 -+ add SS1,SS1,TT1 -+ ror SS1,SS1,32-7 //SS1 done -+ eor SS2,SS1,TT1 //SS2 done -+ -+ eor msg0,msg0,msg1 -+ ldr msg2,[sp,msg_off+4*((\round\()+4 - 3)%17)] -+ eor TT1,dig_A,dig_B -+ eor TT2,dig_E,dig_F -+ add SS2,SS2,dig_D -+ eor TT2,TT2,dig_G -+ add SS1,SS1,msg -+ eor msg0,msg0,msg2,ror (32-15) -+ ldr msg3,[sp,msg_off+4*((\round\()+4 - 13)%17)] -+ ldr msg4,[sp,msg_off+4*((\round\()+4 - 6)%17)] -+ eor msg1,msg0,msg0,ror (32 -15) -+ eor TT1,TT1,dig_C -+ add TT1,TT1,SS2 -+ eor msg4,msg4,msg3, ror (32-7) -+ eor msg0,msg1,msg0, ror (32-23) -+ add SS1,SS1,dig_H -+ eor msg0,msg0,msg4 -+ add TT2,TT2,SS1 -+ mov dig_D,dig_C -+ str msg0,[sp,msg_off+4*((\round\()+4)%17)] -+ eor msgP,msg,msg0 -+ add TT1,TT1,msgP -+ ror dig_C,dig_B,32-9 -+ mov dig_B,dig_A -+ mov dig_A,TT1 -+ eor TT1,TT2,TT2,ror (32-17) -+ mov dig_H,dig_G -+ ror dig_G,dig_F,32-19 -+ mov dig_F,dig_E -+ eor dig_E,TT1,TT2,ror(32-9) -+ ror Tj,Tj,32-1 -+.endm -+ -+// round 16-62 -+.macro sm3_round_16 round:req -+ ldr msg, [sp,msg_off+4*((\round\())%17)] -+ ldr msg0,[sp,msg_off+4*((\round\()+4 - 16)%17)] -+ ldr msg1,[sp,msg_off+4*((\round\()+4 - 9)%17)] -+ add SS1,dig_E,Tj -+ ror TT1,dig_A,32-12 -+ add SS1,SS1,TT1 -+ ror SS1,SS1,32-7 //SS1 done -+ eor SS2,SS1,TT1 //SS2 done -+ -+ eor msg0,msg0,msg1 -+ ldr msg2,[sp,msg_off+4*((\round\()+4 - 3)%17)] -+ orr TT1,dig_B,dig_C -+ and tmp0,dig_B,dig_C -+ -+ eor TT2,dig_F,dig_G 
-+ and TT1,TT1,dig_A -+ add SS2,SS2,dig_D -+ orr TT1,TT1,tmp0 -+ and TT2,TT2,dig_E -+ add SS1,SS1,msg -+ eor TT2,TT2,dig_G -+ -+ eor msg0,msg0,msg2,ror (32-15) -+ ldr msg3,[sp,msg_off+4*((\round\()+4 - 13)%17)] -+ ldr msg4,[sp,msg_off+4*((\round\()+4 - 6)%17)] -+ eor msg1,msg0,msg0,ror (32 -15) -+ add TT1,TT1,SS2 -+ eor msg4,msg4,msg3, ror (32-7) -+ eor msg0,msg1,msg0, ror (32-23) -+ add SS1,SS1,dig_H -+ eor msg0,msg0,msg4 -+ add TT2,TT2,SS1 -+ mov dig_D,dig_C -+ str msg0,[sp,msg_off+4*((\round\()+4)%17)] -+ eor msgP,msg,msg0 -+ add TT1,TT1,msgP -+ ror dig_C,dig_B,32-9 -+ mov dig_B,dig_A -+ mov dig_A,TT1 -+ eor TT1,TT2,TT2,ror (32-17) -+ mov dig_H,dig_G -+ ror dig_G,dig_F,32-19 -+ mov dig_F,dig_E -+ eor dig_E,TT1,TT2,ror(32-9) -+ ror Tj,Tj,32-1 -+.endm -+ -+//round 63 -+.macro sm3_round_63 round:req -+ ldr msg, [sp,msg_off+4*((\round\())%17)] -+ ldr msg0,[sp,msg_off+4*((\round\()+4 - 16)%17)] -+ ldr msg1,[sp,msg_off+4*((\round\()+4 - 9)%17)] -+ add SS1,dig_E,Tj -+ ror TT1,dig_A,32-12 -+ add SS1,SS1,TT1 -+ ror SS1,SS1,32-7 //SS1 done -+ eor SS2,SS1,TT1 //SS2 done -+ eor msg0,msg0,msg1 -+ ldr msg2,[sp,msg_off+4*((\round\()+4 - 3)%17)] -+ orr TT1,dig_B,dig_C -+ and tmp0,dig_B,dig_C -+ eor TT2,dig_F,dig_G -+ and TT1,TT1,dig_A -+ add SS2,SS2,dig_D -+ orr TT1,TT1,tmp0 -+ and TT2,TT2,dig_E -+ add SS1,SS1,msg -+ eor TT2,TT2,dig_G -+ eor msg0,msg0,msg2,ror (32-15) -+ ldr msg3,[sp,msg_off+4*((\round\()+4 - 13)%17)] -+ ldr msg4,[sp,msg_off+4*((\round\()+4 - 6)%17)] -+ eor msg1,msg0,msg0,ror (32 -15) -+ add TT1,TT1,SS2 -+ eor msg4,msg4,msg3, ror (32-7) -+ eor msg0,msg1,msg0, ror (32-23) -+ add SS1,SS1,dig_H -+ eor msg0,msg0,msg4 -+ add TT2,TT2,SS1 -+ str msg0,[sp,msg_off+4*((\round\()+4)%17)] -+ eor msgP,msg,msg0 -+ add TT1,TT1,msgP -+ ins vdig0_bak.s[3],dig_C -+ ror dig_C,dig_B,32-9 -+ ins vdig0_bak.s[1],dig_A -+ ins vdig0_bak.s[0],TT1 -+ ins vdig0_bak.s[2],dig_C -+ eor TT1,TT2,TT2,ror (32-17) -+ ins vdig1_bak.s[3],dig_G -+ ror dig_G,dig_F,32-19 -+ ins vdig1_bak.s[1],dig_E -+ 
ins vdig1_bak.s[2],dig_G -+ eor dig_E,TT1,TT2,ror(32-9) -+ ins vdig1_bak.s[0],dig_E -+.endm -+ -+ .set wp_off , 96 -+ .set msg_off, 96 + 12*4 -+#define STACK_SIZE 224 -+ .global sm3_mb_asimd_x1 -+ .type sm3_mb_asimd_x1, %function -+sm3_mb_asimd_x1: -+ stp x29,x30, [sp,-STACK_SIZE]! -+ cmp len,0 -+ ldr data,[job],64 -+ ldp qdig0,qdig1,[digest] -+ stp x19, x20, [sp, 16] -+ stp x21, x22, [sp, 32] -+ rev32 vdig0.16b,vdig0.16b -+ stp x23, x24, [sp, 48] -+ rev32 vdig1.16b,vdig1.16b -+ stp x25, x26, [sp, 64] -+ stp x27, x28, [sp, 80] -+ ble .exit_func -+ -+.start_loop: -+ -+ /** prepare first 12 round data **/ -+ ld1 {vvect_msg0.16b-vvect_msg3.16b},[data],64 -+ mov Tj, 17689 -+ umov dig_A,vdig0.s[0] -+ movk Tj, 0x79cc, lsl 16 -+ rev32 vvect_msg0.16b,vvect_msg0.16b -+ umov dig_B,vdig0.s[1] -+ rev32 vvect_msg1.16b,vvect_msg1.16b -+ umov dig_C,vdig0.s[2] -+ rev32 vvect_msg2.16b,vvect_msg2.16b -+ umov dig_D,vdig0.s[3] -+ rev32 vvect_msg3.16b,vvect_msg3.16b -+ umov dig_E,vdig1.s[0] -+ stp qvect_msg0,qvect_msg1,[sp,msg_off] -+ umov dig_F,vdig1.s[1] -+ stp qvect_msg2,qvect_msg3,[sp,msg_off+32] -+ umov dig_G,vdig1.s[2] -+ eor vvect_msgP0.16b,vvect_msg0.16b,vvect_msg1.16b -+ eor vvect_msgP1.16b,vvect_msg1.16b,vvect_msg2.16b -+ umov dig_H,vdig1.s[3] -+ stp qvect_msgP0,qvect_msgP1,[sp,wp_off] -+ eor vvect_msgP2.16b,vvect_msg2.16b,vvect_msg3.16b -+ str qvect_msgP2,[sp,wp_off+32] -+ -+ sm3_round_0 0 -+ sm3_round_0 1 -+ sm3_round_0 2 -+ sm3_round_0 3 -+ sm3_round_0 4 -+ sm3_round_0 5 -+ sm3_round_0 6 -+ sm3_round_0 7 -+ sm3_round_0 8 -+ sm3_round_0 9 -+ sm3_round_0 10 -+ sm3_round_0 11 -+ -+ sm3_round_12 12 -+ sm3_round_12 13 -+ sm3_round_12 14 -+ sm3_round_12 15 -+ mov Tj, 0x7a87 -+ movk Tj, 0x9d8a, lsl 16 -+ sm3_round_16 16 -+ sm3_round_16 17 -+ sm3_round_16 18 -+ sm3_round_16 19 -+ sm3_round_16 20 -+ sm3_round_16 21 -+ sm3_round_16 22 -+ sm3_round_16 23 -+ sm3_round_16 24 -+ sm3_round_16 25 -+ sm3_round_16 26 -+ sm3_round_16 27 -+ sm3_round_16 28 -+ sm3_round_16 29 -+ sm3_round_16 
30 -+ sm3_round_16 31 -+ sm3_round_16 32 -+ sm3_round_16 33 -+ sm3_round_16 34 -+ sm3_round_16 35 -+ sm3_round_16 36 -+ sm3_round_16 37 -+ sm3_round_16 38 -+ sm3_round_16 39 -+ sm3_round_16 40 -+ sm3_round_16 41 -+ sm3_round_16 42 -+ sm3_round_16 43 -+ sm3_round_16 44 -+ sm3_round_16 45 -+ sm3_round_16 46 -+ sm3_round_16 47 -+ sm3_round_16 48 -+ sm3_round_16 49 -+ sm3_round_16 50 -+ sm3_round_16 51 -+ sm3_round_16 52 -+ sm3_round_16 53 -+ sm3_round_16 54 -+ sm3_round_16 55 -+ sm3_round_16 56 -+ sm3_round_16 57 -+ sm3_round_16 58 -+ sm3_round_16 59 -+ sm3_round_16 60 -+ sm3_round_16 61 -+ sm3_round_16 62 -+ sm3_round_63 63 -+ subs len,len,1 -+ eor vdig0.16b,vdig0.16b,vdig0_bak.16b -+ eor vdig1.16b,vdig1.16b,vdig1_bak.16b -+ bne .start_loop -+.exit_func: -+ ldp x19, x20, [sp, 16] -+ rev32 vdig0.16b,vdig0.16b -+ ldp x21, x22, [sp, 32] -+ rev32 vdig1.16b,vdig1.16b -+ ldp x23, x24, [sp, 48] -+ stp qdig0,qdig1,[digest] -+ ldp x25, x26, [sp, 64] -+ ldp x27, x28, [sp, 80] -+ ldp x29, x30, [sp], STACK_SIZE -+ ret -+ .size sm3_mb_asimd_x1, .-sm3_mb_asimd_x1 -+ -diff --git a/drv/hash_mb/sm3_mb_asimd_x4.S b/drv/hash_mb/sm3_mb_asimd_x4.S -new file mode 100644 -index 0000000..975a07c ---- /dev/null -+++ b/drv/hash_mb/sm3_mb_asimd_x4.S -@@ -0,0 +1,576 @@ -+/********************************************************************** -+ Copyright(c) 2020 Arm Corporation All rights reserved. -+ -+ Redistribution and use in source and binary forms, with or without -+ modification, are permitted provided that the following conditions -+ are met: -+ * Redistributions of source code must retain the above copyright -+ notice, this list of conditions and the following disclaimer. -+ * Redistributions in binary form must reproduce the above copyright -+ notice, this list of conditions and the following disclaimer in -+ the documentation and/or other materials provided with the -+ distribution. 
-+ * Neither the name of Arm Corporation nor the names of its -+ contributors may be used to endorse or promote products derived -+ from this software without specific prior written permission. -+ -+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-+**********************************************************************/ -+ .arch armv8.2-a -+ .text -+ .align 2 -+ .p2align 3,,7 -+ -+.macro declare_var_vector_reg name:req,reg:req -+ q\name\() .req q\reg -+ v\name\() .req v\reg -+ s\name\() .req s\reg -+.endm -+ -+ job0 .req x0 -+ job1 .req x1 -+ job2 .req x2 -+ job3 .req x3 -+ len .req x4 -+ -+ job0_data .req x5 -+ job1_data .req x6 -+ job2_data .req x7 -+ job3_data .req x9 -+ -+ job0_digest .req x0 -+ job1_digest .req x1 -+ job2_digest .req x2 -+ job3_digest .req x3 -+ job0_tmp .req x10 -+ job1_tmp .req x11 -+ job2_tmp .req x12 -+ job3_tmp .req x13 -+ const_adr .req x14 -+ -+ -+ declare_var_vector_reg msg0,0 -+ declare_var_vector_reg msg1,1 -+ declare_var_vector_reg msg2,2 -+ declare_var_vector_reg msg3,3 -+ declare_var_vector_reg msg4,4 -+ declare_var_vector_reg msg5,5 -+ declare_var_vector_reg msg6,6 -+ declare_var_vector_reg msg7,7 -+ declare_var_vector_reg msg8,8 -+ declare_var_vector_reg msg9,9 -+ declare_var_vector_reg msg10,10 -+ declare_var_vector_reg msg11,11 -+ declare_var_vector_reg msg12,12 -+ declare_var_vector_reg msg13,13 -+ declare_var_vector_reg msg14,14 -+ declare_var_vector_reg msg15,15 -+ declare_var_vector_reg msg16,16 -+ -+ -+ declare_var_vector_reg dig_A,24 -+ declare_var_vector_reg dig_B,25 -+ declare_var_vector_reg dig_C,26 -+ declare_var_vector_reg dig_D,27 -+ declare_var_vector_reg dig_E,28 -+ declare_var_vector_reg dig_F,29 -+ declare_var_vector_reg dig_G,30 -+ declare_var_vector_reg dig_H,31 -+ -+ declare_var_vector_reg TT1,17 -+ declare_var_vector_reg TT2,18 -+ declare_var_vector_reg SS1,19 -+ declare_var_vector_reg SS2,20 -+ declare_var_vector_reg tmp0,21 -+ declare_var_vector_reg word_pair,23 -+ declare_var_vector_reg Tj,22 -+ -+ -+.macro rol32 target:req,reg:req,bit:req -+ ushr v\target\().4s,v\reg\().4s,32 - \bit -+ sli v\target\().4s,v\reg\().4s,\bit -+.endm -+ -+// round 0-11 -+.macro sm3_round_0 round:req,wp:req -+ -+ ushr vtmp0.4s,vdig_A.4s,32 - 12 -+ -+ add 
vSS1.4s,vdig_E.4s,vTj.4s -+ sli vtmp0.4s,vdig_A.4s,12 -+ rev32 vmsg\round\().16b,vmsg\round\().16b -+ rev32 vmsg\wp\().16b,vmsg\wp\().16b -+ add vTT1.4s,vSS1.4s,vtmp0.4s //SS1 Done -+ rol32 SS1,TT1,7 -+ eor vSS2.16b,vSS1.16b,vtmp0.16b //SS2 Done -+ eor vword_pair.16b,vmsg\round\().16b,vmsg\wp\().16b -+ -+ eor vTT1.16b,vdig_A.16b,vdig_B.16b -+ eor vTT2.16b,vdig_E.16b,vdig_F.16b -+ eor vTT1.16b,vTT1.16b,vdig_C.16b -+ eor vTT2.16b,vTT2.16b,vdig_G.16b -+ -+ add vSS1.4s,vSS1.4s,vmsg\round\().4s -+ add vSS2.4s,vSS2.4s,vword_pair.4s -+ add vTT1.4s,vTT1.4s,vdig_D.4s -+ add vTT2.4s,vTT2.4s,vdig_H.4s -+ ushr vtmp0.4s,vTj.4s,32-1 -+ add vTT1.4s,vTT1.4s,vSS2.4s //TT1 Done -+ sli vtmp0.4s,vTj.4s,1 -+ add vTT2.4s,vTT2.4s,vSS1.4s //TT2 Done -+ mov vTj.16b,vtmp0.16b -+ //D=C -+ mov vdig_D.16b,vdig_C.16b -+ //C = ROTL32(B, 9); -+ ushr vdig_C.4s,vdig_B.4s,32 - 9 -+ sli vdig_C.4s,vdig_B.4s,9 -+ //B=A -+ mov vdig_B.16b,vdig_A.16b -+ //A=TT1 -+ mov vdig_A.16b,vTT1.16b -+ // H=G -+ mov vdig_H.16b,vdig_G.16b -+ //G = ROTL32(F,19) -+ rol32 dig_G,dig_F,19 -+ //F = E -+ mov vdig_F.16b,vdig_E.16b -+ // E=Target, TT2=src, TT1,SS1,SS2 is free -+ // E = P0(TT2); -+ ushr vSS2.4s, vTT2.4s, 32 - 9 -+ ushr vSS1.4s, vTT2.4s, 32 - 17 -+ sli vSS2.4s, vTT2.4s, 9 -+ sli vSS1.4s, vTT2.4s, 17 -+ eor vdig_E.16b, vTT2.16b, vSS1.16b -+ eor vdig_E.16b, vdig_E.16b, vSS2.16b -+ -+.endm -+ -+ -+.macro sm3_round_4 round:req,wp:req -+ -+ ushr vtmp0.4s,vdig_A.4s,32 - 12 -+ add vSS1.4s,vdig_E.4s,vTj.4s -+ sli vtmp0.4s,vdig_A.4s,12 -+ rev32 vmsg\wp\().16b,vmsg\wp\().16b -+ add vTT1.4s,vSS1.4s,vtmp0.4s //SS1 Done -+ rol32 SS1,TT1,7 -+ eor vSS2.16b,vSS1.16b,vtmp0.16b //SS2 Done -+ eor vword_pair.16b,vmsg\round\().16b,vmsg\wp\().16b -+ eor vTT1.16b,vdig_A.16b,vdig_B.16b -+ eor vTT2.16b,vdig_E.16b,vdig_F.16b -+ eor vTT1.16b,vTT1.16b,vdig_C.16b -+ eor vTT2.16b,vTT2.16b,vdig_G.16b -+ add vSS1.4s,vSS1.4s,vmsg\round\().4s -+ add vSS2.4s,vSS2.4s,vword_pair.4s -+ add vTT1.4s,vTT1.4s,vdig_D.4s -+ add vTT2.4s,vTT2.4s,vdig_H.4s 
-+ ushr vtmp0.4s,vTj.4s,32-1 -+ add vTT1.4s,vTT1.4s,vSS2.4s //TT1 Done -+ sli vtmp0.4s,vTj.4s,1 -+ add vTT2.4s,vTT2.4s,vSS1.4s //TT2 Done -+ mov vTj.16b,vtmp0.16b -+ //D=C -+ mov vdig_D.16b,vdig_C.16b -+ //C = ROTL32(B, 9); -+ ushr vdig_C.4s,vdig_B.4s,32 - 9 -+ sli vdig_C.4s,vdig_B.4s,9 -+ //B=A -+ mov vdig_B.16b,vdig_A.16b -+ //A=TT1 -+ mov vdig_A.16b,vTT1.16b -+ // H=G -+ mov vdig_H.16b,vdig_G.16b -+ //G = ROTL32(F,19) -+ rol32 dig_G,dig_F,19 -+ //F = E -+ mov vdig_F.16b,vdig_E.16b -+ // E=Target, TT2=src, TT1,SS1,SS2 is free -+ // E = P0(TT2); -+ ushr vSS2.4s, vTT2.4s, 32 - 9 -+ ushr vSS1.4s, vTT2.4s, 32 - 17 -+ sli vSS2.4s, vTT2.4s, 9 -+ sli vSS1.4s, vTT2.4s, 17 -+ eor vdig_E.16b, vTT2.16b, vSS1.16b -+ eor vdig_E.16b, vdig_E.16b, vSS2.16b -+ -+.endm -+ -+//round 12-15 -+.macro sm3_round_12 round:req,plus_4:req,m0,m1,m2,m3,m4 -+ rol32 msg\plus_4,msg\m2,15 -+ eor vmsg\plus_4\().16b,vmsg\plus_4\().16b,vmsg\m0\().16b -+ eor vmsg\plus_4\().16b,vmsg\plus_4\().16b,vmsg\m1\().16b -+ rol32 tmp0,msg\plus_4,15 -+ rol32 word_pair,msg\plus_4,23 -+ eor vmsg\plus_4\().16b,vmsg\plus_4\().16b,vtmp0.16b -+ eor vmsg\plus_4\().16b,vmsg\plus_4\().16b,vword_pair.16b -+ rol32 tmp0,msg\m3,7 -+ eor vmsg\plus_4\().16b,vmsg\plus_4\().16b,vmsg\m4\().16b -+ eor vmsg\plus_4\().16b,vmsg\plus_4\().16b,vtmp0.16b -+ ushr vtmp0.4s,vdig_A.4s,32 - 12 -+ sli vtmp0.4s,vdig_A.4s,12 -+ add vSS1.4s,vdig_E.4s,vTj.4s -+ add vSS2.4s,vSS1.4s,vtmp0.4s //SS1 Done -+ rol32 SS1,SS2,7 -+ eor vSS2.16b,vSS1.16b,vtmp0.16b //SS2 Done -+ eor vword_pair.16b,vmsg\round\().16b,vmsg\plus_4\().16b -+ eor vTT1.16b,vdig_A.16b,vdig_B.16b -+ eor vTT1.16b,vTT1.16b,vdig_C.16b -+ eor vTT2.16b,vdig_E.16b,vdig_F.16b -+ eor vTT2.16b,vTT2.16b,vdig_G.16b -+ add vSS1.4s,vSS1.4s,vmsg\round\().4s -+ add vSS2.4s,vSS2.4s,vword_pair.4s -+ add vTT1.4s,vTT1.4s,vdig_D.4s -+ add vTT2.4s,vTT2.4s,vdig_H.4s -+ ushr vtmp0.4s,vTj.4s,32-1 -+ add vTT1.4s,vTT1.4s,vSS2.4s //TT1 Done -+ sli vtmp0.4s,vTj.4s,1 -+ add vTT2.4s,vTT2.4s,vSS1.4s //TT2 Done -+ 
mov vTj.16b,vtmp0.16b -+ //D=C -+ mov vdig_D.16b,vdig_C.16b -+ //C = ROTL32(B, 9); -+ ushr vdig_C.4s,vdig_B.4s,32 - 9 -+ sli vdig_C.4s,vdig_B.4s,9 -+ //B=A -+ mov vdig_B.16b,vdig_A.16b -+ //A=TT1 -+ mov vdig_A.16b,vTT1.16b -+ // H=G -+ mov vdig_H.16b,vdig_G.16b -+ //G = ROTL32(F,19) -+ rol32 dig_G,dig_F,19 -+ //F = E -+ mov vdig_F.16b,vdig_E.16b -+ // E=Target, TT2=src, TT1,SS1,SS2 is free -+ // E = P0(TT2); -+ ushr vSS2.4s, vTT2.4s, 32 - 9 -+ ushr vSS1.4s, vTT2.4s, 32 - 17 -+ sli vSS2.4s, vTT2.4s, 9 -+ sli vSS1.4s, vTT2.4s, 17 -+ eor vdig_E.16b, vTT2.16b, vSS1.16b -+ eor vdig_E.16b, vdig_E.16b, vSS2.16b -+.endm -+ -+// round 16-62 -+.macro sm3_round_16 round:req,plus_4:req,m0,m1,m2,m3,m4 -+ rol32 msg\plus_4,msg\m2,15 -+ eor vmsg\plus_4\().16b,vmsg\plus_4\().16b,vmsg\m0\().16b -+ eor vmsg\plus_4\().16b,vmsg\plus_4\().16b,vmsg\m1\().16b -+ rol32 tmp0,msg\plus_4,15 -+ rol32 word_pair,msg\plus_4,23 -+ eor vmsg\plus_4\().16b,vmsg\plus_4\().16b,vtmp0.16b -+ eor vmsg\plus_4\().16b,vmsg\plus_4\().16b,vword_pair.16b -+ rol32 tmp0,msg\m3,7 -+ eor vmsg\plus_4\().16b,vmsg\plus_4\().16b,vmsg\m4\().16b -+ eor vmsg\plus_4\().16b,vmsg\plus_4\().16b,vtmp0.16b -+ ushr vtmp0.4s,vdig_A.4s,32 - 12 -+ sli vtmp0.4s,vdig_A.4s,12 -+ add vSS1.4s,vdig_E.4s,vTj.4s -+ add vSS2.4s,vSS1.4s,vtmp0.4s //SS1 Done -+ rol32 SS1,SS2,7 -+ eor vSS2.16b,vSS1.16b,vtmp0.16b //SS2 Done -+ eor vword_pair.16b,vmsg\round\().16b,vmsg\plus_4\().16b -+ mov vTT2.16b,vdig_E.16b -+ orr vTT1.16b,vdig_B.16b,vdig_C.16b -+ and vtmp0.16b,vdig_B.16b,vdig_C.16b -+ bsl vTT2.16b,vdig_F.16b,vdig_G.16b -+ and vTT1.16b,vTT1.16b,vdig_A.16b -+ add vSS1.4s,vSS1.4s,vmsg\round\().4s -+ orr vTT1.16b,vTT1.16b,vtmp0.16b -+ add vSS2.4s,vSS2.4s,vword_pair.4s -+ add vTT1.4s,vTT1.4s,vdig_D.4s -+ add vTT2.4s,vTT2.4s,vdig_H.4s -+ ushr vtmp0.4s,vTj.4s,32-1 -+ add vTT1.4s,vTT1.4s,vSS2.4s //TT1 Done -+ sli vtmp0.4s,vTj.4s,1 -+ add vTT2.4s,vTT2.4s,vSS1.4s //TT2 Done -+ mov vTj.16b,vtmp0.16b -+ //D=C -+ mov vdig_D.16b,vdig_C.16b -+ //C = 
ROTL32(B, 9); -+ ushr vdig_C.4s,vdig_B.4s,32 - 9 -+ sli vdig_C.4s,vdig_B.4s,9 -+ //B=A -+ mov vdig_B.16b,vdig_A.16b -+ //A=TT1 -+ mov vdig_A.16b,vTT1.16b -+ // H=G -+ mov vdig_H.16b,vdig_G.16b -+ //G = ROTL32(F,19) -+ rol32 dig_G,dig_F,19 -+ //F = E -+ mov vdig_F.16b,vdig_E.16b -+ // E=Target, TT2=src, TT1,SS1,SS2 is free -+ // E = P0(TT2); -+ ushr vSS2.4s, vTT2.4s, 32 - 9 -+ ushr vSS1.4s, vTT2.4s, 32 - 17 -+ sli vSS2.4s, vTT2.4s, 9 -+ sli vSS1.4s, vTT2.4s, 17 -+ eor vdig_E.16b, vTT2.16b, vSS1.16b -+ eor vdig_E.16b, vdig_E.16b, vSS2.16b -+.endm -+ -+//round 63 -+.macro sm3_round_63 round:req,plus_4:req,m0,m1,m2,m3,m4 -+ rol32 msg\plus_4,msg\m2,15 -+ eor vmsg\plus_4\().16b,vmsg\plus_4\().16b,vmsg\m0\().16b -+ eor vmsg\plus_4\().16b,vmsg\plus_4\().16b,vmsg\m1\().16b -+ rol32 tmp0,msg\plus_4,15 -+ rol32 word_pair,msg\plus_4,23 -+ eor vmsg\plus_4\().16b,vmsg\plus_4\().16b,vtmp0.16b -+ eor vmsg\plus_4\().16b,vmsg\plus_4\().16b,vword_pair.16b -+ rol32 tmp0,msg\m3,7 -+ eor vmsg\plus_4\().16b,vmsg\plus_4\().16b,vmsg\m4\().16b -+ eor vmsg\plus_4\().16b,vmsg\plus_4\().16b,vtmp0.16b -+ ushr vtmp0.4s,vdig_A.4s,32 - 12 -+ sli vtmp0.4s,vdig_A.4s,12 -+ add vSS1.4s,vdig_E.4s,vTj.4s -+ add vSS2.4s,vSS1.4s,vtmp0.4s //SS1 Done -+ rol32 SS1,SS2,7 -+ eor vSS2.16b,vSS1.16b,vtmp0.16b //SS2 Done -+ eor vword_pair.16b,vmsg\round\().16b,vmsg\plus_4\().16b -+ -+ ldp qmsg0,qmsg1,[sp,dig_off+ 0] -+ mov vTT2.16b,vdig_E.16b -+ ldp qmsg2,qmsg3,[sp,dig_off+ 32] -+ orr vTT1.16b,vdig_B.16b,vdig_C.16b -+ ldp qmsg4,qmsg5,[sp,dig_off+ 64] -+ and vtmp0.16b,vdig_B.16b,vdig_C.16b -+ bsl vTT2.16b,vdig_F.16b,vdig_G.16b -+ ldp qmsg6,qmsg7,[sp,dig_off+ 96] -+ and vTT1.16b,vTT1.16b,vdig_A.16b -+ add vSS1.4s,vSS1.4s,vmsg\round\().4s -+ orr vTT1.16b,vTT1.16b,vtmp0.16b -+ add vSS2.4s,vSS2.4s,vword_pair.4s -+ add vTT1.4s,vTT1.4s,vdig_D.4s -+ add vTT2.4s,vTT2.4s,vdig_H.4s -+ add vTT1.4s,vTT1.4s,vSS2.4s //TT1 Done -+ add vTT2.4s,vTT2.4s,vSS1.4s //TT2 Done -+ //D=C -+ eor vdig_D.16b,vdig_C.16b,vmsg3.16b -+ //C = 
ROTL32(B, 9); -+ ushr vdig_C.4s,vdig_B.4s,32 - 9 -+ sli vdig_C.4s,vdig_B.4s,9 -+ eor vdig_C.16b,vdig_C.16b,vmsg2.16b -+ //B=A -+ eor vdig_B.16b,vdig_A.16b,vmsg1.16b -+ stp qdig_C,qdig_D,[sp,dig_off+ 32] -+ //A=TT1 -+ eor vdig_A.16b,vTT1.16b,vmsg0.16b -+ // H=G -+ eor vdig_H.16b,vdig_G.16b,vmsg7.16b -+ stp qdig_A,qdig_B,[sp,dig_off+ 0] -+ //G = ROTL32(F,19) -+ rol32 dig_G,dig_F,19 -+ eor vdig_G.16b,vdig_G.16b,vmsg6.16b -+ //F = E -+ eor vdig_F.16b,vdig_E.16b,vmsg5.16b -+ stp qdig_G,qdig_H,[sp,dig_off+ 96] -+ // E=Target, TT2=src, TT1,SS1,SS2 is free -+ // E = P0(TT2); -+ ushr vSS2.4s, vTT2.4s, 32 - 9 -+ ushr vSS1.4s, vTT2.4s, 32 - 17 -+ sli vSS2.4s, vTT2.4s, 9 -+ sli vSS1.4s, vTT2.4s, 17 -+ eor vdig_E.16b, vTT2.16b, vSS1.16b -+ eor vdig_E.16b, vdig_E.16b, vSS2.16b -+ eor vdig_E.16b, vdig_E.16b, vmsg4.16b -+ stp qdig_E,qdig_F,[sp,dig_off+ 64] -+.endm -+ -+ .set dig_off , 80 -+ -+#define STACK_SIZE 224 -+ .global sm3_mb_asimd_x4 -+ .type sm3_mb_asimd_x4, %function -+sm3_mb_asimd_x4: -+ stp x29,x30, [sp,-STACK_SIZE]! 
-+ cmp len,0 -+ //push d8~d15 -+ ldr job0_data, [job0],64 -+ stp d8,d9, [sp,16] -+ ldr job1_data, [job1],64 -+ stp d10,d11,[sp,32] -+ ldr job2_data, [job2],64 -+ stp d12,d13,[sp,48] -+ ldr job3_data, [job3],64 -+ stp d14,d15,[sp,64] -+ ble .exit_func -+ -+ mov job0_tmp,job0_digest -+ mov job1_tmp,job1_digest -+ mov job2_tmp,job2_digest -+ mov job3_tmp,job3_digest -+ //load digests -+ ld4 {vdig_A.s-vdig_D.s}[0],[job0_tmp],16 -+ ld4 {vdig_A.s-vdig_D.s}[1],[job1_tmp],16 -+ ld4 {vdig_A.s-vdig_D.s}[2],[job2_tmp],16 -+ adrp const_adr, .consts -+ ld4 {vdig_A.s-vdig_D.s}[3],[job3_tmp],16 -+ add const_adr, const_adr, #:lo12:.consts -+ ld4 {vdig_E.s-vdig_H.s}[0],[job0_tmp] -+ rev32 vdig_A.16b,vdig_A.16b -+ ld4 {vdig_E.s-vdig_H.s}[1],[job1_tmp] -+ rev32 vdig_B.16b,vdig_B.16b -+ ld4 {vdig_E.s-vdig_H.s}[2],[job2_tmp] -+ rev32 vdig_C.16b,vdig_C.16b -+ ld4 {vdig_E.s-vdig_H.s}[3],[job3_tmp] -+ rev32 vdig_D.16b,vdig_D.16b -+ stp qdig_A,qdig_B,[sp,dig_off+ 0] -+ rev32 vdig_E.16b,vdig_E.16b -+ rev32 vdig_F.16b,vdig_F.16b -+ stp qdig_C,qdig_D,[sp,dig_off+ 32] -+ rev32 vdig_G.16b,vdig_G.16b -+ rev32 vdig_H.16b,vdig_H.16b -+ stp qdig_E,qdig_F,[sp,dig_off+ 64] -+ stp qdig_G,qdig_H,[sp,dig_off+ 96] -+ -+.start_loop: -+ ld4 {vmsg0.s-vmsg3.s}[0],[job0_data],16 -+ ld4 {vmsg0.s-vmsg3.s}[1],[job1_data],16 -+ ld4 {vmsg0.s-vmsg3.s}[2],[job2_data],16 -+ ld4 {vmsg0.s-vmsg3.s}[3],[job3_data],16 -+ ld4 {vmsg4.s-vmsg7.s}[0],[job0_data],16 -+ ld4 {vmsg4.s-vmsg7.s}[1],[job1_data],16 -+ ld4 {vmsg4.s-vmsg7.s}[2],[job2_data],16 -+ ld4 {vmsg4.s-vmsg7.s}[3],[job3_data],16 -+ ld4 {vmsg8.s-vmsg11.16b}[0],[job0_data],16 -+ ldr qTj,[const_adr] -+ -+ sm3_round_0 0, 4 -+ -+ ld4 {vmsg8.s-vmsg11.s}[1],[job1_data],16 -+ sm3_round_0 1, 5 -+ -+ ld4 {vmsg8.s-vmsg11.s}[2],[job2_data],16 -+ sm3_round_0 2, 6 -+ ld4 {vmsg8.s-vmsg11.s}[3],[job3_data],16 -+ sm3_round_0 3, 7 -+ -+ ld4 {vmsg12.s-vmsg15.s}[0],[job0_data],16 -+ -+ sm3_round_4 4, 8 -+ ld4 {vmsg12.s-vmsg15.s}[1],[job1_data],16 -+ sm3_round_4 5, 9 -+ ld4 
{vmsg12.s-vmsg15.s}[2],[job2_data],16 -+ sm3_round_4 6,10 -+ ld4 {vmsg12.s-vmsg15.s}[3],[job3_data],16 -+ sm3_round_4 7,11 -+ sm3_round_4 8,12 -+ sm3_round_4 9,13 -+ sm3_round_4 10,14 -+ sm3_round_4 11,15 -+ -+ sm3_round_12 12,16, 0, 7,13, 3,10 //12 -+ sm3_round_12 13, 0, 1, 8,14, 4,11 //13 -+ sm3_round_12 14, 1, 2, 9,15, 5,12 //14 -+ sm3_round_12 15, 2, 3,10,16, 6,13 //15 -+ -+ ldr qTj,[const_adr,16] -+ sm3_round_16 16, 3, 4,11, 0, 7,14 //16 -+#if 0 -+ stp sdig_A,sdig_B,[job0_digest] -+ stp sdig_C,sdig_D,[job0_digest,8] -+ stp sdig_E,sdig_F,[job0_digest,16] -+ stp sdig_G,sdig_H,[job0_digest,24] -+ b .exit_func -+#endif -+ sm3_round_16 0, 4, 5,12, 1, 8,15 //17 -+ -+ sm3_round_16 1, 5, 6,13, 2, 9,16 //18 -+ sm3_round_16 2, 6, 7,14, 3,10, 0 //19 -+ sm3_round_16 3, 7, 8,15, 4,11, 1 //20 -+ sm3_round_16 4, 8, 9,16, 5,12, 2 //21 -+ sm3_round_16 5, 9,10, 0, 6,13, 3 //22 -+ sm3_round_16 6,10,11, 1, 7,14, 4 //23 -+ sm3_round_16 7,11,12, 2, 8,15, 5 //24 -+ sm3_round_16 8,12,13, 3, 9,16, 6 //25 -+ sm3_round_16 9,13,14, 4,10, 0, 7 //26 -+ sm3_round_16 10,14,15, 5,11, 1, 8 //27 -+ sm3_round_16 11,15,16, 6,12, 2, 9 //28 -+ sm3_round_16 12,16, 0, 7,13, 3,10 //29 -+ sm3_round_16 13, 0, 1, 8,14, 4,11 //30 -+ sm3_round_16 14, 1, 2, 9,15, 5,12 //31 -+ sm3_round_16 15, 2, 3,10,16, 6,13 //32 -+ sm3_round_16 16, 3, 4,11, 0, 7,14 //33 -+ sm3_round_16 0, 4, 5,12, 1, 8,15 //34 -+ sm3_round_16 1, 5, 6,13, 2, 9,16 //35 -+ sm3_round_16 2, 6, 7,14, 3,10, 0 //36 -+ sm3_round_16 3, 7, 8,15, 4,11, 1 //37 -+ sm3_round_16 4, 8, 9,16, 5,12, 2 //38 -+ sm3_round_16 5, 9,10, 0, 6,13, 3 //39 -+ sm3_round_16 6,10,11, 1, 7,14, 4 //40 -+ sm3_round_16 7,11,12, 2, 8,15, 5 //41 -+ sm3_round_16 8,12,13, 3, 9,16, 6 //42 -+ sm3_round_16 9,13,14, 4,10, 0, 7 //43 -+ sm3_round_16 10,14,15, 5,11, 1, 8 //44 -+ sm3_round_16 11,15,16, 6,12, 2, 9 //45 -+ sm3_round_16 12,16, 0, 7,13, 3,10 //46 -+ sm3_round_16 13, 0, 1, 8,14, 4,11 //47 -+ sm3_round_16 14, 1, 2, 9,15, 5,12 //48 -+ sm3_round_16 15, 2, 3,10,16, 6,13 //49 -+ 
sm3_round_16 16, 3, 4,11, 0, 7,14 //50 -+ sm3_round_16 0, 4, 5,12, 1, 8,15 //51 -+ sm3_round_16 1, 5, 6,13, 2, 9,16 //52 -+ sm3_round_16 2, 6, 7,14, 3,10, 0 //53 -+ sm3_round_16 3, 7, 8,15, 4,11, 1 //54 -+ sm3_round_16 4, 8, 9,16, 5,12, 2 //55 -+ sm3_round_16 5, 9,10, 0, 6,13, 3 //56 -+ sm3_round_16 6,10,11, 1, 7,14, 4 //57 -+ sm3_round_16 7,11,12, 2, 8,15, 5 //58 -+ sm3_round_16 8,12,13, 3, 9,16, 6 //59 -+ sm3_round_16 9,13,14, 4,10, 0, 7 //60 -+ sm3_round_16 10,14,15, 5,11, 1, 8 //61 -+ sm3_round_16 11,15,16, 6,12, 2, 9 //62 -+ sm3_round_63 12,16, 0, 7,13, 3,10 //63 -+ -+ subs len,len,1 -+ bne .start_loop -+ -+ //save digests with big endian -+ rev32 vdig_A.16b,vdig_A.16b -+ rev32 vdig_B.16b,vdig_B.16b -+ rev32 vdig_C.16b,vdig_C.16b -+ rev32 vdig_D.16b,vdig_D.16b -+ st4 {vdig_A.s-vdig_D.s}[0],[job0_digest],16 -+ rev32 vdig_E.16b,vdig_E.16b -+ rev32 vdig_F.16b,vdig_F.16b -+ st4 {vdig_A.s-vdig_D.s}[1],[job1_digest],16 -+ rev32 vdig_G.16b,vdig_G.16b -+ rev32 vdig_H.16b,vdig_H.16b -+ st4 {vdig_A.s-vdig_D.s}[2],[job2_digest],16 -+ st4 {vdig_A.s-vdig_D.s}[3],[job3_digest],16 -+ st4 {vdig_E.s-vdig_H.s}[0],[job0_digest] -+ st4 {vdig_E.s-vdig_H.s}[1],[job1_digest] -+ st4 {vdig_E.s-vdig_H.s}[2],[job2_digest] -+ st4 {vdig_E.s-vdig_H.s}[3],[job3_digest] -+ -+.exit_func: -+ ldp d8, d9, [sp,16] -+ ldp d10,d11,[sp,32] -+ ldp d12,d13,[sp,48] -+ ldp d14,d15,[sp,64] -+ ldp x29, x30, [sp], STACK_SIZE -+ ret -+.consts: -+ .word 0x79cc4519 -+ .word 0x79cc4519 -+ .word 0x79cc4519 -+ .word 0x79cc4519 -+ .word 0x9d8a7a87 -+ .word 0x9d8a7a87 -+ .word 0x9d8a7a87 -+ .word 0x9d8a7a87 -+ .size sm3_mb_asimd_x4, .-sm3_mb_asimd_x4 -+ -diff --git a/drv/hash_mb/sm3_mb_sve.S b/drv/hash_mb/sm3_mb_sve.S -new file mode 100644 -index 0000000..7dd2428 ---- /dev/null -+++ b/drv/hash_mb/sm3_mb_sve.S -@@ -0,0 +1,161 @@ -+/********************************************************************** -+ Copyright(c) 2022 Arm Corporation All rights reserved. 
-+ -+ Redistribution and use in source and binary forms, with or without -+ modification, are permitted provided that the following conditions -+ are met: -+ * Redistributions of source code must retain the above copyright -+ notice, this list of conditions and the following disclaimer. -+ * Redistributions in binary form must reproduce the above copyright -+ notice, this list of conditions and the following disclaimer in -+ the documentation and/or other materials provided with the -+ distribution. -+ * Neither the name of Arm Corporation nor the names of its -+ contributors may be used to endorse or promote products derived -+ from this software without specific prior written permission. -+ -+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-+**********************************************************************/ -+ .arch armv8.2-a+sve -+ -+.macro copy_mb_16words vecs:req,dest:req -+ mov src,\vecs -+ mov dst,\dest -+ mov ctr,lanes -+1: -+ ldr tmp,[src],8 -+ ldr tmp,[tmp] -+ add tmp,tmp,block_ctr,lsl 6 -+ ld1 {TMPV0.4s,TMPV1.4s,TMPV2.4s,TMPV3.4s}, [tmp] -+ st1 {TMPV0.4s,TMPV1.4s,TMPV2.4s,TMPV3.4s}, [dst],64 -+ subs ctr,ctr,1 -+ b.ne 1b -+.endm -+ -+.macro load_words windex:req -+ .if \windex == 0 -+ mov tmpw,16 -+ index VOFFS.s,0,tmpw -+ copy_mb_16words job_vec,databuf -+ mov dataptr,databuf -+ .endif -+ ld1w { WORD\windex\().s}, p0/z, [dataptr, VOFFS.s, UXTW 2] -+ add dataptr,dataptr,4 -+.endm -+ -+#include "sm3_sve_common.S" -+ -+/* int sm3_mb_sve_max_lanes() -+ * return : max lanes of SVE vector -+ */ -+ .global sm3_mb_sve_max_lanes -+ .type sm3_mb_sve_max_lanes, %function -+sm3_mb_sve_max_lanes: -+ cntw x0 -+ ret -+ .size sm3_mb_sve_max_lanes, .-sm3_mb_sve_max_lanes -+/* -+ * void sm3_mb_sve(int blocks, int total_lanes, SM3_JOB **job_vec) -+ */ -+ num_blocks .req w0 -+ total_lanes .req w1 -+ job_vec .req x2 -+ lanes .req x4 -+ src .req x5 -+ dst .req x6 -+ lane_offset .req w7 -+ lane_offset_x .req x7 -+ tmp .req x8 -+ tmpw .req w8 -+ block_ctr .req x9 -+ block_ctr_w .req w9 -+ savedsp .req x10 -+ databuf .req x11 -+ dataptr .req x12 -+ efgh_buf .req x12 -+ ctr .req x13 -+ abcd_buf .req x14 -+ sm3const_adr .req x15 -+ -+ .global sm3_mb_sve -+ .type sm3_mb_sve, %function -+sm3_mb_sve: -+ cbz num_blocks,.return -+ sm3_sve_save_stack -+ mov savedsp,sp -+ mov lane_offset, #0 -+ whilelo p0.s, wzr, total_lanes -+ // reserve (32 * max lanes) for abcdefgh buf -+ cntw tmp -+ lsl tmp, tmp, 5 -+ sub abcd_buf,sp,tmp -+ mov tmp,63 -+ bic abcd_buf,abcd_buf,tmp -+ // reserve (64 * lanes) for data buf -+ cntp lanes,p0,p0.s -+ lsl tmp,lanes,6 -+ sub databuf,abcd_buf,tmp -+ mov sp,databuf -+ adr sm3const_adr,SM3_CONSTS -+.seg_loops: -+ mov src,job_vec -+ mov dst,abcd_buf -+ cntp lanes,p0,p0.s -+ add 
efgh_buf,abcd_buf,lanes,lsl 4 -+ mov ctr,lanes -+.ldr_hash: -+ ldr tmp,[src],8 -+ add tmp,tmp,64 -+ ld1 {v0.16b, v1.16b},[tmp] -+ rev32 v0.16b,v0.16b -+ rev32 v1.16b,v1.16b -+ st1 {v0.16b},[dst],16 -+ st1 {v1.16b},[efgh_buf],16 -+ subs ctr,ctr,1 -+ bne .ldr_hash -+ ld4w {VA.s,VB.s,VC.s,VD.s},p0/z,[abcd_buf] -+ add tmp,abcd_buf,lanes,lsl 4 -+ ld4w {VE.s,VF.s,VG.s,VH.s},p0/z,[tmp] -+ mov block_ctr,0 -+ // always unpredicated SVE mode in current settings -+ pred_mode=0 -+.block_loop: -+ sm3_single -+ add block_ctr, block_ctr, 1 -+ cmp block_ctr_w,num_blocks -+ bne .block_loop -+ st4w {VA.s,VB.s,VC.s,VD.s},p0,[abcd_buf] -+ add efgh_buf,abcd_buf,lanes,lsl 4 -+ st4w {VE.s,VF.s,VG.s,VH.s},p0,[efgh_buf] -+ mov dst,job_vec -+ mov src,abcd_buf -+ add job_vec,job_vec,lanes,lsl 3 -+ mov ctr,lanes -+.str_hash: -+ ld1 {v0.16b},[src],16 -+ ld1 {v1.16b},[efgh_buf],16 -+ rev32 v0.16b,v0.16b -+ rev32 v1.16b,v1.16b -+ ldr tmp,[dst],8 -+ add tmp,tmp,64 -+ st1 {v0.16b,v1.16b},[tmp] -+ subs ctr,ctr,1 -+ bne .str_hash -+ incw lane_offset_x -+ whilelo p0.s, lane_offset, total_lanes -+ b.mi .seg_loops -+ mov sp,savedsp -+ sm3_sve_restore_stack -+.return: -+ ret -+ .size sm3_mb_sve, .-sm3_mb_sve -diff --git a/drv/hash_mb/sm3_sve_common.S b/drv/hash_mb/sm3_sve_common.S -new file mode 100644 -index 0000000..3d54952 ---- /dev/null -+++ b/drv/hash_mb/sm3_sve_common.S -@@ -0,0 +1,505 @@ -+/********************************************************************** -+ Copyright(c) 2022 Arm Corporation All rights reserved. -+ -+ Redistribution and use in source and binary forms, with or without -+ modification, are permitted provided that the following conditions -+ are met: -+ * Redistributions of source code must retain the above copyright -+ notice, this list of conditions and the following disclaimer. 
-+ * Redistributions in binary form must reproduce the above copyright -+ notice, this list of conditions and the following disclaimer in -+ the documentation and/or other materials provided with the -+ distribution. -+ * Neither the name of Arm Corporation nor the names of its -+ contributors may be used to endorse or promote products derived -+ from this software without specific prior written permission. -+ -+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-+**********************************************************************/ -+ VA .req z0 -+ VB .req z1 -+ VC .req z2 -+ VD .req z3 -+ VE .req z4 -+ VF .req z5 -+ VG .req z6 -+ VH .req z7 -+ TMPV0 .req v8 -+ TMPV1 .req v9 -+ TMPV2 .req v10 -+ TMPV3 .req v11 -+ WORD0 .req z8 -+ WORD1 .req z9 -+ WORD2 .req z10 -+ WORD3 .req z11 -+ WORD4 .req z12 -+ WORD5 .req z13 -+ WORD6 .req z14 -+ WORD7 .req z15 -+ WORD8 .req z16 -+ WORD9 .req z17 -+ WORD10 .req z18 -+ WORD11 .req z19 -+ WORD12 .req z20 -+ WORD13 .req z21 -+ WORD14 .req z22 -+ WORD15 .req z23 -+ WORD16 .req z24 -+ VOFFS .req z24 // reuse WORD16 -+ SS1 .req z25 -+ SS2 .req z26 -+ VT .req z26 // reuse SS2 -+ TT2 .req z27 -+ VT1 .req z28 -+ VT2 .req z29 -+ VT3 .req z30 -+ VT4 .req z31 -+ VZERO .req z31 -+ TT .req z0 -+ -+.macro sve_op inst:req,regd,args:vararg -+ .if pred_mode == 1 -+ \inst \regd,p0/m,\args -+ .else -+ \inst \regd,\args -+ .endif -+.endm -+ -+.macro sve_bitop inst:req,regd:req,regm:req -+ .if pred_mode == 1 -+ \inst \regd\().s,p0/m,\regd\().s,\regm\().s -+ .else -+ \inst \regd\().d,\regd\().d,\regm\().d -+ .endif -+.endm -+ -+.macro rotate_left0 out:req,in:req,tmp:req,bits:req,args:vararg -+ .if have_sve2 == 0 -+ lsl \tmp\().s,\in\().s,\bits -+ .else -+ movprfx \out\().d,\in\().d -+ xar \out\().s,\out\().s,VZERO.s,32-\bits -+ .endif -+ -+ .ifnb \args -+ rotate_left0 \args -+ .endif -+.endm -+ -+.macro rotate_left1 out:req,in:req,tmp:req,bits:req,args:vararg -+ .if have_sve2 == 0 -+ lsr \out\().s,\in\().s,32-\bits -+ .endif -+ -+ .ifnb \args -+ rotate_left1 \args -+ .endif -+.endm -+ -+.macro rotate_left2 out:req,in:req,tmp:req,bits:req,args:vararg -+ .if have_sve2 == 0 -+ orr \out\().d,\out\().d,\tmp\().d -+ .endif -+ -+ .ifnb \args -+ rotate_left2 \args -+ .endif -+.endm -+ -+.macro rotate_left args:vararg -+ rotate_left0 \args -+ rotate_left1 \args -+ rotate_left2 \args -+.endm -+ -+.macro SVE_EOR3 rd:req,r1:req,r2:req -+ .if have_sve2 == 0 -+ sve_bitop eor,\rd,\r1 -+ sve_bitop eor,\rd,\r2 -+ .else 
-+ eor3 \rd\().d,\rd\().d,\r1\().d,\r2\().d -+ .endif -+.endm -+ -+.macro FUNC_EOR3 ret:req,x:req,y:req,z:req -+ .if have_sve2 == 0 -+ eor \ret\().d,\x\().d,\y\().d -+ sve_bitop eor,\ret,\z -+ .else -+ movprfx \ret\().d,\x\().d -+ eor3 \ret\().d,\ret\().d,\y\().d,\z\().d -+ .endif -+.endm -+ -+.macro FUNC_FF windex:req,ret:req,x:req,y:req,z:req,tmp1:req,tmp2:req -+ and \ret\().d,\x\().d,\y\().d -+ and \tmp1\().d,\x\().d,\z\().d -+ and \tmp2\().d,\y\().d,\z\().d -+ sve_bitop orr,\ret,\tmp1 -+ sve_bitop orr,\ret,\tmp2 -+.endm -+ -+.macro FUNC_BSL ret:req,x:req,y:req,z:req,tmp:req -+ .if have_sve2 == 0 -+ bic \ret\().d,\z\().d,\x\().d -+ and \tmp\().d,\x\().d,\y\().d -+ sve_bitop orr,\ret,\tmp -+ .else -+ movprfx \ret\().d,\x\().d -+ bsl \ret\().d,\ret\().d,\y\().d,\z\().d -+ .endif -+.endm -+ -+.altmacro -+.macro load_next_words windex -+ .if \windex < 16 -+ load_words \windex -+ .endif -+.endm -+ -+.macro SM3_STEP_00_11 windex:req,w:req,w4:req -+ // SS1 = rol32(rol32(a, 12) + e + rol32(T, (j % 32)), 7) -+ ld1rw {VT2.s},p0/z,[sm3const_adr,\windex * 4] -+ rotate_left SS1,VA,VT1,12 -+ mov SS2.s,p0/m,SS1.s -+ sve_op add,SS1.s,SS1.s,VE.s -+ sve_op add,SS1.s,SS1.s,VT2.s -+ rotate_left SS1,SS1,VT2,7 -+ // d <- TT2 = GG(index, e, f, g) + h + SS1 + W[index] -+ add VT2.s,\w\().s,VH.s -+ FUNC_EOR3 TT2,VE,VF,VG -+ // SS2 = SS1 ^ rol32(a, 12) -+ sve_bitop eor,SS2,SS1 -+ sve_op add,TT2.s,TT2.s,VT2.s -+ // h <- TT1 = FF(index, a, b, c) + d + SS2 + WB[index] -+ FUNC_EOR3 VH,VA,VB,VC -+ eor VT1.d,\w\().d,\w4\().d -+ sve_op add,VH.s,VH.s,VD.s -+ sve_op add,VH.s,VH.s,VT1.s -+ add VD.s,TT2.s,SS1.s -+ sve_op add,VH.s,VH.s,SS2.s -+ // d = P0(TT2) -+ rotate_left VT1,VD,VT2,9,VT3,VD,VT4,17 -+ SVE_EOR3 VD,VT1,VT3 -+ // b = rol32(b, 9) -+ // f = rol32(f, 19) -+ rotate_left VB,VB,VT3,9,VF,VF,VT4,19 -+.endm -+ -+.macro SM3_STEP_12_15 windex:req,w:req,w4:req,w16:req,w13:req,w9:req,w6:req,w3:req -+ // SS1 = rol32(rol32(a, 12) + e + rol32(T, (j % 32)), 7) -+ rotate_left 
VT,\w3,VT1,15,\w4,\w13,VT2,7,SS1,VA,VT3,12 -+ ld1rw {VT1.s},p0/z,[sm3const_adr,\windex * 4] -+ mov TT2.s,p0/m,SS1.s -+ sve_bitop eor,VT,\w16 -+ sve_op add,SS1.s,SS1.s,VE.s -+ sve_bitop eor,VT,\w9 -+ sve_op add,SS1.s,SS1.s,VT1.s -+ rotate_left VT1,VT,VT2,15,VT3,VT,VT4,23 -+ SVE_EOR3 VT,VT1,VT3 -+ rotate_left SS1,SS1,VT2,7 -+ sve_bitop eor,\w4,VT -+ // SS2 = SS1 ^ rol32(a, 12) -+ eor SS2.d,TT2.d,SS1.d -+ sve_bitop eor,\w4,\w6 -+ // d <- TT2 = GG(index, e, f, g) + h + SS1 + W[index] -+ FUNC_EOR3 TT2,VE,VF,VG -+ add VT1.s,\w\().s,VH.s -+ sve_op add,TT2.s,TT2.s,VT1.s -+ // h <- TT1 = FF(index, a, b, c) + d + SS2 + WB[index] -+ FUNC_EOR3 VH,VA,VB,VC -+ eor VT1.d,\w\().d,\w4\().d -+ sve_op add,VH.s,VH.s,VD.s -+ // b = rol32(b, 9) -+ // f = rol32(f, 19) -+ rotate_left VB,VB,VT3,9 -+ sve_op add,VH.s,VH.s,VT1.s -+ add VD.s,TT2.s,SS1.s -+ sve_op add,VH.s,VH.s,SS2.s -+ // d = P0(TT2) -+ rotate_left VT1,VD,VT2,9,VT3,VD,VT4,17,VF,VF,TT2,19 -+ SVE_EOR3 VD,VT1,VT3 -+.endm -+ -+.macro SM3_STEP_16_62 windex:req,w:req,w4:req,w16:req,w13:req,w9:req,w6:req,w3:req -+ // SS1 = rol32(rol32(a, 12) + e + rol32(T, (j % 32)), 7) -+ rotate_left VT,\w3,VT1,15,\w4,\w13,VT2,7,SS1,VA,VT3,12 -+ ld1rw {VT1.s},p0/z,[sm3const_adr,\windex * 4] -+ mov TT2.s,p0/m,SS1.s -+ sve_bitop eor,VT,\w16 -+ sve_op add,SS1.s,SS1.s,VE.s -+ sve_bitop eor,VT,\w9 -+ sve_op add,SS1.s,SS1.s,VT1.s -+ rotate_left VT1,VT,VT2,15,VT3,VT,VT4,23 -+ SVE_EOR3 \w4,VT,VT1 -+ rotate_left SS1,SS1,VT2,7 -+ sve_bitop eor,\w4,VT3 -+ // SS2 = SS1 ^ rol32(a, 12) -+ eor SS2.d,TT2.d,SS1.d -+ sve_bitop eor,\w4,\w6 -+ // d <- TT2 = GG(index, e, f, g) + h + SS1 + W[index] -+ sve_op add,SS1.s,SS1.s,\w\().s -+ FUNC_BSL TT2,VE,VF,VG,VT1 -+ sve_op add,SS1.s,SS1.s,VH.s -+ // h <- TT1 = FF(index, a, b, c) + d + SS2 + WB[index] -+ FUNC_FF \windex,VH,VA,VB,VC,VT1,VT2 -+ eor VT1.d,\w\().d,\w4\().d -+ sve_op add,VH.s,VH.s,VD.s -+ // b = rol32(b, 9) -+ // f = rol32(f, 19) -+ rotate_left VB,VB,VT2,9,VF,VF,VT4,19 -+ sve_op add,VH.s,VH.s,VT1.s -+ add 
VD.s,TT2.s,SS1.s -+ sve_op add,VH.s,VH.s,SS2.s -+ // d = P0(TT2) -+ rotate_left VT1,VD,VT2,9,VT3,VD,VT4,17 -+ SVE_EOR3 VD,VT1,VT3 -+.endm -+ -+.macro SM3_STEP_63 windex:req,w:req,w4:req,w16:req,w13:req,w9:req,w6:req,w3:req -+ // SS1 = rol32(rol32(a, 12) + e + rol32(T, (j % 32)), 7) -+ rotate_left VT,\w3,VT1,15,\w4,\w13,VT2,7,SS1,VA,VT3,12 -+ ld1rw {VT1.s},p0/z,[sm3const_adr,\windex * 4] -+ mov TT2.s,p0/m,SS1.s -+ sve_bitop eor,VT,\w16 -+ sve_op add,SS1.s,SS1.s,VE.s -+ sve_bitop eor,VT,\w9 -+ sve_op add,SS1.s,SS1.s,VT1.s -+ rotate_left VT1,VT,VT2,15,VT3,VT,VT4,23 -+ SVE_EOR3 VT,VT1,VT3 -+ rotate_left SS1,SS1,VT2,7 -+ sve_bitop eor,\w4,VT -+ // SS2 = SS1 ^ rol32(a, 12) -+ eor SS2.d,TT2.d,SS1.d -+ sve_bitop eor,\w4,\w6 -+ // d <- TT2 = GG(index, e, f, g) + h + SS1 + W[index] -+ FUNC_BSL TT2,VE,VF,VG,VT1 -+ add VT1.s,\w\().s,VH.s -+ .if \windex == 63 -+ ld1w {WORD0.s},p0/z,[abcd_buf, 0, MUL VL] -+ ld1w {WORD1.s},p0/z,[abcd_buf, 1, MUL VL] -+ ld1w {WORD2.s},p0/z,[abcd_buf, 2, MUL VL] -+ ld1w {WORD3.s},p0/z,[abcd_buf, 3, MUL VL] -+ ld1w {WORD4.s},p0/z,[abcd_buf, 4, MUL VL] -+ ld1w {WORD5.s},p0/z,[abcd_buf, 5, MUL VL] -+ ld1w {WORD6.s},p0/z,[abcd_buf, 6, MUL VL] -+ ld1w {WORD7.s},p0/z,[abcd_buf, 7, MUL VL] -+ .endif -+ sve_op add,TT2.s,TT2.s,VT1.s -+ // h <- TT1 = FF(index, a, b, c) + d + SS2 + WB[index] -+ FUNC_FF \windex,VH,VA,VB,VC,VT1,VT2 -+ eor VT1.d,\w\().d,\w4\().d -+ sve_op add,VH.s,VH.s,VD.s -+ // b = rol32(b, 9) -+ // f = rol32(f, 19) -+ rotate_left VB,VB,VT2,9,VF,VF,VT4,19 -+ sve_op add,VH.s,VH.s,VT1.s -+ add VD.s,TT2.s,SS1.s -+ sve_bitop eor,VA,WORD1 -+ sve_bitop eor,VB,WORD2 -+ sve_bitop eor,VC,WORD3 -+ // d = P0(TT2) -+ rotate_left VT1,VD,VT2,9,VT3,VD,VT4,17 -+ sve_bitop eor,VF,WORD6 -+ SVE_EOR3 VD,VT1,VT3 -+ sve_bitop eor,VG,WORD7 -+ sve_bitop eor,VD,WORD4 -+ sve_op add,VH.s,VH.s,SS2.s -+ sve_bitop eor,VE,WORD5 -+ sve_bitop eor,VH,WORD0 -+.endm -+ -+.macro SWAP_STATES -+ .unreq TT -+ TT .req VH -+ .unreq VH -+ VH .req VG -+ .unreq VG -+ VG .req VF -+ .unreq 
VF -+ VF .req VE -+ .unreq VE -+ VE .req VD -+ .unreq VD -+ VD .req VC -+ .unreq VC -+ VC .req VB -+ .unreq VB -+ VB .req VA -+ .unreq VA -+ VA .req TT -+.endm -+ -+.altmacro -+.macro SM3_STEP_WRAPPER windex:req,idx:req,idx4:req,idx16,idx13,idx9,idx6,idx3 -+ .if \windex <= 11 -+ revb WORD\idx4\().s, p0/m, WORD\idx4\().s -+ next=\idx4+1 -+ load_next_words %next -+ SM3_STEP_00_11 \windex,WORD\idx\(),WORD\idx4\() -+ .else -+ .if \windex < 16 -+ SM3_STEP_12_15 \windex,WORD\idx\(),\ -+ WORD\idx4\(),WORD\idx16\(),WORD\idx13\(),\ -+ WORD\idx9\(),WORD\idx6\(),WORD\idx3\() -+ .else -+ .if \windex == 63 -+ SM3_STEP_63 \windex,WORD\idx\(),WORD\idx4\(),\ -+ WORD\idx16\(),WORD\idx13\(),WORD\idx9\(),\ -+ WORD\idx6\(),WORD\idx3\() -+ .else -+ SM3_STEP_16_62 \windex,WORD\idx\(),WORD\idx4\(),\ -+ WORD\idx16\(),WORD\idx13\(),WORD\idx9\(),\ -+ WORD\idx6\(),WORD\idx3\() -+ .endif -+ .endif -+ .endif -+.endm -+ -+.macro exec_step windex:req -+ .if \windex <= 11 -+ idx4=\windex+4 -+ SM3_STEP_WRAPPER \windex,\windex,%idx4 -+ .else -+ idxp4=\windex + 4 -+ idx4=idxp4 % 17 -+ idx16=(idxp4 - 16) % 17 -+ idx13=(idxp4 - 13) % 17 -+ idx9=(idxp4 - 9) % 17 -+ idx6=(idxp4 - 6) % 17 -+ idx3=(idxp4 - 3) % 17 -+ idx=\windex % 17 -+ SM3_STEP_WRAPPER \windex,%idx,%idx4,%idx16,%idx13,%idx9,%idx6,%idx3 -+ .endif -+ SWAP_STATES -+.endm -+ -+.macro sm3_exec -+ current_step=0 -+ .rept 64 -+ exec_step %current_step -+ current_step=current_step+1 -+ .endr -+.endm -+ -+.macro sm3_single sve2:vararg -+ .ifnb \sve2 -+ have_sve2 = 1 -+ .else -+ have_sve2=0 -+ .endif -+ st1w {VA.s},p0,[abcd_buf, 0, MUL VL] -+ st1w {VB.s},p0,[abcd_buf, 1, MUL VL] -+ st1w {VC.s},p0,[abcd_buf, 2, MUL VL] -+ st1w {VD.s},p0,[abcd_buf, 3, MUL VL] -+ st1w {VE.s},p0,[abcd_buf, 4, MUL VL] -+ st1w {VF.s},p0,[abcd_buf, 5, MUL VL] -+ st1w {VG.s},p0,[abcd_buf, 6, MUL VL] -+ st1w {VH.s},p0,[abcd_buf, 7, MUL VL] -+ load_words 0 -+ load_words 1 -+ load_words 2 -+ load_words 3 -+ load_words 4 -+ revb WORD0.s, p0/m, WORD0.s -+ revb WORD1.s, p0/m, 
WORD1.s -+ revb WORD2.s, p0/m, WORD2.s -+ revb WORD3.s, p0/m, WORD3.s -+ .if have_sve2 == 1 -+ mov VZERO.s,p0/m,#0 -+ .endif -+ sm3_exec -+.endm -+ -+.macro sm3_sve_save_stack -+ stp d8,d9,[sp, -64]! -+ stp d10,d11,[sp, 16] -+ stp d12,d13,[sp, 32] -+ stp d14,d15,[sp, 48] -+.endm -+ -+.macro sm3_sve_restore_stack -+ ldp d10,d11,[sp, 16] -+ ldp d12,d13,[sp, 32] -+ ldp d14,d15,[sp, 48] -+ ldp d8,d9,[sp],64 -+.endm -+ -+ .section .rodata.cst16,"aM",@progbits,16 -+ .align 16 -+SM3_CONSTS: -+ .word 0x79CC4519 -+ .word 0xF3988A32 -+ .word 0xE7311465 -+ .word 0xCE6228CB -+ .word 0x9CC45197 -+ .word 0x3988A32F -+ .word 0x7311465E -+ .word 0xE6228CBC -+ .word 0xCC451979 -+ .word 0x988A32F3 -+ .word 0x311465E7 -+ .word 0x6228CBCE -+ .word 0xC451979C -+ .word 0x88A32F39 -+ .word 0x11465E73 -+ .word 0x228CBCE6 -+ .word 0x9D8A7A87 -+ .word 0x3B14F50F -+ .word 0x7629EA1E -+ .word 0xEC53D43C -+ .word 0xD8A7A879 -+ .word 0xB14F50F3 -+ .word 0x629EA1E7 -+ .word 0xC53D43CE -+ .word 0x8A7A879D -+ .word 0x14F50F3B -+ .word 0x29EA1E76 -+ .word 0x53D43CEC -+ .word 0xA7A879D8 -+ .word 0x4F50F3B1 -+ .word 0x9EA1E762 -+ .word 0x3D43CEC5 -+ .word 0x7A879D8A -+ .word 0xF50F3B14 -+ .word 0xEA1E7629 -+ .word 0xD43CEC53 -+ .word 0xA879D8A7 -+ .word 0x50F3B14F -+ .word 0xA1E7629E -+ .word 0x43CEC53D -+ .word 0x879D8A7A -+ .word 0x0F3B14F5 -+ .word 0x1E7629EA -+ .word 0x3CEC53D4 -+ .word 0x79D8A7A8 -+ .word 0xF3B14F50 -+ .word 0xE7629EA1 -+ .word 0xCEC53D43 -+ .word 0x9D8A7A87 -+ .word 0x3B14F50F -+ .word 0x7629EA1E -+ .word 0xEC53D43C -+ .word 0xD8A7A879 -+ .word 0xB14F50F3 -+ .word 0x629EA1E7 -+ .word 0xC53D43CE -+ .word 0x8A7A879D -+ .word 0x14F50F3B -+ .word 0x29EA1E76 -+ .word 0x53D43CEC -+ .word 0xA7A879D8 -+ .word 0x4F50F3B1 -+ .word 0x9EA1E762 -+ .word 0x3D43CEC5 -+ --- -2.25.1 - diff --git a/0033-uadk_tool-fix-aead-performance-test-issue.patch b/0033-uadk_tool-fix-aead-performance-test-issue.patch deleted file mode 100644 index c422378..0000000 --- 
a/0033-uadk_tool-fix-aead-performance-test-issue.patch +++ /dev/null @@ -1,38 +0,0 @@ -From f5787232f4f5cb09445bfc87d20cb2b43f5e5ea3 Mon Sep 17 00:00:00 2001 -From: Longfang Liu -Date: Mon, 11 Mar 2024 16:14:34 +0800 -Subject: [PATCH 33/44] uadk_tool: fix aead performance test issue - -In the current UADK code, due to the new support for aead stream mode, -a new msg_state state has been added. If the initial value is not -assigned, an error will occur in the block mode check. -As a result, the performance test cannot be executed. - -Signed-off-by: Longfang Liu ---- - uadk_tool/benchmark/sec_uadk_benchmark.c | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/uadk_tool/benchmark/sec_uadk_benchmark.c b/uadk_tool/benchmark/sec_uadk_benchmark.c -index c99ae89..92e967a 100644 ---- a/uadk_tool/benchmark/sec_uadk_benchmark.c -+++ b/uadk_tool/benchmark/sec_uadk_benchmark.c -@@ -1165,6 +1165,7 @@ static void *sec_uadk_aead_async(void *arg) - areq.mac_bytes = auth_size; - areq.assoc_bytes = SEC_AEAD_LEN; - areq.in_bytes = g_pktlen; -+ areq.msg_state = 0; - if (pdata->is_union) - areq.mac_bytes = 32; - if (areq.op_type) // decrypto -@@ -1396,6 +1397,7 @@ static void *sec_uadk_aead_sync(void *arg) - areq.assoc_bytes = SEC_AEAD_LEN; - areq.in_bytes = g_pktlen; - areq.mac_bytes = g_maclen; -+ areq.msg_state = 0; - if (areq.op_type) // decrypto - areq.out_bytes = g_pktlen + 16; // aadsize = 16; - else --- -2.25.1 - diff --git a/0034-uadk_tool-fix-the-logic-for-counting-retransmissions.patch b/0034-uadk_tool-fix-the-logic-for-counting-retransmissions.patch deleted file mode 100644 index 98c833d..0000000 --- a/0034-uadk_tool-fix-the-logic-for-counting-retransmissions.patch +++ /dev/null @@ -1,100 +0,0 @@ -From 12466753e870b9577826d238e08a744c17267a8f Mon Sep 17 00:00:00 2001 -From: Chenghai Huang -Date: Mon, 11 Mar 2024 16:19:46 +0800 -Subject: [PATCH 34/44] uadk_tool: fix the logic for counting retransmissions - when busy - -The try cnt should be set to 0 after the packet is 
successfully -sent, not after BUSY is returned. - -Signed-off-by: Chenghai Huang ---- - uadk_tool/benchmark/zip_uadk_benchmark.c | 8 ++++---- - uadk_tool/benchmark/zip_wd_benchmark.c | 7 +++---- - 2 files changed, 7 insertions(+), 8 deletions(-) - -diff --git a/uadk_tool/benchmark/zip_uadk_benchmark.c b/uadk_tool/benchmark/zip_uadk_benchmark.c -index 435c0b4..9681c22 100644 ---- a/uadk_tool/benchmark/zip_uadk_benchmark.c -+++ b/uadk_tool/benchmark/zip_uadk_benchmark.c -@@ -817,9 +817,8 @@ static void *zip_uadk_blk_lz77_async_run(void *arg) - - while(1) { - if (get_run_state() == 0) -- break; -+ break; - -- try_cnt = 0; - i = count % MAX_POOL_LENTH_COMP; - creq.src = uadk_pool->bds[i].src; - creq.dst = &hw_buff_out[i]; //temp out -@@ -845,6 +844,7 @@ static void *zip_uadk_blk_lz77_async_run(void *arg) - } else if (ret || creq.status) { - break; - } -+ try_cnt = 0; - count++; - } - -@@ -1037,9 +1037,8 @@ static void *zip_uadk_blk_async_run(void *arg) - - while(1) { - if (get_run_state() == 0) -- break; -+ break; - -- try_cnt = 0; - i = count % MAX_POOL_LENTH_COMP; - creq.src = uadk_pool->bds[i].src; - creq.dst = uadk_pool->bds[i].dst; -@@ -1062,6 +1061,7 @@ static void *zip_uadk_blk_async_run(void *arg) - } else if (ret || creq.status) { - break; - } -+ try_cnt = 0; - count++; - } - -diff --git a/uadk_tool/benchmark/zip_wd_benchmark.c b/uadk_tool/benchmark/zip_wd_benchmark.c -index d2340e0..d7bafd6 100644 ---- a/uadk_tool/benchmark/zip_wd_benchmark.c -+++ b/uadk_tool/benchmark/zip_wd_benchmark.c -@@ -708,9 +708,8 @@ static void *zip_wd_blk_lz77_async_run(void *arg) - - while(1) { - if (get_run_state() == 0) -- break; -+ break; - -- try_cnt = 0; - i = count % MAX_POOL_LENTH_COMP; - opdata.in = bd_pool[i].src; - opdata.out = bd_pool[i].dst; //temp out -@@ -737,6 +736,7 @@ static void *zip_wd_blk_lz77_async_run(void *arg) - opdata.status == WD_IN_EPARA || opdata.status == WD_VERIFY_ERR) { - break; - } -+ try_cnt = 0; - count++; - } - -@@ -984,7 +984,6 @@ static void 
*zip_wd_blk_async_run(void *arg) - opdata.in_len = bd_pool[i].src_len; - opdata.avail_out = out_len; - -- try_cnt = 0; - tag[i].ctx = ctx; - tag[i].td_id = pdata->td_id; - tag[i].bd_idx = i; -@@ -1002,7 +1001,7 @@ static void *zip_wd_blk_async_run(void *arg) - opdata.status == WD_IN_EPARA || opdata.status == WD_VERIFY_ERR) { - break; - } -- -+ try_cnt = 0; - count++; - } - --- -2.25.1 - diff --git a/0035-uadk-tools-support-the-nosva-test-of-a-specified-dev.patch b/0035-uadk-tools-support-the-nosva-test-of-a-specified-dev.patch deleted file mode 100644 index a816149..0000000 --- a/0035-uadk-tools-support-the-nosva-test-of-a-specified-dev.patch +++ /dev/null @@ -1,119 +0,0 @@ -From 17e5f25df480a5cacc3ac5e8ae88b708786eec44 Mon Sep 17 00:00:00 2001 -From: Chenghai Huang -Date: Mon, 11 Mar 2024 16:22:58 +0800 -Subject: [PATCH 35/44] uadk/tools - support the nosva test of a specified - device - -Add the description of device parameters. The input should -use the device name from '/sys/class/uacce/'. Only full -matching device names are supported. 
- -Signed-off-by: Chenghai Huang ---- - uadk_tool/benchmark/hpre_wd_benchmark.c | 8 ++++++++ - uadk_tool/benchmark/sec_wd_benchmark.c | 8 ++++++++ - uadk_tool/benchmark/trng_wd_benchmark.c | 8 ++++++++ - uadk_tool/benchmark/uadk_benchmark.c | 3 +++ - uadk_tool/benchmark/zip_wd_benchmark.c | 8 ++++++++ - 5 files changed, 35 insertions(+) - -diff --git a/uadk_tool/benchmark/hpre_wd_benchmark.c b/uadk_tool/benchmark/hpre_wd_benchmark.c -index 2873ffd..6dc1269 100644 ---- a/uadk_tool/benchmark/hpre_wd_benchmark.c -+++ b/uadk_tool/benchmark/hpre_wd_benchmark.c -@@ -431,6 +431,14 @@ static int init_hpre_wd_queue(struct acc_option *options) - /* nodemask need to be clean */ - g_thread_queue.bd_res[i].queue->node_mask = 0x0; - memset(g_thread_queue.bd_res[i].queue->dev_path, 0x0, PATH_STR_SIZE); -+ if (strlen(options->device) != 0) { -+ ret = snprintf(g_thread_queue.bd_res[i].queue->dev_path, -+ PATH_STR_SIZE, "%s", options->device); -+ if (ret < 0) { -+ WD_ERR("failed to copy dev file path!\n"); -+ return -WD_EINVAL; -+ } -+ } - - ret = wd_request_queue(g_thread_queue.bd_res[i].queue); - if (ret) { -diff --git a/uadk_tool/benchmark/sec_wd_benchmark.c b/uadk_tool/benchmark/sec_wd_benchmark.c -index aa03db8..2ed8493 100644 ---- a/uadk_tool/benchmark/sec_wd_benchmark.c -+++ b/uadk_tool/benchmark/sec_wd_benchmark.c -@@ -600,6 +600,14 @@ static int init_wd_queue(struct acc_option *options) - /* nodemask need to be clean */ - g_thread_queue.bd_res[i].queue->node_mask = 0x0; - memset(g_thread_queue.bd_res[i].queue->dev_path, 0x0, PATH_STR_SIZE); -+ if (strlen(options->device) != 0) { -+ ret = snprintf(g_thread_queue.bd_res[i].queue->dev_path, -+ PATH_STR_SIZE, "%s", options->device); -+ if (ret < 0) { -+ WD_ERR("failed to copy dev file path!\n"); -+ return -WD_EINVAL; -+ } -+ } - - ret = wd_request_queue(g_thread_queue.bd_res[i].queue); - if (ret) { -diff --git a/uadk_tool/benchmark/trng_wd_benchmark.c b/uadk_tool/benchmark/trng_wd_benchmark.c -index 64942f0..3ce329a 100644 ---- 
a/uadk_tool/benchmark/trng_wd_benchmark.c -+++ b/uadk_tool/benchmark/trng_wd_benchmark.c -@@ -51,6 +51,14 @@ static int init_trng_wd_queue(struct acc_option *options) - /* nodemask need to be clean */ - g_thread_queue.bd_res[i].queue->node_mask = 0x0; - memset(g_thread_queue.bd_res[i].queue->dev_path, 0x0, PATH_STR_SIZE); -+ if (strlen(options->device) != 0) { -+ ret = snprintf(g_thread_queue.bd_res[i].queue->dev_path, -+ PATH_STR_SIZE, "%s", options->device); -+ if (ret < 0) { -+ WD_ERR("failed to copy dev file path!\n"); -+ return -WD_EINVAL; -+ } -+ } - - g_thread_queue.bd_res[i].in_bytes = options->pktlen; - g_thread_queue.bd_res[i].out = malloc(options->pktlen); -diff --git a/uadk_tool/benchmark/uadk_benchmark.c b/uadk_tool/benchmark/uadk_benchmark.c -index cf3a93c..0ebbb68 100644 ---- a/uadk_tool/benchmark/uadk_benchmark.c -+++ b/uadk_tool/benchmark/uadk_benchmark.c -@@ -595,6 +595,7 @@ static void dump_param(struct acc_option *option) - ACC_TST_PRT(" [--engine]: %s\n", option->engine); - ACC_TST_PRT(" [--latency]: %u\n", option->latency); - ACC_TST_PRT(" [--init2]: %u\n", option->inittype); -+ ACC_TST_PRT(" [--device]: %s\n", option->device); - } - - int acc_benchmark_run(struct acc_option *option) -@@ -718,6 +719,8 @@ static void print_help(void) - ACC_TST_PRT(" test the running time of packets\n"); - ACC_TST_PRT(" [--init2]:\n"); - ACC_TST_PRT(" select init2 mode in the init interface of UADK SVA\n"); -+ ACC_TST_PRT(" [--device]:\n"); -+ ACC_TST_PRT(" select device to do task\n"); - ACC_TST_PRT(" [--help] = usage\n"); - ACC_TST_PRT("Example\n"); - ACC_TST_PRT(" ./uadk_tool benchmark --alg aes-128-cbc --mode sva --opt 0 --sync\n"); -diff --git a/uadk_tool/benchmark/zip_wd_benchmark.c b/uadk_tool/benchmark/zip_wd_benchmark.c -index d7bafd6..4424e08 100644 ---- a/uadk_tool/benchmark/zip_wd_benchmark.c -+++ b/uadk_tool/benchmark/zip_wd_benchmark.c -@@ -310,6 +310,14 @@ static int init_zip_wd_queue(struct acc_option *options) - /* nodemask need to be clean */ - 
g_thread_queue.bd_res[i].queue->node_mask = 0x0; - memset(g_thread_queue.bd_res[i].queue->dev_path, 0x0, PATH_STR_SIZE); -+ if (strlen(options->device) != 0) { -+ ret = snprintf(g_thread_queue.bd_res[i].queue->dev_path, -+ PATH_STR_SIZE, "%s", options->device); -+ if (ret < 0) { -+ WD_ERR("failed to copy dev file path!\n"); -+ return -WD_EINVAL; -+ } -+ } - - ret = wd_request_queue(g_thread_queue.bd_res[i].queue); - if (ret) { --- -2.25.1 - diff --git a/0036-uadk-tools-support-designated-device-testing.patch b/0036-uadk-tools-support-designated-device-testing.patch deleted file mode 100644 index 7334716..0000000 --- a/0036-uadk-tools-support-designated-device-testing.patch +++ /dev/null @@ -1,674 +0,0 @@ -From 829bc553310349ee7c654397204e8b348d7610f4 Mon Sep 17 00:00:00 2001 -From: Yang Shen -Date: Mon, 11 Mar 2024 16:27:04 +0800 -Subject: [PATCH 36/44] uadk/tools - support designated device testing - -Add a parameter 'device' to designate a device. The input should -use the device name from '/sys/class/uacce/'. Only full -matching device names are supported. 
- -Signed-off-by: Yang Shen ---- - uadk_tool/benchmark/hpre_uadk_benchmark.c | 86 ++++++++++++++------- - uadk_tool/benchmark/sec_uadk_benchmark.c | 86 ++++++++++++++------- - uadk_tool/benchmark/uadk_benchmark.c | 64 ++++++++------- - uadk_tool/benchmark/uadk_benchmark.h | 36 +++++---- - uadk_tool/benchmark/zip_uadk_benchmark.c | 94 ++++++++++++++++------- - 5 files changed, 237 insertions(+), 129 deletions(-) - -diff --git a/uadk_tool/benchmark/hpre_uadk_benchmark.c b/uadk_tool/benchmark/hpre_uadk_benchmark.c -index 0cbbdf2..729728f 100644 ---- a/uadk_tool/benchmark/hpre_uadk_benchmark.c -+++ b/uadk_tool/benchmark/hpre_uadk_benchmark.c -@@ -346,43 +346,66 @@ static int hpre_uadk_param_parse(thread_data *tddata, struct acc_option *options - - static int init_hpre_ctx_config(struct acc_option *options) - { -+ struct uacce_dev_list *list, *tmp; - int subtype = options->subtype; - char *alg = options->algclass; - int mode = options->syncmode; -+ struct uacce_dev *dev = NULL; - struct sched_params param; -- struct uacce_dev *dev; -- int max_node; -+ int max_node, i; -+ char *dev_name; - int ret = 0; -- int i = 0; - - max_node = numa_max_node() + 1; - if (max_node <= 0) - return -EINVAL; - -- memset(&g_ctx_cfg, 0, sizeof(struct wd_ctx_config)); -- g_ctx_cfg.ctx_num = g_ctxnum; -- g_ctx_cfg.ctxs = calloc(g_ctxnum, sizeof(struct wd_ctx)); -- if (!g_ctx_cfg.ctxs) -- return -ENOMEM; -+ list = wd_get_accel_list(alg); -+ if (!list) { -+ HPRE_TST_PRT("failed to get %s device\n", alg); -+ return -ENODEV; -+ } - -- while (i < g_ctxnum) { -- dev = wd_get_accel_dev(alg); -- if (!dev) { -- HPRE_TST_PRT("failed to get %s device\n", alg); -- ret = -EINVAL; -- goto out; -+ if (strlen(options->device) == 0) { -+ dev = list->dev; -+ } else { -+ for (tmp = list; tmp; tmp = tmp->next) { -+ dev_name = strrchr(tmp->dev->dev_root, '/') + 1; -+ if (!strcmp(dev_name, options->device)) { -+ dev = tmp->dev; -+ break; -+ } - } -+ } - -- for (; i < g_ctxnum; i++) { -- g_ctx_cfg.ctxs[i].ctx = 
wd_request_ctx(dev); -- if (!g_ctx_cfg.ctxs[i].ctx) -- break; -+ if (dev == NULL) { -+ HPRE_TST_PRT("failed to find device %s\n", options->device); -+ ret = -ENODEV; -+ goto free_list; -+ } -+ -+ /* If there is no numa, we defualt config to zero */ -+ if (dev->numa_id < 0) -+ dev->numa_id = 0; - -- g_ctx_cfg.ctxs[i].op_type = 0; // default op_type -- g_ctx_cfg.ctxs[i].ctx_mode = (__u8)mode; -+ memset(&g_ctx_cfg, 0, sizeof(struct wd_ctx_config)); -+ g_ctx_cfg.ctx_num = g_ctxnum; -+ g_ctx_cfg.ctxs = calloc(g_ctxnum, sizeof(struct wd_ctx)); -+ if (!g_ctx_cfg.ctxs) { -+ ret = -ENOMEM; -+ goto free_list; -+ } -+ -+ for (i = 0; i < g_ctxnum; i++) { -+ g_ctx_cfg.ctxs[i].ctx = wd_request_ctx(dev); -+ if (!g_ctx_cfg.ctxs[i].ctx) { -+ HPRE_TST_PRT("failed to alloc %dth ctx\n", i); -+ ret = -ENODEV; -+ goto free_ctx; - } - -- free(dev); -+ g_ctx_cfg.ctxs[i].op_type = 0; -+ g_ctx_cfg.ctxs[i].ctx_mode = (__u8)mode; - } - - switch(subtype) { -@@ -401,11 +424,11 @@ static int init_hpre_ctx_config(struct acc_option *options) - break; - default: - HPRE_TST_PRT("failed to parse alg subtype!\n"); -- return -EINVAL; -+ goto free_ctx; - } - if (!g_sched) { - HPRE_TST_PRT("failed to alloc sched!\n"); -- goto out; -+ goto free_ctx; - } - - g_sched->name = SCHED_SINGLE; -@@ -417,7 +440,7 @@ static int init_hpre_ctx_config(struct acc_option *options) - ret = wd_sched_rr_instance(g_sched, ¶m); - if (ret) { - HPRE_TST_PRT("failed to fill hpre sched data!\n"); -- goto out; -+ goto free_sched; - } - - /* init */ -@@ -438,17 +461,22 @@ static int init_hpre_ctx_config(struct acc_option *options) - } - if (ret) { - HPRE_TST_PRT("failed to get hpre ctx!\n"); -- goto out; -+ goto free_sched; - } - - return 0; --out: -- for (i = i - 1; i >= 0; i--) -- wd_release_ctx(g_ctx_cfg.ctxs[i].ctx); - -- free(g_ctx_cfg.ctxs); -+free_sched: - wd_sched_rr_release(g_sched); - -+free_ctx: -+ for (; i >= 0; i--) -+ wd_release_ctx(g_ctx_cfg.ctxs[i].ctx); -+ free(g_ctx_cfg.ctxs); -+ -+free_list: -+ 
wd_free_list_accels(list); -+ - return ret; - } - -diff --git a/uadk_tool/benchmark/sec_uadk_benchmark.c b/uadk_tool/benchmark/sec_uadk_benchmark.c -index 92e967a..105fb1a 100644 ---- a/uadk_tool/benchmark/sec_uadk_benchmark.c -+++ b/uadk_tool/benchmark/sec_uadk_benchmark.c -@@ -516,42 +516,66 @@ static int sec_uadk_param_parse(thread_data *tddata, struct acc_option *options) - - static int init_ctx_config(struct acc_option *options) - { -+ struct uacce_dev_list *list, *tmp; - struct sched_params param = {0}; -- struct uacce_dev *dev = NULL; -- char *alg = options->algclass; - int subtype = options->subtype; -+ char *alg = options->algclass; - int mode = options->syncmode; -- int max_node = 0; -+ struct uacce_dev *dev = NULL; -+ int max_node, i; -+ char *dev_name; - int ret = 0; -- int i = 0; - - max_node = numa_max_node() + 1; - if (max_node <= 0) - return -EINVAL; - -- memset(&g_ctx_cfg, 0, sizeof(struct wd_ctx_config)); -- g_ctx_cfg.ctx_num = g_ctxnum; -- g_ctx_cfg.ctxs = calloc(g_ctxnum, sizeof(struct wd_ctx)); -- if (!g_ctx_cfg.ctxs) -- return -ENOMEM; -+ list = wd_get_accel_list(alg); -+ if (!list) { -+ SEC_TST_PRT("failed to get %s device\n", alg); -+ return -ENODEV; -+ } - -- while (i < g_ctxnum) { -- dev = wd_get_accel_dev(alg); -- if (!dev) { -- SEC_TST_PRT("failed to get %s device\n", alg); -- goto out; -+ if (strlen(options->device) == 0) { -+ dev = list->dev; -+ } else { -+ for (tmp = list; tmp; tmp = tmp->next) { -+ dev_name = strrchr(tmp->dev->dev_root, '/') + 1; -+ if (!strcmp(dev_name, options->device)) { -+ dev = tmp->dev; -+ break; -+ } - } -+ } - -- for (; i < g_ctxnum; i++) { -- g_ctx_cfg.ctxs[i].ctx = wd_request_ctx(dev); -- if (!g_ctx_cfg.ctxs[i].ctx) -- break; -+ if (dev == NULL) { -+ SEC_TST_PRT("failed to find device %s\n", options->device); -+ ret = -ENODEV; -+ goto free_list; -+ } -+ -+ /* If there is no numa, we defualt config to zero */ -+ if (dev->numa_id < 0) -+ dev->numa_id = 0; -+ -+ memset(&g_ctx_cfg, 0, sizeof(struct 
wd_ctx_config)); -+ g_ctx_cfg.ctx_num = g_ctxnum; -+ g_ctx_cfg.ctxs = calloc(g_ctxnum, sizeof(struct wd_ctx)); -+ if (!g_ctx_cfg.ctxs) { -+ ret = -ENOMEM; -+ goto free_list; -+ } - -- g_ctx_cfg.ctxs[i].op_type = 0; // default op_type -- g_ctx_cfg.ctxs[i].ctx_mode = (__u8)mode; -+ for (i = 0; i < g_ctxnum; i++) { -+ g_ctx_cfg.ctxs[i].ctx = wd_request_ctx(dev); -+ if (!g_ctx_cfg.ctxs[i].ctx) { -+ SEC_TST_PRT("failed to alloc %dth ctx\n", i); -+ ret = -ENOMEM; -+ goto free_ctx; - } - -- free(dev); -+ g_ctx_cfg.ctxs[i].op_type = 0; -+ g_ctx_cfg.ctxs[i].ctx_mode = (__u8)mode; - } - - switch(subtype) { -@@ -566,11 +590,11 @@ static int init_ctx_config(struct acc_option *options) - break; - default: - SEC_TST_PRT("failed to parse alg subtype!\n"); -- return -EINVAL; -+ goto free_ctx; - } - if (!g_sched) { - SEC_TST_PRT("failed to alloc sched!\n"); -- goto out; -+ goto free_ctx; - } - - g_sched->name = SCHED_SINGLE; -@@ -582,7 +606,7 @@ static int init_ctx_config(struct acc_option *options) - ret = wd_sched_rr_instance(g_sched, ¶m); - if (ret) { - SEC_TST_PRT("failed to fill sched data!\n"); -- goto out; -+ goto free_sched; - } - - /* init */ -@@ -599,17 +623,21 @@ static int init_ctx_config(struct acc_option *options) - } - if (ret) { - SEC_TST_PRT("failed to cipher ctx!\n"); -- goto out; -+ goto free_sched; - } - - return 0; - --out: -- for (i--; i >= 0; i--) -- wd_release_ctx(g_ctx_cfg.ctxs[i].ctx); -+free_sched: -+ wd_sched_rr_release(g_sched); - -+free_ctx: -+ for (; i >= 0; i--) -+ wd_release_ctx(g_ctx_cfg.ctxs[i].ctx); - free(g_ctx_cfg.ctxs); -- wd_sched_rr_release(g_sched); -+ -+free_list: -+ wd_free_list_accels(list); - - return ret; - } -diff --git a/uadk_tool/benchmark/uadk_benchmark.c b/uadk_tool/benchmark/uadk_benchmark.c -index 0ebbb68..5dbe26a 100644 ---- a/uadk_tool/benchmark/uadk_benchmark.c -+++ b/uadk_tool/benchmark/uadk_benchmark.c -@@ -491,6 +491,7 @@ static void parse_alg_param(struct acc_option *option) - void cal_perfermance_data(struct acc_option 
*option, u32 sttime) - { - u8 palgname[MAX_ALG_NAME]; -+ char *unit = "KiB/s"; - double perfermance; - double cpu_rate; - u32 ttime = 1000; -@@ -506,8 +507,8 @@ void cal_perfermance_data(struct acc_option *option, u32 sttime) - if (option->syncmode == SYNC_MODE) { - if (get_recv_time() == option->threads) - break; -- } else { // ASYNC_MODE -- if (get_recv_time() == 1) // poll complete -+ } else { -+ if (get_recv_time() == 1) - break; - } - usleep(1000); -@@ -525,14 +526,17 @@ void cal_perfermance_data(struct acc_option *option, u32 sttime) - palgname[i] = '\0'; - - ptime = ptime - sttime; -+ cpu_rate = (double)ptime / option->times; -+ - perfdata = g_recv_data.pkg_len * g_recv_data.recv_cnt / 1024.0; -- perfops = (double)(g_recv_data.recv_cnt) / 1000.0; - perfermance = perfdata / option->times; -+ -+ perfops = g_recv_data.recv_cnt / 1000.0; - ops = perfops / option->times; -- cpu_rate = (double)ptime / option->times; -- ACC_TST_PRT("algname: length: perf: iops: CPU_rate:\n" -- "%s %-2uBytes %.1fKB/s %.1fKops %.2f%%\n", -- palgname, option->pktlen, perfermance, ops, cpu_rate); -+ -+ ACC_TST_PRT("algname:\tlength:\t\tperf:\t\tiops:\t\tCPU_rate:\n" -+ "%s\t%-2uBytes \t%.2f%s\t%.1fKops \t%.2f%%\n", -+ palgname, option->pktlen, perfermance, unit, ops, cpu_rate); - } - - static int benchmark_run(struct acc_option *option) -@@ -744,24 +748,25 @@ int acc_cmd_parse(int argc, char *argv[], struct acc_option *option) - int c; - - static struct option long_options[] = { -- {"help", no_argument, 0, 0}, -- {"alg", required_argument, 0, 1}, -- {"mode", required_argument, 0, 2}, -- {"opt", required_argument, 0, 3}, -- {"sync", no_argument, 0, 4}, -- {"async", no_argument, 0, 5}, -- {"pktlen", required_argument, 0, 6}, -- {"seconds", required_argument, 0, 7}, -- {"thread", required_argument, 0, 8}, -- {"multi", required_argument, 0, 9}, -- {"ctxnum", required_argument, 0, 10}, -- {"prefetch", no_argument, 0, 11}, -- {"engine", required_argument, 0, 12}, -- {"alglist", no_argument, 
0, 13}, -- {"latency", no_argument, 0, 14}, -- {"winsize", required_argument, 0, 15}, -- {"complevel", required_argument, 0, 16}, -- {"init2", no_argument, 0, 17}, -+ {"help", no_argument, 0, 0}, -+ {"alg", required_argument, 0, 1}, -+ {"mode", required_argument, 0, 2}, -+ {"opt", required_argument, 0, 3}, -+ {"sync", no_argument, 0, 4}, -+ {"async", no_argument, 0, 5}, -+ {"pktlen", required_argument, 0, 6}, -+ {"seconds", required_argument, 0, 7}, -+ {"thread", required_argument, 0, 8}, -+ {"multi", required_argument, 0, 9}, -+ {"ctxnum", required_argument, 0, 10}, -+ {"prefetch", no_argument, 0, 11}, -+ {"engine", required_argument, 0, 12}, -+ {"alglist", no_argument, 0, 13}, -+ {"latency", no_argument, 0, 14}, -+ {"winsize", required_argument, 0, 15}, -+ {"complevel", required_argument, 0, 16}, -+ {"init2", no_argument, 0, 17}, -+ {"device", required_argument, 0, 18}, - {0, 0, 0, 0} - }; - -@@ -826,8 +831,15 @@ int acc_cmd_parse(int argc, char *argv[], struct acc_option *option) - case 17: - option->inittype = INIT2_TYPE; - break; -+ case 18: -+ if (strlen(optarg) >= MAX_DEVICE_NAME) { -+ ACC_TST_PRT("invalid: device name is %s\n", optarg); -+ goto to_exit; -+ } -+ strcpy(option->device, optarg); -+ break; - default: -- ACC_TST_PRT("bad input test parameter!\n"); -+ ACC_TST_PRT("invalid: bad input parameter!\n"); - print_help(); - goto to_exit; - } -diff --git a/uadk_tool/benchmark/uadk_benchmark.h b/uadk_tool/benchmark/uadk_benchmark.h -index 1752948..fd3ebe5 100644 ---- a/uadk_tool/benchmark/uadk_benchmark.h -+++ b/uadk_tool/benchmark/uadk_benchmark.h -@@ -6,27 +6,28 @@ - #include - #include - #include -+#include - #include --#include - #include -+#include - #include - #include --#include - #include --#include - #include - #include -+#include - --#define ACC_TST_PRT printf --#define PROCESS_NUM 32 --#define THREADS_NUM 64 --#define MAX_CTX_NUM 64 -+#define ACC_TST_PRT printf -+#define PROCESS_NUM 32 -+#define THREADS_NUM 64 -+#define MAX_CTX_NUM 64 - #define 
MAX_TIME_SECONDS 128 --#define BYTES_TO_MB 20 --#define MAX_OPT_TYPE 6 --#define MAX_DATA_SIZE (15 * 1024 * 1024) --#define MAX_ALG_NAME 64 --#define ACC_QUEUE_SIZE 1024 -+#define BYTES_TO_MB 20 -+#define MAX_OPT_TYPE 6 -+#define MAX_DATA_SIZE (15 * 1024 * 1024) -+#define MAX_ALG_NAME 64 -+#define ACC_QUEUE_SIZE 1024 -+#define MAX_DEVICE_NAME 64 - - #define MAX_BLOCK_NM 16384 /* BLOCK_NUM must 4 times of POOL_LENTH */ - #define MAX_POOL_LENTH 4096 -@@ -35,15 +36,15 @@ - #define SEC_2_USEC 1000000 - #define HASH_ZISE 16 - -+#define SCHED_SINGLE "sched_single" -+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) -+#define gettid() syscall(__NR_gettid) -+ - typedef unsigned long long u64; - typedef unsigned int u32; - typedef unsigned short u16; - typedef unsigned char u8; - --#define SCHED_SINGLE "sched_single" --#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) --#define gettid() syscall(__NR_gettid) -- - /** - * struct acc_option - Define the test acc app option list. - * @algclass: 0:cipher 1:digest -@@ -55,9 +56,10 @@ typedef unsigned char u8; - * @latency: test packet running time - */ - struct acc_option { -- char algname[64]; -+ char algname[MAX_ALG_NAME]; - char algclass[64]; - char engine[64]; -+ char device[MAX_DEVICE_NAME]; - u32 algtype; - u32 modetype; - u32 optype; -diff --git a/uadk_tool/benchmark/zip_uadk_benchmark.c b/uadk_tool/benchmark/zip_uadk_benchmark.c -index 9681c22..63fbdab 100644 ---- a/uadk_tool/benchmark/zip_uadk_benchmark.c -+++ b/uadk_tool/benchmark/zip_uadk_benchmark.c -@@ -16,6 +16,7 @@ - #define MAX_POOL_LENTH_COMP 1 - #define COMPRESSION_RATIO_FACTOR 0.7 - #define CHUNK_SIZE (128 * 1024) -+ - struct uadk_bd { - u8 *src; - u8 *dst; -@@ -61,6 +62,7 @@ struct zip_file_head { - - static struct wd_ctx_config g_ctx_cfg; - static struct wd_sched *g_sched; -+static struct sched_params param; - static unsigned int g_thread_num; - static unsigned int g_ctxnum; - static unsigned int g_pktlen; -@@ -240,7 +242,7 @@ static int 
zip_uadk_param_parse(thread_data *tddata, struct acc_option *options) - u8 alg; - - if (optype >= WD_DIR_MAX << 1) { -- ZIP_TST_PRT("Fail to get zip optype!\n"); -+ ZIP_TST_PRT("failed to get zip optype!\n"); - return -EINVAL; - } else if (optype >= WD_DIR_MAX) { - mode = STREAM_MODE; -@@ -265,7 +267,7 @@ static int zip_uadk_param_parse(thread_data *tddata, struct acc_option *options) - optype = WD_DIR_COMPRESS; - break; - default: -- ZIP_TST_PRT("Fail to set zip alg\n"); -+ ZIP_TST_PRT("failed to set zip alg\n"); - return -EINVAL; - } - -@@ -298,21 +300,22 @@ static int init_ctx_config2(struct acc_option *options) - /* init */ - ret = wd_comp_init2(alg_name, SCHED_POLICY_RR, TASK_HW); - if (ret) { -- ZIP_TST_PRT("Fail to do comp init2!\n"); -+ ZIP_TST_PRT("failed to do comp init2!\n"); - return ret; - } - - return 0; - } - --static struct sched_params param; - static int init_ctx_config(struct acc_option *options) - { -- struct uacce_dev_list *list; -+ struct uacce_dev_list *list, *tmp; - char *alg = options->algclass; - int optype = options->optype; - int mode = options->syncmode; -- int i, max_node; -+ struct uacce_dev *dev = NULL; -+ int max_node, i; -+ char *dev_name; - int ret = 0; - - optype = optype % WD_DIR_MAX; -@@ -322,61 +325,96 @@ static int init_ctx_config(struct acc_option *options) - - list = wd_get_accel_list(alg); - if (!list) { -- ZIP_TST_PRT("Fail to get %s device\n", alg); -+ ZIP_TST_PRT("failed to get %s device\n", alg); - return -ENODEV; - } -- memset(&g_ctx_cfg, 0, sizeof(struct wd_ctx_config)); -- g_ctx_cfg.ctx_num = g_ctxnum; -- g_ctx_cfg.ctxs = calloc(g_ctxnum, sizeof(struct wd_ctx)); -- if (!g_ctx_cfg.ctxs) -- return -ENOMEM; - -- g_sched = wd_sched_rr_alloc(SCHED_POLICY_RR, 2, max_node, wd_comp_poll_ctx); -- if (!g_sched) { -- ZIP_TST_PRT("Fail to alloc sched!\n"); -- goto out; -+ if (strlen(options->device) == 0) { -+ dev = list->dev; -+ } else { -+ for (tmp = list; tmp; tmp = tmp->next) { -+ dev_name = strrchr(tmp->dev->dev_root, '/') 
+ 1; -+ if (!strcmp(dev_name, options->device)) { -+ dev = tmp->dev; -+ break; -+ } -+ } -+ } -+ -+ if (dev == NULL) { -+ ZIP_TST_PRT("failed to find device %s\n", options->device); -+ ret = -ENODEV; -+ goto free_list; - } - - /* If there is no numa, we defualt config to zero */ -- if (list->dev->numa_id < 0) -- list->dev->numa_id = 0; -+ if (dev->numa_id < 0) -+ dev->numa_id = 0; -+ -+ memset(&g_ctx_cfg, 0, sizeof(struct wd_ctx_config)); -+ g_ctx_cfg.ctx_num = g_ctxnum; -+ g_ctx_cfg.ctxs = calloc(g_ctxnum, sizeof(struct wd_ctx)); -+ if (!g_ctx_cfg.ctxs) { -+ ret = -ENOMEM; -+ goto free_list; -+ } - - for (i = 0; i < g_ctxnum; i++) { -- g_ctx_cfg.ctxs[i].ctx = wd_request_ctx(list->dev); -- g_ctx_cfg.ctxs[i].op_type = optype; // default op_type -+ g_ctx_cfg.ctxs[i].ctx = wd_request_ctx(dev); -+ if (!g_ctx_cfg.ctxs[i].ctx) { -+ ZIP_TST_PRT("failed to alloc %dth ctx\n", i); -+ goto free_ctx; -+ } -+ -+ g_ctx_cfg.ctxs[i].op_type = optype; - g_ctx_cfg.ctxs[i].ctx_mode = (__u8)mode; - } -+ -+ g_sched = wd_sched_rr_alloc(SCHED_POLICY_RR, 2, max_node, wd_comp_poll_ctx); -+ if (!g_sched) { -+ ZIP_TST_PRT("failed to alloc sched!\n"); -+ ret = -ENOMEM; -+ goto free_ctx; -+ } -+ - g_sched->name = SCHED_SINGLE; - - /* - * All contexts for 2 modes & 2 types. - * The test only uses one kind of contexts at the same time. 
- */ -- param.numa_id = list->dev->numa_id; -+ param.numa_id = dev->numa_id; - param.type = optype; - param.mode = mode; - param.begin = 0; - param.end = g_ctxnum - 1; - ret = wd_sched_rr_instance(g_sched, ¶m); - if (ret) { -- ZIP_TST_PRT("Fail to fill sched data!\n"); -- goto out; -+ ZIP_TST_PRT("failed to fill sched data!\n"); -+ goto free_sched; - } - -- /* init */ - ret = wd_comp_init(&g_ctx_cfg, g_sched); - if (ret) { -- ZIP_TST_PRT("Fail to cipher ctx!\n"); -- goto out; -+ ZIP_TST_PRT("failed to cipher ctx!\n"); -+ goto free_sched; - } - - wd_free_list_accels(list); - - return 0; --out: -- free(g_ctx_cfg.ctxs); -+ -+free_sched: - wd_sched_rr_release(g_sched); - -+free_ctx: -+ for (; i >= 0; i--) -+ wd_release_ctx(g_ctx_cfg.ctxs[i].ctx); -+ free(g_ctx_cfg.ctxs); -+ -+free_list: -+ wd_free_list_accels(list); -+ - return ret; - } - --- -2.25.1 - diff --git a/0037-uadk_tool-support-sm3-ce-benchmark-and-function-test.patch b/0037-uadk_tool-support-sm3-ce-benchmark-and-function-test.patch deleted file mode 100644 index 58c0144..0000000 --- a/0037-uadk_tool-support-sm3-ce-benchmark-and-function-test.patch +++ /dev/null @@ -1,308 +0,0 @@ -From abb578a52d3bd6a34ac852afb56f7da06e8de266 Mon Sep 17 00:00:00 2001 -From: Zhiqi Song -Date: Mon, 11 Mar 2024 16:29:30 +0800 -Subject: [PATCH 37/44] uadk_tool: support sm3 ce benchmark and function test - -Support performance and function test for sm3-ce. 
- -Signed-off-by: Zhiqi Song ---- - uadk_tool/Makefile.am | 1 + - uadk_tool/benchmark/sec_uadk_benchmark.c | 22 +++++++++++++-- - uadk_tool/benchmark/uadk_benchmark.c | 29 ++++++++++++-------- - uadk_tool/benchmark/uadk_benchmark.h | 1 + - uadk_tool/test/test_sec.c | 35 +++++++++++++++++++++--- - 5 files changed, 69 insertions(+), 19 deletions(-) - -diff --git a/uadk_tool/Makefile.am b/uadk_tool/Makefile.am -index 7f00087..6fa0d9d 100644 ---- a/uadk_tool/Makefile.am -+++ b/uadk_tool/Makefile.am -@@ -29,6 +29,7 @@ uadk_tool_LDADD=$(libwd_la_OBJECTS) \ - ../.libs/libhisi_sec.a \ - ../.libs/libhisi_hpre.a \ - ../.libs/libhisi_zip.a \ -+ ../.libs/libisa_ce.a \ - -ldl -lnuma - else - uadk_tool_LDADD=-L../.libs -l:libwd.so.2 -l:libwd_crypto.so.2 \ -diff --git a/uadk_tool/benchmark/sec_uadk_benchmark.c b/uadk_tool/benchmark/sec_uadk_benchmark.c -index 105fb1a..c3da616 100644 ---- a/uadk_tool/benchmark/sec_uadk_benchmark.c -+++ b/uadk_tool/benchmark/sec_uadk_benchmark.c -@@ -679,6 +679,7 @@ static void uninit_ctx_config2(int subtype) - wd_aead_uninit2(); - break; - case DIGEST_TYPE: -+ case DIGEST_INSTR_TYPE: - wd_digest_uninit2(); - break; - default: -@@ -703,12 +704,23 @@ static int init_ctx_config2(struct acc_option *options) - switch(subtype) { - case CIPHER_TYPE: - ret = wd_cipher_init2(alg_name, SCHED_POLICY_RR, TASK_HW); -+ if (ret) -+ SEC_TST_PRT("failed to do cipher init2!\n"); - break; - case AEAD_TYPE: - ret = wd_aead_init2(alg_name, SCHED_POLICY_RR, TASK_HW); -+ if (ret) -+ SEC_TST_PRT("failed to do aead init2!\n"); - break; - case DIGEST_TYPE: - ret = wd_digest_init2(alg_name, SCHED_POLICY_RR, TASK_HW); -+ if (ret) -+ SEC_TST_PRT("failed to do digest init2!\n"); -+ break; -+ case DIGEST_INSTR_TYPE: -+ ret = wd_digest_init2(alg_name, SCHED_POLICY_NONE, TASK_INSTR); -+ if (ret) -+ SEC_TST_PRT("failed to do digest intruction init2!\n"); - break; - } - if (ret) { -@@ -716,7 +728,7 @@ static int init_ctx_config2(struct acc_option *options) - return ret; - } - -- 
return 0; -+ return ret; - } - - static void get_aead_data(u8 *addr, u32 size) -@@ -1489,8 +1501,8 @@ static void *sec_uadk_digest_sync(void *arg) - } - } - dreq.in_bytes = g_pktlen; -- dreq.out_bytes = 16; -- dreq.out_buf_bytes = 16; -+ dreq.out_bytes = 32; -+ dreq.out_buf_bytes = 32; - dreq.data_fmt = 0; - dreq.state = 0; - dreq.has_next = 0; -@@ -1536,8 +1548,12 @@ int sec_uadk_sync_threads(struct acc_option *options) - uadk_sec_sync_run = sec_uadk_aead_sync; - break; - case DIGEST_TYPE: -+ case DIGEST_INSTR_TYPE: - uadk_sec_sync_run = sec_uadk_digest_sync; - break; -+ default: -+ SEC_TST_PRT("Invalid subtype!\n"); -+ return -EINVAL; - } - - for (i = 0; i < g_thread_num; i++) { -diff --git a/uadk_tool/benchmark/uadk_benchmark.c b/uadk_tool/benchmark/uadk_benchmark.c -index 5dbe26a..9c025cf 100644 ---- a/uadk_tool/benchmark/uadk_benchmark.c -+++ b/uadk_tool/benchmark/uadk_benchmark.c -@@ -37,6 +37,7 @@ enum test_type { - SOFT_MODE = 0x4, - SVA_SOFT = 0x5, - NOSVA_SOFT = 0x6, -+ INSTR_MODE = 0x7, - INVALID_MODE = 0x8, - }; - -@@ -51,6 +52,7 @@ static struct acc_sva_item sys_name_item[] = { - {"soft", SOFT_MODE}, - {"sva-soft", SVA_SOFT}, - {"nosva-soft", NOSVA_SOFT}, -+ {"instr", INSTR_MODE}, - }; - - struct acc_alg_item { -@@ -286,7 +288,7 @@ static int get_alg_type(const char *alg_name) - - for (i = 0; i < ALG_MAX; i++) { - if (strcmp(alg_name, alg_options[i].name) == 0) { -- alg = alg_options[i].alg; -+ alg = alg_options[i].alg; - break; - } - } -@@ -482,8 +484,11 @@ static void parse_alg_param(struct acc_option *option) - option->subtype = AEAD_TYPE; - } else if (option->algtype <= SHA512_256) { - snprintf(option->algclass, MAX_ALG_NAME, "%s", "digest"); -+ if (option->modetype == INSTR_MODE) -+ option->subtype = DIGEST_INSTR_TYPE; -+ else -+ option->subtype = DIGEST_TYPE; - option->acctype = SEC_TYPE; -- option->subtype = DIGEST_TYPE; - } - } - } -@@ -545,35 +550,35 @@ static int benchmark_run(struct acc_option *option) - - switch(option->acctype) { - case 
SEC_TYPE: -- if (option->modetype & SVA_MODE) { -+ if ((option->modetype == SVA_MODE) || (option->modetype == INSTR_MODE)) { - ret = sec_uadk_benchmark(option); -- } else if (option->modetype & NOSVA_MODE) { -+ } else if (option->modetype == NOSVA_MODE) { - ret = sec_wd_benchmark(option); - } - usleep(20000); - #ifdef HAVE_CRYPTO -- if (option->modetype & SOFT_MODE) { -+ if (option->modetype == SOFT_MODE) { - ret = sec_soft_benchmark(option); - } - #endif - break; - case HPRE_TYPE: -- if (option->modetype & SVA_MODE) { -+ if (option->modetype == SVA_MODE) { - ret = hpre_uadk_benchmark(option); -- } else if (option->modetype & NOSVA_MODE) { -+ } else if (option->modetype == NOSVA_MODE) { - ret = hpre_wd_benchmark(option); - } - break; - case ZIP_TYPE: -- if (option->modetype & SVA_MODE) { -+ if (option->modetype == SVA_MODE) { - ret = zip_uadk_benchmark(option); -- } else if (option->modetype & NOSVA_MODE) { -+ } else if (option->modetype == NOSVA_MODE) { - ret = zip_wd_benchmark(option); - } - case TRNG_TYPE: -- if (option->modetype & SVA_MODE) -+ if (option->modetype == SVA_MODE) - ACC_TST_PRT("TRNG not support sva mode..\n"); -- else if (option->modetype & NOSVA_MODE) -+ else if (option->modetype == NOSVA_MODE) - ret = trng_wd_benchmark(option); - - break; -@@ -698,7 +703,7 @@ static void print_help(void) - ACC_TST_PRT("DESCRIPTION\n"); - ACC_TST_PRT(" [--alg aes-128-cbc ]:\n"); - ACC_TST_PRT(" The name of the algorithm for benchmarking\n"); -- ACC_TST_PRT(" [--mode sva/nosva/soft/sva-soft/nosva-soft]: start UADK or Warpdrive or Openssl mode test\n"); -+ ACC_TST_PRT(" [--mode sva/nosva/soft/sva-soft/nosva-soft/instr]: start UADK or Warpdrive or Openssl or Instruction mode test\n"); - ACC_TST_PRT(" [--sync/--async]: start asynchronous/synchronous mode test\n"); - ACC_TST_PRT(" [--opt 0,1,2,3,4,5]:\n"); - ACC_TST_PRT(" SEC/ZIP: 0/1:encryption/decryption or compression/decompression\n"); -diff --git a/uadk_tool/benchmark/uadk_benchmark.h 
b/uadk_tool/benchmark/uadk_benchmark.h -index fd3ebe5..e370d3e 100644 ---- a/uadk_tool/benchmark/uadk_benchmark.h -+++ b/uadk_tool/benchmark/uadk_benchmark.h -@@ -104,6 +104,7 @@ enum alg_type { - SM2_TYPE, - X25519_TYPE, - X448_TYPE, -+ DIGEST_INSTR_TYPE, - }; - - enum sync_type { -diff --git a/uadk_tool/test/test_sec.c b/uadk_tool/test/test_sec.c -index 16feaf0..87fc718 100644 ---- a/uadk_tool/test/test_sec.c -+++ b/uadk_tool/test/test_sec.c -@@ -32,7 +32,7 @@ - #define SCHED_SINGLE "sched_single" - #define SCHED_NULL_CTX_SIZE 4 - #define TEST_WORD_LEN 4096 --#define MAX_ALGO_PER_TYPE 16 -+#define MAX_ALGO_PER_TYPE 17 - #define MIN_SVA_BD_NUM 1 - #define AES_KEYSIZE_128 16 - #define AES_KEYSIZE_192 24 -@@ -83,6 +83,8 @@ enum digest_type { - LOCAL_AES_GMAC_192, - LOCAL_AES_GMAC_256, - LOCAL_AES_XCBC_MAC_96, -+ LOCAL_AES_XCBC_PRF_128, -+ LOCAL_AES_CCM, - }; - - char *digest_names[MAX_ALGO_PER_TYPE] = { -@@ -102,6 +104,7 @@ char *digest_names[MAX_ALGO_PER_TYPE] = { - "xcbc-mac-96(aes)", - "xcbc-prf-128(aes)", - "ccm(aes)", /* --digest 15: for error alg test */ -+ "sm3-ce", - }; - - char *aead_names[MAX_ALGO_PER_TYPE] = { -@@ -1464,11 +1467,14 @@ static int digest_init2(int type, int mode) - { - struct wd_ctx_params cparams = {0}; - struct wd_ctx_nums *ctx_set_num; -+ char *alg_name; - int ret; - - if (g_testalg >= MAX_ALGO_PER_TYPE) - return -WD_EINVAL; - -+ alg_name = digest_names[g_testalg]; -+ - ctx_set_num = calloc(1, sizeof(*ctx_set_num)); - if (!ctx_set_num) { - WD_ERR("failed to alloc ctx_set_size!\n"); -@@ -1492,7 +1498,10 @@ static int digest_init2(int type, int mode) - if (mode == CTX_MODE_ASYNC) - ctx_set_num->async_ctx_num = g_ctxnum; - -- ret = wd_digest_init2_(digest_names[g_testalg], 0, 0, &cparams); -+ if (!strcmp(alg_name, "sm3-ce")) -+ ret = wd_digest_init2("sm3", SCHED_POLICY_NONE, TASK_INSTR); -+ else -+ ret = wd_digest_init2_(digest_names[g_testalg], 0, 0, &cparams); - if (ret) - goto out_freebmp; - -@@ -1780,7 +1789,22 @@ int 
get_digest_resource(struct hash_testvec **alg_tv, int* alg, int* mode) - tv->dsize = 16; - alg_type = WD_DIGEST_AES_XCBC_PRF_128; - break; -- -+ case 16: /* SM3-CE */ -+ switch (g_alg_op_type) { -+ case 0: -+ mode_type = WD_DIGEST_NORMAL; -+ SEC_TST_PRT("test alg: %s\n", "normal(sm3-ce)"); -+ tv = &sm3_tv_template[0]; -+ break; -+ case 1: -+ mode_type = WD_DIGEST_HMAC; -+ SEC_TST_PRT("test alg: %s\n", "hmac(sm3-ce)"); -+ tv = &hmac_sm3_tv_template[0]; -+ break; -+ } -+ tv->dsize = 32; -+ alg_type = WD_DIGEST_SM3; -+ break; - default: - SEC_TST_PRT("keylenth error, default test alg: %s\n", "normal(sm3)"); - return -EINVAL; -@@ -4229,7 +4253,7 @@ static void print_help(void) - SEC_TST_PRT(" 4 : SHA224; 5 : SHA384; 6 : SHA512; 7 : SHA512_224\n"); - SEC_TST_PRT(" 8 : SHA512_256; 9 : AES_CMAC; 10 : AES_GMAC_128\n"); - SEC_TST_PRT(" 11 : AES_GMAC_192; 12 : AES_GMAC_256; 13 : AES_XCBC_MAC_96\n"); -- SEC_TST_PRT(" 14 : AES_XCBC_PRF_128\n"); -+ SEC_TST_PRT(" 14 : AES_XCBC_PRF_128; 15 : SM3-CE\n"); - SEC_TST_PRT(" [--aead ]:\n"); - SEC_TST_PRT(" specify symmetric aead algorithm\n"); - SEC_TST_PRT(" 0 : AES-CCM; 1 : AES-GCM; 2 : Hmac(sha256),cbc(aes)\n"); -@@ -4257,6 +4281,9 @@ static void print_help(void) - SEC_TST_PRT(" set the steam mode for digest\n"); - SEC_TST_PRT(" [--sglnum]:\n"); - SEC_TST_PRT(" the number of scatterlist number used by the entire test task\n"); -+ SEC_TST_PRT(" [--init]:\n"); -+ SEC_TST_PRT(" 1: use init API of uadk\n"); -+ SEC_TST_PRT(" 2: use init2 API of uadk\n"); - SEC_TST_PRT(" [--help] = usage\n"); - SEC_TST_PRT("Example\n"); - SEC_TST_PRT(" ./uadk_tool test --m sec --cipher 0 --sync --optype 0\n"); --- -2.25.1 - diff --git a/0038-uadk_tool-support-sm4-ce-benchmark-test.patch b/0038-uadk_tool-support-sm4-ce-benchmark-test.patch deleted file mode 100644 index a4523c9..0000000 --- a/0038-uadk_tool-support-sm4-ce-benchmark-test.patch +++ /dev/null @@ -1,167 +0,0 @@ -From 3ca60986407cfe3b1ddd264b0bfbe24d88856d71 Mon Sep 17 00:00:00 2001 -From: Qi 
Tao -Date: Mon, 11 Mar 2024 16:31:35 +0800 -Subject: [PATCH 38/44] uadk_tool: support sm4 ce benchmark test -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Support performance test for sm4-ce. Add CBC-CS1、CBC-CS2 -and CBC-CS3 mode for SM4 algorithm in benchmark tool. - -Signed-off-by: Qi Tao ---- - uadk_tool/benchmark/sec_uadk_benchmark.c | 25 ++++++++++++++++++++++++ - uadk_tool/benchmark/sec_wd_benchmark.c | 18 +++++++++++++++++ - uadk_tool/benchmark/uadk_benchmark.c | 11 ++++++++++- - uadk_tool/benchmark/uadk_benchmark.h | 4 ++++ - 4 files changed, 57 insertions(+), 1 deletion(-) - -diff --git a/uadk_tool/benchmark/sec_uadk_benchmark.c b/uadk_tool/benchmark/sec_uadk_benchmark.c -index c3da616..f8b19ba 100644 ---- a/uadk_tool/benchmark/sec_uadk_benchmark.c -+++ b/uadk_tool/benchmark/sec_uadk_benchmark.c -@@ -346,6 +346,24 @@ static int sec_uadk_param_parse(thread_data *tddata, struct acc_option *options) - mode = WD_CIPHER_CBC; - alg = WD_CIPHER_SM4; - break; -+ case SM4_128_CBC_CS1: -+ keysize = 16; -+ ivsize = 16; -+ mode = WD_CIPHER_CBC_CS1; -+ alg = WD_CIPHER_SM4; -+ break; -+ case SM4_128_CBC_CS2: -+ keysize = 16; -+ ivsize = 16; -+ mode = WD_CIPHER_CBC_CS2; -+ alg = WD_CIPHER_SM4; -+ break; -+ case SM4_128_CBC_CS3: -+ keysize = 16; -+ ivsize = 16; -+ mode = WD_CIPHER_CBC_CS3; -+ alg = WD_CIPHER_SM4; -+ break; - case SM4_128_CTR: - keysize = 16; - ivsize = 16; -@@ -673,6 +691,7 @@ static void uninit_ctx_config2(int subtype) - /* uninit2 */ - switch(subtype) { - case CIPHER_TYPE: -+ case CIPHER_INSTR_TYPE: - wd_cipher_uninit2(); - break; - case AEAD_TYPE: -@@ -707,6 +726,11 @@ static int init_ctx_config2(struct acc_option *options) - if (ret) - SEC_TST_PRT("failed to do cipher init2!\n"); - break; -+ case CIPHER_INSTR_TYPE: -+ ret = wd_cipher_init2(alg_name, SCHED_POLICY_NONE, TASK_INSTR); -+ if (ret) -+ SEC_TST_PRT("failed to do cipher intruction init2!\n"); -+ break; - case AEAD_TYPE: - ret = 
wd_aead_init2(alg_name, SCHED_POLICY_RR, TASK_HW); - if (ret) -@@ -1542,6 +1566,7 @@ int sec_uadk_sync_threads(struct acc_option *options) - - switch (options->subtype) { - case CIPHER_TYPE: -+ case CIPHER_INSTR_TYPE: - uadk_sec_sync_run = sec_uadk_cipher_sync; - break; - case AEAD_TYPE: -diff --git a/uadk_tool/benchmark/sec_wd_benchmark.c b/uadk_tool/benchmark/sec_wd_benchmark.c -index 2ed8493..bb47d61 100644 ---- a/uadk_tool/benchmark/sec_wd_benchmark.c -+++ b/uadk_tool/benchmark/sec_wd_benchmark.c -@@ -412,6 +412,24 @@ static int sec_wd_param_parse(thread_data *tddata, struct acc_option *options) - mode = WCRYPTO_CIPHER_CBC; - alg = WCRYPTO_CIPHER_SM4; - break; -+ case SM4_128_CBC_CS1: -+ keysize = 16; -+ ivsize = 16; -+ mode = WCRYPTO_CIPHER_CBC_CS1; -+ alg = WCRYPTO_CIPHER_SM4; -+ break; -+ case SM4_128_CBC_CS2: -+ keysize = 16; -+ ivsize = 16; -+ mode = WCRYPTO_CIPHER_CBC_CS2; -+ alg = WCRYPTO_CIPHER_SM4; -+ break; -+ case SM4_128_CBC_CS3: -+ keysize = 16; -+ ivsize = 16; -+ mode = WCRYPTO_CIPHER_CBC_CS3; -+ alg = WCRYPTO_CIPHER_SM4; -+ break; - case SM4_128_CTR: - keysize = 16; - ivsize = 16; -diff --git a/uadk_tool/benchmark/uadk_benchmark.c b/uadk_tool/benchmark/uadk_benchmark.c -index 9c025cf..f9bb69c 100644 ---- a/uadk_tool/benchmark/uadk_benchmark.c -+++ b/uadk_tool/benchmark/uadk_benchmark.c -@@ -120,6 +120,9 @@ static struct acc_alg_item alg_options[] = { - {"3des-192-cbc", DES3_192_CBC}, - {"sm4-128-ecb", SM4_128_ECB}, - {"sm4-128-cbc", SM4_128_CBC}, -+ {"sm4-128-cbc-cs1", SM4_128_CBC_CS1}, -+ {"sm4-128-cbc-cs2", SM4_128_CBC_CS2}, -+ {"sm4-128-cbc-cs3", SM4_128_CBC_CS3}, - {"sm4-128-ctr", SM4_128_CTR}, - {"sm4-128-ofb", SM4_128_OFB}, - {"sm4-128-cfb", SM4_128_CFB}, -@@ -209,6 +212,9 @@ static struct acc_alg_item alg_name_options[] = { - {"cbc(des3_ede)", DES3_192_CBC}, - {"ecb(sm4)", SM4_128_ECB}, - {"cbc(sm4)", SM4_128_CBC}, -+ {"cbc-cs1(sm4)", SM4_128_CBC_CS1}, -+ {"cbc-cs2(sm4)", SM4_128_CBC_CS2}, -+ {"cbc-cs3(sm4)", SM4_128_CBC_CS3}, - 
{"ctr(sm4)", SM4_128_CTR}, - {"ofb(sm4)", SM4_128_OFB}, - {"cfb(sm4)", SM4_128_CFB}, -@@ -476,8 +482,11 @@ static void parse_alg_param(struct acc_option *option) - option->subtype = ECDSA_TYPE; - } else if (option->algtype <= SM4_128_XTS_GB) { - snprintf(option->algclass, MAX_ALG_NAME, "%s", "cipher"); -+ if (option->modetype == INSTR_MODE) -+ option->subtype = CIPHER_INSTR_TYPE; -+ else -+ option->subtype = CIPHER_TYPE; - option->acctype = SEC_TYPE; -- option->subtype = CIPHER_TYPE; - } else if (option->algtype <= SM4_128_GCM) { - snprintf(option->algclass, MAX_ALG_NAME, "%s", "aead"); - option->acctype = SEC_TYPE; -diff --git a/uadk_tool/benchmark/uadk_benchmark.h b/uadk_tool/benchmark/uadk_benchmark.h -index e370d3e..ea8e437 100644 ---- a/uadk_tool/benchmark/uadk_benchmark.h -+++ b/uadk_tool/benchmark/uadk_benchmark.h -@@ -105,6 +105,7 @@ enum alg_type { - X25519_TYPE, - X448_TYPE, - DIGEST_INSTR_TYPE, -+ CIPHER_INSTR_TYPE, - }; - - enum sync_type { -@@ -172,6 +173,9 @@ enum test_alg { - DES3_192_CBC, - SM4_128_ECB, - SM4_128_CBC, -+ SM4_128_CBC_CS1, -+ SM4_128_CBC_CS2, -+ SM4_128_CBC_CS3, - SM4_128_CTR, - SM4_128_OFB, - SM4_128_CFB, --- -2.25.1 - diff --git a/0039-uadk_tool-support-sm3-md5-multibuff-benchmark-test.patch b/0039-uadk_tool-support-sm3-md5-multibuff-benchmark-test.patch deleted file mode 100644 index f78d464..0000000 --- a/0039-uadk_tool-support-sm3-md5-multibuff-benchmark-test.patch +++ /dev/null @@ -1,272 +0,0 @@ -From 7b3f79fedc187ded4dc7d6bdc976d0e560cc746d Mon Sep 17 00:00:00 2001 -From: Weili Qian -Date: Mon, 11 Mar 2024 16:33:58 +0800 -Subject: [PATCH 39/44] uadk_tool: support sm3/md5 multibuff benchmark test - -Support sm3/md5 multibuff benchmark test - -Signed-off-by: Weili Qian ---- - uadk_tool/benchmark/sec_uadk_benchmark.c | 31 +++++++++++++++--------- - uadk_tool/benchmark/uadk_benchmark.c | 26 ++++++++++++++------ - uadk_tool/benchmark/uadk_benchmark.h | 3 ++- - 3 files changed, 40 insertions(+), 20 deletions(-) - -diff --git 
a/uadk_tool/benchmark/sec_uadk_benchmark.c b/uadk_tool/benchmark/sec_uadk_benchmark.c -index f8b19ba..2c12c20 100644 ---- a/uadk_tool/benchmark/sec_uadk_benchmark.c -+++ b/uadk_tool/benchmark/sec_uadk_benchmark.c -@@ -53,6 +53,7 @@ typedef struct uadk_thread_res { - bool is_union; - u32 dalg; - u32 dmode; -+ u32 d_outbytes; - } thread_data; - - static struct wd_ctx_config g_ctx_cfg; -@@ -146,6 +147,7 @@ static int sec_uadk_param_parse(thread_data *tddata, struct acc_option *options) - u32 algtype = options->algtype; - u32 optype = options->optype; - bool is_union = false; -+ u32 out_bytes = 32; - u8 keysize = 0; - u8 ivsize = 0; - u8 dmode = 0; -@@ -472,45 +474,54 @@ static int sec_uadk_param_parse(thread_data *tddata, struct acc_option *options) - case SM3_ALG: // digest mode is optype - keysize = 4; - mode = optype; -+ out_bytes = 32; - alg = WD_DIGEST_SM3; - break; - case MD5_ALG: - keysize = 4; -+ out_bytes = 16; - mode = optype; - alg = WD_DIGEST_MD5; - break; - case SHA1_ALG: - keysize = 4; -+ out_bytes = 20; - mode = optype; - alg = WD_DIGEST_SHA1; - break; - case SHA256_ALG: - keysize = 4; -+ out_bytes = 32; - mode = optype; - alg = WD_DIGEST_SHA256; - break; - case SHA224_ALG: - keysize = 4; -+ out_bytes = 28; - mode = optype; - alg = WD_DIGEST_SHA224; - break; - case SHA384_ALG: - keysize = 4; -+ out_bytes = 48; - mode = optype; - alg = WD_DIGEST_SHA384; - break; - case SHA512_ALG: - keysize = 4; -+ out_bytes = 64; - mode = optype; - alg = WD_DIGEST_SHA512; - break; - case SHA512_224: - keysize = 4; -+ out_bytes = 28; - mode = optype; - alg = WD_DIGEST_SHA512_224; - break; - case SHA512_256: - keysize = 4; -+ out_bytes = 32; - mode = optype; - alg = WD_DIGEST_SHA512_256; - break; -@@ -528,6 +539,7 @@ static int sec_uadk_param_parse(thread_data *tddata, struct acc_option *options) - tddata->is_union = is_union; - tddata->optype = options->optype; - tddata->subtype = options->subtype; -+ tddata->d_outbytes = out_bytes; - - return 0; - } -@@ -698,7 +710,6 @@ 
static void uninit_ctx_config2(int subtype) - wd_aead_uninit2(); - break; - case DIGEST_TYPE: -- case DIGEST_INSTR_TYPE: - wd_digest_uninit2(); - break; - default: -@@ -737,15 +748,10 @@ static int init_ctx_config2(struct acc_option *options) - SEC_TST_PRT("failed to do aead init2!\n"); - break; - case DIGEST_TYPE: -- ret = wd_digest_init2(alg_name, SCHED_POLICY_RR, TASK_HW); -+ ret = wd_digest_init2(alg_name, options->sched_type, options->task_type); - if (ret) - SEC_TST_PRT("failed to do digest init2!\n"); - break; -- case DIGEST_INSTR_TYPE: -- ret = wd_digest_init2(alg_name, SCHED_POLICY_NONE, TASK_INSTR); -- if (ret) -- SEC_TST_PRT("failed to do digest intruction init2!\n"); -- break; - } - if (ret) { - SEC_TST_PRT("failed to do cipher init2!\n"); -@@ -1305,8 +1311,8 @@ static void *sec_uadk_digest_async(void *arg) - } - } - dreq.in_bytes = g_pktlen; -- dreq.out_bytes = 16; -- dreq.out_buf_bytes = 16; -+ dreq.out_bytes = pdata->d_outbytes; -+ dreq.out_buf_bytes = pdata->d_outbytes; - dreq.data_fmt = 0; - dreq.state = 0; - dreq.has_next = 0; -@@ -1525,8 +1531,8 @@ static void *sec_uadk_digest_sync(void *arg) - } - } - dreq.in_bytes = g_pktlen; -- dreq.out_bytes = 32; -- dreq.out_buf_bytes = 32; -+ dreq.out_bytes = pdata->d_outbytes; -+ dreq.out_buf_bytes = pdata->d_outbytes; - dreq.data_fmt = 0; - dreq.state = 0; - dreq.has_next = 0; -@@ -1573,7 +1579,6 @@ int sec_uadk_sync_threads(struct acc_option *options) - uadk_sec_sync_run = sec_uadk_aead_sync; - break; - case DIGEST_TYPE: -- case DIGEST_INSTR_TYPE: - uadk_sec_sync_run = sec_uadk_digest_sync; - break; - default: -@@ -1591,6 +1596,7 @@ int sec_uadk_sync_threads(struct acc_option *options) - threads_args[i].ivsize = threads_option.ivsize; - threads_args[i].optype = threads_option.optype; - threads_args[i].td_id = i; -+ threads_args[i].d_outbytes = threads_option.d_outbytes; - ret = pthread_create(&tdid[i], NULL, uadk_sec_sync_run, &threads_args[i]); - if (ret) { - SEC_TST_PRT("Create sync thread fail!\n"); 
-@@ -1662,6 +1668,7 @@ int sec_uadk_async_threads(struct acc_option *options) - threads_args[i].ivsize = threads_option.ivsize; - threads_args[i].optype = threads_option.optype; - threads_args[i].td_id = i; -+ threads_args[i].d_outbytes = threads_option.d_outbytes; - ret = pthread_create(&tdid[i], NULL, uadk_sec_async_run, &threads_args[i]); - if (ret) { - SEC_TST_PRT("Create async thread fail!\n"); -diff --git a/uadk_tool/benchmark/uadk_benchmark.c b/uadk_tool/benchmark/uadk_benchmark.c -index f9bb69c..c1accc6 100644 ---- a/uadk_tool/benchmark/uadk_benchmark.c -+++ b/uadk_tool/benchmark/uadk_benchmark.c -@@ -2,6 +2,8 @@ - - #include - #include -+#include "include/wd_alg_common.h" -+#include "include/wd_sched.h" - - #include "uadk_benchmark.h" - #include "sec_uadk_benchmark.h" -@@ -38,7 +40,8 @@ enum test_type { - SVA_SOFT = 0x5, - NOSVA_SOFT = 0x6, - INSTR_MODE = 0x7, -- INVALID_MODE = 0x8, -+ MULTIBUF_MODE = 0x8, -+ INVALID_MODE = 0x9, - }; - - struct acc_sva_item { -@@ -53,6 +56,7 @@ static struct acc_sva_item sys_name_item[] = { - {"sva-soft", SVA_SOFT}, - {"nosva-soft", NOSVA_SOFT}, - {"instr", INSTR_MODE}, -+ {"multibuff", MULTIBUF_MODE}, - }; - - struct acc_alg_item { -@@ -493,11 +497,15 @@ static void parse_alg_param(struct acc_option *option) - option->subtype = AEAD_TYPE; - } else if (option->algtype <= SHA512_256) { - snprintf(option->algclass, MAX_ALG_NAME, "%s", "digest"); -- if (option->modetype == INSTR_MODE) -- option->subtype = DIGEST_INSTR_TYPE; -- else -- option->subtype = DIGEST_TYPE; -+ option->subtype = DIGEST_TYPE; - option->acctype = SEC_TYPE; -+ if (option->modetype == INSTR_MODE) { -+ option->sched_type = SCHED_POLICY_NONE; -+ option->task_type = TASK_INSTR; -+ } else if (option->modetype == MULTIBUF_MODE) { -+ option->sched_type = SCHED_POLICY_SINGLE; -+ option->task_type = TASK_INSTR; -+ } - } - } - } -@@ -559,7 +567,9 @@ static int benchmark_run(struct acc_option *option) - - switch(option->acctype) { - case SEC_TYPE: -- if 
((option->modetype == SVA_MODE) || (option->modetype == INSTR_MODE)) { -+ if ((option->modetype == SVA_MODE) || -+ (option->modetype == INSTR_MODE) || -+ (option->modetype == MULTIBUF_MODE)) { - ret = sec_uadk_benchmark(option); - } else if (option->modetype == NOSVA_MODE) { - ret = sec_wd_benchmark(option); -@@ -623,6 +633,8 @@ int acc_benchmark_run(struct acc_option *option) - int i, ret = 0; - int status; - -+ option->sched_type = SCHED_POLICY_RR; -+ option->task_type = TASK_HW; - parse_alg_param(option); - dump_param(option); - g_run_options = option; -@@ -712,7 +724,7 @@ static void print_help(void) - ACC_TST_PRT("DESCRIPTION\n"); - ACC_TST_PRT(" [--alg aes-128-cbc ]:\n"); - ACC_TST_PRT(" The name of the algorithm for benchmarking\n"); -- ACC_TST_PRT(" [--mode sva/nosva/soft/sva-soft/nosva-soft/instr]: start UADK or Warpdrive or Openssl or Instruction mode test\n"); -+ ACC_TST_PRT(" [--mode sva/nosva/soft/sva-soft/nosva-soft/instr/multibuff]: start UADK or Warpdrive or Openssl or Instruction mode test\n"); - ACC_TST_PRT(" [--sync/--async]: start asynchronous/synchronous mode test\n"); - ACC_TST_PRT(" [--opt 0,1,2,3,4,5]:\n"); - ACC_TST_PRT(" SEC/ZIP: 0/1:encryption/decryption or compression/decompression\n"); -diff --git a/uadk_tool/benchmark/uadk_benchmark.h b/uadk_tool/benchmark/uadk_benchmark.h -index ea8e437..c493ac3 100644 ---- a/uadk_tool/benchmark/uadk_benchmark.h -+++ b/uadk_tool/benchmark/uadk_benchmark.h -@@ -77,6 +77,8 @@ struct acc_option { - u32 complevel; - u32 inittype; - bool latency; -+ u32 sched_type; -+ int task_type; - }; - - enum acc_type { -@@ -104,7 +106,6 @@ enum alg_type { - SM2_TYPE, - X25519_TYPE, - X448_TYPE, -- DIGEST_INSTR_TYPE, - CIPHER_INSTR_TYPE, - }; - --- -2.25.1 - diff --git a/0040-uadk-tool-fix-the-msg-pool-release-bug-of-async-zip-.patch b/0040-uadk-tool-fix-the-msg-pool-release-bug-of-async-zip-.patch deleted file mode 100644 index 9eaee1f..0000000 --- a/0040-uadk-tool-fix-the-msg-pool-release-bug-of-async-zip-.patch +++ 
/dev/null @@ -1,467 +0,0 @@ -From 34c49db7d9eba5255f678179da95a15976dbb305 Mon Sep 17 00:00:00 2001 -From: Chenghai Huang -Date: Mon, 11 Mar 2024 16:36:13 +0800 -Subject: [PATCH 40/44] uadk tool: fix the msg pool release bug of async zip - benchmark - -Ensure that all packets in the msg pool are removed before end. -In V2, resources such as tags are released in a centralized manner -to avoid errors caused by asynchronous resource release sequence. -In V1, before the packet sending thread releases the tag, ensure -that the poll thread has ended. - -Signed-off-by: Chenghai Huang ---- - uadk_tool/benchmark/uadk_benchmark.c | 4 +- - uadk_tool/benchmark/zip_uadk_benchmark.c | 151 ++++++++++++----------- - uadk_tool/benchmark/zip_wd_benchmark.c | 25 ++-- - 3 files changed, 99 insertions(+), 81 deletions(-) - -diff --git a/uadk_tool/benchmark/uadk_benchmark.c b/uadk_tool/benchmark/uadk_benchmark.c -index c1accc6..1262a2a 100644 ---- a/uadk_tool/benchmark/uadk_benchmark.c -+++ b/uadk_tool/benchmark/uadk_benchmark.c -@@ -594,6 +594,7 @@ static int benchmark_run(struct acc_option *option) - } else if (option->modetype == NOSVA_MODE) { - ret = zip_wd_benchmark(option); - } -+ break; - case TRNG_TYPE: - if (option->modetype == SVA_MODE) - ACC_TST_PRT("TRNG not support sva mode..\n"); -@@ -727,7 +728,8 @@ static void print_help(void) - ACC_TST_PRT(" [--mode sva/nosva/soft/sva-soft/nosva-soft/instr/multibuff]: start UADK or Warpdrive or Openssl or Instruction mode test\n"); - ACC_TST_PRT(" [--sync/--async]: start asynchronous/synchronous mode test\n"); - ACC_TST_PRT(" [--opt 0,1,2,3,4,5]:\n"); -- ACC_TST_PRT(" SEC/ZIP: 0/1:encryption/decryption or compression/decompression\n"); -+ ACC_TST_PRT(" SEC: cipher,aead: 0/1:encryption/decryption; digest: 0/1:normal/hmac\n"); -+ ACC_TST_PRT(" ZIP: 0~1:block compression, block decompression; 2~3:stream compression, stream decompression\n"); - ACC_TST_PRT(" HPRE: 0~5:keygen, key compute, Enc, Dec, Sign, Verify\n"); - ACC_TST_PRT(" 
[--pktlen]:\n"); - ACC_TST_PRT(" set the length of BD message in bytes\n"); -diff --git a/uadk_tool/benchmark/zip_uadk_benchmark.c b/uadk_tool/benchmark/zip_uadk_benchmark.c -index 63fbdab..1dd3990 100644 ---- a/uadk_tool/benchmark/zip_uadk_benchmark.c -+++ b/uadk_tool/benchmark/zip_uadk_benchmark.c -@@ -16,7 +16,7 @@ - #define MAX_POOL_LENTH_COMP 1 - #define COMPRESSION_RATIO_FACTOR 0.7 - #define CHUNK_SIZE (128 * 1024) -- -+#define MAX_UNRECV_PACKET_NUM 2 - struct uadk_bd { - u8 *src; - u8 *dst; -@@ -37,11 +37,17 @@ enum ZIP_OP_MODE { - STREAM_MODE - }; - -+enum ZIP_THREAD_STATE { -+ THREAD_PROCESSING, -+ THREAD_COMPLETED -+}; -+ - struct zip_async_tag { - handle_t sess; - u32 td_id; - u32 bd_idx; - u32 cm_len; -+ u32 recv_cnt; - ZSTD_CCtx *cctx; - }; - -@@ -52,6 +58,10 @@ typedef struct uadk_thread_res { - u32 td_id; - u32 win_sz; - u32 comp_lv; -+ u32 send_cnt; -+ struct zip_async_tag *tag; -+ COMP_TUPLE_TAG *ftuple; -+ char *hw_buff_out; - } thread_data; - - struct zip_file_head { -@@ -67,6 +77,7 @@ static unsigned int g_thread_num; - static unsigned int g_ctxnum; - static unsigned int g_pktlen; - static unsigned int g_prefetch; -+static unsigned int g_state; - - #ifndef ZLIB_FSE - static ZSTD_CCtx* zstd_soft_fse_init(unsigned int level) -@@ -541,6 +552,7 @@ static void *zip_lz77_async_cb(struct wd_comp_req *req, void *data) - zstd_output.dst = uadk_pool->bds[idx].dst; - zstd_output.size = tag->cm_len; - zstd_output.pos = 0; -+ __atomic_add_fetch(&tag->recv_cnt, 1, __ATOMIC_RELAXED); - fse_size = zstd_soft_fse(req->priv, &zstd_input, &zstd_output, cctx, ZSTD_e_end); - - uadk_pool->bds[idx].dst_len = fse_size; -@@ -554,6 +566,7 @@ static void *zip_async_cb(struct wd_comp_req *req, void *data) - struct bd_pool *uadk_pool; - int td_id = tag->td_id; - int idx = tag->bd_idx; -+ __atomic_add_fetch(&tag->recv_cnt, 1, __ATOMIC_RELAXED); - - uadk_pool = &g_zip_pool.pool[td_id]; - uadk_pool->bds[idx].dst_len = req->dst_len; -@@ -566,15 +579,14 @@ static void 
*zip_uadk_poll(void *data) - thread_data *pdata = (thread_data *)data; - u32 expt = ACC_QUEUE_SIZE * g_thread_num; - u32 id = pdata->td_id; -- u32 last_time = 2; // poll need one more recv time - u32 count = 0; - u32 recv = 0; -- int ret; -+ int ret; - - if (id > g_ctxnum) - return NULL; - -- while (last_time) { -+ while (g_state == THREAD_PROCESSING) { - ret = wd_comp_poll_ctx(id, expt, &recv); - count += recv; - recv = 0; -@@ -582,9 +594,6 @@ static void *zip_uadk_poll(void *data) - ZIP_TST_PRT("poll ret: %d!\n", ret); - goto recv_error; - } -- -- if (get_run_state() == 0) -- last_time--; - } - - recv_error: -@@ -596,12 +605,11 @@ recv_error: - static void *zip_uadk_poll2(void *data) - { - u32 expt = ACC_QUEUE_SIZE * g_thread_num; -- u32 last_time = 2; // poll need one more recv time - u32 count = 0; - u32 recv = 0; - int ret; - -- while (last_time) { -+ while (g_state == THREAD_PROCESSING) { - ret = wd_comp_poll(expt, &recv); - count += recv; - recv = 0; -@@ -609,9 +617,6 @@ static void *zip_uadk_poll2(void *data) - ZIP_TST_PRT("poll ret: %d!\n", ret); - goto recv_error; - } -- -- if (get_run_state() == 0) -- last_time--; - } - - recv_error: -@@ -803,11 +808,8 @@ static void *zip_uadk_blk_lz77_async_run(void *arg) - thread_data *pdata = (thread_data *)arg; - struct wd_comp_sess_setup comp_setup = {0}; - ZSTD_CCtx *cctx = zstd_soft_fse_init(15); -- COMP_TUPLE_TAG *ftuple = NULL; - struct bd_pool *uadk_pool; - struct wd_comp_req creq; -- struct zip_async_tag *tag; -- char *hw_buff_out = NULL; - handle_t h_sess; - u32 out_len = 0; - u32 count = 0; -@@ -838,37 +840,22 @@ static void *zip_uadk_blk_lz77_async_run(void *arg) - creq.data_fmt = 0; - creq.status = 0; - -- ftuple = malloc(sizeof(COMP_TUPLE_TAG) * MAX_POOL_LENTH_COMP); -- if (!ftuple) -- goto fse_err; -- -- hw_buff_out = malloc(out_len * MAX_POOL_LENTH_COMP); -- if (!hw_buff_out) -- goto hw_buff_err; -- memset(hw_buff_out, 0x0, out_len * MAX_POOL_LENTH_COMP); -- -- tag = malloc(sizeof(*tag) * 
MAX_POOL_LENTH_COMP); -- if (!tag) { -- ZIP_TST_PRT("failed to malloc zip tag!\n"); -- goto tag_err; -- } -- - while(1) { - if (get_run_state() == 0) - break; - - i = count % MAX_POOL_LENTH_COMP; - creq.src = uadk_pool->bds[i].src; -- creq.dst = &hw_buff_out[i]; //temp out -+ creq.dst = &pdata->hw_buff_out[i]; //temp out - creq.src_len = uadk_pool->bds[i].src_len; - creq.dst_len = out_len; -- creq.priv = &ftuple[i]; -+ creq.priv = &pdata->ftuple[i]; - -- tag[i].td_id = pdata->td_id; -- tag[i].bd_idx = i; -- tag[i].cm_len = out_len; -- tag[i].cctx = cctx; -- creq.cb_param = &tag[i]; -+ pdata->tag[i].td_id = pdata->td_id; -+ pdata->tag[i].bd_idx = i; -+ pdata->tag[i].cm_len = out_len; -+ pdata->tag[i].cctx = cctx; -+ creq.cb_param = &pdata->tag[i]; - - ret = wd_do_comp_async(h_sess, &creq); - if (ret == -WD_EBUSY) { -@@ -884,20 +871,8 @@ static void *zip_uadk_blk_lz77_async_run(void *arg) - } - try_cnt = 0; - count++; -+ __atomic_add_fetch(&pdata->send_cnt, 1, __ATOMIC_RELAXED); - } -- -- while (1) { -- if (get_recv_time() > 0) // wait Async mode finish recv -- break; -- usleep(SEND_USLEEP); -- } -- --tag_err: -- free(tag); --hw_buff_err: -- free(hw_buff_out); --fse_err: -- free(ftuple); - wd_comp_free_sess(h_sess); - add_send_complete(); - -@@ -1033,7 +1008,6 @@ static void *zip_uadk_blk_async_run(void *arg) - thread_data *pdata = (thread_data *)arg; - struct wd_comp_sess_setup comp_setup = {0}; - struct bd_pool *uadk_pool; -- struct zip_async_tag *tag; - struct wd_comp_req creq; - handle_t h_sess; - int try_cnt = 0; -@@ -1066,13 +1040,6 @@ static void *zip_uadk_blk_async_run(void *arg) - creq.priv = 0; - creq.status = 0; - -- tag = malloc(sizeof(*tag) * MAX_POOL_LENTH_COMP); -- if (!tag) { -- ZIP_TST_PRT("failed to malloc zip tag!\n"); -- wd_comp_free_sess(h_sess); -- return NULL; -- } -- - while(1) { - if (get_run_state() == 0) - break; -@@ -1083,9 +1050,9 @@ static void *zip_uadk_blk_async_run(void *arg) - creq.src_len = uadk_pool->bds[i].src_len; - creq.dst_len 
= out_len; - -- tag[i].td_id = pdata->td_id; -- tag[i].bd_idx = i; -- creq.cb_param = &tag[i]; -+ pdata->tag[i].td_id = pdata->td_id; -+ pdata->tag[i].bd_idx = i; -+ creq.cb_param = &pdata->tag[i]; - - ret = wd_do_comp_async(h_sess, &creq); - if (ret == -WD_EBUSY) { -@@ -1101,15 +1068,9 @@ static void *zip_uadk_blk_async_run(void *arg) - } - try_cnt = 0; - count++; -+ __atomic_add_fetch(&pdata->send_cnt, 1, __ATOMIC_RELAXED); - } - -- while (1) { -- if (get_recv_time() > 0) // wait Async mode finish recv -- break; -- usleep(SEND_USLEEP); -- } -- -- free(tag); - wd_comp_free_sess(h_sess); - - add_send_complete(); -@@ -1215,10 +1176,35 @@ static int zip_uadk_async_threads(struct acc_option *options) - threads_args[i].win_sz = threads_option.win_sz; - threads_args[i].comp_lv = threads_option.comp_lv; - threads_args[i].td_id = i; -+ if (threads_option.alg == LZ77_ZSTD) { -+ struct bd_pool *uadk_pool = &g_zip_pool.pool[i]; -+ u32 out_len = uadk_pool->bds[0].dst_len; -+ -+ threads_args[i].ftuple = malloc(sizeof(COMP_TUPLE_TAG) * -+ MAX_POOL_LENTH_COMP); -+ if (!threads_args[i].ftuple) { -+ ZIP_TST_PRT("failed to malloc lz77 ftuple!\n"); -+ goto lz77_free; -+ } -+ -+ threads_args[i].hw_buff_out = malloc(out_len * MAX_POOL_LENTH_COMP); -+ if (!threads_args[i].hw_buff_out) { -+ ZIP_TST_PRT("failed to malloc lz77 hw_buff_out!\n"); -+ goto lz77_free; -+ } -+ memset(threads_args[i].hw_buff_out, 0x0, out_len * MAX_POOL_LENTH_COMP); -+ } -+ threads_args[i].tag = malloc(sizeof(struct zip_async_tag) * MAX_POOL_LENTH_COMP); -+ if (!threads_args[i].tag) { -+ ZIP_TST_PRT("failed to malloc zip tag!\n"); -+ goto tag_free; -+ } -+ threads_args[i].tag->recv_cnt = 0; -+ threads_args[i].send_cnt = 0; - ret = pthread_create(&tdid[i], NULL, uadk_zip_async_run, &threads_args[i]); - if (ret) { - ZIP_TST_PRT("Create async thread fail!\n"); -- goto async_error; -+ goto tag_free; - } - } - -@@ -1227,18 +1213,41 @@ static int zip_uadk_async_threads(struct acc_option *options) - ret = 
pthread_join(tdid[i], NULL); - if (ret) { - ZIP_TST_PRT("Join async thread fail!\n"); -- goto async_error; -+ goto tag_free; - } - } - -+ /* wait for the poll to clear packets */ -+ g_state = THREAD_PROCESSING; -+ for (i = 0; i < g_thread_num;) { -+ if (threads_args[i].send_cnt <= threads_args[i].tag->recv_cnt + MAX_UNRECV_PACKET_NUM) -+ i++; -+ } -+ g_state = THREAD_COMPLETED; // finish poll -+ - for (i = 0; i < g_ctxnum; i++) { - ret = pthread_join(pollid[i], NULL); - if (ret) { - ZIP_TST_PRT("Join poll thread fail!\n"); -- goto async_error; -+ goto tag_free; - } - } - -+tag_free: -+ for (i = 0; i < g_thread_num; i++) { -+ if (threads_args[i].tag) -+ free(threads_args[i].tag); -+ } -+lz77_free: -+ if (threads_option.alg == LZ77_ZSTD) { -+ for (i = 0; i < g_thread_num; i++) { -+ if (threads_args[i].ftuple) -+ free(threads_args[i].ftuple); -+ -+ if (threads_args[i].hw_buff_out) -+ free(threads_args[i].hw_buff_out); -+ } -+ } - async_error: - return ret; - } -diff --git a/uadk_tool/benchmark/zip_wd_benchmark.c b/uadk_tool/benchmark/zip_wd_benchmark.c -index 4424e08..cbe07fc 100644 ---- a/uadk_tool/benchmark/zip_wd_benchmark.c -+++ b/uadk_tool/benchmark/zip_wd_benchmark.c -@@ -21,6 +21,7 @@ - #define COMPRESSION_RATIO_FACTOR 0.7 - #define MAX_POOL_LENTH_COMP 512 - #define CHUNK_SIZE (128 * 1024) -+#define MAX_UNRECV_PACKET_NUM 2 - - #define __ALIGN_MASK(x, mask) (((x) + (mask)) & ~(mask)) - #define ALIGN(x, a) __ALIGN_MASK(x, (typeof(x))(a)-1) -@@ -49,6 +50,11 @@ enum ZIP_OP_MODE { - STREAM_MODE - }; - -+enum ZIP_THREAD_STATE { -+ THREAD_PROCESSING, -+ THREAD_COMPLETED -+}; -+ - struct zip_async_tag { - void *ctx; - u32 td_id; -@@ -75,6 +81,8 @@ struct zip_file_head { - - static unsigned int g_thread_num; - static unsigned int g_pktlen; -+static unsigned int g_send_cnt[THREADS_NUM]; -+static unsigned int g_recv_state[THREADS_NUM]; - - static int save_file_data(const char *alg, u32 pkg_len, u32 optype) - { -@@ -470,9 +478,10 @@ static void *zip_wd_poll(void *data) - 
count += recv; - recv = 0; - -- if (get_run_state() == 0) -+ if (get_run_state() == 0 && g_send_cnt[id] <= count + MAX_UNRECV_PACKET_NUM) - last_time--; - } -+ g_recv_state[id] = THREAD_COMPLETED; - - recv_error: - add_recv_data(count, g_pktlen); -@@ -746,13 +755,11 @@ static void *zip_wd_blk_lz77_async_run(void *arg) - } - try_cnt = 0; - count++; -+ __atomic_add_fetch(&g_send_cnt[pdata->td_id], 1, __ATOMIC_RELAXED); - } - -- while (1) { -- if (get_recv_time() > 0) // wait Async mode finish recv -- break; -+ while (g_recv_state[pdata->td_id] == THREAD_PROCESSING) - usleep(SEND_USLEEP); -- } - - free(tag); - tag_err: -@@ -1011,13 +1018,11 @@ static void *zip_wd_blk_async_run(void *arg) - } - try_cnt = 0; - count++; -+ __atomic_add_fetch(&g_send_cnt[pdata->td_id], 1, __ATOMIC_RELAXED); - } - -- while (1) { -- if (get_recv_time() > 0) // wait Async mode finish recv -- break; -+ while (g_recv_state[pdata->td_id] == THREAD_PROCESSING) - usleep(SEND_USLEEP); -- } - - tag_release: - free(tag); -@@ -1107,6 +1112,7 @@ static int zip_wd_async_threads(struct acc_option *options) - - for (i = 0; i < g_thread_num; i++) { - threads_args[i].td_id = i; -+ g_recv_state[i] = THREAD_PROCESSING; - /* poll thread */ - ret = pthread_create(&pollid[i], NULL, zip_wd_poll, &threads_args[i]); - if (ret) { -@@ -1122,6 +1128,7 @@ static int zip_wd_async_threads(struct acc_option *options) - threads_args[i].comp_lv = threads_option.comp_lv; - threads_args[i].win_size = threads_option.win_size; - threads_args[i].td_id = i; -+ g_send_cnt[i] = 0; - ret = pthread_create(&tdid[i], NULL, wd_zip_async_run, &threads_args[i]); - if (ret) { - ZIP_TST_PRT("Create async thread fail!\n"); --- -2.25.1 - diff --git a/0041-uadk_tool-fix-queue-application-failure-from-multipl.patch b/0041-uadk_tool-fix-queue-application-failure-from-multipl.patch deleted file mode 100644 index d0b59b8..0000000 --- a/0041-uadk_tool-fix-queue-application-failure-from-multipl.patch +++ /dev/null @@ -1,641 +0,0 @@ -From 
5210ac8a3f616f381d3990e3ca3f92bf23383f25 Mon Sep 17 00:00:00 2001 -From: Qi Tao -Date: Mon, 11 Mar 2024 16:41:41 +0800 -Subject: [PATCH 41/44] uadk_tool: fix queue application failure from multiple - devices - -Specified device: apply queues from a designated device. -No specified device: apply queues from multiple devices. - -Signed-off-by: Qi Tao ---- - uadk_tool/benchmark/hpre_uadk_benchmark.c | 143 ++++++++++++++++----- - uadk_tool/benchmark/sec_uadk_benchmark.c | 141 +++++++++++++++----- - uadk_tool/benchmark/zip_uadk_benchmark.c | 150 ++++++++++++++++------ - 3 files changed, 329 insertions(+), 105 deletions(-) - -diff --git a/uadk_tool/benchmark/hpre_uadk_benchmark.c b/uadk_tool/benchmark/hpre_uadk_benchmark.c -index 729728f..0148e56 100644 ---- a/uadk_tool/benchmark/hpre_uadk_benchmark.c -+++ b/uadk_tool/benchmark/hpre_uadk_benchmark.c -@@ -344,21 +344,17 @@ static int hpre_uadk_param_parse(thread_data *tddata, struct acc_option *options - return 0; - } - --static int init_hpre_ctx_config(struct acc_option *options) -+static int specified_device_request_ctx(struct acc_option *options) - { -- struct uacce_dev_list *list, *tmp; -- int subtype = options->subtype; -+ struct uacce_dev_list *list = NULL; -+ struct uacce_dev_list *tmp = NULL; - char *alg = options->algclass; - int mode = options->syncmode; - struct uacce_dev *dev = NULL; -- struct sched_params param; -- int max_node, i; -+ int avail_ctx = 0; - char *dev_name; - int ret = 0; -- -- max_node = numa_max_node() + 1; -- if (max_node <= 0) -- return -EINVAL; -+ int i = 0; - - list = wd_get_accel_list(alg); - if (!list) { -@@ -366,15 +362,11 @@ static int init_hpre_ctx_config(struct acc_option *options) - return -ENODEV; - } - -- if (strlen(options->device) == 0) { -- dev = list->dev; -- } else { -- for (tmp = list; tmp; tmp = tmp->next) { -- dev_name = strrchr(tmp->dev->dev_root, '/') + 1; -- if (!strcmp(dev_name, options->device)) { -- dev = tmp->dev; -- break; -- } -+ for (tmp = list; tmp != NULL; tmp 
= tmp->next) { -+ dev_name = strrchr(tmp->dev->dev_root, '/') + 1; -+ if (!strcmp(dev_name, options->device)) { -+ dev = tmp->dev; -+ break; - } - } - -@@ -384,30 +376,114 @@ static int init_hpre_ctx_config(struct acc_option *options) - goto free_list; - } - -- /* If there is no numa, we defualt config to zero */ -- if (dev->numa_id < 0) -- dev->numa_id = 0; -- -- memset(&g_ctx_cfg, 0, sizeof(struct wd_ctx_config)); -- g_ctx_cfg.ctx_num = g_ctxnum; -- g_ctx_cfg.ctxs = calloc(g_ctxnum, sizeof(struct wd_ctx)); -- if (!g_ctx_cfg.ctxs) { -- ret = -ENOMEM; -+ avail_ctx = wd_get_avail_ctx(dev); -+ if (avail_ctx < 0) { -+ HPRE_TST_PRT("failed to get the number of available ctx from %s\n", options->device); -+ ret = avail_ctx; -+ goto free_list; -+ } else if (avail_ctx < g_ctxnum) { -+ HPRE_TST_PRT("error: not enough ctx available in %s\n", options->device); -+ ret = -ENODEV; - goto free_list; - } - -+ /* If there is no numa, we default config to zero */ -+ if (dev->numa_id < 0) -+ dev->numa_id = 0; -+ - for (i = 0; i < g_ctxnum; i++) { - g_ctx_cfg.ctxs[i].ctx = wd_request_ctx(dev); - if (!g_ctx_cfg.ctxs[i].ctx) { - HPRE_TST_PRT("failed to alloc %dth ctx\n", i); -- ret = -ENODEV; -+ ret = -ENOMEM; - goto free_ctx; - } -- - g_ctx_cfg.ctxs[i].op_type = 0; - g_ctx_cfg.ctxs[i].ctx_mode = (__u8)mode; - } - -+ wd_free_list_accels(list); -+ return 0; -+ -+free_ctx: -+ for (; i >= 0; i--) -+ wd_release_ctx(g_ctx_cfg.ctxs[i].ctx); -+ -+free_list: -+ wd_free_list_accels(list); -+ -+ return ret; -+} -+ -+static int non_specified_device_request_ctx(struct acc_option *options) -+{ -+ char *alg = options->algclass; -+ int mode = options->syncmode; -+ struct uacce_dev *dev = NULL; -+ int ret = 0; -+ int i = 0; -+ -+ while (i < g_ctxnum) { -+ dev = wd_get_accel_dev(alg); -+ if (!dev) { -+ HPRE_TST_PRT("failed to get %s device\n", alg); -+ ret = -ENODEV; -+ goto free_ctx; -+ } -+ -+ /* If there is no numa, we default config to zero */ -+ if (dev->numa_id < 0) -+ dev->numa_id = 0; -+ -+ for 
(; i < g_ctxnum; i++) { -+ g_ctx_cfg.ctxs[i].ctx = wd_request_ctx(dev); -+ if (!g_ctx_cfg.ctxs[i].ctx) -+ break; -+ -+ g_ctx_cfg.ctxs[i].op_type = 0; -+ g_ctx_cfg.ctxs[i].ctx_mode = (__u8)mode; -+ } -+ -+ free(dev); -+ } -+ -+ return 0; -+ -+free_ctx: -+ for (; i >= 0; i--) -+ wd_release_ctx(g_ctx_cfg.ctxs[i].ctx); -+ -+ return ret; -+} -+ -+static int init_hpre_ctx_config(struct acc_option *options) -+{ -+ struct sched_params param = {0}; -+ int subtype = options->subtype; -+ int mode = options->syncmode; -+ int max_node; -+ int ret = 0; -+ -+ max_node = numa_max_node() + 1; -+ if (max_node <= 0) -+ return -EINVAL; -+ -+ memset(&g_ctx_cfg, 0, sizeof(struct wd_ctx_config)); -+ g_ctx_cfg.ctx_num = g_ctxnum; -+ g_ctx_cfg.ctxs = calloc(g_ctxnum, sizeof(struct wd_ctx)); -+ if (!g_ctx_cfg.ctxs) -+ return -ENOMEM; -+ -+ if (strlen(options->device) != 0) -+ ret = specified_device_request_ctx(options); -+ else -+ ret = non_specified_device_request_ctx(options); -+ -+ if (ret) { -+ HPRE_TST_PRT("failed to request hpre ctx!\n"); -+ goto free_ctxs; -+ } -+ - switch(subtype) { - case RSA_TYPE: - g_sched = wd_sched_rr_alloc(SCHED_POLICY_RR, 1, max_node, wd_rsa_poll_ctx); -@@ -460,7 +536,7 @@ static int init_hpre_ctx_config(struct acc_option *options) - break; - } - if (ret) { -- HPRE_TST_PRT("failed to get hpre ctx!\n"); -+ HPRE_TST_PRT("failed to init hpre ctx!\n"); - goto free_sched; - } - -@@ -470,12 +546,11 @@ free_sched: - wd_sched_rr_release(g_sched); - - free_ctx: -- for (; i >= 0; i--) -+ for (int i = g_ctxnum; i >= 0; i--) - wd_release_ctx(g_ctx_cfg.ctxs[i].ctx); -- free(g_ctx_cfg.ctxs); - --free_list: -- wd_free_list_accels(list); -+free_ctxs: -+ free(g_ctx_cfg.ctxs); - - return ret; - } -diff --git a/uadk_tool/benchmark/sec_uadk_benchmark.c b/uadk_tool/benchmark/sec_uadk_benchmark.c -index 2c12c20..56f4fa6 100644 ---- a/uadk_tool/benchmark/sec_uadk_benchmark.c -+++ b/uadk_tool/benchmark/sec_uadk_benchmark.c -@@ -544,21 +544,17 @@ static int 
sec_uadk_param_parse(thread_data *tddata, struct acc_option *options) - return 0; - } - --static int init_ctx_config(struct acc_option *options) -+static int specified_device_request_ctx(struct acc_option *options) - { -- struct uacce_dev_list *list, *tmp; -- struct sched_params param = {0}; -- int subtype = options->subtype; -+ struct uacce_dev_list *list = NULL; -+ struct uacce_dev_list *tmp = NULL; - char *alg = options->algclass; - int mode = options->syncmode; - struct uacce_dev *dev = NULL; -- int max_node, i; -+ int avail_ctx = 0; - char *dev_name; - int ret = 0; -- -- max_node = numa_max_node() + 1; -- if (max_node <= 0) -- return -EINVAL; -+ int i = 0; - - list = wd_get_accel_list(alg); - if (!list) { -@@ -566,15 +562,11 @@ static int init_ctx_config(struct acc_option *options) - return -ENODEV; - } - -- if (strlen(options->device) == 0) { -- dev = list->dev; -- } else { -- for (tmp = list; tmp; tmp = tmp->next) { -- dev_name = strrchr(tmp->dev->dev_root, '/') + 1; -- if (!strcmp(dev_name, options->device)) { -- dev = tmp->dev; -- break; -- } -+ for (tmp = list; tmp != NULL; tmp = tmp->next) { -+ dev_name = strrchr(tmp->dev->dev_root, '/') + 1; -+ if (!strcmp(dev_name, options->device)) { -+ dev = tmp->dev; -+ break; - } - } - -@@ -584,18 +576,21 @@ static int init_ctx_config(struct acc_option *options) - goto free_list; - } - -- /* If there is no numa, we defualt config to zero */ -- if (dev->numa_id < 0) -- dev->numa_id = 0; -- -- memset(&g_ctx_cfg, 0, sizeof(struct wd_ctx_config)); -- g_ctx_cfg.ctx_num = g_ctxnum; -- g_ctx_cfg.ctxs = calloc(g_ctxnum, sizeof(struct wd_ctx)); -- if (!g_ctx_cfg.ctxs) { -- ret = -ENOMEM; -+ avail_ctx = wd_get_avail_ctx(dev); -+ if (avail_ctx < 0) { -+ SEC_TST_PRT("failed to get the number of available ctx from %s\n", options->device); -+ ret = avail_ctx; -+ goto free_list; -+ } else if (avail_ctx < g_ctxnum) { -+ SEC_TST_PRT("error: not enough ctx available in %s\n", options->device); -+ ret = -ENODEV; - goto free_list; - } 
- -+ /* If there is no numa, we default config to zero */ -+ if (dev->numa_id < 0) -+ dev->numa_id = 0; -+ - for (i = 0; i < g_ctxnum; i++) { - g_ctx_cfg.ctxs[i].ctx = wd_request_ctx(dev); - if (!g_ctx_cfg.ctxs[i].ctx) { -@@ -603,11 +598,92 @@ static int init_ctx_config(struct acc_option *options) - ret = -ENOMEM; - goto free_ctx; - } -- - g_ctx_cfg.ctxs[i].op_type = 0; - g_ctx_cfg.ctxs[i].ctx_mode = (__u8)mode; - } - -+ wd_free_list_accels(list); -+ return 0; -+ -+free_ctx: -+ for (; i >= 0; i--) -+ wd_release_ctx(g_ctx_cfg.ctxs[i].ctx); -+ -+free_list: -+ wd_free_list_accels(list); -+ -+ return ret; -+} -+ -+static int non_specified_device_request_ctx(struct acc_option *options) -+{ -+ char *alg = options->algclass; -+ int mode = options->syncmode; -+ struct uacce_dev *dev = NULL; -+ int ret = 0; -+ int i = 0; -+ -+ while (i < g_ctxnum) { -+ dev = wd_get_accel_dev(alg); -+ if (!dev) { -+ SEC_TST_PRT("failed to get %s device\n", alg); -+ ret = -ENODEV; -+ goto free_ctx; -+ } -+ -+ /* If there is no numa, we default config to zero */ -+ if (dev->numa_id < 0) -+ dev->numa_id = 0; -+ -+ for (; i < g_ctxnum; i++) { -+ g_ctx_cfg.ctxs[i].ctx = wd_request_ctx(dev); -+ if (!g_ctx_cfg.ctxs[i].ctx) -+ break; -+ -+ g_ctx_cfg.ctxs[i].op_type = 0; -+ g_ctx_cfg.ctxs[i].ctx_mode = (__u8)mode; -+ } -+ -+ free(dev); -+ } -+ -+ return 0; -+ -+free_ctx: -+ for (; i >= 0; i--) -+ wd_release_ctx(g_ctx_cfg.ctxs[i].ctx); -+ -+ return ret; -+} -+ -+static int init_ctx_config(struct acc_option *options) -+{ -+ struct sched_params param = {0}; -+ int subtype = options->subtype; -+ int mode = options->syncmode; -+ int max_node; -+ int ret = 0; -+ -+ max_node = numa_max_node() + 1; -+ if (max_node <= 0) -+ return -EINVAL; -+ -+ memset(&g_ctx_cfg, 0, sizeof(struct wd_ctx_config)); -+ g_ctx_cfg.ctx_num = g_ctxnum; -+ g_ctx_cfg.ctxs = calloc(g_ctxnum, sizeof(struct wd_ctx)); -+ if (!g_ctx_cfg.ctxs) -+ return -ENOMEM; -+ -+ if (strlen(options->device) != 0) -+ ret = 
specified_device_request_ctx(options); -+ else -+ ret = non_specified_device_request_ctx(options); -+ -+ if (ret) { -+ SEC_TST_PRT("failed to request sec ctx!\n"); -+ goto free_ctxs; -+ } -+ - switch(subtype) { - case CIPHER_TYPE: - g_sched = wd_sched_rr_alloc(SCHED_POLICY_RR, 1, max_node, wd_cipher_poll_ctx); -@@ -652,7 +728,7 @@ static int init_ctx_config(struct acc_option *options) - break; - } - if (ret) { -- SEC_TST_PRT("failed to cipher ctx!\n"); -+ SEC_TST_PRT("failed to init sec ctx!\n"); - goto free_sched; - } - -@@ -662,12 +738,11 @@ free_sched: - wd_sched_rr_release(g_sched); - - free_ctx: -- for (; i >= 0; i--) -+ for (int i = g_ctxnum; i >= 0; i--) - wd_release_ctx(g_ctx_cfg.ctxs[i].ctx); -- free(g_ctx_cfg.ctxs); - --free_list: -- wd_free_list_accels(list); -+free_ctxs: -+ free(g_ctx_cfg.ctxs); - - return ret; - } -diff --git a/uadk_tool/benchmark/zip_uadk_benchmark.c b/uadk_tool/benchmark/zip_uadk_benchmark.c -index 1dd3990..e2876a9 100644 ---- a/uadk_tool/benchmark/zip_uadk_benchmark.c -+++ b/uadk_tool/benchmark/zip_uadk_benchmark.c -@@ -318,21 +318,17 @@ static int init_ctx_config2(struct acc_option *options) - return 0; - } - --static int init_ctx_config(struct acc_option *options) -+static int specified_device_request_ctx(struct acc_option *options) - { -- struct uacce_dev_list *list, *tmp; -+ struct uacce_dev_list *list = NULL; -+ struct uacce_dev_list *tmp = NULL; - char *alg = options->algclass; -- int optype = options->optype; - int mode = options->syncmode; - struct uacce_dev *dev = NULL; -- int max_node, i; -+ int avail_ctx = 0; - char *dev_name; - int ret = 0; -- -- optype = optype % WD_DIR_MAX; -- max_node = numa_max_node() + 1; -- if (max_node <= 0) -- return -EINVAL; -+ int i = 0; - - list = wd_get_accel_list(alg); - if (!list) { -@@ -340,15 +336,11 @@ static int init_ctx_config(struct acc_option *options) - return -ENODEV; - } - -- if (strlen(options->device) == 0) { -- dev = list->dev; -- } else { -- for (tmp = list; tmp; tmp = 
tmp->next) { -- dev_name = strrchr(tmp->dev->dev_root, '/') + 1; -- if (!strcmp(dev_name, options->device)) { -- dev = tmp->dev; -- break; -- } -+ for (tmp = list; tmp != NULL; tmp = tmp->next) { -+ dev_name = strrchr(tmp->dev->dev_root, '/') + 1; -+ if (!strcmp(dev_name, options->device)) { -+ dev = tmp->dev; -+ break; - } - } - -@@ -358,29 +350,114 @@ static int init_ctx_config(struct acc_option *options) - goto free_list; - } - -- /* If there is no numa, we defualt config to zero */ -- if (dev->numa_id < 0) -- dev->numa_id = 0; -- -- memset(&g_ctx_cfg, 0, sizeof(struct wd_ctx_config)); -- g_ctx_cfg.ctx_num = g_ctxnum; -- g_ctx_cfg.ctxs = calloc(g_ctxnum, sizeof(struct wd_ctx)); -- if (!g_ctx_cfg.ctxs) { -- ret = -ENOMEM; -+ avail_ctx = wd_get_avail_ctx(dev); -+ if (avail_ctx < 0) { -+ ZIP_TST_PRT("failed to get the number of available ctx from %s\n", options->device); -+ ret = avail_ctx; -+ goto free_list; -+ } else if (avail_ctx < g_ctxnum) { -+ ZIP_TST_PRT("error: not enough ctx available in %s\n", options->device); -+ ret = -ENODEV; - goto free_list; - } - -- for (i = 0; i < g_ctxnum; i++) { -+ /* If there is no numa, we default config to zero */ -+ if (dev->numa_id < 0) -+ dev->numa_id = 0; -+ -+ for (; i < g_ctxnum; i++) { - g_ctx_cfg.ctxs[i].ctx = wd_request_ctx(dev); - if (!g_ctx_cfg.ctxs[i].ctx) { - ZIP_TST_PRT("failed to alloc %dth ctx\n", i); -+ ret = -ENOMEM; - goto free_ctx; - } -- -- g_ctx_cfg.ctxs[i].op_type = optype; -+ g_ctx_cfg.ctxs[i].op_type = 0; - g_ctx_cfg.ctxs[i].ctx_mode = (__u8)mode; - } - -+ wd_free_list_accels(list); -+ return 0; -+ -+free_ctx: -+ for (; i >= 0; i--) -+ wd_release_ctx(g_ctx_cfg.ctxs[i].ctx); -+ -+free_list: -+ wd_free_list_accels(list); -+ -+ return ret; -+} -+ -+static int non_specified_device_request_ctx(struct acc_option *options) -+{ -+ char *alg = options->algclass; -+ int mode = options->syncmode; -+ struct uacce_dev *dev = NULL; -+ int ret = 0; -+ int i = 0; -+ -+ while (i < g_ctxnum) { -+ dev = 
wd_get_accel_dev(alg); -+ if (!dev) { -+ ZIP_TST_PRT("failed to get %s device\n", alg); -+ ret = -ENODEV; -+ goto free_ctx; -+ } -+ -+ /* If there is no numa, we default config to zero */ -+ if (dev->numa_id < 0) -+ dev->numa_id = 0; -+ -+ for (; i < g_ctxnum; i++) { -+ g_ctx_cfg.ctxs[i].ctx = wd_request_ctx(dev); -+ if (!g_ctx_cfg.ctxs[i].ctx) -+ break; -+ -+ g_ctx_cfg.ctxs[i].op_type = 0; -+ g_ctx_cfg.ctxs[i].ctx_mode = (__u8)mode; -+ } -+ -+ free(dev); -+ } -+ -+ return 0; -+ -+free_ctx: -+ for (; i >= 0; i--) -+ wd_release_ctx(g_ctx_cfg.ctxs[i].ctx); -+ -+ return ret; -+} -+ -+static int init_ctx_config(struct acc_option *options) -+{ -+ int optype = options->optype; -+ int mode = options->syncmode; -+ int max_node; -+ int ret = 0; -+ -+ optype = optype % WD_DIR_MAX; -+ max_node = numa_max_node() + 1; -+ if (max_node <= 0) -+ return -EINVAL; -+ -+ memset(&g_ctx_cfg, 0, sizeof(struct wd_ctx_config)); -+ g_ctx_cfg.ctx_num = g_ctxnum; -+ g_ctx_cfg.ctxs = calloc(g_ctxnum, sizeof(struct wd_ctx)); -+ if (!g_ctx_cfg.ctxs) -+ return -ENOMEM; -+ -+ if (strlen(options->device) != 0) -+ ret = specified_device_request_ctx(options); -+ else -+ ret = non_specified_device_request_ctx(options); -+ -+ if (ret) { -+ ZIP_TST_PRT("failed to request zip ctx!\n"); -+ goto free_ctxs; -+ } -+ - g_sched = wd_sched_rr_alloc(SCHED_POLICY_RR, 2, max_node, wd_comp_poll_ctx); - if (!g_sched) { - ZIP_TST_PRT("failed to alloc sched!\n"); -@@ -394,7 +471,7 @@ static int init_ctx_config(struct acc_option *options) - * All contexts for 2 modes & 2 types. - * The test only uses one kind of contexts at the same time. 
- */ -- param.numa_id = dev->numa_id; -+ param.numa_id = 0; - param.type = optype; - param.mode = mode; - param.begin = 0; -@@ -407,24 +484,21 @@ static int init_ctx_config(struct acc_option *options) - - ret = wd_comp_init(&g_ctx_cfg, g_sched); - if (ret) { -- ZIP_TST_PRT("failed to cipher ctx!\n"); -+ ZIP_TST_PRT("failed to init zip ctx!\n"); - goto free_sched; - } - -- wd_free_list_accels(list); -- - return 0; - - free_sched: - wd_sched_rr_release(g_sched); - - free_ctx: -- for (; i >= 0; i--) -+ for (int i = g_ctxnum; i >= 0; i--) - wd_release_ctx(g_ctx_cfg.ctxs[i].ctx); -- free(g_ctx_cfg.ctxs); - --free_list: -- wd_free_list_accels(list); -+free_ctxs: -+ free(g_ctx_cfg.ctxs); - - return ret; - } --- -2.25.1 - diff --git a/0042-ecc-check-need_debug-before-calling-WD_DEBUG.patch b/0042-ecc-check-need_debug-before-calling-WD_DEBUG.patch deleted file mode 100644 index fff8e56..0000000 --- a/0042-ecc-check-need_debug-before-calling-WD_DEBUG.patch +++ /dev/null @@ -1,54 +0,0 @@ -From ba54780c666c7f655cf6b18d0072e1e892656252 Mon Sep 17 00:00:00 2001 -From: Weili Qian -Date: Wed, 3 Apr 2024 11:24:52 +0800 -Subject: [PATCH 42/44] ecc: check need_debug before calling WD_DEBUG - -Before calling WD_DEBUG, check whether debug logs need to be recorded -to prevent the syslog syscall from affecting the performance. 
- -Signed-off-by: Weili Qian -Signed-off-by: JiangShui Yang ---- - wd_ecc.c | 8 +++++--- - 1 file changed, 5 insertions(+), 3 deletions(-) - -diff --git a/wd_ecc.c b/wd_ecc.c -index e75bca0..65727e7 100644 ---- a/wd_ecc.c -+++ b/wd_ecc.c -@@ -997,20 +997,19 @@ static int fill_user_curve_cfg(struct wd_ecc_curve *param, - struct wd_ecc_sess_setup *setup) - { - struct wd_ecc_curve *src_param = setup->cv.cfg.pparam; -- __u32 curve_id; -+ bool need_debug = wd_need_debug(); -+ __u32 curve_id = 0; - int ret = 0; - - if (setup->cv.type == WD_CV_CFG_ID) { - curve_id = setup->cv.cfg.id; - ret = fill_param_by_id(param, setup->key_bits, curve_id); -- WD_DEBUG("set curve id %u!\n", curve_id); - } else if (setup->cv.type == WD_CV_CFG_PARAM) { - ret = set_key_cv(param, src_param); - if (ret) { - WD_ERR("failed to set key cv!\n"); - return ret; - } -- WD_DEBUG("set curve by user param!\n"); - } else { - WD_ERR("invalid: fill curve cfg type %u is error!\n", setup->cv.type); - return -WD_EINVAL; -@@ -1022,6 +1021,9 @@ static int fill_user_curve_cfg(struct wd_ecc_curve *param, - return -WD_EINVAL; - } - -+ if (need_debug) -+ WD_DEBUG("curve cfg type is %u, curve_id is %u!\n", setup->cv.type, curve_id); -+ - return ret; - } - --- -2.25.1 - diff --git a/0043-uadk-remove-unused-ioctl-cmd.patch b/0043-uadk-remove-unused-ioctl-cmd.patch deleted file mode 100644 index a0bfc71..0000000 --- a/0043-uadk-remove-unused-ioctl-cmd.patch +++ /dev/null @@ -1,28 +0,0 @@ -From b6aaaaf9eeb1061806c1a00faddd1ce91e5afa33 Mon Sep 17 00:00:00 2001 -From: Weili Qian -Date: Wed, 3 Apr 2024 11:24:53 +0800 -Subject: [PATCH 43/44] uadk: remove unused ioctl cmd - -Remove unused ioctl cmd UACCE_CMD_GET_SS_DMA. 
- -Signed-off-by: Weili Qian -Signed-off-by: JiangShui Yang ---- - include/uacce.h | 1 - - 1 file changed, 1 deletion(-) - -diff --git a/include/uacce.h b/include/uacce.h -index fb3fb22..bb8d740 100644 ---- a/include/uacce.h -+++ b/include/uacce.h -@@ -15,7 +15,6 @@ extern "C" { - - #define UACCE_CMD_START _IO('W', 0) - #define UACCE_CMD_PUT_Q _IO('W', 1) --#define UACCE_CMD_GET_SS_DMA _IOR('W', 100, unsigned long) - - /** - * UACCE Device flags: --- -2.25.1 - diff --git a/0044-uadk-v1-remove-dummy.patch b/0044-uadk-v1-remove-dummy.patch deleted file mode 100644 index ab6968b..0000000 --- a/0044-uadk-v1-remove-dummy.patch +++ /dev/null @@ -1,247 +0,0 @@ -From deec45b9919adbdf968eae688003b96e69a77011 Mon Sep 17 00:00:00 2001 -From: Wenkai Lin -Date: Wed, 3 Apr 2024 11:24:54 +0800 -Subject: [PATCH 44/44] uadk: v1: remove dummy - -dummy is no longer use, remove it. - -Signed-off-by: Wenkai Lin -Signed-off-by: JiangShui Yang ---- - Makefile.am | 1 - - v1/internal/wd_dummy_usr_if.h | 45 ------------ - v1/test/test_dummy.c | 126 ---------------------------------- - v1/wd_adapter.c | 13 ---- - 4 files changed, 185 deletions(-) - delete mode 100644 v1/internal/wd_dummy_usr_if.h - delete mode 100644 v1/test/test_dummy.c - -diff --git a/Makefile.am b/Makefile.am -index 68f3106..1049639 100644 ---- a/Makefile.am -+++ b/Makefile.am -@@ -60,7 +60,6 @@ libwd_la_SOURCES=wd.c wd_mempool.c wd.h wd_alg.c wd_alg.h \ - v1/wd_bmm.c v1/wd_bmm.h \ - v1/wd_ecc.c v1/wd_ecc.h \ - v1/wd_sgl.c v1/wd_sgl.h \ -- v1/drv/dummy_drv.c v1/drv/dummy_drv.h \ - v1/drv/hisi_qm_udrv.c v1/drv/hisi_qm_udrv.h \ - v1/drv/hisi_zip_udrv.c v1/drv/hisi_zip_udrv.h \ - v1/drv/hisi_hpre_udrv.c v1/drv/hisi_hpre_udrv.h \ -diff --git a/v1/internal/wd_dummy_usr_if.h b/v1/internal/wd_dummy_usr_if.h -deleted file mode 100644 -index b5673ec..0000000 ---- a/v1/internal/wd_dummy_usr_if.h -+++ /dev/null -@@ -1,45 +0,0 @@ --/* -- * Copyright 2019 Huawei Technologies Co.,Ltd.All rights reserved. 
-- * -- * Licensed under the Apache License, Version 2.0 (the "License"); -- * you may not use this file except in compliance with the License. -- * You may obtain a copy of the License at -- * -- * http://www.apache.org/licenses/LICENSE-2.0 -- * -- * Unless required by applicable law or agreed to in writing, software -- * distributed under the License is distributed on an "AS IS" BASIS, -- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -- * See the License for the specific language governing permissions and -- * limitations under the License. -- */ -- --/* -- * This file defines the dummy algo interface between the user -- * and kernel space -- */ -- --#ifndef __DUMMY_USR_IF_H --#define __DUMMY_USR_IF_H -- -- --/* Algorithm name */ --#define AN_DUMMY_MEMCPY "memcopy" -- --#define AAN_AFLAGS "aflags" --#define AAN_MAX_COPY_SIZE "max_copy_size" -- --struct wd_dummy_cpy_param { -- int flags; -- int max_copy_size; --}; -- --struct wd_dummy_cpy_msg { -- char *src_addr; -- char *tgt_addr; -- size_t size; -- void *ptr; -- __u32 ret; --}; -- --#endif -diff --git a/v1/test/test_dummy.c b/v1/test/test_dummy.c -deleted file mode 100644 -index 75ab33a..0000000 ---- a/v1/test/test_dummy.c -+++ /dev/null -@@ -1,126 +0,0 @@ --/* -- * Copyright 2018-2019 Huawei Technologies Co.,Ltd.All rights reserved. -- * -- * Licensed under the Apache License, Version 2.0 (the "License"); -- * you may not use this file except in compliance with the License. -- * You may obtain a copy of the License at -- * -- * http://www.apache.org/licenses/LICENSE-2.0 -- * -- * Unless required by applicable law or agreed to in writing, software -- * distributed under the License is distributed on an "AS IS" BASIS, -- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -- * See the License for the specific language governing permissions and -- * limitations under the License. 
-- */ -- --#include "../config.h" --#include --#include --#include --#include --#include -- --#include "wd_sched.h" --#include "wd_dummy_usr_if.h" --#include "dummy_hw_usr_if.h" -- --#define CPSZ 4096 -- --#define SYS_ERR_COND(cond, msg) if(cond) { \ -- perror(msg); \ -- exit(EXIT_FAILURE); } -- --struct wd_dummy_cpy_msg *msgs; -- --int wd_dummy_memcpy(struct wd_queue *q, void *dst, void *src, size_t size) --{ -- struct wd_dummy_cpy_msg req, *resp; -- int ret; -- -- req.src_addr = src; -- req.tgt_addr = dst; -- req.size = size; -- -- ret = wd_send(q, (void *)&req); -- if (ret) -- return ret; -- -- return wd_recv_sync(q, (void **)&resp, 1000); --} -- --static void wd_dummy_sched_init_cache(struct wd_scheduler *sched, int i) --{ -- sched->msgs[i].msg = &msgs[i]; -- msgs[i].src_addr = sched->msgs[i].data_in; -- msgs[i].tgt_addr = sched->msgs[i].data_out; -- msgs[i].size = sched->msg_data_size; --} -- --static int input_num = 10; --static int wd_dummy_sched_input(struct wd_msg *msg, void *priv) --{ -- SYS_ERR_COND(input_num <= 0, "input"); -- input_num--; -- memset(msg->data_in, '0'+input_num, CPSZ); -- memset(msg->data_out, 'x', CPSZ); -- -- return 0; --} -- --static int wd_dummy_sched_output(struct wd_msg *msg, void *priv) --{ -- int i; -- char *in, *out; -- -- for (i = 0; i < CPSZ; i++) { -- in = (char *)msg->data_in; -- out = (char *)msg->data_out; -- if(in[i] != out[i]) { -- printf("verify result fail on %d\n", i); -- break; -- } -- -- } -- printf("verify result (%d) success (remained=%d)\n", in[0], input_num); -- -- return 0; --} -- --struct wd_scheduler sched = { -- .q_num = 1, -- .ss_region_size = 0, -- .msg_cache_num = 4, -- .msg_data_size = CPSZ, -- .init_cache = wd_dummy_sched_init_cache, -- .input = wd_dummy_sched_input, -- .output = wd_dummy_sched_output, --}; -- --int main(int argc, char *argv[]) --{ -- int ret, i; -- int max_step = 20; -- -- sched.qs = calloc(sched.q_num, sizeof(*sched.qs)); -- SYS_ERR_COND(!sched.qs, "calloc"); -- -- msgs = 
calloc(sched.msg_cache_num, sizeof(*msgs)); -- SYS_ERR_COND(!msgs, "calloc"); -- -- for (i = 0; i < sched.q_num; i++) -- sched.qs[i].capa.alg = "memcpy"; -- -- ret = wd_sched_init(&sched); -- SYS_ERR_COND(ret, "wd_sched_init"); -- -- while(input_num || !wd_sched_empty(&sched)) { -- ret = wd_sched_work(&sched, input_num); -- SYS_ERR_COND(ret < 0, "wd_sched_work"); -- SYS_ERR_COND(max_step-- < 0, "max_step"); -- } -- -- wd_sched_fini(&sched); -- free(sched.qs); -- return EXIT_SUCCESS; --} -diff --git a/v1/wd_adapter.c b/v1/wd_adapter.c -index d574200..b9b841d 100644 ---- a/v1/wd_adapter.c -+++ b/v1/wd_adapter.c -@@ -20,7 +20,6 @@ - - #include "config.h" - #include "v1/wd_util.h" --#include "v1/drv/dummy_drv.h" - #include "v1/drv/hisi_qm_udrv.h" - #include "v1/drv/hisi_rng_udrv.h" - #include "v1/wd_adapter.h" -@@ -29,18 +28,6 @@ - #define ALIGN(x, a) __ALIGN_MASK(x, (typeof(x))(a)-1) - - static const struct wd_drv_dio_if hw_dio_tbl[] = { { -- .hw_type = "dummy_v1", -- .open = dummy_set_queue_dio, -- .close = dummy_unset_queue_dio, -- .send = dummy_add_to_dio_q, -- .recv = dummy_get_from_dio_q, -- }, { -- .hw_type = "dummy_v2", -- .open = dummy_set_queue_dio, -- .close = dummy_unset_queue_dio, -- .send = dummy_add_to_dio_q, -- .recv = dummy_get_from_dio_q, -- }, { - .hw_type = HISI_QM_API_VER_BASE WD_UACCE_API_VER_NOIOMMU_SUBFIX, - .open = qm_init_queue, - .close = qm_uninit_queue, --- -2.25.1 - diff --git a/0045-cipher-optimze-input-lengths-check.patch b/0045-cipher-optimze-input-lengths-check.patch deleted file mode 100644 index b0cec55..0000000 --- a/0045-cipher-optimze-input-lengths-check.patch +++ /dev/null @@ -1,114 +0,0 @@ -From 4e1a4eb28f0e476cf4587d56b5cef4350b33ab82 Mon Sep 17 00:00:00 2001 -From: Wenkai Lin -Date: Fri, 29 Mar 2024 16:53:04 +0800 -Subject: [PATCH 45/52] cipher: optimze input lengths check - -It is more reasonable to check the input lengths of various cipher -algorithms at the algorithm layer. 
- -Signed-off-by: Wenkai Lin -Signed-off-by: Qi Tao ---- - drv/hisi_sec.c | 19 +++++-------------- - wd_cipher.c | 26 ++++++++++++++++++++++++++ - 2 files changed, 31 insertions(+), 14 deletions(-) - -diff --git a/drv/hisi_sec.c b/drv/hisi_sec.c -index 852340d..6625c41 100644 ---- a/drv/hisi_sec.c -+++ b/drv/hisi_sec.c -@@ -960,10 +960,9 @@ static void parse_cipher_bd2(struct hisi_qp *qp, struct hisi_sec_sqe *sqe, - dump_sec_msg(temp_msg, "cipher"); - } - --static int aes_sm4_len_check(struct wd_cipher_msg *msg) -+static int aes_len_check(struct wd_cipher_msg *msg) - { -- if (msg->alg == WD_CIPHER_AES && -- msg->in_bytes <= AES_BLOCK_SIZE && -+ if (msg->in_bytes <= AES_BLOCK_SIZE && - (msg->mode == WD_CIPHER_CBC_CS1 || - msg->mode == WD_CIPHER_CBC_CS2 || - msg->mode == WD_CIPHER_CBC_CS3)) { -@@ -972,13 +971,6 @@ static int aes_sm4_len_check(struct wd_cipher_msg *msg) - return -WD_EINVAL; - } - -- if ((msg->in_bytes & (AES_BLOCK_SIZE - 1)) && -- (msg->mode == WD_CIPHER_CBC || msg->mode == WD_CIPHER_ECB)) { -- WD_ERR("failed to check input bytes of AES or SM4, size = %u\n", -- msg->in_bytes); -- return -WD_EINVAL; -- } -- - return 0; - } - -@@ -986,8 +978,7 @@ static int cipher_len_check(struct wd_cipher_msg *msg) - { - int ret; - -- if (msg->in_bytes > MAX_INPUT_DATA_LEN || -- !msg->in_bytes) { -+ if (msg->in_bytes > MAX_INPUT_DATA_LEN) { - WD_ERR("input cipher length is error, size = %u\n", - msg->in_bytes); - return -WD_EINVAL; -@@ -1016,8 +1007,8 @@ static int cipher_len_check(struct wd_cipher_msg *msg) - return 0; - } - -- if (msg->alg == WD_CIPHER_AES || msg->alg == WD_CIPHER_SM4) { -- ret = aes_sm4_len_check(msg); -+ if (msg->alg == WD_CIPHER_AES) { -+ ret = aes_len_check(msg); - if (ret) - return ret; - } -diff --git a/wd_cipher.c b/wd_cipher.c -index f35ce6f..279ca8b 100644 ---- a/wd_cipher.c -+++ b/wd_cipher.c -@@ -565,6 +565,28 @@ static int cipher_iv_len_check(struct wd_cipher_req *req, - return ret; - } - -+static int cipher_len_check(handle_t h_sess, 
struct wd_cipher_req *req) -+{ -+ struct wd_cipher_sess *sess = (struct wd_cipher_sess *)h_sess; -+ -+ if (!req->in_bytes) { -+ WD_ERR("invalid: cipher input length is zero!\n"); -+ return -WD_EINVAL; -+ } -+ -+ if (sess->alg != WD_CIPHER_AES && sess->alg != WD_CIPHER_SM4) -+ return 0; -+ -+ if ((req->in_bytes & (AES_BLOCK_SIZE - 1)) && -+ (sess->mode == WD_CIPHER_CBC || sess->mode == WD_CIPHER_ECB)) { -+ WD_ERR("failed to check input bytes of AES or SM4, size = %u\n", -+ req->in_bytes); -+ return -WD_EINVAL; -+ } -+ -+ return 0; -+} -+ - static int wd_cipher_check_params(handle_t h_sess, - struct wd_cipher_req *req, __u8 mode) - { -@@ -587,6 +609,10 @@ static int wd_cipher_check_params(handle_t h_sess, - return -WD_EINVAL; - } - -+ ret = cipher_len_check(h_sess, req); -+ if (unlikely(ret)) -+ return ret; -+ - ret = wd_check_src_dst(req->src, req->in_bytes, req->dst, req->out_bytes); - if (unlikely(ret)) { - WD_ERR("invalid: src/dst addr is NULL when src/dst size is non-zero!\n"); --- -2.25.1 - diff --git a/0046-uadk-v1-improve-the-judgment-conditions-of-tag.patch b/0046-uadk-v1-improve-the-judgment-conditions-of-tag.patch deleted file mode 100644 index eacd21e..0000000 --- a/0046-uadk-v1-improve-the-judgment-conditions-of-tag.patch +++ /dev/null @@ -1,54 +0,0 @@ -From 628139bccaff2499d35cb530f54519f0aa744923 Mon Sep 17 00:00:00 2001 -From: Longfang Liu -Date: Fri, 29 Mar 2024 16:54:41 +0800 -Subject: [PATCH 46/52] uadk/v1: improve the judgment conditions of tag - -Before calling this function, it is guaranteed that the tag is -not empty. - -In addition, some alarm issues in hpre have been modified. 
- -Signed-off-by: Longfang Liu -Signed-off-by: Qi Tao ---- - v1/drv/hisi_hpre_udrv.c | 4 ++-- - v1/drv/hisi_sec_udrv.c | 3 +-- - 2 files changed, 3 insertions(+), 4 deletions(-) - -diff --git a/v1/drv/hisi_hpre_udrv.c b/v1/drv/hisi_hpre_udrv.c -index de614f2..eaee4b1 100644 ---- a/v1/drv/hisi_hpre_udrv.c -+++ b/v1/drv/hisi_hpre_udrv.c -@@ -212,13 +212,13 @@ static int qm_fill_rsa_pubkey(struct wcrypto_rsa_pubkey *pubkey, void **data) - wd_e->bsize, wd_e->dsize, "rsa pubkey e"); - if (unlikely(ret)) - return ret; -- wd_e->dsize = wd_e->dsize; -+ wd_e->dsize = wd_e->bsize; - - ret = qm_crypto_bin_to_hpre_bin(wd_n->data, (const char *)wd_n->data, - wd_n->bsize, wd_n->dsize, "rsa pubkey n"); - if (unlikely(ret)) - return ret; -- wd_n->dsize = wd_n->dsize; -+ wd_n->dsize = wd_n->bsize; - - *data = wd_e->data; - return (int)(wd_n->bsize + wd_e->bsize); -diff --git a/v1/drv/hisi_sec_udrv.c b/v1/drv/hisi_sec_udrv.c -index d046327..c0bd73d 100644 ---- a/v1/drv/hisi_sec_udrv.c -+++ b/v1/drv/hisi_sec_udrv.c -@@ -759,8 +759,7 @@ static int fill_cipher_bd2(struct wd_queue *q, struct hisi_sec_sqe *sqe, - return ret; - } - -- if (tag) -- sqe->type2.tag = tag->wcrypto_tag.ctx_id; -+ sqe->type2.tag = tag->wcrypto_tag.ctx_id; - - return ret; - } --- -2.25.1 - diff --git a/0047-uadk-v1-fix-for-sec-cipher-bd1-ci_gen-configuration.patch b/0047-uadk-v1-fix-for-sec-cipher-bd1-ci_gen-configuration.patch deleted file mode 100644 index ab3160a..0000000 --- a/0047-uadk-v1-fix-for-sec-cipher-bd1-ci_gen-configuration.patch +++ /dev/null @@ -1,39 +0,0 @@ -From f59a72aefeb714c95bddca71431e95746094d6f7 Mon Sep 17 00:00:00 2001 -From: Wenkai Lin -Date: Fri, 29 Mar 2024 16:56:43 +0800 -Subject: [PATCH 47/52] uadk/v1: fix for sec cipher bd1 ci_gen configuration - -In storage scenarios, the XTS mode is used for encrypting and decrypting -data on and off disks. 
According to the definition of this mode, the input -parameter to genarate IV is the LBA, so update SEC bd1 xts mode CI_GEN -from 0 to 3, which means use LBA mode. - -Signed-off-by: Wenkai Lin -Signed-off-by: Qi Tao ---- - v1/drv/hisi_sec_udrv.c | 9 ++++----- - 1 file changed, 4 insertions(+), 5 deletions(-) - -diff --git a/v1/drv/hisi_sec_udrv.c b/v1/drv/hisi_sec_udrv.c -index c0bd73d..d4e090a 100644 ---- a/v1/drv/hisi_sec_udrv.c -+++ b/v1/drv/hisi_sec_udrv.c -@@ -312,11 +312,10 @@ static int fill_cipher_bd1_type(struct wcrypto_cipher_msg *msg, - - fill_bd_addr_type(msg->data_fmt, sqe); - -- /* -- * BD1 cipher only provides ci_gen=0 for compatibility, so user -- * should prepare iv[gran_num] and iv_bytes is sum of all grans -- */ -- sqe->type1.ci_gen = CI_GEN_BY_ADDR; -+ if (msg->mode == WCRYPTO_CIPHER_XTS) -+ sqe->type1.ci_gen = CI_GEN_BY_LBA; -+ else -+ sqe->type1.ci_gen = CI_GEN_BY_ADDR; - - return WD_SUCCESS; - } --- -2.25.1 - diff --git a/0048-uadk-fix-for-shmget-shmflag.patch b/0048-uadk-fix-for-shmget-shmflag.patch deleted file mode 100644 index bb23b44..0000000 --- a/0048-uadk-fix-for-shmget-shmflag.patch +++ /dev/null @@ -1,43 +0,0 @@ -From 0fc17f5c160cb6ea2d1f4b08e9884f29ff75b2dc Mon Sep 17 00:00:00 2001 -From: Wenkai Lin -Date: Fri, 29 Mar 2024 16:58:23 +0800 -Subject: [PATCH 48/52] uadk: fix for shmget shmflag - -The shmflag should be 0600 in octal, not 600 in decimal. 
- -Signed-off-by: Wenkai Lin -Signed-off-by: Qi Tao ---- - uadk_tool/dfx/uadk_dfx.c | 2 +- - wd_util.c | 2 +- - 2 files changed, 2 insertions(+), 2 deletions(-) - -diff --git a/uadk_tool/dfx/uadk_dfx.c b/uadk_tool/dfx/uadk_dfx.c -index 796135a..9c54b7b 100644 ---- a/uadk_tool/dfx/uadk_dfx.c -+++ b/uadk_tool/dfx/uadk_dfx.c -@@ -16,7 +16,7 @@ - - #define uadk_build_date() printf("built on: %s %s\n", __DATE__, __TIME__) - #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) --#define PRIVILEGE_FLAG 666 -+#define PRIVILEGE_FLAG 0666 - - struct uadk_env_var { - const char *module; -diff --git a/wd_util.c b/wd_util.c -index fb58167..2635dc3 100644 ---- a/wd_util.c -+++ b/wd_util.c -@@ -19,7 +19,7 @@ - #define WD_BALANCE_THRHD 1280 - #define WD_RECV_MAX_CNT_SLEEP 60000000 - #define WD_RECV_MAX_CNT_NOSLEEP 200000000 --#define PRIVILEGE_FLAG 600 -+#define PRIVILEGE_FLAG 0600 - #define MIN(a, b) ((a) > (b) ? (b) : (a)) - #define MAX(a, b) ((a) > (b) ? (a) : (b)) - --- -2.25.1 - diff --git a/0049-sec-optimze-for-directly-assigning-values-to-structu.patch b/0049-sec-optimze-for-directly-assigning-values-to-structu.patch deleted file mode 100644 index 016f5c4..0000000 --- a/0049-sec-optimze-for-directly-assigning-values-to-structu.patch +++ /dev/null @@ -1,100 +0,0 @@ -From 705e33d624defc335cdb1c96335da684868858a3 Mon Sep 17 00:00:00 2001 -From: Wenkai Lin -Date: Fri, 29 Mar 2024 16:59:34 +0800 -Subject: [PATCH 49/52] sec: optimze for directly assigning values to - structures - -It is more reasonable to use pointers for value assignment. 
- -Signed-off-by: Wenkai Lin -Signed-off-by: Qi Tao ---- - drv/hisi_sec.c | 36 ++++++++++++------------------------ - 1 file changed, 12 insertions(+), 24 deletions(-) - -diff --git a/drv/hisi_sec.c b/drv/hisi_sec.c -index 6625c41..b218cd8 100644 ---- a/drv/hisi_sec.c -+++ b/drv/hisi_sec.c -@@ -542,66 +542,54 @@ static int hisi_sec_aead_recv_v3(struct wd_alg_driver *drv, handle_t ctx, void * - - static int cipher_send(struct wd_alg_driver *drv, handle_t ctx, void *msg) - { -- handle_t h_qp = (handle_t)wd_ctx_get_priv(ctx); -- struct hisi_qp *qp = (struct hisi_qp *)h_qp; -- struct hisi_qm_queue_info q_info = qp->q_info; -+ struct hisi_qp *qp = (struct hisi_qp *)wd_ctx_get_priv(ctx); - -- if (q_info.hw_type == HISI_QM_API_VER2_BASE) -+ if (qp->q_info.hw_type == HISI_QM_API_VER2_BASE) - return hisi_sec_cipher_send(drv, ctx, msg); - return hisi_sec_cipher_send_v3(drv, ctx, msg); - } - - static int cipher_recv(struct wd_alg_driver *drv, handle_t ctx, void *msg) - { -- handle_t h_qp = (handle_t)wd_ctx_get_priv(ctx); -- struct hisi_qp *qp = (struct hisi_qp *)h_qp; -- struct hisi_qm_queue_info q_info = qp->q_info; -+ struct hisi_qp *qp = (struct hisi_qp *)wd_ctx_get_priv(ctx); - -- if (q_info.hw_type == HISI_QM_API_VER2_BASE) -+ if (qp->q_info.hw_type == HISI_QM_API_VER2_BASE) - return hisi_sec_cipher_recv(drv, ctx, msg); - return hisi_sec_cipher_recv_v3(drv, ctx, msg); - } - - static int digest_send(struct wd_alg_driver *drv, handle_t ctx, void *msg) - { -- handle_t h_qp = (handle_t)wd_ctx_get_priv(ctx); -- struct hisi_qp *qp = (struct hisi_qp *)h_qp; -- struct hisi_qm_queue_info q_info = qp->q_info; -+ struct hisi_qp *qp = (struct hisi_qp *)wd_ctx_get_priv(ctx); - -- if (q_info.hw_type == HISI_QM_API_VER2_BASE) -+ if (qp->q_info.hw_type == HISI_QM_API_VER2_BASE) - return hisi_sec_digest_send(drv, ctx, msg); - return hisi_sec_digest_send_v3(drv, ctx, msg); - } - - static int digest_recv(struct wd_alg_driver *drv, handle_t ctx, void *msg) - { -- handle_t h_qp = 
(handle_t)wd_ctx_get_priv(ctx); -- struct hisi_qp *qp = (struct hisi_qp *)h_qp; -- struct hisi_qm_queue_info q_info = qp->q_info; -+ struct hisi_qp *qp = (struct hisi_qp *)wd_ctx_get_priv(ctx); - -- if (q_info.hw_type == HISI_QM_API_VER2_BASE) -+ if (qp->q_info.hw_type == HISI_QM_API_VER2_BASE) - return hisi_sec_digest_recv(drv, ctx, msg); - return hisi_sec_digest_recv_v3(drv, ctx, msg); - } - - static int aead_send(struct wd_alg_driver *drv, handle_t ctx, void *msg) - { -- handle_t h_qp = (handle_t)wd_ctx_get_priv(ctx); -- struct hisi_qp *qp = (struct hisi_qp *)h_qp; -- struct hisi_qm_queue_info q_info = qp->q_info; -+ struct hisi_qp *qp = (struct hisi_qp *)wd_ctx_get_priv(ctx); - -- if (q_info.hw_type == HISI_QM_API_VER2_BASE) -+ if (qp->q_info.hw_type == HISI_QM_API_VER2_BASE) - return hisi_sec_aead_send(drv, ctx, msg); - return hisi_sec_aead_send_v3(drv, ctx, msg); - } - - static int aead_recv(struct wd_alg_driver *drv, handle_t ctx, void *msg) - { -- handle_t h_qp = (handle_t)wd_ctx_get_priv(ctx); -- struct hisi_qp *qp = (struct hisi_qp *)h_qp; -- struct hisi_qm_queue_info q_info = qp->q_info; -+ struct hisi_qp *qp = (struct hisi_qp *)wd_ctx_get_priv(ctx); - -- if (q_info.hw_type == HISI_QM_API_VER2_BASE) -+ if (qp->q_info.hw_type == HISI_QM_API_VER2_BASE) - return hisi_sec_aead_recv(drv, ctx, msg); - return hisi_sec_aead_recv_v3(drv, ctx, msg); - } --- -2.25.1 - diff --git a/0050-util-optimize-for-wd_handle_msg_sync.patch b/0050-util-optimize-for-wd_handle_msg_sync.patch deleted file mode 100644 index b38f6d2..0000000 --- a/0050-util-optimize-for-wd_handle_msg_sync.patch +++ /dev/null @@ -1,61 +0,0 @@ -From f36aa5f7e8f82a90aa0cb729bf00cc51f76970d5 Mon Sep 17 00:00:00 2001 -From: Wenkai Lin -Date: Fri, 29 Mar 2024 17:01:03 +0800 -Subject: [PATCH 50/52] util: optimize for wd_handle_msg_sync - -1. Separate rx_cnt auto-increment and judgment. -2. Reduce the condition judgment in the case of eagain. 
- -Signed-off-by: Wenkai Lin -Signed-off-by: Qi Tao ---- - wd_util.c | 26 +++++++++++++++----------- - 1 file changed, 15 insertions(+), 11 deletions(-) - -diff --git a/wd_util.c b/wd_util.c -index 2635dc3..0744ff0 100644 ---- a/wd_util.c -+++ b/wd_util.c -@@ -1822,24 +1822,28 @@ int wd_handle_msg_sync(struct wd_alg_driver *drv, struct wd_msg_handle *msg_hand - do { - if (epoll_en) { - ret = wd_ctx_wait(ctx, POLL_TIME); -- if (ret < 0) -+ if (unlikely(ret < 0)) - WD_ERR("wd ctx wait timeout(%d)!\n", ret); - } - - ret = msg_handle->recv(drv, ctx, msg); -- if (ret == -WD_EAGAIN) { -- if (unlikely(rx_cnt++ >= timeout)) { -- WD_ERR("failed to recv msg: timeout!\n"); -- return -WD_ETIMEDOUT; -+ if (ret != -WD_EAGAIN) { -+ if (unlikely(ret < 0)) { -+ WD_ERR("failed to recv msg: error = %d!\n", ret); -+ return ret; - } -+ break; -+ } - -- if (balance && *balance > WD_BALANCE_THRHD) -- usleep(1); -- } else if (unlikely(ret < 0)) { -- WD_ERR("failed to recv msg: error = %d!\n", ret); -- return ret; -+ rx_cnt++; -+ if (unlikely(rx_cnt >= timeout)) { -+ WD_ERR("failed to recv msg: timeout!\n"); -+ return -WD_ETIMEDOUT; - } -- } while (ret < 0); -+ -+ if (balance && *balance > WD_BALANCE_THRHD) -+ usleep(1); -+ } while (1); - - if (balance) - *balance = rx_cnt; --- -2.25.1 - diff --git a/0051-uadk-drv_hisi-optimize-qm-recv-function.patch b/0051-uadk-drv_hisi-optimize-qm-recv-function.patch deleted file mode 100644 index b070b8e..0000000 --- a/0051-uadk-drv_hisi-optimize-qm-recv-function.patch +++ /dev/null @@ -1,115 +0,0 @@ -From ace1da03900d04a1e14d61200a89c539ff78856d Mon Sep 17 00:00:00 2001 -From: Wenkai Lin -Date: Fri, 29 Mar 2024 17:02:23 +0800 -Subject: [PATCH 51/52] uadk: drv_hisi - optimize qm recv function - -Ensure that the value written by the hardware is -read from the memory each time, reduce the number -of packet receiving times by half. -Also sqe address is only need calculated when packets -are received. 
- -Signed-off-by: Wenkai Lin -Signed-off-by: Qi Tao ---- - drv/hisi_qm_udrv.c | 45 +++++++++++++++++++++++---------------------- - 1 file changed, 23 insertions(+), 22 deletions(-) - -diff --git a/drv/hisi_qm_udrv.c b/drv/hisi_qm_udrv.c -index d8b5271..304764e 100644 ---- a/drv/hisi_qm_udrv.c -+++ b/drv/hisi_qm_udrv.c -@@ -21,8 +21,8 @@ - #define QM_DBELL_SQN_MASK 0x3ff - #define QM_DBELL_CMD_MASK 0xf - #define QM_Q_DEPTH 1024 --#define CQE_PHASE(cq) (__le16_to_cpu((cq)->w7) & 0x1) --#define CQE_SQ_HEAD_INDEX(cq) (__le16_to_cpu((cq)->sq_head) & 0xffff) -+#define CQE_PHASE(cqe) (__le16_to_cpu((cqe)->w7) & 0x1) -+#define CQE_SQ_HEAD_INDEX(cqe) (__le16_to_cpu((cqe)->sq_head) & 0xffff) - #define VERSION_ID_SHIFT 9 - - #define UACCE_CMD_QM_SET_QP_CTX _IOWR('H', 10, struct hisi_qp_ctx) -@@ -505,32 +505,33 @@ int hisi_qm_send(handle_t h_qp, const void *req, __u16 expect, __u16 *count) - return 0; - } - --static int hisi_qm_recv_single(struct hisi_qm_queue_info *q_info, void *resp) -+static int hisi_qm_recv_single(struct hisi_qm_queue_info *q_info, handle_t h_ctx, -+ void *resp, __u16 idx) - { -- struct hisi_qp *qp = container_of(q_info, struct hisi_qp, q_info); -+ __u16 i, j, cqe_phase; - struct cqe *cqe; -- __u16 i, j; - - pthread_spin_lock(&q_info->rv_lock); - i = q_info->cq_head_index; - cqe = q_info->cq_base + i * sizeof(struct cqe); -+ cqe_phase = CQE_PHASE(cqe); -+ /* Use dsb to read from memory and improve the receiving efficiency. 
*/ -+ rmb(); - -- if (q_info->cqc_phase == CQE_PHASE(cqe)) { -- /* Make sure cqe valid bit is set */ -- rmb(); -- j = CQE_SQ_HEAD_INDEX(cqe); -- if (unlikely(j >= q_info->sq_depth)) { -- pthread_spin_unlock(&q_info->rv_lock); -- WD_DEV_ERR(qp->h_ctx, "CQE_SQ_HEAD_INDEX(%u) error!\n", j); -- return -WD_EIO; -- } -- memcpy(resp, (void *)((uintptr_t)q_info->sq_base + -- j * q_info->sqe_size), q_info->sqe_size); -- } else { -+ if (q_info->cqc_phase != cqe_phase) { - pthread_spin_unlock(&q_info->rv_lock); - return -WD_EAGAIN; - } - -+ j = CQE_SQ_HEAD_INDEX(cqe); -+ if (unlikely(j >= q_info->sq_depth)) { -+ pthread_spin_unlock(&q_info->rv_lock); -+ WD_DEV_ERR(h_ctx, "CQE_SQ_HEAD_INDEX(%u) error!\n", j); -+ return -WD_EIO; -+ } -+ memcpy((void *)((uintptr_t)resp + idx * q_info->sqe_size), -+ (void *)((uintptr_t)q_info->sq_base + j * q_info->sqe_size), q_info->sqe_size); -+ - if (i == q_info->cq_depth - 1) { - q_info->cqc_phase = !(q_info->cqc_phase); - i = 0; -@@ -544,7 +545,7 @@ static int hisi_qm_recv_single(struct hisi_qm_queue_info *q_info, void *resp) - */ - if (unlikely(wd_ioread32(q_info->ds_rx_base) == 1)) { - pthread_spin_unlock(&q_info->rv_lock); -- WD_DEV_ERR(qp->h_ctx, "wd queue hw error happened after qm receive!\n"); -+ WD_DEV_ERR(h_ctx, "wd queue hw error happened before qm receive!\n"); - return -WD_HW_EACCESS; - } - -@@ -565,8 +566,9 @@ int hisi_qm_recv(handle_t h_qp, void *resp, __u16 expect, __u16 *count) - { - struct hisi_qp *qp = (struct hisi_qp *)h_qp; - struct hisi_qm_queue_info *q_info; -- int recv_num = 0; -- int i, ret, offset; -+ __u16 recv_num = 0; -+ __u16 i; -+ int ret; - - if (unlikely(!resp || !qp || !count)) - return -WD_EINVAL; -@@ -581,8 +583,7 @@ int hisi_qm_recv(handle_t h_qp, void *resp, __u16 expect, __u16 *count) - } - - for (i = 0; i < expect; i++) { -- offset = i * q_info->sqe_size; -- ret = hisi_qm_recv_single(q_info, resp + offset); -+ ret = hisi_qm_recv_single(q_info, qp->h_ctx, resp, i); - if (ret) - break; - recv_num++; --- 
-2.25.1 - diff --git a/0052-uadk-modify-uadk-static-compile.patch b/0052-uadk-modify-uadk-static-compile.patch deleted file mode 100644 index 00b7cf3..0000000 --- a/0052-uadk-modify-uadk-static-compile.patch +++ /dev/null @@ -1,1307 +0,0 @@ -From a282605e6550b5572072f9968370fd01502a04f5 Mon Sep 17 00:00:00 2001 -From: Longfang Liu -Date: Fri, 29 Mar 2024 17:04:01 +0800 -Subject: [PATCH 52/52] uadk: modify uadk static compile - -After the UADK framework supports dynamic loading. Device drivers are -all default used in the form of dynamic libraries. - -Static compilation requires static declaration and cannot declare -unknown device drivers. Therefore, static compilation only supports -HiSilicon device drivers. - -Signed-off-by: Longfang Liu -Signed-off-by: Qi Tao ---- - drv/hisi_comp.c | 8 ++++++ - drv/hisi_hpre.c | 9 ++++++ - drv/hisi_sec.c | 8 ++++++ - include/wd_alg.h | 26 +++++++++++++---- - include/wd_alg_common.h | 17 +++++++---- - wd_aead.c | 61 ++++++++++++++++++++++++++------------- - wd_alg.c | 25 +++++++++++++++- - wd_cipher.c | 61 ++++++++++++++++++++++++++------------- - wd_comp.c | 63 ++++++++++++++++++++++++++++------------- - wd_dh.c | 61 ++++++++++++++++++++++++++------------- - wd_digest.c | 63 ++++++++++++++++++++++++++++------------- - wd_ecc.c | 61 ++++++++++++++++++++++++++------------- - wd_rsa.c | 61 ++++++++++++++++++++++++++------------- - 13 files changed, 377 insertions(+), 147 deletions(-) - -diff --git a/drv/hisi_comp.c b/drv/hisi_comp.c -index a1af567..2fa5eff 100644 ---- a/drv/hisi_comp.c -+++ b/drv/hisi_comp.c -@@ -1109,7 +1109,11 @@ static struct wd_alg_driver zip_alg_driver[] = { - GEN_ZIP_ALG_DRIVER("lz77_zstd"), - }; - -+#ifdef WD_STATIC_DRV -+void hisi_zip_probe(void) -+#else - static void __attribute__((constructor)) hisi_zip_probe(void) -+#endif - { - int alg_num = ARRAY_SIZE(zip_alg_driver); - int i, ret; -@@ -1124,7 +1128,11 @@ static void __attribute__((constructor)) hisi_zip_probe(void) - } - } - -+#ifdef WD_STATIC_DRV 
-+void hisi_zip_remove(void) -+#else - static void __attribute__((destructor)) hisi_zip_remove(void) -+#endif - { - int alg_num = ARRAY_SIZE(zip_alg_driver); - int i; -diff --git a/drv/hisi_hpre.c b/drv/hisi_hpre.c -index babc795..68a11ae 100644 ---- a/drv/hisi_hpre.c -+++ b/drv/hisi_hpre.c -@@ -1,3 +1,4 @@ -+ - /* SPDX-License-Identifier: Apache-2.0 */ - /* Copyright 2020-2021 Huawei Technologies Co.,Ltd. All rights reserved. */ - -@@ -2547,7 +2548,11 @@ static struct wd_alg_driver hpre_dh_driver = { - .get_usage = hpre_get_usage, - }; - -+#ifdef WD_STATIC_DRV -+void hisi_hpre_probe(void) -+#else - static void __attribute__((constructor)) hisi_hpre_probe(void) -+#endif - { - __u32 alg_num = ARRAY_SIZE(hpre_ecc_driver); - __u32 i; -@@ -2569,7 +2574,11 @@ static void __attribute__((constructor)) hisi_hpre_probe(void) - } - } - -+#ifdef WD_STATIC_DRV -+void hisi_hpre_remove(void) -+#else - static void __attribute__((destructor)) hisi_hpre_remove(void) -+#endif - { - __u32 alg_num = ARRAY_SIZE(hpre_ecc_driver); - __u32 i; -diff --git a/drv/hisi_sec.c b/drv/hisi_sec.c -index b218cd8..aba4185 100644 ---- a/drv/hisi_sec.c -+++ b/drv/hisi_sec.c -@@ -3087,7 +3087,11 @@ static void hisi_sec_exit(struct wd_alg_driver *drv) - drv->priv = NULL; - } - -+#ifdef WD_STATIC_DRV -+void hisi_sec2_probe(void) -+#else - static void __attribute__((constructor)) hisi_sec2_probe(void) -+#endif - { - int alg_num; - int i, ret; -@@ -3119,7 +3123,11 @@ static void __attribute__((constructor)) hisi_sec2_probe(void) - } - } - -+#ifdef WD_STATIC_DRV -+void hisi_sec2_remove(void) -+#else - static void __attribute__((destructor)) hisi_sec2_remove(void) -+#endif - { - int alg_num; - int i; -diff --git a/include/wd_alg.h b/include/wd_alg.h -index 861b7d9..1735896 100644 ---- a/include/wd_alg.h -+++ b/include/wd_alg.h -@@ -69,7 +69,7 @@ enum alg_dev_type { - UADK_ALG_HW = 0x3 - }; - --/** -+/* - * @drv_name: name of the current device driver - * @alg_name: name of the algorithm supported by the 
driver - * @priority: priority of the type of algorithm supported by the driver -@@ -133,7 +133,7 @@ inline int wd_alg_driver_recv(struct wd_alg_driver *drv, handle_t ctx, void *msg - return drv->recv(drv, ctx, msg); - } - --/** -+/* - * wd_alg_driver_register() - Register a device driver. - * @wd_alg_driver: a device driver that supports an algorithm. - * -@@ -142,7 +142,7 @@ inline int wd_alg_driver_recv(struct wd_alg_driver *drv, handle_t ctx, void *msg - int wd_alg_driver_register(struct wd_alg_driver *drv); - void wd_alg_driver_unregister(struct wd_alg_driver *drv); - --/** -+/* - * @alg_name: name of the algorithm supported by the driver - * @drv_name: name of the current device driver - * @available: Indicates whether the current driver still has resources available -@@ -165,7 +165,7 @@ struct wd_alg_list { - struct wd_alg_list *next; - }; - --/** -+/* - * wd_request_drv() - Apply for an algorithm driver. - * @alg_name: task algorithm name. - * @hw_mask: the flag of shield hardware device drivers. -@@ -175,7 +175,7 @@ struct wd_alg_list { - struct wd_alg_driver *wd_request_drv(const char *alg_name, bool hw_mask); - void wd_release_drv(struct wd_alg_driver *drv); - --/** -+/* - * wd_drv_alg_support() - Check the algorithms supported by the driver. - * @alg_name: task algorithm name. - * @drv: a device driver that supports an algorithm. -@@ -185,7 +185,7 @@ void wd_release_drv(struct wd_alg_driver *drv); - bool wd_drv_alg_support(const char *alg_name, - struct wd_alg_driver *drv); - --/** -+/* - * wd_enable_drv() - Re-enable use of the current device driver. - * @drv: a device driver that supports an algorithm. 
- */ -@@ -194,6 +194,20 @@ void wd_disable_drv(struct wd_alg_driver *drv); - - struct wd_alg_list *wd_get_alg_head(void); - -+#ifdef WD_STATIC_DRV -+/* -+ * duplicate drivers will be skipped when it register to alg_list -+ */ -+void hisi_sec2_probe(void); -+void hisi_hpre_probe(void); -+void hisi_zip_probe(void); -+ -+void hisi_sec2_remove(void); -+void hisi_hpre_remove(void); -+void hisi_zip_remove(void); -+ -+#endif -+ - #ifdef __cplusplus - } - #endif -diff --git a/include/wd_alg_common.h b/include/wd_alg_common.h -index 32b8630..1235f1d 100644 ---- a/include/wd_alg_common.h -+++ b/include/wd_alg_common.h -@@ -55,7 +55,12 @@ enum wd_ctx_mode { - CTX_MODE_MAX, - }; - --/** -+enum wd_init_type { -+ WD_TYPE_V1, -+ WD_TYPE_V2, -+}; -+ -+/* - * struct wd_ctx - Define one ctx and related type. - * @ctx: The ctx itself. - * @op_type: Define the operation type of this specific ctx. -@@ -69,7 +74,7 @@ struct wd_ctx { - __u8 ctx_mode; - }; - --/** -+/* - * struct wd_cap_config - Capabilities. - * @ctx_msg_num: number of asynchronous msg pools that the user wants to allocate. - * Optional, user can set ctx_msg_num based on the number of requests -@@ -82,7 +87,7 @@ struct wd_cap_config { - __u32 resv; - }; - --/** -+/* - * struct wd_ctx_config - Define a ctx set and its related attributes, which - * will be used in the scope of current process. - * @ctx_num: The ctx number in below ctx array. -@@ -98,7 +103,7 @@ struct wd_ctx_config { - struct wd_cap_config *cap; - }; - --/** -+/* - * struct wd_ctx_nums - Define the ctx sets numbers. - * @sync_ctx_num: The ctx numbers which are used for sync mode for each - * ctx sets. -@@ -110,7 +115,7 @@ struct wd_ctx_nums { - __u32 async_ctx_num; - }; - --/** -+/* - * struct wd_ctx_params - Define the ctx sets params which are used for init - * algorithms. 
- * @op_type_num: Used for index of ctx_set_num, the order is the same as -@@ -144,7 +149,7 @@ struct wd_ctx_config_internal { - unsigned long *msg_cnt; - }; - --/** -+/* - * struct wd_comp_sched - Define a scheduler. - * @name: Name of this scheduler. - * @sched_policy: Method for scheduler to perform scheduling -diff --git a/wd_aead.c b/wd_aead.c -index 57daa80..daed761 100644 ---- a/wd_aead.c -+++ b/wd_aead.c -@@ -62,22 +62,48 @@ struct wd_aead_sess { - struct wd_env_config wd_aead_env_config; - static struct wd_init_attrs wd_aead_init_attrs; - --static void wd_aead_close_driver(void) -+static void wd_aead_close_driver(int init_type) - { -+#ifndef WD_STATIC_DRV -+ if (init_type == WD_TYPE_V2) { -+ wd_dlclose_drv(wd_aead_setting.dlh_list); -+ return; -+ } -+ - if (wd_aead_setting.dlhandle) { - wd_release_drv(wd_aead_setting.driver); - dlclose(wd_aead_setting.dlhandle); - wd_aead_setting.dlhandle = NULL; - } -+#else -+ wd_release_drv(wd_aead_setting.driver); -+ hisi_sec2_remove(); -+#endif - } - --static int wd_aead_open_driver(void) -+static int wd_aead_open_driver(int init_type) - { - struct wd_alg_driver *driver = NULL; - const char *alg_name = "gcm(aes)"; -+#ifndef WD_STATIC_DRV - char lib_path[PATH_MAX]; - int ret; - -+ if (init_type == WD_TYPE_V2) { -+ /* -+ * Driver lib file path could set by env param. 
-+ * then open tham by wd_dlopen_drv() -+ * use NULL means dynamic query path -+ */ -+ wd_aead_setting.dlh_list = wd_dlopen_drv(NULL); -+ if (!wd_aead_setting.dlh_list) { -+ WD_ERR("fail to open driver lib files.\n"); -+ return -WD_EINVAL; -+ } -+ -+ return WD_SUCCESS; -+ } -+ - ret = wd_get_lib_file_path("libhisi_sec.so", lib_path, false); - if (ret) - return ret; -@@ -87,17 +113,21 @@ static int wd_aead_open_driver(void) - WD_ERR("failed to open libhisi_sec.so, %s\n", dlerror()); - return -WD_EINVAL; - } -- -+#else -+ hisi_sec2_probe(); -+ if (init_type == WD_TYPE_V2) -+ return WD_SUCCESS; -+#endif - driver = wd_request_drv(alg_name, false); - if (!driver) { -- wd_aead_close_driver(); -+ wd_aead_close_driver(WD_TYPE_V1); - WD_ERR("failed to get %s driver support\n", alg_name); - return -WD_EINVAL; - } - - wd_aead_setting.driver = driver; - -- return 0; -+ return WD_SUCCESS; - } - - static int aes_key_len_check(__u32 length) -@@ -466,7 +496,7 @@ int wd_aead_init(struct wd_ctx_config *config, struct wd_sched *sched) - if (ret) - goto out_clear_init; - -- ret = wd_aead_open_driver(); -+ ret = wd_aead_open_driver(WD_TYPE_V1); - if (ret) - goto out_clear_init; - -@@ -479,7 +509,7 @@ int wd_aead_init(struct wd_ctx_config *config, struct wd_sched *sched) - return 0; - - out_close_driver: -- wd_aead_close_driver(); -+ wd_aead_close_driver(WD_TYPE_V1); - out_clear_init: - wd_alg_clear_init(&wd_aead_setting.status); - return ret; -@@ -509,7 +539,7 @@ void wd_aead_uninit(void) - if (ret) - return; - -- wd_aead_close_driver(); -+ wd_aead_close_driver(WD_TYPE_V1); - wd_alg_clear_init(&wd_aead_setting.status); - } - -@@ -551,16 +581,9 @@ int wd_aead_init2_(char *alg, __u32 sched_type, int task_type, - goto out_uninit; - } - -- /* -- * Driver lib file path could set by env param. 
-- * then open them by wd_dlopen_drv() -- * use NULL means dynamic query path -- */ -- wd_aead_setting.dlh_list = wd_dlopen_drv(NULL); -- if (!wd_aead_setting.dlh_list) { -- WD_ERR("failed to open driver lib files.\n"); -+ state = wd_aead_open_driver(WD_TYPE_V2); -+ if (state) - goto out_uninit; -- } - - while (ret != 0) { - memset(&wd_aead_setting.config, 0, sizeof(struct wd_ctx_config_internal)); -@@ -613,7 +636,7 @@ out_params_uninit: - out_driver: - wd_alg_drv_unbind(wd_aead_setting.driver); - out_dlopen: -- wd_dlclose_drv(wd_aead_setting.dlh_list); -+ wd_aead_close_driver(WD_TYPE_V2); - out_uninit: - wd_alg_clear_init(&wd_aead_setting.status); - return ret; -@@ -629,7 +652,7 @@ void wd_aead_uninit2(void) - - wd_alg_attrs_uninit(&wd_aead_init_attrs); - wd_alg_drv_unbind(wd_aead_setting.driver); -- wd_dlclose_drv(wd_aead_setting.dlh_list); -+ wd_aead_close_driver(WD_TYPE_V2); - wd_aead_setting.dlh_list = NULL; - wd_alg_clear_init(&wd_aead_setting.status); - } -diff --git a/wd_alg.c b/wd_alg.c -index f34a407..0a15fe8 100644 ---- a/wd_alg.c -+++ b/wd_alg.c -@@ -150,6 +150,26 @@ static bool wd_alg_driver_match(struct wd_alg_driver *drv, - return true; - } - -+static bool wd_alg_repeat_check(struct wd_alg_driver *drv) -+{ -+ struct wd_alg_list *npre = &alg_list_head; -+ struct wd_alg_list *pnext = NULL; -+ -+ pthread_mutex_lock(&mutex); -+ pnext = npre->next; -+ while (pnext) { -+ if (wd_alg_driver_match(drv, pnext)) { -+ pthread_mutex_unlock(&mutex); -+ return true; -+ } -+ npre = pnext; -+ pnext = pnext->next; -+ } -+ pthread_mutex_unlock(&mutex); -+ -+ return false; -+} -+ - int wd_alg_driver_register(struct wd_alg_driver *drv) - { - struct wd_alg_list *new_alg; -@@ -164,6 +184,9 @@ int wd_alg_driver_register(struct wd_alg_driver *drv) - return -WD_EINVAL; - } - -+ if (wd_alg_repeat_check(drv)) -+ return 0; -+ - new_alg = calloc(1, sizeof(struct wd_alg_list)); - if (!new_alg) { - WD_ERR("failed to alloc alg driver memory!\n"); -@@ -238,7 +261,7 @@ bool 
wd_drv_alg_support(const char *alg_name, - struct wd_alg_list *head = &alg_list_head; - struct wd_alg_list *pnext = head->next; - -- if (!alg_name) -+ if (!alg_name || !drv) - return false; - - while (pnext) { -diff --git a/wd_cipher.c b/wd_cipher.c -index 279ca8b..9b6e884 100644 ---- a/wd_cipher.c -+++ b/wd_cipher.c -@@ -72,22 +72,48 @@ struct wd_cipher_sess { - struct wd_env_config wd_cipher_env_config; - static struct wd_init_attrs wd_cipher_init_attrs; - --static void wd_cipher_close_driver(void) -+static void wd_cipher_close_driver(int init_type) - { -+#ifndef WD_STATIC_DRV -+ if (init_type == WD_TYPE_V2) { -+ wd_dlclose_drv(wd_cipher_setting.dlh_list); -+ return; -+ } -+ - if (wd_cipher_setting.dlhandle) { - wd_release_drv(wd_cipher_setting.driver); - dlclose(wd_cipher_setting.dlhandle); - wd_cipher_setting.dlhandle = NULL; - } -+#else -+ wd_release_drv(wd_cipher_setting.driver); -+ hisi_sec2_remove(); -+#endif - } - --static int wd_cipher_open_driver(void) -+static int wd_cipher_open_driver(int init_type) - { - struct wd_alg_driver *driver = NULL; - const char *alg_name = "cbc(aes)"; -+#ifndef WD_STATIC_DRV - char lib_path[PATH_MAX]; - int ret; - -+ if (init_type == WD_TYPE_V2) { -+ /* -+ * Driver lib file path could set by env param. 
-+ * then open tham by wd_dlopen_drv() -+ * use NULL means dynamic query path -+ */ -+ wd_cipher_setting.dlh_list = wd_dlopen_drv(NULL); -+ if (!wd_cipher_setting.dlh_list) { -+ WD_ERR("fail to open driver lib files.\n"); -+ return -WD_EINVAL; -+ } -+ -+ return WD_SUCCESS; -+ } -+ - ret = wd_get_lib_file_path("libhisi_sec.so", lib_path, false); - if (ret) - return ret; -@@ -97,17 +123,21 @@ static int wd_cipher_open_driver(void) - WD_ERR("failed to open libhisi_sec.so, %s\n", dlerror()); - return -WD_EINVAL; - } -- -+#else -+ hisi_sec2_probe(); -+ if (init_type == WD_TYPE_V2) -+ return WD_SUCCESS; -+#endif - driver = wd_request_drv(alg_name, false); - if (!driver) { -- wd_cipher_close_driver(); -+ wd_cipher_close_driver(WD_TYPE_V1); - WD_ERR("failed to get %s driver support\n", alg_name); - return -WD_EINVAL; - } - - wd_cipher_setting.driver = driver; - -- return 0; -+ return WD_SUCCESS; - } - - static bool is_des_weak_key(const __u8 *key) -@@ -365,7 +395,7 @@ int wd_cipher_init(struct wd_ctx_config *config, struct wd_sched *sched) - if (ret) - goto out_clear_init; - -- ret = wd_cipher_open_driver(); -+ ret = wd_cipher_open_driver(WD_TYPE_V1); - if (ret) - goto out_clear_init; - -@@ -378,7 +408,7 @@ int wd_cipher_init(struct wd_ctx_config *config, struct wd_sched *sched) - return 0; - - out_close_driver: -- wd_cipher_close_driver(); -+ wd_cipher_close_driver(WD_TYPE_V1); - out_clear_init: - wd_alg_clear_init(&wd_cipher_setting.status); - return ret; -@@ -392,7 +422,7 @@ void wd_cipher_uninit(void) - if (ret) - return; - -- wd_cipher_close_driver(); -+ wd_cipher_close_driver(WD_TYPE_V1); - wd_alg_clear_init(&wd_cipher_setting.status); - } - -@@ -421,16 +451,9 @@ int wd_cipher_init2_(char *alg, __u32 sched_type, int task_type, struct wd_ctx_p - goto out_uninit; - } - -- /* -- * Driver lib file path could set by env param. 
-- * then open tham by wd_dlopen_drv() -- * use NULL means dynamic query path -- */ -- wd_cipher_setting.dlh_list = wd_dlopen_drv(NULL); -- if (!wd_cipher_setting.dlh_list) { -- WD_ERR("fail to open driver lib files.\n"); -+ state = wd_cipher_open_driver(WD_TYPE_V2); -+ if (state) - goto out_uninit; -- } - - while (ret != 0) { - memset(&wd_cipher_setting.config, 0, sizeof(struct wd_ctx_config_internal)); -@@ -484,7 +507,7 @@ out_params_uninit: - out_driver: - wd_alg_drv_unbind(wd_cipher_setting.driver); - out_dlopen: -- wd_dlclose_drv(wd_cipher_setting.dlh_list); -+ wd_cipher_close_driver(WD_TYPE_V2); - out_uninit: - wd_alg_clear_init(&wd_cipher_setting.status); - return ret; -@@ -500,7 +523,7 @@ void wd_cipher_uninit2(void) - - wd_alg_attrs_uninit(&wd_cipher_init_attrs); - wd_alg_drv_unbind(wd_cipher_setting.driver); -- wd_dlclose_drv(wd_cipher_setting.dlh_list); -+ wd_cipher_close_driver(WD_TYPE_V2); - wd_cipher_setting.dlh_list = NULL; - wd_alg_clear_init(&wd_cipher_setting.status); - } -diff --git a/wd_comp.c b/wd_comp.c -index cabd17f..459223e 100644 ---- a/wd_comp.c -+++ b/wd_comp.c -@@ -54,22 +54,48 @@ struct wd_comp_setting { - struct wd_env_config wd_comp_env_config; - static struct wd_init_attrs wd_comp_init_attrs; - --static void wd_comp_close_driver(void) -+static void wd_comp_close_driver(int init_type) - { -+#ifndef WD_STATIC_DRV -+ if (init_type == WD_TYPE_V2) { -+ wd_dlclose_drv(wd_comp_setting.dlh_list); -+ return; -+ } -+ - if (wd_comp_setting.dlhandle) { - wd_release_drv(wd_comp_setting.driver); - dlclose(wd_comp_setting.dlhandle); - wd_comp_setting.dlhandle = NULL; - } -+#else -+ wd_release_drv(wd_comp_setting.driver); -+ hisi_zip_remove(); -+#endif - } - --static int wd_comp_open_driver(void) -+static int wd_comp_open_driver(int init_type) - { - struct wd_alg_driver *driver = NULL; -- char lib_path[PATH_MAX]; - const char *alg_name = "zlib"; -+#ifndef WD_STATIC_DRV -+ char lib_path[PATH_MAX]; - int ret; - -+ if (init_type == WD_TYPE_V2) { -+ /* 
-+ * Driver lib file path could set by env param. -+ * then open them by wd_dlopen_drv() -+ * use NULL means dynamic query path -+ */ -+ wd_comp_setting.dlh_list = wd_dlopen_drv(NULL); -+ if (!wd_comp_setting.dlh_list) { -+ WD_ERR("fail to open driver lib files.\n"); -+ return -WD_EINVAL; -+ } -+ -+ return WD_SUCCESS; -+ } -+ - ret = wd_get_lib_file_path("libhisi_zip.so", lib_path, false); - if (ret) - return ret; -@@ -79,17 +105,21 @@ static int wd_comp_open_driver(void) - WD_ERR("failed to open libhisi_zip.so, %s\n", dlerror()); - return -WD_EINVAL; - } -- -+#else -+ hisi_zip_probe(); -+ if (init_type == WD_TYPE_V2) -+ return WD_SUCCESS; -+#endif - driver = wd_request_drv(alg_name, false); - if (!driver) { -- wd_comp_close_driver(); -+ wd_comp_close_driver(WD_TYPE_V1); - WD_ERR("failed to get %s driver support\n", alg_name); - return -WD_EINVAL; - } - - wd_comp_setting.driver = driver; - -- return 0; -+ return WD_SUCCESS; - } - - static void wd_comp_clear_status(void) -@@ -185,7 +215,7 @@ int wd_comp_init(struct wd_ctx_config *config, struct wd_sched *sched) - if (ret) - goto out_clear_init; - -- ret = wd_comp_open_driver(); -+ ret = wd_comp_open_driver(WD_TYPE_V1); - if (ret) - goto out_clear_init; - -@@ -198,7 +228,7 @@ int wd_comp_init(struct wd_ctx_config *config, struct wd_sched *sched) - return 0; - - out_clear_driver: -- wd_comp_close_driver(); -+ wd_comp_close_driver(WD_TYPE_V1); - out_clear_init: - wd_alg_clear_init(&wd_comp_setting.status); - return ret; -@@ -212,7 +242,7 @@ void wd_comp_uninit(void) - if (ret) - return; - -- wd_comp_close_driver(); -+ wd_comp_close_driver(WD_TYPE_V1); - wd_alg_clear_init(&wd_comp_setting.status); - } - -@@ -241,16 +271,9 @@ int wd_comp_init2_(char *alg, __u32 sched_type, int task_type, struct wd_ctx_par - goto out_uninit; - } - -- /* -- * Driver lib file path could set by env param. 
-- * then open tham by wd_dlopen_drv() -- * use NULL means dynamic query path -- */ -- wd_comp_setting.dlh_list = wd_dlopen_drv(NULL); -- if (!wd_comp_setting.dlh_list) { -- WD_ERR("fail to open driver lib files.\n"); -+ state = wd_comp_open_driver(WD_TYPE_V2); -+ if (state) - goto out_uninit; -- } - - while (ret != 0) { - memset(&wd_comp_setting.config, 0, sizeof(struct wd_ctx_config_internal)); -@@ -303,7 +326,7 @@ out_params_uninit: - out_unbind_drv: - wd_alg_drv_unbind(wd_comp_setting.driver); - out_dlclose: -- wd_dlclose_drv(wd_comp_setting.dlh_list); -+ wd_comp_close_driver(WD_TYPE_V2); - out_uninit: - wd_alg_clear_init(&wd_comp_setting.status); - return ret; -@@ -319,7 +342,7 @@ void wd_comp_uninit2(void) - - wd_alg_attrs_uninit(&wd_comp_init_attrs); - wd_alg_drv_unbind(wd_comp_setting.driver); -- wd_dlclose_drv(wd_comp_setting.dlh_list); -+ wd_comp_close_driver(WD_TYPE_V2); - wd_comp_setting.dlh_list = NULL; - wd_alg_clear_init(&wd_comp_setting.status); - } -diff --git a/wd_dh.c b/wd_dh.c -index 4d08de6..36b0cd7 100644 ---- a/wd_dh.c -+++ b/wd_dh.c -@@ -41,23 +41,49 @@ static struct wd_dh_setting { - struct wd_env_config wd_dh_env_config; - static struct wd_init_attrs wd_dh_init_attrs; - --static void wd_dh_close_driver(void) -+static void wd_dh_close_driver(int init_type) - { -+#ifndef WD_STATIC_DRV -+ if (init_type == WD_TYPE_V2) { -+ wd_dlclose_drv(wd_dh_setting.dlh_list); -+ return; -+ } -+ - if (!wd_dh_setting.dlhandle) - return; - - wd_release_drv(wd_dh_setting.driver); - dlclose(wd_dh_setting.dlhandle); - wd_dh_setting.dlhandle = NULL; -+#else -+ wd_release_drv(wd_dh_setting.driver); -+ hisi_hpre_remove(); -+#endif - } - --static int wd_dh_open_driver(void) -+static int wd_dh_open_driver(int init_type) - { - struct wd_alg_driver *driver = NULL; -- char lib_path[PATH_MAX]; - const char *alg_name = "dh"; -+#ifndef WD_STATIC_DRV -+ char lib_path[PATH_MAX]; - int ret; - -+ if (init_type == WD_TYPE_V2) { -+ /* -+ * Driver lib file path could set by env 
param. -+ * then open them by wd_dlopen_drv() -+ * default dir in the /root/lib/xxx.so and then dlopen -+ */ -+ wd_dh_setting.dlh_list = wd_dlopen_drv(NULL); -+ if (!wd_dh_setting.dlh_list) { -+ WD_ERR("failed to open driver lib files.\n"); -+ return -WD_EINVAL; -+ } -+ -+ return WD_SUCCESS; -+ } -+ - ret = wd_get_lib_file_path("libhisi_hpre.so", lib_path, false); - if (ret) - return ret; -@@ -67,10 +93,14 @@ static int wd_dh_open_driver(void) - WD_ERR("failed to open libhisi_hpre.so, %s!\n", dlerror()); - return -WD_EINVAL; - } -- -+#else -+ hisi_hpre_probe(); -+ if (init_type == WD_TYPE_V2) -+ return WD_SUCCESS; -+#endif - driver = wd_request_drv(alg_name, false); - if (!driver) { -- wd_dh_close_driver(); -+ wd_dh_close_driver(WD_TYPE_V1); - WD_ERR("failed to get %s driver support\n", alg_name); - return -WD_EINVAL; - } -@@ -158,7 +188,7 @@ int wd_dh_init(struct wd_ctx_config *config, struct wd_sched *sched) - if (ret) - goto out_clear_init; - -- ret = wd_dh_open_driver(); -+ ret = wd_dh_open_driver(WD_TYPE_V1); - if (ret) - goto out_clear_init; - -@@ -171,7 +201,7 @@ int wd_dh_init(struct wd_ctx_config *config, struct wd_sched *sched) - return WD_SUCCESS; - - out_close_driver: -- wd_dh_close_driver(); -+ wd_dh_close_driver(WD_TYPE_V1); - out_clear_init: - wd_alg_clear_init(&wd_dh_setting.status); - return ret; -@@ -185,7 +215,7 @@ void wd_dh_uninit(void) - if (ret) - return; - -- wd_dh_close_driver(); -+ wd_dh_close_driver(WD_TYPE_V1); - wd_alg_clear_init(&wd_dh_setting.status); - } - -@@ -212,16 +242,9 @@ int wd_dh_init2_(char *alg, __u32 sched_type, int task_type, struct wd_ctx_param - goto out_clear_init; - } - -- /* -- * Driver lib file path could set by env param. 
-- * than open tham by wd_dlopen_drv() -- * default dir in the /root/lib/xxx.so and then dlopen -- */ -- wd_dh_setting.dlh_list = wd_dlopen_drv(NULL); -- if (!wd_dh_setting.dlh_list) { -- WD_ERR("failed to open driver lib files!\n"); -+ state = wd_dh_open_driver(WD_TYPE_V2); -+ if (state) - goto out_clear_init; -- } - - while (ret) { - memset(&wd_dh_setting.config, 0, sizeof(struct wd_ctx_config_internal)); -@@ -275,7 +298,7 @@ out_params_uninit: - out_driver: - wd_alg_drv_unbind(wd_dh_setting.driver); - out_dlopen: -- wd_dlclose_drv(wd_dh_setting.dlh_list); -+ wd_dh_close_driver(WD_TYPE_V2); - out_clear_init: - wd_alg_clear_init(&wd_dh_setting.status); - return ret; -@@ -291,7 +314,7 @@ void wd_dh_uninit2(void) - - wd_alg_attrs_uninit(&wd_dh_init_attrs); - wd_alg_drv_unbind(wd_dh_setting.driver); -- wd_dlclose_drv(wd_dh_setting.dlh_list); -+ wd_dh_close_driver(WD_TYPE_V2); - wd_dh_setting.dlh_list = NULL; - wd_alg_clear_init(&wd_dh_setting.status); - } -diff --git a/wd_digest.c b/wd_digest.c -index 0df7204..7449259 100644 ---- a/wd_digest.c -+++ b/wd_digest.c -@@ -73,22 +73,48 @@ struct wd_digest_sess { - struct wd_env_config wd_digest_env_config; - static struct wd_init_attrs wd_digest_init_attrs; - --static void wd_digest_close_driver(void) -+static void wd_digest_close_driver(int init_type) - { -+#ifndef WD_STATIC_DRV -+ if (init_type == WD_TYPE_V2) { -+ wd_dlclose_drv(wd_digest_setting.dlh_list); -+ return; -+ } -+ - if (wd_digest_setting.dlhandle) { - wd_release_drv(wd_digest_setting.driver); - dlclose(wd_digest_setting.dlhandle); - wd_digest_setting.dlhandle = NULL; - } -+#else -+ wd_release_drv(wd_digest_setting.driver); -+ hisi_sec2_remove(); -+#endif - } - --static int wd_digest_open_driver(void) -+static int wd_digest_open_driver(int init_type) - { - struct wd_alg_driver *driver = NULL; - const char *alg_name = "sm3"; -+#ifndef WD_STATIC_DRV - char lib_path[PATH_MAX]; - int ret; - -+ if (init_type == WD_TYPE_V2) { -+ /* -+ * Driver lib file path could 
set by env param. -+ * then open tham by wd_dlopen_drv() -+ * use NULL means dynamic query path -+ */ -+ wd_digest_setting.dlh_list = wd_dlopen_drv(NULL); -+ if (!wd_digest_setting.dlh_list) { -+ WD_ERR("fail to open driver lib files.\n"); -+ return -WD_EINVAL; -+ } -+ -+ return WD_SUCCESS; -+ } -+ - ret = wd_get_lib_file_path("libhisi_sec.so", lib_path, false); - if (ret) - return ret; -@@ -98,17 +124,21 @@ static int wd_digest_open_driver(void) - WD_ERR("failed to open libhisi_sec.so, %s\n", dlerror()); - return -WD_EINVAL; - } -- -+#else -+ hisi_sec2_probe(); -+ if (init_type == WD_TYPE_V2) -+ return WD_SUCCESS; -+#endif - driver = wd_request_drv(alg_name, false); - if (!driver) { -- wd_digest_close_driver(); -+ wd_digest_close_driver(WD_TYPE_V1); - WD_ERR("failed to get %s driver support\n", alg_name); - return -WD_EINVAL; - } - - wd_digest_setting.driver = driver; - -- return 0; -+ return WD_SUCCESS; - } - - static int aes_key_len_check(__u32 length) -@@ -277,7 +307,7 @@ int wd_digest_init(struct wd_ctx_config *config, struct wd_sched *sched) - if (ret) - goto out_clear_init; - -- ret = wd_digest_open_driver(); -+ ret = wd_digest_open_driver(WD_TYPE_V1); - if (ret) - goto out_clear_init; - -@@ -290,7 +320,7 @@ int wd_digest_init(struct wd_ctx_config *config, struct wd_sched *sched) - return 0; - - out_close_driver: -- wd_digest_close_driver(); -+ wd_digest_close_driver(WD_TYPE_V1); - out_clear_init: - wd_alg_clear_init(&wd_digest_setting.status); - return ret; -@@ -319,7 +349,7 @@ void wd_digest_uninit(void) - if (ret) - return; - -- wd_digest_close_driver(); -+ wd_digest_close_driver(WD_TYPE_V1); - wd_alg_clear_init(&wd_digest_setting.status); - } - -@@ -356,16 +386,11 @@ int wd_digest_init2_(char *alg, __u32 sched_type, int task_type, - WD_ERR("invalid: digest:%s unsupported!\n", alg); - goto out_uninit; - } -- /* -- * Driver lib file path could set by env param. 
-- * then open them by wd_dlopen_drv() -- * use NULL means dynamic query path -- */ -- wd_digest_setting.dlh_list = wd_dlopen_drv(NULL); -- if (!wd_digest_setting.dlh_list) { -- WD_ERR("failed to open driver lib files.\n"); -+ -+ state = wd_digest_open_driver(WD_TYPE_V2); -+ if (state) - goto out_uninit; -- } -+ - - while (ret != 0) { - memset(&wd_digest_setting.config, 0, sizeof(struct wd_ctx_config_internal)); -@@ -417,7 +442,7 @@ out_params_uninit: - out_driver: - wd_alg_drv_unbind(wd_digest_setting.driver); - out_dlopen: -- wd_dlclose_drv(wd_digest_setting.dlh_list); -+ wd_digest_close_driver(WD_TYPE_V2); - out_uninit: - wd_alg_clear_init(&wd_digest_setting.status); - return ret; -@@ -433,7 +458,7 @@ void wd_digest_uninit2(void) - - wd_alg_attrs_uninit(&wd_digest_init_attrs); - wd_alg_drv_unbind(wd_digest_setting.driver); -- wd_dlclose_drv(wd_digest_setting.dlh_list); -+ wd_digest_close_driver(WD_TYPE_V2); - wd_digest_setting.dlh_list = NULL; - wd_alg_clear_init(&wd_digest_setting.status); - } -diff --git a/wd_ecc.c b/wd_ecc.c -index e75bca0..24f167f 100644 ---- a/wd_ecc.c -+++ b/wd_ecc.c -@@ -95,23 +95,49 @@ static const struct curve_param_desc curve_pram_list[] = { - { ECC_CURVE_G, offsetof(struct wd_ecc_prikey, g), offsetof(struct wd_ecc_pubkey, g) } - }; - --static void wd_ecc_close_driver(void) -+static void wd_ecc_close_driver(int init_type) - { -+#ifndef WD_STATIC_DRV -+ if (init_type == WD_TYPE_V2) { -+ wd_dlclose_drv(wd_ecc_setting.dlh_list); -+ return; -+ } -+ - if (!wd_ecc_setting.dlhandle) - return; - - wd_release_drv(wd_ecc_setting.driver); - dlclose(wd_ecc_setting.dlhandle); - wd_ecc_setting.dlhandle = NULL; -+#else -+ wd_release_drv(wd_ecc_setting.driver); -+ hisi_hpre_remove(); -+#endif - } - --static int wd_ecc_open_driver(void) -+static int wd_ecc_open_driver(int init_type) - { - struct wd_alg_driver *driver = NULL; -- char lib_path[PATH_MAX]; - const char *alg_name = "sm2"; -+#ifndef WD_STATIC_DRV -+ char lib_path[PATH_MAX]; - int ret; - -+ 
if (init_type == WD_TYPE_V2) { -+ /* -+ * Driver lib file path could set by env param. -+ * then open them by wd_dlopen_drv() -+ * default dir in the /root/lib/xxx.so and then dlopen -+ */ -+ wd_ecc_setting.dlh_list = wd_dlopen_drv(NULL); -+ if (!wd_ecc_setting.dlh_list) { -+ WD_ERR("failed to open driver lib files.\n"); -+ return -WD_EINVAL; -+ } -+ -+ return WD_SUCCESS; -+ } -+ - ret = wd_get_lib_file_path("libhisi_hpre.so", lib_path, false); - if (ret) - return ret; -@@ -121,10 +147,14 @@ static int wd_ecc_open_driver(void) - WD_ERR("failed to open libhisi_hpre.so, %s!\n", dlerror()); - return -WD_EINVAL; - } -- -+#else -+ hisi_hpre_probe(); -+ if (init_type == WD_TYPE_V2) -+ return WD_SUCCESS; -+#endif - driver = wd_request_drv(alg_name, false); - if (!driver) { -- wd_ecc_close_driver(); -+ wd_ecc_close_driver(WD_TYPE_V1); - WD_ERR("failed to get %s driver support\n", alg_name); - return -WD_EINVAL; - } -@@ -221,7 +251,7 @@ int wd_ecc_init(struct wd_ctx_config *config, struct wd_sched *sched) - if (ret) - goto out_clear_init; - -- ret = wd_ecc_open_driver(); -+ ret = wd_ecc_open_driver(WD_TYPE_V1); - if (ret) - goto out_clear_init; - -@@ -234,7 +264,7 @@ int wd_ecc_init(struct wd_ctx_config *config, struct wd_sched *sched) - return WD_SUCCESS; - - out_close_driver: -- wd_ecc_close_driver(); -+ wd_ecc_close_driver(WD_TYPE_V1); - out_clear_init: - wd_alg_clear_init(&wd_ecc_setting.status); - return ret; -@@ -248,7 +278,7 @@ void wd_ecc_uninit(void) - if (ret) - return; - -- wd_ecc_close_driver(); -+ wd_ecc_close_driver(WD_TYPE_V1); - wd_alg_clear_init(&wd_ecc_setting.status); - } - -@@ -277,16 +307,9 @@ int wd_ecc_init2_(char *alg, __u32 sched_type, int task_type, struct wd_ctx_para - goto out_clear_init; - } - -- /* -- * Driver lib file path could set by env param. 
-- * than open tham by wd_dlopen_drv() -- * default dir in the /root/lib/xxx.so and then dlopen -- */ -- wd_ecc_setting.dlh_list = wd_dlopen_drv(NULL); -- if (!wd_ecc_setting.dlh_list) { -- WD_ERR("failed to open driver lib files!\n"); -+ state = wd_ecc_open_driver(WD_TYPE_V2); -+ if (state) - goto out_clear_init; -- } - - while (ret) { - memset(&wd_ecc_setting.config, 0, sizeof(struct wd_ctx_config_internal)); -@@ -340,7 +363,7 @@ out_params_uninit: - out_driver: - wd_alg_drv_unbind(wd_ecc_setting.driver); - out_dlopen: -- wd_dlclose_drv(wd_ecc_setting.dlh_list); -+ wd_ecc_close_driver(WD_TYPE_V2); - out_clear_init: - wd_alg_clear_init(&wd_ecc_setting.status); - return ret; -@@ -356,7 +379,7 @@ void wd_ecc_uninit2(void) - - wd_alg_attrs_uninit(&wd_ecc_init_attrs); - wd_alg_drv_unbind(wd_ecc_setting.driver); -- wd_dlclose_drv(wd_ecc_setting.dlh_list); -+ wd_ecc_close_driver(WD_TYPE_V2); - wd_ecc_setting.dlh_list = NULL; - wd_alg_clear_init(&wd_ecc_setting.status); - } -diff --git a/wd_rsa.c b/wd_rsa.c -index 8e51177..f7f815c 100644 ---- a/wd_rsa.c -+++ b/wd_rsa.c -@@ -82,23 +82,49 @@ static struct wd_rsa_setting { - struct wd_env_config wd_rsa_env_config; - static struct wd_init_attrs wd_rsa_init_attrs; - --static void wd_rsa_close_driver(void) -+static void wd_rsa_close_driver(int init_type) - { -+#ifndef WD_STATIC_DRV -+ if (init_type == WD_TYPE_V2) { -+ wd_dlclose_drv(wd_rsa_setting.dlh_list); -+ return; -+ } -+ - if (!wd_rsa_setting.dlhandle) - return; - - wd_release_drv(wd_rsa_setting.driver); - dlclose(wd_rsa_setting.dlhandle); - wd_rsa_setting.dlhandle = NULL; -+#else -+ wd_release_drv(wd_rsa_setting.driver); -+ hisi_hpre_remove(); -+#endif - } - --static int wd_rsa_open_driver(void) -+static int wd_rsa_open_driver(int init_type) - { - struct wd_alg_driver *driver = NULL; -- char lib_path[PATH_MAX]; - const char *alg_name = "rsa"; -+#ifndef WD_STATIC_DRV -+ char lib_path[PATH_MAX]; - int ret; - -+ if (init_type == WD_TYPE_V2) { -+ /* -+ * Driver lib file 
path could set by env param. -+ * then open them by wd_dlopen_drv() -+ * default dir in the /root/lib/xxx.so and then dlopen -+ */ -+ wd_rsa_setting.dlh_list = wd_dlopen_drv(NULL); -+ if (!wd_rsa_setting.dlh_list) { -+ WD_ERR("failed to open driver lib files.\n"); -+ return -WD_EINVAL; -+ } -+ -+ return WD_SUCCESS; -+ } -+ - ret = wd_get_lib_file_path("libhisi_hpre.so", lib_path, false); - if (ret) - return ret; -@@ -108,10 +134,14 @@ static int wd_rsa_open_driver(void) - WD_ERR("failed to open libhisi_hpre.so, %s!\n", dlerror()); - return -WD_EINVAL; - } -- -+#else -+ hisi_hpre_probe(); -+ if (init_type == WD_TYPE_V2) -+ return WD_SUCCESS; -+#endif - driver = wd_request_drv(alg_name, false); - if (!driver) { -- wd_rsa_close_driver(); -+ wd_rsa_close_driver(WD_TYPE_V1); - WD_ERR("failed to get %s driver support!\n", alg_name); - return -WD_EINVAL; - } -@@ -198,7 +228,7 @@ int wd_rsa_init(struct wd_ctx_config *config, struct wd_sched *sched) - if (ret) - goto out_clear_init; - -- ret = wd_rsa_open_driver(); -+ ret = wd_rsa_open_driver(WD_TYPE_V1); - if (ret) - goto out_clear_init; - -@@ -211,7 +241,7 @@ int wd_rsa_init(struct wd_ctx_config *config, struct wd_sched *sched) - return WD_SUCCESS; - - out_close_driver: -- wd_rsa_close_driver(); -+ wd_rsa_close_driver(WD_TYPE_V1); - out_clear_init: - wd_alg_clear_init(&wd_rsa_setting.status); - return ret; -@@ -225,7 +255,7 @@ void wd_rsa_uninit(void) - if (ret) - return; - -- wd_rsa_close_driver(); -+ wd_rsa_close_driver(WD_TYPE_V1); - wd_alg_clear_init(&wd_rsa_setting.status); - } - -@@ -252,16 +282,9 @@ int wd_rsa_init2_(char *alg, __u32 sched_type, int task_type, struct wd_ctx_para - goto out_clear_init; - } - -- /* -- * Driver lib file path could set by env param. 
-- * than open tham by wd_dlopen_drv() -- * default dir in the /root/lib/xxx.so and then dlopen -- */ -- wd_rsa_setting.dlh_list = wd_dlopen_drv(NULL); -- if (!wd_rsa_setting.dlh_list) { -- WD_ERR("failed to open driver lib files!\n"); -+ state = wd_rsa_open_driver(WD_TYPE_V2); -+ if (state) - goto out_clear_init; -- } - - while (ret) { - memset(&wd_rsa_setting.config, 0, sizeof(struct wd_ctx_config_internal)); -@@ -315,7 +338,7 @@ out_params_uninit: - out_driver: - wd_alg_drv_unbind(wd_rsa_setting.driver); - out_dlopen: -- wd_dlclose_drv(wd_rsa_setting.dlh_list); -+ wd_rsa_close_driver(WD_TYPE_V2); - out_clear_init: - wd_alg_clear_init(&wd_rsa_setting.status); - return ret; -@@ -331,7 +354,7 @@ void wd_rsa_uninit2(void) - - wd_alg_attrs_uninit(&wd_rsa_init_attrs); - wd_alg_drv_unbind(wd_rsa_setting.driver); -- wd_dlclose_drv(wd_rsa_setting.dlh_list); -+ wd_rsa_close_driver(WD_TYPE_V2); - wd_rsa_setting.dlh_list = NULL; - wd_alg_clear_init(&wd_rsa_setting.status); - } --- -2.25.1 - diff --git a/libwd-2.6.0.tar.gz b/libwd-2.6.0.tar.gz deleted file mode 100644 index d531d16..0000000 Binary files a/libwd-2.6.0.tar.gz and /dev/null differ diff --git a/libwd-2.7.0.tar.gz b/libwd-2.7.0.tar.gz new file mode 100644 index 0000000..e80452f Binary files /dev/null and b/libwd-2.7.0.tar.gz differ diff --git a/libwd.spec b/libwd.spec index 5a06c72..a8b573e 100644 --- a/libwd.spec +++ b/libwd.spec @@ -1,8 +1,8 @@ %define soversion 2 Name: libwd Summary: User Space Accelerator Development Kit -Version: 2.6.0 -Release: 3 +Version: 2.7.0 +Release: 1 License: Apache-2.0 Source: %{name}-%{version}.tar.gz @@ -12,63 +12,27 @@ URL: https://support.huawei.com BuildRoot: %{_tmppath}/%{name}-%{version}-root Conflicts: %{name} < %{version}-%{release} Provides: %{name} = %{version}-%{release} -BuildRequires: numactl-devel, compat-openssl11-devel, zlib-devel +BuildRequires: numactl-devel, openssl-devel, zlib-devel BuildRequires: automake, autoconf, libtool, chrpath BuildRequires: gcc, make 
ExclusiveArch: aarch64 -Patch01: 0001-uadk-fix-build-issue-of-pthread_atfork.patch -Patch02: 0002-uadk-fix-static-build-error.patch -Patch03: 0003-uadk-add-secure-compilation-option.patch -Patch04: 0004-uadk_tool-fix-build-error.patch -Patch05: 0005-v1-fix-build-error.patch -Patch06: 0006-wd_mempool-fix-build-error.patch -Patch07: 0007-wd_rsa-fix-build-error.patch -Patch08: 0008-test-fix-build-error.patch -Patch0009: 0009-uadk-sec-move-function-to-wd_digest_drv.h.patch -Patch0010: 0010-uadk-digest-add-partial_block-to-store-partial-data.patch -Patch0011: 0011-uadk-digest-add-wd_ctx_spin_lock-function.patch -Patch0012: 0012-uadk-remove-redundant-header-file-in-makefile.patch -Patch0013: 0013-uadk-isa-ce-support-sm3-ce-instruction.patch -Patch0014: 0014-uadk-fix-control-range-of-environmemt-variable.patch -Patch0015: 0015-uadk-util-use-default-sched_type-for-instruction-tas.patch -Patch0016: 0016-uadk-digest-modify-spelling-errors.patch -Patch0017: 0017-uadk-drv-hisi-fix-failed-to-init-drv-after-fork.patch -Patch0018: 0018-wd_rsa-fix-wd_rsa_common_uninit-re-entry.patch -Patch0019: 0019-wd_dh-Fix-wd_aead_uninit-re-entry.patch -Patch0020: 0020-wd_ecc-Fix-wd_ecc_uninit-re-entry.patch -Patch0021: 0021-wd_digest-uninit-check-status-in-one-func.patch -Patch0022: 0022-wd_aead-uninit-check-status-in-one-func.patch -Patch0023: 0023-makefile-install-wd_zlibwrapper.h-to-system.patch -Patch0024: 0024-conf-fix-includedir.patch -Patch0025: 0025-cipher-add-support-for-SM4-CBC-and-CTR-modes-in-CE-i.patch -Patch0026: 0026-cipher-add-support-for-SM4-CFB-and-XTS-modes-in-CE-i.patch -Patch0027: 0027-cipher-add-support-for-SM4-ECB-algorithm-in-CE-instr.patch -Patch0028: 0028-uadk-cipher-isa_ce-support-SM4-cbc_cts-mode.patch -Patch0029: 0029-uadk-wd_alg-check-whether-the-platform-supports-SVE.patch -Patch0030: 0030-uadk-sched-fix-async-mode-ctx-id.patch -Patch0031: 0031-uadk-initializes-ctx-resources-in-SVE-mode.patch -Patch0032: 
0032-uadk-hash_mb-support-multi-buffer-calculation-for-sm.patch -Patch0033: 0033-uadk_tool-fix-aead-performance-test-issue.patch -Patch0034: 0034-uadk_tool-fix-the-logic-for-counting-retransmissions.patch -Patch0035: 0035-uadk-tools-support-the-nosva-test-of-a-specified-dev.patch -Patch0036: 0036-uadk-tools-support-designated-device-testing.patch -Patch0037: 0037-uadk_tool-support-sm3-ce-benchmark-and-function-test.patch -Patch0038: 0038-uadk_tool-support-sm4-ce-benchmark-test.patch -Patch0039: 0039-uadk_tool-support-sm3-md5-multibuff-benchmark-test.patch -Patch0040: 0040-uadk-tool-fix-the-msg-pool-release-bug-of-async-zip-.patch -Patch0041: 0041-uadk_tool-fix-queue-application-failure-from-multipl.patch -Patch0042: 0042-ecc-check-need_debug-before-calling-WD_DEBUG.patch -Patch0043: 0043-uadk-remove-unused-ioctl-cmd.patch -Patch0044: 0044-uadk-v1-remove-dummy.patch -Patch0045: 0045-cipher-optimze-input-lengths-check.patch -Patch0046: 0046-uadk-v1-improve-the-judgment-conditions-of-tag.patch -Patch0047: 0047-uadk-v1-fix-for-sec-cipher-bd1-ci_gen-configuration.patch -Patch0048: 0048-uadk-fix-for-shmget-shmflag.patch -Patch0049: 0049-sec-optimze-for-directly-assigning-values-to-structu.patch -Patch0050: 0050-util-optimize-for-wd_handle_msg_sync.patch -Patch0051: 0051-uadk-drv_hisi-optimize-qm-recv-function.patch -Patch0052: 0052-uadk-modify-uadk-static-compile.patch +Patch0001: 0001-uadk_tool-benchmark-skip-sm4-benchmark-if-openssl-wi.patch +Patch0002: 0002-uadk-v1-hpre-remove-redundant-comments.patch +Patch0003: 0003-uadk-v1-fix-for-atomic-memory-order.patch +Patch0004: 0004-uadk-replace-wd_lock-to-pthread_spinlock.patch +Patch0005: 0005-uadk-v1-fix-for-wd_lock-implementation.patch +Patch0006: 0006-uadk-fix-for-env-uninit-segment-fault.patch +Patch0007: 0007-uadk-v1-drv-hisi_zip_udrv-fix-the-wrong-literal-buff.patch +Patch0008: 0008-uadk-v1-replace-wd_spinlock-to-pthread_spin_lock.patch +Patch0009: 0009-uadk_tools-add-segfault-locating-function.patch +Patch0010: 
0010-uadk-bugfix-CE-driver-initialization-problem.patch +Patch0011: 0011-uadk-v1-fix-for-sec_dump_bd.patch +Patch0012: 0012-uadk-v1-fix-for-wd_recv_sync-print.patch +Patch0013: 0013-uadk-v1-update-the-symbol-table-for-libraries.patch +Patch0014: 0014-uadk-modify-address-check.patch +Patch0015: 0015-uadk-check-calloc-return-value.patch +Patch0016: 0016-drv-hisi-sec-modify-minor-errors-in-hisi_sec.c.patch %description This package contains the User Space Accelerator Library @@ -88,6 +52,7 @@ make mkdir -p ${RPM_BUILD_ROOT}%{_libdir}/uadk install -b -m755 .libs/libwd*.so.%{version} ${RPM_BUILD_ROOT}%{_libdir} install -b -m755 .libs/libhisi_*.so.%{version} ${RPM_BUILD_ROOT}%{_libdir}/uadk +install -b -m755 .libs/libisa_*.so.%{version} ${RPM_BUILD_ROOT}%{_libdir}/uadk # create symbolic link for lib in $RPM_BUILD_ROOT%{_libdir}/*.so.%{version} ; do @@ -101,9 +66,9 @@ done chrpath -d ${RPM_BUILD_ROOT}%{_libdir}/libwd*.so.%{version} chrpath -d ${RPM_BUILD_ROOT}%{_libdir}/uadk/libhisi_*.so.%{version} +chrpath -d ${RPM_BUILD_ROOT}%{_libdir}/uadk/libisa_*.so.%{version} -mkdir -p ${RPM_BUILD_ROOT}%{_includedir}/warpdrive/include -cp v1/uacce.h ${RPM_BUILD_ROOT}%{_includedir}/warpdrive/include +mkdir -p ${RPM_BUILD_ROOT}%{_includedir}/warpdrive cp v1/*.h ${RPM_BUILD_ROOT}%{_includedir}/warpdrive mkdir -p ${RPM_BUILD_ROOT}%{_includedir}/uadk/v1 @@ -125,17 +90,17 @@ rm -rf ${RPM_BUILD_ROOT} %{_libdir}/uadk/libhisi_*.so.%{version} %{_libdir}/uadk/libhisi_*.so %{_libdir}/uadk/libhisi_*.so.%{soversion} +%{_libdir}/uadk/libisa_*.so.%{version} +%{_libdir}/uadk/libisa_*.so +%{_libdir}/uadk/libisa_*.so.%{soversion} %defattr(644,root,root) -%{_includedir}/warpdrive/include/uacce.h %{_includedir}/warpdrive/*.h %{_includedir}/uadk/*.h %{_includedir}/uadk/v1/*.h %{_libdir}/pkgconfig/*.pc -%exclude %{_includedir}/warpdrive/uacce.h %exclude %{_includedir}/warpdrive/wd_util.h %exclude %{_includedir}/warpdrive/wd_adapter.h %exclude %{_includedir}/uadk/wd_util.h -%exclude 
%{_includedir}/uadk/hisi_qm_udrv.h %exclude %{_includedir}/uadk/v1/wd_util.h %exclude %{_includedir}/uadk/v1/wd_adapter.h @@ -146,6 +111,9 @@ rm -rf ${RPM_BUILD_ROOT} /sbin/ldconfig %changelog +* Fri Aug 23 2024 JiangShui Yang 2.7.0-1 +- libwd: update the source code to 2.7.0 + * Sun Apr 7 2024 JiangShui Yang 2.6.0-3 - libwd: update the source code