mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-01-07 13:53:24 +00:00
Merge branch 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6
Pull crypto updates from Herbert Xu: "API: - Remove VLA usage - Add cryptostat user-space interface - Add notifier for new crypto algorithms Algorithms: - Add OFB mode - Remove speck Drivers: - Remove x86/sha*-mb as they are buggy - Remove pcbc(aes) from x86/aesni - Improve performance of arm/ghash-ce by up to 85% - Implement CTS-CBC in arm64/aes-blk, faster by up to 50% - Remove PMULL based arm64/crc32 driver - Use PMULL in arm64/crct10dif - Add aes-ctr support in s5p-sss - Add caam/qi2 driver Others: - Pick better transform if one becomes available in crc-t10dif" * 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: (124 commits) crypto: chelsio - Update ntx queue received from cxgb4 crypto: ccree - avoid implicit enum conversion crypto: caam - add SPDX license identifier to all files crypto: caam/qi - simplify CGR allocation, freeing crypto: mxs-dcp - make symbols 'sha1_null_hash' and 'sha256_null_hash' static crypto: arm64/aes-blk - ensure XTS mask is always loaded crypto: testmgr - fix sizeof() on COMP_BUF_SIZE crypto: chtls - remove set but not used variable 'csk' crypto: axis - fix platform_no_drv_owner.cocci warnings crypto: x86/aes-ni - fix build error following fpu template removal crypto: arm64/aes - fix handling sub-block CTS-CBC inputs crypto: caam/qi2 - avoid double export crypto: mxs-dcp - Fix AES issues crypto: mxs-dcp - Fix SHA null hashes and output length crypto: mxs-dcp - Implement sha import/export crypto: aegis/generic - fix for big endian systems crypto: morus/generic - fix for big endian systems crypto: lrw - fix rebase error after out of bounds fix crypto: cavium/nitrox - use pci_alloc_irq_vectors() while enabling MSI-X. crypto: cavium/nitrox - NITROX command queue changes. ...
This commit is contained in:
commit
62606c224d
@ -191,21 +191,11 @@ Currently, the following pairs of encryption modes are supported:
|
||||
|
||||
- AES-256-XTS for contents and AES-256-CTS-CBC for filenames
|
||||
- AES-128-CBC for contents and AES-128-CTS-CBC for filenames
|
||||
- Speck128/256-XTS for contents and Speck128/256-CTS-CBC for filenames
|
||||
|
||||
It is strongly recommended to use AES-256-XTS for contents encryption.
|
||||
AES-128-CBC was added only for low-powered embedded devices with
|
||||
crypto accelerators such as CAAM or CESA that do not support XTS.
|
||||
|
||||
Similarly, Speck128/256 support was only added for older or low-end
|
||||
CPUs which cannot do AES fast enough -- especially ARM CPUs which have
|
||||
NEON instructions but not the Cryptography Extensions -- and for which
|
||||
it would not otherwise be feasible to use encryption at all. It is
|
||||
not recommended to use Speck on CPUs that have AES instructions.
|
||||
Speck support is only available if it has been enabled in the crypto
|
||||
API via CONFIG_CRYPTO_SPECK. Also, on ARM platforms, to get
|
||||
acceptable performance CONFIG_CRYPTO_SPECK_NEON must be enabled.
|
||||
|
||||
New encryption modes can be added relatively easily, without changes
|
||||
to individual filesystems. However, authenticated encryption (AE)
|
||||
modes are not currently supported because of the difficulty of dealing
|
||||
|
@ -7578,14 +7578,6 @@ S: Supported
|
||||
F: drivers/infiniband/hw/i40iw/
|
||||
F: include/uapi/rdma/i40iw-abi.h
|
||||
|
||||
INTEL SHA MULTIBUFFER DRIVER
|
||||
M: Megha Dey <megha.dey@linux.intel.com>
|
||||
R: Tim Chen <tim.c.chen@linux.intel.com>
|
||||
L: linux-crypto@vger.kernel.org
|
||||
S: Supported
|
||||
F: arch/x86/crypto/sha*-mb/
|
||||
F: crypto/mcryptd.c
|
||||
|
||||
INTEL TELEMETRY DRIVER
|
||||
M: Souvik Kumar Chakravarty <souvik.k.chakravarty@intel.com>
|
||||
L: platform-driver-x86@vger.kernel.org
|
||||
|
@ -99,6 +99,7 @@ config CRYPTO_GHASH_ARM_CE
|
||||
depends on KERNEL_MODE_NEON
|
||||
select CRYPTO_HASH
|
||||
select CRYPTO_CRYPTD
|
||||
select CRYPTO_GF128MUL
|
||||
help
|
||||
Use an implementation of GHASH (used by the GCM AEAD chaining mode)
|
||||
that uses the 64x64 to 128 bit polynomial multiplication (vmull.p64)
|
||||
@ -121,10 +122,4 @@ config CRYPTO_CHACHA20_NEON
|
||||
select CRYPTO_BLKCIPHER
|
||||
select CRYPTO_CHACHA20
|
||||
|
||||
config CRYPTO_SPECK_NEON
|
||||
tristate "NEON accelerated Speck cipher algorithms"
|
||||
depends on KERNEL_MODE_NEON
|
||||
select CRYPTO_BLKCIPHER
|
||||
select CRYPTO_SPECK
|
||||
|
||||
endif
|
||||
|
@ -10,7 +10,6 @@ obj-$(CONFIG_CRYPTO_SHA1_ARM_NEON) += sha1-arm-neon.o
|
||||
obj-$(CONFIG_CRYPTO_SHA256_ARM) += sha256-arm.o
|
||||
obj-$(CONFIG_CRYPTO_SHA512_ARM) += sha512-arm.o
|
||||
obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha20-neon.o
|
||||
obj-$(CONFIG_CRYPTO_SPECK_NEON) += speck-neon.o
|
||||
|
||||
ce-obj-$(CONFIG_CRYPTO_AES_ARM_CE) += aes-arm-ce.o
|
||||
ce-obj-$(CONFIG_CRYPTO_SHA1_ARM_CE) += sha1-arm-ce.o
|
||||
@ -54,7 +53,6 @@ ghash-arm-ce-y := ghash-ce-core.o ghash-ce-glue.o
|
||||
crct10dif-arm-ce-y := crct10dif-ce-core.o crct10dif-ce-glue.o
|
||||
crc32-arm-ce-y:= crc32-ce-core.o crc32-ce-glue.o
|
||||
chacha20-neon-y := chacha20-neon-core.o chacha20-neon-glue.o
|
||||
speck-neon-y := speck-neon-core.o speck-neon-glue.o
|
||||
|
||||
ifdef REGENERATE_ARM_CRYPTO
|
||||
quiet_cmd_perl = PERL $@
|
||||
|
@ -18,6 +18,34 @@
|
||||
* (at your option) any later version.
|
||||
*/
|
||||
|
||||
/*
|
||||
* NEON doesn't have a rotate instruction. The alternatives are, more or less:
|
||||
*
|
||||
* (a) vshl.u32 + vsri.u32 (needs temporary register)
|
||||
* (b) vshl.u32 + vshr.u32 + vorr (needs temporary register)
|
||||
* (c) vrev32.16 (16-bit rotations only)
|
||||
* (d) vtbl.8 + vtbl.8 (multiple of 8 bits rotations only,
|
||||
* needs index vector)
|
||||
*
|
||||
* ChaCha20 has 16, 12, 8, and 7-bit rotations. For the 12 and 7-bit
|
||||
* rotations, the only choices are (a) and (b). We use (a) since it takes
|
||||
* two-thirds the cycles of (b) on both Cortex-A7 and Cortex-A53.
|
||||
*
|
||||
* For the 16-bit rotation, we use vrev32.16 since it's consistently fastest
|
||||
* and doesn't need a temporary register.
|
||||
*
|
||||
* For the 8-bit rotation, we use vtbl.8 + vtbl.8. On Cortex-A7, this sequence
|
||||
* is twice as fast as (a), even when doing (a) on multiple registers
|
||||
* simultaneously to eliminate the stall between vshl and vsri. Also, it
|
||||
* parallelizes better when temporary registers are scarce.
|
||||
*
|
||||
* A disadvantage is that on Cortex-A53, the vtbl sequence is the same speed as
|
||||
* (a), so the need to load the rotation table actually makes the vtbl method
|
||||
* slightly slower overall on that CPU (~1.3% slower ChaCha20). Still, it
|
||||
* seems to be a good compromise to get a more significant speed boost on some
|
||||
* CPUs, e.g. ~4.8% faster ChaCha20 on Cortex-A7.
|
||||
*/
|
||||
|
||||
#include <linux/linkage.h>
|
||||
|
||||
.text
|
||||
@ -46,7 +74,9 @@ ENTRY(chacha20_block_xor_neon)
|
||||
vmov q10, q2
|
||||
vmov q11, q3
|
||||
|
||||
adr ip, .Lrol8_table
|
||||
mov r3, #10
|
||||
vld1.8 {d10}, [ip, :64]
|
||||
|
||||
.Ldoubleround:
|
||||
// x0 += x1, x3 = rotl32(x3 ^ x0, 16)
|
||||
@ -62,9 +92,9 @@ ENTRY(chacha20_block_xor_neon)
|
||||
|
||||
// x0 += x1, x3 = rotl32(x3 ^ x0, 8)
|
||||
vadd.i32 q0, q0, q1
|
||||
veor q4, q3, q0
|
||||
vshl.u32 q3, q4, #8
|
||||
vsri.u32 q3, q4, #24
|
||||
veor q3, q3, q0
|
||||
vtbl.8 d6, {d6}, d10
|
||||
vtbl.8 d7, {d7}, d10
|
||||
|
||||
// x2 += x3, x1 = rotl32(x1 ^ x2, 7)
|
||||
vadd.i32 q2, q2, q3
|
||||
@ -92,9 +122,9 @@ ENTRY(chacha20_block_xor_neon)
|
||||
|
||||
// x0 += x1, x3 = rotl32(x3 ^ x0, 8)
|
||||
vadd.i32 q0, q0, q1
|
||||
veor q4, q3, q0
|
||||
vshl.u32 q3, q4, #8
|
||||
vsri.u32 q3, q4, #24
|
||||
veor q3, q3, q0
|
||||
vtbl.8 d6, {d6}, d10
|
||||
vtbl.8 d7, {d7}, d10
|
||||
|
||||
// x2 += x3, x1 = rotl32(x1 ^ x2, 7)
|
||||
vadd.i32 q2, q2, q3
|
||||
@ -139,13 +169,17 @@ ENTRY(chacha20_block_xor_neon)
|
||||
bx lr
|
||||
ENDPROC(chacha20_block_xor_neon)
|
||||
|
||||
.align 4
|
||||
.Lctrinc: .word 0, 1, 2, 3
|
||||
.Lrol8_table: .byte 3, 0, 1, 2, 7, 4, 5, 6
|
||||
|
||||
.align 5
|
||||
ENTRY(chacha20_4block_xor_neon)
|
||||
push {r4-r6, lr}
|
||||
mov ip, sp // preserve the stack pointer
|
||||
sub r3, sp, #0x20 // allocate a 32 byte buffer
|
||||
bic r3, r3, #0x1f // aligned to 32 bytes
|
||||
mov sp, r3
|
||||
push {r4-r5}
|
||||
mov r4, sp // preserve the stack pointer
|
||||
sub ip, sp, #0x20 // allocate a 32 byte buffer
|
||||
bic ip, ip, #0x1f // aligned to 32 bytes
|
||||
mov sp, ip
|
||||
|
||||
// r0: Input state matrix, s
|
||||
// r1: 4 data blocks output, o
|
||||
@ -155,25 +189,24 @@ ENTRY(chacha20_4block_xor_neon)
|
||||
// This function encrypts four consecutive ChaCha20 blocks by loading
|
||||
// the state matrix in NEON registers four times. The algorithm performs
|
||||
// each operation on the corresponding word of each state matrix, hence
|
||||
// requires no word shuffling. For final XORing step we transpose the
|
||||
// matrix by interleaving 32- and then 64-bit words, which allows us to
|
||||
// do XOR in NEON registers.
|
||||
// requires no word shuffling. The words are re-interleaved before the
|
||||
// final addition of the original state and the XORing step.
|
||||
//
|
||||
|
||||
// x0..15[0-3] = s0..3[0..3]
|
||||
add r3, r0, #0x20
|
||||
// x0..15[0-3] = s0..15[0-3]
|
||||
add ip, r0, #0x20
|
||||
vld1.32 {q0-q1}, [r0]
|
||||
vld1.32 {q2-q3}, [r3]
|
||||
vld1.32 {q2-q3}, [ip]
|
||||
|
||||
adr r3, CTRINC
|
||||
adr r5, .Lctrinc
|
||||
vdup.32 q15, d7[1]
|
||||
vdup.32 q14, d7[0]
|
||||
vld1.32 {q11}, [r3, :128]
|
||||
vld1.32 {q4}, [r5, :128]
|
||||
vdup.32 q13, d6[1]
|
||||
vdup.32 q12, d6[0]
|
||||
vadd.i32 q12, q12, q11 // x12 += counter values 0-3
|
||||
vdup.32 q11, d5[1]
|
||||
vdup.32 q10, d5[0]
|
||||
vadd.u32 q12, q12, q4 // x12 += counter values 0-3
|
||||
vdup.32 q9, d4[1]
|
||||
vdup.32 q8, d4[0]
|
||||
vdup.32 q7, d3[1]
|
||||
@ -185,9 +218,13 @@ ENTRY(chacha20_4block_xor_neon)
|
||||
vdup.32 q1, d0[1]
|
||||
vdup.32 q0, d0[0]
|
||||
|
||||
adr ip, .Lrol8_table
|
||||
mov r3, #10
|
||||
b 1f
|
||||
|
||||
.Ldoubleround4:
|
||||
vld1.32 {q8-q9}, [sp, :256]
|
||||
1:
|
||||
// x0 += x4, x12 = rotl32(x12 ^ x0, 16)
|
||||
// x1 += x5, x13 = rotl32(x13 ^ x1, 16)
|
||||
// x2 += x6, x14 = rotl32(x14 ^ x2, 16)
|
||||
@ -236,24 +273,25 @@ ENTRY(chacha20_4block_xor_neon)
|
||||
// x1 += x5, x13 = rotl32(x13 ^ x1, 8)
|
||||
// x2 += x6, x14 = rotl32(x14 ^ x2, 8)
|
||||
// x3 += x7, x15 = rotl32(x15 ^ x3, 8)
|
||||
vld1.8 {d16}, [ip, :64]
|
||||
vadd.i32 q0, q0, q4
|
||||
vadd.i32 q1, q1, q5
|
||||
vadd.i32 q2, q2, q6
|
||||
vadd.i32 q3, q3, q7
|
||||
|
||||
veor q8, q12, q0
|
||||
veor q9, q13, q1
|
||||
vshl.u32 q12, q8, #8
|
||||
vshl.u32 q13, q9, #8
|
||||
vsri.u32 q12, q8, #24
|
||||
vsri.u32 q13, q9, #24
|
||||
veor q12, q12, q0
|
||||
veor q13, q13, q1
|
||||
veor q14, q14, q2
|
||||
veor q15, q15, q3
|
||||
|
||||
veor q8, q14, q2
|
||||
veor q9, q15, q3
|
||||
vshl.u32 q14, q8, #8
|
||||
vshl.u32 q15, q9, #8
|
||||
vsri.u32 q14, q8, #24
|
||||
vsri.u32 q15, q9, #24
|
||||
vtbl.8 d24, {d24}, d16
|
||||
vtbl.8 d25, {d25}, d16
|
||||
vtbl.8 d26, {d26}, d16
|
||||
vtbl.8 d27, {d27}, d16
|
||||
vtbl.8 d28, {d28}, d16
|
||||
vtbl.8 d29, {d29}, d16
|
||||
vtbl.8 d30, {d30}, d16
|
||||
vtbl.8 d31, {d31}, d16
|
||||
|
||||
vld1.32 {q8-q9}, [sp, :256]
|
||||
|
||||
@ -332,24 +370,25 @@ ENTRY(chacha20_4block_xor_neon)
|
||||
// x1 += x6, x12 = rotl32(x12 ^ x1, 8)
|
||||
// x2 += x7, x13 = rotl32(x13 ^ x2, 8)
|
||||
// x3 += x4, x14 = rotl32(x14 ^ x3, 8)
|
||||
vld1.8 {d16}, [ip, :64]
|
||||
vadd.i32 q0, q0, q5
|
||||
vadd.i32 q1, q1, q6
|
||||
vadd.i32 q2, q2, q7
|
||||
vadd.i32 q3, q3, q4
|
||||
|
||||
veor q8, q15, q0
|
||||
veor q9, q12, q1
|
||||
vshl.u32 q15, q8, #8
|
||||
vshl.u32 q12, q9, #8
|
||||
vsri.u32 q15, q8, #24
|
||||
vsri.u32 q12, q9, #24
|
||||
veor q15, q15, q0
|
||||
veor q12, q12, q1
|
||||
veor q13, q13, q2
|
||||
veor q14, q14, q3
|
||||
|
||||
veor q8, q13, q2
|
||||
veor q9, q14, q3
|
||||
vshl.u32 q13, q8, #8
|
||||
vshl.u32 q14, q9, #8
|
||||
vsri.u32 q13, q8, #24
|
||||
vsri.u32 q14, q9, #24
|
||||
vtbl.8 d30, {d30}, d16
|
||||
vtbl.8 d31, {d31}, d16
|
||||
vtbl.8 d24, {d24}, d16
|
||||
vtbl.8 d25, {d25}, d16
|
||||
vtbl.8 d26, {d26}, d16
|
||||
vtbl.8 d27, {d27}, d16
|
||||
vtbl.8 d28, {d28}, d16
|
||||
vtbl.8 d29, {d29}, d16
|
||||
|
||||
vld1.32 {q8-q9}, [sp, :256]
|
||||
|
||||
@ -379,104 +418,76 @@ ENTRY(chacha20_4block_xor_neon)
|
||||
vsri.u32 q6, q9, #25
|
||||
|
||||
subs r3, r3, #1
|
||||
beq 0f
|
||||
bne .Ldoubleround4
|
||||
|
||||
vld1.32 {q8-q9}, [sp, :256]
|
||||
b .Ldoubleround4
|
||||
// x0..7[0-3] are in q0-q7, x10..15[0-3] are in q10-q15.
|
||||
// x8..9[0-3] are on the stack.
|
||||
|
||||
// x0[0-3] += s0[0]
|
||||
// x1[0-3] += s0[1]
|
||||
// x2[0-3] += s0[2]
|
||||
// x3[0-3] += s0[3]
|
||||
0: ldmia r0!, {r3-r6}
|
||||
vdup.32 q8, r3
|
||||
vdup.32 q9, r4
|
||||
vadd.i32 q0, q0, q8
|
||||
vadd.i32 q1, q1, q9
|
||||
vdup.32 q8, r5
|
||||
vdup.32 q9, r6
|
||||
vadd.i32 q2, q2, q8
|
||||
vadd.i32 q3, q3, q9
|
||||
|
||||
// x4[0-3] += s1[0]
|
||||
// x5[0-3] += s1[1]
|
||||
// x6[0-3] += s1[2]
|
||||
// x7[0-3] += s1[3]
|
||||
ldmia r0!, {r3-r6}
|
||||
vdup.32 q8, r3
|
||||
vdup.32 q9, r4
|
||||
vadd.i32 q4, q4, q8
|
||||
vadd.i32 q5, q5, q9
|
||||
vdup.32 q8, r5
|
||||
vdup.32 q9, r6
|
||||
vadd.i32 q6, q6, q8
|
||||
vadd.i32 q7, q7, q9
|
||||
|
||||
// interleave 32-bit words in state n, n+1
|
||||
vzip.32 q0, q1
|
||||
vzip.32 q2, q3
|
||||
vzip.32 q4, q5
|
||||
vzip.32 q6, q7
|
||||
|
||||
// interleave 64-bit words in state n, n+2
|
||||
// Re-interleave the words in the first two rows of each block (x0..7).
|
||||
// Also add the counter values 0-3 to x12[0-3].
|
||||
vld1.32 {q8}, [r5, :128] // load counter values 0-3
|
||||
vzip.32 q0, q1 // => (0 1 0 1) (0 1 0 1)
|
||||
vzip.32 q2, q3 // => (2 3 2 3) (2 3 2 3)
|
||||
vzip.32 q4, q5 // => (4 5 4 5) (4 5 4 5)
|
||||
vzip.32 q6, q7 // => (6 7 6 7) (6 7 6 7)
|
||||
vadd.u32 q12, q8 // x12 += counter values 0-3
|
||||
vswp d1, d4
|
||||
vswp d3, d6
|
||||
vld1.32 {q8-q9}, [r0]! // load s0..7
|
||||
vswp d9, d12
|
||||
vswp d11, d14
|
||||
|
||||
// xor with corresponding input, write to output
|
||||
// Swap q1 and q4 so that we'll free up consecutive registers (q0-q1)
|
||||
// after XORing the first 32 bytes.
|
||||
vswp q1, q4
|
||||
|
||||
// First two rows of each block are (q0 q1) (q2 q6) (q4 q5) (q3 q7)
|
||||
|
||||
// x0..3[0-3] += s0..3[0-3] (add orig state to 1st row of each block)
|
||||
vadd.u32 q0, q0, q8
|
||||
vadd.u32 q2, q2, q8
|
||||
vadd.u32 q4, q4, q8
|
||||
vadd.u32 q3, q3, q8
|
||||
|
||||
// x4..7[0-3] += s4..7[0-3] (add orig state to 2nd row of each block)
|
||||
vadd.u32 q1, q1, q9
|
||||
vadd.u32 q6, q6, q9
|
||||
vadd.u32 q5, q5, q9
|
||||
vadd.u32 q7, q7, q9
|
||||
|
||||
// XOR first 32 bytes using keystream from first two rows of first block
|
||||
vld1.8 {q8-q9}, [r2]!
|
||||
veor q8, q8, q0
|
||||
veor q9, q9, q4
|
||||
veor q9, q9, q1
|
||||
vst1.8 {q8-q9}, [r1]!
|
||||
|
||||
// Re-interleave the words in the last two rows of each block (x8..15).
|
||||
vld1.32 {q8-q9}, [sp, :256]
|
||||
|
||||
// x8[0-3] += s2[0]
|
||||
// x9[0-3] += s2[1]
|
||||
// x10[0-3] += s2[2]
|
||||
// x11[0-3] += s2[3]
|
||||
ldmia r0!, {r3-r6}
|
||||
vdup.32 q0, r3
|
||||
vdup.32 q4, r4
|
||||
vadd.i32 q8, q8, q0
|
||||
vadd.i32 q9, q9, q4
|
||||
vdup.32 q0, r5
|
||||
vdup.32 q4, r6
|
||||
vadd.i32 q10, q10, q0
|
||||
vadd.i32 q11, q11, q4
|
||||
|
||||
// x12[0-3] += s3[0]
|
||||
// x13[0-3] += s3[1]
|
||||
// x14[0-3] += s3[2]
|
||||
// x15[0-3] += s3[3]
|
||||
ldmia r0!, {r3-r6}
|
||||
vdup.32 q0, r3
|
||||
vdup.32 q4, r4
|
||||
adr r3, CTRINC
|
||||
vadd.i32 q12, q12, q0
|
||||
vld1.32 {q0}, [r3, :128]
|
||||
vadd.i32 q13, q13, q4
|
||||
vadd.i32 q12, q12, q0 // x12 += counter values 0-3
|
||||
|
||||
vdup.32 q0, r5
|
||||
vdup.32 q4, r6
|
||||
vadd.i32 q14, q14, q0
|
||||
vadd.i32 q15, q15, q4
|
||||
|
||||
// interleave 32-bit words in state n, n+1
|
||||
vzip.32 q8, q9
|
||||
vzip.32 q10, q11
|
||||
vzip.32 q12, q13
|
||||
vzip.32 q14, q15
|
||||
|
||||
// interleave 64-bit words in state n, n+2
|
||||
vswp d17, d20
|
||||
vswp d19, d22
|
||||
vzip.32 q12, q13 // => (12 13 12 13) (12 13 12 13)
|
||||
vzip.32 q14, q15 // => (14 15 14 15) (14 15 14 15)
|
||||
vzip.32 q8, q9 // => (8 9 8 9) (8 9 8 9)
|
||||
vzip.32 q10, q11 // => (10 11 10 11) (10 11 10 11)
|
||||
vld1.32 {q0-q1}, [r0] // load s8..15
|
||||
vswp d25, d28
|
||||
vswp d27, d30
|
||||
vswp d17, d20
|
||||
vswp d19, d22
|
||||
|
||||
vmov q4, q1
|
||||
// Last two rows of each block are (q8 q12) (q10 q14) (q9 q13) (q11 q15)
|
||||
|
||||
// x8..11[0-3] += s8..11[0-3] (add orig state to 3rd row of each block)
|
||||
vadd.u32 q8, q8, q0
|
||||
vadd.u32 q10, q10, q0
|
||||
vadd.u32 q9, q9, q0
|
||||
vadd.u32 q11, q11, q0
|
||||
|
||||
// x12..15[0-3] += s12..15[0-3] (add orig state to 4th row of each block)
|
||||
vadd.u32 q12, q12, q1
|
||||
vadd.u32 q14, q14, q1
|
||||
vadd.u32 q13, q13, q1
|
||||
vadd.u32 q15, q15, q1
|
||||
|
||||
// XOR the rest of the data with the keystream
|
||||
|
||||
vld1.8 {q0-q1}, [r2]!
|
||||
veor q0, q0, q8
|
||||
@ -509,13 +520,11 @@ ENTRY(chacha20_4block_xor_neon)
|
||||
vst1.8 {q0-q1}, [r1]!
|
||||
|
||||
vld1.8 {q0-q1}, [r2]
|
||||
mov sp, r4 // restore original stack pointer
|
||||
veor q0, q0, q11
|
||||
veor q1, q1, q15
|
||||
vst1.8 {q0-q1}, [r1]
|
||||
|
||||
mov sp, ip
|
||||
pop {r4-r6, pc}
|
||||
pop {r4-r5}
|
||||
bx lr
|
||||
ENDPROC(chacha20_4block_xor_neon)
|
||||
|
||||
.align 4
|
||||
CTRINC: .word 0, 1, 2, 3
|
||||
|
@ -236,7 +236,7 @@ static void __exit crc32_pmull_mod_exit(void)
|
||||
ARRAY_SIZE(crc32_pmull_algs));
|
||||
}
|
||||
|
||||
static const struct cpu_feature crc32_cpu_feature[] = {
|
||||
static const struct cpu_feature __maybe_unused crc32_cpu_feature[] = {
|
||||
{ cpu_feature(CRC32) }, { cpu_feature(PMULL) }, { }
|
||||
};
|
||||
MODULE_DEVICE_TABLE(cpu, crc32_cpu_feature);
|
||||
|
@ -63,6 +63,33 @@
|
||||
k48 .req d31
|
||||
SHASH2_p64 .req d31
|
||||
|
||||
HH .req q10
|
||||
HH3 .req q11
|
||||
HH4 .req q12
|
||||
HH34 .req q13
|
||||
|
||||
HH_L .req d20
|
||||
HH_H .req d21
|
||||
HH3_L .req d22
|
||||
HH3_H .req d23
|
||||
HH4_L .req d24
|
||||
HH4_H .req d25
|
||||
HH34_L .req d26
|
||||
HH34_H .req d27
|
||||
SHASH2_H .req d29
|
||||
|
||||
XL2 .req q5
|
||||
XM2 .req q6
|
||||
XH2 .req q7
|
||||
T3 .req q8
|
||||
|
||||
XL2_L .req d10
|
||||
XL2_H .req d11
|
||||
XM2_L .req d12
|
||||
XM2_H .req d13
|
||||
T3_L .req d16
|
||||
T3_H .req d17
|
||||
|
||||
.text
|
||||
.fpu crypto-neon-fp-armv8
|
||||
|
||||
@ -175,12 +202,77 @@
|
||||
beq 0f
|
||||
vld1.64 {T1}, [ip]
|
||||
teq r0, #0
|
||||
b 1f
|
||||
b 3f
|
||||
|
||||
0: vld1.64 {T1}, [r2]!
|
||||
0: .ifc \pn, p64
|
||||
tst r0, #3 // skip until #blocks is a
|
||||
bne 2f // round multiple of 4
|
||||
|
||||
vld1.8 {XL2-XM2}, [r2]!
|
||||
1: vld1.8 {T3-T2}, [r2]!
|
||||
vrev64.8 XL2, XL2
|
||||
vrev64.8 XM2, XM2
|
||||
|
||||
subs r0, r0, #4
|
||||
|
||||
vext.8 T1, XL2, XL2, #8
|
||||
veor XL2_H, XL2_H, XL_L
|
||||
veor XL, XL, T1
|
||||
|
||||
vrev64.8 T3, T3
|
||||
vrev64.8 T1, T2
|
||||
|
||||
vmull.p64 XH, HH4_H, XL_H // a1 * b1
|
||||
veor XL2_H, XL2_H, XL_H
|
||||
vmull.p64 XL, HH4_L, XL_L // a0 * b0
|
||||
vmull.p64 XM, HH34_H, XL2_H // (a1 + a0)(b1 + b0)
|
||||
|
||||
vmull.p64 XH2, HH3_H, XM2_L // a1 * b1
|
||||
veor XM2_L, XM2_L, XM2_H
|
||||
vmull.p64 XL2, HH3_L, XM2_H // a0 * b0
|
||||
vmull.p64 XM2, HH34_L, XM2_L // (a1 + a0)(b1 + b0)
|
||||
|
||||
veor XH, XH, XH2
|
||||
veor XL, XL, XL2
|
||||
veor XM, XM, XM2
|
||||
|
||||
vmull.p64 XH2, HH_H, T3_L // a1 * b1
|
||||
veor T3_L, T3_L, T3_H
|
||||
vmull.p64 XL2, HH_L, T3_H // a0 * b0
|
||||
vmull.p64 XM2, SHASH2_H, T3_L // (a1 + a0)(b1 + b0)
|
||||
|
||||
veor XH, XH, XH2
|
||||
veor XL, XL, XL2
|
||||
veor XM, XM, XM2
|
||||
|
||||
vmull.p64 XH2, SHASH_H, T1_L // a1 * b1
|
||||
veor T1_L, T1_L, T1_H
|
||||
vmull.p64 XL2, SHASH_L, T1_H // a0 * b0
|
||||
vmull.p64 XM2, SHASH2_p64, T1_L // (a1 + a0)(b1 + b0)
|
||||
|
||||
veor XH, XH, XH2
|
||||
veor XL, XL, XL2
|
||||
veor XM, XM, XM2
|
||||
|
||||
beq 4f
|
||||
|
||||
vld1.8 {XL2-XM2}, [r2]!
|
||||
|
||||
veor T1, XL, XH
|
||||
veor XM, XM, T1
|
||||
|
||||
__pmull_reduce_p64
|
||||
|
||||
veor T1, T1, XH
|
||||
veor XL, XL, T1
|
||||
|
||||
b 1b
|
||||
.endif
|
||||
|
||||
2: vld1.64 {T1}, [r2]!
|
||||
subs r0, r0, #1
|
||||
|
||||
1: /* multiply XL by SHASH in GF(2^128) */
|
||||
3: /* multiply XL by SHASH in GF(2^128) */
|
||||
#ifndef CONFIG_CPU_BIG_ENDIAN
|
||||
vrev64.8 T1, T1
|
||||
#endif
|
||||
@ -193,7 +285,7 @@
|
||||
__pmull_\pn XL, XL_L, SHASH_L, s1l, s2l, s3l, s4l @ a0 * b0
|
||||
__pmull_\pn XM, T1_L, SHASH2_\pn @ (a1+a0)(b1+b0)
|
||||
|
||||
veor T1, XL, XH
|
||||
4: veor T1, XL, XH
|
||||
veor XM, XM, T1
|
||||
|
||||
__pmull_reduce_\pn
|
||||
@ -212,8 +304,14 @@
|
||||
* struct ghash_key const *k, const char *head)
|
||||
*/
|
||||
ENTRY(pmull_ghash_update_p64)
|
||||
vld1.64 {SHASH}, [r3]
|
||||
vld1.64 {SHASH}, [r3]!
|
||||
vld1.64 {HH}, [r3]!
|
||||
vld1.64 {HH3-HH4}, [r3]
|
||||
|
||||
veor SHASH2_p64, SHASH_L, SHASH_H
|
||||
veor SHASH2_H, HH_L, HH_H
|
||||
veor HH34_L, HH3_L, HH3_H
|
||||
veor HH34_H, HH4_L, HH4_H
|
||||
|
||||
vmov.i8 MASK, #0xe1
|
||||
vshl.u64 MASK, MASK, #57
|
||||
|
@ -1,7 +1,7 @@
|
||||
/*
|
||||
* Accelerated GHASH implementation with ARMv8 vmull.p64 instructions.
|
||||
*
|
||||
* Copyright (C) 2015 Linaro Ltd. <ard.biesheuvel@linaro.org>
|
||||
* Copyright (C) 2015 - 2018 Linaro Ltd. <ard.biesheuvel@linaro.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 as published
|
||||
@ -28,8 +28,10 @@ MODULE_ALIAS_CRYPTO("ghash");
|
||||
#define GHASH_DIGEST_SIZE 16
|
||||
|
||||
struct ghash_key {
|
||||
u64 a;
|
||||
u64 b;
|
||||
u64 h[2];
|
||||
u64 h2[2];
|
||||
u64 h3[2];
|
||||
u64 h4[2];
|
||||
};
|
||||
|
||||
struct ghash_desc_ctx {
|
||||
@ -117,26 +119,40 @@ static int ghash_final(struct shash_desc *desc, u8 *dst)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void ghash_reflect(u64 h[], const be128 *k)
|
||||
{
|
||||
u64 carry = be64_to_cpu(k->a) >> 63;
|
||||
|
||||
h[0] = (be64_to_cpu(k->b) << 1) | carry;
|
||||
h[1] = (be64_to_cpu(k->a) << 1) | (be64_to_cpu(k->b) >> 63);
|
||||
|
||||
if (carry)
|
||||
h[1] ^= 0xc200000000000000UL;
|
||||
}
|
||||
|
||||
static int ghash_setkey(struct crypto_shash *tfm,
|
||||
const u8 *inkey, unsigned int keylen)
|
||||
{
|
||||
struct ghash_key *key = crypto_shash_ctx(tfm);
|
||||
u64 a, b;
|
||||
be128 h, k;
|
||||
|
||||
if (keylen != GHASH_BLOCK_SIZE) {
|
||||
crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* perform multiplication by 'x' in GF(2^128) */
|
||||
b = get_unaligned_be64(inkey);
|
||||
a = get_unaligned_be64(inkey + 8);
|
||||
memcpy(&k, inkey, GHASH_BLOCK_SIZE);
|
||||
ghash_reflect(key->h, &k);
|
||||
|
||||
key->a = (a << 1) | (b >> 63);
|
||||
key->b = (b << 1) | (a >> 63);
|
||||
h = k;
|
||||
gf128mul_lle(&h, &k);
|
||||
ghash_reflect(key->h2, &h);
|
||||
|
||||
if (b >> 63)
|
||||
key->b ^= 0xc200000000000000UL;
|
||||
gf128mul_lle(&h, &k);
|
||||
ghash_reflect(key->h3, &h);
|
||||
|
||||
gf128mul_lle(&h, &k);
|
||||
ghash_reflect(key->h4, &h);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -1,434 +0,0 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* NEON-accelerated implementation of Speck128-XTS and Speck64-XTS
|
||||
*
|
||||
* Copyright (c) 2018 Google, Inc
|
||||
*
|
||||
* Author: Eric Biggers <ebiggers@google.com>
|
||||
*/
|
||||
|
||||
#include <linux/linkage.h>
|
||||
|
||||
.text
|
||||
.fpu neon
|
||||
|
||||
// arguments
|
||||
ROUND_KEYS .req r0 // const {u64,u32} *round_keys
|
||||
NROUNDS .req r1 // int nrounds
|
||||
DST .req r2 // void *dst
|
||||
SRC .req r3 // const void *src
|
||||
NBYTES .req r4 // unsigned int nbytes
|
||||
TWEAK .req r5 // void *tweak
|
||||
|
||||
// registers which hold the data being encrypted/decrypted
|
||||
X0 .req q0
|
||||
X0_L .req d0
|
||||
X0_H .req d1
|
||||
Y0 .req q1
|
||||
Y0_H .req d3
|
||||
X1 .req q2
|
||||
X1_L .req d4
|
||||
X1_H .req d5
|
||||
Y1 .req q3
|
||||
Y1_H .req d7
|
||||
X2 .req q4
|
||||
X2_L .req d8
|
||||
X2_H .req d9
|
||||
Y2 .req q5
|
||||
Y2_H .req d11
|
||||
X3 .req q6
|
||||
X3_L .req d12
|
||||
X3_H .req d13
|
||||
Y3 .req q7
|
||||
Y3_H .req d15
|
||||
|
||||
// the round key, duplicated in all lanes
|
||||
ROUND_KEY .req q8
|
||||
ROUND_KEY_L .req d16
|
||||
ROUND_KEY_H .req d17
|
||||
|
||||
// index vector for vtbl-based 8-bit rotates
|
||||
ROTATE_TABLE .req d18
|
||||
|
||||
// multiplication table for updating XTS tweaks
|
||||
GF128MUL_TABLE .req d19
|
||||
GF64MUL_TABLE .req d19
|
||||
|
||||
// current XTS tweak value(s)
|
||||
TWEAKV .req q10
|
||||
TWEAKV_L .req d20
|
||||
TWEAKV_H .req d21
|
||||
|
||||
TMP0 .req q12
|
||||
TMP0_L .req d24
|
||||
TMP0_H .req d25
|
||||
TMP1 .req q13
|
||||
TMP2 .req q14
|
||||
TMP3 .req q15
|
||||
|
||||
.align 4
|
||||
.Lror64_8_table:
|
||||
.byte 1, 2, 3, 4, 5, 6, 7, 0
|
||||
.Lror32_8_table:
|
||||
.byte 1, 2, 3, 0, 5, 6, 7, 4
|
||||
.Lrol64_8_table:
|
||||
.byte 7, 0, 1, 2, 3, 4, 5, 6
|
||||
.Lrol32_8_table:
|
||||
.byte 3, 0, 1, 2, 7, 4, 5, 6
|
||||
.Lgf128mul_table:
|
||||
.byte 0, 0x87
|
||||
.fill 14
|
||||
.Lgf64mul_table:
|
||||
.byte 0, 0x1b, (0x1b << 1), (0x1b << 1) ^ 0x1b
|
||||
.fill 12
|
||||
|
||||
/*
|
||||
* _speck_round_128bytes() - Speck encryption round on 128 bytes at a time
|
||||
*
|
||||
* Do one Speck encryption round on the 128 bytes (8 blocks for Speck128, 16 for
|
||||
* Speck64) stored in X0-X3 and Y0-Y3, using the round key stored in all lanes
|
||||
* of ROUND_KEY. 'n' is the lane size: 64 for Speck128, or 32 for Speck64.
|
||||
*
|
||||
* The 8-bit rotates are implemented using vtbl instead of vshr + vsli because
|
||||
* the vtbl approach is faster on some processors and the same speed on others.
|
||||
*/
|
||||
.macro _speck_round_128bytes n
|
||||
|
||||
// x = ror(x, 8)
|
||||
vtbl.8 X0_L, {X0_L}, ROTATE_TABLE
|
||||
vtbl.8 X0_H, {X0_H}, ROTATE_TABLE
|
||||
vtbl.8 X1_L, {X1_L}, ROTATE_TABLE
|
||||
vtbl.8 X1_H, {X1_H}, ROTATE_TABLE
|
||||
vtbl.8 X2_L, {X2_L}, ROTATE_TABLE
|
||||
vtbl.8 X2_H, {X2_H}, ROTATE_TABLE
|
||||
vtbl.8 X3_L, {X3_L}, ROTATE_TABLE
|
||||
vtbl.8 X3_H, {X3_H}, ROTATE_TABLE
|
||||
|
||||
// x += y
|
||||
vadd.u\n X0, Y0
|
||||
vadd.u\n X1, Y1
|
||||
vadd.u\n X2, Y2
|
||||
vadd.u\n X3, Y3
|
||||
|
||||
// x ^= k
|
||||
veor X0, ROUND_KEY
|
||||
veor X1, ROUND_KEY
|
||||
veor X2, ROUND_KEY
|
||||
veor X3, ROUND_KEY
|
||||
|
||||
// y = rol(y, 3)
|
||||
vshl.u\n TMP0, Y0, #3
|
||||
vshl.u\n TMP1, Y1, #3
|
||||
vshl.u\n TMP2, Y2, #3
|
||||
vshl.u\n TMP3, Y3, #3
|
||||
vsri.u\n TMP0, Y0, #(\n - 3)
|
||||
vsri.u\n TMP1, Y1, #(\n - 3)
|
||||
vsri.u\n TMP2, Y2, #(\n - 3)
|
||||
vsri.u\n TMP3, Y3, #(\n - 3)
|
||||
|
||||
// y ^= x
|
||||
veor Y0, TMP0, X0
|
||||
veor Y1, TMP1, X1
|
||||
veor Y2, TMP2, X2
|
||||
veor Y3, TMP3, X3
|
||||
.endm
|
||||
|
||||
/*
|
||||
* _speck_unround_128bytes() - Speck decryption round on 128 bytes at a time
|
||||
*
|
||||
* This is the inverse of _speck_round_128bytes().
|
||||
*/
|
||||
.macro _speck_unround_128bytes n
|
||||
|
||||
// y ^= x
|
||||
veor TMP0, Y0, X0
|
||||
veor TMP1, Y1, X1
|
||||
veor TMP2, Y2, X2
|
||||
veor TMP3, Y3, X3
|
||||
|
||||
// y = ror(y, 3)
|
||||
vshr.u\n Y0, TMP0, #3
|
||||
vshr.u\n Y1, TMP1, #3
|
||||
vshr.u\n Y2, TMP2, #3
|
||||
vshr.u\n Y3, TMP3, #3
|
||||
vsli.u\n Y0, TMP0, #(\n - 3)
|
||||
vsli.u\n Y1, TMP1, #(\n - 3)
|
||||
vsli.u\n Y2, TMP2, #(\n - 3)
|
||||
vsli.u\n Y3, TMP3, #(\n - 3)
|
||||
|
||||
// x ^= k
|
||||
veor X0, ROUND_KEY
|
||||
veor X1, ROUND_KEY
|
||||
veor X2, ROUND_KEY
|
||||
veor X3, ROUND_KEY
|
||||
|
||||
// x -= y
|
||||
vsub.u\n X0, Y0
|
||||
vsub.u\n X1, Y1
|
||||
vsub.u\n X2, Y2
|
||||
vsub.u\n X3, Y3
|
||||
|
||||
// x = rol(x, 8);
|
||||
vtbl.8 X0_L, {X0_L}, ROTATE_TABLE
|
||||
vtbl.8 X0_H, {X0_H}, ROTATE_TABLE
|
||||
vtbl.8 X1_L, {X1_L}, ROTATE_TABLE
|
||||
vtbl.8 X1_H, {X1_H}, ROTATE_TABLE
|
||||
vtbl.8 X2_L, {X2_L}, ROTATE_TABLE
|
||||
vtbl.8 X2_H, {X2_H}, ROTATE_TABLE
|
||||
vtbl.8 X3_L, {X3_L}, ROTATE_TABLE
|
||||
vtbl.8 X3_H, {X3_H}, ROTATE_TABLE
|
||||
.endm
|
||||
|
||||
.macro _xts128_precrypt_one dst_reg, tweak_buf, tmp
|
||||
|
||||
// Load the next source block
|
||||
vld1.8 {\dst_reg}, [SRC]!
|
||||
|
||||
// Save the current tweak in the tweak buffer
|
||||
vst1.8 {TWEAKV}, [\tweak_buf:128]!
|
||||
|
||||
// XOR the next source block with the current tweak
|
||||
veor \dst_reg, TWEAKV
|
||||
|
||||
/*
|
||||
* Calculate the next tweak by multiplying the current one by x,
|
||||
* modulo p(x) = x^128 + x^7 + x^2 + x + 1.
|
||||
*/
|
||||
vshr.u64 \tmp, TWEAKV, #63
|
||||
vshl.u64 TWEAKV, #1
|
||||
veor TWEAKV_H, \tmp\()_L
|
||||
vtbl.8 \tmp\()_H, {GF128MUL_TABLE}, \tmp\()_H
|
||||
veor TWEAKV_L, \tmp\()_H
|
||||
.endm
|
||||
|
||||
.macro _xts64_precrypt_two dst_reg, tweak_buf, tmp
|
||||
|
||||
// Load the next two source blocks
|
||||
vld1.8 {\dst_reg}, [SRC]!
|
||||
|
||||
// Save the current two tweaks in the tweak buffer
|
||||
vst1.8 {TWEAKV}, [\tweak_buf:128]!
|
||||
|
||||
// XOR the next two source blocks with the current two tweaks
|
||||
veor \dst_reg, TWEAKV
|
||||
|
||||
/*
|
||||
* Calculate the next two tweaks by multiplying the current ones by x^2,
|
||||
* modulo p(x) = x^64 + x^4 + x^3 + x + 1.
|
||||
*/
|
||||
vshr.u64 \tmp, TWEAKV, #62
|
||||
vshl.u64 TWEAKV, #2
|
||||
vtbl.8 \tmp\()_L, {GF64MUL_TABLE}, \tmp\()_L
|
||||
vtbl.8 \tmp\()_H, {GF64MUL_TABLE}, \tmp\()_H
|
||||
veor TWEAKV, \tmp
|
||||
.endm
|
||||
|
||||
/*
|
||||
* _speck_xts_crypt() - Speck-XTS encryption/decryption
|
||||
*
|
||||
* Encrypt or decrypt NBYTES bytes of data from the SRC buffer to the DST buffer
|
||||
* using Speck-XTS, specifically the variant with a block size of '2n' and round
|
||||
* count given by NROUNDS. The expanded round keys are given in ROUND_KEYS, and
|
||||
* the current XTS tweak value is given in TWEAK. It's assumed that NBYTES is a
|
||||
* nonzero multiple of 128.
|
||||
*/
|
||||
.macro _speck_xts_crypt n, decrypting
|
||||
push {r4-r7}
|
||||
mov r7, sp
|
||||
|
||||
/*
|
||||
* The first four parameters were passed in registers r0-r3. Load the
|
||||
* additional parameters, which were passed on the stack.
|
||||
*/
|
||||
ldr NBYTES, [sp, #16]
|
||||
ldr TWEAK, [sp, #20]
|
||||
|
||||
/*
|
||||
* If decrypting, modify the ROUND_KEYS parameter to point to the last
|
||||
* round key rather than the first, since for decryption the round keys
|
||||
* are used in reverse order.
|
||||
*/
|
||||
.if \decrypting
|
||||
.if \n == 64
|
||||
add ROUND_KEYS, ROUND_KEYS, NROUNDS, lsl #3
|
||||
sub ROUND_KEYS, #8
|
||||
.else
|
||||
add ROUND_KEYS, ROUND_KEYS, NROUNDS, lsl #2
|
||||
sub ROUND_KEYS, #4
|
||||
.endif
|
||||
.endif
|
||||
|
||||
// Load the index vector for vtbl-based 8-bit rotates
|
||||
.if \decrypting
|
||||
ldr r12, =.Lrol\n\()_8_table
|
||||
.else
|
||||
ldr r12, =.Lror\n\()_8_table
|
||||
.endif
|
||||
vld1.8 {ROTATE_TABLE}, [r12:64]
|
||||
|
||||
// One-time XTS preparation
|
||||
|
||||
/*
|
||||
* Allocate stack space to store 128 bytes worth of tweaks. For
|
||||
* performance, this space is aligned to a 16-byte boundary so that we
|
||||
* can use the load/store instructions that declare 16-byte alignment.
|
||||
* For Thumb2 compatibility, don't do the 'bic' directly on 'sp'.
|
||||
*/
|
||||
sub r12, sp, #128
|
||||
bic r12, #0xf
|
||||
mov sp, r12
|
||||
|
||||
.if \n == 64
|
||||
// Load first tweak
|
||||
vld1.8 {TWEAKV}, [TWEAK]
|
||||
|
||||
// Load GF(2^128) multiplication table
|
||||
ldr r12, =.Lgf128mul_table
|
||||
vld1.8 {GF128MUL_TABLE}, [r12:64]
|
||||
.else
|
||||
// Load first tweak
|
||||
vld1.8 {TWEAKV_L}, [TWEAK]
|
||||
|
||||
// Load GF(2^64) multiplication table
|
||||
ldr r12, =.Lgf64mul_table
|
||||
vld1.8 {GF64MUL_TABLE}, [r12:64]
|
||||
|
||||
// Calculate second tweak, packing it together with the first
|
||||
vshr.u64 TMP0_L, TWEAKV_L, #63
|
||||
vtbl.u8 TMP0_L, {GF64MUL_TABLE}, TMP0_L
|
||||
vshl.u64 TWEAKV_H, TWEAKV_L, #1
|
||||
veor TWEAKV_H, TMP0_L
|
||||
.endif
|
||||
|
||||
.Lnext_128bytes_\@:
|
||||
|
||||
/*
|
||||
* Load the source blocks into {X,Y}[0-3], XOR them with their XTS tweak
|
||||
* values, and save the tweaks on the stack for later. Then
|
||||
* de-interleave the 'x' and 'y' elements of each block, i.e. make it so
|
||||
* that the X[0-3] registers contain only the second halves of blocks,
|
||||
* and the Y[0-3] registers contain only the first halves of blocks.
|
||||
* (Speck uses the order (y, x) rather than the more intuitive (x, y).)
|
||||
*/
|
||||
mov r12, sp
|
||||
.if \n == 64
|
||||
_xts128_precrypt_one X0, r12, TMP0
|
||||
_xts128_precrypt_one Y0, r12, TMP0
|
||||
_xts128_precrypt_one X1, r12, TMP0
|
||||
_xts128_precrypt_one Y1, r12, TMP0
|
||||
_xts128_precrypt_one X2, r12, TMP0
|
||||
_xts128_precrypt_one Y2, r12, TMP0
|
||||
_xts128_precrypt_one X3, r12, TMP0
|
||||
_xts128_precrypt_one Y3, r12, TMP0
|
||||
vswp X0_L, Y0_H
|
||||
vswp X1_L, Y1_H
|
||||
vswp X2_L, Y2_H
|
||||
vswp X3_L, Y3_H
|
||||
.else
|
||||
_xts64_precrypt_two X0, r12, TMP0
|
||||
_xts64_precrypt_two Y0, r12, TMP0
|
||||
_xts64_precrypt_two X1, r12, TMP0
|
||||
_xts64_precrypt_two Y1, r12, TMP0
|
||||
_xts64_precrypt_two X2, r12, TMP0
|
||||
_xts64_precrypt_two Y2, r12, TMP0
|
||||
_xts64_precrypt_two X3, r12, TMP0
|
||||
_xts64_precrypt_two Y3, r12, TMP0
|
||||
vuzp.32 Y0, X0
|
||||
vuzp.32 Y1, X1
|
||||
vuzp.32 Y2, X2
|
||||
vuzp.32 Y3, X3
|
||||
.endif
|
||||
|
||||
// Do the cipher rounds
|
||||
|
||||
mov r12, ROUND_KEYS
|
||||
mov r6, NROUNDS
|
||||
|
||||
.Lnext_round_\@:
|
||||
.if \decrypting
|
||||
.if \n == 64
|
||||
vld1.64 ROUND_KEY_L, [r12]
|
||||
sub r12, #8
|
||||
vmov ROUND_KEY_H, ROUND_KEY_L
|
||||
.else
|
||||
vld1.32 {ROUND_KEY_L[],ROUND_KEY_H[]}, [r12]
|
||||
sub r12, #4
|
||||
.endif
|
||||
_speck_unround_128bytes \n
|
||||
.else
|
||||
.if \n == 64
|
||||
vld1.64 ROUND_KEY_L, [r12]!
|
||||
vmov ROUND_KEY_H, ROUND_KEY_L
|
||||
.else
|
||||
vld1.32 {ROUND_KEY_L[],ROUND_KEY_H[]}, [r12]!
|
||||
.endif
|
||||
_speck_round_128bytes \n
|
||||
.endif
|
||||
subs r6, r6, #1
|
||||
bne .Lnext_round_\@
|
||||
|
||||
// Re-interleave the 'x' and 'y' elements of each block
|
||||
.if \n == 64
|
||||
vswp X0_L, Y0_H
|
||||
vswp X1_L, Y1_H
|
||||
vswp X2_L, Y2_H
|
||||
vswp X3_L, Y3_H
|
||||
.else
|
||||
vzip.32 Y0, X0
|
||||
vzip.32 Y1, X1
|
||||
vzip.32 Y2, X2
|
||||
vzip.32 Y3, X3
|
||||
.endif
|
||||
|
||||
// XOR the encrypted/decrypted blocks with the tweaks we saved earlier
|
||||
mov r12, sp
|
||||
vld1.8 {TMP0, TMP1}, [r12:128]!
|
||||
vld1.8 {TMP2, TMP3}, [r12:128]!
|
||||
veor X0, TMP0
|
||||
veor Y0, TMP1
|
||||
veor X1, TMP2
|
||||
veor Y1, TMP3
|
||||
vld1.8 {TMP0, TMP1}, [r12:128]!
|
||||
vld1.8 {TMP2, TMP3}, [r12:128]!
|
||||
veor X2, TMP0
|
||||
veor Y2, TMP1
|
||||
veor X3, TMP2
|
||||
veor Y3, TMP3
|
||||
|
||||
// Store the ciphertext in the destination buffer
|
||||
vst1.8 {X0, Y0}, [DST]!
|
||||
vst1.8 {X1, Y1}, [DST]!
|
||||
vst1.8 {X2, Y2}, [DST]!
|
||||
vst1.8 {X3, Y3}, [DST]!
|
||||
|
||||
// Continue if there are more 128-byte chunks remaining, else return
|
||||
subs NBYTES, #128
|
||||
bne .Lnext_128bytes_\@
|
||||
|
||||
// Store the next tweak
|
||||
.if \n == 64
|
||||
vst1.8 {TWEAKV}, [TWEAK]
|
||||
.else
|
||||
vst1.8 {TWEAKV_L}, [TWEAK]
|
||||
.endif
|
||||
|
||||
mov sp, r7
|
||||
pop {r4-r7}
|
||||
bx lr
|
||||
.endm
|
||||
|
||||
ENTRY(speck128_xts_encrypt_neon)
|
||||
_speck_xts_crypt n=64, decrypting=0
|
||||
ENDPROC(speck128_xts_encrypt_neon)
|
||||
|
||||
ENTRY(speck128_xts_decrypt_neon)
|
||||
_speck_xts_crypt n=64, decrypting=1
|
||||
ENDPROC(speck128_xts_decrypt_neon)
|
||||
|
||||
ENTRY(speck64_xts_encrypt_neon)
|
||||
_speck_xts_crypt n=32, decrypting=0
|
||||
ENDPROC(speck64_xts_encrypt_neon)
|
||||
|
||||
ENTRY(speck64_xts_decrypt_neon)
|
||||
_speck_xts_crypt n=32, decrypting=1
|
||||
ENDPROC(speck64_xts_decrypt_neon)
|
@ -1,288 +0,0 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* NEON-accelerated implementation of Speck128-XTS and Speck64-XTS
|
||||
*
|
||||
* Copyright (c) 2018 Google, Inc
|
||||
*
|
||||
* Note: the NIST recommendation for XTS only specifies a 128-bit block size,
|
||||
* but a 64-bit version (needed for Speck64) is fairly straightforward; the math
|
||||
* is just done in GF(2^64) instead of GF(2^128), with the reducing polynomial
|
||||
* x^64 + x^4 + x^3 + x + 1 from the original XEX paper (Rogaway, 2004:
|
||||
* "Efficient Instantiations of Tweakable Blockciphers and Refinements to Modes
|
||||
* OCB and PMAC"), represented as 0x1B.
|
||||
*/
|
||||
|
||||
#include <asm/hwcap.h>
|
||||
#include <asm/neon.h>
|
||||
#include <asm/simd.h>
|
||||
#include <crypto/algapi.h>
|
||||
#include <crypto/gf128mul.h>
|
||||
#include <crypto/internal/skcipher.h>
|
||||
#include <crypto/speck.h>
|
||||
#include <crypto/xts.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/module.h>
|
||||
|
||||
/* The assembly functions only handle multiples of 128 bytes */
|
||||
#define SPECK_NEON_CHUNK_SIZE 128
|
||||
|
||||
/* Speck128 */
|
||||
|
||||
struct speck128_xts_tfm_ctx {
|
||||
struct speck128_tfm_ctx main_key;
|
||||
struct speck128_tfm_ctx tweak_key;
|
||||
};
|
||||
|
||||
asmlinkage void speck128_xts_encrypt_neon(const u64 *round_keys, int nrounds,
|
||||
void *dst, const void *src,
|
||||
unsigned int nbytes, void *tweak);
|
||||
|
||||
asmlinkage void speck128_xts_decrypt_neon(const u64 *round_keys, int nrounds,
|
||||
void *dst, const void *src,
|
||||
unsigned int nbytes, void *tweak);
|
||||
|
||||
typedef void (*speck128_crypt_one_t)(const struct speck128_tfm_ctx *,
|
||||
u8 *, const u8 *);
|
||||
typedef void (*speck128_xts_crypt_many_t)(const u64 *, int, void *,
|
||||
const void *, unsigned int, void *);
|
||||
|
||||
static __always_inline int
|
||||
__speck128_xts_crypt(struct skcipher_request *req,
|
||||
speck128_crypt_one_t crypt_one,
|
||||
speck128_xts_crypt_many_t crypt_many)
|
||||
{
|
||||
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
|
||||
const struct speck128_xts_tfm_ctx *ctx = crypto_skcipher_ctx(tfm);
|
||||
struct skcipher_walk walk;
|
||||
le128 tweak;
|
||||
int err;
|
||||
|
||||
err = skcipher_walk_virt(&walk, req, true);
|
||||
|
||||
crypto_speck128_encrypt(&ctx->tweak_key, (u8 *)&tweak, walk.iv);
|
||||
|
||||
while (walk.nbytes > 0) {
|
||||
unsigned int nbytes = walk.nbytes;
|
||||
u8 *dst = walk.dst.virt.addr;
|
||||
const u8 *src = walk.src.virt.addr;
|
||||
|
||||
if (nbytes >= SPECK_NEON_CHUNK_SIZE && may_use_simd()) {
|
||||
unsigned int count;
|
||||
|
||||
count = round_down(nbytes, SPECK_NEON_CHUNK_SIZE);
|
||||
kernel_neon_begin();
|
||||
(*crypt_many)(ctx->main_key.round_keys,
|
||||
ctx->main_key.nrounds,
|
||||
dst, src, count, &tweak);
|
||||
kernel_neon_end();
|
||||
dst += count;
|
||||
src += count;
|
||||
nbytes -= count;
|
||||
}
|
||||
|
||||
/* Handle any remainder with generic code */
|
||||
while (nbytes >= sizeof(tweak)) {
|
||||
le128_xor((le128 *)dst, (const le128 *)src, &tweak);
|
||||
(*crypt_one)(&ctx->main_key, dst, dst);
|
||||
le128_xor((le128 *)dst, (const le128 *)dst, &tweak);
|
||||
gf128mul_x_ble(&tweak, &tweak);
|
||||
|
||||
dst += sizeof(tweak);
|
||||
src += sizeof(tweak);
|
||||
nbytes -= sizeof(tweak);
|
||||
}
|
||||
err = skcipher_walk_done(&walk, nbytes);
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static int speck128_xts_encrypt(struct skcipher_request *req)
|
||||
{
|
||||
return __speck128_xts_crypt(req, crypto_speck128_encrypt,
|
||||
speck128_xts_encrypt_neon);
|
||||
}
|
||||
|
||||
static int speck128_xts_decrypt(struct skcipher_request *req)
|
||||
{
|
||||
return __speck128_xts_crypt(req, crypto_speck128_decrypt,
|
||||
speck128_xts_decrypt_neon);
|
||||
}
|
||||
|
||||
static int speck128_xts_setkey(struct crypto_skcipher *tfm, const u8 *key,
|
||||
unsigned int keylen)
|
||||
{
|
||||
struct speck128_xts_tfm_ctx *ctx = crypto_skcipher_ctx(tfm);
|
||||
int err;
|
||||
|
||||
err = xts_verify_key(tfm, key, keylen);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
keylen /= 2;
|
||||
|
||||
err = crypto_speck128_setkey(&ctx->main_key, key, keylen);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
return crypto_speck128_setkey(&ctx->tweak_key, key + keylen, keylen);
|
||||
}
|
||||
|
||||
/* Speck64 */
|
||||
|
||||
struct speck64_xts_tfm_ctx {
|
||||
struct speck64_tfm_ctx main_key;
|
||||
struct speck64_tfm_ctx tweak_key;
|
||||
};
|
||||
|
||||
asmlinkage void speck64_xts_encrypt_neon(const u32 *round_keys, int nrounds,
|
||||
void *dst, const void *src,
|
||||
unsigned int nbytes, void *tweak);
|
||||
|
||||
asmlinkage void speck64_xts_decrypt_neon(const u32 *round_keys, int nrounds,
|
||||
void *dst, const void *src,
|
||||
unsigned int nbytes, void *tweak);
|
||||
|
||||
typedef void (*speck64_crypt_one_t)(const struct speck64_tfm_ctx *,
|
||||
u8 *, const u8 *);
|
||||
typedef void (*speck64_xts_crypt_many_t)(const u32 *, int, void *,
|
||||
const void *, unsigned int, void *);
|
||||
|
||||
static __always_inline int
|
||||
__speck64_xts_crypt(struct skcipher_request *req, speck64_crypt_one_t crypt_one,
|
||||
speck64_xts_crypt_many_t crypt_many)
|
||||
{
|
||||
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
|
||||
const struct speck64_xts_tfm_ctx *ctx = crypto_skcipher_ctx(tfm);
|
||||
struct skcipher_walk walk;
|
||||
__le64 tweak;
|
||||
int err;
|
||||
|
||||
err = skcipher_walk_virt(&walk, req, true);
|
||||
|
||||
crypto_speck64_encrypt(&ctx->tweak_key, (u8 *)&tweak, walk.iv);
|
||||
|
||||
while (walk.nbytes > 0) {
|
||||
unsigned int nbytes = walk.nbytes;
|
||||
u8 *dst = walk.dst.virt.addr;
|
||||
const u8 *src = walk.src.virt.addr;
|
||||
|
||||
if (nbytes >= SPECK_NEON_CHUNK_SIZE && may_use_simd()) {
|
||||
unsigned int count;
|
||||
|
||||
count = round_down(nbytes, SPECK_NEON_CHUNK_SIZE);
|
||||
kernel_neon_begin();
|
||||
(*crypt_many)(ctx->main_key.round_keys,
|
||||
ctx->main_key.nrounds,
|
||||
dst, src, count, &tweak);
|
||||
kernel_neon_end();
|
||||
dst += count;
|
||||
src += count;
|
||||
nbytes -= count;
|
||||
}
|
||||
|
||||
/* Handle any remainder with generic code */
|
||||
while (nbytes >= sizeof(tweak)) {
|
||||
*(__le64 *)dst = *(__le64 *)src ^ tweak;
|
||||
(*crypt_one)(&ctx->main_key, dst, dst);
|
||||
*(__le64 *)dst ^= tweak;
|
||||
tweak = cpu_to_le64((le64_to_cpu(tweak) << 1) ^
|
||||
((tweak & cpu_to_le64(1ULL << 63)) ?
|
||||
0x1B : 0));
|
||||
dst += sizeof(tweak);
|
||||
src += sizeof(tweak);
|
||||
nbytes -= sizeof(tweak);
|
||||
}
|
||||
err = skcipher_walk_done(&walk, nbytes);
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static int speck64_xts_encrypt(struct skcipher_request *req)
|
||||
{
|
||||
return __speck64_xts_crypt(req, crypto_speck64_encrypt,
|
||||
speck64_xts_encrypt_neon);
|
||||
}
|
||||
|
||||
static int speck64_xts_decrypt(struct skcipher_request *req)
|
||||
{
|
||||
return __speck64_xts_crypt(req, crypto_speck64_decrypt,
|
||||
speck64_xts_decrypt_neon);
|
||||
}
|
||||
|
||||
static int speck64_xts_setkey(struct crypto_skcipher *tfm, const u8 *key,
|
||||
unsigned int keylen)
|
||||
{
|
||||
struct speck64_xts_tfm_ctx *ctx = crypto_skcipher_ctx(tfm);
|
||||
int err;
|
||||
|
||||
err = xts_verify_key(tfm, key, keylen);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
keylen /= 2;
|
||||
|
||||
err = crypto_speck64_setkey(&ctx->main_key, key, keylen);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
return crypto_speck64_setkey(&ctx->tweak_key, key + keylen, keylen);
|
||||
}
|
||||
|
||||
static struct skcipher_alg speck_algs[] = {
|
||||
{
|
||||
.base.cra_name = "xts(speck128)",
|
||||
.base.cra_driver_name = "xts-speck128-neon",
|
||||
.base.cra_priority = 300,
|
||||
.base.cra_blocksize = SPECK128_BLOCK_SIZE,
|
||||
.base.cra_ctxsize = sizeof(struct speck128_xts_tfm_ctx),
|
||||
.base.cra_alignmask = 7,
|
||||
.base.cra_module = THIS_MODULE,
|
||||
.min_keysize = 2 * SPECK128_128_KEY_SIZE,
|
||||
.max_keysize = 2 * SPECK128_256_KEY_SIZE,
|
||||
.ivsize = SPECK128_BLOCK_SIZE,
|
||||
.walksize = SPECK_NEON_CHUNK_SIZE,
|
||||
.setkey = speck128_xts_setkey,
|
||||
.encrypt = speck128_xts_encrypt,
|
||||
.decrypt = speck128_xts_decrypt,
|
||||
}, {
|
||||
.base.cra_name = "xts(speck64)",
|
||||
.base.cra_driver_name = "xts-speck64-neon",
|
||||
.base.cra_priority = 300,
|
||||
.base.cra_blocksize = SPECK64_BLOCK_SIZE,
|
||||
.base.cra_ctxsize = sizeof(struct speck64_xts_tfm_ctx),
|
||||
.base.cra_alignmask = 7,
|
||||
.base.cra_module = THIS_MODULE,
|
||||
.min_keysize = 2 * SPECK64_96_KEY_SIZE,
|
||||
.max_keysize = 2 * SPECK64_128_KEY_SIZE,
|
||||
.ivsize = SPECK64_BLOCK_SIZE,
|
||||
.walksize = SPECK_NEON_CHUNK_SIZE,
|
||||
.setkey = speck64_xts_setkey,
|
||||
.encrypt = speck64_xts_encrypt,
|
||||
.decrypt = speck64_xts_decrypt,
|
||||
}
|
||||
};
|
||||
|
||||
static int __init speck_neon_module_init(void)
|
||||
{
|
||||
if (!(elf_hwcap & HWCAP_NEON))
|
||||
return -ENODEV;
|
||||
return crypto_register_skciphers(speck_algs, ARRAY_SIZE(speck_algs));
|
||||
}
|
||||
|
||||
static void __exit speck_neon_module_exit(void)
|
||||
{
|
||||
crypto_unregister_skciphers(speck_algs, ARRAY_SIZE(speck_algs));
|
||||
}
|
||||
|
||||
module_init(speck_neon_module_init);
|
||||
module_exit(speck_neon_module_exit);
|
||||
|
||||
MODULE_DESCRIPTION("Speck block cipher (NEON-accelerated)");
|
||||
MODULE_LICENSE("GPL");
|
||||
MODULE_AUTHOR("Eric Biggers <ebiggers@google.com>");
|
||||
MODULE_ALIAS_CRYPTO("xts(speck128)");
|
||||
MODULE_ALIAS_CRYPTO("xts-speck128-neon");
|
||||
MODULE_ALIAS_CRYPTO("xts(speck64)");
|
||||
MODULE_ALIAS_CRYPTO("xts-speck64-neon");
|
@ -698,6 +698,7 @@ CONFIG_MEMTEST=y
|
||||
CONFIG_SECURITY=y
|
||||
CONFIG_CRYPTO_ECHAINIV=y
|
||||
CONFIG_CRYPTO_ANSI_CPRNG=y
|
||||
CONFIG_CRYPTO_DEV_FSL_DPAA2_CAAM=y
|
||||
CONFIG_ARM64_CRYPTO=y
|
||||
CONFIG_CRYPTO_SHA1_ARM64_CE=y
|
||||
CONFIG_CRYPTO_SHA2_ARM64_CE=y
|
||||
@ -706,7 +707,6 @@ CONFIG_CRYPTO_SHA3_ARM64=m
|
||||
CONFIG_CRYPTO_SM3_ARM64_CE=m
|
||||
CONFIG_CRYPTO_GHASH_ARM64_CE=y
|
||||
CONFIG_CRYPTO_CRCT10DIF_ARM64_CE=m
|
||||
CONFIG_CRYPTO_CRC32_ARM64_CE=m
|
||||
CONFIG_CRYPTO_AES_ARM64_CE_CCM=y
|
||||
CONFIG_CRYPTO_AES_ARM64_CE_BLK=y
|
||||
CONFIG_CRYPTO_CHACHA20_NEON=m
|
||||
|
@ -66,11 +66,6 @@ config CRYPTO_CRCT10DIF_ARM64_CE
|
||||
depends on KERNEL_MODE_NEON && CRC_T10DIF
|
||||
select CRYPTO_HASH
|
||||
|
||||
config CRYPTO_CRC32_ARM64_CE
|
||||
tristate "CRC32 and CRC32C digest algorithms using ARMv8 extensions"
|
||||
depends on CRC32
|
||||
select CRYPTO_HASH
|
||||
|
||||
config CRYPTO_AES_ARM64
|
||||
tristate "AES core cipher using scalar instructions"
|
||||
select CRYPTO_AES
|
||||
@ -119,10 +114,4 @@ config CRYPTO_AES_ARM64_BS
|
||||
select CRYPTO_AES_ARM64
|
||||
select CRYPTO_SIMD
|
||||
|
||||
config CRYPTO_SPECK_NEON
|
||||
tristate "NEON accelerated Speck cipher algorithms"
|
||||
depends on KERNEL_MODE_NEON
|
||||
select CRYPTO_BLKCIPHER
|
||||
select CRYPTO_SPECK
|
||||
|
||||
endif
|
||||
|
@ -32,9 +32,6 @@ ghash-ce-y := ghash-ce-glue.o ghash-ce-core.o
|
||||
obj-$(CONFIG_CRYPTO_CRCT10DIF_ARM64_CE) += crct10dif-ce.o
|
||||
crct10dif-ce-y := crct10dif-ce-core.o crct10dif-ce-glue.o
|
||||
|
||||
obj-$(CONFIG_CRYPTO_CRC32_ARM64_CE) += crc32-ce.o
|
||||
crc32-ce-y:= crc32-ce-core.o crc32-ce-glue.o
|
||||
|
||||
obj-$(CONFIG_CRYPTO_AES_ARM64_CE) += aes-ce-cipher.o
|
||||
aes-ce-cipher-y := aes-ce-core.o aes-ce-glue.o
|
||||
|
||||
@ -56,9 +53,6 @@ sha512-arm64-y := sha512-glue.o sha512-core.o
|
||||
obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha20-neon.o
|
||||
chacha20-neon-y := chacha20-neon-core.o chacha20-neon-glue.o
|
||||
|
||||
obj-$(CONFIG_CRYPTO_SPECK_NEON) += speck-neon.o
|
||||
speck-neon-y := speck-neon-core.o speck-neon-glue.o
|
||||
|
||||
obj-$(CONFIG_CRYPTO_AES_ARM64) += aes-arm64.o
|
||||
aes-arm64-y := aes-cipher-core.o aes-cipher-glue.o
|
||||
|
||||
|
@ -17,6 +17,11 @@
|
||||
|
||||
.arch armv8-a+crypto
|
||||
|
||||
xtsmask .req v16
|
||||
|
||||
.macro xts_reload_mask, tmp
|
||||
.endm
|
||||
|
||||
/* preload all round keys */
|
||||
.macro load_round_keys, rounds, rk
|
||||
cmp \rounds, #12
|
||||
|
@ -15,6 +15,7 @@
|
||||
#include <crypto/internal/hash.h>
|
||||
#include <crypto/internal/simd.h>
|
||||
#include <crypto/internal/skcipher.h>
|
||||
#include <crypto/scatterwalk.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/cpufeature.h>
|
||||
#include <crypto/xts.h>
|
||||
@ -31,6 +32,8 @@
|
||||
#define aes_ecb_decrypt ce_aes_ecb_decrypt
|
||||
#define aes_cbc_encrypt ce_aes_cbc_encrypt
|
||||
#define aes_cbc_decrypt ce_aes_cbc_decrypt
|
||||
#define aes_cbc_cts_encrypt ce_aes_cbc_cts_encrypt
|
||||
#define aes_cbc_cts_decrypt ce_aes_cbc_cts_decrypt
|
||||
#define aes_ctr_encrypt ce_aes_ctr_encrypt
|
||||
#define aes_xts_encrypt ce_aes_xts_encrypt
|
||||
#define aes_xts_decrypt ce_aes_xts_decrypt
|
||||
@ -45,6 +48,8 @@ MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS using ARMv8 Crypto Extensions");
|
||||
#define aes_ecb_decrypt neon_aes_ecb_decrypt
|
||||
#define aes_cbc_encrypt neon_aes_cbc_encrypt
|
||||
#define aes_cbc_decrypt neon_aes_cbc_decrypt
|
||||
#define aes_cbc_cts_encrypt neon_aes_cbc_cts_encrypt
|
||||
#define aes_cbc_cts_decrypt neon_aes_cbc_cts_decrypt
|
||||
#define aes_ctr_encrypt neon_aes_ctr_encrypt
|
||||
#define aes_xts_encrypt neon_aes_xts_encrypt
|
||||
#define aes_xts_decrypt neon_aes_xts_decrypt
|
||||
@ -63,30 +68,41 @@ MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
|
||||
MODULE_LICENSE("GPL v2");
|
||||
|
||||
/* defined in aes-modes.S */
|
||||
asmlinkage void aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[],
|
||||
asmlinkage void aes_ecb_encrypt(u8 out[], u8 const in[], u32 const rk[],
|
||||
int rounds, int blocks);
|
||||
asmlinkage void aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[],
|
||||
asmlinkage void aes_ecb_decrypt(u8 out[], u8 const in[], u32 const rk[],
|
||||
int rounds, int blocks);
|
||||
|
||||
asmlinkage void aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[],
|
||||
asmlinkage void aes_cbc_encrypt(u8 out[], u8 const in[], u32 const rk[],
|
||||
int rounds, int blocks, u8 iv[]);
|
||||
asmlinkage void aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[],
|
||||
asmlinkage void aes_cbc_decrypt(u8 out[], u8 const in[], u32 const rk[],
|
||||
int rounds, int blocks, u8 iv[]);
|
||||
|
||||
asmlinkage void aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[],
|
||||
asmlinkage void aes_cbc_cts_encrypt(u8 out[], u8 const in[], u32 const rk[],
|
||||
int rounds, int bytes, u8 const iv[]);
|
||||
asmlinkage void aes_cbc_cts_decrypt(u8 out[], u8 const in[], u32 const rk[],
|
||||
int rounds, int bytes, u8 const iv[]);
|
||||
|
||||
asmlinkage void aes_ctr_encrypt(u8 out[], u8 const in[], u32 const rk[],
|
||||
int rounds, int blocks, u8 ctr[]);
|
||||
|
||||
asmlinkage void aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[],
|
||||
int rounds, int blocks, u8 const rk2[], u8 iv[],
|
||||
asmlinkage void aes_xts_encrypt(u8 out[], u8 const in[], u32 const rk1[],
|
||||
int rounds, int blocks, u32 const rk2[], u8 iv[],
|
||||
int first);
|
||||
asmlinkage void aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[],
|
||||
int rounds, int blocks, u8 const rk2[], u8 iv[],
|
||||
asmlinkage void aes_xts_decrypt(u8 out[], u8 const in[], u32 const rk1[],
|
||||
int rounds, int blocks, u32 const rk2[], u8 iv[],
|
||||
int first);
|
||||
|
||||
asmlinkage void aes_mac_update(u8 const in[], u32 const rk[], int rounds,
|
||||
int blocks, u8 dg[], int enc_before,
|
||||
int enc_after);
|
||||
|
||||
struct cts_cbc_req_ctx {
|
||||
struct scatterlist sg_src[2];
|
||||
struct scatterlist sg_dst[2];
|
||||
struct skcipher_request subreq;
|
||||
};
|
||||
|
||||
struct crypto_aes_xts_ctx {
|
||||
struct crypto_aes_ctx key1;
|
||||
struct crypto_aes_ctx __aligned(8) key2;
|
||||
@ -142,7 +158,7 @@ static int ecb_encrypt(struct skcipher_request *req)
|
||||
while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
|
||||
kernel_neon_begin();
|
||||
aes_ecb_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
|
||||
(u8 *)ctx->key_enc, rounds, blocks);
|
||||
ctx->key_enc, rounds, blocks);
|
||||
kernel_neon_end();
|
||||
err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
|
||||
}
|
||||
@ -162,7 +178,7 @@ static int ecb_decrypt(struct skcipher_request *req)
|
||||
while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
|
||||
kernel_neon_begin();
|
||||
aes_ecb_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
|
||||
(u8 *)ctx->key_dec, rounds, blocks);
|
||||
ctx->key_dec, rounds, blocks);
|
||||
kernel_neon_end();
|
||||
err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
|
||||
}
|
||||
@ -182,7 +198,7 @@ static int cbc_encrypt(struct skcipher_request *req)
|
||||
while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
|
||||
kernel_neon_begin();
|
||||
aes_cbc_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
|
||||
(u8 *)ctx->key_enc, rounds, blocks, walk.iv);
|
||||
ctx->key_enc, rounds, blocks, walk.iv);
|
||||
kernel_neon_end();
|
||||
err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
|
||||
}
|
||||
@ -202,13 +218,149 @@ static int cbc_decrypt(struct skcipher_request *req)
|
||||
while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
|
||||
kernel_neon_begin();
|
||||
aes_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
|
||||
(u8 *)ctx->key_dec, rounds, blocks, walk.iv);
|
||||
ctx->key_dec, rounds, blocks, walk.iv);
|
||||
kernel_neon_end();
|
||||
err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
|
||||
}
|
||||
return err;
|
||||
}
|
||||
|
||||
static int cts_cbc_init_tfm(struct crypto_skcipher *tfm)
|
||||
{
|
||||
crypto_skcipher_set_reqsize(tfm, sizeof(struct cts_cbc_req_ctx));
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int cts_cbc_encrypt(struct skcipher_request *req)
|
||||
{
|
||||
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
|
||||
struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
|
||||
struct cts_cbc_req_ctx *rctx = skcipher_request_ctx(req);
|
||||
int err, rounds = 6 + ctx->key_length / 4;
|
||||
int cbc_blocks = DIV_ROUND_UP(req->cryptlen, AES_BLOCK_SIZE) - 2;
|
||||
struct scatterlist *src = req->src, *dst = req->dst;
|
||||
struct skcipher_walk walk;
|
||||
|
||||
skcipher_request_set_tfm(&rctx->subreq, tfm);
|
||||
|
||||
if (req->cryptlen <= AES_BLOCK_SIZE) {
|
||||
if (req->cryptlen < AES_BLOCK_SIZE)
|
||||
return -EINVAL;
|
||||
cbc_blocks = 1;
|
||||
}
|
||||
|
||||
if (cbc_blocks > 0) {
|
||||
unsigned int blocks;
|
||||
|
||||
skcipher_request_set_crypt(&rctx->subreq, req->src, req->dst,
|
||||
cbc_blocks * AES_BLOCK_SIZE,
|
||||
req->iv);
|
||||
|
||||
err = skcipher_walk_virt(&walk, &rctx->subreq, false);
|
||||
|
||||
while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
|
||||
kernel_neon_begin();
|
||||
aes_cbc_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
|
||||
ctx->key_enc, rounds, blocks, walk.iv);
|
||||
kernel_neon_end();
|
||||
err = skcipher_walk_done(&walk,
|
||||
walk.nbytes % AES_BLOCK_SIZE);
|
||||
}
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
if (req->cryptlen == AES_BLOCK_SIZE)
|
||||
return 0;
|
||||
|
||||
dst = src = scatterwalk_ffwd(rctx->sg_src, req->src,
|
||||
rctx->subreq.cryptlen);
|
||||
if (req->dst != req->src)
|
||||
dst = scatterwalk_ffwd(rctx->sg_dst, req->dst,
|
||||
rctx->subreq.cryptlen);
|
||||
}
|
||||
|
||||
/* handle ciphertext stealing */
|
||||
skcipher_request_set_crypt(&rctx->subreq, src, dst,
|
||||
req->cryptlen - cbc_blocks * AES_BLOCK_SIZE,
|
||||
req->iv);
|
||||
|
||||
err = skcipher_walk_virt(&walk, &rctx->subreq, false);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
kernel_neon_begin();
|
||||
aes_cbc_cts_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
|
||||
ctx->key_enc, rounds, walk.nbytes, walk.iv);
|
||||
kernel_neon_end();
|
||||
|
||||
return skcipher_walk_done(&walk, 0);
|
||||
}
|
||||
|
||||
static int cts_cbc_decrypt(struct skcipher_request *req)
|
||||
{
|
||||
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
|
||||
struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
|
||||
struct cts_cbc_req_ctx *rctx = skcipher_request_ctx(req);
|
||||
int err, rounds = 6 + ctx->key_length / 4;
|
||||
int cbc_blocks = DIV_ROUND_UP(req->cryptlen, AES_BLOCK_SIZE) - 2;
|
||||
struct scatterlist *src = req->src, *dst = req->dst;
|
||||
struct skcipher_walk walk;
|
||||
|
||||
skcipher_request_set_tfm(&rctx->subreq, tfm);
|
||||
|
||||
if (req->cryptlen <= AES_BLOCK_SIZE) {
|
||||
if (req->cryptlen < AES_BLOCK_SIZE)
|
||||
return -EINVAL;
|
||||
cbc_blocks = 1;
|
||||
}
|
||||
|
||||
if (cbc_blocks > 0) {
|
||||
unsigned int blocks;
|
||||
|
||||
skcipher_request_set_crypt(&rctx->subreq, req->src, req->dst,
|
||||
cbc_blocks * AES_BLOCK_SIZE,
|
||||
req->iv);
|
||||
|
||||
err = skcipher_walk_virt(&walk, &rctx->subreq, false);
|
||||
|
||||
while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
|
||||
kernel_neon_begin();
|
||||
aes_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
|
||||
ctx->key_dec, rounds, blocks, walk.iv);
|
||||
kernel_neon_end();
|
||||
err = skcipher_walk_done(&walk,
|
||||
walk.nbytes % AES_BLOCK_SIZE);
|
||||
}
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
if (req->cryptlen == AES_BLOCK_SIZE)
|
||||
return 0;
|
||||
|
||||
dst = src = scatterwalk_ffwd(rctx->sg_src, req->src,
|
||||
rctx->subreq.cryptlen);
|
||||
if (req->dst != req->src)
|
||||
dst = scatterwalk_ffwd(rctx->sg_dst, req->dst,
|
||||
rctx->subreq.cryptlen);
|
||||
}
|
||||
|
||||
/* handle ciphertext stealing */
|
||||
skcipher_request_set_crypt(&rctx->subreq, src, dst,
|
||||
req->cryptlen - cbc_blocks * AES_BLOCK_SIZE,
|
||||
req->iv);
|
||||
|
||||
err = skcipher_walk_virt(&walk, &rctx->subreq, false);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
kernel_neon_begin();
|
||||
aes_cbc_cts_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
|
||||
ctx->key_dec, rounds, walk.nbytes, walk.iv);
|
||||
kernel_neon_end();
|
||||
|
||||
return skcipher_walk_done(&walk, 0);
|
||||
}
|
||||
|
||||
static int ctr_encrypt(struct skcipher_request *req)
|
||||
{
|
||||
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
|
||||
@ -222,7 +374,7 @@ static int ctr_encrypt(struct skcipher_request *req)
|
||||
while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
|
||||
kernel_neon_begin();
|
||||
aes_ctr_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
|
||||
(u8 *)ctx->key_enc, rounds, blocks, walk.iv);
|
||||
ctx->key_enc, rounds, blocks, walk.iv);
|
||||
kernel_neon_end();
|
||||
err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
|
||||
}
|
||||
@ -238,7 +390,7 @@ static int ctr_encrypt(struct skcipher_request *req)
|
||||
blocks = -1;
|
||||
|
||||
kernel_neon_begin();
|
||||
aes_ctr_encrypt(tail, NULL, (u8 *)ctx->key_enc, rounds,
|
||||
aes_ctr_encrypt(tail, NULL, ctx->key_enc, rounds,
|
||||
blocks, walk.iv);
|
||||
kernel_neon_end();
|
||||
crypto_xor_cpy(tdst, tsrc, tail, nbytes);
|
||||
@ -272,8 +424,8 @@ static int xts_encrypt(struct skcipher_request *req)
|
||||
for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
|
||||
kernel_neon_begin();
|
||||
aes_xts_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
|
||||
(u8 *)ctx->key1.key_enc, rounds, blocks,
|
||||
(u8 *)ctx->key2.key_enc, walk.iv, first);
|
||||
ctx->key1.key_enc, rounds, blocks,
|
||||
ctx->key2.key_enc, walk.iv, first);
|
||||
kernel_neon_end();
|
||||
err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
|
||||
}
|
||||
@ -294,8 +446,8 @@ static int xts_decrypt(struct skcipher_request *req)
|
||||
for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
|
||||
kernel_neon_begin();
|
||||
aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
|
||||
(u8 *)ctx->key1.key_dec, rounds, blocks,
|
||||
(u8 *)ctx->key2.key_enc, walk.iv, first);
|
||||
ctx->key1.key_dec, rounds, blocks,
|
||||
ctx->key2.key_enc, walk.iv, first);
|
||||
kernel_neon_end();
|
||||
err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
|
||||
}
|
||||
@ -334,6 +486,24 @@ static struct skcipher_alg aes_algs[] = { {
|
||||
.setkey = skcipher_aes_setkey,
|
||||
.encrypt = cbc_encrypt,
|
||||
.decrypt = cbc_decrypt,
|
||||
}, {
|
||||
.base = {
|
||||
.cra_name = "__cts(cbc(aes))",
|
||||
.cra_driver_name = "__cts-cbc-aes-" MODE,
|
||||
.cra_priority = PRIO,
|
||||
.cra_flags = CRYPTO_ALG_INTERNAL,
|
||||
.cra_blocksize = AES_BLOCK_SIZE,
|
||||
.cra_ctxsize = sizeof(struct crypto_aes_ctx),
|
||||
.cra_module = THIS_MODULE,
|
||||
},
|
||||
.min_keysize = AES_MIN_KEY_SIZE,
|
||||
.max_keysize = AES_MAX_KEY_SIZE,
|
||||
.ivsize = AES_BLOCK_SIZE,
|
||||
.walksize = 2 * AES_BLOCK_SIZE,
|
||||
.setkey = skcipher_aes_setkey,
|
||||
.encrypt = cts_cbc_encrypt,
|
||||
.decrypt = cts_cbc_decrypt,
|
||||
.init = cts_cbc_init_tfm,
|
||||
}, {
|
||||
.base = {
|
||||
.cra_name = "__ctr(aes)",
|
||||
@ -412,7 +582,6 @@ static int cmac_setkey(struct crypto_shash *tfm, const u8 *in_key,
|
||||
{
|
||||
struct mac_tfm_ctx *ctx = crypto_shash_ctx(tfm);
|
||||
be128 *consts = (be128 *)ctx->consts;
|
||||
u8 *rk = (u8 *)ctx->key.key_enc;
|
||||
int rounds = 6 + key_len / 4;
|
||||
int err;
|
||||
|
||||
@ -422,7 +591,8 @@ static int cmac_setkey(struct crypto_shash *tfm, const u8 *in_key,
|
||||
|
||||
/* encrypt the zero vector */
|
||||
kernel_neon_begin();
|
||||
aes_ecb_encrypt(ctx->consts, (u8[AES_BLOCK_SIZE]){}, rk, rounds, 1);
|
||||
aes_ecb_encrypt(ctx->consts, (u8[AES_BLOCK_SIZE]){}, ctx->key.key_enc,
|
||||
rounds, 1);
|
||||
kernel_neon_end();
|
||||
|
||||
cmac_gf128_mul_by_x(consts, consts);
|
||||
@ -441,7 +611,6 @@ static int xcbc_setkey(struct crypto_shash *tfm, const u8 *in_key,
|
||||
};
|
||||
|
||||
struct mac_tfm_ctx *ctx = crypto_shash_ctx(tfm);
|
||||
u8 *rk = (u8 *)ctx->key.key_enc;
|
||||
int rounds = 6 + key_len / 4;
|
||||
u8 key[AES_BLOCK_SIZE];
|
||||
int err;
|
||||
@ -451,8 +620,8 @@ static int xcbc_setkey(struct crypto_shash *tfm, const u8 *in_key,
|
||||
return err;
|
||||
|
||||
kernel_neon_begin();
|
||||
aes_ecb_encrypt(key, ks[0], rk, rounds, 1);
|
||||
aes_ecb_encrypt(ctx->consts, ks[1], rk, rounds, 2);
|
||||
aes_ecb_encrypt(key, ks[0], ctx->key.key_enc, rounds, 1);
|
||||
aes_ecb_encrypt(ctx->consts, ks[1], ctx->key.key_enc, rounds, 2);
|
||||
kernel_neon_end();
|
||||
|
||||
return cbcmac_setkey(tfm, key, sizeof(key));
|
||||
|
@ -14,12 +14,12 @@
|
||||
.align 4
|
||||
|
||||
aes_encrypt_block4x:
|
||||
encrypt_block4x v0, v1, v2, v3, w22, x21, x8, w7
|
||||
encrypt_block4x v0, v1, v2, v3, w3, x2, x8, w7
|
||||
ret
|
||||
ENDPROC(aes_encrypt_block4x)
|
||||
|
||||
aes_decrypt_block4x:
|
||||
decrypt_block4x v0, v1, v2, v3, w22, x21, x8, w7
|
||||
decrypt_block4x v0, v1, v2, v3, w3, x2, x8, w7
|
||||
ret
|
||||
ENDPROC(aes_decrypt_block4x)
|
||||
|
||||
@ -31,71 +31,57 @@ ENDPROC(aes_decrypt_block4x)
|
||||
*/
|
||||
|
||||
AES_ENTRY(aes_ecb_encrypt)
|
||||
frame_push 5
|
||||
stp x29, x30, [sp, #-16]!
|
||||
mov x29, sp
|
||||
|
||||
mov x19, x0
|
||||
mov x20, x1
|
||||
mov x21, x2
|
||||
mov x22, x3
|
||||
mov x23, x4
|
||||
|
||||
.Lecbencrestart:
|
||||
enc_prepare w22, x21, x5
|
||||
enc_prepare w3, x2, x5
|
||||
|
||||
.LecbencloopNx:
|
||||
subs w23, w23, #4
|
||||
subs w4, w4, #4
|
||||
bmi .Lecbenc1x
|
||||
ld1 {v0.16b-v3.16b}, [x20], #64 /* get 4 pt blocks */
|
||||
ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 pt blocks */
|
||||
bl aes_encrypt_block4x
|
||||
st1 {v0.16b-v3.16b}, [x19], #64
|
||||
cond_yield_neon .Lecbencrestart
|
||||
st1 {v0.16b-v3.16b}, [x0], #64
|
||||
b .LecbencloopNx
|
||||
.Lecbenc1x:
|
||||
adds w23, w23, #4
|
||||
adds w4, w4, #4
|
||||
beq .Lecbencout
|
||||
.Lecbencloop:
|
||||
ld1 {v0.16b}, [x20], #16 /* get next pt block */
|
||||
encrypt_block v0, w22, x21, x5, w6
|
||||
st1 {v0.16b}, [x19], #16
|
||||
subs w23, w23, #1
|
||||
ld1 {v0.16b}, [x1], #16 /* get next pt block */
|
||||
encrypt_block v0, w3, x2, x5, w6
|
||||
st1 {v0.16b}, [x0], #16
|
||||
subs w4, w4, #1
|
||||
bne .Lecbencloop
|
||||
.Lecbencout:
|
||||
frame_pop
|
||||
ldp x29, x30, [sp], #16
|
||||
ret
|
||||
AES_ENDPROC(aes_ecb_encrypt)
|
||||
|
||||
|
||||
AES_ENTRY(aes_ecb_decrypt)
|
||||
frame_push 5
|
||||
stp x29, x30, [sp, #-16]!
|
||||
mov x29, sp
|
||||
|
||||
mov x19, x0
|
||||
mov x20, x1
|
||||
mov x21, x2
|
||||
mov x22, x3
|
||||
mov x23, x4
|
||||
|
||||
.Lecbdecrestart:
|
||||
dec_prepare w22, x21, x5
|
||||
dec_prepare w3, x2, x5
|
||||
|
||||
.LecbdecloopNx:
|
||||
subs w23, w23, #4
|
||||
subs w4, w4, #4
|
||||
bmi .Lecbdec1x
|
||||
ld1 {v0.16b-v3.16b}, [x20], #64 /* get 4 ct blocks */
|
||||
ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */
|
||||
bl aes_decrypt_block4x
|
||||
st1 {v0.16b-v3.16b}, [x19], #64
|
||||
cond_yield_neon .Lecbdecrestart
|
||||
st1 {v0.16b-v3.16b}, [x0], #64
|
||||
b .LecbdecloopNx
|
||||
.Lecbdec1x:
|
||||
adds w23, w23, #4
|
||||
adds w4, w4, #4
|
||||
beq .Lecbdecout
|
||||
.Lecbdecloop:
|
||||
ld1 {v0.16b}, [x20], #16 /* get next ct block */
|
||||
decrypt_block v0, w22, x21, x5, w6
|
||||
st1 {v0.16b}, [x19], #16
|
||||
subs w23, w23, #1
|
||||
ld1 {v0.16b}, [x1], #16 /* get next ct block */
|
||||
decrypt_block v0, w3, x2, x5, w6
|
||||
st1 {v0.16b}, [x0], #16
|
||||
subs w4, w4, #1
|
||||
bne .Lecbdecloop
|
||||
.Lecbdecout:
|
||||
frame_pop
|
||||
ldp x29, x30, [sp], #16
|
||||
ret
|
||||
AES_ENDPROC(aes_ecb_decrypt)
|
||||
|
||||
@ -108,162 +94,211 @@ AES_ENDPROC(aes_ecb_decrypt)
|
||||
*/
|
||||
|
||||
AES_ENTRY(aes_cbc_encrypt)
|
||||
frame_push 6
|
||||
|
||||
mov x19, x0
|
||||
mov x20, x1
|
||||
mov x21, x2
|
||||
mov x22, x3
|
||||
mov x23, x4
|
||||
mov x24, x5
|
||||
|
||||
.Lcbcencrestart:
|
||||
ld1 {v4.16b}, [x24] /* get iv */
|
||||
enc_prepare w22, x21, x6
|
||||
ld1 {v4.16b}, [x5] /* get iv */
|
||||
enc_prepare w3, x2, x6
|
||||
|
||||
.Lcbcencloop4x:
|
||||
subs w23, w23, #4
|
||||
subs w4, w4, #4
|
||||
bmi .Lcbcenc1x
|
||||
ld1 {v0.16b-v3.16b}, [x20], #64 /* get 4 pt blocks */
|
||||
ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 pt blocks */
|
||||
eor v0.16b, v0.16b, v4.16b /* ..and xor with iv */
|
||||
encrypt_block v0, w22, x21, x6, w7
|
||||
encrypt_block v0, w3, x2, x6, w7
|
||||
eor v1.16b, v1.16b, v0.16b
|
||||
encrypt_block v1, w22, x21, x6, w7
|
||||
encrypt_block v1, w3, x2, x6, w7
|
||||
eor v2.16b, v2.16b, v1.16b
|
||||
encrypt_block v2, w22, x21, x6, w7
|
||||
encrypt_block v2, w3, x2, x6, w7
|
||||
eor v3.16b, v3.16b, v2.16b
|
||||
encrypt_block v3, w22, x21, x6, w7
|
||||
st1 {v0.16b-v3.16b}, [x19], #64
|
||||
encrypt_block v3, w3, x2, x6, w7
|
||||
st1 {v0.16b-v3.16b}, [x0], #64
|
||||
mov v4.16b, v3.16b
|
||||
st1 {v4.16b}, [x24] /* return iv */
|
||||
cond_yield_neon .Lcbcencrestart
|
||||
b .Lcbcencloop4x
|
||||
.Lcbcenc1x:
|
||||
adds w23, w23, #4
|
||||
adds w4, w4, #4
|
||||
beq .Lcbcencout
|
||||
.Lcbcencloop:
|
||||
ld1 {v0.16b}, [x20], #16 /* get next pt block */
|
||||
ld1 {v0.16b}, [x1], #16 /* get next pt block */
|
||||
eor v4.16b, v4.16b, v0.16b /* ..and xor with iv */
|
||||
encrypt_block v4, w22, x21, x6, w7
|
||||
st1 {v4.16b}, [x19], #16
|
||||
subs w23, w23, #1
|
||||
encrypt_block v4, w3, x2, x6, w7
|
||||
st1 {v4.16b}, [x0], #16
|
||||
subs w4, w4, #1
|
||||
bne .Lcbcencloop
|
||||
.Lcbcencout:
|
||||
st1 {v4.16b}, [x24] /* return iv */
|
||||
frame_pop
|
||||
st1 {v4.16b}, [x5] /* return iv */
|
||||
ret
|
||||
AES_ENDPROC(aes_cbc_encrypt)
|
||||
|
||||
|
||||
AES_ENTRY(aes_cbc_decrypt)
|
||||
frame_push 6
|
||||
stp x29, x30, [sp, #-16]!
|
||||
mov x29, sp
|
||||
|
||||
mov x19, x0
|
||||
mov x20, x1
|
||||
mov x21, x2
|
||||
mov x22, x3
|
||||
mov x23, x4
|
||||
mov x24, x5
|
||||
|
||||
.Lcbcdecrestart:
|
||||
ld1 {v7.16b}, [x24] /* get iv */
|
||||
dec_prepare w22, x21, x6
|
||||
ld1 {v7.16b}, [x5] /* get iv */
|
||||
dec_prepare w3, x2, x6
|
||||
|
||||
.LcbcdecloopNx:
|
||||
subs w23, w23, #4
|
||||
subs w4, w4, #4
|
||||
bmi .Lcbcdec1x
|
||||
ld1 {v0.16b-v3.16b}, [x20], #64 /* get 4 ct blocks */
|
||||
ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */
|
||||
mov v4.16b, v0.16b
|
||||
mov v5.16b, v1.16b
|
||||
mov v6.16b, v2.16b
|
||||
bl aes_decrypt_block4x
|
||||
sub x20, x20, #16
|
||||
sub x1, x1, #16
|
||||
eor v0.16b, v0.16b, v7.16b
|
||||
eor v1.16b, v1.16b, v4.16b
|
||||
ld1 {v7.16b}, [x20], #16 /* reload 1 ct block */
|
||||
ld1 {v7.16b}, [x1], #16 /* reload 1 ct block */
|
||||
eor v2.16b, v2.16b, v5.16b
|
||||
eor v3.16b, v3.16b, v6.16b
|
||||
st1 {v0.16b-v3.16b}, [x19], #64
|
||||
st1 {v7.16b}, [x24] /* return iv */
|
||||
cond_yield_neon .Lcbcdecrestart
|
||||
st1 {v0.16b-v3.16b}, [x0], #64
|
||||
b .LcbcdecloopNx
|
||||
.Lcbcdec1x:
|
||||
adds w23, w23, #4
|
||||
adds w4, w4, #4
|
||||
beq .Lcbcdecout
|
||||
.Lcbcdecloop:
|
||||
ld1 {v1.16b}, [x20], #16 /* get next ct block */
|
||||
ld1 {v1.16b}, [x1], #16 /* get next ct block */
|
||||
mov v0.16b, v1.16b /* ...and copy to v0 */
|
||||
decrypt_block v0, w22, x21, x6, w7
|
||||
decrypt_block v0, w3, x2, x6, w7
|
||||
eor v0.16b, v0.16b, v7.16b /* xor with iv => pt */
|
||||
mov v7.16b, v1.16b /* ct is next iv */
|
||||
st1 {v0.16b}, [x19], #16
|
||||
subs w23, w23, #1
|
||||
st1 {v0.16b}, [x0], #16
|
||||
subs w4, w4, #1
|
||||
bne .Lcbcdecloop
|
||||
.Lcbcdecout:
|
||||
st1 {v7.16b}, [x24] /* return iv */
|
||||
frame_pop
|
||||
st1 {v7.16b}, [x5] /* return iv */
|
||||
ldp x29, x30, [sp], #16
|
||||
ret
|
||||
AES_ENDPROC(aes_cbc_decrypt)
|
||||
|
||||
|
||||
/*
|
||||
* aes_cbc_cts_encrypt(u8 out[], u8 const in[], u32 const rk[],
|
||||
* int rounds, int bytes, u8 const iv[])
|
||||
* aes_cbc_cts_decrypt(u8 out[], u8 const in[], u32 const rk[],
|
||||
* int rounds, int bytes, u8 const iv[])
|
||||
*/
|
||||
|
||||
AES_ENTRY(aes_cbc_cts_encrypt)
|
||||
adr_l x8, .Lcts_permute_table
|
||||
sub x4, x4, #16
|
||||
add x9, x8, #32
|
||||
add x8, x8, x4
|
||||
sub x9, x9, x4
|
||||
ld1 {v3.16b}, [x8]
|
||||
ld1 {v4.16b}, [x9]
|
||||
|
||||
ld1 {v0.16b}, [x1], x4 /* overlapping loads */
|
||||
ld1 {v1.16b}, [x1]
|
||||
|
||||
ld1 {v5.16b}, [x5] /* get iv */
|
||||
enc_prepare w3, x2, x6
|
||||
|
||||
eor v0.16b, v0.16b, v5.16b /* xor with iv */
|
||||
tbl v1.16b, {v1.16b}, v4.16b
|
||||
encrypt_block v0, w3, x2, x6, w7
|
||||
|
||||
eor v1.16b, v1.16b, v0.16b
|
||||
tbl v0.16b, {v0.16b}, v3.16b
|
||||
encrypt_block v1, w3, x2, x6, w7
|
||||
|
||||
add x4, x0, x4
|
||||
st1 {v0.16b}, [x4] /* overlapping stores */
|
||||
st1 {v1.16b}, [x0]
|
||||
ret
|
||||
AES_ENDPROC(aes_cbc_cts_encrypt)
|
||||
|
||||
AES_ENTRY(aes_cbc_cts_decrypt)
|
||||
adr_l x8, .Lcts_permute_table
|
||||
sub x4, x4, #16
|
||||
add x9, x8, #32
|
||||
add x8, x8, x4
|
||||
sub x9, x9, x4
|
||||
ld1 {v3.16b}, [x8]
|
||||
ld1 {v4.16b}, [x9]
|
||||
|
||||
ld1 {v0.16b}, [x1], x4 /* overlapping loads */
|
||||
ld1 {v1.16b}, [x1]
|
||||
|
||||
ld1 {v5.16b}, [x5] /* get iv */
|
||||
dec_prepare w3, x2, x6
|
||||
|
||||
tbl v2.16b, {v1.16b}, v4.16b
|
||||
decrypt_block v0, w3, x2, x6, w7
|
||||
eor v2.16b, v2.16b, v0.16b
|
||||
|
||||
tbx v0.16b, {v1.16b}, v4.16b
|
||||
tbl v2.16b, {v2.16b}, v3.16b
|
||||
decrypt_block v0, w3, x2, x6, w7
|
||||
eor v0.16b, v0.16b, v5.16b /* xor with iv */
|
||||
|
||||
add x4, x0, x4
|
||||
st1 {v2.16b}, [x4] /* overlapping stores */
|
||||
st1 {v0.16b}, [x0]
|
||||
ret
|
||||
AES_ENDPROC(aes_cbc_cts_decrypt)
|
||||
|
||||
.section ".rodata", "a"
|
||||
.align 6
|
||||
.Lcts_permute_table:
|
||||
.byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
|
||||
.byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
|
||||
.byte 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7
|
||||
.byte 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf
|
||||
.byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
|
||||
.byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
|
||||
.previous
|
||||
|
||||
|
||||
/*
|
||||
* aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
|
||||
* int blocks, u8 ctr[])
|
||||
*/
|
||||
|
||||
AES_ENTRY(aes_ctr_encrypt)
|
||||
frame_push 6
|
||||
stp x29, x30, [sp, #-16]!
|
||||
mov x29, sp
|
||||
|
||||
mov x19, x0
|
||||
mov x20, x1
|
||||
mov x21, x2
|
||||
mov x22, x3
|
||||
mov x23, x4
|
||||
mov x24, x5
|
||||
|
||||
.Lctrrestart:
|
||||
enc_prepare w22, x21, x6
|
||||
ld1 {v4.16b}, [x24]
|
||||
enc_prepare w3, x2, x6
|
||||
ld1 {v4.16b}, [x5]
|
||||
|
||||
umov x6, v4.d[1] /* keep swabbed ctr in reg */
|
||||
rev x6, x6
|
||||
cmn w6, w4 /* 32 bit overflow? */
|
||||
bcs .Lctrloop
|
||||
.LctrloopNx:
|
||||
subs w23, w23, #4
|
||||
subs w4, w4, #4
|
||||
bmi .Lctr1x
|
||||
cmn w6, #4 /* 32 bit overflow? */
|
||||
bcs .Lctr1x
|
||||
ldr q8, =0x30000000200000001 /* addends 1,2,3[,0] */
|
||||
dup v7.4s, w6
|
||||
add w7, w6, #1
|
||||
mov v0.16b, v4.16b
|
||||
add v7.4s, v7.4s, v8.4s
|
||||
add w8, w6, #2
|
||||
mov v1.16b, v4.16b
|
||||
rev32 v8.16b, v7.16b
|
||||
add w9, w6, #3
|
||||
mov v2.16b, v4.16b
|
||||
rev w7, w7
|
||||
mov v3.16b, v4.16b
|
||||
mov v1.s[3], v8.s[0]
|
||||
mov v2.s[3], v8.s[1]
|
||||
mov v3.s[3], v8.s[2]
|
||||
ld1 {v5.16b-v7.16b}, [x20], #48 /* get 3 input blocks */
|
||||
rev w8, w8
|
||||
mov v1.s[3], w7
|
||||
rev w9, w9
|
||||
mov v2.s[3], w8
|
||||
mov v3.s[3], w9
|
||||
ld1 {v5.16b-v7.16b}, [x1], #48 /* get 3 input blocks */
|
||||
bl aes_encrypt_block4x
|
||||
eor v0.16b, v5.16b, v0.16b
|
||||
ld1 {v5.16b}, [x20], #16 /* get 1 input block */
|
||||
ld1 {v5.16b}, [x1], #16 /* get 1 input block */
|
||||
eor v1.16b, v6.16b, v1.16b
|
||||
eor v2.16b, v7.16b, v2.16b
|
||||
eor v3.16b, v5.16b, v3.16b
|
||||
st1 {v0.16b-v3.16b}, [x19], #64
|
||||
st1 {v0.16b-v3.16b}, [x0], #64
|
||||
add x6, x6, #4
|
||||
rev x7, x6
|
||||
ins v4.d[1], x7
|
||||
cbz w23, .Lctrout
|
||||
st1 {v4.16b}, [x24] /* return next CTR value */
|
||||
cond_yield_neon .Lctrrestart
|
||||
cbz w4, .Lctrout
|
||||
b .LctrloopNx
|
||||
.Lctr1x:
|
||||
adds w23, w23, #4
|
||||
adds w4, w4, #4
|
||||
beq .Lctrout
|
||||
.Lctrloop:
|
||||
mov v0.16b, v4.16b
|
||||
encrypt_block v0, w22, x21, x8, w7
|
||||
encrypt_block v0, w3, x2, x8, w7
|
||||
|
||||
adds x6, x6, #1 /* increment BE ctr */
|
||||
rev x7, x6
|
||||
@ -271,22 +306,22 @@ AES_ENTRY(aes_ctr_encrypt)
|
||||
bcs .Lctrcarry /* overflow? */
|
||||
|
||||
.Lctrcarrydone:
|
||||
subs w23, w23, #1
|
||||
subs w4, w4, #1
|
||||
bmi .Lctrtailblock /* blocks <0 means tail block */
|
||||
ld1 {v3.16b}, [x20], #16
|
||||
ld1 {v3.16b}, [x1], #16
|
||||
eor v3.16b, v0.16b, v3.16b
|
||||
st1 {v3.16b}, [x19], #16
|
||||
st1 {v3.16b}, [x0], #16
|
||||
bne .Lctrloop
|
||||
|
||||
.Lctrout:
|
||||
st1 {v4.16b}, [x24] /* return next CTR value */
|
||||
.Lctrret:
|
||||
frame_pop
|
||||
st1 {v4.16b}, [x5] /* return next CTR value */
|
||||
ldp x29, x30, [sp], #16
|
||||
ret
|
||||
|
||||
.Lctrtailblock:
|
||||
st1 {v0.16b}, [x19]
|
||||
b .Lctrret
|
||||
st1 {v0.16b}, [x0]
|
||||
ldp x29, x30, [sp], #16
|
||||
ret
|
||||
|
||||
.Lctrcarry:
|
||||
umov x7, v4.d[0] /* load upper word of ctr */
|
||||
@ -296,7 +331,6 @@ AES_ENTRY(aes_ctr_encrypt)
|
||||
ins v4.d[0], x7
|
||||
b .Lctrcarrydone
|
||||
AES_ENDPROC(aes_ctr_encrypt)
|
||||
.ltorg
|
||||
|
||||
|
||||
/*
|
||||
@ -306,150 +340,132 @@ AES_ENDPROC(aes_ctr_encrypt)
|
||||
* int blocks, u8 const rk2[], u8 iv[], int first)
|
||||
*/
|
||||
|
||||
.macro next_tweak, out, in, const, tmp
|
||||
.macro next_tweak, out, in, tmp
|
||||
sshr \tmp\().2d, \in\().2d, #63
|
||||
and \tmp\().16b, \tmp\().16b, \const\().16b
|
||||
and \tmp\().16b, \tmp\().16b, xtsmask.16b
|
||||
add \out\().2d, \in\().2d, \in\().2d
|
||||
ext \tmp\().16b, \tmp\().16b, \tmp\().16b, #8
|
||||
eor \out\().16b, \out\().16b, \tmp\().16b
|
||||
.endm
|
||||
|
||||
.Lxts_mul_x:
|
||||
CPU_LE( .quad 1, 0x87 )
|
||||
CPU_BE( .quad 0x87, 1 )
|
||||
.macro xts_load_mask, tmp
|
||||
movi xtsmask.2s, #0x1
|
||||
movi \tmp\().2s, #0x87
|
||||
uzp1 xtsmask.4s, xtsmask.4s, \tmp\().4s
|
||||
.endm
|
||||
|
||||
AES_ENTRY(aes_xts_encrypt)
|
||||
frame_push 6
|
||||
stp x29, x30, [sp, #-16]!
|
||||
mov x29, sp
|
||||
|
||||
mov x19, x0
|
||||
mov x20, x1
|
||||
mov x21, x2
|
||||
mov x22, x3
|
||||
mov x23, x4
|
||||
mov x24, x6
|
||||
|
||||
ld1 {v4.16b}, [x24]
|
||||
ld1 {v4.16b}, [x6]
|
||||
xts_load_mask v8
|
||||
cbz w7, .Lxtsencnotfirst
|
||||
|
||||
enc_prepare w3, x5, x8
|
||||
encrypt_block v4, w3, x5, x8, w7 /* first tweak */
|
||||
enc_switch_key w3, x2, x8
|
||||
ldr q7, .Lxts_mul_x
|
||||
b .LxtsencNx
|
||||
|
||||
.Lxtsencrestart:
|
||||
ld1 {v4.16b}, [x24]
|
||||
.Lxtsencnotfirst:
|
||||
enc_prepare w22, x21, x8
|
||||
enc_prepare w3, x2, x8
|
||||
.LxtsencloopNx:
|
||||
ldr q7, .Lxts_mul_x
|
||||
next_tweak v4, v4, v7, v8
|
||||
next_tweak v4, v4, v8
|
||||
.LxtsencNx:
|
||||
subs w23, w23, #4
|
||||
subs w4, w4, #4
|
||||
bmi .Lxtsenc1x
|
||||
ld1 {v0.16b-v3.16b}, [x20], #64 /* get 4 pt blocks */
|
||||
next_tweak v5, v4, v7, v8
|
||||
ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 pt blocks */
|
||||
next_tweak v5, v4, v8
|
||||
eor v0.16b, v0.16b, v4.16b
|
||||
next_tweak v6, v5, v7, v8
|
||||
next_tweak v6, v5, v8
|
||||
eor v1.16b, v1.16b, v5.16b
|
||||
eor v2.16b, v2.16b, v6.16b
|
||||
next_tweak v7, v6, v7, v8
|
||||
next_tweak v7, v6, v8
|
||||
eor v3.16b, v3.16b, v7.16b
|
||||
bl aes_encrypt_block4x
|
||||
eor v3.16b, v3.16b, v7.16b
|
||||
eor v0.16b, v0.16b, v4.16b
|
||||
eor v1.16b, v1.16b, v5.16b
|
||||
eor v2.16b, v2.16b, v6.16b
|
||||
st1 {v0.16b-v3.16b}, [x19], #64
|
||||
st1 {v0.16b-v3.16b}, [x0], #64
|
||||
mov v4.16b, v7.16b
|
||||
cbz w23, .Lxtsencout
|
||||
st1 {v4.16b}, [x24]
|
||||
cond_yield_neon .Lxtsencrestart
|
||||
cbz w4, .Lxtsencout
|
||||
xts_reload_mask v8
|
||||
b .LxtsencloopNx
|
||||
.Lxtsenc1x:
|
||||
adds w23, w23, #4
|
||||
adds w4, w4, #4
|
||||
beq .Lxtsencout
|
||||
.Lxtsencloop:
|
||||
ld1 {v1.16b}, [x20], #16
|
||||
ld1 {v1.16b}, [x1], #16
|
||||
eor v0.16b, v1.16b, v4.16b
|
||||
encrypt_block v0, w22, x21, x8, w7
|
||||
encrypt_block v0, w3, x2, x8, w7
|
||||
eor v0.16b, v0.16b, v4.16b
|
||||
st1 {v0.16b}, [x19], #16
|
||||
subs w23, w23, #1
|
||||
st1 {v0.16b}, [x0], #16
|
||||
subs w4, w4, #1
|
||||
beq .Lxtsencout
|
||||
next_tweak v4, v4, v7, v8
|
||||
next_tweak v4, v4, v8
|
||||
b .Lxtsencloop
|
||||
.Lxtsencout:
|
||||
st1 {v4.16b}, [x24]
|
||||
frame_pop
|
||||
st1 {v4.16b}, [x6]
|
||||
ldp x29, x30, [sp], #16
|
||||
ret
|
||||
AES_ENDPROC(aes_xts_encrypt)
|
||||
|
||||
|
||||
AES_ENTRY(aes_xts_decrypt)
|
||||
frame_push 6
|
||||
stp x29, x30, [sp, #-16]!
|
||||
mov x29, sp
|
||||
|
||||
mov x19, x0
|
||||
mov x20, x1
|
||||
mov x21, x2
|
||||
mov x22, x3
|
||||
mov x23, x4
|
||||
mov x24, x6
|
||||
|
||||
ld1 {v4.16b}, [x24]
|
||||
ld1 {v4.16b}, [x6]
|
||||
xts_load_mask v8
|
||||
cbz w7, .Lxtsdecnotfirst
|
||||
|
||||
enc_prepare w3, x5, x8
|
||||
encrypt_block v4, w3, x5, x8, w7 /* first tweak */
|
||||
dec_prepare w3, x2, x8
|
||||
ldr q7, .Lxts_mul_x
|
||||
b .LxtsdecNx
|
||||
|
||||
.Lxtsdecrestart:
|
||||
ld1 {v4.16b}, [x24]
|
||||
.Lxtsdecnotfirst:
|
||||
dec_prepare w22, x21, x8
|
||||
dec_prepare w3, x2, x8
|
||||
.LxtsdecloopNx:
|
||||
ldr q7, .Lxts_mul_x
|
||||
next_tweak v4, v4, v7, v8
|
||||
next_tweak v4, v4, v8
|
||||
.LxtsdecNx:
|
||||
subs w23, w23, #4
|
||||
subs w4, w4, #4
|
||||
bmi .Lxtsdec1x
|
||||
ld1 {v0.16b-v3.16b}, [x20], #64 /* get 4 ct blocks */
|
||||
next_tweak v5, v4, v7, v8
|
||||
ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */
|
||||
next_tweak v5, v4, v8
|
||||
eor v0.16b, v0.16b, v4.16b
|
||||
next_tweak v6, v5, v7, v8
|
||||
next_tweak v6, v5, v8
|
||||
eor v1.16b, v1.16b, v5.16b
|
||||
eor v2.16b, v2.16b, v6.16b
|
||||
next_tweak v7, v6, v7, v8
|
||||
next_tweak v7, v6, v8
|
||||
eor v3.16b, v3.16b, v7.16b
|
||||
bl aes_decrypt_block4x
|
||||
eor v3.16b, v3.16b, v7.16b
|
||||
eor v0.16b, v0.16b, v4.16b
|
||||
eor v1.16b, v1.16b, v5.16b
|
||||
eor v2.16b, v2.16b, v6.16b
|
||||
st1 {v0.16b-v3.16b}, [x19], #64
|
||||
st1 {v0.16b-v3.16b}, [x0], #64
|
||||
mov v4.16b, v7.16b
|
||||
cbz w23, .Lxtsdecout
|
||||
st1 {v4.16b}, [x24]
|
||||
cond_yield_neon .Lxtsdecrestart
|
||||
cbz w4, .Lxtsdecout
|
||||
xts_reload_mask v8
|
||||
b .LxtsdecloopNx
|
||||
.Lxtsdec1x:
|
||||
adds w23, w23, #4
|
||||
adds w4, w4, #4
|
||||
beq .Lxtsdecout
|
||||
.Lxtsdecloop:
|
||||
ld1 {v1.16b}, [x20], #16
|
||||
ld1 {v1.16b}, [x1], #16
|
||||
eor v0.16b, v1.16b, v4.16b
|
||||
decrypt_block v0, w22, x21, x8, w7
|
||||
decrypt_block v0, w3, x2, x8, w7
|
||||
eor v0.16b, v0.16b, v4.16b
|
||||
st1 {v0.16b}, [x19], #16
|
||||
subs w23, w23, #1
|
||||
st1 {v0.16b}, [x0], #16
|
||||
subs w4, w4, #1
|
||||
beq .Lxtsdecout
|
||||
next_tweak v4, v4, v7, v8
|
||||
next_tweak v4, v4, v8
|
||||
b .Lxtsdecloop
|
||||
.Lxtsdecout:
|
||||
st1 {v4.16b}, [x24]
|
||||
frame_pop
|
||||
st1 {v4.16b}, [x6]
|
||||
ldp x29, x30, [sp], #16
|
||||
ret
|
||||
AES_ENDPROC(aes_xts_decrypt)
|
||||
|
||||
|
@ -14,6 +14,12 @@
|
||||
#define AES_ENTRY(func) ENTRY(neon_ ## func)
|
||||
#define AES_ENDPROC(func) ENDPROC(neon_ ## func)
|
||||
|
||||
xtsmask .req v7
|
||||
|
||||
.macro xts_reload_mask, tmp
|
||||
xts_load_mask \tmp
|
||||
.endm
|
||||
|
||||
/* multiply by polynomial 'x' in GF(2^8) */
|
||||
.macro mul_by_x, out, in, temp, const
|
||||
sshr \temp, \in, #7
|
||||
|
@ -1,287 +0,0 @@
|
||||
/*
|
||||
* Accelerated CRC32(C) using arm64 CRC, NEON and Crypto Extensions instructions
|
||||
*
|
||||
* Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 as
|
||||
* published by the Free Software Foundation.
|
||||
*/
|
||||
|
||||
/* GPL HEADER START
|
||||
*
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 only,
|
||||
* as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License version 2 for more details (a copy is included
|
||||
* in the LICENSE file that accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* version 2 along with this program; If not, see http://www.gnu.org/licenses
|
||||
*
|
||||
* Please visit http://www.xyratex.com/contact if you need additional
|
||||
* information or have any questions.
|
||||
*
|
||||
* GPL HEADER END
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright 2012 Xyratex Technology Limited
|
||||
*
|
||||
* Using hardware provided PCLMULQDQ instruction to accelerate the CRC32
|
||||
* calculation.
|
||||
* CRC32 polynomial:0x04c11db7(BE)/0xEDB88320(LE)
|
||||
* PCLMULQDQ is a new instruction in Intel SSE4.2, the reference can be found
|
||||
* at:
|
||||
* http://www.intel.com/products/processor/manuals/
|
||||
* Intel(R) 64 and IA-32 Architectures Software Developer's Manual
|
||||
* Volume 2B: Instruction Set Reference, N-Z
|
||||
*
|
||||
* Authors: Gregory Prestas <Gregory_Prestas@us.xyratex.com>
|
||||
* Alexander Boyko <Alexander_Boyko@xyratex.com>
|
||||
*/
|
||||
|
||||
#include <linux/linkage.h>
|
||||
#include <asm/assembler.h>
|
||||
|
||||
.section ".rodata", "a"
|
||||
.align 6
|
||||
.cpu generic+crypto+crc
|
||||
|
||||
.Lcrc32_constants:
|
||||
/*
|
||||
* [x4*128+32 mod P(x) << 32)]' << 1 = 0x154442bd4
|
||||
* #define CONSTANT_R1 0x154442bd4LL
|
||||
*
|
||||
* [(x4*128-32 mod P(x) << 32)]' << 1 = 0x1c6e41596
|
||||
* #define CONSTANT_R2 0x1c6e41596LL
|
||||
*/
|
||||
.octa 0x00000001c6e415960000000154442bd4
|
||||
|
||||
/*
|
||||
* [(x128+32 mod P(x) << 32)]' << 1 = 0x1751997d0
|
||||
* #define CONSTANT_R3 0x1751997d0LL
|
||||
*
|
||||
* [(x128-32 mod P(x) << 32)]' << 1 = 0x0ccaa009e
|
||||
* #define CONSTANT_R4 0x0ccaa009eLL
|
||||
*/
|
||||
.octa 0x00000000ccaa009e00000001751997d0
|
||||
|
||||
/*
|
||||
* [(x64 mod P(x) << 32)]' << 1 = 0x163cd6124
|
||||
* #define CONSTANT_R5 0x163cd6124LL
|
||||
*/
|
||||
.quad 0x0000000163cd6124
|
||||
.quad 0x00000000FFFFFFFF
|
||||
|
||||
/*
|
||||
* #define CRCPOLY_TRUE_LE_FULL 0x1DB710641LL
|
||||
*
|
||||
* Barrett Reduction constant (u64`) = u` = (x**64 / P(x))`
|
||||
* = 0x1F7011641LL
|
||||
* #define CONSTANT_RU 0x1F7011641LL
|
||||
*/
|
||||
.octa 0x00000001F701164100000001DB710641
|
||||
|
||||
.Lcrc32c_constants:
|
||||
.octa 0x000000009e4addf800000000740eef02
|
||||
.octa 0x000000014cd00bd600000000f20c0dfe
|
||||
.quad 0x00000000dd45aab8
|
||||
.quad 0x00000000FFFFFFFF
|
||||
.octa 0x00000000dea713f10000000105ec76f0
|
||||
|
||||
vCONSTANT .req v0
|
||||
dCONSTANT .req d0
|
||||
qCONSTANT .req q0
|
||||
|
||||
BUF .req x19
|
||||
LEN .req x20
|
||||
CRC .req x21
|
||||
CONST .req x22
|
||||
|
||||
vzr .req v9
|
||||
|
||||
/**
|
||||
* Calculate crc32
|
||||
* BUF - buffer
|
||||
* LEN - sizeof buffer (multiple of 16 bytes), LEN should be > 63
|
||||
* CRC - initial crc32
|
||||
* return %eax crc32
|
||||
* uint crc32_pmull_le(unsigned char const *buffer,
|
||||
* size_t len, uint crc32)
|
||||
*/
|
||||
.text
|
||||
ENTRY(crc32_pmull_le)
|
||||
adr_l x3, .Lcrc32_constants
|
||||
b 0f
|
||||
|
||||
ENTRY(crc32c_pmull_le)
|
||||
adr_l x3, .Lcrc32c_constants
|
||||
|
||||
0: frame_push 4, 64
|
||||
|
||||
mov BUF, x0
|
||||
mov LEN, x1
|
||||
mov CRC, x2
|
||||
mov CONST, x3
|
||||
|
||||
bic LEN, LEN, #15
|
||||
ld1 {v1.16b-v4.16b}, [BUF], #0x40
|
||||
movi vzr.16b, #0
|
||||
fmov dCONSTANT, CRC
|
||||
eor v1.16b, v1.16b, vCONSTANT.16b
|
||||
sub LEN, LEN, #0x40
|
||||
cmp LEN, #0x40
|
||||
b.lt less_64
|
||||
|
||||
ldr qCONSTANT, [CONST]
|
||||
|
||||
loop_64: /* 64 bytes Full cache line folding */
|
||||
sub LEN, LEN, #0x40
|
||||
|
||||
pmull2 v5.1q, v1.2d, vCONSTANT.2d
|
||||
pmull2 v6.1q, v2.2d, vCONSTANT.2d
|
||||
pmull2 v7.1q, v3.2d, vCONSTANT.2d
|
||||
pmull2 v8.1q, v4.2d, vCONSTANT.2d
|
||||
|
||||
pmull v1.1q, v1.1d, vCONSTANT.1d
|
||||
pmull v2.1q, v2.1d, vCONSTANT.1d
|
||||
pmull v3.1q, v3.1d, vCONSTANT.1d
|
||||
pmull v4.1q, v4.1d, vCONSTANT.1d
|
||||
|
||||
eor v1.16b, v1.16b, v5.16b
|
||||
ld1 {v5.16b}, [BUF], #0x10
|
||||
eor v2.16b, v2.16b, v6.16b
|
||||
ld1 {v6.16b}, [BUF], #0x10
|
||||
eor v3.16b, v3.16b, v7.16b
|
||||
ld1 {v7.16b}, [BUF], #0x10
|
||||
eor v4.16b, v4.16b, v8.16b
|
||||
ld1 {v8.16b}, [BUF], #0x10
|
||||
|
||||
eor v1.16b, v1.16b, v5.16b
|
||||
eor v2.16b, v2.16b, v6.16b
|
||||
eor v3.16b, v3.16b, v7.16b
|
||||
eor v4.16b, v4.16b, v8.16b
|
||||
|
||||
cmp LEN, #0x40
|
||||
b.lt less_64
|
||||
|
||||
if_will_cond_yield_neon
|
||||
stp q1, q2, [sp, #.Lframe_local_offset]
|
||||
stp q3, q4, [sp, #.Lframe_local_offset + 32]
|
||||
do_cond_yield_neon
|
||||
ldp q1, q2, [sp, #.Lframe_local_offset]
|
||||
ldp q3, q4, [sp, #.Lframe_local_offset + 32]
|
||||
ldr qCONSTANT, [CONST]
|
||||
movi vzr.16b, #0
|
||||
endif_yield_neon
|
||||
b loop_64
|
||||
|
||||
less_64: /* Folding cache line into 128bit */
|
||||
ldr qCONSTANT, [CONST, #16]
|
||||
|
||||
pmull2 v5.1q, v1.2d, vCONSTANT.2d
|
||||
pmull v1.1q, v1.1d, vCONSTANT.1d
|
||||
eor v1.16b, v1.16b, v5.16b
|
||||
eor v1.16b, v1.16b, v2.16b
|
||||
|
||||
pmull2 v5.1q, v1.2d, vCONSTANT.2d
|
||||
pmull v1.1q, v1.1d, vCONSTANT.1d
|
||||
eor v1.16b, v1.16b, v5.16b
|
||||
eor v1.16b, v1.16b, v3.16b
|
||||
|
||||
pmull2 v5.1q, v1.2d, vCONSTANT.2d
|
||||
pmull v1.1q, v1.1d, vCONSTANT.1d
|
||||
eor v1.16b, v1.16b, v5.16b
|
||||
eor v1.16b, v1.16b, v4.16b
|
||||
|
||||
cbz LEN, fold_64
|
||||
|
||||
loop_16: /* Folding rest buffer into 128bit */
|
||||
subs LEN, LEN, #0x10
|
||||
|
||||
ld1 {v2.16b}, [BUF], #0x10
|
||||
pmull2 v5.1q, v1.2d, vCONSTANT.2d
|
||||
pmull v1.1q, v1.1d, vCONSTANT.1d
|
||||
eor v1.16b, v1.16b, v5.16b
|
||||
eor v1.16b, v1.16b, v2.16b
|
||||
|
||||
b.ne loop_16
|
||||
|
||||
fold_64:
|
||||
/* perform the last 64 bit fold, also adds 32 zeroes
|
||||
* to the input stream */
|
||||
ext v2.16b, v1.16b, v1.16b, #8
|
||||
pmull2 v2.1q, v2.2d, vCONSTANT.2d
|
||||
ext v1.16b, v1.16b, vzr.16b, #8
|
||||
eor v1.16b, v1.16b, v2.16b
|
||||
|
||||
/* final 32-bit fold */
|
||||
ldr dCONSTANT, [CONST, #32]
|
||||
ldr d3, [CONST, #40]
|
||||
|
||||
ext v2.16b, v1.16b, vzr.16b, #4
|
||||
and v1.16b, v1.16b, v3.16b
|
||||
pmull v1.1q, v1.1d, vCONSTANT.1d
|
||||
eor v1.16b, v1.16b, v2.16b
|
||||
|
||||
/* Finish up with the bit-reversed barrett reduction 64 ==> 32 bits */
|
||||
ldr qCONSTANT, [CONST, #48]
|
||||
|
||||
and v2.16b, v1.16b, v3.16b
|
||||
ext v2.16b, vzr.16b, v2.16b, #8
|
||||
pmull2 v2.1q, v2.2d, vCONSTANT.2d
|
||||
and v2.16b, v2.16b, v3.16b
|
||||
pmull v2.1q, v2.1d, vCONSTANT.1d
|
||||
eor v1.16b, v1.16b, v2.16b
|
||||
mov w0, v1.s[1]
|
||||
|
||||
frame_pop
|
||||
ret
|
||||
ENDPROC(crc32_pmull_le)
|
||||
ENDPROC(crc32c_pmull_le)
|
||||
|
||||
.macro __crc32, c
|
||||
0: subs x2, x2, #16
|
||||
b.mi 8f
|
||||
ldp x3, x4, [x1], #16
|
||||
CPU_BE( rev x3, x3 )
|
||||
CPU_BE( rev x4, x4 )
|
||||
crc32\c\()x w0, w0, x3
|
||||
crc32\c\()x w0, w0, x4
|
||||
b.ne 0b
|
||||
ret
|
||||
|
||||
8: tbz x2, #3, 4f
|
||||
ldr x3, [x1], #8
|
||||
CPU_BE( rev x3, x3 )
|
||||
crc32\c\()x w0, w0, x3
|
||||
4: tbz x2, #2, 2f
|
||||
ldr w3, [x1], #4
|
||||
CPU_BE( rev w3, w3 )
|
||||
crc32\c\()w w0, w0, w3
|
||||
2: tbz x2, #1, 1f
|
||||
ldrh w3, [x1], #2
|
||||
CPU_BE( rev16 w3, w3 )
|
||||
crc32\c\()h w0, w0, w3
|
||||
1: tbz x2, #0, 0f
|
||||
ldrb w3, [x1]
|
||||
crc32\c\()b w0, w0, w3
|
||||
0: ret
|
||||
.endm
|
||||
|
||||
.align 5
|
||||
ENTRY(crc32_armv8_le)
|
||||
__crc32
|
||||
ENDPROC(crc32_armv8_le)
|
||||
|
||||
.align 5
|
||||
ENTRY(crc32c_armv8_le)
|
||||
__crc32 c
|
||||
ENDPROC(crc32c_armv8_le)
|
@ -1,244 +0,0 @@
|
||||
/*
|
||||
* Accelerated CRC32(C) using arm64 NEON and Crypto Extensions instructions
|
||||
*
|
||||
* Copyright (C) 2016 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 as
|
||||
* published by the Free Software Foundation.
|
||||
*/
|
||||
|
||||
#include <linux/cpufeature.h>
|
||||
#include <linux/crc32.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/string.h>
|
||||
|
||||
#include <crypto/internal/hash.h>
|
||||
|
||||
#include <asm/hwcap.h>
|
||||
#include <asm/neon.h>
|
||||
#include <asm/simd.h>
|
||||
#include <asm/unaligned.h>
|
||||
|
||||
#define PMULL_MIN_LEN 64L /* minimum size of buffer
|
||||
* for crc32_pmull_le_16 */
|
||||
#define SCALE_F 16L /* size of NEON register */
|
||||
|
||||
asmlinkage u32 crc32_pmull_le(const u8 buf[], u64 len, u32 init_crc);
|
||||
asmlinkage u32 crc32_armv8_le(u32 init_crc, const u8 buf[], size_t len);
|
||||
|
||||
asmlinkage u32 crc32c_pmull_le(const u8 buf[], u64 len, u32 init_crc);
|
||||
asmlinkage u32 crc32c_armv8_le(u32 init_crc, const u8 buf[], size_t len);
|
||||
|
||||
static u32 (*fallback_crc32)(u32 init_crc, const u8 buf[], size_t len);
|
||||
static u32 (*fallback_crc32c)(u32 init_crc, const u8 buf[], size_t len);
|
||||
|
||||
static int crc32_pmull_cra_init(struct crypto_tfm *tfm)
|
||||
{
|
||||
u32 *key = crypto_tfm_ctx(tfm);
|
||||
|
||||
*key = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int crc32c_pmull_cra_init(struct crypto_tfm *tfm)
|
||||
{
|
||||
u32 *key = crypto_tfm_ctx(tfm);
|
||||
|
||||
*key = ~0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int crc32_pmull_setkey(struct crypto_shash *hash, const u8 *key,
|
||||
unsigned int keylen)
|
||||
{
|
||||
u32 *mctx = crypto_shash_ctx(hash);
|
||||
|
||||
if (keylen != sizeof(u32)) {
|
||||
crypto_shash_set_flags(hash, CRYPTO_TFM_RES_BAD_KEY_LEN);
|
||||
return -EINVAL;
|
||||
}
|
||||
*mctx = le32_to_cpup((__le32 *)key);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int crc32_pmull_init(struct shash_desc *desc)
|
||||
{
|
||||
u32 *mctx = crypto_shash_ctx(desc->tfm);
|
||||
u32 *crc = shash_desc_ctx(desc);
|
||||
|
||||
*crc = *mctx;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int crc32_update(struct shash_desc *desc, const u8 *data,
|
||||
unsigned int length)
|
||||
{
|
||||
u32 *crc = shash_desc_ctx(desc);
|
||||
|
||||
*crc = crc32_armv8_le(*crc, data, length);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int crc32c_update(struct shash_desc *desc, const u8 *data,
|
||||
unsigned int length)
|
||||
{
|
||||
u32 *crc = shash_desc_ctx(desc);
|
||||
|
||||
*crc = crc32c_armv8_le(*crc, data, length);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int crc32_pmull_update(struct shash_desc *desc, const u8 *data,
|
||||
unsigned int length)
|
||||
{
|
||||
u32 *crc = shash_desc_ctx(desc);
|
||||
unsigned int l;
|
||||
|
||||
if ((u64)data % SCALE_F) {
|
||||
l = min_t(u32, length, SCALE_F - ((u64)data % SCALE_F));
|
||||
|
||||
*crc = fallback_crc32(*crc, data, l);
|
||||
|
||||
data += l;
|
||||
length -= l;
|
||||
}
|
||||
|
||||
if (length >= PMULL_MIN_LEN && may_use_simd()) {
|
||||
l = round_down(length, SCALE_F);
|
||||
|
||||
kernel_neon_begin();
|
||||
*crc = crc32_pmull_le(data, l, *crc);
|
||||
kernel_neon_end();
|
||||
|
||||
data += l;
|
||||
length -= l;
|
||||
}
|
||||
|
||||
if (length > 0)
|
||||
*crc = fallback_crc32(*crc, data, length);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int crc32c_pmull_update(struct shash_desc *desc, const u8 *data,
|
||||
unsigned int length)
|
||||
{
|
||||
u32 *crc = shash_desc_ctx(desc);
|
||||
unsigned int l;
|
||||
|
||||
if ((u64)data % SCALE_F) {
|
||||
l = min_t(u32, length, SCALE_F - ((u64)data % SCALE_F));
|
||||
|
||||
*crc = fallback_crc32c(*crc, data, l);
|
||||
|
||||
data += l;
|
||||
length -= l;
|
||||
}
|
||||
|
||||
if (length >= PMULL_MIN_LEN && may_use_simd()) {
|
||||
l = round_down(length, SCALE_F);
|
||||
|
||||
kernel_neon_begin();
|
||||
*crc = crc32c_pmull_le(data, l, *crc);
|
||||
kernel_neon_end();
|
||||
|
||||
data += l;
|
||||
length -= l;
|
||||
}
|
||||
|
||||
if (length > 0) {
|
||||
*crc = fallback_crc32c(*crc, data, length);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int crc32_pmull_final(struct shash_desc *desc, u8 *out)
|
||||
{
|
||||
u32 *crc = shash_desc_ctx(desc);
|
||||
|
||||
put_unaligned_le32(*crc, out);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int crc32c_pmull_final(struct shash_desc *desc, u8 *out)
|
||||
{
|
||||
u32 *crc = shash_desc_ctx(desc);
|
||||
|
||||
put_unaligned_le32(~*crc, out);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct shash_alg crc32_pmull_algs[] = { {
|
||||
.setkey = crc32_pmull_setkey,
|
||||
.init = crc32_pmull_init,
|
||||
.update = crc32_update,
|
||||
.final = crc32_pmull_final,
|
||||
.descsize = sizeof(u32),
|
||||
.digestsize = sizeof(u32),
|
||||
|
||||
.base.cra_ctxsize = sizeof(u32),
|
||||
.base.cra_init = crc32_pmull_cra_init,
|
||||
.base.cra_name = "crc32",
|
||||
.base.cra_driver_name = "crc32-arm64-ce",
|
||||
.base.cra_priority = 200,
|
||||
.base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY,
|
||||
.base.cra_blocksize = 1,
|
||||
.base.cra_module = THIS_MODULE,
|
||||
}, {
|
||||
.setkey = crc32_pmull_setkey,
|
||||
.init = crc32_pmull_init,
|
||||
.update = crc32c_update,
|
||||
.final = crc32c_pmull_final,
|
||||
.descsize = sizeof(u32),
|
||||
.digestsize = sizeof(u32),
|
||||
|
||||
.base.cra_ctxsize = sizeof(u32),
|
||||
.base.cra_init = crc32c_pmull_cra_init,
|
||||
.base.cra_name = "crc32c",
|
||||
.base.cra_driver_name = "crc32c-arm64-ce",
|
||||
.base.cra_priority = 200,
|
||||
.base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY,
|
||||
.base.cra_blocksize = 1,
|
||||
.base.cra_module = THIS_MODULE,
|
||||
} };
|
||||
|
||||
static int __init crc32_pmull_mod_init(void)
|
||||
{
|
||||
if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_PMULL)) {
|
||||
crc32_pmull_algs[0].update = crc32_pmull_update;
|
||||
crc32_pmull_algs[1].update = crc32c_pmull_update;
|
||||
|
||||
if (elf_hwcap & HWCAP_CRC32) {
|
||||
fallback_crc32 = crc32_armv8_le;
|
||||
fallback_crc32c = crc32c_armv8_le;
|
||||
} else {
|
||||
fallback_crc32 = crc32_le;
|
||||
fallback_crc32c = __crc32c_le;
|
||||
}
|
||||
} else if (!(elf_hwcap & HWCAP_CRC32)) {
|
||||
return -ENODEV;
|
||||
}
|
||||
return crypto_register_shashes(crc32_pmull_algs,
|
||||
ARRAY_SIZE(crc32_pmull_algs));
|
||||
}
|
||||
|
||||
static void __exit crc32_pmull_mod_exit(void)
|
||||
{
|
||||
crypto_unregister_shashes(crc32_pmull_algs,
|
||||
ARRAY_SIZE(crc32_pmull_algs));
|
||||
}
|
||||
|
||||
static const struct cpu_feature crc32_cpu_feature[] = {
|
||||
{ cpu_feature(CRC32) }, { cpu_feature(PMULL) }, { }
|
||||
};
|
||||
MODULE_DEVICE_TABLE(cpu, crc32_cpu_feature);
|
||||
|
||||
module_init(crc32_pmull_mod_init);
|
||||
module_exit(crc32_pmull_mod_exit);
|
||||
|
||||
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
|
||||
MODULE_LICENSE("GPL v2");
|
@ -80,7 +80,186 @@
|
||||
|
||||
vzr .req v13
|
||||
|
||||
ENTRY(crc_t10dif_pmull)
|
||||
ad .req v14
|
||||
bd .req v10
|
||||
|
||||
k00_16 .req v15
|
||||
k32_48 .req v16
|
||||
|
||||
t3 .req v17
|
||||
t4 .req v18
|
||||
t5 .req v19
|
||||
t6 .req v20
|
||||
t7 .req v21
|
||||
t8 .req v22
|
||||
t9 .req v23
|
||||
|
||||
perm1 .req v24
|
||||
perm2 .req v25
|
||||
perm3 .req v26
|
||||
perm4 .req v27
|
||||
|
||||
bd1 .req v28
|
||||
bd2 .req v29
|
||||
bd3 .req v30
|
||||
bd4 .req v31
|
||||
|
||||
.macro __pmull_init_p64
|
||||
.endm
|
||||
|
||||
.macro __pmull_pre_p64, bd
|
||||
.endm
|
||||
|
||||
.macro __pmull_init_p8
|
||||
// k00_16 := 0x0000000000000000_000000000000ffff
|
||||
// k32_48 := 0x00000000ffffffff_0000ffffffffffff
|
||||
movi k32_48.2d, #0xffffffff
|
||||
mov k32_48.h[2], k32_48.h[0]
|
||||
ushr k00_16.2d, k32_48.2d, #32
|
||||
|
||||
// prepare the permutation vectors
|
||||
mov_q x5, 0x080f0e0d0c0b0a09
|
||||
movi perm4.8b, #8
|
||||
dup perm1.2d, x5
|
||||
eor perm1.16b, perm1.16b, perm4.16b
|
||||
ushr perm2.2d, perm1.2d, #8
|
||||
ushr perm3.2d, perm1.2d, #16
|
||||
ushr perm4.2d, perm1.2d, #24
|
||||
sli perm2.2d, perm1.2d, #56
|
||||
sli perm3.2d, perm1.2d, #48
|
||||
sli perm4.2d, perm1.2d, #40
|
||||
.endm
|
||||
|
||||
.macro __pmull_pre_p8, bd
|
||||
tbl bd1.16b, {\bd\().16b}, perm1.16b
|
||||
tbl bd2.16b, {\bd\().16b}, perm2.16b
|
||||
tbl bd3.16b, {\bd\().16b}, perm3.16b
|
||||
tbl bd4.16b, {\bd\().16b}, perm4.16b
|
||||
.endm
|
||||
|
||||
__pmull_p8_core:
|
||||
.L__pmull_p8_core:
|
||||
ext t4.8b, ad.8b, ad.8b, #1 // A1
|
||||
ext t5.8b, ad.8b, ad.8b, #2 // A2
|
||||
ext t6.8b, ad.8b, ad.8b, #3 // A3
|
||||
|
||||
pmull t4.8h, t4.8b, bd.8b // F = A1*B
|
||||
pmull t8.8h, ad.8b, bd1.8b // E = A*B1
|
||||
pmull t5.8h, t5.8b, bd.8b // H = A2*B
|
||||
pmull t7.8h, ad.8b, bd2.8b // G = A*B2
|
||||
pmull t6.8h, t6.8b, bd.8b // J = A3*B
|
||||
pmull t9.8h, ad.8b, bd3.8b // I = A*B3
|
||||
pmull t3.8h, ad.8b, bd4.8b // K = A*B4
|
||||
b 0f
|
||||
|
||||
.L__pmull_p8_core2:
|
||||
tbl t4.16b, {ad.16b}, perm1.16b // A1
|
||||
tbl t5.16b, {ad.16b}, perm2.16b // A2
|
||||
tbl t6.16b, {ad.16b}, perm3.16b // A3
|
||||
|
||||
pmull2 t4.8h, t4.16b, bd.16b // F = A1*B
|
||||
pmull2 t8.8h, ad.16b, bd1.16b // E = A*B1
|
||||
pmull2 t5.8h, t5.16b, bd.16b // H = A2*B
|
||||
pmull2 t7.8h, ad.16b, bd2.16b // G = A*B2
|
||||
pmull2 t6.8h, t6.16b, bd.16b // J = A3*B
|
||||
pmull2 t9.8h, ad.16b, bd3.16b // I = A*B3
|
||||
pmull2 t3.8h, ad.16b, bd4.16b // K = A*B4
|
||||
|
||||
0: eor t4.16b, t4.16b, t8.16b // L = E + F
|
||||
eor t5.16b, t5.16b, t7.16b // M = G + H
|
||||
eor t6.16b, t6.16b, t9.16b // N = I + J
|
||||
|
||||
uzp1 t8.2d, t4.2d, t5.2d
|
||||
uzp2 t4.2d, t4.2d, t5.2d
|
||||
uzp1 t7.2d, t6.2d, t3.2d
|
||||
uzp2 t6.2d, t6.2d, t3.2d
|
||||
|
||||
// t4 = (L) (P0 + P1) << 8
|
||||
// t5 = (M) (P2 + P3) << 16
|
||||
eor t8.16b, t8.16b, t4.16b
|
||||
and t4.16b, t4.16b, k32_48.16b
|
||||
|
||||
// t6 = (N) (P4 + P5) << 24
|
||||
// t7 = (K) (P6 + P7) << 32
|
||||
eor t7.16b, t7.16b, t6.16b
|
||||
and t6.16b, t6.16b, k00_16.16b
|
||||
|
||||
eor t8.16b, t8.16b, t4.16b
|
||||
eor t7.16b, t7.16b, t6.16b
|
||||
|
||||
zip2 t5.2d, t8.2d, t4.2d
|
||||
zip1 t4.2d, t8.2d, t4.2d
|
||||
zip2 t3.2d, t7.2d, t6.2d
|
||||
zip1 t6.2d, t7.2d, t6.2d
|
||||
|
||||
ext t4.16b, t4.16b, t4.16b, #15
|
||||
ext t5.16b, t5.16b, t5.16b, #14
|
||||
ext t6.16b, t6.16b, t6.16b, #13
|
||||
ext t3.16b, t3.16b, t3.16b, #12
|
||||
|
||||
eor t4.16b, t4.16b, t5.16b
|
||||
eor t6.16b, t6.16b, t3.16b
|
||||
ret
|
||||
ENDPROC(__pmull_p8_core)
|
||||
|
||||
.macro __pmull_p8, rq, ad, bd, i
|
||||
.ifnc \bd, v10
|
||||
.err
|
||||
.endif
|
||||
mov ad.16b, \ad\().16b
|
||||
.ifb \i
|
||||
pmull \rq\().8h, \ad\().8b, bd.8b // D = A*B
|
||||
.else
|
||||
pmull2 \rq\().8h, \ad\().16b, bd.16b // D = A*B
|
||||
.endif
|
||||
|
||||
bl .L__pmull_p8_core\i
|
||||
|
||||
eor \rq\().16b, \rq\().16b, t4.16b
|
||||
eor \rq\().16b, \rq\().16b, t6.16b
|
||||
.endm
|
||||
|
||||
.macro fold64, p, reg1, reg2
|
||||
ldp q11, q12, [arg2], #0x20
|
||||
|
||||
__pmull_\p v8, \reg1, v10, 2
|
||||
__pmull_\p \reg1, \reg1, v10
|
||||
|
||||
CPU_LE( rev64 v11.16b, v11.16b )
|
||||
CPU_LE( rev64 v12.16b, v12.16b )
|
||||
|
||||
__pmull_\p v9, \reg2, v10, 2
|
||||
__pmull_\p \reg2, \reg2, v10
|
||||
|
||||
CPU_LE( ext v11.16b, v11.16b, v11.16b, #8 )
|
||||
CPU_LE( ext v12.16b, v12.16b, v12.16b, #8 )
|
||||
|
||||
eor \reg1\().16b, \reg1\().16b, v8.16b
|
||||
eor \reg2\().16b, \reg2\().16b, v9.16b
|
||||
eor \reg1\().16b, \reg1\().16b, v11.16b
|
||||
eor \reg2\().16b, \reg2\().16b, v12.16b
|
||||
.endm
|
||||
|
||||
.macro fold16, p, reg, rk
|
||||
__pmull_\p v8, \reg, v10
|
||||
__pmull_\p \reg, \reg, v10, 2
|
||||
.ifnb \rk
|
||||
ldr_l q10, \rk, x8
|
||||
__pmull_pre_\p v10
|
||||
.endif
|
||||
eor v7.16b, v7.16b, v8.16b
|
||||
eor v7.16b, v7.16b, \reg\().16b
|
||||
.endm
|
||||
|
||||
.macro __pmull_p64, rd, rn, rm, n
|
||||
.ifb \n
|
||||
pmull \rd\().1q, \rn\().1d, \rm\().1d
|
||||
.else
|
||||
pmull2 \rd\().1q, \rn\().2d, \rm\().2d
|
||||
.endif
|
||||
.endm
|
||||
|
||||
.macro crc_t10dif_pmull, p
|
||||
frame_push 3, 128
|
||||
|
||||
mov arg1_low32, w0
|
||||
@ -89,6 +268,8 @@ ENTRY(crc_t10dif_pmull)
|
||||
|
||||
movi vzr.16b, #0 // init zero register
|
||||
|
||||
__pmull_init_\p
|
||||
|
||||
// adjust the 16-bit initial_crc value, scale it to 32 bits
|
||||
lsl arg1_low32, arg1_low32, #16
|
||||
|
||||
@ -96,7 +277,7 @@ ENTRY(crc_t10dif_pmull)
|
||||
cmp arg3, #256
|
||||
|
||||
// for sizes less than 128, we can't fold 64B at a time...
|
||||
b.lt _less_than_128
|
||||
b.lt .L_less_than_128_\@
|
||||
|
||||
// load the initial crc value
|
||||
// crc value does not need to be byte-reflected, but it needs
|
||||
@ -137,6 +318,7 @@ CPU_LE( ext v7.16b, v7.16b, v7.16b, #8 )
|
||||
ldr_l q10, rk3, x8 // xmm10 has rk3 and rk4
|
||||
// type of pmull instruction
|
||||
// will determine which constant to use
|
||||
__pmull_pre_\p v10
|
||||
|
||||
//
|
||||
// we subtract 256 instead of 128 to save one instruction from the loop
|
||||
@ -147,41 +329,19 @@ CPU_LE( ext v7.16b, v7.16b, v7.16b, #8 )
|
||||
// buffer. The _fold_64_B_loop will fold 64B at a time
|
||||
// until we have 64+y Bytes of buffer
|
||||
|
||||
|
||||
// fold 64B at a time. This section of the code folds 4 vector
|
||||
// registers in parallel
|
||||
_fold_64_B_loop:
|
||||
.L_fold_64_B_loop_\@:
|
||||
|
||||
.macro fold64, reg1, reg2
|
||||
ldp q11, q12, [arg2], #0x20
|
||||
|
||||
pmull2 v8.1q, \reg1\().2d, v10.2d
|
||||
pmull \reg1\().1q, \reg1\().1d, v10.1d
|
||||
|
||||
CPU_LE( rev64 v11.16b, v11.16b )
|
||||
CPU_LE( rev64 v12.16b, v12.16b )
|
||||
|
||||
pmull2 v9.1q, \reg2\().2d, v10.2d
|
||||
pmull \reg2\().1q, \reg2\().1d, v10.1d
|
||||
|
||||
CPU_LE( ext v11.16b, v11.16b, v11.16b, #8 )
|
||||
CPU_LE( ext v12.16b, v12.16b, v12.16b, #8 )
|
||||
|
||||
eor \reg1\().16b, \reg1\().16b, v8.16b
|
||||
eor \reg2\().16b, \reg2\().16b, v9.16b
|
||||
eor \reg1\().16b, \reg1\().16b, v11.16b
|
||||
eor \reg2\().16b, \reg2\().16b, v12.16b
|
||||
.endm
|
||||
|
||||
fold64 v0, v1
|
||||
fold64 v2, v3
|
||||
fold64 v4, v5
|
||||
fold64 v6, v7
|
||||
fold64 \p, v0, v1
|
||||
fold64 \p, v2, v3
|
||||
fold64 \p, v4, v5
|
||||
fold64 \p, v6, v7
|
||||
|
||||
subs arg3, arg3, #128
|
||||
|
||||
// check if there is another 64B in the buffer to be able to fold
|
||||
b.lt _fold_64_B_end
|
||||
b.lt .L_fold_64_B_end_\@
|
||||
|
||||
if_will_cond_yield_neon
|
||||
stp q0, q1, [sp, #.Lframe_local_offset]
|
||||
@ -195,11 +355,13 @@ CPU_LE( ext v12.16b, v12.16b, v12.16b, #8 )
|
||||
ldp q6, q7, [sp, #.Lframe_local_offset + 96]
|
||||
ldr_l q10, rk3, x8
|
||||
movi vzr.16b, #0 // init zero register
|
||||
__pmull_init_\p
|
||||
__pmull_pre_\p v10
|
||||
endif_yield_neon
|
||||
|
||||
b _fold_64_B_loop
|
||||
b .L_fold_64_B_loop_\@
|
||||
|
||||
_fold_64_B_end:
|
||||
.L_fold_64_B_end_\@:
|
||||
// at this point, the buffer pointer is pointing at the last y Bytes
|
||||
// of the buffer the 64B of folded data is in 4 of the vector
|
||||
// registers: v0, v1, v2, v3
|
||||
@ -208,38 +370,29 @@ _fold_64_B_end:
|
||||
// constants
|
||||
|
||||
ldr_l q10, rk9, x8
|
||||
__pmull_pre_\p v10
|
||||
|
||||
.macro fold16, reg, rk
|
||||
pmull v8.1q, \reg\().1d, v10.1d
|
||||
pmull2 \reg\().1q, \reg\().2d, v10.2d
|
||||
.ifnb \rk
|
||||
ldr_l q10, \rk, x8
|
||||
.endif
|
||||
eor v7.16b, v7.16b, v8.16b
|
||||
eor v7.16b, v7.16b, \reg\().16b
|
||||
.endm
|
||||
|
||||
fold16 v0, rk11
|
||||
fold16 v1, rk13
|
||||
fold16 v2, rk15
|
||||
fold16 v3, rk17
|
||||
fold16 v4, rk19
|
||||
fold16 v5, rk1
|
||||
fold16 v6
|
||||
fold16 \p, v0, rk11
|
||||
fold16 \p, v1, rk13
|
||||
fold16 \p, v2, rk15
|
||||
fold16 \p, v3, rk17
|
||||
fold16 \p, v4, rk19
|
||||
fold16 \p, v5, rk1
|
||||
fold16 \p, v6
|
||||
|
||||
// instead of 64, we add 48 to the loop counter to save 1 instruction
|
||||
// from the loop instead of a cmp instruction, we use the negative
|
||||
// flag with the jl instruction
|
||||
adds arg3, arg3, #(128-16)
|
||||
b.lt _final_reduction_for_128
|
||||
b.lt .L_final_reduction_for_128_\@
|
||||
|
||||
// now we have 16+y bytes left to reduce. 16 Bytes is in register v7
|
||||
// and the rest is in memory. We can fold 16 bytes at a time if y>=16
|
||||
// continue folding 16B at a time
|
||||
|
||||
_16B_reduction_loop:
|
||||
pmull v8.1q, v7.1d, v10.1d
|
||||
pmull2 v7.1q, v7.2d, v10.2d
|
||||
.L_16B_reduction_loop_\@:
|
||||
__pmull_\p v8, v7, v10
|
||||
__pmull_\p v7, v7, v10, 2
|
||||
eor v7.16b, v7.16b, v8.16b
|
||||
|
||||
ldr q0, [arg2], #16
|
||||
@ -251,22 +404,22 @@ CPU_LE( ext v0.16b, v0.16b, v0.16b, #8 )
|
||||
// instead of a cmp instruction, we utilize the flags with the
|
||||
// jge instruction equivalent of: cmp arg3, 16-16
|
||||
// check if there is any more 16B in the buffer to be able to fold
|
||||
b.ge _16B_reduction_loop
|
||||
b.ge .L_16B_reduction_loop_\@
|
||||
|
||||
// now we have 16+z bytes left to reduce, where 0<= z < 16.
|
||||
// first, we reduce the data in the xmm7 register
|
||||
|
||||
_final_reduction_for_128:
|
||||
.L_final_reduction_for_128_\@:
|
||||
// check if any more data to fold. If not, compute the CRC of
|
||||
// the final 128 bits
|
||||
adds arg3, arg3, #16
|
||||
b.eq _128_done
|
||||
b.eq .L_128_done_\@
|
||||
|
||||
// here we are getting data that is less than 16 bytes.
|
||||
// since we know that there was data before the pointer, we can
|
||||
// offset the input pointer before the actual point, to receive
|
||||
// exactly 16 bytes. after that the registers need to be adjusted.
|
||||
_get_last_two_regs:
|
||||
.L_get_last_two_regs_\@:
|
||||
add arg2, arg2, arg3
|
||||
ldr q1, [arg2, #-16]
|
||||
CPU_LE( rev64 v1.16b, v1.16b )
|
||||
@ -291,47 +444,48 @@ CPU_LE( ext v1.16b, v1.16b, v1.16b, #8 )
|
||||
bsl v0.16b, v2.16b, v1.16b
|
||||
|
||||
// fold 16 Bytes
|
||||
pmull v8.1q, v7.1d, v10.1d
|
||||
pmull2 v7.1q, v7.2d, v10.2d
|
||||
__pmull_\p v8, v7, v10
|
||||
__pmull_\p v7, v7, v10, 2
|
||||
eor v7.16b, v7.16b, v8.16b
|
||||
eor v7.16b, v7.16b, v0.16b
|
||||
|
||||
_128_done:
|
||||
.L_128_done_\@:
|
||||
// compute crc of a 128-bit value
|
||||
ldr_l q10, rk5, x8 // rk5 and rk6 in xmm10
|
||||
__pmull_pre_\p v10
|
||||
|
||||
// 64b fold
|
||||
ext v0.16b, vzr.16b, v7.16b, #8
|
||||
mov v7.d[0], v7.d[1]
|
||||
pmull v7.1q, v7.1d, v10.1d
|
||||
__pmull_\p v7, v7, v10
|
||||
eor v7.16b, v7.16b, v0.16b
|
||||
|
||||
// 32b fold
|
||||
ext v0.16b, v7.16b, vzr.16b, #4
|
||||
mov v7.s[3], vzr.s[0]
|
||||
pmull2 v0.1q, v0.2d, v10.2d
|
||||
__pmull_\p v0, v0, v10, 2
|
||||
eor v7.16b, v7.16b, v0.16b
|
||||
|
||||
// barrett reduction
|
||||
_barrett:
|
||||
ldr_l q10, rk7, x8
|
||||
__pmull_pre_\p v10
|
||||
mov v0.d[0], v7.d[1]
|
||||
|
||||
pmull v0.1q, v0.1d, v10.1d
|
||||
__pmull_\p v0, v0, v10
|
||||
ext v0.16b, vzr.16b, v0.16b, #12
|
||||
pmull2 v0.1q, v0.2d, v10.2d
|
||||
__pmull_\p v0, v0, v10, 2
|
||||
ext v0.16b, vzr.16b, v0.16b, #12
|
||||
eor v7.16b, v7.16b, v0.16b
|
||||
mov w0, v7.s[1]
|
||||
|
||||
_cleanup:
|
||||
.L_cleanup_\@:
|
||||
// scale the result back to 16 bits
|
||||
lsr x0, x0, #16
|
||||
frame_pop
|
||||
ret
|
||||
|
||||
_less_than_128:
|
||||
cbz arg3, _cleanup
|
||||
.L_less_than_128_\@:
|
||||
cbz arg3, .L_cleanup_\@
|
||||
|
||||
movi v0.16b, #0
|
||||
mov v0.s[3], arg1_low32 // get the initial crc value
|
||||
@ -342,20 +496,21 @@ CPU_LE( ext v7.16b, v7.16b, v7.16b, #8 )
|
||||
eor v7.16b, v7.16b, v0.16b // xor the initial crc value
|
||||
|
||||
cmp arg3, #16
|
||||
b.eq _128_done // exactly 16 left
|
||||
b.lt _less_than_16_left
|
||||
b.eq .L_128_done_\@ // exactly 16 left
|
||||
b.lt .L_less_than_16_left_\@
|
||||
|
||||
ldr_l q10, rk1, x8 // rk1 and rk2 in xmm10
|
||||
__pmull_pre_\p v10
|
||||
|
||||
// update the counter. subtract 32 instead of 16 to save one
|
||||
// instruction from the loop
|
||||
subs arg3, arg3, #32
|
||||
b.ge _16B_reduction_loop
|
||||
b.ge .L_16B_reduction_loop_\@
|
||||
|
||||
add arg3, arg3, #16
|
||||
b _get_last_two_regs
|
||||
b .L_get_last_two_regs_\@
|
||||
|
||||
_less_than_16_left:
|
||||
.L_less_than_16_left_\@:
|
||||
// shl r9, 4
|
||||
adr_l x0, tbl_shf_table + 16
|
||||
sub x0, x0, arg3
|
||||
@ -363,8 +518,17 @@ _less_than_16_left:
|
||||
movi v9.16b, #0x80
|
||||
eor v0.16b, v0.16b, v9.16b
|
||||
tbl v7.16b, {v7.16b}, v0.16b
|
||||
b _128_done
|
||||
ENDPROC(crc_t10dif_pmull)
|
||||
b .L_128_done_\@
|
||||
.endm
|
||||
|
||||
ENTRY(crc_t10dif_pmull_p8)
|
||||
crc_t10dif_pmull p8
|
||||
ENDPROC(crc_t10dif_pmull_p8)
|
||||
|
||||
.align 5
|
||||
ENTRY(crc_t10dif_pmull_p64)
|
||||
crc_t10dif_pmull p64
|
||||
ENDPROC(crc_t10dif_pmull_p64)
|
||||
|
||||
// precomputed constants
|
||||
// these constants are precomputed from the poly:
|
||||
|
@ -22,7 +22,10 @@
|
||||
|
||||
#define CRC_T10DIF_PMULL_CHUNK_SIZE 16U
|
||||
|
||||
asmlinkage u16 crc_t10dif_pmull(u16 init_crc, const u8 buf[], u64 len);
|
||||
asmlinkage u16 crc_t10dif_pmull_p64(u16 init_crc, const u8 buf[], u64 len);
|
||||
asmlinkage u16 crc_t10dif_pmull_p8(u16 init_crc, const u8 buf[], u64 len);
|
||||
|
||||
static u16 (*crc_t10dif_pmull)(u16 init_crc, const u8 buf[], u64 len);
|
||||
|
||||
static int crct10dif_init(struct shash_desc *desc)
|
||||
{
|
||||
@ -85,6 +88,11 @@ static struct shash_alg crc_t10dif_alg = {
|
||||
|
||||
static int __init crc_t10dif_mod_init(void)
|
||||
{
|
||||
if (elf_hwcap & HWCAP_PMULL)
|
||||
crc_t10dif_pmull = crc_t10dif_pmull_p64;
|
||||
else
|
||||
crc_t10dif_pmull = crc_t10dif_pmull_p8;
|
||||
|
||||
return crypto_register_shash(&crc_t10dif_alg);
|
||||
}
|
||||
|
||||
@ -93,8 +101,10 @@ static void __exit crc_t10dif_mod_exit(void)
|
||||
crypto_unregister_shash(&crc_t10dif_alg);
|
||||
}
|
||||
|
||||
module_cpu_feature_match(PMULL, crc_t10dif_mod_init);
|
||||
module_cpu_feature_match(ASIMD, crc_t10dif_mod_init);
|
||||
module_exit(crc_t10dif_mod_exit);
|
||||
|
||||
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
|
||||
MODULE_LICENSE("GPL v2");
|
||||
MODULE_ALIAS_CRYPTO("crct10dif");
|
||||
MODULE_ALIAS_CRYPTO("crct10dif-arm64-ce");
|
||||
|
@ -1,352 +0,0 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* ARM64 NEON-accelerated implementation of Speck128-XTS and Speck64-XTS
|
||||
*
|
||||
* Copyright (c) 2018 Google, Inc
|
||||
*
|
||||
* Author: Eric Biggers <ebiggers@google.com>
|
||||
*/
|
||||
|
||||
#include <linux/linkage.h>
|
||||
|
||||
.text
|
||||
|
||||
// arguments
|
||||
ROUND_KEYS .req x0 // const {u64,u32} *round_keys
|
||||
NROUNDS .req w1 // int nrounds
|
||||
NROUNDS_X .req x1
|
||||
DST .req x2 // void *dst
|
||||
SRC .req x3 // const void *src
|
||||
NBYTES .req w4 // unsigned int nbytes
|
||||
TWEAK .req x5 // void *tweak
|
||||
|
||||
// registers which hold the data being encrypted/decrypted
|
||||
// (underscores avoid a naming collision with ARM64 registers x0-x3)
|
||||
X_0 .req v0
|
||||
Y_0 .req v1
|
||||
X_1 .req v2
|
||||
Y_1 .req v3
|
||||
X_2 .req v4
|
||||
Y_2 .req v5
|
||||
X_3 .req v6
|
||||
Y_3 .req v7
|
||||
|
||||
// the round key, duplicated in all lanes
|
||||
ROUND_KEY .req v8
|
||||
|
||||
// index vector for tbl-based 8-bit rotates
|
||||
ROTATE_TABLE .req v9
|
||||
ROTATE_TABLE_Q .req q9
|
||||
|
||||
// temporary registers
|
||||
TMP0 .req v10
|
||||
TMP1 .req v11
|
||||
TMP2 .req v12
|
||||
TMP3 .req v13
|
||||
|
||||
// multiplication table for updating XTS tweaks
|
||||
GFMUL_TABLE .req v14
|
||||
GFMUL_TABLE_Q .req q14
|
||||
|
||||
// next XTS tweak value(s)
|
||||
TWEAKV_NEXT .req v15
|
||||
|
||||
// XTS tweaks for the blocks currently being encrypted/decrypted
|
||||
TWEAKV0 .req v16
|
||||
TWEAKV1 .req v17
|
||||
TWEAKV2 .req v18
|
||||
TWEAKV3 .req v19
|
||||
TWEAKV4 .req v20
|
||||
TWEAKV5 .req v21
|
||||
TWEAKV6 .req v22
|
||||
TWEAKV7 .req v23
|
||||
|
||||
.align 4
|
||||
.Lror64_8_table:
|
||||
.octa 0x080f0e0d0c0b0a090007060504030201
|
||||
.Lror32_8_table:
|
||||
.octa 0x0c0f0e0d080b0a090407060500030201
|
||||
.Lrol64_8_table:
|
||||
.octa 0x0e0d0c0b0a09080f0605040302010007
|
||||
.Lrol32_8_table:
|
||||
.octa 0x0e0d0c0f0a09080b0605040702010003
|
||||
.Lgf128mul_table:
|
||||
.octa 0x00000000000000870000000000000001
|
||||
.Lgf64mul_table:
|
||||
.octa 0x0000000000000000000000002d361b00
|
||||
|
||||
/*
|
||||
* _speck_round_128bytes() - Speck encryption round on 128 bytes at a time
|
||||
*
|
||||
* Do one Speck encryption round on the 128 bytes (8 blocks for Speck128, 16 for
|
||||
* Speck64) stored in X0-X3 and Y0-Y3, using the round key stored in all lanes
|
||||
* of ROUND_KEY. 'n' is the lane size: 64 for Speck128, or 32 for Speck64.
|
||||
* 'lanes' is the lane specifier: "2d" for Speck128 or "4s" for Speck64.
|
||||
*/
|
||||
.macro _speck_round_128bytes n, lanes
|
||||
|
||||
// x = ror(x, 8)
|
||||
tbl X_0.16b, {X_0.16b}, ROTATE_TABLE.16b
|
||||
tbl X_1.16b, {X_1.16b}, ROTATE_TABLE.16b
|
||||
tbl X_2.16b, {X_2.16b}, ROTATE_TABLE.16b
|
||||
tbl X_3.16b, {X_3.16b}, ROTATE_TABLE.16b
|
||||
|
||||
// x += y
|
||||
add X_0.\lanes, X_0.\lanes, Y_0.\lanes
|
||||
add X_1.\lanes, X_1.\lanes, Y_1.\lanes
|
||||
add X_2.\lanes, X_2.\lanes, Y_2.\lanes
|
||||
add X_3.\lanes, X_3.\lanes, Y_3.\lanes
|
||||
|
||||
// x ^= k
|
||||
eor X_0.16b, X_0.16b, ROUND_KEY.16b
|
||||
eor X_1.16b, X_1.16b, ROUND_KEY.16b
|
||||
eor X_2.16b, X_2.16b, ROUND_KEY.16b
|
||||
eor X_3.16b, X_3.16b, ROUND_KEY.16b
|
||||
|
||||
// y = rol(y, 3)
|
||||
shl TMP0.\lanes, Y_0.\lanes, #3
|
||||
shl TMP1.\lanes, Y_1.\lanes, #3
|
||||
shl TMP2.\lanes, Y_2.\lanes, #3
|
||||
shl TMP3.\lanes, Y_3.\lanes, #3
|
||||
sri TMP0.\lanes, Y_0.\lanes, #(\n - 3)
|
||||
sri TMP1.\lanes, Y_1.\lanes, #(\n - 3)
|
||||
sri TMP2.\lanes, Y_2.\lanes, #(\n - 3)
|
||||
sri TMP3.\lanes, Y_3.\lanes, #(\n - 3)
|
||||
|
||||
// y ^= x
|
||||
eor Y_0.16b, TMP0.16b, X_0.16b
|
||||
eor Y_1.16b, TMP1.16b, X_1.16b
|
||||
eor Y_2.16b, TMP2.16b, X_2.16b
|
||||
eor Y_3.16b, TMP3.16b, X_3.16b
|
||||
.endm
|
||||
|
||||
/*
|
||||
* _speck_unround_128bytes() - Speck decryption round on 128 bytes at a time
|
||||
*
|
||||
* This is the inverse of _speck_round_128bytes().
|
||||
*/
|
||||
.macro _speck_unround_128bytes n, lanes
|
||||
|
||||
// y ^= x
|
||||
eor TMP0.16b, Y_0.16b, X_0.16b
|
||||
eor TMP1.16b, Y_1.16b, X_1.16b
|
||||
eor TMP2.16b, Y_2.16b, X_2.16b
|
||||
eor TMP3.16b, Y_3.16b, X_3.16b
|
||||
|
||||
// y = ror(y, 3)
|
||||
ushr Y_0.\lanes, TMP0.\lanes, #3
|
||||
ushr Y_1.\lanes, TMP1.\lanes, #3
|
||||
ushr Y_2.\lanes, TMP2.\lanes, #3
|
||||
ushr Y_3.\lanes, TMP3.\lanes, #3
|
||||
sli Y_0.\lanes, TMP0.\lanes, #(\n - 3)
|
||||
sli Y_1.\lanes, TMP1.\lanes, #(\n - 3)
|
||||
sli Y_2.\lanes, TMP2.\lanes, #(\n - 3)
|
||||
sli Y_3.\lanes, TMP3.\lanes, #(\n - 3)
|
||||
|
||||
// x ^= k
|
||||
eor X_0.16b, X_0.16b, ROUND_KEY.16b
|
||||
eor X_1.16b, X_1.16b, ROUND_KEY.16b
|
||||
eor X_2.16b, X_2.16b, ROUND_KEY.16b
|
||||
eor X_3.16b, X_3.16b, ROUND_KEY.16b
|
||||
|
||||
// x -= y
|
||||
sub X_0.\lanes, X_0.\lanes, Y_0.\lanes
|
||||
sub X_1.\lanes, X_1.\lanes, Y_1.\lanes
|
||||
sub X_2.\lanes, X_2.\lanes, Y_2.\lanes
|
||||
sub X_3.\lanes, X_3.\lanes, Y_3.\lanes
|
||||
|
||||
// x = rol(x, 8)
|
||||
tbl X_0.16b, {X_0.16b}, ROTATE_TABLE.16b
|
||||
tbl X_1.16b, {X_1.16b}, ROTATE_TABLE.16b
|
||||
tbl X_2.16b, {X_2.16b}, ROTATE_TABLE.16b
|
||||
tbl X_3.16b, {X_3.16b}, ROTATE_TABLE.16b
|
||||
.endm
|
||||
|
||||
.macro _next_xts_tweak next, cur, tmp, n
|
||||
.if \n == 64
|
||||
/*
|
||||
* Calculate the next tweak by multiplying the current one by x,
|
||||
* modulo p(x) = x^128 + x^7 + x^2 + x + 1.
|
||||
*/
|
||||
sshr \tmp\().2d, \cur\().2d, #63
|
||||
and \tmp\().16b, \tmp\().16b, GFMUL_TABLE.16b
|
||||
shl \next\().2d, \cur\().2d, #1
|
||||
ext \tmp\().16b, \tmp\().16b, \tmp\().16b, #8
|
||||
eor \next\().16b, \next\().16b, \tmp\().16b
|
||||
.else
|
||||
/*
|
||||
* Calculate the next two tweaks by multiplying the current ones by x^2,
|
||||
* modulo p(x) = x^64 + x^4 + x^3 + x + 1.
|
||||
*/
|
||||
ushr \tmp\().2d, \cur\().2d, #62
|
||||
shl \next\().2d, \cur\().2d, #2
|
||||
tbl \tmp\().16b, {GFMUL_TABLE.16b}, \tmp\().16b
|
||||
eor \next\().16b, \next\().16b, \tmp\().16b
|
||||
.endif
|
||||
.endm
|
||||
|
||||
/*
|
||||
* _speck_xts_crypt() - Speck-XTS encryption/decryption
|
||||
*
|
||||
* Encrypt or decrypt NBYTES bytes of data from the SRC buffer to the DST buffer
|
||||
* using Speck-XTS, specifically the variant with a block size of '2n' and round
|
||||
* count given by NROUNDS. The expanded round keys are given in ROUND_KEYS, and
|
||||
* the current XTS tweak value is given in TWEAK. It's assumed that NBYTES is a
|
||||
* nonzero multiple of 128.
|
||||
*/
|
||||
.macro _speck_xts_crypt n, lanes, decrypting
|
||||
|
||||
/*
|
||||
* If decrypting, modify the ROUND_KEYS parameter to point to the last
|
||||
* round key rather than the first, since for decryption the round keys
|
||||
* are used in reverse order.
|
||||
*/
|
||||
.if \decrypting
|
||||
mov NROUNDS, NROUNDS /* zero the high 32 bits */
|
||||
.if \n == 64
|
||||
add ROUND_KEYS, ROUND_KEYS, NROUNDS_X, lsl #3
|
||||
sub ROUND_KEYS, ROUND_KEYS, #8
|
||||
.else
|
||||
add ROUND_KEYS, ROUND_KEYS, NROUNDS_X, lsl #2
|
||||
sub ROUND_KEYS, ROUND_KEYS, #4
|
||||
.endif
|
||||
.endif
|
||||
|
||||
// Load the index vector for tbl-based 8-bit rotates
|
||||
.if \decrypting
|
||||
ldr ROTATE_TABLE_Q, .Lrol\n\()_8_table
|
||||
.else
|
||||
ldr ROTATE_TABLE_Q, .Lror\n\()_8_table
|
||||
.endif
|
||||
|
||||
// One-time XTS preparation
|
||||
.if \n == 64
|
||||
// Load first tweak
|
||||
ld1 {TWEAKV0.16b}, [TWEAK]
|
||||
|
||||
// Load GF(2^128) multiplication table
|
||||
ldr GFMUL_TABLE_Q, .Lgf128mul_table
|
||||
.else
|
||||
// Load first tweak
|
||||
ld1 {TWEAKV0.8b}, [TWEAK]
|
||||
|
||||
// Load GF(2^64) multiplication table
|
||||
ldr GFMUL_TABLE_Q, .Lgf64mul_table
|
||||
|
||||
// Calculate second tweak, packing it together with the first
|
||||
ushr TMP0.2d, TWEAKV0.2d, #63
|
||||
shl TMP1.2d, TWEAKV0.2d, #1
|
||||
tbl TMP0.8b, {GFMUL_TABLE.16b}, TMP0.8b
|
||||
eor TMP0.8b, TMP0.8b, TMP1.8b
|
||||
mov TWEAKV0.d[1], TMP0.d[0]
|
||||
.endif
|
||||
|
||||
.Lnext_128bytes_\@:
|
||||
|
||||
// Calculate XTS tweaks for next 128 bytes
|
||||
_next_xts_tweak TWEAKV1, TWEAKV0, TMP0, \n
|
||||
_next_xts_tweak TWEAKV2, TWEAKV1, TMP0, \n
|
||||
_next_xts_tweak TWEAKV3, TWEAKV2, TMP0, \n
|
||||
_next_xts_tweak TWEAKV4, TWEAKV3, TMP0, \n
|
||||
_next_xts_tweak TWEAKV5, TWEAKV4, TMP0, \n
|
||||
_next_xts_tweak TWEAKV6, TWEAKV5, TMP0, \n
|
||||
_next_xts_tweak TWEAKV7, TWEAKV6, TMP0, \n
|
||||
_next_xts_tweak TWEAKV_NEXT, TWEAKV7, TMP0, \n
|
||||
|
||||
// Load the next source blocks into {X,Y}[0-3]
|
||||
ld1 {X_0.16b-Y_1.16b}, [SRC], #64
|
||||
ld1 {X_2.16b-Y_3.16b}, [SRC], #64
|
||||
|
||||
// XOR the source blocks with their XTS tweaks
|
||||
eor TMP0.16b, X_0.16b, TWEAKV0.16b
|
||||
eor Y_0.16b, Y_0.16b, TWEAKV1.16b
|
||||
eor TMP1.16b, X_1.16b, TWEAKV2.16b
|
||||
eor Y_1.16b, Y_1.16b, TWEAKV3.16b
|
||||
eor TMP2.16b, X_2.16b, TWEAKV4.16b
|
||||
eor Y_2.16b, Y_2.16b, TWEAKV5.16b
|
||||
eor TMP3.16b, X_3.16b, TWEAKV6.16b
|
||||
eor Y_3.16b, Y_3.16b, TWEAKV7.16b
|
||||
|
||||
/*
|
||||
* De-interleave the 'x' and 'y' elements of each block, i.e. make it so
|
||||
* that the X[0-3] registers contain only the second halves of blocks,
|
||||
* and the Y[0-3] registers contain only the first halves of blocks.
|
||||
* (Speck uses the order (y, x) rather than the more intuitive (x, y).)
|
||||
*/
|
||||
uzp2 X_0.\lanes, TMP0.\lanes, Y_0.\lanes
|
||||
uzp1 Y_0.\lanes, TMP0.\lanes, Y_0.\lanes
|
||||
uzp2 X_1.\lanes, TMP1.\lanes, Y_1.\lanes
|
||||
uzp1 Y_1.\lanes, TMP1.\lanes, Y_1.\lanes
|
||||
uzp2 X_2.\lanes, TMP2.\lanes, Y_2.\lanes
|
||||
uzp1 Y_2.\lanes, TMP2.\lanes, Y_2.\lanes
|
||||
uzp2 X_3.\lanes, TMP3.\lanes, Y_3.\lanes
|
||||
uzp1 Y_3.\lanes, TMP3.\lanes, Y_3.\lanes
|
||||
|
||||
// Do the cipher rounds
|
||||
mov x6, ROUND_KEYS
|
||||
mov w7, NROUNDS
|
||||
.Lnext_round_\@:
|
||||
.if \decrypting
|
||||
ld1r {ROUND_KEY.\lanes}, [x6]
|
||||
sub x6, x6, #( \n / 8 )
|
||||
_speck_unround_128bytes \n, \lanes
|
||||
.else
|
||||
ld1r {ROUND_KEY.\lanes}, [x6], #( \n / 8 )
|
||||
_speck_round_128bytes \n, \lanes
|
||||
.endif
|
||||
subs w7, w7, #1
|
||||
bne .Lnext_round_\@
|
||||
|
||||
// Re-interleave the 'x' and 'y' elements of each block
|
||||
zip1 TMP0.\lanes, Y_0.\lanes, X_0.\lanes
|
||||
zip2 Y_0.\lanes, Y_0.\lanes, X_0.\lanes
|
||||
zip1 TMP1.\lanes, Y_1.\lanes, X_1.\lanes
|
||||
zip2 Y_1.\lanes, Y_1.\lanes, X_1.\lanes
|
||||
zip1 TMP2.\lanes, Y_2.\lanes, X_2.\lanes
|
||||
zip2 Y_2.\lanes, Y_2.\lanes, X_2.\lanes
|
||||
zip1 TMP3.\lanes, Y_3.\lanes, X_3.\lanes
|
||||
zip2 Y_3.\lanes, Y_3.\lanes, X_3.\lanes
|
||||
|
||||
// XOR the encrypted/decrypted blocks with the tweaks calculated earlier
|
||||
eor X_0.16b, TMP0.16b, TWEAKV0.16b
|
||||
eor Y_0.16b, Y_0.16b, TWEAKV1.16b
|
||||
eor X_1.16b, TMP1.16b, TWEAKV2.16b
|
||||
eor Y_1.16b, Y_1.16b, TWEAKV3.16b
|
||||
eor X_2.16b, TMP2.16b, TWEAKV4.16b
|
||||
eor Y_2.16b, Y_2.16b, TWEAKV5.16b
|
||||
eor X_3.16b, TMP3.16b, TWEAKV6.16b
|
||||
eor Y_3.16b, Y_3.16b, TWEAKV7.16b
|
||||
mov TWEAKV0.16b, TWEAKV_NEXT.16b
|
||||
|
||||
// Store the ciphertext in the destination buffer
|
||||
st1 {X_0.16b-Y_1.16b}, [DST], #64
|
||||
st1 {X_2.16b-Y_3.16b}, [DST], #64
|
||||
|
||||
// Continue if there are more 128-byte chunks remaining
|
||||
subs NBYTES, NBYTES, #128
|
||||
bne .Lnext_128bytes_\@
|
||||
|
||||
// Store the next tweak and return
|
||||
.if \n == 64
|
||||
st1 {TWEAKV_NEXT.16b}, [TWEAK]
|
||||
.else
|
||||
st1 {TWEAKV_NEXT.8b}, [TWEAK]
|
||||
.endif
|
||||
ret
|
||||
.endm
|
||||
|
||||
ENTRY(speck128_xts_encrypt_neon)
|
||||
_speck_xts_crypt n=64, lanes=2d, decrypting=0
|
||||
ENDPROC(speck128_xts_encrypt_neon)
|
||||
|
||||
ENTRY(speck128_xts_decrypt_neon)
|
||||
_speck_xts_crypt n=64, lanes=2d, decrypting=1
|
||||
ENDPROC(speck128_xts_decrypt_neon)
|
||||
|
||||
ENTRY(speck64_xts_encrypt_neon)
|
||||
_speck_xts_crypt n=32, lanes=4s, decrypting=0
|
||||
ENDPROC(speck64_xts_encrypt_neon)
|
||||
|
||||
ENTRY(speck64_xts_decrypt_neon)
|
||||
_speck_xts_crypt n=32, lanes=4s, decrypting=1
|
||||
ENDPROC(speck64_xts_decrypt_neon)
|
@ -1,282 +0,0 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* NEON-accelerated implementation of Speck128-XTS and Speck64-XTS
|
||||
* (64-bit version; based on the 32-bit version)
|
||||
*
|
||||
* Copyright (c) 2018 Google, Inc
|
||||
*/
|
||||
|
||||
#include <asm/hwcap.h>
|
||||
#include <asm/neon.h>
|
||||
#include <asm/simd.h>
|
||||
#include <crypto/algapi.h>
|
||||
#include <crypto/gf128mul.h>
|
||||
#include <crypto/internal/skcipher.h>
|
||||
#include <crypto/speck.h>
|
||||
#include <crypto/xts.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/module.h>
|
||||
|
||||
/* The assembly functions only handle multiples of 128 bytes */
|
||||
#define SPECK_NEON_CHUNK_SIZE 128
|
||||
|
||||
/* Speck128 */
|
||||
|
||||
struct speck128_xts_tfm_ctx {
|
||||
struct speck128_tfm_ctx main_key;
|
||||
struct speck128_tfm_ctx tweak_key;
|
||||
};
|
||||
|
||||
asmlinkage void speck128_xts_encrypt_neon(const u64 *round_keys, int nrounds,
|
||||
void *dst, const void *src,
|
||||
unsigned int nbytes, void *tweak);
|
||||
|
||||
asmlinkage void speck128_xts_decrypt_neon(const u64 *round_keys, int nrounds,
|
||||
void *dst, const void *src,
|
||||
unsigned int nbytes, void *tweak);
|
||||
|
||||
typedef void (*speck128_crypt_one_t)(const struct speck128_tfm_ctx *,
|
||||
u8 *, const u8 *);
|
||||
typedef void (*speck128_xts_crypt_many_t)(const u64 *, int, void *,
|
||||
const void *, unsigned int, void *);
|
||||
|
||||
static __always_inline int
|
||||
__speck128_xts_crypt(struct skcipher_request *req,
|
||||
speck128_crypt_one_t crypt_one,
|
||||
speck128_xts_crypt_many_t crypt_many)
|
||||
{
|
||||
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
|
||||
const struct speck128_xts_tfm_ctx *ctx = crypto_skcipher_ctx(tfm);
|
||||
struct skcipher_walk walk;
|
||||
le128 tweak;
|
||||
int err;
|
||||
|
||||
err = skcipher_walk_virt(&walk, req, true);
|
||||
|
||||
crypto_speck128_encrypt(&ctx->tweak_key, (u8 *)&tweak, walk.iv);
|
||||
|
||||
while (walk.nbytes > 0) {
|
||||
unsigned int nbytes = walk.nbytes;
|
||||
u8 *dst = walk.dst.virt.addr;
|
||||
const u8 *src = walk.src.virt.addr;
|
||||
|
||||
if (nbytes >= SPECK_NEON_CHUNK_SIZE && may_use_simd()) {
|
||||
unsigned int count;
|
||||
|
||||
count = round_down(nbytes, SPECK_NEON_CHUNK_SIZE);
|
||||
kernel_neon_begin();
|
||||
(*crypt_many)(ctx->main_key.round_keys,
|
||||
ctx->main_key.nrounds,
|
||||
dst, src, count, &tweak);
|
||||
kernel_neon_end();
|
||||
dst += count;
|
||||
src += count;
|
||||
nbytes -= count;
|
||||
}
|
||||
|
||||
/* Handle any remainder with generic code */
|
||||
while (nbytes >= sizeof(tweak)) {
|
||||
le128_xor((le128 *)dst, (const le128 *)src, &tweak);
|
||||
(*crypt_one)(&ctx->main_key, dst, dst);
|
||||
le128_xor((le128 *)dst, (const le128 *)dst, &tweak);
|
||||
gf128mul_x_ble(&tweak, &tweak);
|
||||
|
||||
dst += sizeof(tweak);
|
||||
src += sizeof(tweak);
|
||||
nbytes -= sizeof(tweak);
|
||||
}
|
||||
err = skcipher_walk_done(&walk, nbytes);
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static int speck128_xts_encrypt(struct skcipher_request *req)
|
||||
{
|
||||
return __speck128_xts_crypt(req, crypto_speck128_encrypt,
|
||||
speck128_xts_encrypt_neon);
|
||||
}
|
||||
|
||||
static int speck128_xts_decrypt(struct skcipher_request *req)
|
||||
{
|
||||
return __speck128_xts_crypt(req, crypto_speck128_decrypt,
|
||||
speck128_xts_decrypt_neon);
|
||||
}
|
||||
|
||||
static int speck128_xts_setkey(struct crypto_skcipher *tfm, const u8 *key,
|
||||
unsigned int keylen)
|
||||
{
|
||||
struct speck128_xts_tfm_ctx *ctx = crypto_skcipher_ctx(tfm);
|
||||
int err;
|
||||
|
||||
err = xts_verify_key(tfm, key, keylen);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
keylen /= 2;
|
||||
|
||||
err = crypto_speck128_setkey(&ctx->main_key, key, keylen);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
return crypto_speck128_setkey(&ctx->tweak_key, key + keylen, keylen);
|
||||
}
|
||||
|
||||
/* Speck64 */
|
||||
|
||||
struct speck64_xts_tfm_ctx {
|
||||
struct speck64_tfm_ctx main_key;
|
||||
struct speck64_tfm_ctx tweak_key;
|
||||
};
|
||||
|
||||
asmlinkage void speck64_xts_encrypt_neon(const u32 *round_keys, int nrounds,
|
||||
void *dst, const void *src,
|
||||
unsigned int nbytes, void *tweak);
|
||||
|
||||
asmlinkage void speck64_xts_decrypt_neon(const u32 *round_keys, int nrounds,
|
||||
void *dst, const void *src,
|
||||
unsigned int nbytes, void *tweak);
|
||||
|
||||
typedef void (*speck64_crypt_one_t)(const struct speck64_tfm_ctx *,
|
||||
u8 *, const u8 *);
|
||||
typedef void (*speck64_xts_crypt_many_t)(const u32 *, int, void *,
|
||||
const void *, unsigned int, void *);
|
||||
|
||||
static __always_inline int
|
||||
__speck64_xts_crypt(struct skcipher_request *req, speck64_crypt_one_t crypt_one,
|
||||
speck64_xts_crypt_many_t crypt_many)
|
||||
{
|
||||
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
|
||||
const struct speck64_xts_tfm_ctx *ctx = crypto_skcipher_ctx(tfm);
|
||||
struct skcipher_walk walk;
|
||||
__le64 tweak;
|
||||
int err;
|
||||
|
||||
err = skcipher_walk_virt(&walk, req, true);
|
||||
|
||||
crypto_speck64_encrypt(&ctx->tweak_key, (u8 *)&tweak, walk.iv);
|
||||
|
||||
while (walk.nbytes > 0) {
|
||||
unsigned int nbytes = walk.nbytes;
|
||||
u8 *dst = walk.dst.virt.addr;
|
||||
const u8 *src = walk.src.virt.addr;
|
||||
|
||||
if (nbytes >= SPECK_NEON_CHUNK_SIZE && may_use_simd()) {
|
||||
unsigned int count;
|
||||
|
||||
count = round_down(nbytes, SPECK_NEON_CHUNK_SIZE);
|
||||
kernel_neon_begin();
|
||||
(*crypt_many)(ctx->main_key.round_keys,
|
||||
ctx->main_key.nrounds,
|
||||
dst, src, count, &tweak);
|
||||
kernel_neon_end();
|
||||
dst += count;
|
||||
src += count;
|
||||
nbytes -= count;
|
||||
}
|
||||
|
||||
/* Handle any remainder with generic code */
|
||||
while (nbytes >= sizeof(tweak)) {
|
||||
*(__le64 *)dst = *(__le64 *)src ^ tweak;
|
||||
(*crypt_one)(&ctx->main_key, dst, dst);
|
||||
*(__le64 *)dst ^= tweak;
|
||||
tweak = cpu_to_le64((le64_to_cpu(tweak) << 1) ^
|
||||
((tweak & cpu_to_le64(1ULL << 63)) ?
|
||||
0x1B : 0));
|
||||
dst += sizeof(tweak);
|
||||
src += sizeof(tweak);
|
||||
nbytes -= sizeof(tweak);
|
||||
}
|
||||
err = skcipher_walk_done(&walk, nbytes);
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static int speck64_xts_encrypt(struct skcipher_request *req)
|
||||
{
|
||||
return __speck64_xts_crypt(req, crypto_speck64_encrypt,
|
||||
speck64_xts_encrypt_neon);
|
||||
}
|
||||
|
||||
static int speck64_xts_decrypt(struct skcipher_request *req)
|
||||
{
|
||||
return __speck64_xts_crypt(req, crypto_speck64_decrypt,
|
||||
speck64_xts_decrypt_neon);
|
||||
}
|
||||
|
||||
static int speck64_xts_setkey(struct crypto_skcipher *tfm, const u8 *key,
|
||||
unsigned int keylen)
|
||||
{
|
||||
struct speck64_xts_tfm_ctx *ctx = crypto_skcipher_ctx(tfm);
|
||||
int err;
|
||||
|
||||
err = xts_verify_key(tfm, key, keylen);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
keylen /= 2;
|
||||
|
||||
err = crypto_speck64_setkey(&ctx->main_key, key, keylen);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
return crypto_speck64_setkey(&ctx->tweak_key, key + keylen, keylen);
|
||||
}
|
||||
|
||||
static struct skcipher_alg speck_algs[] = {
|
||||
{
|
||||
.base.cra_name = "xts(speck128)",
|
||||
.base.cra_driver_name = "xts-speck128-neon",
|
||||
.base.cra_priority = 300,
|
||||
.base.cra_blocksize = SPECK128_BLOCK_SIZE,
|
||||
.base.cra_ctxsize = sizeof(struct speck128_xts_tfm_ctx),
|
||||
.base.cra_alignmask = 7,
|
||||
.base.cra_module = THIS_MODULE,
|
||||
.min_keysize = 2 * SPECK128_128_KEY_SIZE,
|
||||
.max_keysize = 2 * SPECK128_256_KEY_SIZE,
|
||||
.ivsize = SPECK128_BLOCK_SIZE,
|
||||
.walksize = SPECK_NEON_CHUNK_SIZE,
|
||||
.setkey = speck128_xts_setkey,
|
||||
.encrypt = speck128_xts_encrypt,
|
||||
.decrypt = speck128_xts_decrypt,
|
||||
}, {
|
||||
.base.cra_name = "xts(speck64)",
|
||||
.base.cra_driver_name = "xts-speck64-neon",
|
||||
.base.cra_priority = 300,
|
||||
.base.cra_blocksize = SPECK64_BLOCK_SIZE,
|
||||
.base.cra_ctxsize = sizeof(struct speck64_xts_tfm_ctx),
|
||||
.base.cra_alignmask = 7,
|
||||
.base.cra_module = THIS_MODULE,
|
||||
.min_keysize = 2 * SPECK64_96_KEY_SIZE,
|
||||
.max_keysize = 2 * SPECK64_128_KEY_SIZE,
|
||||
.ivsize = SPECK64_BLOCK_SIZE,
|
||||
.walksize = SPECK_NEON_CHUNK_SIZE,
|
||||
.setkey = speck64_xts_setkey,
|
||||
.encrypt = speck64_xts_encrypt,
|
||||
.decrypt = speck64_xts_decrypt,
|
||||
}
|
||||
};
|
||||
|
||||
static int __init speck_neon_module_init(void)
|
||||
{
|
||||
if (!(elf_hwcap & HWCAP_ASIMD))
|
||||
return -ENODEV;
|
||||
return crypto_register_skciphers(speck_algs, ARRAY_SIZE(speck_algs));
|
||||
}
|
||||
|
||||
static void __exit speck_neon_module_exit(void)
|
||||
{
|
||||
crypto_unregister_skciphers(speck_algs, ARRAY_SIZE(speck_algs));
|
||||
}
|
||||
|
||||
module_init(speck_neon_module_init);
|
||||
module_exit(speck_neon_module_exit);
|
||||
|
||||
MODULE_DESCRIPTION("Speck block cipher (NEON-accelerated)");
|
||||
MODULE_LICENSE("GPL");
|
||||
MODULE_AUTHOR("Eric Biggers <ebiggers@google.com>");
|
||||
MODULE_ALIAS_CRYPTO("xts(speck128)");
|
||||
MODULE_ALIAS_CRYPTO("xts-speck128-neon");
|
||||
MODULE_ALIAS_CRYPTO("xts(speck64)");
|
||||
MODULE_ALIAS_CRYPTO("xts-speck64-neon");
|
@ -621,7 +621,6 @@ CONFIG_CRYPTO_ECDH=m
|
||||
CONFIG_CRYPTO_MANAGER=y
|
||||
CONFIG_CRYPTO_USER=m
|
||||
CONFIG_CRYPTO_CRYPTD=m
|
||||
CONFIG_CRYPTO_MCRYPTD=m
|
||||
CONFIG_CRYPTO_TEST=m
|
||||
CONFIG_CRYPTO_CHACHA20POLY1305=m
|
||||
CONFIG_CRYPTO_AEGIS128=m
|
||||
@ -657,7 +656,6 @@ CONFIG_CRYPTO_SALSA20=m
|
||||
CONFIG_CRYPTO_SEED=m
|
||||
CONFIG_CRYPTO_SERPENT=m
|
||||
CONFIG_CRYPTO_SM4=m
|
||||
CONFIG_CRYPTO_SPECK=m
|
||||
CONFIG_CRYPTO_TEA=m
|
||||
CONFIG_CRYPTO_TWOFISH=m
|
||||
CONFIG_CRYPTO_LZO=m
|
||||
|
@ -578,7 +578,6 @@ CONFIG_CRYPTO_ECDH=m
|
||||
CONFIG_CRYPTO_MANAGER=y
|
||||
CONFIG_CRYPTO_USER=m
|
||||
CONFIG_CRYPTO_CRYPTD=m
|
||||
CONFIG_CRYPTO_MCRYPTD=m
|
||||
CONFIG_CRYPTO_TEST=m
|
||||
CONFIG_CRYPTO_CHACHA20POLY1305=m
|
||||
CONFIG_CRYPTO_AEGIS128=m
|
||||
@ -614,7 +613,6 @@ CONFIG_CRYPTO_SALSA20=m
|
||||
CONFIG_CRYPTO_SEED=m
|
||||
CONFIG_CRYPTO_SERPENT=m
|
||||
CONFIG_CRYPTO_SM4=m
|
||||
CONFIG_CRYPTO_SPECK=m
|
||||
CONFIG_CRYPTO_TEA=m
|
||||
CONFIG_CRYPTO_TWOFISH=m
|
||||
CONFIG_CRYPTO_LZO=m
|
||||
|
@ -599,7 +599,6 @@ CONFIG_CRYPTO_ECDH=m
|
||||
CONFIG_CRYPTO_MANAGER=y
|
||||
CONFIG_CRYPTO_USER=m
|
||||
CONFIG_CRYPTO_CRYPTD=m
|
||||
CONFIG_CRYPTO_MCRYPTD=m
|
||||
CONFIG_CRYPTO_TEST=m
|
||||
CONFIG_CRYPTO_CHACHA20POLY1305=m
|
||||
CONFIG_CRYPTO_AEGIS128=m
|
||||
@ -635,7 +634,6 @@ CONFIG_CRYPTO_SALSA20=m
|
||||
CONFIG_CRYPTO_SEED=m
|
||||
CONFIG_CRYPTO_SERPENT=m
|
||||
CONFIG_CRYPTO_SM4=m
|
||||
CONFIG_CRYPTO_SPECK=m
|
||||
CONFIG_CRYPTO_TEA=m
|
||||
CONFIG_CRYPTO_TWOFISH=m
|
||||
CONFIG_CRYPTO_LZO=m
|
||||
|
@ -570,7 +570,6 @@ CONFIG_CRYPTO_ECDH=m
|
||||
CONFIG_CRYPTO_MANAGER=y
|
||||
CONFIG_CRYPTO_USER=m
|
||||
CONFIG_CRYPTO_CRYPTD=m
|
||||
CONFIG_CRYPTO_MCRYPTD=m
|
||||
CONFIG_CRYPTO_TEST=m
|
||||
CONFIG_CRYPTO_CHACHA20POLY1305=m
|
||||
CONFIG_CRYPTO_AEGIS128=m
|
||||
@ -606,7 +605,6 @@ CONFIG_CRYPTO_SALSA20=m
|
||||
CONFIG_CRYPTO_SEED=m
|
||||
CONFIG_CRYPTO_SERPENT=m
|
||||
CONFIG_CRYPTO_SM4=m
|
||||
CONFIG_CRYPTO_SPECK=m
|
||||
CONFIG_CRYPTO_TEA=m
|
||||
CONFIG_CRYPTO_TWOFISH=m
|
||||
CONFIG_CRYPTO_LZO=m
|
||||
|
@ -580,7 +580,6 @@ CONFIG_CRYPTO_ECDH=m
|
||||
CONFIG_CRYPTO_MANAGER=y
|
||||
CONFIG_CRYPTO_USER=m
|
||||
CONFIG_CRYPTO_CRYPTD=m
|
||||
CONFIG_CRYPTO_MCRYPTD=m
|
||||
CONFIG_CRYPTO_TEST=m
|
||||
CONFIG_CRYPTO_CHACHA20POLY1305=m
|
||||
CONFIG_CRYPTO_AEGIS128=m
|
||||
@ -616,7 +615,6 @@ CONFIG_CRYPTO_SALSA20=m
|
||||
CONFIG_CRYPTO_SEED=m
|
||||
CONFIG_CRYPTO_SERPENT=m
|
||||
CONFIG_CRYPTO_SM4=m
|
||||
CONFIG_CRYPTO_SPECK=m
|
||||
CONFIG_CRYPTO_TEA=m
|
||||
CONFIG_CRYPTO_TWOFISH=m
|
||||
CONFIG_CRYPTO_LZO=m
|
||||
|
@ -602,7 +602,6 @@ CONFIG_CRYPTO_ECDH=m
|
||||
CONFIG_CRYPTO_MANAGER=y
|
||||
CONFIG_CRYPTO_USER=m
|
||||
CONFIG_CRYPTO_CRYPTD=m
|
||||
CONFIG_CRYPTO_MCRYPTD=m
|
||||
CONFIG_CRYPTO_TEST=m
|
||||
CONFIG_CRYPTO_CHACHA20POLY1305=m
|
||||
CONFIG_CRYPTO_AEGIS128=m
|
||||
@ -638,7 +637,6 @@ CONFIG_CRYPTO_SALSA20=m
|
||||
CONFIG_CRYPTO_SEED=m
|
||||
CONFIG_CRYPTO_SERPENT=m
|
||||
CONFIG_CRYPTO_SM4=m
|
||||
CONFIG_CRYPTO_SPECK=m
|
||||
CONFIG_CRYPTO_TEA=m
|
||||
CONFIG_CRYPTO_TWOFISH=m
|
||||
CONFIG_CRYPTO_LZO=m
|
||||
|
@ -684,7 +684,6 @@ CONFIG_CRYPTO_ECDH=m
|
||||
CONFIG_CRYPTO_MANAGER=y
|
||||
CONFIG_CRYPTO_USER=m
|
||||
CONFIG_CRYPTO_CRYPTD=m
|
||||
CONFIG_CRYPTO_MCRYPTD=m
|
||||
CONFIG_CRYPTO_TEST=m
|
||||
CONFIG_CRYPTO_CHACHA20POLY1305=m
|
||||
CONFIG_CRYPTO_AEGIS128=m
|
||||
@ -720,7 +719,6 @@ CONFIG_CRYPTO_SALSA20=m
|
||||
CONFIG_CRYPTO_SEED=m
|
||||
CONFIG_CRYPTO_SERPENT=m
|
||||
CONFIG_CRYPTO_SM4=m
|
||||
CONFIG_CRYPTO_SPECK=m
|
||||
CONFIG_CRYPTO_TEA=m
|
||||
CONFIG_CRYPTO_TWOFISH=m
|
||||
CONFIG_CRYPTO_LZO=m
|
||||
|
@ -570,7 +570,6 @@ CONFIG_CRYPTO_ECDH=m
|
||||
CONFIG_CRYPTO_MANAGER=y
|
||||
CONFIG_CRYPTO_USER=m
|
||||
CONFIG_CRYPTO_CRYPTD=m
|
||||
CONFIG_CRYPTO_MCRYPTD=m
|
||||
CONFIG_CRYPTO_TEST=m
|
||||
CONFIG_CRYPTO_CHACHA20POLY1305=m
|
||||
CONFIG_CRYPTO_AEGIS128=m
|
||||
@ -606,7 +605,6 @@ CONFIG_CRYPTO_SALSA20=m
|
||||
CONFIG_CRYPTO_SEED=m
|
||||
CONFIG_CRYPTO_SERPENT=m
|
||||
CONFIG_CRYPTO_SM4=m
|
||||
CONFIG_CRYPTO_SPECK=m
|
||||
CONFIG_CRYPTO_TEA=m
|
||||
CONFIG_CRYPTO_TWOFISH=m
|
||||
CONFIG_CRYPTO_LZO=m
|
||||
|
@ -570,7 +570,6 @@ CONFIG_CRYPTO_ECDH=m
|
||||
CONFIG_CRYPTO_MANAGER=y
|
||||
CONFIG_CRYPTO_USER=m
|
||||
CONFIG_CRYPTO_CRYPTD=m
|
||||
CONFIG_CRYPTO_MCRYPTD=m
|
||||
CONFIG_CRYPTO_TEST=m
|
||||
CONFIG_CRYPTO_CHACHA20POLY1305=m
|
||||
CONFIG_CRYPTO_AEGIS128=m
|
||||
@ -606,7 +605,6 @@ CONFIG_CRYPTO_SALSA20=m
|
||||
CONFIG_CRYPTO_SEED=m
|
||||
CONFIG_CRYPTO_SERPENT=m
|
||||
CONFIG_CRYPTO_SM4=m
|
||||
CONFIG_CRYPTO_SPECK=m
|
||||
CONFIG_CRYPTO_TEA=m
|
||||
CONFIG_CRYPTO_TWOFISH=m
|
||||
CONFIG_CRYPTO_LZO=m
|
||||
|
@ -593,7 +593,6 @@ CONFIG_CRYPTO_ECDH=m
|
||||
CONFIG_CRYPTO_MANAGER=y
|
||||
CONFIG_CRYPTO_USER=m
|
||||
CONFIG_CRYPTO_CRYPTD=m
|
||||
CONFIG_CRYPTO_MCRYPTD=m
|
||||
CONFIG_CRYPTO_TEST=m
|
||||
CONFIG_CRYPTO_CHACHA20POLY1305=m
|
||||
CONFIG_CRYPTO_AEGIS128=m
|
||||
@ -629,7 +628,6 @@ CONFIG_CRYPTO_SALSA20=m
|
||||
CONFIG_CRYPTO_SEED=m
|
||||
CONFIG_CRYPTO_SERPENT=m
|
||||
CONFIG_CRYPTO_SM4=m
|
||||
CONFIG_CRYPTO_SPECK=m
|
||||
CONFIG_CRYPTO_TEA=m
|
||||
CONFIG_CRYPTO_TWOFISH=m
|
||||
CONFIG_CRYPTO_LZO=m
|
||||
|
@ -571,7 +571,6 @@ CONFIG_CRYPTO_ECDH=m
|
||||
CONFIG_CRYPTO_MANAGER=y
|
||||
CONFIG_CRYPTO_USER=m
|
||||
CONFIG_CRYPTO_CRYPTD=m
|
||||
CONFIG_CRYPTO_MCRYPTD=m
|
||||
CONFIG_CRYPTO_TEST=m
|
||||
CONFIG_CRYPTO_CHACHA20POLY1305=m
|
||||
CONFIG_CRYPTO_AEGIS128=m
|
||||
@ -607,7 +606,6 @@ CONFIG_CRYPTO_SALSA20=m
|
||||
CONFIG_CRYPTO_SEED=m
|
||||
CONFIG_CRYPTO_SERPENT=m
|
||||
CONFIG_CRYPTO_SM4=m
|
||||
CONFIG_CRYPTO_SPECK=m
|
||||
CONFIG_CRYPTO_TEA=m
|
||||
CONFIG_CRYPTO_TWOFISH=m
|
||||
CONFIG_CRYPTO_LZO=m
|
||||
|
@ -572,7 +572,6 @@ CONFIG_CRYPTO_ECDH=m
|
||||
CONFIG_CRYPTO_MANAGER=y
|
||||
CONFIG_CRYPTO_USER=m
|
||||
CONFIG_CRYPTO_CRYPTD=m
|
||||
CONFIG_CRYPTO_MCRYPTD=m
|
||||
CONFIG_CRYPTO_TEST=m
|
||||
CONFIG_CRYPTO_CHACHA20POLY1305=m
|
||||
CONFIG_CRYPTO_AEGIS128=m
|
||||
@ -608,7 +607,6 @@ CONFIG_CRYPTO_SALSA20=m
|
||||
CONFIG_CRYPTO_SEED=m
|
||||
CONFIG_CRYPTO_SERPENT=m
|
||||
CONFIG_CRYPTO_SM4=m
|
||||
CONFIG_CRYPTO_SPECK=m
|
||||
CONFIG_CRYPTO_TEA=m
|
||||
CONFIG_CRYPTO_TWOFISH=m
|
||||
CONFIG_CRYPTO_LZO=m
|
||||
|
@ -668,7 +668,6 @@ CONFIG_CRYPTO_USER=m
|
||||
# CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set
|
||||
CONFIG_CRYPTO_PCRYPT=m
|
||||
CONFIG_CRYPTO_CRYPTD=m
|
||||
CONFIG_CRYPTO_MCRYPTD=m
|
||||
CONFIG_CRYPTO_TEST=m
|
||||
CONFIG_CRYPTO_CHACHA20POLY1305=m
|
||||
CONFIG_CRYPTO_LRW=m
|
||||
|
@ -610,7 +610,6 @@ CONFIG_CRYPTO_USER=m
|
||||
# CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set
|
||||
CONFIG_CRYPTO_PCRYPT=m
|
||||
CONFIG_CRYPTO_CRYPTD=m
|
||||
CONFIG_CRYPTO_MCRYPTD=m
|
||||
CONFIG_CRYPTO_TEST=m
|
||||
CONFIG_CRYPTO_CHACHA20POLY1305=m
|
||||
CONFIG_CRYPTO_LRW=m
|
||||
|
@ -44,7 +44,7 @@ struct s390_aes_ctx {
|
||||
int key_len;
|
||||
unsigned long fc;
|
||||
union {
|
||||
struct crypto_skcipher *blk;
|
||||
struct crypto_sync_skcipher *blk;
|
||||
struct crypto_cipher *cip;
|
||||
} fallback;
|
||||
};
|
||||
@ -54,7 +54,7 @@ struct s390_xts_ctx {
|
||||
u8 pcc_key[32];
|
||||
int key_len;
|
||||
unsigned long fc;
|
||||
struct crypto_skcipher *fallback;
|
||||
struct crypto_sync_skcipher *fallback;
|
||||
};
|
||||
|
||||
struct gcm_sg_walk {
|
||||
@ -184,14 +184,15 @@ static int setkey_fallback_blk(struct crypto_tfm *tfm, const u8 *key,
|
||||
struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
|
||||
unsigned int ret;
|
||||
|
||||
crypto_skcipher_clear_flags(sctx->fallback.blk, CRYPTO_TFM_REQ_MASK);
|
||||
crypto_skcipher_set_flags(sctx->fallback.blk, tfm->crt_flags &
|
||||
crypto_sync_skcipher_clear_flags(sctx->fallback.blk,
|
||||
CRYPTO_TFM_REQ_MASK);
|
||||
crypto_sync_skcipher_set_flags(sctx->fallback.blk, tfm->crt_flags &
|
||||
CRYPTO_TFM_REQ_MASK);
|
||||
|
||||
ret = crypto_skcipher_setkey(sctx->fallback.blk, key, len);
|
||||
ret = crypto_sync_skcipher_setkey(sctx->fallback.blk, key, len);
|
||||
|
||||
tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK;
|
||||
tfm->crt_flags |= crypto_skcipher_get_flags(sctx->fallback.blk) &
|
||||
tfm->crt_flags |= crypto_sync_skcipher_get_flags(sctx->fallback.blk) &
|
||||
CRYPTO_TFM_RES_MASK;
|
||||
|
||||
return ret;
|
||||
@ -204,9 +205,9 @@ static int fallback_blk_dec(struct blkcipher_desc *desc,
|
||||
unsigned int ret;
|
||||
struct crypto_blkcipher *tfm = desc->tfm;
|
||||
struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(tfm);
|
||||
SKCIPHER_REQUEST_ON_STACK(req, sctx->fallback.blk);
|
||||
SYNC_SKCIPHER_REQUEST_ON_STACK(req, sctx->fallback.blk);
|
||||
|
||||
skcipher_request_set_tfm(req, sctx->fallback.blk);
|
||||
skcipher_request_set_sync_tfm(req, sctx->fallback.blk);
|
||||
skcipher_request_set_callback(req, desc->flags, NULL, NULL);
|
||||
skcipher_request_set_crypt(req, src, dst, nbytes, desc->info);
|
||||
|
||||
@ -223,9 +224,9 @@ static int fallback_blk_enc(struct blkcipher_desc *desc,
|
||||
unsigned int ret;
|
||||
struct crypto_blkcipher *tfm = desc->tfm;
|
||||
struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(tfm);
|
||||
SKCIPHER_REQUEST_ON_STACK(req, sctx->fallback.blk);
|
||||
SYNC_SKCIPHER_REQUEST_ON_STACK(req, sctx->fallback.blk);
|
||||
|
||||
skcipher_request_set_tfm(req, sctx->fallback.blk);
|
||||
skcipher_request_set_sync_tfm(req, sctx->fallback.blk);
|
||||
skcipher_request_set_callback(req, desc->flags, NULL, NULL);
|
||||
skcipher_request_set_crypt(req, src, dst, nbytes, desc->info);
|
||||
|
||||
@ -306,8 +307,7 @@ static int fallback_init_blk(struct crypto_tfm *tfm)
|
||||
const char *name = tfm->__crt_alg->cra_name;
|
||||
struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
|
||||
|
||||
sctx->fallback.blk = crypto_alloc_skcipher(name, 0,
|
||||
CRYPTO_ALG_ASYNC |
|
||||
sctx->fallback.blk = crypto_alloc_sync_skcipher(name, 0,
|
||||
CRYPTO_ALG_NEED_FALLBACK);
|
||||
|
||||
if (IS_ERR(sctx->fallback.blk)) {
|
||||
@ -323,7 +323,7 @@ static void fallback_exit_blk(struct crypto_tfm *tfm)
|
||||
{
|
||||
struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
|
||||
|
||||
crypto_free_skcipher(sctx->fallback.blk);
|
||||
crypto_free_sync_skcipher(sctx->fallback.blk);
|
||||
}
|
||||
|
||||
static struct crypto_alg ecb_aes_alg = {
|
||||
@ -453,14 +453,15 @@ static int xts_fallback_setkey(struct crypto_tfm *tfm, const u8 *key,
|
||||
struct s390_xts_ctx *xts_ctx = crypto_tfm_ctx(tfm);
|
||||
unsigned int ret;
|
||||
|
||||
crypto_skcipher_clear_flags(xts_ctx->fallback, CRYPTO_TFM_REQ_MASK);
|
||||
crypto_skcipher_set_flags(xts_ctx->fallback, tfm->crt_flags &
|
||||
crypto_sync_skcipher_clear_flags(xts_ctx->fallback,
|
||||
CRYPTO_TFM_REQ_MASK);
|
||||
crypto_sync_skcipher_set_flags(xts_ctx->fallback, tfm->crt_flags &
|
||||
CRYPTO_TFM_REQ_MASK);
|
||||
|
||||
ret = crypto_skcipher_setkey(xts_ctx->fallback, key, len);
|
||||
ret = crypto_sync_skcipher_setkey(xts_ctx->fallback, key, len);
|
||||
|
||||
tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK;
|
||||
tfm->crt_flags |= crypto_skcipher_get_flags(xts_ctx->fallback) &
|
||||
tfm->crt_flags |= crypto_sync_skcipher_get_flags(xts_ctx->fallback) &
|
||||
CRYPTO_TFM_RES_MASK;
|
||||
|
||||
return ret;
|
||||
@ -472,10 +473,10 @@ static int xts_fallback_decrypt(struct blkcipher_desc *desc,
|
||||
{
|
||||
struct crypto_blkcipher *tfm = desc->tfm;
|
||||
struct s390_xts_ctx *xts_ctx = crypto_blkcipher_ctx(tfm);
|
||||
SKCIPHER_REQUEST_ON_STACK(req, xts_ctx->fallback);
|
||||
SYNC_SKCIPHER_REQUEST_ON_STACK(req, xts_ctx->fallback);
|
||||
unsigned int ret;
|
||||
|
||||
skcipher_request_set_tfm(req, xts_ctx->fallback);
|
||||
skcipher_request_set_sync_tfm(req, xts_ctx->fallback);
|
||||
skcipher_request_set_callback(req, desc->flags, NULL, NULL);
|
||||
skcipher_request_set_crypt(req, src, dst, nbytes, desc->info);
|
||||
|
||||
@ -491,10 +492,10 @@ static int xts_fallback_encrypt(struct blkcipher_desc *desc,
|
||||
{
|
||||
struct crypto_blkcipher *tfm = desc->tfm;
|
||||
struct s390_xts_ctx *xts_ctx = crypto_blkcipher_ctx(tfm);
|
||||
SKCIPHER_REQUEST_ON_STACK(req, xts_ctx->fallback);
|
||||
SYNC_SKCIPHER_REQUEST_ON_STACK(req, xts_ctx->fallback);
|
||||
unsigned int ret;
|
||||
|
||||
skcipher_request_set_tfm(req, xts_ctx->fallback);
|
||||
skcipher_request_set_sync_tfm(req, xts_ctx->fallback);
|
||||
skcipher_request_set_callback(req, desc->flags, NULL, NULL);
|
||||
skcipher_request_set_crypt(req, src, dst, nbytes, desc->info);
|
||||
|
||||
@ -611,8 +612,7 @@ static int xts_fallback_init(struct crypto_tfm *tfm)
|
||||
const char *name = tfm->__crt_alg->cra_name;
|
||||
struct s390_xts_ctx *xts_ctx = crypto_tfm_ctx(tfm);
|
||||
|
||||
xts_ctx->fallback = crypto_alloc_skcipher(name, 0,
|
||||
CRYPTO_ALG_ASYNC |
|
||||
xts_ctx->fallback = crypto_alloc_sync_skcipher(name, 0,
|
||||
CRYPTO_ALG_NEED_FALLBACK);
|
||||
|
||||
if (IS_ERR(xts_ctx->fallback)) {
|
||||
@ -627,7 +627,7 @@ static void xts_fallback_exit(struct crypto_tfm *tfm)
|
||||
{
|
||||
struct s390_xts_ctx *xts_ctx = crypto_tfm_ctx(tfm);
|
||||
|
||||
crypto_free_skcipher(xts_ctx->fallback);
|
||||
crypto_free_sync_skcipher(xts_ctx->fallback);
|
||||
}
|
||||
|
||||
static struct crypto_alg xts_aes_alg = {
|
||||
|
@ -221,7 +221,6 @@ CONFIG_CRYPTO_SALSA20=m
|
||||
CONFIG_CRYPTO_SEED=m
|
||||
CONFIG_CRYPTO_SERPENT=m
|
||||
CONFIG_CRYPTO_SM4=m
|
||||
CONFIG_CRYPTO_SPECK=m
|
||||
CONFIG_CRYPTO_TEA=m
|
||||
CONFIG_CRYPTO_TWOFISH=m
|
||||
CONFIG_CRYPTO_DEFLATE=m
|
||||
|
@ -60,9 +60,6 @@ endif
|
||||
ifeq ($(avx2_supported),yes)
|
||||
obj-$(CONFIG_CRYPTO_CAMELLIA_AESNI_AVX2_X86_64) += camellia-aesni-avx2.o
|
||||
obj-$(CONFIG_CRYPTO_SERPENT_AVX2_X86_64) += serpent-avx2.o
|
||||
obj-$(CONFIG_CRYPTO_SHA1_MB) += sha1-mb/
|
||||
obj-$(CONFIG_CRYPTO_SHA256_MB) += sha256-mb/
|
||||
obj-$(CONFIG_CRYPTO_SHA512_MB) += sha512-mb/
|
||||
|
||||
obj-$(CONFIG_CRYPTO_MORUS1280_AVX2) += morus1280-avx2.o
|
||||
endif
|
||||
@ -106,7 +103,7 @@ ifeq ($(avx2_supported),yes)
|
||||
morus1280-avx2-y := morus1280-avx2-asm.o morus1280-avx2-glue.o
|
||||
endif
|
||||
|
||||
aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o fpu.o
|
||||
aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o
|
||||
aesni-intel-$(CONFIG_64BIT) += aesni-intel_avx-x86_64.o aes_ctrby8_avx-x86_64.o
|
||||
ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o
|
||||
sha1-ssse3-y := sha1_ssse3_asm.o sha1_ssse3_glue.o
|
||||
|
@ -102,9 +102,6 @@ asmlinkage void aesni_cbc_enc(struct crypto_aes_ctx *ctx, u8 *out,
|
||||
asmlinkage void aesni_cbc_dec(struct crypto_aes_ctx *ctx, u8 *out,
|
||||
const u8 *in, unsigned int len, u8 *iv);
|
||||
|
||||
int crypto_fpu_init(void);
|
||||
void crypto_fpu_exit(void);
|
||||
|
||||
#define AVX_GEN2_OPTSIZE 640
|
||||
#define AVX_GEN4_OPTSIZE 4096
|
||||
|
||||
@ -817,7 +814,7 @@ static int gcmaes_crypt_by_sg(bool enc, struct aead_request *req,
|
||||
/* Linearize assoc, if not already linear */
|
||||
if (req->src->length >= assoclen && req->src->length &&
|
||||
(!PageHighMem(sg_page(req->src)) ||
|
||||
req->src->offset + req->src->length < PAGE_SIZE)) {
|
||||
req->src->offset + req->src->length <= PAGE_SIZE)) {
|
||||
scatterwalk_start(&assoc_sg_walk, req->src);
|
||||
assoc = scatterwalk_map(&assoc_sg_walk);
|
||||
} else {
|
||||
@ -1253,22 +1250,6 @@ static struct skcipher_alg aesni_skciphers[] = {
|
||||
static
|
||||
struct simd_skcipher_alg *aesni_simd_skciphers[ARRAY_SIZE(aesni_skciphers)];
|
||||
|
||||
static struct {
|
||||
const char *algname;
|
||||
const char *drvname;
|
||||
const char *basename;
|
||||
struct simd_skcipher_alg *simd;
|
||||
} aesni_simd_skciphers2[] = {
|
||||
#if (defined(MODULE) && IS_ENABLED(CONFIG_CRYPTO_PCBC)) || \
|
||||
IS_BUILTIN(CONFIG_CRYPTO_PCBC)
|
||||
{
|
||||
.algname = "pcbc(aes)",
|
||||
.drvname = "pcbc-aes-aesni",
|
||||
.basename = "fpu(pcbc(__aes-aesni))",
|
||||
},
|
||||
#endif
|
||||
};
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
static int generic_gcmaes_set_key(struct crypto_aead *aead, const u8 *key,
|
||||
unsigned int key_len)
|
||||
@ -1422,10 +1403,6 @@ static void aesni_free_simds(void)
|
||||
for (i = 0; i < ARRAY_SIZE(aesni_simd_skciphers) &&
|
||||
aesni_simd_skciphers[i]; i++)
|
||||
simd_skcipher_free(aesni_simd_skciphers[i]);
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(aesni_simd_skciphers2); i++)
|
||||
if (aesni_simd_skciphers2[i].simd)
|
||||
simd_skcipher_free(aesni_simd_skciphers2[i].simd);
|
||||
}
|
||||
|
||||
static int __init aesni_init(void)
|
||||
@ -1469,13 +1446,9 @@ static int __init aesni_init(void)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
err = crypto_fpu_init();
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
err = crypto_register_algs(aesni_algs, ARRAY_SIZE(aesni_algs));
|
||||
if (err)
|
||||
goto fpu_exit;
|
||||
return err;
|
||||
|
||||
err = crypto_register_skciphers(aesni_skciphers,
|
||||
ARRAY_SIZE(aesni_skciphers));
|
||||
@ -1499,18 +1472,6 @@ static int __init aesni_init(void)
|
||||
aesni_simd_skciphers[i] = simd;
|
||||
}
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(aesni_simd_skciphers2); i++) {
|
||||
algname = aesni_simd_skciphers2[i].algname;
|
||||
drvname = aesni_simd_skciphers2[i].drvname;
|
||||
basename = aesni_simd_skciphers2[i].basename;
|
||||
simd = simd_skcipher_create_compat(algname, drvname, basename);
|
||||
err = PTR_ERR(simd);
|
||||
if (IS_ERR(simd))
|
||||
continue;
|
||||
|
||||
aesni_simd_skciphers2[i].simd = simd;
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
unregister_simds:
|
||||
@ -1521,8 +1482,6 @@ static int __init aesni_init(void)
|
||||
ARRAY_SIZE(aesni_skciphers));
|
||||
unregister_algs:
|
||||
crypto_unregister_algs(aesni_algs, ARRAY_SIZE(aesni_algs));
|
||||
fpu_exit:
|
||||
crypto_fpu_exit();
|
||||
return err;
|
||||
}
|
||||
|
||||
@ -1533,8 +1492,6 @@ static void __exit aesni_exit(void)
|
||||
crypto_unregister_skciphers(aesni_skciphers,
|
||||
ARRAY_SIZE(aesni_skciphers));
|
||||
crypto_unregister_algs(aesni_algs, ARRAY_SIZE(aesni_algs));
|
||||
|
||||
crypto_fpu_exit();
|
||||
}
|
||||
|
||||
late_initcall(aesni_init);
|
||||
|
@ -1,207 +0,0 @@
|
||||
/*
|
||||
* FPU: Wrapper for blkcipher touching fpu
|
||||
*
|
||||
* Copyright (c) Intel Corp.
|
||||
* Author: Huang Ying <ying.huang@intel.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License as published by the Free
|
||||
* Software Foundation; either version 2 of the License, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <crypto/internal/skcipher.h>
|
||||
#include <linux/err.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/slab.h>
|
||||
#include <asm/fpu/api.h>
|
||||
|
||||
struct crypto_fpu_ctx {
|
||||
struct crypto_skcipher *child;
|
||||
};
|
||||
|
||||
static int crypto_fpu_setkey(struct crypto_skcipher *parent, const u8 *key,
|
||||
unsigned int keylen)
|
||||
{
|
||||
struct crypto_fpu_ctx *ctx = crypto_skcipher_ctx(parent);
|
||||
struct crypto_skcipher *child = ctx->child;
|
||||
int err;
|
||||
|
||||
crypto_skcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
|
||||
crypto_skcipher_set_flags(child, crypto_skcipher_get_flags(parent) &
|
||||
CRYPTO_TFM_REQ_MASK);
|
||||
err = crypto_skcipher_setkey(child, key, keylen);
|
||||
crypto_skcipher_set_flags(parent, crypto_skcipher_get_flags(child) &
|
||||
CRYPTO_TFM_RES_MASK);
|
||||
return err;
|
||||
}
|
||||
|
||||
static int crypto_fpu_encrypt(struct skcipher_request *req)
|
||||
{
|
||||
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
|
||||
struct crypto_fpu_ctx *ctx = crypto_skcipher_ctx(tfm);
|
||||
struct crypto_skcipher *child = ctx->child;
|
||||
SKCIPHER_REQUEST_ON_STACK(subreq, child);
|
||||
int err;
|
||||
|
||||
skcipher_request_set_tfm(subreq, child);
|
||||
skcipher_request_set_callback(subreq, 0, NULL, NULL);
|
||||
skcipher_request_set_crypt(subreq, req->src, req->dst, req->cryptlen,
|
||||
req->iv);
|
||||
|
||||
kernel_fpu_begin();
|
||||
err = crypto_skcipher_encrypt(subreq);
|
||||
kernel_fpu_end();
|
||||
|
||||
skcipher_request_zero(subreq);
|
||||
return err;
|
||||
}
|
||||
|
||||
static int crypto_fpu_decrypt(struct skcipher_request *req)
|
||||
{
|
||||
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
|
||||
struct crypto_fpu_ctx *ctx = crypto_skcipher_ctx(tfm);
|
||||
struct crypto_skcipher *child = ctx->child;
|
||||
SKCIPHER_REQUEST_ON_STACK(subreq, child);
|
||||
int err;
|
||||
|
||||
skcipher_request_set_tfm(subreq, child);
|
||||
skcipher_request_set_callback(subreq, 0, NULL, NULL);
|
||||
skcipher_request_set_crypt(subreq, req->src, req->dst, req->cryptlen,
|
||||
req->iv);
|
||||
|
||||
kernel_fpu_begin();
|
||||
err = crypto_skcipher_decrypt(subreq);
|
||||
kernel_fpu_end();
|
||||
|
||||
skcipher_request_zero(subreq);
|
||||
return err;
|
||||
}
|
||||
|
||||
static int crypto_fpu_init_tfm(struct crypto_skcipher *tfm)
|
||||
{
|
||||
struct skcipher_instance *inst = skcipher_alg_instance(tfm);
|
||||
struct crypto_fpu_ctx *ctx = crypto_skcipher_ctx(tfm);
|
||||
struct crypto_skcipher_spawn *spawn;
|
||||
struct crypto_skcipher *cipher;
|
||||
|
||||
spawn = skcipher_instance_ctx(inst);
|
||||
cipher = crypto_spawn_skcipher(spawn);
|
||||
if (IS_ERR(cipher))
|
||||
return PTR_ERR(cipher);
|
||||
|
||||
ctx->child = cipher;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void crypto_fpu_exit_tfm(struct crypto_skcipher *tfm)
|
||||
{
|
||||
struct crypto_fpu_ctx *ctx = crypto_skcipher_ctx(tfm);
|
||||
|
||||
crypto_free_skcipher(ctx->child);
|
||||
}
|
||||
|
||||
static void crypto_fpu_free(struct skcipher_instance *inst)
|
||||
{
|
||||
crypto_drop_skcipher(skcipher_instance_ctx(inst));
|
||||
kfree(inst);
|
||||
}
|
||||
|
||||
static int crypto_fpu_create(struct crypto_template *tmpl, struct rtattr **tb)
|
||||
{
|
||||
struct crypto_skcipher_spawn *spawn;
|
||||
struct skcipher_instance *inst;
|
||||
struct crypto_attr_type *algt;
|
||||
struct skcipher_alg *alg;
|
||||
const char *cipher_name;
|
||||
int err;
|
||||
|
||||
algt = crypto_get_attr_type(tb);
|
||||
if (IS_ERR(algt))
|
||||
return PTR_ERR(algt);
|
||||
|
||||
if ((algt->type ^ (CRYPTO_ALG_INTERNAL | CRYPTO_ALG_TYPE_SKCIPHER)) &
|
||||
algt->mask)
|
||||
return -EINVAL;
|
||||
|
||||
if (!(algt->mask & CRYPTO_ALG_INTERNAL))
|
||||
return -EINVAL;
|
||||
|
||||
cipher_name = crypto_attr_alg_name(tb[1]);
|
||||
if (IS_ERR(cipher_name))
|
||||
return PTR_ERR(cipher_name);
|
||||
|
||||
inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL);
|
||||
if (!inst)
|
||||
return -ENOMEM;
|
||||
|
||||
spawn = skcipher_instance_ctx(inst);
|
||||
|
||||
crypto_set_skcipher_spawn(spawn, skcipher_crypto_instance(inst));
|
||||
err = crypto_grab_skcipher(spawn, cipher_name, CRYPTO_ALG_INTERNAL,
|
||||
CRYPTO_ALG_INTERNAL | CRYPTO_ALG_ASYNC);
|
||||
if (err)
|
||||
goto out_free_inst;
|
||||
|
||||
alg = crypto_skcipher_spawn_alg(spawn);
|
||||
|
||||
err = crypto_inst_setname(skcipher_crypto_instance(inst), "fpu",
|
||||
&alg->base);
|
||||
if (err)
|
||||
goto out_drop_skcipher;
|
||||
|
||||
inst->alg.base.cra_flags = CRYPTO_ALG_INTERNAL;
|
||||
inst->alg.base.cra_priority = alg->base.cra_priority;
|
||||
inst->alg.base.cra_blocksize = alg->base.cra_blocksize;
|
||||
inst->alg.base.cra_alignmask = alg->base.cra_alignmask;
|
||||
|
||||
inst->alg.ivsize = crypto_skcipher_alg_ivsize(alg);
|
||||
inst->alg.min_keysize = crypto_skcipher_alg_min_keysize(alg);
|
||||
inst->alg.max_keysize = crypto_skcipher_alg_max_keysize(alg);
|
||||
|
||||
inst->alg.base.cra_ctxsize = sizeof(struct crypto_fpu_ctx);
|
||||
|
||||
inst->alg.init = crypto_fpu_init_tfm;
|
||||
inst->alg.exit = crypto_fpu_exit_tfm;
|
||||
|
||||
inst->alg.setkey = crypto_fpu_setkey;
|
||||
inst->alg.encrypt = crypto_fpu_encrypt;
|
||||
inst->alg.decrypt = crypto_fpu_decrypt;
|
||||
|
||||
inst->free = crypto_fpu_free;
|
||||
|
||||
err = skcipher_register_instance(tmpl, inst);
|
||||
if (err)
|
||||
goto out_drop_skcipher;
|
||||
|
||||
out:
|
||||
return err;
|
||||
|
||||
out_drop_skcipher:
|
||||
crypto_drop_skcipher(spawn);
|
||||
out_free_inst:
|
||||
kfree(inst);
|
||||
goto out;
|
||||
}
|
||||
|
||||
static struct crypto_template crypto_fpu_tmpl = {
|
||||
.name = "fpu",
|
||||
.create = crypto_fpu_create,
|
||||
.module = THIS_MODULE,
|
||||
};
|
||||
|
||||
int __init crypto_fpu_init(void)
|
||||
{
|
||||
return crypto_register_template(&crypto_fpu_tmpl);
|
||||
}
|
||||
|
||||
void crypto_fpu_exit(void)
|
||||
{
|
||||
crypto_unregister_template(&crypto_fpu_tmpl);
|
||||
}
|
||||
|
||||
MODULE_ALIAS_CRYPTO("fpu");
|
@ -1,14 +0,0 @@
|
||||
# SPDX-License-Identifier: GPL-2.0
|
||||
#
|
||||
# Arch-specific CryptoAPI modules.
|
||||
#
|
||||
|
||||
OBJECT_FILES_NON_STANDARD := y
|
||||
|
||||
avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1\
|
||||
$(comma)4)$(comma)%ymm2,yes,no)
|
||||
ifeq ($(avx2_supported),yes)
|
||||
obj-$(CONFIG_CRYPTO_SHA1_MB) += sha1-mb.o
|
||||
sha1-mb-y := sha1_mb.o sha1_mb_mgr_flush_avx2.o \
|
||||
sha1_mb_mgr_init_avx2.o sha1_mb_mgr_submit_avx2.o sha1_x8_avx2.o
|
||||
endif
|
File diff suppressed because it is too large
Load Diff
@ -1,134 +0,0 @@
|
||||
/*
|
||||
* Header file for multi buffer SHA context
|
||||
*
|
||||
* This file is provided under a dual BSD/GPLv2 license. When using or
|
||||
* redistributing this file, you may do so under either license.
|
||||
*
|
||||
* GPL LICENSE SUMMARY
|
||||
*
|
||||
* Copyright(c) 2014 Intel Corporation.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of version 2 of the GNU General Public License as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* Contact Information:
|
||||
* Tim Chen <tim.c.chen@linux.intel.com>
|
||||
*
|
||||
* BSD LICENSE
|
||||
*
|
||||
* Copyright(c) 2014 Intel Corporation.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef _SHA_MB_CTX_INTERNAL_H
|
||||
#define _SHA_MB_CTX_INTERNAL_H
|
||||
|
||||
#include "sha1_mb_mgr.h"
|
||||
|
||||
#define HASH_UPDATE 0x00
|
||||
#define HASH_LAST 0x01
|
||||
#define HASH_DONE 0x02
|
||||
#define HASH_FINAL 0x04
|
||||
|
||||
#define HASH_CTX_STS_IDLE 0x00
|
||||
#define HASH_CTX_STS_PROCESSING 0x01
|
||||
#define HASH_CTX_STS_LAST 0x02
|
||||
#define HASH_CTX_STS_COMPLETE 0x04
|
||||
|
||||
enum hash_ctx_error {
|
||||
HASH_CTX_ERROR_NONE = 0,
|
||||
HASH_CTX_ERROR_INVALID_FLAGS = -1,
|
||||
HASH_CTX_ERROR_ALREADY_PROCESSING = -2,
|
||||
HASH_CTX_ERROR_ALREADY_COMPLETED = -3,
|
||||
|
||||
#ifdef HASH_CTX_DEBUG
|
||||
HASH_CTX_ERROR_DEBUG_DIGEST_MISMATCH = -4,
|
||||
#endif
|
||||
};
|
||||
|
||||
|
||||
#define hash_ctx_user_data(ctx) ((ctx)->user_data)
|
||||
#define hash_ctx_digest(ctx) ((ctx)->job.result_digest)
|
||||
#define hash_ctx_processing(ctx) ((ctx)->status & HASH_CTX_STS_PROCESSING)
|
||||
#define hash_ctx_complete(ctx) ((ctx)->status == HASH_CTX_STS_COMPLETE)
|
||||
#define hash_ctx_status(ctx) ((ctx)->status)
|
||||
#define hash_ctx_error(ctx) ((ctx)->error)
|
||||
#define hash_ctx_init(ctx) \
|
||||
do { \
|
||||
(ctx)->error = HASH_CTX_ERROR_NONE; \
|
||||
(ctx)->status = HASH_CTX_STS_COMPLETE; \
|
||||
} while (0)
|
||||
|
||||
|
||||
/* Hash Constants and Typedefs */
|
||||
#define SHA1_DIGEST_LENGTH 5
|
||||
#define SHA1_LOG2_BLOCK_SIZE 6
|
||||
|
||||
#define SHA1_PADLENGTHFIELD_SIZE 8
|
||||
|
||||
#ifdef SHA_MB_DEBUG
|
||||
#define assert(expr) \
|
||||
do { \
|
||||
if (unlikely(!(expr))) { \
|
||||
printk(KERN_ERR "Assertion failed! %s,%s,%s,line=%d\n", \
|
||||
#expr, __FILE__, __func__, __LINE__); \
|
||||
} \
|
||||
} while (0)
|
||||
#else
|
||||
#define assert(expr) do {} while (0)
|
||||
#endif
|
||||
|
||||
struct sha1_ctx_mgr {
|
||||
struct sha1_mb_mgr mgr;
|
||||
};
|
||||
|
||||
/* typedef struct sha1_ctx_mgr sha1_ctx_mgr; */
|
||||
|
||||
struct sha1_hash_ctx {
|
||||
/* Must be at struct offset 0 */
|
||||
struct job_sha1 job;
|
||||
/* status flag */
|
||||
int status;
|
||||
/* error flag */
|
||||
int error;
|
||||
|
||||
uint64_t total_length;
|
||||
const void *incoming_buffer;
|
||||
uint32_t incoming_buffer_length;
|
||||
uint8_t partial_block_buffer[SHA1_BLOCK_SIZE * 2];
|
||||
uint32_t partial_block_buffer_length;
|
||||
void *user_data;
|
||||
};
|
||||
|
||||
#endif
|
@ -1,110 +0,0 @@
|
||||
/*
|
||||
* Header file for multi buffer SHA1 algorithm manager
|
||||
*
|
||||
* This file is provided under a dual BSD/GPLv2 license. When using or
|
||||
* redistributing this file, you may do so under either license.
|
||||
*
|
||||
* GPL LICENSE SUMMARY
|
||||
*
|
||||
* Copyright(c) 2014 Intel Corporation.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of version 2 of the GNU General Public License as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* Contact Information:
|
||||
* James Guilford <james.guilford@intel.com>
|
||||
* Tim Chen <tim.c.chen@linux.intel.com>
|
||||
*
|
||||
* BSD LICENSE
|
||||
*
|
||||
* Copyright(c) 2014 Intel Corporation.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
#ifndef __SHA_MB_MGR_H
|
||||
#define __SHA_MB_MGR_H
|
||||
|
||||
|
||||
#include <linux/types.h>
|
||||
|
||||
#define NUM_SHA1_DIGEST_WORDS 5
|
||||
|
||||
enum job_sts { STS_UNKNOWN = 0,
|
||||
STS_BEING_PROCESSED = 1,
|
||||
STS_COMPLETED = 2,
|
||||
STS_INTERNAL_ERROR = 3,
|
||||
STS_ERROR = 4
|
||||
};
|
||||
|
||||
struct job_sha1 {
|
||||
u8 *buffer;
|
||||
u32 len;
|
||||
u32 result_digest[NUM_SHA1_DIGEST_WORDS] __aligned(32);
|
||||
enum job_sts status;
|
||||
void *user_data;
|
||||
};
|
||||
|
||||
/* SHA1 out-of-order scheduler */
|
||||
|
||||
/* typedef uint32_t sha1_digest_array[5][8]; */
|
||||
|
||||
struct sha1_args_x8 {
|
||||
uint32_t digest[5][8];
|
||||
uint8_t *data_ptr[8];
|
||||
};
|
||||
|
||||
struct sha1_lane_data {
|
||||
struct job_sha1 *job_in_lane;
|
||||
};
|
||||
|
||||
struct sha1_mb_mgr {
|
||||
struct sha1_args_x8 args;
|
||||
|
||||
uint32_t lens[8];
|
||||
|
||||
/* each byte is index (0...7) of unused lanes */
|
||||
uint64_t unused_lanes;
|
||||
/* byte 4 is set to FF as a flag */
|
||||
struct sha1_lane_data ldata[8];
|
||||
};
|
||||
|
||||
|
||||
#define SHA1_MB_MGR_NUM_LANES_AVX2 8
|
||||
|
||||
void sha1_mb_mgr_init_avx2(struct sha1_mb_mgr *state);
|
||||
struct job_sha1 *sha1_mb_mgr_submit_avx2(struct sha1_mb_mgr *state,
|
||||
struct job_sha1 *job);
|
||||
struct job_sha1 *sha1_mb_mgr_flush_avx2(struct sha1_mb_mgr *state);
|
||||
struct job_sha1 *sha1_mb_mgr_get_comp_job_avx2(struct sha1_mb_mgr *state);
|
||||
|
||||
#endif
|
@ -1,287 +0,0 @@
|
||||
/*
|
||||
* Header file for multi buffer SHA1 algorithm data structure
|
||||
*
|
||||
* This file is provided under a dual BSD/GPLv2 license. When using or
|
||||
* redistributing this file, you may do so under either license.
|
||||
*
|
||||
* GPL LICENSE SUMMARY
|
||||
*
|
||||
* Copyright(c) 2014 Intel Corporation.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of version 2 of the GNU General Public License as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* Contact Information:
|
||||
* James Guilford <james.guilford@intel.com>
|
||||
* Tim Chen <tim.c.chen@linux.intel.com>
|
||||
*
|
||||
* BSD LICENSE
|
||||
*
|
||||
* Copyright(c) 2014 Intel Corporation.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
# Macros for defining data structures
|
||||
|
||||
# Usage example
|
||||
|
||||
#START_FIELDS # JOB_AES
|
||||
### name size align
|
||||
#FIELD _plaintext, 8, 8 # pointer to plaintext
|
||||
#FIELD _ciphertext, 8, 8 # pointer to ciphertext
|
||||
#FIELD _IV, 16, 8 # IV
|
||||
#FIELD _keys, 8, 8 # pointer to keys
|
||||
#FIELD _len, 4, 4 # length in bytes
|
||||
#FIELD _status, 4, 4 # status enumeration
|
||||
#FIELD _user_data, 8, 8 # pointer to user data
|
||||
#UNION _union, size1, align1, \
|
||||
# size2, align2, \
|
||||
# size3, align3, \
|
||||
# ...
|
||||
#END_FIELDS
|
||||
#%assign _JOB_AES_size _FIELD_OFFSET
|
||||
#%assign _JOB_AES_align _STRUCT_ALIGN
|
||||
|
||||
#########################################################################
|
||||
|
||||
# Alternate "struc-like" syntax:
|
||||
# STRUCT job_aes2
|
||||
# RES_Q .plaintext, 1
|
||||
# RES_Q .ciphertext, 1
|
||||
# RES_DQ .IV, 1
|
||||
# RES_B .nested, _JOB_AES_SIZE, _JOB_AES_ALIGN
|
||||
# RES_U .union, size1, align1, \
|
||||
# size2, align2, \
|
||||
# ...
|
||||
# ENDSTRUCT
|
||||
# # Following only needed if nesting
|
||||
# %assign job_aes2_size _FIELD_OFFSET
|
||||
# %assign job_aes2_align _STRUCT_ALIGN
|
||||
#
|
||||
# RES_* macros take a name, a count and an optional alignment.
|
||||
# The count in in terms of the base size of the macro, and the
|
||||
# default alignment is the base size.
|
||||
# The macros are:
|
||||
# Macro Base size
|
||||
# RES_B 1
|
||||
# RES_W 2
|
||||
# RES_D 4
|
||||
# RES_Q 8
|
||||
# RES_DQ 16
|
||||
# RES_Y 32
|
||||
# RES_Z 64
|
||||
#
|
||||
# RES_U defines a union. It's arguments are a name and two or more
|
||||
# pairs of "size, alignment"
|
||||
#
|
||||
# The two assigns are only needed if this structure is being nested
|
||||
# within another. Even if the assigns are not done, one can still use
|
||||
# STRUCT_NAME_size as the size of the structure.
|
||||
#
|
||||
# Note that for nesting, you still need to assign to STRUCT_NAME_size.
|
||||
#
|
||||
# The differences between this and using "struc" directly are that each
|
||||
# type is implicitly aligned to its natural length (although this can be
|
||||
# over-ridden with an explicit third parameter), and that the structure
|
||||
# is padded at the end to its overall alignment.
|
||||
#
|
||||
|
||||
#########################################################################
|
||||
|
||||
#ifndef _SHA1_MB_MGR_DATASTRUCT_ASM_
|
||||
#define _SHA1_MB_MGR_DATASTRUCT_ASM_
|
||||
|
||||
## START_FIELDS
|
||||
.macro START_FIELDS
|
||||
_FIELD_OFFSET = 0
|
||||
_STRUCT_ALIGN = 0
|
||||
.endm
|
||||
|
||||
## FIELD name size align
|
||||
.macro FIELD name size align
|
||||
_FIELD_OFFSET = (_FIELD_OFFSET + (\align) - 1) & (~ ((\align)-1))
|
||||
\name = _FIELD_OFFSET
|
||||
_FIELD_OFFSET = _FIELD_OFFSET + (\size)
|
||||
.if (\align > _STRUCT_ALIGN)
|
||||
_STRUCT_ALIGN = \align
|
||||
.endif
|
||||
.endm
|
||||
|
||||
## END_FIELDS
|
||||
.macro END_FIELDS
|
||||
_FIELD_OFFSET = (_FIELD_OFFSET + _STRUCT_ALIGN-1) & (~ (_STRUCT_ALIGN-1))
|
||||
.endm
|
||||
|
||||
########################################################################
|
||||
|
||||
.macro STRUCT p1
|
||||
START_FIELDS
|
||||
.struc \p1
|
||||
.endm
|
||||
|
||||
.macro ENDSTRUCT
|
||||
tmp = _FIELD_OFFSET
|
||||
END_FIELDS
|
||||
tmp = (_FIELD_OFFSET - %%tmp)
|
||||
.if (tmp > 0)
|
||||
.lcomm tmp
|
||||
.endif
|
||||
.endstruc
|
||||
.endm
|
||||
|
||||
## RES_int name size align
|
||||
.macro RES_int p1 p2 p3
|
||||
name = \p1
|
||||
size = \p2
|
||||
align = .\p3
|
||||
|
||||
_FIELD_OFFSET = (_FIELD_OFFSET + (align) - 1) & (~ ((align)-1))
|
||||
.align align
|
||||
.lcomm name size
|
||||
_FIELD_OFFSET = _FIELD_OFFSET + (size)
|
||||
.if (align > _STRUCT_ALIGN)
|
||||
_STRUCT_ALIGN = align
|
||||
.endif
|
||||
.endm
|
||||
|
||||
|
||||
|
||||
# macro RES_B name, size [, align]
|
||||
.macro RES_B _name, _size, _align=1
|
||||
RES_int _name _size _align
|
||||
.endm
|
||||
|
||||
# macro RES_W name, size [, align]
|
||||
.macro RES_W _name, _size, _align=2
|
||||
RES_int _name 2*(_size) _align
|
||||
.endm
|
||||
|
||||
# macro RES_D name, size [, align]
|
||||
.macro RES_D _name, _size, _align=4
|
||||
RES_int _name 4*(_size) _align
|
||||
.endm
|
||||
|
||||
# macro RES_Q name, size [, align]
|
||||
.macro RES_Q _name, _size, _align=8
|
||||
RES_int _name 8*(_size) _align
|
||||
.endm
|
||||
|
||||
# macro RES_DQ name, size [, align]
|
||||
.macro RES_DQ _name, _size, _align=16
|
||||
RES_int _name 16*(_size) _align
|
||||
.endm
|
||||
|
||||
# macro RES_Y name, size [, align]
|
||||
.macro RES_Y _name, _size, _align=32
|
||||
RES_int _name 32*(_size) _align
|
||||
.endm
|
||||
|
||||
# macro RES_Z name, size [, align]
|
||||
.macro RES_Z _name, _size, _align=64
|
||||
RES_int _name 64*(_size) _align
|
||||
.endm
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
########################################################################
|
||||
#### Define constants
|
||||
########################################################################
|
||||
|
||||
########################################################################
|
||||
#### Define SHA1 Out Of Order Data Structures
|
||||
########################################################################
|
||||
|
||||
START_FIELDS # LANE_DATA
|
||||
### name size align
|
||||
FIELD _job_in_lane, 8, 8 # pointer to job object
|
||||
END_FIELDS
|
||||
|
||||
_LANE_DATA_size = _FIELD_OFFSET
|
||||
_LANE_DATA_align = _STRUCT_ALIGN
|
||||
|
||||
########################################################################
|
||||
|
||||
START_FIELDS # SHA1_ARGS_X8
|
||||
### name size align
|
||||
FIELD _digest, 4*5*8, 16 # transposed digest
|
||||
FIELD _data_ptr, 8*8, 8 # array of pointers to data
|
||||
END_FIELDS
|
||||
|
||||
_SHA1_ARGS_X4_size = _FIELD_OFFSET
|
||||
_SHA1_ARGS_X4_align = _STRUCT_ALIGN
|
||||
_SHA1_ARGS_X8_size = _FIELD_OFFSET
|
||||
_SHA1_ARGS_X8_align = _STRUCT_ALIGN
|
||||
|
||||
########################################################################
|
||||
|
||||
START_FIELDS # MB_MGR
|
||||
### name size align
|
||||
FIELD _args, _SHA1_ARGS_X4_size, _SHA1_ARGS_X4_align
|
||||
FIELD _lens, 4*8, 8
|
||||
FIELD _unused_lanes, 8, 8
|
||||
FIELD _ldata, _LANE_DATA_size*8, _LANE_DATA_align
|
||||
END_FIELDS
|
||||
|
||||
_MB_MGR_size = _FIELD_OFFSET
|
||||
_MB_MGR_align = _STRUCT_ALIGN
|
||||
|
||||
_args_digest = _args + _digest
|
||||
_args_data_ptr = _args + _data_ptr
|
||||
|
||||
|
||||
########################################################################
|
||||
#### Define constants
|
||||
########################################################################
|
||||
|
||||
#define STS_UNKNOWN 0
|
||||
#define STS_BEING_PROCESSED 1
|
||||
#define STS_COMPLETED 2
|
||||
|
||||
########################################################################
|
||||
#### Define JOB_SHA1 structure
|
||||
########################################################################
|
||||
|
||||
START_FIELDS # JOB_SHA1
|
||||
|
||||
### name size align
|
||||
FIELD _buffer, 8, 8 # pointer to buffer
|
||||
FIELD _len, 4, 4 # length in bytes
|
||||
FIELD _result_digest, 5*4, 32 # Digest (output)
|
||||
FIELD _status, 4, 4
|
||||
FIELD _user_data, 8, 8
|
||||
END_FIELDS
|
||||
|
||||
_JOB_SHA1_size = _FIELD_OFFSET
|
||||
_JOB_SHA1_align = _STRUCT_ALIGN
|
@ -1,304 +0,0 @@
|
||||
/*
|
||||
* Flush routine for SHA1 multibuffer
|
||||
*
|
||||
* This file is provided under a dual BSD/GPLv2 license. When using or
|
||||
* redistributing this file, you may do so under either license.
|
||||
*
|
||||
* GPL LICENSE SUMMARY
|
||||
*
|
||||
* Copyright(c) 2014 Intel Corporation.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of version 2 of the GNU General Public License as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* Contact Information:
|
||||
* James Guilford <james.guilford@intel.com>
|
||||
* Tim Chen <tim.c.chen@linux.intel.com>
|
||||
*
|
||||
* BSD LICENSE
|
||||
*
|
||||
* Copyright(c) 2014 Intel Corporation.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
#include <linux/linkage.h>
|
||||
#include <asm/frame.h>
|
||||
#include "sha1_mb_mgr_datastruct.S"
|
||||
|
||||
|
||||
.extern sha1_x8_avx2
|
||||
|
||||
# LINUX register definitions
|
||||
#define arg1 %rdi
|
||||
#define arg2 %rsi
|
||||
|
||||
# Common definitions
|
||||
#define state arg1
|
||||
#define job arg2
|
||||
#define len2 arg2
|
||||
|
||||
# idx must be a register not clobbered by sha1_x8_avx2
|
||||
#define idx %r8
|
||||
#define DWORD_idx %r8d
|
||||
|
||||
#define unused_lanes %rbx
|
||||
#define lane_data %rbx
|
||||
#define tmp2 %rbx
|
||||
#define tmp2_w %ebx
|
||||
|
||||
#define job_rax %rax
|
||||
#define tmp1 %rax
|
||||
#define size_offset %rax
|
||||
#define tmp %rax
|
||||
#define start_offset %rax
|
||||
|
||||
#define tmp3 %arg1
|
||||
|
||||
#define extra_blocks %arg2
|
||||
#define p %arg2
|
||||
|
||||
.macro LABEL prefix n
|
||||
\prefix\n\():
|
||||
.endm
|
||||
|
||||
.macro JNE_SKIP i
|
||||
jne skip_\i
|
||||
.endm
|
||||
|
||||
.altmacro
|
||||
.macro SET_OFFSET _offset
|
||||
offset = \_offset
|
||||
.endm
|
||||
.noaltmacro
|
||||
|
||||
# JOB* sha1_mb_mgr_flush_avx2(MB_MGR *state)
|
||||
# arg 1 : rcx : state
|
||||
ENTRY(sha1_mb_mgr_flush_avx2)
|
||||
FRAME_BEGIN
|
||||
push %rbx
|
||||
|
||||
# If bit (32+3) is set, then all lanes are empty
|
||||
mov _unused_lanes(state), unused_lanes
|
||||
bt $32+3, unused_lanes
|
||||
jc return_null
|
||||
|
||||
# find a lane with a non-null job
|
||||
xor idx, idx
|
||||
offset = (_ldata + 1 * _LANE_DATA_size + _job_in_lane)
|
||||
cmpq $0, offset(state)
|
||||
cmovne one(%rip), idx
|
||||
offset = (_ldata + 2 * _LANE_DATA_size + _job_in_lane)
|
||||
cmpq $0, offset(state)
|
||||
cmovne two(%rip), idx
|
||||
offset = (_ldata + 3 * _LANE_DATA_size + _job_in_lane)
|
||||
cmpq $0, offset(state)
|
||||
cmovne three(%rip), idx
|
||||
offset = (_ldata + 4 * _LANE_DATA_size + _job_in_lane)
|
||||
cmpq $0, offset(state)
|
||||
cmovne four(%rip), idx
|
||||
offset = (_ldata + 5 * _LANE_DATA_size + _job_in_lane)
|
||||
cmpq $0, offset(state)
|
||||
cmovne five(%rip), idx
|
||||
offset = (_ldata + 6 * _LANE_DATA_size + _job_in_lane)
|
||||
cmpq $0, offset(state)
|
||||
cmovne six(%rip), idx
|
||||
offset = (_ldata + 7 * _LANE_DATA_size + _job_in_lane)
|
||||
cmpq $0, offset(state)
|
||||
cmovne seven(%rip), idx
|
||||
|
||||
# copy idx to empty lanes
|
||||
copy_lane_data:
|
||||
offset = (_args + _data_ptr)
|
||||
mov offset(state,idx,8), tmp
|
||||
|
||||
I = 0
|
||||
.rep 8
|
||||
offset = (_ldata + I * _LANE_DATA_size + _job_in_lane)
|
||||
cmpq $0, offset(state)
|
||||
.altmacro
|
||||
JNE_SKIP %I
|
||||
offset = (_args + _data_ptr + 8*I)
|
||||
mov tmp, offset(state)
|
||||
offset = (_lens + 4*I)
|
||||
movl $0xFFFFFFFF, offset(state)
|
||||
LABEL skip_ %I
|
||||
I = (I+1)
|
||||
.noaltmacro
|
||||
.endr
|
||||
|
||||
# Find min length
|
||||
vmovdqu _lens+0*16(state), %xmm0
|
||||
vmovdqu _lens+1*16(state), %xmm1
|
||||
|
||||
vpminud %xmm1, %xmm0, %xmm2 # xmm2 has {D,C,B,A}
|
||||
vpalignr $8, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,D,C}
|
||||
vpminud %xmm3, %xmm2, %xmm2 # xmm2 has {x,x,E,F}
|
||||
vpalignr $4, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,x,E}
|
||||
vpminud %xmm3, %xmm2, %xmm2 # xmm2 has min value in low dword
|
||||
|
||||
vmovd %xmm2, DWORD_idx
|
||||
mov idx, len2
|
||||
and $0xF, idx
|
||||
shr $4, len2
|
||||
jz len_is_0
|
||||
|
||||
vpand clear_low_nibble(%rip), %xmm2, %xmm2
|
||||
vpshufd $0, %xmm2, %xmm2
|
||||
|
||||
vpsubd %xmm2, %xmm0, %xmm0
|
||||
vpsubd %xmm2, %xmm1, %xmm1
|
||||
|
||||
vmovdqu %xmm0, _lens+0*16(state)
|
||||
vmovdqu %xmm1, _lens+1*16(state)
|
||||
|
||||
# "state" and "args" are the same address, arg1
|
||||
# len is arg2
|
||||
call sha1_x8_avx2
|
||||
# state and idx are intact
|
||||
|
||||
|
||||
len_is_0:
|
||||
# process completed job "idx"
|
||||
imul $_LANE_DATA_size, idx, lane_data
|
||||
lea _ldata(state, lane_data), lane_data
|
||||
|
||||
mov _job_in_lane(lane_data), job_rax
|
||||
movq $0, _job_in_lane(lane_data)
|
||||
movl $STS_COMPLETED, _status(job_rax)
|
||||
mov _unused_lanes(state), unused_lanes
|
||||
shl $4, unused_lanes
|
||||
or idx, unused_lanes
|
||||
mov unused_lanes, _unused_lanes(state)
|
||||
|
||||
movl $0xFFFFFFFF, _lens(state, idx, 4)
|
||||
|
||||
vmovd _args_digest(state , idx, 4) , %xmm0
|
||||
vpinsrd $1, _args_digest+1*32(state, idx, 4), %xmm0, %xmm0
|
||||
vpinsrd $2, _args_digest+2*32(state, idx, 4), %xmm0, %xmm0
|
||||
vpinsrd $3, _args_digest+3*32(state, idx, 4), %xmm0, %xmm0
|
||||
movl _args_digest+4*32(state, idx, 4), tmp2_w
|
||||
|
||||
vmovdqu %xmm0, _result_digest(job_rax)
|
||||
offset = (_result_digest + 1*16)
|
||||
mov tmp2_w, offset(job_rax)
|
||||
|
||||
return:
|
||||
pop %rbx
|
||||
FRAME_END
|
||||
ret
|
||||
|
||||
return_null:
|
||||
xor job_rax, job_rax
|
||||
jmp return
|
||||
ENDPROC(sha1_mb_mgr_flush_avx2)
|
||||
|
||||
|
||||
#################################################################
|
||||
|
||||
.align 16
|
||||
ENTRY(sha1_mb_mgr_get_comp_job_avx2)
|
||||
push %rbx
|
||||
|
||||
## if bit 32+3 is set, then all lanes are empty
|
||||
mov _unused_lanes(state), unused_lanes
|
||||
bt $(32+3), unused_lanes
|
||||
jc .return_null
|
||||
|
||||
# Find min length
|
||||
vmovdqu _lens(state), %xmm0
|
||||
vmovdqu _lens+1*16(state), %xmm1
|
||||
|
||||
vpminud %xmm1, %xmm0, %xmm2 # xmm2 has {D,C,B,A}
|
||||
vpalignr $8, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,D,C}
|
||||
vpminud %xmm3, %xmm2, %xmm2 # xmm2 has {x,x,E,F}
|
||||
vpalignr $4, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,x,E}
|
||||
vpminud %xmm3, %xmm2, %xmm2 # xmm2 has min value in low dword
|
||||
|
||||
vmovd %xmm2, DWORD_idx
|
||||
test $~0xF, idx
|
||||
jnz .return_null
|
||||
|
||||
# process completed job "idx"
|
||||
imul $_LANE_DATA_size, idx, lane_data
|
||||
lea _ldata(state, lane_data), lane_data
|
||||
|
||||
mov _job_in_lane(lane_data), job_rax
|
||||
movq $0, _job_in_lane(lane_data)
|
||||
movl $STS_COMPLETED, _status(job_rax)
|
||||
mov _unused_lanes(state), unused_lanes
|
||||
shl $4, unused_lanes
|
||||
or idx, unused_lanes
|
||||
mov unused_lanes, _unused_lanes(state)
|
||||
|
||||
movl $0xFFFFFFFF, _lens(state, idx, 4)
|
||||
|
||||
vmovd _args_digest(state, idx, 4), %xmm0
|
||||
vpinsrd $1, _args_digest+1*32(state, idx, 4), %xmm0, %xmm0
|
||||
vpinsrd $2, _args_digest+2*32(state, idx, 4), %xmm0, %xmm0
|
||||
vpinsrd $3, _args_digest+3*32(state, idx, 4), %xmm0, %xmm0
|
||||
movl _args_digest+4*32(state, idx, 4), tmp2_w
|
||||
|
||||
vmovdqu %xmm0, _result_digest(job_rax)
|
||||
movl tmp2_w, _result_digest+1*16(job_rax)
|
||||
|
||||
pop %rbx
|
||||
|
||||
ret
|
||||
|
||||
.return_null:
|
||||
xor job_rax, job_rax
|
||||
pop %rbx
|
||||
ret
|
||||
ENDPROC(sha1_mb_mgr_get_comp_job_avx2)
|
||||
|
||||
.section .rodata.cst16.clear_low_nibble, "aM", @progbits, 16
|
||||
.align 16
|
||||
clear_low_nibble:
|
||||
.octa 0x000000000000000000000000FFFFFFF0
|
||||
|
||||
.section .rodata.cst8, "aM", @progbits, 8
|
||||
.align 8
|
||||
one:
|
||||
.quad 1
|
||||
two:
|
||||
.quad 2
|
||||
three:
|
||||
.quad 3
|
||||
four:
|
||||
.quad 4
|
||||
five:
|
||||
.quad 5
|
||||
six:
|
||||
.quad 6
|
||||
seven:
|
||||
.quad 7
|
@ -1,64 +0,0 @@
|
||||
/*
|
||||
* Initialization code for multi buffer SHA1 algorithm for AVX2
|
||||
*
|
||||
* This file is provided under a dual BSD/GPLv2 license. When using or
|
||||
* redistributing this file, you may do so under either license.
|
||||
*
|
||||
* GPL LICENSE SUMMARY
|
||||
*
|
||||
* Copyright(c) 2014 Intel Corporation.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of version 2 of the GNU General Public License as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* Contact Information:
|
||||
* Tim Chen <tim.c.chen@linux.intel.com>
|
||||
*
|
||||
* BSD LICENSE
|
||||
*
|
||||
* Copyright(c) 2014 Intel Corporation.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "sha1_mb_mgr.h"
|
||||
|
||||
void sha1_mb_mgr_init_avx2(struct sha1_mb_mgr *state)
|
||||
{
|
||||
unsigned int j;
|
||||
state->unused_lanes = 0xF76543210ULL;
|
||||
for (j = 0; j < 8; j++) {
|
||||
state->lens[j] = 0xFFFFFFFF;
|
||||
state->ldata[j].job_in_lane = NULL;
|
||||
}
|
||||
}
|
@ -1,209 +0,0 @@
|
||||
/*
|
||||
* Buffer submit code for multi buffer SHA1 algorithm
|
||||
*
|
||||
* This file is provided under a dual BSD/GPLv2 license. When using or
|
||||
* redistributing this file, you may do so under either license.
|
||||
*
|
||||
* GPL LICENSE SUMMARY
|
||||
*
|
||||
* Copyright(c) 2014 Intel Corporation.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of version 2 of the GNU General Public License as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* Contact Information:
|
||||
* James Guilford <james.guilford@intel.com>
|
||||
* Tim Chen <tim.c.chen@linux.intel.com>
|
||||
*
|
||||
* BSD LICENSE
|
||||
*
|
||||
* Copyright(c) 2014 Intel Corporation.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <linux/linkage.h>
|
||||
#include <asm/frame.h>
|
||||
#include "sha1_mb_mgr_datastruct.S"
|
||||
|
||||
|
||||
.extern sha1_x8_avx
|
||||
|
||||
# LINUX register definitions
|
||||
arg1 = %rdi
|
||||
arg2 = %rsi
|
||||
size_offset = %rcx
|
||||
tmp2 = %rcx
|
||||
extra_blocks = %rdx
|
||||
|
||||
# Common definitions
|
||||
#define state arg1
|
||||
#define job %rsi
|
||||
#define len2 arg2
|
||||
#define p2 arg2
|
||||
|
||||
# idx must be a register not clobberred by sha1_x8_avx2
|
||||
idx = %r8
|
||||
DWORD_idx = %r8d
|
||||
last_len = %r8
|
||||
|
||||
p = %r11
|
||||
start_offset = %r11
|
||||
|
||||
unused_lanes = %rbx
|
||||
BYTE_unused_lanes = %bl
|
||||
|
||||
job_rax = %rax
|
||||
len = %rax
|
||||
DWORD_len = %eax
|
||||
|
||||
lane = %r12
|
||||
tmp3 = %r12
|
||||
|
||||
tmp = %r9
|
||||
DWORD_tmp = %r9d
|
||||
|
||||
lane_data = %r10
|
||||
|
||||
# JOB* submit_mb_mgr_submit_avx2(MB_MGR *state, job_sha1 *job)
|
||||
# arg 1 : rcx : state
|
||||
# arg 2 : rdx : job
|
||||
ENTRY(sha1_mb_mgr_submit_avx2)
|
||||
FRAME_BEGIN
|
||||
push %rbx
|
||||
push %r12
|
||||
|
||||
mov _unused_lanes(state), unused_lanes
|
||||
mov unused_lanes, lane
|
||||
and $0xF, lane
|
||||
shr $4, unused_lanes
|
||||
imul $_LANE_DATA_size, lane, lane_data
|
||||
movl $STS_BEING_PROCESSED, _status(job)
|
||||
lea _ldata(state, lane_data), lane_data
|
||||
mov unused_lanes, _unused_lanes(state)
|
||||
movl _len(job), DWORD_len
|
||||
|
||||
mov job, _job_in_lane(lane_data)
|
||||
shl $4, len
|
||||
or lane, len
|
||||
|
||||
movl DWORD_len, _lens(state , lane, 4)
|
||||
|
||||
# Load digest words from result_digest
|
||||
vmovdqu _result_digest(job), %xmm0
|
||||
mov _result_digest+1*16(job), DWORD_tmp
|
||||
vmovd %xmm0, _args_digest(state, lane, 4)
|
||||
vpextrd $1, %xmm0, _args_digest+1*32(state , lane, 4)
|
||||
vpextrd $2, %xmm0, _args_digest+2*32(state , lane, 4)
|
||||
vpextrd $3, %xmm0, _args_digest+3*32(state , lane, 4)
|
||||
movl DWORD_tmp, _args_digest+4*32(state , lane, 4)
|
||||
|
||||
mov _buffer(job), p
|
||||
mov p, _args_data_ptr(state, lane, 8)
|
||||
|
||||
cmp $0xF, unused_lanes
|
||||
jne return_null
|
||||
|
||||
start_loop:
|
||||
# Find min length
|
||||
vmovdqa _lens(state), %xmm0
|
||||
vmovdqa _lens+1*16(state), %xmm1
|
||||
|
||||
vpminud %xmm1, %xmm0, %xmm2 # xmm2 has {D,C,B,A}
|
||||
vpalignr $8, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,D,C}
|
||||
vpminud %xmm3, %xmm2, %xmm2 # xmm2 has {x,x,E,F}
|
||||
vpalignr $4, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,x,E}
|
||||
vpminud %xmm3, %xmm2, %xmm2 # xmm2 has min value in low dword
|
||||
|
||||
vmovd %xmm2, DWORD_idx
|
||||
mov idx, len2
|
||||
and $0xF, idx
|
||||
shr $4, len2
|
||||
jz len_is_0
|
||||
|
||||
vpand clear_low_nibble(%rip), %xmm2, %xmm2
|
||||
vpshufd $0, %xmm2, %xmm2
|
||||
|
||||
vpsubd %xmm2, %xmm0, %xmm0
|
||||
vpsubd %xmm2, %xmm1, %xmm1
|
||||
|
||||
vmovdqa %xmm0, _lens + 0*16(state)
|
||||
vmovdqa %xmm1, _lens + 1*16(state)
|
||||
|
||||
|
||||
# "state" and "args" are the same address, arg1
|
||||
# len is arg2
|
||||
call sha1_x8_avx2
|
||||
|
||||
# state and idx are intact
|
||||
|
||||
len_is_0:
|
||||
# process completed job "idx"
|
||||
imul $_LANE_DATA_size, idx, lane_data
|
||||
lea _ldata(state, lane_data), lane_data
|
||||
|
||||
mov _job_in_lane(lane_data), job_rax
|
||||
mov _unused_lanes(state), unused_lanes
|
||||
movq $0, _job_in_lane(lane_data)
|
||||
movl $STS_COMPLETED, _status(job_rax)
|
||||
shl $4, unused_lanes
|
||||
or idx, unused_lanes
|
||||
mov unused_lanes, _unused_lanes(state)
|
||||
|
||||
movl $0xFFFFFFFF, _lens(state, idx, 4)
|
||||
|
||||
vmovd _args_digest(state, idx, 4), %xmm0
|
||||
vpinsrd $1, _args_digest+1*32(state , idx, 4), %xmm0, %xmm0
|
||||
vpinsrd $2, _args_digest+2*32(state , idx, 4), %xmm0, %xmm0
|
||||
vpinsrd $3, _args_digest+3*32(state , idx, 4), %xmm0, %xmm0
|
||||
movl _args_digest+4*32(state, idx, 4), DWORD_tmp
|
||||
|
||||
vmovdqu %xmm0, _result_digest(job_rax)
|
||||
movl DWORD_tmp, _result_digest+1*16(job_rax)
|
||||
|
||||
return:
|
||||
pop %r12
|
||||
pop %rbx
|
||||
FRAME_END
|
||||
ret
|
||||
|
||||
return_null:
|
||||
xor job_rax, job_rax
|
||||
jmp return
|
||||
|
||||
ENDPROC(sha1_mb_mgr_submit_avx2)
|
||||
|
||||
.section .rodata.cst16.clear_low_nibble, "aM", @progbits, 16
|
||||
.align 16
|
||||
clear_low_nibble:
|
||||
.octa 0x000000000000000000000000FFFFFFF0
|
@ -1,492 +0,0 @@
|
||||
/*
|
||||
* Multi-buffer SHA1 algorithm hash compute routine
|
||||
*
|
||||
* This file is provided under a dual BSD/GPLv2 license. When using or
|
||||
* redistributing this file, you may do so under either license.
|
||||
*
|
||||
* GPL LICENSE SUMMARY
|
||||
*
|
||||
* Copyright(c) 2014 Intel Corporation.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of version 2 of the GNU General Public License as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* Contact Information:
|
||||
* James Guilford <james.guilford@intel.com>
|
||||
* Tim Chen <tim.c.chen@linux.intel.com>
|
||||
*
|
||||
* BSD LICENSE
|
||||
*
|
||||
* Copyright(c) 2014 Intel Corporation.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <linux/linkage.h>
|
||||
#include "sha1_mb_mgr_datastruct.S"
|
||||
|
||||
## code to compute oct SHA1 using SSE-256
|
||||
## outer calling routine takes care of save and restore of XMM registers
|
||||
|
||||
## Function clobbers: rax, rcx, rdx, rbx, rsi, rdi, r9-r15# ymm0-15
|
||||
##
|
||||
## Linux clobbers: rax rbx rcx rdx rsi r9 r10 r11 r12 r13 r14 r15
|
||||
## Linux preserves: rdi rbp r8
|
||||
##
|
||||
## clobbers ymm0-15
|
||||
|
||||
|
||||
# TRANSPOSE8 r0, r1, r2, r3, r4, r5, r6, r7, t0, t1
|
||||
# "transpose" data in {r0...r7} using temps {t0...t1}
|
||||
# Input looks like: {r0 r1 r2 r3 r4 r5 r6 r7}
|
||||
# r0 = {a7 a6 a5 a4 a3 a2 a1 a0}
|
||||
# r1 = {b7 b6 b5 b4 b3 b2 b1 b0}
|
||||
# r2 = {c7 c6 c5 c4 c3 c2 c1 c0}
|
||||
# r3 = {d7 d6 d5 d4 d3 d2 d1 d0}
|
||||
# r4 = {e7 e6 e5 e4 e3 e2 e1 e0}
|
||||
# r5 = {f7 f6 f5 f4 f3 f2 f1 f0}
|
||||
# r6 = {g7 g6 g5 g4 g3 g2 g1 g0}
|
||||
# r7 = {h7 h6 h5 h4 h3 h2 h1 h0}
|
||||
#
|
||||
# Output looks like: {r0 r1 r2 r3 r4 r5 r6 r7}
|
||||
# r0 = {h0 g0 f0 e0 d0 c0 b0 a0}
|
||||
# r1 = {h1 g1 f1 e1 d1 c1 b1 a1}
|
||||
# r2 = {h2 g2 f2 e2 d2 c2 b2 a2}
|
||||
# r3 = {h3 g3 f3 e3 d3 c3 b3 a3}
|
||||
# r4 = {h4 g4 f4 e4 d4 c4 b4 a4}
|
||||
# r5 = {h5 g5 f5 e5 d5 c5 b5 a5}
|
||||
# r6 = {h6 g6 f6 e6 d6 c6 b6 a6}
|
||||
# r7 = {h7 g7 f7 e7 d7 c7 b7 a7}
|
||||
#
|
||||
|
||||
.macro TRANSPOSE8 r0 r1 r2 r3 r4 r5 r6 r7 t0 t1
|
||||
# process top half (r0..r3) {a...d}
|
||||
vshufps $0x44, \r1, \r0, \t0 # t0 = {b5 b4 a5 a4 b1 b0 a1 a0}
|
||||
vshufps $0xEE, \r1, \r0, \r0 # r0 = {b7 b6 a7 a6 b3 b2 a3 a2}
|
||||
vshufps $0x44, \r3, \r2, \t1 # t1 = {d5 d4 c5 c4 d1 d0 c1 c0}
|
||||
vshufps $0xEE, \r3, \r2, \r2 # r2 = {d7 d6 c7 c6 d3 d2 c3 c2}
|
||||
vshufps $0xDD, \t1, \t0, \r3 # r3 = {d5 c5 b5 a5 d1 c1 b1 a1}
|
||||
vshufps $0x88, \r2, \r0, \r1 # r1 = {d6 c6 b6 a6 d2 c2 b2 a2}
|
||||
vshufps $0xDD, \r2, \r0, \r0 # r0 = {d7 c7 b7 a7 d3 c3 b3 a3}
|
||||
vshufps $0x88, \t1, \t0, \t0 # t0 = {d4 c4 b4 a4 d0 c0 b0 a0}
|
||||
|
||||
# use r2 in place of t0
|
||||
# process bottom half (r4..r7) {e...h}
|
||||
vshufps $0x44, \r5, \r4, \r2 # r2 = {f5 f4 e5 e4 f1 f0 e1 e0}
|
||||
vshufps $0xEE, \r5, \r4, \r4 # r4 = {f7 f6 e7 e6 f3 f2 e3 e2}
|
||||
vshufps $0x44, \r7, \r6, \t1 # t1 = {h5 h4 g5 g4 h1 h0 g1 g0}
|
||||
vshufps $0xEE, \r7, \r6, \r6 # r6 = {h7 h6 g7 g6 h3 h2 g3 g2}
|
||||
vshufps $0xDD, \t1, \r2, \r7 # r7 = {h5 g5 f5 e5 h1 g1 f1 e1}
|
||||
vshufps $0x88, \r6, \r4, \r5 # r5 = {h6 g6 f6 e6 h2 g2 f2 e2}
|
||||
vshufps $0xDD, \r6, \r4, \r4 # r4 = {h7 g7 f7 e7 h3 g3 f3 e3}
|
||||
vshufps $0x88, \t1, \r2, \t1 # t1 = {h4 g4 f4 e4 h0 g0 f0 e0}
|
||||
|
||||
vperm2f128 $0x13, \r1, \r5, \r6 # h6...a6
|
||||
vperm2f128 $0x02, \r1, \r5, \r2 # h2...a2
|
||||
vperm2f128 $0x13, \r3, \r7, \r5 # h5...a5
|
||||
vperm2f128 $0x02, \r3, \r7, \r1 # h1...a1
|
||||
vperm2f128 $0x13, \r0, \r4, \r7 # h7...a7
|
||||
vperm2f128 $0x02, \r0, \r4, \r3 # h3...a3
|
||||
vperm2f128 $0x13, \t0, \t1, \r4 # h4...a4
|
||||
vperm2f128 $0x02, \t0, \t1, \r0 # h0...a0
|
||||
|
||||
.endm
|
||||
##
|
||||
## Magic functions defined in FIPS 180-1
|
||||
##
|
||||
# macro MAGIC_F0 F,B,C,D,T ## F = (D ^ (B & (C ^ D)))
|
||||
.macro MAGIC_F0 regF regB regC regD regT
|
||||
vpxor \regD, \regC, \regF
|
||||
vpand \regB, \regF, \regF
|
||||
vpxor \regD, \regF, \regF
|
||||
.endm
|
||||
|
||||
# macro MAGIC_F1 F,B,C,D,T ## F = (B ^ C ^ D)
|
||||
.macro MAGIC_F1 regF regB regC regD regT
|
||||
vpxor \regC, \regD, \regF
|
||||
vpxor \regB, \regF, \regF
|
||||
.endm
|
||||
|
||||
# macro MAGIC_F2 F,B,C,D,T ## F = ((B & C) | (B & D) | (C & D))
|
||||
.macro MAGIC_F2 regF regB regC regD regT
|
||||
vpor \regC, \regB, \regF
|
||||
vpand \regC, \regB, \regT
|
||||
vpand \regD, \regF, \regF
|
||||
vpor \regT, \regF, \regF
|
||||
.endm
|
||||
|
||||
# macro MAGIC_F3 F,B,C,D,T ## F = (B ^ C ^ D)
|
||||
.macro MAGIC_F3 regF regB regC regD regT
|
||||
MAGIC_F1 \regF,\regB,\regC,\regD,\regT
|
||||
.endm
|
||||
|
||||
# PROLD reg, imm, tmp
|
||||
.macro PROLD reg imm tmp
|
||||
vpsrld $(32-\imm), \reg, \tmp
|
||||
vpslld $\imm, \reg, \reg
|
||||
vpor \tmp, \reg, \reg
|
||||
.endm
|
||||
|
||||
.macro PROLD_nd reg imm tmp src
|
||||
vpsrld $(32-\imm), \src, \tmp
|
||||
vpslld $\imm, \src, \reg
|
||||
vpor \tmp, \reg, \reg
|
||||
.endm
|
||||
|
||||
.macro SHA1_STEP_00_15 regA regB regC regD regE regT regF memW immCNT MAGIC
|
||||
vpaddd \immCNT, \regE, \regE
|
||||
vpaddd \memW*32(%rsp), \regE, \regE
|
||||
PROLD_nd \regT, 5, \regF, \regA
|
||||
vpaddd \regT, \regE, \regE
|
||||
\MAGIC \regF, \regB, \regC, \regD, \regT
|
||||
PROLD \regB, 30, \regT
|
||||
vpaddd \regF, \regE, \regE
|
||||
.endm
|
||||
|
||||
.macro SHA1_STEP_16_79 regA regB regC regD regE regT regF memW immCNT MAGIC
|
||||
vpaddd \immCNT, \regE, \regE
|
||||
offset = ((\memW - 14) & 15) * 32
|
||||
vmovdqu offset(%rsp), W14
|
||||
vpxor W14, W16, W16
|
||||
offset = ((\memW - 8) & 15) * 32
|
||||
vpxor offset(%rsp), W16, W16
|
||||
offset = ((\memW - 3) & 15) * 32
|
||||
vpxor offset(%rsp), W16, W16
|
||||
vpsrld $(32-1), W16, \regF
|
||||
vpslld $1, W16, W16
|
||||
vpor W16, \regF, \regF
|
||||
|
||||
ROTATE_W
|
||||
|
||||
offset = ((\memW - 0) & 15) * 32
|
||||
vmovdqu \regF, offset(%rsp)
|
||||
vpaddd \regF, \regE, \regE
|
||||
PROLD_nd \regT, 5, \regF, \regA
|
||||
vpaddd \regT, \regE, \regE
|
||||
\MAGIC \regF,\regB,\regC,\regD,\regT ## FUN = MAGIC_Fi(B,C,D)
|
||||
PROLD \regB,30, \regT
|
||||
vpaddd \regF, \regE, \regE
|
||||
.endm
|
||||
|
||||
########################################################################
|
||||
########################################################################
|
||||
########################################################################
|
||||
|
||||
## FRAMESZ plus pushes must be an odd multiple of 8
|
||||
YMM_SAVE = (15-15)*32
|
||||
FRAMESZ = 32*16 + YMM_SAVE
|
||||
_YMM = FRAMESZ - YMM_SAVE
|
||||
|
||||
#define VMOVPS vmovups
|
||||
|
||||
IDX = %rax
|
||||
inp0 = %r9
|
||||
inp1 = %r10
|
||||
inp2 = %r11
|
||||
inp3 = %r12
|
||||
inp4 = %r13
|
||||
inp5 = %r14
|
||||
inp6 = %r15
|
||||
inp7 = %rcx
|
||||
arg1 = %rdi
|
||||
arg2 = %rsi
|
||||
RSP_SAVE = %rdx
|
||||
|
||||
# ymm0 A
|
||||
# ymm1 B
|
||||
# ymm2 C
|
||||
# ymm3 D
|
||||
# ymm4 E
|
||||
# ymm5 F AA
|
||||
# ymm6 T0 BB
|
||||
# ymm7 T1 CC
|
||||
# ymm8 T2 DD
|
||||
# ymm9 T3 EE
|
||||
# ymm10 T4 TMP
|
||||
# ymm11 T5 FUN
|
||||
# ymm12 T6 K
|
||||
# ymm13 T7 W14
|
||||
# ymm14 T8 W15
|
||||
# ymm15 T9 W16
|
||||
|
||||
|
||||
A = %ymm0
|
||||
B = %ymm1
|
||||
C = %ymm2
|
||||
D = %ymm3
|
||||
E = %ymm4
|
||||
F = %ymm5
|
||||
T0 = %ymm6
|
||||
T1 = %ymm7
|
||||
T2 = %ymm8
|
||||
T3 = %ymm9
|
||||
T4 = %ymm10
|
||||
T5 = %ymm11
|
||||
T6 = %ymm12
|
||||
T7 = %ymm13
|
||||
T8 = %ymm14
|
||||
T9 = %ymm15
|
||||
|
||||
AA = %ymm5
|
||||
BB = %ymm6
|
||||
CC = %ymm7
|
||||
DD = %ymm8
|
||||
EE = %ymm9
|
||||
TMP = %ymm10
|
||||
FUN = %ymm11
|
||||
K = %ymm12
|
||||
W14 = %ymm13
|
||||
W15 = %ymm14
|
||||
W16 = %ymm15
|
||||
|
||||
.macro ROTATE_ARGS
|
||||
TMP_ = E
|
||||
E = D
|
||||
D = C
|
||||
C = B
|
||||
B = A
|
||||
A = TMP_
|
||||
.endm
|
||||
|
||||
.macro ROTATE_W
|
||||
TMP_ = W16
|
||||
W16 = W15
|
||||
W15 = W14
|
||||
W14 = TMP_
|
||||
.endm
|
||||
|
||||
# 8 streams x 5 32bit words per digest x 4 bytes per word
|
||||
#define DIGEST_SIZE (8*5*4)
|
||||
|
||||
.align 32
|
||||
|
||||
# void sha1_x8_avx2(void **input_data, UINT128 *digest, UINT32 size)
|
||||
# arg 1 : pointer to array[4] of pointer to input data
|
||||
# arg 2 : size (in blocks) ;; assumed to be >= 1
|
||||
#
|
||||
ENTRY(sha1_x8_avx2)
|
||||
|
||||
# save callee-saved clobbered registers to comply with C function ABI
|
||||
push %r12
|
||||
push %r13
|
||||
push %r14
|
||||
push %r15
|
||||
|
||||
#save rsp
|
||||
mov %rsp, RSP_SAVE
|
||||
sub $FRAMESZ, %rsp
|
||||
|
||||
#align rsp to 32 Bytes
|
||||
and $~0x1F, %rsp
|
||||
|
||||
## Initialize digests
|
||||
vmovdqu 0*32(arg1), A
|
||||
vmovdqu 1*32(arg1), B
|
||||
vmovdqu 2*32(arg1), C
|
||||
vmovdqu 3*32(arg1), D
|
||||
vmovdqu 4*32(arg1), E
|
||||
|
||||
## transpose input onto stack
|
||||
mov _data_ptr+0*8(arg1),inp0
|
||||
mov _data_ptr+1*8(arg1),inp1
|
||||
mov _data_ptr+2*8(arg1),inp2
|
||||
mov _data_ptr+3*8(arg1),inp3
|
||||
mov _data_ptr+4*8(arg1),inp4
|
||||
mov _data_ptr+5*8(arg1),inp5
|
||||
mov _data_ptr+6*8(arg1),inp6
|
||||
mov _data_ptr+7*8(arg1),inp7
|
||||
|
||||
xor IDX, IDX
|
||||
lloop:
|
||||
vmovdqu PSHUFFLE_BYTE_FLIP_MASK(%rip), F
|
||||
I=0
|
||||
.rep 2
|
||||
VMOVPS (inp0, IDX), T0
|
||||
VMOVPS (inp1, IDX), T1
|
||||
VMOVPS (inp2, IDX), T2
|
||||
VMOVPS (inp3, IDX), T3
|
||||
VMOVPS (inp4, IDX), T4
|
||||
VMOVPS (inp5, IDX), T5
|
||||
VMOVPS (inp6, IDX), T6
|
||||
VMOVPS (inp7, IDX), T7
|
||||
|
||||
TRANSPOSE8 T0, T1, T2, T3, T4, T5, T6, T7, T8, T9
|
||||
vpshufb F, T0, T0
|
||||
vmovdqu T0, (I*8)*32(%rsp)
|
||||
vpshufb F, T1, T1
|
||||
vmovdqu T1, (I*8+1)*32(%rsp)
|
||||
vpshufb F, T2, T2
|
||||
vmovdqu T2, (I*8+2)*32(%rsp)
|
||||
vpshufb F, T3, T3
|
||||
vmovdqu T3, (I*8+3)*32(%rsp)
|
||||
vpshufb F, T4, T4
|
||||
vmovdqu T4, (I*8+4)*32(%rsp)
|
||||
vpshufb F, T5, T5
|
||||
vmovdqu T5, (I*8+5)*32(%rsp)
|
||||
vpshufb F, T6, T6
|
||||
vmovdqu T6, (I*8+6)*32(%rsp)
|
||||
vpshufb F, T7, T7
|
||||
vmovdqu T7, (I*8+7)*32(%rsp)
|
||||
add $32, IDX
|
||||
I = (I+1)
|
||||
.endr
|
||||
# save old digests
|
||||
vmovdqu A,AA
|
||||
vmovdqu B,BB
|
||||
vmovdqu C,CC
|
||||
vmovdqu D,DD
|
||||
vmovdqu E,EE
|
||||
|
||||
##
|
||||
## perform 0-79 steps
|
||||
##
|
||||
vmovdqu K00_19(%rip), K
|
||||
## do rounds 0...15
|
||||
I = 0
|
||||
.rep 16
|
||||
SHA1_STEP_00_15 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F0
|
||||
ROTATE_ARGS
|
||||
I = (I+1)
|
||||
.endr
|
||||
|
||||
## do rounds 16...19
|
||||
vmovdqu ((16 - 16) & 15) * 32 (%rsp), W16
|
||||
vmovdqu ((16 - 15) & 15) * 32 (%rsp), W15
|
||||
.rep 4
|
||||
SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F0
|
||||
ROTATE_ARGS
|
||||
I = (I+1)
|
||||
.endr
|
||||
|
||||
## do rounds 20...39
|
||||
vmovdqu K20_39(%rip), K
|
||||
.rep 20
|
||||
SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F1
|
||||
ROTATE_ARGS
|
||||
I = (I+1)
|
||||
.endr
|
||||
|
||||
## do rounds 40...59
|
||||
vmovdqu K40_59(%rip), K
|
||||
.rep 20
|
||||
SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F2
|
||||
ROTATE_ARGS
|
||||
I = (I+1)
|
||||
.endr
|
||||
|
||||
## do rounds 60...79
|
||||
vmovdqu K60_79(%rip), K
|
||||
.rep 20
|
||||
SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F3
|
||||
ROTATE_ARGS
|
||||
I = (I+1)
|
||||
.endr
|
||||
|
||||
vpaddd AA,A,A
|
||||
vpaddd BB,B,B
|
||||
vpaddd CC,C,C
|
||||
vpaddd DD,D,D
|
||||
vpaddd EE,E,E
|
||||
|
||||
sub $1, arg2
|
||||
jne lloop
|
||||
|
||||
# write out digests
|
||||
vmovdqu A, 0*32(arg1)
|
||||
vmovdqu B, 1*32(arg1)
|
||||
vmovdqu C, 2*32(arg1)
|
||||
vmovdqu D, 3*32(arg1)
|
||||
vmovdqu E, 4*32(arg1)
|
||||
|
||||
# update input pointers
|
||||
add IDX, inp0
|
||||
add IDX, inp1
|
||||
add IDX, inp2
|
||||
add IDX, inp3
|
||||
add IDX, inp4
|
||||
add IDX, inp5
|
||||
add IDX, inp6
|
||||
add IDX, inp7
|
||||
mov inp0, _data_ptr (arg1)
|
||||
mov inp1, _data_ptr + 1*8(arg1)
|
||||
mov inp2, _data_ptr + 2*8(arg1)
|
||||
mov inp3, _data_ptr + 3*8(arg1)
|
||||
mov inp4, _data_ptr + 4*8(arg1)
|
||||
mov inp5, _data_ptr + 5*8(arg1)
|
||||
mov inp6, _data_ptr + 6*8(arg1)
|
||||
mov inp7, _data_ptr + 7*8(arg1)
|
||||
|
||||
################
|
||||
## Postamble
|
||||
|
||||
mov RSP_SAVE, %rsp
|
||||
|
||||
# restore callee-saved clobbered registers
|
||||
pop %r15
|
||||
pop %r14
|
||||
pop %r13
|
||||
pop %r12
|
||||
|
||||
ret
|
||||
ENDPROC(sha1_x8_avx2)
|
||||
|
||||
|
||||
.section .rodata.cst32.K00_19, "aM", @progbits, 32
|
||||
.align 32
|
||||
K00_19:
|
||||
.octa 0x5A8279995A8279995A8279995A827999
|
||||
.octa 0x5A8279995A8279995A8279995A827999
|
||||
|
||||
.section .rodata.cst32.K20_39, "aM", @progbits, 32
|
||||
.align 32
|
||||
K20_39:
|
||||
.octa 0x6ED9EBA16ED9EBA16ED9EBA16ED9EBA1
|
||||
.octa 0x6ED9EBA16ED9EBA16ED9EBA16ED9EBA1
|
||||
|
||||
.section .rodata.cst32.K40_59, "aM", @progbits, 32
|
||||
.align 32
|
||||
K40_59:
|
||||
.octa 0x8F1BBCDC8F1BBCDC8F1BBCDC8F1BBCDC
|
||||
.octa 0x8F1BBCDC8F1BBCDC8F1BBCDC8F1BBCDC
|
||||
|
||||
.section .rodata.cst32.K60_79, "aM", @progbits, 32
|
||||
.align 32
|
||||
K60_79:
|
||||
.octa 0xCA62C1D6CA62C1D6CA62C1D6CA62C1D6
|
||||
.octa 0xCA62C1D6CA62C1D6CA62C1D6CA62C1D6
|
||||
|
||||
.section .rodata.cst32.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 32
|
||||
.align 32
|
||||
PSHUFFLE_BYTE_FLIP_MASK:
|
||||
.octa 0x0c0d0e0f08090a0b0405060700010203
|
||||
.octa 0x0c0d0e0f08090a0b0405060700010203
|
@ -1,14 +0,0 @@
|
||||
# SPDX-License-Identifier: GPL-2.0
|
||||
#
|
||||
# Arch-specific CryptoAPI modules.
|
||||
#
|
||||
|
||||
OBJECT_FILES_NON_STANDARD := y
|
||||
|
||||
avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1\
|
||||
$(comma)4)$(comma)%ymm2,yes,no)
|
||||
ifeq ($(avx2_supported),yes)
|
||||
obj-$(CONFIG_CRYPTO_SHA256_MB) += sha256-mb.o
|
||||
sha256-mb-y := sha256_mb.o sha256_mb_mgr_flush_avx2.o \
|
||||
sha256_mb_mgr_init_avx2.o sha256_mb_mgr_submit_avx2.o sha256_x8_avx2.o
|
||||
endif
|
File diff suppressed because it is too large
Load Diff
@ -1,134 +0,0 @@
|
||||
/*
|
||||
* Header file for multi buffer SHA256 context
|
||||
*
|
||||
* This file is provided under a dual BSD/GPLv2 license. When using or
|
||||
* redistributing this file, you may do so under either license.
|
||||
*
|
||||
* GPL LICENSE SUMMARY
|
||||
*
|
||||
* Copyright(c) 2016 Intel Corporation.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of version 2 of the GNU General Public License as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* Contact Information:
|
||||
* Megha Dey <megha.dey@linux.intel.com>
|
||||
*
|
||||
* BSD LICENSE
|
||||
*
|
||||
* Copyright(c) 2016 Intel Corporation.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef _SHA_MB_CTX_INTERNAL_H
|
||||
#define _SHA_MB_CTX_INTERNAL_H
|
||||
|
||||
#include "sha256_mb_mgr.h"
|
||||
|
||||
#define HASH_UPDATE 0x00
|
||||
#define HASH_LAST 0x01
|
||||
#define HASH_DONE 0x02
|
||||
#define HASH_FINAL 0x04
|
||||
|
||||
#define HASH_CTX_STS_IDLE 0x00
|
||||
#define HASH_CTX_STS_PROCESSING 0x01
|
||||
#define HASH_CTX_STS_LAST 0x02
|
||||
#define HASH_CTX_STS_COMPLETE 0x04
|
||||
|
||||
enum hash_ctx_error {
|
||||
HASH_CTX_ERROR_NONE = 0,
|
||||
HASH_CTX_ERROR_INVALID_FLAGS = -1,
|
||||
HASH_CTX_ERROR_ALREADY_PROCESSING = -2,
|
||||
HASH_CTX_ERROR_ALREADY_COMPLETED = -3,
|
||||
|
||||
#ifdef HASH_CTX_DEBUG
|
||||
HASH_CTX_ERROR_DEBUG_DIGEST_MISMATCH = -4,
|
||||
#endif
|
||||
};
|
||||
|
||||
|
||||
#define hash_ctx_user_data(ctx) ((ctx)->user_data)
|
||||
#define hash_ctx_digest(ctx) ((ctx)->job.result_digest)
|
||||
#define hash_ctx_processing(ctx) ((ctx)->status & HASH_CTX_STS_PROCESSING)
|
||||
#define hash_ctx_complete(ctx) ((ctx)->status == HASH_CTX_STS_COMPLETE)
|
||||
#define hash_ctx_status(ctx) ((ctx)->status)
|
||||
#define hash_ctx_error(ctx) ((ctx)->error)
|
||||
#define hash_ctx_init(ctx) \
|
||||
do { \
|
||||
(ctx)->error = HASH_CTX_ERROR_NONE; \
|
||||
(ctx)->status = HASH_CTX_STS_COMPLETE; \
|
||||
} while (0)
|
||||
|
||||
|
||||
/* Hash Constants and Typedefs */
|
||||
#define SHA256_DIGEST_LENGTH 8
|
||||
#define SHA256_LOG2_BLOCK_SIZE 6
|
||||
|
||||
#define SHA256_PADLENGTHFIELD_SIZE 8
|
||||
|
||||
#ifdef SHA_MB_DEBUG
|
||||
#define assert(expr) \
|
||||
do { \
|
||||
if (unlikely(!(expr))) { \
|
||||
printk(KERN_ERR "Assertion failed! %s,%s,%s,line=%d\n", \
|
||||
#expr, __FILE__, __func__, __LINE__); \
|
||||
} \
|
||||
} while (0)
|
||||
#else
|
||||
#define assert(expr) do {} while (0)
|
||||
#endif
|
||||
|
||||
struct sha256_ctx_mgr {
|
||||
struct sha256_mb_mgr mgr;
|
||||
};
|
||||
|
||||
/* typedef struct sha256_ctx_mgr sha256_ctx_mgr; */
|
||||
|
||||
struct sha256_hash_ctx {
|
||||
/* Must be at struct offset 0 */
|
||||
struct job_sha256 job;
|
||||
/* status flag */
|
||||
int status;
|
||||
/* error flag */
|
||||
int error;
|
||||
|
||||
uint64_t total_length;
|
||||
const void *incoming_buffer;
|
||||
uint32_t incoming_buffer_length;
|
||||
uint8_t partial_block_buffer[SHA256_BLOCK_SIZE * 2];
|
||||
uint32_t partial_block_buffer_length;
|
||||
void *user_data;
|
||||
};
|
||||
|
||||
#endif
|
@ -1,108 +0,0 @@
|
||||
/*
|
||||
* Header file for multi buffer SHA256 algorithm manager
|
||||
*
|
||||
* This file is provided under a dual BSD/GPLv2 license. When using or
|
||||
* redistributing this file, you may do so under either license.
|
||||
*
|
||||
* GPL LICENSE SUMMARY
|
||||
*
|
||||
* Copyright(c) 2016 Intel Corporation.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of version 2 of the GNU General Public License as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* Contact Information:
|
||||
* Megha Dey <megha.dey@linux.intel.com>
|
||||
*
|
||||
* BSD LICENSE
|
||||
*
|
||||
* Copyright(c) 2016 Intel Corporation.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
#ifndef __SHA_MB_MGR_H
|
||||
#define __SHA_MB_MGR_H
|
||||
|
||||
#include <linux/types.h>
|
||||
|
||||
#define NUM_SHA256_DIGEST_WORDS 8
|
||||
|
||||
enum job_sts { STS_UNKNOWN = 0,
|
||||
STS_BEING_PROCESSED = 1,
|
||||
STS_COMPLETED = 2,
|
||||
STS_INTERNAL_ERROR = 3,
|
||||
STS_ERROR = 4
|
||||
};
|
||||
|
||||
struct job_sha256 {
|
||||
u8 *buffer;
|
||||
u32 len;
|
||||
u32 result_digest[NUM_SHA256_DIGEST_WORDS] __aligned(32);
|
||||
enum job_sts status;
|
||||
void *user_data;
|
||||
};
|
||||
|
||||
/* SHA256 out-of-order scheduler */
|
||||
|
||||
/* typedef uint32_t sha8_digest_array[8][8]; */
|
||||
|
||||
struct sha256_args_x8 {
|
||||
uint32_t digest[8][8];
|
||||
uint8_t *data_ptr[8];
|
||||
};
|
||||
|
||||
struct sha256_lane_data {
|
||||
struct job_sha256 *job_in_lane;
|
||||
};
|
||||
|
||||
struct sha256_mb_mgr {
|
||||
struct sha256_args_x8 args;
|
||||
|
||||
uint32_t lens[8];
|
||||
|
||||
/* each byte is index (0...7) of unused lanes */
|
||||
uint64_t unused_lanes;
|
||||
/* byte 4 is set to FF as a flag */
|
||||
struct sha256_lane_data ldata[8];
|
||||
};
|
||||
|
||||
|
||||
#define SHA256_MB_MGR_NUM_LANES_AVX2 8
|
||||
|
||||
void sha256_mb_mgr_init_avx2(struct sha256_mb_mgr *state);
|
||||
struct job_sha256 *sha256_mb_mgr_submit_avx2(struct sha256_mb_mgr *state,
|
||||
struct job_sha256 *job);
|
||||
struct job_sha256 *sha256_mb_mgr_flush_avx2(struct sha256_mb_mgr *state);
|
||||
struct job_sha256 *sha256_mb_mgr_get_comp_job_avx2(struct sha256_mb_mgr *state);
|
||||
|
||||
#endif
|
@ -1,304 +0,0 @@
|
||||
/*
|
||||
* Header file for multi buffer SHA256 algorithm data structure
|
||||
*
|
||||
* This file is provided under a dual BSD/GPLv2 license. When using or
|
||||
* redistributing this file, you may do so under either license.
|
||||
*
|
||||
* GPL LICENSE SUMMARY
|
||||
*
|
||||
* Copyright(c) 2016 Intel Corporation.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of version 2 of the GNU General Public License as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* Contact Information:
|
||||
* Megha Dey <megha.dey@linux.intel.com>
|
||||
*
|
||||
* BSD LICENSE
|
||||
*
|
||||
* Copyright(c) 2016 Intel Corporation.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
# Macros for defining data structures
|
||||
|
||||
# Usage example
|
||||
|
||||
#START_FIELDS # JOB_AES
|
||||
### name size align
|
||||
#FIELD _plaintext, 8, 8 # pointer to plaintext
|
||||
#FIELD _ciphertext, 8, 8 # pointer to ciphertext
|
||||
#FIELD _IV, 16, 8 # IV
|
||||
#FIELD _keys, 8, 8 # pointer to keys
|
||||
#FIELD _len, 4, 4 # length in bytes
|
||||
#FIELD _status, 4, 4 # status enumeration
|
||||
#FIELD _user_data, 8, 8 # pointer to user data
|
||||
#UNION _union, size1, align1, \
|
||||
# size2, align2, \
|
||||
# size3, align3, \
|
||||
# ...
|
||||
#END_FIELDS
|
||||
#%assign _JOB_AES_size _FIELD_OFFSET
|
||||
#%assign _JOB_AES_align _STRUCT_ALIGN
|
||||
|
||||
#########################################################################
|
||||
|
||||
# Alternate "struc-like" syntax:
|
||||
# STRUCT job_aes2
|
||||
# RES_Q .plaintext, 1
|
||||
# RES_Q .ciphertext, 1
|
||||
# RES_DQ .IV, 1
|
||||
# RES_B .nested, _JOB_AES_SIZE, _JOB_AES_ALIGN
|
||||
# RES_U .union, size1, align1, \
|
||||
# size2, align2, \
|
||||
# ...
|
||||
# ENDSTRUCT
|
||||
# # Following only needed if nesting
|
||||
# %assign job_aes2_size _FIELD_OFFSET
|
||||
# %assign job_aes2_align _STRUCT_ALIGN
|
||||
#
|
||||
# RES_* macros take a name, a count and an optional alignment.
|
||||
# The count in in terms of the base size of the macro, and the
|
||||
# default alignment is the base size.
|
||||
# The macros are:
|
||||
# Macro Base size
|
||||
# RES_B 1
|
||||
# RES_W 2
|
||||
# RES_D 4
|
||||
# RES_Q 8
|
||||
# RES_DQ 16
|
||||
# RES_Y 32
|
||||
# RES_Z 64
|
||||
#
|
||||
# RES_U defines a union. It's arguments are a name and two or more
|
||||
# pairs of "size, alignment"
|
||||
#
|
||||
# The two assigns are only needed if this structure is being nested
|
||||
# within another. Even if the assigns are not done, one can still use
|
||||
# STRUCT_NAME_size as the size of the structure.
|
||||
#
|
||||
# Note that for nesting, you still need to assign to STRUCT_NAME_size.
|
||||
#
|
||||
# The differences between this and using "struc" directly are that each
|
||||
# type is implicitly aligned to its natural length (although this can be
|
||||
# over-ridden with an explicit third parameter), and that the structure
|
||||
# is padded at the end to its overall alignment.
|
||||
#
|
||||
|
||||
#########################################################################
|
||||
|
||||
#ifndef _DATASTRUCT_ASM_
|
||||
#define _DATASTRUCT_ASM_
|
||||
|
||||
#define SZ8 8*SHA256_DIGEST_WORD_SIZE
|
||||
#define ROUNDS 64*SZ8
|
||||
#define PTR_SZ 8
|
||||
#define SHA256_DIGEST_WORD_SIZE 4
|
||||
#define MAX_SHA256_LANES 8
|
||||
#define SHA256_DIGEST_WORDS 8
|
||||
#define SHA256_DIGEST_ROW_SIZE (MAX_SHA256_LANES * SHA256_DIGEST_WORD_SIZE)
|
||||
#define SHA256_DIGEST_SIZE (SHA256_DIGEST_ROW_SIZE * SHA256_DIGEST_WORDS)
|
||||
#define SHA256_BLK_SZ 64
|
||||
|
||||
# START_FIELDS
|
||||
.macro START_FIELDS
|
||||
_FIELD_OFFSET = 0
|
||||
_STRUCT_ALIGN = 0
|
||||
.endm
|
||||
|
||||
# FIELD name size align
|
||||
.macro FIELD name size align
|
||||
_FIELD_OFFSET = (_FIELD_OFFSET + (\align) - 1) & (~ ((\align)-1))
|
||||
\name = _FIELD_OFFSET
|
||||
_FIELD_OFFSET = _FIELD_OFFSET + (\size)
|
||||
.if (\align > _STRUCT_ALIGN)
|
||||
_STRUCT_ALIGN = \align
|
||||
.endif
|
||||
.endm
|
||||
|
||||
# END_FIELDS
|
||||
.macro END_FIELDS
|
||||
_FIELD_OFFSET = (_FIELD_OFFSET + _STRUCT_ALIGN-1) & (~ (_STRUCT_ALIGN-1))
|
||||
.endm
|
||||
|
||||
########################################################################
|
||||
|
||||
.macro STRUCT p1
|
||||
START_FIELDS
|
||||
.struc \p1
|
||||
.endm
|
||||
|
||||
.macro ENDSTRUCT
|
||||
tmp = _FIELD_OFFSET
|
||||
END_FIELDS
|
||||
tmp = (_FIELD_OFFSET - %%tmp)
|
||||
.if (tmp > 0)
|
||||
.lcomm tmp
|
||||
.endif
|
||||
.endstruc
|
||||
.endm
|
||||
|
||||
## RES_int name size align
|
||||
.macro RES_int p1 p2 p3
|
||||
name = \p1
|
||||
size = \p2
|
||||
align = .\p3
|
||||
|
||||
_FIELD_OFFSET = (_FIELD_OFFSET + (align) - 1) & (~ ((align)-1))
|
||||
.align align
|
||||
.lcomm name size
|
||||
_FIELD_OFFSET = _FIELD_OFFSET + (size)
|
||||
.if (align > _STRUCT_ALIGN)
|
||||
_STRUCT_ALIGN = align
|
||||
.endif
|
||||
.endm
|
||||
|
||||
# macro RES_B name, size [, align]
|
||||
.macro RES_B _name, _size, _align=1
|
||||
RES_int _name _size _align
|
||||
.endm
|
||||
|
||||
# macro RES_W name, size [, align]
|
||||
.macro RES_W _name, _size, _align=2
|
||||
RES_int _name 2*(_size) _align
|
||||
.endm
|
||||
|
||||
# macro RES_D name, size [, align]
|
||||
.macro RES_D _name, _size, _align=4
|
||||
RES_int _name 4*(_size) _align
|
||||
.endm
|
||||
|
||||
# macro RES_Q name, size [, align]
|
||||
.macro RES_Q _name, _size, _align=8
|
||||
RES_int _name 8*(_size) _align
|
||||
.endm
|
||||
|
||||
# macro RES_DQ name, size [, align]
|
||||
.macro RES_DQ _name, _size, _align=16
|
||||
RES_int _name 16*(_size) _align
|
||||
.endm
|
||||
|
||||
# macro RES_Y name, size [, align]
|
||||
.macro RES_Y _name, _size, _align=32
|
||||
RES_int _name 32*(_size) _align
|
||||
.endm
|
||||
|
||||
# macro RES_Z name, size [, align]
|
||||
.macro RES_Z _name, _size, _align=64
|
||||
RES_int _name 64*(_size) _align
|
||||
.endm
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
########################################################################
|
||||
#### Define SHA256 Out Of Order Data Structures
|
||||
########################################################################
|
||||
|
||||
START_FIELDS # LANE_DATA
|
||||
### name size align
|
||||
FIELD _job_in_lane, 8, 8 # pointer to job object
|
||||
END_FIELDS
|
||||
|
||||
_LANE_DATA_size = _FIELD_OFFSET
|
||||
_LANE_DATA_align = _STRUCT_ALIGN
|
||||
|
||||
########################################################################
|
||||
|
||||
START_FIELDS # SHA256_ARGS_X4
|
||||
### name size align
|
||||
FIELD _digest, 4*8*8, 4 # transposed digest
|
||||
FIELD _data_ptr, 8*8, 8 # array of pointers to data
|
||||
END_FIELDS
|
||||
|
||||
_SHA256_ARGS_X4_size = _FIELD_OFFSET
|
||||
_SHA256_ARGS_X4_align = _STRUCT_ALIGN
|
||||
_SHA256_ARGS_X8_size = _FIELD_OFFSET
|
||||
_SHA256_ARGS_X8_align = _STRUCT_ALIGN
|
||||
|
||||
#######################################################################
|
||||
|
||||
START_FIELDS # MB_MGR
|
||||
### name size align
|
||||
FIELD _args, _SHA256_ARGS_X4_size, _SHA256_ARGS_X4_align
|
||||
FIELD _lens, 4*8, 8
|
||||
FIELD _unused_lanes, 8, 8
|
||||
FIELD _ldata, _LANE_DATA_size*8, _LANE_DATA_align
|
||||
END_FIELDS
|
||||
|
||||
_MB_MGR_size = _FIELD_OFFSET
|
||||
_MB_MGR_align = _STRUCT_ALIGN
|
||||
|
||||
_args_digest = _args + _digest
|
||||
_args_data_ptr = _args + _data_ptr
|
||||
|
||||
#######################################################################
|
||||
|
||||
START_FIELDS #STACK_FRAME
|
||||
### name size align
|
||||
FIELD _data, 16*SZ8, 1 # transposed digest
|
||||
FIELD _digest, 8*SZ8, 1 # array of pointers to data
|
||||
FIELD _ytmp, 4*SZ8, 1
|
||||
FIELD _rsp, 8, 1
|
||||
END_FIELDS
|
||||
|
||||
_STACK_FRAME_size = _FIELD_OFFSET
|
||||
_STACK_FRAME_align = _STRUCT_ALIGN
|
||||
|
||||
#######################################################################
|
||||
|
||||
########################################################################
|
||||
#### Define constants
|
||||
########################################################################
|
||||
|
||||
#define STS_UNKNOWN 0
|
||||
#define STS_BEING_PROCESSED 1
|
||||
#define STS_COMPLETED 2
|
||||
|
||||
########################################################################
|
||||
#### Define JOB_SHA256 structure
|
||||
########################################################################
|
||||
|
||||
START_FIELDS # JOB_SHA256
|
||||
|
||||
### name size align
|
||||
FIELD _buffer, 8, 8 # pointer to buffer
|
||||
FIELD _len, 8, 8 # length in bytes
|
||||
FIELD _result_digest, 8*4, 32 # Digest (output)
|
||||
FIELD _status, 4, 4
|
||||
FIELD _user_data, 8, 8
|
||||
END_FIELDS
|
||||
|
||||
_JOB_SHA256_size = _FIELD_OFFSET
|
||||
_JOB_SHA256_align = _STRUCT_ALIGN
|
@ -1,307 +0,0 @@
|
||||
/*
|
||||
* Flush routine for SHA256 multibuffer
|
||||
*
|
||||
* This file is provided under a dual BSD/GPLv2 license. When using or
|
||||
* redistributing this file, you may do so under either license.
|
||||
*
|
||||
* GPL LICENSE SUMMARY
|
||||
*
|
||||
* Copyright(c) 2016 Intel Corporation.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of version 2 of the GNU General Public License as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* Contact Information:
|
||||
* Megha Dey <megha.dey@linux.intel.com>
|
||||
*
|
||||
* BSD LICENSE
|
||||
*
|
||||
* Copyright(c) 2016 Intel Corporation.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
#include <linux/linkage.h>
|
||||
#include <asm/frame.h>
|
||||
#include "sha256_mb_mgr_datastruct.S"
|
||||
|
||||
.extern sha256_x8_avx2
|
||||
|
||||
#LINUX register definitions
|
||||
#define arg1 %rdi
|
||||
#define arg2 %rsi
|
||||
|
||||
# Common register definitions
|
||||
#define state arg1
|
||||
#define job arg2
|
||||
#define len2 arg2
|
||||
|
||||
# idx must be a register not clobberred by sha1_mult
|
||||
#define idx %r8
|
||||
#define DWORD_idx %r8d
|
||||
|
||||
#define unused_lanes %rbx
|
||||
#define lane_data %rbx
|
||||
#define tmp2 %rbx
|
||||
#define tmp2_w %ebx
|
||||
|
||||
#define job_rax %rax
|
||||
#define tmp1 %rax
|
||||
#define size_offset %rax
|
||||
#define tmp %rax
|
||||
#define start_offset %rax
|
||||
|
||||
#define tmp3 %arg1
|
||||
|
||||
#define extra_blocks %arg2
|
||||
#define p %arg2
|
||||
|
||||
.macro LABEL prefix n
|
||||
\prefix\n\():
|
||||
.endm
|
||||
|
||||
.macro JNE_SKIP i
|
||||
jne skip_\i
|
||||
.endm
|
||||
|
||||
.altmacro
|
||||
.macro SET_OFFSET _offset
|
||||
offset = \_offset
|
||||
.endm
|
||||
.noaltmacro
|
||||
|
||||
# JOB_SHA256* sha256_mb_mgr_flush_avx2(MB_MGR *state)
|
||||
# arg 1 : rcx : state
|
||||
ENTRY(sha256_mb_mgr_flush_avx2)
|
||||
FRAME_BEGIN
|
||||
push %rbx
|
||||
|
||||
# If bit (32+3) is set, then all lanes are empty
|
||||
mov _unused_lanes(state), unused_lanes
|
||||
bt $32+3, unused_lanes
|
||||
jc return_null
|
||||
|
||||
# find a lane with a non-null job
|
||||
xor idx, idx
|
||||
offset = (_ldata + 1 * _LANE_DATA_size + _job_in_lane)
|
||||
cmpq $0, offset(state)
|
||||
cmovne one(%rip), idx
|
||||
offset = (_ldata + 2 * _LANE_DATA_size + _job_in_lane)
|
||||
cmpq $0, offset(state)
|
||||
cmovne two(%rip), idx
|
||||
offset = (_ldata + 3 * _LANE_DATA_size + _job_in_lane)
|
||||
cmpq $0, offset(state)
|
||||
cmovne three(%rip), idx
|
||||
offset = (_ldata + 4 * _LANE_DATA_size + _job_in_lane)
|
||||
cmpq $0, offset(state)
|
||||
cmovne four(%rip), idx
|
||||
offset = (_ldata + 5 * _LANE_DATA_size + _job_in_lane)
|
||||
cmpq $0, offset(state)
|
||||
cmovne five(%rip), idx
|
||||
offset = (_ldata + 6 * _LANE_DATA_size + _job_in_lane)
|
||||
cmpq $0, offset(state)
|
||||
cmovne six(%rip), idx
|
||||
offset = (_ldata + 7 * _LANE_DATA_size + _job_in_lane)
|
||||
cmpq $0, offset(state)
|
||||
cmovne seven(%rip), idx
|
||||
|
||||
# copy idx to empty lanes
|
||||
copy_lane_data:
|
||||
offset = (_args + _data_ptr)
|
||||
mov offset(state,idx,8), tmp
|
||||
|
||||
I = 0
|
||||
.rep 8
|
||||
offset = (_ldata + I * _LANE_DATA_size + _job_in_lane)
|
||||
cmpq $0, offset(state)
|
||||
.altmacro
|
||||
JNE_SKIP %I
|
||||
offset = (_args + _data_ptr + 8*I)
|
||||
mov tmp, offset(state)
|
||||
offset = (_lens + 4*I)
|
||||
movl $0xFFFFFFFF, offset(state)
|
||||
LABEL skip_ %I
|
||||
I = (I+1)
|
||||
.noaltmacro
|
||||
.endr
|
||||
|
||||
# Find min length
|
||||
vmovdqu _lens+0*16(state), %xmm0
|
||||
vmovdqu _lens+1*16(state), %xmm1
|
||||
|
||||
vpminud %xmm1, %xmm0, %xmm2 # xmm2 has {D,C,B,A}
|
||||
vpalignr $8, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,D,C}
|
||||
vpminud %xmm3, %xmm2, %xmm2 # xmm2 has {x,x,E,F}
|
||||
vpalignr $4, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,x,E}
|
||||
vpminud %xmm3, %xmm2, %xmm2 # xmm2 has min val in low dword
|
||||
|
||||
vmovd %xmm2, DWORD_idx
|
||||
mov idx, len2
|
||||
and $0xF, idx
|
||||
shr $4, len2
|
||||
jz len_is_0
|
||||
|
||||
vpand clear_low_nibble(%rip), %xmm2, %xmm2
|
||||
vpshufd $0, %xmm2, %xmm2
|
||||
|
||||
vpsubd %xmm2, %xmm0, %xmm0
|
||||
vpsubd %xmm2, %xmm1, %xmm1
|
||||
|
||||
vmovdqu %xmm0, _lens+0*16(state)
|
||||
vmovdqu %xmm1, _lens+1*16(state)
|
||||
|
||||
# "state" and "args" are the same address, arg1
|
||||
# len is arg2
|
||||
call sha256_x8_avx2
|
||||
# state and idx are intact
|
||||
|
||||
len_is_0:
|
||||
# process completed job "idx"
|
||||
imul $_LANE_DATA_size, idx, lane_data
|
||||
lea _ldata(state, lane_data), lane_data
|
||||
|
||||
mov _job_in_lane(lane_data), job_rax
|
||||
movq $0, _job_in_lane(lane_data)
|
||||
movl $STS_COMPLETED, _status(job_rax)
|
||||
mov _unused_lanes(state), unused_lanes
|
||||
shl $4, unused_lanes
|
||||
or idx, unused_lanes
|
||||
|
||||
mov unused_lanes, _unused_lanes(state)
|
||||
movl $0xFFFFFFFF, _lens(state,idx,4)
|
||||
|
||||
vmovd _args_digest(state , idx, 4) , %xmm0
|
||||
vpinsrd $1, _args_digest+1*32(state, idx, 4), %xmm0, %xmm0
|
||||
vpinsrd $2, _args_digest+2*32(state, idx, 4), %xmm0, %xmm0
|
||||
vpinsrd $3, _args_digest+3*32(state, idx, 4), %xmm0, %xmm0
|
||||
vmovd _args_digest+4*32(state, idx, 4), %xmm1
|
||||
vpinsrd $1, _args_digest+5*32(state, idx, 4), %xmm1, %xmm1
|
||||
vpinsrd $2, _args_digest+6*32(state, idx, 4), %xmm1, %xmm1
|
||||
vpinsrd $3, _args_digest+7*32(state, idx, 4), %xmm1, %xmm1
|
||||
|
||||
vmovdqu %xmm0, _result_digest(job_rax)
|
||||
offset = (_result_digest + 1*16)
|
||||
vmovdqu %xmm1, offset(job_rax)
|
||||
|
||||
return:
|
||||
pop %rbx
|
||||
FRAME_END
|
||||
ret
|
||||
|
||||
return_null:
|
||||
xor job_rax, job_rax
|
||||
jmp return
|
||||
ENDPROC(sha256_mb_mgr_flush_avx2)
|
||||
|
||||
##############################################################################
|
||||
|
||||
.align 16
|
||||
ENTRY(sha256_mb_mgr_get_comp_job_avx2)
|
||||
push %rbx
|
||||
|
||||
## if bit 32+3 is set, then all lanes are empty
|
||||
mov _unused_lanes(state), unused_lanes
|
||||
bt $(32+3), unused_lanes
|
||||
jc .return_null
|
||||
|
||||
# Find min length
|
||||
vmovdqu _lens(state), %xmm0
|
||||
vmovdqu _lens+1*16(state), %xmm1
|
||||
|
||||
vpminud %xmm1, %xmm0, %xmm2 # xmm2 has {D,C,B,A}
|
||||
vpalignr $8, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,D,C}
|
||||
vpminud %xmm3, %xmm2, %xmm2 # xmm2 has {x,x,E,F}
|
||||
vpalignr $4, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,x,E}
|
||||
vpminud %xmm3, %xmm2, %xmm2 # xmm2 has min val in low dword
|
||||
|
||||
vmovd %xmm2, DWORD_idx
|
||||
test $~0xF, idx
|
||||
jnz .return_null
|
||||
|
||||
# process completed job "idx"
|
||||
imul $_LANE_DATA_size, idx, lane_data
|
||||
lea _ldata(state, lane_data), lane_data
|
||||
|
||||
mov _job_in_lane(lane_data), job_rax
|
||||
movq $0, _job_in_lane(lane_data)
|
||||
movl $STS_COMPLETED, _status(job_rax)
|
||||
mov _unused_lanes(state), unused_lanes
|
||||
shl $4, unused_lanes
|
||||
or idx, unused_lanes
|
||||
mov unused_lanes, _unused_lanes(state)
|
||||
|
||||
movl $0xFFFFFFFF, _lens(state, idx, 4)
|
||||
|
||||
vmovd _args_digest(state, idx, 4), %xmm0
|
||||
vpinsrd $1, _args_digest+1*32(state, idx, 4), %xmm0, %xmm0
|
||||
vpinsrd $2, _args_digest+2*32(state, idx, 4), %xmm0, %xmm0
|
||||
vpinsrd $3, _args_digest+3*32(state, idx, 4), %xmm0, %xmm0
|
||||
vmovd _args_digest+4*32(state, idx, 4), %xmm1
|
||||
vpinsrd $1, _args_digest+5*32(state, idx, 4), %xmm1, %xmm1
|
||||
vpinsrd $2, _args_digest+6*32(state, idx, 4), %xmm1, %xmm1
|
||||
vpinsrd $3, _args_digest+7*32(state, idx, 4), %xmm1, %xmm1
|
||||
|
||||
vmovdqu %xmm0, _result_digest(job_rax)
|
||||
offset = (_result_digest + 1*16)
|
||||
vmovdqu %xmm1, offset(job_rax)
|
||||
|
||||
pop %rbx
|
||||
|
||||
ret
|
||||
|
||||
.return_null:
|
||||
xor job_rax, job_rax
|
||||
pop %rbx
|
||||
ret
|
||||
ENDPROC(sha256_mb_mgr_get_comp_job_avx2)
|
||||
|
||||
.section .rodata.cst16.clear_low_nibble, "aM", @progbits, 16
|
||||
.align 16
|
||||
clear_low_nibble:
|
||||
.octa 0x000000000000000000000000FFFFFFF0
|
||||
|
||||
.section .rodata.cst8, "aM", @progbits, 8
|
||||
.align 8
|
||||
one:
|
||||
.quad 1
|
||||
two:
|
||||
.quad 2
|
||||
three:
|
||||
.quad 3
|
||||
four:
|
||||
.quad 4
|
||||
five:
|
||||
.quad 5
|
||||
six:
|
||||
.quad 6
|
||||
seven:
|
||||
.quad 7
|
@ -1,65 +0,0 @@
|
||||
/*
|
||||
* Initialization code for multi buffer SHA256 algorithm for AVX2
|
||||
*
|
||||
* This file is provided under a dual BSD/GPLv2 license. When using or
|
||||
* redistributing this file, you may do so under either license.
|
||||
*
|
||||
* GPL LICENSE SUMMARY
|
||||
*
|
||||
* Copyright(c) 2016 Intel Corporation.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of version 2 of the GNU General Public License as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* Contact Information:
|
||||
* Megha Dey <megha.dey@linux.intel.com>
|
||||
*
|
||||
* BSD LICENSE
|
||||
*
|
||||
* Copyright(c) 2016 Intel Corporation.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "sha256_mb_mgr.h"
|
||||
|
||||
void sha256_mb_mgr_init_avx2(struct sha256_mb_mgr *state)
|
||||
{
|
||||
unsigned int j;
|
||||
|
||||
state->unused_lanes = 0xF76543210ULL;
|
||||
for (j = 0; j < 8; j++) {
|
||||
state->lens[j] = 0xFFFFFFFF;
|
||||
state->ldata[j].job_in_lane = NULL;
|
||||
}
|
||||
}
|
@ -1,214 +0,0 @@
|
||||
/*
|
||||
* Buffer submit code for multi buffer SHA256 algorithm
|
||||
*
|
||||
* This file is provided under a dual BSD/GPLv2 license. When using or
|
||||
* redistributing this file, you may do so under either license.
|
||||
*
|
||||
* GPL LICENSE SUMMARY
|
||||
*
|
||||
* Copyright(c) 2016 Intel Corporation.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of version 2 of the GNU General Public License as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* Contact Information:
|
||||
* Megha Dey <megha.dey@linux.intel.com>
|
||||
*
|
||||
* BSD LICENSE
|
||||
*
|
||||
* Copyright(c) 2016 Intel Corporation.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <linux/linkage.h>
|
||||
#include <asm/frame.h>
|
||||
#include "sha256_mb_mgr_datastruct.S"
|
||||
|
||||
.extern sha256_x8_avx2
|
||||
|
||||
# LINUX register definitions
|
||||
arg1 = %rdi
|
||||
arg2 = %rsi
|
||||
size_offset = %rcx
|
||||
tmp2 = %rcx
|
||||
extra_blocks = %rdx
|
||||
|
||||
# Common definitions
|
||||
#define state arg1
|
||||
#define job %rsi
|
||||
#define len2 arg2
|
||||
#define p2 arg2
|
||||
|
||||
# idx must be a register not clobberred by sha1_x8_avx2
|
||||
idx = %r8
|
||||
DWORD_idx = %r8d
|
||||
last_len = %r8
|
||||
|
||||
p = %r11
|
||||
start_offset = %r11
|
||||
|
||||
unused_lanes = %rbx
|
||||
BYTE_unused_lanes = %bl
|
||||
|
||||
job_rax = %rax
|
||||
len = %rax
|
||||
DWORD_len = %eax
|
||||
|
||||
lane = %r12
|
||||
tmp3 = %r12
|
||||
|
||||
tmp = %r9
|
||||
DWORD_tmp = %r9d
|
||||
|
||||
lane_data = %r10
|
||||
|
||||
# JOB* sha256_mb_mgr_submit_avx2(MB_MGR *state, JOB_SHA256 *job)
|
||||
# arg 1 : rcx : state
|
||||
# arg 2 : rdx : job
|
||||
ENTRY(sha256_mb_mgr_submit_avx2)
|
||||
FRAME_BEGIN
|
||||
push %rbx
|
||||
push %r12
|
||||
|
||||
mov _unused_lanes(state), unused_lanes
|
||||
mov unused_lanes, lane
|
||||
and $0xF, lane
|
||||
shr $4, unused_lanes
|
||||
imul $_LANE_DATA_size, lane, lane_data
|
||||
movl $STS_BEING_PROCESSED, _status(job)
|
||||
lea _ldata(state, lane_data), lane_data
|
||||
mov unused_lanes, _unused_lanes(state)
|
||||
movl _len(job), DWORD_len
|
||||
|
||||
mov job, _job_in_lane(lane_data)
|
||||
shl $4, len
|
||||
or lane, len
|
||||
|
||||
movl DWORD_len, _lens(state , lane, 4)
|
||||
|
||||
# Load digest words from result_digest
|
||||
vmovdqu _result_digest(job), %xmm0
|
||||
vmovdqu _result_digest+1*16(job), %xmm1
|
||||
vmovd %xmm0, _args_digest(state, lane, 4)
|
||||
vpextrd $1, %xmm0, _args_digest+1*32(state , lane, 4)
|
||||
vpextrd $2, %xmm0, _args_digest+2*32(state , lane, 4)
|
||||
vpextrd $3, %xmm0, _args_digest+3*32(state , lane, 4)
|
||||
vmovd %xmm1, _args_digest+4*32(state , lane, 4)
|
||||
|
||||
vpextrd $1, %xmm1, _args_digest+5*32(state , lane, 4)
|
||||
vpextrd $2, %xmm1, _args_digest+6*32(state , lane, 4)
|
||||
vpextrd $3, %xmm1, _args_digest+7*32(state , lane, 4)
|
||||
|
||||
mov _buffer(job), p
|
||||
mov p, _args_data_ptr(state, lane, 8)
|
||||
|
||||
cmp $0xF, unused_lanes
|
||||
jne return_null
|
||||
|
||||
start_loop:
|
||||
# Find min length
|
||||
vmovdqa _lens(state), %xmm0
|
||||
vmovdqa _lens+1*16(state), %xmm1
|
||||
|
||||
vpminud %xmm1, %xmm0, %xmm2 # xmm2 has {D,C,B,A}
|
||||
vpalignr $8, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,D,C}
|
||||
vpminud %xmm3, %xmm2, %xmm2 # xmm2 has {x,x,E,F}
|
||||
vpalignr $4, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,x,E}
|
||||
vpminud %xmm3, %xmm2, %xmm2 # xmm2 has min val in low dword
|
||||
|
||||
vmovd %xmm2, DWORD_idx
|
||||
mov idx, len2
|
||||
and $0xF, idx
|
||||
shr $4, len2
|
||||
jz len_is_0
|
||||
|
||||
vpand clear_low_nibble(%rip), %xmm2, %xmm2
|
||||
vpshufd $0, %xmm2, %xmm2
|
||||
|
||||
vpsubd %xmm2, %xmm0, %xmm0
|
||||
vpsubd %xmm2, %xmm1, %xmm1
|
||||
|
||||
vmovdqa %xmm0, _lens + 0*16(state)
|
||||
vmovdqa %xmm1, _lens + 1*16(state)
|
||||
|
||||
# "state" and "args" are the same address, arg1
|
||||
# len is arg2
|
||||
call sha256_x8_avx2
|
||||
|
||||
# state and idx are intact
|
||||
|
||||
len_is_0:
|
||||
# process completed job "idx"
|
||||
imul $_LANE_DATA_size, idx, lane_data
|
||||
lea _ldata(state, lane_data), lane_data
|
||||
|
||||
mov _job_in_lane(lane_data), job_rax
|
||||
mov _unused_lanes(state), unused_lanes
|
||||
movq $0, _job_in_lane(lane_data)
|
||||
movl $STS_COMPLETED, _status(job_rax)
|
||||
shl $4, unused_lanes
|
||||
or idx, unused_lanes
|
||||
mov unused_lanes, _unused_lanes(state)
|
||||
|
||||
movl $0xFFFFFFFF, _lens(state,idx,4)
|
||||
|
||||
vmovd _args_digest(state, idx, 4), %xmm0
|
||||
vpinsrd $1, _args_digest+1*32(state , idx, 4), %xmm0, %xmm0
|
||||
vpinsrd $2, _args_digest+2*32(state , idx, 4), %xmm0, %xmm0
|
||||
vpinsrd $3, _args_digest+3*32(state , idx, 4), %xmm0, %xmm0
|
||||
vmovd _args_digest+4*32(state, idx, 4), %xmm1
|
||||
|
||||
vpinsrd $1, _args_digest+5*32(state , idx, 4), %xmm1, %xmm1
|
||||
vpinsrd $2, _args_digest+6*32(state , idx, 4), %xmm1, %xmm1
|
||||
vpinsrd $3, _args_digest+7*32(state , idx, 4), %xmm1, %xmm1
|
||||
|
||||
vmovdqu %xmm0, _result_digest(job_rax)
|
||||
vmovdqu %xmm1, _result_digest+1*16(job_rax)
|
||||
|
||||
return:
|
||||
pop %r12
|
||||
pop %rbx
|
||||
FRAME_END
|
||||
ret
|
||||
|
||||
return_null:
|
||||
xor job_rax, job_rax
|
||||
jmp return
|
||||
|
||||
ENDPROC(sha256_mb_mgr_submit_avx2)
|
||||
|
||||
.section .rodata.cst16.clear_low_nibble, "aM", @progbits, 16
|
||||
.align 16
|
||||
clear_low_nibble:
|
||||
.octa 0x000000000000000000000000FFFFFFF0
|
@ -1,598 +0,0 @@
|
||||
/*
|
||||
* Multi-buffer SHA256 algorithm hash compute routine
|
||||
*
|
||||
* This file is provided under a dual BSD/GPLv2 license. When using or
|
||||
* redistributing this file, you may do so under either license.
|
||||
*
|
||||
* GPL LICENSE SUMMARY
|
||||
*
|
||||
* Copyright(c) 2016 Intel Corporation.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of version 2 of the GNU General Public License as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* Contact Information:
|
||||
* Megha Dey <megha.dey@linux.intel.com>
|
||||
*
|
||||
* BSD LICENSE
|
||||
*
|
||||
* Copyright(c) 2016 Intel Corporation.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <linux/linkage.h>
|
||||
#include "sha256_mb_mgr_datastruct.S"
|
||||
|
||||
## code to compute oct SHA256 using SSE-256
|
||||
## outer calling routine takes care of save and restore of XMM registers
|
||||
## Logic designed/laid out by JDG
|
||||
|
||||
## Function clobbers: rax, rcx, rdx, rbx, rsi, rdi, r9-r15; %ymm0-15
|
||||
## Linux clobbers: rax rbx rcx rdx rsi r9 r10 r11 r12 r13 r14 r15
|
||||
## Linux preserves: rdi rbp r8
|
||||
##
|
||||
## clobbers %ymm0-15
|
||||
|
||||
arg1 = %rdi
|
||||
arg2 = %rsi
|
||||
reg3 = %rcx
|
||||
reg4 = %rdx
|
||||
|
||||
# Common definitions
|
||||
STATE = arg1
|
||||
INP_SIZE = arg2
|
||||
|
||||
IDX = %rax
|
||||
ROUND = %rbx
|
||||
TBL = reg3
|
||||
|
||||
inp0 = %r9
|
||||
inp1 = %r10
|
||||
inp2 = %r11
|
||||
inp3 = %r12
|
||||
inp4 = %r13
|
||||
inp5 = %r14
|
||||
inp6 = %r15
|
||||
inp7 = reg4
|
||||
|
||||
a = %ymm0
|
||||
b = %ymm1
|
||||
c = %ymm2
|
||||
d = %ymm3
|
||||
e = %ymm4
|
||||
f = %ymm5
|
||||
g = %ymm6
|
||||
h = %ymm7
|
||||
|
||||
T1 = %ymm8
|
||||
|
||||
a0 = %ymm12
|
||||
a1 = %ymm13
|
||||
a2 = %ymm14
|
||||
TMP = %ymm15
|
||||
TMP0 = %ymm6
|
||||
TMP1 = %ymm7
|
||||
|
||||
TT0 = %ymm8
|
||||
TT1 = %ymm9
|
||||
TT2 = %ymm10
|
||||
TT3 = %ymm11
|
||||
TT4 = %ymm12
|
||||
TT5 = %ymm13
|
||||
TT6 = %ymm14
|
||||
TT7 = %ymm15
|
||||
|
||||
# Define stack usage
|
||||
|
||||
# Assume stack aligned to 32 bytes before call
|
||||
# Therefore FRAMESZ mod 32 must be 32-8 = 24
|
||||
|
||||
#define FRAMESZ 0x388
|
||||
|
||||
#define VMOVPS vmovups
|
||||
|
||||
# TRANSPOSE8 r0, r1, r2, r3, r4, r5, r6, r7, t0, t1
|
||||
# "transpose" data in {r0...r7} using temps {t0...t1}
|
||||
# Input looks like: {r0 r1 r2 r3 r4 r5 r6 r7}
|
||||
# r0 = {a7 a6 a5 a4 a3 a2 a1 a0}
|
||||
# r1 = {b7 b6 b5 b4 b3 b2 b1 b0}
|
||||
# r2 = {c7 c6 c5 c4 c3 c2 c1 c0}
|
||||
# r3 = {d7 d6 d5 d4 d3 d2 d1 d0}
|
||||
# r4 = {e7 e6 e5 e4 e3 e2 e1 e0}
|
||||
# r5 = {f7 f6 f5 f4 f3 f2 f1 f0}
|
||||
# r6 = {g7 g6 g5 g4 g3 g2 g1 g0}
|
||||
# r7 = {h7 h6 h5 h4 h3 h2 h1 h0}
|
||||
#
|
||||
# Output looks like: {r0 r1 r2 r3 r4 r5 r6 r7}
|
||||
# r0 = {h0 g0 f0 e0 d0 c0 b0 a0}
|
||||
# r1 = {h1 g1 f1 e1 d1 c1 b1 a1}
|
||||
# r2 = {h2 g2 f2 e2 d2 c2 b2 a2}
|
||||
# r3 = {h3 g3 f3 e3 d3 c3 b3 a3}
|
||||
# r4 = {h4 g4 f4 e4 d4 c4 b4 a4}
|
||||
# r5 = {h5 g5 f5 e5 d5 c5 b5 a5}
|
||||
# r6 = {h6 g6 f6 e6 d6 c6 b6 a6}
|
||||
# r7 = {h7 g7 f7 e7 d7 c7 b7 a7}
|
||||
#
|
||||
|
||||
.macro TRANSPOSE8 r0 r1 r2 r3 r4 r5 r6 r7 t0 t1
|
||||
# process top half (r0..r3) {a...d}
|
||||
vshufps $0x44, \r1, \r0, \t0 # t0 = {b5 b4 a5 a4 b1 b0 a1 a0}
|
||||
vshufps $0xEE, \r1, \r0, \r0 # r0 = {b7 b6 a7 a6 b3 b2 a3 a2}
|
||||
vshufps $0x44, \r3, \r2, \t1 # t1 = {d5 d4 c5 c4 d1 d0 c1 c0}
|
||||
vshufps $0xEE, \r3, \r2, \r2 # r2 = {d7 d6 c7 c6 d3 d2 c3 c2}
|
||||
vshufps $0xDD, \t1, \t0, \r3 # r3 = {d5 c5 b5 a5 d1 c1 b1 a1}
|
||||
vshufps $0x88, \r2, \r0, \r1 # r1 = {d6 c6 b6 a6 d2 c2 b2 a2}
|
||||
vshufps $0xDD, \r2, \r0, \r0 # r0 = {d7 c7 b7 a7 d3 c3 b3 a3}
|
||||
vshufps $0x88, \t1, \t0, \t0 # t0 = {d4 c4 b4 a4 d0 c0 b0 a0}
|
||||
|
||||
# use r2 in place of t0
|
||||
# process bottom half (r4..r7) {e...h}
|
||||
vshufps $0x44, \r5, \r4, \r2 # r2 = {f5 f4 e5 e4 f1 f0 e1 e0}
|
||||
vshufps $0xEE, \r5, \r4, \r4 # r4 = {f7 f6 e7 e6 f3 f2 e3 e2}
|
||||
vshufps $0x44, \r7, \r6, \t1 # t1 = {h5 h4 g5 g4 h1 h0 g1 g0}
|
||||
vshufps $0xEE, \r7, \r6, \r6 # r6 = {h7 h6 g7 g6 h3 h2 g3 g2}
|
||||
vshufps $0xDD, \t1, \r2, \r7 # r7 = {h5 g5 f5 e5 h1 g1 f1 e1}
|
||||
vshufps $0x88, \r6, \r4, \r5 # r5 = {h6 g6 f6 e6 h2 g2 f2 e2}
|
||||
vshufps $0xDD, \r6, \r4, \r4 # r4 = {h7 g7 f7 e7 h3 g3 f3 e3}
|
||||
vshufps $0x88, \t1, \r2, \t1 # t1 = {h4 g4 f4 e4 h0 g0 f0 e0}
|
||||
|
||||
vperm2f128 $0x13, \r1, \r5, \r6 # h6...a6
|
||||
vperm2f128 $0x02, \r1, \r5, \r2 # h2...a2
|
||||
vperm2f128 $0x13, \r3, \r7, \r5 # h5...a5
|
||||
vperm2f128 $0x02, \r3, \r7, \r1 # h1...a1
|
||||
vperm2f128 $0x13, \r0, \r4, \r7 # h7...a7
|
||||
vperm2f128 $0x02, \r0, \r4, \r3 # h3...a3
|
||||
vperm2f128 $0x13, \t0, \t1, \r4 # h4...a4
|
||||
vperm2f128 $0x02, \t0, \t1, \r0 # h0...a0
|
||||
|
||||
.endm
|
||||
|
||||
.macro ROTATE_ARGS
|
||||
TMP_ = h
|
||||
h = g
|
||||
g = f
|
||||
f = e
|
||||
e = d
|
||||
d = c
|
||||
c = b
|
||||
b = a
|
||||
a = TMP_
|
||||
.endm
|
||||
|
||||
.macro _PRORD reg imm tmp
|
||||
vpslld $(32-\imm),\reg,\tmp
|
||||
vpsrld $\imm,\reg, \reg
|
||||
vpor \tmp,\reg, \reg
|
||||
.endm
|
||||
|
||||
# PRORD_nd reg, imm, tmp, src
|
||||
.macro _PRORD_nd reg imm tmp src
|
||||
vpslld $(32-\imm), \src, \tmp
|
||||
vpsrld $\imm, \src, \reg
|
||||
vpor \tmp, \reg, \reg
|
||||
.endm
|
||||
|
||||
# PRORD dst/src, amt
|
||||
.macro PRORD reg imm
|
||||
_PRORD \reg,\imm,TMP
|
||||
.endm
|
||||
|
||||
# PRORD_nd dst, src, amt
|
||||
.macro PRORD_nd reg tmp imm
|
||||
_PRORD_nd \reg, \imm, TMP, \tmp
|
||||
.endm
|
||||
|
||||
# arguments passed implicitly in preprocessor symbols i, a...h
|
||||
.macro ROUND_00_15 _T1 i
|
||||
PRORD_nd a0,e,5 # sig1: a0 = (e >> 5)
|
||||
|
||||
vpxor g, f, a2 # ch: a2 = f^g
|
||||
vpand e,a2, a2 # ch: a2 = (f^g)&e
|
||||
vpxor g, a2, a2 # a2 = ch
|
||||
|
||||
PRORD_nd a1,e,25 # sig1: a1 = (e >> 25)
|
||||
|
||||
vmovdqu \_T1,(SZ8*(\i & 0xf))(%rsp)
|
||||
vpaddd (TBL,ROUND,1), \_T1, \_T1 # T1 = W + K
|
||||
vpxor e,a0, a0 # sig1: a0 = e ^ (e >> 5)
|
||||
PRORD a0, 6 # sig1: a0 = (e >> 6) ^ (e >> 11)
|
||||
vpaddd a2, h, h # h = h + ch
|
||||
PRORD_nd a2,a,11 # sig0: a2 = (a >> 11)
|
||||
vpaddd \_T1,h, h # h = h + ch + W + K
|
||||
vpxor a1, a0, a0 # a0 = sigma1
|
||||
PRORD_nd a1,a,22 # sig0: a1 = (a >> 22)
|
||||
vpxor c, a, \_T1 # maj: T1 = a^c
|
||||
add $SZ8, ROUND # ROUND++
|
||||
vpand b, \_T1, \_T1 # maj: T1 = (a^c)&b
|
||||
vpaddd a0, h, h
|
||||
vpaddd h, d, d
|
||||
vpxor a, a2, a2 # sig0: a2 = a ^ (a >> 11)
|
||||
PRORD a2,2 # sig0: a2 = (a >> 2) ^ (a >> 13)
|
||||
vpxor a1, a2, a2 # a2 = sig0
|
||||
vpand c, a, a1 # maj: a1 = a&c
|
||||
vpor \_T1, a1, a1 # a1 = maj
|
||||
vpaddd a1, h, h # h = h + ch + W + K + maj
|
||||
vpaddd a2, h, h # h = h + ch + W + K + maj + sigma0
|
||||
ROTATE_ARGS
|
||||
.endm
|
||||
|
||||
# arguments passed implicitly in preprocessor symbols i, a...h
|
||||
.macro ROUND_16_XX _T1 i
|
||||
vmovdqu (SZ8*((\i-15)&0xf))(%rsp), \_T1
|
||||
vmovdqu (SZ8*((\i-2)&0xf))(%rsp), a1
|
||||
vmovdqu \_T1, a0
|
||||
PRORD \_T1,11
|
||||
vmovdqu a1, a2
|
||||
PRORD a1,2
|
||||
vpxor a0, \_T1, \_T1
|
||||
PRORD \_T1, 7
|
||||
vpxor a2, a1, a1
|
||||
PRORD a1, 17
|
||||
vpsrld $3, a0, a0
|
||||
vpxor a0, \_T1, \_T1
|
||||
vpsrld $10, a2, a2
|
||||
vpxor a2, a1, a1
|
||||
vpaddd (SZ8*((\i-16)&0xf))(%rsp), \_T1, \_T1
|
||||
vpaddd (SZ8*((\i-7)&0xf))(%rsp), a1, a1
|
||||
vpaddd a1, \_T1, \_T1
|
||||
|
||||
ROUND_00_15 \_T1,\i
|
||||
.endm
|
||||
|
||||
# SHA256_ARGS:
|
||||
# UINT128 digest[8]; // transposed digests
|
||||
# UINT8 *data_ptr[4];
|
||||
|
||||
# void sha256_x8_avx2(SHA256_ARGS *args, UINT64 bytes);
|
||||
# arg 1 : STATE : pointer to array of pointers to input data
|
||||
# arg 2 : INP_SIZE : size of input in blocks
|
||||
# general registers preserved in outer calling routine
|
||||
# outer calling routine saves all the XMM registers
|
||||
# save rsp, allocate 32-byte aligned for local variables
|
||||
ENTRY(sha256_x8_avx2)
|
||||
|
||||
# save callee-saved clobbered registers to comply with C function ABI
|
||||
push %r12
|
||||
push %r13
|
||||
push %r14
|
||||
push %r15
|
||||
|
||||
mov %rsp, IDX
|
||||
sub $FRAMESZ, %rsp
|
||||
and $~0x1F, %rsp
|
||||
mov IDX, _rsp(%rsp)
|
||||
|
||||
# Load the pre-transposed incoming digest.
|
||||
vmovdqu 0*SHA256_DIGEST_ROW_SIZE(STATE),a
|
||||
vmovdqu 1*SHA256_DIGEST_ROW_SIZE(STATE),b
|
||||
vmovdqu 2*SHA256_DIGEST_ROW_SIZE(STATE),c
|
||||
vmovdqu 3*SHA256_DIGEST_ROW_SIZE(STATE),d
|
||||
vmovdqu 4*SHA256_DIGEST_ROW_SIZE(STATE),e
|
||||
vmovdqu 5*SHA256_DIGEST_ROW_SIZE(STATE),f
|
||||
vmovdqu 6*SHA256_DIGEST_ROW_SIZE(STATE),g
|
||||
vmovdqu 7*SHA256_DIGEST_ROW_SIZE(STATE),h
|
||||
|
||||
lea K256_8(%rip),TBL
|
||||
|
||||
# load the address of each of the 4 message lanes
|
||||
# getting ready to transpose input onto stack
|
||||
mov _args_data_ptr+0*PTR_SZ(STATE),inp0
|
||||
mov _args_data_ptr+1*PTR_SZ(STATE),inp1
|
||||
mov _args_data_ptr+2*PTR_SZ(STATE),inp2
|
||||
mov _args_data_ptr+3*PTR_SZ(STATE),inp3
|
||||
mov _args_data_ptr+4*PTR_SZ(STATE),inp4
|
||||
mov _args_data_ptr+5*PTR_SZ(STATE),inp5
|
||||
mov _args_data_ptr+6*PTR_SZ(STATE),inp6
|
||||
mov _args_data_ptr+7*PTR_SZ(STATE),inp7
|
||||
|
||||
xor IDX, IDX
|
||||
lloop:
|
||||
xor ROUND, ROUND
|
||||
|
||||
# save old digest
|
||||
vmovdqu a, _digest(%rsp)
|
||||
vmovdqu b, _digest+1*SZ8(%rsp)
|
||||
vmovdqu c, _digest+2*SZ8(%rsp)
|
||||
vmovdqu d, _digest+3*SZ8(%rsp)
|
||||
vmovdqu e, _digest+4*SZ8(%rsp)
|
||||
vmovdqu f, _digest+5*SZ8(%rsp)
|
||||
vmovdqu g, _digest+6*SZ8(%rsp)
|
||||
vmovdqu h, _digest+7*SZ8(%rsp)
|
||||
i = 0
|
||||
.rep 2
|
||||
VMOVPS i*32(inp0, IDX), TT0
|
||||
VMOVPS i*32(inp1, IDX), TT1
|
||||
VMOVPS i*32(inp2, IDX), TT2
|
||||
VMOVPS i*32(inp3, IDX), TT3
|
||||
VMOVPS i*32(inp4, IDX), TT4
|
||||
VMOVPS i*32(inp5, IDX), TT5
|
||||
VMOVPS i*32(inp6, IDX), TT6
|
||||
VMOVPS i*32(inp7, IDX), TT7
|
||||
vmovdqu g, _ytmp(%rsp)
|
||||
vmovdqu h, _ytmp+1*SZ8(%rsp)
|
||||
TRANSPOSE8 TT0, TT1, TT2, TT3, TT4, TT5, TT6, TT7, TMP0, TMP1
|
||||
vmovdqu PSHUFFLE_BYTE_FLIP_MASK(%rip), TMP1
|
||||
vmovdqu _ytmp(%rsp), g
|
||||
vpshufb TMP1, TT0, TT0
|
||||
vpshufb TMP1, TT1, TT1
|
||||
vpshufb TMP1, TT2, TT2
|
||||
vpshufb TMP1, TT3, TT3
|
||||
vpshufb TMP1, TT4, TT4
|
||||
vpshufb TMP1, TT5, TT5
|
||||
vpshufb TMP1, TT6, TT6
|
||||
vpshufb TMP1, TT7, TT7
|
||||
vmovdqu _ytmp+1*SZ8(%rsp), h
|
||||
vmovdqu TT4, _ytmp(%rsp)
|
||||
vmovdqu TT5, _ytmp+1*SZ8(%rsp)
|
||||
vmovdqu TT6, _ytmp+2*SZ8(%rsp)
|
||||
vmovdqu TT7, _ytmp+3*SZ8(%rsp)
|
||||
ROUND_00_15 TT0,(i*8+0)
|
||||
vmovdqu _ytmp(%rsp), TT0
|
||||
ROUND_00_15 TT1,(i*8+1)
|
||||
vmovdqu _ytmp+1*SZ8(%rsp), TT1
|
||||
ROUND_00_15 TT2,(i*8+2)
|
||||
vmovdqu _ytmp+2*SZ8(%rsp), TT2
|
||||
ROUND_00_15 TT3,(i*8+3)
|
||||
vmovdqu _ytmp+3*SZ8(%rsp), TT3
|
||||
ROUND_00_15 TT0,(i*8+4)
|
||||
ROUND_00_15 TT1,(i*8+5)
|
||||
ROUND_00_15 TT2,(i*8+6)
|
||||
ROUND_00_15 TT3,(i*8+7)
|
||||
i = (i+1)
|
||||
.endr
|
||||
add $64, IDX
|
||||
i = (i*8)
|
||||
|
||||
jmp Lrounds_16_xx
|
||||
.align 16
|
||||
Lrounds_16_xx:
|
||||
.rep 16
|
||||
ROUND_16_XX T1, i
|
||||
i = (i+1)
|
||||
.endr
|
||||
|
||||
cmp $ROUNDS,ROUND
|
||||
jb Lrounds_16_xx
|
||||
|
||||
# add old digest
|
||||
vpaddd _digest+0*SZ8(%rsp), a, a
|
||||
vpaddd _digest+1*SZ8(%rsp), b, b
|
||||
vpaddd _digest+2*SZ8(%rsp), c, c
|
||||
vpaddd _digest+3*SZ8(%rsp), d, d
|
||||
vpaddd _digest+4*SZ8(%rsp), e, e
|
||||
vpaddd _digest+5*SZ8(%rsp), f, f
|
||||
vpaddd _digest+6*SZ8(%rsp), g, g
|
||||
vpaddd _digest+7*SZ8(%rsp), h, h
|
||||
|
||||
sub $1, INP_SIZE # unit is blocks
|
||||
jne lloop
|
||||
|
||||
# write back to memory (state object) the transposed digest
|
||||
vmovdqu a, 0*SHA256_DIGEST_ROW_SIZE(STATE)
|
||||
vmovdqu b, 1*SHA256_DIGEST_ROW_SIZE(STATE)
|
||||
vmovdqu c, 2*SHA256_DIGEST_ROW_SIZE(STATE)
|
||||
vmovdqu d, 3*SHA256_DIGEST_ROW_SIZE(STATE)
|
||||
vmovdqu e, 4*SHA256_DIGEST_ROW_SIZE(STATE)
|
||||
vmovdqu f, 5*SHA256_DIGEST_ROW_SIZE(STATE)
|
||||
vmovdqu g, 6*SHA256_DIGEST_ROW_SIZE(STATE)
|
||||
vmovdqu h, 7*SHA256_DIGEST_ROW_SIZE(STATE)
|
||||
|
||||
# update input pointers
|
||||
add IDX, inp0
|
||||
mov inp0, _args_data_ptr+0*8(STATE)
|
||||
add IDX, inp1
|
||||
mov inp1, _args_data_ptr+1*8(STATE)
|
||||
add IDX, inp2
|
||||
mov inp2, _args_data_ptr+2*8(STATE)
|
||||
add IDX, inp3
|
||||
mov inp3, _args_data_ptr+3*8(STATE)
|
||||
add IDX, inp4
|
||||
mov inp4, _args_data_ptr+4*8(STATE)
|
||||
add IDX, inp5
|
||||
mov inp5, _args_data_ptr+5*8(STATE)
|
||||
add IDX, inp6
|
||||
mov inp6, _args_data_ptr+6*8(STATE)
|
||||
add IDX, inp7
|
||||
mov inp7, _args_data_ptr+7*8(STATE)
|
||||
|
||||
# Postamble
|
||||
mov _rsp(%rsp), %rsp
|
||||
|
||||
# restore callee-saved clobbered registers
|
||||
pop %r15
|
||||
pop %r14
|
||||
pop %r13
|
||||
pop %r12
|
||||
|
||||
ret
|
||||
ENDPROC(sha256_x8_avx2)
|
||||
|
||||
.section .rodata.K256_8, "a", @progbits
|
||||
.align 64
|
||||
K256_8:
|
||||
.octa 0x428a2f98428a2f98428a2f98428a2f98
|
||||
.octa 0x428a2f98428a2f98428a2f98428a2f98
|
||||
.octa 0x71374491713744917137449171374491
|
||||
.octa 0x71374491713744917137449171374491
|
||||
.octa 0xb5c0fbcfb5c0fbcfb5c0fbcfb5c0fbcf
|
||||
.octa 0xb5c0fbcfb5c0fbcfb5c0fbcfb5c0fbcf
|
||||
.octa 0xe9b5dba5e9b5dba5e9b5dba5e9b5dba5
|
||||
.octa 0xe9b5dba5e9b5dba5e9b5dba5e9b5dba5
|
||||
.octa 0x3956c25b3956c25b3956c25b3956c25b
|
||||
.octa 0x3956c25b3956c25b3956c25b3956c25b
|
||||
.octa 0x59f111f159f111f159f111f159f111f1
|
||||
.octa 0x59f111f159f111f159f111f159f111f1
|
||||
.octa 0x923f82a4923f82a4923f82a4923f82a4
|
||||
.octa 0x923f82a4923f82a4923f82a4923f82a4
|
||||
.octa 0xab1c5ed5ab1c5ed5ab1c5ed5ab1c5ed5
|
||||
.octa 0xab1c5ed5ab1c5ed5ab1c5ed5ab1c5ed5
|
||||
.octa 0xd807aa98d807aa98d807aa98d807aa98
|
||||
.octa 0xd807aa98d807aa98d807aa98d807aa98
|
||||
.octa 0x12835b0112835b0112835b0112835b01
|
||||
.octa 0x12835b0112835b0112835b0112835b01
|
||||
.octa 0x243185be243185be243185be243185be
|
||||
.octa 0x243185be243185be243185be243185be
|
||||
.octa 0x550c7dc3550c7dc3550c7dc3550c7dc3
|
||||
.octa 0x550c7dc3550c7dc3550c7dc3550c7dc3
|
||||
.octa 0x72be5d7472be5d7472be5d7472be5d74
|
||||
.octa 0x72be5d7472be5d7472be5d7472be5d74
|
||||
.octa 0x80deb1fe80deb1fe80deb1fe80deb1fe
|
||||
.octa 0x80deb1fe80deb1fe80deb1fe80deb1fe
|
||||
.octa 0x9bdc06a79bdc06a79bdc06a79bdc06a7
|
||||
.octa 0x9bdc06a79bdc06a79bdc06a79bdc06a7
|
||||
.octa 0xc19bf174c19bf174c19bf174c19bf174
|
||||
.octa 0xc19bf174c19bf174c19bf174c19bf174
|
||||
.octa 0xe49b69c1e49b69c1e49b69c1e49b69c1
|
||||
.octa 0xe49b69c1e49b69c1e49b69c1e49b69c1
|
||||
.octa 0xefbe4786efbe4786efbe4786efbe4786
|
||||
.octa 0xefbe4786efbe4786efbe4786efbe4786
|
||||
.octa 0x0fc19dc60fc19dc60fc19dc60fc19dc6
|
||||
.octa 0x0fc19dc60fc19dc60fc19dc60fc19dc6
|
||||
.octa 0x240ca1cc240ca1cc240ca1cc240ca1cc
|
||||
.octa 0x240ca1cc240ca1cc240ca1cc240ca1cc
|
||||
.octa 0x2de92c6f2de92c6f2de92c6f2de92c6f
|
||||
.octa 0x2de92c6f2de92c6f2de92c6f2de92c6f
|
||||
.octa 0x4a7484aa4a7484aa4a7484aa4a7484aa
|
||||
.octa 0x4a7484aa4a7484aa4a7484aa4a7484aa
|
||||
.octa 0x5cb0a9dc5cb0a9dc5cb0a9dc5cb0a9dc
|
||||
.octa 0x5cb0a9dc5cb0a9dc5cb0a9dc5cb0a9dc
|
||||
.octa 0x76f988da76f988da76f988da76f988da
|
||||
.octa 0x76f988da76f988da76f988da76f988da
|
||||
.octa 0x983e5152983e5152983e5152983e5152
|
||||
.octa 0x983e5152983e5152983e5152983e5152
|
||||
.octa 0xa831c66da831c66da831c66da831c66d
|
||||
.octa 0xa831c66da831c66da831c66da831c66d
|
||||
.octa 0xb00327c8b00327c8b00327c8b00327c8
|
||||
.octa 0xb00327c8b00327c8b00327c8b00327c8
|
||||
.octa 0xbf597fc7bf597fc7bf597fc7bf597fc7
|
||||
.octa 0xbf597fc7bf597fc7bf597fc7bf597fc7
|
||||
.octa 0xc6e00bf3c6e00bf3c6e00bf3c6e00bf3
|
||||
.octa 0xc6e00bf3c6e00bf3c6e00bf3c6e00bf3
|
||||
.octa 0xd5a79147d5a79147d5a79147d5a79147
|
||||
.octa 0xd5a79147d5a79147d5a79147d5a79147
|
||||
.octa 0x06ca635106ca635106ca635106ca6351
|
||||
.octa 0x06ca635106ca635106ca635106ca6351
|
||||
.octa 0x14292967142929671429296714292967
|
||||
.octa 0x14292967142929671429296714292967
|
||||
.octa 0x27b70a8527b70a8527b70a8527b70a85
|
||||
.octa 0x27b70a8527b70a8527b70a8527b70a85
|
||||
.octa 0x2e1b21382e1b21382e1b21382e1b2138
|
||||
.octa 0x2e1b21382e1b21382e1b21382e1b2138
|
||||
.octa 0x4d2c6dfc4d2c6dfc4d2c6dfc4d2c6dfc
|
||||
.octa 0x4d2c6dfc4d2c6dfc4d2c6dfc4d2c6dfc
|
||||
.octa 0x53380d1353380d1353380d1353380d13
|
||||
.octa 0x53380d1353380d1353380d1353380d13
|
||||
.octa 0x650a7354650a7354650a7354650a7354
|
||||
.octa 0x650a7354650a7354650a7354650a7354
|
||||
.octa 0x766a0abb766a0abb766a0abb766a0abb
|
||||
.octa 0x766a0abb766a0abb766a0abb766a0abb
|
||||
.octa 0x81c2c92e81c2c92e81c2c92e81c2c92e
|
||||
.octa 0x81c2c92e81c2c92e81c2c92e81c2c92e
|
||||
.octa 0x92722c8592722c8592722c8592722c85
|
||||
.octa 0x92722c8592722c8592722c8592722c85
|
||||
.octa 0xa2bfe8a1a2bfe8a1a2bfe8a1a2bfe8a1
|
||||
.octa 0xa2bfe8a1a2bfe8a1a2bfe8a1a2bfe8a1
|
||||
.octa 0xa81a664ba81a664ba81a664ba81a664b
|
||||
.octa 0xa81a664ba81a664ba81a664ba81a664b
|
||||
.octa 0xc24b8b70c24b8b70c24b8b70c24b8b70
|
||||
.octa 0xc24b8b70c24b8b70c24b8b70c24b8b70
|
||||
.octa 0xc76c51a3c76c51a3c76c51a3c76c51a3
|
||||
.octa 0xc76c51a3c76c51a3c76c51a3c76c51a3
|
||||
.octa 0xd192e819d192e819d192e819d192e819
|
||||
.octa 0xd192e819d192e819d192e819d192e819
|
||||
.octa 0xd6990624d6990624d6990624d6990624
|
||||
.octa 0xd6990624d6990624d6990624d6990624
|
||||
.octa 0xf40e3585f40e3585f40e3585f40e3585
|
||||
.octa 0xf40e3585f40e3585f40e3585f40e3585
|
||||
.octa 0x106aa070106aa070106aa070106aa070
|
||||
.octa 0x106aa070106aa070106aa070106aa070
|
||||
.octa 0x19a4c11619a4c11619a4c11619a4c116
|
||||
.octa 0x19a4c11619a4c11619a4c11619a4c116
|
||||
.octa 0x1e376c081e376c081e376c081e376c08
|
||||
.octa 0x1e376c081e376c081e376c081e376c08
|
||||
.octa 0x2748774c2748774c2748774c2748774c
|
||||
.octa 0x2748774c2748774c2748774c2748774c
|
||||
.octa 0x34b0bcb534b0bcb534b0bcb534b0bcb5
|
||||
.octa 0x34b0bcb534b0bcb534b0bcb534b0bcb5
|
||||
.octa 0x391c0cb3391c0cb3391c0cb3391c0cb3
|
||||
.octa 0x391c0cb3391c0cb3391c0cb3391c0cb3
|
||||
.octa 0x4ed8aa4a4ed8aa4a4ed8aa4a4ed8aa4a
|
||||
.octa 0x4ed8aa4a4ed8aa4a4ed8aa4a4ed8aa4a
|
||||
.octa 0x5b9cca4f5b9cca4f5b9cca4f5b9cca4f
|
||||
.octa 0x5b9cca4f5b9cca4f5b9cca4f5b9cca4f
|
||||
.octa 0x682e6ff3682e6ff3682e6ff3682e6ff3
|
||||
.octa 0x682e6ff3682e6ff3682e6ff3682e6ff3
|
||||
.octa 0x748f82ee748f82ee748f82ee748f82ee
|
||||
.octa 0x748f82ee748f82ee748f82ee748f82ee
|
||||
.octa 0x78a5636f78a5636f78a5636f78a5636f
|
||||
.octa 0x78a5636f78a5636f78a5636f78a5636f
|
||||
.octa 0x84c8781484c8781484c8781484c87814
|
||||
.octa 0x84c8781484c8781484c8781484c87814
|
||||
.octa 0x8cc702088cc702088cc702088cc70208
|
||||
.octa 0x8cc702088cc702088cc702088cc70208
|
||||
.octa 0x90befffa90befffa90befffa90befffa
|
||||
.octa 0x90befffa90befffa90befffa90befffa
|
||||
.octa 0xa4506ceba4506ceba4506ceba4506ceb
|
||||
.octa 0xa4506ceba4506ceba4506ceba4506ceb
|
||||
.octa 0xbef9a3f7bef9a3f7bef9a3f7bef9a3f7
|
||||
.octa 0xbef9a3f7bef9a3f7bef9a3f7bef9a3f7
|
||||
.octa 0xc67178f2c67178f2c67178f2c67178f2
|
||||
.octa 0xc67178f2c67178f2c67178f2c67178f2
|
||||
|
||||
.section .rodata.cst32.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 32
|
||||
.align 32
|
||||
PSHUFFLE_BYTE_FLIP_MASK:
|
||||
.octa 0x0c0d0e0f08090a0b0405060700010203
|
||||
.octa 0x0c0d0e0f08090a0b0405060700010203
|
||||
|
||||
.section .rodata.cst256.K256, "aM", @progbits, 256
|
||||
.align 64
|
||||
.global K256
|
||||
K256:
|
||||
.int 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
|
||||
.int 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
|
||||
.int 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
|
||||
.int 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
|
||||
.int 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
|
||||
.int 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
|
||||
.int 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
|
||||
.int 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
|
||||
.int 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
|
||||
.int 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
|
||||
.int 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
|
||||
.int 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
|
||||
.int 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
|
||||
.int 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
|
||||
.int 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
|
||||
.int 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
|
@ -1,12 +0,0 @@
|
||||
# SPDX-License-Identifier: GPL-2.0
|
||||
#
|
||||
# Arch-specific CryptoAPI modules.
|
||||
#
|
||||
|
||||
avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1\
|
||||
$(comma)4)$(comma)%ymm2,yes,no)
|
||||
ifeq ($(avx2_supported),yes)
|
||||
obj-$(CONFIG_CRYPTO_SHA512_MB) += sha512-mb.o
|
||||
sha512-mb-y := sha512_mb.o sha512_mb_mgr_flush_avx2.o \
|
||||
sha512_mb_mgr_init_avx2.o sha512_mb_mgr_submit_avx2.o sha512_x4_avx2.o
|
||||
endif
|
File diff suppressed because it is too large
Load Diff
@ -1,128 +0,0 @@
|
||||
/*
|
||||
* Header file for multi buffer SHA512 context
|
||||
*
|
||||
* This file is provided under a dual BSD/GPLv2 license. When using or
|
||||
* redistributing this file, you may do so under either license.
|
||||
*
|
||||
* GPL LICENSE SUMMARY
|
||||
*
|
||||
* Copyright(c) 2016 Intel Corporation.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of version 2 of the GNU General Public License as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* Contact Information:
|
||||
* Megha Dey <megha.dey@linux.intel.com>
|
||||
*
|
||||
* BSD LICENSE
|
||||
*
|
||||
* Copyright(c) 2016 Intel Corporation.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef _SHA_MB_CTX_INTERNAL_H
|
||||
#define _SHA_MB_CTX_INTERNAL_H
|
||||
|
||||
#include "sha512_mb_mgr.h"
|
||||
|
||||
#define HASH_UPDATE 0x00
|
||||
#define HASH_LAST 0x01
|
||||
#define HASH_DONE 0x02
|
||||
#define HASH_FINAL 0x04
|
||||
|
||||
#define HASH_CTX_STS_IDLE 0x00
|
||||
#define HASH_CTX_STS_PROCESSING 0x01
|
||||
#define HASH_CTX_STS_LAST 0x02
|
||||
#define HASH_CTX_STS_COMPLETE 0x04
|
||||
|
||||
enum hash_ctx_error {
|
||||
HASH_CTX_ERROR_NONE = 0,
|
||||
HASH_CTX_ERROR_INVALID_FLAGS = -1,
|
||||
HASH_CTX_ERROR_ALREADY_PROCESSING = -2,
|
||||
HASH_CTX_ERROR_ALREADY_COMPLETED = -3,
|
||||
};
|
||||
|
||||
#define hash_ctx_user_data(ctx) ((ctx)->user_data)
|
||||
#define hash_ctx_digest(ctx) ((ctx)->job.result_digest)
|
||||
#define hash_ctx_processing(ctx) ((ctx)->status & HASH_CTX_STS_PROCESSING)
|
||||
#define hash_ctx_complete(ctx) ((ctx)->status == HASH_CTX_STS_COMPLETE)
|
||||
#define hash_ctx_status(ctx) ((ctx)->status)
|
||||
#define hash_ctx_error(ctx) ((ctx)->error)
|
||||
#define hash_ctx_init(ctx) \
|
||||
do { \
|
||||
(ctx)->error = HASH_CTX_ERROR_NONE; \
|
||||
(ctx)->status = HASH_CTX_STS_COMPLETE; \
|
||||
} while (0)
|
||||
|
||||
/* Hash Constants and Typedefs */
|
||||
#define SHA512_DIGEST_LENGTH 8
|
||||
#define SHA512_LOG2_BLOCK_SIZE 7
|
||||
|
||||
#define SHA512_PADLENGTHFIELD_SIZE 16
|
||||
|
||||
#ifdef SHA_MB_DEBUG
|
||||
#define assert(expr) \
|
||||
do { \
|
||||
if (unlikely(!(expr))) { \
|
||||
printk(KERN_ERR "Assertion failed! %s,%s,%s,line=%d\n", \
|
||||
#expr, __FILE__, __func__, __LINE__); \
|
||||
} \
|
||||
} while (0)
|
||||
#else
|
||||
#define assert(expr) do {} while (0)
|
||||
#endif
|
||||
|
||||
struct sha512_ctx_mgr {
|
||||
struct sha512_mb_mgr mgr;
|
||||
};
|
||||
|
||||
/* typedef struct sha512_ctx_mgr sha512_ctx_mgr; */
|
||||
|
||||
struct sha512_hash_ctx {
|
||||
/* Must be at struct offset 0 */
|
||||
struct job_sha512 job;
|
||||
/* status flag */
|
||||
int status;
|
||||
/* error flag */
|
||||
int error;
|
||||
|
||||
uint64_t total_length;
|
||||
const void *incoming_buffer;
|
||||
uint32_t incoming_buffer_length;
|
||||
uint8_t partial_block_buffer[SHA512_BLOCK_SIZE * 2];
|
||||
uint32_t partial_block_buffer_length;
|
||||
void *user_data;
|
||||
};
|
||||
|
||||
#endif
|
@ -1,104 +0,0 @@
|
||||
/*
|
||||
* Header file for multi buffer SHA512 algorithm manager
|
||||
*
|
||||
* This file is provided under a dual BSD/GPLv2 license. When using or
|
||||
* redistributing this file, you may do so under either license.
|
||||
*
|
||||
* GPL LICENSE SUMMARY
|
||||
*
|
||||
* Copyright(c) 2016 Intel Corporation.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of version 2 of the GNU General Public License as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* Contact Information:
|
||||
* Megha Dey <megha.dey@linux.intel.com>
|
||||
*
|
||||
* BSD LICENSE
|
||||
*
|
||||
* Copyright(c) 2016 Intel Corporation.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef __SHA_MB_MGR_H
|
||||
#define __SHA_MB_MGR_H
|
||||
|
||||
#include <linux/types.h>
|
||||
|
||||
#define NUM_SHA512_DIGEST_WORDS 8
|
||||
|
||||
enum job_sts {STS_UNKNOWN = 0,
|
||||
STS_BEING_PROCESSED = 1,
|
||||
STS_COMPLETED = 2,
|
||||
STS_INTERNAL_ERROR = 3,
|
||||
STS_ERROR = 4
|
||||
};
|
||||
|
||||
struct job_sha512 {
|
||||
u8 *buffer;
|
||||
u64 len;
|
||||
u64 result_digest[NUM_SHA512_DIGEST_WORDS] __aligned(32);
|
||||
enum job_sts status;
|
||||
void *user_data;
|
||||
};
|
||||
|
||||
struct sha512_args_x4 {
|
||||
uint64_t digest[8][4];
|
||||
uint8_t *data_ptr[4];
|
||||
};
|
||||
|
||||
struct sha512_lane_data {
|
||||
struct job_sha512 *job_in_lane;
|
||||
};
|
||||
|
||||
struct sha512_mb_mgr {
|
||||
struct sha512_args_x4 args;
|
||||
|
||||
uint64_t lens[4];
|
||||
|
||||
/* each byte is index (0...7) of unused lanes */
|
||||
uint64_t unused_lanes;
|
||||
/* byte 4 is set to FF as a flag */
|
||||
struct sha512_lane_data ldata[4];
|
||||
};
|
||||
|
||||
#define SHA512_MB_MGR_NUM_LANES_AVX2 4
|
||||
|
||||
void sha512_mb_mgr_init_avx2(struct sha512_mb_mgr *state);
|
||||
struct job_sha512 *sha512_mb_mgr_submit_avx2(struct sha512_mb_mgr *state,
|
||||
struct job_sha512 *job);
|
||||
struct job_sha512 *sha512_mb_mgr_flush_avx2(struct sha512_mb_mgr *state);
|
||||
struct job_sha512 *sha512_mb_mgr_get_comp_job_avx2(struct sha512_mb_mgr *state);
|
||||
|
||||
#endif
|
@ -1,281 +0,0 @@
|
||||
/*
|
||||
* Header file for multi buffer SHA256 algorithm data structure
|
||||
*
|
||||
* This file is provided under a dual BSD/GPLv2 license. When using or
|
||||
* redistributing this file, you may do so under either license.
|
||||
*
|
||||
* GPL LICENSE SUMMARY
|
||||
*
|
||||
* Copyright(c) 2016 Intel Corporation.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of version 2 of the GNU General Public License as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* Contact Information:
|
||||
* Megha Dey <megha.dey@linux.intel.com>
|
||||
*
|
||||
* BSD LICENSE
|
||||
*
|
||||
* Copyright(c) 2016 Intel Corporation.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
# Macros for defining data structures
|
||||
|
||||
# Usage example
|
||||
|
||||
#START_FIELDS # JOB_AES
|
||||
### name size align
|
||||
#FIELD _plaintext, 8, 8 # pointer to plaintext
|
||||
#FIELD _ciphertext, 8, 8 # pointer to ciphertext
|
||||
#FIELD _IV, 16, 8 # IV
|
||||
#FIELD _keys, 8, 8 # pointer to keys
|
||||
#FIELD _len, 4, 4 # length in bytes
|
||||
#FIELD _status, 4, 4 # status enumeration
|
||||
#FIELD _user_data, 8, 8 # pointer to user data
|
||||
#UNION _union, size1, align1, \
|
||||
# size2, align2, \
|
||||
# size3, align3, \
|
||||
# ...
|
||||
#END_FIELDS
|
||||
#%assign _JOB_AES_size _FIELD_OFFSET
|
||||
#%assign _JOB_AES_align _STRUCT_ALIGN
|
||||
|
||||
#########################################################################
|
||||
|
||||
# Alternate "struc-like" syntax:
|
||||
# STRUCT job_aes2
|
||||
# RES_Q .plaintext, 1
|
||||
# RES_Q .ciphertext, 1
|
||||
# RES_DQ .IV, 1
|
||||
# RES_B .nested, _JOB_AES_SIZE, _JOB_AES_ALIGN
|
||||
# RES_U .union, size1, align1, \
|
||||
# size2, align2, \
|
||||
# ...
|
||||
# ENDSTRUCT
|
||||
# # Following only needed if nesting
|
||||
# %assign job_aes2_size _FIELD_OFFSET
|
||||
# %assign job_aes2_align _STRUCT_ALIGN
|
||||
#
|
||||
# RES_* macros take a name, a count and an optional alignment.
|
||||
# The count in in terms of the base size of the macro, and the
|
||||
# default alignment is the base size.
|
||||
# The macros are:
|
||||
# Macro Base size
|
||||
# RES_B 1
|
||||
# RES_W 2
|
||||
# RES_D 4
|
||||
# RES_Q 8
|
||||
# RES_DQ 16
|
||||
# RES_Y 32
|
||||
# RES_Z 64
|
||||
#
|
||||
# RES_U defines a union. It's arguments are a name and two or more
|
||||
# pairs of "size, alignment"
|
||||
#
|
||||
# The two assigns are only needed if this structure is being nested
|
||||
# within another. Even if the assigns are not done, one can still use
|
||||
# STRUCT_NAME_size as the size of the structure.
|
||||
#
|
||||
# Note that for nesting, you still need to assign to STRUCT_NAME_size.
|
||||
#
|
||||
# The differences between this and using "struc" directly are that each
|
||||
# type is implicitly aligned to its natural length (although this can be
|
||||
# over-ridden with an explicit third parameter), and that the structure
|
||||
# is padded at the end to its overall alignment.
|
||||
#
|
||||
|
||||
#########################################################################
|
||||
|
||||
#ifndef _DATASTRUCT_ASM_
|
||||
#define _DATASTRUCT_ASM_
|
||||
|
||||
#define PTR_SZ 8
|
||||
#define SHA512_DIGEST_WORD_SIZE 8
|
||||
#define SHA512_MB_MGR_NUM_LANES_AVX2 4
|
||||
#define NUM_SHA512_DIGEST_WORDS 8
|
||||
#define SZ4 4*SHA512_DIGEST_WORD_SIZE
|
||||
#define ROUNDS 80*SZ4
|
||||
#define SHA512_DIGEST_ROW_SIZE (SHA512_MB_MGR_NUM_LANES_AVX2 * 8)
|
||||
|
||||
# START_FIELDS
|
||||
.macro START_FIELDS
|
||||
_FIELD_OFFSET = 0
|
||||
_STRUCT_ALIGN = 0
|
||||
.endm
|
||||
|
||||
# FIELD name size align
|
||||
.macro FIELD name size align
|
||||
_FIELD_OFFSET = (_FIELD_OFFSET + (\align) - 1) & (~ ((\align)-1))
|
||||
\name = _FIELD_OFFSET
|
||||
_FIELD_OFFSET = _FIELD_OFFSET + (\size)
|
||||
.if (\align > _STRUCT_ALIGN)
|
||||
_STRUCT_ALIGN = \align
|
||||
.endif
|
||||
.endm
|
||||
|
||||
# END_FIELDS
|
||||
.macro END_FIELDS
|
||||
_FIELD_OFFSET = (_FIELD_OFFSET + _STRUCT_ALIGN-1) & (~ (_STRUCT_ALIGN-1))
|
||||
.endm
|
||||
|
||||
.macro STRUCT p1
|
||||
START_FIELDS
|
||||
.struc \p1
|
||||
.endm
|
||||
|
||||
.macro ENDSTRUCT
|
||||
tmp = _FIELD_OFFSET
|
||||
END_FIELDS
|
||||
tmp = (_FIELD_OFFSET - ##tmp)
|
||||
.if (tmp > 0)
|
||||
.lcomm tmp
|
||||
.endm
|
||||
|
||||
## RES_int name size align
|
||||
.macro RES_int p1 p2 p3
|
||||
name = \p1
|
||||
size = \p2
|
||||
align = .\p3
|
||||
|
||||
_FIELD_OFFSET = (_FIELD_OFFSET + (align) - 1) & (~ ((align)-1))
|
||||
.align align
|
||||
.lcomm name size
|
||||
_FIELD_OFFSET = _FIELD_OFFSET + (size)
|
||||
.if (align > _STRUCT_ALIGN)
|
||||
_STRUCT_ALIGN = align
|
||||
.endif
|
||||
.endm
|
||||
|
||||
# macro RES_B name, size [, align]
|
||||
.macro RES_B _name, _size, _align=1
|
||||
RES_int _name _size _align
|
||||
.endm
|
||||
|
||||
# macro RES_W name, size [, align]
|
||||
.macro RES_W _name, _size, _align=2
|
||||
RES_int _name 2*(_size) _align
|
||||
.endm
|
||||
|
||||
# macro RES_D name, size [, align]
|
||||
.macro RES_D _name, _size, _align=4
|
||||
RES_int _name 4*(_size) _align
|
||||
.endm
|
||||
|
||||
# macro RES_Q name, size [, align]
|
||||
.macro RES_Q _name, _size, _align=8
|
||||
RES_int _name 8*(_size) _align
|
||||
.endm
|
||||
|
||||
# macro RES_DQ name, size [, align]
|
||||
.macro RES_DQ _name, _size, _align=16
|
||||
RES_int _name 16*(_size) _align
|
||||
.endm
|
||||
|
||||
# macro RES_Y name, size [, align]
|
||||
.macro RES_Y _name, _size, _align=32
|
||||
RES_int _name 32*(_size) _align
|
||||
.endm
|
||||
|
||||
# macro RES_Z name, size [, align]
|
||||
.macro RES_Z _name, _size, _align=64
|
||||
RES_int _name 64*(_size) _align
|
||||
.endm
|
||||
|
||||
#endif
|
||||
|
||||
###################################################################
|
||||
### Define SHA512 Out Of Order Data Structures
|
||||
###################################################################
|
||||
|
||||
START_FIELDS # LANE_DATA
|
||||
### name size align
|
||||
FIELD _job_in_lane, 8, 8 # pointer to job object
|
||||
END_FIELDS
|
||||
|
||||
_LANE_DATA_size = _FIELD_OFFSET
|
||||
_LANE_DATA_align = _STRUCT_ALIGN
|
||||
|
||||
####################################################################
|
||||
|
||||
START_FIELDS # SHA512_ARGS_X4
|
||||
### name size align
|
||||
FIELD _digest, 8*8*4, 4 # transposed digest
|
||||
FIELD _data_ptr, 8*4, 8 # array of pointers to data
|
||||
END_FIELDS
|
||||
|
||||
_SHA512_ARGS_X4_size = _FIELD_OFFSET
|
||||
_SHA512_ARGS_X4_align = _STRUCT_ALIGN
|
||||
|
||||
#####################################################################
|
||||
|
||||
START_FIELDS # MB_MGR
|
||||
### name size align
|
||||
FIELD _args, _SHA512_ARGS_X4_size, _SHA512_ARGS_X4_align
|
||||
FIELD _lens, 8*4, 8
|
||||
FIELD _unused_lanes, 8, 8
|
||||
FIELD _ldata, _LANE_DATA_size*4, _LANE_DATA_align
|
||||
END_FIELDS
|
||||
|
||||
_MB_MGR_size = _FIELD_OFFSET
|
||||
_MB_MGR_align = _STRUCT_ALIGN
|
||||
|
||||
_args_digest = _args + _digest
|
||||
_args_data_ptr = _args + _data_ptr
|
||||
|
||||
#######################################################################
|
||||
|
||||
#######################################################################
|
||||
#### Define constants
|
||||
#######################################################################
|
||||
|
||||
#define STS_UNKNOWN 0
|
||||
#define STS_BEING_PROCESSED 1
|
||||
#define STS_COMPLETED 2
|
||||
|
||||
#######################################################################
|
||||
#### Define JOB_SHA512 structure
|
||||
#######################################################################
|
||||
|
||||
START_FIELDS # JOB_SHA512
|
||||
### name size align
|
||||
FIELD _buffer, 8, 8 # pointer to buffer
|
||||
FIELD _len, 8, 8 # length in bytes
|
||||
FIELD _result_digest, 8*8, 32 # Digest (output)
|
||||
FIELD _status, 4, 4
|
||||
FIELD _user_data, 8, 8
|
||||
END_FIELDS
|
||||
|
||||
_JOB_SHA512_size = _FIELD_OFFSET
|
||||
_JOB_SHA512_align = _STRUCT_ALIGN
|
@ -1,297 +0,0 @@
|
||||
/*
|
||||
* Flush routine for SHA512 multibuffer
|
||||
*
|
||||
* This file is provided under a dual BSD/GPLv2 license. When using or
|
||||
* redistributing this file, you may do so under either license.
|
||||
*
|
||||
* GPL LICENSE SUMMARY
|
||||
*
|
||||
* Copyright(c) 2016 Intel Corporation.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of version 2 of the GNU General Public License as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* Contact Information:
|
||||
* Megha Dey <megha.dey@linux.intel.com>
|
||||
*
|
||||
* BSD LICENSE
|
||||
*
|
||||
* Copyright(c) 2016 Intel Corporation.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <linux/linkage.h>
|
||||
#include <asm/frame.h>
|
||||
#include "sha512_mb_mgr_datastruct.S"
|
||||
|
||||
.extern sha512_x4_avx2
|
||||
|
||||
# LINUX register definitions
|
||||
#define arg1 %rdi
|
||||
#define arg2 %rsi
|
||||
|
||||
# idx needs to be other than arg1, arg2, rbx, r12
|
||||
#define idx %rdx
|
||||
|
||||
# Common definitions
|
||||
#define state arg1
|
||||
#define job arg2
|
||||
#define len2 arg2
|
||||
|
||||
#define unused_lanes %rbx
|
||||
#define lane_data %rbx
|
||||
#define tmp2 %rbx
|
||||
|
||||
#define job_rax %rax
|
||||
#define tmp1 %rax
|
||||
#define size_offset %rax
|
||||
#define tmp %rax
|
||||
#define start_offset %rax
|
||||
|
||||
#define tmp3 arg1
|
||||
|
||||
#define extra_blocks arg2
|
||||
#define p arg2
|
||||
|
||||
#define tmp4 %r8
|
||||
#define lens0 %r8
|
||||
|
||||
#define lens1 %r9
|
||||
#define lens2 %r10
|
||||
#define lens3 %r11
|
||||
|
||||
.macro LABEL prefix n
|
||||
\prefix\n\():
|
||||
.endm
|
||||
|
||||
.macro JNE_SKIP i
|
||||
jne skip_\i
|
||||
.endm
|
||||
|
||||
.altmacro
|
||||
.macro SET_OFFSET _offset
|
||||
offset = \_offset
|
||||
.endm
|
||||
.noaltmacro
|
||||
|
||||
# JOB* sha512_mb_mgr_flush_avx2(MB_MGR *state)
|
||||
# arg 1 : rcx : state
|
||||
ENTRY(sha512_mb_mgr_flush_avx2)
|
||||
FRAME_BEGIN
|
||||
push %rbx
|
||||
|
||||
# If bit (32+3) is set, then all lanes are empty
|
||||
mov _unused_lanes(state), unused_lanes
|
||||
bt $32+7, unused_lanes
|
||||
jc return_null
|
||||
|
||||
# find a lane with a non-null job
|
||||
xor idx, idx
|
||||
offset = (_ldata + 1*_LANE_DATA_size + _job_in_lane)
|
||||
cmpq $0, offset(state)
|
||||
cmovne one(%rip), idx
|
||||
offset = (_ldata + 2*_LANE_DATA_size + _job_in_lane)
|
||||
cmpq $0, offset(state)
|
||||
cmovne two(%rip), idx
|
||||
offset = (_ldata + 3*_LANE_DATA_size + _job_in_lane)
|
||||
cmpq $0, offset(state)
|
||||
cmovne three(%rip), idx
|
||||
|
||||
# copy idx to empty lanes
|
||||
copy_lane_data:
|
||||
offset = (_args + _data_ptr)
|
||||
mov offset(state,idx,8), tmp
|
||||
|
||||
I = 0
|
||||
.rep 4
|
||||
offset = (_ldata + I * _LANE_DATA_size + _job_in_lane)
|
||||
cmpq $0, offset(state)
|
||||
.altmacro
|
||||
JNE_SKIP %I
|
||||
offset = (_args + _data_ptr + 8*I)
|
||||
mov tmp, offset(state)
|
||||
offset = (_lens + 8*I +4)
|
||||
movl $0xFFFFFFFF, offset(state)
|
||||
LABEL skip_ %I
|
||||
I = (I+1)
|
||||
.noaltmacro
|
||||
.endr
|
||||
|
||||
# Find min length
|
||||
mov _lens + 0*8(state),lens0
|
||||
mov lens0,idx
|
||||
mov _lens + 1*8(state),lens1
|
||||
cmp idx,lens1
|
||||
cmovb lens1,idx
|
||||
mov _lens + 2*8(state),lens2
|
||||
cmp idx,lens2
|
||||
cmovb lens2,idx
|
||||
mov _lens + 3*8(state),lens3
|
||||
cmp idx,lens3
|
||||
cmovb lens3,idx
|
||||
mov idx,len2
|
||||
and $0xF,idx
|
||||
and $~0xFF,len2
|
||||
jz len_is_0
|
||||
|
||||
sub len2, lens0
|
||||
sub len2, lens1
|
||||
sub len2, lens2
|
||||
sub len2, lens3
|
||||
shr $32,len2
|
||||
mov lens0, _lens + 0*8(state)
|
||||
mov lens1, _lens + 1*8(state)
|
||||
mov lens2, _lens + 2*8(state)
|
||||
mov lens3, _lens + 3*8(state)
|
||||
|
||||
# "state" and "args" are the same address, arg1
|
||||
# len is arg2
|
||||
call sha512_x4_avx2
|
||||
# state and idx are intact
|
||||
|
||||
len_is_0:
|
||||
# process completed job "idx"
|
||||
imul $_LANE_DATA_size, idx, lane_data
|
||||
lea _ldata(state, lane_data), lane_data
|
||||
|
||||
mov _job_in_lane(lane_data), job_rax
|
||||
movq $0, _job_in_lane(lane_data)
|
||||
movl $STS_COMPLETED, _status(job_rax)
|
||||
mov _unused_lanes(state), unused_lanes
|
||||
shl $8, unused_lanes
|
||||
or idx, unused_lanes
|
||||
mov unused_lanes, _unused_lanes(state)
|
||||
|
||||
movl $0xFFFFFFFF, _lens+4(state, idx, 8)
|
||||
|
||||
vmovq _args_digest+0*32(state, idx, 8), %xmm0
|
||||
vpinsrq $1, _args_digest+1*32(state, idx, 8), %xmm0, %xmm0
|
||||
vmovq _args_digest+2*32(state, idx, 8), %xmm1
|
||||
vpinsrq $1, _args_digest+3*32(state, idx, 8), %xmm1, %xmm1
|
||||
vmovq _args_digest+4*32(state, idx, 8), %xmm2
|
||||
vpinsrq $1, _args_digest+5*32(state, idx, 8), %xmm2, %xmm2
|
||||
vmovq _args_digest+6*32(state, idx, 8), %xmm3
|
||||
vpinsrq $1, _args_digest+7*32(state, idx, 8), %xmm3, %xmm3
|
||||
|
||||
vmovdqu %xmm0, _result_digest(job_rax)
|
||||
vmovdqu %xmm1, _result_digest+1*16(job_rax)
|
||||
vmovdqu %xmm2, _result_digest+2*16(job_rax)
|
||||
vmovdqu %xmm3, _result_digest+3*16(job_rax)
|
||||
|
||||
return:
|
||||
pop %rbx
|
||||
FRAME_END
|
||||
ret
|
||||
|
||||
return_null:
|
||||
xor job_rax, job_rax
|
||||
jmp return
|
||||
ENDPROC(sha512_mb_mgr_flush_avx2)
|
||||
.align 16
|
||||
|
||||
ENTRY(sha512_mb_mgr_get_comp_job_avx2)
|
||||
push %rbx
|
||||
|
||||
mov _unused_lanes(state), unused_lanes
|
||||
bt $(32+7), unused_lanes
|
||||
jc .return_null
|
||||
|
||||
# Find min length
|
||||
mov _lens(state),lens0
|
||||
mov lens0,idx
|
||||
mov _lens+1*8(state),lens1
|
||||
cmp idx,lens1
|
||||
cmovb lens1,idx
|
||||
mov _lens+2*8(state),lens2
|
||||
cmp idx,lens2
|
||||
cmovb lens2,idx
|
||||
mov _lens+3*8(state),lens3
|
||||
cmp idx,lens3
|
||||
cmovb lens3,idx
|
||||
test $~0xF,idx
|
||||
jnz .return_null
|
||||
and $0xF,idx
|
||||
|
||||
#process completed job "idx"
|
||||
imul $_LANE_DATA_size, idx, lane_data
|
||||
lea _ldata(state, lane_data), lane_data
|
||||
|
||||
mov _job_in_lane(lane_data), job_rax
|
||||
movq $0, _job_in_lane(lane_data)
|
||||
movl $STS_COMPLETED, _status(job_rax)
|
||||
mov _unused_lanes(state), unused_lanes
|
||||
shl $8, unused_lanes
|
||||
or idx, unused_lanes
|
||||
mov unused_lanes, _unused_lanes(state)
|
||||
|
||||
movl $0xFFFFFFFF, _lens+4(state, idx, 8)
|
||||
|
||||
vmovq _args_digest(state, idx, 8), %xmm0
|
||||
vpinsrq $1, _args_digest+1*32(state, idx, 8), %xmm0, %xmm0
|
||||
vmovq _args_digest+2*32(state, idx, 8), %xmm1
|
||||
vpinsrq $1, _args_digest+3*32(state, idx, 8), %xmm1, %xmm1
|
||||
vmovq _args_digest+4*32(state, idx, 8), %xmm2
|
||||
vpinsrq $1, _args_digest+5*32(state, idx, 8), %xmm2, %xmm2
|
||||
vmovq _args_digest+6*32(state, idx, 8), %xmm3
|
||||
vpinsrq $1, _args_digest+7*32(state, idx, 8), %xmm3, %xmm3
|
||||
|
||||
vmovdqu %xmm0, _result_digest+0*16(job_rax)
|
||||
vmovdqu %xmm1, _result_digest+1*16(job_rax)
|
||||
vmovdqu %xmm2, _result_digest+2*16(job_rax)
|
||||
vmovdqu %xmm3, _result_digest+3*16(job_rax)
|
||||
|
||||
pop %rbx
|
||||
|
||||
ret
|
||||
|
||||
.return_null:
|
||||
xor job_rax, job_rax
|
||||
pop %rbx
|
||||
ret
|
||||
ENDPROC(sha512_mb_mgr_get_comp_job_avx2)
|
||||
|
||||
.section .rodata.cst8.one, "aM", @progbits, 8
|
||||
.align 8
|
||||
one:
|
||||
.quad 1
|
||||
|
||||
.section .rodata.cst8.two, "aM", @progbits, 8
|
||||
.align 8
|
||||
two:
|
||||
.quad 2
|
||||
|
||||
.section .rodata.cst8.three, "aM", @progbits, 8
|
||||
.align 8
|
||||
three:
|
||||
.quad 3
|
@ -1,69 +0,0 @@
|
||||
/*
|
||||
* Initialization code for multi buffer SHA256 algorithm for AVX2
|
||||
*
|
||||
* This file is provided under a dual BSD/GPLv2 license. When using or
|
||||
* redistributing this file, you may do so under either license.
|
||||
*
|
||||
* GPL LICENSE SUMMARY
|
||||
*
|
||||
* Copyright(c) 2016 Intel Corporation.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of version 2 of the GNU General Public License as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* Contact Information:
|
||||
* Megha Dey <megha.dey@linux.intel.com>
|
||||
*
|
||||
* BSD LICENSE
|
||||
*
|
||||
* Copyright(c) 2016 Intel Corporation.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "sha512_mb_mgr.h"
|
||||
|
||||
void sha512_mb_mgr_init_avx2(struct sha512_mb_mgr *state)
|
||||
{
|
||||
unsigned int j;
|
||||
|
||||
/* initially all lanes are unused */
|
||||
state->lens[0] = 0xFFFFFFFF00000000;
|
||||
state->lens[1] = 0xFFFFFFFF00000001;
|
||||
state->lens[2] = 0xFFFFFFFF00000002;
|
||||
state->lens[3] = 0xFFFFFFFF00000003;
|
||||
|
||||
state->unused_lanes = 0xFF03020100;
|
||||
for (j = 0; j < 4; j++)
|
||||
state->ldata[j].job_in_lane = NULL;
|
||||
}
|
@ -1,224 +0,0 @@
|
||||
/*
|
||||
* Buffer submit code for multi buffer SHA512 algorithm
|
||||
*
|
||||
* This file is provided under a dual BSD/GPLv2 license. When using or
|
||||
* redistributing this file, you may do so under either license.
|
||||
*
|
||||
* GPL LICENSE SUMMARY
|
||||
*
|
||||
* Copyright(c) 2016 Intel Corporation.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of version 2 of the GNU General Public License as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* Contact Information:
|
||||
* Megha Dey <megha.dey@linux.intel.com>
|
||||
*
|
||||
* BSD LICENSE
|
||||
*
|
||||
* Copyright(c) 2016 Intel Corporation.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <linux/linkage.h>
|
||||
#include <asm/frame.h>
|
||||
#include "sha512_mb_mgr_datastruct.S"
|
||||
|
||||
.extern sha512_x4_avx2
|
||||
|
||||
#define arg1 %rdi
|
||||
#define arg2 %rsi
|
||||
|
||||
#define idx %rdx
|
||||
#define last_len %rdx
|
||||
|
||||
#define size_offset %rcx
|
||||
#define tmp2 %rcx
|
||||
|
||||
# Common definitions
|
||||
#define state arg1
|
||||
#define job arg2
|
||||
#define len2 arg2
|
||||
#define p2 arg2
|
||||
|
||||
#define p %r11
|
||||
#define start_offset %r11
|
||||
|
||||
#define unused_lanes %rbx
|
||||
|
||||
#define job_rax %rax
|
||||
#define len %rax
|
||||
|
||||
#define lane %r12
|
||||
#define tmp3 %r12
|
||||
#define lens3 %r12
|
||||
|
||||
#define extra_blocks %r8
|
||||
#define lens0 %r8
|
||||
|
||||
#define tmp %r9
|
||||
#define lens1 %r9
|
||||
|
||||
#define lane_data %r10
|
||||
#define lens2 %r10
|
||||
|
||||
#define DWORD_len %eax
|
||||
|
||||
# JOB* sha512_mb_mgr_submit_avx2(MB_MGR *state, JOB *job)
|
||||
# arg 1 : rcx : state
|
||||
# arg 2 : rdx : job
|
||||
ENTRY(sha512_mb_mgr_submit_avx2)
|
||||
FRAME_BEGIN
|
||||
push %rbx
|
||||
push %r12
|
||||
|
||||
mov _unused_lanes(state), unused_lanes
|
||||
movzb %bl,lane
|
||||
shr $8, unused_lanes
|
||||
imul $_LANE_DATA_size, lane,lane_data
|
||||
movl $STS_BEING_PROCESSED, _status(job)
|
||||
lea _ldata(state, lane_data), lane_data
|
||||
mov unused_lanes, _unused_lanes(state)
|
||||
movl _len(job), DWORD_len
|
||||
|
||||
mov job, _job_in_lane(lane_data)
|
||||
movl DWORD_len,_lens+4(state , lane, 8)
|
||||
|
||||
# Load digest words from result_digest
|
||||
vmovdqu _result_digest+0*16(job), %xmm0
|
||||
vmovdqu _result_digest+1*16(job), %xmm1
|
||||
vmovdqu _result_digest+2*16(job), %xmm2
|
||||
vmovdqu _result_digest+3*16(job), %xmm3
|
||||
|
||||
vmovq %xmm0, _args_digest(state, lane, 8)
|
||||
vpextrq $1, %xmm0, _args_digest+1*32(state , lane, 8)
|
||||
vmovq %xmm1, _args_digest+2*32(state , lane, 8)
|
||||
vpextrq $1, %xmm1, _args_digest+3*32(state , lane, 8)
|
||||
vmovq %xmm2, _args_digest+4*32(state , lane, 8)
|
||||
vpextrq $1, %xmm2, _args_digest+5*32(state , lane, 8)
|
||||
vmovq %xmm3, _args_digest+6*32(state , lane, 8)
|
||||
vpextrq $1, %xmm3, _args_digest+7*32(state , lane, 8)
|
||||
|
||||
mov _buffer(job), p
|
||||
mov p, _args_data_ptr(state, lane, 8)
|
||||
|
||||
cmp $0xFF, unused_lanes
|
||||
jne return_null
|
||||
|
||||
start_loop:
|
||||
|
||||
# Find min length
|
||||
mov _lens+0*8(state),lens0
|
||||
mov lens0,idx
|
||||
mov _lens+1*8(state),lens1
|
||||
cmp idx,lens1
|
||||
cmovb lens1, idx
|
||||
mov _lens+2*8(state),lens2
|
||||
cmp idx,lens2
|
||||
cmovb lens2,idx
|
||||
mov _lens+3*8(state),lens3
|
||||
cmp idx,lens3
|
||||
cmovb lens3,idx
|
||||
mov idx,len2
|
||||
and $0xF,idx
|
||||
and $~0xFF,len2
|
||||
jz len_is_0
|
||||
|
||||
sub len2,lens0
|
||||
sub len2,lens1
|
||||
sub len2,lens2
|
||||
sub len2,lens3
|
||||
shr $32,len2
|
||||
mov lens0, _lens + 0*8(state)
|
||||
mov lens1, _lens + 1*8(state)
|
||||
mov lens2, _lens + 2*8(state)
|
||||
mov lens3, _lens + 3*8(state)
|
||||
|
||||
# "state" and "args" are the same address, arg1
|
||||
# len is arg2
|
||||
call sha512_x4_avx2
|
||||
# state and idx are intact
|
||||
|
||||
len_is_0:
|
||||
|
||||
# process completed job "idx"
|
||||
imul $_LANE_DATA_size, idx, lane_data
|
||||
lea _ldata(state, lane_data), lane_data
|
||||
|
||||
mov _job_in_lane(lane_data), job_rax
|
||||
mov _unused_lanes(state), unused_lanes
|
||||
movq $0, _job_in_lane(lane_data)
|
||||
movl $STS_COMPLETED, _status(job_rax)
|
||||
shl $8, unused_lanes
|
||||
or idx, unused_lanes
|
||||
mov unused_lanes, _unused_lanes(state)
|
||||
|
||||
movl $0xFFFFFFFF,_lens+4(state,idx,8)
|
||||
vmovq _args_digest+0*32(state , idx, 8), %xmm0
|
||||
vpinsrq $1, _args_digest+1*32(state , idx, 8), %xmm0, %xmm0
|
||||
vmovq _args_digest+2*32(state , idx, 8), %xmm1
|
||||
vpinsrq $1, _args_digest+3*32(state , idx, 8), %xmm1, %xmm1
|
||||
vmovq _args_digest+4*32(state , idx, 8), %xmm2
|
||||
vpinsrq $1, _args_digest+5*32(state , idx, 8), %xmm2, %xmm2
|
||||
vmovq _args_digest+6*32(state , idx, 8), %xmm3
|
||||
vpinsrq $1, _args_digest+7*32(state , idx, 8), %xmm3, %xmm3
|
||||
|
||||
vmovdqu %xmm0, _result_digest + 0*16(job_rax)
|
||||
vmovdqu %xmm1, _result_digest + 1*16(job_rax)
|
||||
vmovdqu %xmm2, _result_digest + 2*16(job_rax)
|
||||
vmovdqu %xmm3, _result_digest + 3*16(job_rax)
|
||||
|
||||
return:
|
||||
pop %r12
|
||||
pop %rbx
|
||||
FRAME_END
|
||||
ret
|
||||
|
||||
return_null:
|
||||
xor job_rax, job_rax
|
||||
jmp return
|
||||
ENDPROC(sha512_mb_mgr_submit_avx2)
|
||||
|
||||
/* UNUSED?
|
||||
.section .rodata.cst16, "aM", @progbits, 16
|
||||
.align 16
|
||||
H0: .int 0x6a09e667
|
||||
H1: .int 0xbb67ae85
|
||||
H2: .int 0x3c6ef372
|
||||
H3: .int 0xa54ff53a
|
||||
H4: .int 0x510e527f
|
||||
H5: .int 0x9b05688c
|
||||
H6: .int 0x1f83d9ab
|
||||
H7: .int 0x5be0cd19
|
||||
*/
|
@ -1,531 +0,0 @@
|
||||
/*
|
||||
* Multi-buffer SHA512 algorithm hash compute routine
|
||||
*
|
||||
* This file is provided under a dual BSD/GPLv2 license. When using or
|
||||
* redistributing this file, you may do so under either license.
|
||||
*
|
||||
* GPL LICENSE SUMMARY
|
||||
*
|
||||
* Copyright(c) 2016 Intel Corporation.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of version 2 of the GNU General Public License as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* Contact Information:
|
||||
* Megha Dey <megha.dey@linux.intel.com>
|
||||
*
|
||||
* BSD LICENSE
|
||||
*
|
||||
* Copyright(c) 2016 Intel Corporation.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
# code to compute quad SHA512 using AVX2
|
||||
# use YMMs to tackle the larger digest size
|
||||
# outer calling routine takes care of save and restore of XMM registers
|
||||
# Logic designed/laid out by JDG
|
||||
|
||||
# Function clobbers: rax, rcx, rdx, rbx, rsi, rdi, r9-r15; ymm0-15
|
||||
# Stack must be aligned to 32 bytes before call
|
||||
# Linux clobbers: rax rbx rcx rsi r8 r9 r10 r11 r12
|
||||
# Linux preserves: rcx rdx rdi rbp r13 r14 r15
|
||||
# clobbers ymm0-15
|
||||
|
||||
#include <linux/linkage.h>
|
||||
#include "sha512_mb_mgr_datastruct.S"
|
||||
|
||||
arg1 = %rdi
|
||||
arg2 = %rsi
|
||||
|
||||
# Common definitions
|
||||
STATE = arg1
|
||||
INP_SIZE = arg2
|
||||
|
||||
IDX = %rax
|
||||
ROUND = %rbx
|
||||
TBL = %r8
|
||||
|
||||
inp0 = %r9
|
||||
inp1 = %r10
|
||||
inp2 = %r11
|
||||
inp3 = %r12
|
||||
|
||||
a = %ymm0
|
||||
b = %ymm1
|
||||
c = %ymm2
|
||||
d = %ymm3
|
||||
e = %ymm4
|
||||
f = %ymm5
|
||||
g = %ymm6
|
||||
h = %ymm7
|
||||
|
||||
a0 = %ymm8
|
||||
a1 = %ymm9
|
||||
a2 = %ymm10
|
||||
|
||||
TT0 = %ymm14
|
||||
TT1 = %ymm13
|
||||
TT2 = %ymm12
|
||||
TT3 = %ymm11
|
||||
TT4 = %ymm10
|
||||
TT5 = %ymm9
|
||||
|
||||
T1 = %ymm14
|
||||
TMP = %ymm15
|
||||
|
||||
# Define stack usage
|
||||
STACK_SPACE1 = SZ4*16 + NUM_SHA512_DIGEST_WORDS*SZ4 + 24
|
||||
|
||||
#define VMOVPD vmovupd
|
||||
_digest = SZ4*16
|
||||
|
||||
# transpose r0, r1, r2, r3, t0, t1
|
||||
# "transpose" data in {r0..r3} using temps {t0..t3}
|
||||
# Input looks like: {r0 r1 r2 r3}
|
||||
# r0 = {a7 a6 a5 a4 a3 a2 a1 a0}
|
||||
# r1 = {b7 b6 b5 b4 b3 b2 b1 b0}
|
||||
# r2 = {c7 c6 c5 c4 c3 c2 c1 c0}
|
||||
# r3 = {d7 d6 d5 d4 d3 d2 d1 d0}
|
||||
#
|
||||
# output looks like: {t0 r1 r0 r3}
|
||||
# t0 = {d1 d0 c1 c0 b1 b0 a1 a0}
|
||||
# r1 = {d3 d2 c3 c2 b3 b2 a3 a2}
|
||||
# r0 = {d5 d4 c5 c4 b5 b4 a5 a4}
|
||||
# r3 = {d7 d6 c7 c6 b7 b6 a7 a6}
|
||||
|
||||
.macro TRANSPOSE r0 r1 r2 r3 t0 t1
|
||||
vshufps $0x44, \r1, \r0, \t0 # t0 = {b5 b4 a5 a4 b1 b0 a1 a0}
|
||||
vshufps $0xEE, \r1, \r0, \r0 # r0 = {b7 b6 a7 a6 b3 b2 a3 a2}
|
||||
vshufps $0x44, \r3, \r2, \t1 # t1 = {d5 d4 c5 c4 d1 d0 c1 c0}
|
||||
vshufps $0xEE, \r3, \r2, \r2 # r2 = {d7 d6 c7 c6 d3 d2 c3 c2}
|
||||
|
||||
vperm2f128 $0x20, \r2, \r0, \r1 # h6...a6
|
||||
vperm2f128 $0x31, \r2, \r0, \r3 # h2...a2
|
||||
vperm2f128 $0x31, \t1, \t0, \r0 # h5...a5
|
||||
vperm2f128 $0x20, \t1, \t0, \t0 # h1...a1
|
||||
.endm
|
||||
|
||||
.macro ROTATE_ARGS
|
||||
TMP_ = h
|
||||
h = g
|
||||
g = f
|
||||
f = e
|
||||
e = d
|
||||
d = c
|
||||
c = b
|
||||
b = a
|
||||
a = TMP_
|
||||
.endm
|
||||
|
||||
# PRORQ reg, imm, tmp
|
||||
# packed-rotate-right-double
|
||||
# does a rotate by doing two shifts and an or
|
||||
.macro _PRORQ reg imm tmp
|
||||
vpsllq $(64-\imm),\reg,\tmp
|
||||
vpsrlq $\imm,\reg, \reg
|
||||
vpor \tmp,\reg, \reg
|
||||
.endm
|
||||
|
||||
# non-destructive
|
||||
# PRORQ_nd reg, imm, tmp, src
|
||||
.macro _PRORQ_nd reg imm tmp src
|
||||
vpsllq $(64-\imm), \src, \tmp
|
||||
vpsrlq $\imm, \src, \reg
|
||||
vpor \tmp, \reg, \reg
|
||||
.endm
|
||||
|
||||
# PRORQ dst/src, amt
|
||||
.macro PRORQ reg imm
|
||||
_PRORQ \reg, \imm, TMP
|
||||
.endm
|
||||
|
||||
# PRORQ_nd dst, src, amt
|
||||
.macro PRORQ_nd reg tmp imm
|
||||
_PRORQ_nd \reg, \imm, TMP, \tmp
|
||||
.endm
|
||||
|
||||
#; arguments passed implicitly in preprocessor symbols i, a...h
|
||||
.macro ROUND_00_15 _T1 i
|
||||
PRORQ_nd a0, e, (18-14) # sig1: a0 = (e >> 4)
|
||||
|
||||
vpxor g, f, a2 # ch: a2 = f^g
|
||||
vpand e,a2, a2 # ch: a2 = (f^g)&e
|
||||
vpxor g, a2, a2 # a2 = ch
|
||||
|
||||
PRORQ_nd a1,e,41 # sig1: a1 = (e >> 25)
|
||||
|
||||
offset = SZ4*(\i & 0xf)
|
||||
vmovdqu \_T1,offset(%rsp)
|
||||
vpaddq (TBL,ROUND,1), \_T1, \_T1 # T1 = W + K
|
||||
vpxor e,a0, a0 # sig1: a0 = e ^ (e >> 5)
|
||||
PRORQ a0, 14 # sig1: a0 = (e >> 6) ^ (e >> 11)
|
||||
vpaddq a2, h, h # h = h + ch
|
||||
PRORQ_nd a2,a,6 # sig0: a2 = (a >> 11)
|
||||
vpaddq \_T1,h, h # h = h + ch + W + K
|
||||
vpxor a1, a0, a0 # a0 = sigma1
|
||||
vmovdqu a,\_T1
|
||||
PRORQ_nd a1,a,39 # sig0: a1 = (a >> 22)
|
||||
vpxor c, \_T1, \_T1 # maj: T1 = a^c
|
||||
add $SZ4, ROUND # ROUND++
|
||||
vpand b, \_T1, \_T1 # maj: T1 = (a^c)&b
|
||||
vpaddq a0, h, h
|
||||
vpaddq h, d, d
|
||||
vpxor a, a2, a2 # sig0: a2 = a ^ (a >> 11)
|
||||
PRORQ a2,28 # sig0: a2 = (a >> 2) ^ (a >> 13)
|
||||
vpxor a1, a2, a2 # a2 = sig0
|
||||
vpand c, a, a1 # maj: a1 = a&c
|
||||
vpor \_T1, a1, a1 # a1 = maj
|
||||
vpaddq a1, h, h # h = h + ch + W + K + maj
|
||||
vpaddq a2, h, h # h = h + ch + W + K + maj + sigma0
|
||||
ROTATE_ARGS
|
||||
.endm
|
||||
|
||||
|
||||
#; arguments passed implicitly in preprocessor symbols i, a...h
|
||||
.macro ROUND_16_XX _T1 i
|
||||
vmovdqu SZ4*((\i-15)&0xf)(%rsp), \_T1
|
||||
vmovdqu SZ4*((\i-2)&0xf)(%rsp), a1
|
||||
vmovdqu \_T1, a0
|
||||
PRORQ \_T1,7
|
||||
vmovdqu a1, a2
|
||||
PRORQ a1,42
|
||||
vpxor a0, \_T1, \_T1
|
||||
PRORQ \_T1, 1
|
||||
vpxor a2, a1, a1
|
||||
PRORQ a1, 19
|
||||
vpsrlq $7, a0, a0
|
||||
vpxor a0, \_T1, \_T1
|
||||
vpsrlq $6, a2, a2
|
||||
vpxor a2, a1, a1
|
||||
vpaddq SZ4*((\i-16)&0xf)(%rsp), \_T1, \_T1
|
||||
vpaddq SZ4*((\i-7)&0xf)(%rsp), a1, a1
|
||||
vpaddq a1, \_T1, \_T1
|
||||
|
||||
ROUND_00_15 \_T1,\i
|
||||
.endm
|
||||
|
||||
|
||||
# void sha512_x4_avx2(void *STATE, const int INP_SIZE)
|
||||
# arg 1 : STATE : pointer to input data
|
||||
# arg 2 : INP_SIZE : size of data in blocks (assumed >= 1)
|
||||
ENTRY(sha512_x4_avx2)
|
||||
# general registers preserved in outer calling routine
|
||||
# outer calling routine saves all the XMM registers
|
||||
# save callee-saved clobbered registers to comply with C function ABI
|
||||
push %r12
|
||||
push %r13
|
||||
push %r14
|
||||
push %r15
|
||||
|
||||
sub $STACK_SPACE1, %rsp
|
||||
|
||||
# Load the pre-transposed incoming digest.
|
||||
vmovdqu 0*SHA512_DIGEST_ROW_SIZE(STATE),a
|
||||
vmovdqu 1*SHA512_DIGEST_ROW_SIZE(STATE),b
|
||||
vmovdqu 2*SHA512_DIGEST_ROW_SIZE(STATE),c
|
||||
vmovdqu 3*SHA512_DIGEST_ROW_SIZE(STATE),d
|
||||
vmovdqu 4*SHA512_DIGEST_ROW_SIZE(STATE),e
|
||||
vmovdqu 5*SHA512_DIGEST_ROW_SIZE(STATE),f
|
||||
vmovdqu 6*SHA512_DIGEST_ROW_SIZE(STATE),g
|
||||
vmovdqu 7*SHA512_DIGEST_ROW_SIZE(STATE),h
|
||||
|
||||
lea K512_4(%rip),TBL
|
||||
|
||||
# load the address of each of the 4 message lanes
|
||||
# getting ready to transpose input onto stack
|
||||
mov _data_ptr+0*PTR_SZ(STATE),inp0
|
||||
mov _data_ptr+1*PTR_SZ(STATE),inp1
|
||||
mov _data_ptr+2*PTR_SZ(STATE),inp2
|
||||
mov _data_ptr+3*PTR_SZ(STATE),inp3
|
||||
|
||||
xor IDX, IDX
|
||||
lloop:
|
||||
xor ROUND, ROUND
|
||||
|
||||
# save old digest
|
||||
vmovdqu a, _digest(%rsp)
|
||||
vmovdqu b, _digest+1*SZ4(%rsp)
|
||||
vmovdqu c, _digest+2*SZ4(%rsp)
|
||||
vmovdqu d, _digest+3*SZ4(%rsp)
|
||||
vmovdqu e, _digest+4*SZ4(%rsp)
|
||||
vmovdqu f, _digest+5*SZ4(%rsp)
|
||||
vmovdqu g, _digest+6*SZ4(%rsp)
|
||||
vmovdqu h, _digest+7*SZ4(%rsp)
|
||||
i = 0
|
||||
.rep 4
|
||||
vmovdqu PSHUFFLE_BYTE_FLIP_MASK(%rip), TMP
|
||||
VMOVPD i*32(inp0, IDX), TT2
|
||||
VMOVPD i*32(inp1, IDX), TT1
|
||||
VMOVPD i*32(inp2, IDX), TT4
|
||||
VMOVPD i*32(inp3, IDX), TT3
|
||||
TRANSPOSE TT2, TT1, TT4, TT3, TT0, TT5
|
||||
vpshufb TMP, TT0, TT0
|
||||
vpshufb TMP, TT1, TT1
|
||||
vpshufb TMP, TT2, TT2
|
||||
vpshufb TMP, TT3, TT3
|
||||
ROUND_00_15 TT0,(i*4+0)
|
||||
ROUND_00_15 TT1,(i*4+1)
|
||||
ROUND_00_15 TT2,(i*4+2)
|
||||
ROUND_00_15 TT3,(i*4+3)
|
||||
i = (i+1)
|
||||
.endr
|
||||
add $128, IDX
|
||||
|
||||
i = (i*4)
|
||||
|
||||
jmp Lrounds_16_xx
|
||||
.align 16
|
||||
Lrounds_16_xx:
|
||||
.rep 16
|
||||
ROUND_16_XX T1, i
|
||||
i = (i+1)
|
||||
.endr
|
||||
cmp $0xa00,ROUND
|
||||
jb Lrounds_16_xx
|
||||
|
||||
# add old digest
|
||||
vpaddq _digest(%rsp), a, a
|
||||
vpaddq _digest+1*SZ4(%rsp), b, b
|
||||
vpaddq _digest+2*SZ4(%rsp), c, c
|
||||
vpaddq _digest+3*SZ4(%rsp), d, d
|
||||
vpaddq _digest+4*SZ4(%rsp), e, e
|
||||
vpaddq _digest+5*SZ4(%rsp), f, f
|
||||
vpaddq _digest+6*SZ4(%rsp), g, g
|
||||
vpaddq _digest+7*SZ4(%rsp), h, h
|
||||
|
||||
sub $1, INP_SIZE # unit is blocks
|
||||
jne lloop
|
||||
|
||||
# write back to memory (state object) the transposed digest
|
||||
vmovdqu a, 0*SHA512_DIGEST_ROW_SIZE(STATE)
|
||||
vmovdqu b, 1*SHA512_DIGEST_ROW_SIZE(STATE)
|
||||
vmovdqu c, 2*SHA512_DIGEST_ROW_SIZE(STATE)
|
||||
vmovdqu d, 3*SHA512_DIGEST_ROW_SIZE(STATE)
|
||||
vmovdqu e, 4*SHA512_DIGEST_ROW_SIZE(STATE)
|
||||
vmovdqu f, 5*SHA512_DIGEST_ROW_SIZE(STATE)
|
||||
vmovdqu g, 6*SHA512_DIGEST_ROW_SIZE(STATE)
|
||||
vmovdqu h, 7*SHA512_DIGEST_ROW_SIZE(STATE)
|
||||
|
||||
# update input data pointers
|
||||
add IDX, inp0
|
||||
mov inp0, _data_ptr+0*PTR_SZ(STATE)
|
||||
add IDX, inp1
|
||||
mov inp1, _data_ptr+1*PTR_SZ(STATE)
|
||||
add IDX, inp2
|
||||
mov inp2, _data_ptr+2*PTR_SZ(STATE)
|
||||
add IDX, inp3
|
||||
mov inp3, _data_ptr+3*PTR_SZ(STATE)
|
||||
|
||||
#;;;;;;;;;;;;;;;
|
||||
#; Postamble
|
||||
add $STACK_SPACE1, %rsp
|
||||
# restore callee-saved clobbered registers
|
||||
|
||||
pop %r15
|
||||
pop %r14
|
||||
pop %r13
|
||||
pop %r12
|
||||
|
||||
# outer calling routine restores XMM and other GP registers
|
||||
ret
|
||||
ENDPROC(sha512_x4_avx2)
|
||||
|
||||
.section .rodata.K512_4, "a", @progbits
|
||||
.align 64
|
||||
K512_4:
|
||||
.octa 0x428a2f98d728ae22428a2f98d728ae22,\
|
||||
0x428a2f98d728ae22428a2f98d728ae22
|
||||
.octa 0x7137449123ef65cd7137449123ef65cd,\
|
||||
0x7137449123ef65cd7137449123ef65cd
|
||||
.octa 0xb5c0fbcfec4d3b2fb5c0fbcfec4d3b2f,\
|
||||
0xb5c0fbcfec4d3b2fb5c0fbcfec4d3b2f
|
||||
.octa 0xe9b5dba58189dbbce9b5dba58189dbbc,\
|
||||
0xe9b5dba58189dbbce9b5dba58189dbbc
|
||||
.octa 0x3956c25bf348b5383956c25bf348b538,\
|
||||
0x3956c25bf348b5383956c25bf348b538
|
||||
.octa 0x59f111f1b605d01959f111f1b605d019,\
|
||||
0x59f111f1b605d01959f111f1b605d019
|
||||
.octa 0x923f82a4af194f9b923f82a4af194f9b,\
|
||||
0x923f82a4af194f9b923f82a4af194f9b
|
||||
.octa 0xab1c5ed5da6d8118ab1c5ed5da6d8118,\
|
||||
0xab1c5ed5da6d8118ab1c5ed5da6d8118
|
||||
.octa 0xd807aa98a3030242d807aa98a3030242,\
|
||||
0xd807aa98a3030242d807aa98a3030242
|
||||
.octa 0x12835b0145706fbe12835b0145706fbe,\
|
||||
0x12835b0145706fbe12835b0145706fbe
|
||||
.octa 0x243185be4ee4b28c243185be4ee4b28c,\
|
||||
0x243185be4ee4b28c243185be4ee4b28c
|
||||
.octa 0x550c7dc3d5ffb4e2550c7dc3d5ffb4e2,\
|
||||
0x550c7dc3d5ffb4e2550c7dc3d5ffb4e2
|
||||
.octa 0x72be5d74f27b896f72be5d74f27b896f,\
|
||||
0x72be5d74f27b896f72be5d74f27b896f
|
||||
.octa 0x80deb1fe3b1696b180deb1fe3b1696b1,\
|
||||
0x80deb1fe3b1696b180deb1fe3b1696b1
|
||||
.octa 0x9bdc06a725c712359bdc06a725c71235,\
|
||||
0x9bdc06a725c712359bdc06a725c71235
|
||||
.octa 0xc19bf174cf692694c19bf174cf692694,\
|
||||
0xc19bf174cf692694c19bf174cf692694
|
||||
.octa 0xe49b69c19ef14ad2e49b69c19ef14ad2,\
|
||||
0xe49b69c19ef14ad2e49b69c19ef14ad2
|
||||
.octa 0xefbe4786384f25e3efbe4786384f25e3,\
|
||||
0xefbe4786384f25e3efbe4786384f25e3
|
||||
.octa 0x0fc19dc68b8cd5b50fc19dc68b8cd5b5,\
|
||||
0x0fc19dc68b8cd5b50fc19dc68b8cd5b5
|
||||
.octa 0x240ca1cc77ac9c65240ca1cc77ac9c65,\
|
||||
0x240ca1cc77ac9c65240ca1cc77ac9c65
|
||||
.octa 0x2de92c6f592b02752de92c6f592b0275,\
|
||||
0x2de92c6f592b02752de92c6f592b0275
|
||||
.octa 0x4a7484aa6ea6e4834a7484aa6ea6e483,\
|
||||
0x4a7484aa6ea6e4834a7484aa6ea6e483
|
||||
.octa 0x5cb0a9dcbd41fbd45cb0a9dcbd41fbd4,\
|
||||
0x5cb0a9dcbd41fbd45cb0a9dcbd41fbd4
|
||||
.octa 0x76f988da831153b576f988da831153b5,\
|
||||
0x76f988da831153b576f988da831153b5
|
||||
.octa 0x983e5152ee66dfab983e5152ee66dfab,\
|
||||
0x983e5152ee66dfab983e5152ee66dfab
|
||||
.octa 0xa831c66d2db43210a831c66d2db43210,\
|
||||
0xa831c66d2db43210a831c66d2db43210
|
||||
.octa 0xb00327c898fb213fb00327c898fb213f,\
|
||||
0xb00327c898fb213fb00327c898fb213f
|
||||
.octa 0xbf597fc7beef0ee4bf597fc7beef0ee4,\
|
||||
0xbf597fc7beef0ee4bf597fc7beef0ee4
|
||||
.octa 0xc6e00bf33da88fc2c6e00bf33da88fc2,\
|
||||
0xc6e00bf33da88fc2c6e00bf33da88fc2
|
||||
.octa 0xd5a79147930aa725d5a79147930aa725,\
|
||||
0xd5a79147930aa725d5a79147930aa725
|
||||
.octa 0x06ca6351e003826f06ca6351e003826f,\
|
||||
0x06ca6351e003826f06ca6351e003826f
|
||||
.octa 0x142929670a0e6e70142929670a0e6e70,\
|
||||
0x142929670a0e6e70142929670a0e6e70
|
||||
.octa 0x27b70a8546d22ffc27b70a8546d22ffc,\
|
||||
0x27b70a8546d22ffc27b70a8546d22ffc
|
||||
.octa 0x2e1b21385c26c9262e1b21385c26c926,\
|
||||
0x2e1b21385c26c9262e1b21385c26c926
|
||||
.octa 0x4d2c6dfc5ac42aed4d2c6dfc5ac42aed,\
|
||||
0x4d2c6dfc5ac42aed4d2c6dfc5ac42aed
|
||||
.octa 0x53380d139d95b3df53380d139d95b3df,\
|
||||
0x53380d139d95b3df53380d139d95b3df
|
||||
.octa 0x650a73548baf63de650a73548baf63de,\
|
||||
0x650a73548baf63de650a73548baf63de
|
||||
.octa 0x766a0abb3c77b2a8766a0abb3c77b2a8,\
|
||||
0x766a0abb3c77b2a8766a0abb3c77b2a8
|
||||
.octa 0x81c2c92e47edaee681c2c92e47edaee6,\
|
||||
0x81c2c92e47edaee681c2c92e47edaee6
|
||||
.octa 0x92722c851482353b92722c851482353b,\
|
||||
0x92722c851482353b92722c851482353b
|
||||
.octa 0xa2bfe8a14cf10364a2bfe8a14cf10364,\
|
||||
0xa2bfe8a14cf10364a2bfe8a14cf10364
|
||||
.octa 0xa81a664bbc423001a81a664bbc423001,\
|
||||
0xa81a664bbc423001a81a664bbc423001
|
||||
.octa 0xc24b8b70d0f89791c24b8b70d0f89791,\
|
||||
0xc24b8b70d0f89791c24b8b70d0f89791
|
||||
.octa 0xc76c51a30654be30c76c51a30654be30,\
|
||||
0xc76c51a30654be30c76c51a30654be30
|
||||
.octa 0xd192e819d6ef5218d192e819d6ef5218,\
|
||||
0xd192e819d6ef5218d192e819d6ef5218
|
||||
.octa 0xd69906245565a910d69906245565a910,\
|
||||
0xd69906245565a910d69906245565a910
|
||||
.octa 0xf40e35855771202af40e35855771202a,\
|
||||
0xf40e35855771202af40e35855771202a
|
||||
.octa 0x106aa07032bbd1b8106aa07032bbd1b8,\
|
||||
0x106aa07032bbd1b8106aa07032bbd1b8
|
||||
.octa 0x19a4c116b8d2d0c819a4c116b8d2d0c8,\
|
||||
0x19a4c116b8d2d0c819a4c116b8d2d0c8
|
||||
.octa 0x1e376c085141ab531e376c085141ab53,\
|
||||
0x1e376c085141ab531e376c085141ab53
|
||||
.octa 0x2748774cdf8eeb992748774cdf8eeb99,\
|
||||
0x2748774cdf8eeb992748774cdf8eeb99
|
||||
.octa 0x34b0bcb5e19b48a834b0bcb5e19b48a8,\
|
||||
0x34b0bcb5e19b48a834b0bcb5e19b48a8
|
||||
.octa 0x391c0cb3c5c95a63391c0cb3c5c95a63,\
|
||||
0x391c0cb3c5c95a63391c0cb3c5c95a63
|
||||
.octa 0x4ed8aa4ae3418acb4ed8aa4ae3418acb,\
|
||||
0x4ed8aa4ae3418acb4ed8aa4ae3418acb
|
||||
.octa 0x5b9cca4f7763e3735b9cca4f7763e373,\
|
||||
0x5b9cca4f7763e3735b9cca4f7763e373
|
||||
.octa 0x682e6ff3d6b2b8a3682e6ff3d6b2b8a3,\
|
||||
0x682e6ff3d6b2b8a3682e6ff3d6b2b8a3
|
||||
.octa 0x748f82ee5defb2fc748f82ee5defb2fc,\
|
||||
0x748f82ee5defb2fc748f82ee5defb2fc
|
||||
.octa 0x78a5636f43172f6078a5636f43172f60,\
|
||||
0x78a5636f43172f6078a5636f43172f60
|
||||
.octa 0x84c87814a1f0ab7284c87814a1f0ab72,\
|
||||
0x84c87814a1f0ab7284c87814a1f0ab72
|
||||
.octa 0x8cc702081a6439ec8cc702081a6439ec,\
|
||||
0x8cc702081a6439ec8cc702081a6439ec
|
||||
.octa 0x90befffa23631e2890befffa23631e28,\
|
||||
0x90befffa23631e2890befffa23631e28
|
||||
.octa 0xa4506cebde82bde9a4506cebde82bde9,\
|
||||
0xa4506cebde82bde9a4506cebde82bde9
|
||||
.octa 0xbef9a3f7b2c67915bef9a3f7b2c67915,\
|
||||
0xbef9a3f7b2c67915bef9a3f7b2c67915
|
||||
.octa 0xc67178f2e372532bc67178f2e372532b,\
|
||||
0xc67178f2e372532bc67178f2e372532b
|
||||
.octa 0xca273eceea26619cca273eceea26619c,\
|
||||
0xca273eceea26619cca273eceea26619c
|
||||
.octa 0xd186b8c721c0c207d186b8c721c0c207,\
|
||||
0xd186b8c721c0c207d186b8c721c0c207
|
||||
.octa 0xeada7dd6cde0eb1eeada7dd6cde0eb1e,\
|
||||
0xeada7dd6cde0eb1eeada7dd6cde0eb1e
|
||||
.octa 0xf57d4f7fee6ed178f57d4f7fee6ed178,\
|
||||
0xf57d4f7fee6ed178f57d4f7fee6ed178
|
||||
.octa 0x06f067aa72176fba06f067aa72176fba,\
|
||||
0x06f067aa72176fba06f067aa72176fba
|
||||
.octa 0x0a637dc5a2c898a60a637dc5a2c898a6,\
|
||||
0x0a637dc5a2c898a60a637dc5a2c898a6
|
||||
.octa 0x113f9804bef90dae113f9804bef90dae,\
|
||||
0x113f9804bef90dae113f9804bef90dae
|
||||
.octa 0x1b710b35131c471b1b710b35131c471b,\
|
||||
0x1b710b35131c471b1b710b35131c471b
|
||||
.octa 0x28db77f523047d8428db77f523047d84,\
|
||||
0x28db77f523047d8428db77f523047d84
|
||||
.octa 0x32caab7b40c7249332caab7b40c72493,\
|
||||
0x32caab7b40c7249332caab7b40c72493
|
||||
.octa 0x3c9ebe0a15c9bebc3c9ebe0a15c9bebc,\
|
||||
0x3c9ebe0a15c9bebc3c9ebe0a15c9bebc
|
||||
.octa 0x431d67c49c100d4c431d67c49c100d4c,\
|
||||
0x431d67c49c100d4c431d67c49c100d4c
|
||||
.octa 0x4cc5d4becb3e42b64cc5d4becb3e42b6,\
|
||||
0x4cc5d4becb3e42b64cc5d4becb3e42b6
|
||||
.octa 0x597f299cfc657e2a597f299cfc657e2a,\
|
||||
0x597f299cfc657e2a597f299cfc657e2a
|
||||
.octa 0x5fcb6fab3ad6faec5fcb6fab3ad6faec,\
|
||||
0x5fcb6fab3ad6faec5fcb6fab3ad6faec
|
||||
.octa 0x6c44198c4a4758176c44198c4a475817,\
|
||||
0x6c44198c4a4758176c44198c4a475817
|
||||
|
||||
.section .rodata.cst32.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 32
|
||||
.align 32
|
||||
PSHUFFLE_BYTE_FLIP_MASK: .octa 0x08090a0b0c0d0e0f0001020304050607
|
||||
.octa 0x18191a1b1c1d1e1f1011121314151617
|
101
crypto/Kconfig
101
crypto/Kconfig
@ -213,20 +213,6 @@ config CRYPTO_CRYPTD
|
||||
converts an arbitrary synchronous software crypto algorithm
|
||||
into an asynchronous algorithm that executes in a kernel thread.
|
||||
|
||||
config CRYPTO_MCRYPTD
|
||||
tristate "Software async multi-buffer crypto daemon"
|
||||
select CRYPTO_BLKCIPHER
|
||||
select CRYPTO_HASH
|
||||
select CRYPTO_MANAGER
|
||||
select CRYPTO_WORKQUEUE
|
||||
help
|
||||
This is a generic software asynchronous crypto daemon that
|
||||
provides the kernel thread to assist multi-buffer crypto
|
||||
algorithms for submitting jobs and flushing jobs in multi-buffer
|
||||
crypto algorithms. Multi-buffer crypto algorithms are executed
|
||||
in the context of this kernel thread and drivers can post
|
||||
their crypto request asynchronously to be processed by this daemon.
|
||||
|
||||
config CRYPTO_AUTHENC
|
||||
tristate "Authenc support"
|
||||
select CRYPTO_AEAD
|
||||
@ -470,6 +456,18 @@ config CRYPTO_LRW
|
||||
The first 128, 192 or 256 bits in the key are used for AES and the
|
||||
rest is used to tie each cipher block to its logical position.
|
||||
|
||||
config CRYPTO_OFB
|
||||
tristate "OFB support"
|
||||
select CRYPTO_BLKCIPHER
|
||||
select CRYPTO_MANAGER
|
||||
help
|
||||
OFB: the Output Feedback mode makes a block cipher into a synchronous
|
||||
stream cipher. It generates keystream blocks, which are then XORed
|
||||
with the plaintext blocks to get the ciphertext. Flipping a bit in the
|
||||
ciphertext produces a flipped bit in the plaintext at the same
|
||||
location. This property allows many error correcting codes to function
|
||||
normally even when applied before encryption.
|
||||
|
||||
config CRYPTO_PCBC
|
||||
tristate "PCBC support"
|
||||
select CRYPTO_BLKCIPHER
|
||||
@ -848,54 +846,6 @@ config CRYPTO_SHA1_PPC_SPE
|
||||
SHA-1 secure hash standard (DFIPS 180-4) implemented
|
||||
using powerpc SPE SIMD instruction set.
|
||||
|
||||
config CRYPTO_SHA1_MB
|
||||
tristate "SHA1 digest algorithm (x86_64 Multi-Buffer, Experimental)"
|
||||
depends on X86 && 64BIT
|
||||
select CRYPTO_SHA1
|
||||
select CRYPTO_HASH
|
||||
select CRYPTO_MCRYPTD
|
||||
help
|
||||
SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented
|
||||
using multi-buffer technique. This algorithm computes on
|
||||
multiple data lanes concurrently with SIMD instructions for
|
||||
better throughput. It should not be enabled by default but
|
||||
used when there is significant amount of work to keep the keep
|
||||
the data lanes filled to get performance benefit. If the data
|
||||
lanes remain unfilled, a flush operation will be initiated to
|
||||
process the crypto jobs, adding a slight latency.
|
||||
|
||||
config CRYPTO_SHA256_MB
|
||||
tristate "SHA256 digest algorithm (x86_64 Multi-Buffer, Experimental)"
|
||||
depends on X86 && 64BIT
|
||||
select CRYPTO_SHA256
|
||||
select CRYPTO_HASH
|
||||
select CRYPTO_MCRYPTD
|
||||
help
|
||||
SHA-256 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented
|
||||
using multi-buffer technique. This algorithm computes on
|
||||
multiple data lanes concurrently with SIMD instructions for
|
||||
better throughput. It should not be enabled by default but
|
||||
used when there is significant amount of work to keep the keep
|
||||
the data lanes filled to get performance benefit. If the data
|
||||
lanes remain unfilled, a flush operation will be initiated to
|
||||
process the crypto jobs, adding a slight latency.
|
||||
|
||||
config CRYPTO_SHA512_MB
|
||||
tristate "SHA512 digest algorithm (x86_64 Multi-Buffer, Experimental)"
|
||||
depends on X86 && 64BIT
|
||||
select CRYPTO_SHA512
|
||||
select CRYPTO_HASH
|
||||
select CRYPTO_MCRYPTD
|
||||
help
|
||||
SHA-512 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented
|
||||
using multi-buffer technique. This algorithm computes on
|
||||
multiple data lanes concurrently with SIMD instructions for
|
||||
better throughput. It should not be enabled by default but
|
||||
used when there is significant amount of work to keep the keep
|
||||
the data lanes filled to get performance benefit. If the data
|
||||
lanes remain unfilled, a flush operation will be initiated to
|
||||
process the crypto jobs, adding a slight latency.
|
||||
|
||||
config CRYPTO_SHA256
|
||||
tristate "SHA224 and SHA256 digest algorithm"
|
||||
select CRYPTO_HASH
|
||||
@ -1133,7 +1083,7 @@ config CRYPTO_AES_NI_INTEL
|
||||
|
||||
In addition to AES cipher algorithm support, the acceleration
|
||||
for some popular block cipher mode is supported too, including
|
||||
ECB, CBC, LRW, PCBC, XTS. The 64 bit version has additional
|
||||
ECB, CBC, LRW, XTS. The 64 bit version has additional
|
||||
acceleration for CTR.
|
||||
|
||||
config CRYPTO_AES_SPARC64
|
||||
@ -1590,20 +1540,6 @@ config CRYPTO_SM4
|
||||
|
||||
If unsure, say N.
|
||||
|
||||
config CRYPTO_SPECK
|
||||
tristate "Speck cipher algorithm"
|
||||
select CRYPTO_ALGAPI
|
||||
help
|
||||
Speck is a lightweight block cipher that is tuned for optimal
|
||||
performance in software (rather than hardware).
|
||||
|
||||
Speck may not be as secure as AES, and should only be used on systems
|
||||
where AES is not fast enough.
|
||||
|
||||
See also: <https://eprint.iacr.org/2013/404.pdf>
|
||||
|
||||
If unsure, say N.
|
||||
|
||||
config CRYPTO_TEA
|
||||
tristate "TEA, XTEA and XETA cipher algorithms"
|
||||
select CRYPTO_ALGAPI
|
||||
@ -1875,6 +1811,17 @@ config CRYPTO_USER_API_AEAD
|
||||
This option enables the user-spaces interface for AEAD
|
||||
cipher algorithms.
|
||||
|
||||
config CRYPTO_STATS
|
||||
bool "Crypto usage statistics for User-space"
|
||||
help
|
||||
This option enables the gathering of crypto stats.
|
||||
This will collect:
|
||||
- encrypt/decrypt size and numbers of symmeric operations
|
||||
- compress/decompress size and numbers of compress operations
|
||||
- size and numbers of hash operations
|
||||
- encrypt/decrypt/sign/verify numbers for asymmetric operations
|
||||
- generate/seed numbers for rng operations
|
||||
|
||||
config CRYPTO_HASH_INFO
|
||||
bool
|
||||
|
||||
|
@ -54,6 +54,7 @@ cryptomgr-y := algboss.o testmgr.o
|
||||
|
||||
obj-$(CONFIG_CRYPTO_MANAGER2) += cryptomgr.o
|
||||
obj-$(CONFIG_CRYPTO_USER) += crypto_user.o
|
||||
crypto_user-y := crypto_user_base.o crypto_user_stat.o
|
||||
obj-$(CONFIG_CRYPTO_CMAC) += cmac.o
|
||||
obj-$(CONFIG_CRYPTO_HMAC) += hmac.o
|
||||
obj-$(CONFIG_CRYPTO_VMAC) += vmac.o
|
||||
@ -93,7 +94,6 @@ obj-$(CONFIG_CRYPTO_MORUS640) += morus640.o
|
||||
obj-$(CONFIG_CRYPTO_MORUS1280) += morus1280.o
|
||||
obj-$(CONFIG_CRYPTO_PCRYPT) += pcrypt.o
|
||||
obj-$(CONFIG_CRYPTO_CRYPTD) += cryptd.o
|
||||
obj-$(CONFIG_CRYPTO_MCRYPTD) += mcryptd.o
|
||||
obj-$(CONFIG_CRYPTO_DES) += des_generic.o
|
||||
obj-$(CONFIG_CRYPTO_FCRYPT) += fcrypt.o
|
||||
obj-$(CONFIG_CRYPTO_BLOWFISH) += blowfish_generic.o
|
||||
@ -115,7 +115,6 @@ obj-$(CONFIG_CRYPTO_TEA) += tea.o
|
||||
obj-$(CONFIG_CRYPTO_KHAZAD) += khazad.o
|
||||
obj-$(CONFIG_CRYPTO_ANUBIS) += anubis.o
|
||||
obj-$(CONFIG_CRYPTO_SEED) += seed.o
|
||||
obj-$(CONFIG_CRYPTO_SPECK) += speck.o
|
||||
obj-$(CONFIG_CRYPTO_SALSA20) += salsa20_generic.o
|
||||
obj-$(CONFIG_CRYPTO_CHACHA20) += chacha20_generic.o
|
||||
obj-$(CONFIG_CRYPTO_POLY1305) += poly1305_generic.o
|
||||
@ -143,6 +142,7 @@ obj-$(CONFIG_CRYPTO_USER_API_SKCIPHER) += algif_skcipher.o
|
||||
obj-$(CONFIG_CRYPTO_USER_API_RNG) += algif_rng.o
|
||||
obj-$(CONFIG_CRYPTO_USER_API_AEAD) += algif_aead.o
|
||||
obj-$(CONFIG_CRYPTO_ZSTD) += zstd.o
|
||||
obj-$(CONFIG_CRYPTO_OFB) += ofb.o
|
||||
|
||||
ecdh_generic-y := ecc.o
|
||||
ecdh_generic-y += ecdh.o
|
||||
|
@ -21,7 +21,7 @@
|
||||
|
||||
union aegis_block {
|
||||
__le64 words64[AEGIS_BLOCK_SIZE / sizeof(__le64)];
|
||||
u32 words32[AEGIS_BLOCK_SIZE / sizeof(u32)];
|
||||
__le32 words32[AEGIS_BLOCK_SIZE / sizeof(__le32)];
|
||||
u8 bytes[AEGIS_BLOCK_SIZE];
|
||||
};
|
||||
|
||||
@ -57,24 +57,22 @@ static void crypto_aegis_aesenc(union aegis_block *dst,
|
||||
const union aegis_block *src,
|
||||
const union aegis_block *key)
|
||||
{
|
||||
u32 *d = dst->words32;
|
||||
const u8 *s = src->bytes;
|
||||
const u32 *k = key->words32;
|
||||
const u32 *t0 = crypto_ft_tab[0];
|
||||
const u32 *t1 = crypto_ft_tab[1];
|
||||
const u32 *t2 = crypto_ft_tab[2];
|
||||
const u32 *t3 = crypto_ft_tab[3];
|
||||
u32 d0, d1, d2, d3;
|
||||
|
||||
d0 = t0[s[ 0]] ^ t1[s[ 5]] ^ t2[s[10]] ^ t3[s[15]] ^ k[0];
|
||||
d1 = t0[s[ 4]] ^ t1[s[ 9]] ^ t2[s[14]] ^ t3[s[ 3]] ^ k[1];
|
||||
d2 = t0[s[ 8]] ^ t1[s[13]] ^ t2[s[ 2]] ^ t3[s[ 7]] ^ k[2];
|
||||
d3 = t0[s[12]] ^ t1[s[ 1]] ^ t2[s[ 6]] ^ t3[s[11]] ^ k[3];
|
||||
d0 = t0[s[ 0]] ^ t1[s[ 5]] ^ t2[s[10]] ^ t3[s[15]];
|
||||
d1 = t0[s[ 4]] ^ t1[s[ 9]] ^ t2[s[14]] ^ t3[s[ 3]];
|
||||
d2 = t0[s[ 8]] ^ t1[s[13]] ^ t2[s[ 2]] ^ t3[s[ 7]];
|
||||
d3 = t0[s[12]] ^ t1[s[ 1]] ^ t2[s[ 6]] ^ t3[s[11]];
|
||||
|
||||
d[0] = d0;
|
||||
d[1] = d1;
|
||||
d[2] = d2;
|
||||
d[3] = d3;
|
||||
dst->words32[0] = cpu_to_le32(d0) ^ key->words32[0];
|
||||
dst->words32[1] = cpu_to_le32(d1) ^ key->words32[1];
|
||||
dst->words32[2] = cpu_to_le32(d2) ^ key->words32[2];
|
||||
dst->words32[3] = cpu_to_le32(d3) ^ key->words32[3];
|
||||
}
|
||||
|
||||
#endif /* _CRYPTO_AEGIS_H */
|
||||
|
@ -364,24 +364,35 @@ static int crypto_ahash_op(struct ahash_request *req,
|
||||
|
||||
int crypto_ahash_final(struct ahash_request *req)
|
||||
{
|
||||
return crypto_ahash_op(req, crypto_ahash_reqtfm(req)->final);
|
||||
int ret;
|
||||
|
||||
ret = crypto_ahash_op(req, crypto_ahash_reqtfm(req)->final);
|
||||
crypto_stat_ahash_final(req, ret);
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(crypto_ahash_final);
|
||||
|
||||
int crypto_ahash_finup(struct ahash_request *req)
|
||||
{
|
||||
return crypto_ahash_op(req, crypto_ahash_reqtfm(req)->finup);
|
||||
int ret;
|
||||
|
||||
ret = crypto_ahash_op(req, crypto_ahash_reqtfm(req)->finup);
|
||||
crypto_stat_ahash_final(req, ret);
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(crypto_ahash_finup);
|
||||
|
||||
int crypto_ahash_digest(struct ahash_request *req)
|
||||
{
|
||||
struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
|
||||
int ret;
|
||||
|
||||
if (crypto_ahash_get_flags(tfm) & CRYPTO_TFM_NEED_KEY)
|
||||
return -ENOKEY;
|
||||
|
||||
return crypto_ahash_op(req, tfm->digest);
|
||||
ret = -ENOKEY;
|
||||
else
|
||||
ret = crypto_ahash_op(req, tfm->digest);
|
||||
crypto_stat_ahash_final(req, ret);
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(crypto_ahash_digest);
|
||||
|
||||
@ -550,8 +561,8 @@ static int ahash_prepare_alg(struct ahash_alg *alg)
|
||||
{
|
||||
struct crypto_alg *base = &alg->halg.base;
|
||||
|
||||
if (alg->halg.digestsize > PAGE_SIZE / 8 ||
|
||||
alg->halg.statesize > PAGE_SIZE / 8 ||
|
||||
if (alg->halg.digestsize > HASH_MAX_DIGESTSIZE ||
|
||||
alg->halg.statesize > HASH_MAX_STATESIZE ||
|
||||
alg->halg.statesize == 0)
|
||||
return -EINVAL;
|
||||
|
||||
|
@ -57,9 +57,14 @@ static int crypto_check_alg(struct crypto_alg *alg)
|
||||
if (alg->cra_alignmask & (alg->cra_alignmask + 1))
|
||||
return -EINVAL;
|
||||
|
||||
if (alg->cra_blocksize > PAGE_SIZE / 8)
|
||||
/* General maximums for all algs. */
|
||||
if (alg->cra_alignmask > MAX_ALGAPI_ALIGNMASK)
|
||||
return -EINVAL;
|
||||
|
||||
if (alg->cra_blocksize > MAX_ALGAPI_BLOCKSIZE)
|
||||
return -EINVAL;
|
||||
|
||||
/* Lower maximums for specific alg types. */
|
||||
if (!alg->cra_type && (alg->cra_flags & CRYPTO_ALG_TYPE_MASK) ==
|
||||
CRYPTO_ALG_TYPE_CIPHER) {
|
||||
if (alg->cra_alignmask > MAX_CIPHER_ALIGNMASK)
|
||||
@ -253,6 +258,14 @@ static struct crypto_larval *__crypto_register_alg(struct crypto_alg *alg)
|
||||
list_add(&alg->cra_list, &crypto_alg_list);
|
||||
list_add(&larval->alg.cra_list, &crypto_alg_list);
|
||||
|
||||
atomic_set(&alg->encrypt_cnt, 0);
|
||||
atomic_set(&alg->decrypt_cnt, 0);
|
||||
atomic64_set(&alg->encrypt_tlen, 0);
|
||||
atomic64_set(&alg->decrypt_tlen, 0);
|
||||
atomic_set(&alg->verify_cnt, 0);
|
||||
atomic_set(&alg->cipher_err_cnt, 0);
|
||||
atomic_set(&alg->sign_cnt, 0);
|
||||
|
||||
out:
|
||||
return larval;
|
||||
|
||||
@ -367,6 +380,8 @@ static void crypto_wait_for_test(struct crypto_larval *larval)
|
||||
|
||||
err = wait_for_completion_killable(&larval->completion);
|
||||
WARN_ON(err);
|
||||
if (!err)
|
||||
crypto_probing_notify(CRYPTO_MSG_ALG_LOADED, larval);
|
||||
|
||||
out:
|
||||
crypto_larval_kill(&larval->alg);
|
||||
|
@ -274,6 +274,8 @@ static int cryptomgr_notify(struct notifier_block *this, unsigned long msg,
|
||||
return cryptomgr_schedule_probe(data);
|
||||
case CRYPTO_MSG_ALG_REGISTER:
|
||||
return cryptomgr_schedule_test(data);
|
||||
case CRYPTO_MSG_ALG_LOADED:
|
||||
break;
|
||||
}
|
||||
|
||||
return NOTIFY_DONE;
|
||||
|
@ -42,7 +42,7 @@
|
||||
|
||||
struct aead_tfm {
|
||||
struct crypto_aead *aead;
|
||||
struct crypto_skcipher *null_tfm;
|
||||
struct crypto_sync_skcipher *null_tfm;
|
||||
};
|
||||
|
||||
static inline bool aead_sufficient_data(struct sock *sk)
|
||||
@ -75,13 +75,13 @@ static int aead_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
|
||||
return af_alg_sendmsg(sock, msg, size, ivsize);
|
||||
}
|
||||
|
||||
static int crypto_aead_copy_sgl(struct crypto_skcipher *null_tfm,
|
||||
static int crypto_aead_copy_sgl(struct crypto_sync_skcipher *null_tfm,
|
||||
struct scatterlist *src,
|
||||
struct scatterlist *dst, unsigned int len)
|
||||
{
|
||||
SKCIPHER_REQUEST_ON_STACK(skreq, null_tfm);
|
||||
SYNC_SKCIPHER_REQUEST_ON_STACK(skreq, null_tfm);
|
||||
|
||||
skcipher_request_set_tfm(skreq, null_tfm);
|
||||
skcipher_request_set_sync_tfm(skreq, null_tfm);
|
||||
skcipher_request_set_callback(skreq, CRYPTO_TFM_REQ_MAY_BACKLOG,
|
||||
NULL, NULL);
|
||||
skcipher_request_set_crypt(skreq, src, dst, len, NULL);
|
||||
@ -99,7 +99,7 @@ static int _aead_recvmsg(struct socket *sock, struct msghdr *msg,
|
||||
struct af_alg_ctx *ctx = ask->private;
|
||||
struct aead_tfm *aeadc = pask->private;
|
||||
struct crypto_aead *tfm = aeadc->aead;
|
||||
struct crypto_skcipher *null_tfm = aeadc->null_tfm;
|
||||
struct crypto_sync_skcipher *null_tfm = aeadc->null_tfm;
|
||||
unsigned int i, as = crypto_aead_authsize(tfm);
|
||||
struct af_alg_async_req *areq;
|
||||
struct af_alg_tsgl *tsgl, *tmp;
|
||||
@ -478,7 +478,7 @@ static void *aead_bind(const char *name, u32 type, u32 mask)
|
||||
{
|
||||
struct aead_tfm *tfm;
|
||||
struct crypto_aead *aead;
|
||||
struct crypto_skcipher *null_tfm;
|
||||
struct crypto_sync_skcipher *null_tfm;
|
||||
|
||||
tfm = kzalloc(sizeof(*tfm), GFP_KERNEL);
|
||||
if (!tfm)
|
||||
|
@ -239,7 +239,7 @@ static int hash_accept(struct socket *sock, struct socket *newsock, int flags,
|
||||
struct alg_sock *ask = alg_sk(sk);
|
||||
struct hash_ctx *ctx = ask->private;
|
||||
struct ahash_request *req = &ctx->req;
|
||||
char state[crypto_ahash_statesize(crypto_ahash_reqtfm(req)) ? : 1];
|
||||
char state[HASH_MAX_STATESIZE];
|
||||
struct sock *sk2;
|
||||
struct alg_sock *ask2;
|
||||
struct hash_ctx *ctx2;
|
||||
|
@ -33,7 +33,7 @@ struct authenc_instance_ctx {
|
||||
struct crypto_authenc_ctx {
|
||||
struct crypto_ahash *auth;
|
||||
struct crypto_skcipher *enc;
|
||||
struct crypto_skcipher *null;
|
||||
struct crypto_sync_skcipher *null;
|
||||
};
|
||||
|
||||
struct authenc_request_ctx {
|
||||
@ -185,9 +185,9 @@ static int crypto_authenc_copy_assoc(struct aead_request *req)
|
||||
{
|
||||
struct crypto_aead *authenc = crypto_aead_reqtfm(req);
|
||||
struct crypto_authenc_ctx *ctx = crypto_aead_ctx(authenc);
|
||||
SKCIPHER_REQUEST_ON_STACK(skreq, ctx->null);
|
||||
SYNC_SKCIPHER_REQUEST_ON_STACK(skreq, ctx->null);
|
||||
|
||||
skcipher_request_set_tfm(skreq, ctx->null);
|
||||
skcipher_request_set_sync_tfm(skreq, ctx->null);
|
||||
skcipher_request_set_callback(skreq, aead_request_flags(req),
|
||||
NULL, NULL);
|
||||
skcipher_request_set_crypt(skreq, req->src, req->dst, req->assoclen,
|
||||
@ -318,7 +318,7 @@ static int crypto_authenc_init_tfm(struct crypto_aead *tfm)
|
||||
struct crypto_authenc_ctx *ctx = crypto_aead_ctx(tfm);
|
||||
struct crypto_ahash *auth;
|
||||
struct crypto_skcipher *enc;
|
||||
struct crypto_skcipher *null;
|
||||
struct crypto_sync_skcipher *null;
|
||||
int err;
|
||||
|
||||
auth = crypto_spawn_ahash(&ictx->auth);
|
||||
|
@ -36,7 +36,7 @@ struct crypto_authenc_esn_ctx {
|
||||
unsigned int reqoff;
|
||||
struct crypto_ahash *auth;
|
||||
struct crypto_skcipher *enc;
|
||||
struct crypto_skcipher *null;
|
||||
struct crypto_sync_skcipher *null;
|
||||
};
|
||||
|
||||
struct authenc_esn_request_ctx {
|
||||
@ -183,9 +183,9 @@ static int crypto_authenc_esn_copy(struct aead_request *req, unsigned int len)
|
||||
{
|
||||
struct crypto_aead *authenc_esn = crypto_aead_reqtfm(req);
|
||||
struct crypto_authenc_esn_ctx *ctx = crypto_aead_ctx(authenc_esn);
|
||||
SKCIPHER_REQUEST_ON_STACK(skreq, ctx->null);
|
||||
SYNC_SKCIPHER_REQUEST_ON_STACK(skreq, ctx->null);
|
||||
|
||||
skcipher_request_set_tfm(skreq, ctx->null);
|
||||
skcipher_request_set_sync_tfm(skreq, ctx->null);
|
||||
skcipher_request_set_callback(skreq, aead_request_flags(req),
|
||||
NULL, NULL);
|
||||
skcipher_request_set_crypt(skreq, req->src, req->dst, len, NULL);
|
||||
@ -341,7 +341,7 @@ static int crypto_authenc_esn_init_tfm(struct crypto_aead *tfm)
|
||||
struct crypto_authenc_esn_ctx *ctx = crypto_aead_ctx(tfm);
|
||||
struct crypto_ahash *auth;
|
||||
struct crypto_skcipher *enc;
|
||||
struct crypto_skcipher *null;
|
||||
struct crypto_sync_skcipher *null;
|
||||
int err;
|
||||
|
||||
auth = crypto_spawn_ahash(&ictx->auth);
|
||||
|
@ -50,7 +50,10 @@ struct crypto_ccm_req_priv_ctx {
|
||||
u32 flags;
|
||||
struct scatterlist src[3];
|
||||
struct scatterlist dst[3];
|
||||
union {
|
||||
struct ahash_request ahreq;
|
||||
struct skcipher_request skreq;
|
||||
};
|
||||
};
|
||||
|
||||
struct cbcmac_tfm_ctx {
|
||||
@ -181,7 +184,7 @@ static int crypto_ccm_auth(struct aead_request *req, struct scatterlist *plain,
|
||||
struct crypto_ccm_req_priv_ctx *pctx = crypto_ccm_reqctx(req);
|
||||
struct crypto_aead *aead = crypto_aead_reqtfm(req);
|
||||
struct crypto_ccm_ctx *ctx = crypto_aead_ctx(aead);
|
||||
AHASH_REQUEST_ON_STACK(ahreq, ctx->mac);
|
||||
struct ahash_request *ahreq = &pctx->ahreq;
|
||||
unsigned int assoclen = req->assoclen;
|
||||
struct scatterlist sg[3];
|
||||
u8 *odata = pctx->odata;
|
||||
@ -427,7 +430,7 @@ static int crypto_ccm_init_tfm(struct crypto_aead *tfm)
|
||||
crypto_aead_set_reqsize(
|
||||
tfm,
|
||||
align + sizeof(struct crypto_ccm_req_priv_ctx) +
|
||||
crypto_skcipher_reqsize(ctr));
|
||||
max(crypto_ahash_reqsize(mac), crypto_skcipher_reqsize(ctr)));
|
||||
|
||||
return 0;
|
||||
|
||||
|
@ -18,20 +18,21 @@
|
||||
static void chacha20_docrypt(u32 *state, u8 *dst, const u8 *src,
|
||||
unsigned int bytes)
|
||||
{
|
||||
u32 stream[CHACHA20_BLOCK_WORDS];
|
||||
/* aligned to potentially speed up crypto_xor() */
|
||||
u8 stream[CHACHA20_BLOCK_SIZE] __aligned(sizeof(long));
|
||||
|
||||
if (dst != src)
|
||||
memcpy(dst, src, bytes);
|
||||
|
||||
while (bytes >= CHACHA20_BLOCK_SIZE) {
|
||||
chacha20_block(state, stream);
|
||||
crypto_xor(dst, (const u8 *)stream, CHACHA20_BLOCK_SIZE);
|
||||
crypto_xor(dst, stream, CHACHA20_BLOCK_SIZE);
|
||||
bytes -= CHACHA20_BLOCK_SIZE;
|
||||
dst += CHACHA20_BLOCK_SIZE;
|
||||
}
|
||||
if (bytes) {
|
||||
chacha20_block(state, stream);
|
||||
crypto_xor(dst, (const u8 *)stream, bytes);
|
||||
crypto_xor(dst, stream, bytes);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -76,7 +76,7 @@ struct cryptd_blkcipher_request_ctx {
|
||||
|
||||
struct cryptd_skcipher_ctx {
|
||||
atomic_t refcnt;
|
||||
struct crypto_skcipher *child;
|
||||
struct crypto_sync_skcipher *child;
|
||||
};
|
||||
|
||||
struct cryptd_skcipher_request_ctx {
|
||||
@ -449,14 +449,16 @@ static int cryptd_skcipher_setkey(struct crypto_skcipher *parent,
|
||||
const u8 *key, unsigned int keylen)
|
||||
{
|
||||
struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(parent);
|
||||
struct crypto_skcipher *child = ctx->child;
|
||||
struct crypto_sync_skcipher *child = ctx->child;
|
||||
int err;
|
||||
|
||||
crypto_skcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
|
||||
crypto_skcipher_set_flags(child, crypto_skcipher_get_flags(parent) &
|
||||
crypto_sync_skcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
|
||||
crypto_sync_skcipher_set_flags(child,
|
||||
crypto_skcipher_get_flags(parent) &
|
||||
CRYPTO_TFM_REQ_MASK);
|
||||
err = crypto_skcipher_setkey(child, key, keylen);
|
||||
crypto_skcipher_set_flags(parent, crypto_skcipher_get_flags(child) &
|
||||
err = crypto_sync_skcipher_setkey(child, key, keylen);
|
||||
crypto_skcipher_set_flags(parent,
|
||||
crypto_sync_skcipher_get_flags(child) &
|
||||
CRYPTO_TFM_RES_MASK);
|
||||
return err;
|
||||
}
|
||||
@ -483,13 +485,13 @@ static void cryptd_skcipher_encrypt(struct crypto_async_request *base,
|
||||
struct cryptd_skcipher_request_ctx *rctx = skcipher_request_ctx(req);
|
||||
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
|
||||
struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm);
|
||||
struct crypto_skcipher *child = ctx->child;
|
||||
SKCIPHER_REQUEST_ON_STACK(subreq, child);
|
||||
struct crypto_sync_skcipher *child = ctx->child;
|
||||
SYNC_SKCIPHER_REQUEST_ON_STACK(subreq, child);
|
||||
|
||||
if (unlikely(err == -EINPROGRESS))
|
||||
goto out;
|
||||
|
||||
skcipher_request_set_tfm(subreq, child);
|
||||
skcipher_request_set_sync_tfm(subreq, child);
|
||||
skcipher_request_set_callback(subreq, CRYPTO_TFM_REQ_MAY_SLEEP,
|
||||
NULL, NULL);
|
||||
skcipher_request_set_crypt(subreq, req->src, req->dst, req->cryptlen,
|
||||
@ -511,13 +513,13 @@ static void cryptd_skcipher_decrypt(struct crypto_async_request *base,
|
||||
struct cryptd_skcipher_request_ctx *rctx = skcipher_request_ctx(req);
|
||||
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
|
||||
struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm);
|
||||
struct crypto_skcipher *child = ctx->child;
|
||||
SKCIPHER_REQUEST_ON_STACK(subreq, child);
|
||||
struct crypto_sync_skcipher *child = ctx->child;
|
||||
SYNC_SKCIPHER_REQUEST_ON_STACK(subreq, child);
|
||||
|
||||
if (unlikely(err == -EINPROGRESS))
|
||||
goto out;
|
||||
|
||||
skcipher_request_set_tfm(subreq, child);
|
||||
skcipher_request_set_sync_tfm(subreq, child);
|
||||
skcipher_request_set_callback(subreq, CRYPTO_TFM_REQ_MAY_SLEEP,
|
||||
NULL, NULL);
|
||||
skcipher_request_set_crypt(subreq, req->src, req->dst, req->cryptlen,
|
||||
@ -568,7 +570,7 @@ static int cryptd_skcipher_init_tfm(struct crypto_skcipher *tfm)
|
||||
if (IS_ERR(cipher))
|
||||
return PTR_ERR(cipher);
|
||||
|
||||
ctx->child = cipher;
|
||||
ctx->child = (struct crypto_sync_skcipher *)cipher;
|
||||
crypto_skcipher_set_reqsize(
|
||||
tfm, sizeof(struct cryptd_skcipher_request_ctx));
|
||||
return 0;
|
||||
@ -578,7 +580,7 @@ static void cryptd_skcipher_exit_tfm(struct crypto_skcipher *tfm)
|
||||
{
|
||||
struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm);
|
||||
|
||||
crypto_free_skcipher(ctx->child);
|
||||
crypto_free_sync_skcipher(ctx->child);
|
||||
}
|
||||
|
||||
static void cryptd_skcipher_free(struct skcipher_instance *inst)
|
||||
@ -1243,7 +1245,7 @@ struct crypto_skcipher *cryptd_skcipher_child(struct cryptd_skcipher *tfm)
|
||||
{
|
||||
struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(&tfm->base);
|
||||
|
||||
return ctx->child;
|
||||
return &ctx->child->base;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(cryptd_skcipher_child);
|
||||
|
||||
|
@ -26,7 +26,7 @@
|
||||
#include <linux/string.h>
|
||||
|
||||
static DEFINE_MUTEX(crypto_default_null_skcipher_lock);
|
||||
static struct crypto_skcipher *crypto_default_null_skcipher;
|
||||
static struct crypto_sync_skcipher *crypto_default_null_skcipher;
|
||||
static int crypto_default_null_skcipher_refcnt;
|
||||
|
||||
static int null_compress(struct crypto_tfm *tfm, const u8 *src,
|
||||
@ -152,16 +152,15 @@ MODULE_ALIAS_CRYPTO("compress_null");
|
||||
MODULE_ALIAS_CRYPTO("digest_null");
|
||||
MODULE_ALIAS_CRYPTO("cipher_null");
|
||||
|
||||
struct crypto_skcipher *crypto_get_default_null_skcipher(void)
|
||||
struct crypto_sync_skcipher *crypto_get_default_null_skcipher(void)
|
||||
{
|
||||
struct crypto_skcipher *tfm;
|
||||
struct crypto_sync_skcipher *tfm;
|
||||
|
||||
mutex_lock(&crypto_default_null_skcipher_lock);
|
||||
tfm = crypto_default_null_skcipher;
|
||||
|
||||
if (!tfm) {
|
||||
tfm = crypto_alloc_skcipher("ecb(cipher_null)",
|
||||
0, CRYPTO_ALG_ASYNC);
|
||||
tfm = crypto_alloc_sync_skcipher("ecb(cipher_null)", 0, 0);
|
||||
if (IS_ERR(tfm))
|
||||
goto unlock;
|
||||
|
||||
@ -181,7 +180,7 @@ void crypto_put_default_null_skcipher(void)
|
||||
{
|
||||
mutex_lock(&crypto_default_null_skcipher_lock);
|
||||
if (!--crypto_default_null_skcipher_refcnt) {
|
||||
crypto_free_skcipher(crypto_default_null_skcipher);
|
||||
crypto_free_sync_skcipher(crypto_default_null_skcipher);
|
||||
crypto_default_null_skcipher = NULL;
|
||||
}
|
||||
mutex_unlock(&crypto_default_null_skcipher_lock);
|
||||
|
@ -29,6 +29,7 @@
|
||||
#include <crypto/internal/rng.h>
|
||||
#include <crypto/akcipher.h>
|
||||
#include <crypto/kpp.h>
|
||||
#include <crypto/internal/cryptouser.h>
|
||||
|
||||
#include "internal.h"
|
||||
|
||||
@ -37,7 +38,7 @@
|
||||
static DEFINE_MUTEX(crypto_cfg_mutex);
|
||||
|
||||
/* The crypto netlink socket */
|
||||
static struct sock *crypto_nlsk;
|
||||
struct sock *crypto_nlsk;
|
||||
|
||||
struct crypto_dump_info {
|
||||
struct sk_buff *in_skb;
|
||||
@ -46,7 +47,7 @@ struct crypto_dump_info {
|
||||
u16 nlmsg_flags;
|
||||
};
|
||||
|
||||
static struct crypto_alg *crypto_alg_match(struct crypto_user_alg *p, int exact)
|
||||
struct crypto_alg *crypto_alg_match(struct crypto_user_alg *p, int exact)
|
||||
{
|
||||
struct crypto_alg *q, *alg = NULL;
|
||||
|
||||
@ -461,6 +462,7 @@ static const int crypto_msg_min[CRYPTO_NR_MSGTYPES] = {
|
||||
[CRYPTO_MSG_UPDATEALG - CRYPTO_MSG_BASE] = MSGSIZE(crypto_user_alg),
|
||||
[CRYPTO_MSG_GETALG - CRYPTO_MSG_BASE] = MSGSIZE(crypto_user_alg),
|
||||
[CRYPTO_MSG_DELRNG - CRYPTO_MSG_BASE] = 0,
|
||||
[CRYPTO_MSG_GETSTAT - CRYPTO_MSG_BASE] = MSGSIZE(crypto_user_alg),
|
||||
};
|
||||
|
||||
static const struct nla_policy crypto_policy[CRYPTOCFGA_MAX+1] = {
|
||||
@ -481,6 +483,9 @@ static const struct crypto_link {
|
||||
.dump = crypto_dump_report,
|
||||
.done = crypto_dump_report_done},
|
||||
[CRYPTO_MSG_DELRNG - CRYPTO_MSG_BASE] = { .doit = crypto_del_rng },
|
||||
[CRYPTO_MSG_GETSTAT - CRYPTO_MSG_BASE] = { .doit = crypto_reportstat,
|
||||
.dump = crypto_dump_reportstat,
|
||||
.done = crypto_dump_reportstat_done},
|
||||
};
|
||||
|
||||
static int crypto_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
|
463
crypto/crypto_user_stat.c
Normal file
463
crypto/crypto_user_stat.c
Normal file
@ -0,0 +1,463 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* Crypto user configuration API.
|
||||
*
|
||||
* Copyright (C) 2017-2018 Corentin Labbe <clabbe@baylibre.com>
|
||||
*
|
||||
*/
|
||||
|
||||
#include <linux/crypto.h>
|
||||
#include <linux/cryptouser.h>
|
||||
#include <linux/sched.h>
|
||||
#include <net/netlink.h>
|
||||
#include <crypto/internal/skcipher.h>
|
||||
#include <crypto/internal/rng.h>
|
||||
#include <crypto/akcipher.h>
|
||||
#include <crypto/kpp.h>
|
||||
#include <crypto/internal/cryptouser.h>
|
||||
|
||||
#include "internal.h"
|
||||
|
||||
#define null_terminated(x) (strnlen(x, sizeof(x)) < sizeof(x))
|
||||
|
||||
static DEFINE_MUTEX(crypto_cfg_mutex);
|
||||
|
||||
extern struct sock *crypto_nlsk;
|
||||
|
||||
struct crypto_dump_info {
|
||||
struct sk_buff *in_skb;
|
||||
struct sk_buff *out_skb;
|
||||
u32 nlmsg_seq;
|
||||
u16 nlmsg_flags;
|
||||
};
|
||||
|
||||
static int crypto_report_aead(struct sk_buff *skb, struct crypto_alg *alg)
|
||||
{
|
||||
struct crypto_stat raead;
|
||||
u64 v64;
|
||||
u32 v32;
|
||||
|
||||
strncpy(raead.type, "aead", sizeof(raead.type));
|
||||
|
||||
v32 = atomic_read(&alg->encrypt_cnt);
|
||||
raead.stat_encrypt_cnt = v32;
|
||||
v64 = atomic64_read(&alg->encrypt_tlen);
|
||||
raead.stat_encrypt_tlen = v64;
|
||||
v32 = atomic_read(&alg->decrypt_cnt);
|
||||
raead.stat_decrypt_cnt = v32;
|
||||
v64 = atomic64_read(&alg->decrypt_tlen);
|
||||
raead.stat_decrypt_tlen = v64;
|
||||
v32 = atomic_read(&alg->aead_err_cnt);
|
||||
raead.stat_aead_err_cnt = v32;
|
||||
|
||||
if (nla_put(skb, CRYPTOCFGA_STAT_AEAD,
|
||||
sizeof(struct crypto_stat), &raead))
|
||||
goto nla_put_failure;
|
||||
return 0;
|
||||
|
||||
nla_put_failure:
|
||||
return -EMSGSIZE;
|
||||
}
|
||||
|
||||
static int crypto_report_cipher(struct sk_buff *skb, struct crypto_alg *alg)
|
||||
{
|
||||
struct crypto_stat rcipher;
|
||||
u64 v64;
|
||||
u32 v32;
|
||||
|
||||
strlcpy(rcipher.type, "cipher", sizeof(rcipher.type));
|
||||
|
||||
v32 = atomic_read(&alg->encrypt_cnt);
|
||||
rcipher.stat_encrypt_cnt = v32;
|
||||
v64 = atomic64_read(&alg->encrypt_tlen);
|
||||
rcipher.stat_encrypt_tlen = v64;
|
||||
v32 = atomic_read(&alg->decrypt_cnt);
|
||||
rcipher.stat_decrypt_cnt = v32;
|
||||
v64 = atomic64_read(&alg->decrypt_tlen);
|
||||
rcipher.stat_decrypt_tlen = v64;
|
||||
v32 = atomic_read(&alg->cipher_err_cnt);
|
||||
rcipher.stat_cipher_err_cnt = v32;
|
||||
|
||||
if (nla_put(skb, CRYPTOCFGA_STAT_CIPHER,
|
||||
sizeof(struct crypto_stat), &rcipher))
|
||||
goto nla_put_failure;
|
||||
return 0;
|
||||
|
||||
nla_put_failure:
|
||||
return -EMSGSIZE;
|
||||
}
|
||||
|
||||
static int crypto_report_comp(struct sk_buff *skb, struct crypto_alg *alg)
|
||||
{
|
||||
struct crypto_stat rcomp;
|
||||
u64 v64;
|
||||
u32 v32;
|
||||
|
||||
strlcpy(rcomp.type, "compression", sizeof(rcomp.type));
|
||||
v32 = atomic_read(&alg->compress_cnt);
|
||||
rcomp.stat_compress_cnt = v32;
|
||||
v64 = atomic64_read(&alg->compress_tlen);
|
||||
rcomp.stat_compress_tlen = v64;
|
||||
v32 = atomic_read(&alg->decompress_cnt);
|
||||
rcomp.stat_decompress_cnt = v32;
|
||||
v64 = atomic64_read(&alg->decompress_tlen);
|
||||
rcomp.stat_decompress_tlen = v64;
|
||||
v32 = atomic_read(&alg->cipher_err_cnt);
|
||||
rcomp.stat_compress_err_cnt = v32;
|
||||
|
||||
if (nla_put(skb, CRYPTOCFGA_STAT_COMPRESS,
|
||||
sizeof(struct crypto_stat), &rcomp))
|
||||
goto nla_put_failure;
|
||||
return 0;
|
||||
|
||||
nla_put_failure:
|
||||
return -EMSGSIZE;
|
||||
}
|
||||
|
||||
static int crypto_report_acomp(struct sk_buff *skb, struct crypto_alg *alg)
|
||||
{
|
||||
struct crypto_stat racomp;
|
||||
u64 v64;
|
||||
u32 v32;
|
||||
|
||||
strlcpy(racomp.type, "acomp", sizeof(racomp.type));
|
||||
v32 = atomic_read(&alg->compress_cnt);
|
||||
racomp.stat_compress_cnt = v32;
|
||||
v64 = atomic64_read(&alg->compress_tlen);
|
||||
racomp.stat_compress_tlen = v64;
|
||||
v32 = atomic_read(&alg->decompress_cnt);
|
||||
racomp.stat_decompress_cnt = v32;
|
||||
v64 = atomic64_read(&alg->decompress_tlen);
|
||||
racomp.stat_decompress_tlen = v64;
|
||||
v32 = atomic_read(&alg->cipher_err_cnt);
|
||||
racomp.stat_compress_err_cnt = v32;
|
||||
|
||||
if (nla_put(skb, CRYPTOCFGA_STAT_ACOMP,
|
||||
sizeof(struct crypto_stat), &racomp))
|
||||
goto nla_put_failure;
|
||||
return 0;
|
||||
|
||||
nla_put_failure:
|
||||
return -EMSGSIZE;
|
||||
}
|
||||
|
||||
static int crypto_report_akcipher(struct sk_buff *skb, struct crypto_alg *alg)
|
||||
{
|
||||
struct crypto_stat rakcipher;
|
||||
u64 v64;
|
||||
u32 v32;
|
||||
|
||||
strncpy(rakcipher.type, "akcipher", sizeof(rakcipher.type));
|
||||
v32 = atomic_read(&alg->encrypt_cnt);
|
||||
rakcipher.stat_encrypt_cnt = v32;
|
||||
v64 = atomic64_read(&alg->encrypt_tlen);
|
||||
rakcipher.stat_encrypt_tlen = v64;
|
||||
v32 = atomic_read(&alg->decrypt_cnt);
|
||||
rakcipher.stat_decrypt_cnt = v32;
|
||||
v64 = atomic64_read(&alg->decrypt_tlen);
|
||||
rakcipher.stat_decrypt_tlen = v64;
|
||||
v32 = atomic_read(&alg->sign_cnt);
|
||||
rakcipher.stat_sign_cnt = v32;
|
||||
v32 = atomic_read(&alg->verify_cnt);
|
||||
rakcipher.stat_verify_cnt = v32;
|
||||
v32 = atomic_read(&alg->akcipher_err_cnt);
|
||||
rakcipher.stat_akcipher_err_cnt = v32;
|
||||
|
||||
if (nla_put(skb, CRYPTOCFGA_STAT_AKCIPHER,
|
||||
sizeof(struct crypto_stat), &rakcipher))
|
||||
goto nla_put_failure;
|
||||
return 0;
|
||||
|
||||
nla_put_failure:
|
||||
return -EMSGSIZE;
|
||||
}
|
||||
|
||||
static int crypto_report_kpp(struct sk_buff *skb, struct crypto_alg *alg)
|
||||
{
|
||||
struct crypto_stat rkpp;
|
||||
u32 v;
|
||||
|
||||
strlcpy(rkpp.type, "kpp", sizeof(rkpp.type));
|
||||
|
||||
v = atomic_read(&alg->setsecret_cnt);
|
||||
rkpp.stat_setsecret_cnt = v;
|
||||
v = atomic_read(&alg->generate_public_key_cnt);
|
||||
rkpp.stat_generate_public_key_cnt = v;
|
||||
v = atomic_read(&alg->compute_shared_secret_cnt);
|
||||
rkpp.stat_compute_shared_secret_cnt = v;
|
||||
v = atomic_read(&alg->kpp_err_cnt);
|
||||
rkpp.stat_kpp_err_cnt = v;
|
||||
|
||||
if (nla_put(skb, CRYPTOCFGA_STAT_KPP,
|
||||
sizeof(struct crypto_stat), &rkpp))
|
||||
goto nla_put_failure;
|
||||
return 0;
|
||||
|
||||
nla_put_failure:
|
||||
return -EMSGSIZE;
|
||||
}
|
||||
|
||||
static int crypto_report_ahash(struct sk_buff *skb, struct crypto_alg *alg)
|
||||
{
|
||||
struct crypto_stat rhash;
|
||||
u64 v64;
|
||||
u32 v32;
|
||||
|
||||
strncpy(rhash.type, "ahash", sizeof(rhash.type));
|
||||
|
||||
v32 = atomic_read(&alg->hash_cnt);
|
||||
rhash.stat_hash_cnt = v32;
|
||||
v64 = atomic64_read(&alg->hash_tlen);
|
||||
rhash.stat_hash_tlen = v64;
|
||||
v32 = atomic_read(&alg->hash_err_cnt);
|
||||
rhash.stat_hash_err_cnt = v32;
|
||||
|
||||
if (nla_put(skb, CRYPTOCFGA_STAT_HASH,
|
||||
sizeof(struct crypto_stat), &rhash))
|
||||
goto nla_put_failure;
|
||||
return 0;
|
||||
|
||||
nla_put_failure:
|
||||
return -EMSGSIZE;
|
||||
}
|
||||
|
||||
static int crypto_report_shash(struct sk_buff *skb, struct crypto_alg *alg)
|
||||
{
|
||||
struct crypto_stat rhash;
|
||||
u64 v64;
|
||||
u32 v32;
|
||||
|
||||
strncpy(rhash.type, "shash", sizeof(rhash.type));
|
||||
|
||||
v32 = atomic_read(&alg->hash_cnt);
|
||||
rhash.stat_hash_cnt = v32;
|
||||
v64 = atomic64_read(&alg->hash_tlen);
|
||||
rhash.stat_hash_tlen = v64;
|
||||
v32 = atomic_read(&alg->hash_err_cnt);
|
||||
rhash.stat_hash_err_cnt = v32;
|
||||
|
||||
if (nla_put(skb, CRYPTOCFGA_STAT_HASH,
|
||||
sizeof(struct crypto_stat), &rhash))
|
||||
goto nla_put_failure;
|
||||
return 0;
|
||||
|
||||
nla_put_failure:
|
||||
return -EMSGSIZE;
|
||||
}
|
||||
|
||||
static int crypto_report_rng(struct sk_buff *skb, struct crypto_alg *alg)
|
||||
{
|
||||
struct crypto_stat rrng;
|
||||
u64 v64;
|
||||
u32 v32;
|
||||
|
||||
strncpy(rrng.type, "rng", sizeof(rrng.type));
|
||||
|
||||
v32 = atomic_read(&alg->generate_cnt);
|
||||
rrng.stat_generate_cnt = v32;
|
||||
v64 = atomic64_read(&alg->generate_tlen);
|
||||
rrng.stat_generate_tlen = v64;
|
||||
v32 = atomic_read(&alg->seed_cnt);
|
||||
rrng.stat_seed_cnt = v32;
|
||||
v32 = atomic_read(&alg->hash_err_cnt);
|
||||
rrng.stat_rng_err_cnt = v32;
|
||||
|
||||
if (nla_put(skb, CRYPTOCFGA_STAT_RNG,
|
||||
sizeof(struct crypto_stat), &rrng))
|
||||
goto nla_put_failure;
|
||||
return 0;
|
||||
|
||||
nla_put_failure:
|
||||
return -EMSGSIZE;
|
||||
}
|
||||
|
||||
static int crypto_reportstat_one(struct crypto_alg *alg,
|
||||
struct crypto_user_alg *ualg,
|
||||
struct sk_buff *skb)
|
||||
{
|
||||
strlcpy(ualg->cru_name, alg->cra_name, sizeof(ualg->cru_name));
|
||||
strlcpy(ualg->cru_driver_name, alg->cra_driver_name,
|
||||
sizeof(ualg->cru_driver_name));
|
||||
strlcpy(ualg->cru_module_name, module_name(alg->cra_module),
|
||||
sizeof(ualg->cru_module_name));
|
||||
|
||||
ualg->cru_type = 0;
|
||||
ualg->cru_mask = 0;
|
||||
ualg->cru_flags = alg->cra_flags;
|
||||
ualg->cru_refcnt = refcount_read(&alg->cra_refcnt);
|
||||
|
||||
if (nla_put_u32(skb, CRYPTOCFGA_PRIORITY_VAL, alg->cra_priority))
|
||||
goto nla_put_failure;
|
||||
if (alg->cra_flags & CRYPTO_ALG_LARVAL) {
|
||||
struct crypto_stat rl;
|
||||
|
||||
strlcpy(rl.type, "larval", sizeof(rl.type));
|
||||
if (nla_put(skb, CRYPTOCFGA_STAT_LARVAL,
|
||||
sizeof(struct crypto_stat), &rl))
|
||||
goto nla_put_failure;
|
||||
goto out;
|
||||
}
|
||||
|
||||
switch (alg->cra_flags & (CRYPTO_ALG_TYPE_MASK | CRYPTO_ALG_LARVAL)) {
|
||||
case CRYPTO_ALG_TYPE_AEAD:
|
||||
if (crypto_report_aead(skb, alg))
|
||||
goto nla_put_failure;
|
||||
break;
|
||||
case CRYPTO_ALG_TYPE_SKCIPHER:
|
||||
if (crypto_report_cipher(skb, alg))
|
||||
goto nla_put_failure;
|
||||
break;
|
||||
case CRYPTO_ALG_TYPE_BLKCIPHER:
|
||||
if (crypto_report_cipher(skb, alg))
|
||||
goto nla_put_failure;
|
||||
break;
|
||||
case CRYPTO_ALG_TYPE_CIPHER:
|
||||
if (crypto_report_cipher(skb, alg))
|
||||
goto nla_put_failure;
|
||||
break;
|
||||
case CRYPTO_ALG_TYPE_COMPRESS:
|
||||
if (crypto_report_comp(skb, alg))
|
||||
goto nla_put_failure;
|
||||
break;
|
||||
case CRYPTO_ALG_TYPE_ACOMPRESS:
|
||||
if (crypto_report_acomp(skb, alg))
|
||||
goto nla_put_failure;
|
||||
break;
|
||||
case CRYPTO_ALG_TYPE_SCOMPRESS:
|
||||
if (crypto_report_acomp(skb, alg))
|
||||
goto nla_put_failure;
|
||||
break;
|
||||
case CRYPTO_ALG_TYPE_AKCIPHER:
|
||||
if (crypto_report_akcipher(skb, alg))
|
||||
goto nla_put_failure;
|
||||
break;
|
||||
case CRYPTO_ALG_TYPE_KPP:
|
||||
if (crypto_report_kpp(skb, alg))
|
||||
goto nla_put_failure;
|
||||
break;
|
||||
case CRYPTO_ALG_TYPE_AHASH:
|
||||
if (crypto_report_ahash(skb, alg))
|
||||
goto nla_put_failure;
|
||||
break;
|
||||
case CRYPTO_ALG_TYPE_HASH:
|
||||
if (crypto_report_shash(skb, alg))
|
||||
goto nla_put_failure;
|
||||
break;
|
||||
case CRYPTO_ALG_TYPE_RNG:
|
||||
if (crypto_report_rng(skb, alg))
|
||||
goto nla_put_failure;
|
||||
break;
|
||||
default:
|
||||
pr_err("ERROR: Unhandled alg %d in %s\n",
|
||||
alg->cra_flags & (CRYPTO_ALG_TYPE_MASK | CRYPTO_ALG_LARVAL),
|
||||
__func__);
|
||||
}
|
||||
|
||||
out:
|
||||
return 0;
|
||||
|
||||
nla_put_failure:
|
||||
return -EMSGSIZE;
|
||||
}
|
||||
|
||||
static int crypto_reportstat_alg(struct crypto_alg *alg,
|
||||
struct crypto_dump_info *info)
|
||||
{
|
||||
struct sk_buff *in_skb = info->in_skb;
|
||||
struct sk_buff *skb = info->out_skb;
|
||||
struct nlmsghdr *nlh;
|
||||
struct crypto_user_alg *ualg;
|
||||
int err = 0;
|
||||
|
||||
nlh = nlmsg_put(skb, NETLINK_CB(in_skb).portid, info->nlmsg_seq,
|
||||
CRYPTO_MSG_GETSTAT, sizeof(*ualg), info->nlmsg_flags);
|
||||
if (!nlh) {
|
||||
err = -EMSGSIZE;
|
||||
goto out;
|
||||
}
|
||||
|
||||
ualg = nlmsg_data(nlh);
|
||||
|
||||
err = crypto_reportstat_one(alg, ualg, skb);
|
||||
if (err) {
|
||||
nlmsg_cancel(skb, nlh);
|
||||
goto out;
|
||||
}
|
||||
|
||||
nlmsg_end(skb, nlh);
|
||||
|
||||
out:
|
||||
return err;
|
||||
}
|
||||
|
||||
int crypto_reportstat(struct sk_buff *in_skb, struct nlmsghdr *in_nlh,
|
||||
struct nlattr **attrs)
|
||||
{
|
||||
struct crypto_user_alg *p = nlmsg_data(in_nlh);
|
||||
struct crypto_alg *alg;
|
||||
struct sk_buff *skb;
|
||||
struct crypto_dump_info info;
|
||||
int err;
|
||||
|
||||
if (!null_terminated(p->cru_name) || !null_terminated(p->cru_driver_name))
|
||||
return -EINVAL;
|
||||
|
||||
alg = crypto_alg_match(p, 0);
|
||||
if (!alg)
|
||||
return -ENOENT;
|
||||
|
||||
err = -ENOMEM;
|
||||
skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
|
||||
if (!skb)
|
||||
goto drop_alg;
|
||||
|
||||
info.in_skb = in_skb;
|
||||
info.out_skb = skb;
|
||||
info.nlmsg_seq = in_nlh->nlmsg_seq;
|
||||
info.nlmsg_flags = 0;
|
||||
|
||||
err = crypto_reportstat_alg(alg, &info);
|
||||
|
||||
drop_alg:
|
||||
crypto_mod_put(alg);
|
||||
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
return nlmsg_unicast(crypto_nlsk, skb, NETLINK_CB(in_skb).portid);
|
||||
}
|
||||
|
||||
int crypto_dump_reportstat(struct sk_buff *skb, struct netlink_callback *cb)
|
||||
{
|
||||
struct crypto_alg *alg;
|
||||
struct crypto_dump_info info;
|
||||
int err;
|
||||
|
||||
if (cb->args[0])
|
||||
goto out;
|
||||
|
||||
cb->args[0] = 1;
|
||||
|
||||
info.in_skb = cb->skb;
|
||||
info.out_skb = skb;
|
||||
info.nlmsg_seq = cb->nlh->nlmsg_seq;
|
||||
info.nlmsg_flags = NLM_F_MULTI;
|
||||
|
||||
list_for_each_entry(alg, &crypto_alg_list, cra_list) {
|
||||
err = crypto_reportstat_alg(alg, &info);
|
||||
if (err)
|
||||
goto out_err;
|
||||
}
|
||||
|
||||
out:
|
||||
return skb->len;
|
||||
out_err:
|
||||
return err;
|
||||
}
|
||||
|
||||
int crypto_dump_reportstat_done(struct netlink_callback *cb)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
MODULE_LICENSE("GPL");
|
@ -47,9 +47,9 @@ static int echainiv_encrypt(struct aead_request *req)
|
||||
info = req->iv;
|
||||
|
||||
if (req->src != req->dst) {
|
||||
SKCIPHER_REQUEST_ON_STACK(nreq, ctx->sknull);
|
||||
SYNC_SKCIPHER_REQUEST_ON_STACK(nreq, ctx->sknull);
|
||||
|
||||
skcipher_request_set_tfm(nreq, ctx->sknull);
|
||||
skcipher_request_set_sync_tfm(nreq, ctx->sknull);
|
||||
skcipher_request_set_callback(nreq, req->base.flags,
|
||||
NULL, NULL);
|
||||
skcipher_request_set_crypt(nreq, req->src, req->dst,
|
||||
|
@ -50,7 +50,7 @@ struct crypto_rfc4543_instance_ctx {
|
||||
|
||||
struct crypto_rfc4543_ctx {
|
||||
struct crypto_aead *child;
|
||||
struct crypto_skcipher *null;
|
||||
struct crypto_sync_skcipher *null;
|
||||
u8 nonce[4];
|
||||
};
|
||||
|
||||
@ -1067,9 +1067,9 @@ static int crypto_rfc4543_copy_src_to_dst(struct aead_request *req, bool enc)
|
||||
unsigned int authsize = crypto_aead_authsize(aead);
|
||||
unsigned int nbytes = req->assoclen + req->cryptlen -
|
||||
(enc ? 0 : authsize);
|
||||
SKCIPHER_REQUEST_ON_STACK(nreq, ctx->null);
|
||||
SYNC_SKCIPHER_REQUEST_ON_STACK(nreq, ctx->null);
|
||||
|
||||
skcipher_request_set_tfm(nreq, ctx->null);
|
||||
skcipher_request_set_sync_tfm(nreq, ctx->null);
|
||||
skcipher_request_set_callback(nreq, req->base.flags, NULL, NULL);
|
||||
skcipher_request_set_crypt(nreq, req->src, req->dst, nbytes, NULL);
|
||||
|
||||
@ -1093,7 +1093,7 @@ static int crypto_rfc4543_init_tfm(struct crypto_aead *tfm)
|
||||
struct crypto_aead_spawn *spawn = &ictx->aead;
|
||||
struct crypto_rfc4543_ctx *ctx = crypto_aead_ctx(tfm);
|
||||
struct crypto_aead *aead;
|
||||
struct crypto_skcipher *null;
|
||||
struct crypto_sync_skcipher *null;
|
||||
unsigned long align;
|
||||
int err = 0;
|
||||
|
||||
|
@ -26,12 +26,6 @@
|
||||
#include <linux/rwsem.h>
|
||||
#include <linux/slab.h>
|
||||
|
||||
/* Crypto notification events. */
|
||||
enum {
|
||||
CRYPTO_MSG_ALG_REQUEST,
|
||||
CRYPTO_MSG_ALG_REGISTER,
|
||||
};
|
||||
|
||||
struct crypto_instance;
|
||||
struct crypto_template;
|
||||
|
||||
@ -90,8 +84,6 @@ struct crypto_alg *crypto_find_alg(const char *alg_name,
|
||||
void *crypto_alloc_tfm(const char *alg_name,
|
||||
const struct crypto_type *frontend, u32 type, u32 mask);
|
||||
|
||||
int crypto_register_notifier(struct notifier_block *nb);
|
||||
int crypto_unregister_notifier(struct notifier_block *nb);
|
||||
int crypto_probing_notify(unsigned long val, void *v);
|
||||
|
||||
unsigned int crypto_alg_extsize(struct crypto_alg *alg);
|
||||
|
351
crypto/lrw.c
351
crypto/lrw.c
@ -29,8 +29,6 @@
|
||||
#include <crypto/b128ops.h>
|
||||
#include <crypto/gf128mul.h>
|
||||
|
||||
#define LRW_BUFFER_SIZE 128u
|
||||
|
||||
#define LRW_BLOCK_SIZE 16
|
||||
|
||||
struct priv {
|
||||
@ -56,19 +54,7 @@ struct priv {
|
||||
};
|
||||
|
||||
struct rctx {
|
||||
be128 buf[LRW_BUFFER_SIZE / sizeof(be128)];
|
||||
|
||||
be128 t;
|
||||
|
||||
be128 *ext;
|
||||
|
||||
struct scatterlist srcbuf[2];
|
||||
struct scatterlist dstbuf[2];
|
||||
struct scatterlist *src;
|
||||
struct scatterlist *dst;
|
||||
|
||||
unsigned int left;
|
||||
|
||||
struct skcipher_request subreq;
|
||||
};
|
||||
|
||||
@ -120,112 +106,68 @@ static int setkey(struct crypto_skcipher *parent, const u8 *key,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline void inc(be128 *iv)
|
||||
/*
|
||||
* Returns the number of trailing '1' bits in the words of the counter, which is
|
||||
* represented by 4 32-bit words, arranged from least to most significant.
|
||||
* At the same time, increments the counter by one.
|
||||
*
|
||||
* For example:
|
||||
*
|
||||
* u32 counter[4] = { 0xFFFFFFFF, 0x1, 0x0, 0x0 };
|
||||
* int i = next_index(&counter);
|
||||
* // i == 33, counter == { 0x0, 0x2, 0x0, 0x0 }
|
||||
*/
|
||||
static int next_index(u32 *counter)
|
||||
{
|
||||
be64_add_cpu(&iv->b, 1);
|
||||
if (!iv->b)
|
||||
be64_add_cpu(&iv->a, 1);
|
||||
}
|
||||
int i, res = 0;
|
||||
|
||||
/* this returns the number of consequative 1 bits starting
|
||||
* from the right, get_index128(00 00 00 00 00 00 ... 00 00 10 FB) = 2 */
|
||||
static inline int get_index128(be128 *block)
|
||||
{
|
||||
int x;
|
||||
__be32 *p = (__be32 *) block;
|
||||
for (i = 0; i < 4; i++) {
|
||||
if (counter[i] + 1 != 0)
|
||||
return res + ffz(counter[i]++);
|
||||
|
||||
for (p += 3, x = 0; x < 128; p--, x += 32) {
|
||||
u32 val = be32_to_cpup(p);
|
||||
|
||||
if (!~val)
|
||||
continue;
|
||||
|
||||
return x + ffz(val);
|
||||
counter[i] = 0;
|
||||
res += 32;
|
||||
}
|
||||
|
||||
return x;
|
||||
/*
|
||||
* If we get here, then x == 128 and we are incrementing the counter
|
||||
* from all ones to all zeros. This means we must return index 127, i.e.
|
||||
* the one corresponding to key2*{ 1,...,1 }.
|
||||
*/
|
||||
return 127;
|
||||
}
|
||||
|
||||
static int post_crypt(struct skcipher_request *req)
|
||||
/*
|
||||
* We compute the tweak masks twice (both before and after the ECB encryption or
|
||||
* decryption) to avoid having to allocate a temporary buffer and/or make
|
||||
* mutliple calls to the 'ecb(..)' instance, which usually would be slower than
|
||||
* just doing the next_index() calls again.
|
||||
*/
|
||||
static int xor_tweak(struct skcipher_request *req, bool second_pass)
|
||||
{
|
||||
struct rctx *rctx = skcipher_request_ctx(req);
|
||||
be128 *buf = rctx->ext ?: rctx->buf;
|
||||
struct skcipher_request *subreq;
|
||||
const int bs = LRW_BLOCK_SIZE;
|
||||
struct skcipher_walk w;
|
||||
struct scatterlist *sg;
|
||||
unsigned offset;
|
||||
int err;
|
||||
|
||||
subreq = &rctx->subreq;
|
||||
err = skcipher_walk_virt(&w, subreq, false);
|
||||
|
||||
while (w.nbytes) {
|
||||
unsigned int avail = w.nbytes;
|
||||
be128 *wdst;
|
||||
|
||||
wdst = w.dst.virt.addr;
|
||||
|
||||
do {
|
||||
be128_xor(wdst, buf++, wdst);
|
||||
wdst++;
|
||||
} while ((avail -= bs) >= bs);
|
||||
|
||||
err = skcipher_walk_done(&w, avail);
|
||||
}
|
||||
|
||||
rctx->left -= subreq->cryptlen;
|
||||
|
||||
if (err || !rctx->left)
|
||||
goto out;
|
||||
|
||||
rctx->dst = rctx->dstbuf;
|
||||
|
||||
scatterwalk_done(&w.out, 0, 1);
|
||||
sg = w.out.sg;
|
||||
offset = w.out.offset;
|
||||
|
||||
if (rctx->dst != sg) {
|
||||
rctx->dst[0] = *sg;
|
||||
sg_unmark_end(rctx->dst);
|
||||
scatterwalk_crypto_chain(rctx->dst, sg_next(sg), 2);
|
||||
}
|
||||
rctx->dst[0].length -= offset - sg->offset;
|
||||
rctx->dst[0].offset = offset;
|
||||
|
||||
out:
|
||||
return err;
|
||||
}
|
||||
|
||||
static int pre_crypt(struct skcipher_request *req)
|
||||
{
|
||||
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
|
||||
struct rctx *rctx = skcipher_request_ctx(req);
|
||||
struct priv *ctx = crypto_skcipher_ctx(tfm);
|
||||
be128 *buf = rctx->ext ?: rctx->buf;
|
||||
struct skcipher_request *subreq;
|
||||
const int bs = LRW_BLOCK_SIZE;
|
||||
struct rctx *rctx = skcipher_request_ctx(req);
|
||||
be128 t = rctx->t;
|
||||
struct skcipher_walk w;
|
||||
struct scatterlist *sg;
|
||||
unsigned cryptlen;
|
||||
unsigned offset;
|
||||
be128 *iv;
|
||||
bool more;
|
||||
__be32 *iv;
|
||||
u32 counter[4];
|
||||
int err;
|
||||
|
||||
subreq = &rctx->subreq;
|
||||
skcipher_request_set_tfm(subreq, tfm);
|
||||
if (second_pass) {
|
||||
req = &rctx->subreq;
|
||||
/* set to our TFM to enforce correct alignment: */
|
||||
skcipher_request_set_tfm(req, tfm);
|
||||
}
|
||||
|
||||
cryptlen = subreq->cryptlen;
|
||||
more = rctx->left > cryptlen;
|
||||
if (!more)
|
||||
cryptlen = rctx->left;
|
||||
err = skcipher_walk_virt(&w, req, false);
|
||||
iv = (__be32 *)w.iv;
|
||||
|
||||
skcipher_request_set_crypt(subreq, rctx->src, rctx->dst,
|
||||
cryptlen, req->iv);
|
||||
|
||||
err = skcipher_walk_virt(&w, subreq, false);
|
||||
iv = w.iv;
|
||||
counter[0] = be32_to_cpu(iv[3]);
|
||||
counter[1] = be32_to_cpu(iv[2]);
|
||||
counter[2] = be32_to_cpu(iv[1]);
|
||||
counter[3] = be32_to_cpu(iv[0]);
|
||||
|
||||
while (w.nbytes) {
|
||||
unsigned int avail = w.nbytes;
|
||||
@ -236,188 +178,85 @@ static int pre_crypt(struct skcipher_request *req)
|
||||
wdst = w.dst.virt.addr;
|
||||
|
||||
do {
|
||||
*buf++ = rctx->t;
|
||||
be128_xor(wdst++, &rctx->t, wsrc++);
|
||||
be128_xor(wdst++, &t, wsrc++);
|
||||
|
||||
/* T <- I*Key2, using the optimization
|
||||
* discussed in the specification */
|
||||
be128_xor(&rctx->t, &rctx->t,
|
||||
&ctx->mulinc[get_index128(iv)]);
|
||||
inc(iv);
|
||||
be128_xor(&t, &t, &ctx->mulinc[next_index(counter)]);
|
||||
} while ((avail -= bs) >= bs);
|
||||
|
||||
if (second_pass && w.nbytes == w.total) {
|
||||
iv[0] = cpu_to_be32(counter[3]);
|
||||
iv[1] = cpu_to_be32(counter[2]);
|
||||
iv[2] = cpu_to_be32(counter[1]);
|
||||
iv[3] = cpu_to_be32(counter[0]);
|
||||
}
|
||||
|
||||
err = skcipher_walk_done(&w, avail);
|
||||
}
|
||||
|
||||
skcipher_request_set_tfm(subreq, ctx->child);
|
||||
skcipher_request_set_crypt(subreq, rctx->dst, rctx->dst,
|
||||
cryptlen, NULL);
|
||||
|
||||
if (err || !more)
|
||||
goto out;
|
||||
|
||||
rctx->src = rctx->srcbuf;
|
||||
|
||||
scatterwalk_done(&w.in, 0, 1);
|
||||
sg = w.in.sg;
|
||||
offset = w.in.offset;
|
||||
|
||||
if (rctx->src != sg) {
|
||||
rctx->src[0] = *sg;
|
||||
sg_unmark_end(rctx->src);
|
||||
scatterwalk_crypto_chain(rctx->src, sg_next(sg), 2);
|
||||
}
|
||||
rctx->src[0].length -= offset - sg->offset;
|
||||
rctx->src[0].offset = offset;
|
||||
|
||||
out:
|
||||
return err;
|
||||
}
|
||||
|
||||
static int init_crypt(struct skcipher_request *req, crypto_completion_t done)
|
||||
static int xor_tweak_pre(struct skcipher_request *req)
|
||||
{
|
||||
return xor_tweak(req, false);
|
||||
}
|
||||
|
||||
static int xor_tweak_post(struct skcipher_request *req)
|
||||
{
|
||||
return xor_tweak(req, true);
|
||||
}
|
||||
|
||||
static void crypt_done(struct crypto_async_request *areq, int err)
|
||||
{
|
||||
struct skcipher_request *req = areq->data;
|
||||
|
||||
if (!err)
|
||||
err = xor_tweak_post(req);
|
||||
|
||||
skcipher_request_complete(req, err);
|
||||
}
|
||||
|
||||
static void init_crypt(struct skcipher_request *req)
|
||||
{
|
||||
struct priv *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req));
|
||||
struct rctx *rctx = skcipher_request_ctx(req);
|
||||
struct skcipher_request *subreq;
|
||||
gfp_t gfp;
|
||||
struct skcipher_request *subreq = &rctx->subreq;
|
||||
|
||||
subreq = &rctx->subreq;
|
||||
skcipher_request_set_callback(subreq, req->base.flags, done, req);
|
||||
|
||||
gfp = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL :
|
||||
GFP_ATOMIC;
|
||||
rctx->ext = NULL;
|
||||
|
||||
subreq->cryptlen = LRW_BUFFER_SIZE;
|
||||
if (req->cryptlen > LRW_BUFFER_SIZE) {
|
||||
unsigned int n = min(req->cryptlen, (unsigned int)PAGE_SIZE);
|
||||
|
||||
rctx->ext = kmalloc(n, gfp);
|
||||
if (rctx->ext)
|
||||
subreq->cryptlen = n;
|
||||
}
|
||||
|
||||
rctx->src = req->src;
|
||||
rctx->dst = req->dst;
|
||||
rctx->left = req->cryptlen;
|
||||
skcipher_request_set_tfm(subreq, ctx->child);
|
||||
skcipher_request_set_callback(subreq, req->base.flags, crypt_done, req);
|
||||
/* pass req->iv as IV (will be used by xor_tweak, ECB will ignore it) */
|
||||
skcipher_request_set_crypt(subreq, req->dst, req->dst,
|
||||
req->cryptlen, req->iv);
|
||||
|
||||
/* calculate first value of T */
|
||||
memcpy(&rctx->t, req->iv, sizeof(rctx->t));
|
||||
|
||||
/* T <- I*Key2 */
|
||||
gf128mul_64k_bbe(&rctx->t, ctx->table);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void exit_crypt(struct skcipher_request *req)
|
||||
{
|
||||
struct rctx *rctx = skcipher_request_ctx(req);
|
||||
|
||||
rctx->left = 0;
|
||||
|
||||
if (rctx->ext)
|
||||
kzfree(rctx->ext);
|
||||
}
|
||||
|
||||
static int do_encrypt(struct skcipher_request *req, int err)
|
||||
{
|
||||
struct rctx *rctx = skcipher_request_ctx(req);
|
||||
struct skcipher_request *subreq;
|
||||
|
||||
subreq = &rctx->subreq;
|
||||
|
||||
while (!err && rctx->left) {
|
||||
err = pre_crypt(req) ?:
|
||||
crypto_skcipher_encrypt(subreq) ?:
|
||||
post_crypt(req);
|
||||
|
||||
if (err == -EINPROGRESS || err == -EBUSY)
|
||||
return err;
|
||||
}
|
||||
|
||||
exit_crypt(req);
|
||||
return err;
|
||||
}
|
||||
|
||||
static void encrypt_done(struct crypto_async_request *areq, int err)
|
||||
{
|
||||
struct skcipher_request *req = areq->data;
|
||||
struct skcipher_request *subreq;
|
||||
struct rctx *rctx;
|
||||
|
||||
rctx = skcipher_request_ctx(req);
|
||||
|
||||
if (err == -EINPROGRESS) {
|
||||
if (rctx->left != req->cryptlen)
|
||||
return;
|
||||
goto out;
|
||||
}
|
||||
|
||||
subreq = &rctx->subreq;
|
||||
subreq->base.flags &= CRYPTO_TFM_REQ_MAY_BACKLOG;
|
||||
|
||||
err = do_encrypt(req, err ?: post_crypt(req));
|
||||
if (rctx->left)
|
||||
return;
|
||||
|
||||
out:
|
||||
skcipher_request_complete(req, err);
|
||||
}
|
||||
|
||||
static int encrypt(struct skcipher_request *req)
|
||||
{
|
||||
return do_encrypt(req, init_crypt(req, encrypt_done));
|
||||
}
|
||||
|
||||
static int do_decrypt(struct skcipher_request *req, int err)
|
||||
{
|
||||
struct rctx *rctx = skcipher_request_ctx(req);
|
||||
struct skcipher_request *subreq;
|
||||
struct skcipher_request *subreq = &rctx->subreq;
|
||||
|
||||
subreq = &rctx->subreq;
|
||||
|
||||
while (!err && rctx->left) {
|
||||
err = pre_crypt(req) ?:
|
||||
crypto_skcipher_decrypt(subreq) ?:
|
||||
post_crypt(req);
|
||||
|
||||
if (err == -EINPROGRESS || err == -EBUSY)
|
||||
return err;
|
||||
}
|
||||
|
||||
exit_crypt(req);
|
||||
return err;
|
||||
}
|
||||
|
||||
static void decrypt_done(struct crypto_async_request *areq, int err)
|
||||
{
|
||||
struct skcipher_request *req = areq->data;
|
||||
struct skcipher_request *subreq;
|
||||
struct rctx *rctx;
|
||||
|
||||
rctx = skcipher_request_ctx(req);
|
||||
|
||||
if (err == -EINPROGRESS) {
|
||||
if (rctx->left != req->cryptlen)
|
||||
return;
|
||||
goto out;
|
||||
}
|
||||
|
||||
subreq = &rctx->subreq;
|
||||
subreq->base.flags &= CRYPTO_TFM_REQ_MAY_BACKLOG;
|
||||
|
||||
err = do_decrypt(req, err ?: post_crypt(req));
|
||||
if (rctx->left)
|
||||
return;
|
||||
|
||||
out:
|
||||
skcipher_request_complete(req, err);
|
||||
init_crypt(req);
|
||||
return xor_tweak_pre(req) ?:
|
||||
crypto_skcipher_encrypt(subreq) ?:
|
||||
xor_tweak_post(req);
|
||||
}
|
||||
|
||||
static int decrypt(struct skcipher_request *req)
|
||||
{
|
||||
return do_decrypt(req, init_crypt(req, decrypt_done));
|
||||
struct rctx *rctx = skcipher_request_ctx(req);
|
||||
struct skcipher_request *subreq = &rctx->subreq;
|
||||
|
||||
init_crypt(req);
|
||||
return xor_tweak_pre(req) ?:
|
||||
crypto_skcipher_decrypt(subreq) ?:
|
||||
xor_tweak_post(req);
|
||||
}
|
||||
|
||||
static int init_tfm(struct crypto_skcipher *tfm)
|
||||
@ -543,7 +382,7 @@ static int create(struct crypto_template *tmpl, struct rtattr **tb)
|
||||
inst->alg.base.cra_priority = alg->base.cra_priority;
|
||||
inst->alg.base.cra_blocksize = LRW_BLOCK_SIZE;
|
||||
inst->alg.base.cra_alignmask = alg->base.cra_alignmask |
|
||||
(__alignof__(u64) - 1);
|
||||
(__alignof__(__be32) - 1);
|
||||
|
||||
inst->alg.ivsize = LRW_BLOCK_SIZE;
|
||||
inst->alg.min_keysize = crypto_skcipher_alg_min_keysize(alg) +
|
||||
|
675
crypto/mcryptd.c
675
crypto/mcryptd.c
@ -1,675 +0,0 @@
|
||||
/*
|
||||
* Software multibuffer async crypto daemon.
|
||||
*
|
||||
* Copyright (c) 2014 Tim Chen <tim.c.chen@linux.intel.com>
|
||||
*
|
||||
* Adapted from crypto daemon.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License as published by the Free
|
||||
* Software Foundation; either version 2 of the License, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <crypto/algapi.h>
|
||||
#include <crypto/internal/hash.h>
|
||||
#include <crypto/internal/aead.h>
|
||||
#include <crypto/mcryptd.h>
|
||||
#include <crypto/crypto_wq.h>
|
||||
#include <linux/err.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/list.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/scatterlist.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/sched/stat.h>
|
||||
#include <linux/slab.h>
|
||||
|
||||
#define MCRYPTD_MAX_CPU_QLEN 100
|
||||
#define MCRYPTD_BATCH 9
|
||||
|
||||
static void *mcryptd_alloc_instance(struct crypto_alg *alg, unsigned int head,
|
||||
unsigned int tail);
|
||||
|
||||
struct mcryptd_flush_list {
|
||||
struct list_head list;
|
||||
struct mutex lock;
|
||||
};
|
||||
|
||||
static struct mcryptd_flush_list __percpu *mcryptd_flist;
|
||||
|
||||
struct hashd_instance_ctx {
|
||||
struct crypto_ahash_spawn spawn;
|
||||
struct mcryptd_queue *queue;
|
||||
};
|
||||
|
||||
static void mcryptd_queue_worker(struct work_struct *work);
|
||||
|
||||
void mcryptd_arm_flusher(struct mcryptd_alg_cstate *cstate, unsigned long delay)
|
||||
{
|
||||
struct mcryptd_flush_list *flist;
|
||||
|
||||
if (!cstate->flusher_engaged) {
|
||||
/* put the flusher on the flush list */
|
||||
flist = per_cpu_ptr(mcryptd_flist, smp_processor_id());
|
||||
mutex_lock(&flist->lock);
|
||||
list_add_tail(&cstate->flush_list, &flist->list);
|
||||
cstate->flusher_engaged = true;
|
||||
cstate->next_flush = jiffies + delay;
|
||||
queue_delayed_work_on(smp_processor_id(), kcrypto_wq,
|
||||
&cstate->flush, delay);
|
||||
mutex_unlock(&flist->lock);
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL(mcryptd_arm_flusher);
|
||||
|
||||
static int mcryptd_init_queue(struct mcryptd_queue *queue,
|
||||
unsigned int max_cpu_qlen)
|
||||
{
|
||||
int cpu;
|
||||
struct mcryptd_cpu_queue *cpu_queue;
|
||||
|
||||
queue->cpu_queue = alloc_percpu(struct mcryptd_cpu_queue);
|
||||
pr_debug("mqueue:%p mcryptd_cpu_queue %p\n", queue, queue->cpu_queue);
|
||||
if (!queue->cpu_queue)
|
||||
return -ENOMEM;
|
||||
for_each_possible_cpu(cpu) {
|
||||
cpu_queue = per_cpu_ptr(queue->cpu_queue, cpu);
|
||||
pr_debug("cpu_queue #%d %p\n", cpu, queue->cpu_queue);
|
||||
crypto_init_queue(&cpu_queue->queue, max_cpu_qlen);
|
||||
INIT_WORK(&cpu_queue->work, mcryptd_queue_worker);
|
||||
spin_lock_init(&cpu_queue->q_lock);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void mcryptd_fini_queue(struct mcryptd_queue *queue)
|
||||
{
|
||||
int cpu;
|
||||
struct mcryptd_cpu_queue *cpu_queue;
|
||||
|
||||
for_each_possible_cpu(cpu) {
|
||||
cpu_queue = per_cpu_ptr(queue->cpu_queue, cpu);
|
||||
BUG_ON(cpu_queue->queue.qlen);
|
||||
}
|
||||
free_percpu(queue->cpu_queue);
|
||||
}
|
||||
|
||||
static int mcryptd_enqueue_request(struct mcryptd_queue *queue,
|
||||
struct crypto_async_request *request,
|
||||
struct mcryptd_hash_request_ctx *rctx)
|
||||
{
|
||||
int cpu, err;
|
||||
struct mcryptd_cpu_queue *cpu_queue;
|
||||
|
||||
cpu_queue = raw_cpu_ptr(queue->cpu_queue);
|
||||
spin_lock(&cpu_queue->q_lock);
|
||||
cpu = smp_processor_id();
|
||||
rctx->tag.cpu = smp_processor_id();
|
||||
|
||||
err = crypto_enqueue_request(&cpu_queue->queue, request);
|
||||
pr_debug("enqueue request: cpu %d cpu_queue %p request %p\n",
|
||||
cpu, cpu_queue, request);
|
||||
spin_unlock(&cpu_queue->q_lock);
|
||||
queue_work_on(cpu, kcrypto_wq, &cpu_queue->work);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
/*
|
||||
* Try to opportunisticlly flush the partially completed jobs if
|
||||
* crypto daemon is the only task running.
|
||||
*/
|
||||
static void mcryptd_opportunistic_flush(void)
|
||||
{
|
||||
struct mcryptd_flush_list *flist;
|
||||
struct mcryptd_alg_cstate *cstate;
|
||||
|
||||
flist = per_cpu_ptr(mcryptd_flist, smp_processor_id());
|
||||
while (single_task_running()) {
|
||||
mutex_lock(&flist->lock);
|
||||
cstate = list_first_entry_or_null(&flist->list,
|
||||
struct mcryptd_alg_cstate, flush_list);
|
||||
if (!cstate || !cstate->flusher_engaged) {
|
||||
mutex_unlock(&flist->lock);
|
||||
return;
|
||||
}
|
||||
list_del(&cstate->flush_list);
|
||||
cstate->flusher_engaged = false;
|
||||
mutex_unlock(&flist->lock);
|
||||
cstate->alg_state->flusher(cstate);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Called in workqueue context, do one real cryption work (via
|
||||
* req->complete) and reschedule itself if there are more work to
|
||||
* do.
|
||||
*/
|
||||
static void mcryptd_queue_worker(struct work_struct *work)
|
||||
{
|
||||
struct mcryptd_cpu_queue *cpu_queue;
|
||||
struct crypto_async_request *req, *backlog;
|
||||
int i;
|
||||
|
||||
/*
|
||||
* Need to loop through more than once for multi-buffer to
|
||||
* be effective.
|
||||
*/
|
||||
|
||||
cpu_queue = container_of(work, struct mcryptd_cpu_queue, work);
|
||||
i = 0;
|
||||
while (i < MCRYPTD_BATCH || single_task_running()) {
|
||||
|
||||
spin_lock_bh(&cpu_queue->q_lock);
|
||||
backlog = crypto_get_backlog(&cpu_queue->queue);
|
||||
req = crypto_dequeue_request(&cpu_queue->queue);
|
||||
spin_unlock_bh(&cpu_queue->q_lock);
|
||||
|
||||
if (!req) {
|
||||
mcryptd_opportunistic_flush();
|
||||
return;
|
||||
}
|
||||
|
||||
if (backlog)
|
||||
backlog->complete(backlog, -EINPROGRESS);
|
||||
req->complete(req, 0);
|
||||
if (!cpu_queue->queue.qlen)
|
||||
return;
|
||||
++i;
|
||||
}
|
||||
if (cpu_queue->queue.qlen)
|
||||
queue_work_on(smp_processor_id(), kcrypto_wq, &cpu_queue->work);
|
||||
}
|
||||
|
||||
void mcryptd_flusher(struct work_struct *__work)
|
||||
{
|
||||
struct mcryptd_alg_cstate *alg_cpu_state;
|
||||
struct mcryptd_alg_state *alg_state;
|
||||
struct mcryptd_flush_list *flist;
|
||||
int cpu;
|
||||
|
||||
cpu = smp_processor_id();
|
||||
alg_cpu_state = container_of(to_delayed_work(__work),
|
||||
struct mcryptd_alg_cstate, flush);
|
||||
alg_state = alg_cpu_state->alg_state;
|
||||
if (alg_cpu_state->cpu != cpu)
|
||||
pr_debug("mcryptd error: work on cpu %d, should be cpu %d\n",
|
||||
cpu, alg_cpu_state->cpu);
|
||||
|
||||
if (alg_cpu_state->flusher_engaged) {
|
||||
flist = per_cpu_ptr(mcryptd_flist, cpu);
|
||||
mutex_lock(&flist->lock);
|
||||
list_del(&alg_cpu_state->flush_list);
|
||||
alg_cpu_state->flusher_engaged = false;
|
||||
mutex_unlock(&flist->lock);
|
||||
alg_state->flusher(alg_cpu_state);
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(mcryptd_flusher);
|
||||
|
||||
static inline struct mcryptd_queue *mcryptd_get_queue(struct crypto_tfm *tfm)
|
||||
{
|
||||
struct crypto_instance *inst = crypto_tfm_alg_instance(tfm);
|
||||
struct mcryptd_instance_ctx *ictx = crypto_instance_ctx(inst);
|
||||
|
||||
return ictx->queue;
|
||||
}
|
||||
|
||||
static void *mcryptd_alloc_instance(struct crypto_alg *alg, unsigned int head,
|
||||
unsigned int tail)
|
||||
{
|
||||
char *p;
|
||||
struct crypto_instance *inst;
|
||||
int err;
|
||||
|
||||
p = kzalloc(head + sizeof(*inst) + tail, GFP_KERNEL);
|
||||
if (!p)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
inst = (void *)(p + head);
|
||||
|
||||
err = -ENAMETOOLONG;
|
||||
if (snprintf(inst->alg.cra_driver_name, CRYPTO_MAX_ALG_NAME,
|
||||
"mcryptd(%s)", alg->cra_driver_name) >= CRYPTO_MAX_ALG_NAME)
|
||||
goto out_free_inst;
|
||||
|
||||
memcpy(inst->alg.cra_name, alg->cra_name, CRYPTO_MAX_ALG_NAME);
|
||||
|
||||
inst->alg.cra_priority = alg->cra_priority + 50;
|
||||
inst->alg.cra_blocksize = alg->cra_blocksize;
|
||||
inst->alg.cra_alignmask = alg->cra_alignmask;
|
||||
|
||||
out:
|
||||
return p;
|
||||
|
||||
out_free_inst:
|
||||
kfree(p);
|
||||
p = ERR_PTR(err);
|
||||
goto out;
|
||||
}
|
||||
|
||||
static inline bool mcryptd_check_internal(struct rtattr **tb, u32 *type,
|
||||
u32 *mask)
|
||||
{
|
||||
struct crypto_attr_type *algt;
|
||||
|
||||
algt = crypto_get_attr_type(tb);
|
||||
if (IS_ERR(algt))
|
||||
return false;
|
||||
|
||||
*type |= algt->type & CRYPTO_ALG_INTERNAL;
|
||||
*mask |= algt->mask & CRYPTO_ALG_INTERNAL;
|
||||
|
||||
if (*type & *mask & CRYPTO_ALG_INTERNAL)
|
||||
return true;
|
||||
else
|
||||
return false;
|
||||
}
|
||||
|
||||
static int mcryptd_hash_init_tfm(struct crypto_tfm *tfm)
|
||||
{
|
||||
struct crypto_instance *inst = crypto_tfm_alg_instance(tfm);
|
||||
struct hashd_instance_ctx *ictx = crypto_instance_ctx(inst);
|
||||
struct crypto_ahash_spawn *spawn = &ictx->spawn;
|
||||
struct mcryptd_hash_ctx *ctx = crypto_tfm_ctx(tfm);
|
||||
struct crypto_ahash *hash;
|
||||
|
||||
hash = crypto_spawn_ahash(spawn);
|
||||
if (IS_ERR(hash))
|
||||
return PTR_ERR(hash);
|
||||
|
||||
ctx->child = hash;
|
||||
crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
|
||||
sizeof(struct mcryptd_hash_request_ctx) +
|
||||
crypto_ahash_reqsize(hash));
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void mcryptd_hash_exit_tfm(struct crypto_tfm *tfm)
|
||||
{
|
||||
struct mcryptd_hash_ctx *ctx = crypto_tfm_ctx(tfm);
|
||||
|
||||
crypto_free_ahash(ctx->child);
|
||||
}
|
||||
|
||||
static int mcryptd_hash_setkey(struct crypto_ahash *parent,
|
||||
const u8 *key, unsigned int keylen)
|
||||
{
|
||||
struct mcryptd_hash_ctx *ctx = crypto_ahash_ctx(parent);
|
||||
struct crypto_ahash *child = ctx->child;
|
||||
int err;
|
||||
|
||||
crypto_ahash_clear_flags(child, CRYPTO_TFM_REQ_MASK);
|
||||
crypto_ahash_set_flags(child, crypto_ahash_get_flags(parent) &
|
||||
CRYPTO_TFM_REQ_MASK);
|
||||
err = crypto_ahash_setkey(child, key, keylen);
|
||||
crypto_ahash_set_flags(parent, crypto_ahash_get_flags(child) &
|
||||
CRYPTO_TFM_RES_MASK);
|
||||
return err;
|
||||
}
|
||||
|
||||
static int mcryptd_hash_enqueue(struct ahash_request *req,
|
||||
crypto_completion_t complete)
|
||||
{
|
||||
int ret;
|
||||
|
||||
struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req);
|
||||
struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
|
||||
struct mcryptd_queue *queue =
|
||||
mcryptd_get_queue(crypto_ahash_tfm(tfm));
|
||||
|
||||
rctx->complete = req->base.complete;
|
||||
req->base.complete = complete;
|
||||
|
||||
ret = mcryptd_enqueue_request(queue, &req->base, rctx);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void mcryptd_hash_init(struct crypto_async_request *req_async, int err)
|
||||
{
|
||||
struct mcryptd_hash_ctx *ctx = crypto_tfm_ctx(req_async->tfm);
|
||||
struct crypto_ahash *child = ctx->child;
|
||||
struct ahash_request *req = ahash_request_cast(req_async);
|
||||
struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req);
|
||||
struct ahash_request *desc = &rctx->areq;
|
||||
|
||||
if (unlikely(err == -EINPROGRESS))
|
||||
goto out;
|
||||
|
||||
ahash_request_set_tfm(desc, child);
|
||||
ahash_request_set_callback(desc, CRYPTO_TFM_REQ_MAY_SLEEP,
|
||||
rctx->complete, req_async);
|
||||
|
||||
rctx->out = req->result;
|
||||
err = crypto_ahash_init(desc);
|
||||
|
||||
out:
|
||||
local_bh_disable();
|
||||
rctx->complete(&req->base, err);
|
||||
local_bh_enable();
|
||||
}
|
||||
|
||||
static int mcryptd_hash_init_enqueue(struct ahash_request *req)
|
||||
{
|
||||
return mcryptd_hash_enqueue(req, mcryptd_hash_init);
|
||||
}
|
||||
|
||||
static void mcryptd_hash_update(struct crypto_async_request *req_async, int err)
|
||||
{
|
||||
struct ahash_request *req = ahash_request_cast(req_async);
|
||||
struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req);
|
||||
|
||||
if (unlikely(err == -EINPROGRESS))
|
||||
goto out;
|
||||
|
||||
rctx->out = req->result;
|
||||
err = crypto_ahash_update(&rctx->areq);
|
||||
if (err) {
|
||||
req->base.complete = rctx->complete;
|
||||
goto out;
|
||||
}
|
||||
|
||||
return;
|
||||
out:
|
||||
local_bh_disable();
|
||||
rctx->complete(&req->base, err);
|
||||
local_bh_enable();
|
||||
}
|
||||
|
||||
static int mcryptd_hash_update_enqueue(struct ahash_request *req)
|
||||
{
|
||||
return mcryptd_hash_enqueue(req, mcryptd_hash_update);
|
||||
}
|
||||
|
||||
static void mcryptd_hash_final(struct crypto_async_request *req_async, int err)
|
||||
{
|
||||
struct ahash_request *req = ahash_request_cast(req_async);
|
||||
struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req);
|
||||
|
||||
if (unlikely(err == -EINPROGRESS))
|
||||
goto out;
|
||||
|
||||
rctx->out = req->result;
|
||||
err = crypto_ahash_final(&rctx->areq);
|
||||
if (err) {
|
||||
req->base.complete = rctx->complete;
|
||||
goto out;
|
||||
}
|
||||
|
||||
return;
|
||||
out:
|
||||
local_bh_disable();
|
||||
rctx->complete(&req->base, err);
|
||||
local_bh_enable();
|
||||
}
|
||||
|
||||
static int mcryptd_hash_final_enqueue(struct ahash_request *req)
|
||||
{
|
||||
return mcryptd_hash_enqueue(req, mcryptd_hash_final);
|
||||
}
|
||||
|
||||
static void mcryptd_hash_finup(struct crypto_async_request *req_async, int err)
|
||||
{
|
||||
struct ahash_request *req = ahash_request_cast(req_async);
|
||||
struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req);
|
||||
|
||||
if (unlikely(err == -EINPROGRESS))
|
||||
goto out;
|
||||
rctx->out = req->result;
|
||||
err = crypto_ahash_finup(&rctx->areq);
|
||||
|
||||
if (err) {
|
||||
req->base.complete = rctx->complete;
|
||||
goto out;
|
||||
}
|
||||
|
||||
return;
|
||||
out:
|
||||
local_bh_disable();
|
||||
rctx->complete(&req->base, err);
|
||||
local_bh_enable();
|
||||
}
|
||||
|
||||
static int mcryptd_hash_finup_enqueue(struct ahash_request *req)
|
||||
{
|
||||
return mcryptd_hash_enqueue(req, mcryptd_hash_finup);
|
||||
}
|
||||
|
||||
static void mcryptd_hash_digest(struct crypto_async_request *req_async, int err)
|
||||
{
|
||||
struct mcryptd_hash_ctx *ctx = crypto_tfm_ctx(req_async->tfm);
|
||||
struct crypto_ahash *child = ctx->child;
|
||||
struct ahash_request *req = ahash_request_cast(req_async);
|
||||
struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req);
|
||||
struct ahash_request *desc = &rctx->areq;
|
||||
|
||||
if (unlikely(err == -EINPROGRESS))
|
||||
goto out;
|
||||
|
||||
ahash_request_set_tfm(desc, child);
|
||||
ahash_request_set_callback(desc, CRYPTO_TFM_REQ_MAY_SLEEP,
|
||||
rctx->complete, req_async);
|
||||
|
||||
rctx->out = req->result;
|
||||
err = crypto_ahash_init(desc) ?: crypto_ahash_finup(desc);
|
||||
|
||||
out:
|
||||
local_bh_disable();
|
||||
rctx->complete(&req->base, err);
|
||||
local_bh_enable();
|
||||
}
|
||||
|
||||
static int mcryptd_hash_digest_enqueue(struct ahash_request *req)
|
||||
{
|
||||
return mcryptd_hash_enqueue(req, mcryptd_hash_digest);
|
||||
}
|
||||
|
||||
static int mcryptd_hash_export(struct ahash_request *req, void *out)
|
||||
{
|
||||
struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req);
|
||||
|
||||
return crypto_ahash_export(&rctx->areq, out);
|
||||
}
|
||||
|
||||
static int mcryptd_hash_import(struct ahash_request *req, const void *in)
|
||||
{
|
||||
struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req);
|
||||
|
||||
return crypto_ahash_import(&rctx->areq, in);
|
||||
}
|
||||
|
||||
static int mcryptd_create_hash(struct crypto_template *tmpl, struct rtattr **tb,
|
||||
struct mcryptd_queue *queue)
|
||||
{
|
||||
struct hashd_instance_ctx *ctx;
|
||||
struct ahash_instance *inst;
|
||||
struct hash_alg_common *halg;
|
||||
struct crypto_alg *alg;
|
||||
u32 type = 0;
|
||||
u32 mask = 0;
|
||||
int err;
|
||||
|
||||
if (!mcryptd_check_internal(tb, &type, &mask))
|
||||
return -EINVAL;
|
||||
|
||||
halg = ahash_attr_alg(tb[1], type, mask);
|
||||
if (IS_ERR(halg))
|
||||
return PTR_ERR(halg);
|
||||
|
||||
alg = &halg->base;
|
||||
pr_debug("crypto: mcryptd hash alg: %s\n", alg->cra_name);
|
||||
inst = mcryptd_alloc_instance(alg, ahash_instance_headroom(),
|
||||
sizeof(*ctx));
|
||||
err = PTR_ERR(inst);
|
||||
if (IS_ERR(inst))
|
||||
goto out_put_alg;
|
||||
|
||||
ctx = ahash_instance_ctx(inst);
|
||||
ctx->queue = queue;
|
||||
|
||||
err = crypto_init_ahash_spawn(&ctx->spawn, halg,
|
||||
ahash_crypto_instance(inst));
|
||||
if (err)
|
||||
goto out_free_inst;
|
||||
|
||||
inst->alg.halg.base.cra_flags = CRYPTO_ALG_ASYNC |
|
||||
(alg->cra_flags & (CRYPTO_ALG_INTERNAL |
|
||||
CRYPTO_ALG_OPTIONAL_KEY));
|
||||
|
||||
inst->alg.halg.digestsize = halg->digestsize;
|
||||
inst->alg.halg.statesize = halg->statesize;
|
||||
inst->alg.halg.base.cra_ctxsize = sizeof(struct mcryptd_hash_ctx);
|
||||
|
||||
inst->alg.halg.base.cra_init = mcryptd_hash_init_tfm;
|
||||
inst->alg.halg.base.cra_exit = mcryptd_hash_exit_tfm;
|
||||
|
||||
inst->alg.init = mcryptd_hash_init_enqueue;
|
||||
inst->alg.update = mcryptd_hash_update_enqueue;
|
||||
inst->alg.final = mcryptd_hash_final_enqueue;
|
||||
inst->alg.finup = mcryptd_hash_finup_enqueue;
|
||||
inst->alg.export = mcryptd_hash_export;
|
||||
inst->alg.import = mcryptd_hash_import;
|
||||
if (crypto_hash_alg_has_setkey(halg))
|
||||
inst->alg.setkey = mcryptd_hash_setkey;
|
||||
inst->alg.digest = mcryptd_hash_digest_enqueue;
|
||||
|
||||
err = ahash_register_instance(tmpl, inst);
|
||||
if (err) {
|
||||
crypto_drop_ahash(&ctx->spawn);
|
||||
out_free_inst:
|
||||
kfree(inst);
|
||||
}
|
||||
|
||||
out_put_alg:
|
||||
crypto_mod_put(alg);
|
||||
return err;
|
||||
}
|
||||
|
||||
static struct mcryptd_queue mqueue;
|
||||
|
||||
static int mcryptd_create(struct crypto_template *tmpl, struct rtattr **tb)
|
||||
{
|
||||
struct crypto_attr_type *algt;
|
||||
|
||||
algt = crypto_get_attr_type(tb);
|
||||
if (IS_ERR(algt))
|
||||
return PTR_ERR(algt);
|
||||
|
||||
switch (algt->type & algt->mask & CRYPTO_ALG_TYPE_MASK) {
|
||||
case CRYPTO_ALG_TYPE_DIGEST:
|
||||
return mcryptd_create_hash(tmpl, tb, &mqueue);
|
||||
break;
|
||||
}
|
||||
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
static void mcryptd_free(struct crypto_instance *inst)
|
||||
{
|
||||
struct mcryptd_instance_ctx *ctx = crypto_instance_ctx(inst);
|
||||
struct hashd_instance_ctx *hctx = crypto_instance_ctx(inst);
|
||||
|
||||
switch (inst->alg.cra_flags & CRYPTO_ALG_TYPE_MASK) {
|
||||
case CRYPTO_ALG_TYPE_AHASH:
|
||||
crypto_drop_ahash(&hctx->spawn);
|
||||
kfree(ahash_instance(inst));
|
||||
return;
|
||||
default:
|
||||
crypto_drop_spawn(&ctx->spawn);
|
||||
kfree(inst);
|
||||
}
|
||||
}
|
||||
|
||||
static struct crypto_template mcryptd_tmpl = {
|
||||
.name = "mcryptd",
|
||||
.create = mcryptd_create,
|
||||
.free = mcryptd_free,
|
||||
.module = THIS_MODULE,
|
||||
};
|
||||
|
||||
struct mcryptd_ahash *mcryptd_alloc_ahash(const char *alg_name,
|
||||
u32 type, u32 mask)
|
||||
{
|
||||
char mcryptd_alg_name[CRYPTO_MAX_ALG_NAME];
|
||||
struct crypto_ahash *tfm;
|
||||
|
||||
if (snprintf(mcryptd_alg_name, CRYPTO_MAX_ALG_NAME,
|
||||
"mcryptd(%s)", alg_name) >= CRYPTO_MAX_ALG_NAME)
|
||||
return ERR_PTR(-EINVAL);
|
||||
tfm = crypto_alloc_ahash(mcryptd_alg_name, type, mask);
|
||||
if (IS_ERR(tfm))
|
||||
return ERR_CAST(tfm);
|
||||
if (tfm->base.__crt_alg->cra_module != THIS_MODULE) {
|
||||
crypto_free_ahash(tfm);
|
||||
return ERR_PTR(-EINVAL);
|
||||
}
|
||||
|
||||
return __mcryptd_ahash_cast(tfm);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(mcryptd_alloc_ahash);
|
||||
|
||||
struct crypto_ahash *mcryptd_ahash_child(struct mcryptd_ahash *tfm)
|
||||
{
|
||||
struct mcryptd_hash_ctx *ctx = crypto_ahash_ctx(&tfm->base);
|
||||
|
||||
return ctx->child;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(mcryptd_ahash_child);
|
||||
|
||||
struct ahash_request *mcryptd_ahash_desc(struct ahash_request *req)
|
||||
{
|
||||
struct mcryptd_hash_request_ctx *rctx = ahash_request_ctx(req);
|
||||
return &rctx->areq;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(mcryptd_ahash_desc);
|
||||
|
||||
void mcryptd_free_ahash(struct mcryptd_ahash *tfm)
|
||||
{
|
||||
crypto_free_ahash(&tfm->base);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(mcryptd_free_ahash);
|
||||
|
||||
static int __init mcryptd_init(void)
|
||||
{
|
||||
int err, cpu;
|
||||
struct mcryptd_flush_list *flist;
|
||||
|
||||
mcryptd_flist = alloc_percpu(struct mcryptd_flush_list);
|
||||
for_each_possible_cpu(cpu) {
|
||||
flist = per_cpu_ptr(mcryptd_flist, cpu);
|
||||
INIT_LIST_HEAD(&flist->list);
|
||||
mutex_init(&flist->lock);
|
||||
}
|
||||
|
||||
err = mcryptd_init_queue(&mqueue, MCRYPTD_MAX_CPU_QLEN);
|
||||
if (err) {
|
||||
free_percpu(mcryptd_flist);
|
||||
return err;
|
||||
}
|
||||
|
||||
err = crypto_register_template(&mcryptd_tmpl);
|
||||
if (err) {
|
||||
mcryptd_fini_queue(&mqueue);
|
||||
free_percpu(mcryptd_flist);
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static void __exit mcryptd_exit(void)
|
||||
{
|
||||
mcryptd_fini_queue(&mqueue);
|
||||
crypto_unregister_template(&mcryptd_tmpl);
|
||||
free_percpu(mcryptd_flist);
|
||||
}
|
||||
|
||||
subsys_initcall(mcryptd_init);
|
||||
module_exit(mcryptd_exit);
|
||||
|
||||
MODULE_LICENSE("GPL");
|
||||
MODULE_DESCRIPTION("Software async multibuffer crypto daemon");
|
||||
MODULE_ALIAS_CRYPTO("mcryptd");
|
@ -385,14 +385,11 @@ static void crypto_morus1280_final(struct morus1280_state *state,
|
||||
struct morus1280_block *tag_xor,
|
||||
u64 assoclen, u64 cryptlen)
|
||||
{
|
||||
u64 assocbits = assoclen * 8;
|
||||
u64 cryptbits = cryptlen * 8;
|
||||
|
||||
struct morus1280_block tmp;
|
||||
unsigned int i;
|
||||
|
||||
tmp.words[0] = cpu_to_le64(assocbits);
|
||||
tmp.words[1] = cpu_to_le64(cryptbits);
|
||||
tmp.words[0] = assoclen * 8;
|
||||
tmp.words[1] = cryptlen * 8;
|
||||
tmp.words[2] = 0;
|
||||
tmp.words[3] = 0;
|
||||
|
||||
|
@ -384,21 +384,13 @@ static void crypto_morus640_final(struct morus640_state *state,
|
||||
struct morus640_block *tag_xor,
|
||||
u64 assoclen, u64 cryptlen)
|
||||
{
|
||||
u64 assocbits = assoclen * 8;
|
||||
u64 cryptbits = cryptlen * 8;
|
||||
|
||||
u32 assocbits_lo = (u32)assocbits;
|
||||
u32 assocbits_hi = (u32)(assocbits >> 32);
|
||||
u32 cryptbits_lo = (u32)cryptbits;
|
||||
u32 cryptbits_hi = (u32)(cryptbits >> 32);
|
||||
|
||||
struct morus640_block tmp;
|
||||
unsigned int i;
|
||||
|
||||
tmp.words[0] = cpu_to_le32(assocbits_lo);
|
||||
tmp.words[1] = cpu_to_le32(assocbits_hi);
|
||||
tmp.words[2] = cpu_to_le32(cryptbits_lo);
|
||||
tmp.words[3] = cpu_to_le32(cryptbits_hi);
|
||||
tmp.words[0] = lower_32_bits(assoclen * 8);
|
||||
tmp.words[1] = upper_32_bits(assoclen * 8);
|
||||
tmp.words[2] = lower_32_bits(cryptlen * 8);
|
||||
tmp.words[3] = upper_32_bits(cryptlen * 8);
|
||||
|
||||
for (i = 0; i < MORUS_BLOCK_WORDS; i++)
|
||||
state->s[4].words[i] ^= state->s[0].words[i];
|
||||
|
225
crypto/ofb.c
Normal file
225
crypto/ofb.c
Normal file
@ -0,0 +1,225 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
/*
|
||||
* OFB: Output FeedBack mode
|
||||
*
|
||||
* Copyright (C) 2018 ARM Limited or its affiliates.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Based loosely on public domain code gleaned from libtomcrypt
|
||||
* (https://github.com/libtom/libtomcrypt).
|
||||
*/
|
||||
|
||||
#include <crypto/algapi.h>
|
||||
#include <crypto/internal/skcipher.h>
|
||||
#include <linux/err.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/scatterlist.h>
|
||||
#include <linux/slab.h>
|
||||
|
||||
struct crypto_ofb_ctx {
|
||||
struct crypto_cipher *child;
|
||||
int cnt;
|
||||
};
|
||||
|
||||
|
||||
static int crypto_ofb_setkey(struct crypto_skcipher *parent, const u8 *key,
|
||||
unsigned int keylen)
|
||||
{
|
||||
struct crypto_ofb_ctx *ctx = crypto_skcipher_ctx(parent);
|
||||
struct crypto_cipher *child = ctx->child;
|
||||
int err;
|
||||
|
||||
crypto_cipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
|
||||
crypto_cipher_set_flags(child, crypto_skcipher_get_flags(parent) &
|
||||
CRYPTO_TFM_REQ_MASK);
|
||||
err = crypto_cipher_setkey(child, key, keylen);
|
||||
crypto_skcipher_set_flags(parent, crypto_cipher_get_flags(child) &
|
||||
CRYPTO_TFM_RES_MASK);
|
||||
return err;
|
||||
}
|
||||
|
||||
static int crypto_ofb_encrypt_segment(struct crypto_ofb_ctx *ctx,
|
||||
struct skcipher_walk *walk,
|
||||
struct crypto_cipher *tfm)
|
||||
{
|
||||
int bsize = crypto_cipher_blocksize(tfm);
|
||||
int nbytes = walk->nbytes;
|
||||
|
||||
u8 *src = walk->src.virt.addr;
|
||||
u8 *dst = walk->dst.virt.addr;
|
||||
u8 *iv = walk->iv;
|
||||
|
||||
do {
|
||||
if (ctx->cnt == bsize) {
|
||||
if (nbytes < bsize)
|
||||
break;
|
||||
crypto_cipher_encrypt_one(tfm, iv, iv);
|
||||
ctx->cnt = 0;
|
||||
}
|
||||
*dst = *src ^ iv[ctx->cnt];
|
||||
src++;
|
||||
dst++;
|
||||
ctx->cnt++;
|
||||
} while (--nbytes);
|
||||
return nbytes;
|
||||
}
|
||||
|
||||
static int crypto_ofb_encrypt(struct skcipher_request *req)
|
||||
{
|
||||
struct skcipher_walk walk;
|
||||
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
|
||||
unsigned int bsize;
|
||||
struct crypto_ofb_ctx *ctx = crypto_skcipher_ctx(tfm);
|
||||
struct crypto_cipher *child = ctx->child;
|
||||
int ret = 0;
|
||||
|
||||
bsize = crypto_cipher_blocksize(child);
|
||||
ctx->cnt = bsize;
|
||||
|
||||
ret = skcipher_walk_virt(&walk, req, false);
|
||||
|
||||
while (walk.nbytes) {
|
||||
ret = crypto_ofb_encrypt_segment(ctx, &walk, child);
|
||||
ret = skcipher_walk_done(&walk, ret);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* OFB encrypt and decrypt are identical */
|
||||
static int crypto_ofb_decrypt(struct skcipher_request *req)
|
||||
{
|
||||
return crypto_ofb_encrypt(req);
|
||||
}
|
||||
|
||||
static int crypto_ofb_init_tfm(struct crypto_skcipher *tfm)
|
||||
{
|
||||
struct skcipher_instance *inst = skcipher_alg_instance(tfm);
|
||||
struct crypto_spawn *spawn = skcipher_instance_ctx(inst);
|
||||
struct crypto_ofb_ctx *ctx = crypto_skcipher_ctx(tfm);
|
||||
struct crypto_cipher *cipher;
|
||||
|
||||
cipher = crypto_spawn_cipher(spawn);
|
||||
if (IS_ERR(cipher))
|
||||
return PTR_ERR(cipher);
|
||||
|
||||
ctx->child = cipher;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void crypto_ofb_exit_tfm(struct crypto_skcipher *tfm)
|
||||
{
|
||||
struct crypto_ofb_ctx *ctx = crypto_skcipher_ctx(tfm);
|
||||
|
||||
crypto_free_cipher(ctx->child);
|
||||
}
|
||||
|
||||
static void crypto_ofb_free(struct skcipher_instance *inst)
|
||||
{
|
||||
crypto_drop_skcipher(skcipher_instance_ctx(inst));
|
||||
kfree(inst);
|
||||
}
|
||||
|
||||
static int crypto_ofb_create(struct crypto_template *tmpl, struct rtattr **tb)
|
||||
{
|
||||
struct skcipher_instance *inst;
|
||||
struct crypto_attr_type *algt;
|
||||
struct crypto_spawn *spawn;
|
||||
struct crypto_alg *alg;
|
||||
u32 mask;
|
||||
int err;
|
||||
|
||||
err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_SKCIPHER);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL);
|
||||
if (!inst)
|
||||
return -ENOMEM;
|
||||
|
||||
algt = crypto_get_attr_type(tb);
|
||||
err = PTR_ERR(algt);
|
||||
if (IS_ERR(algt))
|
||||
goto err_free_inst;
|
||||
|
||||
mask = CRYPTO_ALG_TYPE_MASK |
|
||||
crypto_requires_off(algt->type, algt->mask,
|
||||
CRYPTO_ALG_NEED_FALLBACK);
|
||||
|
||||
alg = crypto_get_attr_alg(tb, CRYPTO_ALG_TYPE_CIPHER, mask);
|
||||
err = PTR_ERR(alg);
|
||||
if (IS_ERR(alg))
|
||||
goto err_free_inst;
|
||||
|
||||
spawn = skcipher_instance_ctx(inst);
|
||||
err = crypto_init_spawn(spawn, alg, skcipher_crypto_instance(inst),
|
||||
CRYPTO_ALG_TYPE_MASK);
|
||||
crypto_mod_put(alg);
|
||||
if (err)
|
||||
goto err_free_inst;
|
||||
|
||||
err = crypto_inst_setname(skcipher_crypto_instance(inst), "ofb", alg);
|
||||
if (err)
|
||||
goto err_drop_spawn;
|
||||
|
||||
inst->alg.base.cra_priority = alg->cra_priority;
|
||||
inst->alg.base.cra_blocksize = alg->cra_blocksize;
|
||||
inst->alg.base.cra_alignmask = alg->cra_alignmask;
|
||||
|
||||
/* We access the data as u32s when xoring. */
|
||||
inst->alg.base.cra_alignmask |= __alignof__(u32) - 1;
|
||||
|
||||
inst->alg.ivsize = alg->cra_blocksize;
|
||||
inst->alg.min_keysize = alg->cra_cipher.cia_min_keysize;
|
||||
inst->alg.max_keysize = alg->cra_cipher.cia_max_keysize;
|
||||
|
||||
inst->alg.base.cra_ctxsize = sizeof(struct crypto_ofb_ctx);
|
||||
|
||||
inst->alg.init = crypto_ofb_init_tfm;
|
||||
inst->alg.exit = crypto_ofb_exit_tfm;
|
||||
|
||||
inst->alg.setkey = crypto_ofb_setkey;
|
||||
inst->alg.encrypt = crypto_ofb_encrypt;
|
||||
inst->alg.decrypt = crypto_ofb_decrypt;
|
||||
|
||||
inst->free = crypto_ofb_free;
|
||||
|
||||
err = skcipher_register_instance(tmpl, inst);
|
||||
if (err)
|
||||
goto err_drop_spawn;
|
||||
|
||||
out:
|
||||
return err;
|
||||
|
||||
err_drop_spawn:
|
||||
crypto_drop_spawn(spawn);
|
||||
err_free_inst:
|
||||
kfree(inst);
|
||||
goto out;
|
||||
}
|
||||
|
||||
static struct crypto_template crypto_ofb_tmpl = {
|
||||
.name = "ofb",
|
||||
.create = crypto_ofb_create,
|
||||
.module = THIS_MODULE,
|
||||
};
|
||||
|
||||
static int __init crypto_ofb_module_init(void)
|
||||
{
|
||||
return crypto_register_template(&crypto_ofb_tmpl);
|
||||
}
|
||||
|
||||
static void __exit crypto_ofb_module_exit(void)
|
||||
{
|
||||
crypto_unregister_template(&crypto_ofb_tmpl);
|
||||
}
|
||||
|
||||
module_init(crypto_ofb_module_init);
|
||||
module_exit(crypto_ofb_module_exit);
|
||||
|
||||
MODULE_LICENSE("GPL");
|
||||
MODULE_DESCRIPTION("OFB block cipher algorithm");
|
||||
MODULE_ALIAS_CRYPTO("ofb");
|
@ -50,6 +50,7 @@ int crypto_rng_reset(struct crypto_rng *tfm, const u8 *seed, unsigned int slen)
|
||||
}
|
||||
|
||||
err = crypto_rng_alg(tfm)->seed(tfm, seed, slen);
|
||||
crypto_stat_rng_seed(tfm, err);
|
||||
out:
|
||||
kzfree(buf);
|
||||
return err;
|
||||
|
@ -261,15 +261,6 @@ static int pkcs1pad_encrypt(struct akcipher_request *req)
|
||||
pkcs1pad_sg_set_buf(req_ctx->in_sg, req_ctx->in_buf,
|
||||
ctx->key_size - 1 - req->src_len, req->src);
|
||||
|
||||
req_ctx->out_buf = kmalloc(ctx->key_size, GFP_KERNEL);
|
||||
if (!req_ctx->out_buf) {
|
||||
kfree(req_ctx->in_buf);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
pkcs1pad_sg_set_buf(req_ctx->out_sg, req_ctx->out_buf,
|
||||
ctx->key_size, NULL);
|
||||
|
||||
akcipher_request_set_tfm(&req_ctx->child_req, ctx->child);
|
||||
akcipher_request_set_callback(&req_ctx->child_req, req->base.flags,
|
||||
pkcs1pad_encrypt_sign_complete_cb, req);
|
||||
|
@ -73,9 +73,9 @@ static int seqiv_aead_encrypt(struct aead_request *req)
|
||||
info = req->iv;
|
||||
|
||||
if (req->src != req->dst) {
|
||||
SKCIPHER_REQUEST_ON_STACK(nreq, ctx->sknull);
|
||||
SYNC_SKCIPHER_REQUEST_ON_STACK(nreq, ctx->sknull);
|
||||
|
||||
skcipher_request_set_tfm(nreq, ctx->sknull);
|
||||
skcipher_request_set_sync_tfm(nreq, ctx->sknull);
|
||||
skcipher_request_set_callback(nreq, req->base.flags,
|
||||
NULL, NULL);
|
||||
skcipher_request_set_crypt(nreq, req->src, req->dst,
|
||||
|
@ -73,13 +73,6 @@ int crypto_shash_setkey(struct crypto_shash *tfm, const u8 *key,
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(crypto_shash_setkey);
|
||||
|
||||
static inline unsigned int shash_align_buffer_size(unsigned len,
|
||||
unsigned long mask)
|
||||
{
|
||||
typedef u8 __aligned_largest u8_aligned;
|
||||
return len + (mask & ~(__alignof__(u8_aligned) - 1));
|
||||
}
|
||||
|
||||
static int shash_update_unaligned(struct shash_desc *desc, const u8 *data,
|
||||
unsigned int len)
|
||||
{
|
||||
@ -88,11 +81,17 @@ static int shash_update_unaligned(struct shash_desc *desc, const u8 *data,
|
||||
unsigned long alignmask = crypto_shash_alignmask(tfm);
|
||||
unsigned int unaligned_len = alignmask + 1 -
|
||||
((unsigned long)data & alignmask);
|
||||
u8 ubuf[shash_align_buffer_size(unaligned_len, alignmask)]
|
||||
__aligned_largest;
|
||||
/*
|
||||
* We cannot count on __aligned() working for large values:
|
||||
* https://patchwork.kernel.org/patch/9507697/
|
||||
*/
|
||||
u8 ubuf[MAX_ALGAPI_ALIGNMASK * 2];
|
||||
u8 *buf = PTR_ALIGN(&ubuf[0], alignmask + 1);
|
||||
int err;
|
||||
|
||||
if (WARN_ON(buf + unaligned_len > ubuf + sizeof(ubuf)))
|
||||
return -EINVAL;
|
||||
|
||||
if (unaligned_len > len)
|
||||
unaligned_len = len;
|
||||
|
||||
@ -124,11 +123,17 @@ static int shash_final_unaligned(struct shash_desc *desc, u8 *out)
|
||||
unsigned long alignmask = crypto_shash_alignmask(tfm);
|
||||
struct shash_alg *shash = crypto_shash_alg(tfm);
|
||||
unsigned int ds = crypto_shash_digestsize(tfm);
|
||||
u8 ubuf[shash_align_buffer_size(ds, alignmask)]
|
||||
__aligned_largest;
|
||||
/*
|
||||
* We cannot count on __aligned() working for large values:
|
||||
* https://patchwork.kernel.org/patch/9507697/
|
||||
*/
|
||||
u8 ubuf[MAX_ALGAPI_ALIGNMASK + HASH_MAX_DIGESTSIZE];
|
||||
u8 *buf = PTR_ALIGN(&ubuf[0], alignmask + 1);
|
||||
int err;
|
||||
|
||||
if (WARN_ON(buf + ds > ubuf + sizeof(ubuf)))
|
||||
return -EINVAL;
|
||||
|
||||
err = shash->final(desc, buf);
|
||||
if (err)
|
||||
goto out;
|
||||
@ -458,9 +463,9 @@ static int shash_prepare_alg(struct shash_alg *alg)
|
||||
{
|
||||
struct crypto_alg *base = &alg->base;
|
||||
|
||||
if (alg->digestsize > PAGE_SIZE / 8 ||
|
||||
alg->descsize > PAGE_SIZE / 8 ||
|
||||
alg->statesize > PAGE_SIZE / 8)
|
||||
if (alg->digestsize > HASH_MAX_DIGESTSIZE ||
|
||||
alg->descsize > HASH_MAX_DESCSIZE ||
|
||||
alg->statesize > HASH_MAX_STATESIZE)
|
||||
return -EINVAL;
|
||||
|
||||
base->cra_type = &crypto_shash_type;
|
||||
|
@ -949,6 +949,30 @@ struct crypto_skcipher *crypto_alloc_skcipher(const char *alg_name,
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(crypto_alloc_skcipher);
|
||||
|
||||
struct crypto_sync_skcipher *crypto_alloc_sync_skcipher(
|
||||
const char *alg_name, u32 type, u32 mask)
|
||||
{
|
||||
struct crypto_skcipher *tfm;
|
||||
|
||||
/* Only sync algorithms allowed. */
|
||||
mask |= CRYPTO_ALG_ASYNC;
|
||||
|
||||
tfm = crypto_alloc_tfm(alg_name, &crypto_skcipher_type2, type, mask);
|
||||
|
||||
/*
|
||||
* Make sure we do not allocate something that might get used with
|
||||
* an on-stack request: check the request size.
|
||||
*/
|
||||
if (!IS_ERR(tfm) && WARN_ON(crypto_skcipher_reqsize(tfm) >
|
||||
MAX_SYNC_SKCIPHER_REQSIZE)) {
|
||||
crypto_free_skcipher(tfm);
|
||||
return ERR_PTR(-EINVAL);
|
||||
}
|
||||
|
||||
return (struct crypto_sync_skcipher *)tfm;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(crypto_alloc_sync_skcipher);
|
||||
|
||||
int crypto_has_skcipher2(const char *alg_name, u32 type, u32 mask)
|
||||
{
|
||||
return crypto_type_has_alg(alg_name, &crypto_skcipher_type2,
|
||||
|
307
crypto/speck.c
307
crypto/speck.c
@ -1,307 +0,0 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* Speck: a lightweight block cipher
|
||||
*
|
||||
* Copyright (c) 2018 Google, Inc
|
||||
*
|
||||
* Speck has 10 variants, including 5 block sizes. For now we only implement
|
||||
* the variants Speck128/128, Speck128/192, Speck128/256, Speck64/96, and
|
||||
* Speck64/128. Speck${B}/${K} denotes the variant with a block size of B bits
|
||||
* and a key size of K bits. The Speck128 variants are believed to be the most
|
||||
* secure variants, and they use the same block size and key sizes as AES. The
|
||||
* Speck64 variants are less secure, but on 32-bit processors are usually
|
||||
* faster. The remaining variants (Speck32, Speck48, and Speck96) are even less
|
||||
* secure and/or not as well suited for implementation on either 32-bit or
|
||||
* 64-bit processors, so are omitted.
|
||||
*
|
||||
* Reference: "The Simon and Speck Families of Lightweight Block Ciphers"
|
||||
* https://eprint.iacr.org/2013/404.pdf
|
||||
*
|
||||
* In a correspondence, the Speck designers have also clarified that the words
|
||||
* should be interpreted in little-endian format, and the words should be
|
||||
* ordered such that the first word of each block is 'y' rather than 'x', and
|
||||
* the first key word (rather than the last) becomes the first round key.
|
||||
*/
|
||||
|
||||
#include <asm/unaligned.h>
|
||||
#include <crypto/speck.h>
|
||||
#include <linux/bitops.h>
|
||||
#include <linux/crypto.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/module.h>
|
||||
|
||||
/* Speck128 */
|
||||
|
||||
static __always_inline void speck128_round(u64 *x, u64 *y, u64 k)
|
||||
{
|
||||
*x = ror64(*x, 8);
|
||||
*x += *y;
|
||||
*x ^= k;
|
||||
*y = rol64(*y, 3);
|
||||
*y ^= *x;
|
||||
}
|
||||
|
||||
static __always_inline void speck128_unround(u64 *x, u64 *y, u64 k)
|
||||
{
|
||||
*y ^= *x;
|
||||
*y = ror64(*y, 3);
|
||||
*x ^= k;
|
||||
*x -= *y;
|
||||
*x = rol64(*x, 8);
|
||||
}
|
||||
|
||||
void crypto_speck128_encrypt(const struct speck128_tfm_ctx *ctx,
|
||||
u8 *out, const u8 *in)
|
||||
{
|
||||
u64 y = get_unaligned_le64(in);
|
||||
u64 x = get_unaligned_le64(in + 8);
|
||||
int i;
|
||||
|
||||
for (i = 0; i < ctx->nrounds; i++)
|
||||
speck128_round(&x, &y, ctx->round_keys[i]);
|
||||
|
||||
put_unaligned_le64(y, out);
|
||||
put_unaligned_le64(x, out + 8);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(crypto_speck128_encrypt);
|
||||
|
||||
static void speck128_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
|
||||
{
|
||||
crypto_speck128_encrypt(crypto_tfm_ctx(tfm), out, in);
|
||||
}
|
||||
|
||||
void crypto_speck128_decrypt(const struct speck128_tfm_ctx *ctx,
|
||||
u8 *out, const u8 *in)
|
||||
{
|
||||
u64 y = get_unaligned_le64(in);
|
||||
u64 x = get_unaligned_le64(in + 8);
|
||||
int i;
|
||||
|
||||
for (i = ctx->nrounds - 1; i >= 0; i--)
|
||||
speck128_unround(&x, &y, ctx->round_keys[i]);
|
||||
|
||||
put_unaligned_le64(y, out);
|
||||
put_unaligned_le64(x, out + 8);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(crypto_speck128_decrypt);
|
||||
|
||||
static void speck128_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
|
||||
{
|
||||
crypto_speck128_decrypt(crypto_tfm_ctx(tfm), out, in);
|
||||
}
|
||||
|
||||
int crypto_speck128_setkey(struct speck128_tfm_ctx *ctx, const u8 *key,
|
||||
unsigned int keylen)
|
||||
{
|
||||
u64 l[3];
|
||||
u64 k;
|
||||
int i;
|
||||
|
||||
switch (keylen) {
|
||||
case SPECK128_128_KEY_SIZE:
|
||||
k = get_unaligned_le64(key);
|
||||
l[0] = get_unaligned_le64(key + 8);
|
||||
ctx->nrounds = SPECK128_128_NROUNDS;
|
||||
for (i = 0; i < ctx->nrounds; i++) {
|
||||
ctx->round_keys[i] = k;
|
||||
speck128_round(&l[0], &k, i);
|
||||
}
|
||||
break;
|
||||
case SPECK128_192_KEY_SIZE:
|
||||
k = get_unaligned_le64(key);
|
||||
l[0] = get_unaligned_le64(key + 8);
|
||||
l[1] = get_unaligned_le64(key + 16);
|
||||
ctx->nrounds = SPECK128_192_NROUNDS;
|
||||
for (i = 0; i < ctx->nrounds; i++) {
|
||||
ctx->round_keys[i] = k;
|
||||
speck128_round(&l[i % 2], &k, i);
|
||||
}
|
||||
break;
|
||||
case SPECK128_256_KEY_SIZE:
|
||||
k = get_unaligned_le64(key);
|
||||
l[0] = get_unaligned_le64(key + 8);
|
||||
l[1] = get_unaligned_le64(key + 16);
|
||||
l[2] = get_unaligned_le64(key + 24);
|
||||
ctx->nrounds = SPECK128_256_NROUNDS;
|
||||
for (i = 0; i < ctx->nrounds; i++) {
|
||||
ctx->round_keys[i] = k;
|
||||
speck128_round(&l[i % 3], &k, i);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(crypto_speck128_setkey);
|
||||
|
||||
static int speck128_setkey(struct crypto_tfm *tfm, const u8 *key,
|
||||
unsigned int keylen)
|
||||
{
|
||||
return crypto_speck128_setkey(crypto_tfm_ctx(tfm), key, keylen);
|
||||
}
|
||||
|
||||
/* Speck64 */
|
||||
|
||||
static __always_inline void speck64_round(u32 *x, u32 *y, u32 k)
|
||||
{
|
||||
*x = ror32(*x, 8);
|
||||
*x += *y;
|
||||
*x ^= k;
|
||||
*y = rol32(*y, 3);
|
||||
*y ^= *x;
|
||||
}
|
||||
|
||||
static __always_inline void speck64_unround(u32 *x, u32 *y, u32 k)
|
||||
{
|
||||
*y ^= *x;
|
||||
*y = ror32(*y, 3);
|
||||
*x ^= k;
|
||||
*x -= *y;
|
||||
*x = rol32(*x, 8);
|
||||
}
|
||||
|
||||
void crypto_speck64_encrypt(const struct speck64_tfm_ctx *ctx,
|
||||
u8 *out, const u8 *in)
|
||||
{
|
||||
u32 y = get_unaligned_le32(in);
|
||||
u32 x = get_unaligned_le32(in + 4);
|
||||
int i;
|
||||
|
||||
for (i = 0; i < ctx->nrounds; i++)
|
||||
speck64_round(&x, &y, ctx->round_keys[i]);
|
||||
|
||||
put_unaligned_le32(y, out);
|
||||
put_unaligned_le32(x, out + 4);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(crypto_speck64_encrypt);
|
||||
|
||||
static void speck64_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
|
||||
{
|
||||
crypto_speck64_encrypt(crypto_tfm_ctx(tfm), out, in);
|
||||
}
|
||||
|
||||
void crypto_speck64_decrypt(const struct speck64_tfm_ctx *ctx,
|
||||
u8 *out, const u8 *in)
|
||||
{
|
||||
u32 y = get_unaligned_le32(in);
|
||||
u32 x = get_unaligned_le32(in + 4);
|
||||
int i;
|
||||
|
||||
for (i = ctx->nrounds - 1; i >= 0; i--)
|
||||
speck64_unround(&x, &y, ctx->round_keys[i]);
|
||||
|
||||
put_unaligned_le32(y, out);
|
||||
put_unaligned_le32(x, out + 4);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(crypto_speck64_decrypt);
|
||||
|
||||
static void speck64_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
|
||||
{
|
||||
crypto_speck64_decrypt(crypto_tfm_ctx(tfm), out, in);
|
||||
}
|
||||
|
||||
int crypto_speck64_setkey(struct speck64_tfm_ctx *ctx, const u8 *key,
|
||||
unsigned int keylen)
|
||||
{
|
||||
u32 l[3];
|
||||
u32 k;
|
||||
int i;
|
||||
|
||||
switch (keylen) {
|
||||
case SPECK64_96_KEY_SIZE:
|
||||
k = get_unaligned_le32(key);
|
||||
l[0] = get_unaligned_le32(key + 4);
|
||||
l[1] = get_unaligned_le32(key + 8);
|
||||
ctx->nrounds = SPECK64_96_NROUNDS;
|
||||
for (i = 0; i < ctx->nrounds; i++) {
|
||||
ctx->round_keys[i] = k;
|
||||
speck64_round(&l[i % 2], &k, i);
|
||||
}
|
||||
break;
|
||||
case SPECK64_128_KEY_SIZE:
|
||||
k = get_unaligned_le32(key);
|
||||
l[0] = get_unaligned_le32(key + 4);
|
||||
l[1] = get_unaligned_le32(key + 8);
|
||||
l[2] = get_unaligned_le32(key + 12);
|
||||
ctx->nrounds = SPECK64_128_NROUNDS;
|
||||
for (i = 0; i < ctx->nrounds; i++) {
|
||||
ctx->round_keys[i] = k;
|
||||
speck64_round(&l[i % 3], &k, i);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(crypto_speck64_setkey);
|
||||
|
||||
static int speck64_setkey(struct crypto_tfm *tfm, const u8 *key,
|
||||
unsigned int keylen)
|
||||
{
|
||||
return crypto_speck64_setkey(crypto_tfm_ctx(tfm), key, keylen);
|
||||
}
|
||||
|
||||
/* Algorithm definitions */
|
||||
|
||||
static struct crypto_alg speck_algs[] = {
|
||||
{
|
||||
.cra_name = "speck128",
|
||||
.cra_driver_name = "speck128-generic",
|
||||
.cra_priority = 100,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_CIPHER,
|
||||
.cra_blocksize = SPECK128_BLOCK_SIZE,
|
||||
.cra_ctxsize = sizeof(struct speck128_tfm_ctx),
|
||||
.cra_module = THIS_MODULE,
|
||||
.cra_u = {
|
||||
.cipher = {
|
||||
.cia_min_keysize = SPECK128_128_KEY_SIZE,
|
||||
.cia_max_keysize = SPECK128_256_KEY_SIZE,
|
||||
.cia_setkey = speck128_setkey,
|
||||
.cia_encrypt = speck128_encrypt,
|
||||
.cia_decrypt = speck128_decrypt
|
||||
}
|
||||
}
|
||||
}, {
|
||||
.cra_name = "speck64",
|
||||
.cra_driver_name = "speck64-generic",
|
||||
.cra_priority = 100,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_CIPHER,
|
||||
.cra_blocksize = SPECK64_BLOCK_SIZE,
|
||||
.cra_ctxsize = sizeof(struct speck64_tfm_ctx),
|
||||
.cra_module = THIS_MODULE,
|
||||
.cra_u = {
|
||||
.cipher = {
|
||||
.cia_min_keysize = SPECK64_96_KEY_SIZE,
|
||||
.cia_max_keysize = SPECK64_128_KEY_SIZE,
|
||||
.cia_setkey = speck64_setkey,
|
||||
.cia_encrypt = speck64_encrypt,
|
||||
.cia_decrypt = speck64_decrypt
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
static int __init speck_module_init(void)
|
||||
{
|
||||
return crypto_register_algs(speck_algs, ARRAY_SIZE(speck_algs));
|
||||
}
|
||||
|
||||
static void __exit speck_module_exit(void)
|
||||
{
|
||||
crypto_unregister_algs(speck_algs, ARRAY_SIZE(speck_algs));
|
||||
}
|
||||
|
||||
module_init(speck_module_init);
|
||||
module_exit(speck_module_exit);
|
||||
|
||||
MODULE_DESCRIPTION("Speck block cipher (generic)");
|
||||
MODULE_LICENSE("GPL");
|
||||
MODULE_AUTHOR("Eric Biggers <ebiggers@google.com>");
|
||||
MODULE_ALIAS_CRYPTO("speck128");
|
||||
MODULE_ALIAS_CRYPTO("speck128-generic");
|
||||
MODULE_ALIAS_CRYPTO("speck64");
|
||||
MODULE_ALIAS_CRYPTO("speck64-generic");
|
@ -76,8 +76,7 @@ static char *check[] = {
|
||||
"cast6", "arc4", "michael_mic", "deflate", "crc32c", "tea", "xtea",
|
||||
"khazad", "wp512", "wp384", "wp256", "tnepres", "xeta", "fcrypt",
|
||||
"camellia", "seed", "salsa20", "rmd128", "rmd160", "rmd256", "rmd320",
|
||||
"lzo", "cts", "zlib", "sha3-224", "sha3-256", "sha3-384", "sha3-512",
|
||||
NULL
|
||||
"lzo", "cts", "sha3-224", "sha3-256", "sha3-384", "sha3-512", NULL
|
||||
};
|
||||
|
||||
static u32 block_sizes[] = { 16, 64, 256, 1024, 8192, 0 };
|
||||
@ -1103,6 +1102,9 @@ static void test_ahash_speed_common(const char *algo, unsigned int secs,
|
||||
break;
|
||||
}
|
||||
|
||||
if (speed[i].klen)
|
||||
crypto_ahash_setkey(tfm, tvmem[0], speed[i].klen);
|
||||
|
||||
pr_info("test%3u "
|
||||
"(%5u byte blocks,%5u bytes per update,%4u updates): ",
|
||||
i, speed[i].blen, speed[i].plen, speed[i].blen / speed[i].plen);
|
||||
@ -1733,6 +1735,7 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
|
||||
ret += tcrypt_test("xts(aes)");
|
||||
ret += tcrypt_test("ctr(aes)");
|
||||
ret += tcrypt_test("rfc3686(ctr(aes))");
|
||||
ret += tcrypt_test("ofb(aes)");
|
||||
break;
|
||||
|
||||
case 11:
|
||||
@ -1878,10 +1881,6 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
|
||||
ret += tcrypt_test("ecb(seed)");
|
||||
break;
|
||||
|
||||
case 44:
|
||||
ret += tcrypt_test("zlib");
|
||||
break;
|
||||
|
||||
case 45:
|
||||
ret += tcrypt_test("rfc4309(ccm(aes))");
|
||||
break;
|
||||
@ -2033,6 +2032,8 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
|
||||
break;
|
||||
case 191:
|
||||
ret += tcrypt_test("ecb(sm4)");
|
||||
ret += tcrypt_test("cbc(sm4)");
|
||||
ret += tcrypt_test("ctr(sm4)");
|
||||
break;
|
||||
case 200:
|
||||
test_cipher_speed("ecb(aes)", ENCRYPT, sec, NULL, 0,
|
||||
@ -2282,6 +2283,20 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
|
||||
num_mb);
|
||||
break;
|
||||
|
||||
case 218:
|
||||
test_cipher_speed("ecb(sm4)", ENCRYPT, sec, NULL, 0,
|
||||
speed_template_16);
|
||||
test_cipher_speed("ecb(sm4)", DECRYPT, sec, NULL, 0,
|
||||
speed_template_16);
|
||||
test_cipher_speed("cbc(sm4)", ENCRYPT, sec, NULL, 0,
|
||||
speed_template_16);
|
||||
test_cipher_speed("cbc(sm4)", DECRYPT, sec, NULL, 0,
|
||||
speed_template_16);
|
||||
test_cipher_speed("ctr(sm4)", ENCRYPT, sec, NULL, 0,
|
||||
speed_template_16);
|
||||
test_cipher_speed("ctr(sm4)", DECRYPT, sec, NULL, 0,
|
||||
speed_template_16);
|
||||
break;
|
||||
case 300:
|
||||
if (alg) {
|
||||
test_hash_speed(alg, sec, generic_hash_speed_template);
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user